def test_simple_eval_nonEmpty(self):
        """Check f1_emnlp2020 on partially overlapping, non-empty inputs.

        Three predictions are scored against four gold answers; only the
        "asparagus was dry before, and wet afterwards." sentence appears
        verbatim in both lists. Each metric (exact, BLEU, ROUGE) has its own
        expected 3-tuple.

        NOTE(review): the exact-match expectation (1/3, 1/4, 0.2857...) is
        consistent with a (precision, recall, F1) layout for one match among
        3 predictions and 4 gold answers -- confirm against f1_emnlp2020.
        """
        gold_vals = [
            "oil was cool before, and hot afterwards.",
            "asparagus was dry before, and wet afterwards.",
            "pan was cool before, and hot afterwards.",
            "asparagus was crispy before, and softened afterwards."
        ]
        prediction_vals = [
            "asparagus was dry before, and wet afterwards.",
            "pan was cold before, and warm afterwards.",
            "ham was cold before, and hot afterwards."
        ]

        # One expected tuple per metric, in the same order as the metric
        # list passed to zip() below.
        expected_metric_vals = [
            (0.3333333333333333, 0.25, 0.28571428571428575),
            (0.33333334168996315, 0.2500000094949585, 0.2857142949848573),
            (0.55555555555555547, 0.49999999999999994, 0.52631578947368407)
        ]
        for metric, expected_metric_val in zip(
            [self.exact_metric, self.bleu_metric, self.rouge_metric],
                expected_metric_vals):
            f1_nonempty = f1_emnlp2020(predictions=prediction_vals,
                                       gold=gold_vals,
                                       generation_metric=metric)
            print(f"metric:{metric.name()} val={f1_nonempty}")
            # Scores are computed floats, so bit-exact equality is brittle
            # across platforms and library versions. A relative tolerance of
            # 1e-9 absorbs rounding noise while still telling the BLEU
            # expectation apart from the exact-match one (they differ by
            # roughly 2.5e-8 relative).
            assert f1_nonempty == pytest.approx(expected_metric_val,
                                                rel=1e-9)
 def test_simple_eval_bothEmptyStrings(self):
     """Score a single empty-string prediction against a single empty-string gold.

     For each of the exact, BLEU and ROUGE metrics the result of
     f1_emnlp2020 must equal (1.0, 1.0, 1.0).
     """
     metrics = (self.exact_metric, self.bleu_metric, self.rouge_metric)
     for generation_metric in metrics:
         # Fresh one-element lists are built for every call.
         scores = f1_emnlp2020(
             predictions=[""],
             gold=[""],
             generation_metric=generation_metric,
         )
         print(f"{generation_metric.name()}\t{scores}")
         assert scores == (1.0, 1.0, 1.0)
    def test_simple_eval_oneEmptyString(self):
        """Score an empty-string side against a non-empty side, both ways.

        First the empty list is used as predictions with real gold answers,
        then the roles are swapped. In both directions, and for every metric
        (exact, BLEU, ROUGE), f1_emnlp2020 must return (0.0, 0.0, 0.0).
        """
        blank = [""]
        answers = [
            "roasting pan was light before, and heavier afterwards.",
            "oil was in bottle before, and on pan afterwards."
        ]

        # (predictions, gold) pairs, in the order they are exercised.
        directions = ((blank, answers), (answers, blank))
        for predicted, reference in directions:
            for generation_metric in (self.exact_metric,
                                      self.bleu_metric,
                                      self.rouge_metric):
                scores = f1_emnlp2020(
                    predictions=predicted,
                    gold=reference,
                    generation_metric=generation_metric,
                )
                assert scores == (0.0, 0.0, 0.0)
    def test_simple_eval_for_normalization(self):
        """Score "oven rack" against "oven racks" with the exact and BLEU metrics.

        Each of the three returned values must be within a 0.1 relative
        tolerance of 1.0.

        NOTE(review): presumably the metrics normalize the singular/plural
        difference before comparing -- that logic is not visible here.
        """
        references = ["oven racks"]
        candidates = ["oven rack"]

        # Each metric paired with the tuple it is expected to produce.
        cases = (
            (self.exact_metric, (1.0, 1.0, 1.0)),
            (self.bleu_metric, (1.0, 1.0, 1.0)),
        )
        for generation_metric, wanted in cases:
            scores = f1_emnlp2020(
                predictions=candidates,
                gold=references,
                generation_metric=generation_metric,
            )
            print(f"{generation_metric.name()}\t{scores}")
            for position, target in enumerate(wanted):
                assert scores[position] == pytest.approx(target, 0.1)
    def test_simple_eval_equal(self):
        """Score a list of sentences against itself with the exact and BLEU metrics.

        The very same list object is passed as both predictions and gold;
        each of the three returned values must be within a 0.1 relative
        tolerance of 1.0.
        """
        sentences = [
            "oil was cool before, and hot afterwards.",
            "asparagus was dry before, and wet afterwards.",
            "pan was cool before, and hot afterwards.",
            "asparagus was crispy before, and softened afterwards."
        ]

        # Each metric paired with the tuple it is expected to produce.
        cases = (
            (self.exact_metric, (1.0, 1.0, 1.0)),
            (self.bleu_metric, (1.0, 1.0, 1.0)),
        )
        for generation_metric, wanted in cases:
            print(generation_metric.name())
            scores = f1_emnlp2020(
                predictions=sentences,
                gold=sentences,
                generation_metric=generation_metric,
            )

            for position, target in enumerate(wanted):
                assert scores[position] == pytest.approx(target, 0.1)
 def test_simple_eval_bothEmpty(self):
     """Score an empty prediction list against an empty gold list.

     For each of the exact, BLEU and ROUGE metrics the result of
     f1_emnlp2020 must equal (1.0, 1.0, 1.0).
     """
     metrics = (self.exact_metric, self.bleu_metric, self.rouge_metric)
     for generation_metric in metrics:
         # Fresh empty lists are built for every call.
         scores = f1_emnlp2020(
             predictions=[],
             gold=[],
             generation_metric=generation_metric,
         )
         assert scores == (1.0, 1.0, 1.0)