Beispiel #1
0
    def test_multi_span_overlap_in_incorrect_cases(self):
        # only consider bags with matching numbers if they are present
        # F1 scores of:     1.0        2/3   0.0   0.0   0.0   0.0
        # Average them to get F1 of 0.28
        assert get_metrics(
            ["78-yard", "56", "28", "40", "44", "touchdown"],
            ["78-yard", "56 yard", "1 yard touchdown"],
        ) == (0.0, 0.28)

        # two copies of same value will account for only one match (using optimal 1-1 bag alignment)
        assert get_metrics(["23", "23 yard"], ["23-yard", "56 yards"]) == (0.0, 0.5)

        # matching done at individual span level and not pooled into one global bag
        assert get_metrics(["John Karman", "Joe Hardy"], ["Joe Karman", "John Hardy"]) == (0.0, 0.5)

        # macro-averaging F1 over spans
        assert get_metrics(
            ["ottoman", "Kantakouzenous"], ["ottoman", "army of Kantakouzenous"]
        ) == (0.0, 0.75)
Beispiel #2
0
 def test_simple_overlap_in_incorrect_cases(self):
     assert get_metrics([""], ["army"]) == (0.0, 0.0)
     assert get_metrics(["packers"], ["Green bay packers"]) == (0.0, 0.5)
     assert get_metrics(["packers"], ["Green bay"]) == (0.0, 0.0)
     # if the numbers in the span don't match f1 is 0
     assert get_metrics(["yard"], ["36 yard td"]) == (0.0, 0.0)
     assert get_metrics(["23 yards"], ["43 yards"]) == (0.0, 0.0)
     # however, if number matches its not given extra weight over the non-functional words
     assert get_metrics(["56 yards"], ["56 yd"]) == (0.0, 0.5)
     assert get_metrics(["26"], ["26 yard td"]) == (0.0, 0.5)
Beispiel #3
0
    def test_metric_is_length_aware(self):
        # Overall F1 should be mean([1.0, 0.0])
        assert get_metrics(predicted=["td"], gold=["td", "td"]) == (0.0, 0.5)
        assert get_metrics("td", ["td", "td"]) == (0.0, 0.5)
        # Overall F1 should be mean ([1.0, 0.0]) = 0.5
        assert get_metrics(predicted=["td", "td"], gold=["td"]) == (0.0, 0.5)
        assert get_metrics(predicted=["td", "td"], gold="td") == (0.0, 0.5)

        # F1 score is mean([0.0, 0.0, 1.0, 0.0, 0.0, 0.0])
        assert get_metrics(
            predicted=["the", "fat", "cat", "the fat", "fat cat", "the fat cat"], gold=["cat"]
        ) == (0.0, 0.17)
        assert get_metrics(
            predicted=["cat"], gold=["the", "fat", "cat", "the fat", "fat cat", "the fat cat"]
        ) == (0.0, 0.17)
        # F1 score is mean([1.0, 0.5, 0.0, 0.0, 0.0, 0.0])
        assert get_metrics(
            predicted=["the", "fat", "cat", "the fat", "fat cat", "the fat cat"],
            gold=["cat", "cat dog"],
        ) == (0.0, 0.25)
Beispiel #4
0
 def test_overlap_in_correct_cases(self):
     assert get_metrics(["Green bay packers"], ["Green bay packers"]) == (1.0, 1.0)
     assert get_metrics(["Green bay", "packers"], ["Green bay", "packers"]) == (1.0, 1.0)
     assert get_metrics(["Green", "bay", "packers"], ["Green", "bay", "packers"]) == (1.0, 1.0)
Beispiel #5
0
 def test_casing_is_ignored(self):
     assert get_metrics(["This was a triumph"], ["tHIS Was A TRIUMPH"]) == (1.0, 1.0)
Beispiel #6
0
 def test_splitting_on_hyphens(self):
     assert get_metrics(["78-yard"], ["78 yard"]) == (1.0, 1.0)
     assert get_metrics(["78 yard"], ["78-yard"]) == (1.0, 1.0)
     assert get_metrics(["78"], ["78-yard"]) == (0.0, 0.67)
     assert get_metrics(["78-yard"], ["78"]) == (0.0, 0.67)
Beispiel #7
0
 def test_periods_commas_and_spaces_are_ignored(self):
     assert get_metrics(["Per.i.o.d...."], [".P....e.r,,i;;;o...d,,"]) == (1.0, 1.0)
     assert get_metrics(["Spa     c  e   s     "], ["    Spa c     e s"]) == (1.0, 1.0)
Beispiel #8
0
 def test_f1_ignores_word_order(self):
     assert get_metrics(["John Elton"], ["Elton John"]) == (0.0, 1.0)
     assert get_metrics(["50 yard"], ["yard 50"]) == (0.0, 1.0)
     assert get_metrics(["order word right"], ["right word order"]) == (0.0, 1.0)
Beispiel #9
0
 def test_articles_are_ignored(self):
     assert get_metrics(["td"], ["the td"]) == (1.0, 1.0)
     assert get_metrics(["the a NOT an ARTICLE the an a"], ["NOT ARTICLE"]) == (1.0, 1.0)
Beispiel #10
0
 def test_float_numbers(self):
     assert get_metrics(["78"], ["78.0"]) == (1.0, 1.0)
Beispiel #11
0
 def test_order_invariance(self):
     assert get_metrics(["a"], ["a", "b"]) == (0, 0.5)
     assert get_metrics(["b"], ["a", "b"]) == (0, 0.5)
     assert get_metrics(["b"], ["b", "a"]) == (0, 0.5)