def test_not_embedding(self):
        # Pins features.not_embedding for a pronoun at span (3, 3) paired
        # with a nominal mention at span (0, 4).

        # "it" (fine_type "PERS") against the nominal: expected True.
        personal_pronoun = Mention(None, Span(3, 3), {
            "tokens": ["it"],
            "type": "PRO",
            "fine_type": "PERS",
        })
        clause_with_it = Mention(None, Span(0, 4), {
            "tokens": ["the", "company", "which", "it", "bought"],
            "type": "NOM",
        })
        self.assertEqual(
            True, features.not_embedding(personal_pronoun, clause_with_it))

        # "its" (fine_type "POSS_ADJ") against the nominal: expected False.
        # NOTE(review): this nominal lists six tokens but keeps Span(0, 4);
        # confirm the fixture is intentional.
        possessive_pronoun = Mention(None, Span(3, 3), {
            "tokens": ["its"],
            "type": "PRO",
            "fine_type": "POSS_ADJ",
        })
        clause_with_its = Mention(None, Span(0, 4), {
            "tokens": ["the", "company", "which", "loves", "its", "success"],
            "type": "NOM",
        })
        self.assertEqual(
            False, features.not_embedding(possessive_pronoun, clause_with_its))
 def test_non_pronominal_string_match(self):
     # Pins features.non_pronominal_string_match on four mention pairs.

     def build(begin, end, tokens, pos, mention_type):
         # Document-less mention carrying only tokens, POS tags and type.
         return Mention(None, Span(begin, end), {
             "tokens": tokens,
             "pos": pos,
             "type": mention_type,
         })

     # Nominals that differ only by the determiner and the possessive
     # marker: expected True.
     possessive_np = build(
         0, 4,
         ["the", "newly-elect", "leader", "'s", "wife"],
         ["DT", "JJ", "NN", "POS", "NN"],
         "NOM")
     bare_np = build(
         5, 7, ["newly-elect", "leader", "wife"], ["JJ", "NN", "NN"], "NOM")
     self.assertEqual(
         True, features.non_pronominal_string_match(possessive_np, bare_np))

     # The same possessive nominal against the single token "leader":
     # expected False.
     possessive_np_again = build(
         0, 4,
         ["the", "newly-elect", "leader", "'s", "wife"],
         ["DT", "JJ", "NN", "POS", "NN"],
         "NOM")
     leader = build(5, 5, ["leader"], ["NN"], "NOM")
     self.assertEqual(
         False,
         features.non_pronominal_string_match(possessive_np_again, leader))

     # Capitalised name "President" against nominal "the president":
     # expected True.
     president_name = build(0, 0, ["President"], ["NNP"], "NAM")
     president_nominal = build(1, 2, ["the", "president"], ["DT", "NN"], "NOM")
     self.assertEqual(
         True,
         features.non_pronominal_string_match(
             president_name, president_nominal))

     # Pronoun "it" against the name "IT": expected False.
     it_pronoun = build(0, 0, ["it"], ["PRP"], "PRO")
     it_name = build(1, 1, ["IT"], ["NNP"], "NAM")
     self.assertEqual(
         False, features.non_pronominal_string_match(it_pronoun, it_name))
 def test_get_modifiers(self):
     # Pins features.get_modifier on one seven-token mention whose
     # head_span is (12, 12): the expected result is the set
     # {"long-awaited", "new", "century"}.
     noun_phrase = Mention(None, Span(10, 16), {
         "tokens": [
             "the", "long-awaited", "beginning", "of", "a", "new", "century",
         ],
         "head_span": Span(12, 12),
         "pos": ["DT", "JJ", "NN", "IN", "DT", "JJ", "NN"],
     })
     expected_modifiers = {"long-awaited", "new", "century"}
     self.assertEqual(expected_modifiers, features.get_modifier(noun_phrase))
    def test_alias(self):
        # Pins features.alias on one pronoun pair and two name pairs.

        def named_entity(begin, end, head, ner, head_index):
            # Document-less "NAM" mention described by its head tokens,
            # their NER tags and the head index.
            return Mention(None, Span(begin, end), {
                "head": head,
                "type": "NAM",
                "ner": ner,
                "head_index": head_index,
            })

        # Two "he" pronouns: expected False.
        first_he = Mention(None, Span(0, 0), {
            "tokens": ["he"],
            "type": "PRO",
            "citation_form": "he",
        })
        second_he = Mention(None, Span(1, 1), {
            "tokens": ["he"],
            "type": "PRO",
            "citation_form": "he",
        })
        self.assertEqual(False, features.alias(first_he, second_he))

        # Two names with the identical three-token head: expected False.
        ibm_long = named_entity(
            0, 2, ["International", "Business", "Machines"],
            ["ORG", "ORG", "ORG"], 2)
        ibm_long_repeated = named_entity(
            3, 5, ["International", "Business", "Machines"],
            ["ORG", "ORG", "ORG"], 2)
        self.assertEqual(False, features.alias(ibm_long, ibm_long_repeated))

        # Three-token head against the head "IBM": expected True.
        ibm_spelled_out = named_entity(
            0, 2, ["International", "Business", "Machines"],
            ["ORG", "ORG", "ORG"], 2)
        ibm_acronym = named_entity(3, 3, ["IBM"], ["ORG"], 0)
        self.assertEqual(True, features.alias(ibm_spelled_out, ibm_acronym))
    def test_pronoun_same_canonical_form(self):
        # Pins features.pronoun_same_canonical_form on three mention pairs.

        def he_form(position, token):
            # Single-token pronoun whose citation_form is "he".
            return Mention(None, Span(position, position), {
                "tokens": [token],
                "type": "PRO",
                "citation_form": "he",
            })

        # "he" / "he": expected True.
        subject = he_form(0, "he")
        subject_again = he_form(1, "he")
        self.assertEqual(
            True,
            features.pronoun_same_canonical_form(subject, subject_again))

        # "he" / "him", both with citation_form "he": expected True.
        nominative = he_form(0, "he")
        accusative = he_form(1, "him")
        self.assertEqual(
            True,
            features.pronoun_same_canonical_form(nominative, accusative))

        # Name "US" against pronoun "us" (neither fixture carries a
        # citation_form): expected False.
        country_name = Mention(None, Span(0, 0), {
            "tokens": ["US"],
            "type": "NAM",
        })
        us_pronoun = Mention(None, Span(1, 1), {
            "tokens": ["us"],
            "type": "PRO",
        })
        self.assertEqual(
            False,
            features.pronoun_same_canonical_form(country_name, us_pronoun))
Beispiel #6
0
    def test_get_string_representation(self):
        expected = """#begin document (/test2); part 000
test2	0	0	This	NN	(NP*	-	-	-	-	-	(0|(1)
test2	0	1	is	NN	*	-	-	-	-	-	0)
test2	0	2	just	NN	*	-	-	-	-	-	-
test2	0	3	a	NN	*	-	-	-	-	-	-
test2	0	4	test	NN	*	-	-	-	-	-	(1
test2	0	5	.	NN	*)	-	-	-	-	-	(2|1)

test2	0	0	It	NN	(NP*	-	-	-	-	-	2)
test2	0	1	shows	NN	*	-	-	-	-	-	(3)
test2	0	2	that	NN	*	-	-	-	-	-	(3)
test2	0	3	the	NN	*	-	-	-	-	-	-
test2	0	4	scorer	NN	*	-	-	-	-	-	-
test2	0	5	works	NN	*	-	-	-	-	-	-
test2	0	6	.	NN	*)	-	-	-	-	-	-
#end document
"""

        self.complicated_mention_document.system_mentions = [
            Mention(self.complicated_mention_document, Span(0, 0),
                    {"set_id": 1}),
            Mention(self.complicated_mention_document, Span(0, 1),
                    {"set_id": 0}),
            Mention(self.complicated_mention_document, Span(4, 5),
                    {"set_id": 1}),
            Mention(self.complicated_mention_document, Span(5, 6),
                    {"set_id": 2}),
            Mention(self.complicated_mention_document, Span(7, 7),
                    {"set_id": 3}),
            Mention(self.complicated_mention_document, Span(8, 8),
                    {"set_id": 3}),
        ]

        self.assertEqual(
            expected,
            self.complicated_mention_document.get_string_representation())
 def test_head_match(self):
     # Pins features.head_match on five mention pairs.

     def build(begin, end, tokens, head, mention_type, semantic_class):
         # Document-less mention with tokens, head, type and semantic class.
         return Mention(None, Span(begin, end), {
             "tokens": tokens,
             "head": head,
             "type": mention_type,
             "semantic_class": semantic_class,
         })

     # Both nominals have head ["wife"]: expected True.
     leaders_wife = build(
         0, 4, ["the", "newly-elect", "leader", "'s", "wife"],
         ["wife"], "NOM", "PERSON")
     the_wife = build(5, 6, ["the", "wife"], ["wife"], "NOM", "PERSON")
     self.assertEqual(True, features.head_match(leaders_wife, the_wife))

     # Heads ["wife"] and ["leader"]: expected False.
     leaders_wife_again = build(
         0, 4, ["the", "newly-elect", "leader", "'s", "wife"],
         ["wife"], "NOM", "PERSON")
     leader = build(5, 5, ["leader"], ["leader"], "NOM", "PERSON")
     self.assertEqual(False, features.head_match(leaders_wife_again, leader))

     # Heads ["President"] (NAM) and ["president"] (NOM): expected True.
     president_name = build(
         0, 0, ["President"], ["President"], "NAM", "PERSON")
     president_nominal = build(
         1, 2, ["the", "president"], ["president"], "NOM", "PERSON")
     self.assertEqual(
         True, features.head_match(president_name, president_nominal))

     # Two "it" pronouns with the same head: expected False.
     first_it = build(0, 0, ["it"], ["it"], "PRO", "OBJECT")
     second_it = build(1, 1, ["it"], ["it"], "PRO", "OBJECT")
     self.assertEqual(False, features.head_match(first_it, second_it))

     # Two NUMERIC nominals sharing the head ["percent"]: expected False.
     ten_percent = build(
         0, 1, ["10", "percent"], ["percent"], "NOM", "NUMERIC")
     some_percent = build(
         2, 3, ["Some", "percent"], ["percent"], "NOM", "NUMERIC")
     self.assertEqual(False, features.head_match(ten_percent, some_percent))
    def test_not_modifier(self):
        # Pins features.not_modifier on three pairs of nominal mentions.

        def nominal(begin, end, tokens, head_position, pos):
            # Document-less "NOM" mention with a single-token head span.
            return Mention(None, Span(begin, end), {
                "tokens": tokens,
                "type": "NOM",
                "head_span": Span(head_position, head_position),
                "pos": pos,
            })

        long_tokens = [
            "the", "long-awaited", "beginning", "of", "a", "new", "century",
        ]
        long_pos = ["DT", "JJ", "NN", "IN", "DT", "JJ", "NN"]

        # Seven-token mention first, "the beginning" second: expected True.
        modified_first = nominal(10, 16, list(long_tokens), 12, list(long_pos))
        plain_second = nominal(0, 1, ["the", "beginning"], 1, ["DT", "NN"])
        self.assertEqual(
            True, features.not_modifier(modified_first, plain_second))

        # "the beginning" first, seven-token mention second: expected False.
        plain_first = nominal(18, 19, ["the", "beginning"], 19, ["DT", "NN"])
        modified_second = nominal(
            10, 16, list(long_tokens), 12, list(long_pos))
        self.assertEqual(
            False, features.not_modifier(plain_first, modified_second))

        # "cool people" against "Cool people": expected False.
        lower_cased = nominal(18, 19, ["cool", "people"], 19, ["JJ", "NNS"])
        capitalised = nominal(10, 11, ["Cool", "people"], 11, ["JJ", "NNS"])
        self.assertEqual(
            False, features.not_modifier(lower_cased, capitalised))
    def test_not_compatible(self):
        # Pins features.not_compatible on three pairs of one-token mentions.

        def single_token(position, token, pos_tag, mention_type, number,
                         gender, semantic_class):
            # Document-less one-token mention with agreement attributes.
            return Mention(None, Span(position, position), {
                "tokens": [token],
                "pos": [pos_tag],
                "type": mention_type,
                "number": number,
                "gender": gender,
                "semantic_class": semantic_class,
            })

        # MALE "he" against FEMALE "she": expected True.
        he = single_token(0, "he", "PRP", "PRO", "SINGULAR", "MALE", "PERSON")
        she = single_token(
            1, "she", "PRP", "PRO", "SINGULAR", "FEMALE", "PERSON")
        self.assertEqual(True, features.not_compatible(he, she))

        # "he" against a nominal with UNKNOWN number and gender:
        # expected False.
        he_again = single_token(
            0, "he", "PRP", "PRO", "SINGULAR", "MALE", "PERSON")
        unknown_nominal = single_token(
            1, "slawabu", "NN", "NOM", "UNKNOWN", "UNKNOWN", "PERSON")
        self.assertEqual(
            False, features.not_compatible(he_again, unknown_nominal))

        # Two "Jesus" names that differ in gender and semantic class:
        # expected False.
        jesus_person = single_token(
            0, "Jesus", "NNP", "NAM", "SINGULAR", "MALE", "PERSON")
        jesus_norp = single_token(
            1, "Jesus", "NNP", "NAM", "SINGULAR", "UNKNOWN", "NORP")
        self.assertEqual(
            False, features.not_compatible(jesus_person, jesus_norp))
Beispiel #10
0
    def test_not_speaker(self):
        # Pins features.not_speaker on four pronoun pairs; the first
        # mention sits at span (3, 3), the second at span (0, 0).

        def pronoun(position, token, citation_form, speaker):
            # Document-less one-token pronoun attributed to a speaker.
            return Mention(None, Span(position, position), {
                "tokens": [token],
                "type": "PRO",
                "citation_form": citation_form,
                "speaker": speaker,
            })

        # "I" (snafu) against "I" (foo): expected True.
        i_by_snafu = pronoun(3, "I", "i", "snafu")
        i_by_foo = pronoun(0, "I", "i", "foo")
        self.assertEqual(True, features.not_speaker(i_by_snafu, i_by_foo))

        # "us" (snafu) against "we" (foo): expected True.
        us_by_snafu = pronoun(3, "us", "we", "snafu")
        we_by_foo = pronoun(0, "we", "we", "foo")
        self.assertEqual(True, features.not_speaker(us_by_snafu, we_by_foo))

        # "you" (snafu) against "I" (foo): expected False.
        you_by_snafu = pronoun(3, "you", "you", "snafu")
        other_i_by_foo = pronoun(0, "I", "i", "foo")
        self.assertEqual(
            False, features.not_speaker(you_by_snafu, other_i_by_foo))

        # "you" (snafu) against "I" (snafu): expected True.
        you_same_speaker = pronoun(3, "you", "you", "snafu")
        i_same_speaker = pronoun(0, "I", "i", "snafu")
        self.assertEqual(
            True, features.not_speaker(you_same_speaker, i_same_speaker))
Beispiel #11
0
    def test_not_pronoun_distance(self):
        # Pins features.not_pronoun_distance on four mention pairs that
        # vary the pronoun and the sentence_id gap.

        def pronoun(position, token, citation_form, sentence_id):
            # Document-less one-token pronoun located in a given sentence.
            return Mention(None, Span(position, position), {
                "tokens": [token],
                "type": "PRO",
                "citation_form": citation_form,
                "sentence_id": sentence_id,
            })

        def company():
            # Nominal "company" at span (0, 0) in sentence 0.
            return Mention(None, Span(0, 0), {
                "tokens": ["company"],
                "type": "NOM",
                "sentence_id": 0,
            })

        # "he" in sentence 10 against "he" in sentence 0: expected False.
        he_late = pronoun(0, "he", "he", 10)
        he_early = pronoun(100, "he", "he", 0)
        self.assertEqual(
            False, features.not_pronoun_distance(he_late, he_early))

        # "it" in sentence 10 against "company" in sentence 0: expected True.
        it_late = pronoun(100, "it", "it", 10)
        self.assertEqual(
            True, features.not_pronoun_distance(it_late, company()))

        # "them" in sentence 10 against "company" in sentence 0:
        # expected False.
        them_late = pronoun(100, "them", "they", 10)
        self.assertEqual(
            False, features.not_pronoun_distance(them_late, company()))

        # "them" in sentence 1 against "company" in sentence 0:
        # expected False.
        them_adjacent = pronoun(100, "them", "they", 1)
        self.assertEqual(
            False, features.not_pronoun_distance(them_adjacent, company()))
Beispiel #12
0
    def test_is_coreferent_with(self):
        # Pins Mention.is_coreferent_with for mentions without a document,
        # within one document, and across two documents.

        # No document, same annotated_set_id: expected True.
        left = Mention(None, Span(0, 0), {"annotated_set_id": 1})
        right = Mention(None, Span(3, 4), {"annotated_set_id": 1})
        self.assertEqual(True, left.is_coreferent_with(right))

        # No document, different annotated_set_id: expected False.
        left = Mention(None, Span(0, 0), {"annotated_set_id": 1})
        right = Mention(None, Span(3, 4), {"annotated_set_id": 0})
        self.assertEqual(False, left.is_coreferent_with(right))

        # No document, both annotated_set_id None: expected False.
        left = Mention(None, Span(0, 0), {"annotated_set_id": None})
        right = Mention(None, Span(3, 4), {"annotated_set_id": None})
        self.assertEqual(False, left.is_coreferent_with(right))

        # Same document, same annotated_set_id: expected True.
        left = Mention(
            self.complicated_mention_document, Span(0, 0),
            {"annotated_set_id": 1})
        right = Mention(
            self.complicated_mention_document, Span(3, 4),
            {"annotated_set_id": 1})
        self.assertEqual(True, left.is_coreferent_with(right))

        # Same document, both annotated_set_id None: expected False.
        left = Mention(
            self.complicated_mention_document, Span(0, 0),
            {"annotated_set_id": None})
        right = Mention(
            self.complicated_mention_document, Span(3, 4),
            {"annotated_set_id": None})
        self.assertEqual(False, left.is_coreferent_with(right))

        # Different documents, same annotated_set_id: expected False.
        left = Mention(
            self.complicated_mention_document, Span(0, 0),
            {"annotated_set_id": 1})
        right = Mention(
            self.real_document, Span(13, 20), {"annotated_set_id": 1})
        self.assertEqual(False, left.is_coreferent_with(right))