def test_gender(self):
    # Table-driven check of mention_property_computer.compute_gender.
    # Each entry pairs the expected label with the attribute dict passed
    # to the function. Entries are evaluated in order and the first
    # mismatch fails the test.
    cases = [
        # Mentions of type "PRO" (these carry a "citation_form").
        ("MALE", {"tokens": ["him"], "pos": ["PRP"], "type": "PRO",
                  "citation_form": "he", "head_index": 0}),
        ("NEUTRAL", {"tokens": ["its"], "pos": ["PRP$"], "type": "PRO",
                     "citation_form": "it", "head_index": 0}),
        # Mentions of type "NAM" whose first token is a title.
        ("FEMALE", {"tokens": ["Mrs.", "Robinson"], "pos": ["NNP", "NNP"],
                    "type": "NAM", "head_index": 1}),
        ("MALE", {"tokens": ["Mr.", "FooBar"], "pos": ["NNP", "NNP"],
                  "type": "NAM", "head_index": 1}),
        # Mentions of type "NOM" with explicit "head" and "ner" entries.
        ("NEUTRAL", {"tokens": ["an", "arrow"], "head": ["arrow"],
                     "pos": ["DT", "NN"], "type": "NOM",
                     "ner": ["-", "-"], "head_index": 1}),
        ("FEMALE", {"tokens": ["the", "girl"], "head": ["girl"],
                    "pos": ["DT", "NN"], "type": "NOM",
                    "ner": ["-", "-"], "head_index": 1}),
        ("NEUTRAL", {"tokens": ["the", "shooting"], "head": ["shooting"],
                     "pos": ["DT", "NN"], "type": "NOM",
                     "ner": ["-", "-"], "head_index": 1}),
        ("MALE", {"tokens": ["the", "groom"], "head": ["groom"],
                  "pos": ["DT", "NN"], "type": "NOM",
                  "ner": ["-", "-"], "head_index": 1}),
        ("PLURAL", {"tokens": ["the", "guys"], "head": ["guys"],
                    "pos": ["DT", "NNS"], "type": "NOM",
                    "ner": ["-", "-"], "head_index": 1}),
        # Mention of type "NAM" tagged WORK_OF_ART in "ner".
        ("NEUTRAL", {"tokens": ["the", "Mona", "Lisa"],
                     "head": ["Mona", "Lisa"],
                     "pos": ["DT", "NNP", "NNP"], "type": "NAM",
                     "ner": ["-", "WORK_OF_ART", "WORK_OF_ART"],
                     "head_index": 2}),
    ]

    for expected, attributes in cases:
        self.assertEqual(
            expected,
            mention_property_computer.compute_gender(attributes))
def from_document(span, document, first_in_gold_entity=False):
    """Build a ``Mention`` for ``span`` from the annotations in ``document``.

    The attribute dictionary is filled step by step. Every helper from
    ``mention_property_computer`` is handed the dictionary built so far,
    so the assignment order below is kept deliberately (later helpers may
    read entries written by earlier ones).

    Args:
        span (Span): The span of the mention in the document.
        document (CoNLLDocument): The document the mention belongs to.
        first_in_gold_entity (bool): Stored unchanged under the
            ``"first_in_gold_entity"`` attribute. Defaults to ``False``.

    Returns:
        Mention: A mention extracted from the input span in the input
        document.
    """
    mpc = mention_property_computer

    sentence_id, sentence_span = document.get_sentence_id_and_span(span)

    # Slice bounds over the document-level sequences; the "+ 1" makes the
    # slice include index span.end.
    first, last = span.begin, span.end + 1

    attributes = {
        "tokens": document.tokens[first:last],
        "pos": document.pos[first:last],
        "ner": document.ner[first:last],
        "sentence_id": sentence_id,
        "parse_tree": mpc.get_relevant_subtree(span, document),
        "speaker": document.speakers[span.begin],
        "antecedent": None,
        "set_id": None,
        "first_in_gold_entity": first_in_gold_entity,
    }

    # Annotated coreference set of the span, or None when the span has no
    # entry in document.coref.
    attributes["annotated_set_id"] = (
        document.coref[span] if span in document.coref else None)

    attributes["is_apposition"] = mpc.is_apposition(attributes)
    attributes["grammatical_function"] = mpc.get_grammatical_function(
        attributes)

    head_tokens, head_offsets, head_position = \
        mpc.compute_head_information(attributes)
    attributes["head"] = head_tokens
    # head_offsets is shifted by span.begin to obtain the head span stored
    # on the mention.
    attributes["head_span"] = spans.Span(
        span.begin + head_offsets.begin,
        span.begin + head_offsets.end)
    attributes["head_index"] = head_position

    attributes["type"] = mpc.get_type(attributes)
    attributes["fine_type"] = mpc.get_fine_type(attributes)

    # A citation form is only computed for mentions of type "PRO".
    if attributes["type"] == "PRO":
        attributes["citation_form"] = mpc.get_citation_form(attributes)

    attributes["number"] = mpc.compute_number(attributes)
    attributes["gender"] = mpc.compute_gender(attributes)
    attributes["semantic_class"] = mpc.compute_semantic_class(attributes)

    attributes["head_as_lowercase_string"] = \
        " ".join(attributes["head"]).lower()
    attributes["tokens_as_lowercase_string"] = \
        " ".join(attributes["tokens"]).lower()

    # Position of the head token inside the dependency tree of its sentence.
    dep_tree = document.dep[sentence_id]
    index = span.begin + head_position - sentence_span.begin

    # The stored head value minus one is used as an index into dep_tree;
    # a result of -1 means there is no governor token to look up.
    governor_id = dep_tree[index].head - 1
    attributes["governor"] = (
        "NONE" if governor_id == -1
        else dep_tree[governor_id].form.lower())

    attributes["ancestry"] = Mention._get_ancestry(dep_tree, index)
    attributes["deprel"] = dep_tree[index].deprel

    return Mention(document, span, attributes)
def test_gender(self):
    # Checks mention_property_computer.compute_gender on hand-built
    # attribute dicts. The small factories below only remove repetition;
    # each one produces the same keys, in the same order, as a literal
    # dict for that kind of mention would.
    gender_of = mention_property_computer.compute_gender

    def pronoun(token, tag, citation_form):
        # Single-token mention of type "PRO".
        return {
            "tokens": [token],
            "pos": [tag],
            "type": "PRO",
            "citation_form": citation_form,
            "head_index": 0,
        }

    def titled_name(title, name):
        # Two-token mention of type "NAM"; the head is the second token.
        return {
            "tokens": [title, name],
            "pos": ["NNP", "NNP"],
            "type": "NAM",
            "head_index": 1,
        }

    def nominal(determiner, noun, noun_tag="NN"):
        # Determiner + noun mention of type "NOM" without NER labels.
        return {
            "tokens": [determiner, noun],
            "head": [noun],
            "pos": ["DT", noun_tag],
            "type": "NOM",
            "ner": ["-", "-"],
            "head_index": 1,
        }

    self.assertEqual("MALE", gender_of(pronoun("him", "PRP", "he")))
    self.assertEqual("NEUTRAL", gender_of(pronoun("its", "PRP$", "it")))

    self.assertEqual("FEMALE", gender_of(titled_name("Mrs.", "Robinson")))
    self.assertEqual("MALE", gender_of(titled_name("Mr.", "FooBar")))

    self.assertEqual("NEUTRAL", gender_of(nominal("an", "arrow")))
    self.assertEqual("FEMALE", gender_of(nominal("the", "girl")))
    self.assertEqual("NEUTRAL", gender_of(nominal("the", "shooting")))
    self.assertEqual("MALE", gender_of(nominal("the", "groom")))
    self.assertEqual(
        "PLURAL", gender_of(nominal("the", "guys", noun_tag="NNS")))

    # Three-token "NAM" mention tagged WORK_OF_ART; spelled out because it
    # does not fit any of the factories above.
    mona_lisa = {
        "tokens": ["the", "Mona", "Lisa"],
        "head": ["Mona", "Lisa"],
        "pos": ["DT", "NNP", "NNP"],
        "type": "NAM",
        "ner": ["-", "WORK_OF_ART", "WORK_OF_ART"],
        "head_index": 2,
    }
    self.assertEqual("NEUTRAL", gender_of(mona_lisa))