def test_number(self):
    """Check ``compute_number`` on singular and plural mention fixtures.

    Each case pairs the expected number label with the attribute dict
    handed to ``mention_property_computer.compute_number``. The cases
    cover pronouns ("PRO"), nominals ("NOM") and a conjoined mention
    whose head index points at the conjunction token.
    """
    cases = [
        ("SINGULAR",
         {"tokens": ["him"], "pos": ["PRP"], "type": "PRO",
          "citation_form": "he", "head_index": 0}),
        ("SINGULAR",
         {"tokens": ["the", "guy"], "pos": ["DT", "NN"], "type": "NOM",
          "head_index": 1}),
        ("PLURAL",
         {"tokens": ["they"], "pos": ["PRP"], "type": "PRO",
          "citation_form": "they", "head_index": 0}),
        ("PLURAL",
         {"tokens": ["these", "freaks"], "pos": ["DT", "NNS"],
          "type": "NOM", "head_index": 1}),
        # Head index 1 is the "and" token (tagged CC) of the conjunction.
        ("PLURAL",
         {"tokens": ["he", "and", "she"], "pos": ["PRP", "CC", "PRP"],
          "type": "NOM", "head_index": 1}),
    ]

    # Plain loop (no subTest): the first mismatch aborts the test, exactly
    # like a sequence of individual assertions would.
    for expected_number, mention_attributes in cases:
        self.assertEqual(
            expected_number,
            mention_property_computer.compute_number(mention_attributes))
def test_number(self):
    """Verify the number label computed for five hand-built mentions.

    Two mentions are expected to be "SINGULAR" (a pronoun and a singular
    nominal) and three "PLURAL" (a plural pronoun, a plural nominal and
    a conjunction of two pronouns).
    """
    number_of = mention_property_computer.compute_number

    pronoun_him = {
        "tokens": ["him"],
        "pos": ["PRP"],
        "type": "PRO",
        "citation_form": "he",
        "head_index": 0,
    }
    self.assertEqual("SINGULAR", number_of(pronoun_him))

    nominal_guy = {
        "tokens": ["the", "guy"],
        "pos": ["DT", "NN"],
        "type": "NOM",
        "head_index": 1,
    }
    self.assertEqual("SINGULAR", number_of(nominal_guy))

    pronoun_they = {
        "tokens": ["they"],
        "pos": ["PRP"],
        "type": "PRO",
        "citation_form": "they",
        "head_index": 0,
    }
    self.assertEqual("PLURAL", number_of(pronoun_they))

    nominal_freaks = {
        "tokens": ["these", "freaks"],
        "pos": ["DT", "NNS"],
        "type": "NOM",
        "head_index": 1,
    }
    self.assertEqual("PLURAL", number_of(nominal_freaks))

    # Conjoined mention: the head index selects the "and" token (CC).
    conjunction = {
        "tokens": ["he", "and", "she"],
        "pos": ["PRP", "CC", "PRP"],
        "type": "NOM",
        "head_index": 1,
    }
    self.assertEqual("PLURAL", number_of(conjunction))
def from_document(span, document, first_in_gold_entity=False):
    """
    Build a mention for a span, deriving its attributes from a document.

    The span's tokens, POS tags, NER tags, sentence id, parse subtree and
    speaker are read from the document. Further attributes (apposition
    flag, grammatical function, head information, type, fine type,
    citation form for pronouns, number, gender, semantic class,
    lowercased surface strings and the dependency governor, ancestry and
    relation) are then computed via ``mention_property_computer`` and the
    sentence's dependency tree. For information about the attributes,
    see the class documentation.

    Args:
        span (Span): The span of the mention in the document.
        document (CoNLLDocument): The document the mention belongs to.
        first_in_gold_entity (bool): Stored unchanged as the
            "first_in_gold_entity" attribute. Defaults to False.

    Returns:
        Mention: A mention extracted from the input span in the input
            document.
    """
    sentence_id, sentence_span = document.get_sentence_id_and_span(span)

    # The slice runs up to span.end + 1, so the token at span.end is
    # part of the mention.
    window = slice(span.begin, span.end + 1)

    attributes = {
        "tokens": document.tokens[window],
        "pos": document.pos[window],
        "ner": document.ner[window],
        "sentence_id": sentence_id,
        "parse_tree": mention_property_computer.get_relevant_subtree(
            span, document),
        "speaker": document.speakers[span.begin],
        "antecedent": None,
        "set_id": None,
        "first_in_gold_entity": first_in_gold_entity,
    }

    attributes["annotated_set_id"] = (
        document.coref[span] if span in document.coref else None)

    # Every helper below is handed the partially filled attribute dict,
    # so the order of these assignments is deliberately left untouched
    # (later helpers may rely on keys set earlier -- verify before
    # reordering).
    attributes["is_apposition"] = mention_property_computer.is_apposition(
        attributes)
    attributes["grammatical_function"] = (
        mention_property_computer.get_grammatical_function(attributes))

    head_info = mention_property_computer.compute_head_information(
        attributes)
    head_tokens, relative_head_span, head_index = head_info
    attributes["head"] = head_tokens
    # Shift the head offsets returned by the helper by the mention start.
    attributes["head_span"] = spans.Span(
        span.begin + relative_head_span.begin,
        span.begin + relative_head_span.end)
    attributes["head_index"] = head_index

    attributes["type"] = mention_property_computer.get_type(attributes)
    attributes["fine_type"] = mention_property_computer.get_fine_type(
        attributes)

    # Only pronouns receive a citation form.
    if attributes["type"] == "PRO":
        attributes["citation_form"] = (
            mention_property_computer.get_citation_form(attributes))

    for key, compute in (
            ("number", mention_property_computer.compute_number),
            ("gender", mention_property_computer.compute_gender),
            ("semantic_class",
             mention_property_computer.compute_semantic_class)):
        attributes[key] = compute(attributes)

    attributes["head_as_lowercase_string"] = " ".join(
        attributes["head"]).lower()
    attributes["tokens_as_lowercase_string"] = " ".join(
        attributes["tokens"]).lower()

    # Locate the head token inside the dependency tree of its sentence.
    dep_tree = document.dep[sentence_id]
    head_position = span.begin + head_index - sentence_span.begin
    head_node = dep_tree[head_position]

    # A head value of 0 (-1 after the shift) is reported as governor
    # "NONE"; presumably head ids are 1-based with 0 marking the root.
    governor_position = head_node.head - 1
    attributes["governor"] = (
        "NONE" if governor_position == -1
        else dep_tree[governor_position].form.lower())

    attributes["ancestry"] = Mention._get_ancestry(dep_tree, head_position)
    attributes["deprel"] = head_node.deprel

    return Mention(document, span, attributes)