def test_number(self):
    # Table of (expected number, mention attributes) pairs; every entry is
    # passed to compute_number and compared in the order listed.
    cases = [
        # Pronoun mentions carry a citation form.
        ("SINGULAR",
         {"tokens": ["him"], "pos": ["PRP"], "type": "PRO",
          "citation_form": "he", "head_index": 0}),
        # Nominal mention whose head token is tagged NN.
        ("SINGULAR",
         {"tokens": ["the", "guy"], "pos": ["DT", "NN"], "type": "NOM",
          "head_index": 1}),
        ("PLURAL",
         {"tokens": ["they"], "pos": ["PRP"], "type": "PRO",
          "citation_form": "they", "head_index": 0}),
        # Nominal mention whose head token is tagged NNS.
        ("PLURAL",
         {"tokens": ["these", "freaks"], "pos": ["DT", "NNS"],
          "type": "NOM", "head_index": 1}),
        # Head index points at the conjunction "and" (tagged CC).
        ("PLURAL",
         {"tokens": ["he", "and", "she"], "pos": ["PRP", "CC", "PRP"],
          "type": "NOM", "head_index": 1}),
    ]

    for expected_number, mention_attributes in cases:
        self.assertEqual(
            expected_number,
            mention_property_computer.compute_number(mention_attributes))
Esempio n. 2
0
 def test_number(self):
     # Checks compute_number on five hand-built attribute dictionaries:
     # two expected to be SINGULAR and three expected to be PLURAL.
     compute_number = mention_property_computer.compute_number

     # Pronoun mention with citation form "he".
     singular_pronoun = {
         "tokens": ["him"],
         "pos": ["PRP"],
         "type": "PRO",
         "citation_form": "he",
         "head_index": 0,
     }
     self.assertEqual("SINGULAR", compute_number(singular_pronoun))

     # Nominal mention whose head token is tagged NN.
     singular_nominal = {
         "tokens": ["the", "guy"],
         "pos": ["DT", "NN"],
         "type": "NOM",
         "head_index": 1,
     }
     self.assertEqual("SINGULAR", compute_number(singular_nominal))

     # Pronoun mention with citation form "they".
     plural_pronoun = {
         "tokens": ["they"],
         "pos": ["PRP"],
         "type": "PRO",
         "citation_form": "they",
         "head_index": 0,
     }
     self.assertEqual("PLURAL", compute_number(plural_pronoun))

     # Nominal mention whose head token is tagged NNS.
     plural_nominal = {
         "tokens": ["these", "freaks"],
         "pos": ["DT", "NNS"],
         "type": "NOM",
         "head_index": 1,
     }
     self.assertEqual("PLURAL", compute_number(plural_nominal))

     # Head index points at the conjunction "and" (tagged CC).
     coordination = {
         "tokens": ["he", "and", "she"],
         "pos": ["PRP", "CC", "PRP"],
         "type": "NOM",
         "head_index": 1,
     }
     self.assertEqual("PLURAL", compute_number(coordination))
Esempio n. 3
0
    def from_document(span, document, first_in_gold_entity=False):
        """
        Build a mention for a span using the annotations of a document.

        The attribute dictionary is filled step by step. Each
        ``mention_property_computer`` helper receives the dictionary as
        populated up to that point, so the steps are kept in this order.

        Args:
            span (Span): The span of the mention in the document.
            document (CoNLLDocument): The document the mention belongs to.
            first_in_gold_entity (bool): Stored unchanged under the
                ``"first_in_gold_entity"`` attribute. Defaults to False.

        Returns:
            Mention: The mention created for the input span in the input
            document, carrying the computed attributes.
        """

        sentence_id, sentence_span = document.get_sentence_id_and_span(span)

        # Span boundaries are inclusive, hence the "+ 1" for slicing.
        begin = span.begin
        stop = span.end + 1

        attributes = {
            "tokens": document.tokens[begin:stop],
            "pos": document.pos[begin:stop],
            "ner": document.ner[begin:stop],
            "sentence_id": sentence_id,
            "parse_tree": mention_property_computer.get_relevant_subtree(
                span, document),
            "speaker": document.speakers[span.begin],
            "antecedent": None,
            "set_id": None,
            "first_in_gold_entity": first_in_gold_entity
        }

        # Spans without an entry in document.coref get None.
        attributes["annotated_set_id"] = (
            document.coref[span] if span in document.coref else None)

        attributes["is_apposition"] = (
            mention_property_computer.is_apposition(attributes))

        attributes["grammatical_function"] = (
            mention_property_computer.get_grammatical_function(attributes))

        head_tokens, relative_head_span, head_index = (
            mention_property_computer.compute_head_information(attributes))

        attributes["head"] = head_tokens
        # The returned head span is offset by the mention start to store
        # it in the same coordinates as ``span``.
        attributes["head_span"] = spans.Span(
            span.begin + relative_head_span.begin,
            span.begin + relative_head_span.end
        )
        attributes["head_index"] = head_index

        attributes["type"] = mention_property_computer.get_type(attributes)
        attributes["fine_type"] = (
            mention_property_computer.get_fine_type(attributes))

        # A citation form is only computed for mentions typed "PRO".
        if attributes["type"] == "PRO":
            attributes["citation_form"] = (
                mention_property_computer.get_citation_form(attributes))

        attributes["number"] = (
            mention_property_computer.compute_number(attributes))
        attributes["gender"] = (
            mention_property_computer.compute_gender(attributes))

        attributes["semantic_class"] = (
            mention_property_computer.compute_semantic_class(attributes))

        attributes["head_as_lowercase_string"] = (
            " ".join(attributes["head"]).lower())

        attributes["tokens_as_lowercase_string"] = (
            " ".join(attributes["tokens"]).lower())

        dependency_tree = document.dep[sentence_id]

        # Position of the head token relative to the start of its sentence.
        head_position = span.begin + head_index - sentence_span.begin

        # ``head`` is shifted down by one before being used as an index;
        # a head value of 0 therefore yields -1 and is recorded as "NONE".
        governor_position = dependency_tree[head_position].head - 1

        attributes["governor"] = (
            "NONE" if governor_position == -1
            else dependency_tree[governor_position].form.lower())

        attributes["ancestry"] = Mention._get_ancestry(
            dependency_tree, head_position)

        attributes["deprel"] = dependency_tree[head_position].deprel

        return Mention(document, span, attributes)