def test_gender(self):
    """Check ``compute_gender`` against a table of hand-built mentions.

    Each case pairs the expected gender label with the attribute dict
    handed to ``mention_property_computer.compute_gender``. Cases are
    evaluated in order and the first mismatch fails the test.
    """
    cases = (
        # Pronouns: these carry a "citation_form".
        ("MALE",
         {"tokens": ["him"], "pos": ["PRP"], "type": "PRO",
          "citation_form": "he", "head_index": 0}),
        ("NEUTRAL",
         {"tokens": ["its"], "pos": ["PRP$"], "type": "PRO",
          "citation_form": "it", "head_index": 0}),
        # Names preceded by a title; no "head" or "ner" entries given.
        ("FEMALE",
         {"tokens": ["Mrs.", "Robinson"], "pos": ["NNP", "NNP"],
          "type": "NAM", "head_index": 1}),
        ("MALE",
         {"tokens": ["Mr.", "FooBar"], "pos": ["NNP", "NNP"],
          "type": "NAM", "head_index": 1}),
        # Common-noun mentions without named-entity tags.
        ("NEUTRAL",
         {"tokens": ["an", "arrow"], "head": ["arrow"],
          "pos": ["DT", "NN"], "type": "NOM", "ner": ["-", "-"],
          "head_index": 1}),
        ("FEMALE",
         {"tokens": ["the", "girl"], "head": ["girl"],
          "pos": ["DT", "NN"], "type": "NOM", "ner": ["-", "-"],
          "head_index": 1}),
        ("NEUTRAL",
         {"tokens": ["the", "shooting"], "head": ["shooting"],
          "pos": ["DT", "NN"], "type": "NOM", "ner": ["-", "-"],
          "head_index": 1}),
        ("MALE",
         {"tokens": ["the", "groom"], "head": ["groom"],
          "pos": ["DT", "NN"], "type": "NOM", "ner": ["-", "-"],
          "head_index": 1}),
        # Plural noun head (tagged NNS).
        ("PLURAL",
         {"tokens": ["the", "guys"], "head": ["guys"],
          "pos": ["DT", "NNS"], "type": "NOM", "ner": ["-", "-"],
          "head_index": 1}),
        # Name tagged as a work of art.
        ("NEUTRAL",
         {"tokens": ["the", "Mona", "Lisa"], "head": ["Mona", "Lisa"],
          "pos": ["DT", "NNP", "NNP"], "type": "NAM",
          "ner": ["-", "WORK_OF_ART", "WORK_OF_ART"], "head_index": 2}),
    )

    for expected_gender, mention_attributes in cases:
        self.assertEqual(
            expected_gender,
            mention_property_computer.compute_gender(mention_attributes))
Example #2
0
    def from_document(span, document, first_in_gold_entity=False):
        """
        Build a ``Mention`` for ``span`` from the annotations in ``document``.

        Token-level annotations (tokens, POS tags, NER tags, speaker) are
        sliced out of the document; the remaining attributes are derived one
        after another via ``mention_property_computer``. The order of these
        derivations is kept fixed because every computation receives the
        attribute dict as filled in by the preceding steps.

        Args:
            span (Span): The span of the mention in the document.
            document (CoNLLDocument): The document the mention belongs to.
            first_in_gold_entity (bool): Stored unchanged under the
                ``"first_in_gold_entity"`` attribute. Defaults to ``False``.

        Returns:
            Mention: A mention for the input span in the input document,
            carrying the computed attributes.
        """

        sentence_id, sentence_span = document.get_sentence_id_and_span(span)

        # Slices run up to span.end + 1, i.e. span.end is treated as
        # inclusive.
        first, stop = span.begin, span.end + 1

        attributes = {
            "tokens": document.tokens[first:stop],
            "pos": document.pos[first:stop],
            "ner": document.ner[first:stop],
            "sentence_id": sentence_id,
            "parse_tree": mention_property_computer.get_relevant_subtree(
                span, document),
            "speaker": document.speakers[first],
            "antecedent": None,
            "set_id": None,
            "first_in_gold_entity": first_in_gold_entity,
        }

        # Spans without a coreference annotation get no annotated set id.
        attributes["annotated_set_id"] = (
            document.coref[span] if span in document.coref else None)

        attributes["is_apposition"] = mention_property_computer.is_apposition(
            attributes)
        attributes["grammatical_function"] = (
            mention_property_computer.get_grammatical_function(attributes))

        head, head_offsets, head_index = (
            mention_property_computer.compute_head_information(attributes))

        attributes["head"] = head
        # The head offsets are relative to the mention; shift them by the
        # start of the mention to obtain document-level positions.
        attributes["head_span"] = spans.Span(
            first + head_offsets.begin, first + head_offsets.end)
        attributes["head_index"] = head_index

        attributes["type"] = mention_property_computer.get_type(attributes)
        attributes["fine_type"] = mention_property_computer.get_fine_type(
            attributes)

        # Only pronouns receive a citation form.
        if attributes["type"] == "PRO":
            attributes["citation_form"] = (
                mention_property_computer.get_citation_form(attributes))

        for key, compute in (
                ("number", mention_property_computer.compute_number),
                ("gender", mention_property_computer.compute_gender),
                ("semantic_class",
                 mention_property_computer.compute_semantic_class)):
            attributes[key] = compute(attributes)

        attributes["head_as_lowercase_string"] = " ".join(
            attributes["head"]).lower()
        attributes["tokens_as_lowercase_string"] = " ".join(
            attributes["tokens"]).lower()

        dep_tree = document.dep[sentence_id]

        # Position of the head token inside its sentence.
        head_in_sentence = first + head_index - sentence_span.begin

        # The stored head index is shifted down by one; a result of -1
        # means the head token has no governor.
        governor_position = dep_tree[head_in_sentence].head - 1
        attributes["governor"] = (
            "NONE" if governor_position == -1
            else dep_tree[governor_position].form.lower())

        attributes["ancestry"] = Mention._get_ancestry(
            dep_tree, head_in_sentence)
        attributes["deprel"] = dep_tree[head_in_sentence].deprel

        return Mention(document, span, attributes)
Example #3
0
 def test_gender(self):
     """Verify ``compute_gender`` for pronoun, name and noun mentions."""

     def check(expected, **attributes):
         # The keyword arguments form the mention attribute dict that is
         # passed to compute_gender.
         self.assertEqual(
             expected,
             mention_property_computer.compute_gender(attributes))

     # Pronouns: these carry a "citation_form".
     check("MALE", tokens=["him"], pos=["PRP"], type="PRO",
           citation_form="he", head_index=0)
     check("NEUTRAL", tokens=["its"], pos=["PRP$"], type="PRO",
           citation_form="it", head_index=0)

     # Names preceded by a title; no "head" or "ner" entries given.
     check("FEMALE", tokens=["Mrs.", "Robinson"], pos=["NNP", "NNP"],
           type="NAM", head_index=1)
     check("MALE", tokens=["Mr.", "FooBar"], pos=["NNP", "NNP"],
           type="NAM", head_index=1)

     # Common-noun mentions without named-entity tags.
     check("NEUTRAL", tokens=["an", "arrow"], head=["arrow"],
           pos=["DT", "NN"], type="NOM", ner=["-", "-"], head_index=1)
     check("FEMALE", tokens=["the", "girl"], head=["girl"],
           pos=["DT", "NN"], type="NOM", ner=["-", "-"], head_index=1)
     check("NEUTRAL", tokens=["the", "shooting"], head=["shooting"],
           pos=["DT", "NN"], type="NOM", ner=["-", "-"], head_index=1)
     check("MALE", tokens=["the", "groom"], head=["groom"],
           pos=["DT", "NN"], type="NOM", ner=["-", "-"], head_index=1)

     # Plural noun head (tagged NNS).
     check("PLURAL", tokens=["the", "guys"], head=["guys"],
           pos=["DT", "NNS"], type="NOM", ner=["-", "-"], head_index=1)

     # Name tagged as a work of art.
     check("NEUTRAL", tokens=["the", "Mona", "Lisa"],
           head=["Mona", "Lisa"], pos=["DT", "NNP", "NNP"], type="NAM",
           ner=["-", "WORK_OF_ART", "WORK_OF_ART"], head_index=2)