Exemple #1
0
    def test_entity_graph_partition(self):
        """Partition a graph over annotated mentions by a system entity graph.

        The graph links annotated mention 4 to mentions 2 and 0, and mention
        2 to mention 0.  After partitioning it by the graph built from the
        system mentions' ``set_id`` attribute, only the edge from mention 4
        to mention 0 is expected to remain.
        """
        document = self.complicated_mention_document
        gold = document.annotated_mentions

        gold_graph = data_structures.EntityGraph({
            gold[4]: [gold[2], gold[0]],
            gold[2]: [gold[0]],
        })

        # (span begin, span end, system set id) for each system mention.
        system_output = [
            mentions.Mention(document, spans.Span(begin, end),
                             {"set_id": set_id})
            for begin, end, set_id in (
                (0, 0, 0),
                (2, 3, 1),
                (6, 10, 0),
                (5, 5, 0),
            )
        ]

        remaining_edges = defaultdict(list)
        remaining_edges[gold[4]].append(gold[0])
        expected_graph = data_structures.EntityGraph(remaining_edges)

        system_graph = data_structures.EntityGraph.from_mentions(
            system_output, "set_id")

        self.assertEqual(expected_graph, gold_graph.partition(system_graph))
    def test_post_process_embedded_head_largest_span(self):
        """Check the result of ``post_process_embedded_head_largest_span``.

        Four nominal mentions are passed in as a set.  The expected output is
        the sorted list of three of them: the mention with span (0, 3) and
        head span (3, 3) is absent, presumably because its head lies inside
        the head span (2, 3) of the larger mention (0, 6).
        """
        def nominal(span, head_span):
            # Every mention in this fixture shares the same type, empty token
            # list and head index; only span and head span vary.
            return mentions.Mention(
                None, spans.Span(*span), {
                    "tokens": [],
                    "type": "NOM",
                    "head_index": 0,
                    "head_span": spans.Span(*head_span),
                })

        candidates = {
            nominal((0, 3), (3, 3)),
            nominal((0, 6), (2, 3)),
            nominal((0, 2), (1, 1)),
            nominal((5, 6), (5, 6)),
        }

        survivors = sorted([
            nominal((0, 6), (2, 3)),
            nominal((0, 2), (1, 1)),
            nominal((5, 6), (5, 6)),
        ])

        filtered = mention_extractor.post_process_embedded_head_largest_span(
            candidates)

        self.assertEqual(survivors, filtered)
    def setUp(self):
        """Create the gold clusters and system mentions used by the tests.

        Sets ``gold_first_cluster`` (annotated set 0), ``gold_second_cluster``
        (annotated set 1), ``system1_mentions`` (system sets 0, 1 and 2) and
        ``system2_cluster`` (system set 0, with ``antecedent`` attributes
        linking its mentions).  Also disables diff truncation in failure
        messages via ``maxDiff``.
        """
        def build(begin, end, attributes):
            # All fixture mentions are document-less.
            return mentions.Mention(None, spans.Span(begin, end), attributes)

        self.gold_first_cluster = [
            build(begin, end, {
                "tokens": tokens,
                "type": mention_type,
                "annotated_set_id": 0,
            })
            for begin, end, tokens, mention_type in (
                (0, 0, ["a"], "NOM"),
                (1, 1, ["US"], "NAM"),
                (2, 3, ["angry", "salesman"], "PRO"),
                (4, 5, ["the", "rainbow"], "NAM"),
                (5, 6, ["and", "far"], "NOM"),
                (7, 7, ["neypmd"], "NOM"),
            )
        ]

        self.gold_second_cluster = [
            build(begin, end, {"type": mention_type, "annotated_set_id": 1})
            for begin, end, mention_type in (
                (7, 8, "NOM"),
                (9, 9, "NAM"),
                (10, 10, "PRO"),
            )
        ]

        self.system1_mentions = [
            build(begin, end, {"set_id": set_id})
            for begin, end, set_id in (
                (0, 0, 0),
                (2, 3, 0),
                (4, 5, 2),
                (5, 6, 2),
                (3, 4, 1),
                (7, 8, 1),
            )
        ]

        self.system2_cluster = [
            build(begin, end, {"tokens": tokens, "set_id": 0})
            for begin, end, tokens in (
                (0, 0, ["a"]),
                (2, 3, ["angry", "salesman"]),
                (7, 8, ["snafu", "foo"]),
                (9, 9, ["bar"]),
            )
        ]

        # (mention index, antecedent index) within system2_cluster.
        for anaphor, antecedent in ((1, 0), (2, 0), (3, 2)):
            self.system2_cluster[anaphor].attributes["antecedent"] = \
                self.system2_cluster[antecedent]

        self.maxDiff = None
    def test_post_process_appositions(self):
        """Check the result of ``post_process_appositions`` on two fixtures.

        First fixture: an apposition whose parse tree has the two appositive
        NPs separated by commas.  The expected output keeps the apposition
        and the mentions nested deeper in the tree, and omits the two
        mentions whose parse trees are direct children of the apposition's
        tree (``tree[0]`` and ``tree[2]``).

        Second fixture: an apposition whose parse tree has exactly two NP
        children.  Only the apposition itself is expected in the output.
        """
        def build(span, tokens, mention_type, tree, is_apposition=False):
            # list(tokens) gives each mention its own token list.
            return mentions.Mention(
                None, spans.Span(*span), {
                    "tokens": list(tokens),
                    "is_apposition": is_apposition,
                    "type": mention_type,
                    "parse_tree": tree,
                })

        # --- Fixture 1: "The ROC's ambassador to Nicaragua, Antonio Tsai,"
        three_children_tree = nltk.ParentedTree.fromstring(
            "(NP (NP (NP (NP (DT The) (NNP ROC) (POS 's)) (NN ambassador)) "
            "(PP (IN to) (NP (NNP Nicaragua)))) (, ,) (NP (NNP Antonio) "
            "(NNP Tsai)) (, ,))")

        ambassador_apposition = (
            "The", "ROC", "'s", "ambassador", "to", "Nicaragua",
            ",", "Antonio", "Tsai")

        three_children_all_mentions = {
            build((0, 6), ambassador_apposition, "NAM",
                  three_children_tree, is_apposition=True),
            build((0, 4),
                  ("The", "ROC", "'s", "ambassador", "to", "Nicaragua"),
                  "NOM", three_children_tree[0]),
            build((0, 3), ("The", "ROC", "'s", "ambassador"),
                  "NOM", three_children_tree[0][0]),
            build((0, 2), ("The", "ROC", "'s"),
                  "NAM", three_children_tree[0][0][0]),
            build((4, 4), ("Nicaragua",),
                  "NAM", three_children_tree[0][1][1]),
            build((5, 6), ("Antonio", "Tsai"),
                  "NAM", three_children_tree[2]),
        }

        three_children_expected = sorted([
            build((0, 6), ambassador_apposition, "NAM",
                  three_children_tree, is_apposition=True),
            build((0, 3), ("The", "ROC", "'s", "ambassador"),
                  "NOM", three_children_tree[0][0]),
            build((0, 2), ("The", "ROC", "'s"),
                  "NAM", three_children_tree[0][0][0]),
            build((4, 4), ("Nicaragua",),
                  "NAM", three_children_tree[0][1][1]),
        ])

        self.assertEqual(
            three_children_expected,
            mention_extractor.post_process_appositions(
                three_children_all_mentions))

        # --- Fixture 2: "Secretary of State Madeleine Albright"
        two_children_tree = nltk.ParentedTree.fromstring(
            "(NP (NP (NP (NNP Secretary)) (PP (IN of) (NP (NNP State)))) "
            "(NP (NNP Madeleine) (NNP Albright)))")

        # Token spelling matches the "(NNP State)" leaf of the parse tree.
        secretary_apposition = (
            "Secretary", "of", "State", "Madeleine", "Albright")

        two_children_all_mentions = {
            build((0, 4), secretary_apposition, "NAM",
                  two_children_tree, is_apposition=True),
            build((0, 0), ("Secretary",),
                  "NAM", two_children_tree[0][0]),
            build((0, 2), ("Secretary", "of", "State"),
                  "NAM", two_children_tree[0]),
            build((2, 2), ("State",),
                  "NAM", two_children_tree[0][1][1]),
            # "Madeleine Albright" covers tokens 3-4 of the five-token
            # sentence.  It must not reuse the (2, 2) span of "State":
            # NOTE(review): if Mention equality/hashing is span-based (not
            # visible here), a duplicate span would silently collapse the
            # two mentions inside this set.
            build((3, 4), ("Madeleine", "Albright"),
                  "NAM", two_children_tree[1]),
        }

        two_children_expected = sorted([
            build((0, 4), secretary_apposition, "NAM",
                  two_children_tree, is_apposition=True),
        ])

        self.assertEqual(
            two_children_expected,
            mention_extractor.post_process_appositions(
                two_children_all_mentions))
    def test_post_process_same_head_largest_span(self):
        """Check the result of ``post_process_same_head_largest_span``.

        Two fixtures are passed in as sets and compared against sorted
        expected lists.  In both, wherever several input mentions share a
        head span, only the one with the largest span appears in the
        expected output; mentions with a unique head span are all kept.
        """
        def build(span, head_span, mention_type="NOM", tokens=(),
                  head_index=0):
            # list(tokens) gives each mention its own token list.
            return mentions.Mention(
                None, spans.Span(*span), {
                    "tokens": list(tokens),
                    "type": mention_type,
                    "head_index": head_index,
                    "head_span": spans.Span(*head_span),
                })

        # --- Fixture 1: nominal mentions; (0, 3) and (0, 6) share head (3, 3).
        nominal_candidates = {
            build((0, 3), (3, 3)),
            build((0, 6), (3, 3)),
            build((0, 2), (1, 1)),
            build((5, 6), (5, 6)),
            build((0, 0), (0, 0)),
        }

        nominal_survivors = sorted([
            build((0, 6), (3, 3)),
            build((0, 2), (1, 1)),
            build((5, 6), (5, 6)),
            build((0, 0), (0, 0)),
        ])

        self.assertEqual(
            nominal_survivors,
            mention_extractor.post_process_same_head_largest_span(
                nominal_candidates))

        # --- Fixture 2: name mentions; two pairs each sharing a head span.
        name_candidates = {
            build((0, 1), (0, 0), "NAM", ["Taiwan", "'s"]),
            build((0, 0), (0, 0), "NAM", ["Taiwan"]),
            build((2, 3), (3, 3), "NAM", ["the", "CCP"], head_index=1),
            build((3, 3), (3, 3), "NAM", ["CCP"]),
        }

        name_survivors = sorted([
            build((0, 1), (0, 0), "NAM", ["Taiwan", "'s"]),
            build((2, 3), (3, 3), "NAM", ["the", "CCP"], head_index=1),
        ])

        self.assertEqual(
            name_survivors,
            mention_extractor.post_process_same_head_largest_span(
                name_candidates))
Exemple #6
0
    def setUp(self):
        """Create the mention clusters used by the tests.

        Sets ``first_cluster`` (annotated set 0), ``second_cluster``
        (annotated set 1) and ``system_cluster`` (annotated sets 0, 2 and 1),
        all made of document-less mentions carrying ``tokens`` and
        ``annotated_set_id`` attributes.  Also disables diff truncation in
        failure messages via ``maxDiff``.
        """
        def build(begin, end, tokens, set_id):
            return mentions.Mention(
                None,
                spans.Span(begin, end),
                {"tokens": tokens, "annotated_set_id": set_id})

        self.first_cluster = [
            build(begin, end, tokens, 0)
            for begin, end, tokens in (
                (0, 0, ["a"]),
                (1, 1, ["b"]),
                (2, 3, ["c", "d"]),
                (4, 5, ["e", "f"]),
                (5, 6, ["f", "g"]),
                (7, 7, ["h"]),
            )
        ]

        self.second_cluster = [
            build(begin, end, tokens, 1)
            for begin, end, tokens in (
                (3, 4, ["d", "e"]),
                (7, 8, ["h", "i"]),
                (10, 10, ["k"]),
            )
        ]

        self.system_cluster = [
            build(begin, end, tokens, set_id)
            for begin, end, tokens, set_id in (
                (0, 0, ["a"], 0),
                (2, 3, ["c", "d"], 0),
                (4, 5, ["e", "f"], 2),
                (5, 6, ["f", "g"], 2),
                (7, 7, ["h"], 1),
                (10, 10, ["k"], 1),
            )
        ]

        self.maxDiff = None