def test_entity_graph_partition(self):
    """Check ``EntityGraph.partition`` on a small hand-built graph.

    The gold graph links annotated mention 4 to mentions 2 and 0, and
    mention 2 to mention 0.  After partitioning by the graph derived from
    the system mentions (grouped via their ``"set_id"`` attribute), only
    the edge from mention 4 to mention 0 is expected to remain.
    """
    document = self.complicated_mention_document
    gold = document.annotated_mentions

    gold_graph = data_structures.EntityGraph({
        gold[4]: [gold[2], gold[0]],
        gold[2]: [gold[0]],
    })

    # One (begin, end, set_id) triple per system mention, in list order.
    system_spec = [
        (0, 0, 0),
        (2, 3, 1),
        (6, 10, 0),
        (5, 5, 0),
    ]
    system_mentions = [
        mentions.Mention(
            document, spans.Span(begin, end), {"set_id": set_id})
        for begin, end, set_id in system_spec
    ]
    system_graph = data_structures.EntityGraph.from_mentions(
        system_mentions, "set_id")

    remaining_edges = defaultdict(list)
    remaining_edges[gold[4]].append(gold[0])
    expected_graph = data_structures.EntityGraph(remaining_edges)

    self.assertEqual(expected_graph, gold_graph.partition(system_graph))
def test_post_process_embedded_head_largest_span(self):
    """Check ``post_process_embedded_head_largest_span`` on four mentions.

    All fixtures are document-less nominal mentions with empty token
    lists.  The expected (sorted) output omits the mention spanning
    (0, 3), whose head span (3, 3) lies inside the head span (2, 3) of
    the larger mention (0, 6); the other three mentions are kept.
    """
    def nominal(span, head_span):
        # Fresh mention with its own attribute dict on every call.
        return mentions.Mention(
            None,
            spans.Span(*span),
            {
                "tokens": [],
                "type": "NOM",
                "head_index": 0,
                "head_span": spans.Span(*head_span),
            })

    candidates = {
        nominal((0, 3), (3, 3)),
        nominal((0, 6), (2, 3)),
        nominal((0, 2), (1, 1)),
        nominal((5, 6), (5, 6)),
    }

    survivors = sorted([
        nominal((0, 6), (2, 3)),
        nominal((0, 2), (1, 1)),
        nominal((5, 6), (5, 6)),
    ])

    actual = mention_extractor.post_process_embedded_head_largest_span(
        candidates)
    self.assertEqual(survivors, actual)
def setUp(self):
    """Create the gold and system mention fixtures for this test case.

    Sets up:

    * ``gold_first_cluster``: six mentions with tokens and a type,
      all with ``annotated_set_id`` 0;
    * ``gold_second_cluster``: three typed mentions without tokens,
      all with ``annotated_set_id`` 1;
    * ``system1_mentions``: six mentions carrying only a ``set_id``;
    * ``system2_cluster``: four mentions with tokens and ``set_id`` 0,
      where mentions 1 and 2 have mention 0 as ``antecedent`` and
      mention 3 has mention 2 as ``antecedent``.

    ``maxDiff`` is set to ``None``.
    """
    # (begin, end, tokens, type)
    gold_first_spec = [
        (0, 0, ["a"], "NOM"),
        (1, 1, ["US"], "NAM"),
        (2, 3, ["angry", "salesman"], "PRO"),
        (4, 5, ["the", "rainbow"], "NAM"),
        (5, 6, ["and", "far"], "NOM"),
        (7, 7, ["neypmd"], "NOM"),
    ]
    self.gold_first_cluster = [
        mentions.Mention(
            None,
            spans.Span(begin, end),
            {"tokens": tokens, "type": mention_type, "annotated_set_id": 0})
        for begin, end, tokens, mention_type in gold_first_spec
    ]

    # (begin, end, type)
    gold_second_spec = [
        (7, 8, "NOM"),
        (9, 9, "NAM"),
        (10, 10, "PRO"),
    ]
    self.gold_second_cluster = [
        mentions.Mention(
            None,
            spans.Span(begin, end),
            {"type": mention_type, "annotated_set_id": 1})
        for begin, end, mention_type in gold_second_spec
    ]

    # (begin, end, set_id)
    system1_spec = [
        (0, 0, 0),
        (2, 3, 0),
        (4, 5, 2),
        (5, 6, 2),
        (3, 4, 1),
        (7, 8, 1),
    ]
    self.system1_mentions = [
        mentions.Mention(None, spans.Span(begin, end), {"set_id": set_id})
        for begin, end, set_id in system1_spec
    ]

    # (begin, end, tokens)
    system2_spec = [
        (0, 0, ["a"]),
        (2, 3, ["angry", "salesman"]),
        (7, 8, ["snafu", "foo"]),
        (9, 9, ["bar"]),
    ]
    self.system2_cluster = [
        mentions.Mention(
            None,
            spans.Span(begin, end),
            {"tokens": tokens, "set_id": 0})
        for begin, end, tokens in system2_spec
    ]

    # (index of mention, index of its antecedent) within system2_cluster.
    for later, earlier in [(1, 0), (2, 0), (3, 2)]:
        self.system2_cluster[later].attributes["antecedent"] = \
            self.system2_cluster[earlier]

    self.maxDiff = None
def test_post_process_appositions(self):
    """Check ``mention_extractor.post_process_appositions`` on two parses.

    Two hand-built apposition fixtures are run through the post-processor
    and compared with the sorted expected mentions:

    * "The ROC 's ambassador to Nicaragua , Antonio Tsai ,": the root NP
      has comma-separated children.  The expected output drops the
      mentions attached to the root's NP children (``tree[0]`` and
      ``tree[2]``) and keeps the apposition and the remaining mentions.
    * "Secretary of State Madeleine Albright": the root NP has exactly
      two NP children.  Only the apposition itself is expected to remain.
    """
    def mention(begin, end, tokens, mention_type, tree,
                is_apposition=False):
        # Document-less mention carrying the attributes this test sets.
        return mentions.Mention(
            None,
            spans.Span(begin, end),
            {
                "tokens": tokens,
                "is_apposition": is_apposition,
                "type": mention_type,
                "parse_tree": tree,
            })

    # ---- apposition with comma-separated children --------------------
    # NOTE(review): the spans in this fixture do not line up one-to-one
    # with the token lists (e.g. nine tokens for Span(0, 6)); they are
    # kept exactly as they were -- confirm whether that is intended.
    three_children_tree = nltk.ParentedTree.fromstring(
        "(NP (NP (NP (NP (DT The) (NNP ROC) (POS 's)) (NN ambassador)) "
        "(PP (IN to) (NP (NNP Nicaragua)))) (, ,) (NP (NNP Antonio) "
        "(NNP Tsai)) (, ,))")

    three_children_all_mentions = {
        mention(
            0, 6,
            ["The", "ROC", "'s", "ambassador", "to", "Nicaragua", ",",
             "Antonio", "Tsai"],
            "NAM", three_children_tree, is_apposition=True),
        mention(
            0, 4,
            ["The", "ROC", "'s", "ambassador", "to", "Nicaragua"],
            "NOM", three_children_tree[0]),
        mention(
            0, 3,
            ["The", "ROC", "'s", "ambassador"],
            "NOM", three_children_tree[0][0]),
        mention(
            0, 2,
            ["The", "ROC", "'s"],
            "NAM", three_children_tree[0][0][0]),
        mention(
            4, 4,
            ["Nicaragua"],
            "NAM", three_children_tree[0][1][1]),
        mention(
            5, 6,
            ["Antonio", "Tsai"],
            "NAM", three_children_tree[2]),
    }

    three_children_expected = sorted([
        mention(
            0, 6,
            ["The", "ROC", "'s", "ambassador", "to", "Nicaragua", ",",
             "Antonio", "Tsai"],
            "NAM", three_children_tree, is_apposition=True),
        mention(
            0, 3,
            ["The", "ROC", "'s", "ambassador"],
            "NOM", three_children_tree[0][0]),
        mention(
            0, 2,
            ["The", "ROC", "'s"],
            "NAM", three_children_tree[0][0][0]),
        mention(
            4, 4,
            ["Nicaragua"],
            "NAM", three_children_tree[0][1][1]),
    ])

    self.assertEqual(
        three_children_expected,
        mention_extractor.post_process_appositions(
            three_children_all_mentions))

    # ---- apposition with exactly two NP children ---------------------
    two_children_tree = nltk.ParentedTree.fromstring(
        "(NP (NP (NP (NNP Secretary)) (PP (IN of) (NP (NNP State)))) "
        "(NP (NNP Madeleine) (NNP Albright)))")

    two_children_all_mentions = {
        mention(
            0, 4,
            ["Secretary", "of", "State", "Madeleine", "Albright"],
            "NAM", two_children_tree, is_apposition=True),
        mention(
            0, 0,
            ["Secretary"],
            "NAM", two_children_tree[0][0]),
        mention(
            0, 2,
            ["Secretary", "of", "State"],
            "NAM", two_children_tree[0]),
        mention(
            2, 2,
            ["State"],
            "NAM", two_children_tree[0][1][1]),
        # "Madeleine Albright" covers tokens 3-4; giving it its own span
        # keeps it from sharing Span(2, 2) with the "State" mention.
        mention(
            3, 4,
            ["Madeleine", "Albright"],
            "NAM", two_children_tree[1]),
    }

    two_children_expected = sorted([
        mention(
            0, 4,
            ["Secretary", "of", "State", "Madeleine", "Albright"],
            "NAM", two_children_tree, is_apposition=True),
    ])

    self.assertEqual(
        two_children_expected,
        mention_extractor.post_process_appositions(
            two_children_all_mentions))
def test_post_process_same_head_largest_span(self):
    """Check ``post_process_same_head_largest_span`` on two mention sets.

    First scenario: five nominal mentions with empty token lists, two of
    which share the head span (3, 3).  The expected output omits the
    smaller of those two, the mention spanning (0, 3).

    Second scenario: "Taiwan 's" / "Taiwan" share head span (0, 0) and
    "the CCP" / "CCP" share head span (3, 3).  The expected output keeps
    only the two longer mentions.
    """
    def make(span, head_span, tokens, mention_type, head_index):
        # Document-less mention with a fresh attribute dict per call.
        return mentions.Mention(
            None,
            spans.Span(*span),
            {
                "tokens": tokens,
                "type": mention_type,
                "head_index": head_index,
                "head_span": spans.Span(*head_span),
            })

    def nominal(span, head_span):
        return make(span, head_span, [], "NOM", 0)

    def name(span, head_span, tokens, head_index):
        return make(span, head_span, tokens, "NAM", head_index)

    # ---- scenario 1: token-less nominal mentions ---------------------
    all_mentions = {
        nominal((0, 3), (3, 3)),
        nominal((0, 6), (3, 3)),
        nominal((0, 2), (1, 1)),
        nominal((5, 6), (5, 6)),
        nominal((0, 0), (0, 0)),
    }

    expected_mentions = sorted([
        nominal((0, 6), (3, 3)),
        nominal((0, 2), (1, 1)),
        nominal((5, 6), (5, 6)),
        nominal((0, 0), (0, 0)),
    ])

    processed = mention_extractor.post_process_same_head_largest_span(
        all_mentions)
    self.assertEqual(expected_mentions, processed)

    # ---- scenario 2: names with and without extra tokens -------------
    all_mentions_2 = {
        name((0, 1), (0, 0), ["Taiwan", "'s"], 0),
        name((0, 0), (0, 0), ["Taiwan"], 0),
        name((2, 3), (3, 3), ["the", "CCP"], 1),
        name((3, 3), (3, 3), ["CCP"], 0),
    }

    expected_mentions_2 = sorted([
        name((0, 1), (0, 0), ["Taiwan", "'s"], 0),
        name((2, 3), (3, 3), ["the", "CCP"], 1),
    ])

    processed_2 = mention_extractor.post_process_same_head_largest_span(
        all_mentions_2)
    self.assertEqual(expected_mentions_2, processed_2)
def setUp(self):
    """Create the three mention lists used by this test case.

    ``first_cluster`` (six mentions, ``annotated_set_id`` 0),
    ``second_cluster`` (three mentions, ``annotated_set_id`` 1) and
    ``system_cluster`` (six mentions with ``annotated_set_id`` 0, 2
    or 1) are all document-less mentions carrying ``"tokens"`` and
    ``"annotated_set_id"`` attributes.  ``maxDiff`` is set to ``None``.
    """
    def build(spec):
        # spec rows are (begin, end, tokens, annotated_set_id).
        return [
            mentions.Mention(
                None,
                spans.Span(begin, end),
                {"tokens": tokens, "annotated_set_id": set_id})
            for begin, end, tokens, set_id in spec
        ]

    self.first_cluster = build([
        (0, 0, ["a"], 0),
        (1, 1, ["b"], 0),
        (2, 3, ["c", "d"], 0),
        (4, 5, ["e", "f"], 0),
        (5, 6, ["f", "g"], 0),
        (7, 7, ["h"], 0),
    ])

    self.second_cluster = build([
        (3, 4, ["d", "e"], 1),
        (7, 8, ["h", "i"], 1),
        (10, 10, ["k"], 1),
    ])

    self.system_cluster = build([
        (0, 0, ["a"], 0),
        (2, 3, ["c", "d"], 0),
        (4, 5, ["e", "f"], 2),
        (5, 6, ["f", "g"], 2),
        (7, 7, ["h"], 1),
        (10, 10, ["k"], 1),
    ])

    self.maxDiff = None