def get_noun_chains(doc):
    try:
        rels = doc.relations
    except ValueError:
        return None
    chains = collect_chains(rels)
    chains = [get_noun_chain(chain) for chain in chains]
    return [chain for chain in chains if chain.entities]
def test_2_chains(self):
    rels = {
        _create_rel(0, 1),
        _create_rel(1, 2),
        _create_rel(3, 4),
    }
    got_chains = collect_chains(rels)
    expected_chains = [
        _create_chain([0, 1, 2]),
        _create_chain([3, 4]),
    ]
    self.assertEqual(got_chains, expected_chains)
def apply(self, pairs, rels):
    if rels is None:
        return DefaultCandidateMaker().apply(pairs, rels)
    chains = collect_chains(rels)
    ret = []
    for e1, e2 in pairs:
        if any(e1 in chain.entities and e2 in chain.entities for chain in chains):
            ret.append((e1, e2, self.label))
        else:
            ret.append((e1, e2, None))
    return ret
def chain_similar_entities(
        doc: Document,
        entities: List[Entity],
        entity_comparator: Callable[[Document, Entity, Entity], bool] = compare_entities_by_tokens,
) -> List[CoreferenceChain]:
    relations = set()
    for i, e1 in enumerate(entities):
        for e2 in entities[:i]:
            if entity_comparator(doc, e1, e2):
                relations.add(Relation(e1, e2, "match"))
    return collect_chains(relations, entities)
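Note: the comparator defaults to compare_entities_by_tokens, so by default entities whose tokens match are linked into one chain; passing a different Callable[[Document, Entity, Entity], bool] swaps the matching criterion while the pairwise loop and the final collect_chains call stay unchanged.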
def test_many_entities(self):
    rels = {
        _create_rel(0, 1),
        _create_rel(1, 2),
        _create_rel(2, 1),
        _create_rel(2, 0),
        _create_rel(6, 2),
    }
    got_chains = collect_chains(rels)
    expected_chains = [
        _create_chain([0, 1, 2, 6]),
    ]
    self.assertEqual(got_chains, expected_chains)
def test_chain_common_mention(self):
    rels = {
        _create_rel(0, 1),
        _create_rel(1, 2),
        _create_rel(3, 4),
        _create_rel(4, 5),
        _create_rel(2, 5),
    }
    got_chains = collect_chains(rels)
    expected_chains = [
        _create_chain([0, 1, 2, 3, 4, 5]),
    ]
    self.assertEqual(got_chains, expected_chains)
def test_1_chain(self):
    entities = [
        _create_entity(0),
        _create_entity(1),
        _create_entity(2),
        _create_entity(3),
    ]
    rels = {
        _create_rel(0, 1),
        _create_rel(1, 2),
    }
    got_chains = collect_chains(rels, entities)
    expected_chains = [_create_chain([0, 1, 2]), _create_chain([3])]
    self.assertEqual(got_chains, expected_chains)
def serialize_doc(self, doc: Document, fp: IO):
    fp.write('#begin document (' + doc.name + '); \n')
    chains = collect_chains(doc.relations, doc.entities)
    group_positions = self._get_group_positions(chains)
    for sentence in doc.sentences:
        for i, token in enumerate(
                doc.tokens[sentence.start_token:sentence.end_token]):
            idx = i + sentence.start_token
            group_info = self._get_group_info(idx, group_positions)
            fp.write('\t'.join(
                [doc.name, str(idx), str(i), token, group_info]) + '\n')
        fp.write('\n')
    fp.write('#end document')
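serialize_doc writes a CoNLL-style block: a '#begin document (<name>);' header, one tab-separated line per token (document name, document-wide token index, in-sentence index, token text, coreference group info), a blank line after each sentence, and a closing '#end document'. The exact contents of the last column come from _get_group_info, which is not shown here.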
def test_3_chains(self):
    rels = {
        _create_rel(0, 1),
        _create_rel(1, 2),
        _create_rel(3, 4),
        _create_rel(2, 5),
        _create_rel(6, 3),
        _create_rel(10, 11),
        _create_rel(20, 10),
    }
    got_chains = collect_chains(rels)
    expected_chains = [
        _create_chain([0, 1, 2, 5]),
        _create_chain([3, 4, 6]),
        _create_chain([10, 11, 20]),
    ]
    self.assertEqual(got_chains, expected_chains)
def test_chain_multiple_common_mention(self):
    rels = {
        _create_rel(0, 1),
        _create_rel(1, 2),
        _create_rel(3, 4),
        _create_rel(4, 5),
        _create_rel(2, 5),
        _create_rel(10, 20),
        _create_rel(20, 30),
        _create_rel(100, 200),
        _create_rel(40, 50),
        _create_rel(50, 60),
        _create_rel(60, 70),
        _create_rel(70, 30),
    }
    got_chains = collect_chains(rels)
    expected_chains = [
        _create_chain([0, 1, 2, 3, 4, 5]),
        _create_chain([10, 20, 30, 40, 50, 60, 70]),
        _create_chain([100, 200]),
    ]
    self.assertEqual(got_chains, expected_chains)
def _convert_to_rel_chains(rels, entities):
    chains = collect_chains(rels, entities)
    return chains2rels(chains)
def test_no_chains(self):
    rels = set()
    got_chains = collect_chains(rels)
    expected_chains = []
    self.assertEqual(got_chains, expected_chains)
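Taken together, the tests above pin down the observable behaviour of collect_chains: entities transitively linked by relations end up in one chain, separate components become separate chains, and when an explicit entity list is supplied, entities with no relations appear as singleton chains. Below is a minimal, self-contained sketch of that behaviour, assuming a plain union-find; the tuple relations, integer entity ids, and list chains are simplified stand-ins for the project's Relation/Entity/CoreferenceChain classes, and this is not the library's actual implementation.

from typing import Dict, Hashable, Iterable, List, Optional, Tuple


def collect_chains_sketch(
        rels: Iterable[Tuple[Hashable, Hashable]],
        entities: Optional[Iterable[Hashable]] = None) -> List[List[Hashable]]:
    """Group entities that are transitively linked by relations (union-find)."""
    parent: Dict[Hashable, Hashable] = {}

    def find(x):
        parent.setdefault(x, x)
        while parent[x] != x:
            parent[x] = parent[parent[x]]  # path halving
            x = parent[x]
        return x

    def union(a, b):
        ra, rb = find(a), find(b)
        if ra != rb:
            parent[rb] = ra

    for e1, e2 in rels:
        union(e1, e2)

    # With an explicit entity list, entities that take part in no relation
    # are still registered and come out as singleton chains.
    for e in entities or ():
        find(e)

    groups: Dict[Hashable, List[Hashable]] = {}
    for e in parent:
        groups.setdefault(find(e), []).append(e)
    # sorted() only for readable output; assumes orderable entity ids.
    return [sorted(g) for g in groups.values()]


# Mirrors test_2_chains: two connected components.
print(collect_chains_sketch({(0, 1), (1, 2), (3, 4)}))        # e.g. [[0, 1, 2], [3, 4]]
# Mirrors test_1_chain: entity 3 has no relation, so it forms a singleton chain.
print(collect_chains_sketch({(0, 1), (1, 2)}, [0, 1, 2, 3]))  # e.g. [[0, 1, 2], [3]]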