def input_edge(self, edge):
    """Make a 'the'-prefixed edge coreferent with the element it wraps.

    An edge qualifies when it is non-atomic, has exactly two elements, its
    first element is an atom whose root is 'the', and its second element
    satisfies ``has_proper_concept``.  A qualifying edge is passed to
    ``make_corefs`` together with its second element, and ``self.corefs`` is
    incremented.
    """
    # Only non-atomic pairs are candidates.
    if edge.is_atom() or len(edge) != 2:
        return

    # The first element must be the bare atom 'the'.
    head = edge[0]
    if not head.is_atom() or head.root() != 'the':
        return

    target = edge[1]
    if not has_proper_concept(target):
        return

    make_corefs(self.hg, edge, target)
    self.corefs += 1
def test_connect_coref_sets(self):
    """Two disjoint coreference sets become one after a single cross link."""
    hg = self.hg
    concepts = self.concepts
    paris = [concepts[i] for i in (0, 1, 2)]
    berlin = [concepts[i] for i in (3, 4, 5)]

    def assert_complete_set(members):
        # Every member must be linked to the first one, share its
        # coreference id, have a non-None id, and the set reported for the
        # first member must be exactly these members.
        head, others = members[0], members[1:]
        for other in others:
            self.assertTrue(are_corefs(hg, head, other))
        for other in others:
            self.assertEqual(coref_id(hg, head), coref_id(hg, other))
        for member in members:
            self.assertIsNotNone(coref_id(hg, member))
        self.assertEqual(coref_set(hg, head), set(members))

    # paris set
    make_corefs(hg, paris[0], paris[1])
    make_corefs(hg, paris[1], paris[2])
    # berlin set
    make_corefs(hg, berlin[0], berlin[1])
    make_corefs(hg, berlin[1], berlin[2])

    assert_complete_set(paris)
    assert_complete_set(berlin)

    # Pairs that straddle the two sets: unrelated before the sets are joined.
    cross_pairs = ((concepts[0], concepts[4]), (concepts[1], concepts[5]))
    for left, right in cross_pairs:
        self.assertFalse(are_corefs(hg, left, right))
    for left, right in cross_pairs:
        self.assertNotEqual(coref_id(hg, left), coref_id(hg, right))

    # connect both
    make_corefs(hg, concepts[0], concepts[5])

    for left, right in cross_pairs:
        self.assertTrue(are_corefs(hg, left, right))
    for left, right in cross_pairs:
        self.assertEqual(coref_id(hg, left), coref_id(hg, right))

    # After the join every concept must report the full concept list.
    everything = set(concepts)
    for concept in concepts:
        self.assertEqual(coref_set(hg, concept), everything)
def test_make_2_corefs(self):
    """Linking two concepts leaves a third, unlinked concept untouched."""
    hg = self.hg
    first, second, outsider = (self.concepts[i] for i in (0, 1, 2))

    make_corefs(hg, first, second)

    # The linked pair is coreferent; the outsider is not.
    self.assertTrue(are_corefs(hg, first, second))
    self.assertFalse(are_corefs(hg, first, outsider))

    # The pair shares a non-None id, while the outsider has none.
    self.assertEqual(coref_id(hg, first), coref_id(hg, second))
    self.assertIsNotNone(coref_id(hg, first))
    self.assertIsNone(coref_id(hg, outsider))

    self.assertEqual(coref_set(hg, first), {first, second})
def test_main_coref(self):
    """Check ``main_coref`` before and after three concepts are chained."""
    hg = self.hg
    trio = [self.concepts[i] for i in (0, 1, 2)]

    # With no coreferences each concept is its own main coreference.
    for concept in trio:
        self.assertEqual(main_coref(hg, concept), concept)

    make_corefs(hg, trio[0], trio[1])
    make_corefs(hg, trio[1], trio[2])

    # Once chained, all three are expected to resolve to the middle concept.
    expected = trio[1]
    for concept in trio:
        self.assertEqual(main_coref(hg, concept), expected)
def input_edge(self, edge):
    """Make a concept coreferent with its dominant subtype when metrics allow.

    Only edges whose type starts with 'c' and for which
    ``subtypes(self.hg, edge)`` yields at least one entry are considered.
    The subtype holding the largest share of the summed subtype degrees is
    the candidate.  It is passed to ``make_corefs`` together with ``edge``
    (and ``self.corefs`` is incremented) only when every threshold in the
    final condition holds.

    Any ratio whose denominator is zero is treated as 0, so such an edge
    fails the thresholds instead of raising ``ZeroDivisionError``.
    """
    if edge.type()[0] != 'c':
        return

    subs = tuple(subtypes(self.hg, edge))
    # check if the concept should be assigned to a synonym set
    if not subs:
        return

    def ratio(numerator, denominator):
        # A zero denominator gives no evidence either way; report 0 so the
        # corresponding threshold simply fails.
        if denominator == 0:
            return 0.
        return float(numerator) / float(denominator)

    # find the subtype with the highest degree, normalizing subtype
    # degrees by their total
    sub_degs = [self.hg.degree(sub) for sub in subs]
    total_deg = sum(sub_degs) or 1
    max_ratio = 0.
    best_pos = -1
    for pos, sub_deg in enumerate(sub_degs):
        sub_ratio = sub_deg / total_deg
        if sub_ratio > max_ratio:
            max_ratio = sub_ratio
            best_pos = pos

    # No subtype has a positive share: there is no candidate.  Without this
    # guard, index -1 would silently select the last subtype.
    if best_pos < 0:
        return
    best_sub = subs[best_pos]

    # compute some degree-related metrics
    sdd = self.hg.deep_degree(best_sub)
    _, rdd = self.hg.root_degrees(edge)
    sub_to_root_dd = ratio(sdd, rdd)
    d = self.hg.degree(edge)
    dd = self.hg.deep_degree(edge)
    r = ratio(d, dd)
    ld, ldd = self.hg.lemma_degrees(edge)
    lr = ratio(ld, ldd)

    # use metric to decide
    if (rdd > 5 and
            max_ratio >= .7 and
            r >= .05 and
            lr >= .05 and
            sub_to_root_dd >= .1 and
            (not edge.is_atom() or len(edge.root()) > 2)):
        make_corefs(self.hg, edge, best_sub)
        self.corefs += 1
def input_edge(self, edge):
    """Make an edge coreferent with its ``unidecode_edge`` form, if present.

    The edge returned by ``unidecode_edge`` (presumably a transliterated
    variant -- confirm against that helper) is linked to ``edge`` through
    ``make_corefs`` only when it differs from ``edge`` and already exists in
    ``self.hg``.  Each link increments ``self.corefs``.
    """
    variant = unidecode_edge(edge)
    # An identical result means there is nothing to link.
    if variant != edge:
        # Only link to variants that are already in the hypergraph.
        if self.hg.exists(variant):
            make_corefs(self.hg, edge, variant)
            self.corefs += 1