def end(self):
    """Assign seeds to their dominant coreference set, then link the sets.

    For every seed in ``self.seeds`` the candidate sets are obtained from
    ``self._corefs_from_seed``.  The set holding the largest share of the
    summed degree is the candidate; the seed is added to it only when all
    of the following hold:

    * the candidate's share of the total degree is at least 0.7,
    * the seed's degree / deep-degree ratio is at least 0.05,
    * the seed's lemma degree / lemma deep-degree ratio is at least 0.05,
    * the candidate's summed deep degree is at least 0.1 of the seed's
      root deep degree,
    * the seed is not an atom, or its root is longer than two characters.

    Every ratio whose denominator is zero is treated as 0, so such a seed
    is rejected instead of raising ``ZeroDivisionError``.

    Afterwards each pair of edges inside every candidate set is passed to
    ``make_corefs`` and ``self.corefs`` is incremented once per pair.
    Progress is reported on a ``progressbar.ProgressBar`` sized to the
    number of seeds.
    """
    print('processing seeds')
    with progressbar.ProgressBar(max_value=len(self.seeds)) as bar:
        for i, seed in enumerate(self.seeds, start=1):
            crefs = self._corefs_from_seed(seed)

            # check if the seed should be assigned to a synonym set
            if len(crefs) > 0:
                # find set with the highest degree and normalize set
                # degrees by total degree
                cref_degs = [self.hg.sum_degree(cref) for cref in crefs]
                total_deg = sum(cref_degs)
                # A zero total would divide by zero below; all ratios are
                # 0 in that case, so any non-zero denominator will do.
                total_deg = 1 if total_deg == 0 else total_deg

                max_ratio = 0.
                best_pos = -1
                for pos, cref_deg in enumerate(cref_degs):
                    ratio = cref_deg / total_deg
                    if ratio > max_ratio:
                        max_ratio = ratio
                        best_pos = pos

                # best_pos stays -1 when no set has a positive share.
                # max_ratio is then 0 and can never reach the 0.7
                # threshold, so skip the metrics rather than index
                # crefs[-1] by accident.
                if best_pos >= 0:
                    best_cref = crefs[best_pos]

                    # compute some degree-related metrics
                    sdd = self.hg.sum_deep_degree(best_cref)
                    _, rdd = self.hg.root_degrees(seed)
                    cref_to_root_dd = \
                        0. if rdd == 0 else float(sdd) / float(rdd)
                    d = self.hg.degree(seed)
                    dd = self.hg.deep_degree(seed)
                    r = 0. if dd == 0 else float(d) / float(dd)
                    ld, ldd = lemma_degrees(self.hg, seed)
                    lr = 0. if ldd == 0 else float(ld) / float(ldd)

                    # use metric to decide
                    if (max_ratio >= .7 and r >= .05 and lr >= .05 and
                            cref_to_root_dd >= .1 and
                            (not seed.is_atom() or len(seed.root()) > 2)):
                        best_cref.add(seed)

            # NOTE(review): nesting was reconstructed from a
            # whitespace-collapsed source; linking is assumed to run for
            # every seed, whether or not it was added to a set -- confirm.
            for cref in crefs:
                for edge1, edge2 in combinations(cref, 2):
                    make_corefs(self.hg, edge1, edge2)
                    self.corefs += 1

            bar.update(i)
def input_edge(self, edge):
    """Link ``edge`` to its dominant subtype when degree metrics allow it.

    Only edges whose type string starts with ``'c'`` are considered.  The
    candidates are the edges yielded by ``subtypes(self.hg, edge)``; the
    one holding the largest share of the candidates' total degree is the
    best candidate.  ``make_corefs`` is called for ``edge`` and that
    candidate, and ``self.corefs`` is incremented, only when all of the
    following hold:

    * the root deep degree of ``edge`` is greater than 5,
    * the best candidate's share of the total degree is at least 0.7,
    * the edge's degree / deep-degree ratio is at least 0.05,
    * the edge's lemma degree / lemma deep-degree ratio is at least 0.05,
    * the candidate's deep degree is at least 0.1 of the root deep degree,
    * the edge is not an atom, or its root is longer than two characters.

    Every ratio whose denominator is zero is treated as 0, so such an
    edge is rejected instead of raising ``ZeroDivisionError``.
    """
    if edge.type()[0] != 'c':
        return

    subs = tuple(subtypes(self.hg, edge))

    # check if the concept should be assigned to a synonym set
    if not subs:
        return

    # find set with the highest degree and normalize set
    # degrees by total degree
    sub_degs = [self.hg.degree(sub) for sub in subs]
    total_deg = sum(sub_degs)
    total_deg = 1 if total_deg == 0 else total_deg

    max_ratio = 0.
    best_pos = -1
    for pos, sub_deg in enumerate(sub_degs):
        ratio = sub_deg / total_deg
        if ratio > max_ratio:
            max_ratio = ratio
            best_pos = pos

    # best_pos stays -1 when no subtype has a positive share.  max_ratio
    # is then 0 and can never reach the 0.7 threshold, so stop here
    # rather than index subs[-1] by accident.
    if best_pos < 0:
        return
    best_sub = subs[best_pos]

    # compute some degree-related metrics
    sdd = self.hg.deep_degree(best_sub)
    _, rdd = self.hg.root_degrees(edge)
    sub_to_root_dd = 0. if rdd == 0 else float(sdd) / float(rdd)
    d = self.hg.degree(edge)
    dd = self.hg.deep_degree(edge)
    r = 0. if dd == 0 else float(d) / float(dd)
    ld, ldd = lemma_degrees(self.hg, edge)
    lr = 0. if ldd == 0 else float(ld) / float(ldd)

    # use metric to decide
    if (rdd > 5 and max_ratio >= .7 and r >= .05 and lr >= .05 and
            sub_to_root_dd >= .1 and
            (not edge.is_atom() or len(edge.root()) > 2)):
        make_corefs(self.hg, edge, best_sub)
        self.corefs += 1