コード例 #1
0
    def end(self):
        """Assign seeds to coreference sets and link the members of each set.

        For every seed in ``self.seeds`` the candidate sets returned by
        ``self._corefs_from_seed`` are ranked by their share of the total
        ``sum_degree``.  The seed is added to the top-ranked set only when
        all degree-based thresholds in the decision below are satisfied.
        Regardless of that decision, every pair of edges inside each
        candidate set is passed to ``make_corefs`` and ``self.corefs`` is
        incremented once per pair.

        Progress is reported through a ``progressbar.ProgressBar`` that is
        updated once per seed.

        Any ratio whose denominator is zero is treated as 0 (so the seed
        simply fails the thresholds) instead of raising
        ``ZeroDivisionError``.
        """
        print('processing seeds')
        with progressbar.ProgressBar(max_value=len(self.seeds)) as bar:
            for i, seed in enumerate(self.seeds, start=1):
                crefs = self._corefs_from_seed(seed)

                # check if the seed should be assigned to a synonym set
                if len(crefs) > 0:
                    # find set with the highest degree and normalize set
                    # degrees by total degree; a zero total is replaced by 1
                    # so that every ratio becomes 0 rather than dividing by 0
                    cref_degs = [self.hg.sum_degree(cref) for cref in crefs]
                    total_deg = sum(cref_degs) or 1

                    # first set with the strictly highest ratio wins; when
                    # every ratio is 0, best_pos stays -1 (i.e. the last
                    # set), which is harmless because max_ratio is then 0
                    # and the threshold check below cannot pass
                    max_ratio = 0.
                    best_pos = -1
                    for pos, cref_deg in enumerate(cref_degs):
                        ratio = cref_deg / total_deg
                        if ratio > max_ratio:
                            max_ratio = ratio
                            best_pos = pos

                    # compute some degree-related metrics
                    sdd = self.hg.sum_deep_degree(crefs[best_pos])
                    rd, rdd = self.hg.root_degrees(seed)
                    cref_to_root_dd = \
                        0. if rdd == 0 else float(sdd) / float(rdd)
                    d = self.hg.degree(seed)
                    dd = self.hg.deep_degree(seed)
                    r = 0. if dd == 0 else float(d) / float(dd)
                    ld, ldd = lemma_degrees(self.hg, seed)
                    lr = 0. if ldd == 0 else float(ld) / float(ldd)

                    # use metric to decide
                    if (max_ratio >= .7 and r >= .05 and lr >= .05
                            and cref_to_root_dd >= .1
                            and (not seed.is_atom() or len(seed.root()) > 2)):

                        crefs[best_pos].add(seed)

                    # link every pair of edges within each set, counting
                    # one coreference per pair
                    for cref in crefs:
                        for edge1, edge2 in combinations(cref, 2):
                            make_corefs(self.hg, edge1, edge2)
                            self.corefs += 1

                bar.update(i)
コード例 #2
0
    def input_edge(self, edge):
        """Link a concept edge to its dominant subtype as a coreference.

        Edges whose type does not start with ``'c'`` are ignored, as are
        concepts for which ``subtypes`` yields nothing.  Otherwise the
        subtype holding the largest share of the summed degree is selected
        and, if all degree-based thresholds in the decision below are met,
        ``make_corefs`` is called for the edge and that subtype and
        ``self.corefs`` is incremented.

        Any ratio whose denominator is zero is treated as 0 (so the edge
        simply fails the thresholds) instead of raising
        ``ZeroDivisionError``.

        Args:
            edge: the edge to examine; must provide ``type()``,
                ``is_atom()`` and ``root()``.
        """
        if edge.type()[0] != 'c':
            return

        subs = tuple(subtypes(self.hg, edge))

        # check if the concept should be assigned to a synonym set
        if not subs:
            return

        # find set with the highest degree and normalize set degrees by
        # total degree; a zero total is replaced by 1 so every ratio is 0
        sub_degs = [self.hg.degree(sub) for sub in subs]
        total_deg = sum(sub_degs) or 1

        # first subtype with the strictly highest ratio wins; when every
        # ratio is 0, best_pos stays -1 (i.e. the last subtype), which is
        # harmless because max_ratio is then 0 and the check below fails
        max_ratio = 0.
        best_pos = -1
        for pos, sub_deg in enumerate(sub_degs):
            ratio = sub_deg / total_deg
            if ratio > max_ratio:
                max_ratio = ratio
                best_pos = pos

        # compute some degree-related metrics
        sdd = self.hg.deep_degree(subs[best_pos])
        _, rdd = self.hg.root_degrees(edge)
        sub_to_root_dd = 0. if rdd == 0 else float(sdd) / float(rdd)
        d = self.hg.degree(edge)
        dd = self.hg.deep_degree(edge)
        r = 0. if dd == 0 else float(d) / float(dd)
        ld, ldd = lemma_degrees(self.hg, edge)
        lr = 0. if ldd == 0 else float(ld) / float(ldd)

        # use metric to decide
        if (rdd > 5 and max_ratio >= .7 and r >= .05 and lr >= .05
                and sub_to_root_dd >= .1
                and (not edge.is_atom() or len(edge.root()) > 2)):

            make_corefs(self.hg, edge, subs[best_pos])
            self.corefs += 1