예제 #1
0
    def process_edge(self, edge, depth):
        """Yield coref ops between an edge and its unidecoded counterpart.

        The edge is passed through ``unidecode_edge``. Ops are only produced
        when the result differs from the original edge and already exists in
        the hypergraph; in that case ``self.corefs`` is incremented once.

        Args:
            edge: the edge currently being processed.
            depth: not used by this method.

        Yields:
            The ops produced by ``make_corefs_ops`` for the two edges.
        """
        hg = self.system.get_hg(self)

        decoded = unidecode_edge(edge)
        was_changed = decoded != edge
        # Nothing to connect if decoding changed nothing, or if the decoded
        # form is not present in the hypergraph.
        if not (was_changed and hg.exists(decoded)):
            return

        self.corefs += 1
        yield from make_corefs_ops(hg, edge, decoded)
예제 #2
0
    def process_edge(self, edge, depth):
        """Yield coref ops between a ``(the x)`` edge and its second element.

        An edge matches when it is not an atom, has exactly two elements, its
        first element is an atom whose root is ``'the'`` and
        ``has_proper_concept`` accepts its second element. For a matching
        edge ``self.corefs`` is incremented once.

        Args:
            edge: the edge currently being processed.
            depth: not used by this method.

        Yields:
            The ops produced by ``make_corefs_ops`` for the edge and its
            second element.
        """
        hg = self.system.get_hg(self)

        # Only non-atomic edges with exactly two elements can match.
        if edge.is_atom() or not len(edge) == 2:
            return

        head = edge[0]
        if not (head.is_atom() and head.root() == 'the'):
            return

        target = edge[1]
        if not has_proper_concept(target):
            return

        self.corefs += 1
        yield from make_corefs_ops(hg, edge, target)
예제 #3
0
    def on_end(self):
        """Process every collected seed and yield the resulting coref ops.

        Iterates over ``self.seeds``, delegating the per-seed work to
        ``_seed_corefs_ops``, and advances a progress bar once per seed.
        Seeds that produce no ops are counted too, so the bar reaches its
        maximum value.

        Yields:
            The ops produced by ``make_corefs_ops`` for each pair of edges
            found in the same coreference set.
        """
        hg = self.system.get_hg(self)

        self.logger.info('processing seeds')
        with progressbar.ProgressBar(max_value=len(self.seeds)) as bar:
            for processed, seed in enumerate(self.seeds, start=1):
                yield from self._seed_corefs_ops(hg, seed)
                # Count every seed, including skipped ones, so the bar keeps
                # pace with the iteration.
                bar.update(processed)

    def _seed_corefs_ops(self, hg, seed):
        """Yield the coref ops derived from the coreference sets of one seed.

        Args:
            hg: the hypergraph the degrees are read from.
            seed: the seed whose coreference sets are evaluated.

        Yields:
            The ops produced by ``make_corefs_ops`` for every pair of edges
            within each coreference set returned by ``corefs_from_seed``.
        """
        crefs = self.corefs_from_seed(seed)

        # check if the seed should be assigned to a synonym set
        if len(crefs) == 0:
            return

        # find set with the highest degree and normalize set degrees by
        # total degree; a zero total leaves nothing to normalize by, so the
        # seed is skipped entirely
        cref_degs = [hg.sum_deep_degree(cref) for cref in crefs]
        total_deg = sum(cref_degs)
        if total_deg == 0:
            return

        cref_ratios = [cref_deg / total_deg for cref_deg in cref_degs]
        # The ratios sum to 1, so the maximum is strictly positive;
        # ``index`` returns the first set that reaches it.
        max_ratio = max(cref_ratios)
        best_pos = cref_ratios.index(max_ratio)

        dd = hg.deep_degree(seed)

        # ensure that the seed is used by itself
        if total_deg < dd:
            self.logger.debug('seed: {}'.format(seed))
            self.logger.debug('crefs: {}'.format(crefs))
            self.logger.debug('max_ratio: {}'.format(max_ratio))
            self.logger.debug('total coref dd: {}'.format(total_deg))
            self.logger.debug('seed dd: {}'.format(dd))

            # add seed if coreference set is sufficiently dominant
            if max_ratio >= .7:
                crefs[best_pos].add(seed)
                self.logger.debug('seed added to cref: {}'.format(
                    crefs[best_pos]))

        for cref in crefs:
            for edge1, edge2 in itertools.combinations(cref, 2):
                self.logger.debug('are corefs: {} | {}'.format(
                    edge1.to_str(), edge2.to_str()))
                self.corefs += 1
                yield from make_corefs_ops(hg, edge1, edge2)
예제 #4
0
    def process_edge(self, edge, depth):
        """Yield coref ops between a concept and its dominant subtype.

        Only edges whose type starts with ``'C'`` and that are not yet in
        ``self.done`` are considered; such edges are added to ``self.done``.
        The deep degree of every subtype returned by ``subtypes`` is
        normalized by the total over all subtypes. Ops are produced when the
        edge's own deep degree exceeds that of the highest-ratio subtype and
        that subtype's ratio is at least 0.7.

        Args:
            edge: the edge currently being processed.
            depth: not used by this method.

        Yields:
            The ops produced by ``make_corefs_ops`` for the edge and its
            dominant subtype.
        """
        hg = self.system.get_hg(self)

        if edge.type()[0] != 'C' or edge in self.done:
            return
        self.done.add(edge)

        subs = tuple(subtypes(hg, edge))

        # check if the concept should be assigned to a synonym set
        if len(subs) == 0:
            return

        # find subtype with the highest degree and normalize degrees by
        # total degree (falling back to 1 to avoid dividing by zero)
        sub_degs = [hg.deep_degree(sub) for sub in subs]
        total_deg = sum(sub_degs) or 1
        sub_ratios = [sub_deg / total_deg for sub_deg in sub_degs]
        # ``index`` returns the first subtype reaching the maximum, so
        # best_pos is always a valid position, even when all ratios are zero.
        max_ratio = max(sub_ratios)
        best_pos = sub_ratios.index(max_ratio)

        # compute some degree-related metrics; the subtype's degree was
        # already fetched above, so it is reused instead of queried again
        sdd = sub_degs[best_pos]
        dd = hg.deep_degree(edge)

        if dd <= sdd:
            return

        sdd_dd = float(sdd) / float(dd)

        self.logger.debug('concept: {}'.format(edge.to_str()))
        self.logger.debug('subconcepts: {}'.format(subs))
        self.logger.debug('# subs: {}'.format(len(subs)))
        self.logger.debug('max_ratio: {}'.format(max_ratio))
        self.logger.debug('sdd: {}'.format(sdd))
        self.logger.debug('dd: {}'.format(dd))
        self.logger.debug('sdd_dd: {}'.format(sdd_dd))

        if max_ratio < .7:
            return

        edge1 = edge
        edge2 = subs[best_pos]

        self.logger.debug('are corefs: {} | {}'.format(
            edge1.to_str(), edge2.to_str()))

        self.corefs += 1
        yield from make_corefs_ops(hg, edge1, edge2)
예제 #5
0
    def _make_singular_plural_relation(self, singular, plural):
        """Yield the ops relating a singular edge to its plural edge.

        First yields the ops from ``make_singular_plural_ops`` and then those
        from ``make_corefs_ops``, incrementing ``self.sng_pl`` and
        ``self.corefs`` respectively after each group has been consumed.
        The same is then done recursively for every subtype of ``singular``
        whose counterpart, obtained with ``replace_main_concept(plural)``,
        exists in the hypergraph.

        Args:
            singular: the singular edge.
            plural: the plural edge.

        Yields:
            The ops for this pair, followed by the ops for the matching
            subtype pairs.
        """
        hg = self.system.get_hg(self)

        self.logger.debug('singular: {}; plural: {}'.format(singular, plural))

        yield from make_singular_plural_ops(hg, singular, plural)
        self.sng_pl += 1

        yield from make_corefs_ops(hg, singular, plural)
        self.corefs += 1

        for sub_singular in subtypes(hg, singular):
            sub_plural = sub_singular.replace_main_concept(plural)
            # Skip subtypes with no plural counterpart in the hypergraph.
            if not sub_plural or not hg.exists(sub_plural):
                continue
            yield from self._make_singular_plural_relation(sub_singular,
                                                           sub_plural)