Beispiel #1
0
def edge2str(edge, namespaces=True):
    """Render an edge, or a single symbol, as text.

    A value whose symbol type is ``EDGE`` is written as its members
    (see ``nodes2str``) wrapped in parentheses. Any other value is written
    with ``str``; when ``namespaces`` is false, ``sym.root`` is applied to
    it first.
    """
    is_edge = sym.sym_type(edge) == sym.SymbolType.EDGE
    if is_edge:
        inner = nodes2str(edge, namespaces)
        return '(%s)' % inner

    # Non-edge value: optionally reduce it to its root before stringifying.
    atom = edge if namespaces else sym.root(edge)
    return str(atom)
Beispiel #2
0
def nodes2str(edge, namespaces=True):
    """Join the string forms of an edge's members with single spaces.

    Members whose symbol type is ``EDGE`` are rendered through ``edge2str``
    (and therefore carry their own parentheses). Other members are rendered
    with ``str``, after ``sym.root`` when ``namespaces`` is false. No
    enclosing parentheses are added here.
    """
    def render(member):
        # Nested edges recurse; everything else is stringified directly.
        if sym.sym_type(member) == sym.SymbolType.EDGE:
            return edge2str(member, namespaces)
        return str(member if namespaces else sym.root(member))

    return ' '.join(render(member) for member in edge)
Beispiel #3
0
 def test_root(self):
     """Check ``sym.root`` on namespaced, plain, URL-like and numeric inputs."""
     # (argument, expected result) pairs, checked in order.
     expectations = (
         ('graphbrain/1', 'graphbrain'),
         ('graphbrain', 'graphbrain'),
         ('http://graphbrain.org', 'http://graphbrain.org'),
         (1, 1),
         (1., 1.),
     )
     for given, expected in expectations:
         self.assertEqual(sym.root(given), expected)
Beispiel #4
0
 def test_root(self):
     """Check ``sym.root`` on namespaced, plain, URL-like and numeric inputs."""
     # (argument, expected result) pairs, checked in order.
     expectations = (
         ('graphbrain/1', 'graphbrain'),
         ('graphbrain', 'graphbrain'),
         ('http://graphbrain.org', 'http://graphbrain.org'),
         (1, 1),
         (1., 1.),
     )
     for given, expected in expectations:
         self.assertEqual(sym.root(given), expected)
Beispiel #5
0
    def process_entity(self, entity_id, exclude):
        """Disambiguate an entity and, recursively, its child entities.

        The entity is looked up in ``self.output.tree``. A set of candidate
        root symbols is built from its text and handed to
        ``self.disamb.best_sense``; the children (if the entity is a node)
        are then processed the same way. If no child reports metrics that
        are ``better_than`` the running best, the entity itself receives a
        namespace (and, when the chosen sense only matches through a lemma,
        an extra edge is appended to ``self.output.edges``).

        Profiling figures for the call are stored in ``self.profiling``
        under the entity's text.

        Args:
            entity_id: id used to fetch the entity from ``self.output.tree``.
            exclude: list of entity texts; a copy extended with this
                entity's text is passed to the recursive calls. It is not
                otherwise read in this method.

        Returns:
            The metrics obtained for this entity, replaced by a child's
            metrics each time a child's result is ``better_than`` the
            current value.
        """
        start = time.time()
        entity = self.output.tree.get(entity_id)

        # profiling: one record per entity text; a later call with the same
        # text replaces the record created by an earlier one
        prof_key = entity.as_text()
        self.profiling[prof_key] = {}
        self.profiling[prof_key]['candidates'] = 0
        self.profiling[prof_key]['words1'] = 0
        self.profiling[prof_key]['words2'] = 0

        # candidate roots: the entity text itself, plus a lemmatised variant
        # (the token lemma for a leaf; otherwise the words with only the
        # last one replaced by its lemma)
        roots = {sym.str2symbol(entity.as_text())}
        if entity.is_leaf():
            roots.add(sym.str2symbol(entity.token.lemma))
        else:
            words = entity.as_label_list()
            lemmas = entity.as_label_list(lemmas=True)
            lemma_at_end = ' '.join(words[:-1] + [lemmas[-1]])
            roots.add(sym.str2symbol(lemma_at_end))
        # namespaces stays None unless force_wordnet() asks for the two
        # 'wn.' prefixes; the value is passed through to best_sense()
        namespaces = None
        if force_wordnet(entity):
            namespaces = ('wn.', 'lem.wn.')

        # leaf tokens tagged ADP or CONJ (presumably adpositions and
        # conjunctions -- confirm against the tagger) skip the sense lookup
        # and get default metrics
        if entity.is_leaf() and entity.token.pos in {'ADP', 'CONJ'}:
            disamb_ent = None
            metrics = CandidateMetrics()
        else:
            disamb_ent, metrics = self.disamb.best_sense(
                roots, self.aux_text, namespaces)
            # profiling: counters read from the disambiguator right after
            # the best_sense() call above
            self.profiling[prof_key]['candidates'] = self.disamb.candidates
            self.profiling[prof_key]['words1'] = self.disamb.words1
            self.profiling[prof_key]['words2'] = self.disamb.words2

        logging.info('[disamb] text: %s; entity: %s; metrics: %s' %
                     (entity.as_text(), disamb_ent, metrics))

        # work on a copy so the caller's list is not mutated
        exclude = exclude[:]
        exclude.append(entity.as_text())

        # process the children; any child whose metrics beat the running
        # best cancels entity creation at this level and becomes the new best
        make_entity = True
        if entity.is_node():
            for child_id in entity.children_ids:
                m = self.process_entity(child_id, exclude)
                if m.better_than(metrics):
                    make_entity = False
                    metrics = m

        if make_entity:
            if disamb_ent is None:
                # no sense was selected: let the entity create its namespace
                entity.generate_namespace()
            else:
                # the entity text matches the root of the chosen symbol:
                # reuse that symbol's namespace as is
                if entity.as_text() == sym.root(disamb_ent):
                    entity.namespace = sym.nspace(disamb_ent)
                # entity with shared lemma
                else:
                    entity.namespace = '%s.%s' % (
                        const.lemma_derived_namespace, sym.nspace(disamb_ent))
                    # additional edge for shared lemma
                    self.output.edges.append(
                        (const.have_same_lemma, entity.to_hyperedge(),
                         disamb_ent))
            if entity.is_node():
                entity.compound = True
        elif entity.is_node():
            # a child won: mark as compound only if is_compound() says so
            if self.is_compound(entity):
                entity.compound = True

        # profiling: elapsed wall-clock time, recursive calls included
        self.profiling[prof_key]['time'] = time.time() - start

        return metrics