def edge2str(edge, namespaces=True):
    """Convert an edge to its string representation.

    An edge symbol is rendered recursively inside parentheses; an atomic
    symbol is rendered as-is, or reduced to its root when *namespaces*
    is False.
    """
    # Atomic (non-edge) symbol: render directly, optionally stripping
    # the namespace down to the root.
    if sym.sym_type(edge) != sym.SymbolType.EDGE:
        return str(edge) if namespaces else str(sym.root(edge))
    # Edge: delegate to nodes2str and wrap in parentheses.
    return '(%s)' % nodes2str(edge, namespaces)
def nodes2str(edge, namespaces=True):
    """Convert a collection of nodes to a string representation (no outer parenthesis)."""
    def render(node):
        # Nested edges recurse through edge2str; atomic symbols are
        # rendered directly, optionally reduced to their root.
        if sym.sym_type(node) == sym.SymbolType.EDGE:
            return edge2str(node, namespaces)
        return str(node) if namespaces else str(sym.root(node))

    return ' '.join(render(node) for node in edge)
def test_root(self):
    """sym.root keeps the root part of a symbol and passes non-symbol values through."""
    cases = [
        ('graphbrain/1', 'graphbrain'),
        ('graphbrain', 'graphbrain'),
        ('http://graphbrain.org', 'http://graphbrain.org'),
        (1, 1),
        (1., 1.),
    ]
    for value, expected in cases:
        self.assertEqual(sym.root(value), expected)
def process_entity(self, entity_id, exclude):
    """Recursively disambiguate an entity in the output tree.

    Looks up the entity by *entity_id*, builds a set of candidate root
    symbols (text plus lemma variants), asks the disambiguator for the
    best sense, and recurses into children. The best metrics found in
    the subtree decide whether this entity itself becomes the named
    entity (gets a namespace) or defers to a child. Side effects:
    records per-entity timings/counters in self.profiling, may set
    entity.namespace / entity.compound, and may append a shared-lemma
    edge to self.output.edges. Returns the winning CandidateMetrics.
    """
    start = time.time()
    entity = self.output.tree.get(entity_id)
    # profiling: initialise counters so they exist even on branches
    # that skip disambiguation
    prof_key = entity.as_text()
    self.profiling[prof_key] = {}
    self.profiling[prof_key]['candidates'] = 0
    self.profiling[prof_key]['words1'] = 0
    self.profiling[prof_key]['words2'] = 0
    # candidate roots: the surface text plus a lemma-based variant
    roots = {sym.str2symbol(entity.as_text())}
    if entity.is_leaf():
        roots.add(sym.str2symbol(entity.token.lemma))
    else:
        # for multi-word entities, lemmatise only the last word
        words = entity.as_label_list()
        lemmas = entity.as_label_list(lemmas=True)
        lemma_at_end = ' '.join(words[:-1] + [lemmas[-1]])
        roots.add(sym.str2symbol(lemma_at_end))
    # NOTE(review): force_wordnet is referenced as a bare name — presumably
    # a module-level helper imported elsewhere in this file; verify.
    namespaces = None
    if force_wordnet(entity):
        # restrict candidate senses to WordNet-derived namespaces
        namespaces = ('wn.', 'lem.wn.')
    if entity.is_leaf() and entity.token.pos in {'ADP', 'CONJ'}:
        # adpositions/conjunctions are never disambiguated; use empty metrics
        disamb_ent = None
        metrics = CandidateMetrics()
    else:
        disamb_ent, metrics = self.disamb.best_sense(
            roots, self.aux_text, namespaces)
        # profiling: copy the disambiguator's counters for this entity
        self.profiling[prof_key]['candidates'] = self.disamb.candidates
        self.profiling[prof_key]['words1'] = self.disamb.words1
        self.profiling[prof_key]['words2'] = self.disamb.words2
    logging.info('[disamb] text: %s; entity: %s; metrics: %s'
                 % (entity.as_text(), disamb_ent, metrics))
    # copy before appending so the caller's list is not mutated
    exclude = exclude[:]
    exclude.append(entity.as_text())
    # NOTE(review): exclude is extended and passed to children but not
    # visibly consumed here (best_sense does not receive it) — confirm
    # it is still needed.
    make_entity = True
    if entity.is_node():
        # recurse; if any child scores better, that child wins and this
        # node does not become an entity itself
        for child_id in entity.children_ids:
            m = self.process_entity(child_id, exclude)
            if m.better_than(metrics):
                make_entity = False
                metrics = m
    if make_entity:
        if disamb_ent is None:
            # no known sense: mint a fresh namespace for this entity
            entity.generate_namespace()
        else:
            if entity.as_text() == sym.root(disamb_ent):
                # exact surface match: adopt the disambiguated namespace
                entity.namespace = sym.nspace(disamb_ent)
            # entity with shared lemma
            else:
                entity.namespace = '%s.%s' % (
                    const.lemma_derived_namespace,
                    sym.nspace(disamb_ent))
                # additional edge for shared lemma
                self.output.edges.append(
                    (const.have_same_lemma,
                     entity.to_hyperedge(),
                     disamb_ent))
        if entity.is_node():
            entity.compound = True
    elif entity.is_node():
        # a child won; this node may still be marked as a compound
        if self.is_compound(entity):
            entity.compound = True
    # profiling
    self.profiling[prof_key]['time'] = time.time() - start
    return metrics