def to_hyperedge(self, with_namespaces=True):
    if not with_namespaces:
        s = sym.str2symbol(self.token.word)
    else:
        s = sym.build(self.token.word, self.namespace)
    if self.connector:
        s = '+%s' % s
    return s

def to_hyperedge(self, with_namespaces=True):
    """Convert this node into a hyperedge: a single compound symbol built
    from its leaf words if the node is marked as compound, otherwise a
    tuple of its children's hyperedges."""
    if self.compound:
        words = [leaf.token.word for leaf in self.natural_leaf_sequence()]
        if not with_namespaces:
            s = sym.str2symbol('_'.join(words))
        else:
            if not self.namespace:
                self.generate_namespace()
            s = sym.build('_'.join(words), self.namespace)
        return s
    else:
        return tuple([child.to_hyperedge(with_namespaces=with_namespaces)
                      for child in self.children()])
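
# --- Hedged usage sketch (not part of the original source) ----------------
# The two to_hyperedge() methods above turn a parse tree into a hyperedge:
# leaves and compound nodes become single symbols, non-compound nodes become
# nested tuples of their children's hyperedges. The toy classes below only
# illustrate that recursive shape; they are simplified stand-ins for the real
# leaf/node types, and the lower/underscore normalisation is an assumption
# standing in for sym.str2symbol / sym.build.

class ToyLeaf:
    def __init__(self, word):
        self.word = word

    def to_hyperedge(self):
        # stand-in for sym.str2symbol / sym.build
        return self.word.lower().replace(' ', '_')


class ToyNode:
    def __init__(self, children, compound=False):
        self.children = children
        self.compound = compound

    def leaf_words(self):
        words = []
        for child in self.children:
            if isinstance(child, ToyLeaf):
                words.append(child.word)
            else:
                words.extend(child.leaf_words())
        return words

    def to_hyperedge(self):
        if self.compound:
            # compound node: one symbol built from all its leaf words
            return '_'.join(w.lower() for w in self.leaf_words())
        # non-compound node: a tuple of the children's hyperedges
        return tuple(child.to_hyperedge() for child in self.children)


# example:
#   tree = ToyNode([ToyLeaf('is'),
#                   ToyNode([ToyLeaf('Berlin')], compound=True),
#                   ToyNode([ToyLeaf('a'), ToyLeaf('city')])])
#   tree.to_hyperedge()  ->  ('is', 'berlin', ('a', 'city'))
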
def command_search(self, params):
    """Print every symbol in the hypergraph whose root matches the search
    term built from the given parameters."""
    term = ' '.join(params)
    root = sym.str2symbol(term)
    symbols = self.hg.symbols_with_root(root)
    for symbol in symbols:
        print(str(symbol))
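
# Hedged usage note (illustrative, not from the original source): assuming
# this method belongs to an interactive shell object bound to a hypergraph,
# a call such as
#
#     shell.command_search(['barack', 'obama'])
#
# would print every symbol whose root is the symbol built from
# 'barack obama'. The name `shell` is hypothetical.
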
def process_entity(self, entity_id, exclude):
    """Disambiguate an entity and, recursively, its children; assign it a
    namespace and decide whether it should become a compound."""
    start = time.time()
    entity = self.output.tree.get(entity_id)

    # profiling
    prof_key = entity.as_text()
    self.profiling[prof_key] = {}
    self.profiling[prof_key]['candidates'] = 0
    self.profiling[prof_key]['words1'] = 0
    self.profiling[prof_key]['words2'] = 0

    # candidate roots: the entity text itself plus a lemma-based variant
    roots = {sym.str2symbol(entity.as_text())}
    if entity.is_leaf():
        roots.add(sym.str2symbol(entity.token.lemma))
    else:
        words = entity.as_label_list()
        lemmas = entity.as_label_list(lemmas=True)
        lemma_at_end = ' '.join(words[:-1] + [lemmas[-1]])
        roots.add(sym.str2symbol(lemma_at_end))

    namespaces = None
    if force_wordnet(entity):
        namespaces = ('wn.', 'lem.wn.')

    if entity.is_leaf() and entity.token.pos in {'ADP', 'CONJ'}:
        disamb_ent = None
        metrics = CandidateMetrics()
    else:
        disamb_ent, metrics = self.disamb.best_sense(
            roots, self.aux_text, namespaces)
        # profiling
        self.profiling[prof_key]['candidates'] = self.disamb.candidates
        self.profiling[prof_key]['words1'] = self.disamb.words1
        self.profiling[prof_key]['words2'] = self.disamb.words2

    logging.info('[disamb] text: %s; entity: %s; metrics: %s'
                 % (entity.as_text(), disamb_ent, metrics))

    exclude = exclude[:]
    exclude.append(entity.as_text())

    # recurse into children; only make this entity its own symbol if no
    # child disambiguated better
    make_entity = True
    if entity.is_node():
        for child_id in entity.children_ids:
            m = self.process_entity(child_id, exclude)
            if m.better_than(metrics):
                make_entity = False
                metrics = m

    if make_entity:
        if disamb_ent is None:
            entity.generate_namespace()
        else:
            if entity.as_text() == sym.root(disamb_ent):
                entity.namespace = sym.nspace(disamb_ent)
            # entity with shared lemma
            else:
                entity.namespace = '%s.%s' % (
                    const.lemma_derived_namespace, sym.nspace(disamb_ent))
                # additional edge for shared lemma
                self.output.edges.append(
                    (const.have_same_lemma, entity.to_hyperedge(), disamb_ent))
        if entity.is_node():
            entity.compound = True
    elif entity.is_node():
        if self.is_compound(entity):
            entity.compound = True

    # profiling
    self.profiling[prof_key]['time'] = time.time() - start

    return metrics
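
# --- Hedged sketch of the recursion pattern above (not original code) -----
# process_entity() descends the tree, keeps the best CandidateMetrics found
# anywhere below, and only turns the current entity into its own symbol when
# no child disambiguated better. The toy function below reproduces just that
# control flow, with plain floats standing in for CandidateMetrics and a
# hypothetical dict-based node structure.

def best_score(node, scores):
    """Return the best score in the subtree rooted at `node`; mark the node
    as make_entity only if no child scored better than it did."""
    best = scores.get(node['id'], 0.0)
    make_entity = True
    for child in node.get('children', []):
        child_best = best_score(child, scores)
        if child_best > best:
            make_entity = False
            best = child_best
    node['make_entity'] = make_entity
    return best

# example:
#   best_score({'id': 'a', 'children': [{'id': 'b', 'children': []}]},
#              {'a': 0.2, 'b': 0.7})  ->  0.7,
#   and node 'a' gets make_entity = False because its child scored better.
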
def add(self, wikidata_id, label):
    """Store the mapping from a Wikidata id to the symbol derived from its
    label in the entities table."""
    cur = self.grab_cursor()
    symbol = sym.str2symbol(label)
    cur.execute('INSERT INTO entities (wikidata_id, symbol) VALUES (?, ?)',
                (wikidata_id, symbol))
    self.release_cursor()
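
# --- Hedged standalone sketch (not from the original source) --------------
# The same insert pattern with the standard sqlite3 module, so it can be run
# on its own. The table definition and the lower/underscore normalisation are
# assumptions standing in for the real schema and for sym.str2symbol.

import sqlite3


def demo_add(conn, wikidata_id, label):
    symbol = label.lower().replace(' ', '_')  # stand-in for sym.str2symbol
    conn.execute('INSERT INTO entities (wikidata_id, symbol) VALUES (?, ?)',
                 (wikidata_id, symbol))
    conn.commit()


conn = sqlite3.connect(':memory:')
conn.execute('CREATE TABLE entities (wikidata_id TEXT, symbol TEXT)')
demo_add(conn, 'Q64', 'Berlin')
print(conn.execute('SELECT * FROM entities').fetchall())  # [('Q64', 'berlin')]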