# NOTE(review): collapsed fragment — original line breaks/indentation were lost.
# It begins MID-METHOD: this is the tail of a candidate-scoring loop inside what
# appears to be a `best_sense` method (scores each candidate symbol by word
# similarity + hypergraph degree, keeps the best `CandidateMetrics`), followed
# by `return best, best_cm` and a separate `__main__` demo block. The enclosing
# `def` / loop header is outside this view, so the code is left byte-identical.
# NOTE(review): `self.words2 += len(words1)` looks inconsistent — presumably it
# should accumulate `len(words2)` (or the counter is misnamed) — TODO confirm
# against the full method before reformatting.
# NOTE(review): in the demo block, `r1`, `text1` and `text3` are assigned but
# never used; only `d.best_sense(r2, text2)` is exercised.
words2 = self.words_around_symbol(candidate) self.words2 += len(words1) cm = CandidateMetrics() cm.score = self.words_similarity(words1, words2, exclude) cm.degree = ksyn.degree(self.hg, candidate) logging.info('%s %s' % (candidate, cm)) if cm.better_than(best_cm): best_cm = cm best = candidate self.best_sense_t += time.time() - start return best, best_cm if __name__ == '__main__': hgr = hyperg.HyperGraph({ 'backend': 'leveldb', 'hg': 'wordnet_wikidata.hg' }) p = par.Parser() d = Disambiguation(hgr, p) r1 = ['stocks', 'stock'] text1 = "Chinese stocks end year with double-digit losses" r2 = ['cambridge'] text2 = "Cambridge near Boston in the United States." text3 = "Cambridge near London in England." print(d.best_sense(r2, text2))
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with GraphBrain. If not, see <http://www.gnu.org/licenses/>.


import operator

import numpy as np
from sklearn.cluster import DBSCAN

import gb.tools.json as json_tools
import gb.hypergraph.edge as ed
import gb.nlp.parser as par
from gb.explore.similarity import edge_similarity


if __name__ == '__main__':
    print('creating parser...')
    # NOTE(review): this rebinds the module name `par` to a Parser instance;
    # kept as-is because code past this chunk may rely on the rebound name.
    par = par.Parser()
    print('parser created.')

    edge_data = json_tools.read('edges_similar_concepts.json')

    # Tally how often each edge part appears WITHOUT being matched to a
    # similar concept; keys are the parts' string representations.
    extra_edges = {}
    for item in edge_data:
        edge = ed.str2edge(item['edge'])
        matched = [ed.str2edge(match[1]) for match in item['matches']]
        for part in edge[1:]:
            if part not in matched:
                key = ed.edge2str(part)
                extra_edges[key] = extra_edges.get(key, 0) + 1
def headlines_inference(hg, predicates_file):
    """Run headline inference on hypergraph *hg*.

    Builds a fresh NLP parser and delegates the whole pipeline to
    Headlines(...).process(), using the predicate patterns in
    *predicates_file*.
    """
    Headlines(hg, par.Parser(), predicates_file).process()
# NOTE(review): collapsed fragment — original line breaks/indentation were lost.
# It begins MID-METHOD: this is the tail of a per-entity metrics loop (the
# enclosing `def` and loop header are outside this view), which stores
# Herfindahl-style concentration (`h_*`) and total (`total_*`) values computed
# by `herfindhal_and_total` for the combined conflict_to+conflict_from lists
# and for conflict_over / conflict_for, then writes the metrics and advances a
# progress bar. It is followed by a complete `process()` driver method
# (find_actors -> infer -> compute_metrics) and a `__main__` entry that runs
# the pipeline on 'infer.hg' with 'predicate_patterns.csv'. Left byte-identical
# because the missing context cannot be reconstructed safely.
self.entities[entity]['total_conflict_from'] = total h, total = herfindhal_and_total(self.entities[entity]['conflict_to'] + self.entities[entity]['conflict_from']) self.entities[entity]['h_conflict'] = h self.entities[entity]['total_conflict'] = total h, total = herfindhal_and_total(self.entities[entity]['conflict_over']) self.entities[entity]['h_conflict_over'] = h self.entities[entity]['total_conflict_over'] = total h, total = herfindhal_and_total(self.entities[entity]['conflict_for']) self.entities[entity]['h_conflict_for'] = h self.entities[entity]['total_conflict_for'] = total self.write_metrics(entity) i += 1 bar.update(i) def process(self): self.find_actors() self.infer() self.compute_metrics() if __name__ == '__main__': hgr = HyperGraph({'backend': 'leveldb', 'hg': 'infer.hg'}) parse = par.Parser() Headlines(hgr, parse, 'predicate_patterns.csv').process()
def generate(hg):
    """Build the meronomy for hypergraph *hg* and write synonym sets into it.

    Three passes: (1) feed every belief edge to the Meronomy, (2) run its
    post-assignments over the same edges, (3) after generating/normalizing
    the meronomy graph and its synonym sets, label each set and record
    `are_synonyms` / `has_label` edges back into *hg*.
    """
    print('starting parser...')
    nlp_parser = par.Parser()
    mer = Meronomy(hg, nlp_parser)

    print('reading edges...')
    total_edges = 0
    total_beliefs = 0
    # Progress is tracked over every vertex (symbols + edges), not just edges.
    total_verts = hg.symbol_count() + hg.edge_count()
    count = 0
    with progressbar.ProgressBar(max_value=total_verts) as pbar:
        for vertex in hg.all():
            if sym.is_edge(vertex):
                total_edges += 1
                if hg.is_belief(vertex):
                    mer.add_edge(vertex)
                    total_beliefs += 1
            count += 1
            if count % 1000 == 0:
                pbar.update(count)
    print('edges: %s; beliefs: %s' % (total_edges, total_beliefs))

    # Second full scan: post-assignments need every belief edge again.
    print('post assignments...')
    count = 0
    with progressbar.ProgressBar(max_value=total_verts) as pbar:
        for vertex in hg.all():
            if sym.is_edge(vertex) and hg.is_belief(vertex):
                mer.post_assignments(vertex)
            count += 1
            if count % 1000 == 0:
                pbar.update(count)

    print('generating meronomy graph...')
    mer.generate()
    print('normalizing meronomy graph...')
    mer.normalize_graph()
    print('generating synonyms...')
    mer.generate_synonyms()

    print('writing synonyms...')
    count = 0
    with progressbar.ProgressBar(max_value=len(mer.synonym_sets)) as pbar:
        for syn_id in mer.synonym_sets:
            # Union of all edges mapped from any atom in this synonym set.
            syn_edges = set()
            for atom in mer.synonym_sets[syn_id]:
                if atom in mer.edge_map:
                    syn_edges |= mer.edge_map[atom]

            # The set's label comes from its most frequent edge.
            best_count = -1
            best_label_edge = None
            for syn_edge_candidate in syn_edges:
                if mer.edge_counts[syn_edge_candidate] > best_count:
                    best_count = mer.edge_counts[syn_edge_candidate]
                    best_label_edge = syn_edge_candidate
            label = hg.get_label(best_label_edge)

            syn_symbol = sym.build(label, 'syn%s' % syn_id)
            for member_edge in syn_edges:
                hg.add((cons.are_synonyms, member_edge, syn_symbol))
            label_symbol = sym.build(label, cons.label_namespace)
            hg.add((cons.has_label, syn_symbol, label_symbol))

            count += 1
            if count % 1000 == 0:
                pbar.update(count)
        pbar.update(count)
    print('%s synonym sets created' % len(mer.synonym_sets))
    print('done.')