예제 #1
0
                words2 = self.words_around_symbol(candidate)
                self.words2 += len(words1)
                cm = CandidateMetrics()
                cm.score = self.words_similarity(words1, words2, exclude)
                cm.degree = ksyn.degree(self.hg, candidate)
                logging.info('%s %s' % (candidate, cm))
                if cm.better_than(best_cm):
                    best_cm = cm
                    best = candidate

        self.best_sense_t += time.time() - start
        return best, best_cm


if __name__ == '__main__':
    # Manual run: print what best_sense returns for one sample (roots, text)
    # pair, using the 'wordnet_wikidata.hg' hypergraph on the 'leveldb' backend.
    graph_settings = {'backend': 'leveldb', 'hg': 'wordnet_wikidata.hg'}
    hgr = hyperg.HyperGraph(graph_settings)
    p = par.Parser()
    d = Disambiguation(hgr, p)

    # Sample inputs. Only (r2, text2) is passed to best_sense below; the
    # others are kept as ready-made alternatives.
    r1, text1 = (['stocks', 'stock'],
                 "Chinese stocks end year with double-digit losses")
    r2 = ['cambridge']
    text2, text3 = ("Cambridge near Boston in the United States.",
                    "Cambridge near London in England.")

    outcome = d.best_sense(r2, text2)
    print(outcome)
예제 #2
0
            output = last_stage_output.tree.to_hyperedge_str(
                with_namespaces=self.show_namespaces)
            self.outputs.append(output)
            self.debug_msg(output)

        last_stage_output.main_edge = last_stage_output.tree.to_hyperedge()
        return last_stage_output


if __name__ == '__main__':
    # Manual run: feed one sample sentence to an Extractor and print the main
    # edge and the extra edges of every result it returns.
    #
    # Other sample sentences that can be swapped in:
    #   "Due to its location in the European Plain, Berlin is influenced by a temperate seasonal climate."
    #   "Lots of cars require lots of paved roadways and parking lots."
    #   "Critics have pointed out the dangers of group forming among like-minded in Internet. "
    #   "Recently online platforms such as Facebook and Google have been criticized."
    #   "Koikuchi shoyu, best known as soy sauce, is the mother of all sauces in Japan."
    test_text = "Satellites from NASA and other agencies have been tracking sea ice changes since 1979."
    print(test_text)

    graph_settings = {'backend': 'leveldb', 'hg': 'wordnet_dbpedia.hg'}
    hgraph = hyperg.HyperGraph(graph_settings)

    extractor = Extractor(hgraph)
    # The debug flag is switched on before read_text is called.
    extractor.debug = True

    for result in extractor.read_text(test_text):
        # Only the second item of each result is used here.
        stage_output = result[1]
        print('result: %s' % str(stage_output.main_edge))
        for extra in stage_output.edges:
            print('extra edge: %s' % str(extra))
예제 #3
0
                'worst_sim': e[1][0],
                'sim': e[1][1],
                'matches': e[1][2],
                'text': self.hg.get_str_attribute(ed.str2edge(e[0]), 'text')
            }
            result.append(edge_data)
        return result

    def write_similar_edges(self, targ_edge, file_path):
        """Compute ``similar_edges(targ_edge)`` and write the result out.

        The edge data is passed, together with ``file_path``, to
        ``write_edge_data``. Nothing is returned.
        """
        similar = self.similar_edges(targ_edge)
        write_edge_data(similar, file_path)

    def write_edges_with_similar_concepts(self, targ_edge, file_path):
        """Compute ``edges_with_similar_concepts(targ_edge)`` and write it out.

        The edge data is passed, together with ``file_path``, to
        ``write_edge_data``. Nothing is returned.
        """
        matching = self.edges_with_similar_concepts(targ_edge)
        write_edge_data(matching, file_path)


if __name__ == '__main__':
    # Manual run: compute the edges similar to one sample edge and write them
    # to 'similar_edges.json'.
    hgr = hyperg.HyperGraph({'backend': 'leveldb', 'hg': 'reddit-politics.hg'})

    print('creating parser...')
    # Bind the instance to its own name so the global `par` (which provides
    # `Parser`) is not overwritten and stays usable by the rest of the module.
    nlp_parser = par.Parser()
    print('parser created.')

    te = '(clinches/nlp.clinch.verb clinton/nlp.clinton.noun ' \
         '(+/gb democratic/nlp.democratic.adj nomination/nlp.nomination.noun))'

    s = Similarity(hgr, nlp_parser)
    # Alternative output, disabled:
    # s.write_edges_with_similar_concepts(ed.str2edge(te), 'edges_similar_concepts.json')
    s.write_similar_edges(ed.str2edge(te), 'similar_edges.json')
예제 #4
0
 def setUp(self):
     """Build the HyperGraph stored on ``self.hg``.

     The graph is constructed with the 'leveldb' backend and the
     'test.hg' hypergraph setting.
     """
     self.hg = hyperg.HyperGraph({'backend': 'leveldb', 'hg': 'test.hg'})
예제 #5
0
    def nsimilarity(self, edges1, edges2):
        """Compare two collections of edges through their merged concept spheres.

        For each collection, the concept spheres of all its edges are merged
        into a single set. The two merged sets are then passed to
        ``setsimilarity`` and its result is returned unchanged.
        """
        merged = []
        for edges in (edges1, edges2):
            sphere = set()
            for edge in edges:
                sphere.update(self.concept_sphere(edge))
            merged.append(sphere)

        first, second = merged
        return self.setsimilarity(first, second)

    def synonym_similarity(self, meronomy, syn_id_1, syn_id_2):
        """Return ``nsimilarity`` of the full edges of two synonym ids.

        The edges for each id are obtained from
        ``meronomy.synonym_full_edges``.
        """
        edges_a = meronomy.synonym_full_edges(syn_id_1)
        edges_b = meronomy.synonym_full_edges(syn_id_2)
        return self.nsimilarity(edges_a, edges_b)


if __name__ == '__main__':
    # Manual run: print the similarity that HyperSimilarity reports for two
    # sample edges, using the 'leveldb' backend.
    graph_settings = {
        'backend': 'leveldb',
        'hg': 'reddit-worldnews-01012013-01082017.hg',
    }
    hgr = hyperg.HyperGraph(graph_settings)
    hs = HyperSimilarity(hgr)

    print('starting...')

    # Another sample edge that can be swapped in: 'clinton/nlp.clinton.noun'
    e1, e2 = ('(+/gb prime/nlp.prime.adj minister/nlp.minister.noun)',
              'europe/nlp.europe.noun')

    score = hs.similarity(e1, e2)
    print(score)