Example #1
    def evaluate_sentence_similarity(self, dataset_name="MSRvid", metric="wpath_graph",
                                     relatedness=True, save_results=False, database="wikidata"):
        # requires: import numpy as np; from scipy.stats import pearsonr
        concepts, cc, texts = get_ideas_in_format(dataset_name, database=database)
        KG = DAC(concepts=concepts, dataset=dataset_name, relatedness=relatedness, database=database)
        if len(KG.graph) == 0:
            print("start building knowledge graph")
            KG.build_nx_graph()

        ConSim = ConceptSimilarity(KG)
        sim_M = ConSim.similarityMatrix(lcs_pref_value="freq1", metric=metric)
        WMD = WordMoversSimilarity(sim_M, KG._concepts)

        sen_pairs, human_sim = self._dataset.load_sentence_pairs_and_similarities(dataset_name)
        sim_values = []
        # map each sentence to the concept ids of its bag of concepts
        map_sen2bow = dict(zip(texts, [[c["id"] for c in bow] for bow in cc]))
        pg, total_len = 0, len(sen_pairs)
        remove_index = []
        for sen1, sen2 in sen_pairs:
            show_progression(pg, total_len)
            # keep only concepts that are actually present in the knowledge graph
            bow1 = list(set(map_sen2bow[sen1]) & set(KG._concepts))
            bow2 = list(set(map_sen2bow[sen2]) & set(KG._concepts))
            sim_value = WMD.word_mover_distance(bow1, bow2)
            if sim_value is None:
                print(sen1, sen2)
                remove_index.append(pg)
            else:
                sim_values.append(sim_value)
            pg += 1

        # drop the human scores and sentence pairs for which no similarity could be computed
        human_sim = np.delete(human_sim, remove_index)
        kept_pairs = [p for idx, p in enumerate(sen_pairs) if idx not in remove_index]
        cor = pearsonr(sim_values, human_sim)[0]
        if save_results:
            results = list(zip([round(x, 3) for x in sim_values], kept_pairs))
            self._dataset.save_dataset(dict(zip(("correlation", "similarities"), (cor, results))),
                                       dataset_name + "_" + metric)
        return cor
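
The evaluation step above ultimately reduces to a Pearson correlation between the model's similarity values and the human gold scores. A minimal, self-contained sketch of that step with made-up score lists (the real method uses the MSRvid pairs loaded above):

    from scipy.stats import pearsonr

    # hypothetical model scores and human judgments for four sentence pairs
    model_scores = [0.91, 0.42, 0.77, 0.15]
    human_scores = [4.8, 2.1, 4.0, 0.9]

    correlation, p_value = pearsonr(model_scores, human_scores)
    print("Pearson r = %.3f (p = %.3f)" % (correlation, p_value))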
Example #2
    def global_secondorder_freq(self, write_value="freq1"):
        i, num_nodes = 0, len(self.graph)
        for n in self.graph.nodes:
            i += 1
            show_progression(i, num_nodes)
            try:
                # frequency count for the concept, fetched from BabelNet
                num1 = sparql_request(
                    sql.query_babelnet_number_of(n))[0]["count"]["value"]
                self.graph.nodes[n][write_value] = int(num1)
            except Exception:
                # fall back to a fixed default count when the request fails
                print("timeout babelnet", n)
                self.graph.nodes[n][write_value] = 6000000
        self.write_to_file()
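
The method above is essentially "annotate every node with a count fetched over the network, with a fixed fallback on failure". A stand-alone sketch of that pattern with plain networkx and a hypothetical fetch_count callable in place of the project's sparql_request helper:

    import networkx as nx

    def annotate_counts(graph, fetch_count, write_value="freq1", fallback=6000000):
        # fetch_count may raise (e.g. a request timeout); failing nodes get the fallback
        for n in graph.nodes:
            try:
                graph.nodes[n][write_value] = int(fetch_count(n))
            except Exception:
                graph.nodes[n][write_value] = fallback

    G = nx.DiGraph([("a", "b"), ("b", "c")])
    annotate_counts(G, fetch_count=lambda n: {"a": 3, "b": 7}[n])
    print(G.nodes(data=True))  # "c" falls back to 6000000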
Example #3
    def build_nx_graph(self, query=sql.query_ancestors):
        start_t = time.time()
        concepts = self._init_concepts

        nxKG = nx.DiGraph()
        i, concepts_len = 0, len(concepts)
        for con in concepts:
            keys = [con]
            i += 1
            show_progression(i, concepts_len)
            if self._relatedness:
                # also add edges for the relatedness properties of the seed concept
                keys += self.add_edges_for_concept(
                    query(con, sql.relation_prop), nxKG)
            # worklist expansion: pop a concept, add its ancestor edges,
            # and queue any newly discovered ancestors
            while len(keys) > 0:
                key = keys.pop()
                keys += self.add_edges_for_concept(query(key), nxKG)
        self.graph = nxKG
        self.init_key2pos()
        self.write_to_file()
        print("Building the graph from %s took %s seconds." %
              (self._database, (time.time() - start_t)))
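
A self-contained sketch of the same worklist expansion, using a hypothetical in-memory ancestor table instead of the SPARQL query:

    import networkx as nx

    # hypothetical ancestor relation standing in for query_ancestors
    ANCESTORS = {"dog": ["mammal"], "mammal": ["animal"], "animal": []}

    def build_graph(concepts, ancestors):
        kg = nx.DiGraph()
        for con in concepts:
            keys = [con]
            while keys:
                key = keys.pop()
                for parent in ancestors.get(key, []):
                    # only queue ancestors that introduce a new edge,
                    # so the worklist terminates
                    if not kg.has_edge(key, parent):
                        kg.add_edge(key, parent)
                        keys.append(parent)
        return kg

    print(list(build_graph(["dog"], ANCESTORS).edges()))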
Example #4
    def build_nx_graph(self):
        # BabelNet-specific variant of build_nx_graph (see Example #3)
        if self._database != "babelnet":
            print("Function only for babelnet")
            return
        start_t = time.time()
        concepts = self._init_concepts
        nxKG = nx.DiGraph()
        i, concepts_len = 0, len(concepts)
        for con in concepts:
            keys = [con]
            i += 1
            show_progression(i, concepts_len)
            # expand each concept along its BabelNet paths to the top node
            while len(keys) > 0:
                key = keys.pop()
                keys += self.add_edges_for_concept(sql.babelnet_paths2top(key),
                                                   nxKG,
                                                   split_delimiter="/s",
                                                   prefix="bn:")
        self.graph = nxKG
        self.init_key2pos()
        self.write_to_file()
        print("Building the graph from %s took %s seconds." %
              (self._database, (time.time() - start_t)))
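
This variant differs mainly in how edges are produced: sql.babelnet_paths2top(key) presumably returns delimiter-separated paths that add_edges_for_concept splits and prefixes with "bn:". One plausible, purely illustrative reading of that splitting step (the real helper is not shown here):

    import networkx as nx

    def add_path_edges(path_string, graph, split_delimiter="/s", prefix="bn:"):
        # illustrative only: split a path string into synset ids, re-attach the
        # prefix, and link consecutive nodes from child to ancestor
        nodes = [prefix + part for part in path_string.split(split_delimiter) if part]
        for child, parent in zip(nodes, nodes[1:]):
            graph.add_edge(child, parent)
        return nodes

    G = nx.DiGraph()
    add_path_edges("00015267n/s00015568n/s00004258n", G)
    print(list(G.edges()))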
Example #5
def information_content(concepts):
    # requires the project's sparql_request, sql helpers and show_progression
    IC = []
    for i, c in enumerate(concepts):
        show_progression(i, len(concepts))
        # frequency/count query for the concept against Wikidata
        IC.append(sparql_request(sql.query_freq_wikidata(c, 2), "wikidata"))
    return [c[0]["count"]["value"] if c is not None else None for c in IC]
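
Here sparql_request and sql.query_freq_wikidata are the project's own helpers. For a stand-alone illustration of the same kind of count query against the public Wikidata endpoint, one option is SPARQLWrapper (the query and result handling below are an assumption about what those helpers roughly do):

    from SPARQLWrapper import SPARQLWrapper, JSON

    sparql = SPARQLWrapper("https://query.wikidata.org/sparql")
    # count the instances of "house cat" (wd:Q146) as a toy frequency query
    sparql.setQuery("""
        SELECT (COUNT(?item) AS ?count)
        WHERE { ?item wdt:P31 wd:Q146 . }
    """)
    sparql.setReturnFormat(JSON)
    results = sparql.query().convert()
    print(results["results"]["bindings"][0]["count"]["value"])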