Esempio n. 1
0
 def add_edges_for_concept(self,
                           query_str,
                           nxKG,
                           split_delimiter="/",
                           prefix=""):
     """Insert the item/super-item pairs returned by a SPARQL query into ``nxKG``.

     Each result row contributes two nodes (the item and its super-item) and
     one edge pointing from the super-item to the item. Nodes and edges that
     already exist in the graph are left untouched.

     Args:
         query_str: SPARQL query text handed to ``sparql_request`` together
             with ``self._database``.
         nxKG: Graph object that is mutated in place; it must provide
             ``has_node``/``add_node``/``has_edge``/``add_edge``.
         split_delimiter: Delimiter used to cut the node key out of the
             ``item``/``superItem`` values (the last segment is kept).
         prefix: String prepended to every node key.

     Returns:
         The keys of the super-item nodes that were newly added to the graph
         by this call, in insertion order.
     """
     response = sparql_request(query_str, self._database)
     if not isinstance(response, list):
         # Retry once with a trailing space appended to the query.
         # NOTE(review): presumably this works around a failed or cached
         # response -- confirm against sparql_request.
         response = sparql_request(query_str + " ", self._database)

     concept_keys = []
     for r in response:
         k1 = prefix + r["item"]["value"].split(split_delimiter)[-1]
         k2 = prefix + r["superItem"]["value"].split(split_delimiter)[-1]

         # Defaults are re-established for every row; otherwise a row without
         # a label/predicate would silently reuse the previous row's value.
         v1 = r["itemLabel"]["value"] if "itemLabel" in r else "no label"
         v2 = (r["superItemLabel"]["value"]
               if "superItemLabel" in r else "no label")
         # The predicate is always split on "/", independent of
         # ``split_delimiter``.
         pre = r["pre"]["value"].split("/")[-1] if "pre" in r else "is-a"

         if not nxKG.has_node(k1):
             nxKG.add_node(k1, value=v1)
         if not nxKG.has_node(k2):
             nxKG.add_node(k2, value=v2)
             # Only super-items that were not in the graph yet are reported.
             concept_keys.append(k2)
         if not nxKG.has_edge(k2, k1):
             nxKG.add_edge(k2, k1, value=pre)
     return concept_keys
Esempio n. 2
0
 def global_secondorder_freq(self, write_value="freq1"):
     """Annotate every node of ``self.graph`` with a frequency count.

     For each node, two counts are requested via ``sql.query_number_of``
     (predicates ``"wdt:P31"`` and ``"wdt:P279"``); their sum is stored in the
     node attribute named by ``write_value``. If anything goes wrong while
     fetching or parsing the counts, the node gets the fallback value
     6000000 and a message is printed. Finally ``self.write_to_file()`` is
     called.

     Args:
         write_value: Name of the node attribute that receives the count.
     """
     num_nodes = len(self.graph)
     for i, n in enumerate(self.graph.nodes, start=1):
         show_progression(i, num_nodes)
         try:
             num1 = sparql_request(sql.query_number_of(
                 n, "wdt:P31"))[0]["count"]["value"]
             num2 = sparql_request(sql.query_number_of(
                 n, "wdt:P279"))[0]["count"]["value"]
             self.graph.nodes[n][write_value] = int(num1) + int(num2)
         except Exception:
             # Catch Exception rather than using a bare except so that
             # Ctrl-C (KeyboardInterrupt) and SystemExit can still stop this
             # potentially long-running loop.
             print("timeout", n)
             self.graph.nodes[n][write_value] = 6000000
     self.write_to_file()
 def ic_graph(self, id):
     """Return ``-log(count / 6900000)`` for the given identifier.

     The count is taken from the first row of the response to
     ``sparql.query_freq_wikidata(id, 2)``.

     Args:
         id: Identifier forwarded to ``sparql.query_freq_wikidata``.

     Returns:
         0 if the request fails, returns nothing, or its first row has no
         ``"count"`` entry; otherwise the negative natural logarithm of the
         count divided by 6900000.
         NOTE(review): a count of zero is passed to ``np.log`` unchanged --
         confirm whether that case needs special handling.
     """
     try:
         num = sparql_request(sparql.query_freq_wikidata(id, 2))
     except Exception:
         # Exception (not a bare except) so KeyboardInterrupt/SystemExit
         # still propagate.
         return 0
     # Treat a missing/empty response the same as a row without a count.
     if not num or "count" not in num[0]:
         return 0
     count = int(num[0]["count"]["value"])
     return -np.log(count / 6900000)
 def match_noun_wikidata(self, name, limit=10):
     """Map every noun of a dataset to matching search results and save the map.

     The nouns come from ``self.concepts_of_dataset(self.load_dataset(name))``.
     For each noun the search results are filtered in up to three stages:

     1. results accepted by ``self.f_r(noun, result)``;
     2. if none, results accepted by ``self.f_r(noun, result, str.lower)``;
     3. if still none, the first search result (or nothing when the search
        returned no results at all).

     The mapping is stored via ``self.save_dataset`` under the name
     ``"<name>_concept_map"``.

     Args:
         name: Dataset name passed to ``self.load_dataset``.
         limit: Forwarded to ``sparql.query_search_wikidata``.

     Returns:
         Dict mapping each noun to its list of selected results.
     """
     nouns = self.concepts_of_dataset(self.load_dataset(name))
     d = {}
     for n in nouns:
         data = sparql_request(sparql.query_search_wikidata(n, limit))
         results = [x for x in data if self.f_r(n, x)]
         if not results:
             results = [x for x in data if self.f_r(n, x, str.lower)]
         if not results:
             # Fall back to the top hit; an empty search result must not
             # raise IndexError, so the noun simply maps to an empty list.
             results = [data[0]] if data else []
         d[n] = results
     self.save_dataset(d, "%s_concept_map" % name)
     return d
Esempio n. 5
0
def build_nx_graph(concepts, database="wikidata"):
    """Build a directed graph from the SPARQL results for each concept.

    For every concept a query is created with the factory returned by
    ``get_query_for_database_to_build_KG(database)`` and executed through
    ``sparql_request``. Every result row yields an item node, a super-item
    node (both keyed by the last "/"-separated segment of their value and
    carrying their label as ``value``) and an edge from the super-item to the
    item whose ``value`` is the row's ``pre`` value. Existing nodes and edges
    are not overwritten.

    Args:
        concepts: Sized iterable of concept keys.
        database: Database identifier used both to pick the query factory and
            to run the request.

    Returns:
        The populated ``nx.DiGraph``.
    """
    make_query = get_query_for_database_to_build_KG(database)
    graph = nx.DiGraph()
    total = len(concepts)
    for position, concept in enumerate(concepts):
        show_progression(position, total)
        rows = sparql_request(make_query(concept), database)
        for row in rows:
            child = row["item"]["value"].split("/")[-1]
            child_label = row["itemLabel"]["value"]
            parent = row["superItem"]["value"].split("/")[-1]
            parent_label = row["superItemLabel"]["value"]
            predicate = row["pre"]["value"]

            # Item first, then super-item; the first label seen for a key wins.
            for node, label in ((child, child_label), (parent, parent_label)):
                if not graph.has_node(node):
                    graph.add_node(node, value=label)
            if not graph.has_edge(parent, child):
                graph.add_edge(parent, child, value=predicate)
    return graph
Esempio n. 6
0
def build_igraph_KG(concepts, database="wikidata"):
    """Build a directed ``Graph`` from the SPARQL results for each concept.

    For every concept a query is created with the factory returned by
    ``get_query_for_database_to_build_KG(database)`` and executed through
    ``sparql_request``. Each result row yields two vertices named after the
    full ``item`` and ``superItem`` values and an edge from the item to the
    super-item labelled with the row's ``pre`` value. Vertex labels come from
    ``itemLabel``/``superItemLabel`` when both are present; otherwise the
    last segment of each name (split on "/" or "#") is used.

    Args:
        concepts: Sized iterable of concept keys.
        database: Database identifier used both to pick the query factory and
            to run the request.

    Returns:
        The populated graph.
    """
    query = get_query_for_database_to_build_KG(database)
    concepts_len = len(concepts)
    g = Graph(directed=True)
    g.vs["name"] = ""
    g.vs["label"] = ""
    # The graph starts empty and vertices are only added below, so this set
    # mirrors the vertex names exactly and replaces a per-row scan of all
    # vertices (``g.vs.select(name=...)``) with a constant-time lookup.
    known_names = set()
    for i, c in enumerate(concepts):
        show_progression(i, concepts_len)
        for s in sparql_request(query(c), database):
            k1 = s["item"]["value"]
            k2 = s["superItem"]["value"]
            pre = s["pre"]["value"]
            if "itemLabel" in s and "superItemLabel" in s:
                v1 = s["itemLabel"]["value"]
                v2 = s["superItemLabel"]["value"]
            else:
                # Without both labels, fall back to the last name segment.
                v1 = re.split('[/#]', k1)[-1]
                v2 = re.split('[/#]', k2)[-1]
            for vertex_name, vertex_label in ((k1, v1), (k2, v2)):
                if vertex_name not in known_names:
                    g.add_vertex(vertex_name, label=vertex_label)
                    known_names.add(vertex_name)
            # A zero adjacency entry means there is no k1 -> k2 edge yet.
            if g[k1, k2] == 0:
                g.add_edge(k1, k2, label=pre)
    return g
Esempio n. 7
0
def information_content(concepts):
    """Fetch the frequency count for every concept.

    Each concept is queried with ``sql.query_freq_wikidata(concept, 2)``
    against the ``"wikidata"`` database.

    Args:
        concepts: Sized collection of ``(concept, index)`` pairs; the index
            is only used for progress reporting.
            NOTE(review): the pair unpacking is kept as-is; if callers pass
            plain concept keys this was probably meant to be
            ``enumerate(concepts)`` -- confirm against the call sites.

    Returns:
        A list with one entry per concept: the ``["count"]["value"]`` of the
        first response row exactly as returned (no conversion), or ``None``
        when the response is ``None`` or empty.
    """
    total = len(concepts)
    responses = []
    for c, i in concepts:
        show_progression(i, total)
        responses.append(
            sparql_request(sql.query_freq_wikidata(c, 2), "wikidata"))
    # A missing or empty response maps to None instead of raising IndexError.
    return [r[0]["count"]["value"] if r else None for r in responses]