def add_edges_for_concept(self, query_str, nxKG, split_delimiter="/", prefix=""):
    """Run a SPARQL query and merge its item/super-item rows into ``nxKG``.

    Every result row contributes one edge pointing from the super item to
    the item. Nodes and edges that already exist in the graph are left
    untouched, so their existing ``value`` attributes are never overwritten.

    Args:
        query_str: SPARQL query text handed to ``sparql_request`` together
            with ``self._database``.
        nxKG: Graph extended in place; only ``has_node``, ``add_node``,
            ``has_edge`` and ``add_edge`` are called on it.
        split_delimiter: Separator used to cut the node key out of the
            ``item`` / ``superItem`` values (the last segment is kept).
        prefix: String prepended to every node key.

    Returns:
        list: Keys of the super items that this call newly added to the
        graph, in the order they were first encountered.
    """
    response = sparql_request(query_str, self._database)
    if not isinstance(response, list):
        # Retry once with a trailing space appended to the query.
        # NOTE(review): presumably a non-list answer signals a failed or
        # cached-bad request and the altered text forces a fresh one - confirm.
        response = sparql_request(query_str + " ", self._database)

    concept_keys = []
    for row in response:
        item_key = prefix + row["item"]["value"].split(split_delimiter)[-1]
        super_key = prefix + row["superItem"]["value"].split(split_delimiter)[-1]

        # Defaults are resolved per row so that a row without a label or
        # predicate never inherits the values of the previous row.
        item_label = row["itemLabel"]["value"] if "itemLabel" in row else "no label"
        super_label = (
            row["superItemLabel"]["value"] if "superItemLabel" in row else "no label"
        )
        predicate = row["pre"]["value"].split("/")[-1] if "pre" in row else "is-a"

        if not nxKG.has_node(item_key):
            nxKG.add_node(item_key, value=item_label)
        if not nxKG.has_node(super_key):
            nxKG.add_node(super_key, value=super_label)
            # Only super items that were not in the graph before are reported.
            concept_keys.append(super_key)
        if not nxKG.has_edge(super_key, item_key):
            nxKG.add_edge(super_key, item_key, value=predicate)
    return concept_keys
def global_secondorder_freq(self, write_value="freq1"):
    """Store a frequency count on every node of ``self.graph`` and save it.

    For each node, the counts returned for the queries built by
    ``sql.query_number_of(node, "wdt:P31")`` and
    ``sql.query_number_of(node, "wdt:P279")`` are added together and written
    to the node attribute named by ``write_value``. If the lookup or the
    conversion fails for a node, ``"timeout"`` and the node are printed and
    a fallback of 6000000 is stored instead. ``self.write_to_file()`` is
    called once after all nodes have been processed.

    Args:
        write_value: Name of the node attribute that receives the count.
    """
    fallback_count = 6000000
    num_nodes = len(self.graph)
    for i, n in enumerate(self.graph.nodes, start=1):
        show_progression(i, num_nodes)
        try:
            num1 = sparql_request(
                sql.query_number_of(n, "wdt:P31"))[0]["count"]["value"]
            num2 = sparql_request(
                sql.query_number_of(n, "wdt:P279"))[0]["count"]["value"]
            self.graph.nodes[n][write_value] = int(num1) + int(num2)
        except Exception:
            # ``Exception`` rather than a bare ``except`` so that Ctrl-C and
            # SystemExit still stop this long-running loop.
            print("timeout", n)
            self.graph.nodes[n][write_value] = fallback_count
    self.write_to_file()
def ic_graph(self, id):
    """Return ``-log(count / 6900000)`` for ``id``, or 0 when no count exists.

    The count is read from the first row of the response to the query built
    by ``sparql.query_freq_wikidata(id, 2)``.

    Args:
        id: Identifier forwarded to ``sparql.query_freq_wikidata``. The name
            shadows the builtin but is kept for keyword-argument callers.

    Returns:
        0 if the request raises, the response is ``None`` or empty, or its
        first row has no ``"count"`` entry; otherwise the negative natural
        log of ``count / 6900000``. A count of 0 is passed to ``np.log``
        unchanged and therefore yields ``inf``.
    """
    # NOTE(review): presumably the total number of entities used as the
    # normalising denominator - confirm where 6900000 comes from.
    total = 6900000
    try:
        response = sparql_request(sparql.query_freq_wikidata(id, 2))
    except Exception:
        # ``Exception`` rather than a bare ``except`` so Ctrl-C still works.
        return 0
    # ``not response`` also covers a ``None`` response, which would otherwise
    # raise TypeError in ``len()``.
    if not response or "count" not in response[0]:
        return 0
    count = int(response[0]["count"]["value"])
    return -np.log(count / total)
def match_noun_wikidata(self, name, limit=10):
    """Map every noun of a dataset to candidate search results and save it.

    The nouns come from ``self.concepts_of_dataset(self.load_dataset(name))``.
    For each noun the rows returned for
    ``sparql.query_search_wikidata(noun, limit)`` are filtered in up to
    three steps, stopping at the first that yields something:

    1. rows accepted by ``self.f_r(noun, row)``;
    2. rows accepted by ``self.f_r(noun, row, str.lower)``
       (NOTE(review): presumably a case-insensitive comparison - confirm);
    3. the first row of the response, if there is one.

    The resulting mapping is stored through ``self.save_dataset`` under the
    name ``"<name>_concept_map"`` and also returned.

    Args:
        name: Dataset name used for loading and for naming the saved map.
        limit: Forwarded to ``sparql.query_search_wikidata``.

    Returns:
        dict: noun -> list of selected rows; the list is empty when the
        search returned no rows at all.
    """
    nouns = self.concepts_of_dataset(self.load_dataset(name))
    concept_map = {noun: [] for noun in nouns}
    for noun in nouns:
        data = sparql_request(sparql.query_search_wikidata(noun, limit))
        matches = [row for row in data if self.f_r(noun, row)]
        if not matches:
            matches = [row for row in data if self.f_r(noun, row, str.lower)]
        if not matches:
            # Slice instead of ``data[0]`` so an empty response produces an
            # empty list rather than an IndexError that aborts the whole run
            # before anything is saved.
            matches = list(data[:1])
        concept_map[noun] = matches
    self.save_dataset(concept_map, "%s_concept_map" % name)
    return concept_map
def build_nx_graph(concepts, database="wikidata"):
    """Build a ``nx.DiGraph`` from the query results of every concept.

    For each concept, the query function obtained from
    ``get_query_for_database_to_build_KG(database)`` is run through
    ``sparql_request``. Every result row adds the item and super-item nodes
    (keyed by the last ``/`` segment of their values, with the row's labels
    stored as ``value``) and an edge from the super item to the item whose
    ``value`` is the row's ``pre`` value. Existing nodes and edges are not
    modified.

    Args:
        concepts: Sized iterable of concepts passed one by one to the query
            function.
        database: Database name passed to the query factory and to
            ``sparql_request``.

    Returns:
        The populated directed graph.
    """
    make_query = get_query_for_database_to_build_KG(database)
    graph = nx.DiGraph()
    total = len(concepts)
    for position, concept in enumerate(concepts):
        show_progression(position, total)
        rows = sparql_request(make_query(concept), database)
        for row in rows:
            # Read every field before touching the graph so a malformed row
            # leaves the graph unchanged.
            child = row["item"]["value"].split("/")[-1]
            child_label = row["itemLabel"]["value"]
            parent = row["superItem"]["value"].split("/")[-1]
            parent_label = row["superItemLabel"]["value"]
            predicate = row["pre"]["value"]

            for node, label in ((child, child_label), (parent, parent_label)):
                if not graph.has_node(node):
                    graph.add_node(node, value=label)
            if not graph.has_edge(parent, child):
                graph.add_edge(parent, child, value=predicate)
    return graph
def build_igraph_KG(concepts, database="wikidata"):
    """Build a directed igraph ``Graph`` from the query results of every concept.

    For each concept, the query function obtained from
    ``get_query_for_database_to_build_KG(database)`` is run through
    ``sparql_request``. Every result row adds a vertex for the item and one
    for the super item (named by their full values) and, if none exists yet,
    an edge from the item to the super item labelled with the row's ``pre``
    value. Vertex labels come from ``itemLabel`` / ``superItemLabel`` when
    both are present; otherwise the last ``/``- or ``#``-separated segment
    of each value is used.

    Args:
        concepts: Sized iterable of concepts passed one by one to the query
            function.
        database: Database name passed to the query factory and to
            ``sparql_request``.

    Returns:
        The populated graph.
    """
    query = get_query_for_database_to_build_KG(database)
    concepts_len = len(concepts)
    g = Graph(directed=True)
    g.vs["name"] = ""
    g.vs["label"] = ""
    # Names of the vertices inserted so far. The graph starts empty and is
    # only filled here, so this set mirrors the graph's vertex names and
    # replaces a full vertex scan (``g.vs.select(name=...)``) per lookup.
    known_names = set()
    for i, c in enumerate(concepts):
        show_progression(i, concepts_len)
        for s in sparql_request(query(c), database):
            k1 = s["item"]["value"]
            k2 = s["superItem"]["value"]
            pre = s["pre"]["value"]
            if "itemLabel" in s and "superItemLabel" in s:
                v1 = s["itemLabel"]["value"]
                v2 = s["superItemLabel"]["value"]
            else:
                v1 = re.split('[/#]', k1)[-1]
                v2 = re.split('[/#]', k2)[-1]

            for vertex_name, vertex_label in ((k1, v1), (k2, v2)):
                if vertex_name not in known_names:
                    g.add_vertex(vertex_name, label=vertex_label)
                    known_names.add(vertex_name)
            # ``g[k1, k2]`` is 0 when there is no edge between the vertices.
            if g[k1, k2] == 0:
                g.add_edge(k1, k2, label=pre)
    return g
def information_content(concepts):
    """Fetch the raw frequency ``count`` value for every concept.

    For each ``(concept, index)`` pair the query built by
    ``sql.query_freq_wikidata(concept, 2)`` is sent to ``sparql_request``
    for the ``"wikidata"`` database.

    Args:
        concepts: Sized iterable whose elements unpack into
            ``(concept, index)``; the index is only used for progress
            reporting.
            NOTE(review): sibling builders iterate ``enumerate(concepts)``
            over plain concepts - confirm callers really pass pairs here.

    Returns:
        list: One entry per concept, in input order: the ``count`` value of
        the first response row exactly as returned (not converted to a
        number), or ``None`` when the response is ``None``, empty, or has
        no ``"count"`` entry.
    """
    total = len(concepts)
    counts = []
    for c, i in concepts:
        show_progression(i, total)
        response = sparql_request(sql.query_freq_wikidata(c, 2), "wikidata")
        # Empty or count-less responses map to None instead of raising
        # IndexError / KeyError.
        if response and "count" in response[0]:
            counts.append(response[0]["count"]["value"])
        else:
            counts.append(None)
    return counts