def query(title, org, type):
    """Find Wikidata publishers (Q1137109) and developers (Q210167) whose
    English label contains *org* and feed each match to parse_code().

    title: passed through to parse_code() and used in the error message.
    org:   organization name; only the part before the first comma is used.
    type:  opaque value forwarded to parse_code() (shadows the builtin, but
           the parameter name is kept for caller compatibility).

    On any failure the title is appended to the module-level ``errors`` list.
    """
    sleep(1)  # crude rate limit for the public WDQS endpoint
    org = org.split(",")[0]  # keep only the first organization of a comma list
    print("Querying ", org)
    try:
        publisher_query = f"""
            SELECT ?publisher ?publisherLabel WHERE {{
                ?publisher wdt:P31 wd:Q1137109.
                ?publisher rdfs:label ?publisherLabel.
                filter contains(?publisherLabel,"{org}")
                filter langMatches(lang(?publisherLabel),'en')
            }}
        """
        developer_query = f"""
            SELECT ?developer ?developerLabel WHERE {{
                ?developer wdt:P31 wd:Q210167.
                ?developer rdfs:label ?developerLabel.
                filter contains(?developerLabel,"{org}")
                filter langMatches(lang(?developerLabel),'en')
            }}
        """
        publishers = return_sparql_query_results(publisher_query)["results"]["bindings"]
        developers = return_sparql_query_results(developer_query)["results"]["bindings"]
        for row in publishers:
            if 'publisherLabel' in row:
                parse_code(title, Organization(row['publisher']['value'], row['publisherLabel']['value']), type)
        for row in developers:
            if 'developerLabel' in row:
                parse_code(title, Organization(row['developer']['value'], row['developerLabel']['value']), type)
    except Exception:  # was a bare except: — would also swallow KeyboardInterrupt
        print("Could not query ", title)
        errors.append(title)
def oneQFromWikiData(q):
    """Load every entity that is an instance (P31) of Wikidata class *q*
    and append a Population record for each result row.

    For each entity the query pulls the ISO code (P300), coordinate
    location (P625), population (P1082) and auto-language labels.
    """
    sparql = f'''
        SELECT ?province ?provinceLabel ?location ?isocode ?pop WHERE {{
            # any subject which is an instance of the requested class
            ?province wdt:P31 wd:{q}.
            # ISO 3166-2 code — https://www.wikidata.org/wiki/Property:P300
            ?province wdt:P300 ?isocode.
            # coordinates — https://www.wikidata.org/wiki/Property:P625
            ?province wdt:P625 ?location.
            # population — https://www.wikidata.org/wiki/Property:P1082
            ?province wdt:P1082 ?pop.
            SERVICE wikibase:label {{
                bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en"
            }}
        }}
    '''
    response = return_sparql_query_results(sparql)
    for binding in response['results']['bindings']:
        Population.pops.append(Population(binding))
def find_suggestions(n_clicks, properties, values):
    """Build a SPARQL filter from the property/value dropdown components,
    fetch up to 100 matching items, and mine association rules over their
    properties with FP-growth.

    n_clicks:   button click counter; nothing happens until it is >= 1.
    properties: list of dropdown components; each carries ['props']['value'].
    values:     parallel list of value dropdowns (may be missing entries).

    Returns an error string on failure paths; otherwise prints the mined
    rules and falls through (returns None), matching the original contract.
    """
    if n_clicks >= 1:
        # --- SPARQL query creation -------------------------------------
        filters = ""
        for i in range(len(properties)):
            try:
                temp_property = properties[i]['props']['value']
                temp_value = values[i]['props']['value']
                filters += "?item wdt:" + temp_property + " wd:" + temp_value + " . "
            except Exception:  # value dropdown missing/empty (was bare except)
                try:
                    # Fall back to a free variable: property must merely exist.
                    temp_property = properties[i]['props']['value']
                    temp_value = "?variable" + str(i + 1)
                    filters += "?item wdt:" + temp_property + temp_value + " . "
                except Exception:  # property missing too — skip row (was bare except)
                    pass
        query_string = """ SELECT ?item WHERE {""" + filters + """}"""
        results = return_sparql_query_results(query_string)
        # --- Extract properties from the results -----------------------
        property_list = []
        count = 0
        print("The length of the item list is " + str(len(results["results"]["bindings"])))
        for result in results["results"]["bindings"]:
            if count < 100:  # cap the number of per-item property fetches
                try:
                    item = result['item']['value'].split("/")[-1]
                    print(item)
                    property_list.append(retrieve_properties(item))
                    count += 1
                except Exception:  # was bare except
                    return "Please enter properties to filter the data"
            else:
                break
        if len(property_list) > 1:
            # Uses the helpers property_count_function() and getBooleanDF()
            df = property_count_function(property_list)
            boolean_df = getBooleanDF(property_list)
            print("boolean_df Done")
            frequent_items = fpgrowth(boolean_df, min_support=0.7, use_colnames=True)
            print("frequent_items Done")
            print(frequent_items)
            rules = association_rules(frequent_items, metric="confidence", min_threshold=0.8)
            rules["consequent_len"] = rules["consequents"].apply(len)
            # keep single-consequent rules with positive lift/leverage
            rules = rules[(rules['consequent_len'] == 1) & (rules['lift'] > 1) & (rules['leverage'] > 0)]
            print(rules)
            print("DONE")
        else:
            return "Only one item could be found with the given inputs"
def searchByID(itemID):
    """Search Wikidata by item ID (e.g. ``Q167676``).

    Returns the raw SPARQL JSON response as a dictionary, including label,
    description, part-of (P361), image (P18), the English Wikipedia article
    and a " | "-joined list of English aliases.
    """
    sparql_query = """
    SELECT ?item ?itemLabel ?itemDescription ?partOf ?image ?article
           (GROUP_CONCAT(DISTINCT ?alias; separator=" | ") as ?aliases)
    WHERE {
        ?article schema:about ?item;
                 schema:inLanguage "en";
                 schema:isPartOf <https://en.wikipedia.org/>
        BIND(wd:%s AS ?item)
        OPTIONAL {?item skos:altLabel ?alias FILTER (LANG (?alias) = "en")}
        OPTIONAL { ?item wdt:P18 ?image }
        OPTIONAL { ?item wdt:P361 ?partOf}
        SERVICE wikibase:label { bd:serviceParam wikibase:language "en" }
    }
    GROUP BY ?item ?itemLabel ?itemDescription ?partOf ?image ?article
    """ % itemID
    return return_sparql_query_results(sparql_query)
def hospi_ll(lat, lng, radius=.5, isas=()):
    """Return up to ten US (P17 = Q30) places within *radius* km of
    (lat, lng), ordered by distance, via the wikibase:around service.

    isas: optional iterable of Wikidata Q-ids restricting the kind (P31)
          of place.  The default was a shared mutable list ([]); an empty
          tuple is behaviourally identical and avoids the pitfall.

    Fix: when *isas* was empty, the original formatted the empty list
    itself ("[]") into the query text; we now substitute an empty string.
    """
    if isas:
        # construct e.g.  wdt:P31 ?kind. VALUES ?kind { wd:Q16917 wd:Q4287745 }.
        isas = " ".join("wd:" + isa for isa in isas)
        isas = "wdt:P31 ?kind. VALUES ?kind {{ {isas} }}.".format(isas=isas)
    else:
        isas = ""  # was: the container itself was interpolated as "[]"
    sparql_query = """
    SELECT ?place ?placeLabel ?distance WHERE {{
        ?place wdt:P17 wd:Q30;  # In US
        {isas}
        SERVICE wikibase:around {{
            ?place wdt:P625 ?location .
            bd:serviceParam wikibase:center"Point({lng} {lat})"^^geo:wktLiteral.
            bd:serviceParam wikibase:radius "{radius}" .
            bd:serviceParam wikibase:distance ?distance .
        }}
        SERVICE wikibase:label {{ bd:serviceParam wikibase:language "en" . }}
    }}
    ORDER BY ?distance
    LIMIT 10""".format(lat=lat, lng=lng, radius=radius, isas=isas)
    # print(sparql_query)
    sleep(1)  # throttle requests to the public endpoint
    return return_sparql_query_results(sparql_query)
def label(word, isa=""):
    """Search Wikidata for *word* through the EntitySearch mwapi service,
    print "<label>: <description> <uri>" for each of the up-to-five hits,
    and return the raw SPARQL response.

    isa: optional Q-id; restricts hits to instances/subclasses (P31/P279*)
    of that class.
    """
    if isa:
        isa = "wdt:P31/wdt:P279* wd:{};".format(isa)
    sparql_query = """
    SELECT ?item ?itemLabel ?itemDescription WHERE {{
        ?item {isa}
        SERVICE wikibase:mwapi {{
            bd:serviceParam wikibase:endpoint "www.wikidata.org";
                            wikibase:api "EntitySearch";
                            mwapi:search "{word}";
                            mwapi:language "en";
                            mwapi:limit "5".
            ?item wikibase:apiOutputItem mwapi:item.
            ?num wikibase:apiOrdinal true.
        }}
        SERVICE wikibase:label {{ bd:serviceParam wikibase:language "en". }}
    }}""".format(word=word, isa=isa)
    print(sparql_query)
    res = return_sparql_query_results(sparql_query)
    for hit in res['results']['bindings']:
        print("{label}: {description} {uri}\n".format(
            label=hit['itemLabel']['value'],
            description=hit['itemDescription']['value'],
            uri=hit['item']['value']))
    return res
def get_city_population_wikidata(city: str) -> int:
    """Return the largest recorded population of the Russian city *city*.

    Matches the Russian label exactly and keeps only cities whose country
    label contains "Россия".  Returns None (after logging) when the lookup
    fails — callers checking ``is None`` keep working.

    Fix: the endpoint returns the population as a string, but the signature
    promises ``int``; the value is now converted before returning.
    """
    query = """
    SELECT ?population WHERE {
        ?city rdfs:label '%s'@ru.
        ?city wdt:P1082 ?population.
        ?city wdt:P17 ?country.
        ?city rdfs:label ?cityLabel.
        ?country rdfs:label ?countryLabel.
        FILTER(LANG(?cityLabel) = "ru").
        FILTER(LANG(?countryLabel) = "ru").
        FILTER(CONTAINS(?countryLabel, "Россия")).
    }
    ORDER BY DESC(?population)
    LIMIT 1
    """ % (city)
    try:
        res = return_sparql_query_results(query)
        population = res['results']['bindings'][0]['population']['value'] or 0
        # float() first tolerates scientific/decimal notation in the literal
        return int(float(population))
    except Exception as e:
        logging.error(f"Something wrong in getting data from wikidata: {e}")
def hospi_ll(lat, long, radius=.5, isas=None):
    """Return up to ten US (P17 = Q30) places within *radius* km of
    (lat, long), ordered by distance; kind constraints come from mk_isas().

    isas: optional list of Q-ids forwarded to mk_isas().  The default was a
          shared mutable list ([]); None with a fresh-list fallback is
          behaviourally identical and avoids the mutable-default pitfall.
    """
    # mk_isas received a list in the original, so keep handing it one.
    isas = mk_isas([] if isas is None else isas)
    sparql_query = """
    SELECT ?place ?placeLabel ?distance WHERE {{
        ?place wdt:P17 wd:Q30;  # In US
        {isas}
        SERVICE wikibase:around {{
            ?place wdt:P625 ?location .
            bd:serviceParam wikibase:center"Point({long} {lat})"^^geo:wktLiteral.
            bd:serviceParam wikibase:radius "{radius}" .
            bd:serviceParam wikibase:distance ?distance .
        }}
        SERVICE wikibase:label {{ bd:serviceParam wikibase:language "en" . }}
    }}
    ORDER BY ?distance
    LIMIT 10""".format(lat=lat, long=long, radius=radius, isas=isas)
    # print(sparql_query)
    sleep(1)  # throttle requests to the public endpoint
    return return_sparql_query_results(sparql_query)
def get_imdb_ids(request_dict):
    """Run the SPARQL query described by *request_dict* (via write_request)
    and return every ?imdb value found in the result bindings."""
    results = return_sparql_query_results(write_request(request_dict))
    return [binding['imdb']['value']
            for binding in results['results']['bindings']]
def generate_switch():
    """Build 'switch' questions from the module-level `switch_entities`.

    For each entity spec, runs its SPARQL query, substitutes the fetched
    label values into the '<label>' placeholders of each question template,
    concatenates the Wikipedia summaries of the two title halves (split on
    the single ' and ') in random order as the passage, and collects the
    resulting question dicts.  Returns the list of questions.
    """
    from collections import defaultdict
    import copy
    indefinite_corrector = inflect.engine()  # adds "a"/"an" before occupations
    wiki_wiki = wikipediaapi.Wikipedia('en')
    questions = []
    titles = {}
    bad_item = False  # NOTE(review): never used below
    for entity in switch_entities:
        es = entity['labels']      # SPARQL variable names used in templates
        query = entity['query']
        qs = entity['questions']   # templates with <label> placeholders
        res = return_sparql_query_results(query)
        print("finished query")
        res = res['results']['bindings']
        len_res = len(res)  # NOTE(review): never used below
        values = defaultdict(list)
        # Collect, per label, the value of every binding; remember titles.
        for ind, entity in enumerate(res):  # shadows the outer loop variable
            for e in es:
                name = entity[f'{e}']['value']
                values[e].append(name)
                titles[name] = name
        questionCounter = 0
        for ind, j in enumerate(values[es[0]]):
            for q in qs:
                new_q = copy.deepcopy(q)
                try:
                    # Occupation labels get an indefinite article ("a"/"an").
                    if es[2].find('Occupation') == -1:
                        new_q['question'] = q['question'].replace(f'<{es[0]}>', j).\
                            replace(f'<{es[1]}>', values[es[1]][ind]).replace(f'<{es[2]}>', values[es[2]][ind])
                    else:
                        new_q['question'] = q['question'].replace(f'<{es[0]}>', j).\
                            replace(f'<{es[1]}>', values[es[1]][ind]).replace(f'<{es[2]}>', indefinite_corrector.a(values[es[2]][ind]))
                except IndexError:
                    # parallel value lists exhausted — stop for this entity
                    break
                try:
                    new_q['title'] = q['title'].replace(f'<{es[0]}>', j).\
                        replace(f'<{es[1]}>', values[es[1]][ind]).replace(f'<{es[2]}>', values[es[2]][ind])
                    # Only titles with exactly one " and " can be split in two.
                    and_location = [m.start() for m in re.finditer(' and ', new_q['title'])]
                    if len(and_location) != 1:
                        continue
                    page1 = wiki_wiki.page(new_q['title'][:and_location[0]])
                    text1 = page1.summary
                    page2 = wiki_wiki.page(new_q['title'][and_location[0]+5:])
                    text2 = page2.summary
                    # Randomize the order of the two summaries in the passage.
                    if random.randint(0,1):
                        wikipage = text1 + " " + text2
                    else:
                        wikipage = text2 + " " + text1
                    new_q['passage'] = wikipage
                    # Skip stubs and disambiguation pages.
                    if len(text1) < 100 or len(text2) < 100 or 'may refer to:' in wikipage:
                        continue
                    questions.append(new_q)
                    questionCounter = questionCounter + 1
                    print(f"switch number {questionCounter}.")
                except KeyError:
                    pass
    return questions
def get_urls_of_query(url):
    """Given a Wikidata entity URL, return ``[[entity_url, q_id], ...]`` for
    every item whose "position held" (P39) is that entity."""
    time.sleep(1)  # workaround: without the delay the endpoint rate-limits us
    id_ph = url[url.find('Q'):]  # Q-id suffix of the entity URL
    res = return_sparql_query_results(
        'SELECT ?item WHERE { ?item wdt:P39 wd:' + id_ph + '. }')
    return [[binding['item']['value'], id_ph]
            for binding in res['results']['bindings']]
def plot_correlation():
    """Scatter-plot COVID deaths per million (from entorb) against country
    statistics pulled from Wikidata, one subplot per statistic.

    Returns the matplotlib Figure.

    Fix: ``k is 'code'`` compared string identity (implementation-dependent,
    SyntaxWarning on modern CPython); replaced with ``==``.  The bare
    ``except:`` around the entorb lookup is narrowed to ``Exception``.
    """
    # https://query.wikidata.org/
    dictlist = return_sparql_query_results("""
        SELECT DISTINCT ?code ?population ?area ?ppp ?ngdp ?growth ?totrsv
                        ?hdi ?medinc ?literacy ?life_expectancy ?fertility_rate {
            ?country wdt:P31 wd:Q3624078 ;
                     wdt:P297 ?code ;
                     wdt:P2046 ?area ;
                     wdt:P1082 ?population .
            OPTIONAL {
                ?country wdt:P4010 ?ppp .
                ?country wdt:P2131 ?ngdp .
                ?country wdt:P2219 ?growth .
                ?country wdt:P2134 ?totrsv .
                ?country wdt:P1081 ?hdi .
                ?country wdt:P3529 ?medinc .
                ?country wdt:P6897 ?literacy .
                ?country wdt:P2250 ?life_expectancy .
                ?country wdt:P4841 ?fertility_rate .
            }
        }
        """)['results']['bindings']
    # 'code' stays a string index; every other column becomes float (or None
    # when the OPTIONAL block did not bind for that country).
    df = pd.DataFrame({
        k: [x[k]['value'] if k == 'code'
            else float(x[k]['value']) if k in x else None
            for x in dictlist]
        for k in ['code', 'population', 'area', 'ppp', 'ngdp', 'growth',
                  'totrsv', 'hdi', 'medinc', 'literacy', 'life_expectancy',
                  'fertility_rate']}) \
        .set_index('code')
    # Derived per-capita columns.
    df['density'] = df['population'] / df['area']
    df['ngdp/p'] = df['ngdp'] / df['population']
    df['ppp/p'] = df['ppp'] / df['population']
    df['growth/p'] = df['growth'] / df['population']
    df['totrsv/p'] = df['totrsv'] / df['population']
    df['hdi/p'] = df['hdi'] / df['population']
    cols = ['Deaths_Per_Million']
    dict_entorb = {c: [] for c in cols}
    for area in df.index:
        try:
            for col in cols:
                dict_entorb[col].append(
                    entorb.to_dataframe(nation=area)[col].values[-1])
        except Exception:  # country unknown to entorb — pad with None
            for col in cols:
                dict_entorb[col].append(None)
    for col in dict_entorb:
        df[col] = dict_entorb[col]
    ncols = int((len(df.columns) - 1) / 4 + 1)
    fig, axes = plt.subplots(ncols=ncols, nrows=4)
    for ax, col in zip(axes.flat, [c for c in df.columns if c not in cols]):
        df.plot(kind='scatter', y='Deaths_Per_Million', logy=True, x=col,
                sharey=False, ax=ax)
    fig.set_size_inches(16, 4 * ncols)
    return fig
def query(query):
    """Send a label-lookup SPARQL query to the Wikidata query service.

    Finds every item whose English label is exactly *query* and returns the
    full JSON response.
    """
    sparql_query = """
    SELECT distinct ?item ?itemLabel ?itemDescription WHERE{
        ?item rdfs:label "%s"@en.
        SERVICE wikibase:label { bd:serviceParam wikibase:language "en". }
    }
    """ % query
    return return_sparql_query_results(sparql_query)
def generate_mix_questions():
    """Build 'mix' questions from the module-level `mix_entities`.

    For each primary entity, takes its Wikipedia summary as the passage and
    fills the question templates with a *mismatched* secondary value (the
    first shuffled candidate that differs from the entity's true one).
    Returns the list of question dicts.
    """
    from collections import defaultdict
    import copy
    wiki_wiki = wikipediaapi.Wikipedia('en')
    questions = []
    titles = {}
    for entity_ind, entity in enumerate(mix_entities):
        es = entity['labels']      # SPARQL variable names used in templates
        query = entity['query']
        qs = entity['questions']   # templates with <label> placeholders
        res = return_sparql_query_results(query)
        res = res['results']['bindings']
        values = defaultdict(list)
        for entity in res:  # NOTE(review): shadows the outer loop variable
            for e in es:
                name = entity[f'{e}Label']['value']
                values[e] += [name]
                q_id = entity[f'{e}']['value'].split('/')[-1]
                # skip bindings whose URI is not a Q-entity
                if not q_id.startswith('Q'):
                    continue
                try:
                    titles[name] = name
                except KeyError:
                    pass
        actives = list(set(values[es[1]]))  # distinct secondary values
        for ind_i, i in enumerate(values[es[0]]):
            print(f"Mix number {ind_i}")
            title = titles[i]
            page = wiki_wiki.page(title)
            wikipage = page.summary
            # Skip stubs and disambiguation pages.
            if len(wikipage) < 100 or 'may refer to:' in wikipage:
                continue
            random.shuffle(actives)
            for ind, j in enumerate(actives):
                # Only a secondary value that differs from this entity's true
                # value is usable; give up after the first shuffled slot.
                if j != values[es[1]][ind_i]:
                    if ind > 0:
                        break
                    for q in qs:
                        try:
                            new_q = copy.deepcopy(q)
                            new_q['title'] = title
                            new_q['passage'] = wikipage
                            new_q['question'] = q['question'].replace(f'<{es[0]}>', i).replace(f'<{es[1]}>', j)
                            questions.append(new_q)
                        except KeyError:
                            pass
    return questions
def get_wikidata_info(wikidata_search):
    '''
    Returns all properties associated with a Wikidata object in JSON format
    using the qwikidata.sparql module.

        Parameters:
            wikidata_search (string): VALUES clause content identifying the
                item(s), e.g. "{(wd:Q42)}", extracted from the yago database

        Returns:
            data (list of dicts): all properties of the object as RDF-triple
                bindings, or None when the query fails

    Fixes: the fourteen ``queryN`` temporaries and repeated ``append`` calls
    are collapsed into a single literal list; the bare ``except:`` is
    narrowed to ``Exception``.
    '''
    try:
        query_string = "\n".join([
            """SELECT DISTINCT ?predLink ?objValue ?objValueLabel ?predValueLabel ?quantityAmount ?quantityUnitLabel ?qualPredLabel ?qualObjLabel WHERE {""",
            """VALUES (?item) {0}""".format(wikidata_search),
            """?item ?predLink ?statement .""",
            """?statement ?ps ?objValue .""",
            """?predValue wikibase:claim ?predLink.""",
            """?predValue wikibase:statementProperty ?ps.""",
            """OPTIONAL {""",
            """?statement ?psv ?valuenode .""",
            """?valuenode wikibase:quantityAmount ?quantityAmount.""",
            """?valuenode wikibase:quantityUnit ?quantityUnit.}""",
            """OPTIONAL {""",
            """?statement ?pq ?qualObj .""",
            """?qualPred wikibase:qualifier ?pq .}""",
            """SERVICE wikibase:label { bd:serviceParam wikibase:language "en" }}""",
        ])
        res = return_sparql_query_results(query_string)
        data = res["results"]["bindings"]
    except Exception:  # was a bare except:
        data = None
    return data
def get_request(request):
    """Run the SPARQL *request* against Wikidata and return the list of
    Q-ids embedded in each ?answer value.

    Empty or None requests return [] without touching the network; any
    failure prints "Failed request" and also returns [].

    Fix: the bare ``except:`` (which would swallow KeyboardInterrupt) is
    narrowed to ``Exception``; the regex is a raw string.
    """
    if not request:
        return []
    try:
        response = return_sparql_query_results(request)
        results = response["results"]
        values = [
            result["answer"]["value"]
            for result in results.get("bindings", [])
        ]
        # each value is an entity URI; keep only its Q-id
        return [re.findall(r"Q\d+", value)[0] for value in values]
    except Exception:
        print("Failed request")
        return []
def listSensorItems():
    """List every item that is an instance (P31) of sensor (Q167676),
    with its optional image (P18).

    Returns the raw query response as a dictionary.
    """
    sparql_query = """
    SELECT ?sensor ?sensorLabel ?sensorDescription ?img WHERE {
        ?sensor wdt:P31 wd:Q167676.
        OPTIONAL{?sensor wdt:P18 ?img}
        SERVICE wikibase:label {
            bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en".
        }
    }
    """
    return return_sparql_query_results(sparql_query)
def find_suggestions(n_clicks, properties, values):
    """Threaded variant of find_suggestions: builds the SPARQL filter from
    the dropdown components, then fetches each matching item's properties
    concurrently with a thread pool.

    Fixes: the two bare ``except:`` clauses are narrowed to ``Exception``,
    and the progress counter increment duplicated across the success/failure
    branches is hoisted into ``finally``.
    """
    if n_clicks >= 1:
        filters = ""
        for i in range(len(properties)):
            try:
                temp_property = properties[i]['props']['value']
                temp_value = values[i]['props']['value']
                filters += "?item wdt:" + temp_property + " wd:" + temp_value + " . "
            except Exception:  # value dropdown missing/empty
                try:
                    # property must merely exist — bind it to a free variable
                    temp_property = properties[i]['props']['value']
                    temp_value = "?variable" + str(i + 1)
                    filters += "?item wdt:" + temp_property + temp_value + " . "
                except Exception:  # property missing too — skip this row
                    pass
        query_string = """ SELECT ?item WHERE {""" + filters + """}"""
        results = return_sparql_query_results(query_string)
        item_list = [result['item']['value'].split("/")[-1]
                     for result in results["results"]["bindings"]]
        print("The length of the item list is " + str(len(results["results"]["bindings"])))
        nested_list = []
        loading_bar_progress = 0
        with concurrent.futures.ThreadPoolExecutor() as executor:
            future_nested_list = {
                executor.submit(retrieve_properties, item): item
                for item in item_list
            }
            for future in concurrent.futures.as_completed(future_nested_list):
                try:
                    nested_list.append(future.result())
                except Exception:
                    print("Generated an exception")
                finally:
                    loading_bar_progress += 1
        print(nested_list)
def q_wikidata(wiki_id):
    """Return the English label of the ethnicity (P172) of entity *wiki_id*.

    Returns None when the entity has no P172 claim (empty bindings) or the
    query fails — e.g. objects rather than people.

    Fix: the bare ``except:`` is narrowed to ``Exception`` so it no longer
    swallows KeyboardInterrupt/SystemExit.
    """
    sparql_query = """
    SELECT ?item ?itemLabel WHERE {
        wd:""" + wiki_id + """ wdt:P172 ?item.
        SERVICE wikibase:label {bd:serviceParam wikibase:language "en" . }
    }
    """
    try:
        res_all = return_sparql_query_results(sparql_query)
        # [0] raises IndexError when there is no P172 claim — caught below
        return res_all['results']['bindings'][0]['itemLabel']['value']
    except Exception:
        return None
def generate_true_questions():
    """Build 'true' questions from the module-level `entities`.

    For each entity spec, runs its SPARQL query, fills the question
    templates with the genuinely paired label values, and attaches the
    Wikipedia summary of the secondary entity as the passage.
    Returns the list of question dicts.
    """
    from collections import defaultdict
    import copy
    wiki_wiki = wikipediaapi.Wikipedia('en')
    questions = []
    titles = {}
    for entity in entities:
        es = entity['labels']      # SPARQL variable names used in templates
        query = entity['query']
        qs = entity['questions']   # templates with <label> placeholders
        res = return_sparql_query_results(query)
        res = res['results']['bindings']
        values = defaultdict(list)
        for entity in res:  # NOTE(review): shadows the outer loop variable
            for e in es:
                name = entity[f'{e}Label']['value']
                values[e] += [name]
                q_id = entity[f'{e}']['value'].split('/')[-1]
                # skip bindings whose URI is not a Q-entity
                if not q_id.startswith('Q'):
                    continue
                try:
                    # title = get_entity_dict_from_api(q_id)['labels']['en']['value']
                    titles[name] = name
                except KeyError:
                    pass
        for ind, j in enumerate(values[es[0]]):
            print(f"True number {ind}")
            for q in qs:
                new_q = copy.deepcopy(q)
                # substitute the true (paired) values into the template
                new_q['question'] = q['question'].replace(f'<{es[0]}>', j).replace(f'<{es[1]}>', values[es[1]][ind])
                try:
                    new_q['title'] = titles[values[es[1]][ind]]
                    page = wiki_wiki.page(new_q['title'])
                    wikipage = page.summary
                    new_q['passage'] = wikipage
                    # skip stubs and disambiguation pages
                    if len(wikipage) < 100 or 'may refer to:' in wikipage:
                        continue
                    questions.append(new_q)
                except KeyError:
                    pass
    return questions
def get_entity_id_for_city_by_name(city_name):
    """Look up the Wikidata entity id of a city by (partial) name.

    :param city_name: city name; matched case-insensitively as a substring
        of the label of any instance/subclass of city (Q515)
    :return: string Wikidata entity_id (e.g. 'Q64') of the first match
    """
    sparql_query = """
    SELECT ?item ?itemLabel WHERE {{
        ?item wdt:P31/wdt:P279* wd:Q515 .
        ?item rdfs:label ?itemLabel.
        FILTER(CONTAINS(LCASE(?itemLabel), "{}"@en)).
    }} limit 1
    """.format(city_name.lower())
    res = return_sparql_query_results(sparql_query)
    entity_url = res.get("results").get("bindings")[0].get('item').get('value')
    # strip the entity-URI prefix, leaving only the Q-id
    return entity_url.replace('http://www.wikidata.org/entity/', '')
def get_label(id):
    """Fetch the English rdfs:label of Wikidata entity *id*.

    Returns "" when the entity has no English label and " " when the query
    itself failed — the single space distinguishes error-caused absence
    from a genuinely missing label.

    (The parameter shadows the builtin ``id`` but is kept for keyword-call
    compatibility.)  Fix: ``.__len__() != 0`` replaced by truthiness.
    """
    query_string = """SELECT *
    WHERE {
        wd:""" + id + """ rdfs:label ?label
        FILTER (langMatches( lang(?label), "EN" ) )
    }"""
    print(id)
    result = ""
    try:
        res = return_sparql_query_results(query_string)
    except Exception as e:
        print(e)
        # " " marks a query failure; "" means the label is genuinely absent.
        result = " "
    else:
        print(res["results"]["bindings"])
        if res["results"]["bindings"]:  # was: .__len__() != 0
            result = res["results"]["bindings"][0]['label']['value']
    return result
def wikidata_query(query):
    """Execute a SPARQL *query* and wrap the outcome in a status dict.

    ASK queries yield their boolean under "values"; anything else is
    treated as SELECT and returns the raw bindings under "answers".
    A JSON decode failure from the endpoint maps to status 1.
    """
    try:
        query_result = return_sparql_query_results(query.strip())
        # print(query_result)
        if query.lower().startswith("ask"):
            return {
                "status": 0,
                "msg": "Query Success",
                "query_type": "ASK",
                "values": [query_result["boolean"]],
            }
        return {
            "status": 0,
            "msg": "Query Success",
            "query_type": "SELECT",
            "answers": query_result["results"]["bindings"],
        }
    except json.decoder.JSONDecodeError as jsonerr:
        print(jsonerr)
        return {"status": 1, "msg": "QUERY ERROR!"}
def searchByKeyword(keyword):
    """Search Wikidata by an English keyword (e.g. "sensor").

    Only items with an English Wikipedia article are returned, together
    with their part-of (P361) and a " | "-joined list of English aliases.
    Returns the raw query response as a dictionary.
    """
    sparql_query = """
    SELECT ?item ?itemLabel ?itemDescription ?article ?part
           (GROUP_CONCAT(DISTINCT ?alias; separator=" | ") as ?aliases)
    WHERE{
        ?item ?label "%s"@en.
        ?article schema:about ?item .
        ?article schema:inLanguage "en" .
        ?article schema:isPartOf <https://en.wikipedia.org/>.
        OPTIONAL {?item skos:altLabel ?alias FILTER (LANG (?alias) = "en")}
        OPTIONAL { ?item wdt:P361 ?part}
        SERVICE wikibase:label { bd:serviceParam wikibase:language "en". }
    }
    GROUP BY ?item ?itemLabel ?itemDescription ?article ?part
    """ % keyword
    return return_sparql_query_results(sparql_query)
def generate_false_questions():
    """For every (labels, query, templates) triple in the module-level
    `entities`, run the SPARQL query and instantiate each question template
    by substituting every '<label>' placeholder with the fetched value.

    Returns the flat list of question strings.
    """
    questions = []
    for (es, query, qs) in entities:
        bindings = return_sparql_query_results(query)['results']['bindings']
        # one {label: value} dict per result row
        values = [{e: row[f'{e}Label']['value'] for e in es}
                  for row in bindings]
        for val in values:
            print(val)
            for template in qs:
                question = template
                for (k, v) in val.items():
                    question = question.replace(f'<{k}>', v)
                questions.append(question)
    return questions
def fromWikiData():
    """Load every sovereign state (Q3624078) — with its ISO country code
    (P297), population (P1082) and coordinates (P625) — into the
    Country.countries registry."""
    sparql = '''
    SELECT ?country ?countryLabel ?isocc ?location ?pop WHERE {
        ?country wdt:P31 wd:Q3624078 .   # sovereign state
        # iso country code — https://www.wikidata.org/wiki/Property:P297
        ?country wdt:P297 ?isocc.
        # population — https://www.wikidata.org/wiki/Property:P1082
        ?country wdt:P1082 ?pop.
        # coordinates — https://www.wikidata.org/wiki/Property:P625
        ?country wdt:P625 ?location.
        SERVICE wikibase:label { bd:serviceParam wikibase:language "en" }
    }'''
    res = return_sparql_query_results(sparql)
    for record in res['results']['bindings']:
        if Population.debug:
            print(record)
        Country.countries.append(Country(record))
def get_city_wikidata(city, country):
    """Return the population value of *city* located in *country*.

    The city label must match exactly (English); the country label is
    matched as a substring.  Returns -1 when nothing matches.
    """
    query = """
    SELECT ?city ?cityLabel ?country ?countryLabel ?population WHERE {
        ?city rdfs:label '%s'@en.
        ?city wdt:P1082 ?population.
        ?city wdt:P17 ?country.
        ?city rdfs:label ?cityLabel.
        ?country rdfs:label ?countryLabel.
        FILTER(LANG(?cityLabel) = "en").
        FILTER(LANG(?countryLabel) = "en").
        FILTER(CONTAINS(?countryLabel, "%s")).
    }
    """ % (city, country)
    bindings = return_sparql_query_results(query)['results']['bindings']
    if not bindings:
        return -1  # sentinel: no matching city/country pair
    return bindings[0]['population']['value']
def wikidata_queries(lista):
    """For each name in *lista*, search Wikidata for humans (Q5) holding a
    VIAF id (P214) whose label matches the name (case-insensitive regex)
    and print the raw response.

    Names given as "Surname, Given" are flipped to "Given Surname" first.
    A failed query prints "<name> non va".

    Fixes: both bare ``except:`` clauses narrowed to ``Exception``;
    Italian comments translated; dead commented-out accumulation removed.
    """
    for elem in lista:
        try:
            if ',' in elem:
                parts = elem.split(', ')
                x = str(parts[1] + ' ' + parts[0])  # "Surname, Given" -> "Given Surname"
            else:
                x = elem
        except Exception:  # e.g. "A,B" without the space after the comma
            x = elem
        try:
            # TODO(review): spaces and tabs should still be stripped from x
            sparql_query = """
            SELECT ?s ?sLabel ?viaf_id WHERE {
                ?s wdt:P31 wd:Q5 ;
                   wdt:P214 ?viaf_id.
                FILTER regex(?sLabel, """ + '\'' + x + '\'' + """, 'i').
            }
            """
            res = return_sparql_query_results(sparql_query)
            print(res)
        except Exception:
            print(x + ' non va')
def wikidata_query(query):
    """Perform a SPARQL query and reshape the bindings into
    ``{entity_uri: {field: [values, ...]}}``.

    The ``item`` column becomes the outer key; ``itemAltLabel`` values are
    split on ", " into individual aliases; duplicate values are dropped
    while preserving first-seen order.
    """
    data = {}
    res = return_sparql_query_results(query)
    for binding in res['results']['bindings']:
        entity = binding['item']['value']
        record = data.setdefault(entity, {})
        for field in binding:
            if field == "item":
                continue
            bucket = record.setdefault(field, list())
            raw = binding[field]['value']
            if field == "itemAltLabel":
                # comma-separated alias list — split into individual values
                new_values = raw.split(", ")
            else:
                new_values = [raw]
            for v in new_values:
                if v not in bucket:
                    bucket.append(v)
    return data
def loadRawDataMountains():
    """Execute the module-level ``sparql_query`` against Wikidata and return
    the raw JSON response."""
    return return_sparql_query_results(sparql_query)