def get_wikidata_item(id_):
    """Fetch entity *id_* from the Wikidata API and wrap it in the
    qwikidata class matching its type ('item', 'property' or 'lexeme')."""
    entity_dict = get_entity_dict_from_api(id_)
    wrappers = {
        'item': WikidataItem,
        'property': WikidataProperty,
        'lexeme': WikidataLexeme,
    }
    wrapper_cls = wrappers[entity_dict['type']]
    return wrapper_cls(entity_dict)
def wikidata_get_sitelink(id):
    """Return the English Wikipedia title and URL for entity *id*.

    On an API failure the exception is printed and an error dict is
    returned; when no enwiki sitelink exists a generic error is returned.
    """
    try:
        entity = get_entity_dict_from_api(id)
        sitelinks = entity.get("sitelinks", {})
        if "enwiki" in sitelinks:
            enwiki = sitelinks["enwiki"]
            return {
                "title": enwiki["title"],
                "sitelink": enwiki["url"],
            }
    except LdiResponseNotOk as exception:
        print(exception)
        return {"error": "Failed to get sitelink"}
    return {"error": "Unknown Error"}
def fill(X):
    """Return a display name for row *X*, filling missing names from tags.

    :param X: mapping with a 'name' value and a 'tags' dict (OSM-style).
    :return: the existing name, or a fallback taken (in priority order)
        from 'official_name', 'operator', the label of 'brand:wikidata',
        or 'brand:wikipedia'; if no fallback matches, the NaN is returned.
    """
    name = X['name']
    # NaN is the only value unequal to itself. BUG FIX: the original used
    # `name is NaN`, an identity check that only matches the one imported
    # NaN object and misses other NaN floats (e.g. float('nan') produced
    # by JSON parsing or pandas).
    if isinstance(name, float) and name != name:
        tags = X['tags']
        if 'official_name' in tags:
            name = tags['official_name']
        elif 'operator' in tags:
            name = tags['operator']
        elif 'brand:wikidata' in tags:
            q_dict = get_entity_dict_from_api(tags['brand:wikidata'])
            name = WikidataItem(q_dict).get_label()
        elif 'brand:wikipedia' in tags:
            # Strip the "xx:" language prefix from the article title.
            name = tags['brand:wikipedia'][3:]
    return name
def get_start(QID):
    """Return the formatted start date of the person's term in office.

    Reads the P580 ("start time") qualifier of the first P39 ("position
    held") claim.

    :param QID: the person's Wikidata Q-id
    :return: the start date as 'DD.MM.YYYY', 'MM.YYYY' or a bare year
        (with a leading '-' for BCE), or 'нет данных' when the qualifier
        is missing.
    """
    person_dict = get_entity_dict_from_api(QID)
    claim = person_dict['claims']['P39'][0]
    if 'qualifiers' in claim and 'P580' in claim['qualifiers']:
        raw = str(claim['qualifiers']['P580'][0]['datavalue']['value']['time'])
        return format_reign_start(raw)
    return 'нет данных'


def format_reign_start(raw):
    """Format a Wikidata time string (e.g. '+1066-09-25T00:00:00Z').

    '00' month/day components mean the value has coarser-than-day
    precision and are dropped. BCE dates (leading '-') keep a '-' prefix.
    """
    date = raw[1:11]  # 'YYYY-MM-DD'
    if date[5:] == '00-00':   # year precision only
        date = date[:4]
    elif date[8:] == '00':    # month precision only
        date = date[:7]
    if len(date) == 7:
        year, month = date.split('-')
        date = month + '.' + year                 # equivalent to arrow 'MM.YYYY'
    elif len(date) == 10:
        year, month, day = date.split('-')
        date = day + '.' + month + '.' + year     # equivalent to arrow 'DD.MM.YYYY'
    if raw[0] == '-':
        date = '-' + date
    # Strip leading zeros from the year (the segment after the last '.').
    # BUG FIX: the original used str.replace('0', ''), which removed ALL
    # zeros from the year ('1066' became '166'), not only leading ones.
    cut = date.rfind('.') + 1
    head, year = date[:cut], date[cut:]
    sign = ''
    if year.startswith('-'):
        sign, year = '-', year[1:]
    return head + sign + year.lstrip('0')
def retrieveRelatedEntities(self, entityId, limit=None):
    """Collect the Q-ids of wikibase-item values referenced by the claims
    of *entityId*, stopping early once *limit* ids have been gathered."""
    claims = get_entity_dict_from_api(entityId)["claims"]
    related = []
    # The claim JSON is deeply nested: property -> statements -> mainsnak.
    for statements in claims.values():
        for statement in statements:
            if "mainsnak" not in statement:
                continue
            snak = statement["mainsnak"]
            if snak["datatype"] != "wikibase-item":
                continue
            datavalue = snak["datavalue"]
            if datavalue["type"] != "wikibase-entityid":
                continue
            related.append(datavalue["value"]["id"])
            if limit is not None and len(related) == limit:
                return related
    return related
def get_boroughs(self):
    """Return [{Name, Lat, Lon}, ...] for each borough of this entity.

    Boroughs come from P150 ("contains administrative territorial
    entity"), e.g. Venice Q641 has 6: Cannaregio, San Polo, Dorsoduro,
    Santa Croce, San Marco and Castello.
    https://www.wikidata.org/wiki/Property:P150

    Entities without P150 claims fall back to a single record built from
    the entity's own name and coordinates.
    :return: list of dicts with 'Name', 'Lat', 'Lon' keys
    """
    # Renamed locals: the original used `property` (shadows the builtin)
    # and misspelled `bourough*` identifiers.
    borough_ids = []
    boroughs = []
    p150_claims = self.entity.get("claims").get("P150")
    # @Todo Q_TRIER = "Q3138" has NO P150
    if p150_claims is None:
        print(self.entity_id, "has no P150")
        lat, lon = self.get_coordinate_location()
        boroughs.append({"Name": self.get_name(), "Lat": lat, "Lon": lon})
        return boroughs
    for item in p150_claims:
        entity = item.get("mainsnak").get("datavalue").get('value').get('id')
        borough_ids.append(entity)
    for entity_id in borough_ids:
        entity = get_entity_dict_from_api(entity_id)
        english_label = entity.get('labels').get("en")
        if None is english_label:
            # No English label: fall back to the first available language.
            key = next(iter(entity.get('labels')))
            borough_name = entity.get('labels').get(key).get("value")
        else:
            borough_name = entity.get('labels').get("en").get("value")
        # P625 is "coordinate location".
        coords = entity.get("claims").get("P625")[0].get('mainsnak').get(
            'datavalue').get('value')
        lat, lon = coords.get('latitude'), coords.get('longitude')
        boroughs.append({"Name": borough_name, "Lat": lat, "Lon": lon})
    return boroughs
async def get_fact(query, args, tokenizer, trex_set, common_vocab, f_out):
    """ Collect more facts for the TREx-train set from LPAQA """
    sub_url, sub, obj_url, obj = query.strip().split('\t')
    sub_id = get_id_from_url(sub_url)
    obj_id = get_id_from_url(obj_url)

    # Skip facts that already appear in the TREx test set.
    if (sub_id, obj_id) in trex_set:
        return
    # The object must be a single token for the probing setup ...
    if len(tokenizer.tokenize(obj)) != 1:
        return
    # ... and must belong to the common vocabulary subset.
    if obj not in common_vocab:
        return
    # The subject must be prominent, i.e. have at least one Wikipedia page.
    try:
        item = WikidataItem(get_entity_dict_from_api(sub_id))
        if not item.get_sitelinks():
            return
    except ValueError:
        return
    # Some entities don't have labels so the subject label is the URI.
    if sub_id == sub:
        return

    record = {
        'sub_uri': sub_id,
        'obj_uri': obj_id,
        'sub_label': sub,
        'obj_label': obj
    }
    f_out.write(json.dumps(record) + '\n')
    # Increment global count
    await increment_count()
def get_end(QID):
    """Return the formatted end date of the person's term in office.

    Reads the P582 ("end time") qualifier of the first P39 ("position
    held") claim.

    :param QID: the person's Wikidata Q-id
    :return: the end date as 'DD.MM.YYYY', 'MM.YYYY' or a bare year
        (with a leading '-' for BCE), or 'нет данных' when the qualifier
        is missing.
    """
    person_dict = get_entity_dict_from_api(QID)
    claim = person_dict['claims']['P39'][0]
    if 'qualifiers' in claim and 'P582' in claim['qualifiers']:
        raw = str(claim['qualifiers']['P582'][0]['datavalue']['value']['time'])
        return format_reign_end(raw)
    else:
        return 'нет данных'


def format_reign_end(raw):
    """Format a Wikidata time string (e.g. '+1066-09-25T00:00:00Z').

    '00' month/day components mean the value has coarser-than-day
    precision and are dropped. BCE dates (leading '-') keep a '-' prefix.
    """
    date = raw[1:11]  # 'YYYY-MM-DD'
    if date[5:] == '00-00':   # year precision only
        date = date[:4]
    elif date[8:] == '00':    # month precision only
        date = date[:7]
    if len(date) == 7:
        year, month = date.split('-')
        date = month + '.' + year                 # equivalent to arrow 'MM.YYYY'
    elif len(date) == 10:
        year, month, day = date.split('-')
        date = day + '.' + month + '.' + year     # equivalent to arrow 'DD.MM.YYYY'
    if raw[0] == '-':
        date = '-' + date
    # Strip leading zeros from the year (the segment after the last '.').
    # BUG FIX: the original used str.replace('0', ''), which removed ALL
    # zeros from the year ('1066' became '166'), not only leading ones.
    cut = date.rfind('.') + 1
    head, year = date[:cut], date[cut:]
    sign = ''
    if year.startswith('-'):
        sign, year = '-', year[1:]
    return head + sign + year.lstrip('0')
def main(search_term):
    """Search pap.wikipedia for *search_term*, look each hit up on
    Wikidata, and report whether Papiamentu/Dutch labels exist."""
    wikipedia = MediaWiki(lang='pap', user_agent='code-for-nl-pap-parser')
    wikidata = MediaWiki(url='https://www.wikidata.org/w/api.php',
                         user_agent='code-for-nl-pap-parser')

    for result_item in wikipedia.search(search_term, results=4):
        page = wikipedia.page(result_item)
        print(
            'I found page \'%s\' for term \'%s\'' % (result_item, search_term),
            'with categories', '/'.join(page.categories),
            'https://pap.wikipedia.org/wiki/' +
            urllib.parse.quote(result_item))
        # print(page.images)

        # Look this hit up on wikidata; this yields a code like Q215887.
        for data_item in wikidata.search(result_item, results=1):
            Q_CODE = data_item
            print(result_item, 'is known on wikidata with the code', Q_CODE,
                  'https://www.wikidata.org/wiki/' + Q_CODE)
            # Now try the qwikidata interface
            q = WikidataItem(get_entity_dict_from_api(Q_CODE))
            pap_data_label = q.get_label(lang='pap')
            nl_data_label = q.get_label(lang='nl')
            if pap_data_label and nl_data_label:
                # First get the page. Read the images found
                data_page = wikidata.page(result_item)
                # print(data_page.images)
                print(pap_data_label, 'is called', nl_data_label, 'in dutch')
            elif pap_data_label:
                print(pap_data_label, 'has no entry for dutch!')
            elif nl_data_label:
                print(Q_CODE, 'does not match papiamentu entry')
            else:
                print(pap_data_label, 'has no entry for dutch or papiamentu!')
def save_label(Q, label):
    """Set the Armenian ('hy') label of item *Q* via the Wikidata API.

    Items that already carry an 'hy' label are simply removed from the
    local queue and None is returned; otherwise the wbsetlabel call is
    issued and (csrf_token, response) is returned.
    """
    data = get_entity_dict_from_api(Q)
    if 'labels' in data and 'hy' in data['labels']:
        # Already labelled in Armenian: drop it from our work queue.
        WikiDataItems.query.filter_by(q=Q).delete()
        db.session.commit()
        return None
    auth1, csrf_token = get_csrf_token()
    payload = {
        "action": "wbsetlabel",
        "token": csrf_token,
        "format": "json",
        "id": Q,
        "language": "hy",
        "value": label,
    }
    response = requests.post("https://www.wikidata.org/w/api.php",
                             data=payload, auth=auth1)
    # On a successful edit the item is removed from the local queue.
    if response and 'success' in response.json() and response.json()['success']:
        WikiDataItems.query.filter_by(q=Q).delete()
        db.session.commit()
    return csrf_token, response
def get_labels(username, rec=10):
    """Pick a random queued item that *username* hasn't done and that
    lacks an Armenian ('hy') label.

    Retries (recursively) up to *rec* times when the drawn item is
    already done or already labelled.

    :return: (Q, labels, hy_description) or (None, None, None) when no
        suitable item was found.
    """
    if rec < 0:
        # BUG FIX: the original returned a 2-tuple here while every other
        # path returns a 3-tuple, breaking callers that unpack 3 values.
        return None, None, None
    Q = str(random.choice(WikiDataItems.query.all()))
    if Done.query.filter_by(username=username, q=Q).all():
        return get_labels(username, rec - 1)
    # Renamed from `dict`, which shadowed the builtin.
    entity = get_entity_dict_from_api(Q)
    labels = []
    hydesc = 'հայերեն նկարագրություն չկա'
    if 'labels' in entity:
        if 'hy' in entity['labels']:
            # Already has an Armenian label: draw another item.
            return get_labels(username, rec - 1)
        # Guarded: 'descriptions' may be absent (consistent with the
        # per-language check below; the original could raise KeyError).
        if 'descriptions' in entity and 'hy' in entity['descriptions']:
            hydesc = entity['descriptions']['hy']['value']
        for lang in entity['labels']:
            temp = entity['labels'][lang]
            if 'descriptions' in entity and lang in entity['descriptions']:
                temp['description'] = entity['descriptions'][lang]['value']
            if 'sitelinks' in entity and lang + 'wiki' in entity['sitelinks']:
                temp['url'] = entity['sitelinks'][lang + 'wiki']['url']
            labels.append(temp)
    return Q, labels, hydesc
def get_place_from_wikidata(entity_id):
    """Build a Gramps Place from a Wikidata entity.

    :param entity_id: Wikidata Q-id of the place
    :return: (place, parents) where parents is a set of Q-ids collected
        from the "located in" properties.
    """
    parents = set()
    entity = WikidataItem(get_entity_dict_from_api(entity_id))
    claims_groups = entity.get_truthy_claim_groups()

    place = Place()
    place.set_gramps_id(entity_id)

    # Primary (Swedish) name and title.
    name = PlaceName()
    name.set_language('sv')
    name.set_value(entity.get_label('sv'))
    place.set_name(name=name)
    place.set_title(entity.get_label('sv'))

    # Alternative names, aliases and Wikipedia links per language.
    for lang in ['sv', 'en', 'de', 'fi', 'no', 'nn', 'da', 'se']:
        wiki_name = entity.get_label(lang)
        if len(wiki_name):
            place_name = PlaceName()
            place_name.set_language(lang)
            place_name.set_value(wiki_name)
            place.add_alternative_name(name=place_name)
        for alias in entity.get_aliases(lang):
            alt_name = PlaceName()
            alt_name.set_language(lang)
            alt_name.set_value(alias)
            place.add_alternative_name(name=alt_name)
        for link in entity.get_sitelinks(lang).values():
            wikipedia_url = Url()
            wikipedia_url.set_path(link['url'])
            wikipedia_url.set_type('Wikipedia entry')
            wikipedia_url.set_description('Wikipedia %s:%s' %
                                          (link["title"], link["site"]))
            place.add_url(wikipedia_url)

    # Instance of -> PlaceType, expressed as a lookup table instead of the
    # original if/elif chain.
    type_by_item = {
        ITEM_PARISH: PlaceType.PARISH,
        ITEM_SOCKEN: PlaceType.PARISH,
        ITEM_ISLAND: PlaceType.UNKNOWN,  # No islands in Gramps
        ITEM_MUNICIPALITY_OF_SWEDEN: PlaceType.MUNICIPALITY,
        ITEM_MUNICIPALITY: PlaceType.MUNICIPALITY,
        ITEM_COUNTRY: PlaceType.COUNTRY,
        ITEM_SOVEREIGN_STATE: PlaceType.COUNTRY,
        ITEM_STATE_OF_US: PlaceType.STATE,
        ITEM_FEDERAL_STATE: PlaceType.STATE,
        ITEM_COUNTY: PlaceType.COUNTY,
        ITEM_COUNTY_OF_SWEDEN: PlaceType.COUNTY,
        ITEM_FORMER_COUNTY_OF_SWEDEN: PlaceType.COUNTY,
        ITEM_PROVINCE_OF_SWEDEN: PlaceType.PROVINCE,
        ITEM_PROVINCE: PlaceType.PROVINCE,
        ITEM_ADM_REGION: PlaceType.REGION,
        ITEM_NEIGHBORHOOD: PlaceType.NEIGHBORHOOD,
        ITEM_DISTRICT: PlaceType.DISTRICT,
        ITEM_BOROUGH: PlaceType.BOROUGH,
        ITEM_TOWN: PlaceType.TOWN,
        ITEM_LARGE_VILLAGE: PlaceType.VILLAGE,
        ITEM_VILLAGE: PlaceType.VILLAGE,
        ITEM_URBAN_AREA_IN_SWEDEN: PlaceType.VILLAGE,
        ITEM_HAMLET: PlaceType.HAMLET,
        ITEM_FARM: PlaceType.FARM,
        ITEM_BUILDING: PlaceType.BUILDING,
    }
    if PROPERTY_INSTANCE_OF in claims_groups:
        for claim in claims_groups[PROPERTY_INSTANCE_OF]:
            instance_of = claim.mainsnak.datavalue.value['id']
            if instance_of in type_by_item:
                place.set_type(type_by_item[instance_of])

    if PROPERTY_COORDINATE_LOCATION in claims_groups:
        for claim in claims_groups[PROPERTY_COORDINATE_LOCATION]:
            datavalue = claim.mainsnak.datavalue
            place.set_latitude(str(datavalue.value['latitude']))
            place.set_longitude(str(datavalue.value['longitude']))

    extract_located_in(claims_groups, PROPERTY_LOCATED_IN_PRESENT, parents)
    extract_located_in(claims_groups, PROPERTY_LOCATED_IN_ADM, parents)
    extract_located_in(claims_groups, PROPERTY_LOCATED, parents)
    return place, parents
# Walk every monument dict: replace raw Q-ids with human-readable labels
# and re-key the claims by property label, checkpointing output every 4
# monuments. NOTE(review): `monument_list`, `count_mon`, `property_list`
# and `complete_final_monument_list` are defined upstream of this chunk.
for monument in monument_list:
    count_mon += 1
    print(count_mon)
    if count_mon % 4 == 0:
        print("HELLO")
        # Checkpoint partition number = integer part of count_mon / 4.
        partition_num = str(count_mon / 4).split('.')[0]
        with open(
                '../../Downloads/hi_monument_english_labels' + partition_num +
                '.json', 'w') as fout:
            json.dump(complete_final_monument_list, fout)
        complete_final_monument_list = []
        print("Checkpoint %d reached, JSON dumps saved |" % (count_mon / 4))
    # Replace the 'title' and 'id' Q-codes with their English labels.
    for key, val in monument.items():
        if key == "title":
            monument['title'] = WikidataItem(get_entity_dict_from_api(
                str(val))).get_label()
        elif key == "id":
            monument['id'] = WikidataItem(get_entity_dict_from_api(
                str(val))).get_label()
    # Fetch a label for each claim property (P727 skipped deliberately).
    for key in monument['claims'].keys():
        if (key == "P727"):
            continue
        else:
            property_list.append(
                WikidataProperty(get_entity_dict_from_api(key)).get_label())
    # Rebuild claims keyed by property label instead of P-id.
    final_monument_list = dict(
        zip(property_list, list(monument['claims'].values())))
    monument['claims'].clear()
    monument['claims'].update(final_monument_list)
    property_list = []
#!/usr/bin/env python3
"""Query the Wikidata query service for a given entity QID (or property PID) and get back entity information as JSON. The returned JSON is equivalent to the JSON data that is available in Wikidata entity dumps (available from https://dumps.wikimedia.org/wikidatawiki/entities/), but this may be more convient for accessing individual records. """
import json
from functools import lru_cache

from qwikidata.linked_data_interface import get_entity_dict_from_api

if __name__ == '__main__':
    import argparse

    arg_parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
        description=__doc__)
    arg_parser.add_argument('id', help='Wikidata identifier')
    cli_args = arg_parser.parse_args()

    # Emit the raw entity dict as UTF-8 JSON on stdout.
    entity = get_entity_dict_from_api(cli_args.id)
    print(json.dumps(entity, ensure_ascii=False))
# Merge every per-city JSON file into one list and persist it.
# NOTE(review): `data` and `fout` are defined upstream of this chunk.
for file in glob.glob('data/cities/*.json'):
    with open(file, 'r') as fin:
        datar = json.load(fin)
    for i in datar:
        data.append(i)
fout.write(json.dumps(data, indent=2))

# Load the caches of already-resolved city and country records.
with open('data/cities.json', 'r') as cities, open('data/countries.json',
                                                   'r') as countries:
    cities_data, countries_data = json.load(cities), json.load(countries)

keys = []
for city in data:
    # Resolve the city itself if it is not cached yet.
    if str(city['id']) not in cities_data:
        print(city['id'])
        wikidata_id = 'Q' + str(city['id'])
        wikidata_info = get_entity_dict_from_api(wikidata_id)
        name = wikidata_info['labels']['en']['value']
        # P625 is "coordinate location".
        coords = wikidata_info['claims']['P625'][0]['mainsnak'][
            'datavalue']['value']
        lat, lon = coords['latitude'], coords['longitude']
        cities_data[str(city['id'])] = {
            'name': name,
            'lat': lat,
            'lon': lon
        }
    # Resolve every country referenced by this city.
    for country in city['data'].values():
        if str(country) in countries_data:
            continue
        print(country)
        wikidata_id = 'Q' + str(country)
        # (chunk ends here; handling of the fetched country continues below)
        wikidata_info = get_entity_dict_from_api(wikidata_id)
f = open("codeList11.txt", "r") codes = f.read().splitlines() codeCount = 0 for line in codes: codeList.append(line) codeCount += 1 print('Processed', codeCount, 'Q-codes.') timeStart = perf_counter() cnt = 0 for name in codeList: cnt += 1 try: personDict = get_entity_dict_from_api(name) # Insert QCode here except: missingCodes = open("missingCodes.txt", "a") missingCodes.write(name + '\n') continue person = WikidataItem(personDict) claim_groups = person.get_truthy_claim_groups( ) # Gets a person's different Wikidata attributes try: eduGroups = claim_groups[ "P69"] # Grabs person's education from those attributes foundCount += 1 except: print(str(cnt) + ".", "Education not there for", person.get_label()) missingCount += 1
monument_labelled_prop_val = {} list_prop_value = monument['claims'] list_properties = list(list_prop_value.keys()) #Removing Properties from list of properties which dont have a wikidata page for prop in non_labelled_props: if prop in list_properties: list_properties.remove(prop) list_properties_copy = list_properties #Adding all properties to label list. The ones which dont have wikidata pages are stored in non_labelled_props for prop in list_properties: if prop not in label_list.keys(): try: prop_details = get_entity_dict_from_api(prop) if 'hi' in prop_details['labels'].keys(): prop_label = prop_details['labels']['hi']['value'] else: prop_label = str_translator( prop_details['labels']['en']['value']) label_list[prop] = prop_label except: non_labelled_props.append(prop) list_properties_copy.remove(prop) list_properties = list_properties_copy #For all values per property, label is extracted for each value ID [Q##### format] for prop in list_properties:
def __init__(self, entity_id):
    """Store *entity_id* and eagerly fetch its entity dict from the
    Wikidata linked-data API (network call at construction time)."""
    self.entity_id = entity_id
    # Raw entity JSON dict as returned by the API (labels, claims, ...).
    self.entity = get_entity_dict_from_api(entity_id)
# NOTE(review): `line`, `visited`, `words`, `query`, `findings` and
# `concepticon` are defined upstream of this chunk.
visited.add(line.strip())
current = ''
# Words sorted by their concept id.
for idx, (word, cid) in enumerate(sorted(words.items(), key=lambda x: x[1])):
    try:
        if cid not in visited:
            visited.add(cid)
            new_query = query.format(word)
            print(len(findings))
            print('{0} ...'.format(word))
            # Run the SPARQL query and walk its result bindings.
            res_ = return_sparql_query_results(new_query)
            res = res_['results']['bindings']
            for r in res:
                # The lexeme id is the last path segment of the entity URI.
                key = r['lexemeId']['value'].split('/')[-1]
                #print(r['lexemeId'], key)
                dct = get_entity_dict_from_api(key)
                if dct['senses']:
                    # Use the English gloss of the first sense, if present.
                    this_sense = dct['senses'][0]['glosses'].get(
                        'en', {'value': ''})['value']
                    if this_sense:
                        findings[cid, concepticon.conceptsets[cid].gloss] += [
                            (key, this_sense, word)
                        ]
                        print('... ', this_sense)
    except Exception as e:
        # Best-effort: log and continue on any lookup/parse failure.
        print(e)
    if cid != current:
        current = cid
        cgl = concepticon.conceptsets[cid].gloss
        if (cid, cgl) in findings:
            # Append this concept's findings to the TSV
            # (the with-body continues beyond this chunk).
            with open('wikidata.tsv', 'a') as f:
from qwikidata.entity import WikidataItem, WikidataLexeme, WikidataProperty
from qwikidata.linked_data_interface import get_entity_dict_from_api

# Demo of the three qwikidata entity wrappers, each built from the raw
# JSON dict returned by the linked-data API.

# create an item representing "Douglas Adams"
Q_DOUGLAS_ADAMS = "Q42"
q42_dict = get_entity_dict_from_api(Q_DOUGLAS_ADAMS)
q42 = WikidataItem(q42_dict)

# create a property representing "subclass of"
P_SUBCLASS_OF = "P279"
p279_dict = get_entity_dict_from_api(P_SUBCLASS_OF)
p279 = WikidataProperty(p279_dict)

# create a lexeme representing "bank"
L_BANK = "L3354"
l3354_dict = get_entity_dict_from_api(L_BANK)
l3354 = WikidataLexeme(l3354_dict)
# Get entity ids from api from qwikidata.linked_data_interface import get_entity_dict_from_api from qwikidata.entity import WikidataItem, WikidataProperty, WikidataLexeme from tqdm import tqdm import pickle wikidata = pickle.load(open('/home/keshav/olpbench/wikidata_ids.pkl','rb')) english_labels_nf, items_nf = set(), set() labelsD = dict() for item in tqdm(wikidata): try: entity_dict = get_entity_dict_from_api(item) except: items_nf.add(item) continue if 'en' not in entity_dict['labels']: english_labels_nf.add(item) continue labelsD[item] = entity_dict['labels']['en']['value'] # Get entity ids from json dump import pickle from tqdm import tqdm from qwikidata.json_dump import WikidataJsonDump wjd = WikidataJsonDump("/home/keshav/wikidata-20201109-all.json.bz2") namesD = dict() not_found_english_label = 0 for item in tqdm(wjd): entity_id =item['id'] if 'en' not in item['labels']:
def getEntityInfo(eid):
    """Return the raw Wikidata entity dict for id *eid*."""
    entity = get_entity_dict_from_api(eid)
    return entity
def get_item(itemId):
    """Fetch and return the Wikidata entity dict for *itemId*."""
    return get_entity_dict_from_api(itemId)