def test_get_label_1(self) -> None:
    """Assert correct behavior in get_label method."""
    # Exercise both entity flavors (item and property) with the same checks.
    cases = (
        (_load_item_dict, typedefs.ItemId, "Q42", WikidataItem),
        (_load_property_dict, typedefs.PropertyId, "P279", WikidataProperty),
    )
    for loader, id_cls, raw_id, entity_cls in cases:
        raw_dict = loader(id_cls(raw_id))
        expected_en = raw_dict["labels"][EN]["value"]
        expected_de = raw_dict["labels"][DE]["value"]
        entity = entity_cls(raw_dict)
        # Default language is English; unknown languages yield "".
        assert entity.get_label() == expected_en
        assert entity.get_label(lang=EN) == expected_en
        assert entity.get_label(lang=DE) == expected_de
        assert entity.get_label(lang=NO) == ""
def main(search_term):
    """Search the Papiamentu Wikipedia for *search_term* and cross-reference
    each hit against Wikidata.

    For every Wikipedia page found (up to 4), the page is looked up on
    Wikidata; if a matching entity exists, its Papiamentu and Dutch labels
    are compared and a human-readable status line is printed.

    :param search_term: free-text term to search for on pap.wikipedia.org
    """
    wikipedia = MediaWiki(lang='pap', user_agent='code-for-nl-pap-parser')
    wikidata = MediaWiki(
        url='https://www.wikidata.org/w/api.php',
        user_agent='code-for-nl-pap-parser')

    search_result = wikipedia.search(search_term, results=4)

    for result_item in search_result:
        page = wikipedia.page(result_item)
        print(
            'I found page \'%s\' for term \'%s\'' % (result_item, search_term),
            'with categories',
            '/'.join(page.categories),
            'https://pap.wikipedia.org/wiki/' + urllib.parse.quote(result_item))
        # print(page.images)

        # Now I am going to search this one on wikidata, this will return
        # a code. like Q215887
        search_data = wikidata.search(result_item, results=1)

        for data_item in search_data:
            Q_CODE = data_item
            print(
                result_item, 'is known on wikidata with the code', Q_CODE,
                'https://www.wikidata.org/wiki/' + Q_CODE)
            # Now try the qwikidata interface
            entity = get_entity_dict_from_api(Q_CODE)
            q = WikidataItem(entity)
            pap_data_label = q.get_label(lang='pap')
            nl_data_label = q.get_label(lang='nl')
            if pap_data_label and nl_data_label:
                # First get the page. Read the images found
                data_page = wikidata.page(result_item)
                # print(data_page.images)
                print(pap_data_label, 'is called', nl_data_label, 'in dutch')
            elif pap_data_label and not nl_data_label:
                print(pap_data_label, 'has no entry for dutch!')
            elif not pap_data_label and nl_data_label:
                print(Q_CODE, 'does not match papiamentu entry')
            else:
                # BUG FIX: the original printed pap_data_label here, which is
                # always empty in this branch — identify the entity by its
                # Q-code instead.
                print(Q_CODE, 'has no entry for dutch or papiamentu!')
def is_matched(q: WikidataItem) -> bool:
    """Return True when the entity passes all filters.

    An entity is kept only if it has a Chinese label, is not an instance
    of human (Q5), and carries no coordinate-location claim (P625).
    """
    # Must have a Chinese label.
    if q.get_label("zh") == "":
        print(f'Skip, no zh label: {q.get_enwiki_title()}')
        return False

    # The entity must not be a person: reject anything whose
    # "instance of" (P31) values include Q5 (human).
    is_human = any(
        claim.mainsnak.datavalue.value['id'] == "Q5"
        for claim in q.get_claim_group("P31")
    )
    if is_human:
        print(f'Skip, is a person: {q.get_enwiki_title()}')
        return False

    # The entity must not have a coordinate-location claim; a claim
    # group with a non-None property_id means P625 is present.
    if q.get_claim_group("P625").property_id is not None:  # P625:coordinate_location
        print(f'Skip, has coordinate location: {q.get_enwiki_title()}')
        return False

    return True
def get_place_from_wikidata(entity_id):
    """Build a Gramps Place from a Wikidata entity.

    Fetches the entity from the Wikidata API, fills in names/aliases for a
    fixed set of languages, attaches sitelink URLs, derives the place type
    from "instance of" (P31) claims, copies coordinates (P625), and collects
    parent-place Q-codes from the "located in" properties.

    :param entity_id: Wikidata Q-code of the place (also used as Gramps id)
    :return: tuple ``(place, parents)`` where ``parents`` is a set of
             Q-codes filled in by ``extract_located_in``
    """
    parents = set()
    entity = WikidataItem(get_entity_dict_from_api(entity_id))
    claims_groups = entity.get_truthy_claim_groups()

    place = Place()
    place.set_gramps_id(entity_id)

    # Primary name/title come from the Swedish label.
    name = PlaceName()
    name.set_language('sv')
    name.set_value(entity.get_label('sv'))
    place.set_name(name=name)
    place.set_title(entity.get_label('sv'))

    for lang in ['sv', 'en', 'de', 'fi', 'no', 'nn', 'da', 'se']:
        wiki_name = entity.get_label(lang)
        if wiki_name:  # truthiness instead of len() — same result for str
            place_name = PlaceName()
            place_name.set_language(lang)
            place_name.set_value(wiki_name)
            place.add_alternative_name(name=place_name)
        for alias in entity.get_aliases(lang):
            alt_name = PlaceName()
            alt_name.set_language(lang)
            alt_name.set_value(alias)
            place.add_alternative_name(name=alt_name)
        for link in entity.get_sitelinks(lang).values():
            wikipedia_url = Url()
            wikipedia_url.set_path(link['url'])
            wikipedia_url.set_type('Wikipedia entry')
            wikipedia_url.set_description(
                'Wikipedia %s:%s' % (link["title"], link["site"]))
            place.add_url(wikipedia_url)

    # Instance of -> PlaceType, as a dispatch table instead of the original
    # ~26-branch elif chain. Mapping is unchanged.
    type_by_instance = {
        ITEM_PARISH: PlaceType.PARISH,
        ITEM_SOCKEN: PlaceType.PARISH,
        ITEM_ISLAND: PlaceType.UNKNOWN,  # No islands in Gramps
        ITEM_MUNICIPALITY_OF_SWEDEN: PlaceType.MUNICIPALITY,
        ITEM_MUNICIPALITY: PlaceType.MUNICIPALITY,
        ITEM_COUNTRY: PlaceType.COUNTRY,
        ITEM_SOVEREIGN_STATE: PlaceType.COUNTRY,
        ITEM_STATE_OF_US: PlaceType.STATE,
        ITEM_FEDERAL_STATE: PlaceType.STATE,
        ITEM_COUNTY: PlaceType.COUNTY,
        ITEM_COUNTY_OF_SWEDEN: PlaceType.COUNTY,
        ITEM_FORMER_COUNTY_OF_SWEDEN: PlaceType.COUNTY,
        ITEM_PROVINCE_OF_SWEDEN: PlaceType.PROVINCE,
        ITEM_PROVINCE: PlaceType.PROVINCE,
        ITEM_ADM_REGION: PlaceType.REGION,
        ITEM_NEIGHBORHOOD: PlaceType.NEIGHBORHOOD,
        ITEM_DISTRICT: PlaceType.DISTRICT,
        ITEM_BOROUGH: PlaceType.BOROUGH,
        ITEM_TOWN: PlaceType.TOWN,
        ITEM_LARGE_VILLAGE: PlaceType.VILLAGE,
        ITEM_VILLAGE: PlaceType.VILLAGE,
        ITEM_URBAN_AREA_IN_SWEDEN: PlaceType.VILLAGE,
        ITEM_HAMLET: PlaceType.HAMLET,
        ITEM_FARM: PlaceType.FARM,
        ITEM_BUILDING: PlaceType.BUILDING,
    }
    if PROPERTY_INSTANCE_OF in claims_groups:
        for claim in claims_groups[PROPERTY_INSTANCE_OF]:
            instance_of = claim.mainsnak.datavalue.value['id']
            # As in the elif chain, unrecognized instance-of values are
            # ignored and a later matching claim overwrites an earlier one.
            if instance_of in type_by_instance:
                place.set_type(type_by_instance[instance_of])

    if PROPERTY_COORDINATE_LOCATION in claims_groups:
        for claim in claims_groups[PROPERTY_COORDINATE_LOCATION]:
            datavalue = claim.mainsnak.datavalue
            place.set_latitude(str(datavalue.value['latitude']))
            place.set_longitude(str(datavalue.value['longitude']))

    extract_located_in(claims_groups, PROPERTY_LOCATED_IN_PRESENT, parents)
    extract_located_in(claims_groups, PROPERTY_LOCATED_IN_ADM, parents)
    extract_located_in(claims_groups, PROPERTY_LOCATED, parents)

    return place, parents
try: personDict = get_entity_dict_from_api(name) # Insert QCode here except: missingCodes = open("missingCodes.txt", "a") missingCodes.write(name + '\n') continue person = WikidataItem(personDict) claim_groups = person.get_truthy_claim_groups( ) # Gets a person's different Wikidata attributes try: eduGroups = claim_groups[ "P69"] # Grabs person's education from those attributes foundCount += 1 except: print(str(cnt) + ".", "Education not there for", person.get_label()) missingCount += 1 if (cnt % 10 == 0): readyToAppend = True continue eduEntries = len(eduGroups) # How many different entries there are print(str(cnt) + ".", "Writing education for", person.get_label()) eduList = [] for i in range(eduEntries): try: eduEntry = eduGroups[i] # Get list entry qid = eduEntry.mainsnak.datavalue.value["id"] # Get its ID eduValue = WikidataItem( get_entity_dict_from_api(qid)) # Get its actual name eduList.append(eduValue.get_label())