Esempio n. 1
0
    def test_get_label_1(self) -> None:
        """Check get_label on an item (Q42) and on a property (P279).

        For each entity, the default call and the explicit EN / DE lookups
        must return the label stored in the raw entity dict, and the lookup
        with ``lang=NO`` must return an empty string.
        """
        raw_item = _load_item_dict(typedefs.ItemId("Q42"))
        item_expected = {
            lang: raw_item["labels"][lang]["value"] for lang in (EN, DE)
        }
        item = WikidataItem(raw_item)

        # Calling without a language must give the same result as lang=EN.
        assert item.get_label() == item_expected[EN]
        for lang, label in item_expected.items():
            assert item.get_label(lang=lang) == label
        assert item.get_label(lang=NO) == ""

        raw_prop = _load_property_dict(typedefs.PropertyId("P279"))
        prop_expected = {
            lang: raw_prop["labels"][lang]["value"] for lang in (EN, DE)
        }
        prop = WikidataProperty(raw_prop)

        assert prop.get_label() == prop_expected[EN]
        for lang, label in prop_expected.items():
            assert prop.get_label(lang=lang) == label
        assert prop.get_label(lang=NO) == ""
def main(search_term):
    """Search Papiamentu Wikipedia for a term and report Wikidata labels.

    For every hit returned by the Papiamentu Wikipedia search (issued with
    ``results=4``), print the page title with its categories and URL, search
    Wikidata for that title (``results=1``), and report whether the entity
    found has a Papiamentu ('pap') and a Dutch ('nl') label.

    Args:
        search_term: Text to search for on the Papiamentu Wikipedia.
    """
    wikipedia = MediaWiki(lang='pap', user_agent='code-for-nl-pap-parser')
    wikidata = MediaWiki(url='https://www.wikidata.org/w/api.php',
                         user_agent='code-for-nl-pap-parser')

    for result_item in wikipedia.search(search_term, results=4):
        page = wikipedia.page(result_item)
        print(
            f"I found page '{result_item}' for term '{search_term}'",
            'with categories', '/'.join(page.categories),
            'https://pap.wikipedia.org/wiki/' +
            urllib.parse.quote(result_item))

        # Searching Wikidata for the page title yields entity codes such as
        # Q215887.
        for q_code in wikidata.search(result_item, results=1):
            print(result_item, 'is known on wikidata with the code', q_code,
                  'https://www.wikidata.org/wiki/' + q_code)

            # Load the full entity through qwikidata to read its labels.
            item = WikidataItem(get_entity_dict_from_api(q_code))
            pap_label = item.get_label(lang='pap')
            nl_label = item.get_label(lang='nl')

            if pap_label and nl_label:
                print(pap_label, 'is called', nl_label, 'in dutch')
            elif pap_label:
                print(pap_label, 'has no entry for dutch!')
            elif nl_label:
                print(q_code, 'does not match papiamentu entry')
            else:
                # Both labels are empty here, so identify the entity by its
                # code rather than printing a blank label.
                print(q_code, 'has no entry for dutch or papiamentu!')
Esempio n. 3
0
    def is_matched(q: WikidataItem) -> bool:
        """Return True if the item passes all three filters.

        An item is rejected (and a "Skip, ..." message is printed) when it
        has no Chinese ("zh") label, when it is an instance of human (Q5),
        or when it carries a coordinate-location (P625) claim.

        Args:
            q: The Wikidata item to check.

        Returns:
            True when the item should be kept, False otherwise.
        """
        # The item must have a Chinese label.
        if q.get_label("zh") == "":
            print(f'Skip, no zh label: {q.get_enwiki_title()}')
            return False

        # The entity must not be a person.  Only snaks of type "value" are
        # inspected: "novalue"/"somevalue" snaks carry no datavalue and would
        # otherwise raise AttributeError here.
        instance_of_claims = q.get_claim_group("P31")  # P31: instance of
        is_human = any(
            claim.mainsnak.datavalue.value['id'] == "Q5"  # Q5: human
            for claim in instance_of_claims
            if claim.mainsnak.snaktype == "value"
        )
        if is_human:
            print(f'Skip, is a person: {q.get_enwiki_title()}')
            return False

        # The entity must not have a location claim.  A claim group whose
        # property_id is set is treated as "the property is present".
        coordinate_claims = q.get_claim_group("P625")  # P625: coordinate location
        if coordinate_claims.property_id is not None:
            print(f'Skip, has coordinate location: {q.get_enwiki_title()}')
            return False
        return True
Esempio n. 4
0
def get_place_from_wikidata(entity_id):
    """Build a Gramps place from a Wikidata entity.

    Fetches the entity and fills a new ``Place`` with:

    * the Swedish ('sv') label as primary name and title,
    * labels and aliases in a fixed list of languages as alternative names,
    * one URL per sitelink returned by ``get_sitelinks(lang)``,
    * a place type derived from the entity's "instance of" claims,
    * latitude/longitude from its coordinate-location claims.

    Args:
        entity_id: Wikidata entity id; also stored as the place's Gramps id.

    Returns:
        A ``(place, parents)`` tuple.  ``parents`` is the set passed to
        ``extract_located_in`` for the three located-in properties
        (presumably filled in place by that helper -- confirm there).
    """
    parents = set()
    entity = WikidataItem(get_entity_dict_from_api(entity_id))
    claims_groups = entity.get_truthy_claim_groups()
    place = Place()
    place.set_gramps_id(entity_id)

    # The Swedish label serves as both the primary name and the title.
    sv_label = entity.get_label('sv')
    name = PlaceName()
    name.set_language('sv')
    name.set_value(sv_label)
    place.set_name(name=name)
    place.set_title(sv_label)

    for lang in ['sv', 'en', 'de', 'fi', 'no', 'nn', 'da', 'se']:
        wiki_name = entity.get_label(lang)
        if wiki_name:
            # The label comes first, followed by every alias in that language.
            for value in [wiki_name, *entity.get_aliases(lang)]:
                alt_name = PlaceName()
                alt_name.set_language(lang)
                alt_name.set_value(value)
                place.add_alternative_name(name=alt_name)

        # Sitelinks are added whether or not a label exists for the language.
        for link in entity.get_sitelinks(lang).values():
            wikipedia_url = Url()
            wikipedia_url.set_path(link['url'])
            wikipedia_url.set_type('Wikipedia entry')
            wikipedia_url.set_description(
                f'Wikipedia {link["title"]}:{link["site"]}')
            place.add_url(wikipedia_url)

    # Instance of -> PlaceType
    place_type_by_item = {
        ITEM_PARISH: PlaceType.PARISH,
        ITEM_SOCKEN: PlaceType.PARISH,
        ITEM_ISLAND: PlaceType.UNKNOWN,  # No islands in Gramps
        ITEM_MUNICIPALITY_OF_SWEDEN: PlaceType.MUNICIPALITY,
        ITEM_MUNICIPALITY: PlaceType.MUNICIPALITY,
        ITEM_COUNTRY: PlaceType.COUNTRY,
        ITEM_SOVEREIGN_STATE: PlaceType.COUNTRY,
        ITEM_STATE_OF_US: PlaceType.STATE,
        ITEM_FEDERAL_STATE: PlaceType.STATE,
        ITEM_COUNTY: PlaceType.COUNTY,
        ITEM_COUNTY_OF_SWEDEN: PlaceType.COUNTY,
        ITEM_FORMER_COUNTY_OF_SWEDEN: PlaceType.COUNTY,
        ITEM_PROVINCE_OF_SWEDEN: PlaceType.PROVINCE,
        ITEM_PROVINCE: PlaceType.PROVINCE,
        ITEM_ADM_REGION: PlaceType.REGION,
        ITEM_NEIGHBORHOOD: PlaceType.NEIGHBORHOOD,
        ITEM_DISTRICT: PlaceType.DISTRICT,
        ITEM_BOROUGH: PlaceType.BOROUGH,
        ITEM_TOWN: PlaceType.TOWN,
        ITEM_LARGE_VILLAGE: PlaceType.VILLAGE,
        ITEM_VILLAGE: PlaceType.VILLAGE,
        ITEM_URBAN_AREA_IN_SWEDEN: PlaceType.VILLAGE,
        ITEM_HAMLET: PlaceType.HAMLET,
        ITEM_FARM: PlaceType.FARM,
        ITEM_BUILDING: PlaceType.BUILDING,
    }
    if PROPERTY_INSTANCE_OF in claims_groups:
        for claim in claims_groups[PROPERTY_INSTANCE_OF]:
            # "novalue"/"somevalue" snaks have no datavalue to read an id from.
            if claim.mainsnak.snaktype != "value":
                continue
            instance_of = claim.mainsnak.datavalue.value['id']
            # Every recognised claim sets the type, so the last one wins.
            if instance_of in place_type_by_item:
                place.set_type(place_type_by_item[instance_of])

    if PROPERTY_COORDINATE_LOCATION in claims_groups:
        for claim in claims_groups[PROPERTY_COORDINATE_LOCATION]:
            # Skip claims that record an unknown or absent coordinate.
            if claim.mainsnak.snaktype != "value":
                continue
            coordinates = claim.mainsnak.datavalue.value
            place.set_latitude(str(coordinates['latitude']))
            place.set_longitude(str(coordinates['longitude']))

    extract_located_in(claims_groups, PROPERTY_LOCATED_IN_PRESENT, parents)
    extract_located_in(claims_groups, PROPERTY_LOCATED_IN_ADM, parents)
    extract_located_in(claims_groups, PROPERTY_LOCATED, parents)

    return place, parents
Esempio n. 5
0
    try:
        personDict = get_entity_dict_from_api(name)  # Insert QCode here
    except:
        missingCodes = open("missingCodes.txt", "a")
        missingCodes.write(name + '\n')
        continue
    person = WikidataItem(personDict)

    claim_groups = person.get_truthy_claim_groups(
    )  # Gets a person's different Wikidata attributes
    try:
        eduGroups = claim_groups[
            "P69"]  # Grabs person's education from those attributes
        foundCount += 1
    except:
        print(str(cnt) + ".", "Education not there for", person.get_label())
        missingCount += 1
        if (cnt % 10 == 0):
            readyToAppend = True
        continue
    eduEntries = len(eduGroups)  # How many different entries there are

    print(str(cnt) + ".", "Writing education for", person.get_label())
    eduList = []
    for i in range(eduEntries):
        try:
            eduEntry = eduGroups[i]  # Get list entry
            qid = eduEntry.mainsnak.datavalue.value["id"]  # Get its ID
            eduValue = WikidataItem(
                get_entity_dict_from_api(qid))  # Get its actual name
            eduList.append(eduValue.get_label())