Esempio n. 1
0
def get_wikidata_item(id_):
    """Fetch entity *id_* from the Wikidata API and wrap it in the matching
    qwikidata entity class (item, property, or lexeme)."""
    payload = get_entity_dict_from_api(id_)
    wrapper_for_type = {
        'item': WikidataItem,
        'property': WikidataProperty,
        'lexeme': WikidataLexeme,
    }
    # The API payload self-describes its kind via the 'type' field.
    wrapper = wrapper_for_type[payload['type']]
    return wrapper(payload)
Esempio n. 2
0
def wikidata_get_sitelink(id):
    """Look up the English-Wikipedia sitelink for a Wikidata entity.

    Returns {"title": ..., "sitelink": ...} on success, otherwise a dict
    with a single "error" key.
    """
    try:
        entity = get_entity_dict_from_api(id)
        enwiki = entity.get("sitelinks", {}).get("enwiki")
        if enwiki is not None:
            return {
                "title": enwiki["title"],
                "sitelink": enwiki["url"]
            }
    except LdiResponseNotOk as exception:
        print(exception)
        return {"error": "Failed to get sitelink"}

    return {"error": "Unknown Error"}
Esempio n. 3
0
def fill(X):
    """Fill in a missing name from a row's OSM-style tags.

    Fallback order when the name is NaN: 'official_name' tag, then
    'operator', then the Wikidata label for 'brand:wikidata', then the
    'brand:wikipedia' article title (with its "xx:" language prefix cut).

    :param X: row-like mapping with a 'name' value and a 'tags' dict.
    :return: the original name when present, otherwise the best fallback
             (still NaN when no tag applies).
    """
    name = X['name']
    # BUGFIX: the original used `name is NaN`, which matches only the single
    # NaN object bound to that name; any other float NaN (e.g. produced by
    # pandas) slipped through. NaN is the only float unequal to itself.
    if isinstance(name, float) and name != name:
        if 'official_name' in X['tags']:
            name = X['tags']['official_name']
        elif 'operator' in X['tags']:
            name = X['tags']['operator']
        elif 'brand:wikidata' in X['tags']:
            wikidata = X['tags']['brand:wikidata']
            q_dict = get_entity_dict_from_api(wikidata)
            name = WikidataItem(q_dict).get_label()
        elif 'brand:wikipedia' in X['tags']:
            wikipedia = X['tags']['brand:wikipedia']
            # assumes a 2-letter language prefix like "en:" — TODO confirm
            name = wikipedia[3:]
    return name
Esempio n. 4
0
def get_start(QID):
    """Return the start date (qualifier P580) of the person's term of
    office (claim P39), formatted for display.

    :param QID: the person's Wikidata QID
    :return: 'DD.MM.YYYY', 'MM.YYYY' or 'YYYY' depending on the stored
             precision, with leading zeros stripped from the year and a
             '-' prefix for BCE dates; 'нет данных' when no P580 exists.
    """
    person_dict = get_entity_dict_from_api(QID)
    claim = person_dict['claims']['P39'][0]
    if 'qualifiers' not in claim or 'P580' not in claim['qualifiers']:
        return 'нет данных'
    raw = str(claim['qualifiers']['P580'][0]['datavalue']['value']['time'])
    # raw looks like '+1905-06-00T00:00:00Z'; [1:11] is 'YYYY-MM-DD'.
    start_time = raw[1:11]
    # Wikidata stores '00' for unknown month/day: trim to known precision.
    if start_time[5:] == '00-00':
        start_time = start_time[:4]
    elif start_time[8:] == '00':
        start_time = start_time[:7]
    if len(start_time) == 7:
        start_time = arrow.get(start_time, 'YYYY-MM').format('MM.YYYY')
    elif len(start_time) == 10:
        start_time = arrow.get(start_time, 'YYYY-MM-DD').format('DD.MM.YYYY')
    # Strip leading zeros from the year (the segment after the last '.').
    # BUGFIX: the original used .replace('0', ''), which also removed
    # interior/trailing zeros (e.g. '1905' -> '195', '0800' -> '8').
    cut = start_time.rfind('.') + 1
    start_time = start_time[:cut] + (start_time[cut:].lstrip('0') or '0')
    if raw[0] == '-':
        start_time = '-' + start_time
    return start_time
Esempio n. 5
0
 def retrieveRelatedEntities(self, entityId, limit=None):
     """Collect the ids of wikibase-item entities referenced by *entityId*'s claims.

     :param entityId: Wikidata id (e.g. 'Q42') fetched via the linked-data API.
     :param limit: optional cap on how many ids to return.
     :return: list of entity id strings (e.g. ['Q5', 'Q42', ...]).
     """
     entity_dict = get_entity_dict_from_api(entityId)
     list_of_entities = []
     claims = entity_dict["claims"]
     # Claims are grouped by property id; each group holds statement dicts.
     for prop_id in claims:
         for statement in claims[prop_id]:
             if "mainsnak" not in statement:
                 continue
             snak = statement["mainsnak"]
             if snak["datatype"] != "wikibase-item":
                 continue
             # BUGFIX: 'novalue'/'somevalue' snaks carry no "datavalue";
             # the original raised KeyError on them — skip instead.
             datavalue = snak.get("datavalue")
             if datavalue is None:
                 continue
             if datavalue["type"] == "wikibase-entityid":
                 list_of_entities.append(datavalue["value"]["id"])
                 if limit is not None and len(list_of_entities) == limit:
                     return list_of_entities
     return list_of_entities
Esempio n. 6
0
    def get_boroughs(self):
        """Return name/coordinate dicts for this place's boroughs (P150 values).

        P150 is "contains administrative territorial entity"
        (https://www.wikidata.org/wiki/Property:P150). Venice (Q641), for
        example, has 6 boroughs: Cannaregio, San Polo, Dorsoduro, Santa
        Croce, San Marco and Castello.

        Entities without P150 (e.g. Trier, Q3138) fall back to a single
        entry describing the place itself.

        :return: list of {"Name": str, "Lat": float, "Lon": float}
        """
        borough_ids = []
        boroughs = []

        # Renamed from `property` — it shadowed the builtin.
        p150_claims = self.entity.get("claims").get("P150")
        if p150_claims is None:
            print(self.entity_id, "has no P150")
            lat, lon = self.get_coordinate_location()
            boroughs.append({"Name": self.get_name(), "Lat": lat, "Lon": lon})
            return boroughs

        for claim in p150_claims:
            borough_ids.append(
                claim.get("mainsnak").get("datavalue").get('value').get('id'))

        for entity_id in borough_ids:
            entity = get_entity_dict_from_api(entity_id)
            labels = entity.get('labels')
            english_label = labels.get("en")
            if english_label is None:
                # No English label: fall back to the first available language.
                first_lang = next(iter(labels))
                borough_name = labels.get(first_lang).get("value")
            else:
                borough_name = english_label.get("value")
            # P625 = coordinate location; assumed present on boroughs — TODO confirm.
            coords = entity.get("claims").get("P625")[0].get('mainsnak').get(
                'datavalue').get('value')
            lat, lon = coords.get('latitude'), coords.get('longitude')

            boroughs.append({"Name": borough_name, "Lat": lat, "Lon": lon})

        return boroughs
Esempio n. 7
0
async def get_fact(query, args, tokenizer, trex_set, common_vocab, f_out):
    """
    Collect more facts for the TREx-train set from LPAQA
    """
    sub_url, sub, obj_url, obj = query.strip().split('\t')
    sub_id = get_id_from_url(sub_url)
    obj_id = get_id_from_url(obj_url)

    # Skip facts that already appear in the TREx test set.
    if (sub_id, obj_id) in trex_set:
        return

    # The object must be a single token and part of the common vocab subset.
    if len(tokenizer.tokenize(obj)) != 1 or obj not in common_vocab:
        return

    # The subject must be prominent (i.e. have at least one Wikipedia page).
    try:
        entity = WikidataItem(get_entity_dict_from_api(sub_id))
        if not entity.get_sitelinks():
            return
    except ValueError:
        return

    # Entities without labels use the URI as their label — skip those.
    if sub_id == sub:
        return

    f_out.write(
        json.dumps({
            'sub_uri': sub_id,
            'obj_uri': obj_id,
            'sub_label': sub,
            'obj_label': obj
        }) + '\n')

    # Track how many facts have been written across tasks.
    await increment_count()
Esempio n. 8
0
def get_end(QID):
    """Return the end date (qualifier P582) of the person's term of
    office (claim P39), formatted for display.

    :param QID: the person's Wikidata QID
    :return: 'DD.MM.YYYY', 'MM.YYYY' or 'YYYY' depending on the stored
             precision, with leading zeros stripped from the year and a
             '-' prefix for BCE dates; 'нет данных' when no P582 exists.
    """
    person_dict = get_entity_dict_from_api(QID)
    claim = person_dict['claims']['P39'][0]
    if 'qualifiers' not in claim or 'P582' not in claim['qualifiers']:
        return 'нет данных'
    raw = str(claim['qualifiers']['P582'][0]['datavalue']['value']['time'])
    # raw looks like '+1905-06-00T00:00:00Z'; [1:11] is 'YYYY-MM-DD'.
    end_time = raw[1:11]
    # Wikidata stores '00' for unknown month/day: trim to known precision.
    if end_time[5:] == '00-00':
        end_time = end_time[:4]
    elif end_time[8:] == '00':
        end_time = end_time[:7]
    if len(end_time) == 7:
        end_time = arrow.get(end_time, 'YYYY-MM').format('MM.YYYY')
    elif len(end_time) == 10:
        end_time = arrow.get(end_time, 'YYYY-MM-DD').format('DD.MM.YYYY')
    # Strip leading zeros from the year (the segment after the last '.').
    # BUGFIX: the original used .replace('0', ''), which also removed
    # interior/trailing zeros (e.g. '1905' -> '195', '0800' -> '8').
    cut = end_time.rfind('.') + 1
    end_time = end_time[:cut] + (end_time[cut:].lstrip('0') or '0')
    if raw[0] == '-':
        end_time = '-' + end_time
    return end_time
def main(search_term):
    """Search Papiamento Wikipedia for *search_term*, resolve each hit to a
    Wikidata Q-code, and compare its 'pap' and 'nl' labels."""
    user_agent = 'code-for-nl-pap-parser'
    wikipedia = MediaWiki(lang='pap', user_agent=user_agent)
    wikidata = MediaWiki(url='https://www.wikidata.org/w/api.php',
                         user_agent=user_agent)

    for result_item in wikipedia.search(search_term, results=4):
        page = wikipedia.page(result_item)
        print(
            'I found page \'%s\' for term \'%s\'' % (result_item, search_term),
            'with categories', '/'.join(page.categories),
            'https://pap.wikipedia.org/wiki/' +
            urllib.parse.quote(result_item))

        # Resolve the article to a Wikidata entity code (e.g. Q215887).
        for data_item in wikidata.search(result_item, results=1):
            Q_CODE = data_item
            print(result_item, 'is known on wikidata with the code', Q_CODE,
                  'https://www.wikidata.org/wiki/' + Q_CODE)
            # Fetch the entity through the qwikidata interface.
            q = WikidataItem(get_entity_dict_from_api(Q_CODE))
            pap_data_label = q.get_label(lang='pap')
            nl_data_label = q.get_label(lang='nl')
            if pap_data_label and nl_data_label:
                # First get the page. Read the images found
                data_page = wikidata.page(result_item)
                print(pap_data_label, 'is called', nl_data_label, 'in dutch')
            elif pap_data_label:
                print(pap_data_label, 'has no entry for dutch!')
            elif nl_data_label:
                print(Q_CODE, 'does not match papiamentu entry')
            else:
                print(pap_data_label, 'has no entry for dutch or papiamentu!')
Esempio n. 10
0
def save_label(Q, label):
    """Set the Armenian ('hy') label of entity *Q* via the wbsetlabel API,
    unless the entity already has one; on success remove Q from the local
    work queue (WikiDataItems)."""
    data = get_entity_dict_from_api(Q)
    # Already labelled in Armenian: just drop it from the queue.
    if 'labels' in data and 'hy' in data['labels']:
        WikiDataItems.query.filter_by(q=Q).delete()
        db.session.commit()
        return None
    auth1, csrf_token = get_csrf_token()
    payload = {
        "action": "wbsetlabel",
        "token": csrf_token,
        "format": "json",
        "id": Q,
        "language": "hy",
        "value": label
    }
    response = requests.post("https://www.wikidata.org/w/api.php",
                             data=payload,
                             auth=auth1)
    body = response.json() if response else {}
    if response and 'success' in body and body['success']:
        WikiDataItems.query.filter_by(q=Q).delete()
        db.session.commit()
    return csrf_token, response
Esempio n. 11
0
def get_labels(username, rec=10):
    """Pick a random queued item the user hasn't done yet and return its labels.

    Retries (recursively) up to *rec* times when the picked item was already
    done by *username* or already has an Armenian label.

    :return: (Q, labels, hy_description) — or (None, None, None) when the
             retry budget is exhausted.
    """
    if rec < 0:
        # BUGFIX: was `return None, None` — every other path returns a
        # 3-tuple, so callers unpacking three values crashed here.
        return None, None, None
    Q = str(random.choice(WikiDataItems.query.all()))
    if Done.query.filter_by(username=username, q=Q).all():
        return get_labels(username, rec - 1)
    # Renamed from `dict` — it shadowed the builtin.
    entity = get_entity_dict_from_api(Q)
    labels = []
    hydesc = 'հայերեն նկարագրություն չկա'
    if 'labels' in entity:
        if 'hy' in entity['labels']:
            # Already has an Armenian label: try another item.
            return get_labels(username, rec - 1)
        # BUGFIX: guard 'descriptions' before indexing (the loop below
        # already guarded it, this lookup did not).
        if 'descriptions' in entity and 'hy' in entity['descriptions']:
            hydesc = entity['descriptions']['hy']['value']
        for lang in entity['labels']:
            label = entity['labels'][lang]
            if 'descriptions' in entity and lang in entity['descriptions']:
                label['description'] = entity['descriptions'][lang]['value']
            if 'sitelinks' in entity and lang + 'wiki' in entity['sitelinks']:
                label['url'] = entity['sitelinks'][lang + 'wiki']['url']
            labels.append(label)
    return Q, labels, hydesc
Esempio n. 12
0
def get_place_from_wikidata(entity_id):
    """Build a Gramps Place from a Wikidata entity.

    Collects localized names/aliases, Wikipedia sitelinks, a PlaceType
    derived from the entity's "instance of" (P31) claims, coordinates
    (P625), and the ids of enclosing places.

    :param entity_id: Wikidata id, also used as the Gramps id.
    :return: (place, parents) where parents is a set of parent entity ids.
    """
    parents = set()
    entity = WikidataItem(get_entity_dict_from_api(entity_id))
    claims_groups = entity.get_truthy_claim_groups()
    place = Place()
    place.set_gramps_id(entity_id)

    # Swedish label is the primary name and title.
    name = PlaceName()
    name.set_language('sv')
    name.set_value(entity.get_label('sv'))
    place.set_name(name=name)

    place.set_title(entity.get_label('sv'))
    for lang in ['sv', 'en', 'de', 'fi', 'no', 'nn', 'da', 'se']:
        wiki_name = entity.get_label(lang)
        if wiki_name:
            place_name = PlaceName()
            place_name.set_language(lang)
            place_name.set_value(wiki_name)
            place.add_alternative_name(name=place_name)
            for alias in entity.get_aliases(lang):
                alt_name = PlaceName()
                alt_name.set_language(lang)
                alt_name.set_value(alias)
                place.add_alternative_name(name=alt_name)

        for link in entity.get_sitelinks(lang).values():
            wikipedia_url = Url()
            wikipedia_url.set_path(link['url'])
            wikipedia_url.set_type('Wikipedia entry')
            wikipedia_url.set_description('Wikipedia %s:%s' %
                                          (link["title"], link["site"]))
            place.add_url(wikipedia_url)

    # Instance of -> PlaceType, as a lookup table (was a 25-branch
    # if/elif chain). Unknown instance values leave the type untouched.
    instance_to_place_type = {
        ITEM_PARISH: PlaceType.PARISH,
        ITEM_SOCKEN: PlaceType.PARISH,
        ITEM_ISLAND: PlaceType.UNKNOWN,  # No islands in Gramps
        ITEM_MUNICIPALITY_OF_SWEDEN: PlaceType.MUNICIPALITY,
        ITEM_MUNICIPALITY: PlaceType.MUNICIPALITY,
        ITEM_COUNTRY: PlaceType.COUNTRY,
        ITEM_SOVEREIGN_STATE: PlaceType.COUNTRY,
        ITEM_STATE_OF_US: PlaceType.STATE,
        ITEM_FEDERAL_STATE: PlaceType.STATE,
        ITEM_COUNTY: PlaceType.COUNTY,
        ITEM_COUNTY_OF_SWEDEN: PlaceType.COUNTY,
        ITEM_FORMER_COUNTY_OF_SWEDEN: PlaceType.COUNTY,
        ITEM_PROVINCE_OF_SWEDEN: PlaceType.PROVINCE,
        ITEM_PROVINCE: PlaceType.PROVINCE,
        ITEM_ADM_REGION: PlaceType.REGION,
        ITEM_NEIGHBORHOOD: PlaceType.NEIGHBORHOOD,
        ITEM_DISTRICT: PlaceType.DISTRICT,
        ITEM_BOROUGH: PlaceType.BOROUGH,
        ITEM_TOWN: PlaceType.TOWN,
        ITEM_LARGE_VILLAGE: PlaceType.VILLAGE,
        ITEM_VILLAGE: PlaceType.VILLAGE,
        ITEM_URBAN_AREA_IN_SWEDEN: PlaceType.VILLAGE,
        ITEM_HAMLET: PlaceType.HAMLET,
        ITEM_FARM: PlaceType.FARM,
        ITEM_BUILDING: PlaceType.BUILDING,
    }
    if PROPERTY_INSTANCE_OF in claims_groups:
        for claim in claims_groups[PROPERTY_INSTANCE_OF]:
            instance_of = claim.mainsnak.datavalue.value['id']
            if instance_of in instance_to_place_type:
                place.set_type(instance_to_place_type[instance_of])

    if PROPERTY_COORDINATE_LOCATION in claims_groups:
        for claim in claims_groups[PROPERTY_COORDINATE_LOCATION]:
            datavalue = claim.mainsnak.datavalue
            place.set_latitude(str(datavalue.value['latitude']))
            place.set_longitude(str(datavalue.value['longitude']))

    # Accumulate parent place ids from the three "located in" properties.
    extract_located_in(claims_groups, PROPERTY_LOCATED_IN_PRESENT, parents)
    extract_located_in(claims_groups, PROPERTY_LOCATED_IN_ADM, parents)
    extract_located_in(claims_groups, PROPERTY_LOCATED, parents)

    return place, parents
# Replace Wikidata ids in each monument record with human-readable labels.
# Relies on state defined earlier in the file: monument_list, count_mon,
# complete_final_monument_list, property_list — TODO confirm their setup.
for monument in monument_list:
    count_mon += 1
    print(count_mon)
    # Every 4 monuments, checkpoint the accumulated results to a numbered
    # JSON file and reset the accumulator.
    if count_mon % 4 == 0:
        print("HELLO")
        # e.g. count_mon 8 -> '2.0' -> partition_num '2'
        partition_num = str(count_mon / 4).split('.')[0]
        with open(
                '../../Downloads/hi_monument_english_labels' + partition_num +
                '.json', 'w') as fout:
            json.dump(complete_final_monument_list, fout)
        complete_final_monument_list = []
        print("Checkpoint %d reached, JSON dumps saved |" % (count_mon / 4))

    # Replace the 'title' and 'id' Q-codes with their English labels.
    # (Assigning to existing keys while iterating items() is safe — the
    # dict's size does not change.)
    for key, val in monument.items():
        if key == "title":
            monument['title'] = WikidataItem(get_entity_dict_from_api(
                str(val))).get_label()
        elif key == "id":
            monument['id'] = WikidataItem(get_entity_dict_from_api(
                str(val))).get_label()

    # Resolve each claim's property id (P####) to its label.
    # P727 is deliberately skipped — TODO confirm why.
    for key in monument['claims'].keys():
        if (key == "P727"):
            continue
        else:
            property_list.append(
                WikidataProperty(get_entity_dict_from_api(key)).get_label())
    # Rebuild 'claims' keyed by property label instead of property id.
    final_monument_list = dict(
        zip(property_list, list(monument['claims'].values())))
    monument['claims'].clear()
    monument['claims'].update(final_monument_list)
    property_list = []
Esempio n. 14
0
#!/usr/bin/env python3
"""Query the Wikidata query service for a given entity QID (or property PID)
and get back entity information as JSON.

The returned JSON is equivalent to the JSON data that is available in
Wikidata entity dumps (available from https://dumps.wikimedia.org/wikidatawiki/entities/),
but this may be more convient for accessing individual records.

"""
import json

from functools import lru_cache

from qwikidata.linked_data_interface import get_entity_dict_from_api

if __name__ == '__main__':
    import argparse

    # Single positional argument: the QID/PID to fetch.
    parser = argparse.ArgumentParser(
        description=__doc__,
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument('id', help='Wikidata identifier')
    args = parser.parse_args()

    entity = get_entity_dict_from_api(args.id)
    print(json.dumps(entity, ensure_ascii=False))
Esempio n. 15
0
    for file in glob.glob('data/cities/*.json'):
        with open(file, 'r') as fin:
            datar = json.load(fin)
            for i in datar:
                data.append(i)
    fout.write(json.dumps(data, indent=2))

with open('data/cities.json', 'r') as cities, open('data/countries.json',
                                                   'r') as countries:
    cities_data, countries_data = json.load(cities), json.load(countries)
    keys = []
    for city in data:
        if str(city['id']) not in cities_data:
            print(city['id'])
            wikidata_id = 'Q' + str(city['id'])
            wikidata_info = get_entity_dict_from_api(wikidata_id)
            name = wikidata_info['labels']['en']['value']
            coords = wikidata_info['claims']['P625'][0]['mainsnak'][
                'datavalue']['value']
            lat, lon = coords['latitude'], coords['longitude']
            cities_data[str(city['id'])] = {
                'name': name,
                'lat': lat,
                'lon': lon
            }
        for country in city['data'].values():
            if str(country) in countries_data:
                continue
            print(country)
            wikidata_id = 'Q' + str(country)
            wikidata_info = get_entity_dict_from_api(wikidata_id)
Esempio n. 16
0
# Load the Q-codes, one per line, into codeList (defined earlier in the file).
# BUGFIX: the file handle was opened and never closed — use a context manager.
with open("codeList11.txt", "r") as f:
    codes = f.read().splitlines()
codeCount = 0
for line in codes:
    codeList.append(line)
    codeCount += 1

print('Processed', codeCount, 'Q-codes.')
timeStart = perf_counter()

cnt = 0
for name in codeList:
    cnt += 1

    try:
        personDict = get_entity_dict_from_api(name)  # Insert QCode here
    except:
        missingCodes = open("missingCodes.txt", "a")
        missingCodes.write(name + '\n')
        continue
    person = WikidataItem(personDict)

    claim_groups = person.get_truthy_claim_groups(
    )  # Gets a person's different Wikidata attributes
    try:
        eduGroups = claim_groups[
            "P69"]  # Grabs person's education from those attributes
        foundCount += 1
    except:
        print(str(cnt) + ".", "Education not there for", person.get_label())
        missingCount += 1
    monument_labelled_prop_val = {}
    list_prop_value = monument['claims']
    list_properties = list(list_prop_value.keys())

    #Removing Properties from list of properties which dont have a wikidata page
    for prop in non_labelled_props:
        if prop in list_properties:
            list_properties.remove(prop)

    list_properties_copy = list_properties

    #Adding all properties to label list. The ones which dont have wikidata pages are stored in non_labelled_props
    for prop in list_properties:
        if prop not in label_list.keys():
            try:
                prop_details = get_entity_dict_from_api(prop)
                if 'hi' in prop_details['labels'].keys():
                    prop_label = prop_details['labels']['hi']['value']
                else:
                    prop_label = str_translator(
                        prop_details['labels']['en']['value'])
                label_list[prop] = prop_label
            except:
                non_labelled_props.append(prop)
                list_properties_copy.remove(prop)

    list_properties = list_properties_copy

    #For all values per property, label is extracted for each value ID [Q##### format]
    for prop in list_properties:
Esempio n. 18
0
 def __init__(self, entity_id):
     """Remember *entity_id* and fetch its entity dict from the Wikidata API."""
     self.entity_id = entity_id
     entity_dict = get_entity_dict_from_api(entity_id)
     self.entity = entity_dict
Esempio n. 19
0
        visited.add(line.strip())

current = ''
for idx, (word, cid) in enumerate(sorted(words.items(), key=lambda x: x[1])):
    try:
        if cid not in visited:
            visited.add(cid)
            new_query = query.format(word)
            print(len(findings))
            print('{0} ...'.format(word))
            res_ = return_sparql_query_results(new_query)
            res = res_['results']['bindings']
            for r in res:
                key = r['lexemeId']['value'].split('/')[-1]
                #print(r['lexemeId'], key)
                dct = get_entity_dict_from_api(key)
                if dct['senses']:
                    this_sense = dct['senses'][0]['glosses'].get(
                        'en', {'value': ''})['value']
                    if this_sense:
                        findings[cid, concepticon.conceptsets[cid].gloss] += [
                            (key, this_sense, word)
                        ]
                        print('... ', this_sense)
    except Exception as e:
        print(e)
    if cid != current:
        current = cid
        cgl = concepticon.conceptsets[cid].gloss
        if (cid, cgl) in findings:
            with open('wikidata.tsv', 'a') as f:
from qwikidata.entity import WikidataItem, WikidataLexeme, WikidataProperty
from qwikidata.linked_data_interface import get_entity_dict_from_api

# create an item representing "Douglas Adams" (the dict has the same shape
# as a record in the Wikidata JSON entity dumps)
Q_DOUGLAS_ADAMS = "Q42"
q42_dict = get_entity_dict_from_api(Q_DOUGLAS_ADAMS)
q42 = WikidataItem(q42_dict)

# create a property representing "subclass of"
P_SUBCLASS_OF = "P279"
p279_dict = get_entity_dict_from_api(P_SUBCLASS_OF)
p279 = WikidataProperty(p279_dict)

# create a lexeme representing "bank"
L_BANK = "L3354"
l3354_dict = get_entity_dict_from_api(L_BANK)
l3354 = WikidataLexeme(l3354_dict)
# Get entity ids from api
from qwikidata.linked_data_interface import get_entity_dict_from_api
from qwikidata.entity import WikidataItem, WikidataProperty, WikidataLexeme
from tqdm import tqdm
import pickle
# Load the set of Wikidata ids whose English labels we need.
# BUGFIX: pickle.load(open(...)) leaked the file handle — use `with`.
with open('/home/keshav/olpbench/wikidata_ids.pkl', 'rb') as fh:
    wikidata = pickle.load(fh)
english_labels_nf, items_nf = set(), set()  # ids not found / without 'en' label
labelsD = dict()  # id -> English label
for item in tqdm(wikidata):
    try:
        entity_dict = get_entity_dict_from_api(item)
    # BUGFIX: was a bare `except:` (which also swallows KeyboardInterrupt);
    # kept broad as Exception since any API failure should just skip the id.
    except Exception:
        items_nf.add(item)
        continue
    if 'en' not in entity_dict['labels']:
        english_labels_nf.add(item)
        continue
    labelsD[item] = entity_dict['labels']['en']['value']

# Get entity ids from json dump
import pickle
from tqdm import tqdm
from qwikidata.json_dump import WikidataJsonDump

# Stream entities from a local (bz2-compressed) full Wikidata JSON dump.
wjd = WikidataJsonDump("/home/keshav/wikidata-20201109-all.json.bz2")
namesD = dict()  # entity id -> English label
not_found_english_label = 0  # count of entities lacking an 'en' label
for item in tqdm(wjd):
    entity_id =item['id']
    if 'en' not in item['labels']:
Esempio n. 22
0
def getEntityInfo(eid):
    """Return the raw Wikidata entity dict for *eid*."""
    info = get_entity_dict_from_api(eid)
    return info
Esempio n. 23
0
 def get_item(itemId):
     """Fetch and return the entity dict for *itemId* from the Wikidata API."""
     return get_entity_dict_from_api(itemId)