def dbpedia_sparql_extract_people(people_list_file):
    """Download the foaf:name of every dbo:Person from DBpedia into a file.

    The query is paged with LIMIT/OFFSET in steps of 10000 rows, and each
    page is appended to ``people_list_file`` as soon as it arrives, so an
    interrupted run still leaves the pages fetched so far on disk.

    Args:
        people_list_file: Path of the output file. Any existing file at this
            path is deleted first. The result holds one name per line,
            UTF-8 encoded.
    """
    # With help from https://rdflib.github.io/sparqlwrapper/
    # and https://stackoverflow.com/questions/38332857/
    # sparql-query-to-get-all-person-available-in-dbpedia-is-showing-only-some-person
    if os.path.exists(people_list_file):
        os.unlink(people_list_file)

    # Hard-coded row count; the live value can be obtained with:
    # total_people = dbpedia_sparql_get_people_count()
    total_people = 2109301
    page_size = 10000

    # The query text is identical for every page, so build it once.
    # foaf: is declared explicitly so the query does not depend on the
    # endpoint predefining that namespace.
    # NOTE(review): there is no ORDER BY, so SPARQL does not guarantee a
    # stable row order across pages -- confirm the endpoint pages
    # consistently, or add an ORDER BY if it can afford one.
    sparql_query = """
        PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
        PREFIX foaf: <http://xmlns.com/foaf/0.1/>
        PREFIX dbo: <http://dbpedia.org/ontology/>
        PREFIX dbp: <http://dbpedia.org/property/>
        SELECT ?name
        WHERE {
            ?resource foaf:name ?name .
            ?resource rdf:type dbo:Person .
        }
    """

    for i in range(0, total_people, page_size):
        offset = str(i)
        print("We're at {sofar} out of {total}".format(sofar=offset, total=total_people))

        sparql_query_offset = "LIMIT {} OFFSET {}".format(page_size, offset)
        response = util.dbpedia_do_sparql_query(sparql_query + sparql_query_offset)
        results = response['results']['bindings']
        people_list = [res['name']['value'] for res in results]

        print("Adding {count} to people list file".format(count=len(results)))
        # Terminate every name with a newline. Joining with "\n" left the
        # last name of one page glued to the first name of the next page,
        # because the file is reopened in append mode for each page.
        with open(people_list_file, 'a', encoding='utf-8') as f:
            f.writelines(name + "\n" for name in people_list)
def dbpedia_sparql_get_people_count() -> int:
    """Return how many (resource, foaf:name) rows DBpedia has for dbo:Person.

    This counts rows of the same pattern used to page through people names,
    so a person with several foaf:name values is counted once per name.

    Returns:
        The count reported by the endpoint, as an int.

    Raises:
        IndexError, KeyError: If the response has no bindings or does not
            carry the expected ``callret-0`` column.
    """
    # foaf: is declared explicitly so the query does not depend on the
    # endpoint predefining that namespace.
    sparql_query = """
        PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
        PREFIX foaf: <http://xmlns.com/foaf/0.1/>
        PREFIX dbo: <http://dbpedia.org/ontology/>
        PREFIX dbp: <http://dbpedia.org/property/>
        SELECT COUNT(*)
        WHERE {
            ?resource foaf:name ?name .
            ?resource rdf:type dbo:Person .
        }
    """
    res = util.dbpedia_do_sparql_query(sparql_query)
    # The aggregate has no alias, so the result column name is chosen by the
    # endpoint; 'callret-0' is presumably what DBpedia's server assigns --
    # verify if the endpoint changes.
    return int(res['results']['bindings'][0]['callret-0']['value'])