def query(self, msg, queryString: str, limit=None) -> list:
    '''
    run the given SPARQL query against my endpoint and return the
    post-processed records

    Every string value starting with ``http://www.wikidata.org/`` is replaced
    by the result of ``self.getWikidataId``. Every string column whose name
    ends with "coord" (case-insensitive) is removed and replaced by the two
    columns ``lat`` and ``lon`` obtained from
    ``Wikidata.getCoordinateComponents``.

    Args:
        msg(str): the profile message to display
        queryString(str): the query to execute
        limit(int): if not None a ``LIMIT`` clause with this value is
            appended to the query

    Return:
        list: the list of dicts with the result
    '''
    profile = Profiler(msg, profile=self.profile)
    wd = SPARQL(self.endpoint)
    limitedQuery = queryString
    if limit is not None:
        # The clause goes on a line of its own: a query that ends with a
        # "#" line comment would otherwise swallow a LIMIT appended with
        # a plain space.
        limitedQuery = f"{queryString}\nLIMIT {limit}"
    results = wd.query(limitedQuery)
    lod = wd.asListOfDicts(results)
    for record in lod:
        # iterate over a snapshot of the keys - the record is modified below
        for key in list(record.keys()):
            value = record[key]
            if not isinstance(value, str):
                continue
            if value.startswith("http://www.wikidata.org/"):
                record[key] = self.getWikidataId(value)
            if key.lower().endswith("coord"):
                # the coordinate is parsed from the original value
                lat, lon = Wikidata.getCoordinateComponents(value)
                record["lat"] = lat
                record["lon"] = lon
                record.pop(key)
    profile.time(f"({len(lod)})")
    return lod
def getCities(self, region=None, country=None):
    '''
    get the cities from Wikidata for the given region or country

    Args:
        region(str): Wikidata Q-identifier (without the ``wd:`` prefix) of
            the region to restrict the query to
        country(str): Wikidata Q-identifier (without the ``wd:`` prefix) of
            the country to restrict the query to - takes precedence over
            region if both are given

    Returns:
        list: the list of dicts as returned by ``asListOfDicts``

    Raises:
        ValueError: if neither region nor country is given
    '''
    if country is not None:
        # country wins when both are given (same precedence as before)
        values = "VALUES ?country { wd:%s}" % country
    elif region is not None:
        values = "VALUES ?region { wd:%s }" % region
    else:
        # previously this surfaced as an UnboundLocalError further down
        raise ValueError("getCities needs a region or a country")
    # NOTE(review): the coordinate sub-select projects only the aggregate, so
    # it may not be correlated with ?city - verify the returned cityCoord.
    queryString = """# get a list of cities for the given region
# for geograpy3 library
# see https://github.com/somnathrakshit/geograpy3/issues/15
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX wdt: <http://www.wikidata.org/prop/direct/>
PREFIX wd: <http://www.wikidata.org/entity/>
SELECT DISTINCT ?city ?cityLabel ?geoNameId ?cityPop ?cityCoord ?region ?regionLabel ?regionIsoCode ?country ?countryLabel ?countryIsoCode ?countryPopulation ?countryGdpPerCapita
WHERE {
  # administrative unit of first order
  # example DE-NW Q1198
  %s
  #?region wdt:P31/wdt:P279* wd:Q10864048.
  ?region rdfs:label ?regionLabel filter (lang(?regionLabel) = "en").
  # isocode state/province
  OPTIONAL { ?region wdt:P300 ?regionIsoCode. }
  # country this region belongs to
  ?region wdt:P17 ?country .
  # label for the country
  ?country rdfs:label ?countryLabel filter (lang(?countryLabel) = "en").
  # https://www.wikidata.org/wiki/Property:P297 ISO 3166-1 alpha-2 code
  ?country wdt:P297 ?countryIsoCode.
  # population of country
  ?country wdt:P1082 ?countryPopulation.
  OPTIONAL {
     ?country wdt:P2132 ?countryGdpPerCapita.
  }
  # located in administrative territory
  # https://www.wikidata.org/wiki/Property:P131
  ?city wdt:P131* ?region.
  # label of the City
  ?city rdfs:label ?cityLabel filter (lang(?cityLabel) = "en").
  # instance of human settlement https://www.wikidata.org/wiki/Q486972
  ?city wdt:P31/wdt:P279* wd:Q486972 .
  # geoName Identifier
  ?city wdt:P1566 ?geoNameId.
  # population of city
  OPTIONAL { ?city wdt:P1082 ?cityPop.}
  # get the coordinates
  OPTIONAL {
    select (max(?coord) as ?cityCoord) where {
      ?city wdt:P625 ?coord.
    }
  }
}
ORDER BY ?cityLabel""" % values
    wd = SPARQL(self.endpoint)
    results = wd.query(queryString)
    cityList = wd.asListOfDicts(results)
    return cityList
def fromWikiData(self, endpoint):
    '''
    get the country List from WikiData

    Each record gets its ``country`` column renamed to ``wikidataurl`` and
    its ``countryLabel`` column renamed to ``name``.

    Args:
        endpoint(string): the url of the endpoint to be used

    Returns:
        list: and sets it as self.countryList as a side effect
    '''
    wd = SPARQL(endpoint)
    queryString = """
# get a list countries with the corresponding ISO code
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX wd: <http://www.wikidata.org/entity/>
PREFIX wdt: <http://www.wikidata.org/prop/direct/>
PREFIX wikibase: <http://wikiba.se/ontology#>
PREFIX p: <http://www.wikidata.org/prop/>
PREFIX ps: <http://www.wikidata.org/prop/statement/>
PREFIX pq: <http://www.wikidata.org/prop/qualifier/>
SELECT ?country ?countryLabel ?shortName (MAX(?pop) as ?population) ?gdpPerCapita ?coord ?isocode
WHERE
{
  # instance of country
  ?country wdt:P31 wd:Q3624078.
  OPTIONAL {
    ?country rdfs:label ?countryLabel filter (lang(?countryLabel) = "en").
  }
  OPTIONAL {
    ?country p:P1813 ?shortNameStmt. # get the short name statement
    ?shortNameStmt ps:P1813 ?shortName # the the short name value from the statement
    filter (lang(?shortName) = "en") # filter for English short names only
    filter not exists {?shortNameStmt pq:P31 wd:Q28840786} # ignore flags (aka emojis)
  }
  OPTIONAL {
    # get the population
    # https://www.wikidata.org/wiki/Property:P1082
    ?country wdt:P1082 ?pop.
  }
  OPTIONAL {
    # get the gross domestic product per capita
    ?country wdt:P2132 ?gdpPerCapita.
  }
  # get the iso countryCode
  { ?country wdt:P297 ?isocode }.
  # get the coordinate
  OPTIONAL { ?country wdt:P625 ?coord }.
}
GROUP BY ?country ?countryLabel ?shortName ?population ?gdpPerCapita ?coord ?isocode
ORDER BY ?countryLabel"""
    results = wd.query(queryString)
    self.countryList = wd.asListOfDicts(results)
    for country in self.countryList:
        country['wikidataurl'] = country.pop('country')
        # the label is OPTIONAL in the query, so it may be missing from the
        # record - fall back to None instead of raising a KeyError
        country['name'] = country.pop('countryLabel', None)
        # presumably fills the listed keys with None when they are absent
        # - verify against the setNone implementation of the base class
        super().setNone(country, ['shortName', 'gdpPerCapita'])
    return self.countryList
def fromRDF(self, endpoint):
    '''
    retrieve my event list from the given SPARQL endpoint

    Queries the endpoint for GND conference/event records, converts every
    record to an Event that is added to my event manager ``self.em`` and
    finally stores the event manager.

    Args:
        endpoint(string): the url of the SPARQL endpoint to be used
    '''
    # get SPARQL access to GND data
    print(
        "Retrieving %s events from SPARQL endpoint %s\n ... this might take a few minutes ..."
        % (self.em.title, endpoint))
    starttime = time.time()
    gndEp = SPARQL(endpoint)
    queryString = """# get events with most often used columns from GND
# plus acronym, topic, homepage (seldom but useful)
# WF 2020-07-12
PREFIX gndi: <https://d-nb.info/gnd>
PREFIX gnd: <https://d-nb.info/standards/elementset/gnd#>
PREFIX gndo: <https://d-nb.info/standards/vocab/gnd/>
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX owl: <http://www.w3.org/2002/07/owl#>
PREFIX dc: <http://purl.org/dc/terms/>
PREFIX wdrs: <http://www.w3.org/2007/05/powder-s#>

SELECT ?event ?eventId ?acronym ?variant ?title ?date ?areaCode ?place ?topic ?homepage
WHERE {
  ?event a gnd:ConferenceOrEvent.
  ?event gnd:gndIdentifier ?eventId.
  OPTIONAL { ?event gnd:abbreviatedNameForTheConferenceOrEvent ?acronym. }
  OPTIONAL { ?event gnd:variantNameForTheConferenceOrEvent ?variant.}
  OPTIONAL { ?event gnd:preferredNameForTheConferenceOrEvent ?title.}
  OPTIONAL { ?event gnd:dateOfConferenceOrEvent ?date. }
  OPTIONAL { ?event gnd:geographicAreaCode ?areaCode. }
  OPTIONAL { ?event gnd:placeOfConferenceOrEvent ?place. }
  OPTIONAL { ?event gnd:topic ?topic. }
  { ?event gnd:homepage ?homepage.
  }
}
#LIMIT 10000"""
    results = gndEp.query(queryString)
    eventList = gndEp.asListOfDicts(results)
    print("retrieved %d events in %6.1f s" %
          (len(eventList), time.time() - starttime))
    # the field list is the same for every record - build it once
    fields = [
        'eventId', 'variant', 'name', 'areaCode', 'url', 'source', 'date',
        'startDate', 'endDate', 'year', 'place', 'acronym', 'lookupAcronym',
        'topic', 'homepage'
    ]
    for rawevent in eventList:
        rawevent['url'] = rawevent.pop('event')
        # presumably guarantees that all fields (including 'date') exist
        # in the record - verify against the setNone implementation
        self.em.setNone(rawevent, fields)
        dateStr = rawevent['date']
        # merge the entries derived from the date string into the record
        for key, value in GND.getDateRange(dateStr).items():
            rawevent[key] = value
        event = Event()
        event.fromDict(rawevent)
        event.source = self.em.name
        self.em.add(event)
    self.em.store(sampleRecordCount=10000)
def getCityPopulations(self, profile=True):
    '''
    get the city populations from Wikidata

    Args:
        profile(bool): if True show profiling information

    Returns:
        list: the list of dicts as returned by ``asListOfDicts``
    '''
    queryString = """
# get a list of human settlements having a geoName identifier
# to add to geograpy3 library
# see https://github.com/somnathrakshit/geograpy3/issues/15
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX wdt: <http://www.wikidata.org/prop/direct/>
PREFIX wd: <http://www.wikidata.org/entity/>
SELECT ?city ?cityLabel ?cityPop ?geoNameId ?country ?countryLabel ?countryIsoCode ?countryPopulation
WHERE {
  # geoName Identifier
  ?city wdt:P1566 ?geoNameId.
  # instance of human settlement https://www.wikidata.org/wiki/Q486972
  ?city wdt:P31/wdt:P279* wd:Q486972 .
  # population of city
  OPTIONAL { ?city wdt:P1082 ?cityPop.}
  # label of the City
  ?city rdfs:label ?cityLabel filter (lang(?cityLabel) = "en").
  # country this city belongs to
  ?city wdt:P17 ?country .
  # label for the country
  ?country rdfs:label ?countryLabel filter (lang(?countryLabel) = "en").
  # https://www.wikidata.org/wiki/Property:P297 ISO 3166-1 alpha-2 code
  ?country wdt:P297 ?countryIsoCode.
  # population of country
  ?country wdt:P1082 ?countryPopulation.
  OPTIONAL {
     ?country wdt:P2132 ?countryGdpPerCapita.
  }
}"""
    if profile:
        print(
            "getting cities with population and geoNamesId from wikidata endpoint %s"
            % self.endpoint)
    # taken unconditionally so the elapsed time is always computable
    starttime = time.time()
    wd = SPARQL(self.endpoint)
    results = wd.query(queryString)
    cityList = wd.asListOfDicts(results)
    if profile:
        print("Found %d cities in %5.1f s" %
              (len(cityList), time.time() - starttime))
    return cityList
def fromWikiData(self, endpoint):
    '''
    get the province List from WikiData

    Each record gets its ``region`` column renamed to ``wikidataurl`` and
    its ``regionLabel`` column renamed to ``name``.

    Args:
        endpoint(string): the url of the endpoint to be used

    Returns:
        list: and sets it as self.provinceList as a side effect
    '''
    wd = SPARQL(endpoint)
    queryString = """
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX wd: <http://www.wikidata.org/entity/>
PREFIX wdt: <http://www.wikidata.org/prop/direct/>
PREFIX wikibase: <http://wikiba.se/ontology#>
SELECT ?region ?isocc ?isocode4 ?regionLabel ?population ?location
WHERE
{
  # administrative unit of first order
  ?region wdt:P31/wdt:P279* wd:Q10864048.
  OPTIONAL {
     ?region rdfs:label ?regionLabel filter (lang(?regionLabel) = "en").
  }
  # filter historic regions
  # FILTER NOT EXISTS {?region wdt:P576 ?end}
  # get the population
  # https://www.wikidata.org/wiki/Property:P1082
  OPTIONAL { ?region wdt:P1082 ?population. }
  # # https://www.wikidata.org/wiki/Property:P297
  OPTIONAL { ?region wdt:P297 ?isocc. }
  # isocode state/province
  ?region wdt:P300 ?isocode4.
  # https://www.wikidata.org/wiki/Property:P625
  OPTIONAL { ?region wdt:P625 ?location. }
}
ORDER BY (?isocode4)
"""
    results = wd.query(queryString)
    self.provinceList = wd.asListOfDicts(results)
    for province in self.provinceList:
        province['wikidataurl'] = province.pop('region')
        # the label is OPTIONAL in the query, so it may be missing from the
        # record - fall back to None instead of raising a KeyError
        province['name'] = province.pop('regionLabel', None)
        # presumably fills the listed keys with None when they are absent
        # - verify against the setNone implementation of the base class
        super().setNone(province, ['population', 'location'])
    return self.provinceList
def getRegions(self):
    '''
    get Regions from Wikidata and set them as self.regionList

    Nothing is returned - the list of dicts is only available via
    ``self.regionList``. The linked query below is an earlier variant
    without the ``max`` aggregates and the ``GROUP BY`` used here.

    `try query <https://query.wikidata.org/#%23%20get%20a%20list%20of%20regions%0A%23%20for%20geograpy3%20library%0A%23%20see%20https%3A%2F%2Fgithub.com%2Fsomnathrakshit%2Fgeograpy3%2Fissues%2F15%0APREFIX%20rdfs%3A%20%3Chttp%3A%2F%2Fwww.w3.org%2F2000%2F01%2Frdf-schema%23%3E%0APREFIX%20wd%3A%20%3Chttp%3A%2F%2Fwww.wikidata.org%2Fentity%2F%3E%0APREFIX%20wdt%3A%20%3Chttp%3A%2F%2Fwww.wikidata.org%2Fprop%2Fdirect%2F%3E%0APREFIX%20wikibase%3A%20%3Chttp%3A%2F%2Fwikiba.se%2Fontology%23%3E%0ASELECT%20%3Fcountry%20%3FcountryLabel%20%3FcountryIsoCode%20%3Fregion%20%3FregionIsoCode%20%3FregionLabel%20%3Fpopulation%20%3Flocation%0AWHERE%0A%7B%0A%20%20%23%20administrative%20unit%20of%20first%20order%0A%20%20%3Fregion%20wdt%3AP31%2Fwdt%3AP279%2a%20wd%3AQ10864048.%0A%20%20OPTIONAL%20%7B%0A%20%20%20%20%20%3Fregion%20rdfs%3Alabel%20%3FregionLabel%20filter%20%28lang%28%3FregionLabel%29%20%3D%20%22en%22%29.%0A%20%20%7D%0A%20%20%23%20filter%20historic%20regions%0A%20%20%23%20FILTER%20NOT%20EXISTS%20%7B%3Fregion%20wdt%3AP576%20%3Fend%7D%0A%20%20%23%20get%20the%20population%0A%20%20%23%20https%3A%2F%2Fwww.wikidata.org%2Fwiki%2FProperty%3AP1082%0A%20%20OPTIONAL%20%7B%20%3Fregion%20wdt%3AP1082%20%3Fpopulation.%20%7D%0A%20%20%23%20%23%20https%3A%2F%2Fwww.wikidata.org%2Fwiki%2FProperty%3AP297%0A%20%20OPTIONAL%20%7B%20%0A%20%20%20%20%3Fregion%20wdt%3AP17%20%3Fcountry.%0A%20%20%20%20%23%20label%20for%20the%20country%0A%20%20%20%20%3Fcountry%20rdfs%3Alabel%20%3FcountryLabel%20filter%20%28lang%28%3FcountryLabel%29%20%3D%20%22en%22%29.%0A%20%20%20%20%3Fcountry%20wdt%3AP297%20%3FcountryIsoCode.%20%0A%20%20%7D%0A%20%20%23%20isocode%20state%2Fprovince%0A%20%20%3Fregion%20wdt%3AP300%20%3FregionIsoCode.%0A%20%20%23%20https%3A%2F%2Fwww.wikidata.org%2Fwiki%2FProperty%3AP625%0A%20%20OPTIONAL%20%7B%20%3Fregion%20wdt%3AP625%20%3Flocation.%20%7D%0A%7D>`_
    '''
    queryString = """# get a list of regions
# for geograpy3 library
# see https://github.com/somnathrakshit/geograpy3/issues/15
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX wd: <http://www.wikidata.org/entity/>
PREFIX wdt: <http://www.wikidata.org/prop/direct/>
PREFIX wikibase: <http://wikiba.se/ontology#>
SELECT DISTINCT ?country ?countryLabel ?countryIsoCode ?region (max(?regionAlpha2) as ?regionIsoCode) ?regionLabel (max(?population) as ?regionPopulation) ?location
WHERE
{
  # administrative unit of first order
  ?region wdt:P31/wdt:P279* wd:Q10864048.
  OPTIONAL {
     ?region rdfs:label ?regionLabel filter (lang(?regionLabel) = "en").
  }
  # filter historic regions
  # FILTER NOT EXISTS {?region wdt:P576 ?end}
  # get the population
  # https://www.wikidata.org/wiki/Property:P1082
  OPTIONAL { ?region wdt:P1082 ?population. }
  # # https://www.wikidata.org/wiki/Property:P297
  OPTIONAL {
    ?region wdt:P17 ?country.
    # label for the country
    ?country rdfs:label ?countryLabel filter (lang(?countryLabel) = "en").
    ?country wdt:P297 ?countryIsoCode.
  }
  # isocode state/province
  ?region wdt:P300 ?regionAlpha2.
  # https://www.wikidata.org/wiki/Property:P625
  OPTIONAL { ?region wdt:P625 ?location. }
}
GROUP BY ?country ?countryLabel ?countryIsoCode ?region ?regionIsoCode ?regionLabel ?location
ORDER BY ?regionIsoCode"""
    wd = SPARQL(self.endpoint)
    results = wd.query(queryString)
    self.regionList = wd.asListOfDicts(results)
def getCountries(self):
    '''
    get a list of countries from Wikidata and set it as self.countryList

    Nothing is returned - the list of dicts is only available via
    ``self.countryList``. The linked query below is an earlier variant
    (it selects instances of Q3624078 and requires coordinates and GDP).

    `try query <https://query.wikidata.org/#%23%20get%20a%20list%20of%20countries%0A%23%20for%20geograpy3%20library%0A%23%20see%20https%3A%2F%2Fgithub.com%2Fsomnathrakshit%2Fgeograpy3%2Fissues%2F15%0APREFIX%20rdfs%3A%20%3Chttp%3A%2F%2Fwww.w3.org%2F2000%2F01%2Frdf-schema%23%3E%0APREFIX%20wd%3A%20%3Chttp%3A%2F%2Fwww.wikidata.org%2Fentity%2F%3E%0APREFIX%20wdt%3A%20%3Chttp%3A%2F%2Fwww.wikidata.org%2Fprop%2Fdirect%2F%3E%0APREFIX%20p%3A%20%3Chttp%3A%2F%2Fwww.wikidata.org%2Fprop%2F%3E%0APREFIX%20ps%3A%20%3Chttp%3A%2F%2Fwww.wikidata.org%2Fprop%2Fstatement%2F%3E%0APREFIX%20pq%3A%20%3Chttp%3A%2F%2Fwww.wikidata.org%2Fprop%2Fqualifier%2F%3E%0A%23%20get%20City%20details%20with%20Country%0ASELECT%20DISTINCT%20%3Fcountry%20%3FcountryLabel%20%3FcountryIsoCode%20%3FcountryPopulation%20%3FcountryGDP_perCapita%20%3Fcoord%20%20WHERE%20%7B%0A%20%20%23%20instance%20of%20City%20Country%0A%20%20%3Fcountry%20wdt%3AP31%2Fwdt%3AP279%2a%20wd%3AQ3624078%20.%0A%20%20%23%20label%20for%20the%20country%0A%20%20%3Fcountry%20rdfs%3Alabel%20%3FcountryLabel%20filter%20%28lang%28%3FcountryLabel%29%20%3D%20%22en%22%29.%0A%20%20%23%20get%20the%20coordinates%0A%20%20%3Fcountry%20wdt%3AP625%20%3Fcoord.%0A%20%20%23%20https%3A%2F%2Fwww.wikidata.org%2Fwiki%2FProperty%3AP297%20ISO%203166-1%20alpha-2%20code%0A%20%20%3Fcountry%20wdt%3AP297%20%3FcountryIsoCode.%0A%20%20%23%20population%20of%20country%0A%20%20%3Fcountry%20wdt%3AP1082%20%3FcountryPopulation.%0A%20%20%23%20https%3A%2F%2Fwww.wikidata.org%2Fwiki%2FProperty%3AP2132%0A%20%20%23%20nonminal%20GDP%20per%20capita%0A%20%20%3Fcountry%20wdt%3AP2132%20%3FcountryGDP_perCapita.%0A%7D>`_
    '''
    # NOTE(review): the coordinate sub-select projects only the aggregate, so
    # it may not be correlated with ?country - verify the countryCoord values.
    queryString = """# get a list of countries
# for geograpy3 library
# see https://github.com/somnathrakshit/geograpy3/issues/15
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX wd: <http://www.wikidata.org/entity/>
PREFIX wdt: <http://www.wikidata.org/prop/direct/>
PREFIX p: <http://www.wikidata.org/prop/>
PREFIX ps: <http://www.wikidata.org/prop/statement/>
PREFIX pq: <http://www.wikidata.org/prop/qualifier/>
# get City details with Country
SELECT DISTINCT ?country ?countryLabel ?countryIsoCode ?countryPopulation ?countryGDP_perCapita ?countryCoord WHERE {
  # instance of Country
  ?country wdt:P31/wdt:P279* wd:Q6256 .
  # VALUES ?country { wd:Q55}.
  # label for the country
  ?country rdfs:label ?countryLabel filter (lang(?countryLabel) = "en").
  # get the coordinates
  OPTIONAL {
    select (max(?coord) as ?countryCoord) where {
      ?country wdt:P625 ?coord.
    }
  }
  # https://www.wikidata.org/wiki/Property:P297 ISO 3166-1 alpha-2 code
  ?country wdt:P297 ?countryIsoCode.
  # population of country
  ?country wdt:P1082 ?countryPopulation.
  # https://www.wikidata.org/wiki/Property:P2132
  # nominal GDP per capita
  OPTIONAL { ?country wdt:P2132 ?countryGDP_perCapita. }
}
ORDER BY ?countryIsoCode"""
    wd = SPARQL(self.endpoint)
    results = wd.query(queryString)
    self.countryList = wd.asListOfDicts(results)
def testWikdata(self):
    '''
    check wikidata

    runs a small query for whisky distilleries having a coordinate against
    the public Wikidata query service and expects at least 238 results
    '''
    # the public endpoint is used; a local Wikidata copy could be
    # substituted here for offline or faster runs
    endpoint = "https://query.wikidata.org/sparql"
    wd = SPARQL(endpoint)
    queryString = """# get a list of whisky distilleries
PREFIX wd: <http://www.wikidata.org/entity/>
PREFIX wdt: <http://www.wikidata.org/prop/direct/>
SELECT ?item ?coord
WHERE
{
  # instance of whisky distillery
  ?item wdt:P31 wd:Q10373548.
  # get the coordinate
  ?item wdt:P625 ?coord.
}
"""
    results = wd.query(queryString)
    # assertGreaterEqual reports both numbers on failure, unlike assertTrue
    self.assertGreaterEqual(len(results), 238)
def fromWikiData(self, endpoint):
    '''
    get the city List from WikiData

    Each record gets its ``city`` column renamed to ``wikidataurl`` and its
    ``cityLabel`` column renamed to ``name``.

    Args:
        endpoint(string): the url of the endpoint to be used

    Returns:
        list: and sets it as self.cityList as a side effect
    '''
    wd = SPARQL(endpoint)
    queryString = """# get a list of cities
# for geograpy3 library
# see https://github.com/somnathrakshit/geograpy3/issues/15
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX wd: <http://www.wikidata.org/entity/>
PREFIX wdt: <http://www.wikidata.org/prop/direct/>
PREFIX p: <http://www.wikidata.org/prop/>
PREFIX ps: <http://www.wikidata.org/prop/statement/>
PREFIX pq: <http://www.wikidata.org/prop/qualifier/>
# get human settlements
SELECT DISTINCT ?city ?cityLabel (max(?cityPop) as ?cityPopulation) (min (?coord) as ?cityCoord) ?region ?regionLabel ?regionIsoCode ?country ?countryLabel ?countryIsoCode ?countryPopulation ?countryGdpPerCapita WHERE {
  # if you uncomment this line this query might run for some 3 hours on a local wikidata copy using Apache Jena
  # run for Vienna, Illinois, Vienna Austria, Paris Texas and Paris France as example only
  # VALUES ?city { wd:Q577544 wd:Q1741 wd:Q830149 wd:Q90}.
  # run for Andorra Q228
  # VALUES ?country {wd:Q228}.
  # instance of human settlement https://www.wikidata.org/wiki/Q486972
  ?city wdt:P31/wdt:P279* wd:Q486972 .
  # label of the City
  ?city rdfs:label ?cityLabel filter (lang(?cityLabel) = "en").
  # country this city belongs to
  ?city wdt:P17 ?country .
  # label for the country
  ?country rdfs:label ?countryLabel filter (lang(?countryLabel) = "en").
  # https://www.wikidata.org/wiki/Property:P297 ISO 3166-1 alpha-2 code
  ?country wdt:P297 ?countryIsoCode.
  # population of country
  ?country wdt:P1082 ?countryPopulation.
  OPTIONAL {
     ?country wdt:P2132 ?countryGdpPerCapita.
  }
  OPTIONAL {
     # located in administrative territory
     # https://www.wikidata.org/wiki/Property:P131
     ?city wdt:P131* ?region.
     # administrative unit of first order
     ?region wdt:P31/wdt:P279* wd:Q10864048.
     ?region rdfs:label ?regionLabel filter (lang(?regionLabel) = "en").
     # isocode state/province
     OPTIONAL { ?region wdt:P300 ?regionIsoCode. }
  }
  # population of city
  OPTIONAL { ?city wdt:P1082 ?cityPop.}
  # get the coordinates
  OPTIONAL { ?city wdt:P625 ?coord. }
}
GROUP BY ?city ?cityLabel ?cityCoord ?region ?regionLabel ?regionIsoCode ?country ?countryLabel ?countryIsoCode ?countryPopulation ?countryGdpPerCapita
#ORDER BY ?cityLabel
"""
    results = wd.query(queryString)
    self.cityList = wd.asListOfDicts(results)
    # The query projects ?cityCoord and ?countryGdpPerCapita, while the
    # historic list only named 'coord' and 'countryGDP_perCapita'. The
    # projected names are added so the optional columns are covered; the
    # old names stay for backward compatibility.
    fields = [
        'coord', 'cityCoord', 'date', 'cityPopulation', 'countryPopulation',
        'country', 'countryLabel', 'countryIsoCode', 'countryGDP_perCapita',
        'countryGdpPerCapita', 'region', 'regionLabel', 'regionIsoCode',
        'ratio'
    ]
    for city in self.cityList:
        city['wikidataurl'] = city.pop('city')
        city['name'] = city.pop('cityLabel')
        # presumably fills the listed keys with None when they are absent
        # - verify against the setNone implementation of the base class
        super().setNone(city, fields)
    return self.cityList