def cacheRegionCities2Json(self, limit, showDone=False):
    '''
    cache the cities of the regions as one JSON file per region

    The regions are taken from the cached RegionManager list. For every
    region the cities are fetched via Wikidata.getCitiesForRegion and
    written to <cachePath>/regions/<region.iso>.json. A region whose
    file already exists is skipped.

    Exceptions raised while fetching or writing a region are passed to
    self.handleWikidataException.

    Args:
        limit(int): number of regions (counted from the start of the
            region list, already cached ones included) to consider
        showDone(bool): if True the progress message is also printed
            for regions that already have a JSON file
    '''
    # TODO - refactor to Locator/LocationContext - make available via command line
    wd = Wikidata()
    config = LocationContext.getDefaultConfig()
    # NOTE(review): countryManager is not read below - presumably it is
    # loaded for the side effects of fromCache(); confirm before removing
    countryManager = CountryManager(config=config)
    countryManager.fromCache()
    regionManager = RegionManager(config=config)
    regionManager.fromCache()
    regionList = regionManager.getList()
    total = len(regionList)
    cachePath = f"{config.getCachePath()}/regions"
    # exist_ok avoids the check-then-create race of
    # os.path.exists() followed by os.makedirs()
    os.makedirs(cachePath, exist_ok=True)
    for index, region in enumerate(regionList):
        if index >= limit:
            break
        regionId = region.wikidataid
        msg = f"{index+1:4d}/{total:4d}:getting cities for {region.name} {region.iso} {region.wikidataid}"
        jsonFileName = f"{cachePath}/{region.iso}.json"
        if os.path.isfile(jsonFileName):
            # already cached - nothing to fetch
            if showDone:
                print(msg)
            continue
        try:
            regionCities = wd.getCitiesForRegion(regionId, msg)
            # serialize before opening the file so that a failing dump
            # does not leave an empty file behind
            jsonStr = json.dumps(regionCities)
            with open(jsonFileName, "w") as jsonFile:
                jsonFile.write(jsonStr)
        except Exception as ex:
            self.handleWikidataException(ex)
def testLocationListLoading(self):
    '''
    restore a CountryManager from a JSON string with three sample
    countries and check that the wikidataid lookup contains Q30
    '''
    # the sample text is assembled from adjacent literals
    countriesJson = (
        ' { "countries": [ '
        '{ "name": "Afghanistan", "wikidataid": "Q889", "lat": 34, "lon": 66, '
        '"coordinates": "34,66", "partOf": null, "level": 3, '
        '"locationKind": "Country", "comment": null, "iso": "AF" }, '
        '{ "name": "United States of America", "wikidataid": "Q30", '
        '"lat": 39.82818, "lon": -98.5795, "partOf": "Noth America", '
        '"level": 3, "locationKind": "Country", "comment": null, '
        '"labels": [ "America", "UNITED STATES OF AMERICA", "USA", '
        '"United States", "United States of America (the)" ], "iso": "US" }, '
        '{ "name": "Australia", "wikidataid": "Q408", "lat": -28, "lon": 137, '
        '"coordinates": "-28,137", "partOf": null, "level": 3, '
        '"locationKind": "Country", "comment": null, "labels": [ "AUS" ], '
        '"iso": "AU" } ] } '
    )
    manager = CountryManager()
    manager.restoreFromJsonStr(countriesJson)
    # getLookup returns a pair - only the lookup itself is checked here
    lookupResult = manager.getLookup("wikidataid")
    byWikidataId, _duplicates = lookupResult
    self.assertTrue("Q30" in byWikidataId)
def test_getLocationByID(self):
    '''
    look up a country by its wikidataid in the cached CountryManager
    and check that the returned location carries the expected iso code
    '''
    manager = CountryManager(config=LocationContext.getDefaultConfig())
    manager.fromCache()
    # Q30 is the wikidataid of the USA
    usa = manager.getLocationByID("Q30")
    self.assertIsNotNone(usa)
    self.assertTrue(hasattr(usa, 'iso'))
    self.assertEqual(usa.iso, 'US')
def testCountryManager(self):
    '''
    tests loading the CountryManager from the cache: the manager must
    expose a 'countries' list with at least 200 entries, the
    wikidataid check helper is run and Q30 must be among the ids
    '''
    countryManager = CountryManager(config=self.getStorageConfig())
    countryManager.fromCache()
    self.assertTrue(hasattr(countryManager, 'countries'))
    # assertGreaterEqual reports the actual count on failure
    self.assertGreaterEqual(len(countryManager.countries), 200)
    # check if the country with wikidataid Q30 is in the list
    countriesByWikidataId = self.checkNoDuplicateWikidataIds(countryManager, "wikidataid")
    # assertIn shows the looked-up container on failure
    self.assertIn("Q30", countriesByWikidataId)
def testIssue41_CountriesFromErdem(self):
    '''
    the country list obtained via CountryManager.fromErdem must
    contain 247 countries; in debug mode every country is printed
    '''
    erdemManager = CountryManager.fromErdem()
    self.assertEqual(247, len(erdemManager.countries))
    if not self.debug:
        return
    for entry in erdemManager.countries:
        print(entry)
def testIssue45_BallTree(self):
    '''
    build the ball tuple for the Erdem country list and check the
    size of the returned list as well as type and weight of the tree
    '''
    expectedCount = 245
    tree, validEntries = CountryManager.fromErdem().getBallTuple()
    self.assertEqual(expectedCount, len(validEntries))
    self.assertEqual("BallTree", type(tree).__name__)
    self.assertAlmostEqual(expectedCount, tree.sum_weight, delta=0.1)
def testCacheLocationLabels(self):
    '''
    Generates the location label tables in the SQL db for countries, regions and cities by querying wikidata for
    the rdfs:label and skos:altLabel of each location.
    A view containing all location labels is also created.

    The extraction is switched off by the local flag testLocationLabelExtraction,
    so as written this test does nothing.
    '''
    # set to True to actually run the extraction below
    testLocationLabelExtraction = False
    if testLocationLabelExtraction:
        wd = Wikidata()
        config = LocationContext.getDefaultConfig()
        countryManager = CountryManager(config=config)
        regionManager = RegionManager(config=config)
        cityManager = CityManager(config=config)
        sqlDb = SQLDB(dbname=config.cacheFile, debug=self.debug)
        for manager in countryManager, regionManager, cityManager:
            manager.fromCache()
            # collect the distinct wikidata ids from the manager's table
            wikidataIdQuery = f"SELECT DISTINCT wikidataid FROM {manager.entityPluralName}"
            wikidataIdQueryRes = sqlDb.query(wikidataIdQuery)
            wikidataIds = [l['wikidataid'] for l in wikidataIdQueryRes]
            # the ids are handled in slices of chunkSize - one query per slice
            chunkSize = 1000
            iterations = math.ceil(len(wikidataIds) / chunkSize)
            # running count of handled ids (only incremented, never read here)
            progress = 0
            # accumulates the query results of all slices for this manager
            res = []
            for i in range(iterations):
                workOnIds = wikidataIds[i * chunkSize:(i + 1) * chunkSize]
                progress += len(workOnIds)
                index = 0
                # string of "wd:<id>" entries handed to getLablesQuery
                values = ""
                for location in workOnIds:
                    # line break plus tabs before every tenth entry, a blank otherwise
                    spacer = " \n\t\t\t" if index % 10 == 0 else " "
                    values += f"{spacer}wd:{wd.getWikidataId(location)}"
                    index += 1
                query = self.getLablesQuery(values)
                res.extend(
                    wd.query(
                        f"Query {i}/{iterations} - Querying {manager.entityName} Labels",
                        queryString=query))
            # one table per manager, named "<entityName>_labels"
            wd.store2DB(res, tableName=f"{manager.entityName}_labels", sqlDB=sqlDb)
        self.createViews(sqlDB=sqlDb)
def testClosestLocation(self):
    '''
    look up the locations closest to a sample country in two ways
    (n closest and within a radius) and run the same check on both
    results
    '''
    # sample Country: Germany
    germany = Country()
    germany.name = 'Germany'
    germany.lat = 51.0
    germany.lon = 9.0
    # the Erdem country list is the lookup base
    erdemManager = CountryManager.fromErdem()
    # both lookups are checked against the same expectation
    expectation = (2, "Luxembourg", 244)
    # the closest 2 locations
    nearest = germany.getNClosestLocations(erdemManager, 2)
    self.checkLocationListWithDistances(nearest, *expectation)
    # the locations within a radius of 300
    withinRadius = germany.getLocationsWithinRadius(erdemManager, 300)
    self.checkLocationListWithDistances(withinRadius, *expectation)
def testRegionMatching(self):
    '''
    for every country of the Erdem list get the three closest
    locations from the cached RegionManager; in debug mode the
    country and its matches are printed
    '''
    locator = Locator()
    # make sure the locator database is populated
    if not locator.db_has_data():
        locator.populate_db()
    erdemManager = CountryManager.fromErdem()
    regions = RegionManager(config=LocationContext.getDefaultConfig())
    regions.fromCache()
    for country in erdemManager.countries:
        closest = country.getNClosestLocations(regions, 3)
        if self.debug:
            print(f"{country}{country.lat:.2f},{country.lon:.2f}")
        # each entry is unpacked as a (location, distance) pair
        for rank, (location, distance) in enumerate(closest):
            if self.debug:
                print(f" {rank}:{location}-{distance:.0f} km")