def cacheRegionCities2Json(self, limit, showDone=False):
    '''
    cache the cities of each region as a JSON file named after the region's ISO code

    Regions for which a JSON file already exists in the cache directory are
    not queried again.

    Args:
        limit(int): number of regions (counted from the start of the region list) to consider
        showDone(bool): if True print the progress message for regions that are already cached
    '''
    # TODO - refactor to Locator/LocationContext - make available via command line
    wd = Wikidata()
    config = LocationContext.getDefaultConfig()
    # NOTE(review): countryManager is not used below - kept because fromCache()
    # may have side effects on the shared cache; confirm before removing
    countryManager = CountryManager(config=config)
    countryManager.fromCache()
    regionManager = RegionManager(config=config)
    regionManager.fromCache()
    regionList = regionManager.getList()
    total = len(regionList)
    cachePath = f"{config.getCachePath()}/regions"
    # exist_ok avoids the race between checking for and creating the directory
    os.makedirs(cachePath, exist_ok=True)
    for index, region in enumerate(regionList):
        if index >= limit:
            break
        regionId = region.wikidataid
        msg = f"{index+1:4d}/{total:4d}:getting cities for {region.name} {region.iso} {region.wikidataid}"
        jsonFileName = f"{cachePath}/{region.iso}.json"
        if os.path.isfile(jsonFileName):
            # already cached - only report it when asked to
            if showDone:
                print(msg)
        else:
            try:
                regionCities = wd.getCitiesForRegion(regionId, msg)
                # serialize before opening the file so a failing query or
                # serialization does not leave an empty cache file behind
                jsonStr = json.dumps(regionCities)
                with open(jsonFileName, "w") as jsonFile:
                    jsonFile.write(jsonStr)
            except Exception as ex:
                self.handleWikidataException(ex)
def testIssue_59_db_download(self):
    '''
    check that the cache database is downloaded when it is not present (issue 59)
    '''
    with tempfile.TemporaryDirectory() as cacheRoot:
        storageConfig = StorageConfig(cacheFile="locations.db", cacheRootDir=cacheRoot)
        # address the database by its path below the cache directory
        cacheDir = storageConfig.getCachePath()
        storageConfig.cacheFile = f"{cacheDir}/{storageConfig.cacheFile}"
        locationContext = LocationContext.fromCache(config=storageConfig)
        matches = locationContext.locateLocation("Germany")
        self.assertTrue(len(matches) > 0)
def test_getLocationByID(self):
    '''
    check that looking up a wikidataid returns the matching location
    '''
    usaWikidataId = "Q30"  # wikidataid of USA
    manager = CountryManager(config=LocationContext.getDefaultConfig())
    manager.fromCache()
    usa = manager.getLocationByID(usaWikidataId)
    self.assertIsNotNone(usa)
    hasIso = hasattr(usa, 'iso')
    self.assertTrue(hasIso)
    self.assertEqual(usa.iso, 'US')
def testLocationContextFromCache(self):
    '''
    check the number of countries, regions and cities of a LocationContext loaded from cache
    '''
    # set to True to run this test outside of the CI environment
    forceRun = False
    if not (self.inCI() or forceRun):
        return
    context = LocationContext.fromCache()
    context.load()
    countryCount = len(context.countries)
    self.assertTrue(countryCount > 180)
    regionCount = len(context.regions)
    self.assertTrue(regionCount > 3500)
    cityCount = len(context.cities)
    self.assertTrue(cityCount > 1000000)
def getQueryManager(self):
    '''
    get the query manager

    The cache directory is searched first, then the geograpy/data directory
    relative to this file.

    Returns:
        QueryManager: manager for the first queries.yaml found, None if no such file exists
    '''
    cacheDir = LocationContext.getDefaultConfig().getCachePath()
    moduleDir = os.path.dirname(__file__)
    candidateDirs = (cacheDir, f"{moduleDir}/../geograpy/data")
    for candidateDir in candidateDirs:
        yamlPath = f"{candidateDir}/queries.yaml"
        if not os.path.isfile(yamlPath):
            continue
        return QueryManager(lang='sql', debug=self.debug, queriesPath=yamlPath)
    return None
def testCacheLocationLabels(self):
    '''
    Generates the location label tables in the SQL db for countries, regions
    and cities by querying wikidata for the rdfs:label and skos:altLabel of
    each location. A view containing all location labels is also created.
    '''
    # disabled by default - set to True to run the label extraction
    testLocationLabelExtraction = False
    if not testLocationLabelExtraction:
        return
    wd = Wikidata()
    config = LocationContext.getDefaultConfig()
    managers = (
        CountryManager(config=config),
        RegionManager(config=config),
        CityManager(config=config),
    )
    sqlDb = SQLDB(dbname=config.cacheFile, debug=self.debug)
    chunkSize = 1000
    for manager in managers:
        manager.fromCache()
        idRecords = sqlDb.query(
            f"SELECT DISTINCT wikidataid FROM {manager.entityPluralName}")
        wikidataIds = [record['wikidataid'] for record in idRecords]
        chunkCount = math.ceil(len(wikidataIds) / chunkSize)
        labelRecords = []
        # the ids are queried in chunks of chunkSize
        for chunkIndex in range(chunkCount):
            start = chunkIndex * chunkSize
            chunk = wikidataIds[start:start + chunkSize]
            parts = []
            for position, wikidataId in enumerate(chunk):
                # start a new line in the VALUES clause after every ten ids
                separator = " \n\t\t\t" if position % 10 == 0 else " "
                parts.append(f"{separator}wd:{wd.getWikidataId(wikidataId)}")
            values = "".join(parts)
            query = self.getLablesQuery(values)
            title = f"Query {chunkIndex}/{chunkCount} - Querying {manager.entityName} Labels"
            labelRecords.extend(wd.query(title, queryString=query))
        wd.store2DB(labelRecords, tableName=f"{manager.entityName}_labels", sqlDB=sqlDb)
    self.createViews(sqlDB=sqlDb)
def testReadCachedCitiesByRegion(self):
    '''
    test reading the cached json Files
    '''
    # This is to populate the cities database
    # the unconditional return keeps everything below from being executed
    return
    storageConfig = LocationContext.getDefaultConfig()
    regions = RegionManager(config=storageConfig)
    regions.fromCache()
    regionByIso, _dup = regions.getLookup("iso")
    self.assertEqual(56, len(_dup))
    cacheFiles = CityManager.getJsonFiles(storageConfig)
    msg = f"reading {len(cacheFiles)} cached city by region JSON cache files"
    self.assertTrue(len(cacheFiles) > 2000)
    profiler = Profiler(msg)
    cities = CityManager(config=storageConfig)
    cities.getList().clear()
    for jsonFileName in cacheFiles:
        # the file name (without extension) is the ISO code of the region
        isoMatch = re.search(r"/([^\/]*)\.json", jsonFileName)
        if not isoMatch:
            print(
                f"{jsonFileName} - does not match a known region's ISO code"
            )
            continue
        region = regionByIso[isoMatch.group(1)]
        with open(jsonFileName) as jsonFile:
            cityRecords = json.load(jsonFile)
            for cityRecord in cityRecords:
                city = City()
                city.fromDict(cityRecord)
                # fix regionId
                if hasattr(city, "regionId"):
                    city.partOfRegionId = city.regionId
                city.regionId = region.wikidataid
                cities.add(city)
    cities.store()
    profiler.time()
def testRegionMatching(self):
    '''
    test matching each country to its three closest regions
    '''
    locator = Locator()
    # make sure the database is filled before running the lookups
    if not locator.db_has_data():
        locator.populate_db()
    countries = CountryManager.fromErdem()
    regions = RegionManager(config=LocationContext.getDefaultConfig())
    regions.fromCache()
    for country in countries.countries:
        closest = country.getNClosestLocations(regions, 3)
        if self.debug:
            print(f"{country}{country.lat:.2f},{country.lon:.2f}")
        for rank, (location, distance) in enumerate(closest):
            if self.debug:
                print(f" {rank}:{location}-{distance:.0f} km")
def getLocationContext(self):
    '''
    get the location context, loading it from the cache on first use

    Returns:
        the location context held in self.locationContext
    '''
    context = self.locationContext
    if context is None:
        # not loaded yet - load once and keep it for later calls
        context = LocationContext.fromCache()
        self.locationContext = context
    return context
def getStorageConfig(self):
    '''
    get the storage configuration

    Returns:
        the default configuration as provided by LocationContext.getDefaultConfig()
    '''
    # disabled alternative: StorageConfig.getDefault()
    return LocationContext.getDefaultConfig()