コード例 #1
0
 def cacheRegionCities2Json(self, limit, showDone=False):
     '''
     cache the cities of each region as one JSON file per region

     Walks the region list in order and, for each of the first ``limit``
     entries, stores the result of Wikidata.getCitiesForRegion in
     ``<cachePath>/regions/<region.iso>.json``. Regions whose JSON file
     already exists are not queried again but still count towards ``limit``.
     Any exception raised while querying, serializing or writing is passed
     to self.handleWikidataException.

     Args:
         limit(int): maximum number of region list entries to visit
         showDone(bool): if True also print the progress message for regions that are already cached
     '''
     # TODO - refactor to Locator/LocationContext - make available via command line
     wd = Wikidata()
     config = LocationContext.getDefaultConfig()
     # NOTE(review): countryManager is not used below; it is kept because
     # fromCache() may have side effects this method relies on - confirm
     # before removing.
     countryManager = CountryManager(config=config)
     countryManager.fromCache()
     regionManager = RegionManager(config=config)
     regionManager.fromCache()
     regionList = regionManager.getList()
     total = len(regionList)
     cachePath = f"{config.getCachePath()}/regions"
     # exist_ok avoids the check-then-create race of exists() + makedirs()
     os.makedirs(cachePath, exist_ok=True)
     for index, region in enumerate(regionList):
         if index >= limit:
             break
         regionId = region.wikidataid
         msg = f"{index+1:4d}/{total:4d}:getting cities for {region.name} {region.iso} {region.wikidataid}"
         jsonFileName = f"{cachePath}/{region.iso}.json"
         if os.path.isfile(jsonFileName):
             # already cached - only report it when asked to
             if showDone:
                 print(msg)
             continue
         try:
             regionCities = wd.getCitiesForRegion(regionId, msg)
             # serialize before opening the file so that a serialization
             # error does not leave an empty cache file behind
             jsonStr = json.dumps(regionCities)
             with open(jsonFileName, "w", encoding="utf-8") as jsonFile:
                 jsonFile.write(jsonStr)
         except Exception as ex:
             self.handleWikidataException(ex)
コード例 #2
0
 def testIssue_59_db_download(self):
     '''
     tests if the cache database is downloaded if not present

     A fresh temporary directory is used as cache root, so no database
     file exists there when the LocationContext is created.
     '''
     with tempfile.TemporaryDirectory() as cacheRoot:
         storageConfig = StorageConfig(cacheFile="locations.db", cacheRootDir=cacheRoot)
         # point cacheFile at the database file inside the cache path
         dbPath = f"{storageConfig.getCachePath()}/{storageConfig.cacheFile}"
         storageConfig.cacheFile = dbPath
         context = LocationContext.fromCache(config=storageConfig)
         matches = context.locateLocation("Germany")
         self.assertTrue(len(matches) > 0)
コード例 #3
0
 def test_getLocationByID(self):
     '''
     tests if the correct location for a given wikidataid is returned

     Looks up Q30 (the wikidataid of the USA) in the cached countries and
     expects a result carrying the iso code 'US'.
     '''
     usaWikidataId = "Q30"
     manager = CountryManager(config=LocationContext.getDefaultConfig())
     manager.fromCache()
     usa = manager.getLocationByID(usaWikidataId)
     self.assertIsNotNone(usa)
     self.assertTrue(hasattr(usa, 'iso'))
     self.assertEqual(usa.iso, 'US')
コード例 #4
0
 def testLocationContextFromCache(self):
     '''
     test loading LocationContext from cache
     '''
     testCache=False
     if self.inCI() or testCache:
         locationContext = LocationContext.fromCache()
         locationContext.load()
         self.assertTrue(len(locationContext.countries) > 180)
         self.assertTrue(len(locationContext.regions) > 3500)
         self.assertTrue(len(locationContext.cities) > 1000000)
コード例 #5
0
 def getQueryManager(self):
     '''
     get the query manager

     Searches for a queries.yaml file first in the default cache path and
     then in ../geograpy/data relative to this file's directory.

     Returns:
         QueryManager: a manager for the first queries.yaml found, None if there is no such file
     '''
     cacheDir = LocationContext.getDefaultConfig().getCachePath()
     moduleDir = os.path.dirname(__file__)
     searchPaths = (cacheDir, f"{moduleDir}/../geograpy/data")
     for searchPath in searchPaths:
         candidate = f"{searchPath}/queries.yaml"
         if not os.path.isfile(candidate):
             continue
         return QueryManager(lang='sql', debug=self.debug, queriesPath=candidate)
     return None
コード例 #6
0
    def testCacheLocationLabels(self):
        '''
        Generates the location label tabels in the SQL db fro countries, regions and cities by querying wikidata for
        the rdfs:label and skos:altLa of each location.
        A view containing all location labels is also created.
        '''
        testLocationLabelExtraction = False
        if testLocationLabelExtraction:
            wd = Wikidata()
            config = LocationContext.getDefaultConfig()
            countryManager = CountryManager(config=config)
            regionManager = RegionManager(config=config)
            cityManager = CityManager(config=config)
            sqlDb = SQLDB(dbname=config.cacheFile, debug=self.debug)
            for manager in countryManager, regionManager, cityManager:
                manager.fromCache()
                wikidataIdQuery = f"SELECT DISTINCT wikidataid FROM {manager.entityPluralName}"
                wikidataIdQueryRes = sqlDb.query(wikidataIdQuery)
                wikidataIds = [l['wikidataid'] for l in wikidataIdQueryRes]

                chunkSize = 1000
                iterations = math.ceil(len(wikidataIds) / chunkSize)
                progress = 0
                res = []
                for i in range(iterations):
                    workOnIds = wikidataIds[i * chunkSize:(i + 1) * chunkSize]
                    progress += len(workOnIds)
                    index = 0
                    values = ""
                    for location in workOnIds:
                        spacer = "  \n\t\t\t" if index % 10 == 0 else " "
                        values += f"{spacer}wd:{wd.getWikidataId(location)}"
                        index += 1
                    query = self.getLablesQuery(values)
                    res.extend(
                        wd.query(
                            f"Query {i}/{iterations} - Querying {manager.entityName} Labels",
                            queryString=query))
                wd.store2DB(res,
                            tableName=f"{manager.entityName}_labels",
                            sqlDB=sqlDb)
            self.createViews(sqlDB=sqlDb)
コード例 #7
0
 def testReadCachedCitiesByRegion(self):
     '''
     test reading the cached json Files
     '''
     # This is to populate the cities database
     return
     config = LocationContext.getDefaultConfig()
     regionManager = RegionManager(config=config)
     regionManager.fromCache()
     regionByIso, _dup = regionManager.getLookup("iso")
     self.assertEqual(56, len(_dup))
     jsonFiles = CityManager.getJsonFiles(config)
     msg = f"reading {len(jsonFiles)} cached city by region JSON cache files"
     self.assertTrue(len(jsonFiles) > 2000)
     profiler = Profiler(msg)
     cityManager = CityManager(config=config)
     cityManager.getList().clear()
     for jsonFileName in jsonFiles:
         isoMatch = re.search(r"/([^\/]*)\.json", jsonFileName)
         if not isoMatch:
             print(
                 f"{jsonFileName} - does not match a known region's ISO code"
             )
         else:
             rIso = isoMatch.group(1)
             region = regionByIso[rIso]
             with open(jsonFileName) as jsonFile:
                 cities4Region = json.load(jsonFile)
                 for city4Region in cities4Region:
                     city = City()
                     city.fromDict(city4Region)
                     # fix regionId
                     if hasattr(city, "regionId"):
                         city.partOfRegionId = city.regionId
                     city.regionId = region.wikidataid
                     cityManager.add(city)
                     pass
     cityManager.store()
     profiler.time()
コード例 #8
0
 def testRegionMatching(self):
     '''
     test region matches

     For every country of the Erdem country list the three closest
     locations from the cached regions are determined; the results are
     only printed when self.debug is set.
     '''
     locator = Locator()
     if not locator.db_has_data():
         locator.populate_db()
     erdemCountries = CountryManager.fromErdem()
     regionManager = RegionManager(config=LocationContext.getDefaultConfig())
     regionManager.fromCache()
     for country in erdemCountries.countries:
         nearest = country.getNClosestLocations(regionManager, 3)
         if self.debug:
             print(f"{country}{country.lat:.2f},{country.lon:.2f}")
         # each entry is a (location, distance) pair
         for rank, (location, distance) in enumerate(nearest):
             if self.debug:
                 print(f"    {rank}:{location}-{distance:.0f} km")
コード例 #9
0
 def getLocationContext(self):
     '''
     get the location context, creating it from the cache on first use

     Returns:
         the value of self.locationContext, which is set to
         LocationContext.fromCache() if it was None
     '''
     if self.locationContext is not None:
         return self.locationContext
     self.locationContext = LocationContext.fromCache()
     return self.locationContext
コード例 #10
0
 def getStorageConfig(self):
     '''
     get the storage configuration

     Returns:
         the result of LocationContext.getDefaultConfig()
     '''
     return LocationContext.getDefaultConfig()