def cacheRegionCities2Json(self, limit, showDone=False):
    """
    Cache the cities of the cached regions as one JSON file per region.

    The regions are read via RegionManager.fromCache(). For every region that
    does not yet have a file ``<cachePath>/regions/<iso>.json`` the cities are
    fetched with Wikidata.getCitiesForRegion and dumped to that file.
    Any exception raised while fetching, serializing or writing a region is
    handed to self.handleWikidataException.

    Args:
        limit: maximum number of regions to visit, counted from the start of
            the region list (regions that are already cached count as well)
        showDone(bool): if True print the progress message for regions whose
            JSON file already exists
    """
    # TODO - refactor to Locator/LocationContext - make available via command line
    wd = Wikidata()
    config = LocationContext.getDefaultConfig()
    # NOTE(review): countryManager is not used below; it is kept because
    # fromCache() may have side effects on the shared cache - confirm
    countryManager = CountryManager(config=config)
    countryManager.fromCache()
    regionManager = RegionManager(config=config)
    regionManager.fromCache()
    regionList = regionManager.getList()
    total = len(regionList)
    cachePath = f"{config.getCachePath()}/regions"
    # exist_ok avoids the check-then-create race of exists() + makedirs()
    os.makedirs(cachePath, exist_ok=True)
    for index, region in enumerate(regionList):
        if index >= limit:
            break
        regionId = region.wikidataid
        msg = f"{index+1:4d}/{total:4d}:getting cities for {region.name} {region.iso} {region.wikidataid}"
        jsonFileName = f"{cachePath}/{region.iso}.json"
        if os.path.isfile(jsonFileName):
            # already cached - nothing to fetch
            if showDone:
                print(msg)
            continue
        try:
            regionCities = wd.getCitiesForRegion(regionId, msg)
            # serialize before opening the file so a failing dump
            # does not leave an empty cache file behind
            jsonStr = json.dumps(regionCities)
            with open(jsonFileName, "w") as jsonFile:
                jsonFile.write(jsonStr)
        except Exception as ex:
            self.handleWikidataException(ex)
Exemple #2
0
 def testLocationListLoading(self):
     '''
     restore a CountryManager from an inline JSON sample with three countries
     and check that the wikidataid lookup contains Q30
     '''
     countriesJson = """
     {
         "countries": [
             {
                 "name": "Afghanistan",
                 "wikidataid": "Q889",
                 "lat": 34,
                 "lon": 66,
                 "coordinates": "34,66",
                 "partOf": null,
                 "level": 3,
                 "locationKind": "Country",
                 "comment": null,
                 "iso": "AF"
             },
             {
                 "name": "United States of America",
                 "wikidataid": "Q30",
                 "lat": 39.82818,
                 "lon": -98.5795,
                 "partOf": "Noth America",
                 "level": 3,
                 "locationKind": "Country",
                 "comment": null,
                 "labels": [
                     "America",
                     "UNITED STATES OF AMERICA",
                     "USA",
                     "United States",
                     "United States of America (the)"
                 ],
                 "iso": "US"
             },
             {
                 "name": "Australia",
                 "wikidataid": "Q408",
                 "lat": -28,
                 "lon": 137,
                 "coordinates": "-28,137",
                 "partOf": null,
                 "level": 3,
                 "locationKind": "Country",
                 "comment": null,
                 "labels": [
                     "AUS"
                 ],
                 "iso": "AU"
             }
         ]
     }
     """
     manager = CountryManager()
     manager.restoreFromJsonStr(countriesJson)
     # getLookup returns a pair - only the first element is needed here
     byWikidataId, _duplicates = manager.getLookup("wikidataid")
     self.assertTrue("Q30" in byWikidataId)
Exemple #3
0
 def test_getLocationByID(self):
     '''
     check that getLocationByID delivers the location matching a wikidataid
     '''
     usaWikidataId = "Q30"  # wikidataid of USA
     manager = CountryManager(config=LocationContext.getDefaultConfig())
     manager.fromCache()
     usa = manager.getLocationByID(usaWikidataId)
     self.assertIsNotNone(usa)
     self.assertTrue(hasattr(usa, 'iso'))
     self.assertEqual(usa.iso, 'US')
 def testCountryManager(self):
     '''
     tests loading the CountryManager via fromCache() using the storage
     config of this test case

     checks that at least 200 countries are available, that the lookup by
     wikidataid passes checkNoDuplicateWikidataIds and that Q30 is contained
     '''
     countryManager = CountryManager(config=self.getStorageConfig())
     countryManager.fromCache()
     self.assertTrue(hasattr(countryManager, 'countries'))
     # assertGreaterEqual reports the actual count on failure
     self.assertGreaterEqual(len(countryManager.countries), 200)
     # check that the USA (Q30) is in the list
     countriesByWikidataId = self.checkNoDuplicateWikidataIds(countryManager, "wikidataid")
     self.assertIn("Q30", countriesByWikidataId)
Exemple #5
0
    def testIssue41_CountriesFromErdem(self):
        '''
        check the number of countries delivered by CountryManager.fromErdem

        in debug mode every country is printed
        '''
        erdemCountries = CountryManager.fromErdem().countries
        self.assertEqual(247, len(erdemCountries))
        if not self.debug:
            return
        for erdemCountry in erdemCountries:
            print(erdemCountry)
Exemple #6
0
 def testIssue45_BallTree(self):
     '''
     check the ball tree tuple derived from the Erdem country list
     '''
     expectedValid = 245
     erdemCountries = CountryManager.fromErdem()
     ballTuple = erdemCountries.getBallTuple()
     ballTree, validList = ballTuple
     self.assertEqual(expectedValid, len(validList))
     # compare by class name so that the BallTree class itself is not needed here
     self.assertEqual("BallTree", type(ballTree).__name__)
     self.assertAlmostEqual(expectedValid, ballTree.sum_weight, delta=0.1)
Exemple #7
0
    def testCacheLocationLabels(self):
        '''
        Generates the location label tables in the SQL db for countries, regions and cities by querying wikidata for
        the rdfs:label and skos:altLabel of each location.
        A view containing all location labels is also created.

        The extraction is switched off via the local flag testLocationLabelExtraction,
        so this test does nothing unless the flag is set to True.
        '''
        testLocationLabelExtraction = False
        if not testLocationLabelExtraction:
            return
        wd = Wikidata()
        config = LocationContext.getDefaultConfig()
        countryManager = CountryManager(config=config)
        regionManager = RegionManager(config=config)
        cityManager = CityManager(config=config)
        sqlDb = SQLDB(dbname=config.cacheFile, debug=self.debug)
        # number of wikidata ids that are sent with a single label query
        chunkSize = 1000
        for manager in countryManager, regionManager, cityManager:
            manager.fromCache()
            wikidataIdQuery = f"SELECT DISTINCT wikidataid FROM {manager.entityPluralName}"
            wikidataIdQueryRes = sqlDb.query(wikidataIdQuery)
            wikidataIds = [record['wikidataid'] for record in wikidataIdQueryRes]

            iterations = math.ceil(len(wikidataIds) / chunkSize)
            res = []
            for i in range(iterations):
                workOnIds = wikidataIds[i * chunkSize:(i + 1) * chunkSize]
                # build the values list for the query: a line break with
                # indentation before every 10th id, a single blank otherwise
                values = "".join(
                    ("  \n\t\t\t" if index % 10 == 0 else " ")
                    + f"wd:{wd.getWikidataId(location)}"
                    for index, location in enumerate(workOnIds))
                query = self.getLablesQuery(values)
                res.extend(
                    wd.query(
                        f"Query {i}/{iterations} - Querying {manager.entityName} Labels",
                        queryString=query))
            # one label table per manager
            wd.store2DB(res,
                        tableName=f"{manager.entityName}_labels",
                        sqlDB=sqlDb)
        self.createViews(sqlDB=sqlDb)
Exemple #8
0
    def testClosestLocation(self):
        '''
        check the closest-location lookups of a location against the Erdem country list
        '''
        # reference location: Germany
        germany = Country()
        germany.name = 'Germany'
        germany.lat = 51.0
        germany.lon = 9.0
        # the countries to search in
        erdemCountries = CountryManager.fromErdem()
        # both lookups are verified with the same expectation arguments
        expected = (2, "Luxembourg", 244)

        # lookup of the 2 closest locations
        nearest = germany.getNClosestLocations(erdemCountries, 2)
        self.checkLocationListWithDistances(nearest, *expected)

        # lookup of the locations within a radius of 300
        withinRadius = germany.getLocationsWithinRadius(erdemCountries, 300)
        self.checkLocationListWithDistances(withinRadius, *expected)
Exemple #9
0
 def testRegionMatching(self):
     '''
     look up the three closest regions for every Erdem country

     the results are only printed in debug mode - nothing is asserted
     '''
     locator = Locator()
     # make sure the locator database is populated before matching
     if not locator.db_has_data():
         locator.populate_db()
     erdemCountries = CountryManager.fromErdem()
     regionManager = RegionManager(config=LocationContext.getDefaultConfig())
     regionManager.fromCache()
     for country in erdemCountries.countries:
         closestRegions = country.getNClosestLocations(regionManager, 3)
         if self.debug:
             print(f"{country}{country.lat:.2f},{country.lon:.2f}")
         # each entry is a (location, distance) pair
         for i, (location, distance) in enumerate(closestRegions):
             if self.debug:
                 print(f"    {i}:{location}-{distance:.0f} km")