def testPopulation(self):
    '''
    test adding population data from wikidata to GeoLite2 information

    Populates the Locator database, calls getWikidataCityPopulation with
    the configured endpoint and converts the resulting table list to
    PlantUML. The PlantUML text is only printed in debug mode; nothing
    is asserted.
    '''
    Locator.resetInstance()
    locator = Locator.getInstance()
    locator.populate_db()
    currentUser = getpass.getuser()
    if self.debug:
        print("current user is %s" % currentUser)
    # By default no endpoint is configured.
    # To refresh using wikidata use
    #   endpoint='https://query.wikidata.org/sparql'
    # (please note https://github.com/RDFLib/sparqlwrapper/issues/163 hits as of 2020-09)
    # To use your own wikidata copy as an endpoint (e.g. only if user=="wf"):
    #   2020 Apache Jena based wikidata copy:
    #     endpoint="http://jena.zeus.bitplan.com/wikidata"
    #   2018 Blazegraph based wikidata copy:
    #     endpoint="http://blazegraph.bitplan.com/sparql"
    endpoint = None
    locator.getWikidataCityPopulation(locator.sqlDB, endpoint)
    tables = locator.sqlDB.getTableList()
    title = """geograpy Tables 2020-09-26 [[https://github.com/somnathrakshit/geograpy3 © 2020 geograpy3 project]]"""
    plantUml = UML().tableListToPlantUml(
        tables, title=title, packageName="geograpy3"
    )
    if self.debug:
        print(plantUml)
def testGeolite2Cities(self):
    '''
    test the locs.db cache for cities

    Expects getGeolite2Cities of a fresh Locator to return exactly
    121223 entries.
    '''
    geoliteCities = Locator().getGeolite2Cities()
    cityCount = len(geoliteCities)
    if self.debug:
        print("Found %d cities " % cityCount)
    self.assertEqual(121223, cityCount)
def downloadAndTestDB(config: StorageConfig, loc: Locator = None, forceUpdate: bool = False):
    '''
    downloads and tests the downloaded db

    Args:
        config(StorageConfig): the storage configuration; its cacheFile must exist after the download
        loc(Locator): the locator to use - if None a new one is created from config
        forceUpdate(bool): passed on to Locator.downloadDB

    Returns:
        Locator: the locator that was used for the download
    '''
    # NOTE(review): self is not a parameter here - presumably it is taken
    # from an enclosing test method's scope; confirm against the caller
    locator = Locator(storageConfig=config) if loc is None else loc
    locator.downloadDB(forceUpdate=forceUpdate)
    cacheFileExists = os.path.exists(config.cacheFile)
    self.assertTrue(cacheFileExists)
    self.assertTrue(locator.db_has_data())
    return locator
def setUp(self, debug=False):
    '''
    setUp test environment

    Args:
        debug(bool): if True the tests print additional information
    '''
    TestCase.setUp(self)
    self.debug = debug
    # the profiler message names the running test method and the debug mode
    self.profile = Profiler(f"test {self._testMethodName}, debug={self.debug}")
    # start from a fresh Locator instance and trigger its database download
    Locator.resetInstance()
    Locator.getInstance().downloadDB()
    # actively test Wikidata tests?
    self.testWikidata = False
def testLocatorWithWikiData(self):
    '''
    test Locator

    Populates the database and checks that the countries, regions and
    cities tables each hold at least the expected number of records.
    '''
    Locator.resetInstance()
    loc = Locator.getInstance()
    # switch to True to call populate_db with force=True
    forceUpdate = False
    loc.populate_db(force=forceUpdate)
    tableList = loc.sqlDB.getTableList()
    expectedCities = 800000
    minimumCounts = [
        ("countries", 200),
        ("regions", 3000),
        ("cities", expectedCities),
    ]
    for tableName, minimumCount in minimumCounts:
        recordCount = loc.db_recordCount(tableList, tableName)
        # assertGreaterEqual reports both values on failure, the message
        # names the table that fell short
        self.assertGreaterEqual(recordCount, minimumCount, tableName)
def testDelimiters(self):
    '''
    test the delimiter statistics for names

    Creates the view allNames as the union of the names of countries,
    regions and cities and counts the characters with a code point
    below ord("A") in these names. The ten most common ones are
    printed in debug mode; nothing is asserted.
    '''
    loc = Locator.getInstance()
    loc.populate_db()
    ddls = [
        "DROP VIEW IF EXISTS allNames",
        """CREATE VIEW allNames as select name from countries union select name from regions union select name from cities""",
    ]
    for ddl in ddls:
        loc.sqlDB.execute(ddl)
    query = "SELECT name from allNames"
    nameRecords = loc.sqlDB.query(query)
    # output follows the debug flag of the test case - the former
    # unconditional "show = True" override made this assignment dead
    show = self.debug
    if show:
        print("found %d name records" % len(nameRecords))
    # loop invariant: only characters sorting before "A" are counted
    limit = ord("A")
    ordC = Counter()
    for nameRecord in nameRecords:
        ordC.update(code for code in map(ord, nameRecord["name"]) if code < limit)
    if show:
        for index, (code, count) in enumerate(ordC.most_common(10)):
            print("%d: %d %s -> %d" % (index, code, chr(code), count))
def testIsoRegexp(self):
    '''
    test regular expression for iso codes

    Every non-empty iso code found in the cities, countries and regions
    tables must be accepted by Locator.isISO while the plain name
    'Singapore' must be rejected.
    '''
    locator = Locator.getInstance()
    self.assertFalse(locator.isISO('Singapore'))
    # NOTE(review): the subdivision_1_iso_code branch appears twice in
    # this union - possibly a copy/paste slip; confirm the intended column
    isoCodeQuery = (
        " select distinct country_iso_code as isocode from cities"
        " union select distinct subdivision_1_iso_code as isocode from cities"
        " union select distinct subdivision_1_iso_code as isocode from cities"
        " union select distinct countryIsoCode as isocode from countries"
        " union select distinct regionIsoCode as isocode from regions "
    )
    locator.populate_db()
    for record in locator.sqlDB.query(isoCodeQuery):
        code = record['isocode']
        # empty/NULL codes are not checked
        if not code:
            continue
        accepted = locator.isISO(code)
        if self.debug and not accepted:
            print(code)
        self.assertTrue(accepted)
def testGetCountry(self):
    '''
    test getting a country by name or ISO

    Each example pairs a lookup key with the expected country name;
    an expected name of None means that no country may be found.
    '''
    locator = Locator()
    debug = True
    examples = [
        ("DE", "Germany"),
        ("US", "United States of America"),
        ("USA", None),
    ]
    for lookupKey, expectedName in examples:
        country = locator.getCountry(lookupKey)
        if debug:
            print(country)
        if expectedName is not None:
            self.assertIsNotNone(country)
            self.assertEqual(expectedName, country.name)
        else:
            self.assertIsNone(country)
def testHasViews(self):
    '''
    test that the views are available

    The views CityLookup, RegionLookup and CountryLookup must be part
    of the result of sqlDB.getTableDict(tableType="view").
    '''
    loc = Locator.getInstance()
    viewsMap = loc.sqlDB.getTableDict(tableType="view")
    for view in ["CityLookup", "RegionLookup", "CountryLookup"]:
        # assertIn names the missing view and the container on failure
        self.assertIn(view, viewsMap)
def testHasData(self):
    '''
    check has data and populate functionality

    Removes an existing database file, then expects a newly created
    Locator to report no data before populate_db and data afterwards.
    '''
    dbFile = Locator().db_file
    if os.path.isfile(dbFile):
        os.remove(dbFile)
    # reinit sqlDB
    freshLocator = Locator()
    self.assertFalse(freshLocator.db_has_data())
    freshLocator.populate_db()
    self.assertTrue(freshLocator.db_has_data())
def testUML(self):
    '''
    test generating a PlantUML diagram for the tables of the database

    The table list of the populated Locator database is converted to
    PlantUML and printed; nothing is asserted.
    '''
    Locator.resetInstance()
    locator = Locator.getInstance()
    locator.populate_db()
    currentUser = getpass.getuser()
    if self.debug:
        print("current user is %s" % currentUser)
    tables = locator.sqlDB.getTableList()
    title = """geograpy Tables 2021-08-13 [[https://github.com/somnathrakshit/geograpy3 © 2020-2021 geograpy3 project]]"""
    plantUml = UML().tableListToPlantUml(
        tables, title=title, packageName="geograpy3"
    )
    # the diagram is currently always shown, independent of the debug flag
    showUml = True
    if showUml or self.debug:
        print(plantUml)
def testRegionMatching(self):
    '''
    test region matches

    For each country of CountryManager.fromErdem() the three closest
    locations of the cached RegionManager are determined and printed in
    debug mode; nothing is asserted.
    '''
    locator = Locator()
    if not locator.db_has_data():
        locator.populate_db()
    erdemCountries = CountryManager.fromErdem()
    regionManager = RegionManager(config=LocationContext.getDefaultConfig())
    regionManager.fromCache()
    for country in erdemCountries.countries:
        closestRegions = country.getNClosestLocations(regionManager, 3)
        if self.debug:
            print(f"{country}{country.lat:.2f},{country.lon:.2f}")
        # each entry is unpacked as a (location, distance) pair
        for i, (location, distance) in enumerate(closestRegions):
            if self.debug:
                print(f" {i}:{location}-{distance:.0f} km")
def locateCity(location, correctMisspelling=False, debug=False):
    '''
    locate the given location string

    Args:
        location(string): the description of the location
        correctMisspelling(bool): passed on to Locator.getInstance
        debug(bool): passed on to the Extractor and to Locator.getInstance

    Returns:
        the result of Locator.locateCity for the places the Extractor
        found in the location string
    '''
    extractor = Extractor(text=location, debug=debug)
    extractor.split()
    locator = Locator.getInstance(
        correctMisspelling=correctMisspelling, debug=debug
    )
    return locator.locateCity(extractor.places)
def testHasData(self):
    '''
    check has data and populate functionality

    Only runs if self.inCI() is true or testDownload is switched on.
    Works on a temporary cache directory so that a newly created
    Locator starts without data and has data after populate_db.
    '''
    testDownload = False
    if not (self.inCI() or testDownload):
        return
    with tempfile.TemporaryDirectory() as cacheRootDir:
        storageConfig = StorageConfig(
            cacheRootDir=cacheRootDir, cacheDirName='geograpy3'
        )
        storageConfig.cacheFile = f"{storageConfig.getCachePath()}/{LocationContext.db_filename}"
        dbFile = Locator(storageConfig=storageConfig).db_file
        if os.path.isfile(dbFile):
            os.remove(dbFile)
        # reinit sqlDB
        freshLocator = Locator(storageConfig=storageConfig)
        self.assertFalse(freshLocator.db_has_data())
        freshLocator.populate_db()
        self.assertTrue(freshLocator.db_has_data())
def testQueries(self):
    '''
    test preconfigured queries

    Runs every query of the query manager against the Locator database
    and documents the result in mediawiki and github table format.
    '''
    queryManager = self.getQueryManager()
    self.assertIsNotNone(queryManager)
    sqlDB = Locator.getInstance().sqlDB
    # results are only shown in debug mode
    show = self.debug
    tableFormats = ["mediawiki", "github"]
    for _queryName, query in queryManager.queriesByName.items():
        records = sqlDB.query(query.query)
        for tablefmt in tableFormats:
            self.documentQueryResult(query, records, tablefmt, show=show)
def testWordCount(self):
    '''
    test the word count

    Splits each city name at runs of non-word characters and counts how
    often each number of parts occurs; the statistics are printed in
    debug mode, nothing is asserted.
    '''
    locator = Locator.getInstance()
    cityNameRecords = locator.sqlDB.query("SELECT name from CITIES")
    if self.debug:
        print("testWordCount: found %d names" % len(cityNameRecords))
    wordCounts = Counter(
        len(re.split(r"\W+", record['name'])) for record in cityNameRecords
    )
    if self.debug:
        print("most common 20: %s" % wordCounts.most_common(20))
def testQuery(self):
    '''
    test a single query

    Runs a list of example queries plus wikidataid/record count queries
    for the countries, regions and cities tables and documents each
    result in mediawiki and github table format.
    '''
    titledQueries = [
        ("LocationLabel Count", """select count(*),hierarchy from location_labels group by hierarchy"""),
        ("NY example", "select * from cityLookup where label='New York City'"),
        ("Berlin example", "select * from cityLookup where label='Berlin' order by pop desc,regionName"),
        ("Issue #25", "select * from countryLookup where label in ('France', 'Hungary', 'Poland', 'Spain', 'United Kingdom')"),
        ("Issue #25 Bulgaria", "select * from cityLookup where label in ('Bulgaria','Croatia','Hungary','Czech Republic') order by pop desc,regionName"),
    ]
    # two statistics queries per table
    for tableName in ["countries", "regions", "cities"]:
        titledQueries.extend([
            (f"unique wikidataids for {tableName}", f"select count(distinct(wikidataid)) as wikidataids from {tableName}"),
            (f"total #records for {tableName}", f"select count(*) as recordcount from {tableName}"),
        ])
    locator = Locator.getInstance()
    tableFormats = ["mediawiki", "github"]
    for title, queryString in titledQueries:
        query = Query(name=title, query=queryString, lang="sql")
        records = locator.sqlDB.query(queryString)
        for tablefmt in tableFormats:
            self.documentQueryResult(query, records, tablefmt, show=True)
def setUp(self):
    '''
    setUp test environment with debug switched off and reset the Locator instance
    '''
    super().setUp(debug=False)
    Locator.resetInstance()
def lookupQuery(self, viewName, whereClause):
    '''
    query the given view for records that have a population

    Args:
        viewName(string): the name of the view to select from
        whereClause(string): the SQL condition, inserted verbatim into the query

    Returns:
        the result of sqlDB.query - the matching records with a non NULL
        pop, ordered by pop descending
    '''
    # both arguments are interpolated as is - only use trusted values
    sqlQuery = f"SELECT * FROM {viewName} where {whereClause} AND pop is not NULL ORDER by pop desc"
    return Locator.getInstance().sqlDB.query(sqlQuery)
def setUp(self):
    '''
    setUp test environment with debug switched on and reset the Locator instance
    '''
    Locator.resetInstance()
    self.debug = True