Ejemplo n.º 1
0
 def store(self, cacheFileName='Event_all'):
     '''
     store my contents to the given cacheFileName - 
     implemented as SQL storage
     
     Args:
         cacheFileName(string): the path to the database
     '''
     dbfile = Lookup.getDBFile(cacheFileName)
     # remove existing database dump if it exists
     if os.path.exists(dbfile):
         os.remove(dbfile)
     backup = SQLDB(dbfile)
     errors = []
     print("storing %s to %s" % (self.name, dbfile))
     # debugging of CREATE TABLE
     # backup.c.execute("CREATE TABLE Event_test(foundBy TEXT)")
     for em in self.ems:
         if not em.config.mode is StoreMode.SQL:
             raise Exception(
                 "lookup store only support SQL storemode but found %s for %s"
                 % (em.config.mode, em.name))
         else:
             cacheFile = em.getCacheFile(config=em.config,
                                         mode=StoreMode.SQL)
             sqlDB = em.getSQLDB(cacheFile)
             sqlDB.copyTo(backup)
     backup.close()
     return errors
Ejemplo n.º 2
0
 def checkListOfRecords(self,listOfRecords,entityName,primaryKey=None,executeMany=True,fixNone=False,fixDates=False,debug=False,doClose=True):
     '''
     check the handling of the given list of Records
     
     Args:
       
        listOfRecords(list): a list of dicts that contain the data to be stored
        entityName(string): the name of the entity type to be used as a table name
        primaryKey(string): the name of the key / column to be used as a primary key
        executeMany(boolean): True if executeMany mode of sqlite3 should be used
        fixNone(boolean): fix dict entries that are undefined to have a "None" entry
        debug(boolean): True if debug information e.g. CREATE TABLE and INSERT INTO commands should be shown
        doClose(boolean): True if the connection should be closed
   
     '''     
     size=len(listOfRecords)
     if self.debug:
         print("%s size is %d fixNone is %r fixDates is: %r" % (entityName,size,fixNone,fixDates))
     self.sqlDB=SQLDB(debug=debug,errorDebug=True)
     entityInfo=self.sqlDB.createTable(listOfRecords[:10],entityName,primaryKey)
     startTime=time.time()
     self.sqlDB.store(listOfRecords,entityInfo,executeMany=executeMany,fixNone=fixNone)
     elapsed=time.time()-startTime
     if self.debug:
         print ("adding %d %s records took %5.3f s => %5.f records/s" % (size,entityName,elapsed,size/elapsed)) 
     resultList=self.sqlDB.queryAll(entityInfo,fixDates=fixDates)    
     if self.debug:
         print ("selecting %d %s records took %5.3f s => %5.f records/s" % (len(resultList),entityName,elapsed,len(resultList)/elapsed)) 
     if doClose:
         self.sqlDB.close()
     return resultList
Ejemplo n.º 3
0
 def copyFrom(self, dbFile):
     '''
     copy the contents of the given database file to my database
     Args:
         dbFile(string): the database to copy the content from
     '''
     sqlDB = self.getSQLDB()
     sourceDB = SQLDB(dbFile)
     sourceDB.copyTo(sqlDB)
Ejemplo n.º 4
0
 def getDocumentRecords(self,archiveName,folderPath):
     '''
     get the document records
     '''
     sqlDB=SQLDB(self.getCacheFile())
     sqlQuery="SELECT * FROM document WHERE archiveName=(?) AND folderPath=(?)"
     params=(archiveName,folderPath,)
     dictList=sqlDB.query(sqlQuery, params)
     return dictList
Ejemplo n.º 5
0
    def testGeoTextAndGrapy(self):
        '''
        test the GeoText and geograpy3 library
        '''
        debug = True
        limit = 100
        sqlQuery = """select count(*) as count,
locality from Event_wikicfp
where locality is not null
group by locality
order by 1 desc
LIMIT %d
""" % limit
        dbFile = Lookup.getDBFile()
        if os.path.isfile(dbFile):
            sqlDB = SQLDB(dbFile)
        else:
            lookup = Lookup.ensureAllIsAvailable("testGeoText")
            sqlDB = lookup.getSQLDB()
        if sqlDB is not None:
            print("testGeoText from database %s " % sqlDB.dbname)
            totalResult = sqlDB.query("""select count(*) as count
  from event_wikicfp
  where locality is not null""")
            total = totalResult[0]['count']
            listOfDicts = sqlDB.query(sqlQuery)
            index = 0
            rsum = 0
            found = 0
            problems = []
            for record in listOfDicts:
                locality = record['locality']
                count = record['count']
                index += 1
                rsum += count
                print("%5d: %5d/%5d %5.1f%%=%s" %
                      (index, count, rsum, rsum / total * 100, locality))
                geo = GeoText(locality)
                if debug:
                    print("  %s" % geo.countries)
                    print("  %s" % geo.cities)
                city = geograpy.locateCity(locality)
                if city is not None:
                    found += 1
                else:
                    problems.append(locality)
                if debug:
                    print("  %s%s" % (city, '✅' if city is not None else '❌'))
            if self.debug:
                print("found %d/%d = %5.1f%%" %
                      (found, limit, found / limit * 100))
                print("problems: %s" % problems)
            self.assertTrue(found / limit > 0.8)
        pass
Ejemplo n.º 6
0
 def testWikidataCities(self):
     '''
     test getting city information from wikidata
     
     '''
     # Wikidata time outs in CI environment need to be avoided
     if getpass.getuser() != "wf":
         return
     config = StorageConfig.getSQL(debug=self.debug)
     config.cacheRootDir = "/tmp/wdhs"
     cachedir = config.getCachePath()
     config.cacheFile = f"{cachedir}/hs.db"
     # use 2018 wikidata copy
     # wikidata.endpoint="http://blazegraph.bitplan.com/sparql"
     # use 2020 wikidata copy
     wikidata = Wikidata()
     wikidata.endpoint = "https://confident.dbis.rwth-aachen.de/jena/wdhs/sparql"
     #wikidata.endpoint="http://jena.bitplan.com/wdhs/sparql"
     regions = [{
         "name": "Singapore",
         "country": "Q334",
         "region": None,
         "cities": 46
     }, {
         "name": "Beijing",
         "country": None,
         "region": "Q956",
         "cities": 25
     }, {
         "name": "Paris",
         "country": None,
         "region": "Q13917",
         "cities": 1242
     }, {
         "name": "Barcelona",
         "country": None,
         "region": "Q5705",
         "cities": 1242
     }, {
         "name": "Rome",
         "country": None,
         "region": "Q1282",
         "cities": 1242
     }]
     limit = 1000000  #if self.inCI() else 100
     cityList = wikidata.getCities(limit=limit)
     sqlDB = SQLDB(config.cacheFile)
     entityInfo = sqlDB.createTable(cityList, "hs", withDrop=True)
     sqlDB.store(cityList, entityInfo, fixNone=True)
     expected = 200000  # if self.inCI() else limit
     self.assertTrue(len(cityList) >= expected)
Ejemplo n.º 7
0
 def populate_Cities_FromWikidata(self,sqlDB):
     '''
     populate the given sqlDB with the Wikidata Cities
     
     Args:
         sqlDB(SQLDB): target SQL database
     '''
     dbFile=self.db_path+"/City_wikidata.db"
     if not os.path.exists(dbFile):
         print("Downloading %s ... this might take a few seconds" % dbFile)
         dbUrl="http://wiki.bitplan.com/images/confident/City_wikidata.db"
         urllib.request.urlretrieve(dbUrl,dbFile)
     wikiCitiesDB=SQLDB(dbFile)
     wikiCitiesDB.copyTo(sqlDB)
Ejemplo n.º 8
0
 def testIssue41(self):
     '''
     https://github.com/WolfgangFahl/pyLoDStorage/issues/41
     improve error message when create table command fails
     '''
     listOfRecords=[{
         'name':'value',
         'py/object': 'datetime.time'
     }]
     self.sqlDB=SQLDB(debug=self.debug,errorDebug=True)
     try:
         _entityInfo=self.sqlDB.createTable(listOfRecords[:1],'Invalid','name')
         self.fail("There should be an exception")
     except Exception as ex:
         self.assertTrue("CREATE TABLE Invalid" in str(ex))
Ejemplo n.º 9
0
 def getSqlDB():
     '''
     get the SQlite database connection
     '''
     config=DMSStorage.getStorageConfig(mode="sql")
     # https://stackoverflow.com/a/48234567/1497139
     sqlDB=SQLDB(config.cacheFile,check_same_thread=False)
     return sqlDB
Ejemplo n.º 10
0
 def __init__(self, db_file=None,correctMisspelling=False,debug=False):
     '''
     Constructor
     
     Args:
         db_file(str): the path to the database file
         correctMispelling(bool): if True correct typical misspellings
         debug(bool): if True show debug information
     '''
     self.debug=debug
     self.correctMisspelling=correctMisspelling
     self.db_path=os.path.dirname(os.path.realpath(__file__)) 
     self.db_file = db_file or self.db_path+"/locs.db"
     self.view="GeoLite2CityLookup"
     self.sqlDB=SQLDB(self.db_file,errorDebug=True)
     self.getAliases()
     self.dbVersion="2020-09-27 16:48:09"
 def getWordUsageDB(self):
     '''
     get the Word usage database
     '''
     wordUsageDBFile = Lookup.getDBFile("wordusage")
     if os.path.isfile(wordUsageDBFile):
         wSQLDB = SQLDB(wordUsageDBFile)
         return wSQLDB
     return None
Ejemplo n.º 12
0
    def testCacheLocationLabels(self):
        '''
        Generates the location label tabels in the SQL db fro countries, regions and cities by querying wikidata for
        the rdfs:label and skos:altLa of each location.
        A view containing all location labels is also created.
        '''
        testLocationLabelExtraction = False
        if testLocationLabelExtraction:
            wd = Wikidata()
            config = LocationContext.getDefaultConfig()
            countryManager = CountryManager(config=config)
            regionManager = RegionManager(config=config)
            cityManager = CityManager(config=config)
            sqlDb = SQLDB(dbname=config.cacheFile, debug=self.debug)
            for manager in countryManager, regionManager, cityManager:
                manager.fromCache()
                wikidataIdQuery = f"SELECT DISTINCT wikidataid FROM {manager.entityPluralName}"
                wikidataIdQueryRes = sqlDb.query(wikidataIdQuery)
                wikidataIds = [l['wikidataid'] for l in wikidataIdQueryRes]

                chunkSize = 1000
                iterations = math.ceil(len(wikidataIds) / chunkSize)
                progress = 0
                res = []
                for i in range(iterations):
                    workOnIds = wikidataIds[i * chunkSize:(i + 1) * chunkSize]
                    progress += len(workOnIds)
                    index = 0
                    values = ""
                    for location in workOnIds:
                        spacer = "  \n\t\t\t" if index % 10 == 0 else " "
                        values += f"{spacer}wd:{wd.getWikidataId(location)}"
                        index += 1
                    query = self.getLablesQuery(values)
                    res.extend(
                        wd.query(
                            f"Query {i}/{iterations} - Querying {manager.entityName} Labels",
                            queryString=query))
                wd.store2DB(res,
                            tableName=f"{manager.entityName}_labels",
                            sqlDB=sqlDb)
            self.createViews(sqlDB=sqlDb)
Ejemplo n.º 13
0
 def testIssue14_execute(self):
     '''
     https://github.com/WolfgangFahl/pyLoDStorage/issues/14
     
     offer execute wrapper directly via sqlDB
     '''
     sqlDB=SQLDB()
     ddl="""
     CREATE TABLE contacts (
         contact_id INTEGER PRIMARY KEY,
         first_name TEXT NOT NULL,
         last_name TEXT NOT NULL
     )
     """
     sqlDB.execute(ddl)
     tableList=sqlDB.getTableList()
     if self.debug:
         print(tableList)
     self.assertEqual(1,len(tableList))
     self.assertEqual("contacts",tableList[0]['name'])
Ejemplo n.º 14
0
    def testPlantUml(self):
        '''
        get plant UML functionality 
        '''
        schemaManager = None
        if getpass.getuser() != "travis":
            o = Ontology()
            schemaManager = o.getRQSchema(
                fromCache=False)  # to force SMW query

        lookup = Lookup("plantuml", getAll=False, butNot='or')
        dbfile = lookup.getDBFile('Event_all')
        sqlDB = SQLDB(dbfile)
        tableList = sqlDB.getTableList()
        eventTableList = []
        eventSchemas = lookup.getEventSchemas()
        for table in tableList:
            tableName = table['name']
            if tableName.startswith("Event_"):
                table['schema'] = eventSchemas[tableName]
                eventTableList.append(table)
                countQuery = "SELECT count(*) as count from %s" % tableName
                countResult = sqlDB.query(countQuery)
                table['instances'] = countResult[0]['count']
        self.assertEqual(8, len(eventTableList))
        uml = UML()
        now = datetime.now()
        nowYMD = now.strftime("%Y-%m-%d")
        title = """ConfIDent  Entities
%s
[[https://projects.tib.eu/en/confident/ © 2019-2020 ConfIDent project]]
see also [[http://ptp.bitplan.com/settings Proceedings Title Parser]]
""" % nowYMD
        plantUml = uml.mergeSchema(schemaManager,
                                   eventTableList,
                                   title=title,
                                   packageName='DataDonations',
                                   generalizeTo="Event")
        print(plantUml)
        self.assertTrue("Event <|-- Event_confref" in plantUml)
        self.assertTrue("class Event " in plantUml)
Ejemplo n.º 15
0
 def getSQLDB(self, cacheFile):
     '''
     get the SQL database for the given cacheFile
     
     Args:
         cacheFile(string): the file to get the SQL db from
     '''
     config = self.config
     sqldb = self.sqldb = SQLDB(cacheFile,
                                debug=config.debug,
                                errorDebug=config.errorDebug)
     return sqldb
Ejemplo n.º 16
0
    def testUniqueConstraint(self):
        '''
        test for https://github.com/WolfgangFahl/pyLoDStorage/issues/4
        sqlite3.IntegrityError: UNIQUE constraint failed: ... show debug info
        '''
        listOfDicts=[
            {"name": "John Doe"},
            {"name": "Frank Doe"}, 
            {"name": "John Doe"}, 
            {"name":"Tim Doe"}]
        sqlDB=SQLDB(debug=self.debug,errorDebug=True)
        entityInfo=sqlDB.createTable(listOfDicts[:10],'Does','name')
        try:
            sqlDB.store(listOfDicts,entityInfo,executeMany=False)
            self.fail("There should be an exception")
        except Exception as ex:
            expected="""INSERT INTO Does (name) values (:name)
failed:UNIQUE constraint failed: Does.name
record  #3={'name': 'John Doe'}"""
            errMsg=str(ex)
            self.assertEqual(expected,errMsg)
Ejemplo n.º 17
0
 def testCopy(self):
     '''
     test copying databases into another database
     '''
     dbFile="/tmp/DAWT_Sample3x1000.db"
     copyDB=SQLDB(dbFile)
     for sampleNo in range(3):
         listOfRecords=Sample.getSample(1000)
         self.checkListOfRecords(listOfRecords, 'Sample_%d_1000' %sampleNo, 'pKey',doClose=False)  
         self.sqlDB.copyTo(copyDB)
     size=os.stat(dbFile).st_size
     if self.debug:
         print ("size of copy DB is %d" % size)
     self.assertTrue(size>70000)
     tableList=copyDB.getTableList()
     if self.debug:
         print(tableList)
     for sampleNo in range(3):
         self.assertEqual('Sample_%d_1000' %sampleNo,tableList[sampleNo]['name'])
     # check that database is writable
     # https://stackoverflow.com/a/44707371/1497139
     copyDB.execute("pragma user_version=0")
Ejemplo n.º 18
0
 def getFolder(self,archive,folderPath:str):
     '''
     get the folder for the given archive and folderPath
     
     Args:
         archive: the  archive
         folderPath: the path of the folder
     '''
     sqlDB=SQLDB(self.getCacheFile())
     sqlQuery="SELECT * FROM folder WHERE archiveName=(?) AND path=(?)"
     archiveName=archive.name
     params=(archiveName,folderPath,)
     records=sqlDB.query(sqlQuery, params)
     folder=None
     if len(records)>1:
         msg=f"{len(records)} folders found for {archiveName}:{folderPath} - there should be only one"
         raise Exception(msg)
     elif len(records)==1:
         folder=Folder()
         folder.fromDict(records[0])
     folder.archive=archive
     return folder
Ejemplo n.º 19
0
 def getSQLDB(self, cacheFileName='Event_all'):
     '''
     get the SQLDB
     
     Args:
         cacheFileName(string): prefix of database file name
         
     Returns:
         SQLDB: the SQLDB access
     '''
     dbfile = Lookup.getDBFile(cacheFileName)
     sqlDB = SQLDB(dbfile)
     return sqlDB
Ejemplo n.º 20
0
    def testIssue15(self):
        '''
        https://github.com/WolfgangFahl/pyLoDStorage/issues/15
        
        auto create view ddl in mergeschema
        
        '''
        self.sqlDB=SQLDB(debug=self.debug,errorDebug=self.debug)
        listOfRecords=Sample.getRoyals()
        entityInfo=EntityInfo(listOfRecords[:3],'Person','name',debug=self.debug)
        entityInfo=self.sqlDB.createTable(listOfRecords[:10],entityInfo.name,entityInfo.primaryKey)
        listOfRecords=[{'name': 'Royal family', 'country': 'UK', 'lastmodified':datetime.now()}]
        entityInfo=self.sqlDB.createTable(listOfRecords[:10],'Family','name')
        tableList=self.sqlDB.getTableList()
        viewDDL=Schema.getGeneralViewDDL(tableList,"PersonBase")
        if self.debug:
            print (viewDDL)
        expected="""CREATE VIEW PersonBase AS 
  SELECT name,lastmodified FROM Person
UNION
  SELECT name,lastmodified FROM Family"""
        self.assertEqual(expected,viewDDL)
        pass
 def testWordParser(self):
     '''
     try finding quantiles
     
     see https://stackoverflow.com/questions/2374640/how-do-i-calculate-percentiles-with-python-numpy
     '''
     lookup = Lookup("test Word parser")
     sqlDB = lookup.getSQLDB()
     if sqlDB is not None:
         totalWordUsages = []
         for source in ['wikidata', 'crossref', 'dblp', 'CEUR-WS']:
             listOfDicts = TestWordParser.getProceedingsTitles(
                 sqlDB, source)
             cwp = CorpusWordParser()
             wordusages = cwp.parse(listOfDicts)
             lens = {}
             for wordusage in wordusages:
                 totalWordUsages.append(wordusage.__dict__)
                 if wordusage.eventId in lens:
                     lens[wordusage.eventId] += 1
                 else:
                     lens[wordusage.eventId] = 1
             df = DataFrame(lens.values())
             print(df.quantile(1))
             quantileValues = df.quantile(.90)
             print(quantileValues)
             plot = Plot(lens.values(),
                         "%s wordcount histogram" % source,
                         xlabel="wordcount",
                         ylabel="frequency")
             plot.hist(mode='save')
         wordUsageDBFile = Lookup.getDBFile("wordusage")
         wSQLDB = SQLDB(wordUsageDBFile)
         entityInfo = wSQLDB.createTable(totalWordUsages,
                                         "wordusage",
                                         withDrop=True)
         wSQLDB.store(totalWordUsages, entityInfo)
Ejemplo n.º 22
0
 def getSampleTableDB(withDrop=False,debug=False,failIfTooFew=False,sampleSize=1000):
     listOfRecords=Sample.getSample(sampleSize)
     sqlDB=SQLDB()
     entityName="sample"
     primaryKey='pKey'
     sampleRecordCount=sampleSize*10
     sqlDB.debug=debug
     entityInfo=sqlDB.createTable(listOfRecords, entityName, primaryKey=primaryKey, withDrop=withDrop, sampleRecordCount=sampleRecordCount,failIfTooFew=failIfTooFew)
     executeMany=True
     fixNone=True
     sqlDB.store(listOfRecords,entityInfo,executeMany=executeMany,fixNone=fixNone)
     return sqlDB
Ejemplo n.º 23
0
 def setSingleDBConfig(self, config):
     '''
     set the cacheFile to the singleDB "Event_all.db"
     Args:
         config(Storareconfig): the storage configuration to use
     '''
     # if event_all.db is available use it ...
     config.cacheFile = Lookup.getDBFile()
     # if cacheFile is not available
     if not os.path.isfile(config.cacheFile):
         config.errors = ['cachefile %s is missing' % config.cacheFile]
         config.cacheFile = None
     else:
         config.errors = self.check(SQLDB(config.cacheFile),
                                    debug=self.debug)
         # make sure the event_all db is complete
         if len(config.errors) > 0:
             config.cacheFile = None
         else:
             config.singleDB = True
Ejemplo n.º 24
0
 def testBackup(self):
     '''
     test creating a backup of the SQL database
     '''
     if sys.version_info >= (3, 7):
         listOfRecords=Sample.getCities()
         self.checkListOfRecords(listOfRecords,'City',fixDates=True,doClose=False)
         backupDB="/tmp/testSqlite.db"
         showProgress=200 if self.debug else 0
         self.sqlDB.backup(backupDB,profile=self.debug,showProgress=showProgress)
         size=os.stat(backupDB).st_size
         if self.debug:
             print ("size of backup DB is %d" % size)
         self.assertTrue(size>600000)
         self.sqlDB.close()
         # restore
         ramDB=SQLDB.restore(backupDB, SQLDB.RAM, profile=self.debug,showProgress=showProgress)
         entityInfo=EntityInfo(listOfRecords[:50],'City',debug=self.debug)
         allCities=ramDB.queryAll(entityInfo)
         self.assertEqual(len(allCities),len(listOfRecords))
Ejemplo n.º 25
0
 def testQueryParams(self):
     '''
     test Query Params
     '''
     listOfDicts=[
         {"city": "New York", "country": "US"},
         {"city": "Amsterdam", "country": "NL"},
         {"city": "Paris", "country": "FR"}]
     sqlDB=SQLDB(debug=self.debug,errorDebug=True)
     entityInfo=sqlDB.createTable(listOfDicts[:10],'cities','city')
     sqlDB.store(listOfDicts,entityInfo,executeMany=False)
     query="SELECT * from cities WHERE country in (?)"
     params=('FR',)
     frCities=sqlDB.query(query,params)
     if self.debug:
         print (frCities);
     self.assertEqual([{'city': 'Paris', 'country': 'FR'}],frCities)
Ejemplo n.º 26
0
class Locator(object):
    '''
    location handling
    '''
    
    # singleton instance
    locator=None

    def __init__(self, db_file=None,correctMisspelling=False,debug=False):
        '''
        Constructor
        
        Args:
            db_file(str): the path to the database file
            correctMispelling(bool): if True correct typical misspellings
            debug(bool): if True show debug information
        '''
        self.debug=debug
        self.correctMisspelling=correctMisspelling
        self.db_path=os.path.dirname(os.path.realpath(__file__)) 
        self.db_file = db_file or self.db_path+"/locs.db"
        self.view="GeoLite2CityLookup"
        self.sqlDB=SQLDB(self.db_file,errorDebug=True)
        self.getAliases()
        self.dbVersion="2020-09-27 16:48:09"
        
    @staticmethod
    def resetInstance():
        Locator.locator=None    
    
    @staticmethod
    def getInstance(correctMisspelling=False,debug=False):
        '''
        get the singleton instance of the Locator. If parameters are changed on further calls
        the initial parameters will still be in effect since the original instance will be returned!
        
        Args:
            correctMispelling(bool): if True correct typical misspellings
            debug(bool): if True show debug information
        '''
        if Locator.locator is None:
            Locator.locator=Locator(correctMisspelling=correctMisspelling,debug=debug)
        return Locator.locator
        
    def locateCity(self,places):
        '''
        locate a city, region country combination based on the given wordtoken information
        
        Args:
            places(list): a list of places derived by splitting a locality e.g.  "San Francisco, CA"
            leads to "San Francisco", "CA"
        
        Returns:
            City: a city with country and region details
        '''
        # make sure the database is populated
        self.populate_db()
        country=None
        cities=[]
        regions=[]
        # loop over all word elements
        for place in places:
            place=place.strip()
            if place in self.aliases:
                place=self.aliases[place]
            foundCountry=self.getCountry(place)
            if foundCountry is not None:
                country=foundCountry
            foundCities=self.cities_for_name(place)
            cities.extend(foundCities)
            foundRegions=self.regions_for_name(place)
            regions.extend(foundRegions)
        foundCity=self.disambiguate(country, regions, cities)
        return foundCity
    
    def isISO(self,s):
        '''
        check if the given string is an ISO code
        
        Returns:
            bool: True if the string is an ISO Code
        '''
        m=re.search(r"^([A-Z]{1,2}\-)?[0-9A-Z]{1,3}$",s)
        result=m is not None
        return result
               
    def disambiguate(self,country,regions,cities,byPopulation=True): 
        '''
        try determining country, regions and city from the potential choices
        
        Args:
            country(Country): a matching country found
            regions(list): a list of matching Regions found
            cities(list): a list of matching cities found
            
        Return:
            City: the found city or None
        '''
        if self.debug:
            print("countries: %s " % country)
            print("regions: %s" % "\n\t".join(str(r) for r in regions))
            print("cities: %s" % "\n\t".join(str(c) for c in cities))
        foundCity=None
        # is the city information unique?
        if len(cities)==1:
            foundCity=cities[0]
        else: 
            if len(cities)>1:
                if country is not None:
                    for city in cities:
                        if self.debug:
                            print("city %s: " %(city))
                        if city.country.iso==country.iso:
                            foundCity=city
                            break
                if foundCity is None and len(regions)>0:
                    for region in regions:
                        for city in cities:
                            if city.region.iso==region.iso and not city.region.name==city.name:
                                foundCity=city
                                break;
                        if foundCity is not None:
                            break
                if foundCity is None and byPopulation:
                    foundCity=max(cities,key=lambda city:0 if city.population is None else city.population)
                    pass
                    
        return foundCity    
    
    def cities_for_name(self, cityName):
        '''
        find cities with the given cityName
        
        Args:
            cityName(string): the potential name of a city
        
        Returns:
            a list of city records
        '''
        cities=[]
        for column in ['name','wikidataName']:
            cityRecords=self.places_by_name(cityName, column)
            for cityRecord in cityRecords:
                cities.append(City.fromGeoLite2(cityRecord))
        return cities

    def regions_for_name(self, region_name):
        '''
        get the regions for the given region_name (which might be an ISO code)
        
        Args:
            region_name(string): region name
            
        Returns:
            list: the list of cities for this region
        '''
        regions=[]    
        if self.isISO(region_name):
            columnName="regionIsoCode"
        else:
            columnName='regionLabel'
        query="SELECT * from regions WHERE %s = (?)" % (columnName)
        params=(region_name,)
        regionRecords=self.sqlDB.query(query,params)
        for regionRecord in regionRecords:
            regions.append(Region.fromWikidata(regionRecord))
        return regions                     
    
    def correct_country_misspelling(self, name):
        '''
        correct potential misspellings 
        Args:
            name(string): the name of the country potentially misspelled
        Return:
            string: correct name of unchanged
        '''
        cur_dir = os.path.dirname(os.path.realpath(__file__))
        with open(cur_dir + "/data/ISO3166ErrorDictionary.csv") as info:
            reader = csv.reader(info)
            for row in reader:
                if name in remove_non_ascii(row[0]):
                    return row[2]
        return name

    def is_a_country(self, name):
        '''
        check if the given string name is a country
        
        Args:
            name(string): the string to check
        Returns:
            True: if pycountry thinks the string is a country
        '''
        country=self.getCountry(name)
        result=country is not None
        return result
       
    def getCountry(self,name):
        '''
        get the country for the given name    
        Args:
            name(string): the name of the country to lookup
        Returns:     
            country: the country if one was found or None if not
        '''
        if self.isISO(name):
            pcountry=pycountry.countries.get(alpha_2=name)
        else:
            if self.correctMisspelling:
                name = self.correct_country_misspelling(name)
            pcountry=pycountry.countries.get(name=name)
        country=None
        if pcountry is not None:
            country=Country.fromPyCountry(pcountry)
        #if country is None:
        #    query="SELECT * FROM countries WHERE countryLabel = (?)"""
        #    params=(name,)
        #    countryRecords=self.sqlDB.query(query,params)
        #    if len(countryRecords)>0:
        #        pass
        return country
    
    def getView(self):
        '''
        get the view to be used
        
        Returns:
            str: the SQL view to be used for CityLookups e.g. GeoLite2CityLookup
        '''
        view=self.view
        return view
 
    def places_by_name(self, placeName, columnName):
        '''
        get places by name and column
        Args:
            placeName(string): the name of the place
            columnName(string): the column to look at
        '''
        if not self.db_has_data():
            self.populate_db()
        view=self.getView()
        query='SELECT * FROM %s WHERE %s = (?)' % (view,columnName)
        params=(placeName,)
        cities=self.sqlDB.query(query,params)
        return cities
    
    def getGeolite2Cities(self):
        '''
        get the Geolite2 City-Locations as a list of Dicts
        
        Returns:
            list: a list of Geolite2 City-Locator dicts
        '''
        cities=self.readCSV("GeoLite2-City-Locations-en.csv")
        return cities
    
    def readCSV(self,fileName):
        records=[]
        cur_dir = os.path.dirname(os.path.realpath(__file__))
        csvfile="%s/data/%s" % (cur_dir,fileName)
        with open(csvfile) as info:
            reader = csv.DictReader(info)
            for row in reader:
                records.append(row)
        return records
     
    def recreateDatabase(self):
        '''
        recreate my lookup database
        '''
        print("recreating database ... %s" % self.db_file)
        self.populate_db(force=True)
                
    def populate_db(self,force=False):
        '''
        populate the cities SQL database which caches the information from the GeoLite2-City-Locations.csv file
        
        Args:
            force(bool): if True force a recreation of the database
        '''
        hasData=self.db_has_data()
        if force:
            self.populate_Cities(self.sqlDB)
            self.populateFromWikidata(self.sqlDB)
            self.getWikidataCityPopulation(self.sqlDB)
            self.createViews(self.sqlDB)
            self.populate_Version(self.sqlDB)
    
        elif not hasData:
            url="http://wiki.bitplan.com/images/confident/locs.db.gz"
            zipped=self.db_file+".gz"
            print("Downloading %s from %s ... this might take a few seconds" % (zipped,url))
            urllib.request.urlretrieve(url,zipped)
            print("unzipping %s from %s" % (self.db_file,zipped))
            with gzip.open(zipped, 'rb') as gzipped:
                with open(self.db_file, 'wb') as unzipped:
                    shutil.copyfileobj(gzipped, unzipped)
        if not os.path.isfile(self.db_file):
            raise("could not create lookup database %s" % self.db_file)
            
    def populate_Version(self,sqlDB):
        '''
        populate the version table
        
        Args:
            sqlDB(SQLDB): target SQL database
        '''
        versionList=[{"version":self.dbVersion}]
        entityInfo=sqlDB.createTable(versionList,"Version","version",withDrop=True)
        sqlDB.store(versionList,entityInfo)
        
    def getAliases(self):
        '''
        get the aliases hashTable
        '''
        aliases=self.readCSV("aliases.csv")
        self.aliases={}
        for alias in aliases:
            self.aliases[alias['name']]=alias['alias']
        
    def populateFromWikidata(self,sqlDB):
        '''
        populate countries and regions from Wikidata
        
        Args:
            sqlDB(SQLDB): target SQL database
        '''
        self.populate_Countries(sqlDB)
        self.populate_Regions(sqlDB)
        return
        # ignore the following code as of 2020-09-26
        self.populate_Cities_FromWikidata(sqlDB)
        viewDDLs=["DROP VIEW IF EXISTS WikidataCityLookup","""
CREATE VIEW WikidataCityLookup AS
SELECT 
  name AS name,
  regionLabel as regionName,
  regionIsoCode as regionIsoCode,
  countryLabel as countryName,
  countryIsoCode as countryIsoCode,
  cityPopulation as population,
  countryGDP_perCapita as gdp
FROM City_wikidata
"""]
#                  subdivision_1_name AS regionName,
#  subdivision_1_iso_code as regionIsoCode,
#  country_name AS countryName,
#  country_iso_code as countryIsoCode
        for viewDDL in viewDDLs:
            self.sqlDB.execute(viewDDL)
           
    def populate_Countries(self,sqlDB):
        '''
        populate database with countries from wikiData
        
        Args:
            sqlDB(SQLDB): target SQL database
        '''
        print("retrieving Country data from wikidata ... (this might take a few seconds)")
        wikidata=Wikidata()
        wikidata.getCountries()
        entityInfo=sqlDB.createTable(wikidata.countryList,"countries",None,withDrop=True,sampleRecordCount=200)
        sqlDB.store(wikidata.countryList,entityInfo,fixNone=True)

    def populate_Regions(self,sqlDB):
        '''
        populate database with regions from wikiData
        
        Args:
            sqlDB(SQLDB): target SQL database
        '''
        print("retrieving Region data from wikidata ... (this might take a minute)")
        wikidata=Wikidata()
        wikidata.getRegions()
        entityInfo=sqlDB.createTable(wikidata.regionList[:5000],"regions",primaryKey=None,withDrop=True)
        sqlDB.store(wikidata.regionList,entityInfo,fixNone=True)
   
    def populate_Cities_FromWikidata(self,sqlDB):
        '''
        populate the given sqlDB with the Wikidata Cities
        
        Args:
            sqlDB(SQLDB): target SQL database
        '''
        dbFile=self.db_path+"/City_wikidata.db"
        if not os.path.exists(dbFile):
            print("Downloading %s ... this might take a few seconds" % dbFile)
            dbUrl="http://wiki.bitplan.com/images/confident/City_wikidata.db"
            urllib.request.urlretrieve(dbUrl,dbFile)
        wikiCitiesDB=SQLDB(dbFile)
        wikiCitiesDB.copyTo(sqlDB)
        
    def getWikidataCityPopulation(self,sqlDB,endpoint=None):
        '''
        Args:
            sqlDB(SQLDB): target SQL database
            endpoint(str): url of the wikidata endpoint or None if default should be used
        '''
        dbFile=self.db_path+"/city_wikidata_population.db"
        rawTableName="cityPops"
        # is the wikidata population database available?
        if not os.path.exists(dbFile):
            # shall we created it from a wikidata query?
            if endpoint is not None:
                wikidata=Wikidata()
                wikidata.endpoint=endpoint
                cityList=wikidata.getCityPopulations()
                wikiCitiesDB=SQLDB(dbFile) 
                entityInfo=wikiCitiesDB.createTable(cityList[:300],rawTableName,primaryKey=None,withDrop=True)
                wikiCitiesDB.store(cityList,entityInfo,fixNone=True)
            else:
                # just download a copy 
                print("Downloading %s ... this might take a few seconds" % dbFile)
                dbUrl="http://wiki.bitplan.com/images/confident/city_wikidata_population.db"
                urllib.request.urlretrieve(dbUrl,dbFile)
        # (re) open the database
        wikiCitiesDB=SQLDB(dbFile) 
          
        # check whether the table is populated
        tableList=sqlDB.getTableList()        
        tableName="citiesWithPopulation"     
      
        if self.db_recordCount(tableList, tableName)<10000:
            # check that database is writable
            # https://stackoverflow.com/a/44707371/1497139
            sqlDB.execute("pragma user_version=0")
            # makes sure both tables are in target sqlDB
            wikiCitiesDB.copyTo(sqlDB)
            # create joined table
            sqlQuery="""
              select 
    geoname_id,
    city_name,
    cp.cityLabel,
    country_iso_code,
    country_name,
    subdivision_1_iso_code,
    subdivision_1_name,
    cp.city as wikidataurl,
    cp.cityPop 
  from cities c 
  join cityPops cp 
  on c.geoname_id=cp.geoNameId 
union  
  select 
    geoNameId as geoname_id,
    null as city_name,
    cityLabel,
    countryIsoCode as country_iso_code,
    countryLabel as country_name,
    null as subdivision_1_iso_code,
    null as subdivision_1_name,
    city as wikidataurl,
    cityPop 
  from cityPops 
  where cityPop is not Null
group by geoNameId
order by cityPop desc
            """
            cityList=sqlDB.query(sqlQuery) 
            entityInfo=sqlDB.createTable(cityList,tableName,primaryKey=None,withDrop=True,sampleRecordCount=500)
            sqlDB.store(cityList,entityInfo,fixNone=True)
            # remove raw Table
            #sqlCmd="DROP TABLE %s " %rawTableName
            #sqlDB.execute(sqlCmd)
            
     
    def populate_Cities(self,sqlDB):
        '''
        populate the given sqlDB with the Geolite2 Cities
        
        Args:
            sqlDB(SQLDB): the SQL database to use
        '''
        cities=self.getGeolite2Cities()
        entityName="cities"
        primaryKey="geoname_id"
        entityInfo=sqlDB.createTable(cities[:100],entityName,primaryKey,withDrop=True)
        sqlDB.store(cities,entityInfo,executeMany=False)
        
    def createViews(self,sqlDB):
        viewDDLs=["DROP VIEW IF EXISTS GeoLite2CityLookup","""
CREATE VIEW GeoLite2CityLookup AS
SELECT 
  city_name AS name,
  cityLabel AS wikidataName,
  wikidataurl,
  cityPop,
  subdivision_1_name AS regionName,
  subdivision_1_iso_code as regionIsoCode,
  country_name AS countryName,
  country_iso_code as countryIsoCode

FROM citiesWithPopulation
"""]
        for viewDDL in viewDDLs:
            sqlDB.execute(viewDDL)
        
    
    def db_recordCount(self,tableList,tableName):
        '''
        count the number of records for the given tableName
        
        Args:
            tableList(list): the list of table to check
            tableName(str): the name of the table to check
            
        Returns
            int: the number of records found for the table 
        '''
        tableFound=False
        for table in tableList:
            if table['name']==tableName:
                tableFound=True
                break
        count=0
        if tableFound:    
            query="SELECT Count(*) AS count FROM %s" % tableName
            countResult=self.sqlDB.query(query)
            count=countResult[0]['count']
        return count
     
    def db_has_data(self):
        '''
        check whether the database has data / is populated
        
        Returns:
            boolean: True if the cities table exists and has more than one record
        '''
        tableList=self.sqlDB.getTableList()
        hasCities=self.db_recordCount(tableList,"citiesWithPopulation")>10000
        hasCountries=self.db_recordCount(tableList,"countries")>100
        hasRegions=self.db_recordCount(tableList,"regions")>1000
        hasVersion=self.db_recordCount(tableList,"Version")==1
        versionOk=False
        if hasVersion:
            query="SELECT version from Version"
            dbVersionList=self.sqlDB.query(query)
            versionOk=dbVersionList[0]['version']==self.dbVersion
        #hasWikidataCities=self.db_recordCount(tableList,'City_wikidata')>100000
        ok=hasVersion and versionOk and hasCities and hasRegions and hasCountries
        return ok
Ejemplo n.º 27
0
    def getWikidataCityPopulation(self,sqlDB,endpoint=None):
        '''
        Args:
            sqlDB(SQLDB): target SQL database
            endpoint(str): url of the wikidata endpoint or None if default should be used
        '''
        dbFile=self.db_path+"/city_wikidata_population.db"
        rawTableName="cityPops"
        # is the wikidata population database available?
        if not os.path.exists(dbFile):
            # shall we created it from a wikidata query?
            if endpoint is not None:
                wikidata=Wikidata()
                wikidata.endpoint=endpoint
                cityList=wikidata.getCityPopulations()
                wikiCitiesDB=SQLDB(dbFile) 
                entityInfo=wikiCitiesDB.createTable(cityList[:300],rawTableName,primaryKey=None,withDrop=True)
                wikiCitiesDB.store(cityList,entityInfo,fixNone=True)
            else:
                # just download a copy 
                print("Downloading %s ... this might take a few seconds" % dbFile)
                dbUrl="http://wiki.bitplan.com/images/confident/city_wikidata_population.db"
                urllib.request.urlretrieve(dbUrl,dbFile)
        # (re) open the database
        wikiCitiesDB=SQLDB(dbFile) 
          
        # check whether the table is populated
        tableList=sqlDB.getTableList()        
        tableName="citiesWithPopulation"     
      
        if self.db_recordCount(tableList, tableName)<10000:
            # check that database is writable
            # https://stackoverflow.com/a/44707371/1497139
            sqlDB.execute("pragma user_version=0")
            # makes sure both tables are in target sqlDB
            wikiCitiesDB.copyTo(sqlDB)
            # create joined table
            sqlQuery="""
              select 
    geoname_id,
    city_name,
    cp.cityLabel,
    country_iso_code,
    country_name,
    subdivision_1_iso_code,
    subdivision_1_name,
    cp.city as wikidataurl,
    cp.cityPop 
  from cities c 
  join cityPops cp 
  on c.geoname_id=cp.geoNameId 
union  
  select 
    geoNameId as geoname_id,
    null as city_name,
    cityLabel,
    countryIsoCode as country_iso_code,
    countryLabel as country_name,
    null as subdivision_1_iso_code,
    null as subdivision_1_name,
    city as wikidataurl,
    cityPop 
  from cityPops 
  where cityPop is not Null
group by geoNameId
order by cityPop desc
            """
            cityList=sqlDB.query(sqlQuery) 
            entityInfo=sqlDB.createTable(cityList,tableName,primaryKey=None,withDrop=True,sampleRecordCount=500)
            sqlDB.store(cityList,entityInfo,fixNone=True)
Ejemplo n.º 28
0
    def main(cls, args):
        '''
        command line activation with parsed args
        
        Args:
            args(list): the command line arguments
        '''
        debug = args.debug
        endpoints = EndpointManager.getEndpoints(args.endpointPath)
        qm = QueryManager(lang=args.language,
                          debug=debug,
                          queriesPath=args.queriesPath)
        query = None
        queryCode = args.query
        endpointConf = None
        formats = None
        # preload ValueFormatter
        ValueFormatter.getFormats(args.formatsPath)
        if args.list:
            for name, query in qm.queriesByName.items():
                print(f"{name}:{query.title}")
        elif args.listEndpoints:
            # list endpoints
            for endpoint in endpoints.values():
                if hasattr(endpoint,
                           "lang") and endpoint.lang == args.language:
                    print(endpoint)

        elif args.queryName is not None:
            if debug or args.showQuery:
                print(f"named query {args.queryName}:")
            if args.queryName not in qm.queriesByName:
                raise Exception(f"named query {args.queryName} not available")
            query = qm.queriesByName[args.queryName]
            formats = query.formats
            queryCode = query.query
            if debug or args.showQuery:
                if hasattr(query,
                           "description") and query.description is not None:
                    print(query.description)
        if query is None:
            name = "?"
            if queryCode is None and args.queryFile is not None:
                queryFilePath = Path(args.queryFile)
                queryCode = queryFilePath.read_text()
                name = queryFilePath.stem
            query = Query(name="?", query=queryCode, lang=args.language)
        if queryCode:
            if debug or args.showQuery:
                print(f"{args.language}:\n{queryCode}")
            if args.endpointName:
                endpointConf = endpoints.get(args.endpointName)
            if args.language == "sparql":
                method = 'POST'
                if args.endpointName:
                    endPointUrl = endpointConf.endpoint
                    method = endpointConf.method
                    query.tryItUrl = endpointConf.website
                    query.database = endpointConf.database
                else:
                    endPointUrl = query.endpoint
                if args.method:
                    method = method
                sparql = SPARQL(endPointUrl, method=method)
                if args.prefixes and endpointConf is not None:
                    queryCode = f"{endpointConf.prefixes}\n{queryCode}"
                if args.raw:
                    qres = cls.rawQuery(endPointUrl,
                                        query=query.query,
                                        resultFormat=args.format,
                                        mimeType=args.mimeType)
                    print(qres)
                    return
                if "wikidata" in args.endpointName and formats is None:
                    formats = ["*:wikidata"]
                qlod = sparql.queryAsListOfDicts(queryCode)
            elif args.language == "sql":
                sqlDB = SQLDB(endpointConf.endpoint)
                qlod = sqlDB.query(queryCode)
            else:
                raise Exception(
                    f"language {args.language} not known/supported")
            if args.format is Format.csv:
                csv = CSV.toCSV(qlod)
                print(csv)
            elif args.format in [
                    Format.latex, Format.github, Format.mediawiki
            ]:
                doc = query.documentQueryResult(qlod,
                                                tablefmt=str(args.format),
                                                floatfmt=".0f")
                docstr = doc.asText()
                print(docstr)
            elif args.format in [Format.json
                                 ] or args.format is None:  # set as default
                # https://stackoverflow.com/a/36142844/1497139
                print(json.dumps(qlod, indent=2, sort_keys=True, default=str))
            elif args.format in [Format.xml]:
                lod2xml = Lod2Xml(qlod)
                xml = lod2xml.asXml()
                print(xml)

            else:
                raise Exception(f"format {args.format} not supported yet")
Ejemplo n.º 29
0
class TestSQLDB(Basetest):
    '''
    Test the SQLDB database wrapper
    '''
    
    def checkListOfRecords(self,listOfRecords,entityName,primaryKey=None,executeMany=True,fixNone=False,fixDates=False,debug=False,doClose=True):
        '''
        check the handling of the given list of Records
        
        Args:
          
           listOfRecords(list): a list of dicts that contain the data to be stored
           entityName(string): the name of the entity type to be used as a table name
           primaryKey(string): the name of the key / column to be used as a primary key
           executeMany(boolean): True if executeMany mode of sqlite3 should be used
           fixNone(boolean): fix dict entries that are undefined to have a "None" entry
           debug(boolean): True if debug information e.g. CREATE TABLE and INSERT INTO commands should be shown
           doClose(boolean): True if the connection should be closed
      
        '''     
        size=len(listOfRecords)
        if self.debug:
            print("%s size is %d fixNone is %r fixDates is: %r" % (entityName,size,fixNone,fixDates))
        self.sqlDB=SQLDB(debug=debug,errorDebug=True)
        entityInfo=self.sqlDB.createTable(listOfRecords[:10],entityName,primaryKey)
        startTime=time.time()
        self.sqlDB.store(listOfRecords,entityInfo,executeMany=executeMany,fixNone=fixNone)
        elapsed=time.time()-startTime
        if self.debug:
            print ("adding %d %s records took %5.3f s => %5.f records/s" % (size,entityName,elapsed,size/elapsed)) 
        resultList=self.sqlDB.queryAll(entityInfo,fixDates=fixDates)    
        if self.debug:
            print ("selecting %d %s records took %5.3f s => %5.f records/s" % (len(resultList),entityName,elapsed,len(resultList)/elapsed)) 
        if doClose:
            self.sqlDB.close()
        return resultList
    
    def testEntityInfo(self):
        '''
        test creating entityInfo from the sample record
        '''
        listOfRecords=Sample.getRoyals()
        entityInfo=EntityInfo(listOfRecords[:3],'Person','name',debug=True)
        self.assertEqual("CREATE TABLE Person(name TEXT PRIMARY KEY,born DATE,numberInLine INTEGER,wikidataurl TEXT,age FLOAT,ofAge BOOLEAN,lastmodified TIMESTAMP)",entityInfo.createTableCmd)
        self.assertEqual("INSERT INTO Person (name,born,numberInLine,wikidataurl,age,ofAge,lastmodified) values (:name,:born,:numberInLine,:wikidataurl,:age,:ofAge,:lastmodified)",entityInfo.insertCmd)
        self.sqlDB=SQLDB(debug=self.debug,errorDebug=True)
        entityInfo=self.sqlDB.createTable(listOfRecords[:10],entityInfo.name,entityInfo.primaryKey)
        tableList=self.sqlDB.getTableList()
        if self.debug:
            print (tableList)
        self.assertEqual(1,len(tableList))
        personTable=tableList[0]
        self.assertEqual("Person",personTable['name'])
        self.assertEqual(7,len(personTable['columns']))
        uml=UML()
        plantUml=uml.tableListToPlantUml(tableList,packageName="Royals",withSkin=False)
        if self.debug:
            print(plantUml)
        expected="""package Royals {
  class Person << Entity >> {
   age : FLOAT 
   born : DATE 
   lastmodified : TIMESTAMP 
   name : TEXT <<PK>>
   numberInLine : INTEGER 
   ofAge : BOOLEAN 
   wikidataurl : TEXT 
  }
}
"""
        self.assertEqual(expected,plantUml)
        
        # testGeneralization
        listOfRecords=[{'name': 'Royal family', 'country': 'UK', 'lastmodified':datetime.now()}]
        entityInfo=self.sqlDB.createTable(listOfRecords[:10],'Family','name')
        tableList=self.sqlDB.getTableList()
        self.assertEqual(2,len(tableList))
        uml=UML()
        plantUml=uml.tableListToPlantUml(tableList,generalizeTo="PersonBase",withSkin=False)
        if self.debug:
            print(plantUml)
        expected='''class PersonBase << Entity >> {
 lastmodified : TIMESTAMP 
 name : TEXT <<PK>>
}
class Person << Entity >> {
 age : FLOAT 
 born : DATE 
 numberInLine : INTEGER 
 ofAge : BOOLEAN 
 wikidataurl : TEXT 
}
class Family << Entity >> {
 country : TEXT 
}
PersonBase <|-- Person
PersonBase <|-- Family
'''
        self.assertEqual(expected,plantUml)
        
    def testIssue15(self):
        '''
        https://github.com/WolfgangFahl/pyLoDStorage/issues/15
        
        auto create view ddl in mergeschema
        
        '''
        self.sqlDB=SQLDB(debug=self.debug,errorDebug=self.debug)
        listOfRecords=Sample.getRoyals()
        entityInfo=EntityInfo(listOfRecords[:3],'Person','name',debug=self.debug)
        entityInfo=self.sqlDB.createTable(listOfRecords[:10],entityInfo.name,entityInfo.primaryKey)
        listOfRecords=[{'name': 'Royal family', 'country': 'UK', 'lastmodified':datetime.now()}]
        entityInfo=self.sqlDB.createTable(listOfRecords[:10],'Family','name')
        tableList=self.sqlDB.getTableList()
        viewDDL=Schema.getGeneralViewDDL(tableList,"PersonBase")
        if self.debug:
            print (viewDDL)
        expected="""CREATE VIEW PersonBase AS 
  SELECT name,lastmodified FROM Person
UNION
  SELECT name,lastmodified FROM Family"""
        self.assertEqual(expected,viewDDL)
        pass
        
    def testUniqueConstraint(self):
        '''
        test for https://github.com/WolfgangFahl/pyLoDStorage/issues/4
        sqlite3.IntegrityError: UNIQUE constraint failed: ... show debug info
        '''
        listOfDicts=[
            {"name": "John Doe"},
            {"name": "Frank Doe"}, 
            {"name": "John Doe"}, 
            {"name":"Tim Doe"}]
        sqlDB=SQLDB(debug=self.debug,errorDebug=True)
        entityInfo=sqlDB.createTable(listOfDicts[:10],'Does','name')
        try:
            sqlDB.store(listOfDicts,entityInfo,executeMany=False)
            self.fail("There should be an exception")
        except Exception as ex:
            expected="""INSERT INTO Does (name) values (:name)
failed:UNIQUE constraint failed: Does.name
record  #3={'name': 'John Doe'}"""
            errMsg=str(ex)
            self.assertEqual(expected,errMsg)
            
    def testSqlite3(self):
        '''
        test sqlite3 with a few records from the royal family
        '''
        listOfRecords=Sample.getRoyals()
        resultList=self.checkListOfRecords(listOfRecords, 'Person', 'name',debug=True)
        if self.debug:
            print(resultList)
        self.assertEqual(listOfRecords,resultList)
       
    def testIssue13_setNoneValue(self):
        '''
        https://github.com/WolfgangFahl/pyLoDStorage/issues/13
        set None value for undefined LoD entries
        '''
        listOfRecords=[
            { 'make': 'Ford','model':'Model T', 'color':'black'},
            { 'make': 'VW', 'model':'beetle'}
        ]
        entityName="Car"
        primaryKey="Model"
        resultList=self.checkListOfRecords(listOfRecords, entityName, primaryKey,fixNone=True)
        if self.debug:
            print (resultList)
            
    def testIssue14_execute(self):
        '''
        https://github.com/WolfgangFahl/pyLoDStorage/issues/14
        
        offer execute wrapper directly via sqlDB
        '''
        sqlDB=SQLDB()
        ddl="""
        CREATE TABLE contacts (
            contact_id INTEGER PRIMARY KEY,
            first_name TEXT NOT NULL,
            last_name TEXT NOT NULL
        )
        """
        sqlDB.execute(ddl)
        tableList=sqlDB.getTableList()
        if self.debug:
            print(tableList)
        self.assertEqual(1,len(tableList))
        self.assertEqual("contacts",tableList[0]['name'])
        
    def testIssue41(self):
        '''
        https://github.com/WolfgangFahl/pyLoDStorage/issues/41
        improve error message when create table command fails
        '''
        listOfRecords=[{
            'name':'value',
            'py/object': 'datetime.time'
        }]
        self.sqlDB=SQLDB(debug=self.debug,errorDebug=True)
        try:
            _entityInfo=self.sqlDB.createTable(listOfRecords[:1],'Invalid','name')
            self.fail("There should be an exception")
        except Exception as ex:
            self.assertTrue("CREATE TABLE Invalid" in str(ex))
         
    def testBindingError(self):
        '''
        test list of Records with incomplete record leading to
        "You did not supply a value for binding 2"
        see https://bugs.python.org/issue41638
        '''
        listOfRecords=[{'name':'Pikachu', 'type':'Electric'},{'name':'Raichu' }]
        for executeMany in [True,False]:
            try:
                self.checkListOfRecords(listOfRecords,'Pokemon','name',executeMany=executeMany)
                self.fail("There should be an exception")
            except Exception as ex:
                if self.debug:
                    print(str(ex))
                self.assertTrue('no value supplied for column' in str(ex))                                                         
        
    def testListOfCities(self):
        '''
        test sqlite3 with some 120000 city records
        '''
        listOfRecords=Sample.getCities()
        for fixDates in [True,False]:
            retrievedList=self.checkListOfRecords(listOfRecords,'City',fixDates=fixDates)
            self.assertEqual(len(listOfRecords),len(retrievedList))
            
    def testQueryParams(self):
        '''
        test Query Params
        '''
        listOfDicts=[
            {"city": "New York", "country": "US"},
            {"city": "Amsterdam", "country": "NL"},
            {"city": "Paris", "country": "FR"}]
        sqlDB=SQLDB(debug=self.debug,errorDebug=True)
        entityInfo=sqlDB.createTable(listOfDicts[:10],'cities','city')
        sqlDB.store(listOfDicts,entityInfo,executeMany=False)
        query="SELECT * from cities WHERE country in (?)"
        params=('FR',)
        frCities=sqlDB.query(query,params)
        if self.debug:
            print (frCities);
        self.assertEqual([{'city': 'Paris', 'country': 'FR'}],frCities)
        
    def testSqllite3Speed(self):
        '''
        test sqlite3 speed with some 100000 artificial sample records
        consisting of two columns with a running index
        '''
        limit=100000
        listOfRecords=Sample.getSample(limit)
        self.checkListOfRecords(listOfRecords, 'Sample', 'pKey')  
        
    def testIssue87AllowUsingQueryWithGenerator(self):
        '''
        test the query gen approach
        '''   
        debug=self.debug
        #debug=True
        sqlDB=self.getSampleTableDB(sampleSize=5)
        sqlQuery="select * FROM sample"
        for cindex,record in enumerate(sqlDB.queryGen(sqlQuery)):
            if debug:
                print(record)
            self.assertEqual(cindex,record["cindex"])

    def testBackup(self):
        '''
        test creating a backup of the SQL database
        '''
        if sys.version_info >= (3, 7):
            listOfRecords=Sample.getCities()
            self.checkListOfRecords(listOfRecords,'City',fixDates=True,doClose=False)
            backupDB="/tmp/testSqlite.db"
            showProgress=200 if self.debug else 0
            self.sqlDB.backup(backupDB,profile=self.debug,showProgress=showProgress)
            size=os.stat(backupDB).st_size
            if self.debug:
                print ("size of backup DB is %d" % size)
            self.assertTrue(size>600000)
            self.sqlDB.close()
            # restore
            ramDB=SQLDB.restore(backupDB, SQLDB.RAM, profile=self.debug,showProgress=showProgress)
            entityInfo=EntityInfo(listOfRecords[:50],'City',debug=self.debug)
            allCities=ramDB.queryAll(entityInfo)
            self.assertEqual(len(allCities),len(listOfRecords))
            
    def testCopy(self):
        '''
        test copying databases into another database
        '''
        dbFile="/tmp/DAWT_Sample3x1000.db"
        copyDB=SQLDB(dbFile)
        for sampleNo in range(3):
            listOfRecords=Sample.getSample(1000)
            self.checkListOfRecords(listOfRecords, 'Sample_%d_1000' %sampleNo, 'pKey',doClose=False)  
            self.sqlDB.copyTo(copyDB)
        size=os.stat(dbFile).st_size
        if self.debug:
            print ("size of copy DB is %d" % size)
        self.assertTrue(size>70000)
        tableList=copyDB.getTableList()
        if self.debug:
            print(tableList)
        for sampleNo in range(3):
            self.assertEqual('Sample_%d_1000' %sampleNo,tableList[sampleNo]['name'])
        # check that database is writable
        # https://stackoverflow.com/a/44707371/1497139
        copyDB.execute("pragma user_version=0")
        
    @staticmethod
    def getSampleTableDB(withDrop=False,debug=False,failIfTooFew=False,sampleSize=1000):
        listOfRecords=Sample.getSample(sampleSize)
        sqlDB=SQLDB()
        entityName="sample"
        primaryKey='pKey'
        sampleRecordCount=sampleSize*10
        sqlDB.debug=debug
        entityInfo=sqlDB.createTable(listOfRecords, entityName, primaryKey=primaryKey, withDrop=withDrop, sampleRecordCount=sampleRecordCount,failIfTooFew=failIfTooFew)
        executeMany=True
        fixNone=True
        sqlDB.store(listOfRecords,entityInfo,executeMany=executeMany,fixNone=fixNone)
        return sqlDB
        
    def testIssue16(self):
        '''
        https://github.com/WolfgangFahl/pyLoDStorage/issues/16
        allow to only warn if samplerecordcount is higher than number of available records
        '''
        self.getSampleTableDB(withDrop=False, debug=True,failIfTooFew=False)
        try:
            self.getSampleTableDB(withDrop=True, debug=True,failIfTooFew=True)
            self.fail("There should be an exception that too few sample records where provided")
        except Exception as ex:
            self.assertTrue("only 1000/10000 of needed sample records to createTable available" in str(ex))
        
    def testIssue18(self):
        '''
        https://github.com/WolfgangFahl/pyLoDStorage/issues/18
        '''
        sqlDB=self.getSampleTableDB()
        tableDict=sqlDB.getTableDict()
        if self.debug:
            print (tableDict)
        self.assertTrue("sample" in tableDict)
        cols=tableDict["sample"]["columns"]
        self.assertTrue("pkey" in cols)
Ejemplo n.º 30
0
    def testEntityInfo(self):
        '''
        test creating entityInfo from the sample record
        '''
        listOfRecords=Sample.getRoyals()
        entityInfo=EntityInfo(listOfRecords[:3],'Person','name',debug=True)
        self.assertEqual("CREATE TABLE Person(name TEXT PRIMARY KEY,born DATE,numberInLine INTEGER,wikidataurl TEXT,age FLOAT,ofAge BOOLEAN,lastmodified TIMESTAMP)",entityInfo.createTableCmd)
        self.assertEqual("INSERT INTO Person (name,born,numberInLine,wikidataurl,age,ofAge,lastmodified) values (:name,:born,:numberInLine,:wikidataurl,:age,:ofAge,:lastmodified)",entityInfo.insertCmd)
        self.sqlDB=SQLDB(debug=self.debug,errorDebug=True)
        entityInfo=self.sqlDB.createTable(listOfRecords[:10],entityInfo.name,entityInfo.primaryKey)
        tableList=self.sqlDB.getTableList()
        if self.debug:
            print (tableList)
        self.assertEqual(1,len(tableList))
        personTable=tableList[0]
        self.assertEqual("Person",personTable['name'])
        self.assertEqual(7,len(personTable['columns']))
        uml=UML()
        plantUml=uml.tableListToPlantUml(tableList,packageName="Royals",withSkin=False)
        if self.debug:
            print(plantUml)
        expected="""package Royals {
  class Person << Entity >> {
   age : FLOAT 
   born : DATE 
   lastmodified : TIMESTAMP 
   name : TEXT <<PK>>
   numberInLine : INTEGER 
   ofAge : BOOLEAN 
   wikidataurl : TEXT 
  }
}
"""
        self.assertEqual(expected,plantUml)
        
        # testGeneralization
        listOfRecords=[{'name': 'Royal family', 'country': 'UK', 'lastmodified':datetime.now()}]
        entityInfo=self.sqlDB.createTable(listOfRecords[:10],'Family','name')
        tableList=self.sqlDB.getTableList()
        self.assertEqual(2,len(tableList))
        uml=UML()
        plantUml=uml.tableListToPlantUml(tableList,generalizeTo="PersonBase",withSkin=False)
        if self.debug:
            print(plantUml)
        expected='''class PersonBase << Entity >> {
 lastmodified : TIMESTAMP 
 name : TEXT <<PK>>
}
class Person << Entity >> {
 age : FLOAT 
 born : DATE 
 numberInLine : INTEGER 
 ofAge : BOOLEAN 
 wikidataurl : TEXT 
}
class Family << Entity >> {
 country : TEXT 
}
PersonBase <|-- Person
PersonBase <|-- Family
'''
        self.assertEqual(expected,plantUml)