def store(self, cacheFileName='Event_all'):
     '''
     store my contents to the given cacheFileName - 
     implemented as SQL storage
     
     Args:
         cacheFileName(string): the path to the database
     '''
     dbfile = Lookup.getDBFile(cacheFileName)
     # remove existing database dump if it exists
     if os.path.exists(dbfile):
         os.remove(dbfile)
     backup = SQLDB(dbfile)
     errors = []
     print("storing %s to %s" % (self.name, dbfile))
     # debugging of CREATE TABLE
     # backup.c.execute("CREATE TABLE Event_test(foundBy TEXT)")
     for em in self.ems:
         if not em.config.mode is StoreMode.SQL:
             raise Exception(
                 "lookup store only support SQL storemode but found %s for %s"
                 % (em.config.mode, em.name))
         else:
             cacheFile = em.getCacheFile(config=em.config,
                                         mode=StoreMode.SQL)
             sqlDB = em.getSQLDB(cacheFile)
             sqlDB.copyTo(backup)
     backup.close()
     return errors
Example #2
0
 def checkListOfRecords(self,listOfRecords,entityName,primaryKey=None,executeMany=True,fixNone=False,fixDates=False,debug=False,doClose=True):
     '''
     check the handling of the given list of Records
     
     Args:
       
        listOfRecords(list): a list of dicts that contain the data to be stored
        entityName(string): the name of the entity type to be used as a table name
        primaryKey(string): the name of the key / column to be used as a primary key
        executeMany(boolean): True if executeMany mode of sqlite3 should be used
        fixNone(boolean): fix dict entries that are undefined to have a "None" entry
        debug(boolean): True if debug information e.g. CREATE TABLE and INSERT INTO commands should be shown
        doClose(boolean): True if the connection should be closed
   
     '''     
     size=len(listOfRecords)
     if self.debug:
         print("%s size is %d fixNone is %r fixDates is: %r" % (entityName,size,fixNone,fixDates))
     self.sqlDB=SQLDB(debug=debug,errorDebug=True)
     entityInfo=self.sqlDB.createTable(listOfRecords[:10],entityName,primaryKey)
     startTime=time.time()
     self.sqlDB.store(listOfRecords,entityInfo,executeMany=executeMany,fixNone=fixNone)
     elapsed=time.time()-startTime
     if self.debug:
         print ("adding %d %s records took %5.3f s => %5.f records/s" % (size,entityName,elapsed,size/elapsed)) 
     resultList=self.sqlDB.queryAll(entityInfo,fixDates=fixDates)    
     if self.debug:
         print ("selecting %d %s records took %5.3f s => %5.f records/s" % (len(resultList),entityName,elapsed,len(resultList)/elapsed)) 
     if doClose:
         self.sqlDB.close()
     return resultList
Example #3
0
 def getSqlDB():
     '''
     get the SQlite database connection
     '''
     config=DMSStorage.getStorageConfig(mode="sql")
     # https://stackoverflow.com/a/48234567/1497139
     sqlDB=SQLDB(config.cacheFile,check_same_thread=False)
     return sqlDB
 def getWordUsageDB(self):
     '''
     get the Word usage database
     '''
     wordUsageDBFile = Lookup.getDBFile("wordusage")
     if os.path.isfile(wordUsageDBFile):
         wSQLDB = SQLDB(wordUsageDBFile)
         return wSQLDB
     return None
 def copyFrom(self, dbFile):
     '''
     copy the contents of the given database file to my database
     Args:
         dbFile(string): the database to copy the content from
     '''
     sqlDB = self.getSQLDB()
     sourceDB = SQLDB(dbFile)
     sourceDB.copyTo(sqlDB)
Example #6
0
 def getDocumentRecords(self,archiveName,folderPath):
     '''
     get the document records
     '''
     sqlDB=SQLDB(self.getCacheFile())
     sqlQuery="SELECT * FROM document WHERE archiveName=(?) AND folderPath=(?)"
     params=(archiveName,folderPath,)
     dictList=sqlDB.query(sqlQuery, params)
     return dictList
Example #7
0
    def testGeoTextAndGrapy(self):
        '''
        test the GeoText and geograpy3 library
        '''
        debug = True
        limit = 100
        sqlQuery = """select count(*) as count,
locality from Event_wikicfp
where locality is not null
group by locality
order by 1 desc
LIMIT %d
""" % limit
        dbFile = Lookup.getDBFile()
        if os.path.isfile(dbFile):
            sqlDB = SQLDB(dbFile)
        else:
            lookup = Lookup.ensureAllIsAvailable("testGeoText")
            sqlDB = lookup.getSQLDB()
        if sqlDB is not None:
            print("testGeoText from database %s " % sqlDB.dbname)
            totalResult = sqlDB.query("""select count(*) as count
  from event_wikicfp
  where locality is not null""")
            total = totalResult[0]['count']
            listOfDicts = sqlDB.query(sqlQuery)
            index = 0
            rsum = 0
            found = 0
            problems = []
            for record in listOfDicts:
                locality = record['locality']
                count = record['count']
                index += 1
                rsum += count
                print("%5d: %5d/%5d %5.1f%%=%s" %
                      (index, count, rsum, rsum / total * 100, locality))
                geo = GeoText(locality)
                if debug:
                    print("  %s" % geo.countries)
                    print("  %s" % geo.cities)
                city = geograpy.locateCity(locality)
                if city is not None:
                    found += 1
                else:
                    problems.append(locality)
                if debug:
                    print("  %s%s" % (city, '✅' if city is not None else '❌'))
            if self.debug:
                print("found %d/%d = %5.1f%%" %
                      (found, limit, found / limit * 100))
                print("problems: %s" % problems)
            self.assertTrue(found / limit > 0.8)
        pass
 def getSQLDB(self, cacheFile):
     '''
     get the SQL database for the given cacheFile
     
     Args:
         cacheFile(string): the file to get the SQL db from
     '''
     config = self.config
     sqldb = self.sqldb = SQLDB(cacheFile,
                                debug=config.debug,
                                errorDebug=config.errorDebug)
     return sqldb
Example #9
0
 def getSampleTableDB(withDrop=False,debug=False,failIfTooFew=False,sampleSize=1000):
     listOfRecords=Sample.getSample(sampleSize)
     sqlDB=SQLDB()
     entityName="sample"
     primaryKey='pKey'
     sampleRecordCount=sampleSize*10
     sqlDB.debug=debug
     entityInfo=sqlDB.createTable(listOfRecords, entityName, primaryKey=primaryKey, withDrop=withDrop, sampleRecordCount=sampleRecordCount,failIfTooFew=failIfTooFew)
     executeMany=True
     fixNone=True
     sqlDB.store(listOfRecords,entityInfo,executeMany=executeMany,fixNone=fixNone)
     return sqlDB
Example #10
0
 def getSQLDB(self, cacheFileName='Event_all'):
     '''
     get the SQLDB
     
     Args:
         cacheFileName(string): prefix of database file name
         
     Returns:
         SQLDB: the SQLDB access
     '''
     dbfile = Lookup.getDBFile(cacheFileName)
     sqlDB = SQLDB(dbfile)
     return sqlDB
Example #11
0
 def testWikidataCities(self):
     '''
     test getting city information from wikidata
     
     '''
     # Wikidata time outs in CI environment need to be avoided
     if getpass.getuser() != "wf":
         return
     config = StorageConfig.getSQL(debug=self.debug)
     config.cacheRootDir = "/tmp/wdhs"
     cachedir = config.getCachePath()
     config.cacheFile = f"{cachedir}/hs.db"
     # use 2018 wikidata copy
     # wikidata.endpoint="http://blazegraph.bitplan.com/sparql"
     # use 2020 wikidata copy
     wikidata = Wikidata()
     wikidata.endpoint = "https://confident.dbis.rwth-aachen.de/jena/wdhs/sparql"
     #wikidata.endpoint="http://jena.bitplan.com/wdhs/sparql"
     regions = [{
         "name": "Singapore",
         "country": "Q334",
         "region": None,
         "cities": 46
     }, {
         "name": "Beijing",
         "country": None,
         "region": "Q956",
         "cities": 25
     }, {
         "name": "Paris",
         "country": None,
         "region": "Q13917",
         "cities": 1242
     }, {
         "name": "Barcelona",
         "country": None,
         "region": "Q5705",
         "cities": 1242
     }, {
         "name": "Rome",
         "country": None,
         "region": "Q1282",
         "cities": 1242
     }]
     limit = 1000000  #if self.inCI() else 100
     cityList = wikidata.getCities(limit=limit)
     sqlDB = SQLDB(config.cacheFile)
     entityInfo = sqlDB.createTable(cityList, "hs", withDrop=True)
     sqlDB.store(cityList, entityInfo, fixNone=True)
     expected = 200000  # if self.inCI() else limit
     self.assertTrue(len(cityList) >= expected)
Example #12
0
 def populate_Cities_FromWikidata(self,sqlDB):
     '''
     populate the given sqlDB with the Wikidata Cities
     
     Args:
         sqlDB(SQLDB): target SQL database
     '''
     dbFile=self.db_path+"/City_wikidata.db"
     if not os.path.exists(dbFile):
         print("Downloading %s ... this might take a few seconds" % dbFile)
         dbUrl="http://wiki.bitplan.com/images/confident/City_wikidata.db"
         urllib.request.urlretrieve(dbUrl,dbFile)
     wikiCitiesDB=SQLDB(dbFile)
     wikiCitiesDB.copyTo(sqlDB)
Example #13
0
 def testIssue41(self):
     '''
     https://github.com/WolfgangFahl/pyLoDStorage/issues/41
     improve error message when create table command fails
     '''
     listOfRecords=[{
         'name':'value',
         'py/object': 'datetime.time'
     }]
     self.sqlDB=SQLDB(debug=self.debug,errorDebug=True)
     try:
         _entityInfo=self.sqlDB.createTable(listOfRecords[:1],'Invalid','name')
         self.fail("There should be an exception")
     except Exception as ex:
         self.assertTrue("CREATE TABLE Invalid" in str(ex))
Example #14
0
 def testQueryParams(self):
     '''
     test Query Params
     '''
     listOfDicts=[
         {"city": "New York", "country": "US"},
         {"city": "Amsterdam", "country": "NL"},
         {"city": "Paris", "country": "FR"}]
     sqlDB=SQLDB(debug=self.debug,errorDebug=True)
     entityInfo=sqlDB.createTable(listOfDicts[:10],'cities','city')
     sqlDB.store(listOfDicts,entityInfo,executeMany=False)
     query="SELECT * from cities WHERE country in (?)"
     params=('FR',)
     frCities=sqlDB.query(query,params)
     if self.debug:
         print (frCities);
     self.assertEqual([{'city': 'Paris', 'country': 'FR'}],frCities)
Example #15
0
 def __init__(self, db_file=None,correctMisspelling=False,debug=False):
     '''
     Constructor
     
     Args:
         db_file(str): the path to the database file
         correctMispelling(bool): if True correct typical misspellings
         debug(bool): if True show debug information
     '''
     self.debug=debug
     self.correctMisspelling=correctMisspelling
     self.db_path=os.path.dirname(os.path.realpath(__file__)) 
     self.db_file = db_file or self.db_path+"/locs.db"
     self.view="GeoLite2CityLookup"
     self.sqlDB=SQLDB(self.db_file,errorDebug=True)
     self.getAliases()
     self.dbVersion="2020-09-27 16:48:09"
Example #16
0
    def testCacheLocationLabels(self):
        '''
        Generates the location label tabels in the SQL db fro countries, regions and cities by querying wikidata for
        the rdfs:label and skos:altLa of each location.
        A view containing all location labels is also created.
        '''
        testLocationLabelExtraction = False
        if testLocationLabelExtraction:
            wd = Wikidata()
            config = LocationContext.getDefaultConfig()
            countryManager = CountryManager(config=config)
            regionManager = RegionManager(config=config)
            cityManager = CityManager(config=config)
            sqlDb = SQLDB(dbname=config.cacheFile, debug=self.debug)
            for manager in countryManager, regionManager, cityManager:
                manager.fromCache()
                wikidataIdQuery = f"SELECT DISTINCT wikidataid FROM {manager.entityPluralName}"
                wikidataIdQueryRes = sqlDb.query(wikidataIdQuery)
                wikidataIds = [l['wikidataid'] for l in wikidataIdQueryRes]

                chunkSize = 1000
                iterations = math.ceil(len(wikidataIds) / chunkSize)
                progress = 0
                res = []
                for i in range(iterations):
                    workOnIds = wikidataIds[i * chunkSize:(i + 1) * chunkSize]
                    progress += len(workOnIds)
                    index = 0
                    values = ""
                    for location in workOnIds:
                        spacer = "  \n\t\t\t" if index % 10 == 0 else " "
                        values += f"{spacer}wd:{wd.getWikidataId(location)}"
                        index += 1
                    query = self.getLablesQuery(values)
                    res.extend(
                        wd.query(
                            f"Query {i}/{iterations} - Querying {manager.entityName} Labels",
                            queryString=query))
                wd.store2DB(res,
                            tableName=f"{manager.entityName}_labels",
                            sqlDB=sqlDb)
            self.createViews(sqlDB=sqlDb)
Example #17
0
 def setSingleDBConfig(self, config):
     '''
     set the cacheFile to the singleDB "Event_all.db"
     Args:
         config(Storareconfig): the storage configuration to use
     '''
     # if event_all.db is available use it ...
     config.cacheFile = Lookup.getDBFile()
     # if cacheFile is not available
     if not os.path.isfile(config.cacheFile):
         config.errors = ['cachefile %s is missing' % config.cacheFile]
         config.cacheFile = None
     else:
         config.errors = self.check(SQLDB(config.cacheFile),
                                    debug=self.debug)
         # make sure the event_all db is complete
         if len(config.errors) > 0:
             config.cacheFile = None
         else:
             config.singleDB = True
Example #18
0
 def testIssue14_execute(self):
     '''
     https://github.com/WolfgangFahl/pyLoDStorage/issues/14
     
     offer execute wrapper directly via sqlDB
     '''
     sqlDB=SQLDB()
     ddl="""
     CREATE TABLE contacts (
         contact_id INTEGER PRIMARY KEY,
         first_name TEXT NOT NULL,
         last_name TEXT NOT NULL
     )
     """
     sqlDB.execute(ddl)
     tableList=sqlDB.getTableList()
     if self.debug:
         print(tableList)
     self.assertEqual(1,len(tableList))
     self.assertEqual("contacts",tableList[0]['name'])
Example #19
0
    def testPlantUml(self):
        '''
        get plant UML functionality 
        '''
        schemaManager = None
        if getpass.getuser() != "travis":
            o = Ontology()
            schemaManager = o.getRQSchema(
                fromCache=False)  # to force SMW query

        lookup = Lookup("plantuml", getAll=False, butNot='or')
        dbfile = lookup.getDBFile('Event_all')
        sqlDB = SQLDB(dbfile)
        tableList = sqlDB.getTableList()
        eventTableList = []
        eventSchemas = lookup.getEventSchemas()
        for table in tableList:
            tableName = table['name']
            if tableName.startswith("Event_"):
                table['schema'] = eventSchemas[tableName]
                eventTableList.append(table)
                countQuery = "SELECT count(*) as count from %s" % tableName
                countResult = sqlDB.query(countQuery)
                table['instances'] = countResult[0]['count']
        self.assertEqual(8, len(eventTableList))
        uml = UML()
        now = datetime.now()
        nowYMD = now.strftime("%Y-%m-%d")
        title = """ConfIDent  Entities
%s
[[https://projects.tib.eu/en/confident/ © 2019-2020 ConfIDent project]]
see also [[http://ptp.bitplan.com/settings Proceedings Title Parser]]
""" % nowYMD
        plantUml = uml.mergeSchema(schemaManager,
                                   eventTableList,
                                   title=title,
                                   packageName='DataDonations',
                                   generalizeTo="Event")
        print(plantUml)
        self.assertTrue("Event <|-- Event_confref" in plantUml)
        self.assertTrue("class Event " in plantUml)
Example #20
0
    def testUniqueConstraint(self):
        '''
        test for https://github.com/WolfgangFahl/pyLoDStorage/issues/4
        sqlite3.IntegrityError: UNIQUE constraint failed: ... show debug info
        '''
        listOfDicts=[
            {"name": "John Doe"},
            {"name": "Frank Doe"}, 
            {"name": "John Doe"}, 
            {"name":"Tim Doe"}]
        sqlDB=SQLDB(debug=self.debug,errorDebug=True)
        entityInfo=sqlDB.createTable(listOfDicts[:10],'Does','name')
        try:
            sqlDB.store(listOfDicts,entityInfo,executeMany=False)
            self.fail("There should be an exception")
        except Exception as ex:
            expected="""INSERT INTO Does (name) values (:name)
failed:UNIQUE constraint failed: Does.name
record  #3={'name': 'John Doe'}"""
            errMsg=str(ex)
            self.assertEqual(expected,errMsg)
Example #21
0
 def getFolder(self,archive,folderPath:str):
     '''
     get the folder for the given archive and folderPath
     
     Args:
         archive: the  archive
         folderPath: the path of the folder
     '''
     sqlDB=SQLDB(self.getCacheFile())
     sqlQuery="SELECT * FROM folder WHERE archiveName=(?) AND path=(?)"
     archiveName=archive.name
     params=(archiveName,folderPath,)
     records=sqlDB.query(sqlQuery, params)
     folder=None
     if len(records)>1:
         msg=f"{len(records)} folders found for {archiveName}:{folderPath} - there should be only one"
         raise Exception(msg)
     elif len(records)==1:
         folder=Folder()
         folder.fromDict(records[0])
     folder.archive=archive
     return folder
Example #22
0
 def testCopy(self):
     '''
     test copying databases into another database
     '''
     dbFile="/tmp/DAWT_Sample3x1000.db"
     copyDB=SQLDB(dbFile)
     for sampleNo in range(3):
         listOfRecords=Sample.getSample(1000)
         self.checkListOfRecords(listOfRecords, 'Sample_%d_1000' %sampleNo, 'pKey',doClose=False)  
         self.sqlDB.copyTo(copyDB)
     size=os.stat(dbFile).st_size
     if self.debug:
         print ("size of copy DB is %d" % size)
     self.assertTrue(size>70000)
     tableList=copyDB.getTableList()
     if self.debug:
         print(tableList)
     for sampleNo in range(3):
         self.assertEqual('Sample_%d_1000' %sampleNo,tableList[sampleNo]['name'])
     # check that database is writable
     # https://stackoverflow.com/a/44707371/1497139
     copyDB.execute("pragma user_version=0")
Example #23
0
    def testIssue15(self):
        '''
        https://github.com/WolfgangFahl/pyLoDStorage/issues/15
        
        auto create view ddl in mergeschema
        
        '''
        self.sqlDB=SQLDB(debug=self.debug,errorDebug=self.debug)
        listOfRecords=Sample.getRoyals()
        entityInfo=EntityInfo(listOfRecords[:3],'Person','name',debug=self.debug)
        entityInfo=self.sqlDB.createTable(listOfRecords[:10],entityInfo.name,entityInfo.primaryKey)
        listOfRecords=[{'name': 'Royal family', 'country': 'UK', 'lastmodified':datetime.now()}]
        entityInfo=self.sqlDB.createTable(listOfRecords[:10],'Family','name')
        tableList=self.sqlDB.getTableList()
        viewDDL=Schema.getGeneralViewDDL(tableList,"PersonBase")
        if self.debug:
            print (viewDDL)
        expected="""CREATE VIEW PersonBase AS 
  SELECT name,lastmodified FROM Person
UNION
  SELECT name,lastmodified FROM Family"""
        self.assertEqual(expected,viewDDL)
        pass
 def testWordParser(self):
     '''
     try finding quantiles
     
     see https://stackoverflow.com/questions/2374640/how-do-i-calculate-percentiles-with-python-numpy
     '''
     lookup = Lookup("test Word parser")
     sqlDB = lookup.getSQLDB()
     if sqlDB is not None:
         totalWordUsages = []
         for source in ['wikidata', 'crossref', 'dblp', 'CEUR-WS']:
             listOfDicts = TestWordParser.getProceedingsTitles(
                 sqlDB, source)
             cwp = CorpusWordParser()
             wordusages = cwp.parse(listOfDicts)
             lens = {}
             for wordusage in wordusages:
                 totalWordUsages.append(wordusage.__dict__)
                 if wordusage.eventId in lens:
                     lens[wordusage.eventId] += 1
                 else:
                     lens[wordusage.eventId] = 1
             df = DataFrame(lens.values())
             print(df.quantile(1))
             quantileValues = df.quantile(.90)
             print(quantileValues)
             plot = Plot(lens.values(),
                         "%s wordcount histogram" % source,
                         xlabel="wordcount",
                         ylabel="frequency")
             plot.hist(mode='save')
         wordUsageDBFile = Lookup.getDBFile("wordusage")
         wSQLDB = SQLDB(wordUsageDBFile)
         entityInfo = wSQLDB.createTable(totalWordUsages,
                                         "wordusage",
                                         withDrop=True)
         wSQLDB.store(totalWordUsages, entityInfo)
Example #25
0
    def getWikidataCityPopulation(self,sqlDB,endpoint=None):
        '''
        Args:
            sqlDB(SQLDB): target SQL database
            endpoint(str): url of the wikidata endpoint or None if default should be used
        '''
        dbFile=self.db_path+"/city_wikidata_population.db"
        rawTableName="cityPops"
        # is the wikidata population database available?
        if not os.path.exists(dbFile):
            # shall we created it from a wikidata query?
            if endpoint is not None:
                wikidata=Wikidata()
                wikidata.endpoint=endpoint
                cityList=wikidata.getCityPopulations()
                wikiCitiesDB=SQLDB(dbFile) 
                entityInfo=wikiCitiesDB.createTable(cityList[:300],rawTableName,primaryKey=None,withDrop=True)
                wikiCitiesDB.store(cityList,entityInfo,fixNone=True)
            else:
                # just download a copy 
                print("Downloading %s ... this might take a few seconds" % dbFile)
                dbUrl="http://wiki.bitplan.com/images/confident/city_wikidata_population.db"
                urllib.request.urlretrieve(dbUrl,dbFile)
        # (re) open the database
        wikiCitiesDB=SQLDB(dbFile) 
          
        # check whether the table is populated
        tableList=sqlDB.getTableList()        
        tableName="citiesWithPopulation"     
      
        if self.db_recordCount(tableList, tableName)<10000:
            # check that database is writable
            # https://stackoverflow.com/a/44707371/1497139
            sqlDB.execute("pragma user_version=0")
            # makes sure both tables are in target sqlDB
            wikiCitiesDB.copyTo(sqlDB)
            # create joined table
            sqlQuery="""
              select 
    geoname_id,
    city_name,
    cp.cityLabel,
    country_iso_code,
    country_name,
    subdivision_1_iso_code,
    subdivision_1_name,
    cp.city as wikidataurl,
    cp.cityPop 
  from cities c 
  join cityPops cp 
  on c.geoname_id=cp.geoNameId 
union  
  select 
    geoNameId as geoname_id,
    null as city_name,
    cityLabel,
    countryIsoCode as country_iso_code,
    countryLabel as country_name,
    null as subdivision_1_iso_code,
    null as subdivision_1_name,
    city as wikidataurl,
    cityPop 
  from cityPops 
  where cityPop is not Null
group by geoNameId
order by cityPop desc
            """
            cityList=sqlDB.query(sqlQuery) 
            entityInfo=sqlDB.createTable(cityList,tableName,primaryKey=None,withDrop=True,sampleRecordCount=500)
            sqlDB.store(cityList,entityInfo,fixNone=True)
Example #26
0
    def main(cls, args):
        '''
        command line activation with parsed args
        
        Args:
            args(list): the command line arguments
        '''
        debug = args.debug
        endpoints = EndpointManager.getEndpoints(args.endpointPath)
        qm = QueryManager(lang=args.language,
                          debug=debug,
                          queriesPath=args.queriesPath)
        query = None
        queryCode = args.query
        endpointConf = None
        formats = None
        # preload ValueFormatter
        ValueFormatter.getFormats(args.formatsPath)
        if args.list:
            for name, query in qm.queriesByName.items():
                print(f"{name}:{query.title}")
        elif args.listEndpoints:
            # list endpoints
            for endpoint in endpoints.values():
                if hasattr(endpoint,
                           "lang") and endpoint.lang == args.language:
                    print(endpoint)

        elif args.queryName is not None:
            if debug or args.showQuery:
                print(f"named query {args.queryName}:")
            if args.queryName not in qm.queriesByName:
                raise Exception(f"named query {args.queryName} not available")
            query = qm.queriesByName[args.queryName]
            formats = query.formats
            queryCode = query.query
            if debug or args.showQuery:
                if hasattr(query,
                           "description") and query.description is not None:
                    print(query.description)
        if query is None:
            name = "?"
            if queryCode is None and args.queryFile is not None:
                queryFilePath = Path(args.queryFile)
                queryCode = queryFilePath.read_text()
                name = queryFilePath.stem
            query = Query(name="?", query=queryCode, lang=args.language)
        if queryCode:
            if debug or args.showQuery:
                print(f"{args.language}:\n{queryCode}")
            if args.endpointName:
                endpointConf = endpoints.get(args.endpointName)
            if args.language == "sparql":
                method = 'POST'
                if args.endpointName:
                    endPointUrl = endpointConf.endpoint
                    method = endpointConf.method
                    query.tryItUrl = endpointConf.website
                    query.database = endpointConf.database
                else:
                    endPointUrl = query.endpoint
                if args.method:
                    method = method
                sparql = SPARQL(endPointUrl, method=method)
                if args.prefixes and endpointConf is not None:
                    queryCode = f"{endpointConf.prefixes}\n{queryCode}"
                if args.raw:
                    qres = cls.rawQuery(endPointUrl,
                                        query=query.query,
                                        resultFormat=args.format,
                                        mimeType=args.mimeType)
                    print(qres)
                    return
                if "wikidata" in args.endpointName and formats is None:
                    formats = ["*:wikidata"]
                qlod = sparql.queryAsListOfDicts(queryCode)
            elif args.language == "sql":
                sqlDB = SQLDB(endpointConf.endpoint)
                qlod = sqlDB.query(queryCode)
            else:
                raise Exception(
                    f"language {args.language} not known/supported")
            if args.format is Format.csv:
                csv = CSV.toCSV(qlod)
                print(csv)
            elif args.format in [
                    Format.latex, Format.github, Format.mediawiki
            ]:
                doc = query.documentQueryResult(qlod,
                                                tablefmt=str(args.format),
                                                floatfmt=".0f")
                docstr = doc.asText()
                print(docstr)
            elif args.format in [Format.json
                                 ] or args.format is None:  # set as default
                # https://stackoverflow.com/a/36142844/1497139
                print(json.dumps(qlod, indent=2, sort_keys=True, default=str))
            elif args.format in [Format.xml]:
                lod2xml = Lod2Xml(qlod)
                xml = lod2xml.asXml()
                print(xml)

            else:
                raise Exception(f"format {args.format} not supported yet")
Example #27
0
    def testEntityInfo(self):
        '''
        test creating entityInfo from the sample record
        '''
        listOfRecords=Sample.getRoyals()
        entityInfo=EntityInfo(listOfRecords[:3],'Person','name',debug=True)
        self.assertEqual("CREATE TABLE Person(name TEXT PRIMARY KEY,born DATE,numberInLine INTEGER,wikidataurl TEXT,age FLOAT,ofAge BOOLEAN,lastmodified TIMESTAMP)",entityInfo.createTableCmd)
        self.assertEqual("INSERT INTO Person (name,born,numberInLine,wikidataurl,age,ofAge,lastmodified) values (:name,:born,:numberInLine,:wikidataurl,:age,:ofAge,:lastmodified)",entityInfo.insertCmd)
        self.sqlDB=SQLDB(debug=self.debug,errorDebug=True)
        entityInfo=self.sqlDB.createTable(listOfRecords[:10],entityInfo.name,entityInfo.primaryKey)
        tableList=self.sqlDB.getTableList()
        if self.debug:
            print (tableList)
        self.assertEqual(1,len(tableList))
        personTable=tableList[0]
        self.assertEqual("Person",personTable['name'])
        self.assertEqual(7,len(personTable['columns']))
        uml=UML()
        plantUml=uml.tableListToPlantUml(tableList,packageName="Royals",withSkin=False)
        if self.debug:
            print(plantUml)
        expected="""package Royals {
  class Person << Entity >> {
   age : FLOAT 
   born : DATE 
   lastmodified : TIMESTAMP 
   name : TEXT <<PK>>
   numberInLine : INTEGER 
   ofAge : BOOLEAN 
   wikidataurl : TEXT 
  }
}
"""
        self.assertEqual(expected,plantUml)
        
        # testGeneralization
        listOfRecords=[{'name': 'Royal family', 'country': 'UK', 'lastmodified':datetime.now()}]
        entityInfo=self.sqlDB.createTable(listOfRecords[:10],'Family','name')
        tableList=self.sqlDB.getTableList()
        self.assertEqual(2,len(tableList))
        uml=UML()
        plantUml=uml.tableListToPlantUml(tableList,generalizeTo="PersonBase",withSkin=False)
        if self.debug:
            print(plantUml)
        expected='''class PersonBase << Entity >> {
 lastmodified : TIMESTAMP 
 name : TEXT <<PK>>
}
class Person << Entity >> {
 age : FLOAT 
 born : DATE 
 numberInLine : INTEGER 
 ofAge : BOOLEAN 
 wikidataurl : TEXT 
}
class Family << Entity >> {
 country : TEXT 
}
PersonBase <|-- Person
PersonBase <|-- Family
'''
        self.assertEqual(expected,plantUml)