Beispiel #1
0
 def testEntityManager(self):
     '''
     test the entity manager handling

     Runs a store/restore round trip for two royals samples against the
     default, JSON and JsonPickle storage configurations:
     the result of Sample.getRoyals() is stored via storeLoD and the
     result of Sample.getRoyalsInstances() is stored via store.
     '''
     # debug output is forced on for this test
     self.debug = True
     for i, royals in enumerate(
         [Sample.getRoyals(),
          Sample.getRoyalsInstances()]):
         if self.debug:
             print(f"{i+1}:{royals}")
         sparqlConfig = StorageConfig.getSPARQL(
             "http://example.bitplan.com",
             "http://localhost:3030/example",
             host="localhost")
         # TODO use sparql Config
         for config in [
                 StorageConfig.getDefault(debug=self.debug),
                 StorageConfig.getJSON(debug=self.debug),
                 StorageConfig.getJsonPickle(self.debug)
         ]:
             self.configure(config)
             # the first sample is managed without a class, the second
             # one is bound to the Royal class
             name = "royal" if i == 0 else "royalorm"
             clazz = None if i == 0 else Royal
             em = EntityManager(name=name,
                                entityName="Royal",
                                entityPluralName="Royals",
                                clazz=clazz,
                                listName="royals",
                                config=config)
             em.royals = royals
             if i == 0:
                 cacheFile = em.storeLoD(royals)
             else:
                 cacheFile = em.store()
             # a cache file is only checked when the store call returns one
             if cacheFile is not None:
                 self.assertTrue(os.path.isfile(cacheFile))
             royalsLod = em.fromStore()
             self.assertIsInstance(royalsLod, list)
             hint = f"{i}({config.mode}):{name}"
             for item in royalsLod:
                 self.assertIsInstance(item, dict,
                                       f"{hint}:expecting dict")
             royalsList = em.getList()
             self.assertEqual(len(royals), len(royalsList))
             for j, item in enumerate(royalsList):
                 # derive a per item hint and keep the base hint intact -
                 # otherwise the indices of all previous items pile up
                 # in the failure message
                 itemHint = f"{hint}/{j}"
                 royal = royals[j]
                 # TODO check type handling e.g. "born"
                 self.checkItem(
                     royal, item,
                     ["name", "age", "numberInLine", "wikidataurl"],
                     itemHint)
Beispiel #2
0
 def testSqllite3Speed(self):
     '''
     test the sqlite3 speed by running the list of records check
     on 100000 artificial sample records with 'pKey' as the primary key
     '''
     sampleSize = 100000
     sampleRecords = Sample.getSample(sampleSize)
     self.checkListOfRecords(sampleRecords, 'Sample', 'pKey')
Beispiel #3
0
 def testListOfCities(self):
     '''
     test sqlite3 with the city sample records

     the check is run once with and once without fixing dates and
     each time the number of retrieved records has to match
     '''
     cities = Sample.getCities()
     for fixDates in (True, False):
         storedCities = self.checkListOfRecords(
             cities, 'City', fixDates=fixDates)
         self.assertEqual(len(cities), len(storedCities))
Beispiel #4
0
 def testSqlite3(self):
     '''
     test sqlite3 with the records of the royals sample

     the records returned by the check have to be equal to the
     original records
     '''
     royals = Sample.getRoyals()
     storedRoyals = self.checkListOfRecords(
         royals, 'Person', 'name', debug=True)
     if self.debug:
         print(storedRoyals)
     self.assertEqual(royals, storedRoyals)
Beispiel #5
0
 def testDob(self):
     '''
     test the DOB (date of birth) function with the ISO date
     "1926-04-21" and check year, month and day of the result
     '''
     birthDate = Sample.dob("1926-04-21")
     expectedParts = [("year", 1926), ("month", 4), ("day", 21)]
     for part, expected in expectedParts:
         self.assertEqual(expected, getattr(birthDate, part))
Beispiel #6
0
    def testCities(self):
        '''
        test a list of cities
        '''
        cityList=Sample.getCities()
        self.assertEqual(128769,(len(cityList)))
        cityIter=iter(cityList)
        #limit=len(cityList)
        limit=1000
        if getpass.getuser()=="travis":
            limit=4000
        for i in range(limit):
            city=next(cityIter)
            city['dgraph.type']='City'
            lat=float(city['lat'])
            lng=float(city['lng'])
            city['location']={'type': 'Point', 'coordinates': [lng,lat] }
            #print("%d: %s" % (i,city))
        dgraph=self.getDGraph()
        dgraph.drop_all()
        schema='''
name: string @index(exact) .
country: string .   
lat: float .
lng: float .
location: geo .
type City {
   name
   lat
   lng
   location
   country
}'''
        dgraph.addSchema(schema)
        startTime=time.time()
        dgraph.addData(obj=cityList,limit=limit,batchSize=250)
        query='''{ 
  # get cities
  cities(func: has(name)) {
        country
        name
        lat
        lng
        location
  }
}
        '''
        elapsed=time.time()-startTime
        print ("dgraph:adding %d records took %5.3f s => %5.f records/s" % (limit,elapsed,limit/elapsed))
        startTime=time.time()
        queryResult=dgraph.query(query)
        elapsed=time.time()-startTime
        print ("dgraph:query of %d records took %5.3f s => %5.f records/s" % (limit,elapsed,limit/elapsed))
        self.assertTrue('cities' in queryResult)
        qCityList=queryResult['cities']
        self.assertEqual(limit,len(qCityList))
        dgraph.close()
 def testIssue25(self):
     '''
     see https://github.com/WolfgangFahl/pyLoDStorage/issues/25

     creates a pandas DataFrame from the royals records and checks
     the number of rows and columns and the mean of the age column
     '''
     royals = Sample.getRoyals()
     frame = pd.DataFrame(royals)
     rowCount = len(frame)
     self.assertEqual(rowCount, len(royals))
     columnCount = len(frame.columns.values)
     self.assertEqual(columnCount, len(royals[0].keys()))
     meanAge = frame['age'].mean()
     self.assertIsNotNone(meanAge)
     self.assertGreater(meanAge, 53)
Beispiel #8
0
 def getSampleTableDB(withDrop=False,debug=False,failIfTooFew=False,sampleSize=1000):
     '''
     get a SQLDB with a table "sample" that is filled with the
     records of Sample.getSample(sampleSize)

     Args:
         withDrop: passed on to createTable
         debug: assigned to the debug attribute of the SQLDB
         failIfTooFew: passed on to createTable
         sampleSize: the number passed to Sample.getSample; ten times
             this value is passed to createTable as sampleRecordCount

     Returns:
         the SQLDB the records have been stored to
     '''
     records=Sample.getSample(sampleSize)
     db=SQLDB()
     db.debug=debug
     tableInfo=db.createTable(
         records,
         "sample",
         primaryKey='pKey',
         withDrop=withDrop,
         sampleRecordCount=sampleSize*10,
         failIfTooFew=failIfTooFew)
     db.store(records,tableInfo,executeMany=True,fixNone=True)
     return db
Beispiel #9
0
    def testListOfDictInsert(self):
        '''
        test inserting a list of dicts and retrieving the values again
        using a person based example

        instead of
        https://en.wikipedia.org/wiki/FOAF_(ontology)
        an object oriented derivative of FOAF with a focus on datatypes
        is used

        the round trip is done once with and once without typed literals
        '''
        royals = Sample.getRoyals()
        personType = 'foafo:Person'
        keyField = 'name'
        foafoPrefix = 'PREFIX foafo: <http://foafo.bitplan.com/foafo/0.1/>'
        for useTypedLiterals in (True, False):
            updateEndpoint = self.getJena(mode='update',
                                          typedLiterals=useTypedLiterals,
                                          debug=self.debug)
            # remove the persons of a previous run before inserting
            deleteString = """
            PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
            PREFIX foafo: <http://foafo.bitplan.com/foafo/0.1/>
            DELETE WHERE {  
              ?person a 'foafo:Person'.
              ?person ?p ?o. 
            }
            """
            updateEndpoint.query(deleteString)
            insertErrors = updateEndpoint.insertListOfDicts(
                royals, personType, keyField, foafoPrefix)
            self.checkErrors(insertErrors)

            queryEndpoint = self.getJena(mode="query", debug=self.debug)
            queryString = """
            PREFIX foafo: <http://foafo.bitplan.com/foafo/0.1/>
            SELECT ?name ?born ?numberInLine ?wikidataurl ?age ?ofAge ?lastmodified WHERE { 
                ?person a 'foafo:Person'.
                ?person foafo:Person_name ?name.
                ?person foafo:Person_born ?born.
                ?person foafo:Person_numberInLine ?numberInLine.
                ?person foafo:Person_wikidataurl ?wikidataurl.
                ?person foafo:Person_age ?age.
                ?person foafo:Person_ofAge ?ofAge.
                ?person foafo:Person_lastmodified ?lastmodified. 
            }"""
            queryResults = queryEndpoint.query(queryString)
            self.assertEqual(len(royals), len(queryResults))
            retrievedPersons = queryEndpoint.asListOfDicts(queryResults)
            if self.debug:
                for position, person in enumerate(retrievedPersons):
                    print("%d: %s" % (position, person))
            # the retrieved persons have to be equal to the inserted ones
            self.assertEqual(royals, retrievedPersons)
Beispiel #10
0
 def testRoyals(self):
     '''
     test conversion of royals to CSV and back

     The test is currently disabled. It is reported as skipped
     instead of silently passing so that the open TODO stays visible
     in the test results.
     '''
     # TODO - fix me
     self.skipTest("conversion of royals to CSV and back needs to be fixed")
     inlod = Sample.getRoyals()
     csv = CSV.toCSV(inlod)
     if self.debug:
         print(csv)
     # https://stackoverflow.com/questions/3717785/what-is-a-convenient-way-to-store-and-retrieve-boolean-values-in-a-csv-file
     outlod = CSV.fromCSV(csv)
     if self.debug:
         print(outlod)
Beispiel #11
0
 def testBackup(self):
     '''
     test creating a backup of the SQL database and restoring it
     to a RAM database

     nothing is checked for python versions older than 3.7
     '''
     if sys.version_info < (3, 7):
         return
     cities=Sample.getCities()
     self.checkListOfRecords(cities,'City',fixDates=True,doClose=False)
     backupPath="/tmp/testSqlite.db"
     progressStep=200 if self.debug else 0
     self.sqlDB.backup(backupPath,profile=self.debug,showProgress=progressStep)
     backupSize=os.stat(backupPath).st_size
     if self.debug:
         print ("size of backup DB is %d" % backupSize)
     self.assertTrue(backupSize>600000)
     self.sqlDB.close()
     # restore the backup and check that all cities are available
     restoredDB=SQLDB.restore(backupPath, SQLDB.RAM, profile=self.debug,showProgress=progressStep)
     cityInfo=EntityInfo(cities[:50],'City',debug=self.debug)
     restoredCities=restoredDB.queryAll(cityInfo)
     self.assertEqual(len(restoredCities),len(cities))
Beispiel #12
0
 def testCopy(self):
     '''
     test copying databases into another database

     three sample tables are checked and copied one after the other
     to the same target database file
     '''
     targetFile="/tmp/DAWT_Sample3x1000.db"
     copyDB=SQLDB(targetFile)
     tableNames=['Sample_%d_1000' %sampleNo for sampleNo in range(3)]
     for tableName in tableNames:
         sampleRecords=Sample.getSample(1000)
         self.checkListOfRecords(sampleRecords, tableName, 'pKey',doClose=False)
         self.sqlDB.copyTo(copyDB)
     copySize=os.stat(targetFile).st_size
     if self.debug:
         print ("size of copy DB is %d" % copySize)
     self.assertTrue(copySize>70000)
     copiedTables=copyDB.getTableList()
     if self.debug:
         print(copiedTables)
     for position,tableName in enumerate(tableNames):
         self.assertEqual(tableName,copiedTables[position]['name'])
     # check that database is writable
     # https://stackoverflow.com/a/44707371/1497139
     copyDB.execute("pragma user_version=0")
Beispiel #13
0
 def testListOfDictSpeed(self):
     '''
     test the speed of adding data

     5000 sample records are inserted once without a batch size and
     once with a batch size of 1000; the timing is printed when
     profiling is active
     '''
     recordCount = 5000
     recordType = "ex:TestRecord"
     # NOTE(review): sibling tests pass 'pKey' for Sample.getSample
     # records - confirm that 'pkey' is intended here
     keyField = 'pkey'
     examplePrefix = 'PREFIX ex: <http://example.com/>'
     for batchSize in (None, 1000):
         sampleRecords = Sample.getSample(recordCount)
         endpoint = self.getJena(mode='update', profile=self.profile)
         startedAt = time.time()
         insertErrors = endpoint.insertListOfDicts(sampleRecords,
                                                   recordType,
                                                   keyField,
                                                   examplePrefix,
                                                   batchSize=batchSize)
         self.checkErrors(insertErrors)
         duration = time.time() - startedAt
         if self.profile:
             print("adding %d records took %5.3f s => %5.f records/s" %
                   (recordCount, duration, recordCount / duration))
Beispiel #14
0
    def testIssue15(self):
        '''
        https://github.com/WolfgangFahl/pyLoDStorage/issues/15

        auto create view ddl in mergeschema

        creates the tables Person and Family and checks the general
        view DDL derived from the table list
        '''
        self.sqlDB=SQLDB(debug=self.debug,errorDebug=self.debug)
        royals=Sample.getRoyals()
        personInfo=EntityInfo(royals[:3],'Person','name',debug=self.debug)
        self.sqlDB.createTable(royals[:10],personInfo.name,personInfo.primaryKey)
        families=[{'name': 'Royal family', 'country': 'UK', 'lastmodified':datetime.now()}]
        self.sqlDB.createTable(families[:10],'Family','name')
        tables=self.sqlDB.getTableList()
        viewDDL=Schema.getGeneralViewDDL(tables,"PersonBase")
        if self.debug:
            print (viewDDL)
        # the first line ends with a blank after "AS"
        expectedLines=[
            "CREATE VIEW PersonBase AS ",
            "  SELECT name,lastmodified FROM Person",
            "UNION",
            "  SELECT name,lastmodified FROM Family"
        ]
        expected="\n".join(expectedLines)
        self.assertEqual(expected,viewDDL)
 def testIssue24_IntegrateTabulate(self):
     '''
     https://github.com/WolfgangFahl/pyLoDStorage/issues/24

     test https://pypi.org/project/tabulate/ support

     tabulates the royals and the most common countries of the city
     sample in several table formats; the tables are only printed in
     debug mode
     '''
     show=self.debug
     #show=True
     tableFormats=["latex","grid","mediawiki","github"]
     royals=Royals(load=True)
     for tableFormat in tableFormats:
         royalsTable=tabulate(royals.royals,headers="keys",tablefmt=tableFormat)
         if show:
             print (royalsTable)

     # count the cities per country
     cities=Sample.getCities()
     countryCounter=Counter(city["country"] for city in cities)
     tabulateCounter=TabulateCounter(countryCounter)
     for tableFormat in tableFormats:
         countryTable=tabulateCounter.mostCommonTable(tablefmt=tableFormat,limit=7)
         if show:
             print(countryTable)
Beispiel #16
0
    def testEntityInfo(self):
        '''
        test creating entityInfo from the sample records

        checks the create table and insert commands, the table list of
        the database and the PlantUML markup generated from it - with
        and without generalization to PersonBase
        '''
        royals=Sample.getRoyals()
        personInfo=EntityInfo(royals[:3],'Person','name',debug=True)
        expectedCreateCmd="CREATE TABLE Person(name TEXT PRIMARY KEY,born DATE,numberInLine INTEGER,wikidataurl TEXT,age FLOAT,ofAge BOOLEAN,lastmodified TIMESTAMP)"
        self.assertEqual(expectedCreateCmd,personInfo.createTableCmd)
        expectedInsertCmd="INSERT INTO Person (name,born,numberInLine,wikidataurl,age,ofAge,lastmodified) values (:name,:born,:numberInLine,:wikidataurl,:age,:ofAge,:lastmodified)"
        self.assertEqual(expectedInsertCmd,personInfo.insertCmd)
        self.sqlDB=SQLDB(debug=self.debug,errorDebug=True)
        self.sqlDB.createTable(royals[:10],personInfo.name,personInfo.primaryKey)
        tables=self.sqlDB.getTableList()
        if self.debug:
            print (tables)
        self.assertEqual(1,len(tables))
        personTable=tables[0]
        self.assertEqual("Person",personTable['name'])
        self.assertEqual(7,len(personTable['columns']))
        packageUml=UML().tableListToPlantUml(tables,packageName="Royals",withSkin=False)
        if self.debug:
            print(packageUml)
        # attribute lines without a <<PK>> marker end with a blank;
        # the empty last entry makes the markup end with a newline
        expectedPackageLines=[
            "package Royals {",
            "  class Person << Entity >> {",
            "   age : FLOAT ",
            "   born : DATE ",
            "   lastmodified : TIMESTAMP ",
            "   name : TEXT <<PK>>",
            "   numberInLine : INTEGER ",
            "   ofAge : BOOLEAN ",
            "   wikidataurl : TEXT ",
            "  }",
            "}",
            ""
        ]
        self.assertEqual("\n".join(expectedPackageLines),packageUml)

        # testGeneralization
        families=[{'name': 'Royal family', 'country': 'UK', 'lastmodified':datetime.now()}]
        self.sqlDB.createTable(families[:10],'Family','name')
        tables=self.sqlDB.getTableList()
        self.assertEqual(2,len(tables))
        generalizedUml=UML().tableListToPlantUml(tables,generalizeTo="PersonBase",withSkin=False)
        if self.debug:
            print(generalizedUml)
        expectedGeneralizedLines=[
            "class PersonBase << Entity >> {",
            " lastmodified : TIMESTAMP ",
            " name : TEXT <<PK>>",
            "}",
            "class Person << Entity >> {",
            " age : FLOAT ",
            " born : DATE ",
            " numberInLine : INTEGER ",
            " ofAge : BOOLEAN ",
            " wikidataurl : TEXT ",
            "}",
            "class Family << Entity >> {",
            " country : TEXT ",
            "}",
            "PersonBase <|-- Person",
            "PersonBase <|-- Family",
            ""
        ]
        self.assertEqual("\n".join(expectedGeneralizedLines),generalizedUml)
Beispiel #17
0
    def testCountries(self):
        ''' 
        test handling countries
        '''
        countryList=Sample.getCountries()
        #print(countryList)    
        dgraph=self.getDGraph()
        dgraph.drop_all()
        schema='''
name: string @index(exact) .
code: string @index(exact) .     
capital: string .   
location: geo .
type Country {
   code
   name
   location
   capital
}'''
        dgraph.addSchema(schema)
        for country in countryList:
            # rename dictionary keys
            #country['name']=country.pop('Name')
            country['code']=country.pop('country_code')
            country['dgraph.type']='Country'
            lat,lng=country.pop('latlng')
            country['location']={'type': 'Point', 'coordinates': [lng,lat] }
            print(country) 
        dgraph.addData(countryList)
        query='''{
# list of countries
  countries(func: has(code)) {
    uid
    name
    code
    capital
    location
  }
}'''
        queryResult=dgraph.query(query) 
        self.assertTrue("countries" in queryResult)
        countries=queryResult["countries"]
        self.assertEqual(247,len(countries))
        schemaResult=dgraph.query("schema{}")
        print(schemaResult)
        self.assertTrue("schema" in schemaResult)
        schema=schemaResult["schema"]
        self.assertTrue(len(schema)>=7)
        # see https://discuss.dgraph.io/t/running-upsert-in-python/9364
        """mutation='''
        upsert {  
  query {
    # get the uids of all Country nodes
     countries as var (func: has(<dgraph.type>)) @filter(eq(<dgraph.type>, "Country")) {
        uid
    }
  }
  mutation {
    delete {
      uid(countries) * * .
    }
  }
}'''
        dgraph.mutate(mutation)"""
        dgraph.close