Ejemplo n.º 1
0
 def getStorageConfig(debug:bool=False,mode='sql')->StorageConfig:
     '''
     get the storage configuration for the given storage mode
     
     Args:
         debug(bool): if True show debug information
         mode(str): one of "sql", "json" or "jsonpickle"
     
     Return:
         StorageConfig: the storage configuration to be used
     
     Raises:
         ValueError: if the given mode is not one of the supported modes
     '''
     if mode=='sql':
         config=StorageConfig.getSQL(debug=debug)
     elif mode=='json':
         # forward the debug flag the same way the other modes do
         config=StorageConfig.getJSON(debug=debug)
     elif mode=='jsonpickle':
         config=StorageConfig.getJsonPickle(debug=debug)
     else:
         # ValueError is still an Exception so existing handlers keep working
         raise ValueError(f"invalid mode {mode}")
     # the cache directory name must be set before the cache path is requested
     config.cacheDirName="dms"
     cachedir=config.getCachePath()
     config.profile=DMSStorage.profile
     config.withShowProgress=DMSStorage.withShowProgress
     if mode=='sql':
         # only the sql mode gets an explicit database file in the cache path
         config.cacheFile=f"{cachedir}/dms.db"
     return config
 def testIssue_59_db_download(self):
     '''
     check that the cache database is downloaded when it is not present
     '''
     # a fresh temporary cache root so that no database file is available upfront
     with tempfile.TemporaryDirectory() as cacheRoot:
         storageConfig=StorageConfig(cacheFile="locations.db", cacheRootDir=cacheRoot)
         # turn the plain file name into a path below the cache path
         dbPath=f"{storageConfig.getCachePath()}/{storageConfig.cacheFile}"
         storageConfig.cacheFile=dbPath
         locationContext=LocationContext.fromCache(config=storageConfig)
         germanyMatches=locationContext.locateLocation("Germany")
         self.assertTrue(len(germanyMatches)>0)
Ejemplo n.º 3
0
 def testEntityManager(self):
     '''
     test the entity Manager handling
     
     the royals samples (plain records and instances) are stored with
     each storage configuration and then read back and compared
     '''
     self.debug = True
     royalsSamples = [Sample.getRoyals(), Sample.getRoyalsInstances()]
     for i, royals in enumerate(royalsSamples):
         if self.debug:
             print(f"{i+1}:{royals}")
         sparqlConfig = StorageConfig.getSPARQL(
             "http://example.bitplan.com",
             "http://localhost:3030/example",
             host="localhost")
         # TODO use sparql Config
         for config in [
                 StorageConfig.getDefault(debug=self.debug),
                 StorageConfig.getJSON(debug=self.debug),
                 StorageConfig.getJsonPickle(self.debug)
         ]:
             self.configure(config)
             # the first sample is handled without a class, the second one with the Royal class
             name = "royal" if i == 0 else "royalorm"
             clazz = None if i == 0 else Royal
             em = EntityManager(name=name,
                                entityName="Royal",
                                entityPluralName="Royals",
                                clazz=clazz,
                                listName="royals",
                                config=config)
             em.royals = royals
             if i == 0:
                 cacheFile = em.storeLoD(royals)
             else:
                 cacheFile = em.store()
             # a cache file is only checked if the store call returned one
             if cacheFile is not None:
                 self.assertTrue(os.path.isfile(cacheFile))
             royalsLod = em.fromStore()
             self.assertIsInstance(royalsLod, list)
             baseHint = f"{i}({config.mode}):{name}"
             for item in royalsLod:
                 self.assertIsInstance(item, dict,
                                       f"{baseHint}:expecting dict")
             royalsList = em.getList()
             self.assertEqual(len(royals), len(royalsList))
             for j, item in enumerate(royalsList):
                 # build the hint per item so that the indices of
                 # previous items do not pile up in the message
                 itemHint = f"{baseHint}/{j}"
                 royal = royals[j]
                 # TODO check type handling e.g. "born"
                 self.checkItem(
                     royal, item,
                     ["name", "age", "numberInLine", "wikidataurl"],
                     itemHint)
 def testHasData(self):
     '''
     check the has data and populate functionality of the locator database
     '''
     testDownload=False
     # the test only runs in the CI environment or when explicitly enabled
     if not (self.inCI() or testDownload):
         return
     with tempfile.TemporaryDirectory() as tmpRoot:
         storageConfig=StorageConfig(cacheRootDir=tmpRoot, cacheDirName='geograpy3')
         dbPath=f"{storageConfig.getCachePath()}/{LocationContext.db_filename}"
         storageConfig.cacheFile=dbPath
         locator=Locator(storageConfig=storageConfig)
         # remove an existing database file
         if os.path.isfile(locator.db_file):
             os.remove(locator.db_file)
         # reinit sqlDB
         locator=Locator(storageConfig=storageConfig)
         self.assertFalse(locator.db_has_data())
         locator.populate_db()
         self.assertTrue(locator.db_has_data())
Ejemplo n.º 5
0
 def testStoreMode(self):
     '''
     check that an entity manager with the default configuration reports the SQL store mode
     '''
     defaultConfig = StorageConfig.getDefault()
     self.configure(defaultConfig)
     manager = EntityManager("tst", "Test", "Tests", config=defaultConfig)
     if self.debug:
         # display the name of the store mode
         modeName = manager.storeMode().name
         print(modeName)
     self.assertEqual(StoreMode.SQL, manager.storeMode())
Ejemplo n.º 6
0
 def testWikidataCities(self):
     '''
     test getting city information from wikidata
     
     the city list is queried from a SPARQL endpoint and stored
     in the table "hs" of a local SQL database
     '''
     # Wikidata time outs in CI environment need to be avoided
     # so the test is skipped for any user but "wf"
     if getpass.getuser() != "wf":
         return
     config = StorageConfig.getSQL(debug=self.debug)
     config.cacheRootDir = "/tmp/wdhs"
     cachedir = config.getCachePath()
     config.cacheFile = f"{cachedir}/hs.db"
     wikidata = Wikidata()
     # use 2020 wikidata copy
     wikidata.endpoint = "https://confident.dbis.rwth-aachen.de/jena/wdhs/sparql"
     limit = 1000000
     cityList = wikidata.getCities(limit=limit)
     sqlDB = SQLDB(config.cacheFile)
     # withDrop=True is passed so that the table is created from scratch
     entityInfo = sqlDB.createTable(cityList, "hs", withDrop=True)
     sqlDB.store(cityList, entityInfo, fixNone=True)
     expected = 200000
     # assertGreaterEqual reports both values if the check fails
     self.assertGreaterEqual(len(cityList), expected)
Ejemplo n.º 7
0
 def __init__(self, name, url=None, title=None, config=None):
     '''
     Constructor
     
     Args:
         name(string): the name of this event manager e.g. "confref"
         url(string): the url of the event source  e.g. "http://portal.confref.org/"
         title(string): title of the event source e.g. "confref.org"
         config(StorageConfig): the storage configuration to use - if None the default configuration is used
     '''
     storageConfig = StorageConfig.getDefault() if config is None else config
     # the table name is derived from the name of this event manager
     storageConfig.tableName = "Event_%s" % name
     super().__init__(name,
                      entityName="Event",
                      entityPluralName="Events",
                      config=storageConfig)
     self.url = url
     self.title = title
     # the lookup dicts start empty
     self.events = {}
     self.eventsByAcronym = {}
     self.eventsByCheckedAcronym = {}
    def testOpenResearchCaching(self):
        ''' check that open research results are cached and can be read back '''
        config = StorageConfig.getDefault(self.debug)
        opr = OpenResearch(config=config)
        if opr.em.isCached():
            # already cached - just read the events back
            opr.em.fromStore()
        else:
            # only cache if not cached yet
            opr.cacheEvents(opr.em, limit=20000, batch=2000)
            minexpected = 8500
            self.assertTrue(len(opr.em.events) >= minexpected)
            opr.em.store()
        self.assertTrue(opr.em.isCached())

        # a second instance created without an explicit configuration
        # is expected to deliver the same number of events from the store
        opr2 = OpenResearch()
        opr2.em.fromStore()
        self.assertEqual(len(opr2.em.events), len(opr.em.events))
        zeusEvents = opr2.em.lookup("ZEUS 2018")
        self.assertEqual(1, len(zeusEvents))
        zeus2018 = zeusEvents[0]
        self.assertTrue(zeus2018 is not None)
        print(zeus2018.asJson())
Ejemplo n.º 9
0
 def getConfig(tmpdir: str):
     '''
     get a storage configuration for the locations database
     
     Args:
         tmpdir(str): the directory to be used as the cache root directory
     
     Return:
         StorageConfig: the configuration with the cacheFile prefixed by the cache path
     '''
     dbName = "locations.db"
     storageConfig = StorageConfig(cacheFile=dbName,
                                   cacheDirName="geograpyTest",
                                   cacheRootDir=tmpdir)
     # prefix the configured file name with the cache path
     dbPath = f"{storageConfig.getCachePath()}/{storageConfig.cacheFile}"
     storageConfig.cacheFile = dbPath
     return storageConfig
Ejemplo n.º 10
0
 def configure(self, config: StorageConfig):
     '''
     set the cache directory name of the given storage configuration
     
     Args:
         config(StorageConfig): the storage configuration to be modified in place
     '''
     testCacheDirName = "lodstorage-test"
     config.cacheDirName = testCacheDirName
Ejemplo n.º 11
0
import datetime
import os

from lodstorage.entity import EntityManager
from lodstorage.jsonable import JSONAble
from lodstorage.storageconfig import StorageConfig

# url of the CEUR-WS site
CEURWS_URL = "http://ceur-ws.org"
# database file in the resources directory next to this module
CACHE_FILE = f"{os.path.dirname(__file__)}/resources/ceurws.db"
# storage configuration using the database file above as cache file
CONFIG = StorageConfig(cacheFile=CACHE_FILE)


class Volume(JSONAble):
    """
    Represents a volume in ceur-ws
    """
    def getSamples(self):
        samples = [{
            "number": 2436,
            "url": "http://ceur-ws.org/Vol-2436/",
            "title":
            "Evaluation and Experimental Design in Data Mining and Machine Learning",
            "fullTitle":
            "1st Workshop on Evaluation and Experimental Design in Data Mining and Machine Learning",
            "acronym": "EDML 2019",
            "lang": "en",
            "location": "Calgary, Alberta, Canada",
            "country": "Canada",
            "region": "Alberta",
            "city": "Calgary",
            "ordinal": 1,