def testMarkHarvesterAfterExceptionChange(self):
    """After an error run, a successful harvest flips status back to Ok and accumulates counts."""
    runningFile = join(self.statePath, 'repo.running')
    with self._State('repo') as state:
        state.getZTime = lambda: ZuluTime('2012-08-13T12:15:00Z')
        state.markStarted()
        try:
            raise ValueError("whatever")
        except:
            exType, exValue, exTraceback = exc_info()
            state.markException(exType, exValue, (100, 80, 20, 93))
        self.assertEqual(
            [100, 80, 20],
            event_counts(state, 'records_harvested', 'records_uploaded', 'records_deleted'))
        self.assertEqual(
            {"changedate": "2012-08-13 12:15:00", "status": "Error", "message": "whatever"},
            JsonDict.load(runningFile))
    with self._State('repo') as state:
        state.getZTime = lambda: ZuluTime('2012-08-13T12:17:00Z')
        state.markStarted()
        state.markHarvested((42, 31, 11, 135), "resumptionToken", "2012-08-13T12:14:00")
        # Counts accumulate on top of the previous (failed) run.
        self.assertEqual(
            [142, 111, 31],
            event_counts(state, 'records_harvested', 'records_uploaded', 'records_deleted'))
        self.assertEqual(
            {"changedate": "2012-08-13 12:17:00", "status": "Ok", "message": ""},
            JsonDict.load(runningFile))
def testLoadEmptyFile(self):
    """Loading an empty file raises JSONDecodeError unless emptyOnError is set."""
    tempfile = join(self.tempdir, 'json.json')
    # 'with' guarantees the handle is closed (was open(...).close()).
    with open(tempfile, 'w'):
        pass
    self.assertRaises(JSONDecodeError, lambda: JsonDict.load(tempfile))
    # assertEqual: assertEquals is deprecated and removed in Python 3.12.
    self.assertEqual({}, JsonDict.load(tempfile, emptyOnError=True))
    self.assertRaises(JSONDecodeError, lambda: JsonList.load(tempfile))
    self.assertEqual([], JsonList.load(tempfile, emptyOnError=True))
def testMarkDeletedAfterExceptionChange(self):
    """markDeleted() after an error run resets the running state to Ok."""
    runningFile = join(self.statePath, 'repo.running')
    with self._State('repo') as state:
        state.getZTime = lambda: ZuluTime('2012-08-13T12:15:00Z')
        state.markStarted()
        try:
            raise ValueError("whatever")
        except:
            exType, exValue, exTraceback = exc_info()
            state.markException(exType, exValue, (9999, 9999, 9999, 9999))
        self.assertEqual(
            {"changedate": "2012-08-13 12:15:00", "status": "Error", "message": "whatever"},
            JsonDict.load(runningFile))
    with self._State('repo') as state:
        state.getZTime = lambda: ZuluTime('2012-08-13T12:17:00Z')
        state.markStarted()
        state.markDeleted()
        self.assertEqual(
            {"changedate": "2012-08-13 12:17:00", "status": "Ok", "message": ""},
            JsonDict.load(runningFile))
def testLoadEmptyFile(self):
    """An empty file raises json.JSONDecodeError unless emptyOnError is given."""
    tempfile = join(self.tempdir, 'json.json')
    # Touch an empty file; the unused handle is closed by the 'with'.
    with open(tempfile, 'w'):
        pass
    self.assertRaises(json.JSONDecodeError, lambda: JsonDict.load(tempfile))
    self.assertEqual({}, JsonDict.load(tempfile, emptyOnError=True))
    self.assertRaises(json.JSONDecodeError, lambda: JsonList.load(tempfile))
    self.assertEqual([], JsonList.load(tempfile, emptyOnError=True))
def testLoad(self):
    """JsonDict.load accepts an open file, a str path and a Path object alike."""
    original = JsonDict({'hello': 'world'})
    tempfile = self.tmp_path / 'json.json'
    with open(tempfile, 'w') as fp:
        fp.write(str(original))
    with open(tempfile) as fp:
        fromFileObject = JsonDict.load(fp)
    fromStringPath = JsonDict.load(str(tempfile))
    fromPathObject = JsonDict.load(tempfile)
    self.assertEqual(original, fromFileObject)
    self.assertEqual(original, fromStringPath)
    self.assertEqual(original, fromPathObject)
def testRehashIfNecessary(self):
    """A password hashed with different argon2 parameters is rehashed after a successful validation."""
    self.pwd.addUser(username='******', password='******')
    from argon2 import PasswordHasher
    # Hash 'two' with deliberately different parameters than the ones in use.
    myPh = PasswordHasher(parallelism=2, memory_cost=2048)
    hashed2 = myPh.hash('secret2')
    passwdFile = join(self.tempdir, 'passwd')
    data = JsonDict.load(passwdFile)
    data['users']['two'] = hashed2
    hashed1 = data['users']['one']
    data.dump(passwdFile)
    self.assertTrue(self.pwd.validateUser('two', 'secret2'))
    self.assertTrue(self.pwd.validateUser('one', 'secret'))
    data = JsonDict.load(passwdFile)
    # 'one' already used the current parameters: its hash is untouched.
    self.assertEqual(hashed1, data['users']['one'])
    # 'two' was rehashed with the current parameters.
    self.assertNotEqual(hashed2, data['users']['two'])
    self.assertTrue(self.pwd.validateUser('two', 'secret2'))
def _readState(self):
    """Restore harvester progress (counts, resumption token, from-date) from disk.

    Prefers the JSON resumption file; falls back to scanning the legacy
    stats log once, for states written before the resumption file existed.
    """
    # Event counts live in a separate JSON file; start empty if absent.
    self._counts = JsonDict.load(
        self._countFilepath) if self._countFilepath.is_file(
        ) else JsonDict()
    if self._resumptionFilepath.is_file():
        values = JsonDict.loads(self._resumptionFilepath.read_text())
        # 'or None' normalizes empty strings to None.
        self.token = values.get('resumptionToken', None) or None
        self.from_ = values.get('from', '') or None
        self.lastSuccessfulHarvest = values.get('lastSuccessfulHarvest', '') or None
        return
    # The mechanism below will only be carried out once in case the resumption file does not yet exist.
    if self._statsfilepath.is_file():
        self._statsfile = self._statsfilepath.open()
        logline = None
        # Walk every non-error log line; the last one seen wins.
        for logline in self._filterNonErrorLogLine(self._statsfile):
            # Only pick up a start date while no token has been seen yet.
            if not self.token:
                self.from_ = getStartDate(logline)
            self.token = getResumptionToken(logline)
        # A final 'deleted' line means all progress was wiped.
        if logline and self._isDeleted(logline):
            self.from_ = None
            self.token = None
        self._statsfile.close()
        self._statsfile = None
def _read(self):
    """Load users and groups from the database file, merging groups with the in-memory set."""
    result = JsonDict.load(self._filename)
    assert result['version'] == self.version, 'Expected database version %s' % self.version
    # Union of the groups already known with the persisted ones.
    merged = set(self._groups)
    merged.update(set(result['data']['groups']))
    self._groups = list(merged)
    self._users.update(result['data']['users'])
def getRepositoryIds(self, domainId, repositoryGroupId=None):
    """Return repository ids of one group, or of all groups in the domain.

    :param domainId: domain whose groups are consulted.
    :param repositoryGroupId: restrict to this group when given.
    :raises IOError: when a group file does not exist.
    """
    result = JsonList()
    allIds = self.getRepositoryGroupIds(domainId) if repositoryGroupId is None else [repositoryGroupId]
    # Loop variable renamed so it no longer shadows the parameter.
    for groupId in allIds:
        # 'with' closes the file promptly instead of leaking the handle.
        with open(join(self._dataPath, '%s.%s.repositoryGroup' % (domainId, groupId))) as fp:
            jsonData = JsonDict.load(fp)
        result.extend(jsonData.get('repositoryIds', []))
    return result
def testMarkException(self):
    """markException() records an Error status and an Error line in the stats log."""
    with self._State('repo') as state:
        state.getZTime = lambda: ZuluTime('2012-08-13T12:15:00Z')
        state.markStarted()
        state.markHarvested((9999, 9999, 9999, 9999), "resumptionToken", "2012-08-13T12:14:00")
        self.assertEqual([1, 0, 1], event_counts(state, 'started', 'errors', 'harvested'))
        self.assertRepoStats(
            'Started: 2012-08-13 12:15:00, Harvested/Uploaded/Deleted/Total: 9999/9999/9999/9999, Done: 2012-08-13 12:15:00, ResumptionToken: resumptionToken\n'
        )
        self.assertEqual(
            {"from": "2012-08-13T12:14:00",
             "resumptionToken": "resumptionToken",
             'lastSuccessfulHarvest': '2012-08-13T12:15:00Z'},
            JsonDict.load(join(self.statePath, 'repo.next')))
        self.assertEqual(
            {"changedate": "2012-08-13 12:15:00", "status": "Ok", "message": ""},
            JsonDict.load(join(self.statePath, 'repo.running')))
    with self._State('repo') as state:
        state.getZTime = lambda: ZuluTime('2012-08-13T12:17:00Z')
        state.markStarted()
        try:
            raise ValueError("whatever")
        except:
            exType, exValue, exTraceback = exc_info()
            state.markException(exType, exValue, (9999, 9999, 9999, 9999))
        self.assertEqual([2, 1, 1], event_counts(state, 'started', 'errors', 'harvested'))
        self.assertRepoStats(
            """Started: 2012-08-13 12:15:00, Harvested/Uploaded/Deleted/Total: 9999/9999/9999/9999, Done: 2012-08-13 12:15:00, ResumptionToken: resumptionToken
Started: 2012-08-13 12:17:00, Harvested/Uploaded/Deleted/Total: 9999/9999/9999/9999, Error: <class 'ValueError'>: whatever
""")
        self.assertEqual(
            {"changedate": "2012-08-13 12:17:00", "status": "Error", "message": "whatever"},
            JsonDict.load(join(self.statePath, 'repo.running')))
def urlJsonDict(self, **kwargs):
    """GET the internal '/get' endpoint; raise ValueError when the response reports an error."""
    # Drop falsy values from the query string.
    arguments = {k: v for k, v in kwargs.items() if v}
    url = "{}/get?{}".format(self._internalurl, urlencode(arguments))
    result = JsonDict.load(self._urlopen(url))
    if 'error' in result:
        raise ValueError(result['error']['message'])
    return result
def urlJsonDict(self, **kwargs):
    """Fetch a JsonDict from the internal '/get' endpoint, raising on reported errors."""
    # Falsy values are excluded from the query string.
    query = urlencode({k: v for k, v in kwargs.items() if v})
    result = JsonDict.load(
        self._urlopen("{}/get?{}".format(self._internalurl, query)))
    if 'error' in result:
        raise ValueError(result['error']['message'])
    return result
def testMarkDeleted(self):
    """markDeleted() zeroes the counts and clears resumption state in repo.next."""
    with self._State('repo') as state:
        state.getZTime = lambda: ZuluTime('2012-08-13T12:15:00Z')
        state.markStarted()
        state.markHarvested((9999, 9999, 9999, 9999), "resumptionToken", "2012-08-13T12:14:00")
        self.assertEqual([1, 0], event_counts(state, 'harvested', 'deleted'))
        self.assertRepoStats(
            'Started: 2012-08-13 12:15:00, Harvested/Uploaded/Deleted/Total: 9999/9999/9999/9999, Done: 2012-08-13 12:15:00, ResumptionToken: resumptionToken\n'
        )
        self.assertEqual(
            {"from": "2012-08-13T12:14:00",
             "resumptionToken": "resumptionToken",
             'lastSuccessfulHarvest': '2012-08-13T12:15:00Z'},
            JsonDict.load(join(self.statePath, 'repo.next')))
        self.assertEqual(
            {"changedate": "2012-08-13 12:15:00", "status": "Ok", "message": ""},
            JsonDict.load(join(self.statePath, 'repo.running')))
    with self._State('repo') as state:
        state.getZTime = lambda: ZuluTime('2012-08-13T12:17:00Z')
        state.markDeleted()
        self.assertRepoStats(
            """Started: 2012-08-13 12:15:00, Harvested/Uploaded/Deleted/Total: 9999/9999/9999/9999, Done: 2012-08-13 12:15:00, ResumptionToken: resumptionToken
Started: 2012-08-13 12:17:00, Harvested/Uploaded/Deleted/Total: 0/0/0/0, Done: Deleted all ids.
""")
        self.assertEqual(
            {"from": "", "resumptionToken": "", 'lastSuccessfulHarvest': None},
            JsonDict.load(join(self.statePath, 'repo.next')))
        self.assertEqual(
            {"changedate": "2012-08-13 12:15:00", "status": "Ok", "message": ""},
            JsonDict.load(join(self.statePath, 'repo.running')))
def deleteData(self, identifier, datatype):
    """Move the data file for (identifier, datatype) into the id-based shadow directory.

    :raises IOError: when the source file does not exist.
    """
    domainDir, filename = self._filename(identifier, datatype)
    fpath = join(self._dataPath, domainDir, filename)
    curId = JsonDict.load(fpath)['@id']
    fIdPath = join(self._dataIdPath, domainDir, f'(unknown).{curId}')
    # exist_ok avoids the isdir()/makedirs() check-then-act race.
    makedirs(dirname(fIdPath), exist_ok=True)
    rename(fpath, fIdPath)
def load(cls, filePath):
    """Alternate constructor: build a state object, hydrated from its JSON file when present."""
    state = cls(filePath=filePath)
    if isfile(filePath):
        persisted = JsonDict.load(filePath)
        state.datetime = persisted.get('datetime')
        state.harvestingReady = persisted.get('harvestingReady', False)
        state.error = persisted.get('error')
        state.resumptionAttributes = persisted.get('resumptionAttributes')
    return state
def getRepositories(self, domainId, repositoryGroupId=None):
    """Return all repositories of a domain, optionally restricted to one group.

    :raises ValueError: 'idDoesNotExist' when the domain/group is unknown.
    """
    try:
        repositoryIds = self.getRepositoryIds(domainId=domainId, repositoryGroupId=repositoryGroupId)
    except IOError:
        raise ValueError("idDoesNotExist")
    result = JsonList()
    for repositoryId in repositoryIds:
        # 'with' closes each file instead of leaking one handle per repository.
        with open(join(self._dataPath, '%s.%s.repository' % (domainId, repositoryId))) as fp:
            result.append(JsonDict.load(fp))
    return result
def getData(self, identifier, datatype, guid=None):
    """Return the stored JsonDict for (identifier, datatype).

    Lookup by guid is not supported by this store.
    """
    domainDir, filename = self._filename(identifier, datatype)
    fpath = join(self._dataPath, domainDir, filename)
    if guid is not None:
        raise NotImplementedError()
    try:
        return JsonDict.load(fpath)
    except IOError:
        # Surface a missing file as an unknown identifier.
        raise ValueError(filename)
def _markRunningState(self, status, message=""):
    """Persist the running status, but only when status or message actually changed."""
    current = JsonDict.load(self._runningFilepath) if self._runningFilepath.is_file() else {}
    unchanged = (status == current.get('status', None)
                 and message == current.get('message', None))
    if not unchanged:
        JsonDict({
            'changedate': self.getTime(),
            'status': status,
            'message': message,
        }).dump(self._runningFilepath)
def _download(self, url, **kwargs):
    """Fetch configuration from url, falling back to the local cache on network errors.

    On success the cache is refreshed; on failure the cached copy is used,
    and the original exception is re-raised when no cache exists either.
    """
    try:
        configuration = JsonDict.load(urlopen(url, **kwargs))
        self._cache.update(configuration)
    # 'except ... as e' replaces the Python-2-only 'except E, e' comma
    # syntax, which is a SyntaxError on Python 3 (works on 2.6+ too).
    except (HTTPError, URLError, timeout) as e:
        sys.stderr.write("""%s (%s). Tried: %s
-----
""" % (e.__class__.__name__, str(e), url))
        configuration = self._cache.retrieve()
        if configuration is None:
            sys.stderr.write('%s: configuration cachefile "%s" not found!\n' % (
                self.__class__.__name__, self._cache.filepath))
            sys.stderr.flush()
            raise
        sys.stderr.write('%s: configuration cachefile "%s" found.\n' % (
            self.__class__.__name__, self._cache.filepath))
        sys.stderr.flush()
def getData(self, identifier, datatype, guid=None):
    """Return stored data for (identifier, datatype), optionally pinned to a guid.

    Falls back from the guid-specific shadow path to the current record when
    the shadow file is missing; lazily backfills an '@id' on legacy records.
    """
    domainDir, filename = self._filename(identifier, datatype)
    fpath = join(self._dataPath, domainDir, filename)
    if guid is not None:
        # Guid-pinned records live in the id-based shadow directory.
        fpath = join(self._dataIdPath, domainDir, f'(unknown).{guid}')
    try:
        d = JsonDict.load(fpath)
    except IOError:
        if guid is not None:
            # No shadow file yet: the current record may still carry this guid.
            result = self.getData(identifier, datatype)
            if result['@id'] == guid:
                return result
        raise ValueError(filename)
    if guid is None and '@id' not in d:
        # Legacy record without an '@id': re-store it so one gets assigned.
        self.addData(identifier, datatype, d)
    return d
def testShutdownPersistsStateOnAutocommit(self):
    """With autoCommit off, state only reaches disk on handleShutdown()."""
    observer = CallTrace(emptyGeneratorMethods=['add'])
    oaiDownloadProcessor = OaiDownloadProcessor(
        path="/oai",
        metadataPrefix="oai_dc",
        workingDirectory=self.tempdir,
        autoCommit=False)
    oaiDownloadProcessor.addObserver(observer)
    consume(oaiDownloadProcessor.handle(
        parse(StringIO(LISTRECORDS_RESPONSE % RESUMPTION_TOKEN))))
    state = oaiDownloadProcessor.getState()
    statePath = join(self.tempdir, 'harvester.state')
    # Nothing persisted yet ...
    self.assertFalse(isfile(statePath))
    oaiDownloadProcessor.handleShutdown()
    # ... until shutdown writes the state file.
    self.assertEqual(
        {"errorState": None,
         'from': '2002-06-01T19:20:30Z',
         "resumptionToken": state.resumptionToken},
        JsonDict.load(statePath))
def __init__(self):
    """Parse command-line options and resolve server-side defaults.

    Side effects: appends '-h' to sys.argv when no arguments were given
    (forces help output) and copies every parsed option onto self.
    """
    if len(argv[1:]) == 0:
        argv.append('-h')
    self.parser = OptionParser()
    args = self.parse_args()
    # Expose every parsed option directly as an attribute of self.
    self.__dict__.update(args.__dict__)
    if not self.domainId:
        self.parser.error("Specify domain")
    if self._concurrency < 1:
        self.parser.error("Concurrency must be at least 1.")
    # Ask the server for its configured paths when not given explicitly.
    config = JsonDict.load(urlopen(self.serverUrl + '/info/config'))
    if self._logDir is None:
        self._logDir = config['logPath']
    if self._stateDir is None:
        self._stateDir = config['statePath']
    self.proxy = InternalServerProxy(self.serverUrl, self.setActionDone)
    # Only resolve a repository object when a repositoryId was supplied.
    self.repository = self.repositoryId and self.proxy.getRepositoryObject(
        identifier=self.repositoryId, domainId=self.domainId)
def testSetToLastCleanState(self):
    """setToLastCleanState() rewinds to the most recent harvest without a resumption token."""
    with self._State('repo') as state:
        state.getZTime = lambda: ZuluTime('2012-08-13T12:15:00Z')
        state.markStarted()
        state.markHarvested((9999, 9999, 9999, 9999), "", "2012-08-13T12:14:00")
    with self._State('repo') as state:
        state.getZTime = lambda: ZuluTime('2012-08-14T12:17:00Z')
        state.markStarted()
        state.markHarvested((9999, 9999, 9999, 9999), "resumptionToken", "2012-08-14T12:16:00")
    with self._State('repo') as state:
        state.getZTime = lambda: ZuluTime('2012-08-15T12:19:00Z')
        state.setToLastCleanState()
        self.assertRepoStats(
            """Started: 2012-08-13 12:15:00, Harvested/Uploaded/Deleted/Total: 9999/9999/9999/9999, Done: 2012-08-13 12:15:00, ResumptionToken: \nStarted: 2012-08-14 12:17:00, Harvested/Uploaded/Deleted/Total: 9999/9999/9999/9999, Done: 2012-08-14 12:17:00, ResumptionToken: resumptionToken
Started: 2012-08-15 12:19:00, Done: Reset to last clean state. ResumptionToken: \n"""
        )
        self.assertEqual(
            {"from": "2012-08-14T12:16:00",
             "resumptionToken": "",
             'lastSuccessfulHarvest': '2012-08-15T12:19:00Z'},
            JsonDict.load(join(self.statePath, 'repo.next')))
def getMapping(self, identifier):
    """Load the mapping JSON for identifier.

    :raises ValueError: 'idDoesNotExist' when no mapping file exists.
    """
    try:
        # 'with' closes the file instead of leaking the handle.
        with open(join(self._dataPath, '%s.mapping' % identifier)) as fp:
            return JsonDict.load(fp)
    except IOError:
        raise ValueError("idDoesNotExist")
def getRepository(self, identifier, domainId):
    """Load the repository JSON for (domainId, identifier).

    :raises ValueError: 'idDoesNotExist' when no repository file exists.
    """
    try:
        # 'with' closes the file instead of leaking the handle.
        with open(join(self._dataPath, '%s.%s.repository' % (domainId, identifier))) as fp:
            return JsonDict.load(fp)
    except IOError:
        raise ValueError("idDoesNotExist")
def testMarkHarvested(self):
    """markHarvested() appends a stats line and updates repo.next / repo.running."""
    with self._State('repo') as state:
        state.getZTime = lambda: ZuluTime('2012-08-13T12:15:00Z')
        self.assertEqual([0, 0], event_counts(state, 'started', 'harvested'))
        state.markStarted()
        self.assertEqual([1, 0], event_counts(state, 'started', 'harvested'))
        state.markHarvested((9999, 9999, 9999, 9999), "resumptionToken", "2012-08-13T12:14:00")
        self.assertEqual([1, 1], event_counts(state, 'started', 'harvested'))
        self.assertRepoStats(
            'Started: 2012-08-13 12:15:00, Harvested/Uploaded/Deleted/Total: 9999/9999/9999/9999, Done: 2012-08-13 12:15:00, ResumptionToken: resumptionToken\n'
        )
        self.assertEqual(
            {"from": "2012-08-13T12:14:00",
             "resumptionToken": "resumptionToken",
             'lastSuccessfulHarvest': '2012-08-13T12:15:00Z'},
            JsonDict.load(self.statePath / 'repo.next'))
        self.assertEqual(
            {"changedate": "2012-08-13 12:15:00", "status": "Ok", "message": ""},
            JsonDict.load(self.statePath / 'repo.running'))
    with self._State('repo') as state:
        # Reopening restores the previous token and from-date.
        self.assertEqual('2012-08-13T12:14:00', state.from_)
        self.assertEqual('resumptionToken', state.token)
        state.getZTime = lambda: ZuluTime('2012-08-13T12:17:00Z')
        self.assertEqual([1, 1], event_counts(state, 'started', 'harvested'))
        state.markStarted()
        self.assertEqual([2, 1], event_counts(state, 'started', 'harvested'))
        state.markHarvested((9999, 9999, 9999, 9999), "newToken", "2012-08-13T12:16:00Z")
        self.assertEqual([2, 2], event_counts(state, 'started', 'harvested'))
        self.assertRepoStats(
            """Started: 2012-08-13 12:15:00, Harvested/Uploaded/Deleted/Total: 9999/9999/9999/9999, Done: 2012-08-13 12:15:00, ResumptionToken: resumptionToken
Started: 2012-08-13 12:17:00, Harvested/Uploaded/Deleted/Total: 9999/9999/9999/9999, Done: 2012-08-13 12:17:00, ResumptionToken: newToken
""")
        self.assertEqual(
            {"from": "2012-08-13T12:14:00",
             "resumptionToken": "newToken",
             'lastSuccessfulHarvest': '2012-08-13T12:17:00Z'},
            JsonDict.load(join(self.statePath, 'repo.next')))
        self.assertEqual(
            {"changedate": "2012-08-13 12:15:00", "status": "Ok", "message": ""},
            JsonDict.load(join(self.statePath, 'repo.running')))
    with self._State('repo') as state:
        self.assertEqual('2012-08-13T12:14:00', state.from_)
        self.assertEqual('newToken', state.token)
        state.getZTime = lambda: ZuluTime('2012-08-13T12:20:00Z')
        state.verbose = True
        state.markStarted()
        # A harvest without a token clears the resumptionToken in repo.next.
        state.markHarvested((9999, 9999, 9999, 9999), token=None, responseDate="2012-08-13T12:19:00Z")
        self.assertEqual([3, 3], event_counts(state, 'started', 'harvested'))
        self.assertEqual(
            {"from": "2012-08-13T12:14:00",
             "resumptionToken": "",
             'lastSuccessfulHarvest': '2012-08-13T12:20:00Z'},
            JsonDict.load(join(self.statePath, 'repo.next')))
        self.assertEqual(
            {"changedate": "2012-08-13 12:15:00", "status": "Ok", "message": ""},
            JsonDict.load(join(self.statePath, 'repo.running')))
def getRepositoryGroupId(self, domainId, repositoryId):
    """Return the repositoryGroupId recorded in the repository's JSON file.

    :raises IOError: when the repository file does not exist.
    """
    # 'with' closes the file instead of leaking the handle.
    with open(join(self._dataPath, '%s.%s.repository' % (domainId, repositoryId))) as fp:
        return JsonDict.load(fp)['repositoryGroupId']
def _load(self):
    """Return the persisted state dict, or an empty dict when no file exists yet."""
    if isfile(self._filePath):
        return JsonDict.load(self._filePath)
    return {}
def getRepositoryGroup(self, identifier, domainId):
    """Load the repositoryGroup JSON for (domainId, identifier).

    :raises IOError: when the group file does not exist.
    """
    # 'with' closes the file instead of leaking the handle.
    with open(join(self._dataPath, '%s.%s.repositoryGroup' % (domainId, identifier))) as fp:
        return JsonDict.load(fp)
def testLoadFromFilename(self):
    """JsonDict.load accepts a filename (str) as well as a file object."""
    jd = JsonDict({'hello': 'world'})
    tempfile = join(self.tempdir, 'json.json')
    # 'with' guarantees the data is flushed and the handle closed before the
    # read-back (open(...).write(...) left both to garbage collection).
    with open(tempfile, 'w') as fp:
        fp.write(str(jd))
    jd2 = JsonDict.load(tempfile)
    # assertEqual: assertEquals is deprecated and removed in Python 3.12.
    self.assertEqual(jd, jd2)
def getUserInfo(self, username):
    """Return the stored info for username (empty when unknown), plus the username itself."""
    users = JsonDict.load(self._filepath)['users']
    info = users.get(username, {})
    info['username'] = username
    return info
def getDomain(self, identifier):
    """Load the domain JSON for identifier.

    :raises ValueError: 'idDoesNotExist' when no domain file exists.
    """
    domainFile = join(self._dataPath, '{0}.domain'.format(identifier))
    try:
        # 'with' closes the file instead of leaking the handle.
        with open(domainFile) as fp:
            return JsonDict.load(fp)
    except IOError:
        raise ValueError('idDoesNotExist')
def setUserInfo(self, username, data):
    """Store data under username and persist the whole user database."""
    database = JsonDict.load(self._filepath)
    database['users'][username] = data
    database.dump(self._filepath)
def __init__(self, stateDir, identifier):
    """Bind to the '<identifier>.group' file, loading it when it already exists."""
    self._filepath = join(stateDir, identifier + '.group')
    self.exists = isfile(self._filepath)
    if self.exists:
        self._data = JsonDict.load(self._filepath)
    else:
        # Fresh group: start with just the identifier recorded.
        self._data = JsonDict(identifier=identifier)
def listObjects(self):
    """Return the registry file's contents as a JsonDict."""
    return JsonDict.load(self._registryFile)
def loadDefinitions(path):
    """Load definitions JSON from path (empty when no path) and sanity-check repository_fields."""
    definitions = JsonDict.load(path) if path else JsonDict()
    fields = definitions.get('repository_fields', [])
    # Run every field through _fieldcheck, same as list(map(_fieldcheck, ...)).
    definitions['repository_fields'] = [_fieldcheck(field) for field in fields]
    return definitions
def _read(self):
    """Load the user database, asserting the expected on-disk version."""
    data = JsonDict.load(self._filename)
    assert data['version'] == self.version, 'Expected database version %s' % self.version
    self._info = data['users']
def getRepositoryGroupIds(self, domainId):
    """Return the domain's repositoryGroupIds ([] when the key is missing).

    :raises IOError: when the domain file does not exist.
    """
    # 'with' closes the file instead of leaking the handle.
    with open(join(self._dataPath, '%s.domain' % domainId)) as fp:
        return JsonDict.load(fp).get('repositoryGroupIds', [])
def _read(self):
    """Read the persisted user database and cache its 'users' section."""
    result = JsonDict.load(self._filename)
    versionMessage = 'Expected database version %s' % self.version
    assert result['version'] == self.version, versionMessage
    self._info = result['users']