def _httpConfigAndServices(self, apiVersion, arguments, serviceIdentifier=None, prettyPrint=False, **ignored):
    result = {}
    additionalConfigDict = result
    fullServiceInfo = arguments.get('allServiceInfo', ['False'])[0] == 'True'
    useVpn = arguments.get('useVpn', ['False'])[0] == 'True'
    retrieveAll = arguments.get('__all__', ['False'])[0] == 'True'
    keys = self._allKeys() if retrieveAll else self._keysFromArgs(arguments)
    for key in _requestedKeys(keys):
        try:
            if key == 'services':
                additionalConfigDict[key] = self.call.listServices(
                    activeOnly=not fullServiceInfo,
                    includeState=fullServiceInfo,
                    convertIpsToVpn=useVpn)
            elif key == 'config':
                additionalConfigDict[key] = self.call.getConfig()
            else:
                additionalConfigDict[key] = self.call[key].getConfiguration(allConfiguration=retrieveAll)
        except NoneOfTheObserversRespond:
            result.setdefault('errors', []).append("Key '%s' not found." % key)
    if serviceIdentifier:
        this_service = self.call.getService(identifier=serviceIdentifier)
        if this_service is not None:
            result['this_service'] = this_service
            result['this_service']['state'] = self.call.getPrivateStateFor(identifier=serviceIdentifier)
    result = JsonDict(api_version=apiVersion, domain=self.call.getDomain(), **result)
    if self._softwareVersion is not None:
        result['software_version'] = self._softwareVersion
    yield okJson
    yield result.pretty_print() if prettyPrint else str(result)
def testMarkHarvesterAfterExceptionChange(self):
    with self._State('repo') as state:
        state.getZTime = lambda: ZuluTime('2012-08-13T12:15:00Z')
        state.markStarted()
        try:
            raise ValueError("whatever")
        except:
            exType, exValue, exTraceback = exc_info()
        state.markException(exType, exValue, (100, 80, 20, 93))
        self.assertEqual([100, 80, 20], event_counts(state, 'records_harvested', 'records_uploaded', 'records_deleted'))
        self.assertEqual({
            "changedate": "2012-08-13 12:15:00",
            "status": "Error",
            "message": "whatever",
        }, JsonDict.load(join(self.statePath, 'repo.running')))

    with self._State('repo') as state:
        state.getZTime = lambda: ZuluTime('2012-08-13T12:17:00Z')
        state.markStarted()
        state.markHarvested((42, 31, 11, 135), "resumptionToken", "2012-08-13T12:14:00")
        self.assertEqual([142, 111, 31], event_counts(state, 'records_harvested', 'records_uploaded', 'records_deleted'))
        self.assertEqual({
            "changedate": "2012-08-13 12:17:00",
            "status": "Ok",
            "message": "",
        }, JsonDict.load(join(self.statePath, 'repo.running')))
def testMarkDeletedAfterExceptionChange(self):
    with self._State('repo') as state:
        state.getZTime = lambda: ZuluTime('2012-08-13T12:15:00Z')
        state.markStarted()
        try:
            raise ValueError("whatever")
        except:
            exType, exValue, exTraceback = exc_info()
        state.markException(exType, exValue, (9999, 9999, 9999, 9999))
        self.assertEqual({
            "changedate": "2012-08-13 12:15:00",
            "status": "Error",
            "message": "whatever",
        }, JsonDict.load(join(self.statePath, 'repo.running')))

    with self._State('repo') as state:
        state.getZTime = lambda: ZuluTime('2012-08-13T12:17:00Z')
        state.markStarted()
        state.markDeleted()
        self.assertEqual({
            "changedate": "2012-08-13 12:17:00",
            "status": "Ok",
            "message": "",
        }, JsonDict.load(join(self.statePath, 'repo.running')))
def jsonResponse(self, **kwargs):
    t0 = self._timeNow()
    result = yield self.any.executeQuery(**kwargs)
    queryTime = self._timeNow() - t0
    total, hits = result.total, result.hits
    jsonResponse = JsonDict({'total': total})
    if hits:
        if hasattr(result, 'items'):
            jsonResponse['items'] = result.items
        else:
            jsonResponse['items'] = []
            for hit in hits:
                jsonResponse['items'].append((yield self.any.retrieveData(
                    identifier=hit.id, name=self._defaultRecordSchema)))
    drilldownData = getattr(result, 'drilldownData', None)
    if drilldownData is not None:
        jsonFacets = jsonResponse.setdefault('facets', {})
        for facet in drilldownData:
            jsonFacets.setdefault(facet['fieldname'], facet["terms"])
    searchTime = self._timeNow() - t0
    jsonResponse['querytimes'] = {
        'handlingTime': self._querytime(searchTime),
        'queryTime': self._querytime(queryTime),
    }
    if result.queryTime:
        jsonResponse["querytimes"]["indexTime"] = self._querytime(result.queryTime / 1000.0)
    return jsonResponse  # PEP 479: was 'raise StopIteration(jsonResponse)'
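# Note: 'jsonResponse' above and 'executeQuery' further down originally ended
# with 'raise StopIteration(value)'. Under PEP 479 (Python 3.7+) that raises a
# RuntimeError inside a generator, so both rewrites use 'return value' instead.
# A minimal, framework-free sketch (illustrative only, not part of this
# codebase) of how a caller still receives the value via StopIteration.value:

def _pep479_sketch():
    def produce():
        yield 'header'       # an intermediate value, like 'yield okJson' elsewhere
        return {'total': 1}  # delivered to the caller on StopIteration.value

    gen = produce()
    assert next(gen) == 'header'
    try:
        next(gen)
    except StopIteration as e:
        assert e.value == {'total': 1}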
def handleGet(self, arguments, **kwargs):
    yield okJson
    verb = arguments.get('verb', [None])[0]
    messageKwargs = dict((k, values[0]) for k, values in list(arguments.items()) if k != 'verb')
    request = dict(**messageKwargs)
    message = None
    if verb is not None:
        message = verb[0].lower() + verb[1:]
        request['verb'] = verb
    response = JsonDict(request=request)
    try:
        if message is None:
            raise ValueError('badVerb')
        if not message.startswith('get'):
            raise ValueError('badVerb')
        response['response'] = {verb: self.call.unknown(message=message, **messageKwargs)}
    except NoneOfTheObserversRespond:
        response['error'] = error('badVerb')
    except Exception as e:
        response['error'] = error(str(e), repr(e))
    yield response.dumps()
def _configure(self):
    configuration = JsonDict({
        "path": {
            "data": ensureDir(self.stateDir, 'data'),
            "logs": ensureDir(self.stateDir, 'logs'),
            "work": ensureDir(self.stateDir, 'work'),  # temporary files
            "conf": self.configDir,
            "plugins": ensureDir(self.stateDir, 'plugins'),
        },
        "cluster": {
            "name": self.name,
        },
        "http": {
            "port": self.port,
        },
        "transport": {
            "tcp": {
                "port": self.transportPort,
            }
        },
    })
    self._configureIndex(configuration)
    if self.identifier:
        configuration.setdefault("node", dict())['name'] = self.identifier
    with open(self.configFile, 'w') as f:
        configuration.dump(f, indent=4, sort_keys=True)
def _readState(self):
    self._counts = JsonDict.load(self._countFilepath) if self._countFilepath.is_file() else JsonDict()
    if self._resumptionFilepath.is_file():
        values = JsonDict.loads(self._resumptionFilepath.read_text())
        self.token = values.get('resumptionToken', None) or None
        self.from_ = values.get('from', '') or None
        self.lastSuccessfulHarvest = values.get('lastSuccessfulHarvest', '') or None
        return
    # Fallback, only taken as long as the resumption file does not yet exist:
    # reconstruct the resumption state from the non-error lines of the stats log.
    if self._statsfilepath.is_file():
        self._statsfile = self._statsfilepath.open()
        logline = None
        for logline in self._filterNonErrorLogLine(self._statsfile):
            if not self.token:
                self.from_ = getStartDate(logline)
            self.token = getResumptionToken(logline)
        if logline and self._isDeleted(logline):
            self.from_ = None
            self.token = None
        self._statsfile.close()
        self._statsfile = None
def testAddDomain(self):
    header, body = parseResponse(asBytes(self.dna.all.handleRequest(
        user=CallTrace(returnValues=dict(isAdmin=False)),
        path="/actions/addDomain",
        Body=bytes(urlencode(dict(identifier="aap")), encoding="utf-8"),
        Method='Post')))
    self.assertEqual(0, len(self.observable.calledMethods))
    self.assertEqual("200", header['StatusCode'])
    self.assertEqual("application/json", header['Headers']['Content-Type'])
    response = JsonDict.loads(body)
    self.assertFalse(response['success'])
    self.assertEqual("Not allowed", response['message'])

    header, body = parseResponse(asBytes(self.dna.all.handleRequest(
        user=CallTrace(returnValues=dict(isAdmin=True)),
        path="/actions/addDomain",
        Body=bytes(urlencode(dict(identifier="aap")), encoding="utf-8"),
        Method='Post')))
    self.assertEqual("200", header['StatusCode'])
    self.assertEqual("application/json", header['Headers']['Content-Type'])
    response = JsonDict.loads(body)
    self.assertTrue(response['success'])
    self.assertEqual(1, len(self.observable.calledMethods))
    self.assertEqual("addDomain", self.observable.calledMethods[0].name)
    self.assertEqual(dict(identifier='aap'), self.observable.calledMethods[0].kwargs)
def dna(reactor, port, dataPath, logPath, statePath, harvesterStatusUrl, **ignored):
    harvesterData = HarvesterData(dataPath)
    repositoryStatus = be(
        (RepositoryStatus(logPath, statePath),
            (harvesterData,)
        ))
    configDict = JsonDict(
        logPath=logPath,
        statePath=statePath,
        harvesterStatusUrl=harvesterStatusUrl,
        dataPath=dataPath,
    )
    return \
        (Observable(),
            (ObservableHttpServer(reactor, port),
                (ApacheLogger(stdout),
                    (PathFilter("/info/version"),
                        (StringServer(VERSION_STRING, ContentTypePlainText),)
                    ),
                    (PathFilter("/info/config"),
                        (StringServer(configDict.dumps(), ContentTypeJson),)
                    ),
                    (PathFilter("/static"),
                        (PathRename(lambda name: name[len('/static/'):]),
                            (FileServer(seecrWebLibPath),)
                        )
                    ),
                    (PathFilter('/', excluding=['/info/version', '/info/config', '/static', '/action', '/get']),
                        (DynamicHtml(
                                [dynamicHtmlPath],
                                reactor=reactor,
                                additionalGlobals={
                                    'time': time,
                                    'harvesterStatusUrl': harvesterStatusUrl,
                                    'escapeXml': escapeXml,
                                    'compose': compose,
                                },
                                indexPage="/index.html",
                            ),
                            (harvesterData,),
                            (repositoryStatus,),
                        )
                    ),
                    (PathFilter('/action'),
                        (HarvesterDataActions(),
                            (harvesterData,)
                        ),
                    ),
                    (PathFilter('/get'),
                        (HarvesterDataRetrieve(),
                            (harvesterData,),
                            (repositoryStatus,),
                        )
                    )
                )
            )
        )
def _markRunningState(self, status, message=""):
    runningDict = JsonDict.load(self._runningFilepath) if self._runningFilepath.is_file() else {}
    if status != runningDict.get('status', None) or message != runningDict.get('message', None):
        JsonDict({
            'changedate': self.getTime(),
            'status': status,
            'message': message,
        }).dump(self._runningFilepath)
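# '_markRunningState' above rewrites the running-state file only when status or
# message actually changed, so 'changedate' records the last transition rather
# than the last check. A standalone sketch of that write-on-change pattern
# (hypothetical names, plain json instead of JsonDict, for illustration only):

import json, os.path
from datetime import datetime

def mark_state(path, status, message=""):
    current = {}
    if os.path.isfile(path):
        with open(path) as fp:
            current = json.load(fp)
    if status != current.get('status') or message != current.get('message'):
        with open(path, 'w') as fp:
            json.dump({
                'changedate': datetime.now().strftime('%Y-%m-%d %H:%M:%S'),
                'status': status,
                'message': message,
            }, fp)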
def testLoadEmptyFile(self):
    tempfile = join(self.tempdir, 'json.json')
    with open(tempfile, 'w') as fp:
        pass
    self.assertRaises(json.JSONDecodeError, lambda: JsonDict.load(tempfile))
    self.assertEqual({}, JsonDict.load(tempfile, emptyOnError=True))
    self.assertRaises(json.JSONDecodeError, lambda: JsonList.load(tempfile))
    self.assertEqual([], JsonList.load(tempfile, emptyOnError=True))
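# The 'emptyOnError' behaviour exercised above can be pictured as the following
# load-with-fallback; a hypothetical re-implementation sketched from the
# asserted behaviour, not the actual JsonDict/JsonList code:

import json

def load_with_fallback(path, emptyOnError=False, default=dict):
    """Parse a JSON file; on a decode error either re-raise or return default()."""
    try:
        with open(path) as fp:
            return json.load(fp)
    except json.JSONDecodeError:
        if emptyOnError:
            return default()  # dict() -> {}; pass default=list for the JsonList case
        raise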
def testLoad(self):
    jd = JsonDict({'hello': 'world'})
    tempfile = self.tmp_path / 'json.json'
    with open(tempfile, 'w') as fp:
        fp.write(str(jd))
    with open(tempfile) as fp:
        jd2 = JsonDict.load(fp)
    jd3 = JsonDict.load(str(tempfile))
    jd4 = JsonDict.load(tempfile)
    self.assertEqual(jd, jd2)
    self.assertEqual(jd, jd3)
    self.assertEqual(jd, jd4)
def asPostDict(self):
    drilldownFields = []
    for fieldname, options in self.fieldRegistry.drilldownFieldNames.items():
        drilldownFields.append({
            "dim": fieldname,
            "hierarchical": options["hierarchical"],
            "multiValued": options["multiValued"],
            "fieldname": options["indexFieldName"],
        })
    result = JsonDict(drilldownFields=drilldownFields)
    result.update((k[1:], v) for k, v in self.__dict__.items() if k[1:] in SETTING_NAMES)
    return result
def validate(self):
    for core in self.cores:
        if core == self.resultsFrom:
            continue
        try:
            self._matchCoreSpecs(self.resultsFrom, core)
        except KeyError:
            raise ValueError("No match set for cores %s" % str((self.resultsFrom, core)))
    if self.relationalFilterJson:
        try:
            JsonDict.loads(self.relationalFilterJson)
        except JSONDecodeError:
            raise ValueError("Value '%s' for 'relationalFilterJson' can not be parsed as JSON." % self.relationalFilterJson)
def testRehashIfNecessary(self):
    self.pwd.addUser(username='one', password='secret')
    from argon2 import PasswordHasher
    myPh = PasswordHasher(parallelism=2, memory_cost=2048)
    hashed2 = myPh.hash('secret2')
    data = JsonDict.load(join(self.tempdir, 'passwd'))
    data['users']['two'] = hashed2
    hashed1 = data['users']['one']
    data.dump(join(self.tempdir, 'passwd'))
    self.assertTrue(self.pwd.validateUser('two', 'secret2'))
    self.assertTrue(self.pwd.validateUser('one', 'secret'))
    data = JsonDict.load(join(self.tempdir, 'passwd'))
    self.assertEqual(hashed1, data['users']['one'])
    self.assertNotEqual(hashed2, data['users']['two'])
    self.assertTrue(self.pwd.validateUser('two', 'secret2'))
def _read(self):
    result = JsonDict.load(self._filename)
    assert result['version'] == self.version, 'Expected database version %s' % self.version
    groups = set(self._groups)
    groups.update(set(result['data']['groups']))
    self._groups = list(groups)
    self._users.update(result['data']['users'])
def addSuggestions(self, identifier, key, values):
    titles = [v.get('title') for v in values]
    types = [v.get('type') for v in values]
    creators = [v.get('creator') for v in values]
    yield self._connect.send(
        "/add?{}".format(urlencode(dict(identifier=identifier))),
        JsonDict(key=key, values=titles, types=types, creators=creators))
def testGetStatusForDomain(self):
    self.controlHelper(action='allInvalid')
    self.startHarvester(repository=REPOSITORY)
    header, result = getRequest(
        self.harvesterInternalServerPortNumber, '/get',
        {'verb': 'GetStatus', 'domainId': 'adomain'},
        parse=False)
    data = JsonDict.loads(result)
    self.assertEqual(2, len(data['response']['GetStatus']))
    self.assertEqual("adomain", data['request']['domainId'])
def testUpdateSettings(self):
    self.response = JsonDict(
        numberOfConcurrentTasks=6,
        similarity="BM25(k1=1.2,b=0.75)",
        clustering=JsonDict(clusterMoreRecords=100, clusteringEps=0.4, clusteringMinPoints=1))
    settings = retval(self._lucene.getSettings())
    self.assertEqual(['/settings/'], self.read)
    self.assertEqual({
        'numberOfConcurrentTasks': 6,
        'similarity': 'BM25(k1=1.2,b=0.75)',
        'clustering': {'clusterMoreRecords': 100, 'clusteringEps': 0.4, 'clusteringMinPoints': 1},
    }, settings)

    clusterFields = [{"filterValue": None, "fieldname": "untokenized.dcterms:isFormatOf.uri", "weight": 0}]
    self.response = ""
    consume(self._lucene.setSettings(
        similarity=dict(name="bm25", k1=1.0, b=2.0),
        numberOfConcurrentTasks=10,
        clustering=dict(clusterMoreRecords=200, clusteringEps=1.0, clusteringMinPoints=2, fields=clusterFields)))
    self.assertEqual(1, len(self.post))
    self.assertEqual('/lucene/settings/', self.post[0]['path'])
    self.assertEqual({
        "numberOfConcurrentTasks": 10,
        "similarity": dict(type="BM25Similarity", k1=1.0, b=2.0),
        "clustering": {
            "clusterMoreRecords": 200,
            "clusteringEps": 1.0,
            "clusteringMinPoints": 2,
            "fields": [{"filterValue": None, "fieldname": "untokenized.dcterms:isFormatOf.uri", "weight": 0}],
        },
    }, loads(self.post[0]['data']))

    consume(self._lucene.setSettings(numberOfConcurrentTasks=5, similarity=None, clustering=None))
    self.assertEqual(2, len(self.post))
    self.assertEqual('/lucene/settings/', self.post[1]['path'])
    self.assertEqual({"numberOfConcurrentTasks": 5}, loads(self.post[1]['data']))
def testErrorReportedToGustos(self):
    baseUrl = join(self.integrationTempdir, "choppy_oai.xml")
    filename = "{}?verb=ListRecords&metadataPrefix=oai_dc".format(baseUrl)
    with open(filename, "w") as fp:
        # A deliberately truncated ("choppy") OAI-PMH response, to trigger a harvest error.
        fp.write("""<?xml version="1.0" encoding="UTF-8"?>
<OAI-PMH xmlns="http://www.openarchives.org/OAI/2.0/" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://www.openarchives.org/OAI/2.0/ http://www.openarchives.org/OAI/2.0/OAI-PMH.xsd"><responseDate>2017-10-31T15:12:52Z</responseDate><request from="2017-10-04T11:52:57Z" metadataPrefix="didl_mods" verb="ListRecords">https://surfsharekit.nl/oai/hhs/</request><ListRecords><record><header><identifier>oai:surfsharekit.nl:b6ea6503-e2fc-4974-8941-2a4a405dc72f</identifier><datestamp>2017-10-04T14:16:22Z</datestamp></header><metadata><didl:DIDL xmlns:didl="urn:mpeg:mpeg21:2002:02-DIDL-NS" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">
    <didl:Item>
        <didl:Descriptor>
            <didl:Statement mimeType="application/xml">
                <dii:Identifier xmlns:dii="urn:mpeg:mpeg21:2002:01-DII-NS">urn:nbn:nl:hs:18-b6ea6503-e2fc-4974-8941-2a4a405dc72f</dii:Identifier>
            </didl:Statement>
        </didl:Descrip""")
    errorCount = len(self.gustosUdpListener.log())
    self.saveRepository(DOMAIN, REPOSITORY, REPOSITORYGROUP, baseUrl="file://{}".format(baseUrl))
    t = Thread(target=lambda: self.startHarvester(concurrency=1, runOnce=True))
    t.start()
    sleepWheel(5)
    last_logs = [JsonDict.loads(l)['data'] for l in self.gustosUdpListener.log()[errorCount:]]
    for data in reversed(last_logs):
        my_group_log = data.get(f'Harvester ({DOMAIN})', {}).get(f'{REPOSITORYGROUP}:{REPOSITORY}')
        if my_group_log is not None:
            break
    self.assertEqual({"count": 1}, my_group_log['errors'])
def testUpdateRepositoryActionForm_Action(self):
    header, body = parseResponse(asBytes(self.dna.all.handleRequest(
        user=CallTrace(returnValues=dict(isAdmin=True)),
        Method='POST',
        path='/actions/updateRepositoryActionAttributes',
        Body=bUrlencode(dict(
            identifier='repo-id',
            domainId='domain-id',
            action="-",
        ), doseq=True))))
    self.assertEqual('200', header['StatusCode'])
    self.assertEqual(dict(success=True), JsonDict.loads(body))
    self.assertEqual(1, len(self.observable.calledMethods))
    self.assertEqual('updateRepositoryAttributes', self.observable.calledMethods[0].name)
    self.assertEqual({
        'complete': False,
        'continuous': None,
        'domainId': 'domain-id',
        'identifier': 'repo-id',
        'maximumIgnore': 0,
        'action': None,
        'use': False,
    }, self.observable.calledMethods[0].kwargs)
def testInfoOnQuery(self):
    self.response = JsonDict({
        "total": 887,
        "queryTime": 6,
        "hits": [{"id": "record:1", "score": 0.1234}],
    }).dumps()
    q = ComposedQuery('coreA')
    q.addFilterQuery('coreB', query='N=true')
    q.addMatch(dict(core='coreA', uniqueKey='A'), dict(core='coreB', key='B'))
    result = retval(self._multiLucene.executeComposedQuery(q))
    self.assertEqual({
        'query': {
            'cores': ['coreB', 'coreA'],
            'drilldownQueries': {},
            'facets': {},
            'filterQueries': {'coreB': ['N=true']},
            'matches': {'coreA->coreB': [{'core': 'coreA', 'uniqueKey': 'A'}, {'core': 'coreB', 'key': 'B'}]},
            'otherCoreFacetFilters': {},
            'queries': {},
            'rankQueries': {},
            'resultsFrom': 'coreA',
            'sortKeys': [],
            'unites': [],
        },
        'type': 'ComposedQuery',
    }, result.info)
def testUpdateRepositoryFieldDefinition(self):
    header, body = parseResponse(asBytes(self.dna.all.handleRequest(
        user=CallTrace(returnValues=dict(isAdmin=True)),
        Method='POST',
        path='/actions/updateRepositoryFieldDefinitions',
        Body=bUrlencode(dict(
            identifier='repo-id',
            domainId='domain-id',
            extra_name="Herman in de zon op een terras",
            extra_no_such_field="Bestaat niet",
        ), doseq=True))))
    self.assertEqual('200', header['StatusCode'])
    self.assertEqual(dict(success=True), JsonDict.loads(body))
    self.assertEqual(1, len(self.observable.calledMethods))
    self.assertEqual('updateRepositoryFieldDefinitions', self.observable.calledMethods[0].name)
    self.assertEqual({
        'identifier': 'repo-id',
        'domainId': 'domain-id',
        'extra_no_such_field': 'Bestaat niet',
        'extra_name': "Herman in de zon op een terras",
    }, self.observable.calledMethods[0].kwargs)
def testAddTypeAndMissingValueToSortField(self):
    self.response = JsonDict({
        "total": 887,
        "queryTime": 6,
        "hits": [{"id": "record:1", "score": 0.1234}],
    }).dumps()
    cq = ComposedQuery('coreA')
    q = QueryExpressionToLuceneQueryDict([], LuceneSettings()).convert(cqlToExpression("field=value"))
    cq.setCoreQuery('coreB', q)
    cq.sortKeys = [dict(sortBy='sortField', core='coreA', sortDescending=True)]
    cq.addMatch(dict(core='coreA', uniqueKey='A'), dict(core='coreB', key='B'))
    consume(self._multiLucene.executeComposedQuery(cq))
    self.assertEqual({
        "_sortKeys": [{'core': 'coreA', 'sortBy': 'sortField', 'sortDescending': True, 'type': 'String', 'missingValue': 'STRING_FIRST'}],
        "resultsFrom": "coreA",
        '_matches': {'coreA->coreB': [{'core': 'coreA', 'uniqueKey': 'A'}, {'core': 'coreB', 'key': 'B'}]},
        "_facets": {},
        "_otherCoreFacetFilters": {},
        "_rankQueries": {},
        "_drilldownQueries": {},
        "_unites": [],
        '_queries': {'coreB': {'term': {'field': 'field', 'value': 'value'}, 'type': 'TermQuery'}},
        "cores": ["coreB", "coreA"],
        "_filterQueries": {},
    }, loads(self.post[0]['data']))
def testIncrementalHarvest(self):
    self.mockRepository = MockOaiRequest('mocktud')
    with open(self.stateDir + '/tud.stats', 'w') as f:
        f.write(' Started: 1999-12-01 16:37:41, Harvested/Uploaded/Total: 56/23/113, Done: 2004-12-31 16:39:15\n')
    with open(self.stateDir + '/tud.next', 'w') as fp:
        JsonDict({'resumptionToken': None, 'from': "1999-12-01T16:37:41Z"}).dump(fp)
    with open(self.stateDir + '/tud.ids', 'w') as f:
        for i in range(113):
            f.write('oai:tudfakeid:%05i\n' % i)
    repository = self.MockRepository3('tud', 'http://repository.tudelft.nl/oai', None, 'tud')
    logger = self.createLogger()
    h = Harvester(repository)
    h.addObserver(self)
    h.addObserver(logger)
    h.addObserver(repository.createUploader(logger.eventLogger))
    h.addObserver(repository.mapping())
    self.listRecordsFrom = None
    h.harvest()
    self.assertEqual('1999-12-01', self.listRecordsFrom)
    with open(self.stateDir + '/tud.stats') as f:
        lines = f.readlines()
    self.assertEqual(2, len(lines))
    self.assertEqual(('3', '3', '0', '116'), getHarvestedUploadedRecords(lines[1]))
def executeQuery(self, luceneQuery, start=None, stop=None, facets=None,
        sortKeys=None, suggestionRequest=None, dedupField=None,
        dedupSortField=None, clustering=False, storedFields=None, **kwargs):
    stop = 10 if stop is None else stop
    start = 0 if start is None else start
    for sortKey in sortKeys or []:
        self.updateSortKey(sortKey)
    jsonDict = JsonDict(
        query=luceneQuery,
        start=start,
        stop=stop,
        facets=facets or [],
        sortKeys=sortKeys or [],
        dedupField=dedupField,
        dedupSortField=dedupSortField,
        clustering=clustering,
        storedFields=storedFields or [],
    )
    if suggestionRequest:
        jsonDict["suggestionRequest"] = suggestionRequest
    responseDict = yield self._connect().send(jsonDict=jsonDict, path='/query/')
    response = luceneResponseFromDict(responseDict)
    response.info = {
        'type': 'Query',
        'query': simplifiedDict(dict(
            luceneQuery=luceneQuery,
            start=start,
            stop=stop,
            facets=facets,
            suggestionRequest=suggestionRequest,
            **kwargs)),
    }
    return response  # PEP 479: was 'raise StopIteration(response)'
def testContinuousHarvesting(self):
    self.mockRepository = MockOaiRequest('mocktud')
    with open(self.stateDir + '/tud.stats', 'w') as f:
        f.write(' Started: 1999-12-01 16:37:41, Harvested/Uploaded/Total: 56/23/113, Done: 2004-12-31 16:39:15\n')
    with open(self.stateDir + '/tud.next', 'w') as f:
        JsonDict({'resumptionToken': None, 'from': "2015-01-01T00:12:13Z"}).dump(f)
    repository = self.MockRepository3('tud', 'http://repository.tudelft.nl/oai', None, 'tud', continuous=True)
    logger = self.createLogger()
    h = Harvester(repository)
    h.addObserver(self)
    h.addObserver(logger)
    h.addObserver(repository.createUploader(logger.eventLogger))
    h.addObserver(repository.mapping())
    self.listRecordsFrom = None
    h.harvest()
    self.assertEqual('2015-01-01T00:12:13Z', self.listRecordsFrom)
def getRepositoryIds(self, domainId, repositoryGroupId=None):
    result = JsonList()
    allIds = self.getRepositoryGroupIds(domainId) if repositoryGroupId is None else [repositoryGroupId]
    for repositoryGroupId in allIds:
        jsonData = JsonDict.load(open(join(self._dataPath, '%s.%s.repositoryGroup' % (domainId, repositoryGroupId))))
        result.extend(jsonData.get('repositoryIds', []))
    return result
def testComposedQuery(self):
    self.response = JsonDict({
        "total": 887,
        "queryTime": 6,
        "hits": [{"id": "record:1", "score": 0.1234}],
    }).dumps()
    cq = ComposedQuery('coreA')
    q = QueryExpressionToLuceneQueryDict([], LuceneSettings()).convert(cqlToExpression("field=value"))
    cq.setCoreQuery("coreA", q)
    consume(self._multiLucene.executeComposedQuery(cq))
    self.assertEqual(1, len(self.post))
    self.assertEqual("/query/", self.post[0]['path'])
    self.assertEqual({
        "_sortKeys": [],
        "resultsFrom": "coreA",
        "_matches": {},
        "_facets": {},
        "_otherCoreFacetFilters": {},
        "_rankQueries": {},
        "_drilldownQueries": {},
        "_unites": [],
        "_queries": {"coreA": {"term": {"field": "field", "value": "value"}, "type": "TermQuery"}},
        "cores": ["coreA"],
        "_filterQueries": {},
    }, loads(self.post[0]['data']))
def testUpdateRepositoryGroup(self):
    header, body = parseResponse(asBytes(self.dna.all.handleRequest(
        user=CallTrace(returnValues=dict(isAdmin=True)),
        Method='POST',
        path='/somewhere/updateRepositoryGroup',
        Body=bUrlencode(dict(
            identifier='group',
            domainId='domain',
            nl_name="De nieuwe naam",
            en_name="The old name",
        ), doseq=True))))
    self.assertEqual('200', header['StatusCode'])
    self.assertEqual(dict(success=True), JsonDict.loads(body))
    self.assertEqual(1, len(self.observable.calledMethods))
    self.assertEqual('updateRepositoryGroup', self.observable.calledMethods[0].name)
    self.assertEqual({
        'identifier': 'group',
        'domainId': 'domain',
        'name': {'nl': 'De nieuwe naam', 'en': 'The old name'},
    }, self.observable.calledMethods[0].kwargs)
def testAddClosingHours(self):
    header, body = parseResponse(asBytes(self.dna.all.handleRequest(
        user=CallTrace(returnValues=dict(isAdmin=True)),
        Method='POST',
        path='/actions/addRepositoryClosingHours',
        Body=bUrlencode(dict(
            repositoryId='repo-id',
            domainId='domain-id',
            week="*",
            day="1",
            startHour="10",
            endHour="14",
        ), doseq=True))))
    self.assertEqual('200', header['StatusCode'])
    self.assertEqual(dict(success=True), JsonDict.loads(body))
    self.assertEqual(1, len(self.observable.calledMethods))
    self.assertEqual('addClosingHours', self.observable.calledMethods[0].name)
    self.assertEqual({
        'day': '1',
        'domainId': 'domain-id',
        'endHour': '14',
        'identifier': 'repo-id',
        'startHour': '10',
        'week': '*',
    }, self.observable.calledMethods[0].kwargs)
def testMarkException(self):
    with self._State('repo') as state:
        state.getZTime = lambda: ZuluTime('2012-08-13T12:15:00Z')
        state.markStarted()
        state.markHarvested((9999, 9999, 9999, 9999), "resumptionToken", "2012-08-13T12:14:00")
        self.assertEqual([1, 0, 1], event_counts(state, 'started', 'errors', 'harvested'))
        self.assertRepoStats(
            'Started: 2012-08-13 12:15:00, Harvested/Uploaded/Deleted/Total: 9999/9999/9999/9999, Done: 2012-08-13 12:15:00, ResumptionToken: resumptionToken\n')
        self.assertEqual({
            "from": "2012-08-13T12:14:00",
            "resumptionToken": "resumptionToken",
            'lastSuccessfulHarvest': '2012-08-13T12:15:00Z',
        }, JsonDict.load(join(self.statePath, 'repo.next')))
        self.assertEqual({
            "changedate": "2012-08-13 12:15:00",
            "status": "Ok",
            "message": "",
        }, JsonDict.load(join(self.statePath, 'repo.running')))

    with self._State('repo') as state:
        state.getZTime = lambda: ZuluTime('2012-08-13T12:17:00Z')
        state.markStarted()
        try:
            raise ValueError("whatever")
        except:
            exType, exValue, exTraceback = exc_info()
        state.markException(exType, exValue, (9999, 9999, 9999, 9999))
        self.assertEqual([2, 1, 1], event_counts(state, 'started', 'errors', 'harvested'))
        self.assertRepoStats(
            """Started: 2012-08-13 12:15:00, Harvested/Uploaded/Deleted/Total: 9999/9999/9999/9999, Done: 2012-08-13 12:15:00, ResumptionToken: resumptionToken
Started: 2012-08-13 12:17:00, Harvested/Uploaded/Deleted/Total: 9999/9999/9999/9999, Error: <class 'ValueError'>: whatever
""")
        self.assertEqual({
            "changedate": "2012-08-13 12:17:00",
            "status": "Error",
            "message": "whatever",
        }, JsonDict.load(join(self.statePath, 'repo.running')))
def convert(cls, src, dst):
    # TODO: make this work with abstract storage
    users = dict()
    with open(src) as i:
        for user, pwhash in (l.strip().split(':') for l in i if ':' in l.strip()):
            users[user] = dict(salt='', password=pwhash)
    JsonDict(users=users, version=cls.version).dump(dst)
    return cls(dst)
def urlJsonDict(self, **kwargs):
    arguments = dict((k, v) for k, v in kwargs.items() if v)
    result = JsonDict.load(
        self._urlopen("{}/get?{}".format(self._internalurl, urlencode(arguments))))
    if 'error' in result:
        raise ValueError(result['error']['message'])
    return result
def testMarkDeleted(self):
    with self._State('repo') as state:
        state.getZTime = lambda: ZuluTime('2012-08-13T12:15:00Z')
        state.markStarted()
        state.markHarvested((9999, 9999, 9999, 9999), "resumptionToken", "2012-08-13T12:14:00")
        self.assertEqual([1, 0], event_counts(state, 'harvested', 'deleted'))
        self.assertRepoStats(
            'Started: 2012-08-13 12:15:00, Harvested/Uploaded/Deleted/Total: 9999/9999/9999/9999, Done: 2012-08-13 12:15:00, ResumptionToken: resumptionToken\n')
        self.assertEqual({
            "from": "2012-08-13T12:14:00",
            "resumptionToken": "resumptionToken",
            'lastSuccessfulHarvest': '2012-08-13T12:15:00Z',
        }, JsonDict.load(join(self.statePath, 'repo.next')))
        self.assertEqual({
            "changedate": "2012-08-13 12:15:00",
            "status": "Ok",
            "message": "",
        }, JsonDict.load(join(self.statePath, 'repo.running')))

    with self._State('repo') as state:
        state.getZTime = lambda: ZuluTime('2012-08-13T12:17:00Z')
        state.markDeleted()
        self.assertRepoStats(
            """Started: 2012-08-13 12:15:00, Harvested/Uploaded/Deleted/Total: 9999/9999/9999/9999, Done: 2012-08-13 12:15:00, ResumptionToken: resumptionToken
Started: 2012-08-13 12:17:00, Harvested/Uploaded/Deleted/Total: 0/0/0/0, Done: Deleted all ids.
""")
        self.assertEqual({
            "from": "",
            "resumptionToken": "",
            'lastSuccessfulHarvest': None,
        }, JsonDict.load(join(self.statePath, 'repo.next')))
        self.assertEqual({
            "changedate": "2012-08-13 12:15:00",
            "status": "Ok",
            "message": "",
        }, JsonDict.load(join(self.statePath, 'repo.running')))
def testClear(self):
    self.startHarvester(repository=REPOSITORY)
    self.assertEqual(BATCHSIZE, self.sizeDumpDir())
    header, result = getRequest(
        self.harvesterInternalServerPortNumber, '/get',
        {'verb': 'GetStatus', 'domainId': DOMAIN, 'repositoryId': REPOSITORY},
        parse=False)
    data = JsonDict.loads(result)
    self.assertEqual(8, data['response']['GetStatus'][0]['total'])

    self.saveRepository(DOMAIN, REPOSITORY, REPOSITORYGROUP, action='clear')
    self.startHarvester(repository=REPOSITORY)
    self.assertEqual(18, self.sizeDumpDir())
    for filename in sorted(listdir(self.dumpDir))[-8:]:
        self.assertTrue('_delete.updateRequest' in filename, filename)
    header, result = getRequest(
        self.harvesterInternalServerPortNumber, '/get',
        {'verb': 'GetStatus', 'domainId': DOMAIN, 'repositoryId': REPOSITORY},
        parse=False)
    self.assertEqual(0, JsonDict.loads(result)['response']['GetStatus'][0]['total'])
def testRemovingNotListedKeys(self):
    result = asString(self.dna.all.handleRequest(
        path='/service/v2/list',
        Method='GET',
        arguments={'keys': ['-no']},
    ))
    header, body = httpSplit(result)
    dictBodyV2 = JsonDict.loads(body)
    self.assertEqual(
        ['api_version', 'config', 'domain', 'services', 'software_version'],
        sorted(dictBodyV2.keys()))
def getRepositories(self, domainId, repositoryGroupId=None):
    try:
        repositoryIds = self.getRepositoryIds(domainId=domainId, repositoryGroupId=repositoryGroupId)
    except IOError:
        raise ValueError("idDoesNotExist")
    return JsonList([
        JsonDict.load(open(join(self._dataPath, '%s.%s.repository' % (domainId, repositoryId))))
        for repositoryId in repositoryIds
    ])
def load(cls, filePath):
    state = cls(filePath=filePath)
    if isfile(filePath):
        d = JsonDict.load(filePath)
        state.datetime = d.get('datetime')
        state.harvestingReady = d.get('harvestingReady', False)
        state.error = d.get('error')
        state.resumptionAttributes = d.get('resumptionAttributes')
    return state
def testKeysAll(self):
    result = asString(self.dna.all.handleRequest(
        path='/service/v2/list',
        Method='GET',
        arguments={'__all__': ['True']},
    ))
    header, body = httpSplit(result)
    dictBodyV2 = JsonDict.loads(body)
    self.assertEqual(
        ['api_version', 'collections', 'config', 'domain', 'other', 'services', 'software_version'],
        sorted(dictBodyV2.keys()))
def testShouldHaveGlobalConfigForVersion2(self):
    result = asString(self.dna.all.handleRequest(
        path='/service/v2/list',
        arguments={},
        Method='GET',
    ))
    header, body = httpSplit(result)
    dictBodyV2 = JsonDict.loads(body)
    self.assertEqual(['host', 'port'], sorted(dictBodyV2['config'].keys()))
    self.assertEqual(
        ['api_version', 'config', 'domain', 'services', 'software_version'],
        sorted(dictBodyV2.keys()))
def testNonexistingKeys(self):
    result = asString(self.dna.all.handleRequest(
        path='/service/v2/list',
        Method='GET',
        arguments={'keys': ['no']},
    ))
    header, body = httpSplit(result)
    dictBodyV2 = JsonDict.loads(body)
    self.assertEqual(
        ['api_version', 'config', 'domain', 'errors', 'services', 'software_version'],
        sorted(dictBodyV2.keys()))
    self.assertEqual(["Key 'no' not found."], dictBodyV2['errors'])
def testGetStatusForDomainAndRepositoryId(self):
    self.controlHelper(action='allInvalid')
    self.startHarvester(repository=REPOSITORY)
    header, result = getRequest(
        self.harvesterInternalServerPortNumber, '/get',
        {'verb': 'GetStatus', 'domainId': 'adomain', 'repositoryId': 'integrationtest'},
        parse=False)
    data = JsonDict.loads(result)
    self.assertEqual("GetStatus", data['request']['verb'])
    self.assertEqual("adomain", data['request']['domainId'])
    self.assertEqual("integrationtest", data['request']['repositoryId'])
    self.assertEqual("IntegrationTest", data['response']['GetStatus'][0]['repositoryGroupId'])
    self.assertEqual(6, data['response']['GetStatus'][0]['invalid'])
def testAllKeys(self):
    result = asString(self.dna.all.handleRequest(
        path='/service/v2/list',
        Method='GET',
        arguments={'keys': ['collections,other']},
    ))
    header, body = httpSplit(result)
    dictBodyV2 = JsonDict.loads(body)
    self.assertEqual(
        ['api_version', 'collections', 'config', 'domain', 'other', 'services', 'software_version'],
        sorted(dictBodyV2.keys()))
    self.assertEqual(
        {'collection': {'provenanceSource': 'collection_source', 'enabled': True, 'name': 'collection'}},
        dictBodyV2['collections'])
    self.assertEqual(['other'], dictBodyV2['other'])
    self.assertEqual({'host': 'localhost', 'port': 8000}, dictBodyV2['config'])
    self.assertEqual({}, dictBodyV2['services'])
def testGetSomethingIsAllowed(self):
    dataRetrieve = HarvesterDataRetrieve()
    observer = CallTrace('observer', returnValues=dict(
        getSomething='get something result',
        listSomething=['a', 'b']))
    dataRetrieve.addObserver(observer)
    result = asString(dataRetrieve.handleRequest(arguments=dict(verb=['GetSomething'], argument=['value'])))
    header, body = result.split(CRLF * 2, 1)
    self.assertEqual(okJson, header + CRLF * 2)
    self.assertEqual({
        'request': {'verb': 'GetSomething', 'argument': 'value'},
        'response': {'GetSomething': 'get something result'},
    }, JsonDict.loads(body))
    self.assertEqual(['getSomething'], observer.calledMethodNames())
    self.assertEqual({'argument': 'value'}, observer.calledMethods[0].kwargs)
def testServiceRegistryOldFormat(self):
    uuid1 = str(uuid4())
    uuid2 = str(uuid4())
    with open(join(self.tempdir, 'serviceregistry.json'), 'w') as f:
        d = JsonDict({
            uuid1: {
                "ipAddress": "5.153.228.85",
                "readable": True,
                "number": 1,
                "data": {"uptime": 366867, "VERSION": "1.5.12.3"},
                "writable": True,
                "lastseen": 1423494771.904539,
                "type": "holding",
                "infoport": 35609,
            },
            uuid2: {
                "ipAddress": "5.153.228.85",
                "readable": True,
                "number": 1,
                "data": {"uptime": 366867, "VERSION": "1.5.12.3"},
                "writable": True,
                "lastseen": 1423494771.904539,
                "type": "plein",
                "infoport": 41609,
            },
        })
        d.dump(f)
    registry = ServiceRegistry(
        stateDir=self.tempdir,
        domainname='zp.example.org',
        reactor=CallTrace(),
    )
    self.assertEqual(set([uuid1, uuid2]), set(registry.listServices(activeOnly=False).keys()))
def parseHeaderAndBody(h, b=None, parseBody=True):
    if b is None:
        h, b = h
    header, body = parseResponse(h + CRLF * 2 + b)
    if body and parseBody and 'Content-Type' in header['Headers']:
        contentType = header['Headers']['Content-Type']
        if 'xml' in contentType:
            return header, XML(body)
        if 'json' in contentType:
            try:
                return header, JsonDict.loads(body) if body[0] == '{' else JsonList.loads(body)
            except JSONDecodeError:
                return header, 'JSONDecodeError in: ' + body
    return header, body
def _load(self):
    if not isfile(self._jsonFilepath):
        return {}
    data = open(self._jsonFilepath).read().strip()
    result = {}
    if '[' != data[0]:
        # Old format: a JSON object mapping identifier -> serviceDict.
        for identifier, serviceDict in JsonDict.loads(data).items():
            service = Service(domainname=self._domainname, timeout=self._timeout,
                identifier=identifier, ultimateTimeout=self._ultimateTimeout, **serviceDict)
            service.validate()
            result[service.identifier] = service
        return result
    # New format: a JSON list of serviceDicts, each carrying its own identifier.
    for service in (Service(domainname=self._domainname, timeout=self._timeout,
            ultimateTimeout=self._ultimateTimeout, **item) for item in JsonList.loads(data)):
        service.validate()
        result[service.identifier] = service
    return result
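# '_load' above distinguishes the two on-disk registry formats by peeking at the
# first character of the raw data. A hypothetical alternative, shown only to make
# the two shapes explicit, branches on the parsed JSON type instead:

import json

def split_formats(data):
    """Yield (identifier, serviceDict) pairs from either registry format."""
    parsed = json.loads(data)
    if isinstance(parsed, dict):
        # old format: {identifier: {...}, ...}
        yield from parsed.items()
    else:
        # new format: [{'identifier': ..., ...}, ...]
        for item in parsed:
            yield item.get('identifier'), item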
def _download(self, url, **kwargs):
    try:
        configuration = JsonDict.load(urlopen(url, **kwargs))
        self._cache.update(configuration)
    except (HTTPError, URLError, timeout) as e:
        sys.stderr.write("""%s (%s).
Tried: %s
-----
""" % (e.__class__.__name__, str(e), url))
        configuration = self._cache.retrieve()
        if configuration is None:
            sys.stderr.write('%s: configuration cachefile "%s" not found!\n' % (self.__class__.__name__, self._cache.filepath))
            sys.stderr.flush()
            raise
        sys.stderr.write('%s: configuration cachefile "%s" found.\n' % (self.__class__.__name__, self._cache.filepath))
        sys.stderr.flush()
def testShouldReturnOnlyRequestedKeysWithUpdate(self):
    hash = serviceUpdateHash(
        secret='guessme!',
        identifier='cc635329-c089-41a8-91be-2a4554851515',
        type='srv',
        ipAddress='127.0.0.1',
        infoport=1234)
    postBody = urlencode({
        'identifier': 'cc635329-c089-41a8-91be-2a4554851515',
        'type': 'srv',
        'ipAddress': '127.0.0.1',
        'infoport': '1234',
        'data': dumps({'VERSION': '2.718281828'}),
        'hash': hash,
    })
    result = ''.join(compose(self.dna.all.handleRequest(
        path='/service/v2/update',
        Method='POST',
        arguments={'keys': ['collections']},
        Body=postBody,
    )))
    header, body = httpSplit(result)
    dictBodyV2 = JsonDict.loads(body)
    self.assertEqual(
        ['api_version', 'collections', 'config', 'domain', 'services', 'software_version', 'this_service'],
        sorted(dictBodyV2.keys()))
def _add(self, values, identifier, **kwargs):
    self._validate(self, identifier=identifier, **kwargs)
    olddata = values.get(identifier, {})
    data = dict()
    for key in self._register['keys']:
        data[key] = kwargs.get(key, [olddata.get(key, '')])[0]
    for key in self._register['listKeys']:
        data[key] = kwargs.get(key, olddata.get(key, []))
    for key in self._register['jsonKeys']:
        newdata = kwargs.get(key, [None])[0]
        if newdata is None and key in olddata:
            data[key] = olddata[key]
            continue
        data[key] = JsonDict.loads(newdata or '{}')
    for key in self._register['booleanKeys']:
        data[key] = olddata.get(key, False)
    for key in kwargs.get('__booleanKeys__', self._register['booleanKeys']):
        if not key:
            continue
        data[key] = key in kwargs  # checkbox semantics: presence in the form means True
    values[identifier] = data
    self._save(values)
def getMapping(self, identifier):
    try:
        return JsonDict.load(open(join(self._dataPath, '%s.mapping' % identifier)))
    except IOError:
        raise ValueError("idDoesNotExist")

def getDomain(self, identifier):
    domainFile = join(self._dataPath, '{0}.domain'.format(identifier))
    try:
        return JsonDict.load(open(domainFile))
    except IOError:
        raise ValueError('idDoesNotExist')

def getRepositoryGroupIds(self, domainId):
    return JsonDict.load(open(join(self._dataPath, '%s.domain' % domainId))).get('repositoryGroupIds', [])

def getRepositoryGroup(self, identifier, domainId):
    return JsonDict.load(open(join(self._dataPath, '%s.%s.repositoryGroup' % (domainId, identifier))))