def _httpConfigAndServices(self, apiVersion, arguments, serviceIdentifier=None, prettyPrint=False, **ignored):
    """Yield a JSON response holding the requested configuration sections.

    ``arguments`` maps argument names to lists of string values; the flags
    'allServiceInfo', 'useVpn' and '__all__' count as set only when their
    first value is exactly the string 'True'.

    Keys that no observer answers for are reported under 'errors' in the
    response instead of aborting the request. When ``serviceIdentifier`` is
    given and the service is known, it is added as 'this_service' together
    with its private state.

    Yields ``okJson`` first and then the serialized response
    (pretty printed when ``prettyPrint`` is true).
    """
    def flag(name):
        return arguments.get(name, ['False'])[0] == 'True'

    fullServiceInfo = flag('allServiceInfo')
    useVpn = flag('useVpn')
    retrieveAll = flag('__all__')
    if retrieveAll:
        keys = self._allKeys()
    else:
        keys = self._keysFromArgs(arguments)

    collected = {}
    for key in _requestedKeys(keys):
        try:
            if key == 'services':
                value = self.call.listServices(activeOnly=not fullServiceInfo, includeState=fullServiceInfo, convertIpsToVpn=useVpn)
            elif key == 'config':
                value = self.call.getConfig()
            else:
                value = self.call[key].getConfiguration(allConfiguration=retrieveAll)
        except NoneOfTheObserversRespond:
            collected.setdefault('errors', []).append("Key '%s' not found." % key)
        else:
            collected[key] = value

    if serviceIdentifier:
        service = self.call.getService(identifier=serviceIdentifier)
        if service is not None:
            collected['this_service'] = service
            service['state'] = self.call.getPrivateStateFor(identifier=serviceIdentifier)

    response = JsonDict(api_version=apiVersion, domain=self.call.getDomain(), **collected)
    if self._softwareVersion is not None:
        response['software_version'] = self._softwareVersion

    yield okJson
    if prettyPrint:
        yield response.pretty_print()
    else:
        yield str(response)
Beispiel #2
0
    def testMarkHarvesterAfterExceptionChange(self):
        # A failed run must leave 'repo.running' in status "Error" with the
        # exception message; a later successful harvest must flip it back to
        # "Ok" and the event counts must accumulate over both runs.
        with self._State('repo') as state:
            state.getZTime = lambda: ZuluTime('2012-08-13T12:15:00Z')
            state.markStarted()
            try:
                raise ValueError("whatever")
            except ValueError:
                # Only the type and value are passed on; the traceback is not
                # kept in a local.
                exType, exValue, _ = exc_info()
                state.markException(exType, exValue, (100, 80, 20, 93))
            self.assertEqual([100, 80, 20],
                             event_counts(state, 'records_harvested',
                                          'records_uploaded',
                                          'records_deleted'))
        self.assertEqual(
            {
                "changedate": "2012-08-13 12:15:00",
                "status": "Error",
                "message": "whatever"
            }, JsonDict.load(join(self.statePath, 'repo.running')))

        with self._State('repo') as state:
            state.getZTime = lambda: ZuluTime('2012-08-13T12:17:00Z')
            state.markStarted()
            state.markHarvested((42, 31, 11, 135), "resumptionToken",
                                "2012-08-13T12:14:00")
            self.assertEqual([142, 111, 31],
                             event_counts(state, 'records_harvested',
                                          'records_uploaded',
                                          'records_deleted'))
        self.assertEqual(
            {
                "changedate": "2012-08-13 12:17:00",
                "status": "Ok",
                "message": ""
            }, JsonDict.load(join(self.statePath, 'repo.running')))
Beispiel #3
0
 def testLoadEmptyFile(self):
     # Loading an empty file raises JSONDecodeError by default; with
     # emptyOnError=True an empty container of the matching type is returned.
     tempfile = join(self.tempdir, 'json.json')
     open(tempfile, 'w').close()
     self.assertRaises(JSONDecodeError, lambda: JsonDict.load(tempfile))
     # assertEqual instead of the deprecated alias assertEquals
     self.assertEqual({}, JsonDict.load(tempfile, emptyOnError=True))
     self.assertRaises(JSONDecodeError, lambda: JsonList.load(tempfile))
     self.assertEqual([], JsonList.load(tempfile, emptyOnError=True))
Beispiel #4
0
    def testMarkDeletedAfterExceptionChange(self):
        # A failed run must leave 'repo.running' in status "Error"; marking
        # the repository deleted afterwards must flip it back to "Ok".
        with self._State('repo') as state:
            state.getZTime = lambda: ZuluTime('2012-08-13T12:15:00Z')
            state.markStarted()
            try:
                raise ValueError("whatever")
            except ValueError:
                # Only the type and value are passed on; the traceback is not
                # kept in a local.
                exType, exValue, _ = exc_info()
                state.markException(exType, exValue, (9999, 9999, 9999, 9999))
        self.assertEqual(
            {
                "changedate": "2012-08-13 12:15:00",
                "status": "Error",
                "message": "whatever"
            }, JsonDict.load(join(self.statePath, 'repo.running')))

        with self._State('repo') as state:
            state.getZTime = lambda: ZuluTime('2012-08-13T12:17:00Z')
            state.markStarted()
            state.markDeleted()
        self.assertEqual(
            {
                "changedate": "2012-08-13 12:17:00",
                "status": "Ok",
                "message": ""
            }, JsonDict.load(join(self.statePath, 'repo.running')))
Beispiel #5
0
 def testLoadEmptyFile(self):
     # Loading an empty file raises JSONDecodeError by default; with
     # emptyOnError=True an empty container of the matching type is returned.
     tempfile = join(self.tempdir, 'json.json')
     open(tempfile, 'w').close()
     self.assertRaises(JSONDecodeError, lambda: JsonDict.load(tempfile))
     # assertEqual instead of the deprecated alias assertEquals
     self.assertEqual({}, JsonDict.load(tempfile, emptyOnError=True))
     self.assertRaises(JSONDecodeError, lambda: JsonList.load(tempfile))
     self.assertEqual([], JsonList.load(tempfile, emptyOnError=True))
Beispiel #6
0
    def jsonResponse(self, **kwargs):
        """Generator that executes a query and builds a JSON response for it.

        All keyword arguments are forwarded to ``self.any.executeQuery``.
        The resulting JsonDict contains 'total', optionally 'items' and
        'facets', and a 'querytimes' section. The JsonDict is handed to the
        caller as the value of a StopIteration.

        NOTE(review): with PEP 479 semantics (default since Python 3.7) a
        ``raise StopIteration(...)`` inside a generator is converted into a
        RuntimeError; if this file targets Python 3 the final statement
        presumably needs to become ``return jsonResponse`` - confirm the
        target interpreter before changing it.
        """
        t0 = self._timeNow()
        result = yield self.any.executeQuery(**kwargs)

        # Time spent in executeQuery only; measured before any record retrieval.
        queryTime = self._timeNow() - t0
        total, hits = result.total, result.hits
        jsonResponse = JsonDict({'total': total})

        if hits:
            # Prefer items already present on the result; otherwise retrieve
            # the data for every hit in the default record schema.
            if hasattr(result, 'items'):
                jsonResponse['items'] = result.items
            else:
                jsonResponse['items'] = []
                for hit in hits:
                    jsonResponse['items'].append((yield self.any.retrieveData(
                        identifier=hit.id, name=self._defaultRecordSchema)))

        drilldownData = getattr(result, 'drilldownData', None)
        if drilldownData is not None:
            jsonFacets = jsonResponse.setdefault('facets', {})
            for facet in drilldownData:
                # setdefault: the first facet seen for a fieldname wins.
                jsonFacets.setdefault(facet['fieldname'], facet["terms"])

        # Total handling time, including record retrieval and facet assembly.
        searchTime = self._timeNow() - t0
        jsonResponse['querytimes'] = {
            'handlingTime': self._querytime(searchTime),
            'queryTime': self._querytime(queryTime),
        }
        if result.queryTime:
            # result.queryTime is divided by 1000 before formatting
            # (presumably milliseconds -> seconds; verify against the backend).
            jsonResponse["querytimes"]["indexTime"] = self._querytime(
                result.queryTime / 1000.0)

        raise StopIteration(jsonResponse)
Beispiel #7
0
 def handleGet(self, arguments, **kwargs):
     """Handle a GET request and yield a JSON response.

     ``arguments`` maps argument names to lists of values; only the first
     value of each argument is used. The 'verb' argument selects the message
     to send: its first character is lowercased and the result must start
     with 'get'. All other arguments are passed on as keyword arguments.

     ``okJson`` is yielded before any processing, so every failure is
     reported inside the JSON body under 'error':
     - a missing, empty or non-'get' verb gives a 'badVerb' ValueError;
     - a verb no observer responds to gives error('badVerb');
     - any other exception is reported with its str() and repr().
     """
     yield okJson
     verb = arguments.get('verb', [None])[0]
     messageKwargs = {k: values[0] for k, values in arguments.items() if k != 'verb'}
     request = dict(messageKwargs)
     message = None
     if verb is not None:
         # Slice instead of index: an empty verb ('?verb=') must end up as a
         # badVerb error below, not as an IndexError outside the try block.
         message = verb[:1].lower() + verb[1:]
         request['verb'] = verb
     response = JsonDict(request=request)
     try:
         if message is None or not message.startswith('get'):
             raise ValueError('badVerb')
         response['response'] = {
             verb: self.call.unknown(message=message, **messageKwargs)
         }
     except NoneOfTheObserversRespond:
         response['error'] = error('badVerb')
     except Exception as e:
         response['error'] = error(str(e), repr(e))
     yield response.dumps()
Beispiel #8
0
 def _configure(self):
     """Assemble the configuration and write it as JSON to ``self.configFile``.

     The data, logs, work and plugins directories are obtained through
     ensureDir below ``self.stateDir``. ``self._configureIndex`` gets the
     configuration to add to it before it is written. A node name is only
     included when ``self.identifier`` is set.
     """
     paths = {
         "data": ensureDir(self.stateDir, 'data'),
         "logs": ensureDir(self.stateDir, 'logs'),
         "work": ensureDir(self.stateDir, 'work'), # temporary files
         "conf": self.configDir,
         "plugins": ensureDir(self.stateDir, 'plugins'),
     }
     cluster = {"name": self.name}
     http = {"port": self.port}
     transport = {"tcp": {"port": self.transportPort}}
     settings = JsonDict({
         "path": paths,
         "cluster": cluster,
         "http": http,
         "transport": transport,
     })
     self._configureIndex(settings)
     if self.identifier:
         node = settings.setdefault("node", dict())
         node['name'] = self.identifier
     with open(self.configFile, 'w') as configFile:
         settings.dump(configFile, indent=4, sort_keys=True)
Beispiel #9
0
    def _readState(self):
        """Load the counts and the resumption state from disk.

        The counts are read from ``self._countFilepath`` when it exists,
        otherwise an empty JsonDict is used. When the resumption file exists,
        token, from_ and lastSuccessfulHarvest are taken from it (empty
        values become None). Otherwise they are reconstructed from the stats
        log file, if there is one.
        """
        if self._countFilepath.is_file():
            self._counts = JsonDict.load(self._countFilepath)
        else:
            self._counts = JsonDict()
        if self._resumptionFilepath.is_file():
            values = JsonDict.loads(self._resumptionFilepath.read_text())
            self.token = values.get('resumptionToken', None) or None
            self.from_ = values.get('from', '') or None
            self.lastSuccessfulHarvest = values.get('lastSuccessfulHarvest',
                                                    '') or None
            return

        # The mechanism below will only be carried out once in case the resumption file does not yet exist.
        if self._statsfilepath.is_file():
            self._statsfile = self._statsfilepath.open()
            try:
                logline = None
                for logline in self._filterNonErrorLogLine(self._statsfile):
                    # The start date is only taken from a line when no token
                    # is set at that point.
                    if not self.token:
                        self.from_ = getStartDate(logline)
                    self.token = getResumptionToken(logline)
                # A deletion as last non-error line resets the state.
                if logline and self._isDeleted(logline):
                    self.from_ = None
                    self.token = None
            finally:
                # Always release the file handle, also when parsing a log
                # line raises.
                self._statsfile.close()
                self._statsfile = None
    def testAddDomain(self):
        # A non-admin user gets a "Not allowed" JSON answer and nothing is
        # forwarded to the observable; an admin user triggers exactly one
        # addDomain call with the posted identifier.
        def postAddDomain(isAdmin):
            return parseResponse(
                asBytes(
                    self.dna.all.handleRequest(
                        user=CallTrace(returnValues=dict(isAdmin=isAdmin)),
                        path="/actions/addDomain",
                        Body=bytes(urlencode(dict(identifier="aap")),
                                   encoding="utf-8"),
                        Method='Post')))

        header, body = postAddDomain(False)
        self.assertEqual(0, len(self.observable.calledMethods))
        self.assertEqual("200", header['StatusCode'])
        self.assertEqual("application/json", header['Headers']['Content-Type'])
        denied = JsonDict.loads(body)
        self.assertFalse(denied['success'])
        self.assertEqual("Not allowed", denied['message'])

        header, body = postAddDomain(True)
        self.assertEqual("200", header['StatusCode'])
        self.assertEqual("application/json", header['Headers']['Content-Type'])
        accepted = JsonDict.loads(body)
        self.assertTrue(accepted['success'])
        calls = self.observable.calledMethods
        self.assertEqual(1, len(calls))
        self.assertEqual("addDomain", calls[0].name)
        self.assertEqual(dict(identifier='aap'), calls[0].kwargs)
Beispiel #11
0
def dna(reactor, port, dataPath, logPath, statePath, harvesterStatusUrl, **ignored):
    """Build and return the observer tree (nested tuples) of the HTTP server.

    The tree is rooted at an Observable with an ObservableHttpServer on
    ``port`` and an ApacheLogger writing to stdout. Below the logger the
    request is offered to these PathFilter branches:

    - /info/version : StringServer with VERSION_STRING (plain text)
    - /info/config  : StringServer with the JSON dump of the given paths/url
    - /static       : FileServer on seecrWebLibPath ('/static/' prefix removed)
    - /             : DynamicHtml pages, excluding the paths handled elsewhere
    - /action       : HarvesterDataActions
    - /get          : HarvesterDataRetrieve

    Additional keyword arguments are accepted and ignored.
    """
    # One HarvesterData instance is shared by all branches that need it.
    harvesterData = HarvesterData(dataPath)
    repositoryStatus = be((RepositoryStatus(logPath, statePath),
            (harvesterData,)
        ))
    # Served verbatim (as JSON) on /info/config; dumped once, at build time.
    configDict = JsonDict(
            logPath=logPath,
            statePath=statePath,
            harvesterStatusUrl=harvesterStatusUrl,
            dataPath=dataPath,
        )

    return \
        (Observable(),
            (ObservableHttpServer(reactor, port),
                (ApacheLogger(stdout),
                    (PathFilter("/info/version"),
                        (StringServer(VERSION_STRING, ContentTypePlainText), )
                    ),
                    (PathFilter("/info/config"),
                        (StringServer(configDict.dumps(), ContentTypeJson), )
                    ),
                    (PathFilter("/static"),
                        (PathRename(lambda name: name[len('/static/'):]),
                            (FileServer(seecrWebLibPath),)
                        )
                    ),
                    (PathFilter('/', excluding=['/info/version', '/info/config', '/static', '/action', '/get']),
                        (DynamicHtml(
                                [dynamicHtmlPath],
                                reactor=reactor,
                                additionalGlobals = {
                                    'time': time,
                                    'harvesterStatusUrl': harvesterStatusUrl,
                                    'escapeXml': escapeXml,
                                    'compose': compose,
                                },
                                indexPage="/index.html",
                            ),
                            (harvesterData,),
                            (repositoryStatus,),
                        )
                    ),
                    (PathFilter('/action'),
                        (HarvesterDataActions(),
                            (harvesterData,)
                        ),
                    ),
                    (PathFilter('/get'),
                        (HarvesterDataRetrieve(),
                            (harvesterData,),
                            (repositoryStatus,),
                        )
                    )
                )
            )
        )
Beispiel #12
0
 def _markRunningState(self, status, message=""):
     """Persist the running state, but only when it actually changed.

     The stored state is read from ``self._runningFilepath`` (an empty dict
     when the file does not exist). A new state with a fresh 'changedate'
     is written only if ``status`` or ``message`` differs from the stored
     values.
     """
     if self._runningFilepath.is_file():
         previous = JsonDict.load(self._runningFilepath)
     else:
         previous = {}
     changed = (status != previous.get('status', None)
                or message != previous.get('message', None))
     if not changed:
         return
     newState = JsonDict({
         'changedate': self.getTime(),
         'status': status,
         'message': message
     })
     newState.dump(self._runningFilepath)
Beispiel #13
0
 def testLoadEmptyFile(self):
     # Loading an empty file raises json.JSONDecodeError by default; with
     # emptyOnError=True an empty container of the matching type is returned.
     emptyPath = join(self.tempdir, 'json.json')
     with open(emptyPath, 'w'):
         pass
     for jsonType, emptyValue in ((JsonDict, {}), (JsonList, [])):
         self.assertRaises(json.JSONDecodeError,
                           lambda: jsonType.load(emptyPath))
         self.assertEqual(emptyValue,
                          jsonType.load(emptyPath, emptyOnError=True))
Beispiel #14
0
 def testLoad(self):
     # JsonDict.load must accept an open file object, a path given as str
     # and a path object, and give back the dict that was written.
     expected = JsonDict({'hello': 'world'})
     jsonPath = self.tmp_path / 'json.json'
     with open(jsonPath, 'w') as out:
         out.write(str(expected))
     with open(jsonPath) as source:
         fromFile = JsonDict.load(source)
     fromString = JsonDict.load(str(jsonPath))
     fromPath = JsonDict.load(jsonPath)
     for loaded in (fromFile, fromString, fromPath):
         self.assertEqual(expected, loaded)
Beispiel #15
0
 def asPostDict(self):
     """Return the settings as a JsonDict suitable for posting.

     'drilldownFields' holds one entry per drilldown field known to
     ``self.fieldRegistry``. In addition, every instance attribute whose
     name without its first character occurs in SETTING_NAMES is included
     under that shortened name (presumably stripping a leading underscore -
     verify against the attribute names).
     """
     drilldownFields = [
         {
             "dim": fieldname,
             "hierarchical": options["hierarchical"],
             "multiValued": options["multiValued"],
             "fieldname": options["indexFieldName"]
         }
         for fieldname, options in self.fieldRegistry.drilldownFieldNames.items()
     ]
     result = JsonDict(drilldownFields=drilldownFields)
     # dict.items() works on Python 2 and 3; iteritems() exists on Python 2 only.
     result.update((k[1:], v) for k, v in self.__dict__.items() if k[1:] in SETTING_NAMES)
     return result
Beispiel #16
0
 def validate(self):
     """Check that the query is consistent; raise ValueError when it is not.

     Every core other than ``self.resultsFrom`` must have a match
     specification with it (a KeyError from ``_matchCoreSpecs`` is reported
     as a missing match), and ``self.relationalFilterJson``, when set, must
     be parseable as JSON.
     """
     for core in self.cores:
         if not core == self.resultsFrom:
             try:
                 self._matchCoreSpecs(self.resultsFrom, core)
             except KeyError:
                 corePair = (self.resultsFrom, core)
                 raise ValueError("No match set for cores %s" % str(corePair))
     if not self.relationalFilterJson:
         return
     try:
         JsonDict.loads(self.relationalFilterJson)
     except JSONDecodeError:
         raise ValueError("Value '%s' for 'relationalFilterJson' can not be parsed as JSON." % self.relationalFilterJson)
 def asPostDict(self):
     """Return the settings as a JsonDict suitable for posting.

     'drilldownFields' holds one entry per drilldown field known to
     ``self.fieldRegistry``. In addition, every instance attribute whose
     name without its first character occurs in SETTING_NAMES is included
     under that shortened name (presumably stripping a leading underscore -
     verify against the attribute names).
     """
     drilldownFields = []
     for fieldname, options in self.fieldRegistry.drilldownFieldNames.items():
         drilldownFields.append({
             "dim": fieldname,
             "hierarchical": options["hierarchical"],
             "multiValued": options["multiValued"],
             "fieldname": options["indexFieldName"]
         })
     result = JsonDict(drilldownFields=drilldownFields)
     # Use items() here as well: it matches the loop above and, unlike
     # iteritems(), also exists on Python 3.
     result.update((k[1:], v) for k, v in self.__dict__.items()
                   if k[1:] in SETTING_NAMES)
     return result
Beispiel #18
0
 def testRehashIfNecessary(self):
     # A hash created with different argon2 parameters must be replaced
     # (rehashed) when its user validates, while a hash created by the
     # password file itself must stay untouched.
     #
     # The credentials must match the lookups below: the test reads
     # data['users']['one'] and validates user 'one' with password 'secret'.
     self.pwd.addUser(username='one', password='secret')
     from argon2 import PasswordHasher
     myPh = PasswordHasher(parallelism=2, memory_cost=2048)
     hashed2 = myPh.hash('secret2')
     data = JsonDict.load(join(self.tempdir, 'passwd'))
     data['users']['two'] = hashed2
     hashed1 = data['users']['one']
     data.dump(join(self.tempdir, 'passwd'))
     self.assertTrue(self.pwd.validateUser('two', 'secret2'))
     self.assertTrue(self.pwd.validateUser('one', 'secret'))
     data = JsonDict.load(join(self.tempdir, 'passwd'))
     self.assertEqual(hashed1, data['users']['one'])
     self.assertNotEqual(hashed2, data['users']['two'])
     # The rehashed entry must still validate.
     self.assertTrue(self.pwd.validateUser('two', 'secret2'))
Beispiel #19
0
 def validate(self):
     """Raise ValueError when the query is not consistent.

     Checks that ``_matchCoreSpecs`` knows a match between
     ``self.resultsFrom`` and each other core (a KeyError is reported as a
     missing match) and that ``self.relationalFilterJson``, when set, can be
     parsed as JSON.
     """
     resultsFrom = self.resultsFrom
     for core in self.cores:
         if core == resultsFrom:
             continue
         try:
             self._matchCoreSpecs(resultsFrom, core)
         except KeyError:
             raise ValueError(
                 "No match set for cores %s" % str((resultsFrom, core)))
     filterJson = self.relationalFilterJson
     if filterJson:
         try:
             JsonDict.loads(filterJson)
         except JSONDecodeError:
             raise ValueError(
                 "Value '%s' for 'relationalFilterJson' can not be parsed as JSON."
                 % filterJson)
Beispiel #20
0
 def _read(self):
     """Merge groups and users stored in ``self._filename`` into this object.

     The stored 'version' must equal ``self.version`` (checked with an
     assert). Stored groups are united with the current ones; stored users
     are added to, or overwrite, the entries in ``self._users``.
     """
     stored = JsonDict.load(self._filename)
     assert stored['version'] == self.version, 'Expected database version %s' % self.version
     mergedGroups = set(self._groups)
     data = stored['data']
     mergedGroups.update(set(data['groups']))
     self._groups = list(mergedGroups)
     self._users.update(data['users'])
 def addSuggestions(self, identifier, key, values):
     """Send the suggestions for ``identifier`` to the '/add' path.

     ``values`` is a collection of dicts; their 'title', 'type' and
     'creator' entries (None when absent) are sent as three parallel lists
     together with ``key``. Yields whatever ``self._connect.send`` returns.
     """
     def column(field):
         return [value.get(field) for value in values]

     titles, types, creators = (
         column(field) for field in ('title', 'type', 'creator'))
     path = "/add?{}".format(urlencode(dict(identifier=identifier)))
     body = JsonDict(key=key, values=titles, types=types, creators=creators)
     yield self._connect.send(path, body)
Beispiel #22
0
 def testGetStatusForDomain(self):
     # GetStatus for a whole domain must list two entries and echo the
     # requested domainId in the 'request' section of the response.
     self.controlHelper(action='allInvalid')
     self.startHarvester(repository=REPOSITORY)
     header, result = getRequest(self.harvesterInternalServerPortNumber, '/get', {'verb': 'GetStatus', 'domainId': 'adomain'}, parse=False)
     data = JsonDict.loads(result)
     # assertEqual instead of the deprecated alias assertEquals
     self.assertEqual(2, len(data['response']['GetStatus']))
     self.assertEqual("adomain", data['request']['domainId'])
Beispiel #23
0
    def testUpdateSettings(self):
        # getSettings must read '/settings/' and return the decoded response;
        # setSettings must post only the settings that were given (not None)
        # to '/lucene/settings/', with the similarity translated to its
        # posted representation.
        self.response = JsonDict(numberOfConcurrentTasks=6, similarity="BM25(k1=1.2,b=0.75)", clustering=JsonDict(clusterMoreRecords=100, clusteringEps=0.4, clusteringMinPoints=1))
        settings = retval(self._lucene.getSettings())
        self.assertEqual(['/settings/'], self.read)
        # assertEqual instead of the deprecated alias assertEquals,
        # consistent with the rest of this test
        self.assertEqual({'numberOfConcurrentTasks': 6, 'similarity': u'BM25(k1=1.2,b=0.75)', 'clustering': {'clusterMoreRecords': 100, 'clusteringEps': 0.4, 'clusteringMinPoints': 1}}, settings)

        clusterFields = [
            {"filterValue": None, "fieldname": "untokenized.dcterms:isFormatOf.uri", "weight": 0}
        ]
        self.response = ""
        consume(self._lucene.setSettings(similarity=dict(name="bm25", k1=1.0, b=2.0), numberOfConcurrentTasks=10, clustering=dict(clusterMoreRecords=200, clusteringEps=1.0, clusteringMinPoints=2, fields=clusterFields)))
        self.assertEqual(1, len(self.post))
        self.assertEqual('/lucene/settings/', self.post[0]['path'])
        self.assertEqual({
                "numberOfConcurrentTasks": 10,
                "similarity": dict(type="BM25Similarity", k1=1.0, b=2.0),
                "clustering": {
                    "clusterMoreRecords": 200,
                    "clusteringEps": 1.0,
                    "clusteringMinPoints": 2,
                    "fields": [
                        {"filterValue": None, "fieldname": "untokenized.dcterms:isFormatOf.uri", "weight": 0}
                    ]
                }
            }, loads(self.post[0]['data']))

        consume(self._lucene.setSettings(numberOfConcurrentTasks=5, similarity=None, clustering=None))
        self.assertEqual(2, len(self.post))
        self.assertEqual('/lucene/settings/', self.post[1]['path'])
        self.assertEqual({
                "numberOfConcurrentTasks": 5,
            }, loads(self.post[1]['data']))
Beispiel #24
0
    def testErrorReportedToGustos(self):
        # Harvesting a truncated (not well-formed) OAI-PMH response must be
        # reported to the gustos listener as exactly one error for this
        # repository.
        baseUrl = join(self.integrationTempdir, "choppy_oai.xml")
        filename = "{}?verb=ListRecords&metadataPrefix=oai_dc".format(baseUrl)
        with open(filename, "w") as fp:
            fp.write("""<?xml version="1.0" encoding="UTF-8"?>
            <OAI-PMH xmlns="http://www.openarchives.org/OAI/2.0/" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://www.openarchives.org/OAI/2.0/ http://www.openarchives.org/OAI/2.0/OAI-PMH.xsd"><responseDate>2017-10-31T15:12:52Z</responseDate><request from="2017-10-04T11:52:57Z" metadataPrefix="didl_mods" verb="ListRecords">https://surfsharekit.nl/oai/hhs/</request><ListRecords><record><header><identifier>oai:surfsharekit.nl:b6ea6503-e2fc-4974-8941-2a4a405dc72f</identifier><datestamp>2017-10-04T14:16:22Z</datestamp></header><metadata><didl:DIDL xmlns:didl="urn:mpeg:mpeg21:2002:02-DIDL-NS" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">
              <didl:Item> <didl:Descriptor> <didl:Statement mimeType="application/xml"> <dii:Identifier xmlns:dii="urn:mpeg:mpeg21:2002:01-DII-NS">urn:nbn:nl:hs:18-b6ea6503-e2fc-4974-8941-2a4a405dc72f</dii:Identifier>
                                      </didl:Statement> </didl:Descrip""")

        # Remember how many messages were logged before this test started.
        errorCount = len(self.gustosUdpListener.log())
        self.saveRepository(DOMAIN,
                            REPOSITORY,
                            REPOSITORYGROUP,
                            baseUrl="file://{}".format(baseUrl))
        t = Thread(
            target=lambda: self.startHarvester(concurrency=1, runOnce=True))
        t.start()

        sleepWheel(5)
        last_logs = [
            JsonDict.loads(l)['data']
            for l in self.gustosUdpListener.log()[errorCount:]
        ]
        # Initialise explicitly: without any new log message the loop body
        # never runs and the name would otherwise be unbound.
        my_group_log = None
        for data in reversed(last_logs):
            my_group_log = data.get(f'Harvester ({DOMAIN})',
                                    {}).get(f'{REPOSITORYGROUP}:{REPOSITORY}')
            if my_group_log is not None:
                break
        # Fail with a readable assertion instead of a TypeError on None.
        self.assertIsNotNone(my_group_log)
        self.assertEqual({"count": 1}, my_group_log['errors'])
 def testUpdateRepositoryActionForm_Action(self):
     # Posting action "-" must result in one updateRepositoryAttributes call
     # with action None and the remaining attributes at the values asserted
     # below.
     formData = dict(
         identifier='repo-id',
         domainId='domain-id',
         action="-",
     )
     rawResponse = self.dna.all.handleRequest(
         user=CallTrace(returnValues=dict(isAdmin=True)),
         Method='POST',
         path='/actions/updateRepositoryActionAttributes',
         Body=bUrlencode(formData, doseq=True))
     header, body = parseResponse(asBytes(rawResponse))
     self.assertEqual('200', header['StatusCode'])
     self.assertEqual(dict(success=True), JsonDict.loads(body))
     calls = self.observable.calledMethods
     self.assertEqual(1, len(calls))
     self.assertEqual('updateRepositoryAttributes', calls[0].name)
     expectedKwargs = {
         'complete': False,
         'continuous': None,
         'domainId': 'domain-id',
         'identifier': 'repo-id',
         'maximumIgnore': 0,
         'action': None,
         'use': False
     }
     self.assertEqual(expectedKwargs, calls[0].kwargs)
Beispiel #26
0
    def testInfoOnQuery(self):
        # The result of a composed query must carry an 'info' dict that
        # describes the query that was executed.
        self.response = JsonDict({
                "total": 887,
                "queryTime": 6,
                "hits": [{"id": "record:1", "score": 0.1234}]
            }).dumps()

        q = ComposedQuery('coreA')
        q.addFilterQuery('coreB', query='N=true')
        q.addMatch(dict(core='coreA', uniqueKey='A'), dict(core='coreB', key='B'))
        result = retval(self._multiLucene.executeComposedQuery(q))
        # assertEqual instead of the deprecated alias assertEquals
        self.assertEqual({
            'query': {
                'cores': ['coreB', 'coreA'],
                'drilldownQueries': {},
                'facets': {},
                'filterQueries': {'coreB': ['N=true']},
                'matches': {'coreA->coreB': [{'core': 'coreA', 'uniqueKey': 'A'}, {'core': 'coreB', 'key': 'B'}]},
                'otherCoreFacetFilters': {},
                'queries': {},
                'rankQueries': {},
                'resultsFrom': 'coreA',
                'sortKeys': [],
                'unites': []
            },
            'type': 'ComposedQuery'
        }, result.info)
    def testUpdateRepositoryFieldDefinition(self):
        # All posted form fields, including the 'extra_' ones, must be passed
        # on unchanged in a single updateRepositoryFieldDefinitions call.
        formData = dict(
            identifier='repo-id',
            domainId='domain-id',
            extra_name="Herman in de zon op een terras",
            extra_no_such_field="Bestaat niet")
        rawResponse = self.dna.all.handleRequest(
            user=CallTrace(returnValues=dict(isAdmin=True)),
            Method='POST',
            path='/actions/updateRepositoryFieldDefinitions',
            Body=bUrlencode(formData, doseq=True))
        header, body = parseResponse(asBytes(rawResponse))
        self.assertEqual('200', header['StatusCode'])
        self.assertEqual(dict(success=True), JsonDict.loads(body))
        calls = self.observable.calledMethods
        self.assertEqual(1, len(calls))
        self.assertEqual('updateRepositoryFieldDefinitions', calls[0].name)

        expectedKwargs = {
            'identifier': 'repo-id',
            'domainId': 'domain-id',
            'extra_no_such_field': 'Bestaat niet',
            'extra_name': "Herman in de zon op een terras"
        }
        self.assertEqual(expectedKwargs, calls[0].kwargs)
Beispiel #28
0
    def testAddTypeAndMissingValueToSortField(self):
        # A sort key given without type information must be posted with
        # 'type' and 'missingValue' added to it.
        cannedResponse = JsonDict({
                "total": 887,
                "queryTime": 6,
                "hits": [{"id": "record:1", "score": 0.1234}]
            })
        self.response = cannedResponse.dumps()

        composed = ComposedQuery('coreA')
        converter = QueryExpressionToLuceneQueryDict([], LuceneSettings())
        coreQuery = converter.convert(cqlToExpression("field=value"))
        composed.setCoreQuery('coreB', coreQuery)
        composed.sortKeys = [dict(sortBy='sortField', core='coreA', sortDescending=True)]
        composed.addMatch(dict(core='coreA', uniqueKey='A'), dict(core='coreB', key='B'))
        consume(self._multiLucene.executeComposedQuery(composed))

        expectedPost = {
            "_sortKeys": [{'core': 'coreA', 'sortBy': 'sortField', 'sortDescending': True, 'type': 'String', 'missingValue': 'STRING_FIRST'}],
            "resultsFrom": "coreA",
            '_matches': {'coreA->coreB': [{'core': 'coreA', 'uniqueKey': 'A'}, {'core': 'coreB', 'key': 'B'}]},
            "_facets": {},
            "_otherCoreFacetFilters": {},
            "_rankQueries": {},
            "_drilldownQueries": {},
            "_unites": [],
            '_queries': {'coreB': {'term': {'field': 'field', 'value': 'value'}, 'type': 'TermQuery'}},
            "cores": ["coreB", "coreA"],
            "_filterQueries": {}
        }
        self.assertEqual(expectedPost, loads(self.post[0]['data']))
Beispiel #29
0
    def testIncrementalHarvest(self):
        # With an existing stats line and a stored 'from' date, the harvest
        # must continue from that date and append a second stats line.
        self.mockRepository = MockOaiRequest('mocktud')
        statsPath = self.stateDir + '/tud.stats'
        with open(statsPath, 'w') as statsFile:
            statsFile.write(
                ' Started: 1999-12-01 16:37:41, Harvested/Uploaded/Total: 56/23/113, Done: 2004-12-31 16:39:15\n'
            )
        resumption = JsonDict({
            'resumptionToken': None,
            'from': "1999-12-01T16:37:41Z"
        })
        with open(self.stateDir + '/tud.next', 'w') as nextFile:
            resumption.dump(nextFile)

        with open(self.stateDir + '/tud.ids', 'w') as idsFile:
            for number in range(113):
                idsFile.write('oai:tudfakeid:%05i\n' % number)
        repository = self.MockRepository3(
            'tud', 'http://repository.tudelft.nl/oai', None, 'tud')
        logger = self.createLogger()
        harvester = Harvester(repository)
        harvester.addObserver(self)
        harvester.addObserver(logger)
        harvester.addObserver(repository.createUploader(logger.eventLogger))
        harvester.addObserver(repository.mapping())
        self.listRecordsFrom = None
        harvester.harvest()
        self.assertEqual('1999-12-01', self.listRecordsFrom)
        with open(statsPath) as statsFile:
            statsLines = statsFile.readlines()
        self.assertEqual(2, len(statsLines))
        self.assertEqual(('3', '3', '0', '116'),
                         getHarvestedUploadedRecords(statsLines[1]))
Beispiel #30
0
    def executeQuery(self, luceneQuery, start=None, stop=None, facets=None, sortKeys=None, suggestionRequest=None, dedupField=None, dedupSortField=None, clustering=False, storedFields=None, **kwargs):
        """Generator that posts a query to the '/query/' path and delivers the response.

        ``start`` defaults to 0 and ``stop`` to 10 when None. Every sort key
        is passed through ``self.updateSortKey`` before it is sent. The
        'suggestionRequest' entry is only included in the posted JsonDict
        when it is truthy.

        The response object (built by luceneResponseFromDict, with an 'info'
        attribute describing the query) is handed to the caller as the value
        of a StopIteration. Extra keyword arguments only end up in that
        'info' description; they are not sent.

        NOTE(review): with PEP 479 semantics (default since Python 3.7) a
        ``raise StopIteration(...)`` inside a generator is converted into a
        RuntimeError; if this file targets Python 3 this presumably needs to
        become ``return response`` - confirm the target interpreter.
        """
        stop = 10 if stop is None else stop
        start = 0 if start is None else start

        for sortKey in sortKeys or []:
            self.updateSortKey(sortKey)
        jsonDict = JsonDict(
            query=luceneQuery,
            start=start,
            stop=stop,
            facets=facets or [],
            sortKeys=sortKeys or [],
            dedupField=dedupField,
            dedupSortField=dedupSortField,
            clustering=clustering,
            storedFields=storedFields or [],
        )
        if suggestionRequest:
            jsonDict["suggestionRequest"] = suggestionRequest
        responseDict = (yield self._connect().send(jsonDict=jsonDict, path='/query/'))
        response = luceneResponseFromDict(responseDict)
        # 'info' records the query as requested: facets is passed as given
        # here (possibly None), unlike the 'facets or []' that was posted.
        response.info = {
            'type': 'Query',
            'query': simplifiedDict(dict(
                    luceneQuery=luceneQuery,
                    start=start,
                    stop=stop,
                    facets=facets,
                    suggestionRequest=suggestionRequest,
                    **kwargs
                ))
            }
        raise StopIteration(response)
        # Unreachable; the function is already a generator because of the
        # yield expression above.
        yield
Beispiel #31
0
    def testContinuousHarvesting(self):
        # For a repository marked continuous, the harvest must start from the
        # full timestamp stored in the resumption file.
        self.mockRepository = MockOaiRequest('mocktud')
        with open(self.stateDir + '/tud.stats', 'w') as statsFile:
            statsFile.write(
                ' Started: 1999-12-01 16:37:41, Harvested/Uploaded/Total: 56/23/113, Done: 2004-12-31 16:39:15\n'
            )

        resumption = JsonDict({
            'resumptionToken': None,
            'from': "2015-01-01T00:12:13Z"
        })
        with open(self.stateDir + '/tud.next', 'w') as nextFile:
            resumption.dump(nextFile)
        repository = self.MockRepository3(
            'tud',
            'http://repository.tudelft.nl/oai',
            None,
            'tud',
            continuous=True)
        logger = self.createLogger()
        harvester = Harvester(repository)
        harvester.addObserver(self)
        harvester.addObserver(logger)
        harvester.addObserver(repository.createUploader(logger.eventLogger))
        harvester.addObserver(repository.mapping())
        self.listRecordsFrom = None
        harvester.harvest()
        self.assertEqual('2015-01-01T00:12:13Z', self.listRecordsFrom)
 def getRepositoryIds(self, domainId, repositoryGroupId=None):
     """Return a JsonList with the repository ids of a domain.

     When ``repositoryGroupId`` is None the ids of all repository groups of
     the domain (from ``self.getRepositoryGroupIds``) are collected,
     otherwise only those of the given group. The ids are read from the
     '<domainId>.<groupId>.repositoryGroup' files below ``self._dataPath``;
     a group without 'repositoryIds' contributes nothing.
     """
     result = JsonList()
     groupIds = self.getRepositoryGroupIds(domainId) if repositoryGroupId is None else [repositoryGroupId]
     for groupId in groupIds:
         groupPath = join(self._dataPath, '%s.%s.repositoryGroup' % (domainId, groupId))
         # Close every group file deterministically instead of leaving the
         # handle to the garbage collector.
         with open(groupPath) as groupFile:
             jsonData = JsonDict.load(groupFile)
         result.extend(jsonData.get('repositoryIds', []))
     return result
Beispiel #33
0
    def testComposedQuery(self):
        """A single-core ComposedQuery is posted once to /query/ with the expected JSON body."""
        cannedResponse = JsonDict({
            "total": 887,
            "queryTime": 6,
            "hits": [{"id": "record:1", "score": 0.1234}],
        })
        self.response = cannedResponse.dumps()

        composedQuery = ComposedQuery('coreA')
        converter = QueryExpressionToLuceneQueryDict([], LuceneSettings())
        coreQuery = converter.convert(cqlToExpression("field=value"))
        composedQuery.setCoreQuery("coreA", coreQuery)

        consume(self._multiLucene.executeComposedQuery(composedQuery))

        self.assertEqual(1, len(self.post))
        self.assertEqual("/query/", self.post[0]['path'])
        expectedData = {
            "_sortKeys": [],
            "resultsFrom": "coreA",
            "_matches": {},
            "_facets": {},
            "_otherCoreFacetFilters": {},
            "_rankQueries": {},
            "_drilldownQueries": {},
            "_unites": [],
            "_queries": {
                "coreA": {
                    "term": {"field": "field", "value": "value"},
                    "type": "TermQuery",
                },
            },
            "cores": ["coreA"],
            "_filterQueries": {},
        }
        self.assertEqual(expectedData, loads(self.post[0]['data']))
 def testUpdateRepositoryGroup(self):
     """POSTing updateRepositoryGroup forwards one call with the nl_/en_ names folded into 'name'."""
     formFields = dict(
         identifier='group',
         domainId='domain',
         nl_name="De nieuwe naam",
         en_name="The old name",
     )
     response = self.dna.all.handleRequest(
         user=CallTrace(returnValues=dict(isAdmin=True)),
         Method='POST',
         path='/somewhere/updateRepositoryGroup',
         Body=bUrlencode(formFields, doseq=True))
     header, body = parseResponse(asBytes(response))

     self.assertEqual('200', header['StatusCode'])
     self.assertEqual(dict(success=True), JsonDict.loads(body))

     self.assertEqual(1, len(self.observable.calledMethods))
     onlyCall = self.observable.calledMethods[0]
     self.assertEqual('updateRepositoryGroup', onlyCall.name)
     expectedKwargs = {
         'identifier': 'group',
         'domainId': 'domain',
         'name': {'nl': 'De nieuwe naam', 'en': 'The old name'},
     }
     self.assertEqual(expectedKwargs, onlyCall.kwargs)
 def testAddClosingHours(self):
     """POSTing addRepositoryClosingHours forwards one addClosingHours call; repositoryId arrives as 'identifier'."""
     formFields = dict(
         repositoryId='repo-id',
         domainId='domain-id',
         week="*",
         day="1",
         startHour="10",
         endHour="14",
     )
     response = self.dna.all.handleRequest(
         user=CallTrace(returnValues=dict(isAdmin=True)),
         Method='POST',
         path='/actions/addRepositoryClosingHours',
         Body=bUrlencode(formFields, doseq=True))
     header, body = parseResponse(asBytes(response))

     self.assertEqual('200', header['StatusCode'])
     self.assertEqual(dict(success=True), JsonDict.loads(body))

     self.assertEqual(1, len(self.observable.calledMethods))
     onlyCall = self.observable.calledMethods[0]
     self.assertEqual('addClosingHours', onlyCall.name)
     expectedKwargs = dict(
         day='1',
         domainId='domain-id',
         endHour='14',
         identifier='repo-id',
         startHour='10',
         week='*',
     )
     self.assertEqual(expectedKwargs, onlyCall.kwargs)
Beispiel #36
0
    def testMarkException(self):
        """markException after a successful run appends an error line and sets the running status to Error."""
        counts = (9999, 9999, 9999, 9999)
        runningFile = join(self.statePath, 'repo.running')
        okStatsLine = 'Started: 2012-08-13 12:15:00, Harvested/Uploaded/Deleted/Total: 9999/9999/9999/9999, Done: 2012-08-13 12:15:00, ResumptionToken: resumptionToken\n'
        errorStatsLine = "Started: 2012-08-13 12:17:00, Harvested/Uploaded/Deleted/Total: 9999/9999/9999/9999, Error: <class 'ValueError'>: whatever\n"

        # First run: a normal, successful harvest.
        with self._State('repo') as state:
            state.getZTime = lambda: ZuluTime('2012-08-13T12:15:00Z')
            state.markStarted()
            state.markHarvested(counts, "resumptionToken", "2012-08-13T12:14:00")
            self.assertEqual(
                [1, 0, 1],
                event_counts(state, 'started', 'errors', 'harvested'))

        self.assertRepoStats(okStatsLine)
        expectedNext = {
            "from": "2012-08-13T12:14:00",
            "resumptionToken": "resumptionToken",
            'lastSuccessfulHarvest': '2012-08-13T12:15:00Z',
        }
        self.assertEqual(expectedNext, JsonDict.load(join(self.statePath, 'repo.next')))
        expectedRunning = {
            "changedate": "2012-08-13 12:15:00",
            "status": "Ok",
            "message": "",
        }
        self.assertEqual(expectedRunning, JsonDict.load(runningFile))

        # Second run: the harvest fails with an exception.
        with self._State('repo') as state:
            state.getZTime = lambda: ZuluTime('2012-08-13T12:17:00Z')
            state.markStarted()
            try:
                raise ValueError("whatever")
            except ValueError:
                exType, exValue = exc_info()[:2]
                state.markException(exType, exValue, counts)
            self.assertEqual(
                [2, 1, 1],
                event_counts(state, 'started', 'errors', 'harvested'))

        self.assertRepoStats(okStatsLine + errorStatsLine)
        expectedRunning = {
            "changedate": "2012-08-13 12:17:00",
            "status": "Error",
            "message": "whatever",
        }
        self.assertEqual(expectedRunning, JsonDict.load(runningFile))
Beispiel #37
0
 def convert(cls, src, dst):
     """Convert a 'user:passwordhash' text file into the JSON format and return cls(dst).

     Lines of src without a ':' (after stripping) are ignored. Every user is
     stored with an empty salt and the hash found in the file; the result is
     dumped to dst together with cls.version.
     """
     #TODO make this work with abstract storage
     users = {}
     with open(src) as passwordFile:
         for rawLine in passwordFile:
             entry = rawLine.strip()
             if ':' not in entry:
                 continue
             # Unpacking requires exactly one ':' in the entry.
             user, pwhash = entry.split(':')
             users[user] = dict(salt='', password=pwhash)
     JsonDict(users=users, version=cls.version).dump(dst)
     return cls(dst)
 def urlJsonDict(self, **kwargs):
     """Fetch '<internalurl>/get?<query>' and return the parsed JsonDict.

     Keyword arguments with a falsy value are left out of the query string.

     Raises:
         ValueError: with the server's message when the response has an 'error' key.
     """
     query = urlencode({name: value for name, value in kwargs.items() if value})
     url = "{}/get?{}".format(self._internalurl, query)
     response = JsonDict.load(self._urlopen(url))
     if 'error' not in response:
         return response
     raise ValueError(response['error']['message'])
Beispiel #39
0
    def testMarkDeleted(self):
        """markDeleted appends a 'Deleted all ids' line and clears the next-state, leaving the running status as it was."""
        nextFile = join(self.statePath, 'repo.next')
        runningFile = join(self.statePath, 'repo.running')
        harvestedStatsLine = 'Started: 2012-08-13 12:15:00, Harvested/Uploaded/Deleted/Total: 9999/9999/9999/9999, Done: 2012-08-13 12:15:00, ResumptionToken: resumptionToken\n'
        deletedStatsLine = 'Started: 2012-08-13 12:17:00, Harvested/Uploaded/Deleted/Total: 0/0/0/0, Done: Deleted all ids.\n'
        okRunning = {
            "changedate": "2012-08-13 12:15:00",
            "status": "Ok",
            "message": "",
        }

        # First run: a normal, successful harvest.
        with self._State('repo') as state:
            state.getZTime = lambda: ZuluTime('2012-08-13T12:15:00Z')
            state.markStarted()
            state.markHarvested(
                (9999, 9999, 9999, 9999), "resumptionToken", "2012-08-13T12:14:00")
            self.assertEqual([1, 0], event_counts(state, 'harvested', 'deleted'))

        self.assertRepoStats(harvestedStatsLine)
        expectedNext = {
            "from": "2012-08-13T12:14:00",
            "resumptionToken": "resumptionToken",
            'lastSuccessfulHarvest': '2012-08-13T12:15:00Z',
        }
        self.assertEqual(expectedNext, JsonDict.load(nextFile))
        self.assertEqual(okRunning, JsonDict.load(runningFile))

        # Second run: everything is marked as deleted.
        with self._State('repo') as state:
            state.getZTime = lambda: ZuluTime('2012-08-13T12:17:00Z')
            state.markDeleted()

        self.assertRepoStats(harvestedStatsLine + deletedStatsLine)
        clearedNext = {
            "from": "",
            "resumptionToken": "",
            'lastSuccessfulHarvest': None,
        }
        self.assertEqual(clearedNext, JsonDict.load(nextFile))
        # The running status still carries the change date of the first run.
        self.assertEqual(okRunning, JsonDict.load(runningFile))
Beispiel #40
0
    def testClear(self):
        """Saving the repository with action='clear' makes the next harvest emit deletes and resets the total to 0."""
        self.startHarvester(repository=REPOSITORY)
        self.assertEqual(BATCHSIZE, self.sizeDumpDir())

        header, result = getRequest(self.harvesterInternalServerPortNumber, '/get', {'verb': 'GetStatus', 'domainId': DOMAIN, 'repositoryId': REPOSITORY}, parse=False)
        data = JsonDict.loads(result)
        self.assertEqual(8, data['response']['GetStatus'][0]['total'])

        self.saveRepository(DOMAIN, REPOSITORY, REPOSITORYGROUP, action='clear')

        self.startHarvester(repository=REPOSITORY)
        self.assertEqual(18, self.sizeDumpDir())
        # The last 8 files in sorted order must all be delete requests.
        for filename in sorted(listdir(self.dumpDir))[-8:]:
            self.assertIn('_delete.updateRequest', filename, filename)

        header, result = getRequest(self.harvesterInternalServerPortNumber, '/get', {'verb': 'GetStatus', 'domainId': DOMAIN, 'repositoryId': REPOSITORY}, parse=False)
        self.assertEqual(0, JsonDict.loads(result)['response']['GetStatus'][0]['total'])
 def testRemovingNotListedKeys(self):
     """Requesting keys=['-no'] on /service/v2/list yields only the default top-level keys."""
     result = asString(self.dna.all.handleRequest(
         path='/service/v2/list',
         Method='GET',
         arguments={'keys':['-no']}
     ))
     header, body = httpSplit(result)
     dictBodyV2 = JsonDict.loads(body)
     self.assertEqual(['api_version', 'config', 'domain', 'services', 'software_version'], sorted(dictBodyV2.keys()))
 def getRepositories(self, domainId, repositoryGroupId=None):
     """Return a JsonList with the stored repository of every id from getRepositoryIds.

     Each repository is read from '<domainId>.<repositoryId>.repository'
     below self._dataPath.

     Raises:
         ValueError: 'idDoesNotExist' when looking up the repository ids
             raises IOError. Errors while opening the individual repository
             files are not translated.
     """
     try:
         repositoryIds = self.getRepositoryIds(domainId=domainId, repositoryGroupId=repositoryGroupId)
     except IOError:
         raise ValueError("idDoesNotExist")
     repositories = JsonList()
     for repositoryId in repositoryIds:
         repositoryPath = join(self._dataPath, '%s.%s.repository' % (domainId, repositoryId))
         # Close each file right after parsing instead of leaking the handle.
         with open(repositoryPath) as fp:
             repositories.append(JsonDict.load(fp))
     return repositories
Beispiel #43
0
 def load(cls, filePath):
     """Create a state for filePath, filled from the JSON file when it exists.

     When filePath is not an existing file, the freshly constructed
     cls(filePath=filePath) is returned untouched.
     """
     state = cls(filePath=filePath)
     if not isfile(filePath):
         return state
     stored = JsonDict.load(filePath)
     state.datetime = stored.get('datetime')
     # Only harvestingReady has an explicit default; the other fields become None when absent.
     state.harvestingReady = stored.get('harvestingReady', False)
     state.error = stored.get('error')
     state.resumptionAttributes = stored.get('resumptionAttributes')
     return state
 def testKeysAll(self):
     """Requesting __all__=True on /service/v2/list also returns 'collections' and 'other'."""
     result = asString(self.dna.all.handleRequest(
         path='/service/v2/list',
         Method='GET',
         arguments={'__all__':['True']}
     ))
     header, body = httpSplit(result)
     dictBodyV2 = JsonDict.loads(body)
     self.assertEqual(['api_version', 'collections', 'config', 'domain', 'other', 'services', 'software_version'], sorted(dictBodyV2.keys()))
 def testShouldHaveGlobalConfigForVersion2(self):
     """A plain /service/v2/list response carries a 'config' with host and port next to the default keys."""
     result = asString(self.dna.all.handleRequest(
         path='/service/v2/list',
         arguments={},
         Method='GET',
     ))
     header, body = httpSplit(result)
     dictBodyV2 = JsonDict.loads(body)
     self.assertEqual(['host', 'port'], sorted(dictBodyV2['config'].keys()))
     self.assertEqual(['api_version', 'config', 'domain', 'services', 'software_version'], sorted(dictBodyV2.keys()))
 def testNonexistingKeys(self):
     """Requesting an unknown key adds an 'errors' list with a not-found message to the response."""
     result = asString(self.dna.all.handleRequest(
         path='/service/v2/list',
         Method='GET',
         arguments={'keys':['no']}
     ))
     header, body = httpSplit(result)
     dictBodyV2 = JsonDict.loads(body)
     self.assertEqual(['api_version', 'config', 'domain', 'errors', 'services', 'software_version'], sorted(dictBodyV2.keys()))
     self.assertEqual(["Key 'no' not found."], dictBodyV2['errors'])
Beispiel #47
0
 def testGetStatusForDomainAndRepositoryId(self):
     """GetStatus for one repository echoes the request and reports the group id and invalid count."""
     self.controlHelper(action='allInvalid')
     self.startHarvester(repository=REPOSITORY)
     header, result = getRequest(self.harvesterInternalServerPortNumber, '/get', {'verb': 'GetStatus', 'domainId': 'adomain', 'repositoryId': 'integrationtest'}, parse=False)
     data = JsonDict.loads(result)
     self.assertEqual("GetStatus", data['request']['verb'])
     self.assertEqual("adomain", data['request']['domainId'])
     self.assertEqual("integrationtest", data['request']['repositoryId'])
     self.assertEqual("IntegrationTest", data['response']['GetStatus'][0]['repositoryGroupId'])
     self.assertEqual(6, data['response']['GetStatus'][0]['invalid'])
 def testAllKeys(self):
     """Requesting keys 'collections,other' returns both configurations next to the default keys."""
     result = asString(self.dna.all.handleRequest(
         path='/service/v2/list',
         Method='GET',
         arguments={'keys':['collections,other']}
     ))
     header, body = httpSplit(result)
     dictBodyV2 = JsonDict.loads(body)
     self.assertEqual(['api_version', 'collections', 'config', 'domain', 'other', 'services', 'software_version'], sorted(dictBodyV2.keys()))
     self.assertEqual({'collection': {'provenanceSource': 'collection_source', 'enabled': True, 'name': 'collection'}}, dictBodyV2['collections'])
     self.assertEqual(['other'], dictBodyV2['other'])
     self.assertEqual({'host': 'localhost', 'port': 8000}, dictBodyV2['config'])
     self.assertEqual({}, dictBodyV2['services'])
    def testGetSomethingIsAllowed(self):
        """A GetSomething verb is forwarded to the observer as getSomething and its result is returned as JSON."""
        dataRetrieve = HarvesterDataRetrieve()
        observer = CallTrace('observer', returnValues=dict(getSomething='get something result', listSomething=['a', 'b']))
        dataRetrieve.addObserver(observer)

        result = asString(dataRetrieve.handleRequest(arguments=dict(verb=['GetSomething'], argument=['value'])))
        header, body = result.split(CRLF*2,1)
        self.assertEqual(okJson, header+CRLF*2)
        self.assertEqual({'request': {
            'verb': 'GetSomething', 'argument': 'value',
            }, 'response': {'GetSomething': 'get something result'}}, JsonDict.loads(body))
        self.assertEqual(['getSomething'], observer.calledMethodNames())
        self.assertEqual({'argument': 'value'}, observer.calledMethods[0].kwargs)
 def testServiceRegistryOldFormat(self):
     """A serviceregistry.json written as a dict keyed by identifier is still loaded by ServiceRegistry."""
     uuid1 = str(uuid4())
     uuid2 = str(uuid4())
     with open(join(self.tempdir, 'serviceregistry.json'), 'w') as f:
         d = JsonDict({
                 uuid1: {
                     "ipAddress": "5.153.228.85",
                     "readable": True,
                     "number": 1,
                     "data": {
                         "uptime": 366867,
                         "VERSION": "1.5.12.3"
                     },
                     "writable": True,
                     "lastseen": 1423494771.904539,
                     "type": "holding",
                     "infoport": 35609,
                 },
                 uuid2: {
                     "ipAddress": "5.153.228.85",
                     "readable": True,
                     "number": 1,
                     "data": {
                         "uptime": 366867,
                         "VERSION": "1.5.12.3"
                     },
                     "writable": True,
                     "lastseen": 1423494771.904539,
                     "type": "plein",
                     "infoport": 41609,
                 }
             })
         d.dump(f)
     registry = ServiceRegistry(
         stateDir=self.tempdir,
         domainname='zp.example.org',
         reactor=CallTrace(),
     )
     # activeOnly=False: the 'lastseen' timestamps in the fixture are old.
     self.assertEqual({uuid1, uuid2}, set(registry.listServices(activeOnly=False).keys()))
Beispiel #51
0
def parseHeaderAndBody(h, b=None, parseBody=True):
    """Parse an HTTP response and, when possible, decode its body by content type.

    h and b are the header and body parts; when b is None, h is expected to
    be a (header, body) pair. With parseBody true and a Content-Type header
    present, a body whose content type contains 'xml' is returned as XML(body)
    and one containing 'json' as a JsonDict or JsonList. A JSON body that
    cannot be decoded is returned as the string 'JSONDecodeError in: <body>'.
    In every other case the body is returned as parsed by parseResponse.
    """
    if b is None:
        h, b = h
    header, body = parseResponse(h + CRLF * 2 + b)

    if not (body and parseBody and 'Content-Type' in header['Headers']):
        return header, body

    contentType = header['Headers']['Content-Type']
    if 'xml' in contentType:
        return header, XML(body)
    if 'json' in contentType:
        try:
            # A leading '{' means a JSON object; anything else is parsed as a list.
            if body[0] == '{':
                parsed = JsonDict.loads(body)
            else:
                parsed = JsonList.loads(body)
            return header, parsed
        except JSONDecodeError:
            return header, 'JSONDecodeError in: ' + body
    return header, body
 def _load(self):
     """Read the service registry file and return a dict of identifier -> Service.

     Two on-disk formats are accepted: a JSON list of service dicts, and a
     JSON object mapping identifier to service dict. Every Service is
     validated before it is stored. A missing file, or a file that is empty
     or holds only whitespace, yields an empty dict.
     """
     if not isfile(self._jsonFilepath):
         return {}
     # Close the file right after reading instead of leaking the handle.
     with open(self._jsonFilepath) as fp:
         data = fp.read().strip()
     result = {}
     if not data:
         # Nothing stored; indexing data[0] below would raise IndexError.
         return result
     if '[' != data[0]:
         # Object format: the identifier is the key, not part of the service dict.
         for identifier, serviceDict in JsonDict.loads(data).items():
             service = Service(domainname=self._domainname, timeout=self._timeout, identifier=identifier, ultimateTimeout=self._ultimateTimeout, **serviceDict)
             service.validate()
             result[service.identifier] = service
         return result
     # List format: each item is passed to Service as keyword arguments.
     for item in JsonList.loads(data):
         service = Service(domainname=self._domainname, timeout=self._timeout, ultimateTimeout=self._ultimateTimeout, **item)
         service.validate()
         result[service.identifier] = service
     return result
    def _download(self, url, **kwargs):
        try:
            configuration = JsonDict.load(urlopen(url, **kwargs))
            self._cache.update(configuration)
        except (HTTPError, URLError, timeout), e:
            sys.stderr.write("""%s (%s).
Tried: %s
-----
""" % (e.__class__.__name__, str(e), url))
            configuration = self._cache.retrieve()
            if configuration is None:
                sys.stderr.write('%s: configuration cachefile "%s" not found!\n' % (self.__class__.__name__, self._cache.filepath))
                sys.stderr.flush()
                raise
            sys.stderr.write('%s: configuration cachefile "%s" found.\n' % (self.__class__.__name__, self._cache.filepath))
            sys.stderr.flush()
 def testShouldReturnOnlyRequestedKeysWithUpdate(self):
     """A v2 update with keys=['collections'] returns that key plus the defaults and 'this_service'."""
     # Named updateHash so the builtin hash() is not shadowed.
     updateHash = serviceUpdateHash(secret='guessme!', identifier='cc635329-c089-41a8-91be-2a4554851515', type='srv', ipAddress='127.0.0.1', infoport=1234)
     postBody = urlencode({
         'identifier': 'cc635329-c089-41a8-91be-2a4554851515',
         'type': 'srv',
         'ipAddress': '127.0.0.1',
         'infoport': '1234',
         'data': dumps({'VERSION': '2.718281828'}),
         'hash': updateHash,
     })
     result = ''.join(compose(self.dna.all.handleRequest(
         path='/service/v2/update',
         Method='POST',
         arguments={'keys':['collections']},
         Body=postBody,
     )))
     header, body = httpSplit(result)
     dictBodyV2 = JsonDict.loads(body)
     self.assertEqual(['api_version', 'collections', 'config', 'domain', 'services', 'software_version', 'this_service'], sorted(dictBodyV2.keys()))
Beispiel #55
0
 def _add(self, values, identifier, **kwargs):
     """Build the record for identifier from kwargs and the previous record, then save.

     The key groups come from self._register:
       - 'keys': first element of the submitted list, else the previous value, else ''.
       - 'listKeys': the submitted list, else the previous value, else [].
       - 'jsonKeys': the submitted JSON text parsed into a JsonDict; when
         nothing is submitted the previous value is kept if there is one,
         otherwise an empty JsonDict is stored.
       - 'booleanKeys': start from the previous value (default False); then
         every non-empty key listed in kwargs['__booleanKeys__'] (default:
         all boolean keys) is set to whether that key is present in kwargs.
     """
     # NOTE(review): self is passed explicitly to self._validate -- presumably
     # _validate is stored as a plain function; confirm against the class.
     self._validate(self, identifier=identifier, **kwargs)
     previous = values.get(identifier, {})
     register = self._register
     record = {}
     for key in register['keys']:
         fallback = [previous.get(key, '')]
         record[key] = kwargs.get(key, fallback)[0]
     for key in register['listKeys']:
         record[key] = kwargs.get(key, previous.get(key, []))
     for key in register['jsonKeys']:
         submitted = kwargs.get(key, [None])[0]
         if submitted is None and key in previous:
             record[key] = previous[key]
         else:
             record[key] = JsonDict.loads(submitted or '{}')
     for key in register['booleanKeys']:
         record[key] = previous.get(key, False)
     for key in kwargs.get('__booleanKeys__', register['booleanKeys']):
         if key:
             record[key] = key in kwargs
     values[identifier] = record
     self._save(values)
 def getMapping(self, identifier):
     """Return the mapping stored in '<identifier>.mapping' below self._dataPath.

     Raises:
         ValueError: 'idDoesNotExist' when reading the file raises IOError.
     """
     try:
         # Close the file deterministically instead of leaking the handle.
         with open(join(self._dataPath, '%s.mapping' % identifier)) as fp:
             return JsonDict.load(fp)
     except IOError:
         raise ValueError("idDoesNotExist")
 def getDomain(self, identifier):
     """Return the domain stored in '<identifier>.domain' below self._dataPath.

     Raises:
         ValueError: 'idDoesNotExist' when reading the file raises IOError.
     """
     domainFile = join(self._dataPath, '{0}.domain'.format(identifier))
     try:
         # Close the file deterministically instead of leaking the handle.
         with open(domainFile) as fp:
             return JsonDict.load(fp)
     except IOError:
         raise ValueError('idDoesNotExist')
 def getRepositoryGroupIds(self, domainId):
     """Return the 'repositoryGroupIds' of the domain file, or [] when the key is absent.

     The domain is read from '<domainId>.domain' below self._dataPath; errors
     from opening that file are not caught here.
     """
     # Close the file deterministically instead of leaking the handle.
     with open(join(self._dataPath, '%s.domain' % domainId)) as fp:
         domain = JsonDict.load(fp)
     return domain.get('repositoryGroupIds',[])
 def getRepositoryGroup(self, identifier, domainId):
     """Return the repository group stored in '<domainId>.<identifier>.repositoryGroup'.

     The file is looked up below self._dataPath; errors from opening it are
     not caught here.
     """
     # Close the file deterministically instead of leaking the handle.
     with open(join(self._dataPath, '%s.%s.repositoryGroup' % (domainId, identifier))) as fp:
         return JsonDict.load(fp)