Beispiel #1
0
    def testIncrementalHarvestScheduleSetToNone(self):
        """Harvest once with a schedule, then again after clearing it.

        With ``Schedule(period=0)`` the processor ends up with a next
        request time that is not None; after ``setFrom(None)`` and
        ``setIncrementalHarvestSchedule(None)`` a second harvest leaves
        the next request time at None.
        """
        expectedFrom = '2002-06-01T19:20:30Z'
        expectedCalls = [
            'startOaiBatch', 'add', 'stopOaiBatch', 'signalHarvestingDone']
        observer = CallTrace(emptyGeneratorMethods=['add'])
        processor = OaiDownloadProcessor(
            path="/oai",
            metadataPrefix="oai_dc",
            incrementalHarvestSchedule=Schedule(period=0),
            workingDirectory=self.tempdir,
            xWait=False,
            err=StringIO())
        processor.addObserver(observer)

        # First pass: a schedule is configured.
        firstResponse = parse(StringIO(LISTRECORDS_RESPONSE))
        consume(processor.handle(firstResponse))
        self.assertEqual(expectedFrom, processor._from)
        self.assertNotEqual(None, processor._earliestNextRequestTime)
        self.assertEqual(expectedCalls, observer.calledMethodNames())

        # Second pass: schedule and 'from' are cleared beforehand.
        observer.calledMethods.reset()
        processor.setFrom(from_=None)
        processor.setIncrementalHarvestSchedule(schedule=None)
        secondResponse = parse(StringIO(LISTRECORDS_RESPONSE))
        consume(processor.handle(secondResponse))
        self.assertEqual(expectedFrom, processor._from)
        self.assertEqual(None, processor._earliestNextRequestTime)
        self.assertEqual(expectedCalls, observer.calledMethodNames())
Beispiel #2
0
 def testHandleWithTwoRecords(self):
     """A response holding two records produces one ``add`` call per record.

     Each ``add`` call is checked for its lxmlNode, datestamp and
     identifier keyword arguments.
     """
     secondRecord = '<record xmlns="http://www.openarchives.org/OAI/2.0/"><header><identifier>oai:identifier:2</identifier><datestamp>2011-08-22T07:41:00Z</datestamp></header><metadata>ignored</metadata></record>'
     observer = CallTrace(methods={'add': lambda **kwargs: (x for x in [])})
     processor = OaiDownloadProcessor(
         path="/oai",
         metadataPrefix="oai_dc",
         workingDirectory=self.tempdir,
         xWait=True)
     processor.addObserver(observer)

     responseText = LISTRECORDS_RESPONSE % (secondRecord + RESUMPTION_TOKEN)
     list(compose(processor.handle(parse(StringIO(responseText)))))

     self.assertEqual(['startOaiBatch', 'add', 'add', 'stopOaiBatch'],
                      [m.name for m in observer.calledMethods])
     firstAdd, secondAdd = observer.calledMethods[1:3]
     self.assertEqual(0, len(firstAdd.args))

     # (call, expected record xml, expected datestamp, expected identifier)
     expectations = [
         (firstAdd, ONE_RECORD, '2011-08-22T07:34:00Z', 'oai:identifier:1'),
         (secondAdd, secondRecord, '2011-08-22T07:41:00Z',
          'oai:identifier:2'),
     ]
     for call, recordXml, datestamp, identifier in expectations:
         self.assertEqualsWS(recordXml,
                             lxmltostring(call.kwargs['lxmlNode']))
         self.assertEqual(datestamp, call.kwargs['datestamp'])
         self.assertEqual(identifier, call.kwargs['identifier'])
Beispiel #3
0
 def testRequest(self):
     """A fresh processor builds the initial ListRecords request with x-wait."""
     processor = OaiDownloadProcessor(
         path="/oai",
         metadataPrefix="oai_dc",
         workingDirectory=self.tempdir,
         xWait=True)
     expected = (
         'GET /oai?verb=ListRecords&metadataPrefix=oai_dc&x-wait=True HTTP/1.0\r\n'
         'X-Meresco-Oai-Client-Identifier: %s\r\n'
         'User-Agent: Meresco-Oai-DownloadProcessor/5.x\r\n'
         '\r\n'
     ) % processor._identifier
     self.assertEqual(expected, processor.buildRequest())
Beispiel #4
0
 def testSetInRequest(self):
     """The ``set`` argument shows up (url-encoded) in the built request.

     Three cases: a plain set name, a set name with special characters,
     and a stored resumption token, in which case the expected request
     carries the token and no set or metadataPrefix.
     """
     commonArgs = dict(
         path="/oai",
         metadataPrefix="oai_dc",
         workingDirectory=self.tempdir,
         xWait=True)

     # Plain set name.
     processor = OaiDownloadProcessor(set="setName", **commonArgs)
     expected = (
         'GET /oai?verb=ListRecords&metadataPrefix=oai_dc&set=setName&x-wait=True HTTP/1.0\r\n'
         'X-Meresco-Oai-Client-Identifier: %s\r\n'
         'User-Agent: Meresco-Oai-DownloadProcessor/5.x\r\n'
         '\r\n'
     ) % processor._identifier
     self.assertEqual(expected, processor.buildRequest())

     # Set name containing characters that get percent-encoded.
     processor = OaiDownloadProcessor(set="set-_.!~*'()", **commonArgs)
     expected = (
         'GET /oai?verb=ListRecords&metadataPrefix=oai_dc&set=set-_.%%21~%%2A%%27%%28%%29&x-wait=True HTTP/1.0\r\n'
         'X-Meresco-Oai-Client-Identifier: %s\r\n'
         'User-Agent: Meresco-Oai-DownloadProcessor/5.x\r\n'
         '\r\n'
     ) % processor._identifier
     self.assertEqual(expected, processor.buildRequest())

     # A resumption token in the state file is used for the request.
     resumptionToken = "u|c1286437597991025|mprefix|s|f"
     stateFile = join(self.tempdir, 'harvester.state')
     with open(stateFile, 'w') as f:
         f.write("Resumptiontoken: %s\n" % resumptionToken)
     processor = OaiDownloadProcessor(set="setName", **commonArgs)
     expected = (
         'GET /oai?verb=ListRecords&resumptionToken=u%%7Cc1286437597991025%%7Cmprefix%%7Cs%%7Cf&x-wait=True HTTP/1.0\r\n'
         'X-Meresco-Oai-Client-Identifier: %s\r\n'
         'User-Agent: Meresco-Oai-DownloadProcessor/5.x\r\n'
         '\r\n'
     ) % processor._identifier
     self.assertEqual(expected, processor.buildRequest())
Beispiel #5
0
 def testRequestWithAdditionalHeaders(self):
     """Headers passed via ``additionalHeaders`` appear in the built request."""
     processor = OaiDownloadProcessor(
         path="/oai",
         metadataPrefix="oai_dc",
         workingDirectory=self.tempdir,
         xWait=True)
     extraHeaders = {'Host': 'example.org'}
     request = processor.buildRequest(additionalHeaders=extraHeaders)
     expected = (
         'GET /oai?verb=ListRecords&metadataPrefix=oai_dc&x-wait=True HTTP/1.0\r\n'
         'X-Meresco-Oai-Client-Identifier: %s\r\n'
         'Host: example.org\r\n'
         'User-Agent: Meresco-Oai-DownloadProcessor/5.x\r\n'
         '\r\n'
     ) % processor._identifier
     self.assertEqual(expected, request)
Beispiel #6
0
 def testRaiseErrorOnBadResponse(self):
     """Handling a record without a header raises IndexError.

     Uses ``assertRaises`` instead of a manual try/fail/except so that a
     missing exception is reported with a descriptive failure message.
     """
     oaiDownloadProcessor = OaiDownloadProcessor(
         path="/oai",
         metadataPrefix="oai_dc",
         workingDirectory=self.tempdir,
         xWait=True)
     badRecord = '<record>No Header</record>'
     # Parse outside the guarded region: only handling the response is
     # expected to raise.
     lxmlNode = parse(StringIO(LISTRECORDS_RESPONSE % badRecord))
     with self.assertRaises(IndexError):
         list(compose(oaiDownloadProcessor.handle(lxmlNode)))
Beispiel #7
0
    def testSignalHarvestingDone(self):
        """Without a schedule, handling a response ends with signalHarvestingDone."""
        observer = CallTrace(emptyGeneratorMethods=['add'])
        processor = OaiDownloadProcessor(
            path='/p',
            metadataPrefix='p',
            workingDirectory=self.tempdir,
            incrementalHarvestSchedule=None)
        processor.addObserver(observer)

        response = parse(StringIO(LISTRECORDS_RESPONSE % ''))
        consume(processor.handle(response))

        expectedCalls = [
            'startOaiBatch', 'add', 'stopOaiBatch', 'signalHarvestingDone']
        self.assertEqual(expectedCalls, observer.calledMethodNames())
Beispiel #8
0
 def testYieldSuspendFromAdd(self):
     """A Suspend yielded by the observer's ``add`` is passed on by ``handle``."""
     suspend = Suspend()
     observer = CallTrace()
     # 'add' returns a generator that yields the Suspend object once.
     observer.returnValues['add'] = (x for x in [suspend])
     processor = OaiDownloadProcessor(
         path="/oai",
         metadataPrefix="oai_dc",
         workingDirectory=self.tempdir,
         xWait=False)
     processor.addObserver(observer)

     response = parse(StringIO(LISTRECORDS_RESPONSE % ''))
     yielded = list(compose(processor.handle(response)))
     self.assertEqual([suspend, None], yielded)
Beispiel #9
0
 def testIncrementalHarvestScheduleNone(self):
     """With no schedule, a finished harvest leaves no next request time.

     The resumption token is None and ``_from`` holds the expected date.
     """
     observer = CallTrace(emptyGeneratorMethods=['add'])
     processor = OaiDownloadProcessor(
         path="/oai",
         metadataPrefix="oai_dc",
         workingDirectory=self.tempdir,
         xWait=False,
         err=StringIO(),
         incrementalHarvestSchedule=None)
     processor.addObserver(observer)

     response = parse(StringIO(LISTRECORDS_RESPONSE % ''))
     consume(processor.handle(response))

     self.assertEqual(None, processor._resumptionToken)
     self.assertEqual('2002-06-01T19:20:30Z', processor._from)
     self.assertEqual(None, processor._earliestNextRequestTime)
Beispiel #10
0
 def testReadResumptionTokenWhenNoState(self):
     """A processor created without writing any state first has no resumption token."""
     processorArgs = dict(
         path="/oai",
         metadataPrefix="oai_dc",
         workingDirectory=self.tempdir,
         xWait=True,
         err=StringIO())
     processor = OaiDownloadProcessor(**processorArgs)
     self.assertEqual(None, processor._resumptionToken)
Beispiel #11
0
 def testIncrementalHarvestWithFromWithDefaultScheduleMidnight(self):
     """With the default schedule and the clock pinned to 01:00, the next request is at 24h.

     Both the processor clock and the schedule clock are replaced by
     fixed values before the response is handled.
     """
     observer = CallTrace(emptyGeneratorMethods=['add'])
     processor = OaiDownloadProcessor(
         path="/oai",
         metadataPrefix="oai_dc",
         workingDirectory=self.tempdir,
         xWait=False,
         err=StringIO())

     # One hour past epoch, in seconds; shared by processor and schedule.
     fixedTime = lambda: 1 * 60 * 60
     schedule = processor._incrementalHarvestSchedule
     processor._time = fixedTime
     schedule._time = fixedTime
     schedule._utcnow = lambda: datetime.strptime("01:00", "%H:%M")

     processor.addObserver(observer)
     response = parse(StringIO(LISTRECORDS_RESPONSE))
     consume(processor.handle(response))

     self.assertEqual(None, processor._resumptionToken)
     self.assertEqual(24 * 60 * 60.0, processor._earliestNextRequestTime)
Beispiel #12
0
 def testListRecordsRequestError(self):
     """An OAI error response is written to ``err`` and resets the request.

     Before the error the request uses the stored resumption token;
     afterwards it is a fresh request with only the metadataPrefix. No
     observer methods are called for the error response.
     """
     resumptionToken = "u|c1286437597991025|mprefix|s|f"
     with open(join(self.tempdir, 'harvester.state'), 'w') as f:
         f.write("Resumptiontoken: %s\n" % resumptionToken)
     observer = CallTrace()
     processor = OaiDownloadProcessor(
         path="/oai",
         metadataPrefix="oai_dc",
         workingDirectory=self.tempdir,
         xWait=True,
         err=StringIO())
     processor.addObserver(observer)

     def expectedRequest(queryArguments):
         # Expected request text for the given (name, value) query pairs.
         return (
             'GET /oai?%s HTTP/1.0\r\n'
             'X-Meresco-Oai-Client-Identifier: %s\r\n'
             'User-Agent: Meresco-Oai-DownloadProcessor/5.x\r\n'
             '\r\n'
         ) % (urlencode(queryArguments), processor._identifier)

     self.assertEqual(
         expectedRequest([('verb', 'ListRecords'),
                          ('resumptionToken', resumptionToken),
                          ('x-wait', 'True')]),
         processor.buildRequest())

     consume(processor.handle(parse(StringIO(ERROR_RESPONSE))))
     self.assertEqual(0, len(observer.calledMethods))
     self.assertEqual("someError: Some error occurred.\n",
                      processor._err.getvalue())

     self.assertEqual(
         expectedRequest([('verb', 'ListRecords'),
                          ('metadataPrefix', 'oai_dc'),
                          ('x-wait', 'True')]),
         processor.buildRequest())
Beispiel #13
0
 def testUpdateRequestAfterSetResumptionToken(self):
     """After setPath/setFrom/setResumptionToken the request uses the new path and token only."""
     processor = OaiDownloadProcessor(
         path="/oai",
         metadataPrefix="oai_dc",
         set="aSet",
         workingDirectory=self.tempdir,
         xWait=False)
     processor.setPath('/otherOai')
     processor.setFrom('2014')
     processor.setResumptionToken('ReSumptionToken')

     expected = (
         'GET /otherOai?verb=ListRecords&resumptionToken=ReSumptionToken HTTP/1.0\r\n'
         'X-Meresco-Oai-Client-Identifier: %s\r\n'
         'User-Agent: Meresco-Oai-DownloadProcessor/5.x\r\n'
         '\r\n'
     ) % processor._identifier
     self.assertEqual(expected, processor.buildRequest())
Beispiel #14
0
 def testReadInvalidState(self):
     """A state file with unparsable content results in no resumption token."""
     stateFile = join(self.tempdir, 'harvester.state')
     with open(stateFile, 'w') as f:
         f.write("invalid")
     processor = OaiDownloadProcessor(
         path="/oai",
         metadataPrefix="oai_dc",
         workingDirectory=self.tempdir,
         xWait=True,
         err=StringIO())
     self.assertEqual(None, processor._resumptionToken)
Beispiel #15
0
 def startOaiHarvester(self, portNumber, observer):
     """Build a download -> parse -> process chain ending in ``observer`` and run it.

     The chain targets 'localhost' on ``portNumber``; after
     ``observer_init`` the reactor is driven via ``self._loopReactor``.
     """
     with Reactor() as reactor:
         root = Observable()
         download = PeriodicDownload(reactor, 'localhost', portNumber)
         xmlParse = XmlParseLxml(fromKwarg="data", toKwarg="lxmlNode")
         processor = OaiDownloadProcessor('/', 'prefix', self.tempdir)
         # Each component observes the one nested inside its tuple.
         tree = (root, (download, (xmlParse, (processor, (observer, )))))
         server = be(tree)
         list(compose(server.once.observer_init()))
         self._loopReactor(reactor)
Beispiel #16
0
 def testSetIncrementalHarvestScheduleNotAllowedInCaseOfRestartAfterFinish(
         self):
     """Setting a schedule on a restartAfterFinish processor raises ValueError."""
     processor = OaiDownloadProcessor(
         path="/oai",
         metadataPrefix="oai_dc",
         workingDirectory=self.tempdir,
         xWait=False,
         err=StringIO(),
         restartAfterFinish=True)
     with self.assertRaises(ValueError):
         processor.setIncrementalHarvestSchedule(
             schedule=Schedule(period=3))
Beispiel #17
0
 def testHandle(self):
     """Handling a one-record response calls ``add`` once with the record's details.

     Checks the call order on the observer and the lxmlNode, datestamp
     and identifier keyword arguments of the ``add`` call.
     """
     observer = CallTrace(methods={'add': lambda **kwargs: (x for x in [])})
     processor = OaiDownloadProcessor(
         path="/oai",
         metadataPrefix="oai_dc",
         workingDirectory=self.tempdir,
         xWait=False)
     processor.addObserver(observer)

     response = parse(StringIO(LISTRECORDS_RESPONSE % ''))
     list(compose(processor.handle(response)))

     calledNames = [m.name for m in observer.calledMethods]
     self.assertEqual(
         ['startOaiBatch', 'add', 'stopOaiBatch', 'signalHarvestingDone'],
         calledNames)

     addCall = observer.calledMethods[1]
     self.assertEqual(0, len(addCall.args))
     addKwargs = addCall.kwargs
     self.assertEqualsWS(ONE_RECORD, lxmltostring(addKwargs['lxmlNode']))
     self.assertEqual('2011-08-22T07:34:00Z', addKwargs['datestamp'])
     self.assertEqual('oai:identifier:1', addKwargs['identifier'])
Beispiel #18
0
 def testReadResumptionTokenFromStateWithNewline(self):
     """A token stored with a trailing newline is read back without it."""
     resumptionToken = "u|c1286437597991025|mprefix|s|f"
     stateFile = join(self.tempdir, 'harvester.state')
     with open(stateFile, 'w') as f:
         f.write("Resumptiontoken: %s\n" % resumptionToken)
     processor = OaiDownloadProcessor(
         path="/oai",
         metadataPrefix="oai_dc",
         workingDirectory=self.tempdir,
         xWait=True,
         err=StringIO())
     self.assertEqual(resumptionToken, processor._resumptionToken)
Beispiel #19
0
    def testOaiListRequestOnCallstack(self):
        """``add`` can read ``__callstack_var_oaiListRequest__`` during handling.

        The value seen is a dict with the processor's set and
        metadataPrefix; checked for two differently configured processors.
        """
        seenRequests = []

        def recordingAdd(**kwargs):
            # Capture the call-stack variable; the unreachable yield makes
            # this a generator function.
            seenRequests.append(local('__callstack_var_oaiListRequest__'))
            return
            yield

        observer = CallTrace(methods={'add': recordingAdd})

        # Processor without a set; the response has no resumption token.
        processor = OaiDownloadProcessor(
            path="/oai",
            metadataPrefix="oai_dc",
            workingDirectory=self.tempdir,
            xWait=False)
        top = be((Observable(), (processor, (observer, ))))
        consume(top.all.handle(parse(StringIO(LISTRECORDS_RESPONSE % ''))))
        self.assertEqual(
            ['startOaiBatch', 'add', 'stopOaiBatch', 'signalHarvestingDone'],
            [m.name for m in observer.calledMethods])
        self.assertEqual(
            [{'set': None, 'metadataPrefix': 'oai_dc'}], seenRequests)

        # Processor with a set; the response carries a resumption token.
        seenRequests = []
        observer.calledMethods.reset()
        processor = OaiDownloadProcessor(
            path="/oai",
            metadataPrefix="other",
            set='aSet',
            workingDirectory=self.tempdir,
            xWait=True)
        top = be((Observable(), (processor, (observer, ))))
        responseText = LISTRECORDS_RESPONSE % RESUMPTION_TOKEN
        consume(top.all.handle(parse(StringIO(responseText))))
        self.assertEqual(['startOaiBatch', 'add', 'stopOaiBatch'],
                         [m.name for m in observer.calledMethods])
        self.assertEqual(
            [{'set': 'aSet', 'metadataPrefix': 'other'}], seenRequests)
Beispiel #20
0
    def testHarvesterStateWithError(self):
        """A failing ``add`` is recorded in the state and survives a reload.

        The resumption token from the state file is kept, ``from_`` stays
        None, and a second processor on the same working directory reports
        the same token and error state.
        """
        resumptionToken = "u|c1286437597991025|mprefix|s|f"
        expectedError = (
            "ERROR while processing 'oai:identifier:1': Could be anything")
        with open(join(self.tempdir, 'harvester.state'), 'w') as f:
            f.write("Resumptiontoken: %s\n" % resumptionToken)

        observer = CallTrace()
        observer.exceptions = {'add': Exception("Could be anything")}
        processor = OaiDownloadProcessor(
            path="/oai",
            metadataPrefix="oai_dc",
            workingDirectory=self.tempdir,
            xWait=True,
            err=StringIO(),
            name="Name")
        processor.addObserver(observer)

        with self.assertRaises(Exception):
            list(
                compose(
                    processor.handle(
                        parse(StringIO(
                            LISTRECORDS_RESPONSE % RESUMPTION_TOKEN)))))

        state = processor.getState()
        self.assertEqual(resumptionToken, state.resumptionToken)
        self.assertEqual(None, state.from_)
        self.assertEqual(expectedError, state.errorState)
        self.assertEqual("Name", state.name)

        # A new processor on the same directory sees the persisted state.
        reloaded = OaiDownloadProcessor(
            path="/oai",
            metadataPrefix="oai_dc",
            workingDirectory=self.tempdir,
            xWait=True,
            err=StringIO())
        reloadedState = reloaded.getState()
        self.assertEqual(resumptionToken, reloadedState.resumptionToken)
        self.assertEqual(expectedError, reloadedState.errorState)
Beispiel #21
0
 def testBuildRequestNoneWhenNoResumptionToken(self):
     """After a response is handled and the token is None, buildRequest returns None."""
     observer = CallTrace(emptyGeneratorMethods=['add'])
     processor = OaiDownloadProcessor(
         path="/oai",
         metadataPrefix="oai_dc",
         workingDirectory=self.tempdir,
         xWait=False,
         err=StringIO())
     processor.addObserver(observer)

     response = parse(StringIO(LISTRECORDS_RESPONSE))
     consume(processor.handle(response))

     self.assertEqual(None, processor._resumptionToken)
     self.assertEqual(None, processor.buildRequest())
Beispiel #22
0
    def testShutdownPersistsStateOnAutocommit(self):
        """With autoCommit=False the state file appears only after handleShutdown.

        After handling a response no state file exists; after
        ``handleShutdown`` the file holds errorState, from and
        resumptionToken.
        """
        stateFile = join(self.tempdir, 'harvester.state')
        observer = CallTrace(emptyGeneratorMethods=['add'])
        processor = OaiDownloadProcessor(
            path="/oai",
            metadataPrefix="oai_dc",
            workingDirectory=self.tempdir,
            autoCommit=False)
        processor.addObserver(observer)

        responseText = LISTRECORDS_RESPONSE % RESUMPTION_TOKEN
        consume(processor.handle(parse(StringIO(responseText))))
        state = processor.getState()
        self.assertFalse(isfile(join(self.tempdir, 'harvester.state')))

        processor.handleShutdown()
        expectedState = {
            "errorState": None,
            'from': '2002-06-01T19:20:30Z',
            "resumptionToken": state.resumptionToken,
        }
        self.assertEqual(expectedState, JsonDict.load(stateFile))
Beispiel #23
0
 def testSetIncrementalHarvestSchedule(self):
     """A schedule set after construction is applied once a response is handled.

     With the clock fixed at 10 and a period of 3, the next request time
     is 0 right after setting the schedule and 13 after handling.
     """
     processor = OaiDownloadProcessor(
         path="/oai",
         metadataPrefix="oai_dc",
         workingDirectory=self.tempdir,
         xWait=False,
         err=StringIO(),
         incrementalHarvestSchedule=None)
     processor._time = lambda: 10

     newSchedule = Schedule(period=3)
     processor.setIncrementalHarvestSchedule(schedule=newSchedule)
     self.assertEqual(0, processor._earliestNextRequestTime)

     response = parse(StringIO(LISTRECORDS_RESPONSE % ''))
     consume(processor.handle(response))
     self.assertEqual(13, processor._earliestNextRequestTime)
Beispiel #24
0
 def testUseResumptionToken(self):
     """The token from a response is used (url-encoded) in the next request.

     A second processor created on the same working directory also
     reports the same token.
     """
     processorArgs = dict(
         path="/oai",
         metadataPrefix="oai_dc",
         workingDirectory=self.tempdir,
         xWait=True)
     observer = CallTrace(emptyGeneratorMethods=['add'])
     processor = OaiDownloadProcessor(err=StringIO(), **processorArgs)
     processor.addObserver(observer)

     responseText = LISTRECORDS_RESPONSE % RESUMPTION_TOKEN
     consume(processor.handle(parse(StringIO(responseText))))
     self.assertEqual('x?y&z', processor._resumptionToken)

     expected = (
         'GET /oai?verb=ListRecords&resumptionToken=x%%3Fy%%26z&x-wait=True HTTP/1.0\r\n'
         'X-Meresco-Oai-Client-Identifier: %s\r\n'
         'User-Agent: Meresco-Oai-DownloadProcessor/5.x\r\n'
         '\r\n'
     ) % processor._identifier
     self.assertEqual(expected, processor.buildRequest())

     # Fresh processor, same working directory.
     processor = OaiDownloadProcessor(err=StringIO(), **processorArgs)
     self.assertEqual('x?y&z', processor._resumptionToken)
Beispiel #25
0
    def testResponseDateAsFrom(self):
        """``_from`` is set after handling a response and is kept by a new processor.

        The second processor is created on the same working directory
        and handles no response itself.
        """
        expectedFrom = '2002-06-01T19:20:30Z'
        processorArgs = dict(
            path="/oai",
            metadataPrefix="oai_dc",
            workingDirectory=self.tempdir,
            xWait=False)
        observer = CallTrace(emptyGeneratorMethods=['add'])
        processor = OaiDownloadProcessor(err=StringIO(), **processorArgs)
        processor.addObserver(observer)

        responseText = LISTRECORDS_RESPONSE % RESUMPTION_TOKEN
        consume(processor.handle(parse(StringIO(responseText))))
        self.assertEqual(expectedFrom, processor._from)

        processor = OaiDownloadProcessor(err=StringIO(), **processorArgs)
        self.assertEqual(expectedFrom, processor._from)
Beispiel #26
0
 def testKeepResumptionTokenOnFailingAddCall(self):
     """When ``add`` raises, the stored resumption token stays in use.

     The built request is identical before and after the failing
     handle call, and the traceback plus the offending record are
     written to ``err``.
     """
     resumptionToken = "u|c1286437597991025|mprefix|s|f"
     with open(join(self.tempdir, 'harvester.state'), 'w') as f:
         f.write("Resumptiontoken: %s\n" % resumptionToken)
     observer = CallTrace()
     observer.exceptions = {'add': Exception("Could be anything")}
     processor = OaiDownloadProcessor(
         path="/oai",
         metadataPrefix="oai_dc",
         workingDirectory=self.tempdir,
         xWait=True,
         err=StringIO())
     processor.addObserver(observer)

     def expectedRequest():
         # Request that resumes from the token in the state file.
         query = urlencode([('verb', 'ListRecords'),
                            ('resumptionToken', resumptionToken),
                            ('x-wait', 'True')])
         return (
             'GET /oai?%s HTTP/1.0\r\n'
             'X-Meresco-Oai-Client-Identifier: %s\r\n'
             'User-Agent: Meresco-Oai-DownloadProcessor/5.x\r\n'
             '\r\n'
         ) % (query, processor._identifier)

     self.assertEqual(expectedRequest(), processor.buildRequest())

     with self.assertRaises(Exception):
         list(
             compose(
                 processor.handle(
                     parse(StringIO(
                         LISTRECORDS_RESPONSE % RESUMPTION_TOKEN)))))

     self.assertEqual(['startOaiBatch', 'add', 'stopOaiBatch'],
                      [m.name for m in observer.calledMethods])
     errorOutput = processor._err.getvalue()
     self.assertTrue(errorOutput.startswith('Traceback'), errorOutput)
     expectedFragment = 'Exception: Could be anything\nWhile processing:\n<record xmlns="http://www.openarchives.org/OAI/2.0/"><header><identifier>oai:identifier:1'
     self.assertTrue(expectedFragment in errorOutput, errorOutput)

     self.assertEqual(expectedRequest(), processor.buildRequest())
Beispiel #27
0
 def testRestartAfterFinish(self):
     """With restartAfterFinish, a finished harvest still yields a new initial request."""
     observer = CallTrace(emptyGeneratorMethods=['add'])
     processor = OaiDownloadProcessor(
         path="/oai",
         metadataPrefix="oai_dc",
         workingDirectory=self.tempdir,
         xWait=False,
         err=StringIO(),
         restartAfterFinish=True)
     processor.addObserver(observer)

     response = parse(StringIO(LISTRECORDS_RESPONSE))
     consume(processor.handle(response))
     self.assertEqual(None, processor._resumptionToken)

     request = processor.buildRequest()
     expectedStart = (
         'GET /oai?verb=ListRecords&metadataPrefix=oai_dc HTTP/1.0\r\n'
         'X-Meresco-Oai-Client-Identifier: ')
     self.assertTrue(request.startswith(expectedStart), request)
Beispiel #28
0
 def testIncrementalHarvestReScheduleIfNoRecordsMatch(self):
     """A no-records-match response updates ``_from`` without an error state.

     A regular response is handled first, then the no-records-match
     response; ``_from`` is checked after each.
     """
     observer = CallTrace(emptyGeneratorMethods=['add'])
     processor = OaiDownloadProcessor(
         path="/oai",
         metadataPrefix="oai_dc",
         incrementalHarvestSchedule=Schedule(period=0),
         workingDirectory=self.tempdir,
         xWait=False,
         err=StringIO())
     processor.addObserver(observer)

     regularResponse = parse(StringIO(LISTRECORDS_RESPONSE % ''))
     consume(processor.handle(regularResponse))
     self.assertEqual('2002-06-01T19:20:30Z', processor._from)

     noMatchResponse = parse(StringIO(NO_RECORDS_MATCH_RESPONSE))
     consume(processor.handle(noMatchResponse))
     self.assertEqual(None, processor._errorState)
     self.assertEqual('2012-06-01T19:20:30Z', processor._from)
Beispiel #29
0
    def testPersistentIdentifier(self):
        """The client identifier is written to a file and reused by a new processor.

        The identifier file does not exist beforehand, holds the
        processor's identifier afterwards, and a second processor on the
        same working directory builds a request with the same identifier.
        """
        processorArgs = dict(
            path="/oai",
            metadataPrefix="oai_dc",
            workingDirectory=self.tempdir,
            xWait=True)
        identifierFilepath = join(self.tempdir, 'harvester.identifier')
        self.assertFalse(isfile(identifierFilepath))

        processor = OaiDownloadProcessor(**processorArgs)
        currentIdentifier = processor._identifier
        self.assertTrue(isfile(identifierFilepath))
        with open(identifierFilepath) as f:
            storedIdentifier = f.read()
        self.assertEqual(currentIdentifier, storedIdentifier)

        expected = (
            'GET /oai?verb=ListRecords&metadataPrefix=oai_dc&x-wait=True HTTP/1.0\r\n'
            'X-Meresco-Oai-Client-Identifier: %s\r\n'
            'User-Agent: Meresco-Oai-DownloadProcessor/5.x\r\n'
            '\r\n'
        ) % currentIdentifier
        self.assertEqual(expected, processor.buildRequest())

        # A second processor on the same directory reuses the identifier.
        processor = OaiDownloadProcessor(**processorArgs)
        self.assertEqual(expected, processor.buildRequest())
Beispiel #30
0
    def testHandleYieldsAtLeastOnceAfterEachRecord(self):
        """``handle`` yields once per record even when ``add`` yields nothing.

        One record gives one yield; a response with a second record
        gives two.
        """
        def emptyAdd(**kwargs):
            # Generator function that finishes without yielding.
            return
            yield

        secondRecord = '<record xmlns="http://www.openarchives.org/OAI/2.0/"><header><identifier>oai:identifier:2</identifier><datestamp>2011-08-22T07:41:00Z</datestamp></header><metadata>ignored</metadata></record>'
        observer = CallTrace(methods={'add': emptyAdd})
        processor = OaiDownloadProcessor(
            path="/oai",
            metadataPrefix="oai_dc",
            workingDirectory=self.tempdir,
            xWait=False)
        processor.addObserver(observer)

        singleRecordResponse = parse(StringIO(LISTRECORDS_RESPONSE % ''))
        yielded = list(compose(processor.handle(singleRecordResponse)))
        self.assertEqual(1, len(yielded))

        twoRecordResponse = parse(
            StringIO(LISTRECORDS_RESPONSE % secondRecord))
        yielded = list(compose(processor.handle(twoRecordResponse)))
        self.assertEqual(2, len(yielded))