コード例 #1
0
    def testIncrementalHarvestWithFromAfterSomePeriod(self):
        # With Schedule(period=10) and a harvest handled at t=1.0, the test
        # expects buildRequest() to return None at t=1.0, 6.0 and 10.0, and a
        # ListRecords request carrying from=2002-06-01T19:20:30Z at t=11.1.
        observer = CallTrace(emptyGeneratorMethods=['add'])
        oaiDownloadProcessor = OaiDownloadProcessor(
            path="/oai",
            metadataPrefix="oai_dc",
            workingDirectory=self.tempdir,
            xWait=False,
            err=StringIO(),
            incrementalHarvestSchedule=Schedule(period=10))
        oaiDownloadProcessor._time = lambda: 1.0
        oaiDownloadProcessor.addObserver(observer)
        # LISTRECORDS_RESPONSE is a template with one placeholder; fill it
        # with '' so no literal '%s' ends up in the parsed document.
        consume(
            oaiDownloadProcessor.handle(
                parse(StringIO(LISTRECORDS_RESPONSE % ''))))
        self.assertEqual(None, oaiDownloadProcessor._resumptionToken)
        self.assertEqual('2002-06-01T19:20:30Z', oaiDownloadProcessor._from)

        # No request may be built at any of these points in time.
        for now in (1.0, 6.0, 10.0):
            # Bind the loop value as a default to avoid late-binding closures.
            oaiDownloadProcessor._time = lambda now=now: now
            self.assertEqual(None, oaiDownloadProcessor.buildRequest())

        oaiDownloadProcessor._time = lambda: 11.1
        request = oaiDownloadProcessor.buildRequest()
        self.assertTrue(
            request.startswith(
                'GET /oai?verb=ListRecords&from=2002-06-01T19%3A20%3A30Z&metadataPrefix=oai_dc'
            ), request)
コード例 #2
0
    def testIncrementalHarvestScheduleSetToNone(self):
        # First harvest with Schedule(period=0): a next request time is
        # expected to be set. After the schedule is replaced by None, a second
        # harvest is expected to leave _earliestNextRequestTime at None.
        expectedCalls = [
            'startOaiBatch', 'add', 'stopOaiBatch', 'signalHarvestingDone'
        ]
        observer = CallTrace(emptyGeneratorMethods=['add'])
        oaiDownloadProcessor = OaiDownloadProcessor(
            path="/oai",
            metadataPrefix="oai_dc",
            incrementalHarvestSchedule=Schedule(period=0),
            workingDirectory=self.tempdir,
            xWait=False,
            err=StringIO())
        oaiDownloadProcessor.addObserver(observer)
        # LISTRECORDS_RESPONSE is a template with one placeholder; fill it
        # with '' so no literal '%s' ends up in the parsed document.
        consume(
            oaiDownloadProcessor.handle(
                parse(StringIO(LISTRECORDS_RESPONSE % ''))))
        self.assertEqual('2002-06-01T19:20:30Z', oaiDownloadProcessor._from)
        self.assertNotEqual(None,
                            oaiDownloadProcessor._earliestNextRequestTime)
        self.assertEqual(expectedCalls, observer.calledMethodNames())

        observer.calledMethods.reset()
        oaiDownloadProcessor.setFrom(from_=None)
        oaiDownloadProcessor.setIncrementalHarvestSchedule(schedule=None)
        consume(
            oaiDownloadProcessor.handle(
                parse(StringIO(LISTRECORDS_RESPONSE % ''))))
        # _from was reset to None above and is expected to be set again by
        # handling the response.
        self.assertEqual('2002-06-01T19:20:30Z', oaiDownloadProcessor._from)
        self.assertEqual(None, oaiDownloadProcessor._earliestNextRequestTime)
        self.assertEqual(expectedCalls, observer.calledMethodNames())
コード例 #3
0
 def testListRecordsRequestError(self):
     # A resumption token written to 'harvester.state' is expected in the
     # first request. After an OAI error response the test expects no
     # observer calls, the error text on the err stream, and a next request
     # that uses metadataPrefix instead of the resumption token.
     requestTemplate = 'GET /oai?%s HTTP/1.0\r\nX-Meresco-Oai-Client-Identifier: %s\r\nUser-Agent: Meresco-Oai-DownloadProcessor/5.x\r\n\r\n'
     resumptionToken = "u|c1286437597991025|mprefix|s|f"
     statePath = join(self.tempdir, 'harvester.state')
     with open(statePath, 'w') as stateFile:
         stateFile.write("Resumptiontoken: %s\n" % resumptionToken)

     trace = CallTrace()
     processor = OaiDownloadProcessor(
         path="/oai",
         metadataPrefix="oai_dc",
         workingDirectory=self.tempdir,
         xWait=True,
         err=StringIO())
     processor.addObserver(trace)

     resumeQuery = urlencode([
         ('verb', 'ListRecords'),
         ('resumptionToken', resumptionToken),
         ('x-wait', 'True'),
     ])
     expectedResume = requestTemplate % (resumeQuery, processor._identifier)
     self.assertEqual(expectedResume, processor.buildRequest())

     errorDocument = parse(StringIO(ERROR_RESPONSE))
     consume(processor.handle(errorDocument))
     self.assertEqual(0, len(trace.calledMethods))
     self.assertEqual("someError: Some error occurred.\n",
                      processor._err.getvalue())

     freshQuery = urlencode([
         ('verb', 'ListRecords'),
         ('metadataPrefix', 'oai_dc'),
         ('x-wait', 'True'),
     ])
     expectedFresh = requestTemplate % (freshQuery, processor._identifier)
     self.assertEqual(expectedFresh, processor.buildRequest())
コード例 #4
0
 def testHandleWithTwoRecords(self):
     # A response holding two records plus a resumption token is expected
     # to produce two 'add' calls between startOaiBatch and stopOaiBatch,
     # each carrying the record node, its datestamp and its identifier.
     emptyAdd = lambda **kwargs: (item for item in [])
     trace = CallTrace(methods={'add': emptyAdd})
     processor = OaiDownloadProcessor(
         path="/oai",
         metadataPrefix="oai_dc",
         workingDirectory=self.tempdir,
         xWait=True)
     processor.addObserver(trace)
     secondRecord = '<record xmlns="http://www.openarchives.org/OAI/2.0/"><header><identifier>oai:identifier:2</identifier><datestamp>2011-08-22T07:41:00Z</datestamp></header><metadata>ignored</metadata></record>'
     responseText = LISTRECORDS_RESPONSE % (secondRecord + RESUMPTION_TOKEN)
     document = parse(StringIO(responseText))
     list(compose(processor.handle(document)))

     calledNames = [call.name for call in trace.calledMethods]
     self.assertEqual(['startOaiBatch', 'add', 'add', 'stopOaiBatch'],
                      calledNames)
     firstAdd, secondAdd = trace.calledMethods[1:3]
     self.assertEqual(0, len(firstAdd.args))
     expectations = [
         (firstAdd, ONE_RECORD, '2011-08-22T07:34:00Z', 'oai:identifier:1'),
         (secondAdd, secondRecord, '2011-08-22T07:41:00Z',
          'oai:identifier:2'),
     ]
     for addCall, recordXml, datestamp, identifier in expectations:
         self.assertEqualsWS(recordXml,
                             lxmltostring(addCall.kwargs['lxmlNode']))
         self.assertEqual(datestamp, addCall.kwargs['datestamp'])
         self.assertEqual(identifier, addCall.kwargs['identifier'])
コード例 #5
0
    def testHarvesterState(self):
        # Checks the object returned by getState() in three situations: on a
        # fresh processor, after settings were changed and a response with a
        # resumption token was handled, and on a second processor created on
        # the same working directory.
        observer = CallTrace(emptyGeneratorMethods=['add'])
        oaiDownloadProcessor = OaiDownloadProcessor(
            path="/oai",
            metadataPrefix="oai_dc",
            workingDirectory=self.tempdir,
            xWait=True,
            err=StringIO())
        oaiDownloadProcessor.addObserver(observer)
        state = oaiDownloadProcessor.getState()
        self.assertEqual(None, state.resumptionToken)
        self.assertEqual(None, state.from_)
        self.assertEqual(None, state.errorState)
        self.assertEqual(None, state.name)
        self.assertEqual("/oai", state.path)
        self.assertEqual("oai_dc", state.metadataPrefix)
        self.assertEqual(None, state.set)
        self.assertEqual(0, state.nextRequestTime)
        oaiDownloadProcessor.setSet('s')
        oaiDownloadProcessor.setPath('/p')
        oaiDownloadProcessor.setMetadataPrefix('pref')
        oaiDownloadProcessor.observable_setName('aName')
        consume(
            oaiDownloadProcessor.handle(
                parse(StringIO(LISTRECORDS_RESPONSE % RESUMPTION_TOKEN))))
        state = oaiDownloadProcessor.getState()
        self.assertEqual("x?y&z", state.resumptionToken)
        self.assertEqual('2002-06-01T19:20:30Z', state.from_)
        self.assertEqual(None, state.errorState)
        self.assertEqual('aName', state.name)
        self.assertEqual("/p", state.path)
        self.assertEqual("pref", state.metadataPrefix)
        self.assertEqual('s', state.set)
        self.assertEqual(0, state.nextRequestTime)

        # Change state of oaiDownloadProcessor -> changes stateView.
        oaiDownloadProcessor.setSet('x')
        self.assertEqual('x', state.set)

        # A second processor on the same working directory is expected to
        # report the resumption token and 'from' seen by the first one, while
        # name and metadataPrefix come from its own constructor arguments.
        oaiDownloadProcessor2 = OaiDownloadProcessor(
            path="/oai",
            metadataPrefix="oai_dc",
            workingDirectory=self.tempdir,
            xWait=True,
            err=StringIO())
        state2 = oaiDownloadProcessor2.getState()
        self.assertEqual(None, state2.name)
        self.assertEqual("oai_dc", state2.metadataPrefix)
        self.assertEqual("x?y&z", state2.resumptionToken)
        self.assertEqual('2002-06-01T19:20:30Z', state2.from_)
        self.assertEqual(None, state2.errorState)
        # Check the second processor's state here; the original re-checked
        # `state`, which was already asserted above.
        self.assertEqual(0, state2.nextRequestTime)
コード例 #6
0
 def testBuildRequestNoneWhenNoResumptionToken(self):
     # After handling a response without a resumption token, the test
     # expects no stored token and buildRequest() to return None.
     observer = CallTrace(emptyGeneratorMethods=['add'])
     oaiDownloadProcessor = OaiDownloadProcessor(
         path="/oai",
         metadataPrefix="oai_dc",
         workingDirectory=self.tempdir,
         xWait=False,
         err=StringIO())
     oaiDownloadProcessor.addObserver(observer)
     # LISTRECORDS_RESPONSE is a template with one placeholder; fill it
     # with '' so no literal '%s' ends up in the parsed document.
     consume(
         oaiDownloadProcessor.handle(
             parse(StringIO(LISTRECORDS_RESPONSE % ''))))
     self.assertEqual(None, oaiDownloadProcessor._resumptionToken)
     self.assertEqual(None, oaiDownloadProcessor.buildRequest())
コード例 #7
0
 def testYieldSuspendFromAdd(self):
     # When the observer's 'add' yields a Suspend object, the test expects
     # handle() to pass that same object on, followed by a single None.
     suspend = Suspend()
     trace = CallTrace()
     processor = OaiDownloadProcessor(
         path="/oai",
         metadataPrefix="oai_dc",
         workingDirectory=self.tempdir,
         xWait=False)
     processor.addObserver(trace)
     trace.returnValues['add'] = (item for item in [suspend])

     document = parse(StringIO(LISTRECORDS_RESPONSE % ''))
     handling = compose(processor.handle(document))
     yields = list(handling)
     self.assertEqual([suspend, None], yields)
コード例 #8
0
    def testSignalHarvestingDone(self):
        # With incrementalHarvestSchedule=None and a response without a
        # resumption token, 'signalHarvestingDone' is expected as the last
        # observer call.
        trace = CallTrace(emptyGeneratorMethods=['add'])
        processor = OaiDownloadProcessor(
            path='/p',
            metadataPrefix='p',
            workingDirectory=self.tempdir,
            incrementalHarvestSchedule=None)
        processor.addObserver(trace)

        document = parse(StringIO(LISTRECORDS_RESPONSE % ''))
        consume(processor.handle(document))

        expectedCalls = [
            'startOaiBatch',
            'add',
            'stopOaiBatch',
            'signalHarvestingDone',
        ]
        self.assertEqual(expectedCalls, trace.calledMethodNames())
コード例 #9
0
 def testIncrementalHarvestScheduleNone(self):
     # With incrementalHarvestSchedule=None, a finished harvest is expected
     # to record 'from' but leave _earliestNextRequestTime at None.
     trace = CallTrace(emptyGeneratorMethods=['add'])
     processor = OaiDownloadProcessor(
         path="/oai",
         metadataPrefix="oai_dc",
         workingDirectory=self.tempdir,
         xWait=False,
         err=StringIO(),
         incrementalHarvestSchedule=None)
     processor.addObserver(trace)

     document = parse(StringIO(LISTRECORDS_RESPONSE % ''))
     consume(processor.handle(document))

     self.assertEqual(None, processor._resumptionToken)
     self.assertEqual('2002-06-01T19:20:30Z', processor._from)
     self.assertEqual(None, processor._earliestNextRequestTime)
コード例 #10
0
 def testIncrementalHarvestWithFromWithDefaultScheduleMidnight(self):
     # No schedule is passed, so the processor's own default schedule is
     # used. With both clocks pinned to 01:00 (3600 s), the next request
     # time is expected to be 24 * 60 * 60.0 seconds.
     def oneHourInSeconds():
         return 1 * 60 * 60

     observer = CallTrace(emptyGeneratorMethods=['add'])
     oaiDownloadProcessor = OaiDownloadProcessor(
         path="/oai",
         metadataPrefix="oai_dc",
         workingDirectory=self.tempdir,
         xWait=False,
         err=StringIO())
     # Pin the processor's and the schedule's notion of "now".
     oaiDownloadProcessor._time = oneHourInSeconds
     oaiDownloadProcessor._incrementalHarvestSchedule._time = oneHourInSeconds
     oaiDownloadProcessor._incrementalHarvestSchedule._utcnow = lambda: datetime.strptime(
         "01:00", "%H:%M")
     oaiDownloadProcessor.addObserver(observer)
     # LISTRECORDS_RESPONSE is a template with one placeholder; fill it
     # with '' so no literal '%s' ends up in the parsed document.
     consume(
         oaiDownloadProcessor.handle(
             parse(StringIO(LISTRECORDS_RESPONSE % ''))))
     self.assertEqual(None, oaiDownloadProcessor._resumptionToken)
     self.assertEqual(24 * 60 * 60.0,
                      oaiDownloadProcessor._earliestNextRequestTime)
コード例 #11
0
 def testRestartAfterFinish(self):
     # With restartAfterFinish=True, a finished harvest (no resumption
     # token) is expected to be followed by a plain ListRecords request
     # with only the metadataPrefix argument.
     observer = CallTrace(emptyGeneratorMethods=['add'])
     oaiDownloadProcessor = OaiDownloadProcessor(
         path="/oai",
         metadataPrefix="oai_dc",
         workingDirectory=self.tempdir,
         xWait=False,
         err=StringIO(),
         restartAfterFinish=True)
     oaiDownloadProcessor.addObserver(observer)
     # LISTRECORDS_RESPONSE is a template with one placeholder; fill it
     # with '' so no literal '%s' ends up in the parsed document.
     consume(
         oaiDownloadProcessor.handle(
             parse(StringIO(LISTRECORDS_RESPONSE % ''))))
     self.assertEqual(None, oaiDownloadProcessor._resumptionToken)
     request = oaiDownloadProcessor.buildRequest()
     self.assertTrue(
         request.startswith(
             'GET /oai?verb=ListRecords&metadataPrefix=oai_dc HTTP/1.0\r\nX-Meresco-Oai-Client-Identifier: '
         ), request)
コード例 #12
0
 def testIncrementalHarvestReScheduleIfNoRecordsMatch(self):
     # After a normal harvest, handling NO_RECORDS_MATCH_RESPONSE is
     # expected to leave the error state empty and move 'from' to
     # 2012-06-01T19:20:30Z.
     trace = CallTrace(emptyGeneratorMethods=['add'])
     processor = OaiDownloadProcessor(
         path="/oai",
         metadataPrefix="oai_dc",
         incrementalHarvestSchedule=Schedule(period=0),
         workingDirectory=self.tempdir,
         xWait=False,
         err=StringIO())
     processor.addObserver(trace)

     recordsDocument = parse(StringIO(LISTRECORDS_RESPONSE % ''))
     consume(processor.handle(recordsDocument))
     self.assertEqual('2002-06-01T19:20:30Z', processor._from)

     noMatchDocument = parse(StringIO(NO_RECORDS_MATCH_RESPONSE))
     consume(processor.handle(noMatchDocument))
     self.assertEqual(None, processor._errorState)
     self.assertEqual('2012-06-01T19:20:30Z', processor._from)
コード例 #13
0
 def testHandle(self):
     # A single-record response without resumption token is expected to
     # produce one keyword-only 'add' call with the record node, its
     # datestamp and its identifier, followed by 'signalHarvestingDone'.
     emptyAdd = lambda **kwargs: (item for item in [])
     trace = CallTrace(methods={'add': emptyAdd})
     processor = OaiDownloadProcessor(
         path="/oai",
         metadataPrefix="oai_dc",
         workingDirectory=self.tempdir,
         xWait=False)
     processor.addObserver(trace)

     document = parse(StringIO(LISTRECORDS_RESPONSE % ''))
     list(compose(processor.handle(document)))

     calledNames = [call.name for call in trace.calledMethods]
     self.assertEqual(
         ['startOaiBatch', 'add', 'stopOaiBatch', 'signalHarvestingDone'],
         calledNames)
     addCall = trace.calledMethods[1]
     self.assertEqual(0, len(addCall.args))
     recordXml = lxmltostring(addCall.kwargs['lxmlNode'])
     self.assertEqualsWS(ONE_RECORD, recordXml)
     self.assertEqual('2011-08-22T07:34:00Z', addCall.kwargs['datestamp'])
     self.assertEqual('oai:identifier:1', addCall.kwargs['identifier'])
コード例 #14
0
    def testShutdownPersistsStateOnAutocommit(self):
        # With autoCommit=False no 'harvester.state' file is expected after
        # handling a response; handleShutdown() is expected to write it.
        stateFilePath = join(self.tempdir, 'harvester.state')
        trace = CallTrace(emptyGeneratorMethods=['add'])
        processor = OaiDownloadProcessor(
            path="/oai",
            metadataPrefix="oai_dc",
            workingDirectory=self.tempdir,
            autoCommit=False)
        processor.addObserver(trace)

        document = parse(StringIO(LISTRECORDS_RESPONSE % RESUMPTION_TOKEN))
        consume(processor.handle(document))
        state = processor.getState()
        self.assertFalse(isfile(stateFilePath))

        processor.handleShutdown()
        expectedState = {
            "errorState": None,
            'from': '2002-06-01T19:20:30Z',
            "resumptionToken": state.resumptionToken
        }
        self.assertEqual(expectedState, JsonDict.load(stateFilePath))
コード例 #15
0
    def testResponseDateAsFrom(self):
        # After handling a response, 'from' is expected to be
        # 2002-06-01T19:20:30Z, both on the handling processor and on a new
        # processor created on the same working directory.
        def newProcessor():
            return OaiDownloadProcessor(
                path="/oai",
                metadataPrefix="oai_dc",
                workingDirectory=self.tempdir,
                xWait=False,
                err=StringIO())

        trace = CallTrace(emptyGeneratorMethods=['add'])
        processor = newProcessor()
        processor.addObserver(trace)
        document = parse(StringIO(LISTRECORDS_RESPONSE % RESUMPTION_TOKEN))
        consume(processor.handle(document))
        self.assertEqual('2002-06-01T19:20:30Z', processor._from)

        reloaded = newProcessor()
        self.assertEqual('2002-06-01T19:20:30Z', reloaded._from)
コード例 #16
0
 def testKeepResumptionTokenOnFailingAddCall(self):
     # When the observer's 'add' raises, the test expects the exception to
     # propagate, a traceback plus the offending record on the err stream,
     # and the next request to still use the original resumption token.
     requestTemplate = 'GET /oai?%s HTTP/1.0\r\nX-Meresco-Oai-Client-Identifier: %s\r\nUser-Agent: Meresco-Oai-DownloadProcessor/5.x\r\n\r\n'
     resumptionToken = "u|c1286437597991025|mprefix|s|f"
     statePath = join(self.tempdir, 'harvester.state')
     with open(statePath, 'w') as stateFile:
         stateFile.write("Resumptiontoken: %s\n" % resumptionToken)

     trace = CallTrace()
     trace.exceptions = {'add': Exception("Could be anything")}
     processor = OaiDownloadProcessor(
         path="/oai",
         metadataPrefix="oai_dc",
         workingDirectory=self.tempdir,
         xWait=True,
         err=StringIO())
     processor.addObserver(trace)

     queryBefore = urlencode([
         ('verb', 'ListRecords'),
         ('resumptionToken', resumptionToken),
         ('x-wait', 'True'),
     ])
     expectedBefore = requestTemplate % (queryBefore, processor._identifier)
     self.assertEqual(expectedBefore, processor.buildRequest())

     with self.assertRaises(Exception):
         list(
             compose(
                 processor.handle(
                     parse(
                         StringIO(LISTRECORDS_RESPONSE % RESUMPTION_TOKEN)))))

     calledNames = [call.name for call in trace.calledMethods]
     self.assertEqual(['startOaiBatch', 'add', 'stopOaiBatch'], calledNames)
     errorOutput = processor._err.getvalue()
     self.assertTrue(errorOutput.startswith('Traceback'), errorOutput)
     expectedFragment = 'Exception: Could be anything\nWhile processing:\n<record xmlns="http://www.openarchives.org/OAI/2.0/"><header><identifier>oai:identifier:1'
     self.assertTrue(expectedFragment in errorOutput, errorOutput)

     queryAfter = urlencode([
         ('verb', 'ListRecords'),
         ('resumptionToken', resumptionToken),
         ('x-wait', 'True'),
     ])
     expectedAfter = requestTemplate % (queryAfter, processor._identifier)
     self.assertEqual(expectedAfter, processor.buildRequest())
コード例 #17
0
    def testHarvesterStateWithError(self):
        # When 'add' raises, getState() is expected to keep the original
        # resumption token and report an error message; a second processor
        # on the same working directory is expected to report the same
        # token and error.
        expectedError = "ERROR while processing 'oai:identifier:1': Could be anything"
        resumptionToken = "u|c1286437597991025|mprefix|s|f"
        statePath = join(self.tempdir, 'harvester.state')
        with open(statePath, 'w') as stateFile:
            stateFile.write("Resumptiontoken: %s\n" % resumptionToken)

        trace = CallTrace()
        trace.exceptions = {'add': Exception("Could be anything")}
        processor = OaiDownloadProcessor(
            path="/oai",
            metadataPrefix="oai_dc",
            workingDirectory=self.tempdir,
            xWait=True,
            err=StringIO(),
            name="Name")
        processor.addObserver(trace)

        with self.assertRaises(Exception):
            list(
                compose(
                    processor.handle(
                        parse(
                            StringIO(
                                LISTRECORDS_RESPONSE % RESUMPTION_TOKEN)))))

        state = processor.getState()
        self.assertEqual(resumptionToken, state.resumptionToken)
        self.assertEqual(None, state.from_)
        self.assertEqual(expectedError, state.errorState)
        self.assertEqual("Name", state.name)

        secondProcessor = OaiDownloadProcessor(
            path="/oai",
            metadataPrefix="oai_dc",
            workingDirectory=self.tempdir,
            xWait=True,
            err=StringIO())
        secondState = secondProcessor.getState()
        self.assertEqual(resumptionToken, secondState.resumptionToken)
        self.assertEqual(expectedError, secondState.errorState)
コード例 #18
0
 def testUseResumptionToken(self):
     # The resumption token from a handled response is expected to appear
     # url-encoded in the next request, and to be available on a new
     # processor created on the same working directory.
     def newProcessor():
         return OaiDownloadProcessor(
             path="/oai",
             metadataPrefix="oai_dc",
             workingDirectory=self.tempdir,
             xWait=True,
             err=StringIO())

     trace = CallTrace(emptyGeneratorMethods=['add'])
     processor = newProcessor()
     processor.addObserver(trace)
     document = parse(StringIO(LISTRECORDS_RESPONSE % RESUMPTION_TOKEN))
     consume(processor.handle(document))
     self.assertEqual('x?y&z', processor._resumptionToken)

     expectedRequest = 'GET /oai?verb=ListRecords&resumptionToken=x%%3Fy%%26z&x-wait=True HTTP/1.0\r\nX-Meresco-Oai-Client-Identifier: %s\r\nUser-Agent: Meresco-Oai-DownloadProcessor/5.x\r\n\r\n' % processor._identifier
     self.assertEqual(expectedRequest, processor.buildRequest())

     reloaded = newProcessor()
     self.assertEqual('x?y&z', reloaded._resumptionToken)
コード例 #19
0
    def testHandleYieldsAtLeastOnceAfterEachRecord(self):
        # With an 'add' that yields nothing, handle() is expected to yield
        # once for a one-record response and twice for a two-record response.
        def emptyAdd(**kwargs):
            # Generator function that finishes without yielding anything.
            return
            yield

        def countYields(responseText):
            document = parse(StringIO(responseText))
            return len(list(compose(processor.handle(document))))

        trace = CallTrace(methods={'add': emptyAdd})
        processor = OaiDownloadProcessor(
            path="/oai",
            metadataPrefix="oai_dc",
            workingDirectory=self.tempdir,
            xWait=False)
        processor.addObserver(trace)

        self.assertEqual(1, countYields(LISTRECORDS_RESPONSE % ''))

        secondRecord = '<record xmlns="http://www.openarchives.org/OAI/2.0/"><header><identifier>oai:identifier:2</identifier><datestamp>2011-08-22T07:41:00Z</datestamp></header><metadata>ignored</metadata></record>'
        self.assertEqual(2, countYields(LISTRECORDS_RESPONSE % secondRecord))