Example #1
0
    def testIncrementalHarvestScheduleNoneOverruledWithSetIncrementalHarvestSchedule(self):
        oaiDownloadProcessor = OaiDownloadProcessor(path="/oai", metadataPrefix="oai_dc", workingDirectory=self.tempdir, xWait=False, err=StringIO(), incrementalHarvestSchedule=None)
        oaiDownloadProcessor._time = lambda: 10
        consume(oaiDownloadProcessor.handle(parse(StringIO(LISTRECORDS_RESPONSE % ''))))
        self.assertEquals(None, oaiDownloadProcessor._resumptionToken)
        self.assertEquals('2002-06-01T19:20:30Z', oaiDownloadProcessor._from)
        self.assertEquals(None, oaiDownloadProcessor._earliestNextRequestTime)

        oaiDownloadProcessor.setIncrementalHarvestSchedule(schedule=Schedule(period=3))
        self.assertEquals(None, oaiDownloadProcessor.buildRequest())
        self.assertEquals(None, oaiDownloadProcessor._earliestNextRequestTime)
        oaiDownloadProcessor.scheduleNextRequest()
        self.assertNotEquals(None, oaiDownloadProcessor.buildRequest())
        self.assertEquals(0, oaiDownloadProcessor._earliestNextRequestTime)
        consume(oaiDownloadProcessor.handle(parse(StringIO(LISTRECORDS_RESPONSE % ''))))
        self.assertEquals(13, oaiDownloadProcessor._earliestNextRequestTime)
Example #2
0
 def testBuildRequestNoneWhenNoResumptionToken(self):
     observer = CallTrace(emptyGeneratorMethods=['add'])
     oaiDownloadProcessor = OaiDownloadProcessor(path="/oai", metadataPrefix="oai_dc", workingDirectory=self.tempdir, xWait=False, err=StringIO())
     oaiDownloadProcessor.addObserver(observer)
     consume(oaiDownloadProcessor.handle(parse(StringIO(LISTRECORDS_RESPONSE))))
     self.assertEquals(None, oaiDownloadProcessor._resumptionToken)
     self.assertEquals(None, oaiDownloadProcessor.buildRequest())
Example #3
0
    def testIncrementalHarvestWithFromAfterSomePeriod(self):
        observer = CallTrace(emptyGeneratorMethods=['add'])
        oaiDownloadProcessor = OaiDownloadProcessor(path="/oai", metadataPrefix="oai_dc", workingDirectory=self.tempdir, xWait=False, err=StringIO(), incrementalHarvestSchedule=Schedule(period=10))
        oaiDownloadProcessor._time = lambda: 1.0
        oaiDownloadProcessor.addObserver(observer)
        consume(oaiDownloadProcessor.handle(parse(StringIO(LISTRECORDS_RESPONSE))))
        self.assertEquals(None, oaiDownloadProcessor._resumptionToken)
        self.assertEquals('2002-06-01T19:20:30Z', oaiDownloadProcessor._from)

        self.assertEquals(None, oaiDownloadProcessor.buildRequest())
        oaiDownloadProcessor._time = lambda: 6.0
        self.assertEquals(None, oaiDownloadProcessor.buildRequest())
        oaiDownloadProcessor._time = lambda: 10.0
        self.assertEquals(None, oaiDownloadProcessor.buildRequest())
        oaiDownloadProcessor._time = lambda: 11.1
        request = oaiDownloadProcessor.buildRequest()
        self.assertTrue(request.startswith('GET /oai?verb=ListRecords&from=2002-06-01T19%3A20%3A30Z&metadataPrefix=oai_dc'), request)
Example #4
0
 def testRestartAfterFinish(self):
     observer = CallTrace(emptyGeneratorMethods=['add'])
     oaiDownloadProcessor = OaiDownloadProcessor(path="/oai", metadataPrefix="oai_dc", workingDirectory=self.tempdir, xWait=False, err=StringIO(), restartAfterFinish=True)
     oaiDownloadProcessor.addObserver(observer)
     consume(oaiDownloadProcessor.handle(parse(StringIO(LISTRECORDS_RESPONSE))))
     self.assertEquals(None, oaiDownloadProcessor._resumptionToken)
     request = oaiDownloadProcessor.buildRequest()
     self.assertTrue(request.startswith('GET /oai?verb=ListRecords&metadataPrefix=oai_dc HTTP/1.0\r\nX-Meresco-Oai-Client-Identifier: '), request)
Example #5
0
    def testScheduleNextRequest(self):
        oaiDownloadProcessor = OaiDownloadProcessor(path='/p', metadataPrefix='p', workingDirectory=self.tempdir)
        oaiDownloadProcessor._time = lambda: 17
        consume(oaiDownloadProcessor.handle(parse(StringIO(LISTRECORDS_RESPONSE % ''))))
        self.assertTrue(oaiDownloadProcessor._earliestNextRequestTime > 17)

        oaiDownloadProcessor.scheduleNextRequest()
        self.assertEquals(0, oaiDownloadProcessor._earliestNextRequestTime)
        self.assertEquals(True, oaiDownloadProcessor._timeForNextRequest())
        self.assertNotEqual(None, oaiDownloadProcessor.buildRequest())

        oaiDownloadProcessor.scheduleNextRequest(Schedule(period=0))
        self.assertEquals(17, oaiDownloadProcessor._earliestNextRequestTime)
        self.assertEquals(True, oaiDownloadProcessor._timeForNextRequest())
        self.assertNotEqual(None, oaiDownloadProcessor.buildRequest())

        oaiDownloadProcessor.scheduleNextRequest(Schedule(period=120))
        self.assertEquals(137, oaiDownloadProcessor._earliestNextRequestTime)
        self.assertEquals(False, oaiDownloadProcessor._timeForNextRequest())
        self.assertEquals(None, oaiDownloadProcessor.buildRequest())
Example #6
0
 def testUpdateRequestAfterSetResumptionToken(self):
     oaiDownloadProcessor = OaiDownloadProcessor(path="/oai", metadataPrefix="oai_dc", set="aSet", workingDirectory=self.tempdir, xWait=False)
     oaiDownloadProcessor.setPath('/otherOai')
     oaiDownloadProcessor.setFrom('2014')
     oaiDownloadProcessor.setResumptionToken('ReSumptionToken')
     self.assertEquals("""GET /otherOai?verb=ListRecords&resumptionToken=ReSumptionToken HTTP/1.0\r\nX-Meresco-Oai-Client-Identifier: %s\r\nUser-Agent: Meresco-Oai-DownloadProcessor/5.x\r\n\r\n""" % oaiDownloadProcessor._identifier, oaiDownloadProcessor.buildRequest())
Example #7
0
 def testUpdateRequest(self):
     oaiDownloadProcessor = OaiDownloadProcessor(path="/oai", metadataPrefix="oai_dc", workingDirectory=self.tempdir, xWait=True)
     oaiDownloadProcessor.setPath('/otherOai')
     oaiDownloadProcessor.setMetadataPrefix('otherPrefix')
     oaiDownloadProcessor.setSet('aSet')
     oaiDownloadProcessor.setFrom('2014')
     self.assertEquals("""GET /otherOai?verb=ListRecords&from=2014&metadataPrefix=otherPrefix&set=aSet&x-wait=True HTTP/1.0\r\nX-Meresco-Oai-Client-Identifier: %s\r\nUser-Agent: Meresco-Oai-DownloadProcessor/5.x\r\n\r\n""" % oaiDownloadProcessor._identifier, oaiDownloadProcessor.buildRequest())
Example #8
0
 def testPartitionRequest(self):
     oaiDownloadProcessor = OaiDownloadProcessor(path="/oai", metadataPrefix="oai_dc", workingDirectory=self.tempdir, xWait=True, partition="1/2")
     self.assertEquals("""GET /oai?verb=ListRecords&metadataPrefix=oai_dc&x-partition=1%%2F2&x-wait=True HTTP/1.0\r\nX-Meresco-Oai-Client-Identifier: %s\r\nUser-Agent: Meresco-Oai-DownloadProcessor/5.x\r\n\r\n""" % oaiDownloadProcessor._identifier, oaiDownloadProcessor.buildRequest())
Example #9
0
 def testRequestWithAdditionalUserAgent(self):
     oaiDownloadProcessor = OaiDownloadProcessor(path="/oai", metadataPrefix="oai_dc", workingDirectory=self.tempdir, xWait=True, userAgentAddition="From a certain server")
     self.assertEquals("""GET /oai?verb=ListRecords&metadataPrefix=oai_dc&x-wait=True HTTP/1.0\r\nX-Meresco-Oai-Client-Identifier: %s\r\nUser-Agent: Meresco-Oai-DownloadProcessor/5.x (From a certain server)\r\n\r\n""" % oaiDownloadProcessor._identifier, oaiDownloadProcessor.buildRequest())
Example #10
0
    def testPersistentIdentifier(self):
        identifierFilepath = join(self.tempdir, 'harvester.identifier')
        self.assertFalse(isfile(identifierFilepath))
        oaiDownloadProcessor = OaiDownloadProcessor(path="/oai", metadataPrefix="oai_dc", workingDirectory=self.tempdir, xWait=True)
        currentIdentifier = oaiDownloadProcessor._identifier
        self.assertTrue(isfile(identifierFilepath))
        self.assertEquals(currentIdentifier, open(identifierFilepath).read())
        self.assertEquals("""GET /oai?verb=ListRecords&metadataPrefix=oai_dc&x-wait=True HTTP/1.0\r\nX-Meresco-Oai-Client-Identifier: %s\r\nUser-Agent: Meresco-Oai-DownloadProcessor/5.x\r\n\r\n""" % currentIdentifier, oaiDownloadProcessor.buildRequest())

        oaiDownloadProcessor = OaiDownloadProcessor(path="/oai", metadataPrefix="oai_dc", workingDirectory=self.tempdir, xWait=True)
        self.assertEquals("""GET /oai?verb=ListRecords&metadataPrefix=oai_dc&x-wait=True HTTP/1.0\r\nX-Meresco-Oai-Client-Identifier: %s\r\nUser-Agent: Meresco-Oai-DownloadProcessor/5.x\r\n\r\n""" % currentIdentifier, oaiDownloadProcessor.buildRequest())
Example #11
0
 def testKeepResumptionTokenOnFailingAddCall(self):
     resumptionToken = "u|c1286437597991025|mprefix|s|f"
     open(join(self.tempdir, 'harvester.state'), 'w').write("Resumptiontoken: %s\n" % resumptionToken)
     observer = CallTrace()
     observer.exceptions={'add': Exception("Could be anything")}
     oaiDownloadProcessor = OaiDownloadProcessor(path="/oai", metadataPrefix="oai_dc", workingDirectory=self.tempdir, xWait=True, err=StringIO())
     oaiDownloadProcessor.addObserver(observer)
     self.assertEquals('GET /oai?%s HTTP/1.0\r\nX-Meresco-Oai-Client-Identifier: %s\r\nUser-Agent: Meresco-Oai-DownloadProcessor/5.x\r\n\r\n' % (urlencode([('verb', 'ListRecords'), ('resumptionToken', resumptionToken), ('x-wait', 'True')]), oaiDownloadProcessor._identifier), oaiDownloadProcessor.buildRequest())
     self.assertRaises(Exception, lambda: list(compose(oaiDownloadProcessor.handle(parse(StringIO(LISTRECORDS_RESPONSE % RESUMPTION_TOKEN))))))
     self.assertEquals(['startOaiBatch', 'add', 'stopOaiBatch'], [m.name for m in observer.calledMethods])
     errorOutput = oaiDownloadProcessor._err.getvalue()
     self.assertTrue(errorOutput.startswith('Traceback'), errorOutput)
     self.assertTrue('Exception: Could be anything\nWhile processing:\n<record xmlns="http://www.openarchives.org/OAI/2.0/"><header><identifier>oai:identifier:1' in errorOutput, errorOutput)
     self.assertEquals('GET /oai?%s HTTP/1.0\r\nX-Meresco-Oai-Client-Identifier: %s\r\nUser-Agent: Meresco-Oai-DownloadProcessor/5.x\r\n\r\n' % (urlencode([('verb', 'ListRecords'), ('resumptionToken', resumptionToken), ('x-wait', 'True')]), oaiDownloadProcessor._identifier), oaiDownloadProcessor.buildRequest())
Example #12
0
 def testUseResumptionToken(self):
     observer = CallTrace(emptyGeneratorMethods=['add'])
     oaiDownloadProcessor = OaiDownloadProcessor(path="/oai", metadataPrefix="oai_dc", workingDirectory=self.tempdir, xWait=True, err=StringIO())
     oaiDownloadProcessor.addObserver(observer)
     consume(oaiDownloadProcessor.handle(parse(StringIO(LISTRECORDS_RESPONSE % RESUMPTION_TOKEN))))
     self.assertEquals('x?y&z', oaiDownloadProcessor._resumptionToken)
     self.assertEquals('GET /oai?verb=ListRecords&resumptionToken=x%%3Fy%%26z&x-wait=True HTTP/1.0\r\nX-Meresco-Oai-Client-Identifier: %s\r\nUser-Agent: Meresco-Oai-DownloadProcessor/5.x\r\n\r\n' % oaiDownloadProcessor._identifier, oaiDownloadProcessor.buildRequest())
     oaiDownloadProcessor = OaiDownloadProcessor(path="/oai", metadataPrefix="oai_dc", workingDirectory=self.tempdir, xWait=True, err=StringIO())
     self.assertEquals('x?y&z', oaiDownloadProcessor._resumptionToken)
Example #13
0
 def testListRecordsRequestError(self):
     resumptionToken = "u|c1286437597991025|mprefix|s|f"
     open(join(self.tempdir, 'harvester.state'), 'w').write("Resumptiontoken: %s\n" % resumptionToken)
     observer = CallTrace()
     oaiDownloadProcessor = OaiDownloadProcessor(path="/oai", metadataPrefix="oai_dc", workingDirectory=self.tempdir, xWait=True, err=StringIO())
     oaiDownloadProcessor.addObserver(observer)
     self.assertEquals('GET /oai?%s HTTP/1.0\r\nX-Meresco-Oai-Client-Identifier: %s\r\nUser-Agent: Meresco-Oai-DownloadProcessor/5.x\r\n\r\n' % (urlencode([('verb', 'ListRecords'), ('resumptionToken', resumptionToken), ('x-wait', 'True')]), oaiDownloadProcessor._identifier), oaiDownloadProcessor.buildRequest())
     consume(oaiDownloadProcessor.handle(parse(StringIO(ERROR_RESPONSE))))
     self.assertEquals(0, len(observer.calledMethods))
     self.assertEquals("someError: Some error occurred.\n", oaiDownloadProcessor._err.getvalue())
     self.assertEquals('GET /oai?%s HTTP/1.0\r\nX-Meresco-Oai-Client-Identifier: %s\r\nUser-Agent: Meresco-Oai-DownloadProcessor/5.x\r\n\r\n' % (urlencode([('verb', 'ListRecords'), ('metadataPrefix', 'oai_dc'), ('x-wait', 'True')]), oaiDownloadProcessor._identifier), oaiDownloadProcessor.buildRequest())
Example #14
0
 def testSetInRequest(self):
     oaiDownloadProcessor = OaiDownloadProcessor(path="/oai", metadataPrefix="oai_dc", set="setName", workingDirectory=self.tempdir, xWait=True)
     self.assertEquals("""GET /oai?verb=ListRecords&metadataPrefix=oai_dc&set=setName&x-wait=True HTTP/1.0\r\nX-Meresco-Oai-Client-Identifier: %s\r\nUser-Agent: Meresco-Oai-DownloadProcessor/5.x\r\n\r\n""" % oaiDownloadProcessor._identifier, oaiDownloadProcessor.buildRequest())
     oaiDownloadProcessor = OaiDownloadProcessor(path="/oai", metadataPrefix="oai_dc", set="set-_.!~*'()", workingDirectory=self.tempdir, xWait=True)
     self.assertEquals("""GET /oai?verb=ListRecords&metadataPrefix=oai_dc&set=set-_.%%21%%7E%%2A%%27%%28%%29&x-wait=True HTTP/1.0\r\nX-Meresco-Oai-Client-Identifier: %s\r\nUser-Agent: Meresco-Oai-DownloadProcessor/5.x\r\n\r\n""" % oaiDownloadProcessor._identifier, oaiDownloadProcessor.buildRequest())
     resumptionToken = "u|c1286437597991025|mprefix|s|f"
     open(join(self.tempdir, 'harvester.state'), 'w').write("Resumptiontoken: %s\n" % resumptionToken)
     oaiDownloadProcessor = OaiDownloadProcessor(path="/oai", metadataPrefix="oai_dc", set="setName", workingDirectory=self.tempdir, xWait=True)
     self.assertEquals("""GET /oai?verb=ListRecords&resumptionToken=u%%7Cc1286437597991025%%7Cmprefix%%7Cs%%7Cf&x-wait=True HTTP/1.0\r\nX-Meresco-Oai-Client-Identifier: %s\r\nUser-Agent: Meresco-Oai-DownloadProcessor/5.x\r\n\r\n""" % oaiDownloadProcessor._identifier, oaiDownloadProcessor.buildRequest())
Example #15
0
 def testRequestWithAdditionalHeaders(self):
     oaiDownloadProcessor = OaiDownloadProcessor(path="/oai", metadataPrefix="oai_dc", workingDirectory=self.tempdir, xWait=True)
     request = oaiDownloadProcessor.buildRequest(additionalHeaders={'Host': 'example.org'})
     self.assertEquals("""GET /oai?verb=ListRecords&metadataPrefix=oai_dc&x-wait=True HTTP/1.0\r\nX-Meresco-Oai-Client-Identifier: %s\r\nHost: example.org\r\nUser-Agent: Meresco-Oai-DownloadProcessor/5.x\r\n\r\n""" % oaiDownloadProcessor._identifier, request)