def testIncrementalHarvestWithFromAfterSomePeriod(self):
    # With an incremental harvest schedule of period 10 and a final batch
    # handled at t=1.0, this test expects buildRequest() to return None up to
    # and including t=10.0, and to produce a ListRecords request carrying the
    # remembered 'from' once that moment has passed (t=11.1).
    observer = CallTrace(emptyGeneratorMethods=['add'])
    oaiDownloadProcessor = OaiDownloadProcessor(
        path="/oai",
        metadataPrefix="oai_dc",
        workingDirectory=self.tempdir,
        xWait=False,
        err=StringIO(),
        incrementalHarvestSchedule=Schedule(period=10))
    oaiDownloadProcessor._time = lambda: 1.0
    oaiDownloadProcessor.addObserver(observer)

    # LISTRECORDS_RESPONSE is a %-template (other tests in this file fill it
    # with '' or RESUMPTION_TOKEN). Fill it with '' so the parsed document is
    # a final batch without a stray, unformatted placeholder in it.
    consume(
        oaiDownloadProcessor.handle(
            parse(StringIO(LISTRECORDS_RESPONSE % ''))))
    self.assertIsNone(oaiDownloadProcessor._resumptionToken)
    self.assertEqual('2002-06-01T19:20:30Z', oaiDownloadProcessor._from)

    # Still inside the waiting period: no request may be built.
    self.assertIsNone(oaiDownloadProcessor.buildRequest())
    oaiDownloadProcessor._time = lambda: 6.0
    self.assertIsNone(oaiDownloadProcessor.buildRequest())
    oaiDownloadProcessor._time = lambda: 10.0
    self.assertIsNone(oaiDownloadProcessor.buildRequest())

    # Waiting period is over: the request continues from the stored 'from'.
    oaiDownloadProcessor._time = lambda: 11.1
    request = oaiDownloadProcessor.buildRequest()
    self.assertTrue(
        request.startswith(
            'GET /oai?verb=ListRecords&from=2002-06-01T19%3A20%3A30Z&metadataPrefix=oai_dc'
        ),
        request)
def testIncrementalHarvestScheduleNoneOverruledWithSetIncrementalHarvestSchedule(self):
    # Starts without an incremental harvest schedule, installs one afterwards
    # via setIncrementalHarvestSchedule() and checks when it starts to be
    # reflected in _earliestNextRequestTime.
    processor = OaiDownloadProcessor(
        path="/oai",
        metadataPrefix="oai_dc",
        workingDirectory=self.tempdir,
        xWait=False,
        err=StringIO(),
        incrementalHarvestSchedule=None)
    processor._time = lambda: 10

    def handleFinalBatch():
        # A ListRecords response without a resumption token.
        response = parse(StringIO(LISTRECORDS_RESPONSE % ''))
        consume(processor.handle(response))

    handleFinalBatch()
    self.assertEqual(None, processor._resumptionToken)
    self.assertEqual('2002-06-01T19:20:30Z', processor._from)
    self.assertEqual(None, processor._earliestNextRequestTime)

    # Setting the schedule alone does not yet lead to a request.
    processor.setIncrementalHarvestSchedule(schedule=Schedule(period=3))
    self.assertEqual(None, processor.buildRequest())
    self.assertEqual(None, processor._earliestNextRequestTime)

    # An explicit scheduleNextRequest() makes a request available.
    processor.scheduleNextRequest()
    self.assertNotEqual(None, processor.buildRequest())
    self.assertEqual(0, processor._earliestNextRequestTime)

    # The next final batch is expected to use the new period: 10 + 3.
    handleFinalBatch()
    self.assertEqual(13, processor._earliestNextRequestTime)
def testListRecordsRequestError(self):
    # A resumption token is read from harvester.state; after an OAI error
    # response the test expects nothing to reach the observer, the error to
    # be written to 'err', and the next request to fall back to a plain
    # metadataPrefix request.
    token = "u|c1286437597991025|mprefix|s|f"
    statePath = join(self.tempdir, 'harvester.state')
    with open(statePath, 'w') as stateFile:
        stateFile.write("Resumptiontoken: %s\n" % token)

    observer = CallTrace()
    processor = OaiDownloadProcessor(
        path="/oai",
        metadataPrefix="oai_dc",
        workingDirectory=self.tempdir,
        xWait=True,
        err=StringIO())
    processor.addObserver(observer)

    def expectedRequest(arguments):
        query = urlencode(arguments)
        return (
            'GET /oai?%s HTTP/1.0\r\n'
            'X-Meresco-Oai-Client-Identifier: %s\r\n'
            'User-Agent: Meresco-Oai-DownloadProcessor/5.x\r\n\r\n'
        ) % (query, processor._identifier)

    self.assertEqual(
        expectedRequest([
            ('verb', 'ListRecords'),
            ('resumptionToken', token),
            ('x-wait', 'True'),
        ]),
        processor.buildRequest())

    errorDocument = parse(StringIO(ERROR_RESPONSE))
    consume(processor.handle(errorDocument))

    self.assertEqual(0, len(observer.calledMethods))
    self.assertEqual(
        "someError: Some error occurred.\n",
        processor._err.getvalue())
    self.assertEqual(
        expectedRequest([
            ('verb', 'ListRecords'),
            ('metadataPrefix', 'oai_dc'),
            ('x-wait', 'True'),
        ]),
        processor.buildRequest())
def testSetInRequest(self):
    # Checks how the 'set' argument ends up in the request line: as-is for a
    # plain name, URL-encoded for special characters, and absent when a
    # resumption token from harvester.state is used instead.
    def processorForSet(setSpec):
        return OaiDownloadProcessor(
            path="/oai",
            metadataPrefix="oai_dc",
            set=setSpec,
            workingDirectory=self.tempdir,
            xWait=True)

    # Plain set name.
    processor = processorForSet("setName")
    expected = (
        "GET /oai?verb=ListRecords&metadataPrefix=oai_dc&set=setName&x-wait=True HTTP/1.0\r\n"
        "X-Meresco-Oai-Client-Identifier: %s\r\n"
        "User-Agent: Meresco-Oai-DownloadProcessor/5.x\r\n\r\n"
    ) % processor._identifier
    self.assertEqual(expected, processor.buildRequest())

    # Set name containing characters that are percent-encoded in the query.
    processor = processorForSet("set-_.!~*'()")
    expected = (
        "GET /oai?verb=ListRecords&metadataPrefix=oai_dc&set=set-_.%%21~%%2A%%27%%28%%29&x-wait=True HTTP/1.0\r\n"
        "X-Meresco-Oai-Client-Identifier: %s\r\n"
        "User-Agent: Meresco-Oai-DownloadProcessor/5.x\r\n\r\n"
    ) % processor._identifier
    self.assertEqual(expected, processor.buildRequest())

    # With a stored resumption token, only the token is sent.
    token = "u|c1286437597991025|mprefix|s|f"
    statePath = join(self.tempdir, 'harvester.state')
    with open(statePath, 'w') as stateFile:
        stateFile.write("Resumptiontoken: %s\n" % token)
    processor = processorForSet("setName")
    expected = (
        "GET /oai?verb=ListRecords&resumptionToken=u%%7Cc1286437597991025%%7Cmprefix%%7Cs%%7Cf&x-wait=True HTTP/1.0\r\n"
        "X-Meresco-Oai-Client-Identifier: %s\r\n"
        "User-Agent: Meresco-Oai-DownloadProcessor/5.x\r\n\r\n"
    ) % processor._identifier
    self.assertEqual(expected, processor.buildRequest())
def testRequest(self):
    # The initial request of a fresh processor: ListRecords with the
    # metadataPrefix and x-wait, plus the client identifier and User-Agent
    # headers.
    processor = OaiDownloadProcessor(
        path="/oai",
        metadataPrefix="oai_dc",
        workingDirectory=self.tempdir,
        xWait=True)
    expected = (
        "GET /oai?verb=ListRecords&metadataPrefix=oai_dc&x-wait=True HTTP/1.0\r\n"
        "X-Meresco-Oai-Client-Identifier: %s\r\n"
        "User-Agent: Meresco-Oai-DownloadProcessor/5.x\r\n\r\n"
    ) % processor._identifier
    self.assertEqual(expected, processor.buildRequest())
def testRequestWithAdditionalHeaders(self):
    # Headers passed through 'additionalHeaders' are expected to show up in
    # the built request, here between the identifier and User-Agent headers.
    processor = OaiDownloadProcessor(
        path="/oai",
        metadataPrefix="oai_dc",
        workingDirectory=self.tempdir,
        xWait=True)
    extraHeaders = {'Host': 'example.org'}
    request = processor.buildRequest(additionalHeaders=extraHeaders)
    expected = (
        "GET /oai?verb=ListRecords&metadataPrefix=oai_dc&x-wait=True HTTP/1.0\r\n"
        "X-Meresco-Oai-Client-Identifier: %s\r\n"
        "Host: example.org\r\n"
        "User-Agent: Meresco-Oai-DownloadProcessor/5.x\r\n\r\n"
    ) % processor._identifier
    self.assertEqual(expected, request)
def testKeepResumptionTokenOnFailingAddCall(self):
    # When the observer's 'add' raises, the test expects the exception to
    # propagate, a traceback plus the offending record to be written to
    # 'err', and the resumption token from harvester.state to remain in use
    # for the next request.
    token = "u|c1286437597991025|mprefix|s|f"
    statePath = join(self.tempdir, 'harvester.state')
    with open(statePath, 'w') as stateFile:
        stateFile.write("Resumptiontoken: %s\n" % token)

    observer = CallTrace()
    observer.exceptions = {'add': Exception("Could be anything")}
    processor = OaiDownloadProcessor(
        path="/oai",
        metadataPrefix="oai_dc",
        workingDirectory=self.tempdir,
        xWait=True,
        err=StringIO())
    processor.addObserver(observer)

    def requestWithStoredToken():
        query = urlencode([
            ('verb', 'ListRecords'),
            ('resumptionToken', token),
            ('x-wait', 'True'),
        ])
        return (
            'GET /oai?%s HTTP/1.0\r\n'
            'X-Meresco-Oai-Client-Identifier: %s\r\n'
            'User-Agent: Meresco-Oai-DownloadProcessor/5.x\r\n\r\n'
        ) % (query, processor._identifier)

    self.assertEqual(requestWithStoredToken(), processor.buildRequest())

    with self.assertRaises(Exception):
        list(
            compose(
                processor.handle(
                    parse(StringIO(LISTRECORDS_RESPONSE % RESUMPTION_TOKEN)))))

    calledNames = [m.name for m in observer.calledMethods]
    self.assertEqual(['startOaiBatch', 'add', 'stopOaiBatch'], calledNames)

    errorOutput = processor._err.getvalue()
    self.assertTrue(errorOutput.startswith('Traceback'), errorOutput)
    self.assertTrue(
        'Exception: Could be anything\nWhile processing:\n<record xmlns="http://www.openarchives.org/OAI/2.0/"><header><identifier>oai:identifier:1'
        in errorOutput,
        errorOutput)

    # The token read from the state file is still the one being requested.
    self.assertEqual(requestWithStoredToken(), processor.buildRequest())
def testBuildRequestNoneWhenNoResumptionToken(self):
    # After handling a final batch (a response without resumption token),
    # this test expects no resumption token to be kept and buildRequest() to
    # return None.
    observer = CallTrace(emptyGeneratorMethods=['add'])
    oaiDownloadProcessor = OaiDownloadProcessor(
        path="/oai",
        metadataPrefix="oai_dc",
        workingDirectory=self.tempdir,
        xWait=False,
        err=StringIO())
    oaiDownloadProcessor.addObserver(observer)

    # LISTRECORDS_RESPONSE is a %-template (other tests in this file fill it
    # with '' or RESUMPTION_TOKEN). Fill it with '' so the parsed document is
    # a final batch without a stray, unformatted placeholder in it.
    consume(
        oaiDownloadProcessor.handle(
            parse(StringIO(LISTRECORDS_RESPONSE % ''))))

    self.assertIsNone(oaiDownloadProcessor._resumptionToken)
    self.assertIsNone(oaiDownloadProcessor.buildRequest())
def testUpdateRequest(self):
    # Path, metadataPrefix, set and from are changed through their setters
    # after construction; the built request is expected to use the new
    # values.
    processor = OaiDownloadProcessor(
        path="/oai",
        metadataPrefix="oai_dc",
        workingDirectory=self.tempdir,
        xWait=True)

    processor.setPath('/otherOai')
    processor.setMetadataPrefix('otherPrefix')
    processor.setSet('aSet')
    processor.setFrom('2014')

    expected = (
        "GET /otherOai?verb=ListRecords&from=2014&metadataPrefix=otherPrefix&set=aSet&x-wait=True HTTP/1.0\r\n"
        "X-Meresco-Oai-Client-Identifier: %s\r\n"
        "User-Agent: Meresco-Oai-DownloadProcessor/5.x\r\n\r\n"
    ) % processor._identifier
    self.assertEqual(expected, processor.buildRequest())
def testScheduleNextRequest(self):
    # With the clock fixed at 17, checks _earliestNextRequestTime,
    # _timeForNextRequest() and buildRequest() after scheduleNextRequest()
    # is called without a schedule, with period 0 and with period 120.
    processor = OaiDownloadProcessor(
        path='/p',
        metadataPrefix='p',
        workingDirectory=self.tempdir)
    processor._time = lambda: 17

    # A final batch pushes the next request to some moment after 'now'.
    finalBatch = parse(StringIO(LISTRECORDS_RESPONSE % ''))
    consume(processor.handle(finalBatch))
    self.assertTrue(processor._earliestNextRequestTime > 17)

    # No schedule given: a request is available right away.
    processor.scheduleNextRequest()
    self.assertEqual(0, processor._earliestNextRequestTime)
    self.assertEqual(True, processor._timeForNextRequest())
    self.assertNotEqual(None, processor.buildRequest())

    # Period 0: earliest time equals 'now', so a request is still available.
    processor.scheduleNextRequest(Schedule(period=0))
    self.assertEqual(17, processor._earliestNextRequestTime)
    self.assertEqual(True, processor._timeForNextRequest())
    self.assertNotEqual(None, processor.buildRequest())

    # Period 120: earliest time is 17 + 120, so no request yet.
    processor.scheduleNextRequest(Schedule(period=120))
    self.assertEqual(137, processor._earliestNextRequestTime)
    self.assertEqual(False, processor._timeForNextRequest())
    self.assertEqual(None, processor.buildRequest())
def testUpdateRequestAfterSetResumptionToken(self):
    # Once a resumption token has been set explicitly, the expected request
    # carries only that token (on the updated path); the set and from values
    # do not appear in it.
    processor = OaiDownloadProcessor(
        path="/oai",
        metadataPrefix="oai_dc",
        set="aSet",
        workingDirectory=self.tempdir,
        xWait=False)

    processor.setPath('/otherOai')
    processor.setFrom('2014')
    processor.setResumptionToken('ReSumptionToken')

    expected = (
        "GET /otherOai?verb=ListRecords&resumptionToken=ReSumptionToken HTTP/1.0\r\n"
        "X-Meresco-Oai-Client-Identifier: %s\r\n"
        "User-Agent: Meresco-Oai-DownloadProcessor/5.x\r\n\r\n"
    ) % processor._identifier
    self.assertEqual(expected, processor.buildRequest())
def testPersistentIdentifier(self):
    # The client identifier is expected to be written to
    # 'harvester.identifier' in the working directory on first construction
    # and to be used again by a second processor on the same directory.
    identifierFilepath = join(self.tempdir, 'harvester.identifier')
    self.assertFalse(isfile(identifierFilepath))

    firstProcessor = OaiDownloadProcessor(
        path="/oai",
        metadataPrefix="oai_dc",
        workingDirectory=self.tempdir,
        xWait=True)
    currentIdentifier = firstProcessor._identifier

    self.assertTrue(isfile(identifierFilepath))
    with open(identifierFilepath) as identifierFile:
        storedIdentifier = identifierFile.read()
    self.assertEqual(currentIdentifier, storedIdentifier)

    expected = (
        "GET /oai?verb=ListRecords&metadataPrefix=oai_dc&x-wait=True HTTP/1.0\r\n"
        "X-Meresco-Oai-Client-Identifier: %s\r\n"
        "User-Agent: Meresco-Oai-DownloadProcessor/5.x\r\n\r\n"
    ) % currentIdentifier
    self.assertEqual(expected, firstProcessor.buildRequest())

    # A second processor on the same directory sends the same identifier.
    secondProcessor = OaiDownloadProcessor(
        path="/oai",
        metadataPrefix="oai_dc",
        workingDirectory=self.tempdir,
        xWait=True)
    expected = (
        "GET /oai?verb=ListRecords&metadataPrefix=oai_dc&x-wait=True HTTP/1.0\r\n"
        "X-Meresco-Oai-Client-Identifier: %s\r\n"
        "User-Agent: Meresco-Oai-DownloadProcessor/5.x\r\n\r\n"
    ) % currentIdentifier
    self.assertEqual(expected, secondProcessor.buildRequest())
def testRestartAfterFinish(self):
    # With restartAfterFinish=True, this test expects that after a final
    # batch (no resumption token) buildRequest() immediately yields a fresh
    # ListRecords request with only the metadataPrefix.
    observer = CallTrace(emptyGeneratorMethods=['add'])
    oaiDownloadProcessor = OaiDownloadProcessor(
        path="/oai",
        metadataPrefix="oai_dc",
        workingDirectory=self.tempdir,
        xWait=False,
        err=StringIO(),
        restartAfterFinish=True)
    oaiDownloadProcessor.addObserver(observer)

    # LISTRECORDS_RESPONSE is a %-template (other tests in this file fill it
    # with '' or RESUMPTION_TOKEN). Fill it with '' so the parsed document is
    # a final batch without a stray, unformatted placeholder in it.
    consume(
        oaiDownloadProcessor.handle(
            parse(StringIO(LISTRECORDS_RESPONSE % ''))))
    self.assertIsNone(oaiDownloadProcessor._resumptionToken)

    request = oaiDownloadProcessor.buildRequest()
    # Only the start is compared; the identifier value that follows is not
    # part of the expectation.
    self.assertTrue(
        request.startswith(
            'GET /oai?verb=ListRecords&metadataPrefix=oai_dc HTTP/1.0\r\nX-Meresco-Oai-Client-Identifier: '
        ),
        request)
def testUseResumptionToken(self):
    # A resumption token found in a response is expected to be kept, to be
    # URL-encoded into the next request, and to be available again to a new
    # processor created on the same working directory.
    observer = CallTrace(emptyGeneratorMethods=['add'])
    processor = OaiDownloadProcessor(
        path="/oai",
        metadataPrefix="oai_dc",
        workingDirectory=self.tempdir,
        xWait=True,
        err=StringIO())
    processor.addObserver(observer)

    batchWithToken = parse(StringIO(LISTRECORDS_RESPONSE % RESUMPTION_TOKEN))
    consume(processor.handle(batchWithToken))

    self.assertEqual('x?y&z', processor._resumptionToken)
    expected = (
        'GET /oai?verb=ListRecords&resumptionToken=x%%3Fy%%26z&x-wait=True HTTP/1.0\r\n'
        'X-Meresco-Oai-Client-Identifier: %s\r\n'
        'User-Agent: Meresco-Oai-DownloadProcessor/5.x\r\n\r\n'
    ) % processor._identifier
    self.assertEqual(expected, processor.buildRequest())

    # New instance, same working directory: the token is there again.
    restartedProcessor = OaiDownloadProcessor(
        path="/oai",
        metadataPrefix="oai_dc",
        workingDirectory=self.tempdir,
        xWait=True,
        err=StringIO())
    self.assertEqual('x?y&z', restartedProcessor._resumptionToken)