def testContinuousHarvesting(self):
    # Seed a finished harvest in tud.stats and a 'from' timestamp in tud.next,
    # then harvest a repository created with continuous=True. The test expects
    # self.listRecordsFrom to end up as that complete timestamp.
    self.mockRepository = MockOaiRequest('mocktud')
    statsPath = self.stateDir + '/tud.stats'
    nextPath = self.stateDir + '/tud.next'
    with open(statsPath, 'w') as statsFile:
        statsFile.write(
            ' Started: 1999-12-01 16:37:41, Harvested/Uploaded/Total: 56/23/113, Done: 2004-12-31 16:39:15\n')
    with open(nextPath, 'w') as nextFile:
        JsonDict({'resumptionToken': None, 'from': "2015-01-01T00:12:13Z"}).dump(nextFile)
    repository = self.MockRepository3(
        'tud', 'http://repository.tudelft.nl/oai', None, 'tud', continuous=True)
    harvesterLog = self.createLogger()
    harvester = Harvester(repository)
    harvester.addObserver(self)
    harvester.addObserver(harvesterLog)
    uploader = repository.createUploader(harvesterLog.eventLogger)
    harvester.addObserver(uploader)
    mapping = repository.mapping()
    harvester.addObserver(mapping)
    self.listRecordsFrom = None
    harvester.harvest()
    self.assertEqual('2015-01-01T00:12:13Z', self.listRecordsFrom)
def testIncrementalHarvest(self):
    # Seed tud.stats (one finished harvest), tud.next (a 'from' timestamp) and
    # tud.ids (113 fake ids), harvest once, then check the date handed to
    # listRecords and the counters on the second stats line.
    self.mockRepository = MockOaiRequest('mocktud')
    statsPath = self.stateDir + '/tud.stats'
    with open(statsPath, 'w') as statsFile:
        statsFile.write(
            ' Started: 1999-12-01 16:37:41, Harvested/Uploaded/Total: 56/23/113, Done: 2004-12-31 16:39:15\n')
    with open(self.stateDir + '/tud.next', 'w') as nextFile:
        JsonDict({'resumptionToken': None, 'from': "1999-12-01T16:37:41Z"}).dump(nextFile)
    with open(self.stateDir + '/tud.ids', 'w') as idsFile:
        for number in range(113):
            idsFile.write('oai:tudfakeid:%05i\n' % number)
    repository = self.MockRepository3(
        'tud', 'http://repository.tudelft.nl/oai', None, 'tud')
    harvesterLog = self.createLogger()
    harvester = Harvester(repository)
    harvester.addObserver(self)
    harvester.addObserver(harvesterLog)
    uploader = repository.createUploader(harvesterLog.eventLogger)
    harvester.addObserver(uploader)
    mapping = repository.mapping()
    harvester.addObserver(mapping)
    self.listRecordsFrom = None
    harvester.harvest()
    self.assertEqual('1999-12-01', self.listRecordsFrom)
    with open(statsPath) as statsFile:
        statsLines = statsFile.readlines()
    self.assertEqual(2, len(statsLines))
    expectedCounts = ('3', '3', '0', '116')
    self.assertEqual(expectedCounts, getHarvestedUploadedRecords(statsLines[1]))
class OaiRequestTest(SeecrTestCase):
    """Tests for OaiRequest; self.request is a MockOaiRequest built with 'mocktud'."""

    def setUp(self):
        super(OaiRequestTest, self).setUp()
        self.request = MockOaiRequest('mocktud')

    def testUserAgentDefault(self):
        # Capture what OaiRequest hands to its urlopen hook so the default
        # User-agent header can be inspected.
        args = {}

        def myOwnUrlOpen(*fArgs, **fKwargs):
            args['args'] = fArgs
            args['kwargs'] = fKwargs
            return StringIO(oaiResponseXML())

        request = OaiRequest("http://harvest.me", _urlopen=myOwnUrlOpen)
        request.identify()
        self.assertEqual("Meresco Harvester trunk", args['args'][0].headers['User-agent'])

    def testContextSetToTLS12(self):
        from ssl import SSLError, PROTOCOL_TLSv1_2
        calls = []

        def loggingUrlOpen(*fArgs, **fKwargs):
            # Record the keyword arguments of every attempt, then always fail.
            calls.append(fKwargs)
            raise SSLError("Some error")

        request = OaiRequest("http://harvest.me", _urlopen=loggingUrlOpen)
        # assertRaises instead of try/fail/bare-except: a bare except would
        # also swallow the AssertionError raised by self.fail(), so the
        # "should have failed" check could never trigger.
        self.assertRaises(Exception, request.identify)
        self.assertEqual(2, len(calls))
        self.assertEqual(None, calls[0]['context'])
        context = calls[1]['context']
        self.assertEqual(PROTOCOL_TLSv1_2, context.protocol)

    def testMockOaiRequest(self):
        response = self.request.request({'verb': 'ListRecords', 'metadataPrefix': 'oai_dc'})
        self.assertEqual(
            '2004-12-29T13:19:27Z',
            xpathFirst(response.response, '/oai:OAI-PMH/oai:responseDate/text()'))

    def testOtherOaiRequest(self):
        response = self.request.request(
            {'verb': 'GetRecord', 'metadataPrefix': 'oai_dc', 'identifier': 'oai:rep:12345'})
        self.assertEqual(
            '2005-04-28T12:16:27Z',
            xpathFirst(response.response, '/oai:OAI-PMH/oai:responseDate/text()'))

    def testListRecordsError(self):
        # "except ... as e" is accepted by Python 2.6+ and Python 3 alike.
        try:
            self.request.listRecords(resumptionToken='BadResumptionToken')
            self.fail()
        except OAIError as e:
            self.assertEqual(
                'The value of the resumptionToken argument is invalid or expired.',
                e.errorMessage())
            self.assertEqual(u'badResumptionToken', e.errorCode())
def testResumptionToken(self):
    # A stats line ending in "ResumptionToken: ..." is seeded; after harvesting,
    # self.listRecordsToken is expected to hold that token.
    self.mockRepository = MockOaiRequest('mocktud')
    # "with" closes the file even when write() raises, matching the sibling tests.
    with open(self.stateDir + '/tud.stats', 'w') as f:
        f.write(
            'Started: 1999-12-01 16:37:41, Harvested/Uploaded/Total: 113/113/113, Done: 2004-12-31 16:39:15, ResumptionToken: ga+hier+verder\n'
        )
    repository = self.MockRepository3('tud', 'http://repository.tudelft.nl/oai', None, 'tud')
    logger = self.createLogger()
    h = Harvester(repository)
    h.addObserver(self)
    h.addObserver(logger)
    h.addObserver(repository.createUploader(logger.eventLogger))
    h.addObserver(repository.mapping())
    self.listRecordsToken = None
    h.harvest()
    self.assertEqual('ga+hier+verder', self.listRecordsToken)
class OaiRequestTest(unittest.TestCase):
    """Tests for the request object; self.request is a MockOaiRequest built with 'mocktud'."""

    def setUp(self):
        self.request = MockOaiRequest('mocktud')

    def testMockOaiRequest(self):
        response = self.request.request({'verb': 'ListRecords', 'metadataPrefix': 'oai_dc'})
        self.assertEqual(
            '2004-12-29T13:19:27Z',
            xpathFirst(response.response, '/oai:OAI-PMH/oai:responseDate/text()'))

    def testOtherOaiRequest(self):
        response = self.request.request(
            {'verb': 'GetRecord', 'metadataPrefix': 'oai_dc', 'identifier': 'oai:rep:12345'})
        self.assertEqual(
            '2005-04-28T12:16:27Z',
            xpathFirst(response.response, '/oai:OAI-PMH/oai:responseDate/text()'))

    def testListRecordsError(self):
        # "except ... as e" is accepted by Python 2.6+ and Python 3 alike,
        # unlike the comma form, which is a syntax error on Python 3.
        try:
            self.request.listRecords(resumptionToken='BadResumptionToken')
            self.fail()
        except OAIError as e:
            self.assertEqual(
                'The value of the resumptionToken argument is invalid or expired.',
                e.errorMessage())
            self.assertEqual(u'badResumptionToken', e.errorCode())
def testOtherMetadataPrefix(self):
    # Harvest the 'tud' mock repository with metadataPrefix set to 'lom' and
    # expect exactly one id to have been sent.
    self.logger = self.createLogger('tud')
    lomRepository = self.MockRepository('tud', None)
    lomRepository.metadataPrefix = 'lom'
    harvester = Harvester(lomRepository)
    harvester.addObserver(MockOaiRequest('mocktud'))
    harvester.addObserver(self.logger)
    uploader = lomRepository.createUploader(self.logger.eventLogger)
    harvester.addObserver(uploader)
    mapping = lomRepository.mapping()
    harvester.addObserver(mapping)
    harvester.harvest()
    expectedIds = ['tud:oai:lorenet:147']
    self.assertEqual(expectedIds, self.sendId)
def testOnlyErrorInLogFile(self):
    # Both seeded stats lines end in "Error:"; after harvesting, the test
    # expects self.listRecordsFrom to be 'aap'.
    self.mockRepository = MockOaiRequest('mocktud')
    errorLines = (
        'Started: 1998-12-01 16:37:41, Harvested/Uploaded/Total: 113/113/113, Error:\n',
        'Started: 1999-12-01 16:37:41, Harvested/Uploaded/Total: 113/113/113, Error: XXX\n',
    )
    with open(self.stateDir + '/tud.stats', 'w') as statsFile:
        for errorLine in errorLines:
            statsFile.write(errorLine)
    repository = self.MockRepository3(
        'tud', 'http://repository.tudelft.nl/oai', None, 'tud')
    harvesterLog = self.createLogger()
    harvester = Harvester(repository)
    harvester.addObserver(self)
    harvester.addObserver(harvesterLog)
    uploader = repository.createUploader(harvesterLog.eventLogger)
    harvester.addObserver(uploader)
    mapping = repository.mapping()
    harvester.addObserver(mapping)
    self.listRecordsFrom = None
    harvester.harvest()
    self.assertEqual('aap', self.listRecordsFrom)
def createHarvesterWithMockUploader(self, name, set=None, mockRequest=None):
    """Build a Harvester for a mock repository and attach its four observers.

    The created log is stored on self.logger and the mapping on self.mapper.
    When mockRequest is falsy, a MockOaiRequest('mocktud') takes its place.
    Observers are added in the order: request, logger, uploader, mapper.
    Returns the harvester.
    """
    self.logger = self.createLogger(name)
    mockRepository = self.MockRepository(name, set)
    mockUploader = mockRepository.createUploader(self.logger.eventLogger())
    self.mapper = mockRepository.mapping()
    harvester = Harvester(mockRepository)
    if mockRequest:
        oaiRequest = mockRequest
    else:
        oaiRequest = MockOaiRequest('mocktud')
    for observer in (oaiRequest, self.logger, mockUploader, self.mapper):
        harvester.addObserver(observer)
    return harvester
def testResumptionToken(self):
    # A stats line ending in "ResumptionToken: ..." is seeded; after harvesting,
    # self.listRecordsToken is expected to hold that token.
    self.mockRepository = MockOaiRequest('mocktud')
    # "with" closes the file even when write() raises.
    with open(self.stateDir + '/tud.stats', 'w') as f:
        f.write('Started: 1999-12-01 16:37:41, Harvested/Uploaded/Total: 113/113/113, Done: 2004-12-31 16:39:15, ResumptionToken: ga+hier+verder\n')
    repository = self.MockRepository3('tud', 'http://repository.tudelft.nl/oai', None, 'tud')
    logger = self.createLogger()
    h = Harvester(repository)
    h.addObserver(self)
    h.addObserver(logger)
    h.addObserver(repository.createUploader(logger.eventLogger))
    h.addObserver(repository.mapping())
    self.listRecordsToken = None
    h.harvest()
    # assertEqual: the assertEquals alias is deprecated and gone in Python 3.12.
    self.assertEqual('ga+hier+verder', self.listRecordsToken)
def testContinuousHarvesting(self):
    # Seed a finished harvest in tud.stats and a 'from' timestamp in tud.next,
    # then harvest a repository created with continuous=True. The test expects
    # self.listRecordsFrom to end up as that complete timestamp.
    self.mockRepository = MockOaiRequest('mocktud')
    with open(self.stateDir + '/tud.stats', 'w') as f:
        f.write(' Started: 1999-12-01 16:37:41, Harvested/Uploaded/Total: 56/23/113, Done: 2004-12-31 16:39:15\n')
    # Close tud.next explicitly so its content is flushed to disk before the
    # harvester runs; an anonymous open() is only closed whenever the
    # interpreter happens to collect it.
    with open(self.stateDir + '/tud.next', 'w') as fp:
        JsonDict({'resumptionToken': None, 'from': "2015-01-01T00:12:13Z"}).dump(fp)
    repository = self.MockRepository3('tud', 'http://repository.tudelft.nl/oai', None, 'tud', continuous=True)
    logger = self.createLogger()
    h = Harvester(repository)
    h.addObserver(self)
    h.addObserver(logger)
    h.addObserver(repository.createUploader(logger.eventLogger))
    h.addObserver(repository.mapping())
    self.listRecordsFrom = None
    h.harvest()
    # assertEqual: the assertEquals alias is deprecated and gone in Python 3.12.
    self.assertEqual('2015-01-01T00:12:13Z', self.listRecordsFrom)
def testOnlyErrorInLogFile(self):
    # Both seeded stats lines end in "Error:"; after harvesting, the test
    # expects self.listRecordsFrom to be 'aap'.
    self.mockRepository = MockOaiRequest('mocktud')
    # "with" closes the file even when a write() raises.
    with open(self.stateDir + '/tud.stats', 'w') as f:
        f.write('Started: 1998-12-01 16:37:41, Harvested/Uploaded/Total: 113/113/113, Error:\n')
        f.write('Started: 1999-12-01 16:37:41, Harvested/Uploaded/Total: 113/113/113, Error: XXX\n')
    repository = self.MockRepository3('tud', 'http://repository.tudelft.nl/oai', None, 'tud')
    logger = self.createLogger()
    h = Harvester(repository)
    h.addObserver(self)
    h.addObserver(logger)
    h.addObserver(repository.createUploader(logger.eventLogger))
    h.addObserver(repository.mapping())
    self.listRecordsFrom = None
    h.harvest()
    # assertEqual: the assertEquals alias is deprecated and gone in Python 3.12.
    self.assertEqual('aap', self.listRecordsFrom)
def testIncrementalHarvest(self):
    # Seed tud.stats (one finished harvest), tud.next (a 'from' timestamp) and
    # tud.ids (113 fake ids), harvest once, then check the date handed to
    # listRecords and the counters on the second stats line.
    self.mockRepository = MockOaiRequest('mocktud')
    with open(self.stateDir + '/tud.stats', 'w') as f:
        f.write(' Started: 1999-12-01 16:37:41, Harvested/Uploaded/Total: 56/23/113, Done: 2004-12-31 16:39:15\n')
    # Every seed file is closed (and therefore flushed) before harvesting;
    # an anonymous open() would only be closed at garbage collection.
    with open(self.stateDir + '/tud.next', 'w') as fp:
        JsonDict({'resumptionToken': None, 'from': "1999-12-01T16:37:41Z"}).dump(fp)
    with open(self.stateDir + '/tud.ids', 'w') as f:
        for i in range(113):
            f.write('oai:tudfakeid:%05i\n' % i)
    repository = self.MockRepository3('tud', 'http://repository.tudelft.nl/oai', None, 'tud')
    logger = self.createLogger()
    h = Harvester(repository)
    h.addObserver(self)
    h.addObserver(logger)
    h.addObserver(repository.createUploader(logger.eventLogger))
    h.addObserver(repository.mapping())
    self.listRecordsFrom = None
    h.harvest()
    # assertEqual: the assertEquals alias is deprecated and gone in Python 3.12.
    self.assertEqual('1999-12-01', self.listRecordsFrom)
    with open(self.stateDir + '/tud.stats') as f:
        lines = f.readlines()
    self.assertEqual(2, len(lines))
    self.assertEqual(('3', '3', '0', '116'), getHarvestedUploadedRecords(lines[1]))
def setUp(self):
    """Store a MockOaiRequest constructed with 'mocktud' on self.request."""
    mockRequest = MockOaiRequest('mocktud')
    self.request = mockRequest
def testHarvestSet(self):
    # The test case itself is passed as the request observer (mockRequest=self);
    # after harvesting, self.listRecordsSet must hold the configured set.
    self.mockRepository = MockOaiRequest('mocktud')
    harvester = self.createHarvesterWithMockUploader('um', set='withfulltext:yes', mockRequest=self)
    harvester.harvest()
    # assertEqual: the assertEquals alias is deprecated and gone in Python 3.12.
    self.assertEqual('withfulltext:yes', self.listRecordsSet)
def setUp(self):
    """Run the base-class setUp, then store a MockOaiRequest('mocktud') on self.request."""
    super(OaiRequestTest, self).setUp()
    mockRequest = MockOaiRequest('mocktud')
    self.request = mockRequest
class OaiRequestTest(SeecrTestCase):
    """Tests for OaiRequest and OaiResponse.

    self.request is a MockOaiRequest built with 'mocktud'; tests that need a
    real OaiRequest construct one themselves, usually with a stub _urlopen.
    """

    def setUp(self):
        super(OaiRequestTest, self).setUp()
        self.request = MockOaiRequest('mocktud')

    def testUserAgentDefault(self):
        # Capture what OaiRequest hands to its urlopen hook so the default
        # User-agent header can be inspected.
        args = {}

        def myOwnUrlOpen(*fArgs, **fKwargs):
            args['args'] = fArgs
            args['kwargs'] = fKwargs
            return StringIO(oaiResponseXML())

        request = OaiRequest("http://harvest.me", _urlopen=myOwnUrlOpen)
        request.identify()
        self.assertEqual("Meresco Harvester {}".format(VERSION),
                         args['args'][0].headers['User-agent'])

    def testHeaders(self):
        defaultHeaders = {"User-Agent": "Meresco Harvester {}".format(VERSION)}
        self.assertEqual(
            defaultHeaders,
            OaiRequest('http://example.com')._headers())
        self.assertEqual(
            {"User-Agent": "User Agent 3.0"},
            OaiRequest('http://example.com', userAgent="User Agent 3.0")._headers())
        # Empty and blank user agents yield the default header.
        self.assertEqual(
            defaultHeaders,
            OaiRequest('http://example.com', userAgent='')._headers())
        self.assertEqual(
            defaultHeaders,
            OaiRequest('http://example.com', userAgent=' ')._headers())
        self.assertEqual(
            {
                "User-Agent": "Meresco Harvester {}".format(VERSION),
                "Authorization": "Bearer GivenKey"
            },
            OaiRequest('http://example.com', authorizationKey='GivenKey')._headers())

    def testContextSetToTLS12(self):
        from ssl import SSLError, PROTOCOL_TLSv1_2
        calls = []

        def loggingUrlOpen(*fArgs, **fKwargs):
            # Record the keyword arguments of every attempt, then always fail.
            calls.append(fKwargs)
            raise SSLError("Some error")

        request = OaiRequest("http://harvest.me", _urlopen=loggingUrlOpen)
        # assertRaises instead of try/fail/bare-except: a bare except would
        # also swallow the AssertionError raised by self.fail(), so the
        # "should have failed" check could never trigger.
        self.assertRaises(Exception, request.identify)
        self.assertEqual(2, len(calls))
        self.assertEqual(None, calls[0]['context'])
        context = calls[1]['context']
        self.assertEqual(PROTOCOL_TLSv1_2, context.protocol)

    def testMockOaiRequest(self):
        response = self.request.request({
            'verb': 'ListRecords',
            'metadataPrefix': 'oai_dc'
        })
        self.assertEqual(
            '2004-12-29T13:19:27Z',
            xpathFirst(response.response, '/oai:OAI-PMH/oai:responseDate/text()'))

    def testOtherOaiRequest(self):
        response = self.request.request({
            'verb': 'GetRecord',
            'metadataPrefix': 'oai_dc',
            'identifier': 'oai:rep:12345'
        })
        self.assertEqual(
            '2005-04-28T12:16:27Z',
            xpathFirst(response.response, '/oai:OAI-PMH/oai:responseDate/text()'))

    def testListRecordsError(self):
        try:
            self.request.listRecords(resumptionToken='BadResumptionToken')
            self.fail()
        except OAIError as e:
            self.assertEqual(
                'The value of the resumptionToken argument is invalid or expired.',
                e.errorMessage())
            self.assertEqual('badResumptionToken', e.errorCode())

    def testListRecords(self):
        response = self.request.listRecords(metadataPrefix='oai_dc')
        self.assertEqual("TestToken", response.resumptionToken)
        self.assertEqual("2004-12-29T13:19:27Z", response.responseDate)
        self.assertEqual(3, len(response.records))
        self.assertEqual(
            'oai:tudelft.nl:007087',
            xpathFirst(response.records[0], 'oai:header/oai:identifier/text()'))
        self.assertEqual(None, xpathFirst(response.records[0], 'oai:header/@status'))

    def mockRequest(self, args):
        """Stand-in for the request's _request: record args, return a parsed canned file."""
        self.mockRequest_args = args
        with open('mocktud/00001.xml') as fp:
            return parse(fp)

    def testListRecordArgs(self):
        self.request._request = self.mockRequest
        self.request.listRecords(metadataPrefix='kaas')
        kwargs = dict(self.mockRequest_args)
        self.assertEqual('kaas', kwargs['metadataPrefix'])
        self.assertTrue('resumptionToken' not in kwargs)
        self.assertEqual(('verb', 'ListRecords'), self.mockRequest_args[0])
        self.request.listRecords(from_='from', until='until', set='set', metadataPrefix='prefix')
        kwargs = dict(self.mockRequest_args)
        self.assertEqual(('verb', 'ListRecords'), self.mockRequest_args[0])
        self.assertEqual('from', kwargs['from'])
        self.assertEqual('until', kwargs['until'])
        self.assertEqual('set', kwargs['set'])
        self.assertEqual('prefix', kwargs['metadataPrefix'])

    def testGetRecord(self):
        response = self.request.getRecord(identifier='oai:rep:12345', metadataPrefix='oai_dc')
        self.assertEqual(
            'oai:rep:12345',
            xpathFirst(response.record, 'oai:header/oai:identifier/text()'))

    def testListRecordsWithAnEmptyList(self):
        response = self.request.listRecords(resumptionToken='EmptyListToken')
        self.assertEqual(0, len(response.records))
        self.assertEqual("", response.resumptionToken)
        self.assertEqual("2005-01-12T14:34:49Z", response.responseDate)

    def testBuildRequestUrl(self):
        oaiRequest = OaiRequest("http://x.y.z/oai")
        self.assertEqual(
            "http://x.y.z/oai?verb=ListRecords&metadataPrefix=oai_dc",
            oaiRequest._buildRequestUrl([('verb', 'ListRecords'), ('metadataPrefix', 'oai_dc')]))
        # A base URL that already carries a query string gets '&' appended, not '?'.
        oaiRequest = OaiRequest("http://x.y.z/oai?apikey=xyz123")
        self.assertEqual(
            "http://x.y.z/oai?apikey=xyz123&verb=ListRecords&metadataPrefix=oai_dc",
            oaiRequest._buildRequestUrl([('verb', 'ListRecords'), ('metadataPrefix', 'oai_dc')]))

    def testShouldUseOwnClockTimeAsResponseDateIfNonePresent(self):
        from inspect import getattr_static
        # Save the raw class attribute. Reading OaiResponse._zulu normally
        # would unwrap a staticmethod into a plain function, and assigning
        # that back would leave an ordinary function on the class for every
        # later test. (_zulu is presumably a staticmethod, like the stub
        # below; getattr_static is correct either way.)
        originalZuluMethod = getattr_static(OaiResponse, '_zulu')
        OaiResponse._zulu = staticmethod(lambda: '2020-12-12T12:12:12Z')
        try:
            response = oaiResponse(responseDate='')
            self.assertEqual('2020-12-12T12:12:12Z', response.responseDate)
        finally:
            OaiResponse._zulu = originalZuluMethod
class HarvesterTest(unittest.TestCase):
    """Tests for Harvester, with the test case doubling as a "self shunt".

    The test case is registered as an observer in several tests and therefore
    implements send/delete/start/stop/listRecords itself (see the bottom of
    the class), recording what it was called with on instance attributes.
    State and log files live in one temporary directory per test.
    """

    def setUp(self):
        self.sendCalled = 0
        self.sendException = None
        self.upload = None
        self.sendParts = []
        self.sendId = []
        self.listRecordsSet = None
        self.listRecordsToken = None
        self.startCalled = 0
        self.stopCalled = 0
        self.logDir = self.stateDir = mkdtemp()
        self.logger = None

    def tearDown(self):
        if self.logger is not None:
            self.logger.close()
        rmtree(self.logDir)

    def createLogger(self, name='tud'):
        """Create the harvester log for `name`, remember it on self.logger and return it."""
        self.logger = State(stateDir=self.stateDir, logDir=self.logDir, name=name).getHarvesterLog()
        return self.logger

    def createServer(self, url='http://repository.tudelft.nl/oai'):
        return OaiRequest(url)

    def testCreateHarvester(self):
        harvester = self.createHarvesterWithMockUploader('tud')
        self.assertEqual((0, 0), (self.startCalled, self.stopCalled))
        harvester.harvest()
        self.assertEqual((1, 1), (self.startCalled, self.stopCalled))
        harvester = self.createHarvesterWithMockUploader('eur')
        self.assertEqual((1, 1), (self.startCalled, self.stopCalled))
        harvester.harvest()
        self.assertEqual((2, 2), (self.startCalled, self.stopCalled))

    def testDoUpload(self):
        harvester = self.createHarvesterWithMockUploader('tud')
        harvester.harvest()
        self.assertEqual(3, self.sendCalled)
        self.assertEqual('tud:oai:tudelft.nl:007193', self.sendId[2])
        record = parse(StringIO(self.sendParts[2]['record']))
        subjects = record.xpath(
            '/oai:record/oai:metadata/oai_dc:dc/dc:subject/text()',
            namespaces=namespaces)
        self.assertEqual([
            'quantitative electron microscopy',
            'statistical experimental design',
            'parameter estimation'
        ], subjects)
        with open(os.path.join(self.stateDir, 'tud.stats')) as f:
            # Compare the tail of the file, excluding the final character.
            self.assertEqual('ResumptionToken: TestToken', f.read()[-27:-1])

    def testLogIDsForRemoval(self):
        harvester = self.createHarvesterWithMockUploader('tud')
        harvester.harvest()
        with open(self.stateDir + '/tud.ids') as idsfile:
            self.assertEqual('tud:oai:tudelft.nl:007087', idsfile.readline().strip())
            self.assertEqual('tud:oai:tudelft.nl:007192', idsfile.readline().strip())
            self.assertEqual('tud:oai:tudelft.nl:007193', idsfile.readline().strip())

    def createHarvesterWithMockUploader(self, name, set=None, mockRequest=None):
        """Build a Harvester for a mock repository and attach its four observers.

        Stores the log on self.logger and the mapping on self.mapper. When
        mockRequest is falsy a MockOaiRequest('mocktud') is used instead.
        """
        self.logger = self.createLogger(name)
        repository = self.MockRepository(name, set)
        uploader = repository.createUploader(self.logger.eventLogger())
        self.mapper = repository.mapping()
        harvester = Harvester(repository)
        harvester.addObserver(mockRequest or MockOaiRequest('mocktud'))
        harvester.addObserver(self.logger)
        harvester.addObserver(uploader)
        harvester.addObserver(self.mapper)
        return harvester

    def testSimpleStat(self):
        harvester = self.createHarvesterWithMockUploader('tud')
        harvester.harvest()
        self.assertTrue(os.path.isfile(self.stateDir + '/tud.stats'))
        with open(self.stateDir + '/tud.stats') as fp:
            stats = fp.readline().strip().split(',')
        year = strftime('%Y')
        self.assertEqual('Started: %s-' % year, stats[0][:14])
        self.assertEqual(' Harvested/Uploaded/Deleted/Total: 3/3/0/3', stats[1])
        self.assertEqual(' Done: %s-' % year, stats[2][:12])

    def testErrorStat(self):
        harvester = self.createHarvesterWithMockUploader('tud')
        self.sendException = Exception('send failed')
        try:
            harvester.harvest()
        except Exception:
            # harvest() may propagate the injected send failure; this test only
            # inspects the stats line. Catching Exception rather than
            # everything lets KeyboardInterrupt/SystemExit through.
            pass
        with open(self.stateDir + '/tud.stats') as fp:
            stats = fp.readline().strip().split(',')
        self.assertTrue(stats[2].startswith(' Error: '), stats[2])
        self.assertTrue(stats[2].endswith('send failed'), stats[2])

    def testResumptionTokenLog(self):
        harvester = self.createHarvesterWithMockUploader('tud')
        harvester.harvest()
        with open(self.stateDir + '/tud.stats') as fp:
            stats = fp.readline().strip().split(',')
        self.assertEqual(' ResumptionToken: TestToken', stats[3])

    def testOtherMetadataPrefix(self):
        self.logger = self.createLogger('tud')
        repository = self.MockRepository('tud', None)
        repository.metadataPrefix = 'lom'
        harvester = Harvester(repository)
        harvester.addObserver(MockOaiRequest('mocktud'))
        harvester.addObserver(self.logger)
        harvester.addObserver(
            repository.createUploader(self.logger.eventLogger))
        harvester.addObserver(repository.mapping())
        harvester.harvest()
        self.assertEqual(['tud:oai:lorenet:147'], self.sendId)

    def testWriteAndSeek(self):
        # Overwrite part of a file in place via tell()/seek().
        with open('test', 'w') as f:
            f.write('enige info: ')
            pos = f.tell()
            f.write('20000')
            f.seek(pos)
            f.write('12345')
        try:
            with open("test", "r") as f:
                self.assertEqual('enige info: 12345', f.readline().strip())
        finally:
            # Remove the scratch file even when the assertion fails.
            os.remove('test')

    def testException(self):
        # Checks what sys.exc_info() reports inside an except block.
        try:
            raise Exception('aap')
        except Exception:
            self.assertEqual('aap', str(sys.exc_info()[1]))
            self.assertEqual("<class 'Exception'>", str(sys.exc_info()[0]))

    def testIncrementalHarvest(self):
        self.mockRepository = MockOaiRequest('mocktud')
        with open(self.stateDir + '/tud.stats', 'w') as f:
            f.write(
                ' Started: 1999-12-01 16:37:41, Harvested/Uploaded/Total: 56/23/113, Done: 2004-12-31 16:39:15\n'
            )
        with open(self.stateDir + '/tud.next', 'w') as fp:
            JsonDict({
                'resumptionToken': None,
                'from': "1999-12-01T16:37:41Z"
            }).dump(fp)
        with open(self.stateDir + '/tud.ids', 'w') as f:
            for i in range(113):
                f.write('oai:tudfakeid:%05i\n' % i)
        repository = self.MockRepository3('tud', 'http://repository.tudelft.nl/oai', None, 'tud')
        logger = self.createLogger()
        h = Harvester(repository)
        h.addObserver(self)
        h.addObserver(logger)
        h.addObserver(repository.createUploader(logger.eventLogger))
        h.addObserver(repository.mapping())
        self.listRecordsFrom = None
        h.harvest()
        self.assertEqual('1999-12-01', self.listRecordsFrom)
        with open(self.stateDir + '/tud.stats') as f:
            lines = f.readlines()
        self.assertEqual(2, len(lines))
        self.assertEqual(('3', '3', '0', '116'), getHarvestedUploadedRecords(lines[1]))

    def testNotIncrementalInCaseOfError(self):
        self.mockRepository = MockOaiRequest('mocktud')
        with open(self.stateDir + '/tud.stats', 'w') as f:
            f.write(
                'Started: 1998-12-01 16:37:41, Harvested/Uploaded/Total: 113/113/113, Done: 2004-12-31 16:39:15\n'
            )
            f.write(
                'Started: 1999-12-01 16:37:41, Harvested/Uploaded/Total: 113/113/113, Error: XXX\n'
            )
        repository = self.MockRepository3('tud', 'http://repository.tudelft.nl/oai', None, 'tud')
        logger = self.createLogger()
        h = Harvester(repository)
        h.addObserver(self)
        h.addObserver(logger)
        h.addObserver(repository.createUploader(logger.eventLogger))
        h.addObserver(repository.mapping())
        self.listRecordsFrom = None
        h.harvest()
        self.assertEqual('1998-12-01', self.listRecordsFrom)

    def testOnlyErrorInLogFile(self):
        self.mockRepository = MockOaiRequest('mocktud')
        with open(self.stateDir + '/tud.stats', 'w') as f:
            f.write(
                'Started: 1998-12-01 16:37:41, Harvested/Uploaded/Total: 113/113/113, Error:\n'
            )
            f.write(
                'Started: 1999-12-01 16:37:41, Harvested/Uploaded/Total: 113/113/113, Error: XXX\n'
            )
        repository = self.MockRepository3('tud', 'http://repository.tudelft.nl/oai', None, 'tud')
        logger = self.createLogger()
        h = Harvester(repository)
        h.addObserver(self)
        h.addObserver(logger)
        h.addObserver(repository.createUploader(logger.eventLogger))
        h.addObserver(repository.mapping())
        self.listRecordsFrom = None
        h.harvest()
        # 'aap' is the default value of from_ in the listRecords shunt below.
        self.assertEqual('aap', self.listRecordsFrom)

    def testResumptionToken(self):
        self.mockRepository = MockOaiRequest('mocktud')
        # "with" closes the file even when write() raises, like the other tests here.
        with open(self.stateDir + '/tud.stats', 'w') as f:
            f.write(
                'Started: 1999-12-01 16:37:41, Harvested/Uploaded/Total: 113/113/113, Done: 2004-12-31 16:39:15, ResumptionToken: ga+hier+verder\n'
            )
        repository = self.MockRepository3('tud', 'http://repository.tudelft.nl/oai', None, 'tud')
        logger = self.createLogger()
        h = Harvester(repository)
        h.addObserver(self)
        h.addObserver(logger)
        h.addObserver(repository.createUploader(logger.eventLogger))
        h.addObserver(repository.mapping())
        self.listRecordsToken = None
        h.harvest()
        self.assertEqual('ga+hier+verder', self.listRecordsToken)

    def testContinuousHarvesting(self):
        self.mockRepository = MockOaiRequest('mocktud')
        with open(self.stateDir + '/tud.stats', 'w') as f:
            f.write(
                ' Started: 1999-12-01 16:37:41, Harvested/Uploaded/Total: 56/23/113, Done: 2004-12-31 16:39:15\n'
            )
        with open(self.stateDir + '/tud.next', 'w') as f:
            JsonDict({
                'resumptionToken': None,
                'from': "2015-01-01T00:12:13Z"
            }).dump(f)
        repository = self.MockRepository3('tud', 'http://repository.tudelft.nl/oai', None, 'tud', continuous=True)
        logger = self.createLogger()
        h = Harvester(repository)
        h.addObserver(self)
        h.addObserver(logger)
        h.addObserver(repository.createUploader(logger.eventLogger))
        h.addObserver(repository.mapping())
        self.listRecordsFrom = None
        h.harvest()
        self.assertEqual('2015-01-01T00:12:13Z', self.listRecordsFrom)

    def testHarvestSet(self):
        self.mockRepository = MockOaiRequest('mocktud')
        harvester = self.createHarvesterWithMockUploader(
            'um', set='withfulltext:yes', mockRequest=self)
        harvester.harvest()
        self.assertEqual('withfulltext:yes', self.listRecordsSet)

    def mockHarvest(self, repository, logger, uploader):
        """Record the repository's identifying fields in self.mockHarvestArgs."""
        if not hasattr(self, 'mockHarvestArgs'):
            self.mockHarvestArgs = []
        self.mockHarvestArgs.append({
            'name': repository.id,
            'baseurl': repository.baseurl,
            'set': repository.set,
            'repositoryGroupId': repository.repositoryGroupId
        })

    def testNoDateHarvester(self):
        "runs a test with xml containing no dates"
        harvester = self.createHarvesterWithMockUploader('tud')
        self.logger._state.token = 'NoDateToken'
        harvester.harvest()

    def testNothingInRepository(self):
        harvester = self.createHarvesterWithMockUploader('tud')
        self.logger._state.token = 'EmptyListToken'
        harvester.harvest()
        with open(self.stateDir + '/tud.stats') as fp:
            lines = fp.readlines()
        self.assertTrue(
            'Harvested/Uploaded/Deleted/Total: 0/0/0/0' in lines[0])

    def testUploadRecord(self):
        harvester = self.createHarvesterWithMockUploader('tud')
        harvester.upload(oaiResponse(identifier='mockid'))
        self.assertEqual(['tud:mockid'], self.sendId)
        self.assertFalse(hasattr(self, 'delete_id'))

    def testSkippedRecord(self):
        harvester = self.createHarvesterWithMockUploader('tud')

        def createUpload(repository, oaiResponse):
            # Replacement for the mapper's createUpload that marks the upload as skipped.
            upload = Upload(repository=repository, oaiResponse=oaiResponse)
            upload.id = "tud:mockid"
            upload.skip = True
            return upload

        self.mapper.createUpload = createUpload
        harvester.upload(oaiResponse(identifier='mockid'))
        self.assertEqual([], self.sendId)
        self.assertFalse(hasattr(self, 'delete_id'))

    def testDelete(self):
        harvester = self.createHarvesterWithMockUploader('tud')
        harvester.upload(oaiResponse(identifier='mockid', deleted=True))
        self.assertEqual([], self.sendId)
        self.assertEqual('tud:mockid', self.delete_id)

    def testDcIdentifierTake2(self):
        self.sendFulltexturl = None
        harvester = self.createHarvesterWithMockUploader('tud')
        # NOTE(review): other tests set self.logger._state.token; confirm that
        # assigning self.logger.token has the intended effect here.
        self.logger.token = 'DcIdentifierHttp2'
        harvester.harvest()
        with open(self.stateDir + '/tud.stats') as f:
            f.readlines()

    def testHarvesterStopsIgnoringAfter100records(self):
        observer = CallTrace('observer')
        upload = Upload(repository=None, oaiResponse=oaiResponse(identifier='mockid'))
        upload.id = 'mockid'
        observer.returnValues['createUpload'] = upload
        # One more invalid id than the repository's maxIgnore of 100.
        observer.returnValues['totalInvalidIds'] = 101
        observer.exceptions['send'] = InvalidDataException(
            upload.id, "message")
        repository = CallTrace("repository", returnValues={'maxIgnore': 100})
        harvester = Harvester(repository)
        harvester.addObserver(observer)
        self.assertRaises(
            TooMuchInvalidDataException,
            lambda: harvester.upload(oaiResponse(identifier='mockid')))
        self.assertEqual([
            'createUpload', "notifyHarvestedRecord", "send", "logInvalidData",
            "totalInvalidIds"
        ], [m.name for m in observer.calledMethods])

    def testHarvesterIgnoringInvalidDataErrors(self):
        observer = CallTrace('observer')
        upload = Upload(repository=None, oaiResponse=oaiResponse(identifier='mockid'))
        upload.id = 'mockid'
        observer.returnValues['createUpload'] = upload
        observer.returnValues['totalInvalidIds'] = 0
        observer.exceptions['send'] = InvalidDataException(
            upload.id, "message")
        repository = CallTrace("repository", returnValues={'maxIgnore': 100})
        harvester = Harvester(repository)
        harvester.addObserver(observer)
        harvester.upload(oaiResponse())
        self.assertEqual([
            'createUpload', "notifyHarvestedRecord", "send", 'logInvalidData',
            "totalInvalidIds", 'logIgnoredIdentifierWarning'
        ], [m.name for m in observer.calledMethods])

    #self shunt:
    def send(self, upload):
        """Record the upload, then raise self.sendException when one is set."""
        self.sendCalled += 1
        self.sendId.append(upload.id)
        self.sendParts.append(upload.parts)
        self.upload = upload
        if self.sendException:
            raise self.sendException

    def delete(self, anUpload):
        self.delete_id = anUpload.id

    def uploaderInfo(self):
        return 'The uploader is connected to /dev/null'

    def start(self):
        self.startCalled += 1

    def stop(self):
        self.stopCalled += 1

    def listRecordsButWaitLong(self, a, b, c, d):
        sleep(20)

    def MockRepository(self, id, set):
        return _MockRepository(id, 'http://mock.server', set, 'inst' + id, self)

    def MockRepository2(self, nr):
        return _MockRepository('reponame' + nr, 'url' + nr, 'set' + nr, 'instname' + nr, self)

    def MockRepository3(self, id, baseurl, set, repositoryGroupId, continuous=False):
        return _MockRepository(id, baseurl, set, repositoryGroupId, self, continuous=continuous)

    def mockssetarget(self):
        return self

    def createUploader(self, logger):
        return self

    def listRecords(self, metadataPrefix=None, from_="aap", resumptionToken='mies', set=None):
        """Record the arguments, then delegate to self.mockRepository.

        With a metadataPrefix the call is forwarded with that prefix (plus
        `set` when given); otherwise it is forwarded with the resumptionToken.
        """
        self.listRecordsFrom = from_
        self.listRecordsToken = resumptionToken
        self.listRecordsSet = set
        if metadataPrefix:
            if set:
                return self.mockRepository.listRecords(
                    metadataPrefix=metadataPrefix, set=set)
            return self.mockRepository.listRecords(
                metadataPrefix=metadataPrefix)
        return self.mockRepository.listRecords(resumptionToken=resumptionToken)
def testHarvestSet(self):
    # The test case itself is passed as the request observer (mockRequest=self);
    # after harvesting, self.listRecordsSet must hold the configured set.
    self.mockRepository = MockOaiRequest('mocktud')
    requestedSet = 'withfulltext:yes'
    harvester = self.createHarvesterWithMockUploader(
        'um', set=requestedSet, mockRequest=self)
    harvester.harvest()
    self.assertEqual('withfulltext:yes', self.listRecordsSet)
class HarvesterTest(unittest.TestCase):
    """Tests for Harvester, run against canned 'mocktud' OAI responses.

    The test case doubles as a "self shunt": it provides send/delete/start/
    stop/uploaderInfo and, for some tests, listRecords, and records what those
    methods were called with so the tests can assert on it.
    """

    def setUp(self):
        # Recorders filled in by the self-shunt methods at the end of the class.
        self.sendCalled = 0
        self.sendException = None
        self.upload = None
        self.sendParts = []
        self.sendId = []
        self.listRecordsSet = None
        self.listRecordsToken = None
        self.startCalled = 0
        self.stopCalled = 0
        # State and log files share one temporary directory (see tearDown).
        self.logDir = self.stateDir = mkdtemp()

    def tearDown(self):
        rmtree(self.logDir)

    def createLogger(self):
        # Create, remember and return a HarvesterLog named 'tud'.
        self.logger = HarvesterLog(
            stateDir=self.stateDir, logDir=self.logDir, name='tud')
        return self.logger

    def createServer(self, url='http://repository.tudelft.nl/oai'):
        return OaiRequest(url)

    def testCreateHarvester(self):
        # Each harvest() must call start() and stop() exactly once.
        harvester = self.createHarvesterWithMockUploader('tud')
        self.assertEqual((0, 0), (self.startCalled, self.stopCalled))
        harvester.harvest()
        self.assertEqual((1, 1), (self.startCalled, self.stopCalled))
        harvester = self.createHarvesterWithMockUploader('eur')
        self.assertEqual((1, 1), (self.startCalled, self.stopCalled))
        harvester.harvest()
        self.assertEqual((2, 2), (self.startCalled, self.stopCalled))

    def testDoUpload(self):
        harvester = self.createHarvesterWithMockUploader('tud')
        harvester.harvest()
        self.assertEqual(3, self.sendCalled)
        self.assertEqual('tud:oai:tudelft.nl:007193', self.sendId[2])
        record = parse(StringIO(self.sendParts[2]['record']))
        subjects = record.xpath(
            '/oai:record/oai:metadata/oai_dc:dc/dc:subject/text()',
            namespaces=namespaces)
        self.assertEqual(
            ['quantitative electron microscopy',
             'statistical experimental design',
             'parameter estimation'],
            subjects)
        with open(os.path.join(self.stateDir, 'tud.stats')) as f:
            stats = f.read()
        # The slice is the last 26 characters before the final character.
        self.assertEqual('ResumptionToken: TestToken', stats[-27:-1])

    def testLogIDsForRemoval(self):
        harvester = self.createHarvesterWithMockUploader('tud')
        harvester.harvest()
        with open(self.stateDir + '/tud.ids') as idsfile:
            self.assertEqual(
                'tud:oai:tudelft.nl:007087', idsfile.readline().strip())
            self.assertEqual(
                'tud:oai:tudelft.nl:007192', idsfile.readline().strip())
            self.assertEqual(
                'tud:oai:tudelft.nl:007193', idsfile.readline().strip())

    def createHarvesterWithMockUploader(self, name, set=None, mockRequest=None):
        """Build a Harvester wired to a mock repository and its observers.

        name is used for both the HarvesterLog and the repository id; set is
        passed to MockRepository; mockRequest, when given, replaces the
        default MockOaiRequest('mocktud') observer. The log and mapper are
        kept on self.logger and self.mapper.
        """
        self.logger = HarvesterLog(
            stateDir=self.stateDir, logDir=self.logDir, name=name)
        repository = self.MockRepository(name, set)
        uploader = repository.createUploader(self.logger.eventLogger())
        self.mapper = repository.mapping()
        harvester = Harvester(repository)
        harvester.addObserver(mockRequest or MockOaiRequest('mocktud'))
        harvester.addObserver(self.logger)
        harvester.addObserver(uploader)
        harvester.addObserver(self.mapper)
        return harvester

    def testSimpleStat(self):
        harvester = self.createHarvesterWithMockUploader('tud')
        harvester.harvest()
        self.assertTrue(os.path.isfile(self.stateDir + '/tud.stats'))
        with open(self.stateDir + '/tud.stats') as f:
            stats = f.readline().strip().split(',')
        year = strftime('%Y')
        self.assertEqual('Started: %s-' % year, stats[0][:14])
        self.assertEqual(' Harvested/Uploaded/Deleted/Total: 3/3/0/3', stats[1])
        self.assertEqual(' Done: %s-' % year, stats[2][:12])

    def testErrorStat(self):
        # A failing send() must be recorded as an 'Error:' entry in the stats.
        harvester = self.createHarvesterWithMockUploader('tud')
        self.sendException = Exception('send failed')
        try:
            harvester.harvest()
        except Exception:
            # Whether harvest() re-raises is not what this test checks; only
            # the stats line matters. KeyboardInterrupt/SystemExit propagate.
            pass
        with open(self.stateDir + '/tud.stats') as f:
            stats = f.readline().strip().split(',')
        self.assertTrue(stats[2].startswith(' Error: '), stats[2])
        self.assertTrue(stats[2].endswith('send failed'), stats[2])

    def testResumptionTokenLog(self):
        harvester = self.createHarvesterWithMockUploader('tud')
        harvester.harvest()
        with open(self.stateDir + '/tud.stats') as f:
            stats = f.readline().strip().split(',')
        self.assertEqual(' ResumptionToken: TestToken', stats[3])

    def testOtherMetadataPrefix(self):
        self.logger = HarvesterLog(
            stateDir=self.stateDir, logDir=self.logDir, name='tud')
        repository = self.MockRepository('tud', None)
        repository.metadataPrefix = 'lom'
        harvester = Harvester(repository)
        harvester.addObserver(MockOaiRequest('mocktud'))
        harvester.addObserver(self.logger)
        harvester.addObserver(repository.createUploader(self.logger.eventLogger))
        harvester.addObserver(repository.mapping())
        harvester.harvest()
        self.assertEqual(['tud:oai:lorenet:147'], self.sendId)

    def testWriteAndSeek(self):
        # Overwriting part of a file via tell()/seek(). The scratch file lives
        # in the temporary directory so that nothing is left in the working
        # directory, even when the assertion fails (tearDown removes it).
        path = os.path.join(self.stateDir, 'test')
        with open(path, 'w') as f:
            f.write('enige info: ')
            pos = f.tell()
            f.write('20000')
            f.seek(pos)
            f.write('12345')
        with open(path, 'r') as f:
            self.assertEqual('enige info: 12345', f.readline().strip())

    def testException(self):
        # sys.exc_info() replaces the Python-2-only sys.exc_type/sys.exc_value.
        try:
            raise Exception('aap')
        except Exception:
            exc_type, exc_value = sys.exc_info()[:2]
            self.assertEqual('aap', str(exc_value))
            self.assertTrue(exc_type is Exception, str(exc_type))

    def testIncrementalHarvest(self):
        self.mockRepository = MockOaiRequest('mocktud')
        with open(self.stateDir + '/tud.stats', 'w') as f:
            f.write(' Started: 1999-12-01 16:37:41, Harvested/Uploaded/Total: 56/23/113, Done: 2004-12-31 16:39:15\n')
        with open(self.stateDir + '/tud.next', 'w') as fp:
            JsonDict({'resumptionToken': None, 'from': "1999-12-01T16:37:41Z"}).dump(fp)
        with open(self.stateDir + '/tud.ids', 'w') as f:
            for i in range(113):
                f.write('oai:tudfakeid:%05i\n' % i)
        repository = self.MockRepository3(
            'tud', 'http://repository.tudelft.nl/oai', None, 'tud')
        logger = self.createLogger()
        h = Harvester(repository)
        h.addObserver(self)
        h.addObserver(logger)
        h.addObserver(repository.createUploader(logger.eventLogger))
        h.addObserver(repository.mapping())
        self.listRecordsFrom = None
        h.harvest()
        self.assertEqual('1999-12-01', self.listRecordsFrom)
        with open(self.stateDir + '/tud.stats') as f:
            lines = f.readlines()
        self.assertEqual(2, len(lines))
        self.assertEqual(
            ('3', '3', '0', '116'), getHarvestedUploadedRecords(lines[1]))

    def testNotIncrementalInCaseOfError(self):
        self.mockRepository = MockOaiRequest('mocktud')
        with open(self.stateDir + '/tud.stats', 'w') as f:
            f.write('Started: 1998-12-01 16:37:41, Harvested/Uploaded/Total: 113/113/113, Done: 2004-12-31 16:39:15\n')
            f.write('Started: 1999-12-01 16:37:41, Harvested/Uploaded/Total: 113/113/113, Error: XXX\n')
        repository = self.MockRepository3(
            'tud', 'http://repository.tudelft.nl/oai', None, 'tud')
        logger = self.createLogger()
        h = Harvester(repository)
        h.addObserver(self)
        h.addObserver(logger)
        h.addObserver(repository.createUploader(logger.eventLogger))
        h.addObserver(repository.mapping())
        self.listRecordsFrom = None
        h.harvest()
        # The date asserted is that of the 'Done' line, not of the 'Error' line.
        self.assertEqual('1998-12-01', self.listRecordsFrom)

    def testOnlyErrorInLogFile(self):
        self.mockRepository = MockOaiRequest('mocktud')
        with open(self.stateDir + '/tud.stats', 'w') as f:
            f.write('Started: 1998-12-01 16:37:41, Harvested/Uploaded/Total: 113/113/113, Error:\n')
            f.write('Started: 1999-12-01 16:37:41, Harvested/Uploaded/Total: 113/113/113, Error: XXX\n')
        repository = self.MockRepository3(
            'tud', 'http://repository.tudelft.nl/oai', None, 'tud')
        logger = self.createLogger()
        h = Harvester(repository)
        h.addObserver(self)
        h.addObserver(logger)
        h.addObserver(repository.createUploader(logger.eventLogger))
        h.addObserver(repository.mapping())
        self.listRecordsFrom = None
        h.harvest()
        # 'aap' is the default of listRecords' from_ parameter.
        self.assertEqual('aap', self.listRecordsFrom)

    def testResumptionToken(self):
        self.mockRepository = MockOaiRequest('mocktud')
        with open(self.stateDir + '/tud.stats', 'w') as f:
            f.write('Started: 1999-12-01 16:37:41, Harvested/Uploaded/Total: 113/113/113, Done: 2004-12-31 16:39:15, ResumptionToken: ga+hier+verder\n')
        repository = self.MockRepository3(
            'tud', 'http://repository.tudelft.nl/oai', None, 'tud')
        logger = self.createLogger()
        h = Harvester(repository)
        h.addObserver(self)
        h.addObserver(logger)
        h.addObserver(repository.createUploader(logger.eventLogger))
        h.addObserver(repository.mapping())
        self.listRecordsToken = None
        h.harvest()
        self.assertEqual('ga+hier+verder', self.listRecordsToken)

    def testContinuousHarvesting(self):
        self.mockRepository = MockOaiRequest('mocktud')
        with open(self.stateDir + '/tud.stats', 'w') as f:
            f.write(' Started: 1999-12-01 16:37:41, Harvested/Uploaded/Total: 56/23/113, Done: 2004-12-31 16:39:15\n')
        with open(self.stateDir + '/tud.next', 'w') as fp:
            JsonDict({'resumptionToken': None, 'from': "2015-01-01T00:12:13Z"}).dump(fp)
        repository = self.MockRepository3(
            'tud', 'http://repository.tudelft.nl/oai', None, 'tud',
            continuous=True)
        logger = self.createLogger()
        h = Harvester(repository)
        h.addObserver(self)
        h.addObserver(logger)
        h.addObserver(repository.createUploader(logger.eventLogger))
        h.addObserver(repository.mapping())
        self.listRecordsFrom = None
        h.harvest()
        # With continuous=True the full timestamp is asserted, not just a date.
        self.assertEqual('2015-01-01T00:12:13Z', self.listRecordsFrom)

    def testHarvestSet(self):
        self.mockRepository = MockOaiRequest('mocktud')
        harvester = self.createHarvesterWithMockUploader(
            'um', set='withfulltext:yes', mockRequest=self)
        harvester.harvest()
        self.assertEqual('withfulltext:yes', self.listRecordsSet)

    def mockHarvest(self, repository, logger, uploader):
        # Append the repository's identifying fields to self.mockHarvestArgs,
        # creating the list on first use; logger and uploader are unused.
        if not hasattr(self, 'mockHarvestArgs'):
            self.mockHarvestArgs = []
        self.mockHarvestArgs.append({
            'name': repository.id,
            'baseurl': repository.baseurl,
            'set': repository.set,
            'repositoryGroupId': repository.repositoryGroupId,
        })

    def testNoDateHarvester(self):
        "runs a test with xml containing no dates"
        harvester = self.createHarvesterWithMockUploader('tud')
        self.logger._state.token = 'NoDateToken'
        harvester.harvest()

    def testNothingInRepository(self):
        harvester = self.createHarvesterWithMockUploader('tud')
        self.logger._state.token = 'EmptyListToken'
        harvester.harvest()
        with open(self.stateDir + '/tud.stats') as f:
            lines = f.readlines()
        self.assertIn('Harvested/Uploaded/Deleted/Total: 0/0/0/0', lines[0])

    def testUploadRecord(self):
        harvester = self.createHarvesterWithMockUploader('tud')
        harvester.upload(oaiResponse(identifier='mockid'))
        self.assertEqual(['tud:mockid'], self.sendId)
        self.assertFalse(hasattr(self, 'delete_id'))

    def testSkippedRecord(self):
        # An upload flagged skip=True must be neither sent nor deleted.
        harvester = self.createHarvesterWithMockUploader('tud')

        def createUpload(repository, oaiResponse):
            upload = Upload(repository=repository, oaiResponse=oaiResponse)
            upload.id = "tud:mockid"
            upload.skip = True
            return upload

        self.mapper.createUpload = createUpload
        harvester.upload(oaiResponse(identifier='mockid'))
        self.assertEqual([], self.sendId)
        self.assertFalse(hasattr(self, 'delete_id'))

    def testDelete(self):
        harvester = self.createHarvesterWithMockUploader('tud')
        harvester.upload(oaiResponse(identifier='mockid', deleted=True))
        self.assertEqual([], self.sendId)
        self.assertEqual('tud:mockid', self.delete_id)

    def testDcIdentifierTake2(self):
        # Smoke test: harvest, then check the stats file can be read.
        self.sendFulltexturl = None
        harvester = self.createHarvesterWithMockUploader('tud')
        self.logger.token = 'DcIdentifierHttp2'
        harvester.harvest()
        with open(self.stateDir + '/tud.stats') as f:
            f.readlines()

    def testHarvesterStopsIgnoringAfter100records(self):
        # totalInvalidIds (101) exceeds maxIgnore (100): upload() must raise.
        observer = CallTrace('observer')
        upload = Upload(
            repository=None, oaiResponse=oaiResponse(identifier='mockid'))
        upload.id = 'mockid'
        observer.returnValues['createUpload'] = upload
        observer.returnValues['totalInvalidIds'] = 101
        observer.exceptions['send'] = InvalidDataException(upload.id, "message")
        repository = CallTrace("repository", returnValues={'maxIgnore': 100})
        harvester = Harvester(repository)
        harvester.addObserver(observer)
        self.assertRaises(
            TooMuchInvalidDataException,
            lambda: harvester.upload(oaiResponse(identifier='mockid')))
        self.assertEqual(
            ['createUpload', "notifyHarvestedRecord", "send",
             "logInvalidData", "totalInvalidIds"],
            [m.name for m in observer.calledMethods])

    def testHarvesterIgnoringInvalidDataErrors(self):
        # totalInvalidIds (0) is below maxIgnore (100): the invalid record is
        # logged and upload() returns normally.
        observer = CallTrace('observer')
        upload = Upload(
            repository=None, oaiResponse=oaiResponse(identifier='mockid'))
        upload.id = 'mockid'
        observer.returnValues['createUpload'] = upload
        observer.returnValues['totalInvalidIds'] = 0
        observer.exceptions['send'] = InvalidDataException(upload.id, "message")
        repository = CallTrace("repository", returnValues={'maxIgnore': 100})
        harvester = Harvester(repository)
        harvester.addObserver(observer)
        harvester.upload(oaiResponse())
        self.assertEqual(
            ['createUpload', "notifyHarvestedRecord", "send",
             'logInvalidData', "totalInvalidIds",
             'logIgnoredIdentifierWarning'],
            [m.name for m in observer.calledMethods])

    # self shunt: the test case itself receives the uploader and OAI calls.

    def send(self, upload):
        # Record the call, then raise self.sendException when one is set.
        self.sendCalled += 1
        self.sendId.append(upload.id)
        self.sendParts.append(upload.parts)
        self.upload = upload
        if self.sendException:
            raise self.sendException

    def delete(self, anUpload):
        self.delete_id = anUpload.id

    def uploaderInfo(self):
        return 'The uploader is connected to /dev/null'

    def start(self):
        self.startCalled += 1

    def stop(self):
        self.stopCalled += 1

    def listRecordsButWaitLong(self, a, b, c, d):
        # Ignores its arguments and blocks for 20 seconds.
        sleep(20)

    def MockRepository(self, id, set):
        return _MockRepository(id, 'http://mock.server', set, 'inst' + id, self)

    def MockRepository2(self, nr):
        return _MockRepository(
            'reponame' + nr, 'url' + nr, 'set' + nr, 'instname' + nr, self)

    def MockRepository3(self, id, baseurl, set, repositoryGroupId,
                        continuous=False):
        return _MockRepository(
            id, baseurl, set, repositoryGroupId, self, continuous=continuous)

    def mockssetarget(self):
        return self

    def createUploader(self, logger):
        # The test case acts as its own uploader; logger is unused.
        return self

    def listRecords(self, metadataPrefix=None, from_="aap",
                    resumptionToken='mies', set=None):
        """Record the arguments and delegate to self.mockRepository.

        from_ is recorded in self.listRecordsFrom but not forwarded. With a
        metadataPrefix the mock is queried by prefix (plus set when truthy);
        otherwise it is queried by resumptionToken.
        """
        self.listRecordsFrom = from_
        self.listRecordsToken = resumptionToken
        self.listRecordsSet = set
        if metadataPrefix:
            if set:
                return self.mockRepository.listRecords(
                    metadataPrefix=metadataPrefix, set=set)
            return self.mockRepository.listRecords(
                metadataPrefix=metadataPrefix)
        return self.mockRepository.listRecords(resumptionToken=resumptionToken)