def testHarvesterIgnoringInvalidDataErrors(self):
    """Check that a single invalid record is logged and skipped, not raised.

    The observer is rigged to raise ``InvalidDataException`` from ``send``
    while reporting 0 invalid ids, and the repository reports a
    ``maxIgnore`` of 100.  ``upload`` is expected to return normally and
    the observer must have received the ignore-path calls in order.
    """
    observer = CallTrace('observer')
    upload = Upload(repository=None, oaiResponse=oaiResponse(identifier='mockid'))
    upload.id = 'mockid'
    observer.returnValues['createUpload'] = upload
    observer.returnValues['totalInvalidIds'] = 0
    observer.exceptions['send'] = InvalidDataException(upload.id, "message")
    repository = CallTrace("repository", returnValues={'maxIgnore': 100})
    harvester = Harvester(repository)
    harvester.addObserver(observer)

    # Must not raise: the invalid record is expected to be ignored.
    harvester.upload(oaiResponse())

    # assertEqual instead of the deprecated assertEquals alias, which no
    # longer exists on Python 3.12+.
    self.assertEqual(
        [
            'createUpload',
            "notifyHarvestedRecord",
            "send",
            'logInvalidData',
            "totalInvalidIds",
            'logIgnoredIdentifierWarning',
        ],
        [m.name for m in observer.calledMethods])
def testHarvesterStopsIgnoringAfter100records(self):
    """Check that upload raises once the invalid-id count exceeds maxIgnore.

    The observer reports 101 invalid ids against a repository ``maxIgnore``
    of 100 and raises ``InvalidDataException`` from ``send``.  ``upload``
    is expected to raise ``TooMuchInvalidDataException``, and the observer
    must not have received ``logIgnoredIdentifierWarning``.
    """
    observer = CallTrace('observer')
    upload = Upload(repository=None, oaiResponse=oaiResponse(identifier='mockid'))
    upload.id = 'mockid'
    observer.returnValues['createUpload'] = upload
    observer.returnValues['totalInvalidIds'] = 101
    observer.exceptions['send'] = InvalidDataException(upload.id, "message")
    repository = CallTrace("repository", returnValues={'maxIgnore': 100})
    harvester = Harvester(repository)
    harvester.addObserver(observer)

    with self.assertRaises(TooMuchInvalidDataException):
        harvester.upload(oaiResponse(identifier='mockid'))

    # assertEqual instead of the deprecated assertEquals alias, which no
    # longer exists on Python 3.12+.
    self.assertEqual(
        [
            'createUpload',
            "notifyHarvestedRecord",
            "send",
            "logInvalidData",
            "totalInvalidIds",
        ],
        [m.name for m in observer.calledMethods])
def createHarvesterWithMockUploader(self, name, set=None, mockRequest=None):
    """Build a Harvester for a mock repository with its observers attached.

    Stores the logger on ``self.logger`` and the repository's mapping on
    ``self.mapper`` as side effects.

    :param name: passed to ``self.createLogger`` and ``self.MockRepository``.
    :param set: second argument for ``self.MockRepository``.
    :param mockRequest: request observer to use; when falsy a
        ``MockOaiRequest('mocktud')`` is created instead.
    :return: the Harvester with request, logger, uploader and mapper
        added as observers, in that order.
    """
    self.logger = self.createLogger(name)
    mockRepo = self.MockRepository(name, set)
    mockUploader = mockRepo.createUploader(self.logger.eventLogger())
    self.mapper = mockRepo.mapping()

    result = Harvester(mockRepo)
    request = mockRequest if mockRequest else MockOaiRequest('mocktud')
    # Observers are attached in this fixed order.
    for each in (request, self.logger, mockUploader, self.mapper):
        result.addObserver(each)
    return result
def createHarvesterWithMockUploader(self, name, set=None, mockRequest=None):
    """Build a Harvester for a mock repository with its observers attached.

    Creates a ``HarvesterLog`` rooted at ``self.stateDir`` / ``self.logDir``
    and stores it on ``self.logger``; the repository's mapping is stored on
    ``self.mapper``.

    :param name: name for the ``HarvesterLog`` and ``self.MockRepository``.
    :param set: second argument for ``self.MockRepository``.
    :param mockRequest: request observer to use; when falsy a
        ``MockOaiRequest('mocktud')`` is created instead.
    :return: the Harvester with request, logger, uploader and mapper
        added as observers, in that order.
    """
    self.logger = HarvesterLog(
        stateDir=self.stateDir,
        logDir=self.logDir,
        name=name,
    )
    mockRepo = self.MockRepository(name, set)
    mockUploader = mockRepo.createUploader(self.logger.eventLogger())
    self.mapper = mockRepo.mapping()

    result = Harvester(mockRepo)
    request = mockRequest if mockRequest else MockOaiRequest('mocktud')
    # Observers are attached in this fixed order.
    for each in (request, self.logger, mockUploader, self.mapper):
        result.addObserver(each)
    return result
def testHarvesterStopsIgnoringAfter100records(self):
    """Upload must fail when 101 invalid ids are reported and maxIgnore is 100.

    ``send`` on the traced observer raises ``InvalidDataException``; the
    harvester is expected to turn that into ``TooMuchInvalidDataException``
    after logging the invalid data and querying ``totalInvalidIds``.
    """
    mockUpload = Upload(
        repository=None,
        oaiResponse=oaiResponse(identifier='mockid'))
    mockUpload.id = 'mockid'

    tracer = CallTrace('observer')
    tracer.returnValues['createUpload'] = mockUpload
    tracer.returnValues['totalInvalidIds'] = 101
    tracer.exceptions['send'] = InvalidDataException(mockUpload.id, "message")

    harvester = Harvester(
        CallTrace("repository", returnValues={'maxIgnore': 100}))
    harvester.addObserver(tracer)

    with self.assertRaises(TooMuchInvalidDataException):
        harvester.upload(oaiResponse(identifier='mockid'))

    expectedCalls = [
        'createUpload',
        "notifyHarvestedRecord",
        "send",
        "logInvalidData",
        "totalInvalidIds",
    ]
    observedCalls = [call.name for call in tracer.calledMethods]
    self.assertEqual(expectedCalls, observedCalls)
def testHarvesterIgnoringInvalidDataErrors(self):
    """Upload must swallow an InvalidDataException while under maxIgnore.

    The traced observer raises ``InvalidDataException`` from ``send`` and
    reports 0 invalid ids; the repository reports ``maxIgnore`` 100.  The
    upload is expected to complete and end with a
    ``logIgnoredIdentifierWarning`` call on the observer.
    """
    mockUpload = Upload(
        repository=None,
        oaiResponse=oaiResponse(identifier='mockid'))
    mockUpload.id = 'mockid'

    tracer = CallTrace('observer')
    tracer.returnValues['createUpload'] = mockUpload
    tracer.returnValues['totalInvalidIds'] = 0
    tracer.exceptions['send'] = InvalidDataException(mockUpload.id, "message")

    harvester = Harvester(
        CallTrace("repository", returnValues={'maxIgnore': 100}))
    harvester.addObserver(tracer)

    # No exception is expected to escape here.
    harvester.upload(oaiResponse())

    expectedCalls = [
        'createUpload',
        "notifyHarvestedRecord",
        "send",
        'logInvalidData',
        "totalInvalidIds",
        'logIgnoredIdentifierWarning',
    ]
    observedCalls = [call.name for call in tracer.calledMethods]
    self.assertEqual(expectedCalls, observedCalls)
def testContinuousHarvesting(self):
    """A continuous repository should harvest from the full 'from' timestamp.

    Seeds ``tud.stats`` with one finished run and ``tud.next`` with
    ``from`` set to ``2015-01-01T00:12:13Z``, harvests a repository created
    with ``continuous=True`` and expects ``self.listRecordsFrom`` to hold
    that exact timestamp afterwards.
    """
    # NOTE(review): self.listRecordsFrom is presumably filled in by an
    # observer method on this test case that is outside this view.
    self.mockRepository = MockOaiRequest('mocktud')

    statsPath = self.stateDir + '/tud.stats'
    with open(statsPath, 'w') as statsFile:
        statsFile.write(
            ' Started: 1999-12-01 16:37:41, Harvested/Uploaded/Total: 56/23/113, Done: 2004-12-31 16:39:15\n')

    nextPath = self.stateDir + '/tud.next'
    with open(nextPath, 'w') as nextFile:
        nextState = JsonDict({
            'resumptionToken': None,
            'from': "2015-01-01T00:12:13Z",
        })
        nextState.dump(nextFile)

    repository = self.MockRepository3(
        'tud', 'http://repository.tudelft.nl/oai', None, 'tud',
        continuous=True)
    logger = self.createLogger()

    harvester = Harvester(repository)
    harvester.addObserver(self)
    harvester.addObserver(logger)
    uploader = repository.createUploader(logger.eventLogger)
    harvester.addObserver(uploader)
    mapping = repository.mapping()
    harvester.addObserver(mapping)

    self.listRecordsFrom = None
    harvester.harvest()
    self.assertEqual('2015-01-01T00:12:13Z', self.listRecordsFrom)
def testResumptionToken(self):
    """A resumption token stored in the stats file should be used on harvest.

    Seeds ``tud.stats`` with a run ending in
    ``ResumptionToken: ga+hier+verder``, harvests, and expects
    ``self.listRecordsToken`` to hold that token afterwards.
    """
    # NOTE(review): self.listRecordsToken is presumably filled in by an
    # observer method on this test case that is outside this view.
    self.mockRepository = MockOaiRequest('mocktud')
    # Context manager so the stats file is closed (and flushed) even if the
    # write fails, before the harvester reads it.
    with open(self.stateDir + '/tud.stats', 'w') as f:
        f.write(
            'Started: 1999-12-01 16:37:41, Harvested/Uploaded/Total: 113/113/113, Done: 2004-12-31 16:39:15, ResumptionToken: ga+hier+verder\n'
        )
    repository = self.MockRepository3(
        'tud', 'http://repository.tudelft.nl/oai', None, 'tud')
    logger = self.createLogger()
    h = Harvester(repository)
    h.addObserver(self)
    h.addObserver(logger)
    h.addObserver(repository.createUploader(logger.eventLogger))
    h.addObserver(repository.mapping())
    self.listRecordsToken = None
    h.harvest()
    self.assertEqual('ga+hier+verder', self.listRecordsToken)
def testOnlyErrorInLogFile(self):
    """Harvest when the stats file holds nothing but failed runs.

    Seeds ``tud.stats`` with two ``Error:`` lines and no finished run,
    harvests, and expects ``self.listRecordsFrom`` to equal ``'aap'``.
    """
    # NOTE(review): where 'aap' comes from is not visible here; presumably
    # the test case's own observer method supplies it. Confirm.
    self.mockRepository = MockOaiRequest('mocktud')

    erroredRuns = [
        'Started: 1998-12-01 16:37:41, Harvested/Uploaded/Total: 113/113/113, Error:\n',
        'Started: 1999-12-01 16:37:41, Harvested/Uploaded/Total: 113/113/113, Error: XXX\n',
    ]
    with open(self.stateDir + '/tud.stats', 'w') as statsFile:
        statsFile.writelines(erroredRuns)

    repository = self.MockRepository3(
        'tud', 'http://repository.tudelft.nl/oai', None, 'tud')
    logger = self.createLogger()

    harvester = Harvester(repository)
    harvester.addObserver(self)
    harvester.addObserver(logger)
    uploader = repository.createUploader(logger.eventLogger)
    harvester.addObserver(uploader)
    mapping = repository.mapping()
    harvester.addObserver(mapping)

    self.listRecordsFrom = None
    harvester.harvest()
    self.assertEqual('aap', self.listRecordsFrom)
def testIncrementalHarvest(self):
    """An incremental harvest should start from the date of the stored 'from'.

    Seeds ``tud.stats`` with one finished run, ``tud.next`` with ``from``
    set to ``1999-12-01T16:37:41Z`` and ``tud.ids`` with 113 identifiers.
    After harvesting, ``self.listRecordsFrom`` is expected to be
    ``'1999-12-01'`` and the stats file to contain a second line that
    ``getHarvestedUploadedRecords`` parses to ``('3', '3', '0', '116')``.
    """
    # NOTE(review): self.listRecordsFrom is presumably filled in by an
    # observer method on this test case that is outside this view.
    self.mockRepository = MockOaiRequest('mocktud')
    statsPath = self.stateDir + '/tud.stats'

    with open(statsPath, 'w') as statsFile:
        statsFile.write(
            ' Started: 1999-12-01 16:37:41, Harvested/Uploaded/Total: 56/23/113, Done: 2004-12-31 16:39:15\n')

    with open(self.stateDir + '/tud.next', 'w') as nextFile:
        nextState = JsonDict({
            'resumptionToken': None,
            'from': "1999-12-01T16:37:41Z",
        })
        nextState.dump(nextFile)

    with open(self.stateDir + '/tud.ids', 'w') as idsFile:
        idsFile.writelines(
            'oai:tudfakeid:%05i\n' % number for number in range(113))

    repository = self.MockRepository3(
        'tud', 'http://repository.tudelft.nl/oai', None, 'tud')
    logger = self.createLogger()

    harvester = Harvester(repository)
    harvester.addObserver(self)
    harvester.addObserver(logger)
    uploader = repository.createUploader(logger.eventLogger)
    harvester.addObserver(uploader)
    mapping = repository.mapping()
    harvester.addObserver(mapping)

    self.listRecordsFrom = None
    harvester.harvest()
    self.assertEqual('1999-12-01', self.listRecordsFrom)

    with open(statsPath) as statsFile:
        statsLines = statsFile.readlines()
    self.assertEqual(2, len(statsLines))
    self.assertEqual(
        ('3', '3', '0', '116'),
        getHarvestedUploadedRecords(statsLines[1]))
def testOtherMetadataPrefix(self):
    """Harvest with the repository's metadataPrefix switched to 'lom'.

    After harvesting, ``self.sendId`` is expected to equal
    ``['tud:oai:lorenet:147']``.
    """
    # NOTE(review): self.sendId is presumably populated by the uploader
    # created via self.MockRepository; that wiring is outside this view.
    self.logger = self.createLogger('tud')

    mockRepo = self.MockRepository('tud', None)
    mockRepo.metadataPrefix = 'lom'

    subject = Harvester(mockRepo)
    subject.addObserver(MockOaiRequest('mocktud'))
    subject.addObserver(self.logger)
    uploader = mockRepo.createUploader(self.logger.eventLogger)
    subject.addObserver(uploader)
    mapping = mockRepo.mapping()
    subject.addObserver(mapping)

    subject.harvest()

    expectedIds = ['tud:oai:lorenet:147']
    self.assertEqual(expectedIds, self.sendId)
def testContinuousHarvesting(self):
    """A continuous repository should harvest from the full 'from' timestamp.

    Seeds ``tud.stats`` with one finished run and ``tud.next`` with
    ``from`` set to ``2015-01-01T00:12:13Z``, harvests a repository created
    with ``continuous=True`` and expects ``self.listRecordsFrom`` to hold
    that exact timestamp afterwards.
    """
    # NOTE(review): self.listRecordsFrom is presumably filled in by an
    # observer method on this test case that is outside this view.
    self.mockRepository = MockOaiRequest('mocktud')
    with open(self.stateDir + '/tud.stats', 'w') as f:
        f.write(' Started: 1999-12-01 16:37:41, Harvested/Uploaded/Total: 56/23/113, Done: 2004-12-31 16:39:15\n')
    # Explicitly close the state file: an anonymous open(...) handle is only
    # flushed when the garbage collector gets to it, which is not guaranteed
    # to happen before harvest() reads the file.
    with open(self.stateDir + '/tud.next', 'w') as fp:
        JsonDict({'resumptionToken': None, 'from': "2015-01-01T00:12:13Z"}).dump(fp)
    repository = self.MockRepository3(
        'tud', 'http://repository.tudelft.nl/oai', None, 'tud',
        continuous=True)
    logger = self.createLogger()
    h = Harvester(repository)
    h.addObserver(self)
    h.addObserver(logger)
    h.addObserver(repository.createUploader(logger.eventLogger))
    h.addObserver(repository.mapping())
    self.listRecordsFrom = None
    h.harvest()
    # assertEqual instead of the deprecated assertEquals alias, which no
    # longer exists on Python 3.12+.
    self.assertEqual('2015-01-01T00:12:13Z', self.listRecordsFrom)
def testResumptionToken(self):
    """A resumption token stored in the stats file should be used on harvest.

    Seeds ``tud.stats`` with a run ending in
    ``ResumptionToken: ga+hier+verder``, harvests, and expects
    ``self.listRecordsToken`` to hold that token afterwards.
    """
    # NOTE(review): self.listRecordsToken is presumably filled in by an
    # observer method on this test case that is outside this view.
    self.mockRepository = MockOaiRequest('mocktud')
    # Context manager so the stats file is closed even if the write fails.
    with open(self.stateDir + '/tud.stats', 'w') as f:
        f.write('Started: 1999-12-01 16:37:41, Harvested/Uploaded/Total: 113/113/113, Done: 2004-12-31 16:39:15, ResumptionToken: ga+hier+verder\n')
    repository = self.MockRepository3(
        'tud', 'http://repository.tudelft.nl/oai', None, 'tud')
    logger = self.createLogger()
    h = Harvester(repository)
    h.addObserver(self)
    h.addObserver(logger)
    h.addObserver(repository.createUploader(logger.eventLogger))
    h.addObserver(repository.mapping())
    self.listRecordsToken = None
    h.harvest()
    # assertEqual instead of the deprecated assertEquals alias, which no
    # longer exists on Python 3.12+.
    self.assertEqual('ga+hier+verder', self.listRecordsToken)
def testOnlyErrorInLogFile(self):
    """Harvest when the stats file holds nothing but failed runs.

    Seeds ``tud.stats`` with two ``Error:`` lines and no finished run,
    harvests, and expects ``self.listRecordsFrom`` to equal ``'aap'``.
    """
    # NOTE(review): where 'aap' comes from is not visible here; presumably
    # the test case's own observer method supplies it. Confirm.
    self.mockRepository = MockOaiRequest('mocktud')
    # Context manager so the stats file is closed even if a write fails.
    with open(self.stateDir + '/tud.stats', 'w') as f:
        f.write('Started: 1998-12-01 16:37:41, Harvested/Uploaded/Total: 113/113/113, Error:\n')
        f.write('Started: 1999-12-01 16:37:41, Harvested/Uploaded/Total: 113/113/113, Error: XXX\n')
    repository = self.MockRepository3(
        'tud', 'http://repository.tudelft.nl/oai', None, 'tud')
    logger = self.createLogger()
    h = Harvester(repository)
    h.addObserver(self)
    h.addObserver(logger)
    h.addObserver(repository.createUploader(logger.eventLogger))
    h.addObserver(repository.mapping())
    self.listRecordsFrom = None
    h.harvest()
    # assertEqual instead of the deprecated assertEquals alias, which no
    # longer exists on Python 3.12+.
    self.assertEqual('aap', self.listRecordsFrom)
def testIncrementalHarvest(self):
    """An incremental harvest should start from the date of the stored 'from'.

    Seeds ``tud.stats`` with one finished run, ``tud.next`` with ``from``
    set to ``1999-12-01T16:37:41Z`` and ``tud.ids`` with 113 identifiers.
    After harvesting, ``self.listRecordsFrom`` is expected to be
    ``'1999-12-01'`` and the stats file to contain a second line that
    ``getHarvestedUploadedRecords`` parses to ``('3', '3', '0', '116')``.
    """
    # NOTE(review): self.listRecordsFrom is presumably filled in by an
    # observer method on this test case that is outside this view.
    self.mockRepository = MockOaiRequest('mocktud')
    with open(self.stateDir + '/tud.stats', 'w') as f:
        f.write(' Started: 1999-12-01 16:37:41, Harvested/Uploaded/Total: 56/23/113, Done: 2004-12-31 16:39:15\n')
    # Explicitly close the state file: an anonymous open(...) handle is only
    # flushed when the garbage collector gets to it, which is not guaranteed
    # to happen before harvest() reads the file.
    with open(self.stateDir + '/tud.next', 'w') as fp:
        JsonDict({'resumptionToken': None, 'from': "1999-12-01T16:37:41Z"}).dump(fp)
    with open(self.stateDir + '/tud.ids', 'w') as f:
        for i in range(113):
            f.write('oai:tudfakeid:%05i\n' % i)
    repository = self.MockRepository3(
        'tud', 'http://repository.tudelft.nl/oai', None, 'tud')
    logger = self.createLogger()
    h = Harvester(repository)
    h.addObserver(self)
    h.addObserver(logger)
    h.addObserver(repository.createUploader(logger.eventLogger))
    h.addObserver(repository.mapping())
    self.listRecordsFrom = None
    h.harvest()
    # assertEqual instead of the deprecated assertEquals alias, which no
    # longer exists on Python 3.12+.
    self.assertEqual('1999-12-01', self.listRecordsFrom)
    with open(self.stateDir + '/tud.stats') as f:
        lines = f.readlines()
    self.assertEqual(2, len(lines))
    self.assertEqual(('3', '3', '0', '116'), getHarvestedUploadedRecords(lines[1]))
def testOtherMetadataPrefix(self):
    """Harvest with the repository's metadataPrefix switched to 'lom'.

    Uses a ``HarvesterLog`` rooted at ``self.stateDir`` / ``self.logDir``.
    After harvesting, ``self.sendId`` is expected to equal
    ``['tud:oai:lorenet:147']``.
    """
    # NOTE(review): self.sendId is presumably populated by the uploader
    # created via self.MockRepository; that wiring is outside this view.
    self.logger = HarvesterLog(stateDir=self.stateDir, logDir=self.logDir, name='tud')
    repository = self.MockRepository('tud', None)
    repository.metadataPrefix = 'lom'
    harvester = Harvester(repository)
    harvester.addObserver(MockOaiRequest('mocktud'))
    harvester.addObserver(self.logger)
    harvester.addObserver(repository.createUploader(self.logger.eventLogger))
    harvester.addObserver(repository.mapping())
    harvester.harvest()
    # assertEqual instead of the deprecated assertEquals alias, which no
    # longer exists on Python 3.12+.
    self.assertEqual(['tud:oai:lorenet:147'], self.sendId)