def testHasWorkBeforeAndAfterDoingWork(self):
    """A fresh log has work; after a completed run without a resumption token it has none."""
    logger = HarvesterLog(stateDir=self.stateDir, logDir=self.logDir, name='name')
    self.assertTrue(logger.hasWork())
    logger.startRepository()
    # Finish the run: no resumption token, harvested up to 'now'.
    logger.endRepository(None, strftime("%Y-%m-%dT%H:%M:%SZ", logger._state._gmtime()))
    logger.close()
    reopened = HarvesterLog(stateDir=self.stateDir, logDir=self.logDir, name='name')
    self.assertFalse(reopened.hasWork())
def testLoggingAlwaysStartsNewline(self):
    """A stats line left unterminated by an interrupted run is not continued; a new line is begun."""
    statsFile = open(self.stateDir+'/name.stats', 'w')
    # Simulate an interrupted write: the line ends mid-word, without a newline.
    statsFile.write('Started: 2005-01-02 16:12:56, Harvested/Uploaded/Total: 199/200/1650, Don"crack"')
    statsFile.close()
    logger = HarvesterLog(stateDir=self.stateDir, logDir=self.logDir, name='name')
    logger.startRepository()
    logger.close()
    lines = open(self.stateDir+'/name.stats').readlines()
    self.assertEqual(2, len(lines))
def testLogWithoutDoubleIDs(self):
    """Duplicate identifiers — on disk or uploaded twice — are counted only once."""
    idsFile = open(self.stateDir+'/name.ids', 'w')
    idsFile.writelines(['id:1\n', 'id:2\n', 'id:1\n'])
    idsFile.close()
    logger = HarvesterLog(stateDir=self.stateDir, logDir=self.logDir, name='name')
    logger.startRepository()
    # 'id:1' appears twice in the file but counts once.
    self.assertEquals(2, logger.totalIds())
    logger.uploadIdentifier('id:3')
    self.assertEquals(3, logger.totalIds())
    # Re-uploading known ids does not grow the total.
    logger.uploadIdentifier('id:3')
    logger.uploadIdentifier('id:2')
    self.assertEquals(3, logger.totalIds())
def testOtherMetadataPrefix(self):
    # Harvest with a non-default metadataPrefix ('lom') and verify the uploaded id.
    self.logger=HarvesterLog(stateDir=self.stateDir, logDir=self.logDir, name='tud')
    repository = self.MockRepository('tud', None)
    repository.metadataPrefix='lom'
    harvester = Harvester(repository)
    harvester.addObserver(MockOaiRequest('mocktud'))
    harvester.addObserver(self.logger)
    # NOTE(review): eventLogger is passed uncalled here, while
    # createHarvesterWithMockUploader passes eventLogger() — confirm which is intended.
    harvester.addObserver(repository.createUploader(self.logger.eventLogger))
    harvester.addObserver(repository.mapping())
    harvester.harvest()
    self.assertEquals(['tud:oai:lorenet:147'],self.sendId)
def createHarvesterWithMockUploader(self, name, set=None, mockRequest=None):
    """Build a Harvester wired with a mock OAI request, this test's logger, an uploader and a mapper."""
    self.logger = HarvesterLog(stateDir=self.stateDir, logDir=self.logDir, name=name)
    repository = self.MockRepository(name, set)
    uploaderObserver = repository.createUploader(self.logger.eventLogger())
    self.mapper = repository.mapping()
    harvester = Harvester(repository)
    # Observer order matters: request handling first, then logging, upload, mapping.
    for observer in (
            mockRequest or MockOaiRequest('mocktud'),
            self.logger,
            uploaderObserver,
            self.mapper):
        harvester.addObserver(observer)
    return harvester
def testMarkDeleted(self):
    """markDeleted wipes the resumption token, the from-date and all harvested ids."""
    statsFile = open(self.stateDir+'/name.stats', 'w')
    statsFile.write('Started: 2005-01-02 16:12:56, Harvested/Uploaded/Total: 199/200/1650, Done: 2005-04-22 11:48:30, ResumptionToken: resumption')
    statsFile.close()
    logger = HarvesterLog(stateDir=self.stateDir, logDir=self.logDir, name='name')
    self.assertEquals('resumption', logger._state.token)
    logger.markDeleted()
    logger.close()
    # Reopen to prove the cleared state was persisted.
    reopened = HarvesterLog(stateDir=self.stateDir, logDir=self.logDir, name='name')
    self.assertEquals(None, reopened._state.token)
    self.assertEquals(None, reopened._state.from_)
    self.assertEquals(0, reopened.totalIds())
def testHasWork(self):
    """hasWork depends on the from-date (today or earlier) combined with the resumption token."""
    logger = HarvesterLog(stateDir=self.stateDir, logDir=self.logDir, name='someuni')
    self.assertEquals(None, logger._state.from_)
    self.assertEquals(None, logger._state.token)
    self.assertTrue(logger.hasWork())
    # Already harvested today and no token: nothing left to do.
    logger._state.from_ = strftime('%Y-%m-%d', gmtime())
    self.assertFalse(logger.hasWork())
    # A pending resumption token always means work.
    logger._state.token = 'SomeToken'
    self.assertTrue(logger.hasWork())
    # An old from-date means work, with or without a token.
    logger._state.from_ = '2005-01-02'
    self.assertTrue(logger.hasWork())
    logger._state.token = None
    self.assertTrue(logger.hasWork())
def testSameDate(self):
    """isCurrentDay accepts today's date and rejects any other day."""
    logger = HarvesterLog(stateDir=self.stateDir, logDir=self.logDir, name='someuni')
    today = logger._state.getTime()[:10]
    self.assertTrue(logger.isCurrentDay(today))
    self.assertFalse(logger.isCurrentDay('2005-01-02'))
def testClearInvalidData(self):
    """clearInvalidData removes the invalid records of one repository only."""
    logger = HarvesterLog(stateDir=self.stateDir, logDir=self.logDir, name='name')
    logger.startRepository()
    # A '/' in the record part of the id is escaped as %2F in the file name.
    for uploadId, invalidFile in [
            ('repoid:oai:bla/bla', '/invalid/repoid/oai:bla%2Fbla'),
            ('repoid:recordid', '/invalid/repoid/recordid'),
            ('repo2:1', '/invalid/repo2/1')]:
        logger.notifyHarvestedRecord(uploadId)
        logger.logInvalidData(uploadId, "Error")
        self.assertTrue(isfile(self.logDir + invalidFile))
    self.assertEquals(['repoid:oai:bla/bla', 'repoid:recordid', 'repo2:1'], logger.invalidIds())
    logger.clearInvalidData('repoid')
    # Only 'repoid' entries are gone; 'repo2' is untouched.
    self.assertEquals(['repo2:1'], logger.invalidIds())
    self.assertFalse(isfile(self.logDir + '/invalid/repoid/oai:bla%2Fbla'))
    self.assertFalse(isfile(self.logDir + '/invalid/repoid/recordid'))
    self.assertTrue(isfile(self.logDir + '/invalid/repo2/1'))
def testRefreshWithIgnoredRecords(self):
    # Integration test: a 'refresh' action re-harvests everything; previously
    # uploaded and previously ignored records that disappeared must be deleted.
    log = HarvesterLog(stateDir=join(self.harvesterStateDir, DOMAIN), logDir=join(self.harvesterLogDir, DOMAIN), name=REPOSITORY)
    log.startRepository()
    # Fake an earlier harvest: uploads (record 02 gets '/&' in its id to test escaping) ...
    for uploadId in ['%s:oai:record:%02d' % (REPOSITORY, i) for i in [1,2,120,121]]:
        if uploadId == '%s:oai:record:02' % (REPOSITORY):
            uploadId = '%s:oai:record:02/&gkn' % (REPOSITORY)
        log.notifyHarvestedRecord(uploadId)
        log.uploadIdentifier(uploadId)
    # ... deletes ...
    for uploadId in ['%s:oai:record:%02d' % (REPOSITORY, i) for i in [4,5,122,123,124]]:
        log.notifyHarvestedRecord(uploadId)
        log.deleteIdentifier(uploadId)
    # ... and ignored (invalid) records.
    for uploadId in ['%s:oai:record:%02d' % (REPOSITORY, i) for i in [7,8,125,126,127,128]]:
        log.notifyHarvestedRecord(uploadId)
        log.logInvalidData(uploadId, 'ignored message')
        log.logIgnoredIdentifierWarning(uploadId)
    log.endRepository('token', '2012-01-01T09:00:00Z')
    log.close()
    totalRecords = 15
    oldUploads = 2
    oldDeletes = 3
    oldIgnoreds = 4
    self.saveRepository(DOMAIN, REPOSITORY, REPOSITORYGROUP, action='refresh')
    self.startHarvester(repository=REPOSITORY) # Smooth init
    self.assertEquals(0, self.sizeDumpDir())
    self.startHarvester(repository=REPOSITORY) # Smooth harvest
    self.startHarvester(repository=REPOSITORY) # Smooth harvest
    self.assertEquals(totalRecords, self.sizeDumpDir())
    # The finishing run deletes what the refresh did not see again.
    self.startHarvester(repository=REPOSITORY) # Smooth finish
    self.assertEquals(totalRecords + oldUploads + oldIgnoreds, self.sizeDumpDir())
    invalidIds = open(join(self.harvesterStateDir, DOMAIN, "%s_invalid.ids" % REPOSITORY)).readlines()
    self.assertEquals(0, len(invalidIds), invalidIds)
    ids = open(join(self.harvesterStateDir, DOMAIN, "%s.ids" % REPOSITORY)).readlines()
    self.assertEquals(13, len(ids), ids)
def testLogIgnoredIdentifierWarning(self):
    """Only logIgnoredIdentifierWarning writes the IGNORED warning to the events log."""
    eventsFilename = self.logDir + '/name.events'
    logger = HarvesterLog(stateDir=self.stateDir, logDir=self.logDir, name='name')
    logger.startRepository()
    logger.notifyHarvestedRecord('repoid:oai:bla/bla')
    logger.logInvalidData('repoid:oai:bla/bla', 'bla/bla')
    # logInvalidData by itself leaves the events log untouched.
    self.assertEquals('', open(eventsFilename).read())
    logger.logIgnoredIdentifierWarning('repoid:oai:bla/bla')
    self.assertTrue(open(eventsFilename).read().endswith("\tWARNING\t[repoid:oai:bla/bla]\tIGNORED\n"))
    self.assertEquals(1, logger.totalInvalidIds())
    # Harvesting the record again clears its invalid status.
    logger.notifyHarvestedRecord('repoid:oai:bla/bla')
    self.assertEquals(0, logger.totalInvalidIds())
    logger.uploadIdentifier('repoid:oai:bla/bla')
    self.assertEquals(1, logger.totalIds())
def testLogLineError(self):
    # An exception during harvesting must show up in the stats line and as an
    # ERROR event carrying a one-line traceback.
    logger = HarvesterLog(stateDir=self.stateDir, logDir=self.logDir, name='name')
    logger.startRepository()
    try:
        logger.notifyHarvestedRecord("name:uploadId1")
        logger.uploadIdentifier("name:uploadId1")
        logger.notifyHarvestedRecord("name:uploadId2")
        raise Exception('FATAL')
    except:
        # Bare except is deliberate: capture whatever was raised via exc_info().
        exType, exValue, exTb = exc_info()
        logger.endWithException(exType, exValue, exTb)
    logger.close()
    lines = open(self.stateDir+'/name.stats').readlines()
    eventline = open(self.logDir+'/name.events').readlines()[0].strip()
    #Total is now counted based upon the id's
    self.assertTrue('2/1/0/1, Error: ' in lines[0], lines[0])
    date,event,id,comments = LOGLINE_RE.match(eventline).groups()
    self.assertEquals('ERROR', event.strip())
    self.assertEquals('name', id)
    # Traceback lines are joined with '|' into a single comments field.
    self.assertTrue(comments.startswith('Traceback (most recent call last):|File "'))
    self.assertTrue('harvesterlogtest.py", line ' in comments)
    self.assertTrue(comments.endswith(', in testLogLineError raise Exception(\'FATAL\')|Exception: FATAL'))
def testLogLine(self):
    # Full cycle: upload, delete and ignore a record, then check the stats line,
    # the SUCCES event and the stored invalid-data message.
    logger = HarvesterLog(stateDir=self.stateDir, logDir=self.logDir, name= 'name')
    logger.startRepository()
    logger.notifyHarvestedRecord("name:uploadId1")
    logger.uploadIdentifier("name:uploadId1")
    logger.notifyHarvestedRecord("name:uploadId1")
    logger.deleteIdentifier("name:uploadId1")
    logger.notifyHarvestedRecord("name:uploadId2")
    logger.logInvalidData("name:uploadId2", "Test Exception")
    logger.logIgnoredIdentifierWarning("name:uploadId2")
    logger.endRepository(None, '2012-01-01T09:00:00Z')
    logger.close()
    lines = open(self.stateDir + '/name.stats').readlines()
    eventline = open(self.logDir + '/name.events').readlines()[1].strip()
    invalidUploadId2 = open(self.logDir + '/invalid/name/uploadId2').read()
    #Total is now counted based upon the id's
    self.assertTrue('3/1/1/0, Done:' in lines[0], lines[0])
    date, event, id, comments = LOGLINE_RE.match(eventline).groups()
    self.assertEquals('SUCCES', event.strip())
    self.assertEquals('name', id)
    self.assertEquals('Harvested/Uploaded/Deleted/Total: 3/1/1/0, ResumptionToken: None', comments)
    self.assertEquals('Test Exception', invalidUploadId2)
def createLogger(self):
    """Create, remember and return a HarvesterLog for repository 'tud'."""
    self.logger = HarvesterLog(stateDir=self.stateDir, logDir=self.logDir, name='tud')
    return self.logger
class HarvesterTest(unittest.TestCase):
    # Tests for Harvester. The test case is also a "self shunt": it is added as
    # an observer and plays uploader, event target and OAI request (see the
    # methods after '#self shunt:' below).

    def setUp(self):
        # Counters and captured arguments for the self-shunt methods.
        self.sendCalled=0
        self.sendException = None
        self.upload = None
        self.sendParts=[]
        self.sendId=[]
        self.listRecordsSet = None
        self.listRecordsToken = None
        self.startCalled=0
        self.stopCalled=0
        # State and log files share one temporary directory.
        self.logDir = self.stateDir = mkdtemp()

    def tearDown(self):
        rmtree(self.logDir)

    def createLogger(self):
        self.logger=HarvesterLog(stateDir=self.stateDir, logDir=self.logDir, name='tud')
        return self.logger

    def createServer(self, url='http://repository.tudelft.nl/oai'):
        return OaiRequest(url)

    def testCreateHarvester(self):
        # Each harvest() triggers exactly one start/stop pair on the uploader.
        harvester = self.createHarvesterWithMockUploader('tud')
        self.assertEquals((0,0),(self.startCalled,self.stopCalled))
        harvester.harvest()
        self.assertEquals((1,1),(self.startCalled,self.stopCalled))
        harvester = self.createHarvesterWithMockUploader('eur')
        self.assertEquals((1,1),(self.startCalled,self.stopCalled))
        harvester.harvest()
        self.assertEquals((2,2),(self.startCalled,self.stopCalled))

    def testDoUpload(self):
        harvester = self.createHarvesterWithMockUploader('tud')
        harvester.harvest()
        self.assertEqual(3, self.sendCalled)
        self.assertEqual('tud:oai:tudelft.nl:007193', self.sendId[2])
        record = parse(StringIO(self.sendParts[2]['record']))
        subjects = record.xpath('/oai:record/oai:metadata/oai_dc:dc/dc:subject/text()', namespaces=namespaces)
        self.assertEqual(['quantitative electron microscopy', 'statistical experimental design', 'parameter estimation'], subjects)
        # NOTE(review): file() is Python 2 only.
        self.assertEquals('ResumptionToken: TestToken', file(os.path.join(self.stateDir, 'tud.stats')).read()[-27:-1])

    def testLogIDsForRemoval(self):
        harvester = self.createHarvesterWithMockUploader('tud')
        harvester.harvest()
        idsfile = open(self.stateDir+'/tud.ids')
        try:
            self.assertEquals('tud:oai:tudelft.nl:007087',idsfile.readline().strip())
            self.assertEquals('tud:oai:tudelft.nl:007192',idsfile.readline().strip())
            self.assertEquals('tud:oai:tudelft.nl:007193',idsfile.readline().strip())
        finally:
            idsfile.close()

    def createHarvesterWithMockUploader(self, name, set=None, mockRequest=None):
        # Wire a Harvester with a mock OAI request, logger, uploader and mapper.
        self.logger = HarvesterLog(stateDir=self.stateDir, logDir=self.logDir, name=name)
        repository = self.MockRepository(name, set)
        uploader = repository.createUploader(self.logger.eventLogger())
        self.mapper = repository.mapping()
        harvester = Harvester(repository)
        harvester.addObserver(mockRequest or MockOaiRequest('mocktud'))
        harvester.addObserver(self.logger)
        harvester.addObserver(uploader)
        harvester.addObserver(self.mapper)
        return harvester

    def testSimpleStat(self):
        harvester = self.createHarvesterWithMockUploader('tud')
        harvester.harvest()
        self.assert_(os.path.isfile(self.stateDir+'/tud.stats'))
        stats = open(self.stateDir + '/tud.stats').readline().strip().split(',')
        year = strftime('%Y')
        self.assertEquals('Started: %s-'%year, stats[0][:14])
        self.assertEquals(' Harvested/Uploaded/Deleted/Total: 3/3/0/3', stats[1])
        self.assertEquals(' Done: %s-'%year, stats[2][:12])

    def testErrorStat(self):
        harvester = self.createHarvesterWithMockUploader('tud')
        # The shunt's send() raises this, aborting the harvest.
        self.sendException = Exception('send failed')
        try:
            harvester.harvest()
        except:
            pass
        stats = open(self.stateDir + '/tud.stats').readline().strip().split(',')
        self.assertTrue(stats[2].startswith(' Error: '), stats[2])
        self.assertTrue(stats[2].endswith('send failed'), stats[2])

    def testResumptionTokenLog(self):
        harvester = self.createHarvesterWithMockUploader('tud')
        harvester.harvest()
        stats = open(self.stateDir + '/tud.stats').readline().strip().split(',')
        self.assertEquals(' ResumptionToken: TestToken', stats[3])

    def testOtherMetadataPrefix(self):
        # Harvest with a non-default metadataPrefix ('lom').
        self.logger=HarvesterLog(stateDir=self.stateDir, logDir=self.logDir, name='tud')
        repository = self.MockRepository('tud', None)
        repository.metadataPrefix='lom'
        harvester = Harvester(repository)
        harvester.addObserver(MockOaiRequest('mocktud'))
        harvester.addObserver(self.logger)
        # NOTE(review): eventLogger passed uncalled here, but called as
        # eventLogger() in createHarvesterWithMockUploader — confirm which is intended.
        harvester.addObserver(repository.createUploader(self.logger.eventLogger))
        harvester.addObserver(repository.mapping())
        harvester.harvest()
        self.assertEquals(['tud:oai:lorenet:147'],self.sendId)

    def testWriteAndSeek(self):
        # Sanity check of the write-then-overwrite-in-place file technique.
        f = open('test','w')
        f.write('enige info: ')
        pos = f.tell()
        f.write('20000')
        f.seek(pos)
        f.write('12345')
        f.close()
        self.assertEquals('enige info: 12345', open('test','r').readline().strip())
        os.remove('test')

    def testException(self):
        try:
            raise Exception('aap')
            # NOTE(review): unreachable — the raise above always fires.
            self.fail()
        except:
            # NOTE(review): sys.exc_value/sys.exc_type are Python 2 only.
            self.assertEquals('aap', str(sys.exc_value))
            self.assertTrue('exceptions.Exception' in str(sys.exc_type), str(sys.exc_type))

    def testIncrementalHarvest(self):
        # With a completed previous run, the next harvest resumes from the stored date.
        self.mockRepository = MockOaiRequest('mocktud')
        f = open(self.stateDir + '/tud.stats', 'w')
        f.write(' Started: 1999-12-01 16:37:41, Harvested/Uploaded/Total: 56/23/113, Done: 2004-12-31 16:39:15\n')
        f.close()
        JsonDict({'resumptionToken': None, 'from': "1999-12-01T16:37:41Z"}).dump(open(self.stateDir + '/tud.next', 'w'))
        f = open(self.stateDir + '/tud.ids', 'w')
        for i in range(113):
            f.write('oai:tudfakeid:%05i\n'%i)
        f.close()
        repository = self.MockRepository3('tud', 'http://repository.tudelft.nl/oai', None, 'tud')
        logger = self.createLogger()
        h = Harvester(repository)
        h.addObserver(self)
        h.addObserver(logger)
        h.addObserver(repository.createUploader(logger.eventLogger))
        h.addObserver(repository.mapping())
        self.listRecordsFrom = None
        h.harvest()
        self.assertEquals('1999-12-01', self.listRecordsFrom)
        lines = open(self.stateDir + '/tud.stats').readlines()
        self.assertEquals(2, len(lines))
        self.assertEquals(('3', '3', '0', '116'), getHarvestedUploadedRecords(lines[1]))

    def testNotIncrementalInCaseOfError(self):
        # After an error, harvesting restarts from the last successful run's date.
        self.mockRepository = MockOaiRequest('mocktud')
        f = open(self.stateDir + '/tud.stats', 'w')
        f.write('Started: 1998-12-01 16:37:41, Harvested/Uploaded/Total: 113/113/113, Done: 2004-12-31 16:39:15\n')
        f.write('Started: 1999-12-01 16:37:41, Harvested/Uploaded/Total: 113/113/113, Error: XXX\n')
        f.close();
        repository = self.MockRepository3('tud', 'http://repository.tudelft.nl/oai', None, 'tud')
        logger = self.createLogger()
        h = Harvester(repository)
        h.addObserver(self)
        h.addObserver(logger)
        h.addObserver(repository.createUploader(logger.eventLogger))
        h.addObserver(repository.mapping())
        self.listRecordsFrom = None
        h.harvest()
        self.assertEquals('1998-12-01', self.listRecordsFrom)

    def testOnlyErrorInLogFile(self):
        # With no successful run at all, the shunt's default from_ ('aap') is used.
        self.mockRepository = MockOaiRequest('mocktud')
        f = open(self.stateDir + '/tud.stats', 'w')
        f.write('Started: 1998-12-01 16:37:41, Harvested/Uploaded/Total: 113/113/113, Error:\n')
        f.write('Started: 1999-12-01 16:37:41, Harvested/Uploaded/Total: 113/113/113, Error: XXX\n')
        f.close();
        repository = self.MockRepository3('tud', 'http://repository.tudelft.nl/oai', None, 'tud')
        logger = self.createLogger()
        h = Harvester(repository)
        h.addObserver(self)
        h.addObserver(logger)
        h.addObserver(repository.createUploader(logger.eventLogger))
        h.addObserver(repository.mapping())
        self.listRecordsFrom = None
        h.harvest()
        self.assertEquals('aap', self.listRecordsFrom)

    def testResumptionToken(self):
        # A stored resumption token is passed on to listRecords.
        self.mockRepository = MockOaiRequest('mocktud')
        f = open(self.stateDir + '/tud.stats', 'w')
        f.write('Started: 1999-12-01 16:37:41, Harvested/Uploaded/Total: 113/113/113, Done: 2004-12-31 16:39:15, ResumptionToken: ga+hier+verder\n')
        f.close();
        repository = self.MockRepository3('tud', 'http://repository.tudelft.nl/oai', None, 'tud')
        logger = self.createLogger()
        h = Harvester(repository)
        h.addObserver(self)
        h.addObserver(logger)
        h.addObserver(repository.createUploader(logger.eventLogger))
        h.addObserver(repository.mapping())
        self.listRecordsToken = None
        h.harvest()
        self.assertEquals('ga+hier+verder', self.listRecordsToken)

    def testContinuousHarvesting(self):
        # A continuous repository resumes from the full timestamp, not just the date.
        self.mockRepository = MockOaiRequest('mocktud')
        f = open(self.stateDir + '/tud.stats', 'w')
        f.write(' Started: 1999-12-01 16:37:41, Harvested/Uploaded/Total: 56/23/113, Done: 2004-12-31 16:39:15\n')
        f.close()
        JsonDict({'resumptionToken': None, 'from': "2015-01-01T00:12:13Z"}).dump(open(self.stateDir + '/tud.next', 'w'))
        repository = self.MockRepository3('tud', 'http://repository.tudelft.nl/oai', None, 'tud', continuous=True)
        logger = self.createLogger()
        h = Harvester(repository)
        h.addObserver(self)
        h.addObserver(logger)
        h.addObserver(repository.createUploader(logger.eventLogger))
        h.addObserver(repository.mapping())
        self.listRecordsFrom = None
        h.harvest()
        self.assertEquals('2015-01-01T00:12:13Z', self.listRecordsFrom)

    def testHarvestSet(self):
        self.mockRepository = MockOaiRequest('mocktud')
        harvester = self.createHarvesterWithMockUploader('um', set='withfulltext:yes', mockRequest = self)
        harvester.harvest()
        self.assertEquals('withfulltext:yes', self.listRecordsSet)

    def mockHarvest(self, repository, logger, uploader):
        # Records each harvest invocation's repository details for inspection.
        if not hasattr(self, 'mockHarvestArgs'):
            self.mockHarvestArgs=[]
        self.mockHarvestArgs.append({'name':repository.id,'baseurl':repository.baseurl,'set':repository.set,'repositoryGroupId':repository.repositoryGroupId})

    def testNoDateHarvester(self):
        "runs a test with xml containing no dates"
        harvester = self.createHarvesterWithMockUploader('tud')
        self.logger._state.token='NoDateToken'
        harvester.harvest()

    def testNothingInRepository(self):
        harvester = self.createHarvesterWithMockUploader('tud')
        self.logger._state.token='EmptyListToken'
        harvester.harvest()
        lines = open(self.stateDir+'/tud.stats').readlines()
        self.assert_('Harvested/Uploaded/Deleted/Total: 0/0/0/0' in lines[0])

    def testUploadRecord(self):
        harvester = self.createHarvesterWithMockUploader('tud')
        harvester.upload(oaiResponse(identifier='mockid'))
        self.assertEquals(['tud:mockid'], self.sendId)
        self.assertFalse(hasattr(self, 'delete_id'))

    def testSkippedRecord(self):
        # An upload flagged skip=True is neither sent nor deleted.
        harvester = self.createHarvesterWithMockUploader('tud')
        def createUpload(repository, oaiResponse):
            upload = Upload(repository=repository, oaiResponse=oaiResponse)
            upload.id = "tud:mockid"
            upload.skip = True
            return upload
        self.mapper.createUpload = createUpload
        harvester.upload(oaiResponse(identifier='mockid'))
        self.assertEquals([], self.sendId)
        self.assertFalse(hasattr(self, 'delete_id'))

    def testDelete(self):
        harvester = self.createHarvesterWithMockUploader('tud')
        harvester.upload(oaiResponse(identifier='mockid', deleted=True))
        self.assertEquals([], self.sendId)
        self.assertEquals('tud:mockid', self.delete_id)

    def testDcIdentifierTake2(self):
        self.sendFulltexturl=None
        harvester = self.createHarvesterWithMockUploader('tud')
        self.logger.token='DcIdentifierHttp2'
        harvester.harvest()
        open(self.stateDir+'/tud.stats').readlines()

    def testHarvesterStopsIgnoringAfter100records(self):
        # Above maxIgnore invalid records, the harvester raises instead of ignoring.
        observer = CallTrace('observer')
        upload = Upload(repository=None, oaiResponse=oaiResponse(identifier='mockid'))
        upload.id = 'mockid'
        observer.returnValues['createUpload'] = upload
        observer.returnValues['totalInvalidIds'] = 101
        observer.exceptions['send'] = InvalidDataException(upload.id, "message")
        repository=CallTrace("repository", returnValues={'maxIgnore': 100})
        harvester = Harvester(repository)
        harvester.addObserver(observer)
        self.assertRaises(TooMuchInvalidDataException, lambda: harvester.upload(oaiResponse(identifier='mockid')))
        self.assertEquals(['createUpload', "notifyHarvestedRecord", "send", "logInvalidData", "totalInvalidIds"], [m.name for m in observer.calledMethods])

    def testHarvesterIgnoringInvalidDataErrors(self):
        # Below maxIgnore, an InvalidDataException is logged and the record ignored.
        observer = CallTrace('observer')
        upload = Upload(repository=None, oaiResponse=oaiResponse(identifier='mockid'))
        upload.id = 'mockid'
        observer.returnValues['createUpload'] = upload
        observer.returnValues['totalInvalidIds'] = 0
        observer.exceptions['send'] = InvalidDataException(upload.id, "message")
        repository=CallTrace("repository", returnValues={'maxIgnore': 100})
        harvester = Harvester(repository)
        harvester.addObserver(observer)
        harvester.upload(oaiResponse())
        self.assertEquals(['createUpload', "notifyHarvestedRecord", "send", 'logInvalidData', "totalInvalidIds", 'logIgnoredIdentifierWarning'], [m.name for m in observer.calledMethods])

    #self shunt:
    def send(self, upload):
        # Uploader stand-in: record the upload, optionally fail on demand.
        self.sendCalled+=1
        self.sendId.append(upload.id)
        self.sendParts.append(upload.parts)
        self.upload = upload
        if self.sendException:
            raise self.sendException

    def delete(self, anUpload):
        self.delete_id = anUpload.id

    def uploaderInfo(self):
        return 'The uploader is connected to /dev/null'

    def start(self):
        self.startCalled += 1

    def stop(self):
        self.stopCalled += 1

    def listRecordsButWaitLong(self, a, b, c, d):
        sleep(20)

    def MockRepository (self, id, set):
        return _MockRepository(id, 'http://mock.server', set, 'inst'+id,self)

    def MockRepository2 (self, nr):
        return _MockRepository('reponame'+nr, 'url'+nr, 'set'+nr, 'instname'+nr,self)

    def MockRepository3(self, id, baseurl, set, repositoryGroupId, continuous=False):
        return _MockRepository(id, baseurl, set, repositoryGroupId, self, continuous=continuous)

    def mockssetarget(self):
        return self

    def createUploader(self, logger):
        return self

    def listRecords(self, metadataPrefix = None, from_ = "aap", resumptionToken = 'mies', set = None):
        # OAI request stand-in: capture the arguments, delegate to the mock repository.
        self.listRecordsFrom = from_
        self.listRecordsToken = resumptionToken
        self.listRecordsSet = set
        if metadataPrefix:
            if set:
                return self.mockRepository.listRecords(metadataPrefix = metadataPrefix, set = set)
            return self.mockRepository.listRecords(metadataPrefix = metadataPrefix)
        return self.mockRepository.listRecords(resumptionToken = resumptionToken)
def testHasWorkWithResumptionTokenContinuous(self):
    """With continuousInterval there is work when a token exists or the interval has passed."""
    logger = HarvesterLog(stateDir=self.stateDir, logDir=self.logDir, name='name')
    self.assertTrue(logger.hasWork(continuousInterval=60))
    logger.startRepository()
    # Finish with a resumption token at 'now'.
    logger.endRepository('resumptionToken', strftime("%Y-%m-%dT%H:%M:%SZ", logger._state._gmtime()))
    logger.close()
    reopened = HarvesterLog(stateDir=self.stateDir, logDir=self.logDir, name='name')
    self.assertTrue(reopened.hasWork(continuousInterval=60))
    reopened.startRepository()
    # Finish again, dated just beyond the continuous interval.
    reopened.endRepository('resumptionToken2', strftime("%Y-%m-%dT%H:%M:%SZ", gmtime(time() - 60 - 1)))
    reopened.close()
def testRefresh(self):
    # Integration test: a 'refresh' action re-harvests from scratch and afterwards
    # deletes previously uploaded records that were not seen again.
    oldlogs = self.getLogs()
    log = HarvesterLog(stateDir=join(self.harvesterStateDir, DOMAIN), logDir=join(self.harvesterLogDir, DOMAIN), name=REPOSITORY)
    log.startRepository()
    # Fake an earlier harvest with some uploads and deletes.
    for uploadId in ['%s:oai:record:%02d' % (REPOSITORY, i) for i in [1,7,120,121]]:
        log.notifyHarvestedRecord(uploadId)
        log.uploadIdentifier(uploadId)
    for uploadId in ['%s:oai:record:%02d' % (REPOSITORY, i) for i in [4,5,122,123]]:
        log.notifyHarvestedRecord(uploadId)
        log.deleteIdentifier(uploadId)
    log.endRepository('token', '2012-01-01T09:00:00Z')
    log.close()
    self.saveRepository(DOMAIN, REPOSITORY, REPOSITORYGROUP, action='refresh')
    # First run only initializes; no requests are made yet.
    self.startHarvester(repository=REPOSITORY)
    logs = self.getLogs()[len(oldlogs):]
    self.assertEquals(0, len(logs))
    # Second run starts a fresh ListRecords harvest.
    self.startHarvester(repository=REPOSITORY)
    logs = self.getLogs()
    self.assertEquals('/oai', logs[-1]["path"])
    self.assertEquals({'verb': ['ListRecords'], 'metadataPrefix': ['oai_dc']}, logs[-1]["arguments"])
    statsFile = join(self.harvesterStateDir, DOMAIN, '%s.stats' % REPOSITORY)
    token = getResumptionToken(open(statsFile).readlines()[-1])
    # Third run continues with the resumption token from the stats file.
    self.startHarvester(repository=REPOSITORY)
    logs = self.getLogs()
    self.assertEquals('/oai', logs[-1]["path"])
    self.assertEquals({'verb': ['ListRecords'], 'resumptionToken': [token]}, logs[-1]["arguments"])
    self.assertEquals(15, self.sizeDumpDir())
    # Fourth run finishes the refresh: records not seen again are deleted.
    self.startHarvester(repository=REPOSITORY)
    self.assertEquals(17, self.sizeDumpDir())
    deleteFiles = [join(self.dumpDir, f) for f in listdir(self.dumpDir) if '_delete' in f]
    deletedIds = set([xpathFirst(parse(open(x)), '//ucp:recordIdentifier/text()') for x in deleteFiles])
    self.assertEquals(set(['%s:oai:record:03' % REPOSITORY, '%s:oai:record:06' % REPOSITORY, '%s:oai:record:120' % REPOSITORY, '%s:oai:record:121' % REPOSITORY]), deletedIds)
    logs = self.getLogs()[len(oldlogs):]
    self.startHarvester(repository=REPOSITORY)
    self.assertEquals(len(logs), len(self.getLogs()[len(oldlogs):]), 'Action is over, expect nothing more.')
def testLogInvalidData(self):
    """Invalid data lands in an escaped file path; re-harvesting the record clears it."""
    uploadId = 'repo/id:oai:bla/bla'
    logger = HarvesterLog(stateDir=self.stateDir, logDir=self.logDir, name='name')
    logger.startRepository()
    logger.notifyHarvestedRecord(uploadId)
    logger.logInvalidData(uploadId, "Error")
    self.assertEquals(1, logger.totalInvalidIds())
    # '/' is escaped as %2F in both the repository and record parts of the path.
    expectedFile = self.logDir + '/invalid/repo%2Fid/oai:bla%2Fbla'
    self.assertEquals("Error", open(expectedFile).read())
    logger.notifyHarvestedRecord(uploadId)
    self.assertEquals(0, logger.totalInvalidIds())
    self.assertFalse(isfile(expectedFile))
def testClearWithInvalidRecords(self):
    # Integration test: a 'clear' action deletes all previously uploaded and
    # invalid records and empties the id administration.
    log = HarvesterLog(stateDir=join(self.harvesterStateDir, DOMAIN), logDir=join(self.harvesterLogDir, DOMAIN), name=REPOSITORY)
    log.startRepository()
    # Fake an earlier harvest: uploads ...
    for uploadId in ['%s:oai:record:%02d' % (REPOSITORY, i) for i in [1,2,120,121]]:
        log.notifyHarvestedRecord(uploadId)
        log.uploadIdentifier(uploadId)
    # ... deletes ...
    for uploadId in ['%s:oai:record:%02d' % (REPOSITORY, i) for i in [4,5,122,123,124]]:
        log.notifyHarvestedRecord(uploadId)
        log.deleteIdentifier(uploadId)
    # ... and invalid (ignored) records.
    for uploadId in ['%s:oai:record:%02d' % (REPOSITORY, i) for i in [7,8,125,126,127,128]]:
        log.notifyHarvestedRecord(uploadId)
        log.logInvalidData(uploadId, 'ignored message')
        log.logIgnoredIdentifierWarning(uploadId)
    log.endRepository('token', '2012-01-01T09:00:00Z')
    log.close()
    oldUploads = 4
    oldDeletes = 5
    oldInvalids = 6
    self.saveRepository(DOMAIN, REPOSITORY, REPOSITORYGROUP, action='clear')
    self.startHarvester(repository=REPOSITORY)
    # Both old uploads and old invalids produce delete records in the dump.
    self.assertEquals(oldUploads+oldInvalids, self.sizeDumpDir())
    invalidIds = open(join(self.harvesterStateDir, DOMAIN, "%s_invalid.ids" % REPOSITORY)).readlines()
    self.assertEquals(0, len(invalidIds), invalidIds)
    ids = open(join(self.harvesterStateDir, DOMAIN, "%s.ids" % REPOSITORY)).readlines()
    self.assertEquals(0, len(ids), ids)
def testInvalidIDs(self):
    """invalidIds lists every identifier currently marked invalid, in order."""
    logger = HarvesterLog(stateDir=self.stateDir, logDir=self.logDir, name='name')
    logger.startRepository()
    for uploadId in ('id:1', 'id:2'):
        logger.notifyHarvestedRecord(uploadId)
        logger.logInvalidData(uploadId, 'exception message')
    self.assertEquals(['id:1', 'id:2'], logger.invalidIds())