예제 #1
0
 def testHasWorkBeforeAndAfterDoingWork(self):
     """A new log reports work; after a finished run a reopened log reports none.

     The run is ended without resumption token, stamped with the time
     returned by the state's own ``_gmtime()``.
     """
     logArguments = dict(stateDir=self.stateDir, logDir=self.logDir, name='name')
     firstLog = HarvesterLog(**logArguments)
     self.assertTrue(firstLog.hasWork())
     firstLog.startRepository()
     finishedAt = strftime("%Y-%m-%dT%H:%M:%SZ", firstLog._state._gmtime())
     firstLog.endRepository(None, finishedAt)
     firstLog.close()
     # Reopen on the same directories so the persisted state is read back.
     reopenedLog = HarvesterLog(**logArguments)
     self.assertFalse(reopenedLog.hasWork())
예제 #2
0
 def testLoggingAlwaysStartsNewline(self):
     """Tests an old situation that when a log was interrupted, it continued on the same line.

     A stats file whose only line has no trailing newline is prepared; after
     startRepository() and close() the file is expected to contain two lines.
     """
     statsPath = self.stateDir+'/name.stats'
     # 'with' guarantees the handle is closed (and flushed) before the log opens the file.
     with open(statsPath, 'w') as f:
         f.write('Started: 2005-01-02 16:12:56, Harvested/Uploaded/Total: 199/200/1650, Don"crack"')
     logger = HarvesterLog(stateDir=self.stateDir, logDir=self.logDir, name= 'name')
     logger.startRepository()
     logger.close()
     with open(statsPath) as f:
         lines = f.readlines()
     self.assertEqual(2, len(lines))
예제 #3
0
 def testLogWithoutDoubleIDs(self):
     """Duplicate identifiers, whether pre-existing in the ids file or uploaded again, count once."""
     with open(self.stateDir+'/name.ids', 'w') as f:
         # 'id:1' is listed twice on purpose.
         f.writelines(['id:1\n','id:2\n','id:1\n'])
     logger = HarvesterLog(stateDir=self.stateDir, logDir=self.logDir, name= 'name')
     logger.startRepository()
     self.assertEqual(2, logger.totalIds())
     logger.uploadIdentifier('id:3')
     self.assertEqual(3, logger.totalIds())
     # Re-uploading known identifiers must not change the total.
     logger.uploadIdentifier('id:3')
     logger.uploadIdentifier('id:2')
     self.assertEqual(3, logger.totalIds())
예제 #4
0
 def testOtherMetadataPrefix(self):
     """Harvesting with metadataPrefix 'lom' sends exactly the record 'tud:oai:lorenet:147'."""
     self.logger=HarvesterLog(stateDir=self.stateDir, logDir=self.logDir, name='tud')
     repository = self.MockRepository('tud', None)
     repository.metadataPrefix='lom'
     harvester = Harvester(repository)
     harvester.addObserver(MockOaiRequest('mocktud'))
     harvester.addObserver(self.logger)
     # eventLogger is a method (it is called in createHarvesterWithMockUploader);
     # hand the uploader its result, not the bound method itself.
     harvester.addObserver(repository.createUploader(self.logger.eventLogger()))
     harvester.addObserver(repository.mapping())
     harvester.harvest()
     self.assertEqual(['tud:oai:lorenet:147'],self.sendId)
예제 #5
0
 def createHarvesterWithMockUploader(self, name, set=None, mockRequest=None):
     """Return a Harvester for a mock repository, observed by request, log, uploader and mapper.

     The HarvesterLog and the mapping that are created are kept on
     ``self.logger`` and ``self.mapper``. When *mockRequest* is falsy a
     ``MockOaiRequest('mocktud')`` is observed instead.
     """
     self.logger = HarvesterLog(stateDir=self.stateDir, logDir=self.logDir, name=name)
     mockRepo = self.MockRepository(name, set)
     mockUploader = mockRepo.createUploader(self.logger.eventLogger())
     self.mapper = mockRepo.mapping()
     result = Harvester(mockRepo)
     if mockRequest:
         oaiRequest = mockRequest
     else:
         oaiRequest = MockOaiRequest('mocktud')
     # Observers are registered in this fixed order.
     for observer in (oaiRequest, self.logger, mockUploader, self.mapper):
         result.addObserver(observer)
     return result
예제 #6
0
 def testMarkDeleted(self):
     """markDeleted() leaves a reopened log without token, without from-date and with zero ids."""
     with open(self.stateDir+'/name.stats', 'w') as f:
         f.write('Started: 2005-01-02 16:12:56, Harvested/Uploaded/Total: 199/200/1650, Done: 2005-04-22 11:48:30, ResumptionToken: resumption')
     logger = HarvesterLog(stateDir=self.stateDir, logDir=self.logDir, name='name')
     self.assertEqual('resumption', logger._state.token)
     logger.markDeleted()
     logger.close()
     # Reopen to verify what was persisted, not just the in-memory state.
     logger = HarvesterLog(stateDir=self.stateDir, logDir=self.logDir, name='name')
     self.assertEqual(None, logger._state.token)
     self.assertEqual(None, logger._state.from_)
     self.assertEqual(0, logger.totalIds())
예제 #7
0
 def testHasWork(self):
     """hasWork() for combinations of the state's from-date and resumption token."""
     logger = HarvesterLog(stateDir=self.stateDir, logDir=self.logDir, name='someuni')
     self.assertEqual(None, logger._state.from_)
     self.assertEqual(None, logger._state.token)
     self.assertTrue(logger.hasWork())
     # from-date is today (UTC) and there is no token: nothing to do.
     logger._state.from_=strftime('%Y-%m-%d', gmtime())
     self.assertFalse(logger.hasWork())
     logger._state.token='SomeToken'
     self.assertTrue(logger.hasWork())
     logger._state.from_='2005-01-02'
     self.assertTrue(logger.hasWork())
     logger._state.token=None
     self.assertTrue(logger.hasWork())
예제 #8
0
 def testSameDate(self):
     """isCurrentDay() accepts the date part of the state's time and rejects '2005-01-02'."""
     log = HarvesterLog(stateDir=self.stateDir, logDir=self.logDir, name='someuni')
     # The first ten characters of getTime() are used as the date.
     stateTime = log._state.getTime()
     today = stateTime[:10]
     self.assertTrue(log.isCurrentDay(today))
     self.assertFalse(log.isCurrentDay('2005-01-02'))
예제 #9
0
 def testClearInvalidData(self):
     """clearInvalidData('repoid') removes that repository's invalid ids and files, keeping others."""
     logger = HarvesterLog(stateDir=self.stateDir, logDir=self.logDir, name='name')
     logger.startRepository()
     logger.notifyHarvestedRecord('repoid:oai:bla/bla')
     logger.logInvalidData('repoid:oai:bla/bla', "Error")
     # The '/' of the record id shows up as '%2F' in the expected file name.
     self.assertTrue(isfile(self.logDir + '/invalid/repoid/oai:bla%2Fbla'))
     logger.notifyHarvestedRecord('repoid:recordid')
     logger.logInvalidData('repoid:recordid', "Error")
     self.assertTrue(isfile(self.logDir + '/invalid/repoid/recordid'))
     logger.notifyHarvestedRecord('repo2:1')
     logger.logInvalidData('repo2:1', "Error")
     self.assertTrue(isfile(self.logDir + '/invalid/repo2/1'))
     self.assertEqual(['repoid:oai:bla/bla', 'repoid:recordid', 'repo2:1'], logger.invalidIds())
     logger.clearInvalidData('repoid')
     self.assertEqual(['repo2:1'], logger.invalidIds())
     self.assertFalse(isfile(self.logDir + '/invalid/repoid/oai:bla%2Fbla'))
     self.assertFalse(isfile(self.logDir + '/invalid/repoid/recordid'))
     self.assertTrue(isfile(self.logDir + '/invalid/repo2/1'))
예제 #10
0
    def testRefreshWithIgnoredRecords(self):
        """Refresh after a run that logged uploads, deletes and ignored (invalid) records.

        Checks the dump directory size after each harvester start and the
        final content of the invalid-ids and ids state files.
        """
        log = HarvesterLog(stateDir=join(self.harvesterStateDir, DOMAIN), logDir=join(self.harvesterLogDir, DOMAIN), name=REPOSITORY)
        log.startRepository()
        for uploadId in ['%s:oai:record:%02d' % (REPOSITORY, i) for i in [1,2,120,121]]:
            # Record 02 is logged under an id containing '/' and '&'.
            if uploadId == '%s:oai:record:02' % (REPOSITORY):
                uploadId = '%s:oai:record:02/&gkn' % (REPOSITORY)
            log.notifyHarvestedRecord(uploadId)
            log.uploadIdentifier(uploadId)
        for uploadId in ['%s:oai:record:%02d' % (REPOSITORY, i) for i in [4,5,122,123,124]]:
            log.notifyHarvestedRecord(uploadId)
            log.deleteIdentifier(uploadId)
        for uploadId in ['%s:oai:record:%02d' % (REPOSITORY, i) for i in [7,8,125,126,127,128]]:
            log.notifyHarvestedRecord(uploadId)
            log.logInvalidData(uploadId, 'ignored message')
            log.logIgnoredIdentifierWarning(uploadId)
        log.endRepository('token', '2012-01-01T09:00:00Z')
        log.close()
        # Expected counts used in the dump-size assertions below. The original
        # also defined oldDeletes = 3, which no assertion used.
        totalRecords = 15
        oldUploads = 2
        oldIgnoreds = 4

        self.saveRepository(DOMAIN, REPOSITORY, REPOSITORYGROUP, action='refresh')

        self.startHarvester(repository=REPOSITORY) # Smooth init
        self.assertEqual(0, self.sizeDumpDir())
        self.startHarvester(repository=REPOSITORY) # Smooth harvest
        self.startHarvester(repository=REPOSITORY) # Smooth harvest
        self.assertEqual(totalRecords, self.sizeDumpDir())
        self.startHarvester(repository=REPOSITORY) # Smooth finish
        self.assertEqual(totalRecords + oldUploads + oldIgnoreds, self.sizeDumpDir())
        with open(join(self.harvesterStateDir, DOMAIN, "%s_invalid.ids" % REPOSITORY)) as f:
            invalidIds = f.readlines()
        self.assertEqual(0, len(invalidIds), invalidIds)
        with open(join(self.harvesterStateDir, DOMAIN, "%s.ids" % REPOSITORY)) as f:
            ids = f.readlines()
        self.assertEqual(13, len(ids), ids)
예제 #11
0
    def testLogIgnoredIdentifierWarning(self):
        """logIgnoredIdentifierWarning() writes a WARNING/IGNORED event; a re-harvest clears the invalid id."""
        def readEvents():
            # Read the events log and close the handle right away.
            with open(self.logDir + '/name.events') as eventsFile:
                return eventsFile.read()

        logger = HarvesterLog(stateDir=self.stateDir, logDir=self.logDir, name='name')
        logger.startRepository()
        logger.notifyHarvestedRecord('repoid:oai:bla/bla')
        logger.logInvalidData('repoid:oai:bla/bla', 'bla/bla')
        # Logging invalid data alone leaves the events log empty.
        self.assertEqual('', readEvents())
        logger.logIgnoredIdentifierWarning('repoid:oai:bla/bla')
        self.assertTrue(readEvents().endswith("\tWARNING\t[repoid:oai:bla/bla]\tIGNORED\n"))
        self.assertEqual(1, logger.totalInvalidIds())

        logger.notifyHarvestedRecord('repoid:oai:bla/bla')
        self.assertEqual(0, logger.totalInvalidIds())
        logger.uploadIdentifier('repoid:oai:bla/bla')
        self.assertEqual(1, logger.totalIds())
예제 #12
0
 def testLogLineError(self):
     """endWithException() records the counts in the stats file and an ERROR event with the traceback."""
     logger = HarvesterLog(stateDir=self.stateDir, logDir=self.logDir, name='name')
     logger.startRepository()
     try:
         logger.notifyHarvestedRecord("name:uploadId1")
         logger.uploadIdentifier("name:uploadId1")
         logger.notifyHarvestedRecord("name:uploadId2")
         # The text of the next line is asserted on below (it appears in the traceback).
         raise Exception('FATAL')
     except Exception:
         exType, exValue, exTb = exc_info()
         logger.endWithException(exType, exValue, exTb)
     logger.close()
     with open(self.stateDir+'/name.stats') as f:
         lines = f.readlines()
     with open(self.logDir+'/name.events') as f:
         eventline = f.readlines()[0].strip()
     #Total is now counted based upon the id's
     self.assertTrue('2/1/0/1, Error: ' in lines[0], lines[0])
     date, event, eventId, comments = LOGLINE_RE.match(eventline).groups()
     self.assertEqual('ERROR', event.strip())
     self.assertEqual('name', eventId)
     self.assertTrue(comments.startswith('Traceback (most recent call last):|File "'))
     self.assertTrue('harvesterlogtest.py", line ' in comments)
     self.assertTrue(comments.endswith(', in testLogLineError raise Exception(\'FATAL\')|Exception: FATAL'))
예제 #13
0
 def testLogLine(self):
     """A completed run writes the counts to the stats file, a SUCCES event and the invalid-data file."""
     logger = HarvesterLog(stateDir=self.stateDir, logDir=self.logDir, name= 'name')
     logger.startRepository()
     logger.notifyHarvestedRecord("name:uploadId1")
     logger.uploadIdentifier("name:uploadId1")
     logger.notifyHarvestedRecord("name:uploadId1")
     logger.deleteIdentifier("name:uploadId1")
     logger.notifyHarvestedRecord("name:uploadId2")
     logger.logInvalidData("name:uploadId2", "Test Exception")
     logger.logIgnoredIdentifierWarning("name:uploadId2")
     logger.endRepository(None, '2012-01-01T09:00:00Z')
     logger.close()
     with open(self.stateDir + '/name.stats') as f:
         lines = f.readlines()
     with open(self.logDir + '/name.events') as f:
         # The second event line is inspected; the first is not checked here.
         eventline = f.readlines()[1].strip()
     with open(self.logDir + '/invalid/name/uploadId2') as f:
         invalidUploadId2 = f.read()
     #Total is now counted based upon the id's
     self.assertTrue('3/1/1/0, Done:' in lines[0], lines[0])
     date, event, eventId, comments = LOGLINE_RE.match(eventline).groups()
     self.assertEqual('SUCCES', event.strip())
     self.assertEqual('name', eventId)
     self.assertEqual('Harvested/Uploaded/Deleted/Total: 3/1/1/0, ResumptionToken: None', comments)
     self.assertEqual('Test Exception', invalidUploadId2)
예제 #14
0
 def createLogger(self):
     """Create a HarvesterLog named 'tud' on the test directories, keep it on self.logger and return it."""
     harvesterLog = HarvesterLog(stateDir=self.stateDir, logDir=self.logDir, name='tud')
     self.logger = harvesterLog
     return harvesterLog
예제 #15
0
class HarvesterTest(unittest.TestCase):
    def setUp(self):
        self.sendCalled=0
        self.sendException = None
        self.upload = None
        self.sendParts=[]
        self.sendId=[]
        self.listRecordsSet = None
        self.listRecordsToken = None
        self.startCalled=0
        self.stopCalled=0
        self.logDir = self.stateDir = mkdtemp()

    def tearDown(self):
        rmtree(self.logDir)

    def createLogger(self):
        self.logger=HarvesterLog(stateDir=self.stateDir, logDir=self.logDir, name='tud')
        return self.logger

    def createServer(self, url='http://repository.tudelft.nl/oai'):
        return OaiRequest(url)

    def testCreateHarvester(self):
        harvester = self.createHarvesterWithMockUploader('tud')
        self.assertEquals((0,0),(self.startCalled,self.stopCalled))
        harvester.harvest()
        self.assertEquals((1,1),(self.startCalled,self.stopCalled))
        harvester = self.createHarvesterWithMockUploader('eur')
        self.assertEquals((1,1),(self.startCalled,self.stopCalled))
        harvester.harvest()
        self.assertEquals((2,2),(self.startCalled,self.stopCalled))

    def testDoUpload(self):
        harvester = self.createHarvesterWithMockUploader('tud')
        harvester.harvest()

        self.assertEqual(3, self.sendCalled)
        self.assertEqual('tud:oai:tudelft.nl:007193', self.sendId[2])
        record = parse(StringIO(self.sendParts[2]['record']))
        subjects = record.xpath('/oai:record/oai:metadata/oai_dc:dc/dc:subject/text()', namespaces=namespaces)
        self.assertEqual(['quantitative electron microscopy', 'statistical experimental design', 'parameter estimation'], subjects)
        self.assertEquals('ResumptionToken: TestToken', file(os.path.join(self.stateDir, 'tud.stats')).read()[-27:-1])

    def testLogIDsForRemoval(self):
        harvester = self.createHarvesterWithMockUploader('tud')
        harvester.harvest()
        idsfile = open(self.stateDir+'/tud.ids')
        try:
            self.assertEquals('tud:oai:tudelft.nl:007087',idsfile.readline().strip())
            self.assertEquals('tud:oai:tudelft.nl:007192',idsfile.readline().strip())
            self.assertEquals('tud:oai:tudelft.nl:007193',idsfile.readline().strip())
        finally:
            idsfile.close()

    def createHarvesterWithMockUploader(self, name, set=None, mockRequest=None):
        self.logger = HarvesterLog(stateDir=self.stateDir, logDir=self.logDir, name=name)
        repository = self.MockRepository(name, set)
        uploader = repository.createUploader(self.logger.eventLogger())
        self.mapper = repository.mapping()
        harvester = Harvester(repository)
        harvester.addObserver(mockRequest or MockOaiRequest('mocktud'))
        harvester.addObserver(self.logger)
        harvester.addObserver(uploader)
        harvester.addObserver(self.mapper)
        return harvester

    def testSimpleStat(self):
        harvester = self.createHarvesterWithMockUploader('tud')
        harvester.harvest()
        self.assert_(os.path.isfile(self.stateDir+'/tud.stats'))
        stats = open(self.stateDir + '/tud.stats').readline().strip().split(',')
        year = strftime('%Y')
        self.assertEquals('Started: %s-'%year, stats[0][:14])
        self.assertEquals(' Harvested/Uploaded/Deleted/Total: 3/3/0/3', stats[1])
        self.assertEquals(' Done: %s-'%year, stats[2][:12])

    def testErrorStat(self):
        harvester = self.createHarvesterWithMockUploader('tud')
        self.sendException = Exception('send failed')
        try:
            harvester.harvest()
        except:
            pass
        stats = open(self.stateDir + '/tud.stats').readline().strip().split(',')
        self.assertTrue(stats[2].startswith(' Error: '), stats[2])
        self.assertTrue(stats[2].endswith('send failed'), stats[2])

    def testResumptionTokenLog(self):
        harvester = self.createHarvesterWithMockUploader('tud')
        harvester.harvest()
        stats = open(self.stateDir + '/tud.stats').readline().strip().split(',')
        self.assertEquals(' ResumptionToken: TestToken', stats[3])

    def testOtherMetadataPrefix(self):
        self.logger=HarvesterLog(stateDir=self.stateDir, logDir=self.logDir, name='tud')
        repository = self.MockRepository('tud', None)
        repository.metadataPrefix='lom'
        harvester = Harvester(repository)
        harvester.addObserver(MockOaiRequest('mocktud'))
        harvester.addObserver(self.logger)
        harvester.addObserver(repository.createUploader(self.logger.eventLogger))
        harvester.addObserver(repository.mapping())
        harvester.harvest()
        self.assertEquals(['tud:oai:lorenet:147'],self.sendId)

    def testWriteAndSeek(self):
        f = open('test','w')
        f.write('enige info: ')
        pos = f.tell()
        f.write('20000')
        f.seek(pos)
        f.write('12345')
        f.close()
        self.assertEquals('enige info: 12345', open('test','r').readline().strip())
        os.remove('test')

    def testException(self):
        try:
            raise Exception('aap')
            self.fail()
        except:
            self.assertEquals('aap', str(sys.exc_value))
            self.assertTrue('exceptions.Exception' in str(sys.exc_type), str(sys.exc_type))

    def testIncrementalHarvest(self):
        self.mockRepository = MockOaiRequest('mocktud')
        f = open(self.stateDir + '/tud.stats', 'w')
        f.write(' Started: 1999-12-01 16:37:41, Harvested/Uploaded/Total: 56/23/113, Done: 2004-12-31 16:39:15\n')
        f.close()
        JsonDict({'resumptionToken': None, 'from': "1999-12-01T16:37:41Z"}).dump(open(self.stateDir + '/tud.next', 'w'))

        f = open(self.stateDir + '/tud.ids', 'w')
        for i in range(113): f.write('oai:tudfakeid:%05i\n'%i)
        f.close()
        repository = self.MockRepository3('tud' ,'http://repository.tudelft.nl/oai', None, 'tud')
        logger = self.createLogger()
        h = Harvester(repository)
        h.addObserver(self)
        h.addObserver(logger)
        h.addObserver(repository.createUploader(logger.eventLogger))
        h.addObserver(repository.mapping())
        self.listRecordsFrom = None
        h.harvest()
        self.assertEquals('1999-12-01', self.listRecordsFrom)
        lines = open(self.stateDir + '/tud.stats').readlines()
        self.assertEquals(2, len(lines))
        self.assertEquals(('3', '3', '0', '116'), getHarvestedUploadedRecords(lines[1]))

    def testNotIncrementalInCaseOfError(self):
        self.mockRepository = MockOaiRequest('mocktud')
        f = open(self.stateDir + '/tud.stats', 'w')
        f.write('Started: 1998-12-01 16:37:41, Harvested/Uploaded/Total: 113/113/113, Done: 2004-12-31 16:39:15\n')
        f.write('Started: 1999-12-01 16:37:41, Harvested/Uploaded/Total: 113/113/113, Error: XXX\n')
        f.close();
        repository = self.MockRepository3('tud' ,'http://repository.tudelft.nl/oai', None, 'tud')
        logger = self.createLogger()
        h = Harvester(repository)
        h.addObserver(self)
        h.addObserver(logger)
        h.addObserver(repository.createUploader(logger.eventLogger))
        h.addObserver(repository.mapping())
        self.listRecordsFrom = None
        h.harvest()
        self.assertEquals('1998-12-01', self.listRecordsFrom)

    def testOnlyErrorInLogFile(self):
        self.mockRepository = MockOaiRequest('mocktud')
        f = open(self.stateDir + '/tud.stats', 'w')
        f.write('Started: 1998-12-01 16:37:41, Harvested/Uploaded/Total: 113/113/113, Error:\n')
        f.write('Started: 1999-12-01 16:37:41, Harvested/Uploaded/Total: 113/113/113, Error: XXX\n')
        f.close();
        repository = self.MockRepository3('tud' ,'http://repository.tudelft.nl/oai', None, 'tud')
        logger = self.createLogger()
        h = Harvester(repository)
        h.addObserver(self)
        h.addObserver(logger)
        h.addObserver(repository.createUploader(logger.eventLogger))
        h.addObserver(repository.mapping())
        self.listRecordsFrom = None
        h.harvest()
        self.assertEquals('aap', self.listRecordsFrom)

    def testResumptionToken(self):
        self.mockRepository = MockOaiRequest('mocktud')
        f = open(self.stateDir + '/tud.stats', 'w')
        f.write('Started: 1999-12-01 16:37:41, Harvested/Uploaded/Total: 113/113/113, Done: 2004-12-31 16:39:15, ResumptionToken: ga+hier+verder\n')
        f.close();
        repository = self.MockRepository3('tud' ,'http://repository.tudelft.nl/oai', None, 'tud')
        logger = self.createLogger()
        h = Harvester(repository)
        h.addObserver(self)
        h.addObserver(logger)
        h.addObserver(repository.createUploader(logger.eventLogger))
        h.addObserver(repository.mapping())
        self.listRecordsToken = None
        h.harvest()
        self.assertEquals('ga+hier+verder', self.listRecordsToken)

    def testContinuousHarvesting(self):
        self.mockRepository = MockOaiRequest('mocktud')
        f = open(self.stateDir + '/tud.stats', 'w')
        f.write(' Started: 1999-12-01 16:37:41, Harvested/Uploaded/Total: 56/23/113, Done: 2004-12-31 16:39:15\n')
        f.close()
        JsonDict({'resumptionToken': None, 'from': "2015-01-01T00:12:13Z"}).dump(open(self.stateDir + '/tud.next', 'w'))
        repository = self.MockRepository3('tud' ,'http://repository.tudelft.nl/oai', None, 'tud', continuous=True)
        logger = self.createLogger()
        h = Harvester(repository)
        h.addObserver(self)
        h.addObserver(logger)
        h.addObserver(repository.createUploader(logger.eventLogger))
        h.addObserver(repository.mapping())
        self.listRecordsFrom = None
        h.harvest()
        self.assertEquals('2015-01-01T00:12:13Z', self.listRecordsFrom)

    def testHarvestSet(self):
        self.mockRepository = MockOaiRequest('mocktud')
        harvester = self.createHarvesterWithMockUploader('um', set='withfulltext:yes', mockRequest = self)
        harvester.harvest()
        self.assertEquals('withfulltext:yes', self.listRecordsSet)

    def mockHarvest(self, repository, logger, uploader):
        if not hasattr(self, 'mockHarvestArgs'):
            self.mockHarvestArgs=[]
        self.mockHarvestArgs.append({'name':repository.id,'baseurl':repository.baseurl,'set':repository.set,'repositoryGroupId':repository.repositoryGroupId})

    def testNoDateHarvester(self):
        "runs a test with xml containing no dates"
        harvester = self.createHarvesterWithMockUploader('tud')
        self.logger._state.token='NoDateToken'
        harvester.harvest()

    def testNothingInRepository(self):
        harvester = self.createHarvesterWithMockUploader('tud')
        self.logger._state.token='EmptyListToken'
        harvester.harvest()
        lines = open(self.stateDir+'/tud.stats').readlines()
        self.assert_('Harvested/Uploaded/Deleted/Total: 0/0/0/0' in lines[0])

    def testUploadRecord(self):
        harvester = self.createHarvesterWithMockUploader('tud')
        harvester.upload(oaiResponse(identifier='mockid'))
        self.assertEquals(['tud:mockid'], self.sendId)
        self.assertFalse(hasattr(self, 'delete_id'))

    def testSkippedRecord(self):
        harvester = self.createHarvesterWithMockUploader('tud')
        def createUpload(repository, oaiResponse):
            upload = Upload(repository=repository, oaiResponse=oaiResponse)
            upload.id = "tud:mockid"
            upload.skip = True
            return upload
        self.mapper.createUpload = createUpload
        harvester.upload(oaiResponse(identifier='mockid'))
        self.assertEquals([], self.sendId)
        self.assertFalse(hasattr(self, 'delete_id'))

    def testDelete(self):
        harvester = self.createHarvesterWithMockUploader('tud')
        harvester.upload(oaiResponse(identifier='mockid', deleted=True))
        self.assertEquals([], self.sendId)
        self.assertEquals('tud:mockid', self.delete_id)

    def testDcIdentifierTake2(self):
        self.sendFulltexturl=None
        harvester = self.createHarvesterWithMockUploader('tud')
        self.logger.token='DcIdentifierHttp2'
        harvester.harvest()
        open(self.stateDir+'/tud.stats').readlines()

    def testHarvesterStopsIgnoringAfter100records(self):
        observer = CallTrace('observer')
        upload = Upload(repository=None, oaiResponse=oaiResponse(identifier='mockid'))
        upload.id = 'mockid'
        observer.returnValues['createUpload'] = upload
        observer.returnValues['totalInvalidIds'] = 101
        observer.exceptions['send'] =  InvalidDataException(upload.id, "message")
        repository=CallTrace("repository", returnValues={'maxIgnore': 100})
        harvester = Harvester(repository)
        harvester.addObserver(observer)
        self.assertRaises(TooMuchInvalidDataException, lambda: harvester.upload(oaiResponse(identifier='mockid')))
        self.assertEquals(['createUpload', "notifyHarvestedRecord", "send", "logInvalidData", "totalInvalidIds"], [m.name for m in observer.calledMethods])

    def testHarvesterIgnoringInvalidDataErrors(self):
        observer = CallTrace('observer')
        upload = Upload(repository=None, oaiResponse=oaiResponse(identifier='mockid'))
        upload.id = 'mockid'
        observer.returnValues['createUpload'] = upload
        observer.returnValues['totalInvalidIds'] = 0
        observer.exceptions['send'] =  InvalidDataException(upload.id, "message")
        repository=CallTrace("repository", returnValues={'maxIgnore': 100})
        harvester = Harvester(repository)
        harvester.addObserver(observer)
        harvester.upload(oaiResponse())
        self.assertEquals(['createUpload', "notifyHarvestedRecord", "send", 'logInvalidData', "totalInvalidIds", 'logIgnoredIdentifierWarning'], [m.name for m in observer.calledMethods])

    #self shunt:
    def send(self, upload):
        self.sendCalled+=1
        self.sendId.append(upload.id)
        self.sendParts.append(upload.parts)
        self.upload = upload
        if self.sendException:
            raise self.sendException

    def delete(self, anUpload):
        self.delete_id = anUpload.id

    def uploaderInfo(self):
        return 'The uploader is connected to /dev/null'

    def start(self):
        self.startCalled += 1

    def stop(self):
        self.stopCalled += 1

    def listRecordsButWaitLong(self, a, b, c, d):
        sleep(20)

    def MockRepository (self, id, set):
        return _MockRepository(id, 'http://mock.server', set, 'inst'+id,self)

    def MockRepository2 (self, nr):
        return _MockRepository('reponame'+nr, 'url'+nr, 'set'+nr, 'instname'+nr,self)

    def MockRepository3(self, id, baseurl, set, repositoryGroupId, continuous=False):
        return _MockRepository(id, baseurl, set, repositoryGroupId, self, continuous=continuous)

    def mockssetarget(self):
        return self

    def createUploader(self, logger):
        return self

    def listRecords(self, metadataPrefix = None, from_ = "aap", resumptionToken = 'mies', set = None):
        self.listRecordsFrom = from_
        self.listRecordsToken = resumptionToken
        self.listRecordsSet = set
        if metadataPrefix:
            if set:
                return self.mockRepository.listRecords(metadataPrefix = metadataPrefix, set = set)
            return self.mockRepository.listRecords(metadataPrefix = metadataPrefix)
        return self.mockRepository.listRecords(resumptionToken = resumptionToken)
예제 #16
0
 def testHasWorkWithResumptionTokenContinuous(self):
     """With a continuous interval of 60, a log whose run ended with a resumption token has work."""
     interval = 60
     timestampFormat = "%Y-%m-%dT%H:%M:%SZ"

     def openLog():
         return HarvesterLog(stateDir=self.stateDir, logDir=self.logDir, name= 'name')

     log = openLog()
     self.assertTrue(log.hasWork(continuousInterval=interval))
     log.startRepository()
     log.endRepository('resumptionToken', strftime(timestampFormat, log._state._gmtime()))
     log.close()

     log = openLog()
     self.assertTrue(log.hasWork(continuousInterval=interval))
     log.startRepository()
     # Second run is stamped just over one interval in the past.
     justOverAnIntervalAgo = gmtime(time() - interval - 1)
     log.endRepository('resumptionToken2', strftime(timestampFormat, justOverAnIntervalAgo))
     log.close()
예제 #17
0
    def testRefresh(self):
        """Refresh action: follows the OAI requests made per harvester start and the resulting dump.

        After the action has finished, one more harvester start is expected
        to produce no further requests.
        """
        oldlogs = self.getLogs()
        log = HarvesterLog(stateDir=join(self.harvesterStateDir, DOMAIN), logDir=join(self.harvesterLogDir, DOMAIN), name=REPOSITORY)
        log.startRepository()
        for uploadId in ['%s:oai:record:%02d' % (REPOSITORY, i) for i in [1,7,120,121]]:
            log.notifyHarvestedRecord(uploadId)
            log.uploadIdentifier(uploadId)
        for uploadId in ['%s:oai:record:%02d' % (REPOSITORY, i) for i in [4,5,122,123]]:
            log.notifyHarvestedRecord(uploadId)
            log.deleteIdentifier(uploadId)
        log.endRepository('token', '2012-01-01T09:00:00Z')
        log.close()

        self.saveRepository(DOMAIN, REPOSITORY, REPOSITORYGROUP, action='refresh')

        # First start: no request is logged yet.
        self.startHarvester(repository=REPOSITORY)
        logs = self.getLogs()[len(oldlogs):]
        self.assertEqual(0, len(logs))
        self.startHarvester(repository=REPOSITORY)
        logs = self.getLogs()
        self.assertEqual('/oai', logs[-1]["path"])
        self.assertEqual({'verb': ['ListRecords'], 'metadataPrefix': ['oai_dc']}, logs[-1]["arguments"])
        statsFile = join(self.harvesterStateDir, DOMAIN, '%s.stats' % REPOSITORY)
        with open(statsFile) as f:
            token = getResumptionToken(f.readlines()[-1])

        self.startHarvester(repository=REPOSITORY)
        logs = self.getLogs()
        self.assertEqual('/oai', logs[-1]["path"])
        self.assertEqual({'verb': ['ListRecords'], 'resumptionToken': [token]}, logs[-1]["arguments"])
        self.assertEqual(15, self.sizeDumpDir())

        self.startHarvester(repository=REPOSITORY)
        self.assertEqual(17, self.sizeDumpDir())
        deleteFiles = [join(self.dumpDir, f) for f in listdir(self.dumpDir) if '_delete' in f]
        deletedIds = set()
        for deleteFile in deleteFiles:
            # Close every dump file as soon as it has been parsed.
            with open(deleteFile) as f:
                deletedIds.add(xpathFirst(parse(f), '//ucp:recordIdentifier/text()'))
        self.assertEqual(set(['%s:oai:record:03' % REPOSITORY, '%s:oai:record:06' % REPOSITORY, '%s:oai:record:120' % REPOSITORY, '%s:oai:record:121' % REPOSITORY]), deletedIds)

        logs = self.getLogs()[len(oldlogs):]
        self.startHarvester(repository=REPOSITORY)
        self.assertEqual(len(logs), len(self.getLogs()[len(oldlogs):]), 'Action is over, expect nothing more.')
예제 #18
0
 def testLogInvalidData(self):
     """logInvalidData() stores the message in a file; harvesting the record again removes it."""
     logger = HarvesterLog(stateDir=self.stateDir, logDir=self.logDir, name='name')
     logger.startRepository()
     logger.notifyHarvestedRecord('repo/id:oai:bla/bla')
     logger.logInvalidData('repo/id:oai:bla/bla', "Error")
     self.assertEqual(1, logger.totalInvalidIds())
     # Each '/' in repository id and record id shows up as '%2F' in the expected path.
     expectedFile = self.logDir + '/invalid/repo%2Fid/oai:bla%2Fbla'
     with open(expectedFile) as f:
         self.assertEqual("Error", f.read())
     logger.notifyHarvestedRecord('repo/id:oai:bla/bla')
     self.assertEqual(0, logger.totalInvalidIds())
     self.assertFalse(isfile(expectedFile))
예제 #19
0
    def testClearWithInvalidRecords(self):
        """Clear action after a run with uploads, deletes and invalid records empties both ids files."""
        log = HarvesterLog(stateDir=join(self.harvesterStateDir, DOMAIN), logDir=join(self.harvesterLogDir, DOMAIN), name=REPOSITORY)
        log.startRepository()
        for uploadId in ['%s:oai:record:%02d' % (REPOSITORY, i) for i in [1,2,120,121]]:
            log.notifyHarvestedRecord(uploadId)
            log.uploadIdentifier(uploadId)
        for uploadId in ['%s:oai:record:%02d' % (REPOSITORY, i) for i in [4,5,122,123,124]]:
            log.notifyHarvestedRecord(uploadId)
            log.deleteIdentifier(uploadId)
        for uploadId in ['%s:oai:record:%02d' % (REPOSITORY, i) for i in [7,8,125,126,127,128]]:
            log.notifyHarvestedRecord(uploadId)
            log.logInvalidData(uploadId, 'ignored message')
            log.logIgnoredIdentifierWarning(uploadId)
        log.endRepository('token', '2012-01-01T09:00:00Z')
        log.close()
        # Counts of the uploads and invalid records logged above. The original
        # also defined oldDeletes = 5, which no assertion used.
        oldUploads = 4
        oldInvalids = 6

        self.saveRepository(DOMAIN, REPOSITORY, REPOSITORYGROUP, action='clear')

        self.startHarvester(repository=REPOSITORY)
        self.assertEqual(oldUploads+oldInvalids, self.sizeDumpDir())
        with open(join(self.harvesterStateDir, DOMAIN, "%s_invalid.ids" % REPOSITORY)) as f:
            invalidIds = f.readlines()
        self.assertEqual(0, len(invalidIds), invalidIds)
        with open(join(self.harvesterStateDir, DOMAIN, "%s.ids" % REPOSITORY)) as f:
            ids = f.readlines()
        self.assertEqual(0, len(ids), ids)
예제 #20
0
 def testInvalidIDs(self):
     """invalidIds() lists the ids for which invalid data was logged, in logging order."""
     logger = HarvesterLog(stateDir=self.stateDir, logDir=self.logDir, name='name')
     logger.startRepository()
     logger.notifyHarvestedRecord('id:1')
     logger.logInvalidData('id:1', 'exception message')
     logger.notifyHarvestedRecord('id:2')
     logger.logInvalidData('id:2', 'exception message')
     self.assertEqual(['id:1', 'id:2'], logger.invalidIds())