コード例 #1
0
    def testRefreshWithIgnoredRecords(self):
        """Refresh a repository whose stored state holds uploaded, deleted and ignored ids.

        The state is seeded through HarvesterLog with 4 uploaded, 5 deleted and
        6 invalid/ignored identifiers. The repository is then saved with
        action='refresh' and the harvester is run four times; the dump directory
        size is checked after the first, third and fourth run, and both ids
        files are inspected at the end.
        """
        log = HarvesterLog(stateDir=join(self.harvesterStateDir, DOMAIN), logDir=join(self.harvesterLogDir, DOMAIN), name=REPOSITORY)
        log.startRepository()
        for uploadId in ['%s:oai:record:%02d' % (REPOSITORY, i) for i in [1,2,120,121]]:
            # Record 02 is stored under an identifier that contains '/' and '&'.
            if uploadId == '%s:oai:record:02' % (REPOSITORY):
                uploadId = '%s:oai:record:02/&gkn' % (REPOSITORY)
            log.notifyHarvestedRecord(uploadId)
            log.uploadIdentifier(uploadId)
        for uploadId in ['%s:oai:record:%02d' % (REPOSITORY, i) for i in [4,5,122,123,124]]:
            log.notifyHarvestedRecord(uploadId)
            log.deleteIdentifier(uploadId)
        for uploadId in ['%s:oai:record:%02d' % (REPOSITORY, i) for i in [7,8,125,126,127,128]]:
            log.notifyHarvestedRecord(uploadId)
            log.logInvalidData(uploadId, 'ignored message')
            log.logIgnoredIdentifierWarning(uploadId)
        log.endRepository('token', '2012-01-01T09:00:00Z')
        log.close()

        # Expected dump directory contributions.
        # NOTE(review): oldUploads/oldIgnoreds presumably count the seeded ids
        # (120-121 and 125-128) that the fresh harvest does not return -- confirm.
        # Seeded deletes are not part of any dump directory count asserted below.
        totalRecords = 15
        oldUploads = 2
        oldIgnoreds = 4

        self.saveRepository(DOMAIN, REPOSITORY, REPOSITORYGROUP, action='refresh')

        self.startHarvester(repository=REPOSITORY) # Smooth init
        self.assertEqual(0, self.sizeDumpDir())
        self.startHarvester(repository=REPOSITORY) # Smooth harvest
        self.startHarvester(repository=REPOSITORY) # Smooth harvest
        self.assertEqual(totalRecords, self.sizeDumpDir())
        self.startHarvester(repository=REPOSITORY) # Smooth finish
        self.assertEqual(totalRecords + oldUploads + oldIgnoreds, self.sizeDumpDir())

        # Read the state files through context managers so the handles are closed.
        with open(join(self.harvesterStateDir, DOMAIN, "%s_invalid.ids" % REPOSITORY)) as invalidIdsFile:
            invalidIds = invalidIdsFile.readlines()
        self.assertEqual(0, len(invalidIds), invalidIds)
        with open(join(self.harvesterStateDir, DOMAIN, "%s.ids" % REPOSITORY)) as idsFile:
            ids = idsFile.readlines()
        self.assertEqual(13, len(ids), ids)
コード例 #2
0
    def testClearWithInvalidRecords(self):
        """Clear a repository whose stored state holds uploaded, deleted and invalid ids.

        The state is seeded through HarvesterLog with 4 uploaded, 5 deleted and
        6 invalid/ignored identifiers. After saving the repository with
        action='clear' and running the harvester once, the dump directory is
        expected to contain one entry per old upload and per old invalid record,
        and both ids files are expected to be empty.
        """
        log = HarvesterLog(stateDir=join(self.harvesterStateDir, DOMAIN), logDir=join(self.harvesterLogDir, DOMAIN), name=REPOSITORY)
        log.startRepository()
        for uploadId in ['%s:oai:record:%02d' % (REPOSITORY, i) for i in [1,2,120,121]]:
            log.notifyHarvestedRecord(uploadId)
            log.uploadIdentifier(uploadId)
        for uploadId in ['%s:oai:record:%02d' % (REPOSITORY, i) for i in [4,5,122,123,124]]:
            log.notifyHarvestedRecord(uploadId)
            log.deleteIdentifier(uploadId)
        for uploadId in ['%s:oai:record:%02d' % (REPOSITORY, i) for i in [7,8,125,126,127,128]]:
            log.notifyHarvestedRecord(uploadId)
            log.logInvalidData(uploadId, 'ignored message')
            log.logIgnoredIdentifierWarning(uploadId)
        log.endRepository('token', '2012-01-01T09:00:00Z')
        log.close()

        # Sizes of the seeded groups above; the 5 seeded deletes are not part
        # of the dump directory count asserted below.
        oldUploads = 4
        oldInvalids = 6

        self.saveRepository(DOMAIN, REPOSITORY, REPOSITORYGROUP, action='clear')

        self.startHarvester(repository=REPOSITORY)
        self.assertEqual(oldUploads+oldInvalids, self.sizeDumpDir())

        # Read the state files through context managers so the handles are closed.
        with open(join(self.harvesterStateDir, DOMAIN, "%s_invalid.ids" % REPOSITORY)) as invalidIdsFile:
            invalidIds = invalidIdsFile.readlines()
        self.assertEqual(0, len(invalidIds), invalidIds)
        with open(join(self.harvesterStateDir, DOMAIN, "%s.ids" % REPOSITORY)) as idsFile:
            ids = idsFile.readlines()
        self.assertEqual(0, len(ids), ids)
コード例 #3
0
 def testLogWithoutDoubleIDs(self):
     """Check that totalIds() counts each identifier once.

     A pre-existing ids file with a duplicated line ('id:1' twice) must be
     counted as 2 ids, and uploading an identifier that is already known
     must not increase the total.
     """
     # The context manager closes the file even if writing fails.
     with open(self.stateDir+'/name.ids','w') as idsFile:
         idsFile.writelines(['id:1\n','id:2\n','id:1\n'])
     logger = HarvesterLog(stateDir=self.stateDir, logDir=self.logDir, name= 'name')
     logger.startRepository()
     self.assertEqual(2,logger.totalIds())
     logger.uploadIdentifier('id:3')
     self.assertEqual(3,logger.totalIds())
     # Re-uploading known identifiers leaves the total unchanged.
     logger.uploadIdentifier('id:3')
     logger.uploadIdentifier('id:2')
     self.assertEqual(3,logger.totalIds())
コード例 #4
0
    def testLogIgnoredIdentifierWarning(self):
        """Check the event line and invalid-id bookkeeping for an ignored identifier.

        logInvalidData() alone must leave the events file empty; the warning
        line is expected only after logIgnoredIdentifierWarning(). Harvesting
        the same identifier again must reset the invalid count to 0, after
        which uploading it counts as one regular id.
        """
        logger = HarvesterLog(stateDir=self.stateDir, logDir=self.logDir, name='name')
        logger.startRepository()
        logger.notifyHarvestedRecord('repoid:oai:bla/bla')
        logger.logInvalidData('repoid:oai:bla/bla', 'bla/bla')
        # Context managers close the events file after each read.
        with open(self.logDir + '/name.events') as eventsFile:
            self.assertEqual('', eventsFile.read())
        logger.logIgnoredIdentifierWarning('repoid:oai:bla/bla')
        with open(self.logDir + '/name.events') as eventsFile:
            self.assertTrue(eventsFile.read().endswith("\tWARNING\t[repoid:oai:bla/bla]\tIGNORED\n"))
        self.assertEqual(1, logger.totalInvalidIds())

        logger.notifyHarvestedRecord('repoid:oai:bla/bla')
        self.assertEqual(0, logger.totalInvalidIds())
        logger.uploadIdentifier('repoid:oai:bla/bla')
        self.assertEqual(1, logger.totalIds())
コード例 #5
0
    def testRefresh(self):
        """Walk a 'refresh' action through its successive harvester runs.

        The state is seeded with 4 uploaded and 4 deleted identifiers. The
        expectations per run are:
          1. no new entries in getLogs();
          2. a ListRecords request with metadataPrefix 'oai_dc';
          3. a ListRecords request with the resumption token taken from the
             last line of the stats file, and 15 entries in the dump dir;
          4. 17 entries in the dump dir, with delete files for exactly the
             four expected record identifiers;
          5. no further entries in getLogs().
        """
        oldlogs = self.getLogs()
        log = HarvesterLog(stateDir=join(self.harvesterStateDir, DOMAIN), logDir=join(self.harvesterLogDir, DOMAIN), name=REPOSITORY)
        log.startRepository()
        for uploadId in ['%s:oai:record:%02d' % (REPOSITORY, i) for i in [1,7,120,121]]:
            log.notifyHarvestedRecord(uploadId)
            log.uploadIdentifier(uploadId)
        for uploadId in ['%s:oai:record:%02d' % (REPOSITORY, i) for i in [4,5,122,123]]:
            log.notifyHarvestedRecord(uploadId)
            log.deleteIdentifier(uploadId)
        log.endRepository('token', '2012-01-01T09:00:00Z')
        log.close()

        self.saveRepository(DOMAIN, REPOSITORY, REPOSITORYGROUP, action='refresh')

        self.startHarvester(repository=REPOSITORY)
        logs = self.getLogs()[len(oldlogs):]
        self.assertEqual(0, len(logs))
        self.startHarvester(repository=REPOSITORY)
        logs = self.getLogs()
        self.assertEqual('/oai', logs[-1]["path"])
        self.assertEqual({'verb': ['ListRecords'], 'metadataPrefix': ['oai_dc']}, logs[-1]["arguments"])
        statsFile = join(self.harvesterStateDir, DOMAIN, '%s.stats' % REPOSITORY)
        # The context manager closes the stats file once the last line is read.
        with open(statsFile) as stats:
            token = getResumptionToken(stats.readlines()[-1])

        self.startHarvester(repository=REPOSITORY)
        logs = self.getLogs()
        self.assertEqual('/oai', logs[-1]["path"])
        self.assertEqual({'verb': ['ListRecords'], 'resumptionToken': [token]}, logs[-1]["arguments"])
        self.assertEqual(15, self.sizeDumpDir())

        self.startHarvester(repository=REPOSITORY)
        self.assertEqual(17, self.sizeDumpDir())
        deleteFiles = [join(self.dumpDir, f) for f in listdir(self.dumpDir) if '_delete' in f]
        # Parse each delete file inside its own context manager so no handle
        # is left open.
        # NOTE(review): assumes parse() reads the whole file before returning -- confirm.
        deletedIds = set()
        for deleteFile in deleteFiles:
            with open(deleteFile) as fp:
                deletedIds.add(xpathFirst(parse(fp), '//ucp:recordIdentifier/text()'))
        self.assertEqual(set(['%s:oai:record:03' % REPOSITORY, '%s:oai:record:06' % REPOSITORY, '%s:oai:record:120' % REPOSITORY, '%s:oai:record:121' % REPOSITORY]), deletedIds)

        # One more run must not add any log entries.
        logs = self.getLogs()[len(oldlogs):]
        self.startHarvester(repository=REPOSITORY)
        self.assertEqual(len(logs), len(self.getLogs()[len(oldlogs):]), 'Action is over, expect nothing more.')
コード例 #6
0
 def testLogLineError(self):
     """Check the stats and event lines written by endWithException().

     One record is harvested and uploaded, a second one is only harvested,
     then an exception is raised and handed to the logger. The stats line
     must report '2/1/0/1' followed by an error, and the event line must be
     an ERROR for 'name' whose comments hold the flattened traceback.
     """
     logger = HarvesterLog(stateDir=self.stateDir, logDir=self.logDir, name='name')
     logger.startRepository()
     try:
         logger.notifyHarvestedRecord("name:uploadId1")
         logger.uploadIdentifier("name:uploadId1")
         logger.notifyHarvestedRecord("name:uploadId2")
         # Keep the next source line exactly as written and free of trailing
         # comments: its text is asserted as part of the logged traceback.
         raise Exception('FATAL')
     except Exception:
         exType, exValue, exTb = exc_info()
         logger.endWithException(exType, exValue, exTb)
     logger.close()
     # Context managers close the log files after reading.
     with open(self.stateDir+'/name.stats') as statsFile:
         lines = statsFile.readlines()
     with open(self.logDir+'/name.events') as eventsFile:
         eventline = eventsFile.readlines()[0].strip()
     #Total is now counted based upon the id's
     self.assertTrue('2/1/0/1, Error: ' in lines[0], lines[0])
     _date, event, ident, comments = LOGLINE_RE.match(eventline).groups()
     self.assertEqual('ERROR', event.strip())
     self.assertEqual('name', ident)
     self.assertTrue(comments.startswith('Traceback (most recent call last):|File "'))
     self.assertTrue('harvesterlogtest.py", line ' in comments)
     self.assertTrue(comments.endswith(', in testLogLineError raise Exception(\'FATAL\')|Exception: FATAL'))
コード例 #7
0
 def testLogLine(self):
     """Check the stats line, event line and invalid-data file of a completed run.

     One identifier is uploaded and then deleted, and a second one is logged
     as invalid and ignored. After endRepository() the stats line must report
     '3/1/1/0, Done:', the second event line must be the SUCCES summary for
     'name', and the invalid record's message must be stored under
     logDir/invalid/name/uploadId2.
     """
     logger = HarvesterLog(stateDir=self.stateDir, logDir=self.logDir, name= 'name')
     logger.startRepository()
     logger.notifyHarvestedRecord("name:uploadId1")
     logger.uploadIdentifier("name:uploadId1")
     logger.notifyHarvestedRecord("name:uploadId1")
     logger.deleteIdentifier("name:uploadId1")
     logger.notifyHarvestedRecord("name:uploadId2")
     logger.logInvalidData("name:uploadId2", "Test Exception")
     logger.logIgnoredIdentifierWarning("name:uploadId2")
     logger.endRepository(None, '2012-01-01T09:00:00Z')
     logger.close()
     # Context managers close the log files after reading.
     with open(self.stateDir + '/name.stats') as statsFile:
         lines = statsFile.readlines()
     with open(self.logDir + '/name.events') as eventsFile:
         # Index 1: the second line of the events file is the one under test.
         eventline = eventsFile.readlines()[1].strip()
     with open(self.logDir + '/invalid/name/uploadId2') as invalidFile:
         invalidUploadId2 = invalidFile.read()
     #Total is now counted based upon the id's
     self.assertTrue('3/1/1/0, Done:' in lines[0], lines[0])
     _date, event, ident, comments = LOGLINE_RE.match(eventline).groups()
     self.assertEqual('SUCCES', event.strip())
     self.assertEqual('name', ident)
     self.assertEqual('Harvested/Uploaded/Deleted/Total: 3/1/1/0, ResumptionToken: None', comments)
     self.assertEqual('Test Exception', invalidUploadId2)