def testHarvesterIgnoringInvalidDataErrors(self):
     """upload() must not raise when 'send' fails with InvalidDataException.

     The observer reports 0 invalid ids and the repository's maxIgnore is
     100. The test pins the exact sequence of observer calls, which ends
     with 'logIgnoredIdentifierWarning'.
     """
     observer = CallTrace('observer')
     upload = Upload(repository=None, oaiResponse=oaiResponse(identifier='mockid'))
     upload.id = 'mockid'
     observer.returnValues['createUpload'] = upload
     observer.returnValues['totalInvalidIds'] = 0
     observer.exceptions['send'] = InvalidDataException(upload.id, "message")
     repository = CallTrace("repository", returnValues={'maxIgnore': 100})
     harvester = Harvester(repository)
     harvester.addObserver(observer)
     harvester.upload(oaiResponse())
     # assertEqual replaces the deprecated assertEquals alias (removed in Python 3.12).
     self.assertEqual(
         ['createUpload', "notifyHarvestedRecord", "send", 'logInvalidData', "totalInvalidIds", 'logIgnoredIdentifierWarning'],
         [m.name for m in observer.calledMethods])
 def testHarvesterStopsIgnoringAfter100records(self):
     """upload() must raise TooMuchInvalidDataException past the ignore limit.

     The observer reports 101 invalid ids while the repository's maxIgnore
     is 100. The observer call sequence is expected to stop after
     'totalInvalidIds' (no 'logIgnoredIdentifierWarning').
     """
     observer = CallTrace('observer')
     upload = Upload(repository=None, oaiResponse=oaiResponse(identifier='mockid'))
     upload.id = 'mockid'
     observer.returnValues['createUpload'] = upload
     observer.returnValues['totalInvalidIds'] = 101
     observer.exceptions['send'] = InvalidDataException(upload.id, "message")
     repository = CallTrace("repository", returnValues={'maxIgnore': 100})
     harvester = Harvester(repository)
     harvester.addObserver(observer)
     with self.assertRaises(TooMuchInvalidDataException):
         harvester.upload(oaiResponse(identifier='mockid'))
     # assertEqual replaces the deprecated assertEquals alias (removed in Python 3.12).
     self.assertEqual(
         ['createUpload', "notifyHarvestedRecord", "send", "logInvalidData", "totalInvalidIds"],
         [m.name for m in observer.calledMethods])
Exemple #3
0
 def createHarvesterWithMockUploader(self, name, set=None, mockRequest=None):
     """Return a Harvester on a mock repository with four observers attached.

     Side effects: stores the logger from self.createLogger(name) on
     self.logger and the repository's mapping on self.mapper.

     name        -- passed to createLogger and MockRepository.
     set         -- passed to MockRepository as second argument.
     mockRequest -- request observer; MockOaiRequest('mocktud') is used
                    when this is falsy.
     """
     self.logger = self.createLogger(name)
     mockRepo = self.MockRepository(name, set)
     mockUploader = mockRepo.createUploader(self.logger.eventLogger())
     self.mapper = mockRepo.mapping()
     result = Harvester(mockRepo)
     # Registration order: request, logger, uploader, mapper.
     observers = (
         mockRequest or MockOaiRequest('mocktud'),
         self.logger,
         mockUploader,
         self.mapper,
     )
     for observer in observers:
         result.addObserver(observer)
     return result
 def createHarvesterWithMockUploader(self, name, set=None, mockRequest=None):
     """Return a Harvester on a mock repository with four observers attached.

     Side effects: stores a HarvesterLog (built from self.stateDir,
     self.logDir and name) on self.logger, and the repository's mapping on
     self.mapper.

     name        -- passed to HarvesterLog and MockRepository.
     set         -- passed to MockRepository as second argument.
     mockRequest -- request observer; MockOaiRequest('mocktud') is used
                    when this is falsy.
     """
     self.logger = HarvesterLog(
         stateDir=self.stateDir,
         logDir=self.logDir,
         name=name)
     mockRepo = self.MockRepository(name, set)
     mockUploader = mockRepo.createUploader(self.logger.eventLogger())
     self.mapper = mockRepo.mapping()
     result = Harvester(mockRepo)
     # Registration order: request, logger, uploader, mapper.
     observers = (
         mockRequest or MockOaiRequest('mocktud'),
         self.logger,
         mockUploader,
         self.mapper,
     )
     for observer in observers:
         result.addObserver(observer)
     return result
Exemple #5
0
 def testHarvesterStopsIgnoringAfter100records(self):
     """Expect TooMuchInvalidDataException when invalid ids exceed maxIgnore.

     Setup: 'send' raises InvalidDataException, 'totalInvalidIds' returns
     101 and the repository's 'maxIgnore' returns 100. The observer call
     sequence must end at 'totalInvalidIds'.
     """
     mockUpload = Upload(repository=None,
                         oaiResponse=oaiResponse(identifier='mockid'))
     mockUpload.id = 'mockid'
     tracer = CallTrace('observer',
                        returnValues={
                            'createUpload': mockUpload,
                            'totalInvalidIds': 101,
                        })
     tracer.exceptions['send'] = InvalidDataException(mockUpload.id, "message")
     harvester = Harvester(
         CallTrace("repository", returnValues={'maxIgnore': 100}))
     harvester.addObserver(tracer)

     with self.assertRaises(TooMuchInvalidDataException):
         harvester.upload(oaiResponse(identifier='mockid'))

     expectedCalls = [
         'createUpload',
         'notifyHarvestedRecord',
         'send',
         'logInvalidData',
         'totalInvalidIds',
     ]
     actualCalls = [call.name for call in tracer.calledMethods]
     self.assertEqual(expectedCalls, actualCalls)
Exemple #6
0
 def testHarvesterIgnoringInvalidDataErrors(self):
     """Expect upload() to finish normally although 'send' raises InvalidDataException.

     Setup: 'totalInvalidIds' returns 0 and the repository's 'maxIgnore'
     returns 100. The observer call sequence must end with
     'logIgnoredIdentifierWarning'.
     """
     mockUpload = Upload(repository=None,
                         oaiResponse=oaiResponse(identifier='mockid'))
     mockUpload.id = 'mockid'
     tracer = CallTrace('observer',
                        returnValues={
                            'createUpload': mockUpload,
                            'totalInvalidIds': 0,
                        })
     tracer.exceptions['send'] = InvalidDataException(mockUpload.id, "message")
     harvester = Harvester(
         CallTrace("repository", returnValues={'maxIgnore': 100}))
     harvester.addObserver(tracer)

     harvester.upload(oaiResponse())

     expectedCalls = [
         'createUpload',
         'notifyHarvestedRecord',
         'send',
         'logInvalidData',
         'totalInvalidIds',
         'logIgnoredIdentifierWarning',
     ]
     actualCalls = [call.name for call in tracer.calledMethods]
     self.assertEqual(expectedCalls, actualCalls)
Exemple #7
0
    def testContinuousHarvesting(self):
        """Harvest a continuous repository and check the 'from' value used.

        State files written beforehand: tud.stats with one finished run and
        tud.next with 'from' set to 2015-01-01T00:12:13Z and no resumption
        token. After harvest(), self.listRecordsFrom must equal that full
        timestamp. self.listRecordsFrom is presumably filled in by this
        test case acting as an observer; that code is outside this block.
        """
        statsLine = ' Started: 1999-12-01 16:37:41, Harvested/Uploaded/Total: 56/23/113, Done: 2004-12-31 16:39:15\n'
        self.mockRepository = MockOaiRequest('mocktud')
        with open(self.stateDir + '/tud.stats', 'w') as statsFile:
            statsFile.write(statsLine)

        nextState = JsonDict({'resumptionToken': None, 'from': "2015-01-01T00:12:13Z"})
        with open(self.stateDir + '/tud.next', 'w') as nextFile:
            nextState.dump(nextFile)

        repo = self.MockRepository3(
            'tud', 'http://repository.tudelft.nl/oai', None, 'tud',
            continuous=True)
        log = self.createLogger()
        harvester = Harvester(repo)
        harvester.addObserver(self)
        harvester.addObserver(log)
        harvester.addObserver(repo.createUploader(log.eventLogger))
        harvester.addObserver(repo.mapping())

        self.listRecordsFrom = None
        harvester.harvest()
        self.assertEqual('2015-01-01T00:12:13Z', self.listRecordsFrom)
Exemple #8
0
 def testResumptionToken(self):
     """Harvest after a run whose stats line carries a resumption token.

     tud.stats is seeded with a line ending in
     'ResumptionToken: ga+hier+verder'. After harvest(),
     self.listRecordsToken must equal that token. self.listRecordsToken is
     presumably filled in by this test case acting as an observer; that
     code is outside this block.
     """
     self.mockRepository = MockOaiRequest('mocktud')
     # Context manager guarantees the stats file is closed even if write() fails.
     with open(self.stateDir + '/tud.stats', 'w') as f:
         f.write(
             'Started: 1999-12-01 16:37:41, Harvested/Uploaded/Total: 113/113/113, Done: 2004-12-31 16:39:15, ResumptionToken: ga+hier+verder\n'
         )
     repository = self.MockRepository3('tud',
                                       'http://repository.tudelft.nl/oai',
                                       None, 'tud')
     logger = self.createLogger()
     h = Harvester(repository)
     h.addObserver(self)
     h.addObserver(logger)
     h.addObserver(repository.createUploader(logger.eventLogger))
     h.addObserver(repository.mapping())
     self.listRecordsToken = None
     h.harvest()
     self.assertEqual('ga+hier+verder', self.listRecordsToken)
Exemple #9
0
 def testOnlyErrorInLogFile(self):
     """Harvest when tud.stats contains only runs that ended in 'Error:'.

     Two error lines are written to tud.stats before harvesting. After
     harvest(), self.listRecordsFrom must equal 'aap'. Where that value
     comes from is not visible in this block (presumably a sentinel
     supplied by this test case's observer code).
     """
     errorLines = (
         'Started: 1998-12-01 16:37:41, Harvested/Uploaded/Total: 113/113/113, Error:\n',
         'Started: 1999-12-01 16:37:41, Harvested/Uploaded/Total: 113/113/113, Error: XXX\n',
     )
     self.mockRepository = MockOaiRequest('mocktud')
     with open(self.stateDir + '/tud.stats', 'w') as statsFile:
         for errorLine in errorLines:
             statsFile.write(errorLine)

     repo = self.MockRepository3(
         'tud', 'http://repository.tudelft.nl/oai', None, 'tud')
     log = self.createLogger()
     harvester = Harvester(repo)
     harvester.addObserver(self)
     harvester.addObserver(log)
     harvester.addObserver(repo.createUploader(log.eventLogger))
     harvester.addObserver(repo.mapping())

     self.listRecordsFrom = None
     harvester.harvest()
     self.assertEqual('aap', self.listRecordsFrom)
Exemple #10
0
    def testIncrementalHarvest(self):
        """Harvest on top of existing state and verify 'from' and the new stats line.

        State files written beforehand: tud.stats (one finished run),
        tud.next ('from' 1999-12-01T16:37:41Z, no resumption token) and
        tud.ids (113 generated identifiers). After harvest():
        - self.listRecordsFrom must be the date part only, '1999-12-01';
        - tud.stats must hold two lines, and getHarvestedUploadedRecords on
          the second must return ('3', '3', '0', '116').
        """
        statsPath = self.stateDir + '/tud.stats'
        self.mockRepository = MockOaiRequest('mocktud')
        with open(statsPath, 'w') as statsFile:
            statsFile.write(
                ' Started: 1999-12-01 16:37:41, Harvested/Uploaded/Total: 56/23/113, Done: 2004-12-31 16:39:15\n')

        nextState = JsonDict({'resumptionToken': None, 'from': "1999-12-01T16:37:41Z"})
        with open(self.stateDir + '/tud.next', 'w') as nextFile:
            nextState.dump(nextFile)

        with open(self.stateDir + '/tud.ids', 'w') as idsFile:
            idsFile.writelines(
                'oai:tudfakeid:%05i\n' % number for number in range(113))

        repo = self.MockRepository3(
            'tud', 'http://repository.tudelft.nl/oai', None, 'tud')
        log = self.createLogger()
        harvester = Harvester(repo)
        harvester.addObserver(self)
        harvester.addObserver(log)
        harvester.addObserver(repo.createUploader(log.eventLogger))
        harvester.addObserver(repo.mapping())

        self.listRecordsFrom = None
        harvester.harvest()
        self.assertEqual('1999-12-01', self.listRecordsFrom)

        with open(statsPath) as statsFile:
            statsLines = statsFile.readlines()
        self.assertEqual(2, len(statsLines))
        lastRun = getHarvestedUploadedRecords(statsLines[1])
        self.assertEqual(('3', '3', '0', '116'), lastRun)
Exemple #11
0
 def testOtherMetadataPrefix(self):
     """Harvest with metadataPrefix 'lom' and check which id was sent.

     After harvest(), self.sendId must equal ['tud:oai:lorenet:147'].
     self.sendId is populated outside this block (presumably by the mock
     uploader).
     """
     self.logger = self.createLogger('tud')
     repo = self.MockRepository('tud', None)
     repo.metadataPrefix = 'lom'

     lomHarvester = Harvester(repo)
     lomHarvester.addObserver(MockOaiRequest('mocktud'))
     lomHarvester.addObserver(self.logger)
     uploader = repo.createUploader(self.logger.eventLogger)
     lomHarvester.addObserver(uploader)
     mapping = repo.mapping()
     lomHarvester.addObserver(mapping)

     lomHarvester.harvest()
     expectedIds = ['tud:oai:lorenet:147']
     self.assertEqual(expectedIds, self.sendId)
 def testContinuousHarvesting(self):
     """Harvest a continuous repository and check the 'from' value used.

     State files written beforehand: tud.stats with one finished run and
     tud.next with 'from' set to 2015-01-01T00:12:13Z and no resumption
     token. After harvest(), self.listRecordsFrom must equal that full
     timestamp. self.listRecordsFrom is presumably filled in by this test
     case acting as an observer; that code is outside this block.
     """
     self.mockRepository = MockOaiRequest('mocktud')
     # Context managers make sure both state files are flushed and closed
     # before harvest() runs.
     with open(self.stateDir + '/tud.stats', 'w') as f:
         f.write(' Started: 1999-12-01 16:37:41, Harvested/Uploaded/Total: 56/23/113, Done: 2004-12-31 16:39:15\n')
     with open(self.stateDir + '/tud.next', 'w') as f:
         JsonDict({'resumptionToken': None, 'from': "2015-01-01T00:12:13Z"}).dump(f)
     repository = self.MockRepository3('tud', 'http://repository.tudelft.nl/oai', None, 'tud', continuous=True)
     logger = self.createLogger()
     h = Harvester(repository)
     h.addObserver(self)
     h.addObserver(logger)
     h.addObserver(repository.createUploader(logger.eventLogger))
     h.addObserver(repository.mapping())
     self.listRecordsFrom = None
     h.harvest()
     # assertEqual replaces the deprecated assertEquals alias (removed in Python 3.12).
     self.assertEqual('2015-01-01T00:12:13Z', self.listRecordsFrom)
 def testResumptionToken(self):
     """Harvest after a run whose stats line carries a resumption token.

     tud.stats is seeded with a line ending in
     'ResumptionToken: ga+hier+verder'. After harvest(),
     self.listRecordsToken must equal that token. self.listRecordsToken is
     presumably filled in by this test case acting as an observer; that
     code is outside this block.
     """
     self.mockRepository = MockOaiRequest('mocktud')
     # Context manager guarantees the stats file is closed even if write() fails.
     with open(self.stateDir + '/tud.stats', 'w') as f:
         f.write('Started: 1999-12-01 16:37:41, Harvested/Uploaded/Total: 113/113/113, Done: 2004-12-31 16:39:15, ResumptionToken: ga+hier+verder\n')
     repository = self.MockRepository3('tud', 'http://repository.tudelft.nl/oai', None, 'tud')
     logger = self.createLogger()
     h = Harvester(repository)
     h.addObserver(self)
     h.addObserver(logger)
     h.addObserver(repository.createUploader(logger.eventLogger))
     h.addObserver(repository.mapping())
     self.listRecordsToken = None
     h.harvest()
     # assertEqual replaces the deprecated assertEquals alias (removed in Python 3.12).
     self.assertEqual('ga+hier+verder', self.listRecordsToken)
 def testOnlyErrorInLogFile(self):
     """Harvest when tud.stats contains only runs that ended in 'Error:'.

     Two error lines are written to tud.stats before harvesting. After
     harvest(), self.listRecordsFrom must equal 'aap'. Where that value
     comes from is not visible in this block (presumably a sentinel
     supplied by this test case's observer code).
     """
     self.mockRepository = MockOaiRequest('mocktud')
     # Context manager guarantees the stats file is closed even if write() fails.
     with open(self.stateDir + '/tud.stats', 'w') as f:
         f.write('Started: 1998-12-01 16:37:41, Harvested/Uploaded/Total: 113/113/113, Error:\n')
         f.write('Started: 1999-12-01 16:37:41, Harvested/Uploaded/Total: 113/113/113, Error: XXX\n')
     repository = self.MockRepository3('tud', 'http://repository.tudelft.nl/oai', None, 'tud')
     logger = self.createLogger()
     h = Harvester(repository)
     h.addObserver(self)
     h.addObserver(logger)
     h.addObserver(repository.createUploader(logger.eventLogger))
     h.addObserver(repository.mapping())
     self.listRecordsFrom = None
     h.harvest()
     # assertEqual replaces the deprecated assertEquals alias (removed in Python 3.12).
     self.assertEqual('aap', self.listRecordsFrom)
    def testIncrementalHarvest(self):
        """Harvest on top of existing state and verify 'from' and the new stats line.

        State files written beforehand: tud.stats (one finished run),
        tud.next ('from' 1999-12-01T16:37:41Z, no resumption token) and
        tud.ids (113 generated identifiers). After harvest():
        - self.listRecordsFrom must be the date part only, '1999-12-01';
        - tud.stats must hold two lines, and getHarvestedUploadedRecords on
          the second must return ('3', '3', '0', '116').
        """
        self.mockRepository = MockOaiRequest('mocktud')
        # Every file is opened through a context manager so it is flushed
        # and closed before harvest() reads the state directory.
        with open(self.stateDir + '/tud.stats', 'w') as f:
            f.write(' Started: 1999-12-01 16:37:41, Harvested/Uploaded/Total: 56/23/113, Done: 2004-12-31 16:39:15\n')
        with open(self.stateDir + '/tud.next', 'w') as f:
            JsonDict({'resumptionToken': None, 'from': "1999-12-01T16:37:41Z"}).dump(f)

        with open(self.stateDir + '/tud.ids', 'w') as f:
            for i in range(113):
                f.write('oai:tudfakeid:%05i\n' % i)
        repository = self.MockRepository3('tud', 'http://repository.tudelft.nl/oai', None, 'tud')
        logger = self.createLogger()
        h = Harvester(repository)
        h.addObserver(self)
        h.addObserver(logger)
        h.addObserver(repository.createUploader(logger.eventLogger))
        h.addObserver(repository.mapping())
        self.listRecordsFrom = None
        h.harvest()
        # assertEqual replaces the deprecated assertEquals alias (removed in Python 3.12).
        self.assertEqual('1999-12-01', self.listRecordsFrom)
        with open(self.stateDir + '/tud.stats') as f:
            lines = f.readlines()
        self.assertEqual(2, len(lines))
        self.assertEqual(('3', '3', '0', '116'), getHarvestedUploadedRecords(lines[1]))
 def testOtherMetadataPrefix(self):
     """Harvest with metadataPrefix 'lom' and check which id was sent.

     After harvest(), self.sendId must equal ['tud:oai:lorenet:147'].
     self.sendId is populated outside this block (presumably by the mock
     uploader).
     """
     self.logger = HarvesterLog(stateDir=self.stateDir, logDir=self.logDir, name='tud')
     repository = self.MockRepository('tud', None)
     repository.metadataPrefix = 'lom'
     harvester = Harvester(repository)
     harvester.addObserver(MockOaiRequest('mocktud'))
     harvester.addObserver(self.logger)
     harvester.addObserver(repository.createUploader(self.logger.eventLogger))
     harvester.addObserver(repository.mapping())
     harvester.harvest()
     # assertEqual replaces the deprecated assertEquals alias (removed in Python 3.12).
     self.assertEqual(['tud:oai:lorenet:147'], self.sendId)