class OaiRequestTest(SeecrTestCase):
    """Tests for OaiRequest and for the MockOaiRequest('mocktud') fixture."""

    def setUp(self):
        super(OaiRequestTest, self).setUp()
        self.request = MockOaiRequest('mocktud')

    def testUserAgentDefault(self):
        """identify() sends the default User-agent header to the url opener."""
        args = {}

        def myOwnUrlOpen(*fArgs, **fKwargs):
            # Record the call so the outgoing request can be inspected below.
            args['args'] = fArgs
            args['kwargs'] = fKwargs
            return StringIO(oaiResponseXML())

        request = OaiRequest("http://harvest.me", _urlopen=myOwnUrlOpen)
        request.identify()

        self.assertEqual("Meresco Harvester trunk", args['args'][0].headers['User-agent'])

    def testContextSetToTLS12(self):
        """When the opener raises SSLError, a second call is made with a TLS 1.2 context."""
        from ssl import SSLError, PROTOCOL_TLSv1_2
        calls = []

        def loggingUrlOpen(*fArgs, **fKwargs):
            calls.append(fKwargs)
            raise SSLError("Some error")

        request = OaiRequest("http://harvest.me", _urlopen=loggingUrlOpen)
        try:
            request.identify()
        except Exception:
            pass
        else:
            # Kept outside the try body: a bare except would swallow the
            # AssertionError raised by fail() and hide a missing exception.
            self.fail("Should have failed")
        self.assertEqual(2, len(calls))
        self.assertEqual(None, calls[0]['context'])
        context = calls[1]['context']
        self.assertEqual(PROTOCOL_TLSv1_2, context.protocol)

    def testMockOaiRequest(self):
        """A ListRecords request on the mock returns the canned responseDate."""
        response = self.request.request({'verb': 'ListRecords', 'metadataPrefix': 'oai_dc'})
        self.assertEqual('2004-12-29T13:19:27Z', xpathFirst(response.response, '/oai:OAI-PMH/oai:responseDate/text()'))

    def testOtherOaiRequest(self):
        """A GetRecord request on the mock returns the canned responseDate."""
        response = self.request.request({'verb': 'GetRecord', 'metadataPrefix': 'oai_dc', 'identifier': 'oai:rep:12345'})
        self.assertEqual('2005-04-28T12:16:27Z', xpathFirst(response.response, '/oai:OAI-PMH/oai:responseDate/text()'))

    def testListRecordsError(self):
        """A bad resumptionToken raises OAIError carrying the OAI error code and message."""
        try:
            self.request.listRecords(resumptionToken='BadResumptionToken')
        except OAIError as e:
            self.assertEqual('The value of the resumptionToken argument is invalid or expired.', e.errorMessage())
            self.assertEqual(u'badResumptionToken', e.errorCode())
        else:
            self.fail()
Exemple #2
0
class OaiRequestTest(unittest.TestCase):
    """Tests that exercise the MockOaiRequest('mocktud') fixture."""

    def setUp(self):
        self.request = MockOaiRequest('mocktud')

    def testMockOaiRequest(self):
        """A ListRecords request on the mock returns the canned responseDate."""
        response = self.request.request({'verb': 'ListRecords', 'metadataPrefix': 'oai_dc'})
        self.assertEqual('2004-12-29T13:19:27Z', xpathFirst(response.response, '/oai:OAI-PMH/oai:responseDate/text()'))

    def testOtherOaiRequest(self):
        """A GetRecord request on the mock returns the canned responseDate."""
        response = self.request.request({'verb': 'GetRecord', 'metadataPrefix': 'oai_dc', 'identifier': 'oai:rep:12345'})
        self.assertEqual('2005-04-28T12:16:27Z', xpathFirst(response.response, '/oai:OAI-PMH/oai:responseDate/text()'))

    def testListRecordsError(self):
        """A bad resumptionToken raises OAIError carrying the OAI error code and message."""
        try:
            self.request.listRecords(resumptionToken='BadResumptionToken')
        except OAIError as e:
            # "except X as e" is valid on Python 2.6+ and Python 3 alike.
            self.assertEqual('The value of the resumptionToken argument is invalid or expired.', e.errorMessage())
            self.assertEqual(u'badResumptionToken', e.errorCode())
        else:
            self.fail()
class OaiRequestTest(SeecrTestCase):
    """Tests for OaiRequest, OaiResponse and the MockOaiRequest('mocktud') fixture."""

    def setUp(self):
        super(OaiRequestTest, self).setUp()
        self.request = MockOaiRequest('mocktud')

    def testUserAgentDefault(self):
        """identify() sends the versioned default User-agent header to the url opener."""
        args = {}

        def myOwnUrlOpen(*fArgs, **fKwargs):
            # Record the call so the outgoing request can be inspected below.
            args['args'] = fArgs
            args['kwargs'] = fKwargs
            return StringIO(oaiResponseXML())

        request = OaiRequest("http://harvest.me", _urlopen=myOwnUrlOpen)
        request.identify()

        self.assertEqual("Meresco Harvester {}".format(VERSION),
                         args['args'][0].headers['User-agent'])

    def testHeaders(self):
        """_headers() falls back to the default agent for blank values and adds a Bearer key."""
        self.assertEqual(
            {"User-Agent": "Meresco Harvester {}".format(VERSION)},
            OaiRequest('http://example.com')._headers())
        self.assertEqual({"User-Agent": "User Agent 3.0"},
                         OaiRequest('http://example.com',
                                    userAgent="User Agent 3.0")._headers())
        self.assertEqual(
            {"User-Agent": "Meresco Harvester {}".format(VERSION)},
            OaiRequest('http://example.com', userAgent='')._headers())
        self.assertEqual(
            {"User-Agent": "Meresco Harvester {}".format(VERSION)},
            OaiRequest('http://example.com', userAgent=' ')._headers())
        self.assertEqual(
            {
                "User-Agent": "Meresco Harvester {}".format(VERSION),
                "Authorization": "Bearer GivenKey"
            },
            OaiRequest('http://example.com',
                       authorizationKey='GivenKey')._headers())

    def testContextSetToTLS12(self):
        """When the opener raises SSLError, a second call is made with a TLS 1.2 context."""
        from ssl import SSLError, PROTOCOL_TLSv1_2
        calls = []

        def loggingUrlOpen(*fArgs, **fKwargs):
            calls.append(fKwargs)
            raise SSLError("Some error")

        request = OaiRequest("http://harvest.me", _urlopen=loggingUrlOpen)
        try:
            request.identify()
        except Exception:
            pass
        else:
            # Kept outside the try body: a bare except would swallow the
            # AssertionError raised by fail() and hide a missing exception.
            self.fail("Should have failed")
        self.assertEqual(2, len(calls))
        self.assertEqual(None, calls[0]['context'])
        context = calls[1]['context']
        self.assertEqual(PROTOCOL_TLSv1_2, context.protocol)

    def testMockOaiRequest(self):
        """A ListRecords request on the mock returns the canned responseDate."""
        response = self.request.request({
            'verb': 'ListRecords',
            'metadataPrefix': 'oai_dc'
        })
        self.assertEqual(
            '2004-12-29T13:19:27Z',
            xpathFirst(response.response,
                       '/oai:OAI-PMH/oai:responseDate/text()'))

    def testOtherOaiRequest(self):
        """A GetRecord request on the mock returns the canned responseDate."""
        response = self.request.request({
            'verb': 'GetRecord',
            'metadataPrefix': 'oai_dc',
            'identifier': 'oai:rep:12345'
        })
        self.assertEqual(
            '2005-04-28T12:16:27Z',
            xpathFirst(response.response,
                       '/oai:OAI-PMH/oai:responseDate/text()'))

    def testListRecordsError(self):
        """A bad resumptionToken raises OAIError carrying the OAI error code and message."""
        try:
            self.request.listRecords(resumptionToken='BadResumptionToken')
            self.fail()
        except OAIError as e:
            self.assertEqual(
                'The value of the resumptionToken argument is invalid or expired.',
                e.errorMessage())
            self.assertEqual('badResumptionToken', e.errorCode())

    def testListRecords(self):
        """listRecords exposes resumptionToken, responseDate and the record elements."""
        response = self.request.listRecords(metadataPrefix='oai_dc')
        self.assertEqual("TestToken", response.resumptionToken)
        self.assertEqual("2004-12-29T13:19:27Z", response.responseDate)
        self.assertEqual(3, len(response.records))
        self.assertEqual(
            'oai:tudelft.nl:007087',
            xpathFirst(response.records[0],
                       'oai:header/oai:identifier/text()'))
        self.assertEqual(None,
                         xpathFirst(response.records[0], 'oai:header/@status'))

    def mockRequest(self, args):
        """Stand-in for _request: remember *args* and return the parsed mocktud/00001.xml."""
        self.mockRequest_args = args
        with open('mocktud/00001.xml') as fp:
            return parse(fp)

    def testListRecordArgs(self):
        """listRecords passes 'verb' first and maps from_/until/set/metadataPrefix to OAI arguments."""
        self.request._request = self.mockRequest

        self.request.listRecords(metadataPrefix='kaas')
        kwargs = dict(self.mockRequest_args)
        self.assertEqual('kaas', kwargs['metadataPrefix'])
        self.assertTrue('resumptionToken' not in kwargs)
        self.assertEqual(('verb', 'ListRecords'), self.mockRequest_args[0])

        self.request.listRecords(from_='from',
                                 until='until',
                                 set='set',
                                 metadataPrefix='prefix')
        kwargs = dict(self.mockRequest_args)
        self.assertEqual(('verb', 'ListRecords'), self.mockRequest_args[0])
        self.assertEqual('from', kwargs['from'])
        self.assertEqual('until', kwargs['until'])
        self.assertEqual('set', kwargs['set'])
        self.assertEqual('prefix', kwargs['metadataPrefix'])

    def testGetRecord(self):
        """getRecord exposes the requested record on response.record."""
        response = self.request.getRecord(identifier='oai:rep:12345',
                                          metadataPrefix='oai_dc')
        self.assertEqual(
            'oai:rep:12345',
            xpathFirst(response.record, 'oai:header/oai:identifier/text()'))

    def testListRecordsWithAnEmptyList(self):
        """An empty ListRecords response yields no records and an empty resumptionToken."""
        response = self.request.listRecords(resumptionToken='EmptyListToken')
        self.assertEqual(0, len(response.records))
        self.assertEqual("", response.resumptionToken)
        self.assertEqual("2005-01-12T14:34:49Z", response.responseDate)

    def testBuildRequestUrl(self):
        """_buildRequestUrl appends arguments with '?' or '&' depending on an existing query."""
        oaiRequest = OaiRequest("http://x.y.z/oai")
        self.assertEqual(
            "http://x.y.z/oai?verb=ListRecords&metadataPrefix=oai_dc",
            oaiRequest._buildRequestUrl([('verb', 'ListRecords'),
                                         ('metadataPrefix', 'oai_dc')]))

        oaiRequest = OaiRequest("http://x.y.z/oai?apikey=xyz123")
        self.assertEqual(
            "http://x.y.z/oai?apikey=xyz123&verb=ListRecords&metadataPrefix=oai_dc",
            oaiRequest._buildRequestUrl([('verb', 'ListRecords'),
                                         ('metadataPrefix', 'oai_dc')]))

    def testShouldUseOwnClockTimeAsResponseDateIfNonePresent(self):
        """With an empty responseDate, the value of OaiResponse._zulu() is used instead."""
        originalZuluMethod = OaiResponse._zulu
        # Pin the clock so the fallback value is predictable; restored in finally.
        OaiResponse._zulu = staticmethod(lambda: '2020-12-12T12:12:12Z')
        try:
            response = oaiResponse(responseDate='')
            self.assertEqual('2020-12-12T12:12:12Z', response.responseDate)
        finally:
            OaiResponse._zulu = originalZuluMethod
Exemple #4
0
class HarvesterTest(unittest.TestCase):
    """Tests for Harvester.

    The test case doubles as a self-shunt: it is handed to the mock
    repositories and registered as an observer, so its own send, delete,
    start, stop and listRecords methods record what the harvester does.
    """

    def setUp(self):
        self.sendCalled = 0
        self.sendException = None
        self.upload = None
        self.sendParts = []
        self.sendId = []
        self.listRecordsSet = None
        self.listRecordsToken = None
        self.startCalled = 0
        self.stopCalled = 0
        # State and log files share one temporary directory, removed in tearDown.
        self.logDir = self.stateDir = mkdtemp()
        self.logger = None

    def tearDown(self):
        if self.logger is not None:
            self.logger.close()
        rmtree(self.logDir)

    def createLogger(self, name='tud'):
        """Create the harvester log for *name*, keep it on self.logger and return it."""
        self.logger = State(stateDir=self.stateDir,
                            logDir=self.logDir,
                            name=name).getHarvesterLog()
        return self.logger

    def createServer(self, url='http://repository.tudelft.nl/oai'):
        """Return an OaiRequest for *url*."""
        return OaiRequest(url)

    def testCreateHarvester(self):
        """Each harvest() calls start and stop exactly once."""
        harvester = self.createHarvesterWithMockUploader('tud')
        self.assertEqual((0, 0), (self.startCalled, self.stopCalled))
        harvester.harvest()
        self.assertEqual((1, 1), (self.startCalled, self.stopCalled))
        harvester = self.createHarvesterWithMockUploader('eur')
        self.assertEqual((1, 1), (self.startCalled, self.stopCalled))
        harvester.harvest()
        self.assertEqual((2, 2), (self.startCalled, self.stopCalled))

    def testDoUpload(self):
        """A harvest sends three records and logs the resumption token in tud.stats."""
        harvester = self.createHarvesterWithMockUploader('tud')
        harvester.harvest()

        self.assertEqual(3, self.sendCalled)
        self.assertEqual('tud:oai:tudelft.nl:007193', self.sendId[2])
        record = parse(StringIO(self.sendParts[2]['record']))
        subjects = record.xpath(
            '/oai:record/oai:metadata/oai_dc:dc/dc:subject/text()',
            namespaces=namespaces)
        self.assertEqual([
            'quantitative electron microscopy',
            'statistical experimental design', 'parameter estimation'
        ], subjects)
        with open(os.path.join(self.stateDir, 'tud.stats')) as f:
            # The slice drops the final character and keeps the 26 before it.
            self.assertEqual('ResumptionToken: TestToken', f.read()[-27:-1])

    def testLogIDsForRemoval(self):
        """The harvested identifiers are written to tud.ids, one per line."""
        harvester = self.createHarvesterWithMockUploader('tud')
        harvester.harvest()
        with open(self.stateDir + '/tud.ids') as idsfile:
            self.assertEqual('tud:oai:tudelft.nl:007087',
                             idsfile.readline().strip())
            self.assertEqual('tud:oai:tudelft.nl:007192',
                             idsfile.readline().strip())
            self.assertEqual('tud:oai:tudelft.nl:007193',
                             idsfile.readline().strip())

    def createHarvesterWithMockUploader(self,
                                        name,
                                        set=None,
                                        mockRequest=None):
        """Build a Harvester observed by an OAI request, the logger, the uploader and the mapper.

        *mockRequest* replaces the default MockOaiRequest('mocktud') when given.
        """
        self.logger = self.createLogger(name)
        repository = self.MockRepository(name, set)
        uploader = repository.createUploader(self.logger.eventLogger())
        self.mapper = repository.mapping()
        harvester = Harvester(repository)
        harvester.addObserver(mockRequest or MockOaiRequest('mocktud'))
        harvester.addObserver(self.logger)
        harvester.addObserver(uploader)
        harvester.addObserver(self.mapper)
        return harvester

    def testSimpleStat(self):
        """The first tud.stats line holds start time, counters and end time."""
        harvester = self.createHarvesterWithMockUploader('tud')
        harvester.harvest()
        self.assertTrue(os.path.isfile(self.stateDir + '/tud.stats'))
        with open(self.stateDir + '/tud.stats') as fp:
            stats = fp.readline().strip().split(',')
        year = strftime('%Y')
        self.assertEqual('Started: %s-' % year, stats[0][:14])
        self.assertEqual(' Harvested/Uploaded/Deleted/Total: 3/3/0/3',
                         stats[1])
        self.assertEqual(' Done: %s-' % year, stats[2][:12])

    def testErrorStat(self):
        """A failing send is recorded as an Error entry in tud.stats."""
        harvester = self.createHarvesterWithMockUploader('tud')
        self.sendException = Exception('send failed')
        try:
            harvester.harvest()
        except Exception:
            # Only the logged outcome is under test here, not the propagation.
            pass
        with open(self.stateDir + '/tud.stats') as fp:
            stats = fp.readline().strip().split(',')
        self.assertTrue(stats[2].startswith(' Error: '), stats[2])
        self.assertTrue(stats[2].endswith('send failed'), stats[2])

    def testResumptionTokenLog(self):
        """The resumption token is the fourth comma-separated field of the stats line."""
        harvester = self.createHarvesterWithMockUploader('tud')
        harvester.harvest()
        with open(self.stateDir + '/tud.stats') as fp:
            stats = fp.readline().strip().split(',')
        self.assertEqual(' ResumptionToken: TestToken', stats[3])

    def testOtherMetadataPrefix(self):
        """Harvesting with metadataPrefix 'lom' sends the single lorenet record."""
        self.logger = self.createLogger('tud')
        repository = self.MockRepository('tud', None)
        repository.metadataPrefix = 'lom'
        harvester = Harvester(repository)
        harvester.addObserver(MockOaiRequest('mocktud'))
        harvester.addObserver(self.logger)
        # NOTE(review): eventLogger is passed uncalled here, whereas
        # createHarvesterWithMockUploader passes eventLogger() - confirm
        # which form is intended.
        harvester.addObserver(
            repository.createUploader(self.logger.eventLogger))
        harvester.addObserver(repository.mapping())
        harvester.harvest()
        self.assertEqual(['tud:oai:lorenet:147'], self.sendId)

    def testWriteAndSeek(self):
        """Seeking back in a file opened for writing overwrites in place."""
        # Written inside the temporary state directory so a failing assertion
        # cannot leave a stray file in the current working directory.
        path = os.path.join(self.stateDir, 'test')
        with open(path, 'w') as f:
            f.write('enige info: ')
            pos = f.tell()
            f.write('20000')
            f.seek(pos)
            f.write('12345')
        with open(path, "r") as f:
            self.assertEqual('enige info: 12345', f.readline().strip())
        os.remove(path)

    def testException(self):
        """sys.exc_info() exposes the class and message of the active exception."""
        try:
            raise Exception('aap')
        except Exception:
            self.assertEqual('aap', str(sys.exc_info()[1]))
            self.assertEqual("<class 'Exception'>", str(sys.exc_info()[0]))

    def testIncrementalHarvest(self):
        """With a 'from' in tud.next, only its date part is passed to listRecords."""
        self.mockRepository = MockOaiRequest('mocktud')
        with open(self.stateDir + '/tud.stats', 'w') as f:
            f.write(
                ' Started: 1999-12-01 16:37:41, Harvested/Uploaded/Total: 56/23/113, Done: 2004-12-31 16:39:15\n'
            )
        with open(self.stateDir + '/tud.next', 'w') as fp:
            JsonDict({
                'resumptionToken': None,
                'from': "1999-12-01T16:37:41Z"
            }).dump(fp)

        with open(self.stateDir + '/tud.ids', 'w') as f:
            for i in range(113):
                f.write('oai:tudfakeid:%05i\n' % i)
        repository = self.MockRepository3('tud',
                                          'http://repository.tudelft.nl/oai',
                                          None, 'tud')
        logger = self.createLogger()
        h = Harvester(repository)
        h.addObserver(self)
        h.addObserver(logger)
        h.addObserver(repository.createUploader(logger.eventLogger))
        h.addObserver(repository.mapping())
        self.listRecordsFrom = None
        h.harvest()
        self.assertEqual('1999-12-01', self.listRecordsFrom)
        with open(self.stateDir + '/tud.stats') as f:
            lines = f.readlines()
        self.assertEqual(2, len(lines))
        self.assertEqual(('3', '3', '0', '116'),
                         getHarvestedUploadedRecords(lines[1]))

    def testNotIncrementalInCaseOfError(self):
        """After an Error line, the start date of the earlier successful run is used as 'from'."""
        self.mockRepository = MockOaiRequest('mocktud')
        with open(self.stateDir + '/tud.stats', 'w') as f:
            f.write(
                'Started: 1998-12-01 16:37:41, Harvested/Uploaded/Total: 113/113/113, Done: 2004-12-31 16:39:15\n'
            )
            f.write(
                'Started: 1999-12-01 16:37:41, Harvested/Uploaded/Total: 113/113/113, Error: XXX\n'
            )
        repository = self.MockRepository3('tud',
                                          'http://repository.tudelft.nl/oai',
                                          None, 'tud')
        logger = self.createLogger()
        h = Harvester(repository)
        h.addObserver(self)
        h.addObserver(logger)
        h.addObserver(repository.createUploader(logger.eventLogger))
        h.addObserver(repository.mapping())
        self.listRecordsFrom = None
        h.harvest()
        self.assertEqual('1998-12-01', self.listRecordsFrom)

    def testOnlyErrorInLogFile(self):
        """With only Error lines logged, the shunt records its default from_ value 'aap'."""
        self.mockRepository = MockOaiRequest('mocktud')
        with open(self.stateDir + '/tud.stats', 'w') as f:
            f.write(
                'Started: 1998-12-01 16:37:41, Harvested/Uploaded/Total: 113/113/113, Error:\n'
            )
            f.write(
                'Started: 1999-12-01 16:37:41, Harvested/Uploaded/Total: 113/113/113, Error: XXX\n'
            )
        repository = self.MockRepository3('tud',
                                          'http://repository.tudelft.nl/oai',
                                          None, 'tud')
        logger = self.createLogger()
        h = Harvester(repository)
        h.addObserver(self)
        h.addObserver(logger)
        h.addObserver(repository.createUploader(logger.eventLogger))
        h.addObserver(repository.mapping())
        self.listRecordsFrom = None
        h.harvest()
        self.assertEqual('aap', self.listRecordsFrom)

    def testResumptionToken(self):
        """A ResumptionToken found in tud.stats is passed on to listRecords."""
        self.mockRepository = MockOaiRequest('mocktud')
        with open(self.stateDir + '/tud.stats', 'w') as f:
            f.write(
                'Started: 1999-12-01 16:37:41, Harvested/Uploaded/Total: 113/113/113, Done: 2004-12-31 16:39:15, ResumptionToken: ga+hier+verder\n'
            )
        repository = self.MockRepository3('tud',
                                          'http://repository.tudelft.nl/oai',
                                          None, 'tud')
        logger = self.createLogger()
        h = Harvester(repository)
        h.addObserver(self)
        h.addObserver(logger)
        h.addObserver(repository.createUploader(logger.eventLogger))
        h.addObserver(repository.mapping())
        self.listRecordsToken = None
        h.harvest()
        self.assertEqual('ga+hier+verder', self.listRecordsToken)

    def testContinuousHarvesting(self):
        """With continuous=True the full 'from' timestamp is passed to listRecords."""
        self.mockRepository = MockOaiRequest('mocktud')
        with open(self.stateDir + '/tud.stats', 'w') as f:
            f.write(
                ' Started: 1999-12-01 16:37:41, Harvested/Uploaded/Total: 56/23/113, Done: 2004-12-31 16:39:15\n'
            )

        with open(self.stateDir + '/tud.next', 'w') as f:
            JsonDict({
                'resumptionToken': None,
                'from': "2015-01-01T00:12:13Z"
            }).dump(f)
        repository = self.MockRepository3('tud',
                                          'http://repository.tudelft.nl/oai',
                                          None,
                                          'tud',
                                          continuous=True)
        logger = self.createLogger()
        h = Harvester(repository)
        h.addObserver(self)
        h.addObserver(logger)
        h.addObserver(repository.createUploader(logger.eventLogger))
        h.addObserver(repository.mapping())
        self.listRecordsFrom = None
        h.harvest()
        self.assertEqual('2015-01-01T00:12:13Z', self.listRecordsFrom)

    def testHarvestSet(self):
        """The repository's set is passed to listRecords."""
        self.mockRepository = MockOaiRequest('mocktud')
        harvester = self.createHarvesterWithMockUploader(
            'um', set='withfulltext:yes', mockRequest=self)
        harvester.harvest()
        self.assertEqual('withfulltext:yes', self.listRecordsSet)

    def mockHarvest(self, repository, logger, uploader):
        """Record the identifying attributes of *repository* in self.mockHarvestArgs."""
        if not hasattr(self, 'mockHarvestArgs'):
            self.mockHarvestArgs = []
        self.mockHarvestArgs.append({
            'name':
            repository.id,
            'baseurl':
            repository.baseurl,
            'set':
            repository.set,
            'repositoryGroupId':
            repository.repositoryGroupId
        })

    def testNoDateHarvester(self):
        "runs a test with xml containing no dates"
        harvester = self.createHarvesterWithMockUploader('tud')
        self.logger._state.token = 'NoDateToken'
        harvester.harvest()

    def testNothingInRepository(self):
        """An empty record list is logged with all counters at zero."""
        harvester = self.createHarvesterWithMockUploader('tud')
        self.logger._state.token = 'EmptyListToken'
        harvester.harvest()
        with open(self.stateDir + '/tud.stats') as fp:
            lines = fp.readlines()
        self.assertTrue(
            'Harvested/Uploaded/Deleted/Total: 0/0/0/0' in lines[0])

    def testUploadRecord(self):
        """upload() sends a normal record under a repository-prefixed id and deletes nothing."""
        harvester = self.createHarvesterWithMockUploader('tud')
        harvester.upload(oaiResponse(identifier='mockid'))
        self.assertEqual(['tud:mockid'], self.sendId)
        self.assertFalse(hasattr(self, 'delete_id'))

    def testSkippedRecord(self):
        """An upload flagged skip=True is neither sent nor deleted."""
        harvester = self.createHarvesterWithMockUploader('tud')

        def createUpload(repository, oaiResponse):
            upload = Upload(repository=repository, oaiResponse=oaiResponse)
            upload.id = "tud:mockid"
            upload.skip = True
            return upload

        self.mapper.createUpload = createUpload
        harvester.upload(oaiResponse(identifier='mockid'))
        self.assertEqual([], self.sendId)
        self.assertFalse(hasattr(self, 'delete_id'))

    def testDelete(self):
        """A deleted record triggers delete() instead of send()."""
        harvester = self.createHarvesterWithMockUploader('tud')
        harvester.upload(oaiResponse(identifier='mockid', deleted=True))
        self.assertEqual([], self.sendId)
        self.assertEqual('tud:mockid', self.delete_id)

    def testDcIdentifierTake2(self):
        """Harvest with the 'DcIdentifierHttp2' token and check tud.stats can be read."""
        self.sendFulltexturl = None
        harvester = self.createHarvesterWithMockUploader('tud')
        # NOTE(review): the other tests set self.logger._state.token; this one
        # sets self.logger.token - confirm the attribute is still honoured.
        self.logger.token = 'DcIdentifierHttp2'
        harvester.harvest()
        with open(self.stateDir + '/tud.stats') as f:
            f.readlines()

    def testHarvesterStopsIgnoringAfter100records(self):
        """With 101 invalid ids and maxIgnore 100, upload raises TooMuchInvalidDataException."""
        observer = CallTrace('observer')
        upload = Upload(repository=None,
                        oaiResponse=oaiResponse(identifier='mockid'))
        upload.id = 'mockid'
        observer.returnValues['createUpload'] = upload
        observer.returnValues['totalInvalidIds'] = 101
        observer.exceptions['send'] = InvalidDataException(
            upload.id, "message")
        repository = CallTrace("repository", returnValues={'maxIgnore': 100})
        harvester = Harvester(repository)
        harvester.addObserver(observer)
        self.assertRaises(
            TooMuchInvalidDataException,
            lambda: harvester.upload(oaiResponse(identifier='mockid')))
        self.assertEqual([
            'createUpload', "notifyHarvestedRecord", "send", "logInvalidData",
            "totalInvalidIds"
        ], [m.name for m in observer.calledMethods])

    def testHarvesterIgnoringInvalidDataErrors(self):
        """Below the maxIgnore limit an InvalidDataException is logged and ignored."""
        observer = CallTrace('observer')
        upload = Upload(repository=None,
                        oaiResponse=oaiResponse(identifier='mockid'))
        upload.id = 'mockid'
        observer.returnValues['createUpload'] = upload
        observer.returnValues['totalInvalidIds'] = 0
        observer.exceptions['send'] = InvalidDataException(
            upload.id, "message")
        repository = CallTrace("repository", returnValues={'maxIgnore': 100})
        harvester = Harvester(repository)
        harvester.addObserver(observer)
        harvester.upload(oaiResponse())
        self.assertEqual([
            'createUpload', "notifyHarvestedRecord", "send", 'logInvalidData',
            "totalInvalidIds", 'logIgnoredIdentifierWarning'
        ], [m.name for m in observer.calledMethods])

    # Self shunt: the methods below let the test case act as uploader,
    # repository factory and OAI request.
    def send(self, upload):
        """Record the upload, then raise self.sendException if one is set."""
        self.sendCalled += 1
        self.sendId.append(upload.id)
        self.sendParts.append(upload.parts)
        self.upload = upload
        if self.sendException:
            raise self.sendException

    def delete(self, anUpload):
        """Remember the id of the upload that was asked to be deleted."""
        self.delete_id = anUpload.id

    def uploaderInfo(self):
        return 'The uploader is connected to /dev/null'

    def start(self):
        self.startCalled += 1

    def stop(self):
        self.stopCalled += 1

    def listRecordsButWaitLong(self, a, b, c, d):
        sleep(20)

    def MockRepository(self, id, set):
        """Return a _MockRepository for *id* on http://mock.server with this test case attached."""
        return _MockRepository(id, 'http://mock.server', set, 'inst' + id,
                               self)

    def MockRepository2(self, nr):
        """Return a _MockRepository whose fields are all derived from the string *nr*."""
        return _MockRepository('reponame' + nr, 'url' + nr, 'set' + nr,
                               'instname' + nr, self)

    def MockRepository3(self,
                        id,
                        baseurl,
                        set,
                        repositoryGroupId,
                        continuous=False):
        """Return a _MockRepository with every field given explicitly."""
        return _MockRepository(id,
                               baseurl,
                               set,
                               repositoryGroupId,
                               self,
                               continuous=continuous)

    def mockssetarget(self):
        return self

    def createUploader(self, logger):
        """Return the test case itself as the uploader; *logger* is ignored."""
        return self

    def listRecords(self,
                    metadataPrefix=None,
                    from_="aap",
                    resumptionToken='mies',
                    set=None):
        """Record the received arguments and delegate to self.mockRepository.

        The defaults 'aap' and 'mies' are sentinels, so tests can tell when
        the harvester passed no from_ or resumptionToken. Only metadataPrefix
        (plus set, when given) or the resumptionToken is forwarded.
        """
        self.listRecordsFrom = from_
        self.listRecordsToken = resumptionToken
        self.listRecordsSet = set
        if metadataPrefix:
            if set:
                return self.mockRepository.listRecords(
                    metadataPrefix=metadataPrefix, set=set)
            return self.mockRepository.listRecords(
                metadataPrefix=metadataPrefix)
        return self.mockRepository.listRecords(resumptionToken=resumptionToken)
class HarvesterTest(unittest.TestCase):
    def setUp(self):
        """Reset the self-shunt bookkeeping and create a scratch directory.

        The counters and captured values written by the shunt methods of this
        test case are cleared, and one fresh temporary directory is used for
        both the state files and the log files.
        """
        # Call counters.
        self.sendCalled = self.startCalled = self.stopCalled = 0
        # Captured uploads; two distinct lists.
        self.sendId, self.sendParts = [], []
        self.upload = self.sendException = None
        # Captured listRecords arguments.
        self.listRecordsSet = self.listRecordsToken = None
        scratchDir = mkdtemp()
        self.stateDir = scratchDir
        self.logDir = scratchDir

    def tearDown(self):
        """Remove the scratch directory, including everything inside it."""
        scratchDir = self.logDir
        rmtree(scratchDir)

    def createLogger(self):
        """Create a HarvesterLog named 'tud', keep it as self.logger and return it."""
        logger = HarvesterLog(stateDir=self.stateDir, logDir=self.logDir, name='tud')
        self.logger = logger
        return logger

    def createServer(self, url='http://repository.tudelft.nl/oai'):
        """Return an OaiRequest for ``url``."""
        server = OaiRequest(url)
        return server

    def testCreateHarvester(self):
        """Every harvest() calls start() and stop() on the uploader exactly once."""
        for harvestsSoFar, name in enumerate(('tud', 'eur')):
            harvester = self.createHarvesterWithMockUploader(name)
            # Merely creating a harvester must not touch the uploader.
            before = (harvestsSoFar, harvestsSoFar)
            self.assertEqual(before, (self.startCalled, self.stopCalled))
            harvester.harvest()
            after = (harvestsSoFar + 1, harvestsSoFar + 1)
            self.assertEqual(after, (self.startCalled, self.stopCalled))

    def testDoUpload(self):
        """A harvest sends three uploads and logs the resumption token.

        Checks the id and the dc:subject values of the third upload, and that
        the stats file ends with the resumption token.
        """
        harvester = self.createHarvesterWithMockUploader('tud')
        harvester.harvest()

        self.assertEqual(3, self.sendCalled)
        self.assertEqual('tud:oai:tudelft.nl:007193', self.sendId[2])
        record = parse(StringIO(self.sendParts[2]['record']))
        subjects = record.xpath('/oai:record/oai:metadata/oai_dc:dc/dc:subject/text()', namespaces=namespaces)
        self.assertEqual(['quantitative electron microscopy', 'statistical experimental design', 'parameter estimation'], subjects)
        # Read through a context manager so the handle is closed right away
        # (the original used the Python-2-only file() builtin and never
        # closed it).
        with open(os.path.join(self.stateDir, 'tud.stats')) as statsFile:
            stats = statsFile.read()
        # The 26 characters just before the final character of the file.
        self.assertEqual('ResumptionToken: TestToken', stats[-27:-1])

    def testLogIDsForRemoval(self):
        """After a harvest the .ids file starts with the three harvested ids."""
        harvester = self.createHarvesterWithMockUploader('tud')
        harvester.harvest()
        expectedIds = (
            'tud:oai:tudelft.nl:007087',
            'tud:oai:tudelft.nl:007192',
            'tud:oai:tudelft.nl:007193',
        )
        with open(self.stateDir+'/tud.ids') as idsfile:
            # Compare line by line, in file order.
            for expected in expectedIds:
                self.assertEqual(expected, idsfile.readline().strip())

    def createHarvesterWithMockUploader(self, name, set=None, mockRequest=None):
        """Wire up a Harvester around a mock repository named ``name``.

        A fresh HarvesterLog is stored as self.logger and the repository's
        mapping as self.mapper. Observers are added in this order: the OAI
        request (``mockRequest`` if truthy, else MockOaiRequest('mocktud')),
        the logger, the uploader and the mapper.
        """
        self.logger = HarvesterLog(stateDir=self.stateDir, logDir=self.logDir, name=name)
        repository = self.MockRepository(name, set)
        uploader = repository.createUploader(self.logger.eventLogger())
        self.mapper = repository.mapping()
        harvester = Harvester(repository)
        if mockRequest:
            request = mockRequest
        else:
            request = MockOaiRequest('mocktud')
        for observer in (request, self.logger, uploader, self.mapper):
            harvester.addObserver(observer)
        return harvester

    def testSimpleStat(self):
        """A successful harvest writes start time, counts and end time to the stats file."""
        harvester = self.createHarvesterWithMockUploader('tud')
        harvester.harvest()
        statsPath = self.stateDir+'/tud.stats'
        self.assertTrue(os.path.isfile(statsPath))
        # Close the stats file deterministically instead of leaking the handle.
        with open(statsPath) as statsFile:
            stats = statsFile.readline().strip().split(',')
        year = strftime('%Y')
        self.assertEqual('Started: %s-'%year, stats[0][:14])
        self.assertEqual(' Harvested/Uploaded/Deleted/Total: 3/3/0/3', stats[1])
        self.assertEqual(' Done: %s-'%year, stats[2][:12])

    def testErrorStat(self):
        """A failing send() ends up as an ' Error: ...' entry in the stats file."""
        harvester = self.createHarvesterWithMockUploader('tud')
        self.sendException = Exception('send failed')
        try:
            harvester.harvest()
        except Exception:
            # Whether harvest() re-raises is not the subject of this test;
            # only the logged error is checked below. Catching Exception
            # instead of using a bare except lets KeyboardInterrupt and
            # SystemExit propagate.
            pass
        # Close the stats file deterministically instead of leaking the handle.
        with open(self.stateDir + '/tud.stats') as statsFile:
            stats = statsFile.readline().strip().split(',')
        self.assertTrue(stats[2].startswith(' Error: '), stats[2])
        self.assertTrue(stats[2].endswith('send failed'), stats[2])

    def testResumptionTokenLog(self):
        """The fourth comma-separated field of the stats line holds the resumption token."""
        harvester = self.createHarvesterWithMockUploader('tud')
        harvester.harvest()
        # Close the stats file deterministically instead of leaking the handle.
        with open(self.stateDir + '/tud.stats') as statsFile:
            stats = statsFile.readline().strip().split(',')
        self.assertEqual(' ResumptionToken: TestToken', stats[3])

    def testOtherMetadataPrefix(self):
        """Harvesting with metadataPrefix 'lom' uploads the single lorenet record."""
        self.logger = HarvesterLog(stateDir=self.stateDir, logDir=self.logDir, name='tud')
        repository = self.MockRepository('tud', None)
        repository.metadataPrefix = 'lom'
        harvester = Harvester(repository)
        observe = harvester.addObserver
        observe(MockOaiRequest('mocktud'))
        observe(self.logger)
        # NOTE(review): eventLogger is passed uncalled here, whereas
        # createHarvesterWithMockUploader passes eventLogger() - confirm
        # which one is intended.
        observe(repository.createUploader(self.logger.eventLogger))
        observe(repository.mapping())
        harvester.harvest()
        expectedIds = ['tud:oai:lorenet:147']
        self.assertEqual(expectedIds, self.sendId)

    def testWriteAndSeek(self):
        f = open('test','w')
        f.write('enige info: ')
        pos = f.tell()
        f.write('20000')
        f.seek(pos)
        f.write('12345')
        f.close()
        self.assertEquals('enige info: 12345', open('test','r').readline().strip())
        os.remove('test')

    def testException(self):
        try:
            raise Exception('aap')
            self.fail()
        except:
            self.assertEquals('aap', str(sys.exc_value))
            self.assertTrue('exceptions.Exception' in str(sys.exc_type), str(sys.exc_type))

    def testIncrementalHarvest(self):
        """A harvest after a completed one continues from the stored 'from' date.

        Prepares a stats line, a tud.next state file and 113 known ids, then
        checks that listRecords receives the date part of the stored 'from'
        value and that a second stats line reports 3/3/0/116.
        """
        self.mockRepository = MockOaiRequest('mocktud')
        with open(self.stateDir + '/tud.stats', 'w') as f:
            f.write(' Started: 1999-12-01 16:37:41, Harvested/Uploaded/Total: 56/23/113, Done: 2004-12-31 16:39:15\n')
        # Hand dump() a managed file object so the state file is flushed and
        # closed before the harvester reads it.
        with open(self.stateDir + '/tud.next', 'w') as f:
            JsonDict({'resumptionToken': None, 'from': "1999-12-01T16:37:41Z"}).dump(f)

        with open(self.stateDir + '/tud.ids', 'w') as f:
            for i in range(113):
                f.write('oai:tudfakeid:%05i\n'%i)
        repository = self.MockRepository3('tud' ,'http://repository.tudelft.nl/oai', None, 'tud')
        logger = self.createLogger()
        h = Harvester(repository)
        # This test case is the OAI request observer; see listRecords.
        h.addObserver(self)
        h.addObserver(logger)
        h.addObserver(repository.createUploader(logger.eventLogger))
        h.addObserver(repository.mapping())
        self.listRecordsFrom = None
        h.harvest()
        self.assertEqual('1999-12-01', self.listRecordsFrom)
        with open(self.stateDir + '/tud.stats') as f:
            lines = f.readlines()
        self.assertEqual(2, len(lines))
        self.assertEqual(('3', '3', '0', '116'), getHarvestedUploadedRecords(lines[1]))

    def testNotIncrementalInCaseOfError(self):
        """After a failed harvest the start date of the last successful one is used."""
        self.mockRepository = MockOaiRequest('mocktud')
        previousRuns = [
            'Started: 1998-12-01 16:37:41, Harvested/Uploaded/Total: 113/113/113, Done: 2004-12-31 16:39:15\n',
            'Started: 1999-12-01 16:37:41, Harvested/Uploaded/Total: 113/113/113, Error: XXX\n',
        ]
        with open(self.stateDir + '/tud.stats', 'w') as statsFile:
            for line in previousRuns:
                statsFile.write(line)
        repository = self.MockRepository3('tud' ,'http://repository.tudelft.nl/oai', None, 'tud')
        logger = self.createLogger()
        h = Harvester(repository)
        observe = h.addObserver
        # This test case is the OAI request observer; see listRecords.
        observe(self)
        observe(logger)
        observe(repository.createUploader(logger.eventLogger))
        observe(repository.mapping())
        self.listRecordsFrom = None
        h.harvest()
        self.assertEqual('1998-12-01', self.listRecordsFrom)

    def testOnlyErrorInLogFile(self):
        """With only failed runs logged, listRecords gets no 'from' and keeps its default."""
        self.mockRepository = MockOaiRequest('mocktud')
        previousRuns = [
            'Started: 1998-12-01 16:37:41, Harvested/Uploaded/Total: 113/113/113, Error:\n',
            'Started: 1999-12-01 16:37:41, Harvested/Uploaded/Total: 113/113/113, Error: XXX\n',
        ]
        with open(self.stateDir + '/tud.stats', 'w') as statsFile:
            for line in previousRuns:
                statsFile.write(line)
        repository = self.MockRepository3('tud' ,'http://repository.tudelft.nl/oai', None, 'tud')
        logger = self.createLogger()
        h = Harvester(repository)
        observe = h.addObserver
        # This test case is the OAI request observer; see listRecords.
        observe(self)
        observe(logger)
        observe(repository.createUploader(logger.eventLogger))
        observe(repository.mapping())
        self.listRecordsFrom = None
        h.harvest()
        # 'aap' is the default value of from_ in the listRecords shunt.
        self.assertEqual('aap', self.listRecordsFrom)

    def testResumptionToken(self):
        """A resumption token found in the stats file is passed on to listRecords."""
        self.mockRepository = MockOaiRequest('mocktud')
        previousRun = 'Started: 1999-12-01 16:37:41, Harvested/Uploaded/Total: 113/113/113, Done: 2004-12-31 16:39:15, ResumptionToken: ga+hier+verder\n'
        with open(self.stateDir + '/tud.stats', 'w') as statsFile:
            statsFile.write(previousRun)
        repository = self.MockRepository3('tud' ,'http://repository.tudelft.nl/oai', None, 'tud')
        logger = self.createLogger()
        h = Harvester(repository)
        observe = h.addObserver
        # This test case is the OAI request observer; see listRecords.
        observe(self)
        observe(logger)
        observe(repository.createUploader(logger.eventLogger))
        observe(repository.mapping())
        self.listRecordsToken = None
        h.harvest()
        self.assertEqual('ga+hier+verder', self.listRecordsToken)

    def testContinuousHarvesting(self):
        """With continuous=True the full stored 'from' timestamp is passed to listRecords."""
        self.mockRepository = MockOaiRequest('mocktud')
        with open(self.stateDir + '/tud.stats', 'w') as f:
            f.write(' Started: 1999-12-01 16:37:41, Harvested/Uploaded/Total: 56/23/113, Done: 2004-12-31 16:39:15\n')
        # Hand dump() a managed file object so the state file is flushed and
        # closed before the harvester reads it.
        with open(self.stateDir + '/tud.next', 'w') as f:
            JsonDict({'resumptionToken': None, 'from': "2015-01-01T00:12:13Z"}).dump(f)
        repository = self.MockRepository3('tud' ,'http://repository.tudelft.nl/oai', None, 'tud', continuous=True)
        logger = self.createLogger()
        h = Harvester(repository)
        # This test case is the OAI request observer; see listRecords.
        h.addObserver(self)
        h.addObserver(logger)
        h.addObserver(repository.createUploader(logger.eventLogger))
        h.addObserver(repository.mapping())
        self.listRecordsFrom = None
        h.harvest()
        self.assertEqual('2015-01-01T00:12:13Z', self.listRecordsFrom)

    def testHarvestSet(self):
        """The repository's set is passed on to listRecords."""
        self.mockRepository = MockOaiRequest('mocktud')
        setSpec = 'withfulltext:yes'
        # This test case is used as the OAI request, so listRecords records the set.
        harvester = self.createHarvesterWithMockUploader('um', set=setSpec, mockRequest=self)
        harvester.harvest()
        self.assertEqual('withfulltext:yes', self.listRecordsSet)

    def mockHarvest(self, repository, logger, uploader):
        """Append the repository's key attributes to self.mockHarvestArgs.

        The list is created on first use; ``logger`` and ``uploader`` are ignored.
        """
        if not hasattr(self, 'mockHarvestArgs'):
            self.mockHarvestArgs = []
        recorded = {
            'name': repository.id,
            'baseurl': repository.baseurl,
            'set': repository.set,
            'repositoryGroupId': repository.repositoryGroupId,
        }
        self.mockHarvestArgs.append(recorded)

    def testNoDateHarvester(self):
        """Run a harvest against XML that contains no dates; it only has to complete."""
        harvester = self.createHarvesterWithMockUploader('tud')
        state = self.logger._state
        state.token = 'NoDateToken'
        harvester.harvest()

    def testNothingInRepository(self):
        """Harvesting an empty record list logs 0/0/0/0 in the stats file."""
        harvester = self.createHarvesterWithMockUploader('tud')
        self.logger._state.token='EmptyListToken'
        harvester.harvest()
        # Close the stats file deterministically instead of leaking the handle.
        with open(self.stateDir+'/tud.stats') as statsFile:
            lines = statsFile.readlines()
        self.assertTrue('Harvested/Uploaded/Deleted/Total: 0/0/0/0' in lines[0])

    def testUploadRecord(self):
        """Uploading a normal record sends it under a prefixed id and deletes nothing."""
        harvester = self.createHarvesterWithMockUploader('tud')
        response = oaiResponse(identifier='mockid')
        harvester.upload(response)
        self.assertEqual(['tud:mockid'], self.sendId)
        # delete() would have set delete_id on this test case.
        self.assertFalse(hasattr(self, 'delete_id'))

    def testSkippedRecord(self):
        """An upload flagged with skip=True is neither sent nor deleted."""
        harvester = self.createHarvesterWithMockUploader('tud')

        def createUpload(repository, oaiResponse):
            # Replacement for the mapper's createUpload that marks the upload as skipped.
            skipped = Upload(repository=repository, oaiResponse=oaiResponse)
            skipped.id = "tud:mockid"
            skipped.skip = True
            return skipped

        self.mapper.createUpload = createUpload
        response = oaiResponse(identifier='mockid')
        harvester.upload(response)
        self.assertEqual([], self.sendId)
        self.assertFalse(hasattr(self, 'delete_id'))

    def testDelete(self):
        """A record marked as deleted is passed to delete() and is not sent."""
        harvester = self.createHarvesterWithMockUploader('tud')
        deletedResponse = oaiResponse(identifier='mockid', deleted=True)
        harvester.upload(deletedResponse)
        self.assertEqual([], self.sendId)
        self.assertEqual('tud:mockid', self.delete_id)

    def testDcIdentifierTake2(self):
        """Run a harvest and read the stats file; it only has to complete without error."""
        self.sendFulltexturl=None
        harvester = self.createHarvesterWithMockUploader('tud')
        # NOTE(review): sibling tests set self.logger._state.token; this one
        # sets self.logger.token - confirm that the token is actually used.
        self.logger.token='DcIdentifierHttp2'
        harvester.harvest()
        # Close the stats file deterministically instead of leaking the handle.
        with open(self.stateDir+'/tud.stats') as statsFile:
            statsFile.readlines()

    def testHarvesterStopsIgnoringAfter100records(self):
        """With 101 invalid ids and maxIgnore 100, upload() raises TooMuchInvalidDataException."""
        upload = Upload(repository=None, oaiResponse=oaiResponse(identifier='mockid'))
        upload.id = 'mockid'
        observer = CallTrace('observer')
        observer.returnValues['createUpload'] = upload
        # One more invalid id than the repository allows.
        observer.returnValues['totalInvalidIds'] = 101
        observer.exceptions['send'] =  InvalidDataException(upload.id, "message")
        repository = CallTrace("repository", returnValues={'maxIgnore': 100})
        harvester = Harvester(repository)
        harvester.addObserver(observer)
        self.assertRaises(TooMuchInvalidDataException, lambda: harvester.upload(oaiResponse(identifier='mockid')))
        expectedCalls = ['createUpload', "notifyHarvestedRecord", "send", "logInvalidData", "totalInvalidIds"]
        actualCalls = [method.name for method in observer.calledMethods]
        self.assertEqual(expectedCalls, actualCalls)

    def testHarvesterIgnoringInvalidDataErrors(self):
        """Below maxIgnore an InvalidDataException from send() is logged, not raised."""
        upload = Upload(repository=None, oaiResponse=oaiResponse(identifier='mockid'))
        upload.id = 'mockid'
        observer = CallTrace('observer')
        observer.returnValues['createUpload'] = upload
        observer.returnValues['totalInvalidIds'] = 0
        observer.exceptions['send'] =  InvalidDataException(upload.id, "message")
        repository = CallTrace("repository", returnValues={'maxIgnore': 100})
        harvester = Harvester(repository)
        harvester.addObserver(observer)
        harvester.upload(oaiResponse())
        expectedCalls = ['createUpload', "notifyHarvestedRecord", "send", 'logInvalidData', "totalInvalidIds", 'logIgnoredIdentifierWarning']
        actualCalls = [method.name for method in observer.calledMethods]
        self.assertEqual(expectedCalls, actualCalls)

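    # Self shunt: the methods below let this test case itself act as the
    # uploader, the OAI request and the mock repository factory.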
    def send(self, upload):
        """Record the upload and, when configured, fail with self.sendException.

        The call counter, the upload's id and parts and the upload itself are
        stored before the exception (if any) is raised.
        """
        self.sendCalled = self.sendCalled + 1
        self.sendId.append(upload.id)
        self.sendParts.append(upload.parts)
        self.upload = upload
        failure = self.sendException
        if not failure:
            return
        raise failure

    def delete(self, anUpload):
        """Remember the id of the upload that was asked to be deleted."""
        deletedId = anUpload.id
        self.delete_id = deletedId

    def uploaderInfo(self):
        """Return a fixed, human-readable description of this fake uploader."""
        description = 'The uploader is connected to /dev/null'
        return description

    def start(self):
        """Count one more call to start()."""
        self.startCalled = self.startCalled + 1

    def stop(self):
        """Count one more call to stop()."""
        self.stopCalled = self.stopCalled + 1

    def listRecordsButWaitLong(self, a, b, c, d):
        """Block for twenty seconds; the four arguments are ignored."""
        delayInSeconds = 20
        sleep(delayInSeconds)

    def MockRepository(self, id, set):
        """Build a _MockRepository for ``id`` and ``set`` on a fixed base url.

        The fourth argument is 'inst' followed by ``id``; this test case itself
        is passed as the last positional argument.
        """
        baseurl = 'http://mock.server'
        groupId = 'inst' + id
        return _MockRepository(id, baseurl, set, groupId, self)

    def MockRepository2(self, nr):
        """Build a _MockRepository whose four string arguments all end in ``nr``.

        ``nr`` is concatenated to string prefixes, so it has to be a string.
        """
        prefixes = ('reponame', 'url', 'set', 'instname')
        name, url, setName, instName = [prefix + nr for prefix in prefixes]
        return _MockRepository(name, url, setName, instName, self)

    def MockRepository3(self, id, baseurl, set, repositoryGroupId, continuous=False):
        """Build a _MockRepository from explicitly given arguments.

        All arguments are passed through unchanged; this test case is appended
        as the last positional argument and ``continuous`` goes in by keyword.
        """
        positional = (id, baseurl, set, repositoryGroupId, self)
        return _MockRepository(*positional, continuous=continuous)

    def mockssetarget(self):
        """Return this test case itself."""
        target = self
        return target

    def createUploader(self, logger):
        """Return this test case as the uploader; ``logger`` is ignored."""
        uploader = self
        return uploader

    def listRecords(self, metadataPrefix=None, from_="aap", resumptionToken='mies', set=None):
        """Record the request arguments and delegate to ``self.mockRepository``.

        ``from_``, ``resumptionToken`` and ``set`` are stored on the test case
        so that tests can assert on them. With a metadataPrefix the request is
        forwarded by prefix (plus ``set`` when it is truthy); without one it is
        forwarded by resumptionToken only. ``from_`` is recorded but never
        forwarded.
        """
        self.listRecordsFrom = from_
        self.listRecordsToken = resumptionToken
        self.listRecordsSet = set
        if not metadataPrefix:
            return self.mockRepository.listRecords(resumptionToken=resumptionToken)
        forwarded = {'metadataPrefix': metadataPrefix}
        if set:
            forwarded['set'] = set
        return self.mockRepository.listRecords(**forwarded)