コード例 #1
0
ファイル: harvesttest.py プロジェクト: seecr/meresco-fetch
 def _prepareHarvester(self, deleteAll=False):
     """Build a fresh Harvester for this test, keep it on self.harvester and return it.

     The harvester works in self.tempdir, logs to self.log, forwards the
     deleteAll flag and gets self.observer registered as its observer.
     """
     def frozenNow():
         # Replacement for the state's clock: always the same ZuluTime
         # instant, built anew on every call.
         return ZuluTime("1976-11-08T12:34:56Z")

     harvester = Harvester(self.tempdir, log=self.log, deleteAll=deleteAll)
     self.harvester = harvester
     harvester._state.now = frozenNow
     harvester.addObserver(self.observer)
     return harvester
コード例 #2
0
ファイル: harvesttest.py プロジェクト: seecr/meresco-fetch
class HarvestTest(SeecrTestCase):
    """Tests for Harvester.harvest() driven through a CallTrace observer.

    Each test starts with a Harvester working in self.tempdir whose log is an
    in-memory StringIO and whose state clock is replaced by a fixed ZuluTime.
    """

    def setUp(self):
        SeecrTestCase.setUp(self)
        self.log = StringIO()
        self.observer = CallTrace('observer')
        self._prepareHarvester()

    def _prepareHarvester(self, deleteAll=False):
        """Create a new Harvester, store it on self.harvester and return it.

        The harvester uses self.tempdir, logs to self.log and observes
        self.observer; deleteAll is passed straight through to Harvester.
        """
        self.harvester = Harvester(self.tempdir, log=self.log, deleteAll=deleteAll)
        # Fixed clock so the 'datetime' written to the state file is predictable.
        self.harvester._state.now = lambda: ZuluTime("1976-11-08T12:34:56Z")
        self.harvester.addObserver(self.observer)
        return self.harvester

    def _assertErrorPersisted(self, expectedMessage):
        """Assert that the 'state' file records an error and that the
        'last_error' file contains expectedMessage."""
        with open(join(self.tempdir, 'state')) as stateFile:
            persistedState = jsonLoad(stateFile)
        self.assertEqual({
            'harvestingReady': False,
            'datetime': '1976-11-08T12:34:56Z',
            'resumptionAttributes': None,
            'error': True}, persistedState)
        with open(join(self.tempdir, 'last_error')) as errorFile:
            lastError = errorFile.read()
        self.assertTrue(expectedMessage in lastError, lastError)

    def testHarvestNoRecords(self):
        # A single, final batch without records: only downloadBatch is called.
        batch = Batch()
        batch.harvestingReady = True
        self.observer.returnValues['downloadBatch'] = batch
        self.harvester.harvest()
        self.assertEqual(['downloadBatch'], self.observer.calledMethodNames())
        self.assertEqual('Harvesting.\n0 added, 0 deleted, 0 unchanged, 0 skipped.\n-\nFinished harvesting.\n', self.log.getvalue())

    def testHarvestMoreThanOneBatch(self):
        # Record an earlier, completed harvest containing id0, id1 and id9.
        self.harvester._events.markHarvestStart()
        self.harvester._events.markEvent(identifier='id0', uploadData='data0')
        self.harvester._events.markEvent(identifier='id1', uploadData='data1')
        self.harvester._events.markEvent(identifier='id9', uploadData='data9')
        self.harvester._events.markHarvestReady()
        self.assertEqual(['id0', 'id1', 'id9'], list(self.harvester._events.remainingAdds()))

        # The new harvest arrives in two batches; only the second one is final.
        batches = []
        batch = Batch()
        batch.records = [Record('id0', 'data0'), Record('id1', 'data1')]
        batch.resumptionAttributes = lambda: {'key': 'value1'}
        batches.append(batch)
        batch = Batch()
        batch.records = [Record('id2', 'data2')]
        batch.harvestingReady = True
        batches.append(batch)
        self.observer.methods['downloadBatch'] = lambda **kwargs: batches.pop(0)
        self.observer.methods['convert'] = lambda record: 'converted.' + record.data

        self.harvester.harvest()

        self.assertEqual(['downloadBatch', 'convert', 'uploadRecord', 'convert', 'uploadRecord', 'downloadBatch', 'convert', 'uploadRecord', 'deleteRecord'], self.observer.calledMethodNames())
        # Fourth call from the end is the second downloadBatch: it must carry
        # the resumption attributes of the first batch.
        lastDownloadBatchCall = self.observer.calledMethods[-4]
        self.assertEqual({'resumptionAttributes': {'key': 'value1'}}, lastDownloadBatchCall.kwargs)
        lastUploadRecordCall = self.observer.calledMethods[-2]
        self.assertEqual({'identifier': 'id2', 'data': 'converted.data2'}, lastUploadRecordCall.kwargs)
        # id9 was not part of the new harvest, so it is deleted at the end.
        deleteRecordCall = self.observer.calledMethods[-1]
        self.assertEqual({'identifier': 'id9'}, deleteRecordCall.kwargs)
        self.assertEqual(['id0', 'id1', 'id2'], list(self.harvester._events.remainingAdds()))

    def testDeleteAll(self):
        # Three records are known from an earlier harvest ...
        self.harvester._events.markHarvestStart()
        self.harvester._events.markEvent(identifier='id0', uploadData='data0')
        self.harvester._events.markEvent(identifier='id1', uploadData='data1')
        self.harvester._events.markEvent(identifier='id9', uploadData='data9')
        self.harvester._events.markHarvestReady()
        self.assertEqual(['id0', 'id1', 'id9'], list(self.harvester._events.remainingAdds()))
        # ... and a harvester created with deleteAll=True deletes each of them.
        self._prepareHarvester(deleteAll=True).harvest()
        self.assertEqual(['deleteRecord'] * 3, self.observer.calledMethodNames())
        self.assertEqual([], list(self.harvester._events.remainingAdds()))

    def testDownloadError(self):
        def downloadBatchRaises(resumptionAttributes):
            raise IOError('help!')
        self.observer.methods['downloadBatch'] = downloadBatchRaises
        # The download error must propagate out of harvest() ...
        self.assertRaises(IOError, self.harvester.harvest)
        # ... and be recorded on disk.
        self._assertErrorPersisted('help!')

    def testConvertError(self):
        batch = Batch()
        batch.records = [Record('id0', 'data0'), Record('id1', 'data1')]
        batch.harvestingReady = True
        self.observer.methods['downloadBatch'] = lambda **kwargs: batch

        def convertRaises(record):
            raise RuntimeError('help!')
        self.observer.methods['convert'] = convertRaises
        # The convert error must propagate out of harvest() ...
        self.assertRaises(RuntimeError, self.harvester.harvest)
        # ... and be recorded on disk.
        self._assertErrorPersisted('help!')

    def testSkipRecordException(self):
        batch = Batch()
        batch.records = [Record('id0', 'data0'), Record('id1', 'data1')]
        batch.harvestingReady = True
        self.observer.methods['downloadBatch'] = lambda **kwargs: batch

        def convertRaises(record):
            raise SkipRecordException()
        self.observer.methods['convert'] = convertRaises
        # SkipRecordException does not abort the harvest; each record is
        # logged as skipped and counted in the summary line.
        self.harvester.harvest()
        self.assertEqual([
            'Harvesting.',
            "Skipping record 'id0'",
            "Skipping record 'id1'",
            "0 added, 0 deleted, 0 unchanged, 2 skipped.",
            "-",
            "Finished harvesting.",
            ""], self.log.getvalue().split("\n"))

    def testUploadError(self):
        batch = Batch()
        batch.records = [Record('id0', 'data0'), Record('id1', 'data1')]
        batch.harvestingReady = True
        self.observer.methods['downloadBatch'] = lambda **kwargs: batch

        def uploadRecordRaises(identifier, data):
            raise RuntimeError('help!')
        self.observer.methods['uploadRecord'] = uploadRecordRaises
        self.observer.returnValues['convert'] = 'converted'
        # The upload error must propagate out of harvest() ...
        self.assertRaises(RuntimeError, self.harvester.harvest)
        # ... and be recorded on disk.
        self._assertErrorPersisted('help!')

    def testOnlyUploadUpdates(self):
        # The earlier harvest already uploaded 'converted.data0' and
        # 'converted.data1' for id0 and id1.
        self.harvester._events.markHarvestStart()
        self.harvester._events.markEvent(identifier='id0', uploadData='converted.data0')
        self.harvester._events.markEvent(identifier='id1', uploadData='converted.data1')
        self.harvester._events.markHarvestReady()
        batch = Batch()
        batch.records = [Record('id0', 'data0'), Record('id1', 'data1.changed')]
        batch.harvestingReady = True
        self.observer.methods['downloadBatch'] = lambda **kwargs: batch
        self.observer.methods['convert'] = lambda record: 'converted.' + record.data
        self.harvester.harvest()
        # Both records are converted, but only id1 (whose converted data
        # differs from what was recorded) is uploaded again.
        self.assertEqual(['downloadBatch', 'convert', 'convert', 'uploadRecord'], self.observer.calledMethodNames())
        self.assertEqual({'identifier': 'id1', 'data': 'converted.data1.changed'}, self.observer.calledMethods[-1].kwargs)
        self.assertEqual(['id0', 'id1'], list(self.harvester._events.remainingAdds()))

    def testDeleteOnlyWhenNotAlready(self):
        # In the earlier harvest id0 was added and id1 was already deleted.
        self.harvester._events.markHarvestStart()
        self.harvester._events.markEvent(identifier='id0', uploadData='converted.data0')
        self.harvester._events.markEvent(identifier='id1', delete=True)
        self.harvester._events.markHarvestReady()
        batch = Batch()
        batch.records = [Record('id0', delete=True), Record('id1', delete=True)]
        batch.harvestingReady = True
        self.observer.methods['downloadBatch'] = lambda **kwargs: batch
        self.harvester.harvest()
        # Only id0 results in a deleteRecord call.
        self.assertEqual(['downloadBatch', 'deleteRecord'], self.observer.calledMethodNames())
        self.assertEqual({'identifier': 'id0'}, self.observer.calledMethods[-1].kwargs)
        self.assertEqual([], list(self.harvester._events.remainingAdds()))

    def testDeleteOldIfHarvestingReady(self):
        # Prepare the working directory by hand: a state file saying the
        # harvest was ready, an empty 'current' file and a 'previous' file
        # listing two identifiers.
        JsonDict({
                'harvestingReady': True,
                'datetime': '1976-11-08T12:34:56Z',
                'resumptionAttributes': None,
                'error': False
            }).dump(join(self.tempdir, 'state'))
        with open(join(self.tempdir, 'current'), 'w') as f:
            f.write("")
        with open(join(self.tempdir, 'previous'), 'w') as f:
            # NOTE(review): columns are presumably identifier, action and
            # data hash -- confirm against the events file format.
            f.write("id:1\tA\tdatahash\n")
            f.write("id:2\tA\tdatahash\n")
        # A new harvester is created so it starts from the files written above.
        self._prepareHarvester()
        self.harvester.harvest()
        # Both identifiers from 'previous' are deleted; nothing is downloaded.
        self.assertEqual(['deleteRecord', 'deleteRecord'], self.observer.calledMethodNames())
        self.assertEqual({'identifier': 'id:1'}, self.observer.calledMethods[0].kwargs)
        self.assertEqual({'identifier': 'id:2'}, self.observer.calledMethods[1].kwargs)