def _prepareHarvester(self, deleteAll=False):
    """Build a fresh Harvester for this test and return it.

    The harvester is created on ``self.tempdir`` with ``self.log`` as its
    log and the given ``deleteAll`` flag, stored as ``self.harvester``,
    given a fixed clock, and has ``self.observer`` registered on it.
    """
    def fixedNow():
        # Pin the state's clock so tests see a constant timestamp.
        return ZuluTime("1976-11-08T12:34:56Z")

    harvester = Harvester(self.tempdir, log=self.log, deleteAll=deleteAll)
    self.harvester = harvester
    harvester._state.now = fixedNow
    harvester.addObserver(self.observer)
    return harvester
class HarvestTest(SeecrTestCase):
    """Tests for Harvester.harvest() driven through a CallTrace observer.

    Every test runs against a Harvester rooted in ``self.tempdir`` whose
    state clock is pinned to 1976-11-08T12:34:56Z (see _prepareHarvester).
    """

    def setUp(self):
        SeecrTestCase.setUp(self)
        self.log = StringIO()
        self.observer = CallTrace('observer')
        self._prepareHarvester()

    def _prepareHarvester(self, deleteAll=False):
        """Create self.harvester with a fixed clock and self.observer attached; return it."""
        self.harvester = Harvester(self.tempdir, log=self.log, deleteAll=deleteAll)
        self.harvester._state.now = lambda: ZuluTime("1976-11-08T12:34:56Z")
        self.harvester.addObserver(self.observer)
        return self.harvester

    def _assertErrorStatePersisted(self, expectedMessage):
        """Check the 'state' and 'last_error' files written to self.tempdir.

        Asserts that 'state' holds the error state stamped with the fixed
        test time, and that 'last_error' contains ``expectedMessage``.
        Both files are closed explicitly after reading.
        """
        with open(join(self.tempdir, 'state')) as stateFile:
            persistedState = jsonLoad(stateFile)
        self.assertEqual({
                'harvestingReady': False,
                'datetime': '1976-11-08T12:34:56Z',
                'resumptionAttributes': None,
                'error': True,
            }, persistedState)
        with open(join(self.tempdir, 'last_error')) as errorFile:
            lastError = errorFile.read()
        self.assertIn(expectedMessage, lastError)

    # A ready batch without records: only downloadBatch is called and the
    # log reports all-zero counters.
    def testHarvestNoRecords(self):
        batch = Batch()
        batch.harvestingReady = True
        self.observer.returnValues['downloadBatch'] = batch

        self.harvester.harvest()

        self.assertEqual(['downloadBatch'], self.observer.calledMethodNames())
        self.assertEqual(
            'Harvesting.\n0 added, 0 deleted, 0 unchanged, 0 skipped.\n-\nFinished harvesting.\n',
            self.log.getvalue())

    # Two batches are downloaded in one harvest; the second download receives
    # the first batch's resumption attributes, and 'id9' (recorded earlier but
    # absent from both batches) is expected to be deleted at the end.
    def testHarvestMoreThanOneBatch(self):
        self.harvester._events.markHarvestStart()
        self.harvester._events.markEvent(identifier='id0', uploadData='data0')
        self.harvester._events.markEvent(identifier='id1', uploadData='data1')
        self.harvester._events.markEvent(identifier='id9', uploadData='data9')
        self.harvester._events.markHarvestReady()
        self.assertEqual(['id0', 'id1', 'id9'], list(self.harvester._events.remainingAdds()))

        firstBatch = Batch()
        firstBatch.records = [Record('id0', 'data0'), Record('id1', 'data1')]
        firstBatch.resumptionAttributes = lambda: {'key': 'value1'}
        lastBatch = Batch()
        lastBatch.records = [Record('id2', 'data2')]
        lastBatch.harvestingReady = True
        batches = [firstBatch, lastBatch]
        # Each downloadBatch call hands out the next prepared batch.
        self.observer.methods['downloadBatch'] = lambda **kwargs: batches.pop(0)
        self.observer.methods['convert'] = lambda record: 'converted.' + record.data

        self.harvester.harvest()

        self.assertEqual([
                'downloadBatch',
                'convert', 'uploadRecord',
                'convert', 'uploadRecord',
                'downloadBatch',
                'convert', 'uploadRecord',
                'deleteRecord',
            ], self.observer.calledMethodNames())
        lastDownloadBatchCall = self.observer.calledMethods[-4]
        self.assertEqual({'resumptionAttributes': {'key': 'value1'}}, lastDownloadBatchCall.kwargs)
        lastUploadRecordCall = self.observer.calledMethods[-2]
        self.assertEqual({'identifier': 'id2', 'data': 'converted.data2'}, lastUploadRecordCall.kwargs)
        deleteRecordCall = self.observer.calledMethods[-1]
        self.assertEqual({'identifier': 'id9'}, deleteRecordCall.kwargs)
        self.assertEqual(['id0', 'id1', 'id2'], list(self.harvester._events.remainingAdds()))

    # With deleteAll=True the three recorded adds are expected to produce
    # three deleteRecord calls and nothing else.
    def testDeleteAll(self):
        self.harvester._events.markHarvestStart()
        self.harvester._events.markEvent(identifier='id0', uploadData='data0')
        self.harvester._events.markEvent(identifier='id1', uploadData='data1')
        self.harvester._events.markEvent(identifier='id9', uploadData='data9')
        self.harvester._events.markHarvestReady()
        self.assertEqual(['id0', 'id1', 'id9'], list(self.harvester._events.remainingAdds()))

        self._prepareHarvester(deleteAll=True).harvest()

        self.assertEqual(['deleteRecord'] * 3, self.observer.calledMethodNames())
        self.assertEqual([], list(self.harvester._events.remainingAdds()))

    # An exception raised by downloadBatch propagates out of harvest() and
    # the error state is persisted.
    def testDownloadError(self):
        def downloadBatchRaises(resumptionAttributes):
            raise IOError('help!')
        self.observer.methods['downloadBatch'] = downloadBatchRaises

        self.assertRaises(IOError, self.harvester.harvest)

        self._assertErrorStatePersisted('help!')

    # An exception raised by convert propagates out of harvest() and the
    # error state is persisted.
    def testConvertError(self):
        batch = Batch()
        batch.records = [Record('id0', 'data0'), Record('id1', 'data1')]
        batch.harvestingReady = True
        self.observer.methods['downloadBatch'] = lambda **kwargs: batch

        def convertRaises(record):
            raise RuntimeError('help!')
        self.observer.methods['convert'] = convertRaises

        self.assertRaises(RuntimeError, self.harvester.harvest)

        self._assertErrorStatePersisted('help!')

    # SkipRecordException from convert does not abort the harvest; each
    # record is logged as skipped and counted in the summary line.
    def testSkipRecordException(self):
        batch = Batch()
        batch.records = [Record('id0', 'data0'), Record('id1', 'data1')]
        batch.harvestingReady = True
        self.observer.methods['downloadBatch'] = lambda **kwargs: batch

        def convertRaises(record):
            raise SkipRecordException()
        self.observer.methods['convert'] = convertRaises

        self.harvester.harvest()

        self.assertEqual([
                'Harvesting.',
                "Skipping record 'id0'",
                "Skipping record 'id1'",
                "0 added, 0 deleted, 0 unchanged, 2 skipped.",
                "-",
                "Finished harvesting.",
                "",
            ], self.log.getvalue().split("\n"))

    # An exception raised by uploadRecord propagates out of harvest() and
    # the error state is persisted.
    def testUploadError(self):
        batch = Batch()
        batch.records = [Record('id0', 'data0'), Record('id1', 'data1')]
        batch.harvestingReady = True
        self.observer.methods['downloadBatch'] = lambda **kwargs: batch

        def uploadRecordRaises(identifier, data):
            raise RuntimeError('help!')
        self.observer.methods['uploadRecord'] = uploadRecordRaises
        self.observer.returnValues['convert'] = 'converted'

        self.assertRaises(RuntimeError, self.harvester.harvest)

        self._assertErrorStatePersisted('help!')

    # Both records are converted, but only 'id1' (whose converted data
    # differs from what was recorded before) is expected to be uploaded.
    def testOnlyUploadUpdates(self):
        self.harvester._events.markHarvestStart()
        self.harvester._events.markEvent(identifier='id0', uploadData='converted.data0')
        self.harvester._events.markEvent(identifier='id1', uploadData='converted.data1')
        self.harvester._events.markHarvestReady()
        batch = Batch()
        batch.records = [Record('id0', 'data0'), Record('id1', 'data1.changed')]
        batch.harvestingReady = True
        self.observer.methods['downloadBatch'] = lambda **kwargs: batch
        self.observer.methods['convert'] = lambda record: 'converted.' + record.data

        self.harvester.harvest()

        self.assertEqual(
            ['downloadBatch', 'convert', 'convert', 'uploadRecord'],
            self.observer.calledMethodNames())
        self.assertEqual(
            {'identifier': 'id1', 'data': 'converted.data1.changed'},
            self.observer.calledMethods[-1].kwargs)
        self.assertEqual(['id0', 'id1'], list(self.harvester._events.remainingAdds()))

    # Both records arrive as deletes; only 'id0' (previously recorded as an
    # add) is expected to be deleted, 'id1' was already recorded as deleted.
    def testDeleteOnlyWhenNotAlready(self):
        self.harvester._events.markHarvestStart()
        self.harvester._events.markEvent(identifier='id0', uploadData='converted.data0')
        self.harvester._events.markEvent(identifier='id1', delete=True)
        self.harvester._events.markHarvestReady()
        batch = Batch()
        batch.records = [Record('id0', delete=True), Record('id1', delete=True)]
        batch.harvestingReady = True
        self.observer.methods['downloadBatch'] = lambda **kwargs: batch

        self.harvester.harvest()

        self.assertEqual(['downloadBatch', 'deleteRecord'], self.observer.calledMethodNames())
        self.assertEqual({'identifier': 'id0'}, self.observer.calledMethods[-1].kwargs)
        self.assertEqual([], list(self.harvester._events.remainingAdds()))

    # Starting from a persisted harvestingReady state with an empty 'current'
    # file and two adds in 'previous', harvest() is expected to issue only
    # deleteRecord calls for those two identifiers.
    def testDeleteOldIfHarvestingReady(self):
        JsonDict({
                'harvestingReady': True,
                'datetime': '1976-11-08T12:34:56Z',
                'resumptionAttributes': None,
                'error': False
            }).dump(join(self.tempdir, 'state'))
        with open(join(self.tempdir, 'current'), 'w') as f:
            f.write("")
        with open(join(self.tempdir, 'previous'), 'w') as f:
            f.write("id:1\tA\tdatahash\n")
            f.write("id:2\tA\tdatahash\n")
        # Re-create the harvester so it starts from the files written above.
        self._prepareHarvester()

        self.harvester.harvest()

        self.assertEqual(['deleteRecord', 'deleteRecord'], self.observer.calledMethodNames())
        self.assertEqual({'identifier': 'id:1'}, self.observer.calledMethods[0].kwargs)
        self.assertEqual({'identifier': 'id:2'}, self.observer.calledMethods[1].kwargs)