class BaseCouchDocProcessorTest(SimpleTestCase):
    """Common fixtures for tests that run a document processor over ``Bar`` docs.

    ``processor_class`` is ``None`` here and is called in ``_get_processor``,
    so it must be replaced with a real processor class before that helper is
    used (presumably by concrete subclasses -- confirm against the callers).
    """
    processor_class = None

    @staticmethod
    def _get_row(ident, doc_type="Bar"):
        """Return a fake view row for document number ``ident``.

        The document id is the lower-cased ``doc_type``, a dash, then
        ``ident``; the same id appears in the row, its key and the embedded doc.
        """
        doc_id_prefix = '{}-'.format(doc_type.lower())
        doc_id = '{}{}'.format(doc_id_prefix, ident)
        document = {'_id': doc_id, 'doc_type': doc_type}
        return {'id': doc_id, 'key': [doc_type, doc_id], 'value': None, 'doc': document}

    def _get_view_results(self, total, chuck_size, doc_type="Bar"):
        """Build the list of ``(query params, rows)`` pairs for ``total`` docs.

        The first pair is a reduce query whose single row carries ``total``.
        It is followed by one non-reduce pair for every chunk that ``chunked``
        yields from ``range(total)``.
        """
        doc_id_prefix = '{}-'.format(doc_type.lower())
        count_query = {"endkey": [doc_type, {}], "group_level": 1, "reduce": True, "startkey": [doc_type]}
        results = [(count_query, [{"key": doc_type, "value": total}])]

        for chunk in chunked(list(range(total)), chuck_size):
            chunk_rows = [self._get_row(ident, doc_type=doc_type) for ident in chunk]
            first_ident = chunk[0]
            if first_ident == 0:
                # The very first page starts at the beginning of the doc type range.
                page_query = {
                    'startkey': [doc_type], 'endkey': [doc_type, {}], 'reduce': False,
                    'limit': chuck_size, 'include_docs': True
                }
            else:
                # Later pages start at the id just before this chunk and skip one row.
                previous = '{}{}'.format(doc_id_prefix, first_ident - 1)
                page_query = {
                    'endkey': [doc_type, {}], 'skip': 1, 'startkey_docid': previous, 'reduce': False,
                    'startkey': [doc_type, previous], 'limit': chuck_size, 'include_docs': True
                }
            results.append((page_query, chunk_rows))

        return results

    def setUp(self):
        """Create a ``FakeCouchDb`` with four ``Bar`` docs and attach it to ``Bar``."""
        view_results = self._get_view_results(4, chuck_size=2)
        docs_by_id = {}
        for ident in range(4):
            doc = self._get_row(ident)['doc']
            docs_by_id[doc['_id']] = doc
        self.db = FakeCouchDb(
            views={"all_docs/by_doc_type": view_results},
            docs=docs_by_id,
        )
        Bar.set_db(self.db)
        # A fresh random slug for every test.
        self.processor_slug = uuid.uuid4().hex

    def tearDown(self):
        """Reset the fake database created in ``setUp``."""
        self.db.reset()

    def _get_processor(self, chunk_size=2, ignore_docs=None, skip_docs=None, reset=False, doc_types=None):
        """Return ``(doc_processor, processor)`` wired to the fake database.

        ``doc_types`` falls back to ``[Bar]`` when it is empty or ``None``.
        ``ignore_docs`` / ``skip_docs`` are only assigned onto the
        ``DemoProcessor`` when they are truthy.
        """
        if not doc_types:
            doc_types = [Bar]
        doc_processor = DemoProcessor()
        processor = self.processor_class(
            CouchDocumentProvider(self.processor_slug, doc_types),
            doc_processor,
            chunk_size=chunk_size,
            reset=reset,
        )
        processor.document_iterator.couch_db = self.db
        for attr_name, attr_value in (('ignore_docs', ignore_docs), ('skip_docs', skip_docs)):
            if attr_value:
                setattr(doc_processor, attr_name, attr_value)
        return doc_processor, processor
Beispiel #2
0
class TestResumableFunctionIterator(SimpleTestCase):
    """Exercise ``ResumableFunctionIterator`` against a ``FakeCouchDb``.

    The data provider serves three batches (eight integers in total). A
    "resume" is simulated by building a new iterator that shares the same
    fake database.
    """

    def setUp(self):
        """Prepare the fake db, the batches and a first iterator."""
        self.couch_db = FakeCouchDb()
        self.batches = [list(range(0, 3)), list(range(3, 6)), list(range(6, 8))]
        self.all_items = list(itertools.chain.from_iterable(self.batches))
        self.itr = self.get_iterator()

    def tearDown(self):
        """Reset the fake database."""
        self.couch_db.reset()

    def get_iterator(self, missing_items=None):
        """Return a new iterator named ``'test'`` bound to ``self.couch_db``.

        ``missing_items`` is accepted but not used by this implementation.
        """
        def data_provider(batch_number):
            # An index past the last batch yields an empty batch.
            try:
                batch = self.batches[batch_number]
            except IndexError:
                batch = []
            return batch

        resumable = ResumableFunctionIterator('test', data_provider, TestArgsProvider())
        resumable.couch_db = self.couch_db
        return resumable

    def test_iteration(self):
        """A full pass yields every item of every batch, in order."""
        yielded = list(self.itr)
        self.assertEqual(yielded, self.all_items)

    def test_resume_iteration(self):
        """After six items are consumed, a new iterator yields ``all_items[3:]``."""
        itr = iter(self.itr)
        consumed = [next(itr) for _ in range(6)]
        self.assertEqual(consumed, self.all_items[:6])
        # stop/resume iteration
        self.itr = self.get_iterator()
        resumed = list(self.itr)
        self.assertEqual(resumed, self.all_items[3:])

    def test_resume_iteration_after_exhaustion(self):
        """A batch appended after exhaustion is picked up by a new iterator."""
        itr = iter(self.itr)
        self.assertEqual(list(itr), self.all_items)
        # resume iteration
        self.batches.append([8, 9])
        self.itr = self.get_iterator()
        self.assertEqual(list(self.itr), [8, 9])

    def test_resume_iteration_after_legacy_completion(self):
        """State saved with ``complete = True`` and no args yields nothing more."""
        itr = iter(self.itr)
        self.assertEqual(list(itr), self.all_items)
        state = self.itr.state
        state.complete = True
        state.args = None
        state.kwargs = None
        self.itr._save_state()
        # attempt to resume yields no new items
        self.batches.append([8, 9])
        self.itr = self.get_iterator()
        self.assertEqual(list(self.itr), [])

    def test_resume_iteration_after_complete_iteration(self):
        """Once a full pass has finished, a new iterator yields nothing."""
        self.assertEqual(list(self.itr), self.all_items)
        # resume iteration
        self.itr = self.get_iterator()
        leftover = list(self.itr)
        self.assertEqual(leftover, [])

    def test_discard_state(self):
        """After ``discard_state`` a new iterator yields everything again."""
        self.assertEqual(list(self.itr), self.all_items)
        self.itr.discard_state()

        self.itr = self.get_iterator()
        self.assertEqual(list(self.itr), self.all_items)

    def test_iteration_with_iterator_detail(self):
        """Iterator details set on one iterator are visible from later ones."""
        detail_key = 'progress'
        itr = iter(self.itr)
        consumed = [next(itr) for _ in range(6)]
        self.assertEqual(consumed, self.all_items[:6])
        self.assertEqual(self.itr.get_iterator_detail(detail_key), None)
        self.itr.set_iterator_detail(detail_key, {"visited": 6})
        # stop/resume iteration
        self.itr = self.get_iterator()
        self.assertEqual(self.itr.get_iterator_detail(detail_key), {"visited": 6})
        self.itr.set_iterator_detail(detail_key, {"visited": "six"})
        # stop/resume iteration
        self.itr = self.get_iterator()
        self.assertEqual(self.itr.get_iterator_detail(detail_key), {"visited": "six"})
        self.assertEqual(list(self.itr), self.all_items[3:])
Beispiel #3
0
class TestResumableFunctionIterator(SimpleTestCase):
    """Exercise ``ResumableFunctionIterator`` (with an item getter) on a fake db.

    The data provider serves three batches (eight integers in total). The item
    getter turns an item id back into an ``int`` unless the id was declared
    missing. A "resume" is simulated by building a new iterator that shares
    the same fake database.
    """

    def setUp(self):
        """Prepare the fake db, the batches and a first iterator."""
        self.couch_db = FakeCouchDb()
        self.batches = [list(range(0, 3)), list(range(3, 6)), list(range(6, 8))]
        self.all_items = list(itertools.chain.from_iterable(self.batches))
        self.itr = self.get_iterator()

    def tearDown(self):
        """Reset the fake database."""
        self.couch_db.reset()

    def get_iterator(self, missing_items=None):
        """Return a new iterator named ``'test'`` bound to ``self.couch_db``.

        Ids listed in ``missing_items`` make the item getter return ``None``.
        """
        def data_provider(batch_number):
            # An index past the last batch yields an empty batch.
            try:
                batch = self.batches[batch_number]
            except IndexError:
                batch = []
            return batch

        def item_getter(item_id):
            is_missing = missing_items and item_id in missing_items
            return None if is_missing else int(item_id)

        resumable = ResumableFunctionIterator('test', data_provider, TestArgsProvider(), item_getter)
        resumable.couch_db = self.couch_db
        return resumable

    def test_iteration(self):
        """A full pass yields every item of every batch, in order."""
        yielded = list(self.itr)
        self.assertEqual(yielded, self.all_items)

    def test_resume_iteration(self):
        """After six items are consumed, a new iterator yields ``all_items[3:]``."""
        itr = iter(self.itr)
        consumed = [next(itr) for _ in range(6)]
        self.assertEqual(consumed, self.all_items[:6])
        # stop/resume iteration
        self.itr = self.get_iterator()
        resumed = list(self.itr)
        self.assertEqual(resumed, self.all_items[3:])

    def test_resume_iteration_after_complete_iteration(self):
        """Once a full pass has finished, a new iterator yields nothing."""
        self.assertEqual(list(self.itr), self.all_items)
        # resume iteration
        self.itr = self.get_iterator()
        leftover = list(self.itr)
        self.assertEqual(leftover, [])

    def test_discard_state(self):
        """After ``discard_state`` a new iterator yields everything again."""
        self.assertEqual(list(self.itr), self.all_items)
        self.itr.discard_state()

        self.itr = self.get_iterator()
        self.assertEqual(list(self.itr), self.all_items)

    def test_iteration_with_iterator_detail(self):
        """Iterator details set on one iterator are visible from later ones."""
        detail_key = 'progress'
        itr = iter(self.itr)
        consumed = [next(itr) for _ in range(6)]
        self.assertEqual(consumed, self.all_items[:6])
        self.assertEqual(self.itr.get_iterator_detail(detail_key), None)
        self.itr.set_iterator_detail(detail_key, {"visited": 6})
        # stop/resume iteration
        self.itr = self.get_iterator()
        self.assertEqual(self.itr.get_iterator_detail(detail_key), {"visited": 6})
        self.itr.set_iterator_detail(detail_key, {"visited": "six"})
        # stop/resume iteration
        self.itr = self.get_iterator()
        self.assertEqual(self.itr.get_iterator_detail(detail_key), {"visited": "six"})
        self.assertEqual(list(self.itr), self.all_items[3:])

    def test_iteration_with_retry(self):
        """An item flagged for retry is yielded once more after all the others."""
        itr = iter(self.itr)
        item = next(itr)
        self.itr.retry(str(item))
        self.assertEqual(item, 0)
        remaining = list(itr)
        self.assertEqual([0] + remaining, self.all_items + [0])

    def test_iteration_complete_after_retry(self):
        """A pass that included a retry leaves nothing for a new iterator."""
        itr = iter(self.itr)
        first = next(itr)
        self.itr.retry(str(first))
        # Drain the rest of the iterator; the values are not needed.
        for _ in itr:
            pass
        self.itr = self.get_iterator()
        self.assertEqual(list(self.itr), [])

    def test_iteration_with_max_retry(self):
        """A fourth retry of the same item raises ``TooManyRetries``."""
        itr = iter(self.itr)
        last_seen = next(itr)
        ids = [last_seen]
        self.assertEqual(last_seen, 0)
        self.itr.retry(str(last_seen))
        retries = 1
        for last_seen in itr:
            ids.append(last_seen)
            if last_seen != 0:
                continue
            if retries >= 3:
                # Item 0 has come back after its third retry; stop here.
                break
            self.itr.retry(str(last_seen))
            retries += 1
        self.assertEqual(last_seen, 0)
        with self.assertRaises(TooManyRetries):
            self.itr.retry(str(last_seen))
        self.assertEqual(ids, self.all_items + [0, 0, 0])
        self.assertEqual(list(itr), [])
        self.assertEqual(list(self.get_iterator()), [])

    def test_iteration_with_missing_retry_doc(self):
        """A retried item that the getter reports missing is not yielded again."""
        iterator = self.get_iterator(missing_items=["0"])
        itr = iter(iterator)
        item = next(itr)
        self.assertEqual(item, 0)
        iterator.retry(str(item))
        remaining = list(itr)
        self.assertEqual([0] + remaining, self.all_items)
Beispiel #4
0
class TestResumableFunctionIterator(SimpleTestCase):
    """Tests for ``ResumableFunctionIterator`` using a ``FakeCouchDb`` store.

    Three batches holding the integers 0 through 7 are served by the data
    provider. The item getter converts an id back to ``int`` unless that id
    was declared missing. Resuming is simulated by creating a new iterator
    on the same fake database.
    """

    def setUp(self):
        """Create the fake db, the batch fixtures and the initial iterator."""
        self.couch_db = FakeCouchDb()
        first_batch = list(range(0, 3))
        second_batch = list(range(3, 6))
        third_batch = list(range(6, 8))
        self.batches = [first_batch, second_batch, third_batch]
        self.all_items = list(itertools.chain.from_iterable(self.batches))
        self.itr = self.get_iterator()

    def tearDown(self):
        """Reset the fake database."""
        self.couch_db.reset()

    def get_iterator(self, missing_items=None):
        """Build a new ``'test'`` iterator attached to ``self.couch_db``.

        The item getter returns ``None`` for ids found in ``missing_items``.
        """
        def data_provider(batch_number):
            # Out-of-range batch numbers produce an empty batch.
            try:
                batch = self.batches[batch_number]
            except IndexError:
                batch = []
            return batch

        def item_getter(item_id):
            if not missing_items or item_id not in missing_items:
                return int(item_id)
            return None

        new_iterator = ResumableFunctionIterator(
            'test', data_provider, TestArgsProvider(), item_getter)
        new_iterator.couch_db = self.couch_db
        return new_iterator

    def test_iteration(self):
        """Iterating once yields all items in batch order."""
        self.assertEqual(list(self.itr), self.all_items)

    def test_resume_iteration(self):
        """A new iterator created after six items yields ``all_items[3:]``."""
        itr = iter(self.itr)
        head = [next(itr) for _ in range(6)]
        self.assertEqual(head, self.all_items[:6])
        # stop/resume iteration
        self.itr = self.get_iterator()
        tail = list(self.itr)
        self.assertEqual(tail, self.all_items[3:])

    def test_resume_iteration_after_complete_iteration(self):
        """Nothing is yielded by a new iterator after a completed pass."""
        self.assertEqual(list(self.itr), self.all_items)
        # resume iteration
        self.itr = self.get_iterator()
        self.assertEqual(list(self.itr), [])

    def test_discard_state(self):
        """Discarding state lets a new iterator start from the beginning."""
        self.assertEqual(list(self.itr), self.all_items)
        self.itr.discard_state()

        self.itr = self.get_iterator()
        self.assertEqual(list(self.itr), self.all_items)

    def test_iteration_with_iterator_detail(self):
        """A detail stored under a key is readable from later iterators."""
        itr = iter(self.itr)
        head = [next(itr) for _ in range(6)]
        self.assertEqual(head, self.all_items[:6])
        self.assertEqual(self.itr.get_iterator_detail('progress'), None)
        self.itr.set_iterator_detail('progress', {"visited": 6})
        # stop/resume iteration
        self.itr = self.get_iterator()
        numeric_detail = self.itr.get_iterator_detail('progress')
        self.assertEqual(numeric_detail, {"visited": 6})
        self.itr.set_iterator_detail('progress', {"visited": "six"})
        # stop/resume iteration
        self.itr = self.get_iterator()
        text_detail = self.itr.get_iterator_detail('progress')
        self.assertEqual(text_detail, {"visited": "six"})
        self.assertEqual(list(self.itr), self.all_items[3:])

    def test_iteration_with_retry(self):
        """A retried item shows up again at the end of the pass."""
        itr = iter(self.itr)
        item = next(itr)
        self.itr.retry(str(item))
        self.assertEqual(item, 0)
        rest = list(itr)
        self.assertEqual([0] + rest, self.all_items + [0])

    def test_iteration_complete_after_retry(self):
        """After a pass containing a retry, a new iterator yields nothing."""
        itr = iter(self.itr)
        retried = next(itr)
        self.itr.retry(str(retried))
        # Exhaust the iterator; the yielded values are irrelevant here.
        for _ in itr:
            pass
        self.itr = self.get_iterator()
        self.assertEqual(list(self.itr), [])

    def test_iteration_with_max_retry(self):
        """Retrying the same item a fourth time raises ``TooManyRetries``."""
        itr = iter(self.itr)
        current = next(itr)
        ids = [current]
        self.assertEqual(current, 0)
        self.itr.retry(str(current))
        retries = 1
        for current in itr:
            ids.append(current)
            if current != 0:
                continue
            if retries >= 3:
                # Third reappearance of item 0: leave the loop.
                break
            self.itr.retry(str(current))
            retries += 1
        self.assertEqual(current, 0)
        with self.assertRaises(TooManyRetries):
            self.itr.retry(str(current))
        self.assertEqual(ids, self.all_items + [0, 0, 0])
        self.assertEqual(list(itr), [])
        self.assertEqual(list(self.get_iterator()), [])

    def test_iteration_with_missing_retry_doc(self):
        """A retried id the getter cannot resolve is not yielded a second time."""
        iterator = self.get_iterator(missing_items=["0"])
        itr = iter(iterator)
        item = next(itr)
        self.assertEqual(item, 0)
        iterator.retry(str(item))
        rest = list(itr)
        self.assertEqual([0] + rest, self.all_items)