    def test_dont_create_checkpoint_past_current(self):
        pillow_name = 'test-checkpoint-reset'

        # initialize change feed and pillow
        feed = KafkaChangeFeed(topics=topics.USER_TOPICS, group_id='test-kafka-feed')
        checkpoint = PillowCheckpoint(pillow_name, feed.sequence_format)
        processor = CountingProcessor()
        pillow = ConstructedPillow(
            name=pillow_name,
            checkpoint=checkpoint,
            change_feed=feed,
            processor=processor,
            change_processed_event_handler=KafkaCheckpointEventHandler(
                checkpoint=checkpoint, checkpoint_frequency=1, change_feed=feed
            )
        )

        original_kafka_offsets = feed.get_latest_offsets()
        current_kafka_offsets = deepcopy(original_kafka_offsets)
        self.assertEqual(feed.get_current_checkpoint_offsets(), {})
        self.assertEqual(pillow.get_last_checkpoint_sequence(), {})

        publish_stub_change(topics.COMMCARE_USER)
        # NOTE: the next line assumes a single partition; with multiple
        # partitions the published change can land elsewhere and this fails
        current_kafka_offsets[(topics.COMMCARE_USER, 0)] += 1
        pillow.process_changes(since=original_kafka_offsets, forever=False)
        self.assertEqual(1, processor.count)
        self.assertEqual(feed.get_current_checkpoint_offsets(), current_kafka_offsets)
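# NOTE: these snippets reference helpers they do not define. Below is a
# minimal sketch of CountingProcessor, assuming pillowtop's PillowProcessor
# base class and a one-argument process_change signature (both assumptions,
# not confirmed by the snippet), plus the stdlib imports the tests rely on.
from copy import deepcopy            # used by the checkpoint tests above
from unittest.mock import MagicMock  # used by test_basic further down

from pillowtop.processors.interface import PillowProcessor  # assumed import path


class CountingProcessor(PillowProcessor):
    """Sketch: a processor that just counts the changes it sees."""

    def __init__(self):
        self.count = 0

    def process_change(self, change):
        # each processed change bumps the counter the tests assert on
        self.count += 1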
    def test_checkpoint_with_multiple_topics(self):
        feed = KafkaChangeFeed(topics=[topics.FORM, topics.CASE], group_id='test-kafka-feed')
        pillow_name = 'test-multi-topic-checkpoints'
        checkpoint = PillowCheckpoint(pillow_name, feed.sequence_format)
        processor = CountingProcessor()
        pillow = ConstructedPillow(
            name=pillow_name,
            checkpoint=checkpoint,
            change_feed=feed,
            processor=processor,
            change_processed_event_handler=KafkaCheckpointEventHandler(
                checkpoint=checkpoint, checkpoint_frequency=1, change_feed=feed
            )
        )
        offsets = feed.get_latest_offsets()
        self.assertEqual(set([(topics.FORM, 0), (topics.CASE, 0)]), set(offsets.keys()))

        # send a few changes to kafka for the pillow to pick up
        publish_stub_change(topics.FORM)
        publish_stub_change(topics.FORM)
        publish_stub_change(topics.CASE)
        publish_stub_change(topics.CASE)
        publish_stub_change(topics.CASE_SQL)
        pillow.process_changes(since=offsets, forever=False)
        self.assertEqual(4, processor.count)
        self.assertEqual(feed.get_current_checkpoint_offsets(), pillow.get_last_checkpoint_sequence())
        publish_stub_change(topics.FORM)
        publish_stub_change(topics.FORM)
        publish_stub_change(topics.CASE)
        publish_stub_change(topics.CASE)
        publish_stub_change(topics.CASE_SQL)
        pillow.process_changes(pillow.get_last_checkpoint_sequence(), forever=False)
        self.assertEqual(8, processor.count)
        self.assertEqual(feed.get_current_checkpoint_offsets(), pillow.get_last_checkpoint_sequence())
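# publish_stub_change is another helper the tests assume. A hedged sketch of
# what it plausibly does: push a throwaway document onto the given topic and
# return its metadata so tests can match document ids later. The import
# paths, ChangeMeta fields, and send_change call are assumptions about the
# surrounding project, not confirmed API.
import uuid

from pillowtop.feed.interface import ChangeMeta        # assumed import path
from corehq.apps.change_feed.producer import producer  # assumed import path


def publish_stub_change(topic):
    meta = ChangeMeta(
        document_id=uuid.uuid4().hex,
        data_source_type='dummy-type',  # placeholder values for the stub doc
        data_source_name='dummy-name',
    )
    producer.send_change(topic, meta)
    return meta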
    def test_multiple_topics(self):
        feed = KafkaChangeFeed(topics=[topics.FORM, topics.CASE], group_id='test-kafka-feed')
        self.assertEqual(0, len(list(feed.iter_changes(since=None, forever=False))))
        offsets = feed.get_latest_offsets()
        expected_metas = [publish_stub_change(topics.FORM), publish_stub_change(topics.CASE)]
        unexpected_metas = [publish_stub_change(topics.FORM_SQL), publish_stub_change(topics.CASE_SQL)]
        changes = list(feed.iter_changes(since=offsets, forever=False))
        self.assertEqual(2, len(changes))
        found_change_ids = set([change.id for change in changes])
        self.assertEqual(set([meta.document_id for meta in expected_metas]), found_change_ids)
        for unexpected in unexpected_metas:
            self.assertTrue(unexpected.document_id not in found_change_ids)
    def test_multiple_topics_with_partial_checkpoint(self):
        feed = KafkaChangeFeed(topics=[topics.FORM, topics.CASE], group_id='test-kafka-feed')
        self.assertEqual(0, len(list(feed.iter_changes(since=None, forever=False))))
        offsets = {'form': feed.get_latest_offsets()['form']}
        expected_metas = [publish_stub_change(topics.FORM), publish_stub_change(topics.CASE)]
        changes = list(feed.iter_changes(since=offsets, forever=False))
        # should include at least the form and the case (may have more than one case since not
        # specifying a checkpoint rewinds it to the beginning of the feed)
        self.assertTrue(len(changes) > 1)
        found_change_ids = set([change.id for change in changes])
        for expected_id in set([meta.document_id for meta in expected_metas]):
            self.assertTrue(expected_id in found_change_ids)
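    # The comment above is the behaviour under test: a partial `since` dict
    # only pins the topics it names. An illustration with hypothetical
    # offsets:
    #
    #     since = {'form': 42}  # resume the form topic at offset 42
    #
    # 'case' has no entry, so iter_changes(since=since, forever=False)
    # replays the case topic from the beginning of the feed, which is why
    # the test above can see more than one case change.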
    def test_basic(self):
        # setup
        feed = KafkaChangeFeed(topics=[topics.CASE], client_id='test-kafka-feed')
        pillow_name = 'test-chunked-processing'
        checkpoint = PillowCheckpoint(pillow_name, feed.sequence_format)
        processor = ChunkedCountProcessor()
        original_process_change = processor.process_change
        original_process_changes_chunk = processor.process_changes_chunk

        pillow = ConstructedPillow(
            name=pillow_name,
            checkpoint=checkpoint,
            change_feed=feed,
            processor=processor,
            change_processed_event_handler=KafkaCheckpointEventHandler(
                checkpoint=checkpoint, checkpoint_frequency=1, change_feed=feed
            ),
            processor_chunk_size=2
        )

        since = feed.get_latest_offsets()
        self._produce_changes(2)
        # the pillow should use process_changes_chunk; make process_change
        # raise so the test fails if the single-change path is used instead
        processor.process_change = MagicMock(side_effect=Exception('_'))
        pillow.process_changes(since=since, forever=False)
        self.assertEqual(processor.count, 2)

        self._produce_changes(2)
        # if process_changes_chunk raises exception, pillow should use process_change
        processor.process_change = original_process_change
        processor.process_changes_chunk = MagicMock(side_effect=Exception('_'))
        pillow.process_changes(since=pillow.get_last_checkpoint_sequence(), forever=False)
        self.assertEqual(processor.count, 4)

        self._produce_changes(1)
        # a trailing chunk smaller than processor_chunk_size should still be
        # processed (and its offsets checkpointed)
        processor.process_change = MagicMock(side_effect=Exception('_'))
        processor.process_changes_chunk = original_process_changes_chunk
        pillow.process_changes(since=pillow.get_last_checkpoint_sequence(), forever=False)
        self.assertEqual(processor.count, 5)
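# Rough sketches of the two helpers test_basic assumes. The base class and
# the process_changes_chunk hook are the same pillowtop assumptions as
# above; _produce_changes is shown as a hypothetical TestCase method that
# wraps publish_stub_change.
class ChunkedCountProcessor(PillowProcessor):

    def __init__(self):
        self.count = 0

    def process_change(self, change):
        # fallback path: one change at a time
        self.count += 1

    def process_changes_chunk(self, changes_chunk):
        # chunked path: a whole batch at once
        self.count += len(changes_chunk)


# hypothetical helper, assumed to live on the TestCase:
    def _produce_changes(self, count):
        # publish `count` stub changes to the case topic the feed watches
        for _ in range(count):
            publish_stub_change(topics.CASE)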