def test_dont_create_checkpoint_past_current(self):
    """Processing one published change advances the checkpoint by exactly
    one offset for that (topic, partition) — never past the feed head.

    NOTE(review): a later method with the same name appears in this class
    and shadows this one at class-creation time — confirm which variant
    (group_id vs client_id) is intended to run.
    """
    pillow_name = 'test-checkpoint-reset'
    # initialize change feed and pillow
    change_feed = KafkaChangeFeed(topics=topics.USER_TOPICS, group_id='test-kafka-feed')
    checkpoint = PillowCheckpoint(pillow_name, change_feed.sequence_format)
    counting_processor = CountingProcessor()
    pillow = ConstructedPillow(
        name=pillow_name,
        checkpoint=checkpoint,
        change_feed=change_feed,
        processor=counting_processor,
        change_processed_event_handler=KafkaCheckpointEventHandler(
            checkpoint=checkpoint, checkpoint_frequency=1, change_feed=change_feed
        )
    )
    starting_offsets = change_feed.get_latest_offsets()
    expected_offsets = deepcopy(starting_offsets)

    # nothing has been processed yet, so no checkpoint state should exist
    self.assertEqual(change_feed.get_current_checkpoint_offsets(), {})
    self.assertEqual(pillow.get_last_checkpoint_sequence(), {})

    publish_stub_change(topics.COMMCARE_USER)
    # the following line causes tests to fail if you have multiple partitions
    expected_offsets[(topics.COMMCARE_USER, 0)] += 1

    pillow.process_changes(since=starting_offsets, forever=False)
    self.assertEqual(1, counting_processor.count)
    self.assertEqual(change_feed.get_current_checkpoint_offsets(), expected_offsets)
def test_dont_create_checkpoint_past_current(self):
    """After processing a single new change, the stored checkpoint must
    equal the pre-publish offsets advanced by one — not the feed head."""
    pillow_name = 'test-checkpoint-reset'
    # wire up the change feed, checkpoint, counting processor and pillow
    feed = KafkaChangeFeed(topics=topics.USER_TOPICS, client_id='test-kafka-feed')
    checkpoint = PillowCheckpoint(pillow_name, feed.sequence_format)
    processor = CountingProcessor()
    event_handler = KafkaCheckpointEventHandler(
        checkpoint=checkpoint, checkpoint_frequency=1, change_feed=feed
    )
    pillow = ConstructedPillow(
        name=pillow_name,
        checkpoint=checkpoint,
        change_feed=feed,
        processor=processor,
        change_processed_event_handler=event_handler
    )
    offsets_before = feed.get_latest_offsets()
    offsets_expected = deepcopy(offsets_before)

    # fresh pillow: no checkpoint data recorded anywhere yet
    self.assertEqual(feed.get_current_checkpoint_offsets(), {})
    self.assertEqual(pillow.get_last_checkpoint_sequence(), {})

    publish_stub_change(topics.COMMCARE_USER)
    # the following line causes tests to fail if you have multiple partitions
    offsets_expected[(topics.COMMCARE_USER, 0)] += 1

    pillow.process_changes(since=offsets_before, forever=False)
    self.assertEqual(1, processor.count)
    self.assertEqual(feed.get_current_checkpoint_offsets(), offsets_expected)
def test_checkpoint_with_multiple_topics(self):
    """Checkpoints track per-(topic, partition) offsets across two rounds
    of processing when the feed subscribes to several topics.

    NOTE(review): a later method with the same name appears in this class
    and shadows this one at class-creation time — confirm which variant
    (group_id vs client_id) is intended to run.
    """
    kafka_feed = KafkaChangeFeed(topics=[topics.FORM, topics.CASE], group_id='test-kafka-feed')
    pillow_name = 'test-multi-topic-checkpoints'
    checkpoint = PillowCheckpoint(pillow_name, kafka_feed.sequence_format)
    counter = CountingProcessor()
    pillow = ConstructedPillow(
        name=pillow_name,
        checkpoint=checkpoint,
        change_feed=kafka_feed,
        processor=counter,
        change_processed_event_handler=KafkaCheckpointEventHandler(
            checkpoint=checkpoint, checkpoint_frequency=1, change_feed=kafka_feed
        )
    )
    offsets = kafka_feed.get_latest_offsets()
    self.assertEqual({(topics.FORM, 0), (topics.CASE, 0)}, set(offsets.keys()))

    # four changes on subscribed topics plus one on an unsubscribed topic
    # (CASE_SQL) that the pillow must ignore
    for topic in (topics.FORM, topics.FORM, topics.CASE, topics.CASE, topics.CASE_SQL):
        publish_stub_change(topic)

    pillow.process_changes(since=offsets, forever=False)
    self.assertEqual(4, counter.count)
    self.assertEqual(kafka_feed.get_current_checkpoint_offsets(), pillow.get_last_checkpoint_sequence())

    # second round, resuming from the checkpoint saved above
    for topic in (topics.FORM, topics.FORM, topics.CASE, topics.CASE, topics.CASE_SQL):
        publish_stub_change(topic)

    pillow.process_changes(pillow.get_last_checkpoint_sequence(), forever=False)
    self.assertEqual(8, counter.count)
    self.assertEqual(kafka_feed.get_current_checkpoint_offsets(), pillow.get_last_checkpoint_sequence())
def test_checkpoint_with_multiple_topics(self):
    """A multi-topic pillow keeps its checkpoint in sync with the feed's
    current offsets after each of two processing passes."""
    feed = KafkaChangeFeed(topics=[topics.FORM, topics.CASE], client_id='test-kafka-feed')
    pillow_name = 'test-multi-topic-checkpoints'
    checkpoint = PillowCheckpoint(pillow_name, feed.sequence_format)
    processor = CountingProcessor()
    event_handler = KafkaCheckpointEventHandler(
        checkpoint=checkpoint, checkpoint_frequency=1, change_feed=feed
    )
    pillow = ConstructedPillow(
        name=pillow_name,
        checkpoint=checkpoint,
        change_feed=feed,
        processor=processor,
        change_processed_event_handler=event_handler
    )
    offsets = feed.get_latest_offsets()
    self.assertEqual({(topics.FORM, 0), (topics.CASE, 0)}, set(offsets.keys()))

    # a round is: 2x FORM, 2x CASE (counted) and 1x CASE_SQL (unsubscribed,
    # so it must not be counted)
    publish_round = [topics.FORM, topics.FORM, topics.CASE, topics.CASE, topics.CASE_SQL]

    for topic in publish_round:
        publish_stub_change(topic)
    pillow.process_changes(since=offsets, forever=False)
    self.assertEqual(4, processor.count)
    self.assertEqual(feed.get_current_checkpoint_offsets(), pillow.get_last_checkpoint_sequence())

    for topic in publish_round:
        publish_stub_change(topic)
    pillow.process_changes(pillow.get_last_checkpoint_sequence(), forever=False)
    self.assertEqual(8, processor.count)
    self.assertEqual(feed.get_current_checkpoint_offsets(), pillow.get_last_checkpoint_sequence())
def test_multiple_topics(self):
    """Changes published to subscribed topics appear in the feed; changes
    on other topics do not.

    NOTE(review): a later method with the same name appears in this class
    and shadows this one at class-creation time — confirm which variant
    (group_id vs client_id) is intended to run.
    """
    feed = KafkaChangeFeed(topics=[topics.FORM, topics.CASE], group_id='test-kafka-feed')
    # with no checkpoint and no published changes the feed is empty
    self.assertEqual(0, len(list(feed.iter_changes(since=None, forever=False))))
    offsets = feed.get_latest_offsets()

    expected_metas = [publish_stub_change(topics.FORM), publish_stub_change(topics.CASE)]
    # these land on topics the feed does not subscribe to
    unexpected_metas = [publish_stub_change(topics.FORM_SQL), publish_stub_change(topics.CASE_SQL)]

    changes = list(feed.iter_changes(since=offsets, forever=False))
    self.assertEqual(2, len(changes))
    seen_ids = {change.id for change in changes}
    self.assertEqual({meta.document_id for meta in expected_metas}, seen_ids)
    for meta in unexpected_metas:
        self.assertTrue(meta.document_id not in seen_ids)
def test_multiple_topics_with_partial_checkpoint(self):
    """When the `since` dict covers only one topic, the uncheckpointed
    topic is replayed from the beginning of the feed."""
    feed = KafkaChangeFeed(topics=[topics.FORM, topics.CASE], group_id='test-kafka-feed')
    # sanity: nothing to iterate before anything is published
    self.assertEqual(0, len(list(feed.iter_changes(since=None, forever=False))))

    # checkpoint only the 'form' topic, leaving 'case' without one
    partial_offsets = {'form': feed.get_latest_offsets()['form']}
    expected_metas = [publish_stub_change(topics.FORM), publish_stub_change(topics.CASE)]

    changes = list(feed.iter_changes(since=partial_offsets, forever=False))
    # should include at least the form and the case (may have more than one case since not
    # specifying a checkpoint rewinds it to the beginning of the feed)
    self.assertTrue(len(changes) > 1)
    seen_ids = {change.id for change in changes}
    for expected_id in {meta.document_id for meta in expected_metas}:
        self.assertTrue(expected_id in seen_ids)
def test_multiple_topics(self):
    """Only documents published to the feed's subscribed topics are
    yielded by iter_changes."""
    feed = KafkaChangeFeed(topics=[topics.FORM, topics.CASE], client_id='test-kafka-feed')
    self.assertEqual(0, len(list(feed.iter_changes(since=None, forever=False))))
    start_offsets = feed.get_latest_offsets()

    # two changes the feed should see, two on unsubscribed *_SQL topics
    wanted = [publish_stub_change(topics.FORM), publish_stub_change(topics.CASE)]
    unwanted = [publish_stub_change(topics.FORM_SQL), publish_stub_change(topics.CASE_SQL)]

    fetched = list(feed.iter_changes(since=start_offsets, forever=False))
    self.assertEqual(2, len(fetched))

    fetched_ids = {change.id for change in fetched}
    self.assertEqual({meta.document_id for meta in wanted}, fetched_ids)
    for meta in unwanted:
        self.assertNotIn(meta.document_id, fetched_ids)
def test_basic(self):
    """Chunked processing behaviour: the pillow prefers
    process_changes_chunk, falls back to per-change process_change when
    the chunk call raises, and still processes a final partial chunk.

    NOTE(review): a later method with the same name appears in this class
    and shadows this one at class-creation time — confirm which copy is
    intended to run.
    """
    # setup
    feed = KafkaChangeFeed(topics=[topics.CASE], client_id='test-kafka-feed')
    pillow_name = 'test-chunked-processing'
    checkpoint = PillowCheckpoint(pillow_name, feed.sequence_format)
    processor = ChunkedCountProcessor()
    # keep the real implementations so they can be restored after mocking
    saved_process_change = processor.process_change
    saved_process_changes_chunk = processor.process_changes_chunk
    pillow = ConstructedPillow(
        name=pillow_name,
        checkpoint=checkpoint,
        change_feed=feed,
        processor=processor,
        change_processed_event_handler=KafkaCheckpointEventHandler(
            checkpoint=checkpoint, checkpoint_frequency=1, change_feed=feed),
        processor_chunk_size=2)
    since = feed.get_latest_offsets()

    self._produce_changes(2)
    # pillow should use process_changes_chunk (make process_change raise an exception for test)
    processor.process_change = MagicMock(side_effect=Exception('_'))
    pillow.process_changes(since=since, forever=False)
    self.assertEqual(processor.count, 2)

    self._produce_changes(2)
    # if process_changes_chunk raises exception, pillow should use process_change
    processor.process_change = saved_process_change
    processor.process_changes_chunk = MagicMock(side_effect=Exception('_'))
    pillow.process_changes(since=pillow.get_last_checkpoint_sequence(), forever=False)
    self.assertEqual(processor.count, 4)

    self._produce_changes(1)
    # offsets after full chunk should still be processed
    processor.process_change = MagicMock(side_effect=Exception('_'))
    processor.process_changes_chunk = saved_process_changes_chunk
    pillow.process_changes(since=pillow.get_last_checkpoint_sequence(), forever=False)
    self.assertEqual(processor.count, 5)
def test_basic(self):
    """Exercise the three chunked-processing paths of a ConstructedPillow
    with processor_chunk_size=2: chunk path, per-change fallback when the
    chunk call raises, and processing of a trailing partial chunk."""
    # setup
    feed = KafkaChangeFeed(topics=[topics.CASE], client_id='test-kafka-feed')
    pillow_name = 'test-chunked-processing'
    checkpoint = PillowCheckpoint(pillow_name, feed.sequence_format)
    processor = ChunkedCountProcessor()
    # remember the real methods before swapping mocks in and out
    real_single = processor.process_change
    real_chunk = processor.process_changes_chunk
    pillow = ConstructedPillow(
        name=pillow_name,
        checkpoint=checkpoint,
        change_feed=feed,
        processor=processor,
        change_processed_event_handler=KafkaCheckpointEventHandler(
            checkpoint=checkpoint, checkpoint_frequency=1, change_feed=feed
        ),
        processor_chunk_size=2
    )
    since = feed.get_latest_offsets()

    # pillow should use process_changes_chunk (make process_change raise an exception for test)
    self._produce_changes(2)
    processor.process_change = MagicMock(side_effect=Exception('_'))
    pillow.process_changes(since=since, forever=False)
    self.assertEqual(processor.count, 2)

    # if process_changes_chunk raises exception, pillow should use process_change
    self._produce_changes(2)
    processor.process_change = real_single
    processor.process_changes_chunk = MagicMock(side_effect=Exception('_'))
    pillow.process_changes(since=pillow.get_last_checkpoint_sequence(), forever=False)
    self.assertEqual(processor.count, 4)

    # offsets after full chunk should still be processed
    self._produce_changes(1)
    processor.process_change = MagicMock(side_effect=Exception('_'))
    processor.process_changes_chunk = real_chunk
    pillow.process_changes(since=pillow.get_last_checkpoint_sequence(), forever=False)
    self.assertEqual(processor.count, 5)