def test_checkpoint_with_multiple_topics(self):
    """Checkpointing should track offsets across several Kafka topics at once."""
    change_feed = KafkaChangeFeed(topics=[topics.FORM, topics.CASE], group_id='test-kafka-feed')
    pillow_name = 'test-multi-topic-checkpoints'
    pillow_checkpoint = PillowCheckpoint(pillow_name, change_feed.sequence_format)
    counter = CountingProcessor()
    pillow = ConstructedPillow(
        name=pillow_name,
        checkpoint=pillow_checkpoint,
        change_feed=change_feed,
        processor=counter,
        change_processed_event_handler=KafkaCheckpointEventHandler(
            checkpoint=pillow_checkpoint, checkpoint_frequency=1, change_feed=change_feed
        )
    )
    offsets = change_feed.get_latest_offsets()
    self.assertEqual({(topics.FORM, 0), (topics.CASE, 0)}, set(offsets.keys()))

    # send a few changes to kafka so they should be picked up by the pillow;
    # the CASE_SQL change is on an unmonitored topic and must be ignored
    for topic in (topics.FORM, topics.FORM, topics.CASE, topics.CASE, topics.CASE_SQL):
        publish_stub_change(topic)
    pillow.process_changes(since=offsets, forever=False)
    self.assertEqual(4, counter.count)
    self.assertEqual(change_feed.get_current_checkpoint_offsets(),
                     pillow.get_last_checkpoint_sequence())

    # a second batch resumed from the saved checkpoint should behave the same
    for topic in (topics.FORM, topics.FORM, topics.CASE, topics.CASE, topics.CASE_SQL):
        publish_stub_change(topic)
    pillow.process_changes(pillow.get_last_checkpoint_sequence(), forever=False)
    self.assertEqual(8, counter.count)
    self.assertEqual(change_feed.get_current_checkpoint_offsets(),
                     pillow.get_last_checkpoint_sequence())
def test_dont_create_checkpoint_past_current(self):
    """A checkpoint must never be advanced beyond Kafka's real offsets."""
    pillow_name = 'test-checkpoint-reset'

    # initialize change feed and pillow
    change_feed = KafkaChangeFeed(topics=topics.USER_TOPICS, group_id='test-kafka-feed')
    pillow_checkpoint = PillowCheckpoint(pillow_name, change_feed.sequence_format)
    counter = CountingProcessor()
    pillow = ConstructedPillow(
        name=pillow_name,
        checkpoint=pillow_checkpoint,
        change_feed=change_feed,
        processor=counter,
        change_processed_event_handler=KafkaCheckpointEventHandler(
            checkpoint=pillow_checkpoint, checkpoint_frequency=1, change_feed=change_feed
        )
    )

    original_kafka_offsets = change_feed.get_latest_offsets()
    expected_kafka_offsets = deepcopy(original_kafka_offsets)
    self.assertEqual(change_feed.get_current_checkpoint_offsets(), {})
    self.assertEqual(pillow.get_last_checkpoint_sequence(), {})

    publish_stub_change(topics.COMMCARE_USER)
    # the following line causes tests to fail if you have multiple partitions
    expected_kafka_offsets[(topics.COMMCARE_USER, 0)] += 1

    pillow.process_changes(since=original_kafka_offsets, forever=False)
    self.assertEqual(1, counter.count)
    self.assertEqual(change_feed.get_current_checkpoint_offsets(), expected_kafka_offsets)
def get_change_feed_pillow_for_db(pillow_id, couch_db, default_topic=None):
    """Generic pillow for inserting Couch documents into Kafka.

    Reads from:
      - CouchDB

    Writes to:
      - Kafka
    """
    kafka_processor = KafkaProcessor(
        data_source_type=data_sources.SOURCE_COUCH,
        data_source_name=couch_db.dbname,
        default_topic=default_topic,
    )
    feed = CouchChangeFeed(couch_db)
    checkpoint = PillowCheckpoint(pillow_id, feed.sequence_format)
    # checkpoint every 100 processed changes
    event_handler = PillowCheckpointEventHandler(
        checkpoint=checkpoint,
        checkpoint_frequency=100,
    )
    return ConstructedPillow(
        name=pillow_id,
        checkpoint=checkpoint,
        change_feed=feed,
        processor=kafka_processor,
        change_processed_event_handler=event_handler,
    )
def GetDocPillow():
    """Build a fake pillow that runs GetDocProcessor over a random change feed."""
    checkpoint = PillowCheckpoint('get_doc_processor', 'text')
    return FakeConstructedPillow(
        name='GetDocPillow',
        checkpoint=checkpoint,
        change_feed=RandomChangeFeed(10),
        processor=GetDocProcessor(),
    )
def get_form_submission_metadata_tracker_pillow(
        pillow_id='FormSubmissionMetadataTrackerProcessor',
        num_processes=1, process_num=0, **kwargs):
    """
    This gets a pillow which iterates through all forms and marks the
    corresponding app as having submissions. This could be expanded to be more
    generic and include other processing that needs to happen on each form.
    """
    # NOTE(review): 'form-processsor' (sic) looks like a typo but is the live
    # consumer group id — changing the spelling would abandon existing offsets.
    feed = KafkaChangeFeed(
        topics=[topics.FORM, topics.FORM_SQL],
        group_id='form-processsor',
        num_processes=num_processes,
        process_num=process_num,
    )
    checkpoint = PillowCheckpoint('form-submission-metadata-tracker', feed.sequence_format)
    tracker = FormSubmissionMetadataTrackerProcessor()
    return ConstructedPillow(
        name=pillow_id,
        checkpoint=checkpoint,
        change_feed=feed,
        processor=tracker,
        change_processed_event_handler=KafkaCheckpointEventHandler(
            checkpoint=checkpoint,
            checkpoint_frequency=100,
            change_feed=feed,
        ),
    )
def __init__(self, indicator_name, kafka_topic, processor, domains=None, doc_type=None):
    """Wire a fluff pillow to one indicator's Kafka topic.

    ``domains`` and ``doc_type`` fall back to the processor's own values
    when not given explicitly.
    """
    self.kafka_topic = kafka_topic
    self.domains = domains or processor.domains
    self.doc_type = doc_type or processor.doc_type
    feed = KafkaChangeFeed(topics=[self.kafka_topic], group_id=indicator_name)
    pillow_name = '{}Pillow'.format(indicator_name)
    # the checkpoint id embeds get_machine_id() so it is machine-specific
    checkpoint = PillowCheckpoint(
        'fluff.{}.{}'.format(pillow_name, get_machine_id()),
        feed.sequence_format,
    )
    event_handler = KafkaCheckpointEventHandler(
        checkpoint=checkpoint,
        checkpoint_frequency=1000,
        change_feed=feed,
    )
    super(FluffPillow, self).__init__(
        name=pillow_name,
        checkpoint=checkpoint,
        change_feed=feed,
        processor=processor,
        change_processed_event_handler=event_handler,
    )
def get_main_blob_deletion_pillow(pillow_id):
    """Get blob deletion pillow for the main couch database.

    Using the KafkaChangeFeed ties this to the main couch database.
    """
    checkpoint = PillowCheckpoint('kafka-blob-deletion-pillow-checkpoint')
    feed = KafkaChangeFeed(topics=[topics.META], group_id='blob-deletion-group')
    return _get_blob_deletion_pillow(pillow_id, get_db(None), checkpoint, feed)
def make_fake_constructed_pillow(pillow_id, checkpoint_id):
    """Return a FakeConstructedPillow that just logs random changes."""
    from pillowtop.feed.mock import RandomChangeFeed
    from pillowtop.processors import LoggingProcessor

    return FakeConstructedPillow(
        name=pillow_id,
        checkpoint=PillowCheckpoint(checkpoint_id, 'text'),
        change_feed=RandomChangeFeed(10),
        processor=LoggingProcessor(),
    )
def _make_couch_pillow(couch_db):
    """Build a fake couch-backed pillow whose process_change is mocked out."""
    from pillowtop.feed.couch import CouchChangeFeed
    from pillowtop.processors import LoggingProcessor
    from pillowtop.checkpoints.manager import PillowCheckpoint

    fake_pillow = FakeConstructedPillow(
        name='fake-couch-pillow',
        checkpoint=PillowCheckpoint('fake-feed-test-checkpoint', 'text'),
        change_feed=CouchChangeFeed(couch_db=couch_db),
        processor=LoggingProcessor(),
    )
    # stub out real processing; callers only inspect the mock's call record
    fake_pillow.process_change = MagicMock(return_value=True)
    return fake_pillow
def get_change_feed_pillow_for_db(pillow_id, couch_db):
    """Build a pillow that republishes this couch db's changes to Kafka."""
    kafka_processor = KafkaProcessor(
        data_source_type=data_sources.SOURCE_COUCH,
        data_source_name=couch_db.dbname
    )
    feed = CouchChangeFeed(couch_db)
    checkpoint = PillowCheckpoint(pillow_id, feed.sequence_format)
    event_handler = PillowCheckpointEventHandler(
        checkpoint=checkpoint,
        checkpoint_frequency=100,
    )
    return ConstructedPillow(
        name=pillow_id,
        checkpoint=checkpoint,
        change_feed=feed,
        processor=kafka_processor,
        change_processed_event_handler=event_handler,
    )
def get_change_feed_pillow_for_db(pillow_id, couch_db):
    """Build a couch-to-Kafka pillow using an explicit Kafka client.

    The client may be None (see get_kafka_client_or_none).
    """
    kafka_client = get_kafka_client_or_none()
    kafka_processor = KafkaProcessor(
        kafka_client,
        data_source_type=data_sources.COUCH,
        data_source_name=couch_db.dbname
    )
    feed = CouchChangeFeed(couch_db, include_docs=True)
    checkpoint = PillowCheckpoint(pillow_id, feed.sequence_format)
    return ConstructedPillow(
        name=pillow_id,
        checkpoint=checkpoint,
        change_feed=feed,
        processor=kafka_processor,
        change_processed_event_handler=PillowCheckpointEventHandler(
            checkpoint=checkpoint,
            checkpoint_frequency=100,
        ),
    )
def __init__(self, processor, pillow_name):
    """Construct a configurable-report pillow listening on all Kafka topics."""
    feed = KafkaChangeFeed(topics.ALL, group_id=pillow_name)
    checkpoint = PillowCheckpoint(pillow_name)
    handler = MultiTopicCheckpointEventHandler(
        checkpoint=checkpoint,
        checkpoint_frequency=1000,
        change_feed=feed
    )
    super(ConfigurableReportKafkaPillow, self).__init__(
        name=pillow_name,
        change_feed=feed,
        processor=processor,
        checkpoint=checkpoint,
        change_processed_event_handler=handler
    )
    # set by the superclass constructor
    assert self._processor is not None
    assert self._processor.bootstrapped is not None
def get_user_sync_history_pillow(pillow_id='UpdateUserSyncHistoryPillow', **kwargs):
    """
    This gets a pillow which iterates through all synclogs.
    """
    synclog_db = SyncLog.get_db()
    feed = CouchChangeFeed(synclog_db, include_docs=True)
    checkpoint = PillowCheckpoint('synclog', feed.sequence_format)
    sync_processor = UserSyncHistoryProcessor()
    return ConstructedPillow(
        name=pillow_id,
        checkpoint=checkpoint,
        change_feed=feed,
        processor=sync_processor,
        change_processed_event_handler=PillowCheckpointEventHandler(
            checkpoint=checkpoint,
            checkpoint_frequency=100),
    )
def _get_mvp_indicator_pillow(pillow_id, processor):
    """Build an MVP indicator pillow filtered to the processor's domains and doc types."""
    # machine-specific checkpoint id (embeds get_machine_id())
    checkpoint = PillowCheckpoint(
        'mvp_docs.pillows.{}.{}'.format(pillow_id, get_machine_id()),
    )
    feed = CouchChangeFeed(
        XFormInstance.get_db(),
        include_docs=True,
        couch_filter='hqadmin/domains_and_doc_types',
        extra_couch_view_params={
            'domains': ' '.join(processor.domains),
            'doc_types': ' '.join(processor.doc_types),
        },
    )
    return ConstructedPillow(
        name=pillow_id,
        checkpoint=checkpoint,
        change_feed=feed,
        processor=processor,
        change_processed_event_handler=PillowCheckpointEventHandler(
            checkpoint=checkpoint,
            checkpoint_frequency=100),
    )
def _get_blob_deletion_pillow(pillow_id, couch_db, checkpoint=None, change_feed=None):
    """Assemble a blob-deletion pillow.

    ``checkpoint`` and ``change_feed`` default to couch-based instances
    when not supplied by the caller.
    """
    if checkpoint is None:
        checkpoint = PillowCheckpoint(pillow_id)
    if change_feed is None:
        change_feed = CouchChangeFeed(couch_db, include_docs=False)
    deletion_processor = BlobDeletionProcessor(get_blob_db(), couch_db.dbname)
    return ConstructedPillow(
        name=pillow_id,
        checkpoint=checkpoint,
        change_feed=change_feed,
        processor=deletion_processor,
        change_processed_event_handler=PillowCheckpointEventHandler(
            checkpoint=checkpoint,
            checkpoint_frequency=KAFKA_CHECKPOINT_FREQUENCY,
        ),
    )
def __init__(self, indicator_class, processor):
    """Wire a fluff pillow to the Kafka topic of ``indicator_class``."""
    self.indicator_class = indicator_class
    self.kafka_topic = indicator_class().kafka_topic
    self.domains = processor.domains
    self.doc_type = processor.doc_type
    pillow_name = '{}Pillow'.format(indicator_class.__name__)
    # checkpoint id embeds get_machine_id(), so it is per-machine
    checkpoint = PillowCheckpoint('fluff.{}.{}'.format(pillow_name, get_machine_id()))
    feed = KafkaChangeFeed(topics=[self.kafka_topic], group_id=indicator_class.__name__)
    super(FluffPillow, self).__init__(
        name=pillow_name,
        checkpoint=checkpoint,
        change_feed=feed,
        processor=processor,
        change_processed_event_handler=PillowCheckpointEventHandler(
            checkpoint=checkpoint,
            checkpoint_frequency=1000,
        )
    )
def test_basic(self):
    """Chunked processing is preferred, with fallback to per-change processing."""
    # setup
    feed = KafkaChangeFeed(topics=[topics.CASE], client_id='test-kafka-feed')
    pillow_name = 'test-chunked-processing'
    pillow_checkpoint = PillowCheckpoint(pillow_name, feed.sequence_format)
    processor = ChunkedCountProcessor()
    single_change_impl = processor.process_change
    chunk_impl = processor.process_changes_chunk
    pillow = ConstructedPillow(
        name=pillow_name,
        checkpoint=pillow_checkpoint,
        change_feed=feed,
        processor=processor,
        change_processed_event_handler=KafkaCheckpointEventHandler(
            checkpoint=pillow_checkpoint, checkpoint_frequency=1, change_feed=feed),
        processor_chunk_size=2)

    since = feed.get_latest_offsets()
    self._produce_changes(2)
    # pillow should use process_changes_chunk; making process_change raise
    # proves the single-change path is never taken
    processor.process_change = MagicMock(side_effect=Exception('_'))
    pillow.process_changes(since=since, forever=False)
    self.assertEqual(processor.count, 2)

    self._produce_changes(2)
    # if process_changes_chunk raises an exception, pillow should fall back
    # to process_change
    processor.process_change = single_change_impl
    processor.process_changes_chunk = MagicMock(side_effect=Exception('_'))
    pillow.process_changes(since=pillow.get_last_checkpoint_sequence(), forever=False)
    self.assertEqual(processor.count, 4)

    self._produce_changes(1)
    # offsets after a full chunk should still be processed
    processor.process_change = MagicMock(side_effect=Exception('_'))
    processor.process_changes_chunk = chunk_impl
    pillow.process_changes(since=pillow.get_last_checkpoint_sequence(), forever=False)
    self.assertEqual(processor.count, 5)
def get_app_form_submission_tracker_pillow(
        pillow_id='AppFormSubmissionTrackerPillow'):
    """
    This gets a pillow which iterates through all forms and marks the
    corresponding app as having submissions. This could be expanded to be more
    generic and include other processing that needs to happen on each form.
    """
    checkpoint = PillowCheckpoint('app-form-submission-tracker')
    tracker = AppFormSubmissionTrackerProcessor()
    # NOTE(review): 'form-processsor' (sic) looks like a typo but is the live
    # consumer group id — changing the spelling would abandon existing offsets.
    feed = KafkaChangeFeed(topics=[topics.FORM, topics.FORM_SQL],
                           group_id='form-processsor')
    return ConstructedPillow(
        name=pillow_id,
        checkpoint=checkpoint,
        change_feed=feed,
        processor=tracker,
        change_processed_event_handler=MultiTopicCheckpointEventHandler(
            checkpoint=checkpoint,
            checkpoint_frequency=100,
            change_feed=feed,
        ),
    )
def checkpoint(self):
    """Return a text-format PillowCheckpoint for this instance's checkpoint id."""
    checkpoint_id = self._checkpoint_id
    return PillowCheckpoint(checkpoint_id, 'text')
def test_checkpoint_id(self):
    """checkpoint_id should echo the id passed to the constructor."""
    expected_id = 'test-checkpoint-id'
    checkpoint = PillowCheckpoint(expected_id, 'text')
    self.assertEqual(expected_id, checkpoint.checkpoint_id)
def __init__(self):
    """Build a FakePillow with a fixed checkpoint, random feed, and log-only processor."""
    fake_checkpoint = PillowCheckpoint('test_pillow_import', 'text')
    super(FakePillow, self).__init__(
        'fake pillow',
        fake_checkpoint,
        RandomChangeFeed(10),
        LoggingProcessor(),
    )