def get_form_submission_metadata_tracker_pillow(
        pillow_id='FormSubmissionMetadataTrackerProcessor',
        num_processes=1, process_num=0, **kwargs):
    """
    Build the pillow that walks over every form and flags the app each
    form belongs to as having received submissions.

    The pillow could later be made more generic so that it also hosts
    other per-form processing.

    :param pillow_id: name given to the constructed pillow
    :param num_processes: forwarded to the Kafka change feed
    :param process_num: forwarded to the Kafka change feed
    :param kwargs: accepted for call compatibility; not used here
    :return: a ``ConstructedPillow`` wired to the form topics
    """
    feed = KafkaChangeFeed(
        topics=[topics.FORM, topics.FORM_SQL],
        group_id='form-processsor',
        num_processes=num_processes,
        process_num=process_num,
    )
    tracker_checkpoint = PillowCheckpoint(
        'form-submission-metadata-tracker',
        feed.sequence_format,
    )
    tracker_processor = FormSubmissionMetadataTrackerProcessor()
    # The checkpoint handler is asked to checkpoint with a frequency of 100.
    on_change_processed = KafkaCheckpointEventHandler(
        checkpoint=tracker_checkpoint,
        checkpoint_frequency=100,
        change_feed=feed,
    )
    return ConstructedPillow(
        name=pillow_id,
        checkpoint=tracker_checkpoint,
        change_feed=feed,
        processor=tracker_processor,
        change_processed_event_handler=on_change_processed,
    )
def get_form_submission_metadata_tracker_pillow(
        pillow_id='FormSubmissionMetadataTrackerPillow',
        num_processes=1, process_num=0, **kwargs):
    """
    Build the pillow that walks over every form and flags the app each
    form belongs to as having received submissions.

    Processors:
      - :py:class:`pillowtop.processors.form.FormSubmissionMetadataTrackerProcessor`

    :param pillow_id: name given to the constructed pillow
    :param num_processes: forwarded to the Kafka change feed
    :param process_num: forwarded to the Kafka change feed
    :param kwargs: accepted for call compatibility; not used here
    :return: a ``ConstructedPillow`` reading from ``topics.FORM_TOPICS``
    """
    # todo; To remove after full rollout of https://github.com/dimagi/commcare-hq/pull/21329/
    form_topics = topics.FORM_TOPICS
    feed = KafkaChangeFeed(
        topics=form_topics,
        client_id='form-processsor',
        num_processes=num_processes,
        process_num=process_num,
    )
    tracker_checkpoint = KafkaPillowCheckpoint(
        'form-submission-metadata-tracker',
        topics.FORM_TOPICS,
    )
    tracker_processor = FormSubmissionMetadataTrackerProcessor()
    # The checkpoint handler is asked to checkpoint with a frequency of 100.
    on_change_processed = KafkaCheckpointEventHandler(
        checkpoint=tracker_checkpoint,
        checkpoint_frequency=100,
        change_feed=feed,
    )
    return ConstructedPillow(
        name=pillow_id,
        checkpoint=tracker_checkpoint,
        change_feed=feed,
        processor=tracker_processor,
        change_processed_event_handler=on_change_processed,
    )
def __init__(self, doc_provider, data_source_type, data_source_name, chunk_size=1000, reset=False):
    """
    Record the reindex settings and build the document processor.

    :param doc_provider: stored unchanged on ``self.doc_provider``
    :param data_source_type: forwarded to ``AppFormSubmissionReindexDocProcessor``
    :param data_source_name: forwarded to ``AppFormSubmissionReindexDocProcessor``
    :param chunk_size: stored on ``self.chunk_size`` (defaults to 1000)
    :param reset: stored on ``self.reset`` (defaults to False)
    """
    self.doc_provider = doc_provider
    self.chunk_size = chunk_size
    self.reset = reset

    # The reindex doc processor wraps a fresh metadata tracker processor.
    tracker = FormSubmissionMetadataTrackerProcessor()
    self.doc_processor = AppFormSubmissionReindexDocProcessor(
        tracker,
        data_source_type,
        data_source_name,
    )
def get_form_submission_metadata_tracker_pillow(
        pillow_id='FormSubmissionMetadataTrackerProcessor',
        num_processes=1, process_num=0, **kwargs):
    """
    Build the pillow that walks over every form and flags the app each
    form belongs to as having received submissions.

    The pillow could later be made more generic so that it also hosts
    other per-form processing.

    :param pillow_id: name given to the constructed pillow
    :param num_processes: forwarded to the Kafka change feed
    :param process_num: forwarded to the Kafka change feed
    :param kwargs: accepted for call compatibility; not used here
    :return: a ``ConstructedPillow`` reading from ``topics.FORM_TOPICS``
    """
    # todo; To remove after full rollout of https://github.com/dimagi/commcare-hq/pull/21329/
    form_topics = topics.FORM_TOPICS
    feed = KafkaChangeFeed(
        topics=form_topics,
        client_id='form-processsor',
        num_processes=num_processes,
        process_num=process_num,
    )
    tracker_checkpoint = KafkaPillowCheckpoint(
        'form-submission-metadata-tracker',
        topics.FORM_TOPICS,
    )
    tracker_processor = FormSubmissionMetadataTrackerProcessor()
    # The checkpoint handler is asked to checkpoint with a frequency of 100.
    on_change_processed = KafkaCheckpointEventHandler(
        checkpoint=tracker_checkpoint,
        checkpoint_frequency=100,
        change_feed=feed,
    )
    return ConstructedPillow(
        name=pillow_id,
        checkpoint=tracker_checkpoint,
        change_feed=feed,
        processor=tracker_processor,
        change_processed_event_handler=on_change_processed,
    )
def __init__(self, doc_provider, chunk_size=1000, reset=False):
    """
    Record the reindex settings and build the document processor.

    :param doc_provider: stored unchanged on ``self.doc_provider``
    :param chunk_size: stored on ``self.chunk_size`` (defaults to 1000)
    :param reset: stored on ``self.reset`` (defaults to False)
    """
    self.doc_provider = doc_provider
    self.chunk_size = chunk_size
    self.reset = reset

    # The doc processor wraps a fresh metadata tracker processor.
    tracker = FormSubmissionMetadataTrackerProcessor()
    self.doc_processor = UserAppFormSubmissionDocProcessor(tracker)
def get_xform_pillow(pillow_id='xform-pillow', ucr_division=None,
                     include_ucrs=None, exclude_ucrs=None,
                     num_processes=1, process_num=0, ucr_configs=None, skip_ucr=False,
                     processor_chunk_size=DEFAULT_PROCESSOR_CHUNK_SIZE, topics=None, **kwargs):
    """Generic XForm change processor

    Processors, in the order they are attached:
      - ``ElasticProcessor`` writing to ``XFORM_INDEX_INFO`` (always)
      - ``UnknownUsersProcessor`` (only when ``settings.RUN_UNKNOWN_USER_PILLOW``)
      - ``FormSubmissionMetadataTrackerProcessor`` (only when ``settings.RUN_FORM_META_PILLOW``)
      - ``ElasticProcessor`` writing to ``REPORT_XFORM_INDEX_INFO``
        (only when ``settings.ENTERPRISE_MODE`` is falsy)
      - ``ConfigurableReportPillowProcessor`` (skipped when ``skip_ucr=True``)

    :param pillow_id: pillow name; also used as the Kafka ``client_id`` and as
        the prefix of the checkpoint id
    :param ucr_division: forwarded to the UCR processor
    :param include_ucrs: forwarded to the UCR processor
    :param exclude_ucrs: forwarded to the UCR processor
    :param num_processes: forwarded to the Kafka change feed
    :param process_num: forwarded to the Kafka change feed; only process 0
        is configured to run UCR migrations
    :param ucr_configs: when truthy, used to bootstrap the UCR processor
    :param skip_ucr: when True the UCR processor is not added to the processor
        list (it is still passed as the checkpoint callback)
    :param processor_chunk_size: forwarded to ``ConstructedPillow``
    :param topics: optional subset of ``FORM_TOPICS``; defaults to ``FORM_TOPICS``
    :param kwargs: accepted for call compatibility; not used here
    :raises AssertionError: if ``topics`` contains anything outside ``FORM_TOPICS``
    :return: the configured ``ConstructedPillow``
    """
    # avoid circular dependency
    from corehq.pillows.reportxform import transform_xform_for_report_forms_index, report_xform_filter
    from corehq.pillows.mappings.user_mapping import USER_INDEX

    if topics:
        # NOTE: ``assert`` is stripped under ``python -O``; it is kept so the
        # exception type seen by existing callers does not change.
        assert set(topics).issubset(FORM_TOPICS), "This is a pillow to process forms only"
    topics = topics or FORM_TOPICS

    change_feed = KafkaChangeFeed(
        topics, client_id=pillow_id, num_processes=num_processes, process_num=process_num
    )

    ucr_processor = ConfigurableReportPillowProcessor(
        data_source_providers=[
            DynamicDataSourceProvider('XFormInstance'),
            StaticDataSourceProvider('XFormInstance'),
        ],
        ucr_division=ucr_division,
        include_ucrs=include_ucrs,
        exclude_ucrs=exclude_ucrs,
        run_migrations=(process_num == 0),  # only first process runs migrations
    )
    xform_to_es_processor = ElasticProcessor(
        elasticsearch=get_es_new(),
        index_info=XFORM_INDEX_INFO,
        doc_prep_fn=transform_xform_for_elasticsearch,
        doc_filter_fn=xform_pillow_filter,
    )
    unknown_user_form_processor = UnknownUsersProcessor()
    form_meta_processor = FormSubmissionMetadataTrackerProcessor()

    checkpoint_id = "{}-{}-{}-{}".format(
        pillow_id, XFORM_INDEX_INFO.index, REPORT_XFORM_INDEX_INFO.index, USER_INDEX)
    checkpoint = KafkaPillowCheckpoint(checkpoint_id, topics)
    # The UCR processor is registered as the checkpoint callback even when
    # skip_ucr is set.
    event_handler = KafkaCheckpointEventHandler(
        checkpoint=checkpoint, checkpoint_frequency=1000, change_feed=change_feed,
        checkpoint_callback=ucr_processor,
    )

    if ucr_configs:
        ucr_processor.bootstrap(ucr_configs)

    processors = [xform_to_es_processor]
    if settings.RUN_UNKNOWN_USER_PILLOW:
        processors.append(unknown_user_form_processor)
    if settings.RUN_FORM_META_PILLOW:
        processors.append(form_meta_processor)
    if not settings.ENTERPRISE_MODE:
        xform_to_report_es_processor = ElasticProcessor(
            elasticsearch=get_es_new(),
            index_info=REPORT_XFORM_INDEX_INFO,
            doc_prep_fn=transform_xform_for_report_forms_index,
            doc_filter_fn=report_xform_filter,
        )
        processors.append(xform_to_report_es_processor)
    if not skip_ucr:
        processors.append(ucr_processor)

    return ConstructedPillow(
        name=pillow_id,
        change_feed=change_feed,
        checkpoint=checkpoint,
        change_processed_event_handler=event_handler,
        processor=processors,
        processor_chunk_size=processor_chunk_size,
    )
def get_xform_pillow(pillow_id='xform-pillow', ucr_division=None,
                     include_ucrs=None, exclude_ucrs=None,
                     num_processes=1, process_num=0, ucr_configs=None, skip_ucr=False,
                     processor_chunk_size=DEFAULT_PROCESSOR_CHUNK_SIZE, topics=None,
                     dedicated_migration_process=False, **kwargs):
    """Generic XForm change processor

    Processors:
      - :py:class:`corehq.apps.userreports.pillow.ConfigurableReportPillowProcessor` (disabled when skip_ucr=True)
      - :py:class:`pillowtop.processors.elastic.BulkElasticProcessor` (XForm index; a second instance for
        the report XForm index is added unless ENTERPRISE_MODE is set)
      - :py:class:`corehq.pillows.user.UnknownUsersProcessor` (disabled when RUN_UNKNOWN_USER_PILLOW=False)
      - :py:class:`pillowtop.processors.form.FormSubmissionMetadataTrackerProcessor` (disabled when
        RUN_FORM_META_PILLOW=False)
      - :py:class:`corehq.apps.data_interfaces.pillow.CaseDeduplicationProcessor` (disabled when
        RUN_DEDUPLICATION_PILLOW=False)

    :param pillow_id: pillow name; also used as the Kafka ``client_id`` and as
        the prefix of the checkpoint id
    :param ucr_division: forwarded to ``get_ucr_processor``
    :param include_ucrs: forwarded to ``get_ucr_processor``
    :param exclude_ucrs: forwarded to ``get_ucr_processor``
    :param num_processes: forwarded to the Kafka change feed
    :param process_num: forwarded to the Kafka change feed and the pillow; only
        process 0 is configured to run UCR migrations
    :param ucr_configs: forwarded to ``get_ucr_processor``
    :param skip_ucr: when True the UCR processor is not added to the processor
        list (it is still passed as the checkpoint callback)
    :param processor_chunk_size: forwarded to ``ConstructedPillow``
    :param topics: optional subset of ``FORM_TOPICS``; defaults to ``FORM_TOPICS``
    :param dedicated_migration_process: forwarded to the change feed; the pillow
        is flagged as the dedicated migration process only when this is set
        and ``process_num`` is 0
    :param kwargs: accepted for call compatibility; not used here
    :raises AssertionError: if ``topics`` contains anything outside ``FORM_TOPICS``
    :return: the configured ``ConstructedPillow``
    """
    # avoid circular dependency
    from corehq.pillows.reportxform import transform_xform_for_report_forms_index, report_xform_filter
    from corehq.pillows.mappings.user_mapping import USER_INDEX

    if topics:
        # NOTE: ``assert`` is stripped under ``python -O``; it is kept so the
        # exception type seen by existing callers does not change.
        assert set(topics).issubset(FORM_TOPICS), "This is a pillow to process forms only"
    topics = topics or FORM_TOPICS

    change_feed = KafkaChangeFeed(
        topics, client_id=pillow_id, num_processes=num_processes, process_num=process_num,
        dedicated_migration_process=dedicated_migration_process
    )

    ucr_processor = get_ucr_processor(
        data_source_providers=[
            DynamicDataSourceProvider('XFormInstance'),
            StaticDataSourceProvider('XFormInstance'),
        ],
        ucr_division=ucr_division,
        include_ucrs=include_ucrs,
        exclude_ucrs=exclude_ucrs,
        run_migrations=(process_num == 0),  # only first process runs migrations
        ucr_configs=ucr_configs,
    )
    xform_to_es_processor = BulkElasticProcessor(
        elasticsearch=get_es_new(),
        index_info=XFORM_INDEX_INFO,
        doc_prep_fn=transform_xform_for_elasticsearch,
        doc_filter_fn=xform_pillow_filter,
        change_filter_fn=is_couch_change_for_sql_domain,
    )
    unknown_user_form_processor = UnknownUsersProcessor()
    form_meta_processor = FormSubmissionMetadataTrackerProcessor()

    checkpoint_id = "{}-{}-{}-{}".format(
        pillow_id, XFORM_INDEX_INFO.index, REPORT_XFORM_INDEX_INFO.index, USER_INDEX)
    checkpoint = KafkaPillowCheckpoint(checkpoint_id, topics)
    # The UCR processor is registered as the checkpoint callback even when
    # skip_ucr is set.
    event_handler = KafkaCheckpointEventHandler(
        checkpoint=checkpoint, checkpoint_frequency=1000, change_feed=change_feed,
        checkpoint_callback=ucr_processor,
    )

    processors = [xform_to_es_processor]
    if settings.RUN_UNKNOWN_USER_PILLOW:
        processors.append(unknown_user_form_processor)
    if settings.RUN_FORM_META_PILLOW:
        processors.append(form_meta_processor)
    if settings.RUN_DEDUPLICATION_PILLOW:
        processors.append(CaseDeduplicationProcessor())
    if not settings.ENTERPRISE_MODE:
        xform_to_report_es_processor = BulkElasticProcessor(
            elasticsearch=get_es_new(),
            index_info=REPORT_XFORM_INDEX_INFO,
            doc_prep_fn=transform_xform_for_report_forms_index,
            doc_filter_fn=report_xform_filter,
            change_filter_fn=is_couch_change_for_sql_domain,
        )
        processors.append(xform_to_report_es_processor)
    if not skip_ucr:
        processors.append(ucr_processor)

    return ConstructedPillow(
        name=pillow_id,
        change_feed=change_feed,
        checkpoint=checkpoint,
        change_processed_event_handler=event_handler,
        processor=processors,
        processor_chunk_size=processor_chunk_size,
        process_num=process_num,
        is_dedicated_migration_process=dedicated_migration_process and (process_num == 0),
    )