Example #1
    def __init__(self, processor, pillow_name, topics, num_processes, process_num, retry_errors=False,
            processor_chunk_size=0):
        change_feed = KafkaChangeFeed(
            topics, client_id=pillow_name, num_processes=num_processes, process_num=process_num
        )
        checkpoint = KafkaPillowCheckpoint(pillow_name, topics)
        event_handler = KafkaCheckpointEventHandler(
            checkpoint=checkpoint, checkpoint_frequency=1000, change_feed=change_feed,
            checkpoint_callback=processor
        )
        super(ConfigurableReportKafkaPillow, self).__init__(
            name=pillow_name,
            change_feed=change_feed,
            processor=processor,
            checkpoint=checkpoint,
            change_processed_event_handler=event_handler,
            processor_chunk_size=processor_chunk_size
        )
        # set by the superclass constructor
        assert self.processors is not None
        assert len(self.processors) == 1
        self._processor = self.processors[0]
        assert self._processor.bootstrapped is not None

        # retry errors defaults to False because there is no way to
        # distinguish between doc save errors and data source config errors
        self.retry_errors = retry_errors
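For orientation, a minimal usage sketch of this constructor; the processor instance and topic name below are invented for illustration, and run() is assumed to be the usual pillowtop entry point for consuming the change feed:

# Hypothetical usage -- `my_ucr_processor` and the topic name are invented.
pillow = ConfigurableReportKafkaPillow(
    processor=my_ucr_processor,   # assumed: a ConfigurableReportPillowProcessor
    pillow_name='example-ucr-pillow',
    topics=['case-sql'],
    num_processes=1,
    process_num=0,
)
pillow.run()  # consume changes from Kafka until stopped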
Example #2
def get_location_pillow(pillow_id='location-ucr-pillow',
                        include_ucrs=None,
                        num_processes=1,
                        process_num=0,
                        ucr_configs=None,
                        **kwargs):
    # TODO: is ucr_division needed?
    change_feed = KafkaChangeFeed([LOCATION_TOPIC],
                                  client_id=pillow_id,
                                  num_processes=num_processes,
                                  process_num=process_num)
    ucr_processor = ConfigurableReportPillowProcessor(
        data_source_providers=[
            DynamicDataSourceProvider('Location'),
            StaticDataSourceProvider('Location')
        ],
        include_ucrs=include_ucrs,
    )
    if ucr_configs:
        ucr_processor.bootstrap(ucr_configs)
    checkpoint = KafkaPillowCheckpoint(pillow_id, [LOCATION_TOPIC])
    event_handler = KafkaCheckpointEventHandler(
        checkpoint=checkpoint,
        checkpoint_frequency=1000,
        change_feed=change_feed,
        checkpoint_callback=ucr_processor)
    return ConstructedPillow(name=pillow_id,
                             change_feed=change_feed,
                             checkpoint=checkpoint,
                             change_processed_event_handler=event_handler,
                             processor=[ucr_processor])
Example #3
def get_group_pillow(pillow_id='group-pillow',
                     num_processes=1,
                     process_num=0,
                     **kwargs):
    """Group pillow

    Processors:
      - :py:class:`corehq.pillows.groups_to_user.GroupsToUsersProcessor`
      - :py:func:`corehq.pillows.group.get_group_to_elasticsearch_processor`
    """
    assert pillow_id == 'group-pillow', 'Pillow ID is not allowed to change'
    to_user_es_processor = GroupsToUsersProcessor()
    to_group_es_processor = get_group_to_elasticsearch_processor()
    change_feed = KafkaChangeFeed(topics=[topics.GROUP],
                                  client_id='groups-to-users',
                                  num_processes=num_processes,
                                  process_num=process_num)
    checkpoint_id = "{}-{}-{}".format(pillow_id, USER_INDEX,
                                      to_group_es_processor.index_info.index)
    checkpoint = KafkaPillowCheckpoint(checkpoint_id, [topics.GROUP])
    return ConstructedPillow(
        name=pillow_id,
        checkpoint=checkpoint,
        change_feed=change_feed,
        processor=[to_user_es_processor, to_group_es_processor],
        change_processed_event_handler=KafkaCheckpointEventHandler(
            checkpoint=checkpoint,
            checkpoint_frequency=10,
            change_feed=change_feed),
    )
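Because num_processes and process_num partition the Kafka topic across workers, each worker constructs its own pillow with a distinct slot. A hedged sketch of that fan-out (the worker count is invented; in practice each instance would run in its own OS process, e.g. under a process supervisor):

# Sketch only: one pillow per partition slot.
NUM_WORKERS = 3
pillows = [
    get_group_pillow(num_processes=NUM_WORKERS, process_num=n)
    for n in range(NUM_WORKERS)
]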
Example #4
def get_case_messaging_sync_pillow(
        pillow_id='case_messaging_sync_pillow',
        topics=None,
        num_processes=1,
        process_num=0,
        processor_chunk_size=DEFAULT_PROCESSOR_CHUNK_SIZE,
        **kwargs):
    if topics:
        assert set(topics).issubset(
            CASE_TOPICS), set(topics) - set(CASE_TOPICS)
    topics = topics or CASE_TOPICS
    change_feed = KafkaChangeFeed(topics,
                                  client_id=pillow_id,
                                  num_processes=num_processes,
                                  process_num=process_num)
    checkpoint = KafkaPillowCheckpoint(pillow_id, topics)
    event_handler = KafkaCheckpointEventHandler(
        checkpoint=checkpoint,
        checkpoint_frequency=1000,
        change_feed=change_feed,
    )
    return ConstructedPillow(name=pillow_id,
                             change_feed=change_feed,
                             checkpoint=checkpoint,
                             change_processed_event_handler=event_handler,
                             processor=[CaseMessagingSyncProcessor()],
                             processor_chunk_size=processor_chunk_size)
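The topic assertion above uses the set difference as its failure message, so a bad `topics` argument reports exactly which entries are disallowed. A standalone illustration with invented topic names:

# Illustration only: the expression after the comma becomes the
# AssertionError message -- here, the set of disallowed topics.
CASE_TOPICS = {'case', 'case-sql'}
topics = ['case-sql', 'form-sql']
assert set(topics).issubset(CASE_TOPICS), set(topics) - set(CASE_TOPICS)
# AssertionError: {'form-sql'}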
Example #5
def get_location_pillow(pillow_id='location-ucr-pillow', include_ucrs=None,
                        num_processes=1, process_num=0, ucr_configs=None, **kwargs):
    """Processes updates to locations for UCR

    Note this is only applicable if a domain on the environment has the `LOCATIONS_IN_UCR` flag enabled.

    Processors:
      - :py:class:`corehq.apps.userreports.pillow.ConfigurableReportPillowProcessor`
    """
    change_feed = KafkaChangeFeed(
        [LOCATION_TOPIC], client_id=pillow_id, num_processes=num_processes, process_num=process_num
    )
    ucr_processor = ConfigurableReportPillowProcessor(
        data_source_providers=[DynamicDataSourceProvider('Location'), StaticDataSourceProvider('Location')],
        include_ucrs=include_ucrs,
    )
    if ucr_configs:
        ucr_processor.bootstrap(ucr_configs)
    checkpoint = KafkaPillowCheckpoint(pillow_id, [LOCATION_TOPIC])
    event_handler = KafkaCheckpointEventHandler(
        checkpoint=checkpoint, checkpoint_frequency=1000, change_feed=change_feed,
        checkpoint_callback=ucr_processor
    )
    return ConstructedPillow(
        name=pillow_id,
        change_feed=change_feed,
        checkpoint=checkpoint,
        change_processed_event_handler=event_handler,
        processor=[ucr_processor]
    )
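bootstrap(ucr_configs) appears to pin the processor to an explicit set of data source configs rather than having it discover them from the providers, which is useful in tests. A hedged usage sketch (the config object is invented):

# Hypothetical: pin the location pillow to a single known UCR data source.
pillow = get_location_pillow(ucr_configs=[my_location_data_source_config])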
Example #6
def get_form_submission_metadata_tracker_pillow(pillow_id='FormSubmissionMetadataTrackerPillow',
                                                num_processes=1, process_num=0, **kwargs):
    """
    This gets a pillow which iterates through all forms and marks the corresponding app
    as having submissions.

    Processors:
      - :py:class:`pillowtop.processors.form.FormSubmissionMetadataTrackerProcessor`
    """
    # TODO: remove after full rollout of https://github.com/dimagi/commcare-hq/pull/21329/
    change_feed = KafkaChangeFeed(
        topics=topics.FORM_TOPICS, client_id='form-processsor',
        num_processes=num_processes, process_num=process_num
    )
    checkpoint = KafkaPillowCheckpoint('form-submission-metadata-tracker', topics.FORM_TOPICS)
    form_processor = FormSubmissionMetadataTrackerProcessor()
    return ConstructedPillow(
        name=pillow_id,
        checkpoint=checkpoint,
        change_feed=change_feed,
        processor=form_processor,
        change_processed_event_handler=KafkaCheckpointEventHandler(
            checkpoint=checkpoint, checkpoint_frequency=100, change_feed=change_feed,
        ),
    )
Example #7
def get_form_submission_metadata_tracker_pillow(
        pillow_id='FormSubmissionMetadataTrackerProcessor',
        num_processes=1,
        process_num=0,
        **kwargs):
    """
    This gets a pillow which iterates through all forms and marks the corresponding app
    as having submissions. This could be expanded to be more generic and include
    other processing that needs to happen on each form
    """
    change_feed = KafkaChangeFeed(topics=topics.FORM_TOPICS,
                                  group_id='form-processsor',
                                  num_processes=num_processes,
                                  process_num=process_num)
    checkpoint = KafkaPillowCheckpoint('form-submission-metadata-tracker',
                                       topics.FORM_TOPICS)
    form_processor = FormSubmissionMetadataTrackerProcessor()
    return ConstructedPillow(
        name=pillow_id,
        checkpoint=checkpoint,
        change_feed=change_feed,
        processor=form_processor,
        change_processed_event_handler=KafkaCheckpointEventHandler(
            checkpoint=checkpoint,
            checkpoint_frequency=100,
            change_feed=change_feed,
        ),
    )
Example #8
def get_case_pillow(
        pillow_id='case-pillow', ucr_division=None,
        include_ucrs=None, exclude_ucrs=None,
        num_processes=1, process_num=0, ucr_configs=None, skip_ucr=False,
        processor_chunk_size=DEFAULT_PROCESSOR_CHUNK_SIZE, topics=None, **kwargs):
    """Return a pillow that processes cases. The processors include, UCR and elastic processors

    Processors:
      - :py:class:`corehq.apps.userreports.pillow.ConfigurableReportPillowProcessor` (disabled when skip_ucr=True)
      - :py:class:`pillowtop.processors.elastic.BulkElasticProcessor`
      - :py:func:`corehq.pillows.case_search.get_case_search_processor`
      - :py:class:`corehq.messaging.pillow.CaseMessagingSyncProcessor`
    """
    if topics:
        assert set(topics).issubset(CASE_TOPICS), "This is a pillow to process cases only"
    topics = topics or CASE_TOPICS
    change_feed = KafkaChangeFeed(
        topics, client_id=pillow_id, num_processes=num_processes, process_num=process_num
    )
    ucr_processor = ConfigurableReportPillowProcessor(
        data_source_providers=[DynamicDataSourceProvider('CommCareCase'), StaticDataSourceProvider('CommCareCase')],
        ucr_division=ucr_division,
        include_ucrs=include_ucrs,
        exclude_ucrs=exclude_ucrs,
        run_migrations=(process_num == 0),  # only first process runs migrations
    )
    if ucr_configs:
        ucr_processor.bootstrap(ucr_configs)
    case_to_es_processor = BulkElasticProcessor(
        elasticsearch=get_es_new(),
        index_info=CASE_INDEX_INFO,
        doc_prep_fn=transform_case_for_elasticsearch
    )
    case_search_processor = get_case_search_processor()

    checkpoint_id = "{}-{}-{}-{}".format(
        pillow_id, CASE_INDEX_INFO.index, case_search_processor.index_info.index, 'messaging-sync')
    checkpoint = KafkaPillowCheckpoint(checkpoint_id, topics)
    event_handler = KafkaCheckpointEventHandler(
        checkpoint=checkpoint, checkpoint_frequency=1000, change_feed=change_feed,
        checkpoint_callback=ucr_processor
    )
    processors = [case_to_es_processor, CaseMessagingSyncProcessor()]
    if settings.RUN_CASE_SEARCH_PILLOW:
        processors.append(case_search_processor)
    if not settings.ENTERPRISE_MODE:
        processors.append(get_case_to_report_es_processor())
    if not skip_ucr:
        # this option is useful in tests to avoid extra UCR setup where unnecessary
        processors = [ucr_processor] + processors
    return ConstructedPillow(
        name=pillow_id,
        change_feed=change_feed,
        checkpoint=checkpoint,
        change_processed_event_handler=event_handler,
        processor=processors,
        processor_chunk_size=processor_chunk_size
    )
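The composite checkpoint id ties the checkpoint to the pillow name plus every target index, so changing any index name yields a new checkpoint id. A plain illustration with invented index names:

# Invented index names, showing only how the composite id is built.
checkpoint_id = "{}-{}-{}-{}".format(
    'case-pillow', 'hqcases-index', 'case-search-index', 'messaging-sync')
# -> 'case-pillow-hqcases-index-case-search-index-messaging-sync'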
Example #9
def get_case_pillow(
        pillow_id='case-pillow', ucr_division=None,
        include_ucrs=None, exclude_ucrs=None,
        num_processes=1, process_num=0, ucr_configs=None, skip_ucr=False,
        processor_chunk_size=UCR_PROCESSING_CHUNK_SIZE, topics=None, **kwargs):
    """
    Return a pillow that processes cases. The processors include UCR and Elasticsearch processors.

    Args:
        skip_ucr: Can be set to True to skip the UCR processor, useful for tests
    """
    if topics:
        assert set(topics).issubset(CASE_TOPICS), "This is a pillow to process cases only"
    topics = topics or CASE_TOPICS
    change_feed = KafkaChangeFeed(
        topics, client_id=pillow_id, num_processes=num_processes, process_num=process_num
    )
    ucr_processor = ConfigurableReportPillowProcessor(
        data_source_providers=[DynamicDataSourceProvider(), StaticDataSourceProvider()],
        ucr_division=ucr_division,
        include_ucrs=include_ucrs,
        exclude_ucrs=exclude_ucrs,
    )
    if ucr_configs:
        ucr_processor.bootstrap(ucr_configs)
    case_to_es_processor = ElasticProcessor(
        elasticsearch=get_es_new(),
        index_info=CASE_INDEX_INFO,
        doc_prep_fn=transform_case_for_elasticsearch
    )
    case_search_processor = get_case_search_processor()

    checkpoint_id = "{}-{}-{}".format(
        pillow_id, CASE_INDEX_INFO.index, case_search_processor.index_info.index)
    checkpoint = KafkaPillowCheckpoint(checkpoint_id, topics)
    event_handler = KafkaCheckpointEventHandler(
        checkpoint=checkpoint, checkpoint_frequency=1000, change_feed=change_feed,
        checkpoint_callback=ucr_processor
    )
    processors = [case_to_es_processor, case_search_processor]
    if not settings.ENTERPRISE_MODE:
        processors.append(get_case_to_report_es_processor())
    if not skip_ucr:
        # this option is useful in tests to avoid extra UCR setup where unnecessary
        processors = [ucr_processor] + processors
    return ConstructedPillow(
        name=pillow_id,
        change_feed=change_feed,
        checkpoint=checkpoint,
        change_processed_event_handler=event_handler,
        processor=processors,
        processor_chunk_size=processor_chunk_size
    )
Example #10
def get_user_sync_history_pillow(
        pillow_id='UpdateUserSyncHistoryPillow', num_processes=1, process_num=0, **kwargs):
    """
    This gets a pillow which iterates through all synclogs
    """
    change_feed = KafkaChangeFeed(
        topics=[topics.SYNCLOG_SQL], client_id=SYNCLOG_SQL_USER_SYNC_GROUP_ID,
        num_processes=num_processes, process_num=process_num)
    checkpoint = KafkaPillowCheckpoint(pillow_id, [topics.SYNCLOG_SQL])
    return ConstructedPillow(
        name=pillow_id,
        checkpoint=checkpoint,
        change_feed=change_feed,
        processor=UserSyncHistoryProcessor(),
        change_processed_event_handler=KafkaCheckpointEventHandler(
            checkpoint=checkpoint, checkpoint_frequency=100, change_feed=change_feed
        ),
    )
Example #11
def get_user_sync_history_pillow(
        pillow_id='UpdateUserSyncHistoryPillow', num_processes=1, process_num=0, **kwargs):
    """Synclog pillow

    Processors:
      - :py:class:`corehq.pillows.synclog.UserSyncHistoryProcessor`
    """
    change_feed = KafkaChangeFeed(
        topics=[topics.SYNCLOG_SQL], client_id=SYNCLOG_SQL_USER_SYNC_GROUP_ID,
        num_processes=num_processes, process_num=process_num)
    checkpoint = KafkaPillowCheckpoint(pillow_id, [topics.SYNCLOG_SQL])
    return ConstructedPillow(
        name=pillow_id,
        checkpoint=checkpoint,
        change_feed=change_feed,
        processor=UserSyncHistoryProcessor(),
        change_processed_event_handler=KafkaCheckpointEventHandler(
            checkpoint=checkpoint, checkpoint_frequency=100, change_feed=change_feed
        ),
    )
Example #12
    def __init__(self, processor, pillow_name, topics, num_processes,
                 process_num):
        change_feed = PartitionedKafkaChangeFeed(topics,
                                                 group_id=pillow_name,
                                                 num_processes=num_processes,
                                                 process_num=process_num)
        checkpoint = KafkaPillowCheckpoint(pillow_name, topics)
        event_handler = KafkaCheckpointEventHandler(checkpoint=checkpoint,
                                                    checkpoint_frequency=1000,
                                                    change_feed=change_feed)
        super(ConfigurableReportKafkaPillow,
              self).__init__(name=pillow_name,
                             change_feed=change_feed,
                             processor=processor,
                             checkpoint=checkpoint,
                             change_processed_event_handler=event_handler)
        # set by the superclass constructor
        assert self.processors is not None
        assert len(self.processors) == 1
        self._processor = self.processors[0]
        assert self._processor.bootstrapped is not None
Example #13
def get_xform_pillow(pillow_id='xform-pillow',
                     ucr_division=None,
                     include_ucrs=None,
                     exclude_ucrs=None,
                     num_processes=1,
                     process_num=0,
                     ucr_configs=None,
                     skip_ucr=False,
                     processor_chunk_size=DEFAULT_PROCESSOR_CHUNK_SIZE,
                     topics=None,
                     **kwargs):
    # avoid circular dependency
    from corehq.pillows.reportxform import transform_xform_for_report_forms_index, report_xform_filter
    from corehq.pillows.mappings.user_mapping import USER_INDEX
    if topics:
        assert set(topics).issubset(
            FORM_TOPICS), "This is a pillow to process forms only"
    topics = topics or FORM_TOPICS
    change_feed = KafkaChangeFeed(topics,
                                  client_id=pillow_id,
                                  num_processes=num_processes,
                                  process_num=process_num)

    ucr_processor = ConfigurableReportPillowProcessor(
        data_source_providers=[
            DynamicDataSourceProvider('XFormInstance'),
            StaticDataSourceProvider('XFormInstance')
        ],
        ucr_division=ucr_division,
        include_ucrs=include_ucrs,
        exclude_ucrs=exclude_ucrs,
        run_migrations=(
            process_num == 0),  # only first process runs migrations
    )
    xform_to_es_processor = ElasticProcessor(
        elasticsearch=get_es_new(),
        index_info=XFORM_INDEX_INFO,
        doc_prep_fn=transform_xform_for_elasticsearch,
        doc_filter_fn=xform_pillow_filter,
    )
    unknown_user_form_processor = UnknownUsersProcessor()
    form_meta_processor = FormSubmissionMetadataTrackerProcessor()
    checkpoint_id = "{}-{}-{}-{}".format(pillow_id, XFORM_INDEX_INFO.index,
                                         REPORT_XFORM_INDEX_INFO.index,
                                         USER_INDEX)
    checkpoint = KafkaPillowCheckpoint(checkpoint_id, topics)
    event_handler = KafkaCheckpointEventHandler(
        checkpoint=checkpoint,
        checkpoint_frequency=1000,
        change_feed=change_feed,
        checkpoint_callback=ucr_processor)
    if ucr_configs:
        ucr_processor.bootstrap(ucr_configs)
    processors = [xform_to_es_processor]
    if settings.RUN_UNKNOWN_USER_PILLOW:
        processors.append(unknown_user_form_processor)
    if settings.RUN_FORM_META_PILLOW:
        processors.append(form_meta_processor)
    if not settings.ENTERPRISE_MODE:
        xform_to_report_es_processor = ElasticProcessor(
            elasticsearch=get_es_new(),
            index_info=REPORT_XFORM_INDEX_INFO,
            doc_prep_fn=transform_xform_for_report_forms_index,
            doc_filter_fn=report_xform_filter)
        processors.append(xform_to_report_es_processor)
    if not skip_ucr:
        processors.append(ucr_processor)
    return ConstructedPillow(name=pillow_id,
                             change_feed=change_feed,
                             checkpoint=checkpoint,
                             change_processed_event_handler=event_handler,
                             processor=processors,
                             processor_chunk_size=processor_chunk_size)
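The processor list here grows conditionally from settings flags; a reduced sketch of that assembly pattern (the flags are the ones used above, assumed to be booleans in Django settings):

# Pattern sketch: pair each optional processor with its gating flag.
optional = [
    (settings.RUN_UNKNOWN_USER_PILLOW, unknown_user_form_processor),
    (settings.RUN_FORM_META_PILLOW, form_meta_processor),
]
processors = [xform_to_es_processor]
processors.extend(proc for enabled, proc in optional if enabled)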
Example #14
def get_xform_pillow(pillow_id='xform-pillow',
                     ucr_division=None,
                     include_ucrs=None,
                     exclude_ucrs=None,
                     num_processes=1,
                     process_num=0,
                     ucr_configs=None,
                     skip_ucr=False,
                     processor_chunk_size=DEFAULT_PROCESSOR_CHUNK_SIZE,
                     topics=None,
                     dedicated_migration_process=False,
                     **kwargs):
    """Generic XForm change processor

    Processors:
      - :py:class:`corehq.apps.userreports.pillow.ConfigurableReportPillowProcessor` (disabled when skip_ucr=True)
      - :py:class:`pillowtop.processors.elastic.BulkElasticProcessor`
      - :py:class:`corehq.pillows.user.UnknownUsersProcessor` (disabled when RUN_UNKNOWN_USER_PILLOW=False)
      - :py:class:`pillowtop.processors.form.FormSubmissionMetadataTrackerProcessor` (disabled when RUN_FORM_META_PILLOW=False)
      - :py:class:`corehq.apps.data_interfaces.pillow.CaseDeduplicationProcessor` (disabled when RUN_DEDUPLICATION_PILLOW=False)
    """
    # avoid circular dependency
    from corehq.pillows.reportxform import transform_xform_for_report_forms_index, report_xform_filter
    from corehq.pillows.mappings.user_mapping import USER_INDEX
    if topics:
        assert set(topics).issubset(
            FORM_TOPICS), "This is a pillow to process forms only"
    topics = topics or FORM_TOPICS
    change_feed = KafkaChangeFeed(
        topics,
        client_id=pillow_id,
        num_processes=num_processes,
        process_num=process_num,
        dedicated_migration_process=dedicated_migration_process)

    ucr_processor = get_ucr_processor(
        data_source_providers=[
            DynamicDataSourceProvider('XFormInstance'),
            StaticDataSourceProvider('XFormInstance')
        ],
        ucr_division=ucr_division,
        include_ucrs=include_ucrs,
        exclude_ucrs=exclude_ucrs,
        run_migrations=(
            process_num == 0),  # only first process runs migrations
        ucr_configs=ucr_configs)

    xform_to_es_processor = BulkElasticProcessor(
        elasticsearch=get_es_new(),
        index_info=XFORM_INDEX_INFO,
        doc_prep_fn=transform_xform_for_elasticsearch,
        doc_filter_fn=xform_pillow_filter,
        change_filter_fn=is_couch_change_for_sql_domain)
    unknown_user_form_processor = UnknownUsersProcessor()
    form_meta_processor = FormSubmissionMetadataTrackerProcessor()
    checkpoint_id = "{}-{}-{}-{}".format(pillow_id, XFORM_INDEX_INFO.index,
                                         REPORT_XFORM_INDEX_INFO.index,
                                         USER_INDEX)
    checkpoint = KafkaPillowCheckpoint(checkpoint_id, topics)
    event_handler = KafkaCheckpointEventHandler(
        checkpoint=checkpoint,
        checkpoint_frequency=1000,
        change_feed=change_feed,
        checkpoint_callback=ucr_processor)
    processors = [xform_to_es_processor]
    if settings.RUN_UNKNOWN_USER_PILLOW:
        processors.append(unknown_user_form_processor)
    if settings.RUN_FORM_META_PILLOW:
        processors.append(form_meta_processor)
    if settings.RUN_DEDUPLICATION_PILLOW:
        processors.append(CaseDeduplicationProcessor())

    if not settings.ENTERPRISE_MODE:
        xform_to_report_es_processor = BulkElasticProcessor(
            elasticsearch=get_es_new(),
            index_info=REPORT_XFORM_INDEX_INFO,
            doc_prep_fn=transform_xform_for_report_forms_index,
            doc_filter_fn=report_xform_filter,
            change_filter_fn=is_couch_change_for_sql_domain)
        processors.append(xform_to_report_es_processor)
    if not skip_ucr:
        processors.append(ucr_processor)

    return ConstructedPillow(
        name=pillow_id,
        change_feed=change_feed,
        checkpoint=checkpoint,
        change_processed_event_handler=event_handler,
        processor=processors,
        processor_chunk_size=processor_chunk_size,
        process_num=process_num,
        is_dedicated_migration_process=dedicated_migration_process
        and (process_num == 0))
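Note the final argument: `dedicated_migration_process and (process_num == 0)` means at most one worker, the one with `process_num == 0`, is treated as the dedicated migration process. A tiny truth-table illustration:

# Only process 0 can be the dedicated migration process.
for dedicated, num in [(True, 0), (True, 1), (False, 0)]:
    print(dedicated and (num == 0))   # True, False, False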