Example #1
def _get_pillow(configs, processor_chunk_size=0):
    pillow = get_case_pillow(processor_chunk_size=processor_chunk_size)
    # overwrite processors since we're only concerned with UCR here
    ucr_processor = ConfigurableReportPillowProcessor(data_source_providers=[])
    ucr_processor.bootstrap(configs)
    pillow.processors = [ucr_processor]
    return pillow
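A minimal usage sketch for this helper, assuming a saved DataSourceConfiguration named config and pillowtop's standard process_changes(since, forever=False) API (both are assumptions, not shown above):

# Hedged sketch: drive only the UCR processor over test-generated changes.
# `config` is an assumed DataSourceConfiguration; process_changes() is the
# assumed pillowtop API for consuming the change feed once and then stopping.
pillow = _get_pillow([config], processor_chunk_size=100)
since = pillow.get_change_feed().get_latest_offsets()
# ... create or update some cases here ...
pillow.process_changes(since=since, forever=False)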
Example #2
def get_case_pillow(
        pillow_id='case-pillow', ucr_division=None,
        include_ucrs=None, exclude_ucrs=None,
        num_processes=1, process_num=0, ucr_configs=None, skip_ucr=False,
        processor_chunk_size=DEFAULT_PROCESSOR_CHUNK_SIZE, topics=None, **kwargs):
    """Return a pillow that processes cases. The processors include, UCR and elastic processors

    Processors:
      - :py:class:`corehq.apps.userreports.pillow.ConfigurableReportPillowProcessor` (disabled when skip_ucr=True)
      - :py:class:`pillowtop.processors.elastic.BulkElasticProcessor`
      - :py:func:`corehq.pillows.case_search.get_case_search_processor`
      - :py:class:`corehq.messaging.pillow.CaseMessagingSyncProcessor`
    """
    if topics:
        assert set(topics).issubset(CASE_TOPICS), "This is a pillow to process cases only"
    topics = topics or CASE_TOPICS
    change_feed = KafkaChangeFeed(
        topics, client_id=pillow_id, num_processes=num_processes, process_num=process_num
    )
    ucr_processor = ConfigurableReportPillowProcessor(
        data_source_providers=[DynamicDataSourceProvider('CommCareCase'), StaticDataSourceProvider('CommCareCase')],
        ucr_division=ucr_division,
        include_ucrs=include_ucrs,
        exclude_ucrs=exclude_ucrs,
        run_migrations=(process_num == 0),  # only first process runs migrations
    )
    if ucr_configs:
        ucr_processor.bootstrap(ucr_configs)
    case_to_es_processor = BulkElasticProcessor(
        elasticsearch=get_es_new(),
        index_info=CASE_INDEX_INFO,
        doc_prep_fn=transform_case_for_elasticsearch
    )
    case_search_processor = get_case_search_processor()

    checkpoint_id = "{}-{}-{}-{}".format(
        pillow_id, CASE_INDEX_INFO.index, case_search_processor.index_info.index, 'messaging-sync')
    checkpoint = KafkaPillowCheckpoint(checkpoint_id, topics)
    event_handler = KafkaCheckpointEventHandler(
        checkpoint=checkpoint, checkpoint_frequency=1000, change_feed=change_feed,
        checkpoint_callback=ucr_processor
    )
    processors = [case_to_es_processor, CaseMessagingSyncProcessor()]
    if settings.RUN_CASE_SEARCH_PILLOW:
        processors.append(case_search_processor)
    if not settings.ENTERPRISE_MODE:
        processors.append(get_case_to_report_es_processor())
    if not skip_ucr:
        # this option is useful in tests to avoid extra UCR setup where unnecessary
        processors = [ucr_processor] + processors
    return ConstructedPillow(
        name=pillow_id,
        change_feed=change_feed,
        checkpoint=checkpoint,
        change_processed_event_handler=event_handler,
        processor=processors,
        processor_chunk_size=processor_chunk_size
    )
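A hedged sketch of how this factory might be invoked for a sharded deployment, with purely illustrative parameter values; per run_migrations above, only the first process bootstraps UCR migrations:

# Illustrative only: two workers splitting the Kafka partitions for CASE_TOPICS.
# Each would then be driven in its own OS process (e.g. via pillowtop's run loop).
pillow_0 = get_case_pillow(num_processes=2, process_num=0)  # runs UCR migrations
pillow_1 = get_case_pillow(num_processes=2, process_num=1)  # skips migrations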
Example #3
def get_xform_pillow(pillow_id='xform-pillow', ucr_division=None,
                     include_ucrs=None, exclude_ucrs=None,
                     num_processes=1, process_num=0, ucr_configs=None, skip_ucr=False,
                     processor_chunk_size=DEFAULT_PROCESSOR_CHUNK_SIZE, topics=None, **kwargs):
    # avoid circular dependency
    from corehq.pillows.reportxform import transform_xform_for_report_forms_index, report_xform_filter
    from corehq.pillows.mappings.user_mapping import USER_INDEX
    if topics:
        assert set(topics).issubset(FORM_TOPICS), "This is a pillow to process forms only"
    topics = topics or FORM_TOPICS
    change_feed = KafkaChangeFeed(
        topics, client_id=pillow_id, num_processes=num_processes, process_num=process_num
    )

    ucr_processor = ConfigurableReportPillowProcessor(
        data_source_providers=[DynamicDataSourceProvider(), StaticDataSourceProvider()],
        ucr_division=ucr_division,
        include_ucrs=include_ucrs,
        exclude_ucrs=exclude_ucrs,
    )
    xform_to_es_processor = ElasticProcessor(
        elasticsearch=get_es_new(),
        index_info=XFORM_INDEX_INFO,
        doc_prep_fn=transform_xform_for_elasticsearch,
        doc_filter_fn=xform_pillow_filter,
    )
    unknown_user_form_processor = UnknownUsersProcessor()
    form_meta_processor = FormSubmissionMetadataTrackerProcessor()
    checkpoint_id = "{}-{}-{}-{}".format(
        pillow_id, XFORM_INDEX_INFO.index, REPORT_XFORM_INDEX_INFO.index, USER_INDEX)
    checkpoint = KafkaPillowCheckpoint(checkpoint_id, topics)
    event_handler = KafkaCheckpointEventHandler(
        checkpoint=checkpoint, checkpoint_frequency=1000, change_feed=change_feed,
        checkpoint_callback=ucr_processor
    )
    if ucr_configs:
        ucr_processor.bootstrap(ucr_configs)
    processors = [
        xform_to_es_processor,
        form_meta_processor,
        unknown_user_form_processor,
    ]
    if not settings.ENTERPRISE_MODE:
        xform_to_report_es_processor = ElasticProcessor(
            elasticsearch=get_es_new(),
            index_info=REPORT_XFORM_INDEX_INFO,
            doc_prep_fn=transform_xform_for_report_forms_index,
            doc_filter_fn=report_xform_filter
        )
        processors = [xform_to_report_es_processor] + processors
    if not skip_ucr:
        processors = [ucr_processor] + processors
    return ConstructedPillow(
        name=pillow_id,
        change_feed=change_feed,
        checkpoint=checkpoint,
        change_processed_event_handler=event_handler,
        processor=processors,
        processor_chunk_size=processor_chunk_size
    )
Example #4
def get_case_pillow(
        pillow_id='case-pillow', ucr_division=None,
        include_ucrs=None, exclude_ucrs=None,
        num_processes=1, process_num=0, ucr_configs=None, skip_ucr=False,
        processor_chunk_size=DEFAULT_PROCESSOR_CHUNK_SIZE, topics=None, **kwargs):
    """
    Return a pillow that processes cases. The processors include, UCR and elastic processors
        Args:
            skip_ucr: Can be set to True to avoid passing UCR processor, useful for tests
    """
    if topics:
        assert set(topics).issubset(CASE_TOPICS), "This is a pillow to process cases only"
    topics = topics or CASE_TOPICS
    change_feed = KafkaChangeFeed(
        topics, client_id=pillow_id, num_processes=num_processes, process_num=process_num
    )
    ucr_processor = ConfigurableReportPillowProcessor(
        data_source_providers=[DynamicDataSourceProvider('CommCareCase'), StaticDataSourceProvider('CommCareCase')],
        ucr_division=ucr_division,
        include_ucrs=include_ucrs,
        exclude_ucrs=exclude_ucrs,
    )
    if ucr_configs:
        ucr_processor.bootstrap(ucr_configs)
    case_to_es_processor = ElasticProcessor(
        elasticsearch=get_es_new(),
        index_info=CASE_INDEX_INFO,
        doc_prep_fn=transform_case_for_elasticsearch
    )
    case_search_processor = get_case_search_processor()

    checkpoint_id = "{}-{}-{}".format(
        pillow_id, CASE_INDEX_INFO.index, case_search_processor.index_info.index)
    checkpoint = KafkaPillowCheckpoint(checkpoint_id, topics)
    event_handler = KafkaCheckpointEventHandler(
        checkpoint=checkpoint, checkpoint_frequency=1000, change_feed=change_feed,
        checkpoint_callback=ucr_processor
    )
    processors = [case_to_es_processor, case_search_processor]
    if not settings.ENTERPRISE_MODE:
        processors.append(get_case_to_report_es_processor())
    if not skip_ucr:
        # this option is useful in tests to avoid extra UCR setup where unnecessary
        processors = [ucr_processor] + processors
    return ConstructedPillow(
        name=pillow_id,
        change_feed=change_feed,
        checkpoint=checkpoint,
        change_processed_event_handler=event_handler,
        processor=processors,
        processor_chunk_size=processor_chunk_size
    )
Example #5
def get_case_pillow(
        pillow_id='case-pillow', ucr_division=None,
        include_ucrs=None, exclude_ucrs=None,
        num_processes=1, process_num=0, ucr_configs=None, skip_ucr=False,
        processor_chunk_size=UCR_PROCESSING_CHUNK_SIZE, topics=None, **kwargs):
    """
    Return a pillow that processes cases. The processors include, UCR and elastic processors
        Args:
            skip_ucr: Can be set to True to avoid passing UCR processor, useful for tests
    """
    if topics:
        assert set(topics).issubset(CASE_TOPICS), "This is a pillow to process cases only"
    topics = topics or CASE_TOPICS
    change_feed = KafkaChangeFeed(
        topics, client_id=pillow_id, num_processes=num_processes, process_num=process_num
    )
    ucr_processor = ConfigurableReportPillowProcessor(
        data_source_providers=[DynamicDataSourceProvider(), StaticDataSourceProvider()],
        ucr_division=ucr_division,
        include_ucrs=include_ucrs,
        exclude_ucrs=exclude_ucrs,
    )
    if ucr_configs:
        ucr_processor.bootstrap(ucr_configs)
    case_to_es_processor = ElasticProcessor(
        elasticsearch=get_es_new(),
        index_info=CASE_INDEX_INFO,
        doc_prep_fn=transform_case_for_elasticsearch
    )
    case_search_processor = get_case_search_processor()

    checkpoint_id = "{}-{}-{}".format(
        pillow_id, CASE_INDEX_INFO.index, case_search_processor.index_info.index)
    checkpoint = KafkaPillowCheckpoint(checkpoint_id, topics)
    event_handler = KafkaCheckpointEventHandler(
        checkpoint=checkpoint, checkpoint_frequency=1000, change_feed=change_feed,
        checkpoint_callback=ucr_processor
    )
    processors = [case_to_es_processor, case_search_processor]
    if not settings.ENTERPRISE_MODE:
        processors.append(get_case_to_report_es_processor())
    if not skip_ucr:
        # this option is useful in tests to avoid extra UCR setup where unnecessary
        processors = [ucr_processor] + processors
    return ConstructedPillow(
        name=pillow_id,
        change_feed=change_feed,
        checkpoint=checkpoint,
        change_processed_event_handler=event_handler,
        processor=processors,
        processor_chunk_size=processor_chunk_size
    )
Example #6
def get_user_pillow(pillow_id='user-pillow',
                    num_processes=1,
                    process_num=0,
                    skip_ucr=False,
                    **kwargs):
    # Pillow that sends users to ES and UCR
    assert pillow_id == 'user-pillow', 'Pillow ID is not allowed to change'
    checkpoint = get_checkpoint_for_elasticsearch_pillow(
        pillow_id, USER_INDEX_INFO, topics.USER_TOPICS)
    user_processor = get_user_es_processor()
    ucr_processor = ConfigurableReportPillowProcessor(
        data_source_providers=[DynamicDataSourceProvider(), StaticDataSourceProvider()],
    )
    change_feed = KafkaChangeFeed(topics=topics.USER_TOPICS,
                                  client_id='users-to-es',
                                  num_processes=num_processes,
                                  process_num=process_num)
    return ConstructedPillow(
        name=pillow_id,
        checkpoint=checkpoint,
        change_feed=change_feed,
        processor=[user_processor] if skip_ucr else [ucr_processor, user_processor],
        change_processed_event_handler=KafkaCheckpointEventHandler(
            checkpoint=checkpoint,
            checkpoint_frequency=100,
            change_feed=change_feed),
    )
Example #7
def get_user_pillow(pillow_id='user-pillow', num_processes=1, process_num=0,
        skip_ucr=False, processor_chunk_size=DEFAULT_PROCESSOR_CHUNK_SIZE, **kwargs):
    """Processes users and sends them to ES and UCRs.

    Processors:
      - :py:class:`pillowtop.processors.elastic.BulkElasticProcessor`
      - :py:class:`corehq.apps.userreports.pillow.ConfigurableReportPillowProcessor`
    """
    # Pillow that sends users to ES and UCR
    assert pillow_id == 'user-pillow', 'Pillow ID is not allowed to change'
    checkpoint = get_checkpoint_for_elasticsearch_pillow(pillow_id, USER_INDEX_INFO, topics.USER_TOPICS)
    user_processor = get_user_es_processor()
    ucr_processor = ConfigurableReportPillowProcessor(
        data_source_providers=[DynamicDataSourceProvider('CommCareUser'), StaticDataSourceProvider('CommCareUser')],
        run_migrations=(process_num == 0),  # only first process runs migrations
    )
    change_feed = KafkaChangeFeed(
        topics=topics.USER_TOPICS, client_id='users-to-es', num_processes=num_processes, process_num=process_num
    )
    return ConstructedPillow(
        name=pillow_id,
        checkpoint=checkpoint,
        change_feed=change_feed,
        processor=[user_processor] if skip_ucr else [ucr_processor, user_processor],
        change_processed_event_handler=KafkaCheckpointEventHandler(
            checkpoint=checkpoint, checkpoint_frequency=100, change_feed=change_feed
        ),
        processor_chunk_size=processor_chunk_size
    )
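A hypothetical check of the skip_ucr switch, assuming the constructed pillow exposes its processors list as in Example #1:

# Sketch only: skip_ucr=True leaves just the Elasticsearch user processor,
# otherwise the UCR processor is prepended (assumes a .processors attribute).
es_only = get_user_pillow(skip_ucr=True)
full = get_user_pillow()
assert len(es_only.processors) == 1
assert len(full.processors) == 2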
Example #8
def test_get_docs(self):
    docs = [
        get_sample_doc_and_indicators(self.fake_time_now)[0]
        for i in range(10)
    ]
    feed = self.pillow.get_change_feed()
    since = feed.get_latest_offsets()
    cases = self._create_cases(docs=docs)
    changes = list(feed.iter_changes(since, forever=False))
    bad_changes, result_docs = ConfigurableReportPillowProcessor.get_docs_for_changes(
        changes, docs[1]['domain'])
    self.assertEqual(set([c.id for c in changes]),
                     set([doc['_id'] for doc in result_docs]))
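A hedged follow-up assertion one might append inside the same test, assuming bad_changes collects the changes whose documents could not be fetched:

    # Sketch: in the happy path no change should fail document retrieval.
    self.assertFalse(bad_changes)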
Example #9
def get_xform_pillow(pillow_id='xform-pillow',
                     ucr_division=None,
                     include_ucrs=None,
                     exclude_ucrs=None,
                     num_processes=1,
                     process_num=0,
                     ucr_configs=None,
                     skip_ucr=False,
                     processor_chunk_size=DEFAULT_PROCESSOR_CHUNK_SIZE,
                     topics=None,
                     **kwargs):
    # avoid circular dependency
    from corehq.pillows.reportxform import transform_xform_for_report_forms_index, report_xform_filter
    from corehq.pillows.mappings.user_mapping import USER_INDEX
    if topics:
        assert set(topics).issubset(
            FORM_TOPICS), "This is a pillow to process forms only"
    topics = topics or FORM_TOPICS
    change_feed = KafkaChangeFeed(topics,
                                  client_id=pillow_id,
                                  num_processes=num_processes,
                                  process_num=process_num)

    ucr_processor = ConfigurableReportPillowProcessor(
        data_source_providers=[
            DynamicDataSourceProvider('XFormInstance'),
            StaticDataSourceProvider('XFormInstance')
        ],
        ucr_division=ucr_division,
        include_ucrs=include_ucrs,
        exclude_ucrs=exclude_ucrs,
        run_migrations=(process_num == 0),  # only first process runs migrations
    )
    xform_to_es_processor = ElasticProcessor(
        elasticsearch=get_es_new(),
        index_info=XFORM_INDEX_INFO,
        doc_prep_fn=transform_xform_for_elasticsearch,
        doc_filter_fn=xform_pillow_filter,
    )
    unknown_user_form_processor = UnknownUsersProcessor()
    form_meta_processor = FormSubmissionMetadataTrackerProcessor()
    checkpoint_id = "{}-{}-{}-{}".format(pillow_id, XFORM_INDEX_INFO.index,
                                         REPORT_XFORM_INDEX_INFO.index,
                                         USER_INDEX)
    checkpoint = KafkaPillowCheckpoint(checkpoint_id, topics)
    event_handler = KafkaCheckpointEventHandler(
        checkpoint=checkpoint,
        checkpoint_frequency=1000,
        change_feed=change_feed,
        checkpoint_callback=ucr_processor)
    if ucr_configs:
        ucr_processor.bootstrap(ucr_configs)
    processors = [xform_to_es_processor]
    if settings.RUN_UNKNOWN_USER_PILLOW:
        processors.append(unknown_user_form_processor)
    if settings.RUN_FORM_META_PILLOW:
        processors.append(form_meta_processor)
    if not settings.ENTERPRISE_MODE:
        xform_to_report_es_processor = ElasticProcessor(
            elasticsearch=get_es_new(),
            index_info=REPORT_XFORM_INDEX_INFO,
            doc_prep_fn=transform_xform_for_report_forms_index,
            doc_filter_fn=report_xform_filter)
        processors.append(xform_to_report_es_processor)
    if not skip_ucr:
        processors.append(ucr_processor)
    return ConstructedPillow(name=pillow_id,
                             change_feed=change_feed,
                             checkpoint=checkpoint,
                             change_processed_event_handler=event_handler,
                             processor=processors,
                             processor_chunk_size=processor_chunk_size)
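The optional processors above are gated by Django settings; a hedged, test-style sketch of exercising those gates with override_settings, reusing the setting names from the code and the .processors attribute seen in Example #1:

from django.test import override_settings

# Sketch only: with every optional gate turned off and skip_ucr=True, the
# pillow should be left with just the plain form-to-Elasticsearch processor.
with override_settings(RUN_UNKNOWN_USER_PILLOW=False,
                       RUN_FORM_META_PILLOW=False,
                       ENTERPRISE_MODE=True):
    pillow = get_xform_pillow(skip_ucr=True)
    assert len(pillow.processors) == 1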