Example #1
    def test_dont_create_checkpoint_past_current(self):
        pillow_name = 'test-checkpoint-reset'

        # initialize change feed and pillow
        feed = KafkaChangeFeed(topics=topics.USER_TOPICS, client_id='test-kafka-feed')
        checkpoint = PillowCheckpoint(pillow_name, feed.sequence_format)
        processor = CountingProcessor()
        pillow = ConstructedPillow(
            name=pillow_name,
            checkpoint=checkpoint,
            change_feed=feed,
            processor=processor,
            change_processed_event_handler=KafkaCheckpointEventHandler(
                checkpoint=checkpoint, checkpoint_frequency=1, change_feed=feed
            )
        )

        original_kafka_offsets = feed.get_latest_offsets()
        current_kafka_offsets = deepcopy(original_kafka_offsets)
        self.assertEqual(feed.get_current_checkpoint_offsets(), {})
        self.assertEqual(pillow.get_last_checkpoint_sequence(), {})

        publish_stub_change(topics.COMMCARE_USER)
        # the following line causes tests to fail if you have multiple partitions
        current_kafka_offsets[(topics.COMMCARE_USER, 0)] += 1
        pillow.process_changes(since=original_kafka_offsets, forever=False)
        self.assertEqual(1, processor.count)
        self.assertEqual(feed.get_current_checkpoint_offsets(), current_kafka_offsets)
Example #2
    def test_checkpoint_with_multiple_topics(self):
        feed = KafkaChangeFeed(topics=[topics.FORM, topics.CASE], group_id='test-kafka-feed')
        pillow_name = 'test-multi-topic-checkpoints'
        checkpoint = PillowCheckpoint(pillow_name)
        processor = CountingProcessor()
        pillow = ConstructedPillow(
            name=pillow_name,
            document_store=None,
            checkpoint=checkpoint,
            change_feed=feed,
            processor=processor,
            change_processed_event_handler=MultiTopicCheckpointEventHandler(
                checkpoint=checkpoint, checkpoint_frequency=1, change_feed=feed
            )
        )
        offsets = feed.get_current_offsets()
        self.assertEqual(set([topics.FORM, topics.CASE]), set(offsets.keys()))

        # send a few changes to kafka so they should be picked up by the pillow
        publish_stub_change(topics.FORM)
        publish_stub_change(topics.FORM)
        publish_stub_change(topics.CASE)
        publish_stub_change(topics.CASE)
        publish_stub_change(topics.CASE_SQL)
        pillow.process_changes(since=offsets, forever=False)
        self.assertEqual(4, processor.count)
        self.assertEqual(feed.get_current_checkpoint_offsets(), pillow.get_last_checkpoint_sequence())
        publish_stub_change(topics.FORM)
        publish_stub_change(topics.FORM)
        publish_stub_change(topics.CASE)
        publish_stub_change(topics.CASE)
        publish_stub_change(topics.CASE_SQL)
        pillow.process_changes(pillow.get_last_checkpoint_sequence(), forever=False)
        self.assertEqual(8, processor.count)
        self.assertEqual(feed.get_current_checkpoint_offsets(), pillow.get_last_checkpoint_sequence())
Example #3
def get_group_pillow(pillow_id='group-pillow',
                     num_processes=1,
                     process_num=0,
                     **kwargs):
    """Group pillow

    Processors:
      - :py:class:`corehq.pillows.groups_to_user.GroupsToUsersProcessor`
      - :py:func:`corehq.pillows.group.get_group_to_elasticsearch_processor`
    """
    assert pillow_id == 'group-pillow', 'Pillow ID is not allowed to change'
    to_user_es_processor = GroupsToUsersProcessor()
    to_group_es_processor = get_group_to_elasticsearch_processor()
    change_feed = KafkaChangeFeed(topics=[topics.GROUP],
                                  client_id='groups-to-users',
                                  num_processes=num_processes,
                                  process_num=process_num)
    checkpoint_id = "{}-{}-{}".format(pillow_id, USER_INDEX,
                                      to_group_es_processor.index_info.index)
    checkpoint = KafkaPillowCheckpoint(checkpoint_id, [topics.GROUP])
    return ConstructedPillow(
        name=pillow_id,
        checkpoint=checkpoint,
        change_feed=change_feed,
        processor=[to_user_es_processor, to_group_es_processor],
        change_processed_event_handler=KafkaCheckpointEventHandler(
            checkpoint=checkpoint,
            checkpoint_frequency=10,
            change_feed=change_feed),
    )
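Each factory in these examples returns a ConstructedPillow. A minimal sketch of driving one outside the normal pillowtop run loop, using only calls that appear in the tests earlier in this section (get_last_checkpoint_sequence and process_changes):

# Sketch: run the group pillow once over whatever is currently in Kafka.
pillow = get_group_pillow()
# Resume from the last persisted checkpoint; forever=False drains the feed
# once and returns instead of blocking on new changes.
pillow.process_changes(since=pillow.get_last_checkpoint_sequence(), forever=False)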
Example #4
def get_location_pillow(pillow_id='location-ucr-pillow',
                        include_ucrs=None,
                        num_processes=1,
                        process_num=0,
                        ucr_configs=None,
                        **kwargs):
    # TODO: is ucr_division needed?
    change_feed = KafkaChangeFeed([LOCATION_TOPIC],
                                  client_id=pillow_id,
                                  num_processes=num_processes,
                                  process_num=process_num)
    ucr_processor = ConfigurableReportPillowProcessor(
        data_source_providers=[
            DynamicDataSourceProvider('Location'),
            StaticDataSourceProvider('Location')
        ],
        include_ucrs=include_ucrs,
    )
    if ucr_configs:
        ucr_processor.bootstrap(ucr_configs)
    checkpoint = KafkaPillowCheckpoint(pillow_id, [LOCATION_TOPIC])
    event_handler = KafkaCheckpointEventHandler(
        checkpoint=checkpoint,
        checkpoint_frequency=1000,
        change_feed=change_feed,
        checkpoint_callback=ucr_processor)
    return ConstructedPillow(name=pillow_id,
                             change_feed=change_feed,
                             checkpoint=checkpoint,
                             change_processed_event_handler=event_handler,
                             processor=[ucr_processor])
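When ucr_configs is supplied, the UCR processor is bootstrapped with those data source configs up front via ucr_processor.bootstrap(ucr_configs) rather than discovering them through the providers. A hedged usage sketch; the include_ucrs value is a hypothetical table id, not one from the source:

# Hypothetical usage: restrict the location pillow to a single UCR data source.
pillow = get_location_pillow(include_ucrs=['my-location-ucr'])
pillow.process_changes(since=pillow.get_last_checkpoint_sequence(), forever=False)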
Example #5
def get_domain_kafka_to_elasticsearch_pillow(pillow_id='KafkaDomainPillow',
                                             num_processes=1,
                                             process_num=0,
                                             **kwargs):
    assert pillow_id == 'KafkaDomainPillow', 'Pillow ID is not allowed to change'
    checkpoint = get_checkpoint_for_elasticsearch_pillow(
        pillow_id, DOMAIN_INDEX_INFO, [topics.DOMAIN])
    domain_processor = ElasticProcessor(
        elasticsearch=get_es_new(),
        index_info=DOMAIN_INDEX_INFO,
        doc_prep_fn=transform_domain_for_elasticsearch)
    change_feed = KafkaChangeFeed(topics=[topics.DOMAIN],
                                  client_id='domains-to-es',
                                  num_processes=num_processes,
                                  process_num=process_num)
    return ConstructedPillow(
        name=pillow_id,
        checkpoint=checkpoint,
        change_feed=change_feed,
        processor=domain_processor,
        change_processed_event_handler=KafkaCheckpointEventHandler(
            checkpoint=checkpoint,
            checkpoint_frequency=100,
            change_feed=change_feed),
    )
Example #6
def get_user_pillow(pillow_id='user-pillow', num_processes=1, process_num=0,
        skip_ucr=False, processor_chunk_size=DEFAULT_PROCESSOR_CHUNK_SIZE, **kwargs):
    """Processes users and sends them to ES and UCRs.

    Processors:
      - :py:class:`pillowtop.processors.elastic.BulkElasticProcessor`
      - :py:class:`corehq.apps.userreports.pillow.ConfigurableReportPillowProcessor`
    """
    # Pillow that sends users to ES and UCR
    assert pillow_id == 'user-pillow', 'Pillow ID is not allowed to change'
    checkpoint = get_checkpoint_for_elasticsearch_pillow(pillow_id, USER_INDEX_INFO, topics.USER_TOPICS)
    user_processor = get_user_es_processor()
    ucr_processor = ConfigurableReportPillowProcessor(
        data_source_providers=[DynamicDataSourceProvider('CommCareUser'), StaticDataSourceProvider('CommCareUser')],
        run_migrations=(process_num == 0),  # only first process runs migrations
    )
    change_feed = KafkaChangeFeed(
        topics=topics.USER_TOPICS, client_id='users-to-es', num_processes=num_processes, process_num=process_num
    )
    return ConstructedPillow(
        name=pillow_id,
        checkpoint=checkpoint,
        change_feed=change_feed,
        processor=[user_processor] if skip_ucr else [ucr_processor, user_processor],
        change_processed_event_handler=KafkaCheckpointEventHandler(
            checkpoint=checkpoint, checkpoint_frequency=100, change_feed=change_feed
        ),
        processor_chunk_size=processor_chunk_size
    )
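The skip_ucr flag only changes the processor list; both variants share the same checkpoint and change feed. A short sketch of the two configurations the conditional above produces:

# Sketch: ES-only vs. UCR-then-ES processing for the same user feed.
es_only_pillow = get_user_pillow(skip_ucr=True)   # processors: [user_processor]
full_pillow = get_user_pillow(skip_ucr=False)     # processors: [ucr_processor, user_processor]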
Example #7
def get_case_to_elasticsearch_pillow(pillow_id='CaseToElasticsearchPillow',
                                     num_processes=1,
                                     process_num=0,
                                     **kwargs):
    """Return a pillow that processes cases to Elasticsearch.

    Processors:
      - :py:class:`pillowtop.processors.elastic.ElasticProcessor`
    """
    # TODO: Remove after full rollout of https://github.com/dimagi/commcare-hq/pull/21329/
    assert pillow_id == 'CaseToElasticsearchPillow', 'Pillow ID is not allowed to change'
    checkpoint = get_checkpoint_for_elasticsearch_pillow(
        pillow_id, CASE_INDEX_INFO, CASE_TOPICS)
    case_processor = ElasticProcessor(
        elasticsearch=get_es_new(),
        index_info=CASE_INDEX_INFO,
        doc_prep_fn=transform_case_for_elasticsearch)
    kafka_change_feed = KafkaChangeFeed(topics=CASE_TOPICS,
                                        client_id='cases-to-es',
                                        num_processes=num_processes,
                                        process_num=process_num)
    return ConstructedPillow(
        name=pillow_id,
        checkpoint=checkpoint,
        change_feed=kafka_change_feed,
        processor=case_processor,
        change_processed_event_handler=KafkaCheckpointEventHandler(
            checkpoint=checkpoint,
            checkpoint_frequency=100,
            change_feed=kafka_change_feed),
    )
Example #8
def get_group_pillow_old(pillow_id='GroupPillow',
                         num_processes=1,
                         process_num=0,
                         **kwargs):
    """Group pillow (old). Sends Group data to Elasticsearch

    Processors:
      - :py:func:`corehq.pillows.group.get_group_to_elasticsearch_processor`
    """
    # TODO: Remove after full rollout of https://github.com/dimagi/commcare-hq/pull/21329/
    assert pillow_id == 'GroupPillow', 'Pillow ID is not allowed to change'
    checkpoint = get_checkpoint_for_elasticsearch_pillow(
        pillow_id, GROUP_INDEX_INFO, [topics.GROUP])
    processor = get_group_to_elasticsearch_processor()
    change_feed = KafkaChangeFeed(topics=[topics.GROUP],
                                  client_id='groups-to-es',
                                  num_processes=num_processes,
                                  process_num=process_num)
    return ConstructedPillow(
        name=pillow_id,
        checkpoint=checkpoint,
        change_feed=change_feed,
        processor=processor,
        change_processed_event_handler=KafkaCheckpointEventHandler(
            checkpoint=checkpoint,
            checkpoint_frequency=10,
            change_feed=change_feed),
    )
Example #9
def get_group_pillow(pillow_id='GroupPillow',
                     num_processes=1,
                     process_num=0,
                     **kwargs):
    """
    This pillow adds users from xform submissions that come in to the User Index if they don't exist in HQ
    """
    assert pillow_id == 'GroupPillow', 'Pillow ID is not allowed to change'
    checkpoint = get_checkpoint_for_elasticsearch_pillow(
        pillow_id, GROUP_INDEX_INFO, [topics.GROUP])
    processor = ElasticProcessor(
        elasticsearch=get_es_new(),
        index_info=GROUP_INDEX_INFO,
    )
    change_feed = KafkaChangeFeed(topics=[topics.GROUP],
                                  group_id='groups-to-es',
                                  num_processes=num_processes,
                                  process_num=process_num)
    return ConstructedPillow(
        name=pillow_id,
        checkpoint=checkpoint,
        change_feed=change_feed,
        processor=processor,
        change_processed_event_handler=KafkaCheckpointEventHandler(
            checkpoint=checkpoint,
            checkpoint_frequency=100,
            change_feed=change_feed),
    )
Example #10
def get_change_feed_pillow_for_db(pillow_id, couch_db, default_topic=None):
    """Generic pillow for inserting Couch documents into Kafka.

    Reads from:
      - CouchDB

    Writes to:
      - Kafka
    """
    processor = KafkaProcessor(
        data_source_type=data_sources.SOURCE_COUCH,
        data_source_name=couch_db.dbname,
        default_topic=default_topic,
    )
    change_feed = CouchChangeFeed(couch_db)
    checkpoint = PillowCheckpoint(pillow_id, change_feed.sequence_format)
    return ConstructedPillow(
        name=pillow_id,
        checkpoint=checkpoint,
        change_feed=change_feed,
        processor=processor,
        change_processed_event_handler=PillowCheckpointEventHandler(
            checkpoint=checkpoint,
            checkpoint_frequency=100,
        ),
    )
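A hedged sketch of wiring this up; couch_db stands in for an existing Couch database handle the caller already holds (the function itself only reads couch_db.dbname and hands the db to CouchChangeFeed):

# Hypothetical usage: stream one Couch database's changes into Kafka.
pillow = get_change_feed_pillow_for_db(
    pillow_id='example-couch-to-kafka-pillow',  # hypothetical id
    couch_db=couch_db,
    default_topic=None,  # assumption: the processor then routes topics per document
)
pillow.process_changes(since=pillow.get_last_checkpoint_sequence(), forever=False)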
Example #11
def get_xform_to_elasticsearch_pillow(pillow_id='XFormToElasticsearchPillow',
                                      num_processes=1,
                                      process_num=0,
                                      **kwargs):
    assert pillow_id == 'XFormToElasticsearchPillow', 'Pillow ID is not allowed to change'
    checkpoint = get_checkpoint_for_elasticsearch_pillow(
        pillow_id, XFORM_INDEX_INFO, topics.FORM_TOPICS)
    form_processor = ElasticProcessor(
        elasticsearch=get_es_new(),
        index_info=XFORM_INDEX_INFO,
        doc_prep_fn=transform_xform_for_elasticsearch,
        doc_filter_fn=xform_pillow_filter,
    )
    kafka_change_feed = KafkaChangeFeed(topics=topics.FORM_TOPICS,
                                        client_id='forms-to-es',
                                        num_processes=num_processes,
                                        process_num=process_num)
    return ConstructedPillow(
        name=pillow_id,
        checkpoint=checkpoint,
        change_feed=kafka_change_feed,
        processor=form_processor,
        change_processed_event_handler=KafkaCheckpointEventHandler(
            checkpoint=checkpoint,
            checkpoint_frequency=100,
            change_feed=kafka_change_feed),
    )
Example #12
def get_form_submission_metadata_tracker_pillow(
        pillow_id='FormSubmissionMetadataTrackerProcessor',
        num_processes=1,
        process_num=0,
        **kwargs):
    """
    This gets a pillow which iterates through all forms and marks the corresponding app
    as having submissions. This could be expanded to be more generic and include
    other processing that needs to happen on each form
    """
    change_feed = KafkaChangeFeed(topics=topics.FORM_TOPICS,
                                  group_id='form-processsor',
                                  num_processes=num_processes,
                                  process_num=process_num)
    checkpoint = KafkaPillowCheckpoint('form-submission-metadata-tracker',
                                       topics.FORM_TOPICS)
    form_processor = FormSubmissionMetadataTrackerProcessor()
    return ConstructedPillow(
        name=pillow_id,
        checkpoint=checkpoint,
        change_feed=change_feed,
        processor=form_processor,
        change_processed_event_handler=KafkaCheckpointEventHandler(
            checkpoint=checkpoint,
            checkpoint_frequency=100,
            change_feed=change_feed,
        ),
    )
Example #13
def get_case_messaging_sync_pillow(
        pillow_id='case_messaging_sync_pillow',
        topics=None,
        num_processes=1,
        process_num=0,
        processor_chunk_size=DEFAULT_PROCESSOR_CHUNK_SIZE,
        **kwargs):
    """Pillow for synchronizing messaging data with case data.

        Processors:
          - :py:class:`corehq.messaging.pillow.CaseMessagingSyncProcessor`
    """
    if topics:
        assert set(topics).issubset(CASE_TOPICS), set(topics) - set(CASE_TOPICS)
    topics = topics or CASE_TOPICS
    change_feed = KafkaChangeFeed(topics,
                                  client_id=pillow_id,
                                  num_processes=num_processes,
                                  process_num=process_num)
    checkpoint = KafkaPillowCheckpoint(pillow_id, topics)
    event_handler = KafkaCheckpointEventHandler(
        checkpoint=checkpoint,
        checkpoint_frequency=1000,
        change_feed=change_feed,
    )
    return ConstructedPillow(name=pillow_id,
                             change_feed=change_feed,
                             checkpoint=checkpoint,
                             change_processed_event_handler=event_handler,
                             processor=[CaseMessagingSyncProcessor()],
                             processor_chunk_size=processor_chunk_size)
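The topics guard accepts any subset of CASE_TOPICS and uses the offending set difference as the assertion message. A small sketch of both sides of that contract:

# Accepted: any subset of CASE_TOPICS.
pillow = get_case_messaging_sync_pillow(topics=list(CASE_TOPICS)[:1])

# Rejected: a topic outside CASE_TOPICS trips the assert, whose message is
# the set difference computed above.
try:
    get_case_messaging_sync_pillow(topics=['not-a-case-topic'])
except AssertionError as err:
    print('rejected non-case topics:', err)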
Example #14
def get_app_to_elasticsearch_pillow(
        pillow_id='ApplicationToElasticsearchPillow',
        num_processes=1,
        process_num=0,
        **kwargs):
    """App pillow

    Processors:
      - :py:class:`pillowtop.processors.elastic.BulkElasticProcessor`
    """
    assert pillow_id == 'ApplicationToElasticsearchPillow', 'Pillow ID is not allowed to change'
    checkpoint = get_checkpoint_for_elasticsearch_pillow(
        pillow_id, APP_INDEX_INFO, [topics.APP])
    app_processor = ElasticProcessor(elasticsearch=get_es_new(),
                                     index_info=APP_INDEX_INFO,
                                     doc_prep_fn=transform_app_for_es)
    change_feed = KafkaChangeFeed(topics=[topics.APP],
                                  client_id='apps-to-es',
                                  num_processes=num_processes,
                                  process_num=process_num)
    return ConstructedPillow(
        name=pillow_id,
        checkpoint=checkpoint,
        change_feed=change_feed,
        processor=app_processor,
        change_processed_event_handler=KafkaCheckpointEventHandler(
            checkpoint=checkpoint,
            checkpoint_frequency=100,
            change_feed=change_feed),
    )
Example #15
def get_ledger_to_elasticsearch_pillow(pillow_id='LedgerToElasticsearchPillow', num_processes=1,
                                       process_num=0, **kwargs):
    """Ledger pillow

    Note that this pillow's ID references Elasticsearch, but it no longer saves to ES.
    The ID has been retained so the checkpoint stays consistent, and it can be changed at any time.

    Processors:
      - :py:class:`corehq.pillows.ledger.LedgerProcessor`
    """
    assert pillow_id == 'LedgerToElasticsearchPillow', 'Pillow ID is not allowed to change'
    IndexInfo = namedtuple('IndexInfo', ['index'])
    checkpoint = get_checkpoint_for_elasticsearch_pillow(
        pillow_id, IndexInfo("ledgers_2016-03-15"), [topics.LEDGER]
    )
    change_feed = KafkaChangeFeed(
        topics=[topics.LEDGER], client_id='ledgers-to-es', num_processes=num_processes, process_num=process_num
    )
    return ConstructedPillow(
        name=pillow_id,
        checkpoint=checkpoint,
        change_feed=change_feed,
        processor=LedgerProcessor(),
        change_processed_event_handler=KafkaCheckpointEventHandler(
            checkpoint=checkpoint, checkpoint_frequency=100, change_feed=change_feed
        ),
    )
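The namedtuple trick works because get_checkpoint_for_elasticsearch_pillow appears to need only an object exposing an index attribute (an assumption based on its use here), so a one-field stand-in pins the historical checkpoint ID without a real ES index:

from collections import namedtuple

# Assumption: the checkpoint helper only reads `.index` off the index-info
# object, so this stand-in keeps the legacy "ledgers_2016-03-15" checkpoint ID.
IndexInfo = namedtuple('IndexInfo', ['index'])
assert IndexInfo("ledgers_2016-03-15").index == "ledgers_2016-03-15"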
Example #16
def get_user_pillow(pillow_id='user-pillow',
                    num_processes=1,
                    process_num=0,
                    skip_ucr=False,
                    **kwargs):
    # Pillow that sends users to ES and UCR
    assert pillow_id == 'user-pillow', 'Pillow ID is not allowed to change'
    checkpoint = get_checkpoint_for_elasticsearch_pillow(
        pillow_id, USER_INDEX_INFO, topics.USER_TOPICS)
    user_processor = get_user_es_processor()
    ucr_processor = ConfigurableReportPillowProcessor(
        data_source_providers=[
            DynamicDataSourceProvider('CommCareUser'),
            StaticDataSourceProvider('CommCareUser')
        ],
        run_migrations=(process_num == 0),  # only first process runs migrations
    )
    change_feed = KafkaChangeFeed(topics=topics.USER_TOPICS,
                                  client_id='users-to-es',
                                  num_processes=num_processes,
                                  process_num=process_num)
    return ConstructedPillow(
        name=pillow_id,
        checkpoint=checkpoint,
        change_feed=change_feed,
        processor=[user_processor] if skip_ucr else [ucr_processor, user_processor],
        change_processed_event_handler=KafkaCheckpointEventHandler(
            checkpoint=checkpoint,
            checkpoint_frequency=100,
            change_feed=change_feed),
    )
Example #17
File: xform.py  Project: soitun/commcare-hq
def get_xform_to_elasticsearch_pillow(pillow_id='XFormToElasticsearchPillow',
                                      num_processes=1,
                                      process_num=0,
                                      **kwargs):
    """XForm change processor that sends form data to Elasticsearch

    Processors:
      - :py:class:`pillowtop.processors.elastic.ElasticProcessor`
    """
    # TODO: Remove after full rollout of https://github.com/dimagi/commcare-hq/pull/21329/
    assert pillow_id == 'XFormToElasticsearchPillow', 'Pillow ID is not allowed to change'
    checkpoint = get_checkpoint_for_elasticsearch_pillow(
        pillow_id, XFORM_INDEX_INFO, FORM_TOPICS)
    form_processor = ElasticProcessor(
        elasticsearch=get_es_new(),
        index_info=XFORM_INDEX_INFO,
        doc_prep_fn=transform_xform_for_elasticsearch,
        doc_filter_fn=xform_pillow_filter,
        change_filter_fn=is_couch_change_for_sql_domain)
    kafka_change_feed = KafkaChangeFeed(topics=FORM_TOPICS,
                                        client_id='forms-to-es',
                                        num_processes=num_processes,
                                        process_num=process_num)
    return ConstructedPillow(
        name=pillow_id,
        checkpoint=checkpoint,
        change_feed=kafka_change_feed,
        processor=form_processor,
        change_processed_event_handler=KafkaCheckpointEventHandler(
            checkpoint=checkpoint,
            checkpoint_frequency=100,
            change_feed=kafka_change_feed),
    )
Example #18
def get_case_search_to_elasticsearch_pillow(
        pillow_id='CaseSearchToElasticsearchPillow',
        num_processes=1,
        process_num=0,
        **kwargs):
    """Populates the `case search` Elasticsearch index.

        Processors:
          - :py:class:`corehq.pillows.case_search.CaseSearchPillowProcessor`
    """
    # TODO: Remove after full rollout of https://github.com/dimagi/commcare-hq/pull/21329/
    assert pillow_id == 'CaseSearchToElasticsearchPillow', 'Pillow ID is not allowed to change'
    checkpoint = get_checkpoint_for_elasticsearch_pillow(
        pillow_id, CASE_SEARCH_INDEX_INFO, topics.CASE_TOPICS)
    case_processor = CaseSearchPillowProcessor(
        elasticsearch=get_es_new(),
        index_info=CASE_SEARCH_INDEX_INFO,
        doc_prep_fn=transform_case_for_elasticsearch)
    change_feed = KafkaChangeFeed(topics=topics.CASE_TOPICS,
                                  client_id='cases-to-es',
                                  num_processes=num_processes,
                                  process_num=process_num)
    return ConstructedPillow(
        name=pillow_id,
        checkpoint=checkpoint,
        change_feed=change_feed,
        processor=case_processor,
        change_processed_event_handler=KafkaCheckpointEventHandler(
            checkpoint=checkpoint,
            checkpoint_frequency=100,
            change_feed=change_feed,
        ),
    )
Example #19
def get_form_submission_metadata_tracker_pillow(pillow_id='FormSubmissionMetadataTrackerPillow',
                                                num_processes=1, process_num=0, **kwargs):
    """
    This gets a pillow which iterates through all forms and marks the corresponding app
    as having submissions.

        Processors:
          - :py:class:`pillowtop.processors.form.FormSubmissionMetadataTrackerProcessor`
    """
    # TODO: Remove after full rollout of https://github.com/dimagi/commcare-hq/pull/21329/
    change_feed = KafkaChangeFeed(
        topics=topics.FORM_TOPICS, client_id='form-processsor',
        num_processes=num_processes, process_num=process_num
    )
    checkpoint = KafkaPillowCheckpoint('form-submission-metadata-tracker', topics.FORM_TOPICS)
    form_processor = FormSubmissionMetadataTrackerProcessor()
    return ConstructedPillow(
        name=pillow_id,
        checkpoint=checkpoint,
        change_feed=change_feed,
        processor=form_processor,
        change_processed_event_handler=KafkaCheckpointEventHandler(
            checkpoint=checkpoint, checkpoint_frequency=100, change_feed=change_feed,
        ),
    )
Example #20
def get_report_xform_to_elasticsearch_pillow(
        pillow_id='ReportXFormToElasticsearchPillow',
        num_processes=1,
        process_num=0,
        **kwargs):
    # TODO: Remove after full rollout of https://github.com/dimagi/commcare-hq/pull/21329/
    assert pillow_id == 'ReportXFormToElasticsearchPillow', 'Pillow ID is not allowed to change'
    checkpoint = get_checkpoint_for_elasticsearch_pillow(
        pillow_id, REPORT_XFORM_INDEX_INFO, topics.FORM_TOPICS)
    form_processor = ElasticProcessor(
        elasticsearch=get_es_new(),
        index_info=REPORT_XFORM_INDEX_INFO,
        doc_prep_fn=transform_xform_for_report_forms_index,
        doc_filter_fn=report_xform_filter)
    kafka_change_feed = KafkaChangeFeed(topics=topics.FORM_TOPICS,
                                        client_id='report-forms-to-es',
                                        num_processes=num_processes,
                                        process_num=process_num)
    return ConstructedPillow(
        name=pillow_id,
        checkpoint=checkpoint,
        change_feed=kafka_change_feed,
        processor=form_processor,
        change_processed_event_handler=KafkaCheckpointEventHandler(
            checkpoint=checkpoint,
            checkpoint_frequency=100,
            change_feed=kafka_change_feed),
    )
Example #21
def get_case_search_to_elasticsearch_pillow(
        pillow_id='CaseSearchToElasticsearchPillow',
        num_processes=1,
        process_num=0,
        **kwargs):
    assert pillow_id == 'CaseSearchToElasticsearchPillow', 'Pillow ID is not allowed to change'
    checkpoint = get_checkpoint_for_elasticsearch_pillow(
        pillow_id, CASE_SEARCH_INDEX_INFO, topics.CASE_TOPICS)
    case_processor = CaseSearchPillowProcessor(
        elasticsearch=get_es_new(),
        index_info=CASE_SEARCH_INDEX_INFO,
        doc_prep_fn=transform_case_for_elasticsearch)
    change_feed = KafkaChangeFeed(topics=topics.CASE_TOPICS,
                                  group_id='cases-to-es',
                                  num_processes=num_processes,
                                  process_num=process_num)
    return ConstructedPillow(
        name=pillow_id,
        checkpoint=checkpoint,
        change_feed=change_feed,
        processor=case_processor,
        change_processed_event_handler=KafkaCheckpointEventHandler(
            checkpoint=checkpoint,
            checkpoint_frequency=100,
            change_feed=change_feed,
        ),
    )
Example #22
File: sms.py  Project: tstalka/commcare-hq
def get_sql_sms_pillow(pillow_id='SqlSMSPillow',
                       num_processes=1,
                       process_num=0,
                       processor_chunk_size=DEFAULT_PROCESSOR_CHUNK_SIZE,
                       **kwargs):
    assert pillow_id == 'SqlSMSPillow', 'Pillow ID is not allowed to change'
    checkpoint = get_checkpoint_for_elasticsearch_pillow(
        pillow_id, SMS_INDEX_INFO, [topics.SMS])
    processor = BulkElasticProcessor(elasticsearch=get_es_new(),
                                     index_info=SMS_INDEX_INFO,
                                     doc_prep_fn=lambda x: x)
    change_feed = KafkaChangeFeed(topics=[topics.SMS],
                                  client_id='sql-sms-to-es',
                                  num_processes=num_processes,
                                  process_num=process_num)
    return ConstructedPillow(
        name=pillow_id,
        checkpoint=checkpoint,
        change_feed=change_feed,
        processor=processor,
        change_processed_event_handler=KafkaCheckpointEventHandler(
            checkpoint=checkpoint,
            checkpoint_frequency=100,
            change_feed=change_feed),
        processor_chunk_size=processor_chunk_size)
Example #23
def get_location_pillow(pillow_id='location-ucr-pillow', include_ucrs=None,
                        num_processes=1, process_num=0, ucr_configs=None, **kwargs):
    """Processes updates to locations for UCR

    Note this is only applicable if a domain on the environment has the `LOCATIONS_IN_UCR` flag enabled.

    Processors:
      - :py:class:`corehq.apps.userreports.pillow.ConfigurableReportPillowProcessor`
    """
    change_feed = KafkaChangeFeed(
        [LOCATION_TOPIC], client_id=pillow_id, num_processes=num_processes, process_num=process_num
    )
    ucr_processor = ConfigurableReportPillowProcessor(
        data_source_providers=[DynamicDataSourceProvider('Location'), StaticDataSourceProvider('Location')],
        include_ucrs=include_ucrs,
    )
    if ucr_configs:
        ucr_processor.bootstrap(ucr_configs)
    checkpoint = KafkaPillowCheckpoint(pillow_id, [LOCATION_TOPIC])
    event_handler = KafkaCheckpointEventHandler(
        checkpoint=checkpoint, checkpoint_frequency=1000, change_feed=change_feed,
        checkpoint_callback=ucr_processor
    )
    return ConstructedPillow(
        name=pillow_id,
        change_feed=change_feed,
        checkpoint=checkpoint,
        change_processed_event_handler=event_handler,
        processor=[ucr_processor]
    )
Example #24
def get_user_pillow(pillow_id='UserPillow',
                    num_processes=1,
                    process_num=0,
                    **kwargs):
    assert pillow_id == 'UserPillow', 'Pillow ID is not allowed to change'
    checkpoint = get_checkpoint_for_elasticsearch_pillow(
        pillow_id, USER_INDEX_INFO, topics.USER_TOPICS)
    user_processor = ElasticProcessor(
        elasticsearch=get_es_new(),
        index_info=USER_INDEX_INFO,
        doc_prep_fn=transform_user_for_elasticsearch,
    )
    change_feed = KafkaChangeFeed(topics=topics.USER_TOPICS,
                                  client_id='users-to-es',
                                  num_processes=num_processes,
                                  process_num=process_num)
    return ConstructedPillow(
        name=pillow_id,
        checkpoint=checkpoint,
        change_feed=change_feed,
        processor=user_processor,
        change_processed_event_handler=KafkaCheckpointEventHandler(
            checkpoint=checkpoint,
            checkpoint_frequency=100,
            change_feed=change_feed),
    )
Example #25
File: user.py  Project: solleks/commcare-hq
def get_unknown_users_pillow(pillow_id='unknown-users-pillow',
                             num_processes=1,
                             process_num=0,
                             **kwargs):
    """This pillow adds users from xform submissions that come in to the User Index if they don't exist in HQ

        Processors:
          - :py:class:`corehq.pillows.user.UnknownUsersProcessor`
    """
    # TODO: Remove after full rollout of https://github.com/dimagi/commcare-hq/pull/21329/
    checkpoint = get_checkpoint_for_elasticsearch_pillow(
        pillow_id, USER_INDEX_INFO, topics.FORM_TOPICS)
    processor = UnknownUsersProcessor()
    change_feed = KafkaChangeFeed(topics=topics.FORM_TOPICS,
                                  client_id='unknown-users',
                                  num_processes=num_processes,
                                  process_num=process_num)
    return ConstructedPillow(
        name=pillow_id,
        checkpoint=checkpoint,
        change_feed=change_feed,
        processor=processor,
        change_processed_event_handler=KafkaCheckpointEventHandler(
            checkpoint=checkpoint,
            checkpoint_frequency=100,
            change_feed=change_feed),
    )
Example #26
File: user.py  Project: solleks/commcare-hq
def get_user_pillow_old(pillow_id='UserPillow',
                        num_processes=1,
                        process_num=0,
                        **kwargs):
    """Processes users and sends them to ES.

    Processors:
      - :py:class:`pillowtop.processors.elastic.ElasticProcessor`
    """
    # TODO: Remove after full rollout of https://github.com/dimagi/commcare-hq/pull/21329/
    assert pillow_id == 'UserPillow', 'Pillow ID is not allowed to change'
    checkpoint = get_checkpoint_for_elasticsearch_pillow(
        pillow_id, USER_INDEX_INFO, topics.USER_TOPICS)
    user_processor = ElasticProcessor(
        elasticsearch=get_es_new(),
        index_info=USER_INDEX_INFO,
        doc_prep_fn=transform_user_for_elasticsearch,
    )
    change_feed = KafkaChangeFeed(topics=topics.USER_TOPICS,
                                  client_id='users-to-es',
                                  num_processes=num_processes,
                                  process_num=process_num)
    return ConstructedPillow(
        name=pillow_id,
        checkpoint=checkpoint,
        change_feed=change_feed,
        processor=user_processor,
        change_processed_event_handler=KafkaCheckpointEventHandler(
            checkpoint=checkpoint,
            checkpoint_frequency=100,
            change_feed=change_feed),
    )
Example #27
def get_report_case_to_elasticsearch_pillow(
        pillow_id='ReportCaseToElasticsearchPillow',
        num_processes=1,
        process_num=0,
        **kwargs):
    assert pillow_id == 'ReportCaseToElasticsearchPillow', 'Pillow ID is not allowed to change'
    checkpoint = get_checkpoint_for_elasticsearch_pillow(
        pillow_id, REPORT_CASE_INDEX_INFO, topics.CASE_TOPICS)
    form_processor = ElasticProcessor(
        elasticsearch=get_es_new(),
        index_info=REPORT_CASE_INDEX_INFO,
        doc_prep_fn=transform_case_to_report_es,
        doc_filter_fn=report_case_filter,
    )
    kafka_change_feed = KafkaChangeFeed(topics=topics.CASE_TOPICS,
                                        group_id='report-cases-to-es',
                                        num_processes=num_processes,
                                        process_num=process_num)
    return ConstructedPillow(
        name=pillow_id,
        checkpoint=checkpoint,
        change_feed=kafka_change_feed,
        processor=form_processor,
        change_processed_event_handler=KafkaCheckpointEventHandler(
            checkpoint=checkpoint,
            checkpoint_frequency=100,
            change_feed=kafka_change_feed),
    )
Example #28
def get_group_pillow_old(pillow_id='GroupPillow',
                         num_processes=1,
                         process_num=0,
                         **kwargs):
    """
    # todo; To remove after full rollout of https://github.com/dimagi/commcare-hq/pull/21329/
    This pillow adds users from xform submissions that come in to the User Index if they don't exist in HQ
    """
    assert pillow_id == 'GroupPillow', 'Pillow ID is not allowed to change'
    checkpoint = get_checkpoint_for_elasticsearch_pillow(
        pillow_id, GROUP_INDEX_INFO, [topics.GROUP])
    processor = get_group_to_elasticsearch_processor()
    change_feed = KafkaChangeFeed(topics=[topics.GROUP],
                                  client_id='groups-to-es',
                                  num_processes=num_processes,
                                  process_num=process_num)
    return ConstructedPillow(
        name=pillow_id,
        checkpoint=checkpoint,
        change_feed=change_feed,
        processor=processor,
        change_processed_event_handler=KafkaCheckpointEventHandler(
            checkpoint=checkpoint,
            checkpoint_frequency=10,
            change_feed=change_feed),
    )
Example #29
    def setUp(self):
        super(KafkaPublishingSQLTest, self).setUp()
        FormProcessorTestUtils.delete_all_cases_forms_ledgers()
        self.form_accessors = FormAccessors(domain=self.domain)
        self.processor = TestProcessor()
        self.case_pillow = ConstructedPillow(
            name='test-kafka-case-feed',
            checkpoint=None,
            change_feed=KafkaChangeFeed(topics=[topics.CASE, topics.CASE_SQL],
                                        group_id='test-kafka-case-feed'),
            processor=self.processor)
        self.ledger_pillow = ConstructedPillow(
            name='test-kafka-ledger-feed',
            checkpoint=None,
            change_feed=KafkaChangeFeed(topics=[topics.LEDGER],
                                        group_id='test-kafka-ledger-feed'),
            processor=self.processor)
Example #30
def get_case_pillow(
        pillow_id='case-pillow', ucr_division=None,
        include_ucrs=None, exclude_ucrs=None,
        num_processes=1, process_num=0, ucr_configs=None, skip_ucr=False,
        processor_chunk_size=DEFAULT_PROCESSOR_CHUNK_SIZE, topics=None, **kwargs):
    """Return a pillow that processes cases. The processors include, UCR and elastic processors

    Processors:
      - :py:class:`corehq.apps.userreports.pillow.ConfigurableReportPillowProcessor` (disabled when skip_ucr=True)
      - :py:class:`pillowtop.processors.elastic.BulkElasticProcessor`
      - :py:func:`corehq.pillows.case_search.get_case_search_processor`
      - :py:class:`corehq.messaging.pillow.CaseMessagingSyncProcessor`
    """
    if topics:
        assert set(topics).issubset(CASE_TOPICS), "This is a pillow to process cases only"
    topics = topics or CASE_TOPICS
    change_feed = KafkaChangeFeed(
        topics, client_id=pillow_id, num_processes=num_processes, process_num=process_num
    )
    ucr_processor = ConfigurableReportPillowProcessor(
        data_source_providers=[DynamicDataSourceProvider('CommCareCase'), StaticDataSourceProvider('CommCareCase')],
        ucr_division=ucr_division,
        include_ucrs=include_ucrs,
        exclude_ucrs=exclude_ucrs,
        run_migrations=(process_num == 0),  # only first process runs migrations
    )
    if ucr_configs:
        ucr_processor.bootstrap(ucr_configs)
    case_to_es_processor = BulkElasticProcessor(
        elasticsearch=get_es_new(),
        index_info=CASE_INDEX_INFO,
        doc_prep_fn=transform_case_for_elasticsearch
    )
    case_search_processor = get_case_search_processor()

    checkpoint_id = "{}-{}-{}-{}".format(
        pillow_id, CASE_INDEX_INFO.index, case_search_processor.index_info.index, 'messaging-sync')
    checkpoint = KafkaPillowCheckpoint(checkpoint_id, topics)
    event_handler = KafkaCheckpointEventHandler(
        checkpoint=checkpoint, checkpoint_frequency=1000, change_feed=change_feed,
        checkpoint_callback=ucr_processor
    )
    processors = [case_to_es_processor, CaseMessagingSyncProcessor()]
    if settings.RUN_CASE_SEARCH_PILLOW:
        processors.append(case_search_processor)
    if not settings.ENTERPRISE_MODE:
        processors.append(get_case_to_report_es_processor())
    if not skip_ucr:
        # this option is useful in tests to avoid extra UCR setup where unnecessary
        processors = [ucr_processor] + processors
    return ConstructedPillow(
        name=pillow_id,
        change_feed=change_feed,
        checkpoint=checkpoint,
        change_processed_event_handler=event_handler,
        processor=processors,
        processor_chunk_size=processor_chunk_size
    )
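For tests, the docstring and the skip_ucr comment above suggest a trimmed-down configuration; a hedged sketch:

# Sketch: a lighter case pillow for tests -- no UCR setup and one case topic.
test_pillow = get_case_pillow(
    skip_ucr=True,                 # drop ConfigurableReportPillowProcessor
    topics=list(CASE_TOPICS)[:1],  # any subset of CASE_TOPICS passes the assert
    processor_chunk_size=1,        # assumption: chunks of one change at a time
)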
Example #31
    def test_basic(self):
        # setup
        feed = KafkaChangeFeed(topics=[topics.CASE],
                               client_id='test-kafka-feed')
        pillow_name = 'test-chunked-processing'
        checkpoint = PillowCheckpoint(pillow_name, feed.sequence_format)
        processor = ChunkedCountProcessor()
        original_process_change = processor.process_change
        original_process_changes_chunk = processor.process_changes_chunk

        pillow = ConstructedPillow(
            name=pillow_name,
            checkpoint=checkpoint,
            change_feed=feed,
            processor=processor,
            change_processed_event_handler=KafkaCheckpointEventHandler(
                checkpoint=checkpoint,
                checkpoint_frequency=1,
                change_feed=feed),
            processor_chunk_size=2)

        since = feed.get_latest_offsets()
        self._produce_changes(2)
        # pillow should use process_changes_chunk (make process_change raise an exception for test)
        processor.process_change = MagicMock(side_effect=Exception('_'))
        pillow.process_changes(since=since, forever=False)
        self.assertEqual(processor.count, 2)

        self._produce_changes(2)
        # if process_changes_chunk raises exception, pillow should use process_change
        processor.process_change = original_process_change
        processor.process_changes_chunk = MagicMock(side_effect=Exception('_'))
        pillow.process_changes(since=pillow.get_last_checkpoint_sequence(),
                               forever=False)
        self.assertEqual(processor.count, 4)

        self._produce_changes(1)
        # offsets after full chunk should still be processed
        processor.process_change = MagicMock(side_effect=Exception('_'))
        processor.process_changes_chunk = original_process_changes_chunk
        pillow.process_changes(since=pillow.get_last_checkpoint_sequence(),
                               forever=False)
        self.assertEqual(processor.count, 5)
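A minimal sketch of the processor contract this test exercises: the pillow prefers process_changes_chunk and falls back to per-change process_change when the chunked call raises (inferred from the assertions above; the return shape of process_changes_chunk is an assumption):

class SketchChunkedProcessor(object):
    """Hypothetical counting processor, modeled on ChunkedCountProcessor above."""

    def __init__(self):
        self.count = 0

    def process_change(self, change):
        # Per-change fallback path, used when the chunked call fails.
        self.count += 1

    def process_changes_chunk(self, changes_chunk):
        # Preferred path: called with up to processor_chunk_size changes.
        self.count += len(changes_chunk)
        # Assumption: pillowtop expects (changes_to_retry, change_error_pairs).
        return [], []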
Example #32
def get_case_pillow(
        pillow_id='case-pillow', ucr_division=None,
        include_ucrs=None, exclude_ucrs=None,
        num_processes=1, process_num=0, ucr_configs=None, skip_ucr=False,
        processor_chunk_size=UCR_PROCESSING_CHUNK_SIZE, topics=None, **kwargs):
    """
    Return a pillow that processes cases. The processors include, UCR and elastic processors
        Args:
            skip_ucr: Can be set to True to avoid passing UCR processor, useful for tests
    """
    if topics:
        assert set(topics).issubset(CASE_TOPICS), "This is a pillow to process cases only"
    topics = topics or CASE_TOPICS
    change_feed = KafkaChangeFeed(
        topics, client_id=pillow_id, num_processes=num_processes, process_num=process_num
    )
    ucr_processor = ConfigurableReportPillowProcessor(
        data_source_providers=[DynamicDataSourceProvider(), StaticDataSourceProvider()],
        ucr_division=ucr_division,
        include_ucrs=include_ucrs,
        exclude_ucrs=exclude_ucrs,
    )
    if ucr_configs:
        ucr_processor.bootstrap(ucr_configs)
    case_to_es_processor = ElasticProcessor(
        elasticsearch=get_es_new(),
        index_info=CASE_INDEX_INFO,
        doc_prep_fn=transform_case_for_elasticsearch
    )
    case_search_processor = get_case_search_processor()

    checkpoint_id = "{}-{}-{}".format(
        pillow_id, CASE_INDEX_INFO.index, case_search_processor.index_info.index)
    checkpoint = KafkaPillowCheckpoint(checkpoint_id, topics)
    event_handler = KafkaCheckpointEventHandler(
        checkpoint=checkpoint, checkpoint_frequency=1000, change_feed=change_feed,
        checkpoint_callback=ucr_processor
    )
    processors = [case_to_es_processor, case_search_processor]
    if not settings.ENTERPRISE_MODE:
        processors.append(get_case_to_report_es_processor())
    if not skip_ucr:
        # this option is useful in tests to avoid extra UCR setup where unnecessary
        processors = [ucr_processor] + processors
    return ConstructedPillow(
        name=pillow_id,
        change_feed=change_feed,
        checkpoint=checkpoint,
        change_processed_event_handler=event_handler,
        processor=processors,
        processor_chunk_size=processor_chunk_size
    )
Example #33
    def test_basic(self):
        # setup
        feed = KafkaChangeFeed(topics=[topics.CASE], client_id='test-kafka-feed')
        pillow_name = 'test-chunked-processing'
        checkpoint = PillowCheckpoint(pillow_name, feed.sequence_format)
        processor = ChunkedCountProcessor()
        original_process_change = processor.process_change
        original_process_changes_chunk = processor.process_changes_chunk

        pillow = ConstructedPillow(
            name=pillow_name,
            checkpoint=checkpoint,
            change_feed=feed,
            processor=processor,
            change_processed_event_handler=KafkaCheckpointEventHandler(
                checkpoint=checkpoint, checkpoint_frequency=1, change_feed=feed
            ),
            processor_chunk_size=2
        )

        since = feed.get_latest_offsets()
        self._produce_changes(2)
        # pillow should use process_changes_chunk (make process_change raise an exception for test)
        processor.process_change = MagicMock(side_effect=Exception('_'))
        pillow.process_changes(since=since, forever=False)
        self.assertEqual(processor.count, 2)

        self._produce_changes(2)
        # if process_changes_chunk raises exception, pillow should use process_change
        processor.process_change = original_process_change
        processor.process_changes_chunk = MagicMock(side_effect=Exception('_'))
        pillow.process_changes(since=pillow.get_last_checkpoint_sequence(), forever=False)
        self.assertEqual(processor.count, 4)

        self._produce_changes(1)
        # offsets after full chunk should still be processed
        processor.process_change = MagicMock(side_effect=Exception('_'))
        processor.process_changes_chunk = original_process_changes_chunk
        pillow.process_changes(since=pillow.get_last_checkpoint_sequence(), forever=False)
        self.assertEqual(processor.count, 5)