Exemple #1
0
    def test_process_changes_chunk(self):
        """Process one change per case id in bulk and check the index contents.

        ``process_changes_chunk`` must report nothing to retry and no errors,
        and the ids fetched back through ``self.es_interface`` must be exactly
        ``self.case_ids``.
        """
        bulk_processor = BulkElasticProcessor(self.es, TEST_INDEX_INFO)
        case_changes = self._changes_from_ids(self.case_ids)

        to_retry, failures = bulk_processor.process_changes_chunk(case_changes)
        self.assertEqual([], to_retry)
        self.assertEqual([], failures)

        # Collect the ids of everything that actually made it into the index.
        indexed_ids = set()
        for es_doc in self.es_interface.get_bulk_docs(
                index=self.index,
                doc_type=TEST_INDEX_INFO.type,
                doc_ids=self.case_ids):
            indexed_ids.add(es_doc['_id'])
        self.assertEqual(set(self.case_ids), indexed_ids)
Exemple #2
0
def get_sql_sms_pillow(pillow_id='SqlSMSPillow',
                       num_processes=1,
                       process_num=0,
                       processor_chunk_size=DEFAULT_PROCESSOR_CHUNK_SIZE,
                       **kwargs):
    """Build the SMS pillow.

    The pillow reads the ``topics.SMS`` Kafka topic and hands changes to a
    single bulk Elasticsearch processor configured with ``SMS_INDEX_INFO``.
    An identity function is supplied as the processor's ``doc_prep_fn``.

    ``pillow_id`` must be left at its default; any other value trips the
    assertion below. Extra keyword arguments are accepted and ignored.

    Processors:
      - :py:class:`pillowtop.processors.elastic.BulkElasticProcessor`
    """
    assert pillow_id == 'SqlSMSPillow', 'Pillow ID is not allowed to change'

    sms_checkpoint = get_checkpoint_for_elasticsearch_pillow(
        pillow_id,
        SMS_INDEX_INFO,
        [topics.SMS],
    )
    sms_to_es_processor = BulkElasticProcessor(
        elasticsearch=get_es_new(),
        index_info=SMS_INDEX_INFO,
        doc_prep_fn=lambda x: x,
    )
    kafka_feed = KafkaChangeFeed(
        topics=[topics.SMS],
        client_id='sql-sms-to-es',
        num_processes=num_processes,
        process_num=process_num,
    )
    # The checkpoint handler shares the checkpoint and feed used by the pillow.
    checkpoint_handler = KafkaCheckpointEventHandler(
        checkpoint=sms_checkpoint,
        checkpoint_frequency=100,
        change_feed=kafka_feed,
    )
    return ConstructedPillow(
        name=pillow_id,
        checkpoint=sms_checkpoint,
        change_feed=kafka_feed,
        processor=sms_to_es_processor,
        change_processed_event_handler=checkpoint_handler,
        processor_chunk_size=processor_chunk_size,
    )
Exemple #3
0
    def test_process_changes_chunk_ignore_couch(self):
        """Check the outcome when a couch/sql change filter is installed.

        With ``is_couch_change_for_sql_domain`` supplied as
        ``change_filter_fn``, processing the chunk must report nothing to
        retry and no errors, and no documents for ``self.case_ids`` may be
        found in Elasticsearch afterwards.
        """
        filtering_processor = BulkElasticProcessor(
            self.es,
            TEST_INDEX_INFO,
            change_filter_fn=is_couch_change_for_sql_domain,
        )
        case_changes = self._changes_from_ids(self.case_ids)

        to_retry, failures = filtering_processor.process_changes_chunk(case_changes)
        self.assertEqual([], to_retry)
        self.assertEqual([], failures)

        docs_found = self.es_interface.get_bulk_docs(
            self.es_alias,
            doc_type=TEST_INDEX_INFO.type,
            doc_ids=self.case_ids,
        )
        self.assertEqual([], docs_found)
    def test_process_changes_chunk_with_errors(self):
        """Check how retries and errors are reported for a failing bulk call.

        ``ElasticsearchInterface.bulk_ops`` is patched to return a response
        carrying one error entry for the first known case id. Two freshly
        generated ids are appended to the known case ids; the test expects
        exactly those two to come back for retry, and the id named in the
        mocked error entry to be the only reported error.
        """
        failing_id = self.case_ids[0]
        bulk_response = (5, [{'index': {'_id': failing_id, 'error': 'DateParseError'}}])
        bulk_processor = BulkElasticProcessor(Mock(), TEST_INDEX_INFO)

        unknown_ids = [uuid.uuid4().hex for _ in range(2)]
        all_changes = self._changes_from_ids(self.case_ids + unknown_ids)

        with patch.object(ElasticsearchInterface, 'bulk_ops', return_value=bulk_response):
            retry, errors = bulk_processor.process_changes_chunk(all_changes)

        retried_ids = {change.id for change in retry}
        self.assertEqual(set(unknown_ids), retried_ids)

        # Each error entry is indexable; its first element carries the change.
        errored_ids = [error[0].id for error in errors]
        self.assertEqual([failing_id], errored_ids)
Exemple #5
0
def get_case_pillow(
        pillow_id='case-pillow', ucr_division=None,
        include_ucrs=None, exclude_ucrs=None,
        num_processes=1, process_num=0, ucr_configs=None, skip_ucr=False,
        processor_chunk_size=DEFAULT_PROCESSOR_CHUNK_SIZE, topics=None, **kwargs):
    """Build the pillow that consumes case changes from Kafka.

    ``topics`` defaults to ``CASE_TOPICS``; when given, it must be a subset
    of ``CASE_TOPICS``. The UCR processor is always constructed (it is also
    the checkpoint callback) but is only added to the processor list when
    ``skip_ucr`` is false. Extra keyword arguments are accepted and ignored.

    Processors, in the order they are placed in the list:
      - :py:class:`corehq.apps.userreports.pillow.ConfigurableReportPillowProcessor` (disabled when skip_ucr=True)
      - :py:class:`pillowtop.processors.elastic.BulkElasticProcessor`
      - :py:class:`corehq.messaging.pillow.CaseMessagingSyncProcessor`
      - :py:function:`corehq.pillows.case_search.get_case_search_processor`
        (only when ``settings.RUN_CASE_SEARCH_PILLOW`` is set)
      - the processor returned by ``get_case_to_report_es_processor()``
        (only when ``settings.ENTERPRISE_MODE`` is not set)
    """
    if topics:
        assert set(topics).issubset(CASE_TOPICS), "This is a pillow to process cases only"
    else:
        topics = CASE_TOPICS

    kafka_feed = KafkaChangeFeed(
        topics,
        client_id=pillow_id,
        num_processes=num_processes,
        process_num=process_num,
    )

    case_data_sources = [
        DynamicDataSourceProvider('CommCareCase'),
        StaticDataSourceProvider('CommCareCase'),
    ]
    ucr_processor = ConfigurableReportPillowProcessor(
        data_source_providers=case_data_sources,
        ucr_division=ucr_division,
        include_ucrs=include_ucrs,
        exclude_ucrs=exclude_ucrs,
        run_migrations=(process_num == 0),  # only first process runs migrations
    )
    if ucr_configs:
        ucr_processor.bootstrap(ucr_configs)

    case_to_es_processor = BulkElasticProcessor(
        elasticsearch=get_es_new(),
        index_info=CASE_INDEX_INFO,
        doc_prep_fn=transform_case_for_elasticsearch,
    )
    # Built unconditionally: its index name is part of the checkpoint id even
    # when the processor itself is not added to the pillow.
    case_search_processor = get_case_search_processor()

    checkpoint_parts = (
        pillow_id,
        CASE_INDEX_INFO.index,
        case_search_processor.index_info.index,
        'messaging-sync',
    )
    checkpoint = KafkaPillowCheckpoint("{}-{}-{}-{}".format(*checkpoint_parts), topics)
    checkpoint_handler = KafkaCheckpointEventHandler(
        checkpoint=checkpoint,
        checkpoint_frequency=1000,
        change_feed=kafka_feed,
        checkpoint_callback=ucr_processor,
    )

    processors = [case_to_es_processor, CaseMessagingSyncProcessor()]
    if settings.RUN_CASE_SEARCH_PILLOW:
        processors.append(case_search_processor)
    if not settings.ENTERPRISE_MODE:
        processors.append(get_case_to_report_es_processor())
    if not skip_ucr:
        # skipping UCR is useful in tests to avoid extra UCR setup where unnecessary
        processors.insert(0, ucr_processor)

    return ConstructedPillow(
        name=pillow_id,
        change_feed=kafka_feed,
        checkpoint=checkpoint,
        change_processed_event_handler=checkpoint_handler,
        processor=processors,
        processor_chunk_size=processor_chunk_size,
    )
Exemple #6
0
def get_user_es_processor():
    """Return a bulk Elasticsearch processor configured for the user index.

    The processor is built with a client from ``get_es_new()``,
    ``USER_INDEX_INFO`` as its index info, and
    ``transform_user_for_elasticsearch`` as its ``doc_prep_fn``.
    """
    es_client = get_es_new()
    user_processor = BulkElasticProcessor(
        elasticsearch=es_client,
        index_info=USER_INDEX_INFO,
        doc_prep_fn=transform_user_for_elasticsearch,
    )
    return user_processor
Exemple #7
0
def get_xform_pillow(pillow_id='xform-pillow',
                     ucr_division=None,
                     include_ucrs=None,
                     exclude_ucrs=None,
                     num_processes=1,
                     process_num=0,
                     ucr_configs=None,
                     skip_ucr=False,
                     processor_chunk_size=DEFAULT_PROCESSOR_CHUNK_SIZE,
                     topics=None,
                     **kwargs):
    """Return a pillow that processes form (XForm) changes from Kafka.

    ``topics`` defaults to ``FORM_TOPICS``; when given, it must be a subset
    of ``FORM_TOPICS``. The UCR processor is always constructed (it is also
    the checkpoint callback) but is only added to the processor list when
    ``skip_ucr`` is false. Extra keyword arguments are accepted and ignored.

    Processors, in the order they are placed in the list:
      - ``BulkElasticProcessor`` for ``XFORM_INDEX_INFO``
      - ``UnknownUsersProcessor``
        (only when ``settings.RUN_UNKNOWN_USER_PILLOW`` is set)
      - ``FormSubmissionMetadataTrackerProcessor``
        (only when ``settings.RUN_FORM_META_PILLOW`` is set)
      - ``BulkElasticProcessor`` for ``REPORT_XFORM_INDEX_INFO``
        (only when ``settings.ENTERPRISE_MODE`` is not set)
      - ``ConfigurableReportPillowProcessor`` (disabled when skip_ucr=True)
    """
    # avoid circular dependency
    from corehq.pillows.reportxform import transform_xform_for_report_forms_index, report_xform_filter
    from corehq.pillows.mappings.user_mapping import USER_INDEX
    if topics:
        # The message previously said "cases" (copied from the case pillow),
        # which was misleading when this form-topic check failed.
        assert set(topics).issubset(
            FORM_TOPICS), "This is a pillow to process forms only"
    topics = topics or FORM_TOPICS
    change_feed = KafkaChangeFeed(topics,
                                  client_id=pillow_id,
                                  num_processes=num_processes,
                                  process_num=process_num)

    ucr_processor = ConfigurableReportPillowProcessor(
        data_source_providers=[
            DynamicDataSourceProvider('XFormInstance'),
            StaticDataSourceProvider('XFormInstance')
        ],
        ucr_division=ucr_division,
        include_ucrs=include_ucrs,
        exclude_ucrs=exclude_ucrs,
        run_migrations=(
            process_num == 0),  # only first process runs migrations
    )
    xform_to_es_processor = BulkElasticProcessor(
        elasticsearch=get_es_new(),
        index_info=XFORM_INDEX_INFO,
        doc_prep_fn=transform_xform_for_elasticsearch,
        doc_filter_fn=xform_pillow_filter,
    )
    unknown_user_form_processor = UnknownUsersProcessor()
    form_meta_processor = FormSubmissionMetadataTrackerProcessor()
    # The checkpoint id embeds the report-xform and user index names
    # regardless of which optional processors end up enabled below.
    checkpoint_id = "{}-{}-{}-{}".format(pillow_id, XFORM_INDEX_INFO.index,
                                         REPORT_XFORM_INDEX_INFO.index,
                                         USER_INDEX)
    checkpoint = KafkaPillowCheckpoint(checkpoint_id, topics)
    event_handler = KafkaCheckpointEventHandler(
        checkpoint=checkpoint,
        checkpoint_frequency=1000,
        change_feed=change_feed,
        checkpoint_callback=ucr_processor)
    if ucr_configs:
        ucr_processor.bootstrap(ucr_configs)
    processors = [xform_to_es_processor]
    if settings.RUN_UNKNOWN_USER_PILLOW:
        processors.append(unknown_user_form_processor)
    if settings.RUN_FORM_META_PILLOW:
        processors.append(form_meta_processor)
    if not settings.ENTERPRISE_MODE:
        xform_to_report_es_processor = BulkElasticProcessor(
            elasticsearch=get_es_new(),
            index_info=REPORT_XFORM_INDEX_INFO,
            doc_prep_fn=transform_xform_for_report_forms_index,
            doc_filter_fn=report_xform_filter)
        processors.append(xform_to_report_es_processor)
    if not skip_ucr:
        # skipping UCR is useful in tests to avoid extra UCR setup
        processors.append(ucr_processor)
    return ConstructedPillow(name=pillow_id,
                             change_feed=change_feed,
                             checkpoint=checkpoint,
                             change_processed_event_handler=event_handler,
                             processor=processors,
                             processor_chunk_size=processor_chunk_size)