def test_process_changes_chunk(self):
    processor = BulkElasticProcessor(self.es, TEST_INDEX_INFO)
    changes = self._changes_from_ids(self.case_ids)
    retry, errors = processor.process_changes_chunk(changes)
    self.assertEqual([], retry)
    self.assertEqual([], errors)
    es_docs = self.es_interface.get_bulk_docs(
        index=self.index, doc_type=TEST_INDEX_INFO.type, doc_ids=self.case_ids)
    ids_in_es = {doc['_id'] for doc in es_docs}
    self.assertEqual(set(self.case_ids), ids_in_es)
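
# Hedged sketch (not shown in this excerpt) of the _changes_from_ids helper the
# tests rely on: it wraps each doc id in a pillowtop Change. The exact Change
# constructor arguments and `self.doc_store` are assumptions; the key point is
# that each change carries only an id, and the processor fetches the document
# itself -- which is why unknown ids surface as "retry" in the error test below.
def _changes_from_ids(self, case_ids):
    return [
        Change(id=case_id, sequence_id='0', document_store=self.doc_store)
        for case_id in case_ids
    ]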

def get_sql_sms_pillow(pillow_id='SqlSMSPillow', num_processes=1, process_num=0,
                       processor_chunk_size=DEFAULT_PROCESSOR_CHUNK_SIZE, **kwargs):
    """SMS Pillow

    Processors:
      - :py:class:`pillowtop.processors.elastic.BulkElasticProcessor`
    """
    assert pillow_id == 'SqlSMSPillow', 'Pillow ID is not allowed to change'
    checkpoint = get_checkpoint_for_elasticsearch_pillow(
        pillow_id, SMS_INDEX_INFO, [topics.SMS])
    processor = BulkElasticProcessor(
        elasticsearch=get_es_new(),
        index_info=SMS_INDEX_INFO,
        doc_prep_fn=lambda x: x
    )
    change_feed = KafkaChangeFeed(
        topics=[topics.SMS], client_id='sql-sms-to-es',
        num_processes=num_processes, process_num=process_num)
    return ConstructedPillow(
        name=pillow_id,
        checkpoint=checkpoint,
        change_feed=change_feed,
        processor=processor,
        change_processed_event_handler=KafkaCheckpointEventHandler(
            checkpoint=checkpoint, checkpoint_frequency=100, change_feed=change_feed),
        processor_chunk_size=processor_chunk_size)
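
# Hedged usage sketch (not from the source): pillows are normally launched via
# the run_ptop management command, but the blocking run() loop can also be
# started directly, e.g. for local debugging.
def run_sms_pillow():
    pillow = get_sql_sms_pillow()
    pillow.run()  # consumes the Kafka SMS topic and bulk-indexes into ES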

def test_process_changes_chunk_ignore_couch(self):
    processor = BulkElasticProcessor(
        self.es, TEST_INDEX_INFO,
        change_filter_fn=is_couch_change_for_sql_domain)
    changes = self._changes_from_ids(self.case_ids)
    retry, errors = processor.process_changes_chunk(changes)
    self.assertEqual([], retry)
    self.assertEqual([], errors)
    es_docs = self.es_interface.get_bulk_docs(
        self.es_alias, doc_type=TEST_INDEX_INFO.type, doc_ids=self.case_ids)
    self.assertEqual([], es_docs)
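
# Hedged sketch (semantics inferred from the test above): change_filter_fn is
# called per change, and a truthy return excludes that change from the bulk
# request, which is why nothing reaches Elasticsearch here. A minimal custom
# filter might look like:
def skip_deleted_changes(change):
    """Skip tombstone changes so deletions are never bulk-indexed."""
    return change.deleted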

def test_process_changes_chunk_with_errors(self):
    # Simulated bulk response: five docs succeed and the first case id is
    # rejected by Elasticsearch with an indexing error.
    mock_response = (5, [{'index': {'_id': self.case_ids[0], 'error': 'DateParseError'}}])
    processor = BulkElasticProcessor(Mock(), TEST_INDEX_INFO)
    missing_case_ids = [uuid.uuid4().hex, uuid.uuid4().hex]
    changes = self._changes_from_ids(self.case_ids + missing_case_ids)
    with patch.object(ElasticsearchInterface, 'bulk_ops', return_value=mock_response):
        retry, errors = processor.process_changes_chunk(changes)
    # Changes whose documents could not be fetched come back for retry;
    # changes rejected by Elasticsearch come back as errors.
    self.assertEqual(
        set(missing_case_ids),
        {change.id for change in retry}
    )
    self.assertEqual(
        [self.case_ids[0]],
        [error[0].id for error in errors]
    )
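
# Hedged sketch (inferred from the test above, not quoted from pillowtop):
# process_changes_chunk() returns two lists -- `retry` for changes whose
# documents could not be fetched and should be re-queued, and `errors` whose
# entries lead with the failed change, followed by the error details. A caller
# might consume the pair like this:
def report_chunk_result(processor, changes):
    retry, errors = processor.process_changes_chunk(changes)
    for change in retry:
        print('will retry change %s' % change.id)
    for error in errors:
        print('change %s failed: %s' % (error[0].id, error[1:]))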

def get_case_pillow(
        pillow_id='case-pillow', ucr_division=None,
        include_ucrs=None, exclude_ucrs=None,
        num_processes=1, process_num=0, ucr_configs=None, skip_ucr=False,
        processor_chunk_size=DEFAULT_PROCESSOR_CHUNK_SIZE, topics=None, **kwargs):
    """Return a pillow that processes cases. The processors include UCR and
    elastic processors.

    Processors:
      - :py:class:`corehq.apps.userreports.pillow.ConfigurableReportPillowProcessor` (disabled when skip_ucr=True)
      - :py:class:`pillowtop.processors.elastic.BulkElasticProcessor`
      - :py:func:`corehq.pillows.case_search.get_case_search_processor`
      - :py:class:`corehq.messaging.pillow.CaseMessagingSyncProcessor`
    """
    if topics:
        assert set(topics).issubset(CASE_TOPICS), "This is a pillow to process cases only"
    topics = topics or CASE_TOPICS
    change_feed = KafkaChangeFeed(
        topics, client_id=pillow_id, num_processes=num_processes, process_num=process_num)
    ucr_processor = ConfigurableReportPillowProcessor(
        data_source_providers=[
            DynamicDataSourceProvider('CommCareCase'),
            StaticDataSourceProvider('CommCareCase'),
        ],
        ucr_division=ucr_division,
        include_ucrs=include_ucrs,
        exclude_ucrs=exclude_ucrs,
        run_migrations=(process_num == 0),  # only first process runs migrations
    )
    if ucr_configs:
        ucr_processor.bootstrap(ucr_configs)
    case_to_es_processor = BulkElasticProcessor(
        elasticsearch=get_es_new(),
        index_info=CASE_INDEX_INFO,
        doc_prep_fn=transform_case_for_elasticsearch)
    case_search_processor = get_case_search_processor()
    checkpoint_id = "{}-{}-{}-{}".format(
        pillow_id, CASE_INDEX_INFO.index,
        case_search_processor.index_info.index, 'messaging-sync')
    checkpoint = KafkaPillowCheckpoint(checkpoint_id, topics)
    event_handler = KafkaCheckpointEventHandler(
        checkpoint=checkpoint, checkpoint_frequency=1000, change_feed=change_feed,
        checkpoint_callback=ucr_processor)
    processors = [case_to_es_processor, CaseMessagingSyncProcessor()]
    if settings.RUN_CASE_SEARCH_PILLOW:
        processors.append(case_search_processor)
    if not settings.ENTERPRISE_MODE:
        processors.append(get_case_to_report_es_processor())
    if not skip_ucr:
        # this option is useful in tests to avoid extra UCR setup where unnecessary
        processors = [ucr_processor] + processors
    return ConstructedPillow(
        name=pillow_id,
        change_feed=change_feed,
        checkpoint=checkpoint,
        change_processed_event_handler=event_handler,
        processor=processors,
        processor_chunk_size=processor_chunk_size)
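
# Hedged usage sketch (assumed, not from the source): tests commonly build this
# pillow with skip_ucr=True to avoid UCR setup, then drain the changes published
# since a known Kafka offset. `kafka_seq` is a placeholder for an offset
# captured before the test data was published.
def drain_case_changes(kafka_seq):
    pillow = get_case_pillow(skip_ucr=True)
    pillow.process_changes(since=kafka_seq, forever=False)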

def get_user_es_processor():
    return BulkElasticProcessor(
        elasticsearch=get_es_new(),
        index_info=USER_INDEX_INFO,
        doc_prep_fn=transform_user_for_elasticsearch,
    )
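
# Hedged sketch (assumed, not from the source): this factory is meant to be
# combined with a change feed and checkpoint in a ConstructedPillow, mirroring
# the case and form factories. `user_change_feed` and `user_checkpoint` are
# placeholders for objects built the same way as in get_case_pillow().
def build_user_pillow(user_change_feed, user_checkpoint):
    return ConstructedPillow(
        name='user-pillow',
        change_feed=user_change_feed,
        checkpoint=user_checkpoint,
        processor=[get_user_es_processor()],
    )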

def get_xform_pillow(pillow_id='xform-pillow', ucr_division=None,
                     include_ucrs=None, exclude_ucrs=None,
                     num_processes=1, process_num=0, ucr_configs=None, skip_ucr=False,
                     processor_chunk_size=DEFAULT_PROCESSOR_CHUNK_SIZE,
                     topics=None, **kwargs):
    # avoid circular dependency
    from corehq.pillows.reportxform import transform_xform_for_report_forms_index, report_xform_filter
    from corehq.pillows.mappings.user_mapping import USER_INDEX

    if topics:
        assert set(topics).issubset(FORM_TOPICS), "This is a pillow to process forms only"
    topics = topics or FORM_TOPICS
    change_feed = KafkaChangeFeed(
        topics, client_id=pillow_id, num_processes=num_processes, process_num=process_num)
    ucr_processor = ConfigurableReportPillowProcessor(
        data_source_providers=[
            DynamicDataSourceProvider('XFormInstance'),
            StaticDataSourceProvider('XFormInstance'),
        ],
        ucr_division=ucr_division,
        include_ucrs=include_ucrs,
        exclude_ucrs=exclude_ucrs,
        run_migrations=(process_num == 0),  # only first process runs migrations
    )
    xform_to_es_processor = BulkElasticProcessor(
        elasticsearch=get_es_new(),
        index_info=XFORM_INDEX_INFO,
        doc_prep_fn=transform_xform_for_elasticsearch,
        doc_filter_fn=xform_pillow_filter,
    )
    unknown_user_form_processor = UnknownUsersProcessor()
    form_meta_processor = FormSubmissionMetadataTrackerProcessor()
    checkpoint_id = "{}-{}-{}-{}".format(
        pillow_id, XFORM_INDEX_INFO.index, REPORT_XFORM_INDEX_INFO.index, USER_INDEX)
    checkpoint = KafkaPillowCheckpoint(checkpoint_id, topics)
    event_handler = KafkaCheckpointEventHandler(
        checkpoint=checkpoint, checkpoint_frequency=1000, change_feed=change_feed,
        checkpoint_callback=ucr_processor)
    if ucr_configs:
        ucr_processor.bootstrap(ucr_configs)
    processors = [xform_to_es_processor]
    if settings.RUN_UNKNOWN_USER_PILLOW:
        processors.append(unknown_user_form_processor)
    if settings.RUN_FORM_META_PILLOW:
        processors.append(form_meta_processor)
    if not settings.ENTERPRISE_MODE:
        xform_to_report_es_processor = BulkElasticProcessor(
            elasticsearch=get_es_new(),
            index_info=REPORT_XFORM_INDEX_INFO,
            doc_prep_fn=transform_xform_for_report_forms_index,
            doc_filter_fn=report_xform_filter)
        processors.append(xform_to_report_es_processor)
    if not skip_ucr:
        processors.append(ucr_processor)
    return ConstructedPillow(
        name=pillow_id,
        change_feed=change_feed,
        checkpoint=checkpoint,
        change_processed_event_handler=event_handler,
        processor=processors,
        processor_chunk_size=processor_chunk_size)
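
# Hedged example (assumed, not from the source): the `topics` argument lets a
# deployment point a dedicated pillow at a subset of the form topics; anything
# outside FORM_TOPICS trips the assert above. The topic name below is an
# illustrative placeholder.
def build_sql_forms_pillow():
    return get_xform_pillow(
        pillow_id='xform-pillow', topics=['form-sql'],
        num_processes=4, process_num=0)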