def test_run_validation_successful(patch_storage, patch_rabbit, patch_db_helper, tmp_path):
    # Given
    test_message = {"test_message": "blah"}
    mock_processor = setup_mock_processor({'header_1': [], 'header_2': []}, test_message)
    bulk_processor = BulkProcessor(mock_processor)
    bulk_processor.working_dir = tmp_path
    mock_blob = Mock()
    mock_blob.name = 'mock_blob_name'
    patch_storage.Client.return_value.list_blobs.return_value = [mock_blob]
    patch_storage.Client.return_value.download_blob_to_file.side_effect = partial(
        mock_download_blob, mock_data=(b'header_1,header_2\n'
                                       b'value1,value2\n'))

    # When
    bulk_processor.run()

    # Then
    mock_upload_to_bucket = patch_storage.Client.return_value.bucket.return_value.blob.return_value \
        .upload_from_filename
    mock_upload_to_bucket.assert_called_once_with(
        str(tmp_path.joinpath('PROCESSED_mock_blob_name')))

    patch_rabbit.return_value.__enter__.return_value.publish_message.assert_called_once_with(
        message=json.dumps(test_message),
        content_type='application/json',
        headers=None,
        exchange=mock_processor.exchange,
        routing_key=mock_processor.routing_key)

    patch_db_helper.connect_to_read_replica_pool.assert_called_once()

    assert_no_left_over_files(tmp_path)
def test_validation_row_too_short(patch_storage, patch_rabbit, patch_db_helper, tmp_path):
    # Given
    mock_processor = setup_mock_processor({'COL_1': [], 'COL_2': []}, None)
    bulk_processor = BulkProcessor(mock_processor)
    bulk_processor.working_dir = tmp_path
    mock_blob = Mock()
    mock_blob.name = 'mock_blob_name'
    patch_storage.Client.return_value.list_blobs.return_value = [mock_blob]

    # Mock data is missing the second column in its row entirely
    patch_storage.Client.return_value.download_blob_to_file.side_effect = partial(
        mock_download_blob, mock_data=b'COL_1,COL_2\n'
                                      b'col_1_value')

    # When
    bulk_processor.run()

    # Then
    mock_upload_to_bucket = patch_storage.Client.return_value.bucket.return_value.blob.return_value \
        .upload_from_filename
    mock_upload_calls = mock_upload_to_bucket.call_args_list
    assert len(mock_upload_calls) == 2, 'Upload to bucket should be called twice'
    assert call(str(tmp_path.joinpath('ERROR_mock_blob_name'))) in mock_upload_calls
    assert call(str(tmp_path.joinpath('ERROR_DETAIL_mock_blob_name'))) in mock_upload_calls

    patch_rabbit.return_value.__enter__.return_value.publish_message.assert_not_called()

    patch_db_helper.connect_to_read_replica_pool.assert_called_once()

    assert_no_left_over_files(tmp_path)
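# setup_mock_processor, mock_download_blob and assert_no_left_over_files are shared helpers
# defined elsewhere in this test module. The sketch below shows plausible definitions,
# inferred only from how the tests above call them; the real helpers may differ in detail.
from unittest.mock import Mock


def setup_mock_processor(schema, test_message):
    mock_processor = Mock()
    mock_processor.schema = schema
    # Every valid row produces the same canned event message in these tests
    mock_processor.build_event_messages.return_value = [test_message]
    return mock_processor


def mock_download_blob(_blob, file_obj, mock_data=b''):
    # Stand-in for storage.Client.download_blob_to_file: write canned bytes into the
    # destination file object instead of fetching anything from a bucket
    file_obj.write(mock_data)


def assert_no_left_over_files(working_dir):
    # The bulk processor is expected to clean up its working directory when it finishes
    assert not any(working_dir.iterdir()), 'Expected working directory to be empty'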
def bulk_non_compliance_processed(context):
    # Run against the real bucket if it is configured
    if Config.BULK_NON_COMPLIANCE_BUCKET_NAME:
        BulkProcessor(NonComplianceProcessor()).run()
        return

    # If we don't have a bucket, mock the storage bucket client interactions to work with only local files
    with mock_bulk_processor_bucket(context.non_compliance_bulk_file):
        BulkProcessor(NonComplianceProcessor()).run()
def process_uninvalidate_addresses_updates_file(context):
    # Run against the real bucket if it is configured
    if Config.BULK_UNINVALIDATED_ADDRESS_BUCKET_NAME:
        BulkProcessor(UnInvalidateAddressProcessor()).run()
        return

    # If we don't have a bucket, mock the storage bucket client interactions to work with only local files
    with mock_bulk_processor_bucket(context.bulk_uninvalidated_addresses_file):
        BulkProcessor(UnInvalidateAddressProcessor()).run()
def process_bulk_address_updates_file(context):
    # Run against the real bucket if it is configured
    if Config.BULK_ADDRESS_UPDATE_BUCKET_NAME:
        BulkProcessor(AddressUpdateProcessor()).run()
        return

    # If we don't have a bucket, mock the storage bucket client interactions to work with only local files
    with mock_bulk_processor_bucket(context.bulk_address_updates_file):
        BulkProcessor(AddressUpdateProcessor()).run()
def bulk_questionnaire_link_processed(context):
    # Run against the real bucket if it is configured
    if Config.BULK_QID_LINK_BUCKET_NAME:
        BulkProcessor(QidLinkProcessor()).run()
        return

    # If we don't have a bucket, mock the storage bucket client interactions to work with only local files
    with mock_bulk_processor_bucket(context.qid_link_bulk_file):
        BulkProcessor(QidLinkProcessor()).run()
def process_bulk_refusal_file(context):
    # Run against the real bucket if it is configured
    if Config.BULK_REFUSAL_BUCKET_NAME:
        BulkProcessor(RefusalProcessor()).run()
        return

    # If we don't have a bucket, mock the storage bucket client interactions to work with only local files
    with mock_bulk_processor_bucket(context.bulk_refusals_file):
        BulkProcessor(RefusalProcessor()).run()
def process_bulk_new_address_file(context):
    new_address_processor = NewAddressProcessor(
        action_plan_id=context.action_plan_id,
        collection_exercise_id=context.collection_exercise_id)

    # Run against the real bucket if it is configured
    if Config.BULK_NEW_ADDRESS_BUCKET_NAME:
        BulkProcessor(new_address_processor).run()
        return

    # If we don't have a bucket, mock the storage bucket client interactions to work with only local files
    with mock_bulk_processor_bucket(context.bulk_new_address_file):
        BulkProcessor(new_address_processor).run()
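# mock_bulk_processor_bucket is the shared helper used by all of the steps above. A minimal
# sketch of what it could look like, assuming the bulk file is a pathlib.Path and that the
# bulk processing module imports google.cloud storage as `storage`. The patch target
# 'path.to.bulk_processor_module.storage' is a placeholder, not the project's actual module path.
from contextlib import contextmanager
from unittest.mock import Mock, patch


@contextmanager
def mock_bulk_processor_bucket(bulk_file):
    def fake_download_blob(_blob, file_obj):
        # Write the local test file's bytes into the file object the processor opened,
        # instead of downloading from a real bucket
        file_obj.write(bulk_file.read_bytes())

    mock_blob = Mock()
    mock_blob.name = bulk_file.name

    with patch('path.to.bulk_processor_module.storage') as patched_storage:
        patched_storage.Client.return_value.list_blobs.return_value = [mock_blob]
        patched_storage.Client.return_value.download_blob_to_file.side_effect = fake_download_blob
        yield patched_storage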
def test_refusal_validation_headers_fails_empty(_patched_storage_client):
    result = BulkProcessor(RefusalProcessor()).find_header_validation_errors({})

    assert result.line_number == 1
    assert "refusal_type" in result.description
    assert "case_id" in result.description
def main():
    logger_initial_config()
    logger = wrap_logger(logging.getLogger(__name__))
    logger.info('Started bulk processing qid linking',
                app_log_level=Config.LOG_LEVEL,
                environment=Config.ENVIRONMENT)
    BulkProcessor(QidLinkProcessor()).run()
def main():
    logger_initial_config()
    logger = wrap_logger(logging.getLogger(__name__))
    logger.info('Started bulk processing non compliance',
                app_log_level=Config.LOG_LEVEL,
                environment=Config.ENVIRONMENT)
    BulkProcessor(NonComplianceProcessor()).run()
def main():
    logger_initial_config()
    logger = wrap_logger(logging.getLogger(__name__))
    logger.info('Started bulk processing uninvalidate addresses',
                app_log_level=Config.LOG_LEVEL,
                environment=Config.ENVIRONMENT)
    BulkProcessor(UnInvalidateAddressProcessor()).run()
def test_qid_link_validation_headers(_patched_storage_client):
    qid_link_headers = ["case_id", "qid"]
    result = BulkProcessor(QidLinkProcessor()).find_header_validation_errors(qid_link_headers)

    assert result is None
def test_refusal_validation_headers(_patched_storage_client):
    refusal_headers = ["case_id", "refusal_type"]
    result = BulkProcessor(RefusalProcessor()).find_header_validation_errors(refusal_headers)

    assert result is None
def test_qid_link_validation_headers_fails_empty(_patched_storage_client):
    result = BulkProcessor(QidLinkProcessor()).find_header_validation_errors({})

    assert result.line_number == 1
    assert "case_id" in result.description
    assert "qid" in result.description
def test_invalid_address_validation_headers_fails_empty(_patched_storage_client):
    result = BulkProcessor(InvalidAddressProcessor()).find_header_validation_errors({})

    assert result.line_number == 1
    assert "reason" in result.description
    assert "case_id" in result.description
def test_invalid_address_validation_headers(_patched_storage_client):
    invalid_address_headers = ["case_id", "reason"]
    result = BulkProcessor(InvalidAddressProcessor()).find_header_validation_errors(
        invalid_address_headers)

    assert result is None
def test_uninvalidate_address_validation_headers(patch_storage):
    uninvalidate_address_headers = ["CASE_ID"]
    result = BulkProcessor(UnInvalidateAddressProcessor()).find_header_validation_errors(
        uninvalidate_address_headers)

    assert result is None
def test_non_compliance_validation_headers_fails_empty(_patched_storage_client):
    result = BulkProcessor(NonComplianceProcessor()).find_header_validation_errors({})

    assert result.line_number == 1
    assert "CASE_ID" in result.description
    assert "NC_STATUS" in result.description
def test_qid_link_validation_headers_fails_qid(_patched_storage_client):
    qid_link_headers = ["case_id", "notqid"]
    result = BulkProcessor(QidLinkProcessor()).find_header_validation_errors(qid_link_headers)

    assert result.line_number == 1
    assert "notqid" in result.description
    assert "qid" in result.description
def test_non_compliance_validation_headers(_patched_storage_client):
    non_compliance_headers = ["CASE_ID", "NC_STATUS", "FIELDCOORDINATOR_ID", "FIELDOFFICER_ID"]
    result = BulkProcessor(NonComplianceProcessor()).find_header_validation_errors(
        non_compliance_headers)

    assert result is None
def test_uninvalidate_address_validation_headers_fails_case_id(patch_storage):
    uninvalidate_address_headers = ["not_a_case_id"]
    result = BulkProcessor(UnInvalidateAddressProcessor()).find_header_validation_errors(
        uninvalidate_address_headers)

    assert result.line_number == 1
    assert "not_a_case_id" in result.description
    assert "case_id" in result.description
def test_rebuild_errored_csv_row_too_many_columns():
    # Given
    # If a row contains too many columns then the excess will be stored in a list under the None key
    row_in_expected_format = {'COL_1': 'value_1', None: ['extra_1', 'extra_2']}

    # When
    rebuilt_row = BulkProcessor.rebuild_errored_csv_row(row_in_expected_format)

    # Then
    assert rebuilt_row == 'value_1,extra_1,extra_2'
def test_rebuild_errored_csv_row_too_few_columns():
    # Given
    # If a row contains too few columns then the missing values on the end will be stored as None
    row_in_expected_format = {'COL_1': 'value_1', 'COL_MISSING_2': None}

    # When
    rebuilt_row = BulkProcessor.rebuild_errored_csv_row(row_in_expected_format)

    # Then
    assert rebuilt_row == 'value_1'
def test_invalid_address_validation_headers_fails_case_id(_patched_storage_client):
    invalid_address_headers = ["not_a_case_id", "reason"]
    result = BulkProcessor(InvalidAddressProcessor()).find_header_validation_errors(
        invalid_address_headers)

    assert result.line_number == 1
    assert "not_a_case_id" in result.description
    assert "case_id" in result.description
def test_process_file_encoding_failure(patch_storage, patch_rabbit, tmp_path):
    schema = {'header_1': [], 'header_2': []}
    header = ','.join(key for key in schema.keys())
    mock_processor = setup_mock_processor(schema, None)
    bulk_processor = BulkProcessor(mock_processor)
    bulk_processor.working_dir = tmp_path

    test_file = RESOURCE_PATH.joinpath('bulk_test_file_encoding_failure.csv')
    success_file, error_file, error_detail_file = bulk_processor.initialise_results_files(
        test_file.name)

    success_count, failure_count = bulk_processor.process_file(
        test_file, success_file, error_file, error_detail_file)

    assert failure_count == 1, 'Should have one failure when it tries to decode the file'
    assert not success_count, 'Should not successfully process any rows'
    assert success_file.read_text() == header + '\n'
    assert 'Invalid file encoding, requires utf-8' in error_detail_file.read_text()

    patch_rabbit.publish_message.assert_not_called()
def test_run_success_failure_mix(patch_storage, patch_rabbit, patch_db_helper, tmp_path):
    # Given
    test_message = {"test_message": "blah"}
    mock_processor = setup_mock_processor({'header': [no_invalid_validator()]}, test_message)
    bulk_processor = BulkProcessor(mock_processor)
    bulk_processor.working_dir = tmp_path
    mock_blob = Mock()
    mock_blob.name = 'mock_blob_name'
    patch_storage.Client.return_value.list_blobs.return_value = [mock_blob]
    patch_storage.Client.return_value.download_blob_to_file.side_effect = partial(
        mock_download_blob, mock_data=(b'header\n'
                                       b'value\n'
                                       b'invalid'))

    # When
    bulk_processor.run()

    # Then
    mock_upload_to_bucket = patch_storage.Client.return_value.bucket.return_value.blob.return_value \
        .upload_from_filename
    mock_upload_calls = mock_upload_to_bucket.call_args_list
    assert len(mock_upload_calls) == 3, 'Upload to bucket should be called three times'
    assert call(str(tmp_path.joinpath('PROCESSED_mock_blob_name'))) in mock_upload_calls
    assert call(str(tmp_path.joinpath('ERROR_mock_blob_name'))) in mock_upload_calls
    assert call(str(tmp_path.joinpath('ERROR_DETAIL_mock_blob_name'))) in mock_upload_calls

    patch_rabbit.return_value.__enter__.return_value.publish_message.assert_called_once_with(
        message=json.dumps(test_message),
        content_type='application/json',
        headers=None,
        exchange=mock_processor.exchange,
        routing_key=mock_processor.routing_key)

    patch_db_helper.connect_to_read_replica_pool.assert_called_once()

    assert_no_left_over_files(tmp_path)
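# no_invalid_validator is a test helper used in the schema above. A sketch of the assumed
# shape: a factory returning a per-value validator that rejects the literal value 'invalid'
# by raising. The real code may use its own exception type rather than ValueError.
def no_invalid_validator():
    def validate(value, **_kwargs):
        if value == 'invalid':
            raise ValueError(f'Value "{value}" is invalid')

    return validate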
def test_process_file_successful(patch_storage, patch_rabbit, tmp_path):
    schema = {'header_1': [], 'header_2': []}
    header = ','.join(key for key in schema.keys())
    mock_processor = setup_mock_processor(schema, None)
    mock_processor.build_event_messages.side_effect = lambda row: [row]
    bulk_processor = BulkProcessor(mock_processor)
    bulk_processor.working_dir = tmp_path
    bulk_processor.rabbit = patch_rabbit

    test_file = RESOURCE_PATH.joinpath('bulk_test_file_success.csv')
    success_file, error_file, error_detail_file = bulk_processor.initialise_results_files(
        test_file.name)

    success_count, failure_count = bulk_processor.process_file(
        test_file, success_file, error_file, error_detail_file)

    assert not failure_count, 'Should have no processing errors'
    assert success_count == 1, 'Should successfully process one row'
    assert success_file.read_text() == test_file.read_text()
    assert error_file.read_text() == header + '\n'
    assert error_detail_file.read_text() == HEADER_IS_VALID

    patch_rabbit.publish_message.assert_called_once_with(
        message=json.dumps({'header_1': 'foo', 'header_2': 'bar'}),
        content_type='application/json',
        headers=None,
        exchange=mock_processor.exchange,
        routing_key=mock_processor.routing_key)
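# Implied content of the bulk_test_file_success.csv resource, reconstructed from the
# assertions above (the success file round-trips the input, and the single published
# message is {'header_1': 'foo', 'header_2': 'bar'}):
#
#   header_1,header_2
#   foo,bar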
def test_rebuild_errored_csv_row():
    # Given
    row_in_expected_format = {
        'COL_1': 'value_1',
        'COL_2': '',
        'COL_3': 'value_3'
    }

    # When
    rebuilt_row = BulkProcessor.rebuild_errored_csv_row(row_in_expected_format)

    # Then
    assert rebuilt_row == 'value_1,,value_3'
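# Taken together, the three rebuild_errored_csv_row tests pin down the expected behaviour:
# keep parsed values in column order (including empty strings), drop trailing missing
# columns stored as None, and append any surplus values that csv.DictReader collects in a
# list under the None key. A minimal sketch that satisfies those cases; on BulkProcessor
# this is a staticmethod, and the project's actual implementation may differ.
def rebuild_errored_csv_row(csv_row: dict) -> str:
    # Values parsed into the expected columns (a None value means the column was missing)
    values = [value for key, value in csv_row.items()
              if key is not None and value is not None]
    # Surplus columns are stored by csv.DictReader in a list under the None key
    values.extend(csv_row.get(None, []))
    return ','.join(values)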
def test_non_compliance_validation_headers_fail(_patched_storage_client):
    non_compliance_headers = ["ID", "NC_STAT", "FIELDCOORDINATORID", "FIELDOFFICERID"]
    result = BulkProcessor(NonComplianceProcessor()).find_header_validation_errors(
        non_compliance_headers)

    assert result.line_number == 1
    assert "ID" in result.description
    assert "CASE_ID" in result.description
    assert "NC_STAT" in result.description
    assert "FIELDCOORDINATORID" in result.description
    assert "FIELDOFFICERID" in result.description
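# The header validation tests above all expect the same contract: a matching header returns
# None, and any mismatch is reported on line 1 with a description that names both the
# expected columns and the columns actually received. A rough standalone sketch of that
# behaviour; the real method lives on BulkProcessor, reads the expected columns from its
# processor's schema dict, and the ValidationFailure shape here is an assumption.
from collections import namedtuple

ValidationFailure = namedtuple('ValidationFailure', ('line_number', 'description'))


def find_header_validation_errors_sketch(expected_schema, header):
    # expected_schema mirrors the processor's schema dict; its keys are the expected columns
    expected_header = list(expected_schema.keys())
    if list(header) == expected_header:
        return None
    return ValidationFailure(
        line_number=1,
        description=f'Invalid header: expected columns {expected_header}, got {list(header)}')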