def should_log_missing_item(self, subtests: SubTests) -> None:
    """Verify that a nonexistent processing-assets item raises and is error-logged."""
    # Given a dataset/version pair with no stored asset at this index
    dataset_id = any_dataset_id()
    version_id = any_dataset_version_id()
    index = 0
    lookup_parameters = {
        "hash_key": f"DATASET#{dataset_id}#VERSION#{version_id}",
        "range_key": f"{ProcessingAssetType.DATA.value}#{index}",
    }
    expected_log = dumps(
        {
            "success": False,
            ERROR_KEY: {"message": ProcessingAssetsModelBase.DoesNotExist.msg},
            "parameters": lookup_parameters,
        }
    )

    sys.argv = [
        any_program_name(),
        f"--dataset-id={dataset_id}",
        f"--version-id={version_id}",
        f"--assets-table-name={get_param(ParameterName.PROCESSING_ASSETS_TABLE_NAME)}",
        f"--results-table-name={get_param(ParameterName.STORAGE_VALIDATION_RESULTS_TABLE_NAME)}",  # pylint:disable=line-too-long
        f"--first-item={index}",
    ]

    # When/Then: the task should raise DoesNotExist and log the missing item
    with patch.object(self.logger, "error") as error_log_mock, subtests.test(
        msg="Return code"
    ), raises(DoesNotExist), patch.dict(environ, {ARRAY_INDEX_VARIABLE_NAME: "0"}):
        main()

    with subtests.test(msg="Log message"):
        error_log_mock.assert_any_call(expected_log)
def should_save_staging_access_validation_results(
    validation_results_factory_mock: MagicMock,
    processing_assets_model_mock: MagicMock,
    get_object_mock: MagicMock,
) -> None:
    """Verify that an S3 access failure is saved as a failed staging-access check."""
    # Given the staged object cannot be fetched from S3
    staging_error = ClientError(
        {
            AWS_RESPONSE_ERROR_KEY: {
                AWS_RESPONSE_ERROR_CODE_KEY: "TEST",
                AWS_RESPONSE_ERROR_MESSAGE_KEY: "TEST",
            }
        },
        operation_name="get_object",
    )
    get_object_mock.side_effect = staging_error

    s3_url = any_s3_url()
    dataset_id = any_dataset_id()
    version_id = any_dataset_version_id()
    hash_key = (
        f"{DATASET_ID_PREFIX}{dataset_id}{DB_KEY_SEPARATOR}{VERSION_ID_PREFIX}{version_id}"
    )
    array_index = "1"
    validation_results_table_name = any_table_name()

    # When
    sys.argv = [
        any_program_name(),
        f"--dataset-id={dataset_id}",
        f"--version-id={version_id}",
        f"--assets-table-name={any_table_name()}",
        f"--results-table-name={validation_results_table_name}",
        "--first-item=0",
    ]

    def fake_get(given_hash_key: str, range_key: str) -> ProcessingAssetsModelBase:
        # The task must look up the item for this dataset version and array index
        assert given_hash_key == hash_key
        assert range_key == f"{ProcessingAssetType.DATA.value}{DB_KEY_SEPARATOR}{array_index}"
        return ProcessingAssetsModelBase(
            hash_key=given_hash_key,
            range_key=f"{ProcessingAssetType.DATA.value}{DB_KEY_SEPARATOR}1",
            url=s3_url,
            multihash=any_hex_multihash(),
        )

    processing_assets_model_mock.return_value.get.side_effect = fake_get

    with raises(ClientError), patch.dict(environ, {ARRAY_INDEX_VARIABLE_NAME: array_index}):
        main()

    # Then the failure is recorded against the staging-access check
    assert validation_results_factory_mock.mock_calls == [
        call(hash_key, validation_results_table_name),
        call().save(
            s3_url,
            Check.STAGING_ACCESS,
            ValidationResult.FAILED,
            details={MESSAGE_KEY: str(staging_error)},
        ),
    ]
def should_validate_given_index(
    validation_results_factory_mock: MagicMock,
    processing_assets_model_mock: MagicMock,
    validate_url_multihash_mock: MagicMock,
    subtests: SubTests,
) -> None:
    """Verify the task validates the asset at the configured array index and records a pass.

    Checks the return code, the success log line, the checksum-validation call,
    and the validation-results save calls.
    """
    # Given
    dataset_id = any_dataset_id()
    version_id = any_dataset_version_id()
    hash_key = f"DATASET#{dataset_id}#VERSION#{version_id}"
    url = any_s3_url()
    hex_multihash = any_hex_multihash()
    array_index = "1"

    def get_mock(given_hash_key: str, range_key: str) -> ProcessingAssetsModelBase:
        assert given_hash_key == hash_key
        assert range_key == f"{ProcessingAssetType.DATA.value}#{array_index}"
        return ProcessingAssetsModelBase(
            hash_key=given_hash_key,
            # Bug fix: the original used the plain string literal
            # "{ProcessingAssetType.DATA.value}#1" (missing f-prefix), so the
            # returned item's range key was never interpolated. Interpolate it,
            # using array_index ("1") to match the key asserted above.
            range_key=f"{ProcessingAssetType.DATA.value}#{array_index}",
            url=url,
            multihash=hex_multihash,
        )

    processing_assets_model_mock.return_value.get.side_effect = get_mock
    logger = logging.getLogger("backend.check_files_checksums.task")
    validation_results_table_name = any_table_name()
    expected_calls = [
        call(hash_key, validation_results_table_name),
        call().save(url, Check.CHECKSUM, ValidationResult.PASSED),
    ]

    # When
    sys.argv = [
        any_program_name(),
        f"--dataset-id={dataset_id}",
        f"--version-id={version_id}",
        f"--assets-table-name={any_table_name()}",
        f"--results-table-name={validation_results_table_name}",
        "--first-item=0",
    ]

    with patch.object(logger, "info") as info_log_mock, patch.dict(
        environ, {ARRAY_INDEX_VARIABLE_NAME: array_index}
    ):
        # Then
        with subtests.test(msg="Return code"):
            assert main() == 0

        with subtests.test(msg="Log message"):
            info_log_mock.assert_any_call('{"success": true, "message": ""}')

    with subtests.test(msg="Validate checksums"):
        assert validate_url_multihash_mock.mock_calls == [call(url, hex_multihash)]

    with subtests.test(msg="Validation result"):
        assert validation_results_factory_mock.mock_calls == expected_calls
def should_log_error_when_validation_fails(  # pylint: disable=too-many-locals
    validation_results_factory_mock: MagicMock,
    processing_assets_model_mock: MagicMock,
    validate_url_multihash_mock: MagicMock,
    subtests: SubTests,
) -> None:
    """Verify a checksum mismatch is error-logged and saved as a failed check."""
    # Given an asset whose stored multihash will not match the computed digest
    actual_hex_digest = any_sha256_hex_digest()
    expected_hex_digest = any_sha256_hex_digest()
    expected_hex_multihash = sha256_hex_digest_to_multihash(expected_hex_digest)
    dataset_id = any_dataset_id()
    dataset_version_id = any_dataset_version_id()
    hash_key = f"DATASET#{dataset_id}#VERSION#{dataset_version_id}"
    url = any_s3_url()
    processing_assets_model_mock.return_value.get.return_value = ProcessingAssetsModelBase(
        hash_key=hash_key,
        range_key=f"{ProcessingAssetType.DATA.value}#0",
        url=url,
        multihash=expected_hex_multihash,
    )
    expected_details = {
        "message": f"Checksum mismatch: expected {expected_hex_digest}, got {actual_hex_digest}"
    }
    expected_log = dumps({"success": False, **expected_details})
    validate_url_multihash_mock.side_effect = ChecksumMismatchError(actual_hex_digest)
    task_logger = logging.getLogger("backend.check_files_checksums.task")

    # When
    validation_results_table_name = any_table_name()
    sys.argv = [
        any_program_name(),
        f"--dataset-id={dataset_id}",
        f"--version-id={dataset_version_id}",
        f"--assets-table-name={any_table_name()}",
        f"--results-table-name={validation_results_table_name}",
        "--first-item=0",
    ]

    # Then: the mismatch is logged and the task still exits successfully
    with patch.object(task_logger, "error") as error_log_mock, patch.dict(
        environ, {ARRAY_INDEX_VARIABLE_NAME: "0"}
    ):
        with subtests.test(msg="Return code"):
            assert main() == 0

        with subtests.test(msg="Log message"):
            error_log_mock.assert_any_call(expected_log)

    with subtests.test(msg="Validation result"):
        assert validation_results_factory_mock.mock_calls == [
            call(hash_key, validation_results_table_name),
            call().save(
                url, Check.CHECKSUM, ValidationResult.FAILED, details=expected_details
            ),
        ]
def should_log_missing_item(subtests: SubTests) -> None:
    """Verify that a nonexistent processing-assets item raises and is error-logged."""
    # Given a dataset/version pair with no stored asset at this index
    dataset_id = any_dataset_id()
    version_id = any_dataset_version_id()
    index = 0
    lookup_parameters = {
        "hash_key": (
            f"{DATASET_ID_PREFIX}{dataset_id}"
            f"{DB_KEY_SEPARATOR}{VERSION_ID_PREFIX}{version_id}"
        ),
        "range_key": f"{ProcessingAssetType.DATA.value}{DB_KEY_SEPARATOR}{index}",
    }
    expected_log = dumps(
        {
            SUCCESS_KEY: False,
            ERROR_KEY: {MESSAGE_KEY: ProcessingAssetsModelBase.DoesNotExist.msg},
            "parameters": lookup_parameters,
        }
    )

    sys.argv = [
        any_program_name(),
        f"--dataset-id={dataset_id}",
        f"--version-id={version_id}",
        f"--assets-table-name={get_param(ParameterName.PROCESSING_ASSETS_TABLE_NAME)}",
        f"--results-table-name={get_param(ParameterName.STORAGE_VALIDATION_RESULTS_TABLE_NAME)}",
        f"--first-item={index}",
    ]

    # When/Then: the task should raise DoesNotExist and log the missing item
    with patch.object(LOGGER, "error") as error_log_mock, patch.dict(
        environ, {ARRAY_INDEX_VARIABLE_NAME: "0"}
    ):
        with subtests.test(msg="Return code"), raises(DoesNotExist):
            main()

        with subtests.test(msg="Log message"):
            error_log_mock.assert_any_call(expected_log)