예제 #1
0
    def should_log_missing_item(self, subtests: SubTests) -> None:
        """Check that a missing processing asset both raises and is logged.

        Runs ``main()`` for an asset key built from ``any_dataset_id()`` and
        ``any_dataset_version_id()``, expecting ``DoesNotExist`` to propagate
        and ``self.logger.error`` to be called with the JSON payload built
        below.
        """
        # Given
        dataset_id = any_dataset_id()
        version_id = any_dataset_version_id()
        index = 0
        expected_log = dumps({
            "success": False,
            ERROR_KEY: {
                "message": ProcessingAssetsModelBase.DoesNotExist.msg
            },
            "parameters": {
                "hash_key": f"DATASET#{dataset_id}#VERSION#{version_id}",
                "range_key": f"{ProcessingAssetType.DATA.value}#{index}",
            },
        })

        sys.argv = [
            any_program_name(),
            f"--dataset-id={dataset_id}",
            f"--version-id={version_id}",
            f"--assets-table-name={get_param(ParameterName.PROCESSING_ASSETS_TABLE_NAME)}",
            f"--results-table-name={get_param(ParameterName.STORAGE_VALIDATION_RESULTS_TABLE_NAME)}",  # pylint:disable=line-too-long
            f"--first-item={index}",
        ]

        # When/Then
        with patch.object(self.logger, "error") as logger_mock, patch.dict(
                environ, {ARRAY_INDEX_VARIABLE_NAME: "0"}):
            # Only main() is placed under raises(): any statement following a
            # raising call inside the same `with` body would never execute.
            with subtests.test(msg="Return code"), raises(DoesNotExist):
                main()

            # Checked after the exception has been consumed, but while the
            # logger patch is still active.
            with subtests.test(msg="Log message"):
                logger_mock.assert_any_call(expected_log)
예제 #2
0
def should_save_staging_access_validation_results(
    validation_results_factory_mock: MagicMock,
    processing_assets_model_mock: MagicMock,
    get_object_mock: MagicMock,
) -> None:
    """Check that a failing ``get_object`` is saved as a staging-access failure.

    ``get_object_mock`` is set to raise a ``ClientError``; ``main()`` is
    expected to let that error propagate, and the validation results factory
    mock is expected to have been asked to save a ``FAILED``
    ``Check.STAGING_ACCESS`` result whose details carry the error text.
    """
    # Given
    access_error = ClientError(
        {
            AWS_RESPONSE_ERROR_KEY: {
                AWS_RESPONSE_ERROR_CODE_KEY: "TEST",
                AWS_RESPONSE_ERROR_MESSAGE_KEY: "TEST",
            }
        },
        operation_name="get_object",
    )
    get_object_mock.side_effect = access_error

    asset_url = any_s3_url()
    dataset = any_dataset_id()
    version = any_dataset_version_id()
    partition_key = (
        f"{DATASET_ID_PREFIX}{dataset}{DB_KEY_SEPARATOR}{VERSION_ID_PREFIX}{version}"
    )
    batch_index = "1"
    results_table = any_table_name()

    arguments = [any_program_name()]
    arguments.append(f"--dataset-id={dataset}")
    arguments.append(f"--version-id={version}")
    arguments.append(f"--assets-table-name={any_table_name()}")
    arguments.append(f"--results-table-name={results_table}")
    arguments.append("--first-item=0")
    sys.argv = arguments

    def fake_get(given_hash_key: str, range_key: str) -> ProcessingAssetsModelBase:
        # Parameter names are kept as-is in case the caller passes them by keyword.
        wanted_range_key = f"{ProcessingAssetType.DATA.value}{DB_KEY_SEPARATOR}{batch_index}"
        assert given_hash_key == partition_key
        assert range_key == wanted_range_key
        return ProcessingAssetsModelBase(
            hash_key=given_hash_key,
            range_key=f"{ProcessingAssetType.DATA.value}{DB_KEY_SEPARATOR}1",
            url=asset_url,
            multihash=any_hex_multihash(),
        )

    processing_assets_model_mock.return_value.get.side_effect = fake_get

    # When
    with raises(ClientError):
        with patch.dict(environ, {ARRAY_INDEX_VARIABLE_NAME: batch_index}):
            main()

    # Then
    expected_factory_calls = [
        call(partition_key, results_table),
        call().save(
            asset_url,
            Check.STAGING_ACCESS,
            ValidationResult.FAILED,
            details={MESSAGE_KEY: str(access_error)},
        ),
    ]
    assert validation_results_factory_mock.mock_calls == expected_factory_calls
예제 #3
0
def should_validate_given_index(
    validation_results_factory_mock: MagicMock,
    processing_assets_model_mock: MagicMock,
    validate_url_multihash_mock: MagicMock,
    subtests: SubTests,
) -> None:
    """Check that ``main()`` validates the asset selected by the array index.

    With ``--first-item=0`` and ``ARRAY_INDEX_VARIABLE_NAME`` set to ``"1"``,
    the processing assets model mock must be queried for the ``#1`` range key.
    ``main()`` is then expected to return 0, log a success message, call the
    multihash validator with the asset's URL and multihash, and save a
    ``PASSED`` checksum result.
    """
    # Given
    dataset_id = any_dataset_id()
    version_id = any_dataset_version_id()
    hash_key = f"DATASET#{dataset_id}#VERSION#{version_id}"

    url = any_s3_url()
    hex_multihash = any_hex_multihash()

    array_index = "1"

    def get_mock(given_hash_key: str,
                 range_key: str) -> ProcessingAssetsModelBase:
        assert given_hash_key == hash_key
        assert range_key == f"{ProcessingAssetType.DATA.value}#{array_index}"
        return ProcessingAssetsModelBase(
            hash_key=given_hash_key,
            # Must be an f-string; a plain string would store the literal
            # "{ProcessingAssetType.DATA.value}#1" placeholder text as the key.
            range_key=f"{ProcessingAssetType.DATA.value}#{array_index}",
            url=url,
            multihash=hex_multihash,
        )

    processing_assets_model_mock.return_value.get.side_effect = get_mock
    logger = logging.getLogger("backend.check_files_checksums.task")
    validation_results_table_name = any_table_name()
    expected_calls = [
        call(hash_key, validation_results_table_name),
        call().save(url, Check.CHECKSUM, ValidationResult.PASSED),
    ]

    # When
    sys.argv = [
        any_program_name(),
        f"--dataset-id={dataset_id}",
        f"--version-id={version_id}",
        f"--assets-table-name={any_table_name()}",
        f"--results-table-name={validation_results_table_name}",
        "--first-item=0",
    ]
    with patch.object(logger, "info") as info_log_mock, patch.dict(
            environ, {ARRAY_INDEX_VARIABLE_NAME: array_index}):
        # Then
        with subtests.test(msg="Return code"):
            assert main() == 0

        with subtests.test(msg="Log message"):
            info_log_mock.assert_any_call('{"success": true, "message": ""}')

    with subtests.test(msg="Validate checksums"):
        assert validate_url_multihash_mock.mock_calls == [
            call(url, hex_multihash)
        ]

    with subtests.test(msg="Validation result"):
        assert validation_results_factory_mock.mock_calls == expected_calls
예제 #4
0
def should_log_error_when_validation_fails(  # pylint: disable=too-many-locals
    validation_results_factory_mock: MagicMock,
    processing_assets_model_mock: MagicMock,
    validate_url_multihash_mock: MagicMock,
    subtests: SubTests,
) -> None:
    """Check that a checksum mismatch is logged and saved as a failed result.

    The multihash validator mock raises ``ChecksumMismatchError``; ``main()``
    is still expected to return 0, to log the mismatch through the
    ``backend.check_files_checksums.task`` logger's ``error`` method, and to
    save a ``FAILED`` ``Check.CHECKSUM`` result carrying the same details.
    """
    # Given
    digest_found = any_sha256_hex_digest()
    digest_wanted = any_sha256_hex_digest()
    multihash_wanted = sha256_hex_digest_to_multihash(digest_wanted)

    dataset = any_dataset_id()
    version = any_dataset_version_id()
    partition_key = f"DATASET#{dataset}#VERSION#{version}"
    asset_url = any_s3_url()

    stored_asset = ProcessingAssetsModelBase(
        hash_key=partition_key,
        range_key=f"{ProcessingAssetType.DATA.value}#0",
        url=asset_url,
        multihash=multihash_wanted,
    )
    processing_assets_model_mock.return_value.get.return_value = stored_asset

    mismatch_message = (
        f"Checksum mismatch: expected {digest_wanted}, got {digest_found}"
    )
    failure_details = {"message": mismatch_message}
    failure_log = dumps({"success": False, "message": mismatch_message})

    validate_url_multihash_mock.side_effect = ChecksumMismatchError(digest_found)
    task_logger = logging.getLogger("backend.check_files_checksums.task")

    results_table = any_table_name()
    sys.argv = [
        any_program_name(),
        f"--dataset-id={dataset}",
        f"--version-id={version}",
        f"--assets-table-name={any_table_name()}",
        f"--results-table-name={results_table}",
        "--first-item=0",
    ]

    # When/Then
    with patch.object(task_logger, "error") as error_spy:
        with patch.dict(environ, {ARRAY_INDEX_VARIABLE_NAME: "0"}):
            with subtests.test(msg="Return code"):
                exit_code = main()
                assert exit_code == 0

            with subtests.test(msg="Log message"):
                error_spy.assert_any_call(failure_log)

    with subtests.test(msg="Validation result"):
        expected_factory_calls = [
            call(partition_key, results_table),
            call().save(
                asset_url,
                Check.CHECKSUM,
                ValidationResult.FAILED,
                details=failure_details,
            ),
        ]
        assert validation_results_factory_mock.mock_calls == expected_factory_calls
예제 #5
0
def should_log_missing_item(subtests: SubTests) -> None:
    """Check that a missing processing asset both raises and is logged.

    ``main()`` is run for a dataset/version pair produced by the ``any_*``
    helpers; it is expected to raise ``DoesNotExist`` and to call
    ``LOGGER.error`` with a JSON payload naming the requested hash and range
    keys.
    """
    # Given
    dataset = any_dataset_id()
    version = any_dataset_version_id()
    first_item = 0

    wanted_hash_key = (
        f"{DATASET_ID_PREFIX}{dataset}"
        f"{DB_KEY_SEPARATOR}{VERSION_ID_PREFIX}{version}"
    )
    wanted_range_key = f"{ProcessingAssetType.DATA.value}{DB_KEY_SEPARATOR}{first_item}"
    error_payload = {
        SUCCESS_KEY: False,
        ERROR_KEY: {MESSAGE_KEY: ProcessingAssetsModelBase.DoesNotExist.msg},
        "parameters": {"hash_key": wanted_hash_key, "range_key": wanted_range_key},
    }
    expected_message = dumps(error_payload)

    assets_table = get_param(ParameterName.PROCESSING_ASSETS_TABLE_NAME)
    results_table = get_param(ParameterName.STORAGE_VALIDATION_RESULTS_TABLE_NAME)
    sys.argv = [
        any_program_name(),
        f"--dataset-id={dataset}",
        f"--version-id={version}",
        f"--assets-table-name={assets_table}",
        f"--results-table-name={results_table}",
        f"--first-item={first_item}",
    ]

    # When/Then
    with patch.object(LOGGER, "error") as error_spy:
        with patch.dict(environ, {ARRAY_INDEX_VARIABLE_NAME: "0"}):
            with subtests.test(msg="Return code"), raises(DoesNotExist):
                main()

            with subtests.test(msg="Log message"):
                error_spy.assert_any_call(expected_message)