Example #1
def should_save_non_s3_url_validation_results(
    get_param_mock: MagicMock,
    validation_results_factory_mock: MagicMock,
) -> None:
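    # The handler receives an HTTPS (non-S3) metadata URL and should record a
    # failed NON_S3_URL check for this dataset version in the validation results table.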

    validation_results_table_name = any_table_name()
    get_param_mock.return_value = validation_results_table_name
    non_s3_url = any_https_url()
    dataset_id = any_dataset_id()
    version_id = any_dataset_version_id()

    with patch("backend.check_stac_metadata.utils.processing_assets_model_with_meta"):
        lambda_handler(
            {
                DATASET_ID_KEY: dataset_id,
                VERSION_ID_KEY: version_id,
                METADATA_URL_KEY: non_s3_url,
            },
            any_lambda_context(),
        )

    hash_key = f"{DATASET_ID_PREFIX}{dataset_id}{DB_KEY_SEPARATOR}{VERSION_ID_PREFIX}{version_id}"
    assert validation_results_factory_mock.mock_calls == [
        call(hash_key, validation_results_table_name),
        call().save(
            non_s3_url,
            Check.NON_S3_URL,
            ValidationResult.FAILED,
            details={MESSAGE_KEY: f"URL doesn't start with “{S3_URL_PREFIX}”: “{non_s3_url}”"},
        ),
    ]
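The bare mock parameters in these examples (get_param_mock, validation_results_factory_mock, and so on) are injected by unittest.mock.patch decorators or fixtures that the excerpts do not show. A minimal sketch of how such a test is typically wired up; the patch targets are illustrative assumptions, not necessarily the project's real module paths:

from unittest.mock import MagicMock, patch

# Decorators apply bottom-up, so the lowest @patch provides the first mock argument.
# Both target strings below are assumed module paths, for illustration only.
@patch("backend.check_stac_metadata.task.ValidationResultFactory")
@patch("backend.check_stac_metadata.task.get_param")
def should_save_non_s3_url_validation_results(
    get_param_mock: MagicMock,
    validation_results_factory_mock: MagicMock,
) -> None:
    ...  # test body as shown in the example above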
Example #2
def should_log_event_payload() -> None:
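    # The handler should log the serialized event payload at debug level.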
    payload = deepcopy(MINIMAL_PAYLOAD)
    expected_log = dumps({"event": payload})

    with patch.object(LOGGER, "debug") as logger_mock, patch(
            "backend.check_stac_metadata.task.STACDatasetValidator.run"):
        lambda_handler(payload, any_lambda_context())

        logger_mock.assert_any_call(expected_log)
Example #3
def should_succeed_with_validation_failure(validate_url_mock: MagicMock) -> None:
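    # A ValidationError from the URL validator should be handled internally, so
    # the handler returns without raising.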
    validate_url_mock.side_effect = ValidationError(any_error_message())

    with patch("backend.check_stac_metadata.utils.processing_assets_model_with_meta"):
        lambda_handler(
            {
                DATASET_ID_KEY: any_dataset_id(),
                VERSION_ID_KEY: any_dataset_version_id(),
                METADATA_URL_KEY: any_s3_url(),
            },
            any_lambda_context(),
        )
Example #4
def should_validate_given_url(validate_url_mock: MagicMock) -> None:
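    # The metadata URL from the event should be passed unchanged to the URL validator.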
    url = any_s3_url()

    with patch("backend.check_stac_metadata.utils.processing_assets_model_with_meta"):
        lambda_handler(
            {
                DATASET_ID_KEY: any_dataset_id(),
                VERSION_ID_KEY: any_dataset_version_id(),
                METADATA_URL_KEY: url,
            },
            any_lambda_context(),
        )

    validate_url_mock.assert_called_once_with(url)
Example #5
def should_log_event_payload() -> None:
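    # Variant of the payload-logging test above, patching the task module's logger
    # by name instead of using a shared LOGGER reference.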
    payload = {
        DATASET_ID_KEY: any_dataset_id(),
        VERSION_ID_KEY: any_dataset_version_id(),
        METADATA_URL_KEY: any_s3_url(),
    }

    expected_log = dumps({"event": payload})

    with patch.object(
            logging.getLogger("backend.check_stac_metadata.task"),
            "debug") as logger_mock, patch(
                "backend.check_stac_metadata.task.STACDatasetValidator.run"):
        lambda_handler(
            payload,
            any_lambda_context(),
        )
        logger_mock.assert_any_call(expected_log)
Example #6
def should_save_staging_access_validation_results(
    validation_results_factory_mock: MagicMock,
    get_object_mock: MagicMock,
) -> None:
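    # A ClientError raised while fetching the metadata object from S3 should be
    # recorded as a failed STAGING_ACCESS check, with the error message as detail.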

    validation_results_table_name = get_param(ParameterName.STORAGE_VALIDATION_RESULTS_TABLE_NAME)
    expected_error = ClientError(
        {
            AWS_RESPONSE_ERROR_KEY: {
                AWS_RESPONSE_ERROR_CODE_KEY: "TEST",
                AWS_RESPONSE_ERROR_MESSAGE_KEY: "TEST",
            }
        },
        operation_name="get_object",
    )
    get_object_mock.side_effect = expected_error

    s3_url = any_s3_url()
    dataset_id = any_dataset_id()
    version_id = any_dataset_version_id()

    lambda_handler(
        {
            DATASET_ID_KEY: dataset_id,
            VERSION_ID_KEY: version_id,
            METADATA_URL_KEY: s3_url,
        },
        any_lambda_context(),
    )

    hash_key = f"{DATASET_ID_PREFIX}{dataset_id}{DB_KEY_SEPARATOR}{VERSION_ID_PREFIX}{version_id}"
    assert validation_results_factory_mock.mock_calls == [
        call(hash_key, validation_results_table_name),
        call().save(
            s3_url,
            Check.STAGING_ACCESS,
            ValidationResult.FAILED,
            details={MESSAGE_KEY: str(expected_error)},
        ),
    ]
Example #7
def should_save_staging_access_validation_results(
    validation_results_factory_mock: MagicMock,
    get_object_mock: MagicMock,
) -> None:
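    # Same scenario as the previous example, written with literal key strings
    # (e.g. "DATASET#...#VERSION#...") in place of the shared constants.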

    validation_results_table_name = get_param(
        ParameterName.STORAGE_VALIDATION_RESULTS_TABLE_NAME)
    expected_error = ClientError(
        {"Error": {
            "Code": "TEST",
            "Message": "TEST"
        }},
        operation_name="get_object")
    get_object_mock.side_effect = expected_error

    s3_url = any_s3_url()
    dataset_id = any_dataset_id()
    version_id = any_dataset_version_id()

    lambda_handler(
        {
            DATASET_ID_KEY: dataset_id,
            VERSION_ID_KEY: version_id,
            METADATA_URL_KEY: s3_url,
        },
        any_lambda_context(),
    )

    hash_key = f"DATASET#{dataset_id}#VERSION#{version_id}"
    assert validation_results_factory_mock.mock_calls == [
        call(hash_key, validation_results_table_name),
        call().save(
            s3_url,
            Check.STAGING_ACCESS,
            ValidationResult.FAILED,
            details={"message": str(expected_error)},
        ),
    ]
Example #8
def should_return_error_when_schema_validation_fails(
        validate_schema_mock: MagicMock, subtests: SubTests) -> None:
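    # A schema validation error should be returned in the response as an error
    # message and logged as a warning.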
    # Given
    error_message = any_error_message()
    error = ValidationError(error_message)
    validate_schema_mock.side_effect = error
    expected_log = dumps({ERROR_KEY: error}, default=str)

    with patch.object(LOGGER, "warning") as logger_mock:
        # When
        with subtests.test(msg="response"):
            response = lambda_handler({}, any_lambda_context())
            # Then
            assert response == {ERROR_MESSAGE_KEY: error_message}

        # Then
        with subtests.test(msg="log"):
            logger_mock.assert_any_call(expected_log)
Example #9
def should_insert_asset_urls_and_checksums_into_database(subtests: SubTests) -> None:
    # pylint: disable=too-many-locals
    # Given a metadata file with two assets
    first_asset_content = any_file_contents()
    first_asset_multihash = sha256(first_asset_content).hexdigest()

    second_asset_content = any_file_contents()
    second_asset_multihash = sha512(second_asset_content).hexdigest()

    dataset_id = any_dataset_id()
    version_id = any_dataset_version_id()

    with S3Object(
        file_object=BytesIO(initial_bytes=first_asset_content),
        bucket_name=ResourceName.STORAGE_BUCKET_NAME.value,
        key=any_safe_filename(),
    ) as first_asset_s3_object, S3Object(
        file_object=BytesIO(initial_bytes=second_asset_content),
        bucket_name=ResourceName.STORAGE_BUCKET_NAME.value,
        key=any_safe_filename(),
    ) as second_asset_s3_object:
        expected_hash_key = (
            f"{DATASET_ID_PREFIX}{dataset_id}{DB_KEY_SEPARATOR}{VERSION_ID_PREFIX}{version_id}"
        )

        metadata_stac_object = deepcopy(MINIMAL_VALID_STAC_COLLECTION_OBJECT)
        metadata_stac_object[STAC_ASSETS_KEY] = {
            any_asset_name(): {
                STAC_HREF_KEY: first_asset_s3_object.url,
                STAC_FILE_CHECKSUM_KEY: first_asset_multihash,
            },
            any_asset_name(): {
                STAC_HREF_KEY: second_asset_s3_object.url,
                STAC_FILE_CHECKSUM_KEY: second_asset_multihash,
            },
        }
        metadata_content = dumps(metadata_stac_object).encode()
        with S3Object(
            file_object=BytesIO(initial_bytes=metadata_content),
            bucket_name=ResourceName.STORAGE_BUCKET_NAME.value,
            key=any_safe_filename(),
        ) as metadata_s3_object:
            # When

            processing_assets_model = processing_assets_model_with_meta()
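            # Expected database items: one DATA row per asset, numbered in insertion
            # order, plus a METADATA row for the metadata file itself.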
            expected_asset_items = [
                processing_assets_model(
                    hash_key=expected_hash_key,
                    range_key=f"{ProcessingAssetType.DATA.value}{DB_KEY_SEPARATOR}0",
                    url=first_asset_s3_object.url,
                    multihash=first_asset_multihash,
                ),
                processing_assets_model(
                    hash_key=expected_hash_key,
                    range_key=f"{ProcessingAssetType.DATA.value}{DB_KEY_SEPARATOR}1",
                    url=second_asset_s3_object.url,
                    multihash=second_asset_multihash,
                ),
            ]

            expected_metadata_items = [
                processing_assets_model(
                    hash_key=expected_hash_key,
                    range_key=f"{ProcessingAssetType.METADATA.value}{DB_KEY_SEPARATOR}0",
                    url=metadata_s3_object.url,
                ),
            ]

            lambda_handler(
                {
                    DATASET_ID_KEY: dataset_id,
                    VERSION_ID_KEY: version_id,
                    METADATA_URL_KEY: metadata_s3_object.url,
                },
                any_lambda_context(),
            )

            # Then
            actual_items = processing_assets_model.query(
                expected_hash_key,
                processing_assets_model.sk.startswith(
                    f"{ProcessingAssetType.DATA.value}{DB_KEY_SEPARATOR}"
                ),
            )
            for actual_item, expected_item in zip(actual_items, expected_asset_items):
                with subtests.test():
                    assert actual_item.attribute_values == expected_item.attribute_values

            actual_items = processing_assets_model.query(
                expected_hash_key,
                processing_assets_model.sk.startswith(
                    f"{ProcessingAssetType.METADATA.value}{DB_KEY_SEPARATOR}"
                ),
            )
            for actual_item, expected_item in zip(actual_items, expected_metadata_items):
                with subtests.test():
                    assert actual_item.attribute_values == expected_item.attribute_values
Example #10
def should_save_json_schema_validation_results_per_file(subtests: SubTests) -> None:
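    # The root STAC object links to one valid and one invalid child; each file gets
    # its own JSON_SCHEMA validation result: PASSED for the root and the valid child,
    # FAILED for the child whose ID was removed.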
    base_url = f"{S3_URL_PREFIX}{ResourceName.STAGING_BUCKET_NAME.value}/"
    valid_child_key = any_safe_filename()
    invalid_child_key = any_safe_filename()
    invalid_stac_object = deepcopy(MINIMAL_VALID_STAC_COLLECTION_OBJECT)
    invalid_stac_object.pop(STAC_ID_KEY)

    dataset_id = any_dataset_id()
    version_id = any_dataset_version_id()

    with S3Object(
        file_object=json_dict_to_file_object(
            {
                **deepcopy(MINIMAL_VALID_STAC_COLLECTION_OBJECT),
                STAC_LINKS_KEY: [
                    {STAC_HREF_KEY: f"{base_url}{valid_child_key}", "rel": "child"},
                    {STAC_HREF_KEY: f"{base_url}{invalid_child_key}", "rel": "child"},
                ],
            }
        ),
        bucket_name=ResourceName.STAGING_BUCKET_NAME.value,
        key=any_safe_filename(),
    ) as root_s3_object, S3Object(
        file_object=json_dict_to_file_object(deepcopy(MINIMAL_VALID_STAC_COLLECTION_OBJECT)),
        bucket_name=ResourceName.STAGING_BUCKET_NAME.value,
        key=valid_child_key,
    ) as valid_child_s3_object, S3Object(
        file_object=json_dict_to_file_object(invalid_stac_object),
        bucket_name=ResourceName.STAGING_BUCKET_NAME.value,
        key=invalid_child_key,
    ) as invalid_child_s3_object:

        # When
        lambda_handler(
            {
                DATASET_ID_KEY: dataset_id,
                VERSION_ID_KEY: version_id,
                METADATA_URL_KEY: root_s3_object.url,
            },
            any_lambda_context(),
        )

    hash_key = f"{DATASET_ID_PREFIX}{dataset_id}{DB_KEY_SEPARATOR}{VERSION_ID_PREFIX}{version_id}"
    validation_results_model = validation_results_model_with_meta()
    with subtests.test(msg="Root validation results"):
        assert (
            validation_results_model.get(
                hash_key=hash_key,
                range_key=(
                    f"{CHECK_ID_PREFIX}{Check.JSON_SCHEMA.value}"
                    f"{DB_KEY_SEPARATOR}{URL_ID_PREFIX}{root_s3_object.url}"
                ),
                consistent_read=True,
            ).result
            == ValidationResult.PASSED.value
        )

    with subtests.test(msg="Valid child validation results"):
        assert (
            validation_results_model.get(
                hash_key=hash_key,
                range_key=(
                    f"{CHECK_ID_PREFIX}{Check.JSON_SCHEMA.value}"
                    f"{DB_KEY_SEPARATOR}{URL_ID_PREFIX}{valid_child_s3_object.url}"
                ),
                consistent_read=True,
            ).result
            == ValidationResult.PASSED.value
        )

    with subtests.test(msg="Invalid child validation results"):
        assert (
            validation_results_model.get(
                hash_key=hash_key,
                range_key=(
                    f"{CHECK_ID_PREFIX}{Check.JSON_SCHEMA.value}"
                    f"{DB_KEY_SEPARATOR}{URL_ID_PREFIX}{invalid_child_s3_object.url}"
                ),
                consistent_read=True,
            ).result
            == ValidationResult.FAILED.value
        )
Example #11
def should_save_json_schema_validation_results_per_file(
        subtests: SubTests) -> None:
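    # Variant of the previous example using literal strings and resolving the
    # staging bucket name via get_param instead of the ResourceName constant.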
    staging_bucket_name = get_param(ParameterName.STAGING_BUCKET_NAME)
    base_url = f"s3://{staging_bucket_name}/"
    valid_child_key = any_safe_filename()
    invalid_child_key = any_safe_filename()
    invalid_stac_object = deepcopy(MINIMAL_VALID_STAC_COLLECTION_OBJECT)
    invalid_stac_object.pop("id")

    dataset_id = any_dataset_id()
    version_id = any_dataset_version_id()

    with S3Object(
            file_object=json_dict_to_file_object({
                **deepcopy(MINIMAL_VALID_STAC_COLLECTION_OBJECT),
                "links": [
                    {
                        "href": f"{base_url}{valid_child_key}",
                        "rel": "child"
                    },
                    {
                        "href": f"{base_url}{invalid_child_key}",
                        "rel": "child"
                    },
                ],
            }),
            bucket_name=staging_bucket_name,
            key=any_safe_filename(),
    ) as root_s3_object, S3Object(
            file_object=json_dict_to_file_object(
                deepcopy(MINIMAL_VALID_STAC_COLLECTION_OBJECT)),
            bucket_name=staging_bucket_name,
            key=valid_child_key,
    ) as valid_child_s3_object, S3Object(
            file_object=json_dict_to_file_object(invalid_stac_object),
            bucket_name=staging_bucket_name,
            key=invalid_child_key,
    ) as invalid_child_s3_object:

        # When
        lambda_handler(
            {
                DATASET_ID_KEY: dataset_id,
                VERSION_ID_KEY: version_id,
                METADATA_URL_KEY: root_s3_object.url,
            },
            any_lambda_context(),
        )

    hash_key = f"DATASET#{dataset_id}#VERSION#{version_id}"
    validation_results_model = validation_results_model_with_meta()
    with subtests.test(msg="Root validation results"):
        assert (validation_results_model.get(
            hash_key=hash_key,
            range_key=
            f"CHECK#{Check.JSON_SCHEMA.value}#URL#{root_s3_object.url}",
            consistent_read=True,
        ).result == ValidationResult.PASSED.value)

    with subtests.test(msg="Valid child validation results"):
        assert (validation_results_model.get(
            hash_key=hash_key,
            range_key=
            f"CHECK#{Check.JSON_SCHEMA.value}#URL#{valid_child_s3_object.url}",
            consistent_read=True,
        ).result == ValidationResult.PASSED.value)

    with subtests.test(msg="Invalid child validation results"):
        assert (validation_results_model.get(
            hash_key=hash_key,
            range_key=
            f"CHECK#{Check.JSON_SCHEMA.value}#URL#{invalid_child_s3_object.url}",
            consistent_read=True,
        ).result == ValidationResult.FAILED.value)