def should_report_invalid_json(
        validation_results_factory_mock: MagicMock) -> None:
    """Truncated JSON should raise and record a JSON_PARSE failure."""
    # Given a URL whose body is an unterminated JSON object
    metadata_url = any_s3_url()
    url_reader = MockJSONURLReader({metadata_url: StringIO(initial_value="{")})
    validator = STACDatasetValidator(url_reader,
                                     validation_results_factory_mock)

    # When validated, the parse error should propagate to the caller
    with raises(JSONDecodeError):
        validator.validate(metadata_url)

    # Then exactly one failure should have been saved
    expected_details = {
        "message":
        "Expecting property name enclosed in double quotes:"
        " line 1 column 2 (char 1)"
    }
    expected_calls = [
        call.save(
            metadata_url,
            Check.JSON_PARSE,
            ValidationResult.FAILED,
            details=expected_details,
        ),
    ]
    assert validation_results_factory_mock.mock_calls == expected_calls
def should_collect_assets_from_validated_collection_metadata_files(
        subtests: SubTests) -> None:
    """Collection validation should collect both assets and the metadata file.

    One asset uses an absolute URL in another directory, the other a relative
    link which the validator should resolve against the base URL.
    """
    # Given one asset in another directory and one relative link
    base_url = any_s3_url()
    metadata_url = f"{base_url}/{any_safe_filename()}"
    stac_object = deepcopy(MINIMAL_VALID_STAC_COLLECTION_OBJECT)
    first_asset_url = f"{base_url}/{any_safe_filename()}/{any_safe_filename()}"
    first_asset_multihash = any_hex_multihash()
    second_asset_filename = any_safe_filename()
    second_asset_url = f"{base_url}/{second_asset_filename}"
    second_asset_multihash = any_hex_multihash()
    # Use the shared key constants instead of raw "assets"/"href"/
    # "file:checksum" literals, for consistency with the sibling tests.
    stac_object[STAC_ASSETS_KEY] = {
        any_asset_name(): {
            STAC_HREF_KEY: first_asset_url,
            STAC_FILE_CHECKSUM_KEY: first_asset_multihash,
        },
        any_asset_name(): {
            STAC_HREF_KEY: second_asset_filename,
            STAC_FILE_CHECKSUM_KEY: second_asset_multihash,
        },
    }
    expected_assets = [
        {
            PROCESSING_ASSET_MULTIHASH_KEY: first_asset_multihash,
            PROCESSING_ASSET_URL_KEY: first_asset_url,
        },
        {
            PROCESSING_ASSET_MULTIHASH_KEY: second_asset_multihash,
            PROCESSING_ASSET_URL_KEY: second_asset_url,
        },
    ]
    expected_metadata = [{PROCESSING_ASSET_URL_KEY: metadata_url}]
    url_reader = MockJSONURLReader({metadata_url: stac_object})

    with patch(
            "backend.check_stac_metadata.utils.processing_assets_model_with_meta"
    ):
        validator = STACDatasetValidator(url_reader,
                                         MockValidationResultFactory())

    # When
    validator.validate(metadata_url)

    # Then
    with subtests.test():
        assert _sort_assets(
            validator.dataset_assets) == _sort_assets(expected_assets)
    with subtests.test():
        assert validator.dataset_metadata == expected_metadata
# Example #3
def should_log_staging_access_validation(validate_mock: MagicMock) -> None:
    """A ClientError raised during validation should be logged as a failure."""
    metadata_url = any_s3_url()
    hash_key = (
        f"{DATASET_ID_PREFIX}{any_dataset_id()}"
        f"{DB_KEY_SEPARATOR}{VERSION_ID_PREFIX}{any_dataset_version_id()}")

    # Force the patched validate step to fail with an AWS client error
    error_response = {
        AWS_RESPONSE_ERROR_KEY: {
            AWS_RESPONSE_ERROR_CODE_KEY: "TEST",
            AWS_RESPONSE_ERROR_MESSAGE_KEY: "TEST",
        }
    }
    expected_error = ClientError(error_response, operation_name="get_object")
    validate_mock.side_effect = expected_error

    url_reader = MockJSONURLReader(
        {metadata_url: MINIMAL_VALID_STAC_COLLECTION_OBJECT})

    failure_payload = {SUCCESS_KEY: False, MESSAGE_KEY: str(expected_error)}
    expected_message = dumps(failure_payload)

    with patch.object(LOGGER, "error") as logger_mock, patch(
            "backend.check_stac_metadata.utils.processing_assets_model_with_meta"
    ):
        validator = STACDatasetValidator(url_reader,
                                         MockValidationResultFactory())
        validator.run(metadata_url, hash_key)

        logger_mock.assert_any_call(expected_message)
# Example #4
def should_log_assets() -> None:
    """Each discovered asset should be logged at debug level."""
    base_url = any_s3_url()
    metadata_url = f"{base_url}/{any_safe_filename()}"
    asset_url = f"{base_url}/{any_safe_filename()}"
    asset_multihash = any_hex_multihash()
    stac_object = deepcopy(MINIMAL_VALID_STAC_COLLECTION_OBJECT)
    stac_object[STAC_ASSETS_KEY] = {
        any_asset_name(): {
            STAC_HREF_KEY: asset_url,
            STAC_FILE_CHECKSUM_KEY: asset_multihash,
        },
    }

    url_reader = MockJSONURLReader({metadata_url: stac_object})
    asset_entry = {
        PROCESSING_ASSET_URL_KEY: asset_url,
        PROCESSING_ASSET_MULTIHASH_KEY: asset_multihash,
    }
    expected_message = dumps({PROCESSING_ASSET_ASSET_KEY: asset_entry})

    with patch.object(LOGGER, "debug") as logger_mock, patch(
            "backend.check_stac_metadata.utils.processing_assets_model_with_meta"
    ):
        validator = STACDatasetValidator(url_reader,
                                         MockValidationResultFactory())
        validator.validate(metadata_url)

        logger_mock.assert_any_call(expected_message)
# Example #5
def should_only_validate_each_file_once() -> None:
    """Files referenced more than once should be fetched exactly once.

    The same files are referenced through "child", "root" and "self" links,
    using both relative and absolute URLs; the reader's call limit of three
    enforces one read per distinct file.
    """
    base_url = any_s3_url()
    root_url = f"{base_url}/{any_safe_filename()}"
    child_filename = any_safe_filename()
    child_url = f"{base_url}/{child_filename}"
    leaf_url = f"{base_url}/{any_safe_filename()}"

    def link(href: str, rel: str) -> dict:
        # Small builder to keep the link lists readable.
        return {STAC_HREF_KEY: href, "rel": rel}

    root_stac_object = deepcopy(MINIMAL_VALID_STAC_CATALOG_OBJECT)
    root_stac_object[STAC_LINKS_KEY] = [
        link(child_url, "child"),
        link(root_url, "root"),
        link(root_url, "self"),
    ]
    child_stac_object = deepcopy(MINIMAL_VALID_STAC_COLLECTION_OBJECT)
    child_stac_object[STAC_LINKS_KEY] = [
        link(leaf_url, "child"),
        link(root_url, "root"),
        link(child_filename, "self"),
    ]
    leaf_stac_object = deepcopy(MINIMAL_VALID_STAC_ITEM_OBJECT)
    leaf_stac_object[STAC_LINKS_KEY] = [
        link(root_url, "root"),
        link(leaf_url, "self"),
    ]
    url_reader = MockJSONURLReader(
        {
            root_url: root_stac_object,
            child_url: child_stac_object,
            leaf_url: leaf_stac_object,
        },
        call_limit=3,
    )

    with patch("backend.check_stac_metadata.utils.processing_assets_model_with_meta"):
        STACDatasetValidator(url_reader, MockValidationResultFactory()).validate(root_url)

    assert url_reader.mock_calls == [call(root_url), call(child_url), call(leaf_url)]
def should_log_staging_access_validation(validate_mock: MagicMock) -> None:
    """A ClientError raised during validation should be logged as a failure.

    NOTE(review): a test with this exact name also appears earlier in this
    file; under Python this definition shadows it — consider renaming one.
    """
    metadata_url = any_s3_url()
    # Build the key from the shared constants instead of hard-coded
    # "DATASET#"/"VERSION#" literals, matching the sibling tests.
    hash_key = (
        f"{DATASET_ID_PREFIX}{any_dataset_id()}"
        f"{DB_KEY_SEPARATOR}{VERSION_ID_PREFIX}{any_dataset_version_id()}")

    expected_error = ClientError(
        {
            AWS_RESPONSE_ERROR_KEY: {
                AWS_RESPONSE_ERROR_CODE_KEY: "TEST",
                AWS_RESPONSE_ERROR_MESSAGE_KEY: "TEST",
            }
        },
        operation_name="get_object",
    )
    validate_mock.side_effect = expected_error

    url_reader = MockJSONURLReader(
        {metadata_url: MINIMAL_VALID_STAC_COLLECTION_OBJECT})

    expected_message = dumps({
        SUCCESS_KEY: False,
        MESSAGE_KEY: str(expected_error)
    })

    with patch.object(LOGGER, "error") as logger_mock, patch(
            "backend.check_stac_metadata.utils.processing_assets_model_with_meta"
    ):
        STACDatasetValidator(url_reader, MockValidationResultFactory()).run(
            metadata_url, hash_key)

        logger_mock.assert_any_call(expected_message)
def should_log_assets() -> None:
    """Each discovered asset should be logged at debug level.

    NOTE(review): a test with this exact name also appears earlier in this
    file; under Python this definition shadows it — consider renaming one.
    """
    base_url = any_s3_url()
    metadata_url = f"{base_url}/{any_safe_filename()}"
    stac_object = deepcopy(MINIMAL_VALID_STAC_COLLECTION_OBJECT)
    asset_url = f"{base_url}/{any_safe_filename()}"
    asset_multihash = any_hex_multihash()
    # Use the shared STAC/processing key constants rather than raw
    # "assets"/"href"/"file:checksum" literals, matching the sibling tests.
    stac_object[STAC_ASSETS_KEY] = {
        any_asset_name(): {
            STAC_HREF_KEY: asset_url,
            STAC_FILE_CHECKSUM_KEY: asset_multihash,
        },
    }

    url_reader = MockJSONURLReader({metadata_url: stac_object})
    expected_message = dumps({
        PROCESSING_ASSET_ASSET_KEY: {
            PROCESSING_ASSET_URL_KEY: asset_url,
            PROCESSING_ASSET_MULTIHASH_KEY: asset_multihash,
        }
    })

    with patch.object(LOGGER, "debug") as logger_mock, patch(
            "backend.check_stac_metadata.utils.processing_assets_model_with_meta"
    ):
        STACDatasetValidator(
            url_reader, MockValidationResultFactory()).validate(metadata_url)

        logger_mock.assert_any_call(expected_message)
# Example #8
def should_collect_assets_from_validated_item_metadata_files(subtests: SubTests) -> None:
    """Item validation should collect both assets and the metadata file itself."""
    # Given an item with one absolute and one relative asset href
    base_url = any_s3_url()
    metadata_url = f"{base_url}/{any_safe_filename()}"
    absolute_asset_url = f"{base_url}/{any_safe_filename()}"
    absolute_asset_multihash = any_hex_multihash()
    relative_asset_filename = any_safe_filename()
    relative_asset_multihash = any_hex_multihash()

    stac_object = deepcopy(MINIMAL_VALID_STAC_ITEM_OBJECT)
    stac_object[STAC_ASSETS_KEY] = {
        any_asset_name(): {
            STAC_HREF_KEY: absolute_asset_url,
            STAC_FILE_CHECKSUM_KEY: absolute_asset_multihash,
        },
        any_asset_name(): {
            STAC_HREF_KEY: relative_asset_filename,
            STAC_FILE_CHECKSUM_KEY: relative_asset_multihash,
        },
    }
    # The relative href should come back resolved against the base URL.
    expected_assets = [
        {
            PROCESSING_ASSET_MULTIHASH_KEY: absolute_asset_multihash,
            PROCESSING_ASSET_URL_KEY: absolute_asset_url,
        },
        {
            PROCESSING_ASSET_MULTIHASH_KEY: relative_asset_multihash,
            PROCESSING_ASSET_URL_KEY: f"{base_url}/{relative_asset_filename}",
        },
    ]
    expected_metadata = [{PROCESSING_ASSET_URL_KEY: metadata_url}]
    url_reader = MockJSONURLReader({metadata_url: stac_object})

    with patch("backend.check_stac_metadata.utils.processing_assets_model_with_meta"):
        validator = STACDatasetValidator(url_reader, MockValidationResultFactory())

    # When
    validator.validate(metadata_url)

    # Then
    with subtests.test():
        assert _sort_assets(validator.dataset_assets) == _sort_assets(expected_assets)
    with subtests.test():
        assert validator.dataset_metadata == expected_metadata
def should_report_duplicate_asset_names(
        validation_results_factory_mock: MagicMock) -> None:
    """Duplicate asset keys in raw JSON should be reported as a failure.

    The metadata is built as a raw string because a Python dict cannot hold
    duplicate keys.

    NOTE(review): a test with this exact name also appears later in this
    file; under Python the later definition shadows this one — consider
    renaming one of the two.
    """
    # Given a collection whose assets object repeats a key. Use the shared
    # STAC key constants instead of raw string literals, matching the
    # sibling tests in this file.
    asset_name = "name"
    metadata = (
        "{"
        f'"{STAC_ASSETS_KEY}": {{'
        f'"{asset_name}": '
        f'{{"{STAC_HREF_KEY}": "{S3_URL_PREFIX}bucket/foo", "{STAC_FILE_CHECKSUM_KEY}": ""}},'
        f'"{asset_name}": '
        f'{{"{STAC_HREF_KEY}": "{S3_URL_PREFIX}bucket/bar", "{STAC_FILE_CHECKSUM_KEY}": ""}}'
        "},"
        f'"{STAC_DESCRIPTION_KEY}": "any description",'
        f' "{STAC_EXTENT_KEY}": {{'
        f'"{STAC_EXTENT_SPATIAL_KEY}": {{"{STAC_EXTENT_BBOX_KEY}": [[-180, -90, 180, 90]]}},'
        f' "{STAC_EXTENT_TEMPORAL_KEY}":'
        f' {{"{STAC_EXTENT_TEMPORAL_INTERVAL_KEY}": [["2000-01-01T00:00:00+00:00", null]]}}'
        "},"
        f' "{STAC_ID_KEY}": "{any_dataset_id()}",'
        f' "{STAC_LICENSE_KEY}": "MIT",'
        f' "{STAC_LINKS_KEY}": [],'
        f' "{STAC_VERSION_KEY}": "{STAC_VERSION}",'
        f' "{STAC_TYPE_KEY}": "{STAC_TYPE_COLLECTION}"'
        "}")
    metadata_url = any_s3_url()
    sys.argv = [
        any_program_name(),
        f"--metadata-url={metadata_url}",
        f"--dataset-id={any_dataset_id()}",
        f"--version-id={any_dataset_version_id()}",
    ]

    url_reader = MockJSONURLReader(
        {metadata_url: StringIO(initial_value=metadata)})

    with patch(
            "backend.check_stac_metadata.utils.processing_assets_model_with_meta"
    ):
        # When
        STACDatasetValidator(
            url_reader, validation_results_factory_mock).validate(metadata_url)

    # Then
    validation_results_factory_mock.save.assert_any_call(
        metadata_url,
        Check.DUPLICATE_OBJECT_KEY,
        ValidationResult.FAILED,
        details={
            MESSAGE_KEY:
            f"Found duplicate object name “{asset_name}” in “{metadata_url}”"
        },
    )
# Example #10
def should_validate_metadata_files_recursively() -> None:
    """A child link in a collection should trigger validation of the child."""
    base_url = any_s3_url()
    parent_url = f"{base_url}/{any_safe_filename()}"
    child_url = f"{base_url}/{any_safe_filename()}"

    parent_stac_object = deepcopy(MINIMAL_VALID_STAC_COLLECTION_OBJECT)
    parent_stac_object[STAC_LINKS_KEY].append(
        {STAC_HREF_KEY: child_url, "rel": "child"})
    url_reader = MockJSONURLReader({
        parent_url: parent_stac_object,
        child_url: deepcopy(MINIMAL_VALID_STAC_COLLECTION_OBJECT),
    })

    with patch("backend.check_stac_metadata.utils.processing_assets_model_with_meta"):
        STACDatasetValidator(url_reader, MockValidationResultFactory()).validate(parent_url)

    # Parent first, then the linked child, each fetched exactly once.
    assert url_reader.mock_calls == [call(parent_url), call(child_url)]
# Example #11
def should_report_duplicate_asset_names(validation_results_factory_mock: MagicMock) -> None:
    """Duplicate asset keys in raw JSON should produce a DUPLICATE_OBJECT_KEY failure."""
    # Given: raw JSON is used because a Python dict cannot carry duplicate keys.
    asset_name = "name"
    duplicate_key_prefix = f'"{asset_name}": '
    metadata = "".join([
        "{",
        f'"{STAC_ASSETS_KEY}": {{',
        duplicate_key_prefix,
        f'{{"{STAC_HREF_KEY}": "{S3_URL_PREFIX}bucket/foo", "{STAC_FILE_CHECKSUM_KEY}": ""}},',
        duplicate_key_prefix,
        f'{{"{STAC_HREF_KEY}": "{S3_URL_PREFIX}bucket/bar", "{STAC_FILE_CHECKSUM_KEY}": ""}}',
        "},",
        f'"{STAC_DESCRIPTION_KEY}": "any description",',
        f' "{STAC_EXTENT_KEY}": {{',
        f'"{STAC_EXTENT_SPATIAL_KEY}": {{"{STAC_EXTENT_BBOX_KEY}": [[-180, -90, 180, 90]]}},',
        f' "{STAC_EXTENT_TEMPORAL_KEY}":',
        f' {{"{STAC_EXTENT_TEMPORAL_INTERVAL_KEY}": [["2000-01-01T00:00:00+00:00", null]]}}',
        "},",
        f' "{STAC_ID_KEY}": "{any_dataset_id()}",',
        f' "{STAC_LICENSE_KEY}": "MIT",',
        f' "{STAC_LINKS_KEY}": [],',
        f' "{STAC_VERSION_KEY}": "{STAC_VERSION}",',
        f' "{STAC_TYPE_KEY}": "{STAC_TYPE_COLLECTION}"',
        "}",
    ])
    metadata_url = any_s3_url()
    sys.argv = [
        any_program_name(),
        f"--metadata-url={metadata_url}",
        f"--dataset-id={any_dataset_id()}",
        f"--version-id={any_dataset_version_id()}",
    ]

    url_reader = MockJSONURLReader({metadata_url: StringIO(initial_value=metadata)})

    with patch("backend.check_stac_metadata.utils.processing_assets_model_with_meta"):
        # When
        STACDatasetValidator(url_reader, validation_results_factory_mock).validate(metadata_url)

    # Then
    expected_details = {
        MESSAGE_KEY: f"Found duplicate object name “{asset_name}” in “{metadata_url}”"
    }
    validation_results_factory_mock.save.assert_any_call(
        metadata_url,
        Check.DUPLICATE_OBJECT_KEY,
        ValidationResult.FAILED,
        details=expected_details,
    )
def should_log_non_s3_url_prefix_validation() -> None:
    """A non-S3 metadata URL should be logged as a failure by run()."""
    metadata_url = any_https_url()
    # Build the key from the shared constants instead of hard-coded
    # "DATASET#"/"VERSION#" literals, matching the sibling tests.
    hash_key = (
        f"{DATASET_ID_PREFIX}{any_dataset_id()}"
        f"{DB_KEY_SEPARATOR}{VERSION_ID_PREFIX}{any_dataset_version_id()}")
    url_reader = MockJSONURLReader(
        {metadata_url: MINIMAL_VALID_STAC_COLLECTION_OBJECT})
    expected_message = dumps({
        SUCCESS_KEY:
        False,
        MESSAGE_KEY:
        f"URL doesn't start with “{S3_URL_PREFIX}”: “{metadata_url}”"
    })

    with patch.object(LOGGER, "error") as logger_mock, patch(
            "backend.check_stac_metadata.utils.processing_assets_model_with_meta"
    ):
        STACDatasetValidator(url_reader, MockValidationResultFactory()).run(
            metadata_url, hash_key)

        logger_mock.assert_any_call(expected_message)
def should_log_json_parse_validation(validate_mock: MagicMock) -> None:
    """A JSONDecodeError raised during validation should be logged as a failure."""
    metadata_url = any_s3_url()
    # Build the key from the shared constants instead of hard-coded
    # "DATASET#"/"VERSION#" literals, matching the sibling tests.
    hash_key = (
        f"{DATASET_ID_PREFIX}{any_dataset_id()}"
        f"{DB_KEY_SEPARATOR}{VERSION_ID_PREFIX}{any_dataset_version_id()}")

    url_reader = MockJSONURLReader({metadata_url: StringIO(initial_value="{")})

    expected_error = JSONDecodeError(any_error_message(), "", 0)
    validate_mock.side_effect = expected_error

    expected_message = dumps({
        SUCCESS_KEY: False,
        MESSAGE_KEY: str(expected_error)
    })

    with patch.object(LOGGER, "error") as logger_mock, patch(
            "backend.check_stac_metadata.utils.processing_assets_model_with_meta"
    ):
        STACDatasetValidator(url_reader, MockValidationResultFactory()).run(
            metadata_url, hash_key)

        logger_mock.assert_any_call(expected_message)
# Example #14
def should_log_schema_mismatch_validation(validate_mock: MagicMock) -> None:
    """A schema ValidationError should be logged as a failure message."""
    metadata_url = any_s3_url()
    hash_key = DB_KEY_SEPARATOR.join([
        f"{DATASET_ID_PREFIX}{any_dataset_id()}",
        f"{VERSION_ID_PREFIX}{any_dataset_version_id()}",
    ])

    # Force the patched validate step to fail with a schema error
    expected_error = ValidationError(any_error_message())
    validate_mock.side_effect = expected_error

    url_reader = MockJSONURLReader(
        {metadata_url: MINIMAL_VALID_STAC_COLLECTION_OBJECT})

    failure_payload = {SUCCESS_KEY: False, MESSAGE_KEY: expected_error.message}
    expected_message = dumps(failure_payload)

    with patch.object(LOGGER, "error") as logger_mock, patch(
            "backend.check_stac_metadata.utils.processing_assets_model_with_meta"
    ):
        validator = STACDatasetValidator(url_reader,
                                         MockValidationResultFactory())
        validator.run(metadata_url, hash_key)

        logger_mock.assert_any_call(expected_message)