def should_report_invalid_json(validation_results_factory_mock: MagicMock) -> None:
    """Truncated JSON should raise and record a failed JSON_PARSE check."""
    # Given a metadata file containing an unterminated JSON object
    metadata_url = any_s3_url()
    url_reader = MockJSONURLReader({metadata_url: StringIO(initial_value="{")})
    validator = STACDatasetValidator(url_reader, validation_results_factory_mock)

    # When
    with raises(JSONDecodeError):
        validator.validate(metadata_url)

    # Then the parse failure is saved with the decoder's exact message
    assert validation_results_factory_mock.mock_calls == [
        call.save(
            metadata_url,
            Check.JSON_PARSE,
            ValidationResult.FAILED,
            details={
                "message": "Expecting property name enclosed in double quotes:"
                " line 1 column 2 (char 1)"
            },
        ),
    ]
def should_collect_assets_from_validated_collection_metadata_files(subtests: SubTests) -> None:
    """Collection assets should be collected, resolving relative hrefs against the base URL.

    Uses the shared ``STAC_*``/``PROCESSING_ASSET_*`` key constants instead of
    string literals, for consistency with
    ``should_collect_assets_from_validated_item_metadata_files``.
    """
    # Given one asset in another directory and one relative link
    base_url = any_s3_url()
    metadata_url = f"{base_url}/{any_safe_filename()}"
    stac_object = deepcopy(MINIMAL_VALID_STAC_COLLECTION_OBJECT)
    first_asset_url = f"{base_url}/{any_safe_filename()}/{any_safe_filename()}"
    first_asset_multihash = any_hex_multihash()
    second_asset_filename = any_safe_filename()
    second_asset_url = f"{base_url}/{second_asset_filename}"
    second_asset_multihash = any_hex_multihash()
    stac_object[STAC_ASSETS_KEY] = {
        any_asset_name(): {
            STAC_HREF_KEY: first_asset_url,
            STAC_FILE_CHECKSUM_KEY: first_asset_multihash,
        },
        any_asset_name(): {
            # Relative href: the validator must resolve it against base_url
            STAC_HREF_KEY: second_asset_filename,
            STAC_FILE_CHECKSUM_KEY: second_asset_multihash,
        },
    }
    expected_assets = [
        {
            PROCESSING_ASSET_MULTIHASH_KEY: first_asset_multihash,
            PROCESSING_ASSET_URL_KEY: first_asset_url,
        },
        {
            PROCESSING_ASSET_MULTIHASH_KEY: second_asset_multihash,
            PROCESSING_ASSET_URL_KEY: second_asset_url,
        },
    ]
    expected_metadata = [{PROCESSING_ASSET_URL_KEY: metadata_url}]
    url_reader = MockJSONURLReader({metadata_url: stac_object})

    with patch("backend.check_stac_metadata.utils.processing_assets_model_with_meta"):
        validator = STACDatasetValidator(url_reader, MockValidationResultFactory())

        # When
        validator.validate(metadata_url)

    # Then
    with subtests.test():
        assert _sort_assets(validator.dataset_assets) == _sort_assets(expected_assets)
    with subtests.test():
        assert validator.dataset_metadata == expected_metadata
def should_log_staging_access_validation(validate_mock: MagicMock) -> None:
    """A ClientError from the staging bucket should be logged as a failure message."""
    # NOTE(review): a later definition with the same name exists in this module
    # and will shadow this one at import time — confirm which copy is intended.
    metadata_url = any_s3_url()
    hash_key = (
        f"{DATASET_ID_PREFIX}{any_dataset_id()}"
        f"{DB_KEY_SEPARATOR}{VERSION_ID_PREFIX}{any_dataset_version_id()}"
    )

    # Simulate AWS rejecting the object read
    expected_error = ClientError(
        {
            AWS_RESPONSE_ERROR_KEY: {
                AWS_RESPONSE_ERROR_CODE_KEY: "TEST",
                AWS_RESPONSE_ERROR_MESSAGE_KEY: "TEST",
            }
        },
        operation_name="get_object",
    )
    validate_mock.side_effect = expected_error

    url_reader = MockJSONURLReader({metadata_url: MINIMAL_VALID_STAC_COLLECTION_OBJECT})
    expected_message = dumps({SUCCESS_KEY: False, MESSAGE_KEY: str(expected_error)})

    with patch.object(LOGGER, "error") as logger_mock, patch(
        "backend.check_stac_metadata.utils.processing_assets_model_with_meta"
    ):
        STACDatasetValidator(url_reader, MockValidationResultFactory()).run(
            metadata_url, hash_key
        )
        logger_mock.assert_any_call(expected_message)
def should_log_assets() -> None:
    """Each discovered asset should be logged at debug level as a JSON message."""
    # NOTE(review): a later definition with the same name exists in this module
    # and will shadow this one at import time — confirm which copy is intended.
    base_url = any_s3_url()
    metadata_url = f"{base_url}/{any_safe_filename()}"
    asset_url = f"{base_url}/{any_safe_filename()}"
    asset_multihash = any_hex_multihash()

    stac_object = deepcopy(MINIMAL_VALID_STAC_COLLECTION_OBJECT)
    stac_object[STAC_ASSETS_KEY] = {
        any_asset_name(): {
            STAC_HREF_KEY: asset_url,
            STAC_FILE_CHECKSUM_KEY: asset_multihash,
        },
    }
    url_reader = MockJSONURLReader({metadata_url: stac_object})

    expected_message = dumps(
        {
            PROCESSING_ASSET_ASSET_KEY: {
                PROCESSING_ASSET_URL_KEY: asset_url,
                PROCESSING_ASSET_MULTIHASH_KEY: asset_multihash,
            }
        }
    )

    with patch.object(LOGGER, "debug") as logger_mock, patch(
        "backend.check_stac_metadata.utils.processing_assets_model_with_meta"
    ):
        STACDatasetValidator(url_reader, MockValidationResultFactory()).validate(
            metadata_url
        )
        logger_mock.assert_any_call(expected_message)
def should_only_validate_each_file_once() -> None:
    """Each metadata file must be fetched exactly once, despite repeated links."""
    # Given multiple references to the same URL, both relative and absolute
    base_url = any_s3_url()
    root_url = f"{base_url}/{any_safe_filename()}"
    child_filename = any_safe_filename()
    child_url = f"{base_url}/{child_filename}"
    leaf_url = f"{base_url}/{any_safe_filename()}"

    root_stac_object = deepcopy(MINIMAL_VALID_STAC_CATALOG_OBJECT)
    root_stac_object[STAC_LINKS_KEY] = [
        {STAC_HREF_KEY: child_url, "rel": "child"},
        {STAC_HREF_KEY: root_url, "rel": "root"},
        {STAC_HREF_KEY: root_url, "rel": "self"},
    ]

    child_stac_object = deepcopy(MINIMAL_VALID_STAC_COLLECTION_OBJECT)
    child_stac_object[STAC_LINKS_KEY] = [
        {STAC_HREF_KEY: leaf_url, "rel": "child"},
        {STAC_HREF_KEY: root_url, "rel": "root"},
        # Relative self link: must resolve to child_url, not trigger a re-read
        {STAC_HREF_KEY: child_filename, "rel": "self"},
    ]

    leaf_stac_object = deepcopy(MINIMAL_VALID_STAC_ITEM_OBJECT)
    leaf_stac_object[STAC_LINKS_KEY] = [
        {STAC_HREF_KEY: root_url, "rel": "root"},
        {STAC_HREF_KEY: leaf_url, "rel": "self"},
    ]

    # call_limit=3 makes the reader itself fail on any fourth fetch
    url_reader = MockJSONURLReader(
        {
            root_url: root_stac_object,
            child_url: child_stac_object,
            leaf_url: leaf_stac_object,
        },
        call_limit=3,
    )

    with patch("backend.check_stac_metadata.utils.processing_assets_model_with_meta"):
        STACDatasetValidator(url_reader, MockValidationResultFactory()).validate(root_url)

    assert url_reader.mock_calls == [call(root_url), call(child_url), call(leaf_url)]
def should_log_staging_access_validation(validate_mock: MagicMock) -> None:
    """A ClientError from the staging bucket should be logged as a failure message.

    Rewritten to use the shared key/prefix constants (``DATASET_ID_PREFIX``,
    ``AWS_RESPONSE_ERROR_KEY``, ``SUCCESS_KEY`` …) instead of hard-coded string
    literals, consistent with ``should_log_schema_mismatch_validation``.
    """
    # NOTE(review): this duplicates an earlier definition with the same name in
    # this module; the later one shadows it — confirm the duplicate is intended.
    metadata_url = any_s3_url()
    hash_key = (
        f"{DATASET_ID_PREFIX}{any_dataset_id()}"
        f"{DB_KEY_SEPARATOR}{VERSION_ID_PREFIX}{any_dataset_version_id()}"
    )

    # Simulate AWS rejecting the object read
    expected_error = ClientError(
        {
            AWS_RESPONSE_ERROR_KEY: {
                AWS_RESPONSE_ERROR_CODE_KEY: "TEST",
                AWS_RESPONSE_ERROR_MESSAGE_KEY: "TEST",
            }
        },
        operation_name="get_object",
    )
    validate_mock.side_effect = expected_error

    url_reader = MockJSONURLReader({metadata_url: MINIMAL_VALID_STAC_COLLECTION_OBJECT})
    expected_message = dumps({SUCCESS_KEY: False, MESSAGE_KEY: str(expected_error)})

    with patch.object(LOGGER, "error") as logger_mock, patch(
        "backend.check_stac_metadata.utils.processing_assets_model_with_meta"
    ):
        STACDatasetValidator(url_reader, MockValidationResultFactory()).run(
            metadata_url, hash_key
        )
        logger_mock.assert_any_call(expected_message)
def should_log_assets() -> None:
    """Each discovered asset should be logged at debug level as a JSON message.

    Rewritten to use the shared ``STAC_*``/``PROCESSING_ASSET_*`` key constants
    instead of hard-coded string literals, consistent with the other asset tests.
    """
    # NOTE(review): this duplicates an earlier definition with the same name in
    # this module; the later one shadows it — confirm the duplicate is intended.
    base_url = any_s3_url()
    metadata_url = f"{base_url}/{any_safe_filename()}"
    asset_url = f"{base_url}/{any_safe_filename()}"
    asset_multihash = any_hex_multihash()

    stac_object = deepcopy(MINIMAL_VALID_STAC_COLLECTION_OBJECT)
    stac_object[STAC_ASSETS_KEY] = {
        any_asset_name(): {
            STAC_HREF_KEY: asset_url,
            STAC_FILE_CHECKSUM_KEY: asset_multihash,
        },
    }
    url_reader = MockJSONURLReader({metadata_url: stac_object})

    expected_message = dumps(
        {
            PROCESSING_ASSET_ASSET_KEY: {
                PROCESSING_ASSET_URL_KEY: asset_url,
                PROCESSING_ASSET_MULTIHASH_KEY: asset_multihash,
            }
        }
    )

    with patch.object(LOGGER, "debug") as logger_mock, patch(
        "backend.check_stac_metadata.utils.processing_assets_model_with_meta"
    ):
        STACDatasetValidator(url_reader, MockValidationResultFactory()).validate(
            metadata_url
        )
        logger_mock.assert_any_call(expected_message)
def should_collect_assets_from_validated_item_metadata_files(subtests: SubTests) -> None:
    """Item assets should be collected, resolving relative hrefs against the base URL."""
    # Given an item with one absolute and one relative asset href
    base_url = any_s3_url()
    metadata_url = f"{base_url}/{any_safe_filename()}"
    first_asset_url = f"{base_url}/{any_safe_filename()}"
    first_asset_multihash = any_hex_multihash()
    second_asset_filename = any_safe_filename()
    second_asset_multihash = any_hex_multihash()

    stac_object = deepcopy(MINIMAL_VALID_STAC_ITEM_OBJECT)
    stac_object[STAC_ASSETS_KEY] = {
        any_asset_name(): {
            STAC_HREF_KEY: first_asset_url,
            STAC_FILE_CHECKSUM_KEY: first_asset_multihash,
        },
        any_asset_name(): {
            # Relative href: the validator must resolve it against base_url
            STAC_HREF_KEY: second_asset_filename,
            STAC_FILE_CHECKSUM_KEY: second_asset_multihash,
        },
    }

    expected_assets = [
        {
            PROCESSING_ASSET_MULTIHASH_KEY: first_asset_multihash,
            PROCESSING_ASSET_URL_KEY: first_asset_url,
        },
        {
            PROCESSING_ASSET_MULTIHASH_KEY: second_asset_multihash,
            PROCESSING_ASSET_URL_KEY: f"{base_url}/{second_asset_filename}",
        },
    ]
    expected_metadata = [{PROCESSING_ASSET_URL_KEY: metadata_url}]
    url_reader = MockJSONURLReader({metadata_url: stac_object})

    with patch("backend.check_stac_metadata.utils.processing_assets_model_with_meta"):
        validator = STACDatasetValidator(url_reader, MockValidationResultFactory())
        # When
        validator.validate(metadata_url)

    # Then
    with subtests.test():
        assert _sort_assets(validator.dataset_assets) == _sort_assets(expected_assets)
    with subtests.test():
        assert validator.dataset_metadata == expected_metadata
def should_report_duplicate_asset_names(validation_results_factory_mock: MagicMock) -> None:
    """Duplicate asset keys in a collection should be reported as a failed check.

    Rewritten to build the raw JSON from the shared ``STAC_*`` key constants
    instead of hard-coded literals, consistent with the other STAC tests.
    The metadata is passed as a raw string because Python dicts cannot hold
    duplicate keys.
    """
    # NOTE(review): this duplicates a later definition with the same name in
    # this module, which shadows it — confirm the duplicate is intended.
    # Given
    asset_name = "name"
    metadata = (
        "{"
        f'"{STAC_ASSETS_KEY}": {{'
        f'"{asset_name}": '
        f'{{"{STAC_HREF_KEY}": "{S3_URL_PREFIX}bucket/foo", "{STAC_FILE_CHECKSUM_KEY}": ""}},'
        f'"{asset_name}": '
        f'{{"{STAC_HREF_KEY}": "{S3_URL_PREFIX}bucket/bar", "{STAC_FILE_CHECKSUM_KEY}": ""}}'
        "},"
        f'"{STAC_DESCRIPTION_KEY}": "any description",'
        f' "{STAC_EXTENT_KEY}": {{'
        f'"{STAC_EXTENT_SPATIAL_KEY}": {{"{STAC_EXTENT_BBOX_KEY}": [[-180, -90, 180, 90]]}},'
        f' "{STAC_EXTENT_TEMPORAL_KEY}":'
        f' {{"{STAC_EXTENT_TEMPORAL_INTERVAL_KEY}": [["2000-01-01T00:00:00+00:00", null]]}}'
        "},"
        f' "{STAC_ID_KEY}": "{any_dataset_id()}",'
        f' "{STAC_LICENSE_KEY}": "MIT",'
        f' "{STAC_LINKS_KEY}": [],'
        f' "{STAC_VERSION_KEY}": "{STAC_VERSION}",'
        f' "{STAC_TYPE_KEY}": "{STAC_TYPE_COLLECTION}"'
        "}"
    )
    metadata_url = any_s3_url()
    sys.argv = [
        any_program_name(),
        f"--metadata-url={metadata_url}",
        f"--dataset-id={any_dataset_id()}",
        f"--version-id={any_dataset_version_id()}",
    ]
    url_reader = MockJSONURLReader({metadata_url: StringIO(initial_value=metadata)})

    with patch("backend.check_stac_metadata.utils.processing_assets_model_with_meta"):
        # When
        STACDatasetValidator(url_reader, validation_results_factory_mock).validate(metadata_url)

    # Then
    validation_results_factory_mock.save.assert_any_call(
        metadata_url,
        Check.DUPLICATE_OBJECT_KEY,
        ValidationResult.FAILED,
        details={MESSAGE_KEY: f"Found duplicate object name “{asset_name}” in “{metadata_url}”"},
    )
def should_validate_metadata_files_recursively() -> None:
    """A collection linking to a child should cause both files to be validated."""
    base_url = any_s3_url()
    parent_url = f"{base_url}/{any_safe_filename()}"
    child_url = f"{base_url}/{any_safe_filename()}"

    parent_stac_object = deepcopy(MINIMAL_VALID_STAC_COLLECTION_OBJECT)
    parent_stac_object[STAC_LINKS_KEY].append({STAC_HREF_KEY: child_url, "rel": "child"})
    url_reader = MockJSONURLReader(
        {
            parent_url: parent_stac_object,
            child_url: deepcopy(MINIMAL_VALID_STAC_COLLECTION_OBJECT),
        }
    )

    with patch("backend.check_stac_metadata.utils.processing_assets_model_with_meta"):
        STACDatasetValidator(url_reader, MockValidationResultFactory()).validate(parent_url)

    # Parent is fetched first, then the linked child
    assert url_reader.mock_calls == [call(parent_url), call(child_url)]
def should_report_duplicate_asset_names(validation_results_factory_mock: MagicMock) -> None:
    """Duplicate asset keys in a collection should be reported as a failed check.

    The metadata is supplied as a raw JSON string because a Python dict cannot
    hold two entries with the same key.
    """
    # Given a collection whose "assets" object repeats the same name twice
    asset_name = "name"
    metadata = "".join(
        (
            "{",
            f'"{STAC_ASSETS_KEY}": {{',
            f'"{asset_name}": ',
            f'{{"{STAC_HREF_KEY}": "{S3_URL_PREFIX}bucket/foo", "{STAC_FILE_CHECKSUM_KEY}": ""}},',
            f'"{asset_name}": ',
            f'{{"{STAC_HREF_KEY}": "{S3_URL_PREFIX}bucket/bar", "{STAC_FILE_CHECKSUM_KEY}": ""}}',
            "},",
            f'"{STAC_DESCRIPTION_KEY}": "any description",',
            f' "{STAC_EXTENT_KEY}": {{',
            f'"{STAC_EXTENT_SPATIAL_KEY}": {{"{STAC_EXTENT_BBOX_KEY}": [[-180, -90, 180, 90]]}},',
            f' "{STAC_EXTENT_TEMPORAL_KEY}":',
            f' {{"{STAC_EXTENT_TEMPORAL_INTERVAL_KEY}": [["2000-01-01T00:00:00+00:00", null]]}}',
            "},",
            f' "{STAC_ID_KEY}": "{any_dataset_id()}",',
            f' "{STAC_LICENSE_KEY}": "MIT",',
            f' "{STAC_LINKS_KEY}": [],',
            f' "{STAC_VERSION_KEY}": "{STAC_VERSION}",',
            f' "{STAC_TYPE_KEY}": "{STAC_TYPE_COLLECTION}"',
            "}",
        )
    )
    metadata_url = any_s3_url()
    sys.argv = [
        any_program_name(),
        f"--metadata-url={metadata_url}",
        f"--dataset-id={any_dataset_id()}",
        f"--version-id={any_dataset_version_id()}",
    ]
    url_reader = MockJSONURLReader({metadata_url: StringIO(initial_value=metadata)})

    with patch("backend.check_stac_metadata.utils.processing_assets_model_with_meta"):
        # When
        STACDatasetValidator(url_reader, validation_results_factory_mock).validate(metadata_url)

    # Then
    validation_results_factory_mock.save.assert_any_call(
        metadata_url,
        Check.DUPLICATE_OBJECT_KEY,
        ValidationResult.FAILED,
        details={MESSAGE_KEY: f"Found duplicate object name “{asset_name}” in “{metadata_url}”"},
    )
def should_log_non_s3_url_prefix_validation() -> None:
    """A non-S3 metadata URL should be rejected and the failure logged.

    Rewritten to use the shared prefix/key constants (``DATASET_ID_PREFIX``,
    ``SUCCESS_KEY`` …) instead of hard-coded string literals, consistent with
    ``should_log_schema_mismatch_validation``.
    """
    metadata_url = any_https_url()
    hash_key = (
        f"{DATASET_ID_PREFIX}{any_dataset_id()}"
        f"{DB_KEY_SEPARATOR}{VERSION_ID_PREFIX}{any_dataset_version_id()}"
    )

    url_reader = MockJSONURLReader({metadata_url: MINIMAL_VALID_STAC_COLLECTION_OBJECT})
    expected_message = dumps(
        {
            SUCCESS_KEY: False,
            MESSAGE_KEY: f"URL doesn't start with “{S3_URL_PREFIX}”: “{metadata_url}”",
        }
    )

    with patch.object(LOGGER, "error") as logger_mock, patch(
        "backend.check_stac_metadata.utils.processing_assets_model_with_meta"
    ):
        STACDatasetValidator(url_reader, MockValidationResultFactory()).run(
            metadata_url, hash_key
        )
        logger_mock.assert_any_call(expected_message)
def should_log_json_parse_validation(validate_mock: MagicMock) -> None:
    """A JSON decoding error during validation should be logged as a failure.

    Rewritten to use the shared prefix/key constants (``DATASET_ID_PREFIX``,
    ``SUCCESS_KEY`` …) instead of hard-coded string literals, consistent with
    ``should_log_schema_mismatch_validation``.
    """
    metadata_url = any_s3_url()
    hash_key = (
        f"{DATASET_ID_PREFIX}{any_dataset_id()}"
        f"{DB_KEY_SEPARATOR}{VERSION_ID_PREFIX}{any_dataset_version_id()}"
    )

    # The reader would return broken JSON, but the mocked validate raises first
    url_reader = MockJSONURLReader({metadata_url: StringIO(initial_value="{")})
    expected_error = JSONDecodeError(any_error_message(), "", 0)
    validate_mock.side_effect = expected_error
    expected_message = dumps({SUCCESS_KEY: False, MESSAGE_KEY: str(expected_error)})

    with patch.object(LOGGER, "error") as logger_mock, patch(
        "backend.check_stac_metadata.utils.processing_assets_model_with_meta"
    ):
        STACDatasetValidator(url_reader, MockValidationResultFactory()).run(
            metadata_url, hash_key
        )
        logger_mock.assert_any_call(expected_message)
def should_log_schema_mismatch_validation(validate_mock: MagicMock) -> None:
    """A schema ValidationError should be logged with its message as a failure."""
    metadata_url = any_s3_url()
    hash_key = (
        f"{DATASET_ID_PREFIX}{any_dataset_id()}"
        f"{DB_KEY_SEPARATOR}{VERSION_ID_PREFIX}{any_dataset_version_id()}"
    )

    # Force the (mocked) schema validation step to fail
    expected_error = ValidationError(any_error_message())
    validate_mock.side_effect = expected_error

    url_reader = MockJSONURLReader({metadata_url: MINIMAL_VALID_STAC_COLLECTION_OBJECT})
    expected_message = dumps({SUCCESS_KEY: False, MESSAGE_KEY: expected_error.message})

    with patch.object(LOGGER, "error") as logger_mock, patch(
        "backend.check_stac_metadata.utils.processing_assets_model_with_meta"
    ):
        STACDatasetValidator(url_reader, MockValidationResultFactory()).run(
            metadata_url, hash_key
        )
        logger_mock.assert_any_call(expected_message)