async def test_datasets(async_client):
    """Basic test to check that the dataset API responds as expected,
    starting from an empty database."""
    dataset = "test"

    response = await async_client.get("/datasets")
    assert response.status_code == 200
    assert response.json() == {"data": [], "status": "success"}

    response = await async_client.put(f"/dataset/{dataset}", json=payload)
    assert response.status_code == 201
    assert response.json()["data"]["metadata"] == payload["metadata"]

    response = await async_client.get("/datasets")
    assert response.status_code == 200
    assert len(response.json()["data"]) == 1
    assert response.json()["data"][0]["metadata"] == payload["metadata"]

    response = await async_client.get(f"/dataset/{dataset}")
    assert response.status_code == 200
    assert response.json()["data"]["metadata"] == payload["metadata"]

    async with ContextEngine("READ"):
        rows = await db.all(
            f"SELECT schema_name FROM information_schema.schemata WHERE schema_name = '{dataset}';"
        )
    assert len(rows) == 1

    new_payload = {"metadata": {"title": "New Title"}}
    response = await async_client.patch(f"/dataset/{dataset}", json=new_payload)
    assert response.status_code == 200
    assert response.json()["data"]["metadata"] != payload["metadata"]
    assert response.json()["data"]["metadata"]["title"] == "New Title"
    assert response.json()["data"]["metadata"]["subtitle"] == "string"

    response = await async_client.delete(f"/dataset/{dataset}")
    assert response.status_code == 200
    assert response.json()["data"]["dataset"] == "test"

    async with ContextEngine("READ"):
        rows = await db.all(
            f"SELECT schema_name FROM information_schema.schemata WHERE schema_name = '{dataset}';"
        )
    assert len(rows) == 0

    response = await async_client.get("/datasets")
    assert response.status_code == 200
    assert response.json() == {"data": [], "status": "success"}

async def _check_dynamic_vector_tile_cache_status(dataset, version):
    rows = await assets.get_assets(dataset, version)
    asset_row = rows[0]

    asset_row = await _update_asset_field_metadata(
        asset_row.dataset,
        asset_row.version,
        asset_row.asset_id,
    )

    # SHP files have one additional attribute (fid)
    if asset_row.version == "v1.1.0":
        assert len(asset_row.metadata["fields"]) == 10
    else:
        assert len(asset_row.metadata["fields"]) == 9

    # The asset must be in status "saved" before we can create
    # the dynamic vector tile cache
    async with ContextEngine("WRITE"):
        asset_row = await assets.update_asset(
            asset_row.asset_id, status=AssetStatus.saved
        )

    await _register_dynamic_vector_tile_cache(
        asset_row.dataset, asset_row.version, asset_row.metadata
    )

    rows = await assets.get_assets(dataset, version)
    v = await versions.get_version(dataset, version)
    print(v.change_log)

    assert len(rows) == 2
    assert rows[0].asset_type == AssetType.database_table
    assert rows[1].asset_type == AssetType.dynamic_vector_tile_cache
    assert rows[1].status == AssetStatus.saved

async def test_update_dataset__is_downloadable(generic_vector_source_version):
    dataset, version, _ = generic_vector_source_version
    dataset_row = await get_dataset(dataset)
    version_row = await get_version(dataset, version)
    asset_row = await get_default_asset(dataset, version)

    # Check if default value is correctly populated
    assert dataset_row.is_downloadable is True
    assert version_row.is_downloadable is True
    assert asset_row.is_downloadable is True

    # This should update the downstream versions and assets only
    async with ContextEngine("WRITE"):
        await update_dataset(dataset, **{"is_downloadable": False, "metadata": {}})

    dataset_row = await get_dataset(dataset)
    version_row = await get_version(dataset, version)
    asset_row = await get_default_asset(dataset, version)

    assert dataset_row.is_downloadable is False
    assert version_row.is_downloadable is False
    assert asset_row.is_downloadable is False

async def test_batch_failure():
    dataset = "test"
    version = "v1.1.1"
    creation_options = {
        "source_type": "vector",
        "source_uri": [f"s3://{BUCKET}/{GEOJSON_NAME}"],
        "source_driver": "GeoJSON",
        "zipped": False,
    }

    async with ContextEngine("WRITE"):
        await datasets.create_dataset(dataset)
        await versions.create_version(dataset, version)
        new_asset = await assets.create_asset(
            dataset,
            version,
            asset_type="Database table",
            asset_uri="s3://path/to/file",
            creation_options=creation_options,
        )

    job_env = writer_secrets + [
        {"name": "STATUS_URL", "value": f"http://app_test:{PORT}/tasks"}
    ]
    callback = callback_constructor(new_asset.asset_id)

    # Can't have two parents with the same name
    job1 = PostgresqlClientJob(
        dataset=dataset,
        job_name="job1",
        command=["test_mock_s3_awscli.sh", "-s", f"s3://{BUCKET}/{GEOJSON_NAME}"],
        environment=job_env,
        callback=callback,
    )
    job2 = PostgresqlClientJob(
        dataset=dataset,
        job_name="job1",
        command=["test_mock_s3_awscli.sh", "-s", f"s3://{BUCKET}/{GEOJSON_NAME}"],
        environment=job_env,
        callback=callback,
    )
    job3 = PostgresqlClientJob(
        dataset=dataset,
        job_name="job3",
        command=["test_mock_s3_awscli.sh", "-s", f"s3://{BUCKET}/{GEOJSON_NAME}"],
        environment=job_env,
        callback=callback,
        parents=[job1.job_name, job2.job_name],
    )

    message = ""
    try:
        await execute([job1, job2, job3])
    except TooManyRetriesError as e:
        message = str(e)

    assert message == ""

async def test_create_api_key_missing_value(
    user_id, alias, organization, email, domains
):
    async with ContextEngine("WRITE"):
        with pytest.raises(asyncpg.exceptions.NotNullViolationError):
            await create_api_key(
                user_id, alias, organization, email, domains, never_expires=False
            )

async def test_create_api_key_domains_not_list(
    user_id, alias, organization, email, domains
):
    async with ContextEngine("WRITE"):
        with pytest.raises(AssertionError):
            await create_api_key(
                user_id, alias, organization, email, domains, never_expires=False
            )

async def test_create_api_key_unique_constraint(
    user_id, alias, organization, email, domains
):
    """Submitting the same payload twice should violate the unique
    constraint."""
    async with ContextEngine("WRITE"):
        await create_api_key(
            user_id, alias, organization, email, domains, never_expires=False
        )
        with pytest.raises(asyncpg.exceptions.UniqueViolationError):
            await create_api_key(
                user_id, alias, organization, email, domains, never_expires=False
            )

async def create_version(dataset, version, input_data) -> None:
    # Create the version record
    async with ContextEngine("WRITE"):
        await versions.create_version(dataset, version, **input_data)

    # Make sure everything we need is in place.
    # To start off, the version should be in status "pending"
    # and its changelog should be an empty list.
    row = await versions.get_version(dataset, version)
    assert row.status == "pending"
    assert row.change_log == []

async def create_asset(dataset, version, asset_type, asset_uri, input_data) -> AssetORM:
    # Create dataset and version records
    await create_dataset(dataset)
    await create_version(dataset, version, input_data)

    async with ContextEngine("WRITE"):
        new_asset = await assets.create_asset(
            dataset,
            version,
            asset_type=asset_type,
            asset_uri=asset_uri,
        )

    return new_asset

async def test_get_api_key(user_id, alias, organization, email, domains):
    async with ContextEngine("WRITE"):
        create_row: ORMApiKey = await create_api_key(
            user_id, alias, organization, email, domains, never_expires=False
        )
        get_row: ORMApiKey = await get_api_key(create_row.api_key)

    assert create_row.api_key == get_row.api_key
    assert create_row.alias == get_row.alias
    assert create_row.email == get_row.email
    assert create_row.organization == get_row.organization
    assert create_row.expires_on == get_row.expires_on
    assert create_row.created_on == get_row.created_on
    assert create_row.updated_on == get_row.updated_on

async def create_dataset(dataset) -> None:
    # Create dataset record and dataset schema
    async with ContextEngine("WRITE"):
        await datasets.create_dataset(dataset)
        await db.status(CreateSchema(dataset))
        await db.status(f"GRANT USAGE ON SCHEMA {dataset} TO {READER_USERNAME};")
        await db.status(
            f"ALTER DEFAULT PRIVILEGES IN SCHEMA {dataset} GRANT SELECT ON TABLES TO {READER_USERNAME};"
        )

    row = await datasets.get_dataset(dataset)
    assert row.dataset == dataset
    assert dataset == await db.scalar(
        f"SELECT schema_name FROM information_schema.schemata WHERE schema_name = '{dataset}';"
    )

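# Why create_dataset issues both grants above: GRANT USAGE lets the reader
# role resolve objects in the new schema, while ALTER DEFAULT PRIVILEGES makes
# tables created in the schema later automatically SELECT-able, so each new
# version table is readable without an extra per-table grant.
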
async def test_create_api_key(user_id, alias, organization, email, domains):
    async with ContextEngine("WRITE"):
        row: ORMApiKey = await create_api_key(
            user_id, alias, organization, email, domains, never_expires=False
        )

    assert isinstance(row.api_key, UUID)
    assert row.user_id == user_id
    assert row.alias == alias
    assert row.organization == organization
    assert row.email == email
    assert row.domains == domains
    assert (row.expires_on - datetime.now()).total_seconds() == pytest.approx(
        timedelta(days=365).total_seconds(), 0.1
    )
    assert isinstance(row.created_on, datetime)
    assert isinstance(row.updated_on, datetime)

async def test_create_api_key_empty_domains(
    user_id, alias, organization, email, domains
):
    async with ContextEngine("WRITE"):
        row: ORMApiKey = await create_api_key(
            user_id, alias, organization, email, domains=[], never_expires=True
        )

    assert isinstance(row.api_key, UUID)
    assert row.user_id == user_id
    assert row.alias == alias
    assert row.organization == organization
    assert row.email == email
    assert row.domains == []
    assert row.expires_on is None
    assert isinstance(row.created_on, datetime)
    assert isinstance(row.updated_on, datetime)

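# A minimal sketch (an assumption, not the app's actual code) of the expiry
# rule the two tests above assert: never-expiring keys get expires_on = None,
# everything else expires roughly one year after creation.
from datetime import datetime, timedelta
from typing import Optional


def _expected_expires_on(never_expires: bool) -> Optional[datetime]:
    # Hypothetical helper for illustration only.
    return None if never_expires else datetime.now() + timedelta(days=365)
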
async def _create_vector_source_assets(dataset_name, version_name):
    # TODO: We currently only do the bare minimum here. We still need to add
    #  the gfw columns and check in all tasks so that the asset and versions
    #  are correctly set to "saved".
    from app.application import db

    with open(f"{os.path.dirname(__file__)}/fixtures/geojson/test.geojson") as src:
        geojson = json.load(src)

    async with ContextEngine("WRITE"):
        await db.all(
            f"""CREATE TABLE "{dataset_name}"."{version_name}" (fid integer, geom geometry);"""
        )
        await db.all(
            f"""INSERT INTO "{dataset_name}"."{version_name}" (fid, geom)
                SELECT 1, ST_GeomFromGeoJSON('{json.dumps(geojson["features"][0]["geometry"])}');"""
        )

async def test_delete_api_key(user_id, alias, organization, email, domains):
    async with ContextEngine("WRITE"):
        create_row: ORMApiKey = await create_api_key(
            user_id, alias, organization, email, domains, never_expires=False
        )
        delete_row: ORMApiKey = await delete_api_key(create_row.api_key)

    with pytest.raises(RecordNotFoundError):
        await get_api_key(create_row.api_key)

    assert create_row.api_key == delete_row.api_key
    assert create_row.alias == delete_row.alias
    assert create_row.email == delete_row.email
    assert create_row.organization == delete_row.organization
    assert create_row.expires_on == delete_row.expires_on
    assert create_row.created_on == delete_row.created_on
    assert create_row.updated_on == delete_row.updated_on

async def test_latest_versions():
    """Test that the trigger function on the versions table works.

    It is supposed to reset the is_latest field to False for all other
    versions of a dataset once one version's is_latest field is set to
    True. The get_latest_version function should always return the
    latest version number."""
    dataset_name = "test"

    # Add a dataset with two versions, both flagged as latest
    async with ContextEngine("WRITE"):
        await create_dataset(dataset_name)
        await create_version(dataset_name, "v1.1.1", is_latest=True)
        await create_version(dataset_name, "v1.1.2", is_latest=True)

    latest = await get_latest_version(dataset_name)
    first_row = await get_version(dataset_name, "v1.1.1")
    second_row = await get_version(dataset_name, "v1.1.2")

    assert first_row.is_latest is False
    assert second_row.is_latest is True
    assert latest == "v1.1.2"

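# A minimal sketch (an assumption, not the actual migration) of the kind of
# trigger the docstring above describes; all names here are illustrative only:
#
#   CREATE FUNCTION reset_is_latest() RETURNS trigger AS $$
#   BEGIN
#       UPDATE versions SET is_latest = false
#       WHERE dataset = NEW.dataset AND version != NEW.version;
#       RETURN NEW;
#   END;
#   $$ LANGUAGE plpgsql;
#
#   CREATE TRIGGER latest_version
#       AFTER INSERT OR UPDATE OF is_latest ON versions
#       FOR EACH ROW WHEN (NEW.is_latest)
#       EXECUTE PROCEDURE reset_is_latest();
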
async def test_delete_database_table():
    dataset = "test"
    version = "table"

    async with ContextEngine("WRITE"):
        # Create schema and table
        await db.all(f"CREATE SCHEMA {dataset};")
        await db.all(f"CREATE TABLE {dataset}.{version} (col1 text);")

        rows = await db.all(f"SELECT * FROM pg_tables WHERE schemaname='{dataset}';")
        assert len(rows) == 1

        # Test that the function drops the table
        await delete_database_table_asset(dataset, version)
        rows = await db.all(f"SELECT * FROM pg_tables WHERE schemaname='{dataset}';")
        assert len(rows) == 0

        # Clean up
        await db.all(f"DROP SCHEMA {dataset};")

async def test_get_api_key_from_user(user_id, alias, organization, email, domains):
    api_keys1 = list()
    api_keys2 = list()

    async with ContextEngine("WRITE"):
        row: ORMApiKey = await create_api_key(
            user_id, alias, organization, email, domains, never_expires=False
        )
        api_keys1.append(row.api_key)

        row = await create_api_key(
            user_id,
            str(uuid.uuid4()),
            organization,
            email,
            domains,
            never_expires=False,
        )
        api_keys1.append(row.api_key)

        new_user_id = str(uuid.uuid4())
        row = await create_api_key(
            new_user_id, alias, organization, email, domains, never_expires=False
        )
        api_keys2.append(row.api_key)

    rows = await get_api_keys_from_user(user_id)
    assert len(rows) == len(api_keys1)
    for row in rows:
        assert row.api_key in api_keys1

    rows = await get_api_keys_from_user(new_user_id)
    assert len(rows) == len(api_keys2)
    for row in rows:
        assert row.api_key in api_keys2

async def test_assets():
    """Testing all CRUD operations on assets in one go."""
    dataset_name = "test"
    version_name = "v1.1.1"

    # Add a dataset and a version
    async with ContextEngine("WRITE"):
        new_dataset = await create_dataset(dataset_name)
        new_version = await create_version(dataset_name, version_name)

    assert new_dataset.dataset == dataset_name
    assert new_version.dataset == dataset_name
    assert new_version.version == version_name

    # There should be no asset for the current version yet.
    # This will throw an error b/c when initialized correctly,
    # there will always be a default asset.
    result = ""
    try:
        await get_assets(dataset_name, version_name)
    except RecordNotFoundError as e:
        result = str(e)
    assert (
        result
        == f"No assets for version with name {dataset_name}.{version_name} found"
    )

    # Writing to the DB using a context engine with "READ" shouldn't work
    async with ContextEngine("READ"):
        result = ""
        try:
            await create_asset(
                dataset_name,
                version_name,
                asset_type="Database table",
                asset_uri="s3://path/to/file",
            )
        except asyncpg.exceptions.InsufficientPrivilegeError as e:
            result = str(e)
        assert result == "permission denied for table assets"

    # Using a context engine with "WRITE" should work
    async with ContextEngine("WRITE"):
        new_row = await create_asset(
            dataset_name,
            version_name,
            asset_type="Database table",
            asset_uri="s3://path/to/file",
        )
    assert isinstance(new_row.asset_id, UUID)
    assert new_row.dataset == dataset_name
    assert new_row.version == version_name
    assert new_row.asset_type == "Database table"
    assert new_row.asset_uri == "s3://path/to/file"
    assert new_row.status == "pending"
    assert new_row.is_managed is True
    assert new_row.creation_options == {}
    assert new_row.metadata == {}
    assert new_row.change_log == []

    # This shouldn't work a second time
    async with ContextEngine("WRITE"):
        result = ""
        try:
            await create_asset(
                dataset_name,
                version_name,
                asset_type="Database table",
                asset_uri="s3://path/to/file",
            )
        except RecordAlreadyExistsError as e:
            result = str(e)
        assert result == (
            "Cannot create asset of type Database table. "
            "Asset uri must be unique. "
            "An asset with uri s3://path/to/file already exists"
        )

    # There should be an entry now
    rows = await get_assets(dataset_name, version_name)
    assert isinstance(rows, list)
    assert len(rows) == 1
    assert rows[0].dataset == dataset_name
    assert rows[0].version == version_name
    assert isinstance(rows[0].asset_id, UUID)
    asset_id = rows[0].asset_id

    # There should be an entry now
    rows = await get_all_assets()
    assert isinstance(rows, list)
    assert len(rows) == 1
    assert rows[0].dataset == dataset_name
    assert rows[0].version == version_name

    # There should be an entry now
    rows = await get_assets_by_type("Database table")
    assert isinstance(rows, list)
    assert len(rows) == 1
    assert rows[0].dataset == dataset_name
    assert rows[0].version == version_name

    # There should be no such entry
    rows = await get_assets_by_type("Vector tile cache")
    assert isinstance(rows, list)
    assert len(rows) == 0

    # It should be possible to access the asset by asset id
    row = await get_asset(asset_id)
    assert row.dataset == dataset_name
    assert row.version == version_name

    # But only if the asset exists
    result = ""
    _asset_id = uuid4()
    try:
        await get_asset(_asset_id)
    except RecordNotFoundError as e:
        result = str(e)
    assert result == f"Could not find requested asset {_asset_id}"

    # It should be possible to update an asset using a context engine
    metadata = DatabaseTableMetadata(
        title="Test Title",
        tags=["tag1", "tag2"],
    )
    logs = ChangeLog(date_time=datetime.now(), status="pending", message="all good")
    async with ContextEngine("WRITE"):
        row = await update_asset(
            asset_id,
            metadata=metadata.dict(by_alias=True),
            change_log=[logs.dict(by_alias=True)],
        )
    assert row.metadata["title"] == "Test Title"
    assert row.metadata["tags"] == ["tag1", "tag2"]
    assert row.change_log[0]["date_time"] == json.loads(logs.json())["date_time"]
    assert row.change_log[0]["status"] == logs.dict(by_alias=True)["status"]
    assert row.change_log[0]["message"] == logs.dict(by_alias=True)["message"]

    # When deleting an asset, the method should return the deleted object
    async with ContextEngine("WRITE"):
        row = await delete_asset(asset_id)
    assert row.dataset == dataset_name
    assert row.version == version_name

    # After deleting the asset, there should be an empty DB
    rows = await get_all_assets()
    assert isinstance(rows, list)
    assert len(rows) == 0

async def test_vector_source_asset(batch_client, async_client):
    _, logs = batch_client

    ############################
    # Setup test
    ############################
    dataset = "test"
    sources = (SHP_NAME, GEOJSON_NAME)

    for i, source in enumerate(sources):
        version = f"v1.1.{i}"
        input_data = {
            "creation_options": {
                "source_type": "vector",
                "source_uri": [f"s3://{BUCKET}/{source}"],
                "source_driver": "GeoJSON",
                "create_dynamic_vector_tile_cache": True,
            },
            "metadata": {},
        }

        # We only need to create the dataset once
        skip_dataset = i > 0

        asset = await create_default_asset(
            dataset,
            version,
            version_payload=input_data,
            async_client=async_client,
            logs=logs,
            execute_batch_jobs=True,
            skip_dataset=skip_dataset,
        )
        asset_id = asset["asset_id"]

        await check_version_status(dataset, version, 3)
        await check_asset_status(dataset, version, 1)
        await check_task_status(asset_id, 7, "inherit_from_geostore")

        # There should be a table called "test"."<version>" with one row
        async with ContextEngine("READ"):
            count = await db.scalar(
                db.text(f'SELECT count(*) FROM {dataset}."{version}"')
            )
        assert count == 1

        # The geometry should also be accessible via geostore
        async with ContextEngine("READ"):
            rows: List[Geostore] = await Geostore.query.gino.all()

        assert len(rows) == 1 + i
        assert rows[0].gfw_geostore_id == UUID("1b368160-caf8-2bd7-819a-ad4949361f02")

        await check_dynamic_vector_tile_cache_status(dataset, version)

        # Queries
        response = await async_client.get(
            f"/dataset/{dataset}/{version}/query?sql=select count(*) from mytable;"
        )
        assert response.status_code == 200
        assert len(response.json()["data"]) == 1
        assert response.json()["data"][0]["count"] == 1

        with open(GEOJSON_PATH, "r") as geojson:
            raw_geom = json.load(geojson)["features"][0]["geometry"]

        geom = Geometry(type=raw_geom["type"], coordinates=raw_geom["coordinates"])
        geostore = GeostoreCommon(
            geojson=geom,
            geostore_id="17076d5ea9f214a5bdb68cc40433addb",
            area__ha=214324,
            bbox=[0, 0, 10, 10],
        )
        with patch(
            "app.utils.rw_api.get_geostore",
            return_value=geostore,
        ):
            response = await async_client.get(
                f"/dataset/{dataset}/{version}/query?sql=SELECT count(*) FROM mytable&geostore_id=17076d5ea9f214a5bdb68cc40433addb&geostore_origin=rw"
            )
        # print(response.json())
        assert response.status_code == 200
        assert len(response.json()["data"]) == 1
        assert response.json()["data"][0]["count"] == 1

        with open(GEOJSON_PATH2, "r") as geojson:
            raw_geom = json.load(geojson)["features"][0]["geometry"]

        geom = Geometry(type=raw_geom["type"], coordinates=raw_geom["coordinates"])
        geostore = GeostoreCommon(
            geojson=geom,
            geostore_id="17076d5ea9f214a5bdb68cc40433addb",
            area__ha=214324,
            bbox=[0, 0, 10, 10],
        )
        with patch(
            "app.utils.rw_api.get_geostore",
            return_value=geostore,
        ):
            response = await async_client.get(
                f"/dataset/{dataset}/{version}/query?sql=SELECT count(*) FROM mytable&geostore_id=17076d5ea9f214a5bdb68cc40433addb&geostore_origin=rw"
            )
        # print(response.json())
        assert response.status_code == 200
        assert len(response.json()["data"]) == 1
        assert response.json()["data"][0]["count"] == 0

        # System and administrative functions should be rejected
        response = await async_client.get(
            f"/dataset/{dataset}/{version}/query?sql=select current_catalog from mytable;"
        )
        assert response.status_code == 400

        response = await async_client.get(
            f"/dataset/{dataset}/{version}/query?sql=select version() from mytable;"
        )
        assert response.status_code == 400

        response = await async_client.get(
            f"/dataset/{dataset}/{version}/query?sql=select has_any_column_privilege() from mytable;"
        )
        assert response.status_code == 400

        response = await async_client.get(
            f"/dataset/{dataset}/{version}/query?sql=select format_type() from mytable;"
        )
        assert response.status_code == 400

        response = await async_client.get(
            f"/dataset/{dataset}/{version}/query?sql=select col_description() from mytable;"
        )
        assert response.status_code == 400

        response = await async_client.get(
            f"/dataset/{dataset}/{version}/query?sql=select txid_current() from mytable;"
        )
        assert response.status_code == 400

        response = await async_client.get(
            f"/dataset/{dataset}/{version}/query?sql=select current_setting() from mytable;"
        )
        assert response.status_code == 400

        response = await async_client.get(
            f"/dataset/{dataset}/{version}/query?sql=select pg_cancel_backend() from mytable;"
        )
        assert response.status_code == 400

        response = await async_client.get(
            f"/dataset/{dataset}/{version}/query?sql=select brin_summarize_new_values() from mytable;"
        )
        assert response.status_code == 400

        response = await async_client.get(
            f"/dataset/{dataset}/{version}/query?sql=select doesnotexist() from mytable;"
        )
        assert response.status_code == 400

        # Downloads
        response = await async_client.get(
            f"/dataset/{dataset}/{version}/download/csv?sql=select count(*) from mytable;"
        )
        assert response.status_code == 200
        assert response.text == '"count"\r\n1\r\n'

        # Stats
        # TODO: We currently don't compute stats; will need to update this
        #  test once the feature is available
        response = await async_client.get(f"/dataset/{dataset}/{version}/stats")
        print(response.json())
        assert response.status_code == 200
        assert response.json()["data"] is None

        # Fields
        response = await async_client.get(f"/dataset/{dataset}/{version}/fields")
        assert response.status_code == 200
        if i == 0:
            assert response.json()["data"] == [
                {"field_name": "gfw_fid", "field_alias": "gfw_fid", "field_description": None, "field_type": "integer", "is_feature_info": True, "is_filter": True},
                {"field_name": "fid", "field_alias": "fid", "field_description": None, "field_type": "numeric", "is_feature_info": True, "is_filter": True},
                {"field_name": "geom", "field_alias": "geom", "field_description": None, "field_type": "geometry", "is_feature_info": False, "is_filter": False},
                {"field_name": "geom_wm", "field_alias": "geom_wm", "field_description": None, "field_type": "geometry", "is_feature_info": False, "is_filter": False},
                {"field_name": "gfw_area__ha", "field_alias": "gfw_area__ha", "field_description": None, "field_type": "numeric", "is_feature_info": True, "is_filter": True},
                {"field_name": "gfw_geostore_id", "field_alias": "gfw_geostore_id", "field_description": None, "field_type": "uuid", "is_feature_info": True, "is_filter": True},
                {"field_name": "gfw_geojson", "field_alias": "gfw_geojson", "field_description": None, "field_type": "text", "is_feature_info": False, "is_filter": False},
                {"field_name": "gfw_bbox", "field_alias": "gfw_bbox", "field_description": None, "field_type": "ARRAY", "is_feature_info": False, "is_filter": False},
                {"field_name": "created_on", "field_alias": "created_on", "field_description": None, "field_type": "timestamp without time zone", "is_feature_info": False, "is_filter": False},
                {"field_name": "updated_on", "field_alias": "updated_on", "field_description": None, "field_type": "timestamp without time zone", "is_feature_info": False, "is_filter": False},
            ]
        else:
            # JSON file does not have fid field
            assert response.json()["data"] == [
                {"field_name": "gfw_fid", "field_alias": "gfw_fid", "field_description": None, "field_type": "integer", "is_feature_info": True, "is_filter": True},
                {"field_name": "geom", "field_alias": "geom", "field_description": None, "field_type": "geometry", "is_feature_info": False, "is_filter": False},
                {"field_name": "geom_wm", "field_alias": "geom_wm", "field_description": None, "field_type": "geometry", "is_feature_info": False, "is_filter": False},
                {"field_name": "gfw_area__ha", "field_alias": "gfw_area__ha", "field_description": None, "field_type": "numeric", "is_feature_info": True, "is_filter": True},
                {"field_name": "gfw_geostore_id", "field_alias": "gfw_geostore_id", "field_description": None, "field_type": "uuid", "is_feature_info": True, "is_filter": True},
                {"field_name": "gfw_geojson", "field_alias": "gfw_geojson", "field_description": None, "field_type": "text", "is_feature_info": False, "is_filter": False},
                {"field_name": "gfw_bbox", "field_alias": "gfw_bbox", "field_description": None, "field_type": "ARRAY", "is_feature_info": False, "is_filter": False},
                {"field_name": "created_on", "field_alias": "created_on", "field_description": None, "field_type": "timestamp without time zone", "is_feature_info": False, "is_filter": False},
                {"field_name": "updated_on", "field_alias": "updated_on", "field_description": None, "field_type": "timestamp without time zone", "is_feature_info": False, "is_filter": False},
            ]

    httpx.delete(f"http://localhost:{PORT}")

    response = await async_client.get(f"/asset/{asset_id}")
    assert response.status_code == 200

    response = await async_client.get("/dataset/different/v1.1.1/assets")
    assert response.status_code == 404

    response = await async_client.delete(f"/asset/{asset_id}")
    assert response.status_code == 409
    print(response.json())
    assert (
        response.json()["message"]
        == "Deletion failed. You cannot delete a default asset. To delete a default asset you must delete the parent version."
    )

async def test_features(async_client, batch_client):
    _, logs = batch_client

    ############################
    # Setup test
    ############################
    dataset = "table_test"
    version = "v202002.1"

    # Define partition schema
    partition_schema = list()
    years = range(2018, 2021)
    for year in years:
        for week in range(1, 54):
            try:
                name = f"y{year}_w{week:02}"
                start = pendulum.parse(f"{year}-W{week:02}").to_date_string()
                end = pendulum.parse(f"{year}-W{week:02}").add(days=7).to_date_string()
                partition_schema.append(
                    {"partition_suffix": name, "start_value": start, "end_value": end}
                )
            except ParserError:
                # Year has only 52 weeks
                pass

    input_data = {
        "creation_options": {
            "source_type": "table",
            "source_uri": [f"s3://{BUCKET}/{TSV_NAME}"],
            "create_dynamic_vector_tile_cache": True,
            "source_driver": "text",
            "delimiter": "\t",
            "has_header": True,
            "latitude": "latitude",
            "longitude": "longitude",
            "cluster": {"index_type": "gist", "column_names": ["geom_wm"]},
            "partitions": {
                "partition_type": "range",
                "partition_column": "alert__date",
                "partition_schema": partition_schema,
            },
            "indices": [
                {"index_type": "gist", "column_names": ["geom"]},
                {"index_type": "gist", "column_names": ["geom_wm"]},
                {"index_type": "btree", "column_names": ["alert__date"]},
            ],
            "table_schema": [
                {"field_name": "rspo_oil_palm__certification_status", "field_type": "text"},
                {"field_name": "per_forest_concession__type", "field_type": "text"},
                {"field_name": "idn_forest_area__type", "field_type": "text"},
                {"field_name": "alert__count", "field_type": "integer"},
                {"field_name": "adm1", "field_type": "integer"},
                {"field_name": "adm2", "field_type": "integer"},
            ],
        },
        "metadata": {},
    }

    # Create default asset in mocked Batch
    asset = await create_default_asset(
        dataset,
        version,
        version_payload=input_data,
        async_client=async_client,
        logs=logs,
        execute_batch_jobs=True,
    )
    asset_id = asset["asset_id"]

    response = await async_client.get(f"/asset/{asset_id}")
    assert response.json()["data"]["status"] == "saved"

    ########################
    # Test features endpoint
    ########################
    async with ContextEngine("READ"):
        row = await db.scalar(f"""SELECT COUNT(*) FROM "{dataset}"."{version}" """)
    print(row)

    # Exact match, z > 9 (though see FIXME in app/routes/features/features.py)
    resp = await async_client.get(
        f"/dataset/{dataset}/{version}/features?lat=4.42813&lng=17.97655&z=10"
    )
    print(resp.json())
    assert resp.status_code == 200
    assert len(resp.json()["data"]) == 1
    assert resp.json()["data"][0]["iso"] == "CAF"

    # Nearby match
    resp = await async_client.get(
        f"/dataset/{dataset}/{version}/features?lat=9.40645&lng=-3.3681&z=9"
    )
    assert resp.status_code == 200
    assert len(resp.json()["data"]) == 1
    assert resp.json()["data"][0]["iso"] == "CIV"

    # No match
    resp = await async_client.get(
        f"/dataset/{dataset}/{version}/features?lat=10&lng=-10&z=22"
    )
    assert resp.status_code == 200
    assert len(resp.json()["data"]) == 0

    # Invalid latitude, longitude, or zoom level
    # Check all the constraints at once, why not?
    expected_messages = [
        {
            "loc": ["query", "lat"],
            "msg": "ensure this value is less than or equal to 90",
            "type": "value_error.number.not_le",
            "ctx": {"limit_value": 90},
        },
        {
            "loc": ["query", "lng"],
            "msg": "ensure this value is less than or equal to 180",
            "type": "value_error.number.not_le",
            "ctx": {"limit_value": 180},
        },
        {
            "loc": ["query", "z"],
            "msg": "ensure this value is less than or equal to 22",
            "type": "value_error.number.not_le",
            "ctx": {"limit_value": 22},
        },
    ]
    resp = await async_client.get(
        f"/dataset/{dataset}/{version}/features?lat=360&lng=360&z=25"
    )
    assert resp.status_code == 422
    assert resp.json()["status"] == "failed"
    assert set(
        json.dumps(msg, sort_keys=True) for msg in resp.json()["message"]
    ) == set(json.dumps(msg, sort_keys=True) for msg in expected_messages)

    # Invalid latitude, longitude, or zoom level, opposite limits
    expected_messages = [
        {
            "loc": ["query", "lat"],
            "msg": "ensure this value is greater than or equal to -90",
            "type": "value_error.number.not_ge",
            "ctx": {"limit_value": -90},
        },
        {
            "loc": ["query", "lng"],
            "msg": "ensure this value is greater than or equal to -180",
            "type": "value_error.number.not_ge",
            "ctx": {"limit_value": -180},
        },
        {
            "loc": ["query", "z"],
            "msg": "ensure this value is greater than or equal to 0",
            "type": "value_error.number.not_ge",
            "ctx": {"limit_value": 0},
        },
    ]
    resp = await async_client.get(
        f"/dataset/{dataset}/{version}/features?lat=-360&lng=-360&z=-1"
    )
    print(resp.json())
    assert resp.status_code == 422
    assert resp.json()["status"] == "failed"
    assert set(
        json.dumps(msg, sort_keys=True) for msg in resp.json()["message"]
    ) == set(json.dumps(msg, sort_keys=True) for msg in expected_messages)

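# For reference, the ParserError guard in the partition loop above exists
# because not every ISO year has 53 weeks: of 2018-2020, only 2020 does. So,
# for example, pendulum.parse("2018-W53") should raise a ParserError while
# pendulum.parse("2020-W53") parses.
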
async def test_dataset_version_geostore(async_client, batch_client):
    _, logs = batch_client

    ############################
    # Setup test
    ############################
    dataset = "test"
    source = GEOJSON_NAME
    version = "v1.1.1"
    input_data = {
        "creation_options": {
            "source_type": "vector",
            "source_uri": [f"s3://{BUCKET}/{source}"],
            "source_driver": "GeoJSON",
            "create_dynamic_vector_tile_cache": True,
        },
        "metadata": {},
    }

    _ = await create_default_asset(
        dataset,
        version,
        version_payload=input_data,
        async_client=async_client,
        logs=logs,
        execute_batch_jobs=True,
        skip_dataset=False,
    )

    # There should be a table called "test"."v1.1.1" with one row
    async with ContextEngine("READ"):
        count = await db.scalar(db.text(f'SELECT count(*) FROM {dataset}."{version}"'))
    assert count == 1

    ############################
    # Test geostore endpoints
    ############################

    # This is the hash obtained by POSTing the sample GeoJSON with Postman
    sample_geojson_hash = "41b67a74-4ea2-df3f-c3f3-d7131a645f9a"

    # The geometry should be accessible via the geostore table
    async with ContextEngine("READ"):
        rows: List[Geostore] = await Geostore.query.gino.all()
    assert len(rows) == 1
    assert rows[0].gfw_geostore_id == UUID(sample_geojson_hash)

    # The geostore should be accessible with its hash via the geostore endpoint
    resp = await async_client.get(f"/geostore/{sample_geojson_hash}")
    # Validate response structure
    GeostoreResponse.parse_raw(resp.text)

    # ...and via the dataset + version-specific endpoint
    resp_by_version = await async_client.get(
        f"/dataset/{dataset}/{version}/geostore/{sample_geojson_hash}"
    )
    # Validate response structure
    GeostoreResponse.parse_raw(resp_by_version.text)

    # If we POST a user area there should then be two geostore records.
    # The new one should not be findable via the dataset.version
    # endpoint. Let's test that.
    payload = {
        "geometry": {
            "type": "MultiPolygon",
            "coordinates": [[[[8, 51], [11, 55], [12, 49], [8, 51]]]],
        }
    }

    # This is the gfw_geostore_id returned when POSTing the payload with Postman
    second_sample_geojson_hash = "b44a9213-4fc2-14e6-02e3-96faf0d89499"

    # Create the new geostore record
    post_resp = await async_client.post("/geostore", json=payload)
    assert post_resp.status_code == 201
    assert post_resp.json()["data"]["gfw_geostore_id"] == second_sample_geojson_hash

    # The second geometry should be accessible via the geostore table
    async with ContextEngine("READ"):
        rows: List[Geostore] = await Geostore.query.gino.all()
    assert len(rows) == 2

    # ...but it should not be visible in the dataset.version child table
    get_resp = await async_client.get(
        f"/dataset/{dataset}/{version}/geostore/{second_sample_geojson_hash}"
    )
    assert get_resp.status_code == 404
    assert get_resp.json() == {
        "status": "failed",
        "message": f'Area with gfw_geostore_id {second_sample_geojson_hash} does not exist in "{dataset}"."{version}"',
    }

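# The geostore hashes above are deterministic: POSTing the same GeoJSON always
# yields the same gfw_geostore_id. A minimal sketch of how such an id could be
# derived (an assumption for illustration; not the app's actual hashing code):
import hashlib
import json
from uuid import UUID


def _geostore_id(geojson: dict) -> UUID:
    # Hypothetical helper: a 128-bit digest of the serialized geometry,
    # rendered as a UUID.
    digest = hashlib.md5(json.dumps(geojson).encode("utf-8")).hexdigest()
    return UUID(digest)
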
async def test_create_api_key_wrong_type(user_id, alias, organization, email, domains):
    async with ContextEngine("WRITE"):
        with pytest.raises(asyncpg.exceptions.DataError):
            await create_api_key(
                user_id, alias, organization, email, domains, never_expires=False
            )

async def test_assets_metadata():
    """Test metadata inheritance across all CRUD operations on assets."""
    dataset = "test"
    version = "v1.1.1"
    dataset_metadata = {"title": "Title", "subtitle": "Subtitle"}
    version_metadata = {"subtitle": "New Subtitle", "version_number": version}
    asset_metadata = {"title": "New Title"}

    # Add a dataset, version, and asset
    async with ContextEngine("WRITE"):
        await create_dataset(dataset, metadata=dataset_metadata)
        await create_version(dataset, version, metadata=version_metadata)
        new_asset = await create_asset(
            dataset,
            version,
            asset_type="Database table",
            asset_uri="s3://path/to/file",
            metadata=asset_metadata,
        )

    # The asset metadata should be overlaid on the dataset and version metadata
    result_metadata = {
        "title": "New Title",
        "subtitle": "New Subtitle",
        "version_number": version,
    }
    asset_id = new_asset.asset_id
    assert new_asset.metadata == result_metadata

    async with ContextEngine("READ"):
        asset = await get_asset(asset_id)
    assert asset.metadata == result_metadata

    async with ContextEngine("READ"):
        assets = await get_assets(dataset, version)
    assert assets[0].metadata == result_metadata

    async with ContextEngine("READ"):
        assets = await get_assets_by_type("Database table")
    assert assets[0].metadata == result_metadata

    async with ContextEngine("READ"):
        assets = await get_all_assets()
    assert assets[0].metadata == result_metadata

    result_metadata = {
        "title": "New Title",
        "subtitle": "New Subtitle",
        "source": "Source",
        "version_number": version,
    }
    async with ContextEngine("WRITE"):
        asset = await update_asset(asset_id, metadata={"source": "Source"})
    assert asset.metadata == result_metadata

    async with ContextEngine("WRITE"):
        asset = await delete_asset(asset_id)
    assert asset.metadata == result_metadata

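# A minimal sketch (an assumption, not the app's actual implementation) of the
# metadata inheritance the assertions above rely on: each level's metadata is
# overlaid on its parent's, so assets fall back to version and dataset values.
def _inherit(parent: dict, child: dict) -> dict:
    # Hypothetical helper for illustration only.
    return {**parent, **child}


# e.g. _inherit(_inherit(dataset_metadata, version_metadata), asset_metadata)
# == {"title": "New Title", "subtitle": "New Subtitle", "version_number": version}
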
async def test_asset_extent_stats_empty(async_client):
    dataset = "test_asset_extent_stats_empty"
    version = "v1.0.0"

    pixetl_output_files_prefix = (
        f"{dataset}/{version}/raster/epsg-4326/90/27008/percent/"
    )
    delete_s3_files(DATA_LAKE_BUCKET, pixetl_output_files_prefix)

    raster_version_payload = {
        "creation_options": {
            "source_type": "raster",
            "source_uri": [
                f"s3://{DATA_LAKE_BUCKET}/{FAKE_INT_DATA_PARAMS['prefix']}/tiles.geojson"
            ],
            "source_driver": "GeoTIFF",
            "data_type": FAKE_INT_DATA_PARAMS["dtype_name"],
            "no_data": FAKE_INT_DATA_PARAMS["no_data"],
            "pixel_meaning": "percent",
            "grid": "90/27008",
            "resampling": "nearest",
            "overwrite": True,
            "compute_histogram": False,
            "compute_stats": False,
        },
    }

    await create_default_asset(
        dataset,
        version,
        version_payload=raster_version_payload,
        async_client=async_client,
        execute_batch_jobs=True,
    )

    resp = await async_client.get(f"/dataset/{dataset}/{version}/assets")
    asset_id = resp.json()["data"][0]["asset_id"]

    # Update the extent fields of the asset to be None to simulate
    # older assets in the DB
    async with ContextEngine("WRITE"):
        _ = await update_asset(asset_id, extent=None)

    # Verify that hitting the stats and extent endpoints for such assets
    # yields data=None rather than a 500
    resp = await async_client.get(f"/asset/{asset_id}/extent")
    assert resp.status_code == 200
    assert resp.json()["data"] is None

    resp = await async_client.get(f"/dataset/{dataset}/{version}/extent")
    assert resp.status_code == 200
    assert resp.json()["data"] is None

    resp = await async_client.get(f"/asset/{asset_id}/stats")
    assert resp.status_code == 200
    assert resp.json()["data"] is None

    resp = await async_client.get(f"/dataset/{dataset}/{version}/stats")
    assert resp.status_code == 200
    assert resp.json()["data"] is None

async def test_versions():
    """Testing all CRUD operations on versions in one go."""
    dataset_name = "test"
    version_name = "v1.1.1"

    # Add a dataset
    async with ContextEngine("WRITE"):
        new_row = await create_dataset(dataset_name)
    assert new_row.dataset == dataset_name

    # There should be no versions for new datasets
    rows = await get_versions(dataset_name)
    assert isinstance(rows, list)
    assert len(rows) == 0

    # Writing to the DB using a context engine with "READ" shouldn't work
    async with ContextEngine("READ"):
        result = ""
        try:
            await create_version(dataset_name, version_name)
        except asyncpg.exceptions.InsufficientPrivilegeError as e:
            result = str(e)
        assert result == "permission denied for table versions"

    # Using a context engine with "WRITE" should work
    async with ContextEngine("WRITE"):
        new_row = await create_version(dataset_name, version_name, source_type="table")
    assert new_row.dataset == dataset_name
    assert new_row.version == version_name
    assert new_row.is_latest is False
    assert new_row.is_mutable is False
    assert new_row.source_type == "table"
    assert new_row.source_uri == []
    assert new_row.status == "pending"
    assert new_row.has_geostore is False
    assert new_row.metadata == {}
    assert new_row.change_log == []

    # This shouldn't work a second time
    async with ContextEngine("WRITE"):
        result = ""
        try:
            await create_version(dataset_name, version_name, source_type="table")
        except RecordAlreadyExistsError as e:
            result = str(e)
        assert (
            result == f"Version with name {dataset_name}.{version_name} already exists"
        )

    # There should be an entry now
    rows = await get_versions(dataset_name)
    assert isinstance(rows, list)
    assert len(rows) == 1
    assert rows[0].dataset == dataset_name
    assert rows[0].version == version_name

    # Version names should only have a single column
    names = await get_version_names(dataset_name)
    assert isinstance(names, list)
    assert len(names) == 1
    assert names[0].version == version_name

    result = ""
    try:
        _ = names[0].dataset
    except AttributeError as e:
        result = str(e)
    assert result == "Could not locate column in row for column 'dataset'"

    # It should be possible to access the version by dataset and version name
    row = await get_version(dataset_name, version_name)
    assert row.dataset == dataset_name
    assert row.version == version_name

    # But only if the version exists
    result = ""
    try:
        await get_version("test2", version_name)
    except RecordNotFoundError as e:
        result = str(e)
    assert result == f"Version with name test2.{version_name} does not exist"

    # It should be possible to update a version using a context engine
    metadata = VersionMetadata(title="Test Title", tags=["tag1", "tag2"])
    logs = ChangeLog(date_time=datetime.now(), status="pending", message="all good")
    async with ContextEngine("WRITE"):
        row = await update_version(
            dataset_name,
            version_name,
            metadata=metadata.dict(by_alias=True),
            change_log=[logs.dict(by_alias=True)],
        )
    assert row.metadata["title"] == "Test Title"
    assert row.metadata["tags"] == ["tag1", "tag2"]
    assert row.change_log[0]["date_time"] == json.loads(logs.json())["date_time"]
    assert row.change_log[0]["status"] == logs.dict(by_alias=True)["status"]
    assert row.change_log[0]["message"] == logs.dict(by_alias=True)["message"]

    # When deleting a version, the method should return the deleted object
    async with ContextEngine("WRITE"):
        row = await delete_version(dataset_name, version_name)
    assert row.dataset == dataset_name
    assert row.version == version_name

    # After deleting the version, there should be an empty DB
    rows = await get_versions(dataset_name)
    assert isinstance(rows, list)
    assert len(rows) == 0

async def test_get_api_key_bad_type(api_key):
    async with ContextEngine("READ"):
        with pytest.raises(asyncpg.exceptions.DataError):
            await get_api_key(api_key)

async def test_get_api_key_bad_key(api_key):
    async with ContextEngine("READ"):
        with pytest.raises(RecordNotFoundError):
            await get_api_key(api_key)

# Presumably registered as a pytest fixture in the full test module: it yields
# immediately and deletes all API keys afterwards as cleanup.
async def delete_api_keys():
    yield
    async with ContextEngine("WRITE"):
        await ORMApiKey.delete.gino.status()

async def test_dataset():
    """Testing all CRUD operations on datasets in one go."""
    # There should be an empty DB
    rows = await get_datasets()
    assert isinstance(rows, list)
    assert len(rows) == 0

    # Writing to the DB using a context engine with "READ" shouldn't work
    async with ContextEngine("READ"):
        result = ""
        try:
            await create_dataset("test")
        except asyncpg.exceptions.InsufficientPrivilegeError as e:
            result = str(e)
        assert result == "permission denied for table datasets"

    # Using a context engine with "WRITE" should work
    async with ContextEngine("WRITE"):
        new_row = await create_dataset("test")
    assert new_row.dataset == "test"

    # This shouldn't work a second time
    async with ContextEngine("WRITE"):
        result = ""
        try:
            await create_dataset("test")
        except RecordAlreadyExistsError as e:
            result = str(e)
        assert result == "Dataset with name test already exists"

    # Trying to write without a context shouldn't work either
    result = ""
    try:
        await create_dataset("test2")
    except asyncpg.exceptions.InsufficientPrivilegeError as e:
        result = str(e)
    assert result == "permission denied for table datasets"

    # There should be an entry now
    rows = await get_datasets()
    assert isinstance(rows, list)
    assert len(rows) == 1
    assert rows[0].dataset == "test"

    # It should be possible to access the dataset by dataset name
    row = await get_dataset("test")
    assert row.dataset == "test"
    assert row.metadata == {}

    # But only if the dataset exists
    result = ""
    try:
        await get_dataset("test2")
    except RecordNotFoundError as e:
        result = str(e)
    assert result == "Dataset with name test2 does not exist"

    # It should be possible to update a dataset using a context engine
    metadata = DatasetMetadata(title="Test Title", tags=["tag1", "tag2"])
    data = DatasetUpdateIn(metadata=metadata)
    async with ContextEngine("WRITE"):
        row = await update_dataset("test", **data.dict(exclude_unset=True))
    assert row.metadata["title"] == "Test Title"
    assert row.metadata["tags"] == ["tag1", "tag2"]

    # When deleting a dataset, the method should return the deleted object
    async with ContextEngine("WRITE"):
        row = await delete_dataset("test")
    assert row.dataset == "test"

    # After deleting the dataset, there should be an empty DB
    rows = await get_datasets()
    assert isinstance(rows, list)
    assert len(rows) == 0

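# Note on ContextEngine, used throughout these tests: it is an async context
# manager that selects which database role the enclosed statements run under.
# "WRITE" presumably binds a read-write engine and "READ" a read-only one,
# which is why create/update/delete calls under ContextEngine("READ") fail
# with InsufficientPrivilegeError in the tests above.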