async def no_symbology(
    dataset: str,
    version: str,
    pixel_meaning: str,
    source_asset_co: RasterTileSetSourceCreationOptions,
    zoom_level: int,
    max_zoom: int,
    jobs_dict: Dict,
) -> Tuple[List[Job], str]:
    """Symbology step that creates no jobs and hands back the source tiles.

    The URI of the source raster tile set is computed for the
    ``zoom_{zoom_level}`` grid in EPSG:3857 and passed through
    ``tile_uri_to_tiles_geojson``. ``pixel_meaning``, ``max_zoom`` and
    ``jobs_dict`` are accepted but not read by this function.

    Returns:
        An empty job list and the converted source URI.

    Raises:
        RuntimeError: If ``source_asset_co.source_uri`` is empty or unset.
    """
    if not source_asset_co.source_uri:
        raise RuntimeError("No source URI set.")

    zoom_co = source_asset_co.copy(
        deep=True, update={"grid": f"zoom_{zoom_level}"}
    )
    zoom_asset_uri = get_asset_uri(
        dataset,
        version,
        AssetType.raster_tile_set,
        zoom_co.dict(by_alias=True),
        "epsg:3857",
    )
    wm_source_uri: str = tile_uri_to_tiles_geojson(zoom_asset_uri)
    return [], wm_source_uri
def get_zoom_source_uri(
    dataset: str,
    version: str,
    creation_options: RasterTileSetSourceCreationOptions,
    zoom_level: int,
    max_zoom: int,
) -> Optional[List[str]]:
    """Return the source URIs to use when building ``zoom_level``.

    At the highest zoom level (``zoom_level == max_zoom``) the URIs given in
    the creation options are used. For every other zoom level the source is
    the same tile set (same ``pixel_meaning``) one zoom level up, in
    EPSG:3857.

    Every URI is passed through ``tile_uri_to_tiles_geojson`` before being
    returned.

    Returns:
        The list of converted URIs, or ``None`` when there is no source URI
        (for example when the creation options carry none at ``max_zoom``).
    """
    if zoom_level == max_zoom:
        source_uri = creation_options.source_uri
    else:
        # Only build the "one zoom level up" URI when it is actually used.
        source_uri = [
            get_asset_uri(
                dataset,
                version,
                AssetType.raster_tile_set,
                {
                    "grid": f"zoom_{zoom_level + 1}",
                    "pixel_meaning": creation_options.pixel_meaning,
                },
                "epsg:3857",
            )
        ]

    if not source_uri:
        return None
    return [tile_uri_to_tiles_geojson(uri) for uri in source_uri]
async def static_vector_1x1_asset(
    dataset: str,
    version: str,
    asset_id: UUID,
    input_data: Dict[str, Any],
) -> ChangeLog:
    """Run the ``export_1x1_grid.sh`` job for a 1x1 grid asset.

    The asset's field metadata is first updated with the field attributes
    resolved from the creation options. A single PostgreSQL client job is
    then executed, which receives the comma-separated field names (``-C``)
    and the 1x1 grid asset URI as its target (``-T``).

    Args:
        dataset: Dataset name.
        version: Dataset version.
        asset_id: ID of the asset whose metadata is updated and which the
            job callback is bound to.
        input_data: Asset payload; only ``input_data["creation_options"]``
            is read.

    Returns:
        The change log returned by ``execute``.
    """
    # --- Update asset metadata -------------------------------------------
    grid_co = creation_option_factory(
        AssetType.grid_1x1, input_data["creation_options"]
    )
    field_attributes: List[Dict[str, Any]] = await get_field_attributes(
        dataset, version, grid_co
    )
    grid_1x1_uri = get_asset_uri(dataset, version, AssetType.grid_1x1)

    await assets.update_asset(asset_id, fields=field_attributes)

    # --- Define the export job -------------------------------------------
    column_names = ",".join(field["field_name"] for field in field_attributes)
    command: List[str] = [
        "export_1x1_grid.sh",
        "-d",
        dataset,
        "-v",
        version,
        "-C",
        column_names,
        "-T",
        grid_1x1_uri,
    ]

    export_job = PostgresqlClientJob(
        dataset=dataset,
        job_name="export_1x1_grid",
        job_queue=DATA_LAKE_JOB_QUEUE,
        command=command,
        memory=9000,
        environment=reader_secrets,
        callback=callback_constructor(asset_id),
    )

    # --- Execute ---------------------------------------------------------
    change_log: ChangeLog = await execute([export_job])
    return change_log
async def create_gdaldem_job(
    dataset: str,
    version: str,
    co: PixETLCreationOptions,
    job_name: str,
    callback: Callback,
    parents: Optional[List[Job]] = None,
) -> Job:
    """Create a Batch job that applies a colormap to a raster tile set using
    gdaldem.

    The job runs ``apply_colormap.sh`` with the JSON-encoded symbology and
    no-data value, the single source URI from ``co`` and, as target, the
    directory of the tile set's tiles.geojson key in EPSG:3857.

    Args:
        dataset: Dataset name.
        version: Dataset version.
        co: Creation options of the colormapped tile set. ``co.source_uri``
            must hold exactly one URI.
        job_name: Name given to the Batch job.
        callback: Callback handed to the job.
        parents: Jobs this job depends on; only their names are passed on.

    Returns:
        The configured ``GDALDEMJob``.

    Raises:
        AssertionError: If ``co.source_uri`` is not a list containing
            exactly one entry.
    """
    symbology = json.dumps(jsonable_encoder(co.symbology))
    no_data = json.dumps(co.no_data)

    # Possibly not after https://github.com/wri/gfw-data-api/pull/153 ?
    # Raised explicitly instead of via `assert` so the check still runs under
    # `python -O`. AssertionError is kept so callers see the same exception
    # type as before.
    if not isinstance(co.source_uri, list) or len(co.source_uri) != 1:
        raise AssertionError(
            f"Expected exactly one source URI, got: {co.source_uri!r}"
        )
    source_asset_uri = co.source_uri[0]

    target_asset_uri = tile_uri_to_tiles_geojson(
        get_asset_uri(
            dataset,
            version,
            AssetType.raster_tile_set,
            co.dict(by_alias=True),
            "epsg:3857",
        )
    )
    # Drop the file name from the key; the script receives the prefix only.
    target_prefix = posixpath.dirname(split_s3_path(target_asset_uri)[1])

    command = [
        "apply_colormap.sh",
        "-d",
        dataset,
        "-v",
        version,
        "-j",
        symbology,
        "-n",
        no_data,
        "-s",
        source_asset_uri,
        "-T",
        target_prefix,
    ]

    # Only override the attempt duration when a timeout was requested.
    kwargs: Dict[str, Any] = {}
    if co.timeout_sec is not None:
        kwargs["attempt_duration_seconds"] = co.timeout_sec

    return GDALDEMJob(
        dataset=dataset,
        job_name=job_name,
        command=command,
        environment=JOB_ENV,
        callback=callback,
        parents=[parent.job_name for parent in parents] if parents else None,
        **kwargs,
    )
async def create_wm_tile_set_job(
    dataset: str,
    version: str,
    creation_options: RasterTileSetSourceCreationOptions,
    job_name: str,
    parents: Optional[List[Job]] = None,
    use_resampler: bool = False,
) -> Tuple[Job, str]:
    """Register a raster tile set asset (EPSG:3857) and build the job for it.

    An asset record is created for the tile set described by
    ``creation_options``. The job is built with ``create_resample_job`` when
    ``use_resampler`` is true and with ``create_pixetl_job`` otherwise, and
    is finally passed through ``scale_batch_job`` with the zoom level parsed
    from ``creation_options.grid``.

    Returns:
        The scaled job and the URI of the new asset.
    """
    asset_uri = get_asset_uri(
        dataset,
        version,
        AssetType.raster_tile_set,
        creation_options.dict(by_alias=True),
        "epsg:3857",
    )

    # Create an asset record
    asset_model = AssetCreateIn(
        asset_type=AssetType.raster_tile_set,
        asset_uri=asset_uri,
        is_managed=True,
        creation_options=creation_options,
        metadata=RasterTileSetMetadata(),
    )
    wm_asset_record = await create_asset(
        dataset, version, **asset_model.dict(by_alias=True)
    )
    logger.debug(f"Created asset for {asset_uri}")

    # TODO: Consider removing the use_resampler argument and changing this
    #  to "if creation_options.calc is None:"
    #  Make sure to test different scenarios when done!
    if use_resampler:
        # NOTE: str.strip("zoom_") removes any of the characters z, o, m, _
        # from both ends rather than a literal prefix; that leaves the digits
        # for grids named "zoom_<n>".
        resample_zoom = int(creation_options.grid.strip("zoom_"))
        job = await create_resample_job(
            dataset,
            version,
            creation_options,
            resample_zoom,
            job_name,
            callback_constructor(wm_asset_record.asset_id),
            parents=parents,
        )
    else:
        job = await create_pixetl_job(
            dataset,
            version,
            creation_options,
            job_name,
            callback_constructor(wm_asset_record.asset_id),
            parents=parents,
        )

    zoom_level = int(creation_options.grid.strip("zoom_"))
    scaled_job = scale_batch_job(job, zoom_level)
    return scaled_job, asset_uri
async def get_extent(asset_id: UUID) -> Optional[Extent]:
    """Load the extent stored next to an asset's tiles on S3.

    The asset row is looked up, its URI is rebuilt from the stored creation
    options (with the SRID inferred from the grid), and the object addressed
    by ``tile_uri_to_extent_geojson`` is downloaded and parsed as JSON.

    Returns:
        An ``Extent`` built from the parsed document, or ``None`` when the
        document is empty.
    """
    row: ORMAsset = await get_asset(asset_id)
    srid = infer_srid_from_grid(row.creation_options.get("grid"))
    asset_uri: str = get_asset_uri(
        row.dataset,
        row.version,
        row.asset_type,
        row.creation_options,
        srid=srid,
    )

    extent_uri = tile_uri_to_extent_geojson(asset_uri)
    bucket, key = split_s3_path(extent_uri)

    # NOTE: get_object is not awaited, so the download runs synchronously
    # inside this coroutine.
    response = get_s3_client().get_object(Bucket=bucket, Key=key)
    raw_document = response["Body"].read().decode("utf-8")
    extent_geojson: Dict[str, Any] = json.loads(raw_document)

    return Extent(**extent_geojson) if extent_geojson else None
async def _get_raster_stats(asset_id: UUID) -> RasterStats:
    """Collect band statistics from an asset's tiles.geojson on S3.

    The asset URI is rebuilt from the stored asset row (SRID inferred from
    the grid), the object addressed by ``tile_uri_to_tiles_geojson`` is
    downloaded and parsed as JSON, and the resulting feature collection is
    handed to ``_collect_bandstats``.

    Returns:
        ``RasterStats`` whose ``bands`` are the collected band statistics.
    """
    row: ORMAsset = await get_asset(asset_id)
    srid = infer_srid_from_grid(row.creation_options.get("grid"))
    asset_uri: str = get_asset_uri(
        row.dataset,
        row.version,
        row.asset_type,
        row.creation_options,
        srid=srid,
    )

    tiles_uri = tile_uri_to_tiles_geojson(asset_uri)
    bucket, tiles_key = split_s3_path(tiles_uri)

    # NOTE: get_object is not awaited, so the download runs synchronously
    # inside this coroutine.
    response = get_s3_client().get_object(Bucket=bucket, Key=tiles_key)
    raw_document = response["Body"].read().decode("utf-8")
    tiles_geojson: Dict[str, Any] = json.loads(raw_document)

    features = FeatureCollection(**tiles_geojson)
    bandstats: List[BandStats] = _collect_bandstats(features)
    return RasterStats(bands=bandstats)
async def create_resample_job(
    dataset: str,
    version: str,
    co: PixETLCreationOptions,
    zoom_level: int,
    job_name: str,
    callback: Callback,
    parents: Optional[List[Job]] = None,
) -> Job:
    """Create a Batch job to process rasters using the GDAL CLI utilities
    rather than pixetl.

    Suitable only for resampling from (EPSG:4326 or EPSG:3857) to EPSG:3857
    with no calc string.

    Args:
        dataset: Dataset name.
        version: Dataset version.
        co: Creation options of the target tile set. ``co.source_uri`` must
            hold exactly one URI and ``co.calc`` must be ``None``.
        zoom_level: Zoom level passed to ``resample.sh``.
        job_name: Name given to the Batch job.
        callback: Callback handed to the job.
        parents: Jobs this job depends on; only their names are passed on.

    Returns:
        The configured ``PixETLJob`` running ``resample.sh``.

    Raises:
        AssertionError: If ``co.source_uri`` is not a list containing
            exactly one entry.
        ValueError: If ``co.calc`` is set.
    """
    # Raised explicitly instead of via `assert` so the check still runs under
    # `python -O`. AssertionError is kept so callers see the same exception
    # type as before.
    if not isinstance(co.source_uri, list) or len(co.source_uri) != 1:
        raise AssertionError(
            f"Expected exactly one source URI, got: {co.source_uri!r}"
        )
    source_asset_uri = co.source_uri[0]

    if co.calc is not None:
        raise ValueError(
            "Attempting to run the resample script with a calc string specified!"
        )

    target_asset_uri = tile_uri_to_tiles_geojson(
        get_asset_uri(
            dataset,
            version,
            AssetType.raster_tile_set,
            co.dict(by_alias=True),
            "epsg:3857",
        )
    )

    # We want to wind up with "dataset/version/raster/projection/zoom_level/implementation"
    target_prefix = posixpath.dirname(
        posixpath.dirname(split_s3_path(target_asset_uri)[1])
    )

    # The GDAL utilities use "near" whereas rasterio/pixetl use "nearest"
    resampling_method = (
        "near" if co.resampling == ResamplingMethod.nearest else co.resampling
    )

    command = [
        "resample.sh",
        "-d",
        dataset,
        "-v",
        version,
        "-s",
        source_asset_uri,
        "-r",
        f"{resampling_method}",
        "--zoom_level",
        f"{zoom_level}",
        "-T",
        target_prefix,
    ]

    # Only override the attempt duration when a timeout was requested.
    kwargs: Dict[str, Any] = {}
    if co.timeout_sec is not None:
        kwargs["attempt_duration_seconds"] = co.timeout_sec

    return PixETLJob(
        dataset=dataset,
        job_name=job_name,
        command=command,
        environment=JOB_ENV,
        callback=callback,
        parents=[parent.job_name for parent in parents] if parents else None,
        **kwargs,
    )
async def raster_tile_cache_asset(
    dataset: str,
    version: str,
    asset_id: UUID,
    input_data: Dict[str, Any],
) -> ChangeLog:
    """Generate Raster Tile Cache Assets.

    Walking from ``max_zoom`` down to ``min_zoom``, each zoom level gets:

    1. a job reprojecting the source asset to Web Mercator (chained to the
       reprojection job of the zoom level above),
    2. the jobs returned by the symbology function selected by the
       symbology type,
    3. a tile cache job, for zoom levels up to and including
       ``max_static_zoom``.

    All jobs are executed together at the end.

    Args:
        dataset: Dataset name.
        version: Dataset version.
        asset_id: ID of the tile cache asset; tile cache job callbacks are
            bound to it.
        input_data: Asset payload. ``input_data["creation_options"]`` must
            contain ``min_zoom``, ``max_zoom``, ``max_static_zoom``,
            ``implementation``, ``symbology``, ``resampling`` and
            ``source_asset_id``.

    Returns:
        The change log returned by ``execute``.

    Raises:
        KeyError: If one of the required creation options is missing.
    """
    # TODO: Refactor to be easier to test

    creation_options = input_data["creation_options"]
    min_zoom = creation_options["min_zoom"]
    max_zoom = creation_options["max_zoom"]
    max_static_zoom = creation_options["max_static_zoom"]
    implementation = creation_options["implementation"]
    symbology = creation_options["symbology"]
    resampling = creation_options["resampling"]

    # source_asset_id is currently required. Could perhaps make it optional
    # in the case that the default asset is the only one.
    source_asset: ORMAsset = await get_asset(creation_options["source_asset_id"])

    # Get the creation options from the original raster tile set asset and
    # overwrite settings. Make sure source_type and source_driver are set in
    # case it is an auxiliary asset
    new_source_uri = [
        tile_uri_to_tiles_geojson(
            get_asset_uri(
                dataset,
                version,
                AssetType.raster_tile_set,
                source_asset.creation_options,
            )
        ).replace("/geotiff", "/gdal-geotiff")
    ]

    # The first thing we do for each zoom level is reproject the source asset
    # to web-mercator. We don't want the calc string (if any) used to
    # create the source asset to be applied again to the already transformed
    # data, so set it to None.
    source_asset_co = RasterTileSetSourceCreationOptions(
        # TODO: With python 3.9, we can use the `|` operator here
        #  waiting for https://github.com/tiangolo/uvicorn-gunicorn-fastapi-docker/pull/67
        **{
            **source_asset.creation_options,
            # Keys below override whatever the source asset stored.
            "source_type": RasterSourceType.raster,
            "source_driver": RasterDrivers.geotiff,
            "source_uri": new_source_uri,
            "calc": None,
            "resampling": resampling,
            "compute_stats": False,
            "compute_histogram": False,
            "symbology": Symbology(**symbology),
            "subset": None,
        }
    )

    # If float data type, convert to int in derivative assets for performance
    # FIXME: Make this work for multi-band inputs
    max_zoom_calc = None
    if source_asset_co.data_type == DataType.boolean:
        pass  # So the next line doesn't break
    elif np.issubdtype(np.dtype(source_asset_co.data_type), np.floating):
        logger.info("Source datatype is float subtype, converting to int")
        source_asset_co, max_zoom_calc = convert_float_to_int(
            source_asset.stats, source_asset_co
        )

    assert source_asset_co.symbology is not None
    # Look the symbology entry up once; both the function and the bit depth
    # are the same for every zoom level.
    symbology_entry = symbology_constructor[source_asset_co.symbology.type]
    symbology_function = symbology_entry.function
    bit_depth: int = symbology_entry.bit_depth

    # We want to make sure that the final RGB asset is named after the
    # implementation of the tile cache and that the source_asset name is not
    # already used by another intermediate asset.
    # TODO: Actually make sure the intermediate assets aren't going to
    #  overwrite any existing assets
    if symbology_function is no_symbology:
        source_asset_co.pixel_meaning = implementation
    else:
        source_asset_co.pixel_meaning = (
            f"{source_asset_co.pixel_meaning}_{implementation}"
        )

    job_list: List[Job] = []
    jobs_dict: Dict[int, Dict[str, Job]] = {}

    for zoom_level in range(max_zoom, min_zoom - 1, -1):
        jobs_dict[zoom_level] = {}

        # Every level except the top one waits for the reprojection of the
        # level above it.
        if zoom_level == max_zoom:
            source_reprojection_parent_jobs: List[Job] = []
        else:
            source_reprojection_parent_jobs = [
                jobs_dict[zoom_level + 1]["source_reprojection_job"]
            ]

        source_reprojection_job, _ = await reproject_to_web_mercator(
            dataset,
            version,
            source_asset_co,
            zoom_level,
            max_zoom,
            source_reprojection_parent_jobs,
            max_zoom_resampling=PIXETL_DEFAULT_RESAMPLING,
            max_zoom_calc=max_zoom_calc,
            use_resampler=max_zoom_calc is None,
        )
        jobs_dict[zoom_level]["source_reprojection_job"] = source_reprojection_job
        job_list.append(source_reprojection_job)

        symbology_jobs: List[Job]
        symbology_uri: str

        # Hand the symbology function its own copy of the creation options.
        symbology_co = source_asset_co.copy(deep=True)
        symbology_jobs, symbology_uri = await symbology_function(
            dataset,
            version,
            implementation,
            symbology_co,
            zoom_level,
            max_zoom,
            jobs_dict,
        )
        job_list += symbology_jobs

        if zoom_level <= max_static_zoom:
            tile_cache_job: Job = await create_tile_cache(
                dataset,
                version,
                symbology_uri,
                zoom_level,
                implementation,
                callback_constructor(asset_id),
                [*symbology_jobs, source_reprojection_job],
                bit_depth,
            )
            job_list.append(tile_cache_job)

    log: ChangeLog = await execute(job_list)
    return log
async def _merge_assets(
    dataset: str,
    version: str,
    pixel_meaning: str,
    asset1_uri: str,
    asset2_uri: str,
    zoom_level: int,
    parents: List[Job],
    calc_str: str = "np.ma.array([A, B, C, D])",
    band_count: int = 4,
) -> Tuple[List[Job], str]:
    """Merge two source assets into one encoded uint8 raster tile set.

    A new raster tile set asset is registered whose sources are
    ``asset1_uri`` and ``asset2_uri`` (in that order) and whose calc
    expression is ``calc_str``. The default expression works for 3+1 band
    sources, such as RGB + Intensity as Alpha. A pixetl job named
    ``merge_assets_zoom_{zoom_level}`` is created for it and scaled for the
    zoom level.

    Returns:
        A one-element job list and the merged asset's URI, passed through
        ``tile_uri_to_tiles_geojson``.
    """
    merged_co = RasterTileSetSourceCreationOptions(
        pixel_meaning=pixel_meaning,
        data_type=DataType.uint8,  # FIXME: Revisit for 16-bit assets
        band_count=band_count,
        no_data=None,
        resampling=ResamplingMethod.nearest,
        grid=Grid(f"zoom_{zoom_level}"),
        compute_stats=False,
        compute_histogram=False,
        source_type=RasterSourceType.raster,
        source_driver=RasterDrivers.geotiff,
        source_uri=[asset1_uri, asset2_uri],
        calc=calc_str,
        photometric=PhotometricType.rgb,
    )

    merged_asset_uri = get_asset_uri(
        dataset,
        version,
        AssetType.raster_tile_set,
        merged_co.dict(by_alias=True),
        "epsg:3857",
    )

    logger.debug(
        f"ATTEMPTING TO CREATE MERGED ASSET WITH THESE CREATION OPTIONS: {merged_co}"
    )

    # Create an asset record
    asset_model = AssetCreateIn(
        asset_type=AssetType.raster_tile_set,
        asset_uri=merged_asset_uri,
        is_managed=True,
        creation_options=merged_co,
        metadata=RasterTileSetMetadata(),
    )
    asset = await create_asset(dataset, version, **asset_model.dict(by_alias=True))

    logger.debug(
        f"ZOOM LEVEL {zoom_level} MERGED ASSET CREATED WITH ASSET_ID {asset.asset_id}"
    )

    merge_job = await create_pixetl_job(
        dataset,
        version,
        merged_co,
        job_name=f"merge_assets_zoom_{zoom_level}",
        callback=callback_constructor(asset.asset_id),
        parents=parents,
    )
    merge_job = scale_batch_job(merge_job, zoom_level)

    return [merge_job], tile_uri_to_tiles_geojson(merged_asset_uri)
async def _create_colormapped_asset(
    dataset: str,
    version: str,
    pixel_meaning: str,
    source_asset_co: RasterTileSetSourceCreationOptions,
    zoom_level: int,
    jobs_dict: Dict,
) -> Tuple[List[Job], str]:
    """Register a colormapped tile set asset and build its gdaldem job.

    The source is the Web Mercator tile set of ``source_asset_co`` at
    ``zoom_level``. The new asset shares its creation options except for the
    source URI, calc (cleared), resampling (pixetl default) and pixel
    meaning. The gdaldem job depends on the source reprojection job stored in
    ``jobs_dict`` for this zoom level.

    Returns:
        A one-element job list and the colormapped asset's URI as produced
        by ``get_asset_uri`` (not converted with
        ``tile_uri_to_tiles_geojson``).
    """
    wm_source_co = source_asset_co.copy(
        deep=True, update={"grid": f"zoom_{zoom_level}"}
    )
    wm_source_asset_uri = get_asset_uri(
        dataset,
        version,
        AssetType.raster_tile_set,
        wm_source_co.dict(by_alias=True),
        "epsg:3857",
    )
    wm_source_uri: str = tile_uri_to_tiles_geojson(wm_source_asset_uri)

    colormap_overrides = {
        "source_uri": [wm_source_uri],
        "calc": None,
        "resampling": PIXETL_DEFAULT_RESAMPLING,
        "pixel_meaning": pixel_meaning,
    }
    colormap_co = wm_source_co.copy(deep=True, update=colormap_overrides)

    colormap_asset_uri = get_asset_uri(
        dataset,
        version,
        AssetType.raster_tile_set,
        colormap_co.dict(by_alias=True),
        "epsg:3857",
    )

    # Create an asset record
    asset_model = AssetCreateIn(
        asset_type=AssetType.raster_tile_set,
        asset_uri=colormap_asset_uri,
        is_managed=True,
        creation_options=colormap_co,
    )
    colormap_asset_record = await create_asset(
        dataset, version, **asset_model.dict(by_alias=True)
    )

    logger.debug(
        f"Created asset record for {colormap_asset_uri} "
        f"with creation options: {colormap_co}"
    )

    # The colormap can only be applied once the reprojected source exists.
    reprojection_job = jobs_dict[zoom_level]["source_reprojection_job"]
    job_name = sanitize_batch_job_name(
        f"{dataset}_{version}_{pixel_meaning}_{zoom_level}"
    )

    # Apply the colormap
    colormap_job = await create_gdaldem_job(
        dataset,
        version,
        colormap_co,
        job_name,
        callback_constructor(colormap_asset_record.asset_id),
        parents=[reprojection_job],
    )
    colormap_job = scale_batch_job(colormap_job, zoom_level)

    return [colormap_job], colormap_asset_uri
async def year_intensity_symbology(
    dataset: str,
    version: str,
    pixel_meaning: str,
    source_asset_co: RasterTileSetSourceCreationOptions,
    zoom_level: int,
    max_zoom: int,
    jobs_dict: Dict,
) -> Tuple[List[Job], str]:
    """Create Raster Tile Set asset which combines year raster and intensity
    raster into one.

    An intensity asset is requested from ``_create_intensity_asset`` with the
    calc ``(A > 0) * 255`` and average resampling. It is then merged with the
    source (year) raster of the same zoom level into a four band encoded
    raster.

    This symbology is used for the Tree Cover Loss dataset.

    Returns:
        The intensity jobs followed by the merge jobs, and the URI of the
        merged asset.
    """
    intensity_jobs, intensity_uri = await _create_intensity_asset(
        dataset,
        version,
        pixel_meaning,
        source_asset_co,
        zoom_level,
        max_zoom,
        jobs_dict,
        "(A > 0) * 255",
        ResamplingMethod.average,
    )

    # The resulting raster channels are as follows:
    # 1. Intensity
    # 2. All zeros
    # 3. Year
    # 4. Alpha (which is set to 255 everywhere intensity is >0)
    merge_calc_string = "np.ma.array([B, np.ma.zeros(A.shape, dtype='uint8'), A, (B > 0) * 255], fill_value=0).astype('uint8')"

    zoom_co = source_asset_co.copy(
        deep=True, update={"grid": f"zoom_{zoom_level}"}
    )
    wm_source_uri: str = get_asset_uri(
        dataset,
        version,
        AssetType.raster_tile_set,
        zoom_co.dict(by_alias=True),
        "epsg:3857",
    )

    # We also need to depend on the original source reprojection job
    merge_parents = [
        *intensity_jobs,
        jobs_dict[zoom_level]["source_reprojection_job"],
    ]

    merge_jobs, final_asset_uri = await _merge_assets(
        dataset,
        version,
        pixel_meaning,
        tile_uri_to_tiles_geojson(wm_source_uri),
        tile_uri_to_tiles_geojson(intensity_uri),
        zoom_level,
        merge_parents,
        merge_calc_string,
        4,
    )
    return [*intensity_jobs, *merge_jobs], final_asset_uri
async def date_conf_intensity_multi_8_symbology(
    dataset: str,
    version: str,
    pixel_meaning: str,
    source_asset_co: RasterTileSetSourceCreationOptions,
    zoom_level: int,
    max_zoom: int,
    jobs_dict: Dict,
) -> Tuple[List[Job], str]:
    """Create a Raster Tile Set asset which combines the earliest detected
    alerts of three date_conf bands/alert systems (new encoding) with a new
    derived intensity asset, and the confidences of each of the original
    alerts.

    An "intensity" asset is requested from ``_create_intensity_asset`` with
    a calc that yields ``MAX_8_BIT_INTENSITY`` wherever the source data is
    greater than zero, and with bilinear resampling. The date_conf tile set
    of this zoom level and the intensity asset are then merged with the
    calc returned by ``integrated_alerts_merge_calc``, using
    ``_merge_assets``' default band count.

    Returns:
        The intensity jobs followed by the merge jobs, and the URI of the
        merged asset.
    """
    # What we want is the maximum intensity for this scenario anywhere there
    # is an alert in any system.
    intensity_max_calc_string = (
        f"np.ma.array((A.data > 0) * {MAX_8_BIT_INTENSITY}, mask=False)"
    )

    # The intensity asset is a single uint8 band with no inherited calc.
    intensity_overrides = {
        "calc": None,
        "band_count": 1,
        "data_type": DataType.uint8,
    }
    intensity_co = source_asset_co.copy(deep=True, update=intensity_overrides)

    intensity_jobs, intensity_uri = await _create_intensity_asset(
        dataset,
        version,
        pixel_meaning,
        intensity_co,
        zoom_level,
        max_zoom,
        jobs_dict,
        intensity_max_calc_string,
        ResamplingMethod.bilinear,
    )

    zoom_co = source_asset_co.copy(
        deep=True, update={"grid": f"zoom_{zoom_level}"}
    )
    wm_date_conf_uri: str = get_asset_uri(
        dataset,
        version,
        AssetType.raster_tile_set,
        zoom_co.dict(by_alias=True),
        "epsg:3857",
    )

    merge_calc_string: str = integrated_alerts_merge_calc()

    # We also need to depend on the original source reprojection job
    merge_parents = [
        *intensity_jobs,
        jobs_dict[zoom_level]["source_reprojection_job"],
    ]

    merge_jobs, final_asset_uri = await _merge_assets(
        dataset,
        version,
        pixel_meaning,
        tile_uri_to_tiles_geojson(wm_date_conf_uri),
        tile_uri_to_tiles_geojson(intensity_uri),
        zoom_level,
        merge_parents,
        merge_calc_string,
    )
    return [*intensity_jobs, *merge_jobs], final_asset_uri
async def date_conf_intensity_symbology(
    dataset: str,
    version: str,
    pixel_meaning: str,
    source_asset_co: RasterTileSetSourceCreationOptions,
    zoom_level: int,
    max_zoom: int,
    jobs_dict: Dict,
) -> Tuple[List[Job], str]:
    """Create a Raster Tile Set asset which is the combination of a date_conf
    asset and a new derived intensity asset.

    An "intensity" asset is requested from ``_create_intensity_asset`` with
    the calc ``(A > 0) * MAX_8_BIT_INTENSITY`` and bilinear resampling. The
    date_conf tile set of this zoom level and the intensity asset are then
    merged into a three band encoded asset with the calc returned by
    ``date_conf_merge_calc``.

    Returns:
        The intensity jobs followed by the merge jobs, and the URI of the
        merged asset.
    """
    # The intensity asset is a single uint8 band with no inherited calc.
    intensity_overrides = {
        "calc": None,
        "band_count": 1,
        "data_type": DataType.uint8,
    }
    intensity_co = source_asset_co.copy(deep=True, update=intensity_overrides)

    intensity_max_calc_string = f"(A > 0) * {MAX_8_BIT_INTENSITY}"

    intensity_jobs, intensity_uri = await _create_intensity_asset(
        dataset,
        version,
        pixel_meaning,
        intensity_co,
        zoom_level,
        max_zoom,
        jobs_dict,
        intensity_max_calc_string,
        ResamplingMethod.bilinear,
    )

    zoom_co = source_asset_co.copy(
        deep=True, update={"grid": f"zoom_{zoom_level}"}
    )
    wm_date_conf_uri: str = get_asset_uri(
        dataset,
        version,
        AssetType.raster_tile_set,
        zoom_co.dict(by_alias=True),
        "epsg:3857",
    )

    merge_calc_string: str = date_conf_merge_calc()

    # We also need to depend on the original source reprojection job
    merge_parents = [
        *intensity_jobs,
        jobs_dict[zoom_level]["source_reprojection_job"],
    ]

    merge_jobs, final_asset_uri = await _merge_assets(
        dataset,
        version,
        pixel_meaning,
        tile_uri_to_tiles_geojson(wm_date_conf_uri),
        tile_uri_to_tiles_geojson(intensity_uri),
        zoom_level,
        merge_parents,
        merge_calc_string,
        3,
    )
    return [*intensity_jobs, *merge_jobs], final_asset_uri