def export_daily_surface_water(
    region,
    target_date,
    harmonic_coefs=None,
    harmonic_collection=None,
    feature_names=None,
    label=None,
    look_back=30,
    lag=4,
    output_confidence=False,
    output_flood=False,
    fusion_samples=None,
    fusion_model_asset=None,
    output_asset_path=None,
    output_bucket_path=None,
    initial_threshold=0.1,
    tile=False,
    tile_buffer=100000,
):
    """Estimate surface water for `target_date` and start the export tasks.

    The harmonic (long-term) prediction for `target_date` is corrected with a
    linear trend fitted to the residuals between the harmonic prediction and
    the fused observations of the `look_back` days that end `lag` days before
    `target_date`. The corrected image is thresholded with
    `thresholding.edge_otsu`, combined with JRC permanent water, and exported
    together with the scaled fusion image.

    NOTE(review): a second function named `export_daily_surface_water` is
    defined later in this file; if both live at module level the later one
    replaces this one at import time (including for the recursive tile call
    below) -- confirm whether this version is still needed.

    args:
        region (ee.Geometry): area to process and export
        target_date (str | ee.Date): date to estimate surface water for.
            Anything that is not an `ee.Date` is passed to `ee.Date`
        harmonic_coefs (optional): image (or image id) of harmonic
            coefficients carrying a `scale_factor` property. If None then
            `harmonic_collection` must be given. default = None
        harmonic_collection (optional): image collection (or id) of harmonic
            coefficient tiles; it is mosaicked and scaled by the
            `scale_factor` of its first image. default = None
        feature_names (list[str]): feature columns handed to
            `ml.random_forest_ee`
        label (str): column/band predicted by the fusion model
        look_back (int, optional): number of daily residual images used for
            the linear trend. default = 30
        lag (int, optional): number of days before `target_date` at which the
            look-back window ends. default = 4
        output_confidence (bool, optional): if True a `confidence` band (mean
            of 100 simulated water masks, in percent) is added. default = False
        output_flood (bool, optional): if True a `flood` band (water that is
            not permanent water) is added. default = False
        fusion_samples (optional): samples used to train the fusion model.
            Required, because `fusion_model_asset` is not implemented
        fusion_model_asset (optional): not implemented; raises
            NotImplementedError when used without `fusion_samples`
        output_asset_path (str, optional): asset id prefix; `_water` and
            `_fusion` are appended. Takes precedence over `output_bucket_path`
        output_bucket_path (str, optional): cloud storage path; the third
            "/"-separated component is used as the bucket name
        initial_threshold (float, optional): initial threshold passed to
            `thresholding.edge_otsu`. default = 0.1
        tile (bool, optional): if True the region is split into 1.0 degree
            tiles and this function is called once per tile. default = False
        tile_buffer (float, optional): buffer distance applied to `region`
            before fusing and thresholding. default = 100000

    raises:
        NotImplementedError: if only `fusion_model_asset` is given
        ValueError: if `fusion_samples` and `fusion_model_asset` are both None
        ValueError: if `harmonic_coefs` and `harmonic_collection` are both None
        ValueError: if `output_asset_path` and `output_bucket_path` are both None
    """

    def get_weights(i):
        """Closure returning the harmonic-minus-observed residual image for
        the day `lag + i` days before `target_date`.
        """
        i = ee.Number(i)
        # number of days (negative) to shift the target date by
        t_diff = ee.Number(i).multiply(-1).subtract(lag)
        new_date = target_date.advance(t_diff, "day")

        corr_img = (
            ds.collection.select(label)
            .filterDate(new_date, new_date.advance(1, "day"))
            .qualityMosaic(label)
        )

        time_img = timeseries.get_dummy_img(new_date)
        harmon_pred = (
            timeseries.add_harmonic_coefs(time_img)
            .multiply(harmonic_coefs)
            .reduce("sum")
        )
        harmon_diff = harmon_pred.subtract(corr_img).rename("residual")

        return harmon_diff.set("system:time_start", new_date.millis())

    def calc_confidence(i):
        """Closure returning one simulated water mask; `i` seeds the random
        perturbation of the long- and short-term predictions.
        """
        i = ee.Number(i)

        # uniform sampling of std dev at 95% confidence interval
        long_term_seed = i.add(500)
        short_term_seed = i.add(1000)

        long_term_random = (
            ee.Image.random(long_term_seed).multiply(3.92).subtract(1.96)
        )
        short_term_random = (
            ee.Image.random(short_term_seed).multiply(3.92).subtract(1.96)
        )

        lin_sim = lin_pred.add(short_term_random.multiply(linCi))
        har_sim = har_pred.add(long_term_random.multiply(harCi))

        sim_pred = har_sim.subtract(lin_sim)
        return sim_pred.gt(ci_threshold).uint8()

    # Fail fast on unusable argument combinations, before any Earth Engine
    # object is built or any request is sent.
    if fusion_samples is None:
        if fusion_model_asset is not None:
            raise NotImplementedError()
        raise ValueError(
            "Either 'fusion_samples' or 'fusion_model_path' needs to be defined to run fusion process"
        )
    if harmonic_coefs is None and harmonic_collection is None:
        raise ValueError(
            "Either 'harmonic_coefs' or 'harmonic_collection' needs to be defined to run fusion process"
        )
    if output_asset_path is None and output_bucket_path is None:
        raise ValueError(
            "Either 'output_asset_path' or 'output_bucket_path' needs to be defined to run fusion export process"
        )

    if tile:
        land_area = (
            ee.FeatureCollection("USDOS/LSIB_SIMPLE/2017")
            .filterBounds(region)
            .geometry(100)
            .buffer(2500, maxError=100)
        )
        grid = geeutils.tile_region(region, intersect_geom=land_area, grid_size=1.0)

        n = grid.size().getInfo()
        grid_list = grid.toList(n)

        for i in range(n):
            if output_asset_path is not None:
                output_asset_tile = output_asset_path + f"daily_tile{i:05d}"
            else:
                output_asset_tile = None

            if output_bucket_path is not None:
                output_bucket_tile = output_bucket_path + f"_tile{i:05d}"
            else:
                output_bucket_tile = None

            grid_tile = ee.Feature(grid_list.get(i)).geometry()

            export_daily_surface_water(
                region=grid_tile,
                target_date=target_date,
                harmonic_coefs=harmonic_coefs,
                harmonic_collection=harmonic_collection,
                feature_names=feature_names,
                label=label,
                look_back=look_back,
                lag=lag,
                output_confidence=output_confidence,
                output_flood=output_flood,
                fusion_samples=fusion_samples,
                fusion_model_asset=fusion_model_asset,
                output_asset_path=output_asset_tile,
                output_bucket_path=output_bucket_tile,
                initial_threshold=initial_threshold,
                tile=False,
                tile_buffer=tile_buffer,
            )
        return

    if not isinstance(target_date, ee.Date):
        target_date = ee.Date(target_date)

    end_time = target_date.advance(-(lag - 1), "day")
    start_time = end_time.advance(-look_back, "day")

    fusion_model, scaling_dict = ml.random_forest_ee(
        25, fusion_samples, feature_names, label, mode="regression"
    )

    now = datetime.datetime.now()
    # "%S" is seconds (00-59); the lowercase "%s" is a non-portable
    # platform extension and must not be used here.
    time_id = now.strftime("%Y%m%d%H%M%S")
    time_str = now.strftime("%Y-%m-%d %H:%M:%S")

    if harmonic_coefs is not None:
        harmonic_coefs = ee.Image(harmonic_coefs)
        harmonic_coefs = harmonic_coefs.multiply(
            ee.Image(ee.Number(harmonic_coefs.get("scale_factor")))
        )
    else:
        harmonic_collection = ee.ImageCollection(harmonic_collection)
        first = ee.Image(harmonic_collection.first())
        harmonic_coefs = harmonic_collection.mosaic().multiply(
            ee.Image(ee.Number(first.get("scale_factor")))
        )

    if output_confidence:
        # error bands are only needed for the confidence simulation
        harmonic_err = harmonic_coefs.select(".*(x|y|n)$")
    harmonic_coefs = harmonic_coefs.select("^(c|t|s).*")

    prod_region = region.buffer(tile_buffer, 100)

    ds, label = _fuse_dataset(
        prod_region,
        start_time,
        end_time,
        fusion_model,
        scaling_dict,
        target_band=label,
        use_viirs=True,
        use_modis=False,
    )

    dummy_target = timeseries.get_dummy_img(target_date)

    weights = ee.ImageCollection.fromImages(
        ee.List.sequence(0, look_back - 1).map(get_weights)
    ).sort("system:time_start")

    weights_lr = timeseries.fit_linear_trend(
        weights, dependent="residual", output_err=output_confidence
    )
    weights_coefs = weights_lr.select("^(c|t).*")

    lin_pred = (
        dummy_target.multiply(weights_coefs).reduce("sum").rename("residual_est")
    )
    har_pred = (
        timeseries.add_harmonic_coefs(dummy_target)
        .multiply(harmonic_coefs)
        .reduce("sum")
    )

    fused_pred = (
        (har_pred.subtract(lin_pred))
        .convolve(ee.Kernel.gaussian(2.5))
        .rename("fused_product")
    )

    ci_threshold = thresholding.edge_otsu(
        fused_pred,
        initial_threshold=initial_threshold,
        edge_buffer=300,
        region=prod_region,
        invert=True,
        reduction_scale=200,
        return_threshold=True,
    )

    # pixels flagged as waterClass 3 in any of the 5 most recent yearly images
    permanent_water = (
        ee.ImageCollection("JRC/GSW1_2/YearlyHistory")
        .limit(5, "system:time_start", False)
        .map(lambda x: x.select('waterClass').eq(3))
        .sum().unmask(0).gt(0)
    )

    water = fused_pred.gt(ci_threshold).Or(permanent_water).rename("water").uint8()

    if output_flood:
        flood = water.select("water").And(permanent_water.Not()).rename("flood")
        water = water.addBands(flood)

    if output_confidence:
        weights_err = weights_lr.select(".*(x|y|n)$")

        linCi = weights_err.expression(
            "mse * ((1/n) + ((t-xmean)**2/xr))**(1/2)",
            {
                "mse": weights_err.select("residual_y"),
                "n": weights_err.select("n"),
                "xmean": weights_err.select("mean_x"),
                "xr": weights_err.select("residual_x"),
                "t": dummy_target.select("time"),
            },
        )

        harCi = harmonic_err.expression(
            "mse * ((1/n) + ((t-xmean)**2/xr))**(1/2)",
            {
                "mse": harmonic_err.select("residual_y"),
                "n": harmonic_err.select("n"),
                "xmean": harmonic_err.select("mean_x"),
                "xr": harmonic_err.select("residual_x"),
                "t": dummy_target.select("time"),
            },
        )

        confidence = (
            ee.ImageCollection.fromImages(
                ee.List.sequence(0, 99).map(calc_confidence)
            )
            .reduce(ee.Reducer.mean(), 16)
            .multiply(100)
            .uint8()
            .rename("confidence")
        )

        out_water = ee.Image.cat([confidence, water,])
    else:
        out_water = water

    fused_pred = fused_pred.multiply(10000).int16()

    if output_asset_path is not None:
        # create metadata dict
        metadata = ee.Dictionary(
            {
                "hf_version": hf.__version__,
                "system:time_start": target_date.millis(),
                "system:time_end": target_date.advance(86399, "seconds").millis(),
                "execution_time": time_str,
                "lag": lag,
                "look_back": look_back,
            }
        )

        geeutils.export_image(
            out_water.set(metadata.combine({"product": "water"})),
            region,
            output_asset_path + "_water",
            description=f"hydrafloods_water_ee_export_{time_id}",
            scale=10,
            crs="EPSG:4326",
        )
        geeutils.export_image(
            fused_pred.set(metadata.combine({"product": "fusion"})),
            region,
            output_asset_path + "_fusion",
            description=f"hydrafloods_fusion_ee_export_{time_id}",
            scale=10,
            crs="EPSG:4326",
        )
    else:
        export_region = region.bounds(maxError=100).getInfo()["coordinates"]

        # split "<scheme>//<bucket>/<dirs...>/<name><ext>" into its parts
        bucket_path, ext = os.path.splitext(output_bucket_path)
        fcomponents = bucket_path.split("/")
        bucket = fcomponents[2]
        fpath = fcomponents[3:-1]

        f_water = "/".join(fpath + [fcomponents[-1] + "_water" + ext])
        f_fusion = "/".join(fpath + [fcomponents[-1] + "_fusion" + ext])

        water_task = ee.batch.Export.image.toCloudStorage(
            image=out_water,
            description=f"hydrafloods_water_gcp_export_{time_id}",
            bucket=bucket,
            fileNamePrefix=f_water,
            region=export_region,
            scale=10,
            crs="EPSG:4326",
            maxPixels=1e13,
            fileFormat="GeoTIFF",
            formatOptions={"cloudOptimized": True},
        )
        water_task.start()

        fusion_task = ee.batch.Export.image.toCloudStorage(
            image=fused_pred,
            description=f"hydrafloods_fusion_gcp_export_{time_id}",
            bucket=bucket,
            fileNamePrefix=f_fusion,
            region=export_region,
            scale=10,
            crs="EPSG:4326",
            maxPixels=1e13,
            fileFormat="GeoTIFF",
            formatOptions={"cloudOptimized": True},
        )
        fusion_task.start()

    return
def export_harmonics(
    region,
    start_time,
    end_time,
    feature_names=None,
    label=None,
    fusion_samples=None,
    fusion_model_asset=None,
    output_asset_path=None,
    output_bucket=None,
    tile=False,
):
    """Fit a harmonic trend to the fused "mndwi" series and export it.

    A random forest regression is trained from `fusion_samples`, a fused
    dataset is built with `_fuse_dataset` for `start_time`..`end_time`, and
    `timeseries.fit_harmonic_trend` is fitted to its "mndwi" band. The
    coefficients are divided by a scale factor of 0.0001, cast to int32 and
    exported as an Earth Engine asset.

    args:
        region (ee.Geometry): area to process and export
        start_time: start of the period used to build the fused dataset
        end_time: end of the period used to build the fused dataset
        feature_names (list[str]): feature columns handed to
            `ml.random_forest_ee`
        label (str): column predicted by the fusion model
        fusion_samples (optional): samples used to train the fusion model.
            Required, because `fusion_model_asset` is not implemented
        fusion_model_asset (optional): not implemented; raises
            NotImplementedError when used without `fusion_samples`
        output_asset_path (str, optional): asset id to export to. When
            `tile` is True, `harmonics_t<i>` is appended for tile `i`
        output_bucket (optional): not implemented; raises
            NotImplementedError when used without `output_asset_path`
        tile (bool, optional): if True the region is split into 1.0 degree
            tiles and this function is called once per tile. default = False

    raises:
        NotImplementedError: if only `fusion_model_asset` or only
            `output_bucket` is given
        ValueError: if `fusion_samples` and `fusion_model_asset` are both None
        ValueError: if `output_asset_path` and `output_bucket` are both None
    """
    # Fail fast on unusable argument combinations. Validating the output
    # target here also guarantees a defined per-tile asset path below.
    if fusion_samples is None:
        if fusion_model_asset is not None:
            raise NotImplementedError()
        raise ValueError(
            "Either 'fusion_samples' or 'fusion_model_path' needs to be defined to run fusion process"
        )
    if output_asset_path is None:
        if output_bucket is not None:
            raise NotImplementedError()
        raise ValueError(
            "Either 'output_asset_path' or 'output_bucket' needs to be defined to run fusion export process"
        )

    if tile:
        land_area = (
            ee.FeatureCollection("USDOS/LSIB_SIMPLE/2017")
            .filterBounds(region)
            .geometry(100)
            .buffer(2500, maxError=100)
        )
        grid = geeutils.tile_region(region, intersect_geom=land_area, grid_size=1.0)

        n = grid.size().getInfo()
        grid_list = grid.toList(n)

        for i in range(n):
            grid_tile = ee.Feature(grid_list.get(i)).geometry()
            output_tile_path = output_asset_path + f"harmonics_t{i}"

            export_harmonics(
                grid_tile,
                start_time,
                end_time,
                feature_names,
                label,
                fusion_samples,
                fusion_model_asset,
                output_tile_path,
                output_bucket,
                tile=False,
            )
        return

    fusion_model, scaling_dict = ml.random_forest_ee(
        25, fusion_samples, feature_names, label, mode="regression"
    )

    # the second return value (band label) is not needed: the harmonic fit
    # below always uses the "mndwi" band
    ds, _ = _fuse_dataset(
        region, start_time, end_time, fusion_model, scaling_dict, target_band="mndwi",
    )

    now = datetime.datetime.now()
    # "%S" is seconds (00-59); the lowercase "%s" is a non-portable
    # platform extension and must not be used here.
    time_id = now.strftime("%Y%m%d%H%M%S")
    time_str = now.strftime("%Y-%m-%d %H:%M:%S")

    scale_factor = 0.0001

    # create metadata dict
    metadata = ee.Dictionary(
        {
            "hf_version": hf.__version__,
            "scale_factor": scale_factor,
            "fit_time_start": start_time,
            "fit_time_end": end_time,
            "execution_time": time_str,
        }
    )

    harmonic_coefs = timeseries.fit_harmonic_trend(
        ds, dependent="mndwi", output_err=True
    )
    # store as scaled integers; readers multiply by `scale_factor` again
    harmonic_coefs = harmonic_coefs.divide(scale_factor).int32().set(metadata)

    geeutils.export_image(
        harmonic_coefs,
        region,
        output_asset_path,
        description=f"hydrafloods_harmonic_coefficient_export_{time_id}",
        scale=10,
        crs="EPSG:4326",
    )

    return
def export_surface_water_harmonics(
    region,
    start_time,
    end_time,
    output_asset_path,
    n_cycles=2,
    feature_names=None,
    label=None,
    fusion_samples=None,
    tile=False,
    tile_size=1.0,
    output_scale=30,
):
    """Second step of the daily surface water fusion process. This procedure uses samples
    from `export_fusion_samples` to build a random forest model to predict a water index
    from SAR imagery. The resulting time series of optical-SAR fused data is used to
    calculate a harmonic model for the long-term surface water trend, which is exported
    to an Earth Engine asset.

    args:
        region (ee.Geometry): geographic region to look for coincident data and sample from
        start_time (str | datetime.datetime): start time used to look for coincident data
        end_time (str | datetime.datetime): end time used to look for coincident data
        output_asset_path (str): Earth Engine asset id to save harmonic model weights to as image.
            If tile==True, then output_asset_path must be a precreated ImageCollection asset;
            `harmonics_t<NNNNN>` is appended to it for each tile
        n_cycles (int, optional): number of interannual cycles to model. default = 2
        feature_names (list[str],): names of feature columns used to calculate `label` from
        label (str): name of feature column to predict using `feature_names`
        fusion_samples (str | ee.FeatureCollection): Earth Engine FeatureCollection (or its
            asset id) of samples to get a data fusion model from. Should be the asset output
            from `export_fusion_samples`. At most 10000 samples are used
        tile (bool, optional): boolean keyword to tile exports. If false will try to calculate
            harmonic weights as image. If true, it will tile the area and recursively call to
            export smaller areas. If true then expects that `output_asset_path` is an
            ImageCollection. default = False
        tile_size (float, optional): resolution in decimal degrees to create tiles over region
            for smaller exports. Only used if tile==True. default = 1.0
        output_scale (float, optional): output resolution of harmonic weight image. default = 30

    raises:
        ValueError: if `fusion_samples` is None
        ValueError: if `output_asset_path` is None
    """
    # Fail fast on unusable arguments. Validating the output path here also
    # guarantees a defined per-tile asset path below.
    if fusion_samples is None:
        raise ValueError(
            "Either 'fusion_samples' or 'fusion_model_path' needs to be defined to run fusion process"
        )
    if output_asset_path is None:
        raise ValueError(
            "'output_asset_path' needs to be defined to run fusion export process"
        )

    if tile:
        land_area = (
            ee.FeatureCollection("USDOS/LSIB_SIMPLE/2017")
            .filterBounds(region)
            .geometry(1000)
            .buffer(2500, maxError=1000)
        )
        grid = geeutils.tile_region(
            region, intersect_geom=land_area, grid_size=tile_size
        )

        n = grid.size().getInfo()
        grid_list = grid.toList(n)

        for i in range(n):
            grid_tile = ee.Feature(grid_list.get(i)).geometry()
            output_tile_path = output_asset_path + f"harmonics_t{i:05d}"

            export_surface_water_harmonics(
                region=grid_tile,
                start_time=start_time,
                end_time=end_time,
                n_cycles=n_cycles,
                feature_names=feature_names,
                label=label,
                fusion_samples=fusion_samples,
                output_asset_path=output_tile_path,
                tile=False,
                tile_size=tile_size,
                output_scale=output_scale,
            )
        return

    # ee.FeatureCollection accepts both an asset id and an existing
    # collection, so either form of `fusion_samples` can be limited.
    fusion_model, scaling_dict = ml.random_forest_ee(
        25,
        ee.FeatureCollection(fusion_samples).limit(10000),
        feature_names,
        label,
        scaling="standard",
        mode="regression",
    )

    ds = _fuse_dataset(
        region,
        start_time,
        end_time,
        fusion_model,
        scaling_dict,
        feature_names,
        target_band=label,
    )

    now = datetime.datetime.now()
    # "%S" is seconds (00-59); the lowercase "%s" is a non-portable
    # platform extension and must not be used here.
    time_id = now.strftime("%Y%m%d%H%M%S")
    time_str = now.strftime("%Y-%m-%d %H:%M:%S")

    scale_factor = 0.0001

    # create metadata dict
    metadata = ee.Dictionary({
        "hf_version": hf.__version__,
        "scale_factor": scale_factor,
        "fit_time_start": start_time,
        "fit_time_end": end_time,
        "execution_time": time_str,
    })

    harmonic_coefs = timeseries.fit_harmonic_trend(
        ds, dependent=label, n_cycles=n_cycles, output_err=True
    )
    # store as scaled integers; readers multiply by `scale_factor` again
    harmonic_coefs = harmonic_coefs.divide(scale_factor).int32().set(metadata)

    geeutils.export_image(
        harmonic_coefs,
        region,
        output_asset_path,
        description=f"hydrafloods_harmonic_coefficient_export_{time_id}",
        scale=output_scale,
        crs="EPSG:4326",
    )

    return
def export_daily_surface_water(
    region,
    target_date,
    harmonic_image=None,
    harmonic_collection=None,
    feature_names=None,
    label=None,
    look_back=30,
    lag=4,
    n_cycles=2,
    include_confidence=False,
    include_flood=False,
    export_fusion=False,
    fusion_samples=None,
    output_asset_path=None,
    output_bucket_path=None,
    initial_threshold=0.1,
    tile=False,
    tile_size=1.0,
    tile_buffer=100000,
    output_scale=30,
):
    """Last and repeated step of the daily surface water fusion process. This procedure uses
    the results from `export_fusion_samples` and `export_surface_water_harmonics` to build a
    random forest model to predict a water index from SAR imagery and predict water using the
    harmonic model. This process will correct the harmonic estimate using observed data and
    export the resulting imagery.

    args:
        region (ee.Geometry): geographic region to look for coincident data and sample from
        target_date (str | datetime.datetime | ee.Date): date to estimate surface water extent for
        harmonic_image (str, optional): Earth Engine Image asset id of the harmonic model weights
            exported by `export_surface_water_harmonics`. If left as None then
            `harmonic_collection` must be defined. default = None
        harmonic_collection (str, optional): Earth Engine ImageCollection asset id of the harmonic
            model weights from tiled `export_surface_water_harmonics`. If left as None then
            `harmonic_image` must be defined. default = None
        feature_names (list[str],): names of feature columns used to calculate `label` from
        label (str): name of feature column to predict using `feature_names`
        look_back (int,optional): number of days used to estimate short-term trend in water.
            default = 30
        lag (int, optional): number of days before `target_date` at which the `look_back`
            window ends. default = 4
        n_cycles (int, optional): number of interannual cycles to model. Must match the value
            used to fit the harmonic weights. default = 2
        include_confidence (bool, optional): boolean keyword to specify if a confidence band will
            be exported with surface water image. If True then confidence will be calculated.
            default = False
        include_flood (bool, optional): boolean keyword to specify if a flood band will be
            exported with surface water image. If True then flood will be calculated based on
            JRC permanent water data. default = False
        export_fusion (bool, optional): boolean keyword to specify if the fusion image used to
            calculate water should be exported as a separate task. If True then run fusion
            export task. default = False
        fusion_samples (str | ee.FeatureCollection): Earth Engine FeatureCollection (or its asset
            id) of samples to get a data fusion model from. Should be the asset output from
            `export_fusion_samples`. At most 10000 samples are used
        output_asset_path (str): Earth Engine asset id to save estimated water and fusion results
            to as image. If tile==True, then output_asset_path must be a precreated
            ImageCollection asset. If left as None then `output_bucket_path` must be specified.
            default = None
        output_bucket_path (str): GCP cloud bucket path to save estimated water and fusion results
            to as cloud optimized geotiffs. If tile==True, then multiple files will be created.
            If left as None then `output_asset_path` must be specified. default = None
        initial_threshold (float, optional): initial threshold value used in `edge_otsu`
            thresholding algorithm to segment water from fusion image. default = 0.1
        tile (bool, optional): boolean keyword to tile exports. If false will try to calculate
            the result as one image. If true, it will tile the area and recursively call to
            export smaller areas. If true then expects that `output_asset_path` is an
            ImageCollection. default = False
        tile_size (float, optional): resolution in decimal degrees to create tiles over region
            for smaller exports. Only used if tile==True. default = 1.0
        tile_buffer (float,optional): buffer size in meters to buffer tiles to calculate
            threshold. This is used to ensure running tiled exports produces consistent results
            at tile seams. default = 100000
        output_scale (float, optional): output resolution of exported images. default = 30

    raises:
        ValueError: if `fusion_samples` is None
        ValueError: if both `harmonic_image` and `harmonic_collection` are None
        ValueError: if both 'output_asset_path' and 'output_bucket_path' are None
    """

    def get_residuals(i):
        """Closure function to calculate residuals of harmonic water estimate
        compared to observed data.
        """
        i = ee.Number(i)
        # number of days (negative) to shift the target date by
        t_diff = ee.Number(i).multiply(-1).subtract(lag)
        new_date = target_date.advance(t_diff, "day")

        corr_img = (
            ds.collection.select(label)
            .filterDate(new_date, new_date.advance(1, "day"))
            .qualityMosaic(label)
        )

        time_img = timeseries.get_dummy_img(new_date)

        # n_cycles must match the one used for `har_pred`, otherwise the
        # harmonic terms do not line up with the coefficient bands
        harmon_pred = (
            timeseries.add_harmonic_coefs(time_img, n_cycles=n_cycles)
            .multiply(harmonic_coefs)
            .reduce("sum")
        )

        harmon_diff = harmon_pred.subtract(corr_img).rename("residual")

        return harmon_diff.set("system:time_start", new_date.millis())

    def calc_confidence(i):
        """Closure function to calculate confidence in water estimate using
        monte carlo methods and simulating errors in long- and short-term water dynamics
        """
        i = ee.Number(i)

        # uniform sampling of std dev at 95% confidence interval
        long_term_seed = i.add(500)
        short_term_seed = i.add(1000)

        long_term_random = (
            ee.Image.random(long_term_seed).multiply(3.92).subtract(1.96)
        )
        short_term_random = (
            ee.Image.random(short_term_seed).multiply(3.92).subtract(1.96)
        )

        lin_sim = lin_pred.add(short_term_random.multiply(linCi))
        har_sim = har_pred.add(long_term_random.multiply(harCi))

        sim_pred = har_sim.subtract(lin_sim)
        return sim_pred.gt(ci_threshold).uint8()

    # Fail fast on unusable argument combinations, before any Earth Engine
    # object is built or any request is sent.
    if fusion_samples is None:
        raise ValueError(
            "'fusion_samples' needs to be defined to run fusion process")
    if harmonic_image is None and harmonic_collection is None:
        raise ValueError(
            "Either 'harmonic_image' or 'harmonic_collection' needs to be defined to run fusion process"
        )
    if output_asset_path is None and output_bucket_path is None:
        raise ValueError(
            "Either 'output_asset_path' or 'output_bucket_path' needs to be defined to run fusion export process"
        )

    if tile:
        land_area = (
            ee.FeatureCollection("USDOS/LSIB_SIMPLE/2017")
            .filterBounds(region)
            .geometry(100)
            .buffer(2500, maxError=100)
        )
        grid = geeutils.tile_region(
            region, intersect_geom=land_area, grid_size=tile_size
        )

        n = grid.size().getInfo()
        grid_list = grid.toList(n)

        for i in range(n):
            if output_asset_path is not None:
                output_asset_tile = output_asset_path + f"daily_tile{i:05d}"
            else:
                output_asset_tile = None

            if output_bucket_path is not None:
                output_bucket_tile = output_bucket_path + f"_tile{i:05d}"
            else:
                output_bucket_tile = None

            grid_tile = ee.Feature(grid_list.get(i)).geometry()

            export_daily_surface_water(
                region=grid_tile,
                target_date=target_date,
                harmonic_image=harmonic_image,
                harmonic_collection=harmonic_collection,
                feature_names=feature_names,
                label=label,
                look_back=look_back,
                lag=lag,
                n_cycles=n_cycles,
                include_confidence=include_confidence,
                include_flood=include_flood,
                export_fusion=export_fusion,
                fusion_samples=fusion_samples,
                output_asset_path=output_asset_tile,
                output_bucket_path=output_bucket_tile,
                initial_threshold=initial_threshold,
                tile=False,
                tile_size=tile_size,
                tile_buffer=tile_buffer,
                output_scale=output_scale,
            )
        return

    if not isinstance(target_date, ee.Date):
        target_date = ee.Date(target_date)

    end_time = target_date.advance(-(lag - 1), "day")
    start_time = end_time.advance(-look_back, "day")

    # ee.FeatureCollection accepts both an asset id and an existing
    # collection, so either form of `fusion_samples` can be limited.
    fusion_model, scaling_dict = ml.random_forest_ee(
        25,
        ee.FeatureCollection(fusion_samples).limit(10000),
        feature_names,
        label,
        scaling="standard",
        mode="regression",
    )

    now = datetime.datetime.now()
    # "%S" is seconds (00-59); the lowercase "%s" is a non-portable
    # platform extension and must not be used here.
    time_id = now.strftime("%Y%m%d%H%M%S")
    time_str = now.strftime("%Y-%m-%d %H:%M:%S")

    if harmonic_image is not None:
        harmonic_coefs = ee.Image(harmonic_image)
        harmonic_coefs = harmonic_coefs.multiply(
            ee.Image(ee.Number(harmonic_coefs.get("scale_factor")))
        )
    else:
        harmonic_collection = ee.ImageCollection(harmonic_collection)
        first = ee.Image(harmonic_collection.first())
        harmonic_coefs = harmonic_collection.mosaic().multiply(
            ee.Image(ee.Number(first.get("scale_factor")))
        )

    if include_confidence:
        # error bands are only needed for the confidence simulation
        harmonic_err = harmonic_coefs.select(".*(x|y|n)$")
    harmonic_coefs = harmonic_coefs.select("^(c|t|s).*")

    # buffered region used only for the threshold calculation
    prod_region = region.buffer(tile_buffer, 100)

    ds = _fuse_dataset(
        region,
        start_time,
        end_time,
        fusion_model,
        scaling_dict,
        feature_names,
        target_band=label,
        use_viirs=True,
    )

    dummy_target = timeseries.get_dummy_img(target_date)

    weights = ee.ImageCollection.fromImages(
        ee.List.sequence(0, look_back - 1).map(get_residuals)
    ).sort("system:time_start")

    weights_lr = timeseries.fit_linear_trend(
        weights, dependent="residual", output_err=include_confidence
    )
    weights_coefs = weights_lr.select("^(c|t).*")

    lin_pred = (
        dummy_target.multiply(weights_coefs).reduce("sum").rename("residual_est")
    )
    har_pred = (
        timeseries.add_harmonic_coefs(dummy_target, n_cycles=n_cycles)
        .multiply(harmonic_coefs)
        .reduce("sum")
    )

    fused_pred = hf.filtering.p_median(
        (har_pred.subtract(lin_pred))
    ).rename("fused_product")

    ci_threshold = thresholding.edge_otsu(
        fused_pred,
        initial_threshold=initial_threshold,
        edge_buffer=300,
        region=prod_region,
        invert=True,
        scale=200,
        return_threshold=True,
    )

    # pixels flagged as waterClass 3 in any of the 5 most recent yearly
    # images that start before `end_time`
    permanent_water = (
        ee.ImageCollection("JRC/GSW1_2/YearlyHistory")
        .filterDate("1985-01-01", end_time)
        .limit(5, "system:time_start", False)
        .map(lambda x: x.select("waterClass").eq(3))
        .sum()
        .unmask(0)
        .gt(0)
    )

    water = fused_pred.gt(ci_threshold).Or(permanent_water).rename("water").uint8()

    if include_flood:
        flood = water.select("water").And(permanent_water.Not()).rename("flood")
        water = water.addBands(flood)

    if include_confidence:
        weights_err = weights_lr.select(".*(x|y|n)$")

        linCi = weights_err.expression(
            "mse * (1 + (1/n) + ((t-xmean)**2/xr))**(1/2)",
            {
                "mse": weights_err.select("residual_y"),
                "n": weights_err.select("n"),
                "xmean": weights_err.select("mean_x"),
                "xr": weights_err.select("residual_x"),
                "t": dummy_target.select("time"),
            },
        )

        harCi = harmonic_err.expression(
            "mse * (1 + (1/n) + ((t-xmean)**2/xr))**(1/2)",
            {
                "mse": harmonic_err.select("residual_y"),
                "n": harmonic_err.select("n"),
                "xmean": harmonic_err.select("mean_x"),
                "xr": harmonic_err.select("residual_x"),
                "t": dummy_target.select("time"),
            },
        )

        confidence = (
            ee.ImageCollection.fromImages(
                ee.List.sequence(0, 99).map(calc_confidence)
            )
            .reduce(ee.Reducer.mean(), 16)
            .multiply(100)
            .uint8()
            .rename("confidence")
        )

        out_water = ee.Image.cat([
            confidence,
            water,
        ])
    else:
        out_water = water

    fused_pred = fused_pred.multiply(10000).int16()

    if output_asset_path is not None:
        # create metadata dict
        metadata = ee.Dictionary({
            "hf_version": hf.__version__,
            "system:time_start": target_date.millis(),
            "system:time_end": target_date.advance(86399, "seconds").millis(),
            "execution_time": time_str,
            "lag": lag,
            "look_back": look_back,
        })

        geeutils.export_image(
            out_water.set(metadata.combine({"product": "water"})),
            region,
            output_asset_path + "_water",
            description=f"hydrafloods_water_ee_export_{time_id}",
            scale=output_scale,
            crs="EPSG:4326",
        )

        if export_fusion:
            geeutils.export_image(
                fused_pred.set(metadata.combine({"product": "fusion"})),
                region,
                output_asset_path + "_fusion",
                description=f"hydrafloods_fusion_ee_export_{time_id}",
                scale=output_scale,
                crs="EPSG:4326",
            )
    else:
        export_region = region.bounds(maxError=100).getInfo()["coordinates"]

        # split "<scheme>//<bucket>/<dirs...>/<name><ext>" into its parts
        bucket_path, ext = os.path.splitext(output_bucket_path)
        fcomponents = bucket_path.split("/")
        bucket = fcomponents[2]
        fpath = fcomponents[3:-1]

        # TODO: remove extension from string formulation
        f_water = "/".join(fpath + [fcomponents[-1] + "_water" + ext])
        f_fusion = "/".join(fpath + [fcomponents[-1] + "_fusion" + ext])

        water_task = ee.batch.Export.image.toCloudStorage(
            image=out_water,
            description=f"hydrafloods_water_gcp_export_{time_id}",
            bucket=bucket,
            fileNamePrefix=f_water,
            region=export_region,
            scale=output_scale,
            crs="EPSG:4326",
            maxPixels=1e13,
            fileFormat="GeoTIFF",
            formatOptions={"cloudOptimized": True},
        )
        water_task.start()

        if export_fusion:
            fusion_task = ee.batch.Export.image.toCloudStorage(
                image=fused_pred,
                description=f"hydrafloods_fusion_gcp_export_{time_id}",
                bucket=bucket,
                fileNamePrefix=f_fusion,
                region=export_region,
                scale=output_scale,
                crs="EPSG:4326",
                maxPixels=1e13,
                fileFormat="GeoTIFF",
                formatOptions={"cloudOptimized": True},
            )
            fusion_task.start()

    return
def export_fusion_product(
    region,
    target_date,
    harmonic_image=None,
    harmonic_collection=None,
    feature_names=None,
    label=None,
    look_back=30,
    lag=4,
    n_cycles=2,
    fusion_samples=None,
    output_asset_path=None,
    output_bucket_path=None,
    tile=False,
    tile_size=1.0,
    tile_buffer=100000,
    output_scale=30,
):
    """Export the fused water-index image for `target_date`.

    The harmonic prediction for `target_date` is corrected with a linear
    trend fitted to the residuals between the harmonic prediction and the
    fused observations of the `look_back` days that end `lag` days before
    `target_date`. The result is multiplied by 10000, cast to int16 and
    exported as `<output path>_fusion`.

    args:
        region (ee.Geometry): area to process and export
        target_date (str | datetime.datetime | ee.Date): date to estimate the
            fused product for
        harmonic_image (str, optional): Earth Engine Image (or asset id) of
            harmonic model weights carrying a `scale_factor` property. If None
            then `harmonic_collection` must be defined. default = None
        harmonic_collection (str, optional): Earth Engine ImageCollection (or
            asset id) of tiled harmonic model weights. If None then
            `harmonic_image` must be defined. default = None
        feature_names (list[str]): names of feature columns used to calculate
            `label` from
        label (str): name of feature column to predict using `feature_names`
        look_back (int, optional): number of daily residual images used for
            the linear trend. default = 30
        lag (int, optional): number of days before `target_date` at which the
            `look_back` window ends. default = 4
        n_cycles (int, optional): number of harmonic cycles; must match the
            value used to fit the harmonic weights. default = 2
        fusion_samples: samples used to train the fusion model. Required
        output_asset_path (str, optional): asset id prefix; `_fusion` is
            appended. Takes precedence over `output_bucket_path`. With
            tile==True, `daily_tile<NNNNN>` is appended for each tile
        output_bucket_path (str, optional): cloud storage path; the third
            "/"-separated component is used as the bucket name. With
            tile==True, `_tile<NNNNN>` is appended for each tile
        tile (bool, optional): if True the region is split into tiles and
            this function is called once per tile. default = False
        tile_size (float, optional): tile resolution in decimal degrees. Only
            used if tile==True. default = 1.0
        tile_buffer (float, optional): currently not used by this function;
            it is only forwarded to the per-tile calls. default = 100000
        output_scale (float, optional): output resolution of the exported
            image. default = 30

    raises:
        ValueError: if `fusion_samples` is None
        ValueError: if both `harmonic_image` and `harmonic_collection` are None
        ValueError: if both 'output_asset_path' and 'output_bucket_path' are None
    """

    def get_residuals(i):
        """Closure function to calculate residuals of harmonic water estimate
        compared to observed data.
        """
        i = ee.Number(i)
        # number of days (negative) to shift the target date by
        t_diff = ee.Number(i).multiply(-1).subtract(lag)
        new_date = target_date.advance(t_diff, "day")

        corr_img = (
            ds.collection.select(label)
            .filterDate(new_date, new_date.advance(1, "day"))
            .median()
        )

        time_img = timeseries.get_dummy_img(new_date)

        # n_cycles must match the one used for `har_pred`, otherwise the
        # harmonic terms do not line up with the coefficient bands
        harmon_pred = (
            timeseries.add_harmonic_coefs(time_img, n_cycles=n_cycles)
            .multiply(harmonic_coefs)
            .reduce("sum")
        )

        harmon_diff = harmon_pred.subtract(corr_img).rename("residual")

        return harmon_diff.set("system:time_start", new_date.millis())

    # Fail fast on unusable argument combinations, before any Earth Engine
    # object is built or any request is sent.
    if fusion_samples is None:
        raise ValueError(
            "'fusion_samples' needs to be defined to run fusion process"
        )
    if harmonic_image is None and harmonic_collection is None:
        raise ValueError(
            "Either 'harmonic_image' or 'harmonic_collection' needs to be defined to run fusion process"
        )
    if output_asset_path is None and output_bucket_path is None:
        raise ValueError(
            "Either 'output_asset_path' or 'output_bucket_path' needs to be defined to run fusion export process"
        )

    if tile:
        land_area = (
            ee.FeatureCollection("USDOS/LSIB_SIMPLE/2017")
            .filterBounds(region)
            .geometry(100)
            .buffer(2500, maxError=100)
        )
        grid = geeutils.tile_region(
            region, intersect_geom=land_area, grid_size=tile_size
        )

        n = grid.size().getInfo()
        grid_list = grid.toList(n)

        for i in range(n):
            if output_asset_path is not None:
                output_asset_tile = output_asset_path + f"daily_tile{i:05d}"
            else:
                output_asset_tile = None

            if output_bucket_path is not None:
                output_bucket_tile = output_bucket_path + f"_tile{i:05d}"
            else:
                output_bucket_tile = None

            grid_tile = ee.Feature(grid_list.get(i)).geometry()

            # Each tile must be processed untiled and written to its own
            # path; recursing with tile=True would tile again without end.
            export_fusion_product(
                region=grid_tile,
                target_date=target_date,
                harmonic_image=harmonic_image,
                harmonic_collection=harmonic_collection,
                feature_names=feature_names,
                label=label,
                look_back=look_back,
                lag=lag,
                n_cycles=n_cycles,
                fusion_samples=fusion_samples,
                output_asset_path=output_asset_tile,
                output_bucket_path=output_bucket_tile,
                tile=False,
                tile_size=tile_size,
                tile_buffer=tile_buffer,
                output_scale=output_scale,
            )
        return

    if not isinstance(target_date, ee.Date):
        target_date = ee.Date(target_date)

    end_time = target_date.advance(-(lag - 1), "day")
    start_time = end_time.advance(-look_back, "day")

    fusion_model, scaling_dict = ml.random_forest_ee(
        30,
        fusion_samples,
        feature_names,
        label,
        scaling=None,
        mode="regression",
    )

    now = datetime.datetime.now()
    # "%S" is seconds (00-59); the lowercase "%s" is a non-portable
    # platform extension and must not be used here.
    time_id = now.strftime("%Y%m%d%H%M%S")
    time_str = now.strftime("%Y-%m-%d %H:%M:%S")

    if harmonic_image is not None:
        harmonic_coefs = ee.Image(harmonic_image)
        harmonic_coefs = harmonic_coefs.multiply(
            ee.Image(ee.Number(harmonic_coefs.get("scale_factor")))
        )
    else:
        harmonic_collection = ee.ImageCollection(harmonic_collection)
        first = ee.Image(harmonic_collection.first())
        harmonic_coefs = harmonic_collection.mosaic().multiply(
            ee.Image(ee.Number(first.get("scale_factor")))
        )

    harmonic_coefs = harmonic_coefs.select("^(c|t|s).*")

    ds = _fuse_dataset(
        region,
        start_time,
        end_time,
        fusion_model,
        scaling_dict,
        feature_names,
        target_band=label,
        use_viirs=True,
    )

    dummy_target = timeseries.get_dummy_img(target_date)

    weights = ee.ImageCollection.fromImages(
        ee.List.sequence(0, look_back - 1).map(get_residuals)
    ).sort("system:time_start")

    weights_lr = timeseries.fit_linear_trend(
        weights, dependent="residual", output_err=False
    )
    weights_coefs = weights_lr.select("^(c|t).*")

    lin_pred = (
        dummy_target.multiply(weights_coefs).reduce("sum").rename("residual_est")
    )
    har_pred = (
        timeseries.add_harmonic_coefs(dummy_target, n_cycles=n_cycles)
        .multiply(harmonic_coefs)
        .reduce("sum")
    )

    fused_pred = (har_pred.subtract(lin_pred)).rename("fused_product")
    # store as scaled integers; the matching scale_factor (0.0001) is
    # recorded in the asset metadata below
    fused_pred = fused_pred.multiply(10000).int16()

    if output_asset_path is not None:
        # create metadata dict
        metadata = ee.Dictionary(
            {
                "hf_version": hf.__version__,
                "system:time_start": target_date.millis(),
                "system:time_end": target_date.advance(86399, "seconds").millis(),
                "execution_time": time_str,
                "lag": lag,
                "look_back": look_back,
                "scale_factor": 0.0001,
            }
        )

        geeutils.export_image(
            fused_pred.set(metadata.combine({"product": "fusion"})),
            region,
            output_asset_path + "_fusion",
            description=f"hydrafloods_fusion_ee_export_{time_id}",
            scale=output_scale,
            crs="EPSG:4326",
        )
    else:
        export_region = region.bounds(maxError=100).getInfo()["coordinates"]

        # split "<scheme>//<bucket>/<dirs...>/<name><ext>" into its parts
        bucket_path, ext = os.path.splitext(output_bucket_path)
        fcomponents = bucket_path.split("/")
        bucket = fcomponents[2]
        fpath = fcomponents[3:-1]

        # TODO: remove extension from string formulation
        f_fusion = "/".join(fpath + [fcomponents[-1] + "_fusion" + ext])

        fusion_task = ee.batch.Export.image.toCloudStorage(
            image=fused_pred,
            description=f"hydrafloods_fusion_gcp_export_{time_id}",
            bucket=bucket,
            fileNamePrefix=f_fusion,
            region=export_region,
            scale=output_scale,
            crs="EPSG:4326",
            maxPixels=1e13,
            fileFormat="GeoTIFF",
            formatOptions={"cloudOptimized": True},
        )
        fusion_task.start()

    return