def _fuse_dataset(
    region,
    start_time,
    end_time,
    fusion_model,
    scaling_dict=None,
    target_band="mndwi",
    use_viirs=False,
    use_modis=False,
):
    """Merge optical datasets with model-classified Sentinel-1 imagery.

    Landsat 8, Landsat 7 and Sentinel-2 (plus VIIRS and/or MODIS when
    requested) are merged into one optical dataset. Sentinel-1 gets its
    fusion features added, is optionally min/max scaled, and is classified
    with `fusion_model` into a band named `target_band`. The merged result is
    reduced to `target_band`, cast to float and sorted by time.

    args:
        region: region passed to every dataset constructor
        start_time: start time passed to every dataset constructor
        end_time: end time passed to every dataset constructor
        fusion_model: classifier handed to `img.classify`
        scaling_dict (optional): object exposing `toImage()` whose bands end
            in `_min` / `_max`; when None no scaling is applied. default = None
        target_band (str, optional): output band name. default = "mndwi"
        use_viirs (bool, optional): also merge VIIRS. default = False
        use_modis (bool, optional): also merge MODIS. default = False

    returns:
        tuple: the fused dataset and `target_band`
    """
    base_kwargs = dict(region=region, start_time=start_time, end_time=end_time)
    adjusted_kwargs = dict(base_kwargs, apply_band_adjustment=True)

    # Collect the optical sensors in merge order, then fold them together.
    sensors = [
        datasets.Landsat8(**base_kwargs),
        datasets.Landsat7(**adjusted_kwargs),
        datasets.Sentinel2(**adjusted_kwargs),
    ]
    if use_viirs:
        sensors.append(datasets.Viirs(**base_kwargs))
    if use_modis:
        sensors.append(datasets.Modis(**base_kwargs))

    optical = sensors[0]
    for sensor in sensors[1:]:
        optical = optical.merge(sensor)

    sar = datasets.Sentinel1(**base_kwargs).add_fusion_features()

    if scaling_dict is not None:
        scaling_img = scaling_dict.toImage()
        lower = scaling_img.select(".*_min")
        upper = scaling_img.select(".*_max")

        @decorators.carry_metadata
        def _apply_scaling(img):
            # (img - min) / (max - min), kept as float
            value_range = upper.subtract(lower)
            return img.subtract(lower).divide(value_range).float()

        sar = sar.apply_func(_apply_scaling)

    @decorators.carry_metadata
    def _apply_fusion(img):
        return img.classify(fusion_model, target_band)

    sar.collection = sar.collection.map(_apply_fusion)

    fused_ds = optical.merge(sar)
    only_target = fused_ds.collection.select(target_band)
    as_float = only_target.cast({target_band: "float"}, [target_band])
    fused_ds.collection = as_float.sort("system:time_start")

    return fused_ds, target_band
def _fuse_dataset(
    region,
    start_time,
    end_time,
    fusion_model,
    scaling_dict,
    feature_names,
    target_band="mndwi",
    use_viirs=False,
    use_modis=False,
):
    """Merge optical water-index imagery with model-predicted Sentinel-1 imagery.

    The optical side (Landsat 8, Landsat 7, Sentinel-2 and optionally VIIRS /
    MODIS) gets `target_band` added through `geeutils.add_indices`. The
    Sentinel-1 side gets its SAR indices added, is scaled with
    `ml.standard_image_scaling` and classified with `fusion_model` into
    `target_band`. The merged result keeps only `target_band`, cast to float
    and sorted by time.

    args:
        region: region passed to every dataset constructor
        start_time: start time passed to every dataset constructor
        end_time: end time passed to every dataset constructor
        fusion_model: classifier handed to `img.classify`
        scaling_dict: scaling information forwarded to
            `ml.standard_image_scaling`
        feature_names: feature names forwarded to `ml.standard_image_scaling`
        target_band (str, optional): index/band to fuse. default = "mndwi"
        use_viirs (bool, optional): also merge VIIRS. default = False
        use_modis (bool, optional): also merge MODIS. default = False

    returns:
        the fused dataset holding only `target_band`
    """
    sar_indices = ["vv_vh_ratio", "ndpi", "nvvi", "nvhi"]

    base_kwargs = dict(region=region, start_time=start_time, end_time=end_time)
    adjusted_kwargs = dict(base_kwargs, apply_band_adjustment=True)

    # Optical sensors, in merge order.
    sensors = [
        datasets.Landsat8(**base_kwargs),
        datasets.Landsat7(**adjusted_kwargs),
        datasets.Sentinel2(**adjusted_kwargs),
    ]
    if use_viirs:
        sensors.append(datasets.Viirs(**adjusted_kwargs))
    if use_modis:
        sensors.append(datasets.Modis(**base_kwargs))

    optical = sensors[0]
    for sensor in sensors[1:]:
        optical = optical.merge(sensor)
    optical = optical.apply_func(geeutils.add_indices, indices=[target_band])

    @decorators.carry_metadata
    def _apply_fusion(img):
        scaled = ml.standard_image_scaling(img, scaling_dict, feature_names)
        return scaled.classify(fusion_model, target_band)

    sar = datasets.Sentinel1(**base_kwargs)
    sar = sar.apply_func(geeutils.add_indices, indices=sar_indices)
    sar = sar.apply_func(_apply_fusion)

    fused_ds = optical.merge(sar)
    only_target = fused_ds.collection.select(target_band)
    as_float = only_target.cast({target_band: "float"}, [target_band])
    fused_ds.collection = as_float.sort("system:time_start")

    return fused_ds
def export_fusion_samples(
    region,
    start_time,
    end_time,
    stratify_samples=False,
    sample_scale=30,
    n_samples=100,
    img_limit=1000,
    export_to="asset",
    output_asset_path=None,
    export_kwargs=None,
    skip_empty=True,
):
    """Sample joined optical / Sentinel-1 imagery and export the samples.

    Landsat 8, Sentinel-2 and Landsat 7 are merged into one optical dataset
    and joined with Sentinel-1 (after `add_fusion_features`). Each joined
    image is sampled, either randomly or stratified by remapped MODIS land
    cover, and all samples are exported in a single Earth Engine table task.

    args:
        region: region passed to every dataset constructor
        start_time: start time passed to every dataset constructor
        end_time: end time passed to every dataset constructor
        stratify_samples (bool, optional): sample stratified by the remapped
            MODIS land cover classes instead of randomly. default = False
        sample_scale (float, optional): scale used for sampling. default = 30
        n_samples (int, optional): samples per image, or per class when
            stratified (the last class gets twice as many). default = 100
        img_limit (int | None, optional): maximum number of joined images to
            sample; None means all of them. default = 1000
        export_to (str, optional): "asset" or "cloud". default = "asset"
        output_asset_path (str, optional): asset id used when exporting to
            an asset. default = None
        export_kwargs (dict, optional): extra keyword arguments forwarded to
            the export function; they override the defaults built here.
            default = None
        skip_empty (bool, optional): check each image's sample count and skip
            images that yield no samples. default = True

    raises:
        KeyError: if `export_to` is neither "asset" nor "cloud"
    """
    export_opts = dict(
        cloud=ee.batch.Export.table.toCloudStorage,
        asset=ee.batch.Export.table.toAsset,
    )
    export_func = export_opts[export_to]

    ds_kwargs = dict(region=region, start_time=start_time, end_time=end_time)
    dsa_kwargs = {**ds_kwargs, "apply_band_adjustment": True}

    lc8 = datasets.Landsat8(**ds_kwargs)
    le7 = datasets.Landsat7(**dsa_kwargs)
    s2 = datasets.Sentinel2(**dsa_kwargs)

    s1 = datasets.Sentinel1(**ds_kwargs)
    s1 = s1.add_fusion_features()

    optical = lc8.merge(s2).merge(le7)
    ds = optical.join(s1)

    # Never iterate past the images that actually exist: indexing beyond the
    # end of the list only fails when the result is evaluated, so with
    # skip_empty=False the loop would otherwise merge invalid samples.
    n_available = ds.n_images
    n = n_available if img_limit is None else min(img_limit, n_available)
    img_list = ds.collection.toList(n)

    output_features = ee.FeatureCollection([])

    if stratify_samples:
        class_band = "landcover"
        igbp_classes = ee.List(
            [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17]
        )
        ipcc_classes = ee.List([1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 6, 3, 5, 3, 4, 4, 6])
        stratification_img = (
            ee.ImageCollection("MODIS/006/MCD12Q1")
            .mode()
            .remap(igbp_classes, ipcc_classes)
            .focal_mode()
            .rename(class_band)
        )
        classes = ipcc_classes.distinct()
        # n_samples for every class except the last, which gets twice as many.
        class_points = ee.List.repeat(n_samples, classes.size().subtract(1)).add(
            n_samples * 2
        )
    else:
        stratification_img = None

    for i in range(n):
        try:
            sample_img = ee.Image(img_list.get(i))
            sample_region = sample_img.geometry(10).bounds(10)

            if stratification_img is not None:
                samples = sample_img.addBands(
                    stratification_img.select(class_band)
                ).stratifiedSample(
                    region=sample_region,
                    numPoints=n_samples,
                    classBand=class_band,
                    scale=sample_scale,
                    seed=i,
                    classValues=classes,
                    classPoints=class_points,
                    tileScale=16,
                    geometries=True,
                )
            else:
                samples = sample_img.sample(
                    region=sample_region,
                    scale=sample_scale,
                    numPixels=n_samples,
                    seed=i,
                    tileScale=16,
                    geometries=True,
                )

            # The getInfo round trip is only paid when empty results must be
            # filtered out.
            if not skip_empty or samples.size().getInfo() > 0:
                output_features = output_features.merge(samples)

        except EEException as e:
            # Best effort: export what was collected so far, but say why the
            # sampling stopped instead of failing silently.
            logging.warning(f"Sampling stopped at image {i} of {n}: {e}")
            break

    export_info = dict(collection=output_features)
    if export_to == "asset":
        # assetId is only meaningful for asset exports.
        export_info["assetId"] = output_asset_path
    if export_kwargs is not None:
        export_info = {**export_info, **export_kwargs}

    task = export_func(**export_info)
    task.start()
    logging.info("Started task")

    return
def export_fusion_samples(
    region,
    start_time,
    end_time,
    output_asset_path,
    stratify_samples=False,
    sample_scale=30,
    n_samples=25,
    max_samples=10000,
    seed=0,
):
    """First step of the daily surface water fusion process.

    Samples values from coincident optical and SAR data so that ML can be
    used for data fusion. The MNDWI, NWI, AEWInsh and AEWIsh optical water
    indices and several SAR indices (VV/VH, NDPI, NVVI, NVHI) are calculated
    and sampled, then exported to an Earth Engine asset.

    args:
        region (ee.Geometry): geographic region to look for coincident data
            and sample from
        start_time (str | datetime.datetime): start time used to look for
            coincident data
        end_time (str | datetime.datetime): end time used to look for
            coincident data
        output_asset_path (str): Earth Engine asset id to save sampled
            values to
        stratify_samples (bool, optional): sample stratified by MODIS land
            cover. If False, random sampling is used. default = False
        sample_scale (float, optional): resolution in meters to sample data
            at. default = 30
        n_samples (int, optional): number of samples to collect per
            coincident image pair. If stratify_samples is True this is the
            number of samples per class (the last class gets four times as
            many). default = 25
        max_samples (int, optional): once more than this many samples have
            been collected, sampling stops and the export starts.
            default = 10000
        seed (int, optional): random number generator seed; the image index
            is added to it for each image. default = 0
    """
    water_indices = ["mndwi", "nwi", "aewish", "aewinsh"]
    sar_indices = ["vv_vh_ratio", "ndpi", "nvvi", "nvhi"]

    # Sentinel-1 takes the plain kwargs; the optical sensors also get rescale.
    sar_kwargs = dict(region=region, start_time=start_time, end_time=end_time)
    optical_kwargs = dict(sar_kwargs, rescale=True)
    adjusted_kwargs = dict(optical_kwargs, apply_band_adjustment=True)

    lc8 = datasets.Landsat8(**optical_kwargs)
    le7 = datasets.Landsat7(**adjusted_kwargs)
    s2 = datasets.Sentinel2(**adjusted_kwargs)

    s1 = datasets.Sentinel1(**sar_kwargs)
    s1 = s1.apply_func(geeutils.add_indices, indices=sar_indices)

    optical = lc8.merge(s2).merge(le7)
    optical = optical.apply_func(geeutils.add_indices, indices=water_indices)
    optical.collection = optical.collection.select(water_indices)

    ds = optical.join(s1)

    n = ds.n_images
    logging.info(f"Found {n} images to sample from")
    img_list = ds.collection.toList(n)

    output_features = ee.FeatureCollection([])
    aggregate_samples = 0

    stratification_img = None
    if stratify_samples:
        class_band = "landcover"
        igbp_classes = ee.List(
            [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17]
        )
        ipcc_classes = ee.List([1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 6, 3, 5, 3, 4, 4, 6])
        first_landcover = (
            ee.ImageCollection("MODIS/006/MCD12Q1")
            .limit(1, "system:time_start", True)
            .first()
        )
        stratification_img = first_landcover.remap(
            igbp_classes, ipcc_classes
        ).rename(class_band)
        classes = ipcc_classes.distinct()
        # n_samples for all classes but the last, which gets four times as many.
        class_points = ee.List.repeat(n_samples, classes.size().subtract(1)).add(
            n_samples * 4
        )

    for idx in range(n):
        try:
            image = ee.Image(img_list.get(idx))
            footprint = image.geometry(sample_scale).bounds(sample_scale)

            if stratification_img is None:
                samples = image.sample(
                    region=footprint,
                    scale=sample_scale,
                    numPixels=n_samples,
                    seed=seed + idx,
                    tileScale=16,
                    geometries=True,
                )
            else:
                with_classes = image.addBands(stratification_img.select(class_band))
                samples = with_classes.stratifiedSample(
                    region=footprint,
                    numPoints=n_samples,
                    classBand=class_band,
                    scale=sample_scale,
                    seed=seed + idx,
                    classValues=classes,
                    classPoints=class_points,
                    tileScale=16,
                    geometries=True,
                )

            these_samples = samples.size().getInfo()
            if these_samples > 0:
                output_features = output_features.merge(samples.randomColumn())

            aggregate_samples += these_samples
            if aggregate_samples > max_samples:
                logging.info(f"max samples reached, beginning export!")
                break

        except EEException as e:
            warnings.warn(f"Sampling process ran into an error: {str(e)}")
            break

    task = ee.batch.Export.table.toAsset(
        collection=output_features, assetId=output_asset_path
    )
    task.start()
    logging.info(f"Started export task for {output_asset_path}")

    return
def export_fusion_samples(
    region,
    start_time,
    end_time,
    output_asset_path,
    stratify_samples=True,
    sample_scale=30,
    n_samples=25,
    seed=0,
):
    """First step of the daily surface water fusion process.

    Samples values from coincident optical and SAR data so that ML can be
    used for data fusion. The MNDWI, NWI, AEWInsh and AEWIsh optical water
    indices are calculated on the optical data. Ascending and descending
    Sentinel-1 data are slope corrected, filtered, given SAR indices (VV/VH,
    NDPI, NVVI, NVHI) and converted to anomalies before sampling.

    args:
        region (ee.Geometry): geographic region to look for coincident data
            and sample from
        start_time (str | datetime.datetime): start time used to look for
            coincident data
        end_time (str | datetime.datetime): end time used to look for
            coincident data
        output_asset_path (str): Earth Engine asset id to save sampled
            values to
        stratify_samples (bool, optional): sample stratified by a
            combination of the MODIS land cover and JRC surface water
            occurrence. If False, random points are used. default = True
        sample_scale (float, optional): resolution in meters to sample data
            at. default = 30
        n_samples (int, optional): number of sample points. If
            stratify_samples is True this is the number of samples per water
            occurrence class. default = 25
        seed (int, optional): random number generator seed. default = 0
    """
    dem = ee.Image("NASA/NASADEM_HGT/001").select("elevation")

    optical_water_indices = ["mndwi", "nwi", "aewish", "aewinsh"]

    ds_kwargs = dict(
        region=region, start_time=start_time, end_time=end_time, rescale=True
    )
    dsa_kwargs = {**ds_kwargs, "apply_band_adjustment": True}

    lc8 = datasets.Landsat8(**ds_kwargs)
    le7 = datasets.Landsat7(**dsa_kwargs)
    s2 = datasets.Sentinel2(**dsa_kwargs)

    # The Sentinel-1 datasets are built without the rescale keyword.
    ds_kwargs.pop("rescale")

    s1a = datasets.Sentinel1Asc(**ds_kwargs)
    s1d = datasets.Sentinel1Desc(**ds_kwargs)

    # NOTE(review): years come from the ascending collection only and are
    # reused for the descending one; confirm this is intended.
    years = (
        s1a.collection.aggregate_array("system:time_start")
        .map(lambda x: ee.Date(x).get("year"))
        .distinct()
    )

    sar_proc = (
        (
            corrections.slope_correction,
            dict(
                elevation=dem,
                buffer=50,
            ),
        ),
        hf.gamma_map,
        (geeutils.add_indices, dict(indices=["vv_vh_ratio", "ndpi", "nvvi", "nvhi"])),
    )

    s1a.pipe(sar_proc, inplace=True)
    s1d.pipe(sar_proc, inplace=True)

    s1a_anomalies = _calc_sar_anomalies(years, s1a)
    s1d_anomalies = _calc_sar_anomalies(years, s1d)

    s1a.collection = s1a_anomalies
    s1d.collection = s1d_anomalies

    s1 = s1a.merge(s1d)

    optical = lc8.merge(s2).merge(le7)
    optical = optical.apply_func(geeutils.add_indices, indices=optical_water_indices)

    ds = optical.join(s1)

    # Union of the joined image footprints, clipped to the requested region.
    sample_region = (
        ds.collection.map(geeutils.get_geoms)
        .union(maxError=1000)
        .geometry(maxError=1000)
    ).intersection(region, maxError=1000)

    if stratify_samples:
        class_band = "strata"
        interval = 20
        water_freq = ee.Image("JRC/GSW1_2/GlobalSurfaceWater").select("occurrence")
        class_intervals = ee.List.sequence(0, 80, interval)
        logging.info(f"Water intervals: {class_intervals.getInfo()}")
        n_water_classes = class_intervals.size()
        water_classes = ee.List.sequence(1, n_water_classes)
        logging.info(f"Water Classes: {water_classes.getInfo()}")

        # Per pixel, count how many occurrence thresholds are met.
        water_img = (
            ee.ImageCollection.fromImages(
                class_intervals.map(lambda x: water_freq.gte(ee.Number(x)))
            )
            .reduce(ee.Reducer.sum())
            .uint8()
            .rename(class_band)
        )

        igbp_classes = ee.List(
            [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17]
        )
        ipcc_classes = ee.List([1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 6, 3, 5, 3, 4, 4, 6])
        # Land cover classes are offset so they come after the water classes.
        lc_img = (
            ee.ImageCollection("MODIS/006/MCD12Q1")
            .limit(1, "system:time_start", True)
            .first()
            .remap(igbp_classes, ipcc_classes)
            .rename(class_band)
        ).add(water_classes.size())
        lc_classes = (
            ipcc_classes.distinct()
            .map(lambda x: ee.Number(x).add(water_classes.size()))
            .sort()
        )
        logging.info(f"LC Classes: {lc_classes.getInfo()}")

        half = ee.Number(n_samples).multiply(n_water_classes.subtract(1))
        n_lc = half.divide(lc_classes.size()).round()

        n_water_samples = ee.List.repeat(n_samples, n_water_classes)
        n_lc_samples = ee.List.repeat(n_lc, lc_classes.size())
        logging.info(f"n Water Samples {n_water_samples.getInfo()}")
        logging.info(f"n LC Samples {n_lc_samples.getInfo()}")

        base_samples = water_img.select(class_band).stratifiedSample(
            region=sample_region,
            numPoints=n_samples,
            classBand=class_band,
            scale=sample_scale,
            seed=seed,
            classValues=water_classes,
            classPoints=n_water_samples,
            tileScale=16,
            geometries=True,
        )
    else:
        base_samples = ee.FeatureCollection.randomPoints(
            sample_region, n_samples, seed
        )

    def sample_img(img):
        """Sample one joined image at the base (and land cover) points."""
        geom = img.geometry()
        date = img.date()

        if stratify_samples:
            # The land cover points vary with the image's week and year.
            week = date.get("week")
            year = date.get("year")
            new_seed = week.add(year).add(seed)

            lc_samples = lc_img.select(class_band).stratifiedSample(
                region=sample_region,
                numPoints=n_samples,
                classBand=class_band,
                scale=sample_scale,
                seed=new_seed,
                classValues=lc_classes,
                classPoints=n_lc_samples,
                tileScale=16,
                geometries=True,
            )
            points = base_samples.merge(lc_samples)
        else:
            # The land cover strata only exist when stratifying; referencing
            # them here used to raise NameError.
            points = base_samples

        samples = points.filterBounds(geom).randomColumn("random", seed)

        features = img.sampleRegions(
            samples, scale=sample_scale, tileScale=16, geometries=True
        )

        features = features.map(
            lambda x: ee.Feature(x).set("timestamp", date.millis())
        )
        return features

    sample_features = ds.collection.map(sample_img).flatten()

    if stratify_samples:
        # A separate random column is assigned within each stratum.
        output_features = ee.FeatureCollection(
            sample_features.aggregate_array(class_band)
            .distinct()
            .map(
                lambda x: sample_features.filter(
                    ee.Filter.eq(class_band, x)
                ).randomColumn()
            )
        ).flatten()
    else:
        output_features = sample_features.randomColumn()

    now = datetime.datetime.now()
    # %S is seconds; the lowercase %s used previously is a non-portable
    # platform extension.
    time_id = now.strftime("%Y%m%d%H%M%S")
    task = ee.batch.Export.table.toAsset(
        collection=output_features,
        assetId=output_asset_path,
        description=f"hydrafloods_fusion_samples_export_{time_id}",
    )
    task.start()
    logging.info(f"Started export task for {output_asset_path}")

    return
def _fuse_dataset(
    region,
    start_time,
    end_time,
    fusion_model,
    scaling_dict,
    feature_names,
    target_band="mndwi",
    use_viirs=False,
    use_modis=False,
):
    """Merge optical water-index imagery with model-predicted Sentinel-1 anomalies.

    The optical side (Landsat 8, Landsat 7, Sentinel-2 and optionally VIIRS /
    MODIS) gets `target_band` added through `geeutils.add_indices`. Sentinel-1
    is slope corrected, given SAR indices, converted to anomalies with
    `_calc_sar_anomalies` and classified with `fusion_model` into
    `target_band`. The merged result keeps only `target_band`, cast to float
    and sorted by time.

    args:
        region: region passed to every dataset constructor
        start_time: start time passed to every dataset constructor
        end_time: end time passed to every dataset constructor
        fusion_model: classifier handed to `img.classify`
        scaling_dict: accepted but not used by this implementation
        feature_names: accepted but not used by this implementation
        target_band (str, optional): index/band to fuse. default = "mndwi"
        use_viirs (bool, optional): also merge VIIRS. default = False
        use_modis (bool, optional): also merge MODIS. default = False

    returns:
        the fused dataset holding only `target_band`
    """
    base_kwargs = dict(region=region, start_time=start_time, end_time=end_time)
    adjusted_kwargs = dict(base_kwargs, apply_band_adjustment=True)

    landsat8 = datasets.Landsat8(**base_kwargs)
    landsat7 = datasets.Landsat7(**adjusted_kwargs)
    sentinel2 = datasets.Sentinel2(**adjusted_kwargs)

    optical = landsat8.merge(landsat7).merge(sentinel2)
    if use_viirs:
        optical = optical.merge(datasets.Viirs(**adjusted_kwargs))
    if use_modis:
        optical = optical.merge(datasets.Modis(**base_kwargs))
    optical = optical.apply_func(geeutils.add_indices, indices=[target_band])

    sar = datasets.Sentinel1(**base_kwargs)

    # Distinct calendar years present in the raw Sentinel-1 collection.
    timestamps = sar.collection.aggregate_array("system:time_start")
    years = timestamps.map(lambda x: ee.Date(x).get("year")).distinct()

    dem = ee.Image("NASA/NASADEM_HGT/001").select("elevation")

    # Standard scaling and classification are not part of this pipeline;
    # classification is applied to the anomalies below.
    sar_steps = (
        (corrections.slope_correction, {"elevation": dem, "buffer": 30}),
        (
            geeutils.add_indices,
            {"indices": ["vv_vh_ratio", "ndpi", "nvvi", "nvhi"]},
        ),
    )
    sar = sar.pipe(sar_steps)

    anomalies = _calc_sar_anomalies(years, sar)
    classify = decorators.keep_attrs(
        lambda x: x.classify(fusion_model, target_band)
    )
    sar.collection = anomalies.map(classify)

    fused_ds = optical.merge(sar)
    only_target = fused_ds.collection.select(target_band)
    as_float = only_target.cast({target_band: "float"}, [target_band])
    fused_ds.collection = as_float.sort("system:time_start")

    return fused_ds