def main(start_index: int = 24) -> None:
    """Generate and upload binary water/cloud ground truth for the S2 images.

    For every entry of ``ALL_S2_FILES`` from ``start_index`` onwards, the
    bucket locations of the S2 image, the JRC image, the floodmap geojson and
    the pickled floodmap metadata are built, the ground truth is produced by
    ``generate_water_cloud_binary_gt``, written to a local file below
    ``LOCAL_PATH``, uploaded below ``PARENT_DIR_GT`` and the local copy is
    removed again.

    Entries whose S2 or JRC image does not exist, or whose processing raises,
    are collected and pickled to ``./problems_jrc.pickle``.  A
    ``KeyboardInterrupt`` stops the loop early; the problem list gathered so
    far is still written.

    Args:
        start_index: Index of the first entry of ``ALL_S2_FILES`` to process.
            Defaults to 24, the offset that used to be hard-coded.
    """
    import pickle

    def _bucket_uri(parent_dir, aoi_meta, file_name):
        """Return ``gs://BUCKET_NAME/parent_dir/<activation>/<aoi>/file_name``."""
        # NOTE(review): pathlib is used to join URL parts, so this presumably
        # relies on "/" being the path separator - confirm before running on
        # another platform.
        return "gs://" + str(
            Path(BUCKET_NAME)
            .joinpath(parent_dir)
            .joinpath(aoi_meta.event_activation)
            .joinpath(aoi_meta.directory_aoi)
            .joinpath(file_name)
        )

    problem_files = []

    with tqdm.tqdm(ALL_S2_FILES[start_index:]) as pbar_files:
        for i_file in pbar_files:
            try:
                aoi_meta = find_leaf_nodes(i_file)
                pbar_files.set_description(
                    f"ESMR Code: {aoi_meta.event_activation}, AOI: {aoi_meta.directory_aoi}"
                )

                # ---- S2 image ------------------------------------------
                pbar_files.set_description("Getting S2 Image Path...")
                s2_image_filepath = _bucket_uri(
                    PARENT_DIR_S2, aoi_meta, aoi_meta.file_name
                )
                if not GCPPath(s2_image_filepath).check_if_file_exists():
                    # NOTE(review): missing files are recorded with a "gs://"
                    # prefix while failures below record the bare entry;
                    # kept as-is, confirm which form consumers expect.
                    problem_files.append("gs://" + i_file)
                    continue

                # ---- JRC image -----------------------------------------
                pbar_files.set_description("Getting JRC Image Path...")
                jrc_image_filepath = _bucket_uri(
                    PARENT_DIR_JRC, aoi_meta, aoi_meta.file_name
                )
                if not GCPPath(jrc_image_filepath).check_if_file_exists():
                    problem_files.append("gs://" + i_file)
                    continue

                # ---- floodmap geojson ----------------------------------
                pbar_files.set_description("Getting Floodmap...")
                floodmap_geojson_path = _bucket_uri(
                    PARENT_DIR_FLOODS,
                    aoi_meta,
                    aoi_meta.core_name + "_floodmap.geojson",
                )

                # ---- floodmap metadata ---------------------------------
                pbar_files.set_description("Getting Floodmap meta...")
                meta_floodmap_filepath = _bucket_uri(
                    PARENT_DIR_FLOOD_META,
                    aoi_meta,
                    aoi_meta.core_name + "_metadata_floodmap.pickle",
                )
                # NOTE(review): unpickling is only safe for trusted data;
                # presumably this bucket is project-controlled - confirm.
                floodmap_meta = read_pickle_from_gcp(meta_floodmap_filepath)

                # ---- ground truth --------------------------------------
                pbar_files.set_description("Load Groundtruth...")
                gt_binary, gt_meta_binary = generate_water_cloud_binary_gt(
                    s2_image_filepath,
                    floodmap_geojson_path,
                    metadata_floodmap=floodmap_meta,
                    keep_streams=True,
                    cloudprob_in_lastband=True,
                    permanent_water_image_path=jrc_image_filepath,
                )

                # ---- save locally, upload, clean up --------------------
                pbar_files.set_description("Saving GT Locally...")
                local_file = LOCAL_PATH.joinpath(Path(s2_image_filepath).name)
                try:
                    save_groundtruth_tiff_rasterio(
                        gt_binary,
                        str(local_file),
                        gt_meta=None,
                        crs=gt_meta_binary["crs"],
                        transform=gt_meta_binary["transform"],
                    )

                    pbar_files.set_description("Upload GT to bucket...")
                    target_uri = _bucket_uri(
                        PARENT_DIR_GT, aoi_meta, aoi_meta.file_name
                    )
                    save_file_to_bucket(target_uri, str(local_file))
                finally:
                    # Remove the temporary file even when saving or uploading
                    # failed, so aborted items do not pile up on disk.
                    if local_file.exists():
                        local_file.unlink()

            except KeyboardInterrupt:
                break
            except Exception as err:
                # Only ordinary errors are recorded; SystemExit and friends
                # propagate instead of being silently swallowed.
                pbar_files.write(f"Failed on {i_file}: {err!r}")
                problem_files.append(i_file)

    with open("./problems_jrc.pickle", "wb") as fp:
        pickle.dump(problem_files, fp)
def main() -> None:
    """Generate binary water/cloud ground truth for the configured ML splits.

    For each split in ``ml_paths`` the ``.tif`` files next to the demo S2
    image are listed and the last 50 of them are processed in reverse order.
    For every image the floodmap, cloud-probability, metadata and
    permanent-water locations are derived, the ground truth is produced by
    ``generate_water_cloud_binary_gt``, written to a local file, uploaded to
    the destination bucket and the local copy is removed again.  The
    ``save_*`` switches optionally copy the source artefacts to the
    destination bucket as well.

    Files whose processing raises are collected, printed and pickled to
    ``./momoney_moprobs_v2.pickle``.  A ``KeyboardInterrupt`` stops all
    processing; the problem list gathered so far is still written.
    """
    import pickle

    # Splits to process; uncomment the others to include them.
    ml_paths = [
        # "val",
        # "test",
        "train",
    ]

    local_path = Path(root).joinpath("datasets")
    bucket_id = "ml4floods"
    destination_bucket_id = "ml4cc_data_lake"
    parent_path = "worldfloods/public"
    destination_parent_path = "0_DEV/2_Mart/worldfloods_v2_0"
    cloud_prob_parent_path = "worldfloods/tiffimages"
    permanent_water_parent_path = "worldfloods/tiffimages/PERMANENTWATERJRC"

    # Optional copies of the source artefacts into the destination bucket.
    save_s2_image = False
    save_meta_data = False
    save_permanent_water_image = False
    save_cloud_prob = False
    save_floodmap_meta = False

    demo_image = "gs://ml4floods/worldfloods/public/test/S2/EMSR286_08ITUANGONORTH_DEL_MONIT02_v1_observed_event_a.tif"

    def _gcp_path(*parts):
        """Join ``parts`` into one path and wrap it in a ``GCPPath``."""
        return GCPPath(str(Path(*parts)))

    problem_files = []
    interrupted = False

    for ipath in ml_paths:
        # The demo image is only used to locate the S2 directory of the split.
        demo_image_gcp = GCPPath(demo_image).replace("test", ipath)
        files_in_bucket = demo_image_gcp.get_files_in_parent_directory_with_suffix(
            ".tif"
        )

        # HACK: restrict the run to the last 50 listed files.
        last_x_slices = slice(-50, None)

        print(f"Generating ML GT for {ipath.title()}")
        with tqdm.tqdm(list(reversed(files_in_bucket[last_x_slices]))) as pbar:
            for s2_file in pbar:
                # Identifier reported on failure; the raw entry is used until
                # the GCPPath exists, so the handler below can never raise.
                failed_file = s2_file
                try:
                    pbar.set_description("Getting Paths...")
                    s2_image_path = GCPPath(s2_file)
                    failed_file = s2_image_path.full_path

                    floodmap_path = s2_image_path.replace("/S2/", "/floodmaps/")
                    floodmap_path = floodmap_path.replace(".tif", ".shp")

                    # Prefer the edited cloud probabilities, fall back to the
                    # plain ones.  An explicit test is used instead of
                    # ``assert`` because asserts are removed under ``-O``.
                    cloudprob_path = _gcp_path(
                        bucket_id,
                        cloud_prob_parent_path,
                        "cloudprob_edited",
                        s2_image_path.file_name,
                    )
                    if cloudprob_path.check_if_file_exists() is not True:
                        cloudprob_path = _gcp_path(
                            bucket_id,
                            cloud_prob_parent_path,
                            "cloudprob",
                            s2_image_path.file_name,
                        )

                    meta_path = s2_image_path.replace("/S2/", "/meta/")
                    meta_path = meta_path.replace(".tif", ".json")

                    # ---- permanent water tiff (optional input) ----------
                    pbar.set_description("Grabbing Permanent Water Tiff...")
                    permanent_water_gcp = _gcp_path(
                        bucket_id,
                        permanent_water_parent_path,
                        s2_image_path.file_name,
                    )
                    if permanent_water_gcp.check_if_file_exists() is True:
                        permanent_water_uri = permanent_water_gcp.full_path
                    else:
                        pbar.set_description("Didnt Find...")
                        permanent_water_uri = None

                    # ---- generate ground truth --------------------------
                    pbar.set_description("Generating Ground Truth...")
                    floodmap_meta = load_json_from_bucket(
                        meta_path.bucket_id, meta_path.get_file_path()
                    )
                    gt, gt_meta = generate_water_cloud_binary_gt(
                        s2_image_path.full_path,
                        floodmap_path.full_path,
                        floodmap_meta,
                        keep_streams=True,
                        cloudprob_image_path=cloudprob_path.full_path,
                        permanent_water_image_path=permanent_water_uri,
                    )

                    # ---- optional copies --------------------------------
                    if save_permanent_water_image and permanent_water_uri is not None:
                        pbar.set_description("Saving permanent water image...")
                        permanent_water_image_path_dest = _gcp_path(
                            destination_bucket_id,
                            destination_parent_path,
                            ipath,
                            "permanent_water",
                            s2_image_path.file_name,
                        )
                        permanent_water_gcp.transfer_file_to_bucket_gsutils(
                            permanent_water_image_path_dest.full_path,
                            file_name=True,
                        )

                    if save_s2_image:
                        pbar.set_description("Saving S2 image...")
                        s2_image_path_dest = _gcp_path(
                            destination_bucket_id,
                            destination_parent_path,
                            ipath,
                            "S2",
                            s2_image_path.file_name,
                        )
                        s2_image_path.transfer_file_to_bucket_gsutils(
                            s2_image_path_dest.full_path, file_name=True
                        )

                    if save_meta_data:
                        pbar.set_description("Saving meta data...")
                        meta_parent_destination = Path(
                            destination_parent_path
                        ).joinpath(ipath, "meta")
                        meta_path.transfer_file_to_bucket(
                            destination_bucket_id, meta_parent_destination
                        )

                    if save_cloud_prob:
                        pbar.set_description("Saving cloud probs data...")
                        cloudprob_path_dest = _gcp_path(
                            destination_bucket_id,
                            destination_parent_path,
                            ipath,
                            "cloudprob",
                            cloudprob_path.file_name,
                        )
                        cloudprob_path.transfer_file_to_bucket_gsutils(
                            cloudprob_path_dest.full_path, file_name=True
                        )

                    if save_floodmap_meta:
                        # A floodmap consists of several files, each of which
                        # is transferred individually.
                        pbar.set_description("Saving floodmap meta data...")
                        floodmap_parent_destination = Path(
                            destination_parent_path
                        ).joinpath(ipath, "floodmap")
                        floodmap_meta_files = (
                            floodmap_path.get_files_in_parent_directory_with_name()
                        )
                        for ifloodmap_meta_file in floodmap_meta_files:
                            GCPPath(ifloodmap_meta_file).transfer_file_to_bucket(
                                destination_bucket_id, floodmap_parent_destination
                            )

                    # ---- save ground truth ------------------------------
                    pbar.set_description("Saving GT data...")
                    gt_path = s2_image_path.replace(bucket_id, destination_bucket_id)
                    gt_path = gt_path.replace("/S2/", "/gt/")
                    gt_path = gt_path.replace(parent_path, destination_parent_path)

                    local_gt_file = local_path.joinpath(gt_path.file_name)
                    try:
                        save_groundtruth_tiff_rasterio(
                            gt,
                            str(local_gt_file),
                            gt_meta=None,
                            crs=gt_meta["crs"],
                            transform=gt_meta["transform"],
                        )
                        save_file_to_bucket(gt_path.full_path, str(local_gt_file))
                    finally:
                        # Remove the temporary file even when saving or
                        # uploading failed.
                        if local_gt_file.exists():
                            local_gt_file.unlink()

                except KeyboardInterrupt:
                    interrupted = True
                    break
                except Exception as err:
                    # Only ordinary errors are recorded; SystemExit and
                    # friends propagate instead of being swallowed.
                    pbar.write(f"Failed on {failed_file}: {err!r}")
                    problem_files.append(failed_file)

        if interrupted:
            # Ctrl-C stops the whole run, not only the current split.
            break

    print(problem_files)

    with open("./momoney_moprobs_v2.pickle", "wb") as fp:
        pickle.dump(problem_files, fp)
def main(start_index: int = 207) -> None:
    """Generate land/water/cloud ground truth for the configured ML splits.

    For each split in ``ml_paths`` the ``.tif`` files next to the demo S2
    image are listed and processed from ``start_index`` onwards.  For every
    image the floodmap, cloud-probability, metadata and permanent-water
    locations are derived, the ground truth is produced by
    ``generate_land_water_cloud_gt``, written to a local file, uploaded to
    the destination bucket and the local copy is removed again.  The
    ``save_*`` switches optionally copy the source artefacts to the
    destination bucket as well.

    Files whose processing raises are collected and printed at the end.  A
    ``KeyboardInterrupt`` stops all processing; the list gathered so far is
    still printed.

    Args:
        start_index: Index of the first listed file to process in each split.
            Defaults to 207, the offset that used to be hard-coded.
    """
    # Splits to process; uncomment the others to include them.
    ml_paths = [
        # "test",
        # "val",
        "train"
    ]

    local_path = Path(root).joinpath("datasets")
    bucket_id = "ml4floods"
    destination_bucket_id = "ml4cc_data_lake"
    parent_path = "worldfloods/public"
    destination_parent_path = "0_DEV/2_Mart/worldfloods_v1_0"
    cloud_prob_parent_path = "worldfloods/tiffimages"
    permanent_water_parent_path = "worldfloods/tiffimages/PERMANENTWATERJRC"

    # Optional copies of the source artefacts into the destination bucket.
    save_s2_image = False
    save_meta_data = False
    save_permanent_water_image = False
    save_cloud_prob = False
    save_floodmap_meta = False

    demo_image = "gs://ml4floods/worldfloods/public/test/S2/EMSR286_08ITUANGONORTH_DEL_MONIT02_v1_observed_event_a.tif"

    def _gcp_path(*parts):
        """Join ``parts`` into one path and wrap it in a ``GCPPath``."""
        return GCPPath(str(Path(*parts)))

    problem_files = []
    interrupted = False

    for ipath in ml_paths:
        # The demo image is only used to locate the S2 directory of the split.
        # NOTE(review): the last two replacements look redundant once "test"
        # has been substituted - kept unchanged, confirm before removing.
        demo_image_gcp = GCPPath(demo_image)
        demo_image_gcp = demo_image_gcp.replace("test", ipath)
        demo_image_gcp = demo_image_gcp.replace("train", ipath)
        demo_image_gcp = demo_image_gcp.replace("val", ipath)

        files_in_bucket = demo_image_gcp.get_files_in_parent_directory_with_suffix(
            ".tif"
        )

        print(f"Generating ML GT for {ipath.title()}, {len(files_in_bucket)} files")
        with tqdm.tqdm(files_in_bucket[start_index:]) as pbar:
            for s2_file in pbar:
                # Identifier reported on failure; the raw entry is used until
                # the GCPPath exists, so the handler below can never raise.
                failed_file = s2_file
                try:
                    s2_image_path = GCPPath(s2_file)
                    failed_file = s2_image_path.full_path

                    floodmap_path = s2_image_path.replace("/S2/", "/floodmaps/")
                    floodmap_path = floodmap_path.replace(".tif", ".shp")

                    # Prefer the edited cloud probabilities, fall back to the
                    # plain ones.  An explicit test is used instead of
                    # ``assert`` because asserts are removed under ``-O``.
                    cloudprob_path = _gcp_path(
                        bucket_id,
                        cloud_prob_parent_path,
                        "cloudprob_edited",
                        s2_image_path.file_name,
                    )
                    if cloudprob_path.check_if_file_exists() is not True:
                        cloudprob_path = _gcp_path(
                            bucket_id,
                            cloud_prob_parent_path,
                            "cloudprob",
                            s2_image_path.file_name,
                        )

                    meta_path = s2_image_path.replace("/S2/", "/meta/")
                    meta_path = meta_path.replace(".tif", ".json")

                    # ---- permanent water tiff (optional input) ----------
                    permanent_water_gcp = _gcp_path(
                        bucket_id,
                        permanent_water_parent_path,
                        s2_image_path.file_name,
                    )
                    if permanent_water_gcp.check_if_file_exists() is True:
                        permanent_water_uri = permanent_water_gcp.full_path
                    else:
                        permanent_water_uri = None

                    # ---- generate ground truth --------------------------
                    pbar.set_description("Generating Ground Truth...")
                    gt, gt_meta = generate_land_water_cloud_gt(
                        s2_image_path.full_path,
                        floodmap_path.full_path,
                        keep_streams=True,
                        cloudprob_image_path=cloudprob_path.full_path,
                        permanent_water_image_path=permanent_water_uri,
                    )

                    # ---- optional copies --------------------------------
                    if save_permanent_water_image and permanent_water_uri is not None:
                        pbar.set_description("Saving permanent water image...")
                        permanent_water_image_path_dest = _gcp_path(
                            destination_bucket_id,
                            destination_parent_path,
                            ipath,
                            "permanent_water",
                            s2_image_path.file_name,
                        )
                        permanent_water_gcp.transfer_file_to_bucket_gsutils(
                            permanent_water_image_path_dest.full_path,
                            file_name=True,
                        )

                    if save_s2_image:
                        pbar.set_description("Saving S2 image...")
                        s2_image_path_dest = _gcp_path(
                            destination_bucket_id,
                            destination_parent_path,
                            ipath,
                            "S2",
                            s2_image_path.file_name,
                        )
                        s2_image_path.transfer_file_to_bucket_gsutils(
                            s2_image_path_dest.full_path, file_name=True
                        )

                    if save_meta_data:
                        pbar.set_description("Saving meta data...")
                        meta_parent_destination = Path(
                            destination_parent_path
                        ).joinpath(ipath, "meta")
                        meta_path.transfer_file_to_bucket(
                            destination_bucket_id, meta_parent_destination
                        )

                    if save_cloud_prob:
                        pbar.set_description("Saving cloud probs data...")
                        cloudprob_path_dest = _gcp_path(
                            destination_bucket_id,
                            destination_parent_path,
                            ipath,
                            "cloudprob",
                            cloudprob_path.file_name,
                        )
                        cloudprob_path.transfer_file_to_bucket_gsutils(
                            cloudprob_path_dest.full_path, file_name=True
                        )

                    if save_floodmap_meta:
                        # A floodmap consists of several files, each of which
                        # is transferred individually.
                        pbar.set_description("Saving floodmap meta data...")
                        floodmap_parent_destination = Path(
                            destination_parent_path
                        ).joinpath(ipath, "floodmap")
                        floodmap_meta_files = (
                            floodmap_path.get_files_in_parent_directory_with_name()
                        )
                        for ifloodmap_meta_file in floodmap_meta_files:
                            GCPPath(ifloodmap_meta_file).transfer_file_to_bucket(
                                destination_bucket_id, floodmap_parent_destination
                            )

                    # ---- save ground truth ------------------------------
                    pbar.set_description("Saving GT data...")
                    gt_path = s2_image_path.replace(bucket_id, destination_bucket_id)
                    gt_path = gt_path.replace("/S2/", "/gt/")
                    gt_path = gt_path.replace(parent_path, destination_parent_path)

                    local_gt_file = local_path.joinpath(gt_path.file_name)
                    try:
                        save_groundtruth_tiff_rasterio(
                            gt,
                            str(local_gt_file),
                            gt_meta=gt_meta,
                            crs=gt_meta["crs"],
                            transform=gt_meta["transform"],
                        )
                        save_file_to_bucket(gt_path.full_path, str(local_gt_file))
                    finally:
                        # Remove the temporary file even when saving or
                        # uploading failed.
                        if local_gt_file.exists():
                            local_gt_file.unlink()

                except KeyboardInterrupt:
                    interrupted = True
                    break
                except Exception as err:
                    # Only ordinary errors are recorded; SystemExit and
                    # friends propagate instead of being swallowed.
                    pbar.write(f"Failed on {failed_file}: {err!r}")
                    problem_files.append(failed_file)

        if interrupted:
            # Ctrl-C stops the whole run, not only the current split.
            break

    print(problem_files)
except AttributeError: print(f"Metadata for {dc.file_name} not found") dc = open_source_tiff(dc) return dc for ifile in tqdm.tqdm(files): # initialize dc_example = init_wfs2_dataclass(ifile) # save name save_name = Path(dc_example.file_name).stem suffix = ".pkl" # local directory save local_dir = root.joinpath("datasets/test/") local_dir = str(local_dir.joinpath(save_name + suffix)) save_dataclass_pickle(dc_example, local_dir) # bucket save target_dir = f"gs://ml4cc_data_lake/0_DEV/0_Raw/WorldFloods/tiffimages_dataclass/{save_name}{suffix}" save_file_to_bucket(target_dir, local_dir) # remove from local directory rem_file = Path(local_dir) rem_file.unlink() del dc_example