def main(start_index=24):
    """Generate binary water/cloud ground truth for S2 images and upload it.

    For every entry of ``ALL_S2_FILES`` from ``start_index`` onwards this:

    1. resolves the AOI metadata with ``find_leaf_nodes``;
    2. builds the bucket paths of the S2 image, the JRC permanent-water
       image, the floodmap geojson and the floodmap metadata pickle;
    3. generates the ground truth with ``generate_water_cloud_binary_gt``;
    4. writes it to a local GeoTIFF under ``LOCAL_PATH``, uploads it under
       ``PARENT_DIR_GT`` and removes the local copy.

    Entries whose S2 or JRC image does not exist, or whose processing raises,
    are collected and pickled to ``./problems_jrc.pickle``.  A
    ``KeyboardInterrupt`` stops the loop early; the problem list is still
    written.

    Args:
        start_index: Index of the first entry of ``ALL_S2_FILES`` to process.
            Defaults to 24, the offset that used to be hard-coded.
    """
    import pickle

    def bucket_uri(parent_dir, aoi_meta, file_name):
        """Return the gs:// URI of ``file_name`` under ``parent_dir`` for an AOI."""
        return "gs://" + str(
            Path(BUCKET_NAME).joinpath(
                parent_dir,
                aoi_meta.event_activation,
                aoi_meta.directory_aoi,
                file_name,
            )
        )

    problem_files = []

    with tqdm.tqdm(ALL_S2_FILES[start_index:]) as pbar_files:

        for i_file in pbar_files:

            # Every failure is recorded in the same "gs://..." form, whether
            # an input is missing or an exception was raised.
            problem_uri = f"gs://{i_file}"

            try:
                aoi_meta = find_leaf_nodes(i_file)

                # update progress bar
                pbar_files.set_description(
                    f"ESMR Code: {aoi_meta.event_activation}, AOI: {aoi_meta.directory_aoi}"
                )

                # ======================
                # LOAD S2 IMAGE PATH
                # ======================
                pbar_files.set_description("Getting S2 Image Path...")
                s2_image_filepath = bucket_uri(
                    PARENT_DIR_S2, aoi_meta, aoi_meta.file_name
                )
                if not GCPPath(s2_image_filepath).check_if_file_exists():
                    problem_files.append(problem_uri)
                    continue

                # ======================
                # LOAD JRC IMAGE PATH
                # ======================
                pbar_files.set_description("Getting JRC Image Path...")
                jrc_image_filepath = bucket_uri(
                    PARENT_DIR_JRC, aoi_meta, aoi_meta.file_name
                )
                if not GCPPath(jrc_image_filepath).check_if_file_exists():
                    problem_files.append(problem_uri)
                    continue

                # =======================
                # LOAD FLOODMAP, geojson
                # =======================
                pbar_files.set_description("Getting Floodmap...")
                floodmap_geojson_path = bucket_uri(
                    PARENT_DIR_FLOODS,
                    aoi_meta,
                    aoi_meta.core_name + "_floodmap.geojson",
                )

                # ======================
                # LOAD FLOODMAP META
                # ======================
                pbar_files.set_description("Getting Floodmap meta...")
                meta_floodmap_filepath = bucket_uri(
                    PARENT_DIR_FLOOD_META,
                    aoi_meta,
                    aoi_meta.core_name + "_metadata_floodmap.pickle",
                )
                floodmap_meta = read_pickle_from_gcp(meta_floodmap_filepath)

                # ======================
                # LOAD GT
                # ======================
                pbar_files.set_description("Load Groundtruth...")
                gt_binary, gt_meta_binary = generate_water_cloud_binary_gt(
                    s2_image_filepath,
                    floodmap_geojson_path,
                    metadata_floodmap=floodmap_meta,
                    keep_streams=True,
                    cloudprob_in_lastband=True,
                    permanent_water_image_path=jrc_image_filepath,
                )

                # ======================
                # SAVE GT (LOCALLY) AND UPLOAD (GCP)
                # ======================
                local_file = LOCAL_PATH.joinpath(Path(s2_image_filepath).name)
                target_uri = bucket_uri(PARENT_DIR_GT, aoi_meta, aoi_meta.file_name)

                try:
                    pbar_files.set_description("Saving GT Locally...")
                    save_groundtruth_tiff_rasterio(
                        gt_binary,
                        str(local_file),
                        gt_meta=None,
                        crs=gt_meta_binary["crs"],
                        transform=gt_meta_binary["transform"],
                    )

                    pbar_files.set_description("Upload GT to bucket...")
                    save_file_to_bucket(target_uri, str(local_file))
                finally:
                    # Remove the temporary raster even when the write or the
                    # upload failed, so failed runs do not fill the local disk.
                    if local_file.exists():
                        local_file.unlink()

            except KeyboardInterrupt:
                break
            except Exception as err:
                # Best effort: remember the file, report why it failed and
                # carry on with the next one.
                problem_files.append(problem_uri)
                pbar_files.write(f"Failed on {problem_uri}: {err!r}")

    with open("./problems_jrc.pickle", "wb") as fp:
        pickle.dump(problem_files, fp)
def main():
    """Generate binary water/cloud ground truth for the WorldFloods ML splits.

    For each split in ``ml_paths`` the S2 images of the source bucket are
    listed (only the last 50, processed in reverse order), and for every
    image this:

    1. derives the floodmap, cloud-probability, metadata and permanent-water
       paths from the S2 path;
    2. loads the floodmap metadata JSON and calls
       ``generate_water_cloud_binary_gt``;
    3. optionally copies the source artefacts to the destination bucket
       (all ``save_*`` switches are off by default);
    4. writes the ground truth to a local GeoTIFF, uploads it to the
       destination bucket and removes the local copy.

    Images whose processing raises are collected, printed and pickled to
    ``./momoney_moprobs_v2.pickle``.  A ``KeyboardInterrupt`` stops the loop
    over the current split.
    """
    import pickle

    # looping through the ML parts
    ml_paths = [
        # "val",
        # "test",
        "train",
    ]

    local_path = Path(root).joinpath("datasets")

    bucket_id = "ml4floods"
    destination_bucket_id = "ml4cc_data_lake"

    parent_path = "worldfloods/public"
    destination_parent_path = "0_DEV/2_Mart/worldfloods_v2_0"
    cloud_prob_parent_path = "worldfloods/tiffimages"
    permanent_water_parent_path = "worldfloods/tiffimages/PERMANENTWATERJRC"
    save_s2_image = False
    save_meta_data = False
    save_permanent_water_image = False
    save_cloud_prob = False
    save_floodmap_meta = False

    # Any existing S2 image; only used to locate the S2 directory of a split.
    demo_image = "gs://ml4floods/worldfloods/public/test/S2/EMSR286_08ITUANGONORTH_DEL_MONIT02_v1_observed_event_a.tif"

    def source_path(*parts):
        """Return a GCPPath inside the source bucket built from ``parts``."""
        return GCPPath(str(Path(bucket_id).joinpath(*parts)))

    def destination_path(split, *parts):
        """Return a GCPPath inside the destination bucket for ``split``."""
        return GCPPath(
            str(
                Path(destination_bucket_id).joinpath(
                    destination_parent_path, split, *parts
                )
            )
        )

    problem_files = []

    for ipath in ml_paths:

        # point the demo image at the split being processed
        demo_image_gcp = GCPPath(demo_image).replace("test", ipath)

        # get all files in the parent directory
        files_in_bucket = demo_image_gcp.get_files_in_parent_directory_with_suffix(
            ".tif"
        )

        # HACK: only the last 50 files are processed
        last_x_slices = slice(-50, None)

        print(f"Generating ML GT for {ipath.title()}")

        with tqdm.tqdm(list(reversed(files_in_bucket[last_x_slices]))) as pbar:
            for s2_image_uri in pbar:

                # Stays None until the GCPPath exists, so the error handler
                # below can always name the failing file.
                s2_image_path = None

                try:
                    pbar.set_description("Getting Paths...")

                    s2_image_path = GCPPath(s2_image_uri)
                    file_name = s2_image_path.file_name

                    # create floodmap path
                    floodmap_path = s2_image_path.replace("/S2/", "/floodmaps/")
                    floodmap_path = floodmap_path.replace(".tif", ".shp")

                    # Cloud probabilities: prefer the edited version and fall
                    # back to the plain one.  An explicit test is used rather
                    # than `assert`, which is stripped under `python -O`.
                    cloudprob_path = source_path(
                        cloud_prob_parent_path, "cloudprob_edited", file_name
                    )
                    if cloudprob_path.check_if_file_exists() is not True:
                        cloudprob_path = source_path(
                            cloud_prob_parent_path, "cloudprob", file_name
                        )

                    # create meta path
                    meta_path = s2_image_path.replace("/S2/", "/meta/")
                    meta_path = meta_path.replace(".tif", ".json")

                    # ==============================
                    # OPEN PERMANENT WATER TIFF
                    # ==============================
                    pbar.set_description("Grabbing Permanent Water Tiff...")
                    permanent_water_gcp = source_path(
                        permanent_water_parent_path, file_name
                    )
                    if permanent_water_gcp.check_if_file_exists() is True:
                        permenant_water_path = permanent_water_gcp.full_path
                    else:
                        pbar.set_description("Didnt Find...")
                        permenant_water_path = None

                    # ==============================
                    # Generate GT Image
                    # ==============================
                    pbar.set_description("Generating Ground Truth...")

                    # load the meta
                    floodmap_meta = load_json_from_bucket(
                        meta_path.bucket_id, meta_path.get_file_path()
                    )

                    # generate gt and gt meta
                    gt, gt_meta = generate_water_cloud_binary_gt(
                        s2_image_path.full_path,
                        floodmap_path.full_path,
                        floodmap_meta,
                        keep_streams=True,
                        cloudprob_image_path=cloudprob_path.full_path,
                        permanent_water_image_path=permenant_water_path,
                    )

                    # ==============================
                    # SAVE Permanent Water Image
                    # ==============================
                    if save_permanent_water_image and permenant_water_path is not None:
                        pbar.set_description("Saving permanent water image...")
                        permanent_water_image_path_dest = destination_path(
                            ipath, "permanent_water", file_name
                        )
                        permanent_water_gcp.transfer_file_to_bucket_gsutils(
                            permanent_water_image_path_dest.full_path,
                            file_name=True,
                        )

                    # ==============================
                    # SAVE S2 Image
                    # ==============================
                    if save_s2_image:
                        pbar.set_description("Saving S2 image...")
                        s2_image_path_dest = destination_path(ipath, "S2", file_name)
                        s2_image_path.transfer_file_to_bucket_gsutils(
                            s2_image_path_dest.full_path, file_name=True
                        )

                    # ==============================
                    # SAVE Meta Data
                    # ==============================
                    if save_meta_data:
                        pbar.set_description("Saving meta data...")
                        meta_parent_destination = Path(
                            destination_parent_path
                        ).joinpath(ipath, "meta")
                        meta_path.transfer_file_to_bucket(
                            destination_bucket_id, meta_parent_destination
                        )

                    # ==============================
                    # SAVE Cloud Probabilities
                    # ==============================
                    if save_cloud_prob:
                        pbar.set_description("Saving cloud probs data...")
                        cloudprob_path_dest = destination_path(
                            ipath, "cloudprob", cloudprob_path.file_name
                        )
                        cloudprob_path.transfer_file_to_bucket_gsutils(
                            cloudprob_path_dest.full_path, file_name=True
                        )

                    # ==============================
                    # SAVE FloodMap Data
                    # ==============================
                    if save_floodmap_meta:
                        # special case of multiple files
                        pbar.set_description("Saving floodmap meta data...")
                        floodmap_parent_destination = Path(
                            destination_parent_path
                        ).joinpath(ipath, "floodmap")
                        floodmap_meta_files = (
                            floodmap_path.get_files_in_parent_directory_with_name()
                        )
                        for ifloodmap_meta_file in floodmap_meta_files:
                            GCPPath(ifloodmap_meta_file).transfer_file_to_bucket(
                                destination_bucket_id, floodmap_parent_destination
                            )

                    # ==============================
                    # SAVE GT Data (WorldFloods 1.1)
                    # ==============================
                    pbar.set_description("Saving GT data...")

                    # map the S2 path onto the destination bucket / gt folder
                    gt_path = s2_image_path.replace(bucket_id, destination_bucket_id)
                    gt_path = gt_path.replace("/S2/", "/gt/")
                    gt_path = gt_path.replace(parent_path, destination_parent_path)

                    local_gt_file = local_path.joinpath(gt_path.file_name)
                    try:
                        save_groundtruth_tiff_rasterio(
                            gt,
                            str(local_gt_file),
                            gt_meta=None,
                            crs=gt_meta["crs"],
                            transform=gt_meta["transform"],
                        )
                        save_file_to_bucket(gt_path.full_path, str(local_gt_file))
                    finally:
                        # Remove the temporary raster even when the write or
                        # the upload failed.
                        if local_gt_file.exists():
                            local_gt_file.unlink()

                except KeyboardInterrupt:
                    break
                except Exception as err:
                    # `s2_image_path` is still None when GCPPath() itself
                    # failed; fall back to the raw listing entry so that the
                    # handler cannot raise and abort the whole run.
                    failed_file = getattr(s2_image_path, "full_path", s2_image_uri)
                    problem_files.append(failed_file)
                    pbar.write(f"Failed on {failed_file}: {err!r}")

    print(problem_files)

    with open("./momoney_moprobs_v2.pickle", "wb") as fp:
        pickle.dump(problem_files, fp)
# Example no. 3
# 0
def main(start_index=207):
    """Generate land/water/cloud ground truth for the WorldFloods ML splits.

    For each split in ``ml_paths`` the S2 images of the source bucket are
    listed and, from ``start_index`` onwards, for every image this:

    1. derives the floodmap, cloud-probability, metadata and permanent-water
       paths from the S2 path;
    2. calls ``generate_land_water_cloud_gt``;
    3. optionally copies the source artefacts to the destination bucket
       (all ``save_*`` switches are off by default);
    4. writes the ground truth to a local GeoTIFF, uploads it to the
       destination bucket and removes the local copy.

    Images whose processing raises are collected and printed at the end.
    A ``KeyboardInterrupt`` stops the loop over the current split.

    Args:
        start_index: Index of the first listed file to process in each split.
            Defaults to 207, the offset that used to be hard-coded.
    """
    # looping through the ML parts
    ml_paths = [
        # "test",
        # "val",
        "train"
    ]

    local_path = Path(root).joinpath("datasets")

    bucket_id = "ml4floods"
    destination_bucket_id = "ml4cc_data_lake"

    parent_path = "worldfloods/public"
    destination_parent_path = "0_DEV/2_Mart/worldfloods_v1_0"
    cloud_prob_parent_path = "worldfloods/tiffimages"
    permanent_water_parent_path = "worldfloods/tiffimages/PERMANENTWATERJRC"
    save_s2_image = False
    save_meta_data = False
    save_permanent_water_image = False
    save_cloud_prob = False
    save_floodmap_meta = False

    # Any existing S2 image; only used to locate the S2 directory of a split.
    demo_image = "gs://ml4floods/worldfloods/public/test/S2/EMSR286_08ITUANGONORTH_DEL_MONIT02_v1_observed_event_a.tif"

    def source_path(*parts):
        """Return a GCPPath inside the source bucket built from ``parts``."""
        return GCPPath(str(Path(bucket_id).joinpath(*parts)))

    def destination_path(split, *parts):
        """Return a GCPPath inside the destination bucket for ``split``."""
        return GCPPath(
            str(
                Path(destination_bucket_id).joinpath(
                    destination_parent_path, split, *parts
                )
            )
        )

    problem_files = []

    for ipath in ml_paths:

        # point the demo image at the split being processed
        demo_image_gcp = GCPPath(demo_image)
        demo_image_gcp = demo_image_gcp.replace("test", ipath)
        demo_image_gcp = demo_image_gcp.replace("train", ipath)
        demo_image_gcp = demo_image_gcp.replace("val", ipath)

        # get all files in the parent directory
        files_in_bucket = demo_image_gcp.get_files_in_parent_directory_with_suffix(
            ".tif"
        )

        # loop through files in the bucket
        print(f"Generating ML GT for {ipath.title()}, {len(files_in_bucket)} files")
        with tqdm.tqdm(files_in_bucket[start_index:]) as pbar:
            for s2_image_uri in pbar:

                # Stays None until the GCPPath exists, so the error handler
                # below can always name the failing file.
                s2_image_path = None

                try:
                    s2_image_path = GCPPath(s2_image_uri)
                    file_name = s2_image_path.file_name

                    # create floodmap path
                    floodmap_path = s2_image_path.replace("/S2/", "/floodmaps/")
                    floodmap_path = floodmap_path.replace(".tif", ".shp")

                    # Cloud probabilities: prefer the edited version and fall
                    # back to the plain one.  An explicit test is used rather
                    # than `assert`, which is stripped under `python -O`.
                    cloudprob_path = source_path(
                        cloud_prob_parent_path, "cloudprob_edited", file_name
                    )
                    if cloudprob_path.check_if_file_exists() is not True:
                        cloudprob_path = source_path(
                            cloud_prob_parent_path, "cloudprob", file_name
                        )

                    # create meta path
                    meta_path = s2_image_path.replace("/S2/", "/meta/")
                    meta_path = meta_path.replace(".tif", ".json")

                    # Permanent water is optional: pass None when missing.
                    permanent_water_gcp = source_path(
                        permanent_water_parent_path, file_name
                    )
                    if permanent_water_gcp.check_if_file_exists() is True:
                        permenant_water_path = permanent_water_gcp.full_path
                    else:
                        permenant_water_path = None

                    # ==============================
                    # Generate GT Image
                    # ==============================
                    pbar.set_description("Generating Ground Truth...")

                    # generate gt and gt meta
                    gt, gt_meta = generate_land_water_cloud_gt(
                        s2_image_path.full_path,
                        floodmap_path.full_path,
                        keep_streams=True,
                        cloudprob_image_path=cloudprob_path.full_path,
                        permanent_water_image_path=permenant_water_path,
                    )

                    # ==============================
                    # SAVE Permanent Water Image
                    # ==============================
                    if save_permanent_water_image and permenant_water_path is not None:
                        pbar.set_description("Saving permanent water image...")
                        permanent_water_image_path_dest = destination_path(
                            ipath, "permanent_water", file_name
                        )
                        permanent_water_gcp.transfer_file_to_bucket_gsutils(
                            permanent_water_image_path_dest.full_path, file_name=True
                        )

                    # ==============================
                    # SAVE S2 Image
                    # ==============================
                    if save_s2_image:
                        pbar.set_description("Saving S2 image...")
                        s2_image_path_dest = destination_path(ipath, "S2", file_name)
                        s2_image_path.transfer_file_to_bucket_gsutils(
                            s2_image_path_dest.full_path, file_name=True
                        )

                    # ==============================
                    # SAVE Meta Data
                    # ==============================
                    if save_meta_data:
                        pbar.set_description("Saving meta data...")
                        meta_parent_destination = Path(
                            destination_parent_path
                        ).joinpath(ipath, "meta")
                        meta_path.transfer_file_to_bucket(
                            destination_bucket_id, meta_parent_destination
                        )

                    # ==============================
                    # SAVE Cloud Probabilities
                    # ==============================
                    if save_cloud_prob:
                        pbar.set_description("Saving cloud probs data...")
                        cloudprob_path_dest = destination_path(
                            ipath, "cloudprob", cloudprob_path.file_name
                        )
                        cloudprob_path.transfer_file_to_bucket_gsutils(
                            cloudprob_path_dest.full_path, file_name=True
                        )

                    # ==============================
                    # SAVE FloodMap Data
                    # ==============================
                    if save_floodmap_meta:
                        # special case of multiple files
                        pbar.set_description("Saving floodmap meta data...")
                        floodmap_parent_destination = Path(
                            destination_parent_path
                        ).joinpath(ipath, "floodmap")
                        floodmap_meta_files = (
                            floodmap_path.get_files_in_parent_directory_with_name()
                        )
                        for ifloodmap_meta_file in floodmap_meta_files:
                            GCPPath(ifloodmap_meta_file).transfer_file_to_bucket(
                                destination_bucket_id, floodmap_parent_destination
                            )

                    # ==============================
                    # SAVE GT Data (WorldFloods 1.1)
                    # ==============================
                    pbar.set_description("Saving GT data...")

                    # map the S2 path onto the destination bucket / gt folder
                    gt_path = s2_image_path.replace(bucket_id, destination_bucket_id)
                    gt_path = gt_path.replace("/S2/", "/gt/")
                    gt_path = gt_path.replace(parent_path, destination_parent_path)

                    local_gt_file = local_path.joinpath(gt_path.file_name)
                    try:
                        save_groundtruth_tiff_rasterio(
                            gt,
                            str(local_gt_file),
                            gt_meta=gt_meta,
                            crs=gt_meta["crs"],
                            transform=gt_meta["transform"],
                        )
                        save_file_to_bucket(gt_path.full_path, str(local_gt_file))
                    finally:
                        # Remove the temporary raster even when the write or
                        # the upload failed.
                        if local_gt_file.exists():
                            local_gt_file.unlink()

                except KeyboardInterrupt:
                    break
                except Exception as err:
                    # `s2_image_path` is still None when GCPPath() itself
                    # failed; fall back to the raw listing entry so that the
                    # handler cannot raise and abort the whole run.
                    failed_file = getattr(s2_image_path, "full_path", s2_image_uri)
                    problem_files.append(failed_file)
                    pbar.write(f"Failed on {failed_file}: {err!r}")

    print(problem_files)
# Example no. 4
# 0
    except AttributeError:
        print(f"Metadata for {dc.file_name} not found")

    dc = open_source_tiff(dc)

    return dc


# Pickle one dataclass per source file, push it to the bucket and clean up.
for ifile in tqdm.tqdm(files):

    # Build the dataclass for this file.
    dc_example = init_wfs2_dataclass(ifile)

    # The pickle is named after the stem of the dataclass' file name.
    save_name = Path(dc_example.file_name).stem
    suffix = ".pkl"

    # Write the pickle into the local staging directory first.
    local_dir = str(
        root.joinpath("datasets/test/").joinpath(f"{save_name}{suffix}")
    )
    save_dataclass_pickle(dc_example, local_dir)

    # Upload the staged pickle to the data-lake bucket.
    target_dir = f"gs://ml4cc_data_lake/0_DEV/0_Raw/WorldFloods/tiffimages_dataclass/{save_name}{suffix}"
    save_file_to_bucket(target_dir, local_dir)

    # The staged copy is no longer needed once it has been uploaded.
    rem_file = Path(local_dir)
    rem_file.unlink()

    # Drop the reference to the dataclass before the next iteration.
    del dc_example