Esempio n. 1
0
 def test_load_image_paths(self, patches_path, split_ratios, shuffle,
                           expected):
     """
     Check the splits returned by ``load_image_paths`` for one image.

     :param patches_path: value forwarded as ``patches_path``.
     :param split_ratios: ratios of the requested splits.
     :param shuffle: if true, additionally verify that shuffling with
                     seed 100 matches a seeded ``np.random.shuffle``.
     :param expected: expected total number of loaded patches.
     """
     common_kwargs = dict(base_path=PATH_38CLOUD,
                          patches_path=patches_path,
                          split_ratios=split_ratios,
                          img_id=IMG_ID_38CLOUD)
     splits = load_image_paths(shuffle=False, **common_kwargs)

     if shuffle:
         shuffled_splits = load_image_paths(shuffle=True,
                                            seed=100,
                                            **common_kwargs)
         # Flatten both results so they can be compared element-wise.
         flat_shuffled = [
             item for part in shuffled_splits for item in part
         ]
         flat_reference = [item for part in splits for item in part]
         # Reproduce the shuffle locally without disturbing the global
         # NumPy random state for the rest of the test session.
         rng_state = np.random.get_state()
         np.random.seed(100)
         np.random.shuffle(flat_reference)
         np.random.set_state(rng_state)
         np.testing.assert_array_equal(flat_shuffled, flat_reference)

     # Every entry must expose exactly these channel keys.
     assert set(splits[0][0].keys()) == {"red", "green", "blue", "nir",
                                         "gt"}
     assert len(splits) == len(split_ratios)

     total = 0
     for ratio, part in zip(split_ratios, splits):
         # Split sizes may differ from the ideal size by at most one.
         assert abs(len(part) - int(ratio * expected)) <= 1
         total += len(part)
     assert total == expected
Esempio n. 2
0
def main_38Cloud():
    """ Load 38-Cloud patches and plot one sample for every split. """
    labels = ("train", "validation", "test")
    ratios = (0.8, 0.15, 0.05)
    dataset_root = Path("datasets/clouds/38-Cloud/38-Cloud_training")

    partitions = load_image_paths(base_path=dataset_root,
                                  split_ratios=ratios)

    for name, partition in zip(labels, partitions):
        generator = DG_38Cloud(files=partition, batch_size=16)
        # The batch at index 3 is used as the sample of this split.
        images, masks = generator[3]
        # Keep only the first channel of the ground-truth masks.
        masks = masks[:, :, :, 0]

        # One figure per split: image on the left, mask next to it.
        plt.figure()

        plt.subplot(1, 3, 1)
        plt.imshow(strip_nir(images[0]))
        plt.title(f"Split: { name }\n sample image")

        plt.subplot(1, 3, 2)
        plt.imshow(masks[0])
        plt.title(f"Split: { name }\n sample gt mask")

    # Display all figures at once after every split has been drawn.
    plt.show()
Esempio n. 3
0
def get_img_pred(
    path: Path, img_id: str, model: keras.Model, batch_size: int,
    patch_size: int = 384
) -> Tuple[np.ndarray, float]:
    """
    Generates prediction for a given image.

    Patch predictions are pasted into a full-size array at the position
    encoded in the red-channel file name as ``<row>_by_<col>`` (1-based).
    Cells that no patch covers keep the initial value ``np.inf``.

    :param path: path containing directories with image channels.
    :param img_id: ID of the considered image.
    :param model: trained model to make predictions.
    :param batch_size: size of generated batches, only one batch is loaded
          to memory at a time.
    :param patch_size: size of the image patches.
    :return: prediction for the given image along with evaluation time
          (in seconds).
    :raises ValueError: if the patch position cannot be parsed from a
          red-channel file name.
    """
    (test_files,) = load_image_paths(
        base_path=path, split_ratios=[1.0], shuffle=False, img_id=img_id
    )
    testgen = DG_38Cloud(
        files=test_files, batch_size=batch_size, shuffle=False, with_gt=False
    )
    # perf_counter() is monotonic, so the measured interval is not affected
    # by system clock adjustments happening during the prediction.
    tbeg = time.perf_counter()
    pred = model.predict_generator(testgen)
    scene_time = time.perf_counter() - tbeg
    print(f"Scene prediction took { scene_time } seconds")

    img_shape = get_img_pred_shape(test_files, patch_size)
    img = np.full(img_shape, np.inf)
    # Compiled once; the same pattern is applied to every patch name.
    position_pattern = re.compile("([0-9]*)_by_([0-9]*)")
    for i, fnames in enumerate(test_files):
        red_fname = str(fnames["red"])
        match = position_pattern.search(red_fname)
        if match is None:
            raise ValueError(
                "Cannot determine patch position from file name: "
                f"{red_fname}"
            )
        row, col = (int(group) for group in match.groups())
        # Row/column numbers in file names start at 1.
        img[
            (row - 1) * patch_size: row * patch_size,
            (col - 1) * patch_size: col * patch_size,
        ] = pred[i]
    return img, scene_time
Esempio n. 4
0
def main(
    run_name: str,
    train_path: str,
    C38_path: str,
    C38_gtpath: str,
    L8CCA_path: str,
    vpath: str,
    rpath: str,
    ppath: str,
    vids: Tuple[str],
    mlflow: bool,
    train_size: float,
    train_img: str,
    balance_train_dataset: bool,
    balance_val_dataset: bool,
    balance_snow: bool,
    snow_imgs_38Cloud: List[str],
    snow_imgs_L8CCA: List[str],
    batch_size: int,
    thr: float,
    learning_rate: float,
    bn_momentum: float,
    epochs: int,
    stopping_patience: int,
):
    """
    Run the full U-Net pipeline: training, validation and evaluation
    on the 38-Cloud and L8CCA datasets.

    :param run_name: name of the run.
    :param train_path: location of the training dataset.
    :param C38_path: location of the 38-Cloud dataset.
    :param C38_gtpath: location of the 38-Cloud groundtruth.
    :param L8CCA_path: location of the L8CCA dataset.
    :param vpath: location of the 38-Cloud (false color) visualisation
                  images.
    :param rpath: directory for results and artifacts; a randomly named
                  subdirectory is created inside it for this run.
    :param ppath: file listing the names of training patches
                  (if None, all training patches are used).
    :param vids: ids of images for which visualisations are created.
                 If it contains '*', visualisations are created for all
                 images in the datasets.
    :param mlflow: whether to use mlflow.
    :param train_size: fraction of patches assigned to the training set
                       (the remainder forms the validation set).
    :param train_img: image ID for training; if specified, only training
                      patches of this image are loaded.
    :param balance_train_dataset: whether to balance the train dataset.
    :param balance_val_dataset: whether to balance the val dataset.
    :param balance_snow: whether to balance snow images for the training
                         set.
    :param snow_imgs_38Cloud: 38-Cloud snow image IDs for testing.
    :param snow_imgs_L8CCA: L8CCA snow image IDs for testing.
    :param batch_size: size of generated batches, only one batch is loaded
          to memory at a time.
    :param thr: threshold deciding whether a pixel contains clouds
                (if None, the threshold returned by training is used).
    :param learning_rate: learning rate for training.
    :param bn_momentum: momentum of the batch normalization layer.
    :param epochs: number of epochs.
    :param stopping_patience: patience param for early stopping.
    """
    (train_path, C38_path, C38_gtpath, L8CCA_path,
     vpath, rpath, ppath) = make_paths(
        train_path, C38_path, C38_gtpath, L8CCA_path, vpath, rpath, ppath
    )
    # Every run gets its own, freshly created artifacts directory.
    rpath /= uuid.uuid4().hex
    rpath.mkdir(parents=True, exist_ok=False)
    print(f"Working dir: {os.getcwd()}, artifacts dir: {rpath}", flush=True)
    if mlflow:
        setup_mlflow(run_name)
        # Must stay ahead of any additional local variable: everything
        # visible in locals() at this point is logged as a parameter.
        log_params(locals())

    # Data loading and generators.
    train_split, val_split = load_image_paths(
        base_path=train_path,
        patches_path=ppath,
        split_ratios=(train_size, 1 - train_size),
        img_id=train_img,
    )
    traingen = DG_38Cloud(files=train_split,
                          batch_size=batch_size,
                          balance_classes=balance_train_dataset,
                          balance_snow=balance_snow)
    valgen = DG_38Cloud(
        files=val_split,
        batch_size=batch_size,
        balance_classes=balance_val_dataset,
    )

    # Training.
    model, auto_thr = train_model(traingen=traingen,
                                  valgen=valgen,
                                  rpath=rpath,
                                  bn_momentum=bn_momentum,
                                  learning_rate=learning_rate,
                                  stopping_patience=stopping_patience,
                                  epochs=epochs,
                                  mlflow=mlflow)
    print("Finished training and validation, starting evaluation.", flush=True)

    # Fall back to the threshold found during training if none was given.
    if thr is None:
        thr = auto_thr

    # Evaluation: arguments identical for both datasets are shared.
    shared_eval_args = dict(
        model=model,
        thr=thr,
        vids=vids,
        batch_size=batch_size,
        img_ids=None,
        mlflow=mlflow,
    )
    evaluate_model(
        dataset_name="38Cloud",
        dpath=C38_path,
        rpath=rpath / "38Cloud_vis",
        snow_imgs=snow_imgs_38Cloud,
        gtpath=C38_gtpath,
        vpath=vpath,
        **shared_eval_args,
    )
    evaluate_model(
        dataset_name="L8CCA",
        dpath=L8CCA_path,
        rpath=rpath / "L8CCA_vis",
        snow_imgs=snow_imgs_L8CCA,
        **shared_eval_args,
    )

    if not mlflow:
        return
    log_param("threshold", thr)
    log_artifacts(rpath)
    end_run()
def _make_experiment_dirs(name):
    """Create ``artifacts/<name>`` and ``artifacts/<name>/V``; return both."""
    t_path = Path("artifacts") / name
    t_path.mkdir(parents=True, exist_ok=False)
    v_path = t_path / "V"
    v_path.mkdir(parents=True, exist_ok=False)
    return t_path, v_path


def _write_patch_ids(files, out_dir):
    """Append one patch name per entry of ``files`` to ``patches.csv``."""
    # The file is opened once per split instead of once per patch.
    # An empty split therefore results in an empty file.
    with open(out_dir / "patches.csv", "a") as f:
        # The name is the "gt" file name without its first 3 and last 4
        # characters (presumably the "gt_" prefix and the file
        # extension -- confirm against the dataset layout).
        f.writelines(
            file_["gt"].parts[-1][3:-4] + "\n" for file_ in files
        )


def _report_and_save(label, train_files, val_files, t_path, v_path):
    """Print split sizes of an experiment and store both patch lists."""
    print(label, len(train_files), len(val_files))
    _write_patch_ids(train_files, t_path)
    _write_patch_ids(val_files, v_path)


def generate(
        train_path: Path = Path("datasets/clouds/38-Cloud/38-Cloud_training"),
        train_size: float = 0.8):
    """
    Generate to file training patches paths for all 5
    experiments of cloud detection using RGBNir data.

    For every experiment a directory ``artifacts/<name>`` (training
    patches) and ``artifacts/<name>/V`` (validation patches) is created,
    each receiving a ``patches.csv`` file with one patch name per line.

    :param train_path: Path to 38-Cloud training dataset.
    :param train_size: Size of the training set
                       (the rest is used for validation).
    :raises FileExistsError: if any of the experiment directories
                             already exists.
    """
    def load_split(patches_path=None, img_id=None):
        # All experiments share the dataset, split ratios and shuffling.
        return load_image_paths(
            base_path=train_path,
            patches_path=patches_path,
            split_ratios=(train_size, 1 - train_size),
            shuffle=True,
            img_id=img_id,
        )

    # Exp 1
    t_path, v_path = _make_experiment_dirs("T")
    train_files, val_files = load_split()
    _report_and_save("exp 1", train_files, val_files, t_path, v_path)

    # Exp 2a
    t_path, v_path = _make_experiment_dirs("T_NS")
    train_files, val_files = load_split(
        img_id="LC08_L1TP_002053_20160520_20170324_01_T1")
    _report_and_save("exp 2a", train_files, val_files, t_path, v_path)

    # Exp 2b
    t_path, v_path = _make_experiment_dirs("T_S")
    train_files, val_files = load_split(
        img_id="LC08_L1TP_035034_20160120_20170224_01_T1")
    _report_and_save("exp 2b", train_files, val_files, t_path, v_path)

    # Exp 3
    t_path, v_path = _make_experiment_dirs("T_C")
    train_files, val_files = load_split()
    traingen = DG_38Cloud(
        files=train_files,
        batch_size=4,
        balance_classes=False,
        balance_snow=True,
    )
    # Keep the files referenced by the generator's indexes, in sorted
    # index order (presumably the result of snow balancing -- confirm
    # against DG_38Cloud).
    train_files = list(
        np.array(traingen._files)[np.sort(traingen._file_indexes)])
    _report_and_save("exp 3", train_files, val_files, t_path, v_path)

    # Exp 4
    t_path, v_path = _make_experiment_dirs("T'")
    train_files, val_files = load_split(
        patches_path="datasets/clouds/38-Cloud/"
                     "training_patches_38-cloud_nonempty.csv")
    _report_and_save("exp 4", train_files, val_files, t_path, v_path)