Example #1
    def cir_image(self, overwrite):
        """
        Creates CIR image
        """
        plt.ioff()
        data_path = self.data_path
        for i, img in enumerate(self.img_list):
            print('Creating CIR image for {}'.format(img))
            band_combo_dir = data_path / 'band_combos'
            stack_path = data_path / 'images' / img / 'stack' / 'stack.tif'
            cir_file = band_combo_dir / (img + '_cir_img.png')

            try:
                band_combo_dir.mkdir(parents=True)
            except FileExistsError:
                pass

            if overwrite is False:
                if cir_file.exists():
                    print('CIR image already exists for ' + img)
                    continue
                else:
                    print('No CIR image for ' + img + ', creating one')

            # Stack bands, then pull the CIR band combination from the stack
            print('Stacking image')
            band_list = ['B1', 'B2', 'B3', 'B4', 'B5', 'B6', 'B7']
            tif_stacker(data_path,
                        img,
                        band_list,
                        features=False,
                        overwrite=False)
            spectra_stack_path = data_path / 'images' / img / 'stack' / 'spectra_stack.tif'

            # Function to normalize the grid values
            def normalize(array):
                """Normalizes numpy arrays into scale 0.0 - 1.0"""
                array_min, array_max = np.nanmin(array), np.nanmax(array)
                return (array - array_min) / (array_max - array_min)

            print('Processing CIR image')
            with rasterio.open(spectra_stack_path, 'r') as f:
                nir, red, green = f.read(5), f.read(4), f.read(3)
                nir[nir == -999999] = np.nan
                red[red == -999999] = np.nan
                green[green == -999999] = np.nan
                nirn = normalize(nir)
                redn = normalize(red)
                greenn = normalize(green)
                cir = np.dstack((nirn, redn, greenn))

            # Convert to PIL image, enhance, and save
            cir_img = Image.fromarray((cir * 255).astype(np.uint8))
            cir_img = ImageEnhance.Contrast(cir_img).enhance(1.5)
            cir_img = ImageEnhance.Sharpness(cir_img).enhance(2)
            cir_img = ImageEnhance.Brightness(cir_img).enhance(2)

            print('Saving CIR image')
            cir_img.save(cir_file, dpi=(300, 300))
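
Note: the normalize/dstack/PIL pattern above can be exercised on its own. A minimal, self-contained sketch on synthetic data (band values and sizes are made up; the NaN nodata handling mirrors the example):

import numpy as np
from PIL import Image, ImageEnhance

def normalize(array):
    """Min-max scale an array to 0.0 - 1.0, ignoring NaN nodata pixels."""
    array_min, array_max = np.nanmin(array), np.nanmax(array)
    return (array - array_min) / (array_max - array_min)

rng = np.random.default_rng(0)
nir, red, green = (rng.uniform(0, 10000, (64, 64)) for _ in range(3))
nir[0, 0] = np.nan  # simulate a nodata pixel

cir = np.dstack((normalize(nir), normalize(red), normalize(green)))
cir_img = Image.fromarray((np.nan_to_num(cir) * 255).astype(np.uint8))
cir_img = ImageEnhance.Contrast(cir_img).enhance(1.5)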
Example #2
def training_bnn(img_list, pctls, feat_list_new, data_path, batch,
                 **model_params):
    for j, img in enumerate(img_list):
        print(img + ': stacking tif, generating clouds')
        times = []
        tif_stacker(data_path,
                    img,
                    feat_list_new,
                    features=True,
                    overwrite=False)
        cloud_generator(img, data_path, overwrite=False)

        for i, pctl in enumerate(pctls):
            print(img, pctl, '% CLOUD COVER')
            print('Preprocessing')
            tf.keras.backend.clear_session()
            data_train, data_vector_train, data_ind_train, feat_keep = preprocessing(
                data_path, img, pctl, feat_list_new, test=False)
            perm_index = feat_keep.index('GSW_perm')
            flood_index = feat_keep.index('flooded')
            data_vector_train[data_vector_train[:, perm_index] == 1,
                              flood_index] = 0  # Remove flood water that is perm water
            data_vector_train = np.delete(data_vector_train, perm_index,
                                          axis=1)  # Remove perm water column
            shape = data_vector_train.shape
            X_train = data_vector_train[:, :shape[1] - 1]
            y_train = data_vector_train[:, shape[1] - 1]
            y_train = to_categorical(y_train)
            D = len(set(y_train[:, 0]))  # Target classes

            model_path = data_path / batch / 'models' / img
            metrics_path = (data_path / batch / 'metrics' / 'training' / img /
                            (img + '_clouds_' + str(pctl)))
            try:
                metrics_path.mkdir(parents=True)
                model_path.mkdir(parents=True)
            except FileExistsError:
                pass
            model_path = model_path / (img + '_clouds_' + str(pctl) + '.h5')
            print('Training model')
            start_time = time.time()
            aleatoric_model = get_aleatoric_uncertainty_model(X_train,
                                                              y_train,
                                                              **model_params,
                                                              D=D)
            end_time = time.time()
            times.append(timer(start_time, end_time, False))
            aleatoric_model.save(model_path)

        metrics_path = metrics_path.parent
        times = [float(i) for i in times]
        times = np.column_stack([pctls, times])
        times_df = pd.DataFrame(times,
                                columns=['cloud_cover', 'training_time'])
        times_df.to_csv(metrics_path / 'training_times.csv', index=False)
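
Note: timer() is project code that is not shown in these examples. Since its return value is later cast with float(), a plausible minimal stand-in (an assumption, not the project's actual implementation) is:

def timer(start_time, end_time, verbose=True):
    """Return the elapsed seconds between two time.time() stamps as a string."""
    elapsed = end_time - start_time
    if verbose:
        print('Elapsed time: {:.2f} s'.format(elapsed))
    return '{:.4f}'.format(elapsed)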
Example #3
def log_reg_training(img_list, pctls, feat_list_new, data_path, batch):
    for j, img in enumerate(img_list):
        print(img + ': stacking tif, generating clouds')
        times = []
        tif_stacker(data_path,
                    img,
                    feat_list_new,
                    features=True,
                    overwrite=False)
        cloud_generator(img, data_path, overwrite=False)

        for i, pctl in enumerate(pctls):
            print(img, pctl, '% CLOUD COVER')
            print('Preprocessing')
            data_train, data_vector_train, data_ind_train, feat_keep = preprocessing(
                data_path, img, pctl, feat_list_new, test=False)
            perm_index = feat_keep.index('GSW_perm')
            flood_index = feat_keep.index('flooded')
            # data_vector_train[data_vector_train[:, perm_index] == 1, flood_index] = 0
            data_vector_train = np.delete(data_vector_train,
                                          perm_index,
                                          axis=1)
            shape = data_vector_train.shape
            X_train = data_vector_train[:, :shape[1] - 1]
            y_train = data_vector_train[:, shape[1] - 1]

            model_path = data_path / batch / 'models' / img
            metrics_path = (data_path / batch / 'metrics' / 'training' / img /
                            (img + '_clouds_' + str(pctl)))

            if not model_path.exists():
                model_path.mkdir(parents=True)
            if not metrics_path.exists():
                metrics_path.mkdir(parents=True)

            model_path = model_path / (img + '_clouds_' + str(pctl) + '.sav')

            print('Training')
            start_time = time.time()
            logreg = LogisticRegression(n_jobs=-1, solver='sag')
            logreg.fit(X_train, y_train)
            end_time = time.time()
            times.append(timer(start_time, end_time, False))
            joblib.dump(logreg, model_path)

        metrics_path = metrics_path.parent
        times = [float(i) for i in times]
        times = np.column_stack([pctls, times])
        times_df = pd.DataFrame(times,
                                columns=['cloud_cover', 'training_time'])
        times_df.to_csv(metrics_path / 'training_times.csv', index=False)
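
Note: a hypothetical driver call for the function above (the path and most feature names are placeholders; the function only requires that 'GSW_perm' is present and that 'flooded' is the last feature, so it survives as the label column after the perm-water column is deleted):

from pathlib import Path

data_path = Path('/data/CPR')  # placeholder
img_list = ['4514_LC08_027033_20170826_1']
pctls = [10, 30, 50, 70, 90]
feat_list_new = ['elevation', 'slope', 'GSW_perm', 'flooded']  # placeholders except the last two
log_reg_training(img_list, pctls, feat_list_new, data_path, batch='test')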
Example #4
    def cir_image(self):
        """
        Creates CIR image
        """
        plt.ioff()
        data_path = self.data_path
        for i, img in enumerate(self.img_list):
            print('Creating CIR image for {}'.format(img))
            plot_path = data_path / self.batch / 'plots' / img
            bin_file = data_path / self.batch / 'predictions' / img / 'predictions.h5'

            stack_path = data_path / 'images' / img / 'stack' / 'stack.tif'

            # Stack bands, then pull the CIR band combination from the stack
            print('Stacking image')
            band_list = ['B1', 'B2', 'B3', 'B4', 'B5', 'B6', 'B7']
            tif_stacker(data_path,
                        img,
                        band_list,
                        features=False,
                        overwrite=False)
            spectra_stack_path = data_path / 'images' / img / 'stack' / 'spectra_stack.tif'

            # Function to normalize the grid values
            def normalize(array):
                """Normalizes numpy arrays into scale 0.0 - 1.0"""
                array_min, array_max = np.nanmin(array), np.nanmax(array)
                return (array - array_min) / (array_max - array_min)

            print('Processing CIR image')
            with rasterio.open(spectra_stack_path, 'r') as f:
                nir, red, green = f.read(5), f.read(4), f.read(3)
                nir[nir == -999999] = np.nan
                red[red == -999999] = np.nan
                green[green == -999999] = np.nan
                nirn = normalize(nir)
                redn = normalize(red)
                greenn = normalize(green)
                cir = np.dstack((nirn, redn, greenn))

            # Convert to PIL image, enhance, and save
            cir_img = Image.fromarray((cir * 255).astype(np.uint8))
            cir_img = ImageEnhance.Contrast(cir_img).enhance(1.5)
            cir_img = ImageEnhance.Sharpness(cir_img).enhance(2)
            cir_img = ImageEnhance.Brightness(cir_img).enhance(2)

            print('Saving CIR image')
            cir_file = plot_path / 'cir_img.png'
            cir_img.save(cir_file, dpi=(300, 300))
Example #5
    def rgb_image(self, percent, overwrite):
        def linear_stretch(image, percent):
            """Percentile-based linear contrast stretch, ignoring NaN pixels"""
            p_low, p_high = np.percentile(image[~np.isnan(image)],
                                          (percent, 100 - percent))
            img_rescale = exposure.rescale_intensity(image,
                                                     in_range=(p_low, p_high))
            return img_rescale

        data_path = self.data_path
        for img in self.img_list:
            spectra_stack_path = data_path / 'images' / img / 'stack' / 'spectra_stack.tif'
            band_combo_dir = data_path / 'band_combos'
            rgb_file = band_combo_dir / (img + '_rgb_img.png')

            try:
                band_combo_dir.mkdir(parents=True)
            except FileExistsError:
                pass

            if overwrite is False:
                if rgb_file.exists():
                    print('RGB image already exists for ' + img)
                    continue
                else:
                    print('No RGB image for ' + img + ', creating one')

            print('Stacking RGB image')
            band_list = ['B1', 'B2', 'B3', 'B4', 'B5', 'B6', 'B7']
            tif_stacker(data_path,
                        img,
                        band_list,
                        features=False,
                        overwrite=False)

            print('Processing RGB image')
            with rasterio.open(spectra_stack_path, 'r') as f:
                red, green, blue = f.read(4), f.read(3), f.read(2)
                red[red == -999999] = np.nan
                green[green == -999999] = np.nan
                blue[blue == -999999] = np.nan
                rgb = np.dstack((red, green, blue))

            rgb = linear_stretch(rgb, percent)

            rgb_img = Image.fromarray((rgb * 255).astype(np.uint8))
            # rgb_img = ImageEnhance.Contrast(rgb_img).enhance(1.2)

            print('Saving RGB image')
            rgb_img.save(rgb_file, dpi=(300, 300))
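
Note: the percentile stretch can be tested in isolation. A small sketch on synthetic data (scikit-image rescales float input to the 0-1 range by default):

import numpy as np
from skimage import exposure

def linear_stretch(image, percent):
    p_low, p_high = np.percentile(image[~np.isnan(image)],
                                  (percent, 100 - percent))
    return exposure.rescale_intensity(image, in_range=(p_low, p_high))

band = np.random.default_rng(1).normal(0.5, 0.2, (32, 32))
stretched = linear_stretch(band, 2)  # clip the brightest/darkest 2% of pixels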
Example #6
    def false_map(self):
        """
        Creates map of FP/FNs overlaid on RGB image
        """
        plt.ioff()
        data_path = self.data_path
        for i, img in enumerate(self.img_list):
            print('Creating FN/FP map for {}'.format(img))
            plot_path = data_path / self.batch / 'plots' / img
            bin_file = data_path / self.batch / 'predictions' / img / 'predictions.h5'

            stack_path = data_path / 'images' / img / 'stack' / 'stack.tif'

            # Get RGB image
            print('Stacking RGB image')
            band_list = ['B1', 'B2', 'B3', 'B4', 'B5', 'B6', 'B7']
            tif_stacker(data_path,
                        img,
                        band_list,
                        features=False,
                        overwrite=False)
            spectra_stack_path = data_path / 'images' / img / 'stack' / 'spectra_stack.tif'

            # Function to normalize the grid values
            def normalize(array):
                """Normalizes numpy arrays into scale 0.0 - 1.0"""
                array_min, array_max = np.nanmin(array), np.nanmax(array)
                return (array - array_min) / (array_max - array_min)

            print('Processing RGB image')
            with rasterio.open(spectra_stack_path, 'r') as f:
                red, green, blue = f.read(4), f.read(3), f.read(2)
                red[red == -999999] = np.nan
                green[green == -999999] = np.nan
                blue[blue == -999999] = np.nan
                redn = normalize(red)
                greenn = normalize(green)
                bluen = normalize(blue)
                rgb = np.dstack((redn, greenn, bluen))

            # Convert to PIL image, enhance, and save
            rgb_img = Image.fromarray((rgb * 255).astype(np.uint8))
            rgb_img = ImageEnhance.Contrast(rgb_img).enhance(1.5)
            rgb_img = ImageEnhance.Sharpness(rgb_img).enhance(2)
            rgb_img = ImageEnhance.Brightness(rgb_img).enhance(2)

            print('Saving RGB image')
            rgb_file = plot_path / 'rgb_img.png'
            rgb_img.save(rgb_file, dpi=(300, 300))

            # Reshape predicted values back into image band
            with rasterio.open(stack_path, 'r') as ds:
                shape = ds.read(1).shape  # Shape of full original image

            for pctl in self.pctls:
                data_test, data_vector_test, data_ind_test, feat_keep = preprocessing(
                    data_path, img, pctl, self.feat_list_new, test=True)
                for buffer_iter in self.buffer_iters:
                    print('Fetching flood predictions for buffer', buffer_iter,
                          'at', str(pctl) + '%')
                    # Read predictions
                    with h5py.File(bin_file, 'r') as f:
                        pred_name = str(pctl) + '_buff_' + str(buffer_iter)
                        predictions = np.array(f[pred_name])  # Copy h5 dataset to array

                    # Add predicted values to cloud-covered pixel positions
                    prediction_img = np.zeros(shape)
                    prediction_img[:] = np.nan
                    rows, cols = zip(data_ind_test)
                    prediction_img[rows, cols] = predictions

                    # Remove perm water from predictions and actual
                    perm_index = feat_keep.index('GSW_perm')
                    flood_index = feat_keep.index('flooded')
                    data_vector_test[data_vector_test[:, perm_index] == 1,
                                     flood_index] = 0  # Remove flood water that is perm water
                    data_shape = data_vector_test.shape
                    with rasterio.open(stack_path, 'r') as ds:
                        perm_feat = ds.read(perm_index + 1)
                        prediction_img[perm_feat == 1] = 0

                    # Add actual flood values to cloud-covered pixel positions
                    flooded_img = np.zeros(shape)
                    flooded_img[:] = np.nan
                    flooded_img[rows, cols] = data_vector_test[:, data_shape[1] - 1]

                    # Visualizing FNs/FPs
                    ones = np.ones(shape=shape)
                    red_actual = np.where(ones, flooded_img, 0.5)  # Actual
                    blue_preds = np.where(ones, prediction_img, 0.5)  # Predictions
                    green_combo = np.minimum(red_actual, blue_preds)

                    # Saving FN/FP comparison image
                    comparison_img = np.dstack((red_actual, green_combo, blue_preds))
                    comparison_img_file = plot_path / ('false_map' + str(pctl) +
                                                       '_buff_' + str(buffer_iter) + '.png')
                    print('Saving FN/FP image for buffer', str(buffer_iter),
                          'at', str(pctl) + '%')
                    matplotlib.image.imsave(comparison_img_file, comparison_img,
                                            dpi=300)

                    # Load comparison image
                    flood_overlay = Image.open(comparison_img_file)

                    # Convert black pixels to transparent in comparison image so it can overlay RGB
                    datas = flood_overlay.getdata()
                    newData = []
                    for item in datas:
                        if item[0] == 0 and item[1] == 0 and item[2] == 0:
                            newData.append((255, 255, 255, 0))
                        else:
                            newData.append(item)
                    flood_overlay.putdata(newData)

                    # Superimpose comparison image and RGB image, then save and close
                    rgb_img.paste(flood_overlay, (0, 0), flood_overlay)
                    plt.imshow(rgb_img)
                    print('Saving overlay image for buffer', str(buffer_iter),
                          'at', str(pctl) + '%')
                    rgb_img.save(plot_path / ('false_map_overlay' + str(pctl) +
                                              '_buff_' + str(buffer_iter) + '.png'),
                                 dpi=(300, 300))
                    plt.close('all')
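
Note: the color encoding of the comparison image falls out of the channel choices: actual flooding drives red, predictions drive blue, and their elementwise minimum drives green. A tiny sketch of the resulting palette:

import numpy as np

actual = np.array([[1., 0.], [1., 0.]])
preds = np.array([[1., 1.], [0., 0.]])
combo = np.dstack((actual, np.minimum(actual, preds), preds))
# combo[0, 0] -> [1, 1, 1] white  (true positive)
# combo[0, 1] -> [0, 0, 1] blue   (false positive)
# combo[1, 0] -> [1, 0, 0] red    (false negative)
# combo[1, 1] -> [0, 0, 0] black  (true negative, later made transparent)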
Example #7
def log_reg_training_buffer(img_list, pctls, feat_list_new, data_path, batch,
                            buffer_iters, buffer_flood_only):
    from imageio import imwrite

    for img in img_list:
        print(img + ': stacking tif, generating clouds')
        times = []
        tif_stacker(data_path,
                    img,
                    feat_list_new,
                    features=True,
                    overwrite=False)
        cloud_generator(img, data_path, overwrite=False)

        for pctl in pctls:
            print('Preprocessing')
            data_train_full, data_vector_train, data_ind_train, feat_keep = preprocessing(
                data_path, img, pctl, feat_list_new, test=False)
            for buffer_iter in buffer_iters:
                perm_index = feat_keep.index('GSW_perm')
                flood_index = feat_keep.index('flooded')
                data_train = data_train_full.copy()
                if buffer_flood_only:
                    # Zero out permanent water first so the buffer is drawn
                    # around flood-only pixels
                    data_train[data_train[:, :, perm_index] == 1,
                               flood_index] = 0
                    mask = data_train[:, :, flood_index]
                    buffer_mask = np.invert(
                        binary_dilation(mask, iterations=buffer_iter))
                else:
                    # Buffer around all flooding, then zero out permanent water
                    mask = data_train[:, :, flood_index]
                    buffer_mask = np.invert(
                        binary_dilation(mask, iterations=buffer_iter))
                    data_train[data_train[:, :, perm_index] == 1,
                               flood_index] = 0
                data_train[buffer_mask] = np.nan

                data_vector_train = data_train.reshape(
                    [data_train.shape[0] * data_train.shape[1],
                     data_train.shape[2]])
                data_vector_train = data_vector_train[
                    ~np.isnan(data_vector_train).any(axis=1)]
                data_vector_train = np.delete(data_vector_train, perm_index,
                                              axis=1)  # Remove perm water column
                shape = data_vector_train.shape
                X_train = data_vector_train[:, :shape[1] - 1]
                y_train = data_vector_train[:, shape[1] - 1]

                model_path = data_path / batch / 'models' / img
                metrics_path = (data_path / batch / 'metrics' / 'training' /
                                img / (img + '_clouds_' + str(pctl)))

                if not model_path.exists():
                    model_path.mkdir(parents=True)
                if not metrics_path.exists():
                    metrics_path.mkdir(parents=True)

                model_path = model_path / (img + '_clouds_' + str(pctl) +
                                           'buff' + str(buffer_iter) + '.sav')

                # Save data flooding image to check that buffering is working correctly
                # imwrite(model_path.parents[0] / '{}'.format('buff' + str(buffer_iter) + '.jpg'), data_train[:, :, 6])

                print('Training')
                start_time = time.time()
                logreg = LogisticRegression(n_jobs=-1, solver='sag')
                logreg.fit(X_train, y_train)
                end_time = time.time()
                times.append(timer(start_time, end_time, False))
                joblib.dump(logreg, model_path)

        metrics_path = metrics_path.parent
        times = [float(i) for i in times]
        times = np.column_stack([
            np.repeat(pctls, len(buffer_iters)),
            np.tile(buffer_iters, len(pctls)), times
        ])
        times_df = pd.DataFrame(
            times, columns=['cloud_cover', 'buffer_iters', 'training_time'])
        times_df.to_csv(metrics_path / 'training_times.csv', index=False)
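
Note: the buffering step in isolation, assuming binary_dilation comes from scipy.ndimage (which matches the iterations keyword used above). The flood mask is dilated buffer_iter times, and everything outside the dilated region is set to NaN so only pixels near flooding survive the NaN filter:

import numpy as np
from scipy.ndimage import binary_dilation

flood_mask = np.zeros((8, 8), dtype=bool)
flood_mask[3, 3] = True  # a single flooded pixel

buffer_mask = np.invert(binary_dilation(flood_mask, iterations=2))

data = np.random.default_rng(2).random((8, 8))
data[buffer_mask] = np.nan  # keep only pixels within 2 dilations of flooding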
Example #8
# tf.config.experimental.set_visible_devices(NUM_PARALLEL_EXEC_UNITS, 'CPU')
os.environ["OMP_NUM_THREADS"] = str(NUM_PARALLEL_EXEC_UNITS)
os.environ["KMP_BLOCKTIME"] = "30"
os.environ["KMP_SETTINGS"] = "1"
os.environ["KMP_AFFINITY"] = "granularity=fine,verbose,compact,1,0"

# ======================================================================================================================

img = img_list[0]
pctl = 30
batch = 'test'
import statsmodels.api as sm

print(img + ': stacking tif, generating clouds')
times = []
tif_stacker(data_path, img, feat_list_new, features=True, overwrite=False)
cloud_generator(img, data_path, overwrite=False)

print(img, pctl, '% CLOUD COVER')
print('Preprocessing')
tf.keras.backend.clear_session()
data_train, data_vector_train, data_ind_train, feat_keep = preprocessing(
    data_path, img, pctl, feat_list_new, test=False)
perm_index = feat_keep.index('GSW_perm')
flood_index = feat_keep.index('flooded')
data_vector_train[data_vector_train[:, perm_index] == 1,
                  flood_index] = 0  # Remove flood water that is perm water
data_vector_train = np.delete(data_vector_train, perm_index,
                              axis=1)  # Remove perm water column
shape = data_vector_train.shape
X_train = data_vector_train[:, :shape[1] - 1]
y_train = data_vector_train[:, shape[1] - 1]
Example #9
def NN_training(img_list, pctls, model_func, feat_list_new, data_path, batch,
                **model_params):
    get_model = model_func
    for j, img in enumerate(img_list):
        print(img + ': stacking tif, generating clouds')
        times = []
        lr_mins = []
        lr_maxes = []
        tif_stacker(data_path,
                    img,
                    feat_list_new,
                    features=True,
                    overwrite=False)
        cloud_generator(img, data_path, overwrite=False)

        for i, pctl in enumerate(pctls):
            print(img, pctl, '% CLOUD COVER')
            print('Preprocessing')
            tf.keras.backend.clear_session()
            data_train, data_vector_train, data_ind_train, feat_keep = preprocessing(
                data_path, img, pctl, feat_list_new, test=False)
            perm_index = feat_keep.index('GSW_perm')
            flood_index = feat_keep.index('flooded')
            # data_vector_train[data_vector_train[:, perm_index] == 1, flood_index] = 0
            data_vector_train = np.delete(data_vector_train, perm_index,
                                          axis=1)  # Remove perm water column
            shape = data_vector_train.shape
            X_train = data_vector_train[:, :shape[1] - 1]
            y_train = data_vector_train[:, shape[1] - 1]
            INPUT_DIMS = X_train.shape[1]

            model_path = data_path / batch / 'models' / img
            metrics_path = (data_path / batch / 'metrics' / 'training' / img /
                            (img + '_clouds_' + str(pctl)))

            lr_plots_path = metrics_path.parents[1] / 'lr_plots'
            lr_vals_path = metrics_path.parents[1] / 'lr_vals'
            try:
                metrics_path.mkdir(parents=True)
                model_path.mkdir(parents=True)
                lr_plots_path.mkdir(parents=True)
                lr_vals_path.mkdir(parents=True)
            except FileExistsError:
                pass

            # ---------------------------------------------------------------------------------------------------
            # Determine learning rate by finding max loss decrease during single epoch training
            lrRangeFinder = LrRangeFinder(start_lr=0.1, end_lr=2)

            lr_model_params = {
                'batch_size': model_params['batch_size'],
                'epochs': 1,
                'verbose': 2,
                'callbacks': [lrRangeFinder],
                'use_multiprocessing': True
            }

            model = model_func(INPUT_DIMS)

            print('Finding learning rate')
            model.fit(X_train, y_train, **lr_model_params)
            lr_min, lr_max, lr, losses = lr_plots(lrRangeFinder, lr_plots_path,
                                                  img, pctl)
            lr_mins.append(lr_min)
            lr_maxes.append(lr_max)
            # ---------------------------------------------------------------------------------------------------
            # Training the model with cyclical learning rate scheduler
            model_path = model_path / (img + '_clouds_' + str(pctl) + '.h5')
            scheduler = SGDRScheduler(min_lr=lr_min,
                                      max_lr=lr_max,
                                      lr_decay=0.9,
                                      cycle_length=3,
                                      mult_factor=1.5)

            callbacks = [
                tf.keras.callbacks.EarlyStopping(
                    monitor='sparse_categorical_accuracy',
                    min_delta=0.0001,
                    patience=10),
                tf.keras.callbacks.ModelCheckpoint(filepath=str(model_path),
                                                   monitor='loss',
                                                   save_best_only=True),
                CSVLogger(metrics_path / 'training_log.log'), scheduler
            ]

            model = get_model(INPUT_DIMS)

            print('Training full model with best LR')
            start_time = time.time()
            model.fit(X_train, y_train, **model_params, callbacks=callbacks)
            end_time = time.time()
            times.append(timer(start_time, end_time, False))
            # model.save(model_path)

        metrics_path = metrics_path.parent
        times = [float(i) for i in times]
        times = np.column_stack([pctls, times])
        times_df = pd.DataFrame(times,
                                columns=['cloud_cover', 'training_time'])
        times_df.to_csv(metrics_path / 'training_times.csv', index=False)

        lr_range = np.column_stack([pctls, lr_mins, lr_maxes])
        lr_avg = np.mean(lr_range[:, 1:3], axis=1)  # Mean of lr_min and lr_max
        lr_range = np.column_stack([lr_range, lr_avg])
        lr_range_df = pd.DataFrame(
            lr_range, columns=['cloud_cover', 'lr_min', 'lr_max', 'lr_avg'])
        lr_range_df.to_csv((lr_vals_path / img).with_suffix('.csv'),
                           index=False)

        # Note: lr and losses come from the last cloud-cover iteration above
        losses_path = lr_vals_path / img / ('losses_' + str(pctl) + '.csv')
        try:
            losses_path.parent.mkdir(parents=True)
        except FileExistsError:
            pass
        lr_losses = np.column_stack([lr, losses])
        lr_losses = pd.DataFrame(lr_losses, columns=['lr', 'losses'])
        lr_losses.to_csv(losses_path, index=False)
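
Note: LrRangeFinder and SGDRScheduler are project-specific callbacks not shown here. A minimal sketch of the LR range test idea (an assumed implementation, not the project's): sweep the learning rate upward during one epoch and record the per-batch loss, so the usable LR band can be read off the loss-vs-LR curve.

import tensorflow as tf

class LrRangeFinderSketch(tf.keras.callbacks.Callback):
    def __init__(self, start_lr=0.1, end_lr=2, steps=100):
        super().__init__()
        self.start_lr, self.end_lr, self.steps = start_lr, end_lr, steps
        self.lrs, self.losses = [], []

    def on_train_batch_begin(self, batch, logs=None):
        # Exponential interpolation from start_lr to end_lr
        frac = len(self.lrs) / max(self.steps - 1, 1)
        lr = self.start_lr * (self.end_lr / self.start_lr) ** frac
        tf.keras.backend.set_value(self.model.optimizer.lr, lr)
        self.lrs.append(lr)

    def on_train_batch_end(self, batch, logs=None):
        self.losses.append(logs['loss'])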
Example #10
def training2(img_list,
              pctls,
              model_func,
              feat_list_new,
              data_path,
              batch,
              DROPOUT_RATE=0,
              HOLDOUT=0.3,
              **model_params):
    '''
    Removes flood water that is permanent water
    '''

    get_model = model_func

    for j, img in enumerate(img_list):
        times = []
        tif_stacker(data_path,
                    img,
                    feat_list_new,
                    features=True,
                    overwrite=True)
        cloud_generator(img, data_path, overwrite=False)

        for i, pctl in enumerate(pctls):
            data_train, data_vector_train, data_ind_train = preprocessing(
                data_path, img, pctl, gaps=False)
            perm_index = feat_list_new.index('GSW_perm')
            flood_index = feat_list_new.index('flooded')
            data_vector_train[
                data_vector_train[:, perm_index] == 1,
                flood_index] = 0  # Remove flood water that is perm water
            data_vector_train = np.delete(data_vector_train,
                                          perm_index,
                                          axis=1)  # Remove perm water column

            training_data, validation_data = train_val(data_vector_train,
                                                       holdout=HOLDOUT)
            X_train, y_train = training_data[:, 0:14], training_data[:, 14]
            X_val, y_val = validation_data[:, 0:14], validation_data[:, 14]
            INPUT_DIMS = X_train.shape[1]

            model_path = data_path / batch / 'models' / img
            metrics_path = (data_path / batch / 'metrics' / 'training' / img /
                            (img + '_clouds_' + str(pctl)))

            try:
                metrics_path.mkdir(parents=True)
                model_path.mkdir(parents=True)
            except FileExistsError:
                pass

            model_path = model_path / (img + '_clouds_' + str(pctl) + '.h5')

            # Copy model_params so CSVLoggers don't accumulate in the caller's
            # dict across cloud-cover iterations
            fit_params = dict(model_params)
            fit_params['callbacks'] = model_params['callbacks'] + [
                CSVLogger(metrics_path / 'training_log.log')
            ]

            print('~~~~~', img, pctl, '% CLOUD COVER')

            model = get_model(INPUT_DIMS)

            start_time = time.time()
            model.fit(X_train,
                      y_train,
                      **fit_params,
                      validation_data=(X_val, y_val))

            end_time = time.time()
            times.append(timer(start_time, end_time, False))

            model.save(model_path)

        metrics_path = metrics_path.parent
        times = [float(i) for i in times]
        times = np.column_stack([pctls, times])
        times_df = pd.DataFrame(times,
                                columns=['cloud_cover', 'training_time'])
        times_df.to_csv(metrics_path / 'training_times.csv', index=False)
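
Note: train_val() is project code not shown here. A plausible stand-in (an assumption) that shuffles the data vector and splits off a holdout fraction:

import numpy as np

def train_val(data_vector, holdout):
    """Shuffle rows and split off a `holdout` fraction as validation data."""
    rng = np.random.default_rng(0)
    idx = rng.permutation(data_vector.shape[0])
    n_val = int(len(idx) * holdout)
    return data_vector[idx[n_val:]], data_vector[idx[:n_val]]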
Example #11
def training6(img_list,
              pctls,
              model_func,
              feat_list_new,
              data_path,
              batch,
              T,
              dropout_rate=0.2,
              **model_params):
    '''
    1. Removes ALL pixels that are over permanent water
    2. Finds the optimum learning rate and uses a cyclic LR scheduler
       to train the model
    3. No validation set for training
    '''
    get_model = model_func
    for j, img in enumerate(img_list):
        print(img + ': stacking tif, generating clouds')
        times = []
        tif_stacker(data_path,
                    img,
                    feat_list_new,
                    features=True,
                    overwrite=False)
        cloud_generator(img, data_path, overwrite=False)

        for i, pctl in enumerate(pctls):
            print(img, pctl, '% CLOUD COVER')
            print('Preprocessing')
            tf.keras.backend.clear_session()
            data_train, data_vector_train, data_ind_train, feat_keep = preprocessing(
                data_path, img, pctl, feat_list_new, gaps=False)
            perm_index = feat_keep.index('GSW_perm')
            flood_index = feat_keep.index('flooded')
            data_vector_train[data_vector_train[:, perm_index] == 1,
                              flood_index] = 0  # Remove flood water that is perm water
            data_vector_train = np.delete(data_vector_train, perm_index,
                                          axis=1)  # Remove perm water column
            shape = data_vector_train.shape
            X_train = data_vector_train[:, :shape[1] - 1]
            y_train = data_vector_train[:, shape[1] - 1]
            y_train = to_categorical(y_train)
            INPUT_DIMS = X_train.shape[1]

            model_path = data_path / batch / 'models' / img
            metrics_path = (data_path / batch / 'metrics' / 'training' / img /
                            (img + '_clouds_' + str(pctl)))

            try:
                metrics_path.mkdir(parents=True)
                model_path.mkdir(parents=True)
            except FileExistsError:
                pass

            model_path = model_path / (img + '_clouds_' + str(pctl) + '.h5')

            callbacks = [
                tf.keras.callbacks.EarlyStopping(
                    monitor='softmax_output_categorical_accuracy',
                    min_delta=0.005,
                    patience=5),
                tf.keras.callbacks.ModelCheckpoint(filepath=str(model_path),
                                                   monitor='loss',
                                                   save_best_only=True),
                CSVLogger(metrics_path / 'training_log.log')
            ]

            start_time = time.time()
            model = get_model(model_params['epochs'],
                              X_train,
                              y_train,
                              X_train.shape,
                              T,
                              D=2,
                              batch_size=model_params['batch_size'],
                              dropout_rate=dropout_rate,
                              callbacks=callbacks)
            end_time = time.time()
            times.append(timer(start_time, end_time, False))
            # model.save(model_path)

        metrics_path = metrics_path.parent
        times = [float(i) for i in times]
        times = np.column_stack([pctls, times])
        times_df = pd.DataFrame(times,
                                columns=['cloud_cover', 'training_time'])
        times_df.to_csv(metrics_path / 'training_times.csv', index=False)
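
Note: to_categorical() one-hot encodes the binary flood labels, which is why D=2 target classes are passed to the model. For example:

import numpy as np
from tensorflow.keras.utils import to_categorical

y = np.array([0., 1., 1., 0.])
print(to_categorical(y))
# [[1. 0.]
#  [0. 1.]
#  [0. 1.]
#  [1. 0.]]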
Example #12
File: RF.py Project: moghimis/CPR
def rf_training(img_list, pctls, feat_list_new, data_path, batch, n_jobs):
    for j, img in enumerate(img_list):
        print(img + ': stacking tif, generating clouds')
        times = []
        tif_stacker(data_path, img, feat_list_new, features=True, overwrite=False)
        cloud_generator(img, data_path, overwrite=False)

        for i, pctl in enumerate(pctls):
            print(img, pctl, '% CLOUD COVER')
            print('Preprocessing')
            tf.keras.backend.clear_session()
            data_train, data_vector_train, data_ind_train, feat_keep = preprocessing(data_path, img, pctl,
                                                                                     feat_list_new,
                                                                                     test=False)
            perm_index = feat_keep.index('GSW_perm')
            flood_index = feat_keep.index('flooded')
            data_vector_train[
                data_vector_train[:, perm_index] == 1, flood_index] = 0  # Remove flood water that is perm water
            data_vector_train = np.delete(data_vector_train, perm_index, axis=1)  # Remove perm water column
            shape = data_vector_train.shape
            X_train, y_train = data_vector_train[:, 0:shape[1] - 1], data_vector_train[:, shape[1] - 1]

            model_path = data_path / batch / 'models' / img
            metrics_path = (data_path / batch / 'metrics' / 'training' / img /
                            (img + '_clouds_' + str(pctl)))

            try:
                metrics_path.mkdir(parents=True)
                model_path.mkdir(parents=True)
            except FileExistsError:
                pass

            param_path = (data_path / batch / 'models' / '4514_LC08_027033_20170826_1' /
                          '4514_LC08_027033_20170826_1_clouds_50params.pkl')
            model_path = model_path / (img + '_clouds_' + str(pctl) + '.sav')

            # # Hyperparameter optimization
            # print('Hyperparameter search')
            # base_rf = RandomForestClassifier(random_state=0, n_estimators=100, max_leaf_nodes=10)
            # space = [skopt.space.Integer(2, 1000, name="max_leaf_nodes"),
            #          skopt.space.Integer(2, 200, name="n_estimators"),
            #          skopt.space.Integer(2, 3000, name="max_depth")]
            #
            # @use_named_args(space)
            # def objective(**params):
            #     base_rf.set_params(**params)
            #     return -np.mean(cross_val_score(base_rf, X_train, y_train, cv=5, n_jobs=n_jobs, scoring="f1"))
            #
            # res_rf = forest_minimize(objective, space, base_estimator='RF', n_calls=11,
            #                          random_state=0, verbose=True, n_jobs=n_jobs)
            # print(type(res_rf))
            # skopt.utils.dump(res_rf, param_path, store_objective=False)

            res_rf = skopt.utils.load(param_path)
            # Training
            print('Training with optimized hyperparameters')
            start_time = time.time()
            rf = RandomForestClassifier(random_state=0,
                                        max_leaf_nodes=res_rf.x[0],
                                        n_estimators=res_rf.x[1],
                                        max_depth=res_rf.x[2],
                                        n_jobs=-1)
            rf.fit(X_train, y_train)
            end_time = time.time()
            times.append(timer(start_time, end_time, False))
            joblib.dump(rf, model_path)

        metrics_path = metrics_path.parent
        times = [float(i) for i in times]
        times = np.column_stack([pctls, times])
        times_df = pd.DataFrame(times, columns=['cloud_cover', 'training_time'])
        times_df.to_csv(metrics_path / 'training_times.csv', index=False)
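
Note: the commented-out search above uses scikit-optimize, and res_rf.x holds the best parameters in search-space order (max_leaf_nodes, n_estimators, max_depth). A toy reproduction on a stand-in objective:

import skopt

space = [skopt.space.Integer(2, 1000, name='max_leaf_nodes'),
         skopt.space.Integer(2, 200, name='n_estimators'),
         skopt.space.Integer(2, 3000, name='max_depth')]

def objective(params):
    max_leaf_nodes, n_estimators, max_depth = params
    # Toy stand-in for the cross-validated F1 objective
    return abs(max_leaf_nodes - 500) / 500 + abs(n_estimators - 100) / 100

res_rf = skopt.forest_minimize(objective, space, n_calls=11, random_state=0)
print(res_rf.x)  # best [max_leaf_nodes, n_estimators, max_depth]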
Example #13
def log_reg_training_sample(img_list, pctls, feat_list_new, feat_list_all,
                            data_path, batch, n_flood, n_nonflood):
    for img in img_list:
        print(img + ': stacking tif, generating clouds')
        times = []
        tif_stacker(data_path, img, feat_list_new, overwrite=False)
        cloud_generator(img, data_path, overwrite=False)

        for pctl in pctls:
            print(img, pctl, '% CLOUD COVER')
            print('Preprocessing')

            sample_coords, data_train = get_sample_coords(
                img, pctl, n_flood, n_nonflood)
            perm_index = data_train.shape[2] - 2  # GSW_perm is the second-to-last band
            flood_index = data_train.shape[2] - 1  # flooded is the last band
            data_vector_train = get_sample_data(sample_coords, data_train)
            data_vector_train, scaler = standardize_data(data_vector_train)
            data_vector_train = np.delete(data_vector_train, perm_index,
                                          axis=1)  # Remove perm water column
            shape = data_vector_train.shape
            X_train = data_vector_train[:, :shape[1] - 1]
            y_train = data_vector_train[:, shape[1] - 1]

            model_path = data_path / batch / 'models' / img
            metrics_path = (data_path / batch / 'metrics' / 'training' / img /
                            (img + '_clouds_' + str(pctl)))
            scaler_dir = data_path / 'scalers' / img

            if not model_path.exists():
                model_path.mkdir(parents=True)
            if not metrics_path.exists():
                metrics_path.mkdir(parents=True)
            if not scaler_dir.exists():
                scaler_dir.mkdir(parents=True)

            model_path = (data_path / batch / 'models' / img /
                          (img + '_clouds_' + str(pctl) + '.sav'))
            scaler_path = scaler_dir / '{}_clouds_{}_scaler_.sav'.format(img, pctl)
            joblib.dump(scaler, scaler_path)

            print('Training')
            start_time = time.time()
            logreg = LogisticRegression(solver='lbfgs')
            logreg.fit(X_train, y_train)
            end_time = time.time()
            times.append(timer(start_time, end_time, False))
            joblib.dump(logreg, model_path)

            del data_train, data_vector_train, logreg

        metrics_path = metrics_path.parent
        times = [float(i) for i in times]
        times = np.column_stack([pctls, times])
        times_df = pd.DataFrame(times,
                                columns=['cloud_cover', 'training_time'])
        times_df.to_csv(metrics_path / 'training_times.csv', index=False)
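
Note: get_sample_coords(), get_sample_data(), and standardize_data() are project code not shown here. Given that standardize_data() returns both the transformed vector and a scaler that is later saved with joblib, one plausible scikit-learn stand-in (an assumption; the real version may differ) is:

import numpy as np
from sklearn.preprocessing import StandardScaler

def standardize_data(data_vector):
    """Fit a scaler on the feature columns, leaving the label column as-is."""
    scaler = StandardScaler()
    features = scaler.fit_transform(data_vector[:, :-1])
    return np.column_stack([features, data_vector[:, -1]]), scaler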
Example #14
# Plot uncertainty and FP/FN
import matplotlib.pyplot as plt
import numpy as np
import rasterio
from PIL import Image, ImageEnhance

print('Creating FN/FP map for {}'.format(img))
plot_path = data_path / batch / 'plots' / img
bin_file = data_path / batch / 'predictions' / img / 'predictions.h5'

stack_path = data_path / 'images' / img / 'stack' / 'stack.tif'

# Get RGB image
print('Stacking RGB image')
band_list = ['B1', 'B2', 'B3', 'B4', 'B5', 'B6', 'B7']
tif_stacker(data_path, img, band_list, features=False, overwrite=False)
spectra_stack_path = data_path / 'images' / img / 'stack' / 'spectra_stack.tif'


# Function to normalize the grid values
def normalize(array):
    """Normalizes numpy arrays into scale 0.0 - 1.0"""
    array_min, array_max = np.nanmin(array), np.nanmax(array)
    return (array - array_min) / (array_max - array_min)


print('Processing RGB image')
with rasterio.open(spectra_stack_path, 'r') as f:
    red, green, blue = f.read(4), f.read(3), f.read(2)
    red[red == -999999] = np.nan
    green[green == -999999] = np.nan
    blue[blue == -999999] = np.nan
    rgb = np.dstack((normalize(red), normalize(green), normalize(blue)))
Example #15
def false_map(probs, data_path, save=True):
    """
    Creates map of FP/FNs overlaid on RGB image
    save : bool
    If true, saves RGB FP/FN overlay image. If false, just saves FP/FN overlay
    """
    plt.ioff()
    for i, img in enumerate(img_list):
        print('Creating FN/FP map for {}'.format(img))
        plot_path = data_path / batch / 'plots' / img
        bin_file = data_path / batch / 'predictions' / img / 'predictions.h5'

        stack_path = data_path / 'images' / img / 'stack' / 'stack.tif'

        # Get RGB image
        print('Stacking RGB image')
        band_list = ['B1', 'B2', 'B3', 'B4', 'B5', 'B6', 'B7']
        tif_stacker(data_path, img, band_list, features=False, overwrite=False)
        spectra_stack_path = data_path / 'images' / img / 'stack' / 'spectra_stack.tif'

        # Function to normalize the grid values
        def normalize(array):
            """Normalizes numpy arrays into scale 0.0 - 1.0"""
            array_min, array_max = np.nanmin(array), np.nanmax(array)
            return (array - array_min) / (array_max - array_min)

        print('Processing RGB image')
        with rasterio.open(spectra_stack_path, 'r') as f:
            red, green, blue = f.read(4), f.read(3), f.read(2)
            red[red == -999999] = np.nan
            green[green == -999999] = np.nan
            blue[blue == -999999] = np.nan
            redn = normalize(red)
            greenn = normalize(green)
            bluen = normalize(blue)
            rgb = np.dstack((redn, greenn, bluen))

        # Convert to PIL image, enhance, and save
        rgb_img = Image.fromarray((rgb * 255).astype(np.uint8))
        rgb_img = ImageEnhance.Contrast(rgb_img).enhance(1.5)
        rgb_img = ImageEnhance.Sharpness(rgb_img).enhance(2)
        rgb_img = ImageEnhance.Brightness(rgb_img).enhance(2)

        print('Saving RGB image')
        rgb_file = plot_path / 'rgb_img.png'
        rgb_img.save(rgb_file, dpi=(300, 300))

        # Reshape predicted values back into image band
        with rasterio.open(stack_path, 'r') as ds:
            shape = ds.read(1).shape  # Shape of full original image

        for j, pctl in enumerate(pctls):
            print('Fetching flood predictions for', str(pctl) + '%')
            # Read predictions
            with h5py.File(bin_file, 'r') as f:
                if probs:
                    prediction_probs = np.array(f[str(pctl)])  # Copy h5 dataset to array
                    predictions = np.argmax(prediction_probs, axis=1)
                else:
                    predictions = np.array(f[str(pctl)])  # Copy h5 dataset to array

            data_test, data_vector_test, data_ind_test, feat_keep = preprocessing(
                data_path, img, pctl, feat_list_new, test=True)

            # Add predicted values to cloud-covered pixel positions
            prediction_img = np.zeros(shape)
            prediction_img[:] = np.nan
            rows, cols = zip(data_ind_test)
            prediction_img[rows, cols] = predictions

            # Remove perm water from predictions and actual
            perm_index = feat_keep.index('GSW_perm')
            flood_index = feat_keep.index('flooded')
            data_vector_test[data_vector_test[:, perm_index] == 1,
                             flood_index] = 0  # Remove flood water that is perm water
            data_shape = data_vector_test.shape
            with rasterio.open(stack_path, 'r') as ds:
                perm_feat = ds.read(perm_index + 1)
                prediction_img[perm_feat == 1] = 0

            # Add actual flood values to cloud-covered pixel positions
            flooded_img = np.zeros(shape)
            flooded_img[:] = np.nan
            flooded_img[rows, cols] = data_vector_test[:, data_shape[1] - 1]

            # Visualizing FNs/FPs
            ones = np.ones(shape=shape)
            red_actual = np.where(ones, flooded_img, 0.5)  # Actual
            blue_preds = np.where(ones, prediction_img, 0.5)  # Predictions
            green_combo = np.minimum(red_actual, blue_preds)
            alphas = np.ones(shape) * 255

            # Convert black pixels to transparent in fpfn image so it can overlay RGB.
            # PIL needs uint8 for RGBA, so scale the 0-1 channels to 0-255 first.
            fpfn_img = np.dstack((red_actual * 255, green_combo * 255,
                                  blue_preds * 255, alphas))
            fpfn_img = np.nan_to_num(fpfn_img).astype(np.uint8)
            fpfn_overlay_file = plot_path / ('false_map' + str(pctl) + '.png')
            indices = np.where((fpfn_img[:, :, 0] == 0)
                               & (fpfn_img[:, :, 1] == 0)
                               & (fpfn_img[:, :, 2] == 0)
                               & (fpfn_img[:, :, 3] == 255))
            fpfn_img[indices] = 0
            fpfn_overlay = Image.fromarray(fpfn_img, mode='RGBA')
            fpfn_overlay.save(fpfn_overlay_file, dpi=(300, 300))

            # Superimpose comparison image and RGB image, then save and close
            if save:
                rgb_img.paste(fpfn_overlay, (0, 0), fpfn_overlay)
                print('Saving overlay image for', str(pctl) + '%')
                rgb_img.save(plot_path / ('false_map_overlay' + str(pctl) + '.png'),
                             dpi=(300, 300))
            plt.close('all')
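
Note: the black-to-transparent trick in miniature: pixels that are pure black and fully opaque get zeroed (including alpha) so the overlay does not hide the RGB basemap underneath.

import numpy as np
from PIL import Image

rgba = np.zeros((4, 4, 4), dtype=np.uint8)
rgba[..., 3] = 255                 # start fully opaque
rgba[1, 1] = [255, 0, 0, 255]      # one red (false negative) pixel

black = ((rgba[..., 0] == 0) & (rgba[..., 1] == 0)
         & (rgba[..., 2] == 0) & (rgba[..., 3] == 255))
rgba[black] = 0                    # zero everything -> transparent black

overlay = Image.fromarray(rgba, mode='RGBA')
base = Image.new('RGB', overlay.size, (30, 60, 90))
base.paste(overlay, (0, 0), overlay)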