Example No. 1
def prediction(img_list, pctls, feat_list_new, data_path, batch, remove_perm):
    for j, img in enumerate(img_list):
        times = []
        accuracy, precision, recall, f1 = [], [], [], []
        preds_path = data_path / batch / 'predictions' / img
        bin_file = preds_path / 'predictions.h5'
        metrics_path = data_path / batch / 'metrics' / 'testing' / img

        try:
            metrics_path.mkdir(parents=True)
        except FileExistsError:
            print('Metrics directory already exists')

        for i, pctl in enumerate(pctls):
            print('Preprocessing', img, pctl, '% cloud cover')
            data_test, data_vector_test, data_ind_test, feat_keep = preprocessing(data_path, img, pctl, feat_list_new,
                                                                                  test=True)
            perm_index = feat_keep.index('GSW_perm')
            flood_index = feat_keep.index('flooded')
            if remove_perm:
                data_vector_test[data_vector_test[:, perm_index] == 1, flood_index] = 0  # Remove flood water that is perm water
            data_vector_test = np.delete(data_vector_test, perm_index, axis=1)  # Remove GSW_perm column
            data_shape = data_vector_test.shape
            X_test, y_test = data_vector_test[:, 0:data_shape[1]-1], data_vector_test[:, data_shape[1]-1]

            print('Predicting for {} at {}% cloud cover'.format(img, pctl))
            start_time = time.time()
            model_path = data_path / batch / 'models' / img / '{}'.format(img + '_clouds_' + str(pctl) + '.sav')
            trained_model = joblib.load(model_path)
            pred_probs = trained_model.predict_proba(X_test)
            preds = np.argmax(pred_probs, axis=1)

            try:
                preds_path.mkdir(parents=True)
            except FileExistsError:
                pass

            with h5py.File(bin_file, 'a') as f:
                if str(pctl) in f:
                    print('Deleting earlier mean predictions')
                    del f[str(pctl)]
                f.create_dataset(str(pctl), data=pred_probs)

            times.append(timer(start_time, time.time(), False))  # Elapsed time for prediction

            print('Evaluating predictions')
            accuracy.append(accuracy_score(y_test, preds))
            precision.append(precision_score(y_test, preds))
            recall.append(recall_score(y_test, preds))
            f1.append(f1_score(y_test, preds))

            del preds, pred_probs, X_test, y_test, trained_model, data_test, data_vector_test, data_ind_test

        metrics = pd.DataFrame(np.column_stack([pctls, accuracy, precision, recall, f1]),
                               columns=['cloud_cover', 'accuracy', 'precision', 'recall', 'f1'])
        metrics.to_csv(metrics_path / 'metrics.csv', index=False)
        times = [float(i) for i in times]  # Convert time objects to float, otherwise the CSV values will be non-numeric
        times_df = pd.DataFrame(np.column_stack([pctls, times]),
                                columns=['cloud_cover', 'testing_time'])
        times_df.to_csv(metrics_path / 'testing_times.csv', index=False)
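
A minimal driver for prediction() might look like the sketch below. The paths, batch name, image ID, and most feature names are placeholders (only 'GSW_perm' and 'flooded' come from the code above), so adjust them to your own project layout.

from pathlib import Path

# Hypothetical call; every value here is illustrative.
data_path = Path('/path/to/data')
img_list = ['example_img_01']                      # placeholder image ID
pctls = [10, 30, 50, 70, 90]                       # cloud-cover percentages to evaluate
feat_list_new = ['feat_1', 'feat_2', 'GSW_perm', 'flooded']  # placeholder names; order must match the stacked tif

prediction(img_list, pctls, feat_list_new, data_path, batch='test_batch', remove_perm=True)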
Example No. 2
def training_bnn(img_list, pctls, feat_list_new, data_path, batch,
                 **model_params):
    for j, img in enumerate(img_list):
        print(img + ': stacking tif, generating clouds')
        times = []
        tif_stacker(data_path,
                    img,
                    feat_list_new,
                    features=True,
                    overwrite=False)
        cloud_generator(img, data_path, overwrite=False)

        for i, pctl in enumerate(pctls):
            print(img, pctl, '% CLOUD COVER')
            print('Preprocessing')
            tf.keras.backend.clear_session()
            data_train, data_vector_train, data_ind_train, feat_keep = preprocessing(
                data_path, img, pctl, feat_list_new, test=False)
            perm_index = feat_keep.index('GSW_perm')
            flood_index = feat_keep.index('flooded')
            data_vector_train[data_vector_train[:, perm_index] == 1,
                              flood_index] = 0
            data_vector_train = np.delete(data_vector_train,
                                          perm_index,
                                          axis=1)
            shape = data_vector_train.shape
            X_train = data_vector_train[:, 0:shape[1] - 1]
            y_train = data_vector_train[:, shape[1] - 1]
            y_train = to_categorical(y_train)
            D = len(set(y_train[:, 0]))  # Target classes

            model_path = data_path / batch / 'models' / img
            metrics_path = data_path / batch / 'metrics' / 'training' / img / '{}'.format(
                img + '_clouds_' + str(pctl))
            try:
                metrics_path.mkdir(parents=True)
                model_path.mkdir(parents=True)
            except FileExistsError:
                pass
            model_path = model_path / '{}'.format(img + '_clouds_' +
                                                  str(pctl) + '.h5')
            print('Training model')
            start_time = time.time()
            aleatoric_model = get_aleatoric_uncertainty_model(X_train,
                                                              y_train,
                                                              **model_params,
                                                              D=D)
            end_time = time.time()
            times.append(timer(start_time, end_time, False))
            aleatoric_model.save(model_path)

        metrics_path = metrics_path.parent
        times = [float(i) for i in times]
        times = np.column_stack([pctls, times])
        times_df = pd.DataFrame(times,
                                columns=['cloud_cover', 'training_time'])
        times_df.to_csv(metrics_path / 'training_times.csv', index=False)
Example No. 3
def log_reg_training(img_list, pctls, feat_list_new, data_path, batch):
    for j, img in enumerate(img_list):
        print(img + ': stacking tif, generating clouds')
        times = []
        tif_stacker(data_path,
                    img,
                    feat_list_new,
                    features=True,
                    overwrite=False)
        cloud_generator(img, data_path, overwrite=False)

        for i, pctl in enumerate(pctls):
            print(img, pctl, '% CLOUD COVER')
            print('Preprocessing')
            data_train, data_vector_train, data_ind_train, feat_keep = preprocessing(
                data_path, img, pctl, feat_list_new, test=False)
            perm_index = feat_keep.index('GSW_perm')
            flood_index = feat_keep.index('flooded')
            # data_vector_train[data_vector_train[:, perm_index] == 1, flood_index] = 0
            data_vector_train = np.delete(data_vector_train,
                                          perm_index,
                                          axis=1)
            shape = data_vector_train.shape
            X_train = data_vector_train[:, 0:shape[1] - 1]
            y_train = data_vector_train[:, shape[1] - 1]

            model_path = data_path / batch / 'models' / img
            metrics_path = data_path / batch / 'metrics' / 'training' / img / '{}'.format(
                img + '_clouds_' + str(pctl))

            if not model_path.exists():
                model_path.mkdir(parents=True)
            if not metrics_path.exists():
                metrics_path.mkdir(parents=True)

            model_path = model_path / '{}'.format(img + '_clouds_' +
                                                  str(pctl) + '.sav')

            print('Training')
            start_time = time.time()
            logreg = LogisticRegression(n_jobs=-1, solver='sag')
            logreg.fit(X_train, y_train)
            end_time = time.time()
            times.append(timer(start_time, end_time, False))
            joblib.dump(logreg, model_path)

        metrics_path = metrics_path.parent
        times = [float(i) for i in times]
        times = np.column_stack([pctls, times])
        times_df = pd.DataFrame(times,
                                columns=['cloud_cover', 'training_time'])
        times_df.to_csv(metrics_path / 'training_times.csv', index=False)
Example No. 4
def prediction_with_uncertainty(img_list,
                                pctls,
                                feat_list_new,
                                data_path,
                                batch,
                                DROPOUT_RATE,
                                MC_PASSES,
                                remove_perm,
                                weight_decay=0.005,
                                length_scale=0.00001,
                                **model_params):
    for j, img in enumerate(img_list):
        times = []
        accuracy, precision, recall, f1 = [], [], [], []
        preds_path = data_path / batch / 'predictions' / img
        vars_path = data_path / batch / 'variances' / img
        mc_bin_file = preds_path / 'mc_preds.h5'
        preds_bin_file = preds_path / 'predictions.h5'
        vars_bin_file = vars_path / 'variances.h5'
        metrics_path = data_path / batch / 'metrics' / 'testing' / img

        try:
            metrics_path.mkdir(parents=True)
        except FileExistsError:
            print('Metrics directory already exists')
        try:
            preds_path.mkdir(parents=True)
        except FileExistsError:
            print('Predictions directory already exists')
        try:
            vars_path.mkdir(parents=True)
        except FileExistsError:
            print('Variances directory already exists')

        for i, pctl in enumerate(pctls):
            print('Preprocessing', img, pctl, '% cloud cover')
            data_test, data_vector_test, data_ind_test, feat_keep = preprocessing(
                data_path, img, pctl, gaps=True)
            feat_list_keep = [feat_list_new[i] for i in feat_keep]  # Features that survived preprocessing
            perm_index = feat_list_keep.index('GSW_perm')
            flood_index = feat_list_keep.index('flooded')
            if remove_perm:
                data_vector_test[data_vector_test[:, perm_index] == 1,
                                 flood_index] = 0  # Remove flood water that is perm water
            data_vector_test = np.delete(data_vector_test, perm_index,
                                         axis=1)  # Remove GSW_perm column
            data_shape = data_vector_test.shape
            X_test = data_vector_test[:, 0:data_shape[1] - 1]
            y_test = data_vector_test[:, data_shape[1] - 1]

            # Initialize binary file to hold predictions
            with h5py.File(mc_bin_file, 'w') as f:
                f.create_dataset('mc_preds',
                                 shape=(X_test.shape[0], 1),
                                 maxshape=(X_test.shape[0], None),
                                 chunks=True,
                                 compression='gzip'
                                 )  # Create empty dataset with shape of data

            start_time = time.time()
            model_path = data_path / batch / 'models' / img / '{}'.format(
                img + '_clouds_' + str(pctl) + '.h5')
            trained_model = tf.keras.models.load_model(model_path)

            for k in range(MC_PASSES):
                if k % 10 == 0 or k == MC_PASSES - 1:
                    print('Running MC {}/{} for {} at {}% cloud cover'.format(
                        k, MC_PASSES, img, pctl))
                flood_prob = trained_model.predict(
                    X_test,
                    batch_size=model_params['batch_size'],
                    use_multiprocessing=True)  # Predict
                flood_prob = flood_prob[:, 1]  # Drop probability of not flooded (0) to save space
                with h5py.File(mc_bin_file, 'a') as f:
                    f['mc_preds'][:, -1] = flood_prob  # Append preds to h5 file
                    if k < MC_PASSES - 1:  # Resize to append next pass, if there is one
                        f['mc_preds'].resize((f['mc_preds'].shape[1] + 1),
                                             axis=1)
                tf.keras.backend.clear_session()
                del flood_prob

            # Calculate MC statistics
            print('Calculating MC statistics for {} at {}% cloud cover'.format(
                img, pctl))
            with h5py.File(mc_bin_file, 'r') as f:
                dset = f['mc_preds']
                preds_da = da.from_array(
                    dset, chunks="250 MiB")  # Open h5 file as dask array
                means = preds_da.mean(axis=1)
                means = means.compute()
                variance = preds_da.var(axis=1)
                variance = variance.compute()
                tau = (length_scale**2 *
                       (1 - DROPOUT_RATE)) / (2 * data_shape[0] * weight_decay)
                variance = variance + tau
                preds = means.round()
                del f, means, preds_da, dset

            os.remove(mc_bin_file)  # Delete predictions to save space on disk

            print('Saving mean preds/vars for {} at {}% cloud cover'.format(
                img, pctl))
            with h5py.File(preds_bin_file, 'a') as f:
                if str(pctl) in f:
                    print('Deleting earlier mean predictions')
                    del f[str(pctl)]
                f.create_dataset(str(pctl), data=preds)
            with h5py.File(vars_bin_file, 'a') as f:
                if str(pctl) in f:
                    print('Deleting earlier variances')
                    del f[str(pctl)]
                f.create_dataset(str(pctl), data=variance)

            times.append(timer(start_time, time.time(),
                               False))  # Elapsed time for MC simulations

            print('Evaluating predictions for {} at {}% cloud cover'.format(
                img, pctl))
            accuracy.append(accuracy_score(y_test, preds))
            precision.append(precision_score(y_test, preds))
            recall.append(recall_score(y_test, preds))
            f1.append(f1_score(y_test, preds))

            del preds, X_test, y_test, trained_model, data_test, data_vector_test, data_ind_test

        metrics = pd.DataFrame(
            np.column_stack([pctls, accuracy, precision, recall, f1]),
            columns=['cloud_cover', 'accuracy', 'precision', 'recall', 'f1'])
        metrics.to_csv(metrics_path / 'metrics.csv', index=False)
        times = [float(i) for i in times]  # Convert time objects to float, otherwise the CSV values will be non-numeric
        times_df = pd.DataFrame(np.column_stack([pctls, times]),
                                columns=['cloud_cover', 'testing_time'])
        times_df.to_csv(metrics_path / 'testing_times.csv', index=False)
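
The uncertainty bookkeeping in prediction_with_uncertainty() reduces to a few array operations once the MC passes are collected: a per-pixel mean, a per-pixel variance across passes, and a constant offset tau derived from the dropout rate, weight decay, and length scale. A toy sketch of that step is below; the array values and hyper-parameters are placeholders, not the ones used above.

import numpy as np

# Rows are pixels, columns are stochastic forward passes (placeholder values).
mc_preds = np.random.rand(5, 100)

length_scale, dropout_rate, weight_decay = 0.00001, 0.2, 0.005
n_test = mc_preds.shape[0]                       # stands in for data_shape[0] in the function above

means = mc_preds.mean(axis=1)                    # mean flood probability per pixel
variance = mc_preds.var(axis=1)                  # spread across MC passes
tau = (length_scale ** 2 * (1 - dropout_rate)) / (2 * n_test * weight_decay)
variance = variance + tau                        # constant offset, exactly as in the function above
preds = means.round()                            # threshold at 0.5 to get the binary flood map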
Example No. 5
    def uncertainty_map_LR(self):
        data_path = self.data_path
        plt.ioff()
        my_dpi = 300

        # Get predictions and variances
        for img in self.img_list:
            print('Creating uncertainty map for {}'.format(img))
            plot_path = data_path / self.batch / 'plots' / img
            se_lower_bin_file = data_path / self.batch / 'uncertainties' / img / 'se_lower.h5'
            se_upper_bin_file = data_path / self.batch / 'uncertainties' / img / 'se_upper.h5'
            stack_path = data_path / 'images' / img / 'stack' / 'stack.tif'

            try:
                plot_path.mkdir(parents=True)
            except FileExistsError:
                pass

            # Reshape variance values back into image band
            with rasterio.open(stack_path, 'r') as ds:
                shape = ds.read(1).shape  # Shape of full original image

            for pctl in self.pctls:
                with h5py.File(se_lower_bin_file, 'r') as f:
                    lower = f[str(pctl)]
                    lower = np.array(lower)  # Copy h5 dataset to array

                with h5py.File(se_upper_bin_file, 'r') as f:
                    upper = f[str(pctl)]
                    upper = np.array(upper)  # Copy h5 dataset to array

                data_test, data_vector_test, data_ind_test, feat_keep = preprocessing(
                    data_path, img, pctl, self.feat_list_all, test=True)

                uncertainties = upper - lower

                perm_index = feat_keep.index('GSWPerm')
                perm = data_test[:, :, perm_index]

                unc_image = np.zeros(shape)
                unc_image[:] = np.nan
                rows, cols = zip(data_ind_test)
                unc_image[rows, cols] = uncertainties

                unc_image[perm == 1] = 0
                cutoff_value = np.nanpercentile(
                    unc_image,
                    99.99)  # Truncate values so outliers don't skew colorbar
                unc_image[unc_image > cutoff_value] = np.round(cutoff_value, 0)

                fig, ax = plt.subplots()
                im = ax.imshow(unc_image, cmap='magma')
                ax.get_xaxis().set_visible(False)
                ax.get_yaxis().set_visible(False)
                im_ratio = unc_image.shape[0] / unc_image.shape[1]
                cbar = fig.colorbar(im,
                                    ax=ax,
                                    fraction=0.02 * im_ratio,
                                    pad=0.02 * im_ratio)
                # cbar_labels = [label.get_text() for label in cbar.ax.get_yticklabels()]  # Add + to cbar max value
                # cbar_labels[-1] = cbar_labels[-1] + '+'
                # cbar.ax.set_yticklabels(cbar_labels)
                plt.tight_layout()
                plt.savefig(
                    plot_path /
                    '{}'.format('map_uncertainty_' + str(pctl) + '.png'),
                    dpi=my_dpi,
                    pad_inches=0.0)

                plt.close('all')
Example No. 6
def training2(img_list,
              pctls,
              model_func,
              feat_list_new,
              data_path,
              batch,
              DROPOUT_RATE=0,
              HOLDOUT=0.3,
              **model_params):
    '''
    Removes flood water that is permanent water
    '''

    get_model = model_func

    for j, img in enumerate(img_list):
        times = []
        tif_stacker(data_path,
                    img,
                    feat_list_new,
                    features=True,
                    overwrite=True)
        cloud_generator(img, data_path, overwrite=False)

        for i, pctl in enumerate(pctls):
            data_train, data_vector_train, data_ind_train = preprocessing(
                data_path, img, pctl, gaps=False)
            perm_index = feat_list_new.index('GSW_perm')
            flood_index = feat_list_new.index('flooded')
            data_vector_train[
                data_vector_train[:, perm_index] == 1,
                flood_index] = 0  # Remove flood water that is perm water
            data_vector_train = np.delete(data_vector_train,
                                          perm_index,
                                          axis=1)  # Remove perm water column

            training_data, validation_data = train_val(data_vector_train,
                                                       holdout=HOLDOUT)
            X_train, y_train = training_data[:, 0:14], training_data[:, 14]
            X_val, y_val = validation_data[:, 0:14], validation_data[:, 14]
            INPUT_DIMS = X_train.shape[1]

            model_path = data_path / batch / 'models' / img
            metrics_path = data_path / batch / 'metrics' / 'training' / img / '{}'.format(
                img + '_clouds_' + str(pctl))

            try:
                metrics_path.mkdir(parents=True)
                model_path.mkdir(parents=True)
            except FileExistsError:
                pass

            model_path = model_path / '{}'.format(img + '_clouds_' +
                                                  str(pctl) + '.h5')

            csv_logger = CSVLogger(metrics_path / 'training_log.log')
            model_params['callbacks'].append(csv_logger)

            print('~~~~~', img, pctl, '% CLOUD COVER')

            model = get_model(INPUT_DIMS)

            start_time = time.time()
            model.fit(X_train,
                      y_train,
                      **model_params,
                      validation_data=(X_val, y_val))

            end_time = time.time()
            times.append(timer(start_time, end_time, False))

            model.save(model_path)

        metrics_path = metrics_path.parent
        times = [float(i) for i in times]
        times = np.column_stack([pctls, times])
        times_df = pd.DataFrame(times,
                                columns=['cloud_cover', 'training_time'])
        times_df.to_csv(metrics_path / 'training_times.csv', index=False)
Example No. 7
def prediction_gen_model(img_list, pctls, feat_list_new, data_path, batch,
                         **model_params):
    model_path = data_path / batch / 'models' / 'gen_model.h5'
    for j, img in enumerate(img_list):
        times = []
        accuracy, precision, recall, f1 = [], [], [], []
        preds_path = data_path / batch / 'predictions' / img
        bin_file = preds_path / 'predictions.h5'
        metrics_path = data_path / batch / 'metrics' / 'testing' / img

        try:
            metrics_path.mkdir(parents=True)
        except FileExistsError:
            print('Metrics directory already exists')

        for i, pctl in enumerate(pctls):
            pretrained_model = tf.keras.models.load_model(model_path)
            for layer in pretrained_model.layers[:6]:  # Freeze the pretrained layers (avoid reusing the outer loop index i)
                layer.trainable = False
            pretrained_model.layers[6].trainable = True
            ll = pretrained_model.layers[6].output
            ll = tf.keras.layers.Dense(6)(ll)
            ll = tf.keras.layers.Dense(6)(ll)
            new_model = Model(pretrained_model.input, outputs=ll)

            print('Training')
            data_train, data_vector_train, data_ind_train, feat_keep = preprocessing(
                data_path, img, pctl, feat_list_new, test=False)
            perm_index = feat_keep.index('GSW_perm')
            flood_index = feat_keep.index('flooded')
            gsw_index = feat_keep.index('GSW_maxExtent')
            # Delete both columns in one np.delete call so the second index is not shifted by the first deletion
            data_vector_train = np.delete(data_vector_train, [perm_index, gsw_index], axis=1)
            data_shape = data_vector_train.shape
            X_train = data_vector_train[:, 0:data_shape[1] - 1]
            y_train = data_vector_train[:, data_shape[1] - 1]
            # Compile before fine-tuning (optimizer/loss here are assumptions; the original snippet did not compile)
            new_model.compile(optimizer='adam',
                              loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
                              metrics=['accuracy'])
            new_model.fit(X_train, y_train)  # fit() returns a History object, so keep the fitted model itself
            trained_model = new_model

            print('Preprocessing', img, pctl, '% cloud cover')
            data_test, data_vector_test, data_ind_test, feat_keep = preprocessing(
                data_path, img, pctl, feat_list_new, test=True)
            perm_index = feat_keep.index('GSW_perm')
            flood_index = feat_keep.index('flooded')
            gsw_index = feat_keep.index('GSW_maxExtent')  # Use feat_keep, as above, so the index matches the kept features
            data_vector_test[data_vector_test[:, perm_index] == 1,
                             flood_index] = 0  # Remove flood water that is perm water
            # Delete both columns in one np.delete call so the second index is not shifted by the first deletion
            data_vector_test = np.delete(data_vector_test, [perm_index, gsw_index], axis=1)
            data_shape = data_vector_test.shape
            X_test = data_vector_test[:, 0:data_shape[1] - 1]
            y_test = data_vector_test[:, data_shape[1] - 1]

            print('Predicting for {} at {}% cloud cover'.format(img, pctl))
            start_time = time.time()
            preds = trained_model.predict(
                X_test,
                batch_size=model_params['batch_size'],
                use_multiprocessing=True)
            preds = np.argmax(preds, axis=1)  # Display most probable value

            try:
                preds_path.mkdir(parents=True)
            except FileExistsError:
                pass

            with h5py.File(bin_file, 'a') as f:
                if str(pctl) in f:
                    print('Deleting earlier mean predictions')
                    del f[str(pctl)]
                f.create_dataset(str(pctl), data=preds)

            times.append(timer(start_time, time.time(), False))  # Elapsed time for prediction

            print('Evaluating predictions')
            accuracy.append(accuracy_score(y_test, preds))
            precision.append(precision_score(y_test, preds))
            recall.append(recall_score(y_test, preds))
            f1.append(f1_score(y_test, preds))

            del preds, X_test, y_test, data_test, data_vector_test, data_ind_test

        metrics = pd.DataFrame(
            np.column_stack([pctls, accuracy, precision, recall, f1]),
            columns=['cloud_cover', 'accuracy', 'precision', 'recall', 'f1'])
        metrics.to_csv(metrics_path / 'metrics.csv', index=False)
        times = [float(i) for i in times]  # Convert time objects to float, otherwise the CSV values will be non-numeric
        times_df = pd.DataFrame(np.column_stack([pctls, times]),
                                columns=['cloud_cover', 'testing_time'])
        times_df.to_csv(metrics_path / 'testing_times.csv', index=False)
Example No. 8
def training6(img_list,
              pctls,
              model_func,
              feat_list_new,
              data_path,
              batch,
              T,
              dropout_rate=0.2,
              **model_params):
    '''
    1. Removes ALL pixels that are over permanent water
    2. Finds the optimum learning rate and uses a cyclic LR scheduler to train the model
    3. No validation set for training
    '''
    get_model = model_func
    for j, img in enumerate(img_list):
        print(img + ': stacking tif, generating clouds')
        times = []
        tif_stacker(data_path,
                    img,
                    feat_list_new,
                    features=True,
                    overwrite=False)
        cloud_generator(img, data_path, overwrite=False)

        for i, pctl in enumerate(pctls):
            print(img, pctl, '% CLOUD COVER')
            print('Preprocessing')
            tf.keras.backend.clear_session()
            data_train, data_vector_train, data_ind_train, feat_keep = preprocessing(
                data_path, img, pctl, feat_list_new, gaps=False)
            perm_index = feat_keep.index('GSW_perm')
            flood_index = feat_keep.index('flooded')
            data_vector_train[
                data_vector_train[:, perm_index] == 1,
                flood_index] = 0  # Remove flood water that is perm water
            data_vector_train = np.delete(data_vector_train,
                                          perm_index,
                                          axis=1)  # Remove perm water column
            shape = data_vector_train.shape
            X_train = data_vector_train[:, 0:shape[1] - 1]
            y_train = data_vector_train[:, shape[1] - 1]
            y_train = to_categorical(y_train)
            INPUT_DIMS = X_train.shape[1]

            model_path = data_path / batch / 'models' / img
            metrics_path = data_path / batch / 'metrics' / 'training' / img / '{}'.format(
                img + '_clouds_' + str(pctl))

            try:
                metrics_path.mkdir(parents=True)
                model_path.mkdir(parents=True)
            except FileExistsError:
                pass

            model_path = model_path / '{}'.format(img + '_clouds_' +
                                                  str(pctl) + '.h5')

            callbacks = [
                tf.keras.callbacks.EarlyStopping(
                    monitor='softmax_output_categorical_accuracy',
                    min_delta=0.005,
                    patience=5),
                tf.keras.callbacks.ModelCheckpoint(filepath=str(model_path),
                                                   monitor='loss',
                                                   save_best_only=True),
                CSVLogger(metrics_path / 'training_log.log')
            ]

            start_time = time.time()
            model = get_model(model_params['epochs'],
                              X_train,
                              y_train,
                              X_train.shape,
                              T,
                              D=2,
                              batch_size=model_params['batch_size'],
                              dropout_rate=dropout_rate,
                              callbacks=callbacks)
            end_time = time.time()
            times.append(timer(start_time, end_time, False))
            # model.save(model_path)

        metrics_path = metrics_path.parent
        times = [float(i) for i in times]
        times = np.column_stack([pctls, times])
        times_df = pd.DataFrame(times,
                                columns=['cloud_cover', 'training_time'])
        times_df.to_csv(metrics_path / 'training_times.csv', index=False)
Example No. 9
    def false_map(self):
        """
        Creates map of FP/FNs overlaid on RGB image
        """
        plt.ioff()
        data_path = self.data_path
        for i, img in enumerate(self.img_list):
            print('Creating FN/FP map for {}'.format(img))
            plot_path = data_path / self.batch / 'plots' / img
            bin_file = data_path / self.batch / 'predictions' / img / 'predictions.h5'

            stack_path = data_path / 'images' / img / 'stack' / 'stack.tif'

            # Get RGB image
            print('Stacking RGB image')
            band_list = ['B1', 'B2', 'B3', 'B4', 'B5', 'B6', 'B7']
            tif_stacker(data_path,
                        img,
                        band_list,
                        features=False,
                        overwrite=False)
            spectra_stack_path = data_path / 'images' / img / 'stack' / 'spectra_stack.tif'

            # Function to normalize the grid values
            def normalize(array):
                """Normalizes numpy arrays into scale 0.0 - 1.0"""
                array_min, array_max = np.nanmin(array), np.nanmax(array)
                return ((array - array_min) / (array_max - array_min))

            print('Processing RGB image')
            with rasterio.open(spectra_stack_path, 'r') as f:
                red, green, blue = f.read(4), f.read(3), f.read(2)
                red[red == -999999] = np.nan
                green[green == -999999] = np.nan
                blue[blue == -999999] = np.nan
                redn = normalize(red)
                greenn = normalize(green)
                bluen = normalize(blue)
                rgb = np.dstack((redn, greenn, bluen))

            # Convert to PIL image, enhance, and save
            rgb_img = Image.fromarray((rgb * 255).astype(np.uint8))
            rgb_img = ImageEnhance.Contrast(rgb_img).enhance(1.5)
            rgb_img = ImageEnhance.Sharpness(rgb_img).enhance(2)
            rgb_img = ImageEnhance.Brightness(rgb_img).enhance(2)

            print('Saving RGB image')
            rgb_file = plot_path / '{}'.format('rgb_img' + '.png')
            rgb_img.save(rgb_file, dpi=(300, 300))

            # Reshape predicted values back into image band
            with rasterio.open(stack_path, 'r') as ds:
                shape = ds.read(1).shape  # Shape of full original image

            for pctl in self.pctls:
                data_test, data_vector_test, data_ind_test, feat_keep = preprocessing(
                    data_path, img, pctl, self.feat_list_new, test=True)
                for buffer_iter in self.buffer_iters:
                    print('Fetching flood predictions for buffer', buffer_iter,
                          'at',
                          str(pctl) + '{}'.format('%'))
                    # Read predictions
                    with h5py.File(bin_file, 'r') as f:
                        pred_name = str(pctl) + '_buff_' + str(buffer_iter)
                        predictions = f[pred_name]
                        predictions = np.array(
                            predictions)  # Copy h5 dataset to array

                    # Add predicted values to cloud-covered pixel positions
                    prediction_img = np.zeros(shape)
                    prediction_img[:] = np.nan
                    rows, cols = zip(data_ind_test)
                    prediction_img[rows, cols] = predictions

                    # Remove perm water from predictions and actual
                    perm_index = feat_keep.index('GSW_perm')
                    flood_index = feat_keep.index('flooded')
                    data_vector_test[
                        data_vector_test[:, perm_index] == 1,
                        flood_index] = 0  # Remove flood water that is perm water
                    data_shape = data_vector_test.shape
                    with rasterio.open(stack_path, 'r') as ds:
                        perm_feat = ds.read(perm_index + 1)
                        prediction_img[perm_feat == 1] = 0

                    # Add actual flood values to cloud-covered pixel positions
                    flooded_img = np.zeros(shape)
                    flooded_img[:] = np.nan
                    flooded_img[rows,
                                cols] = data_vector_test[:, data_shape[1] - 1]

                    # Visualizing FNs/FPs
                    ones = np.ones(shape=shape)
                    red_actual = np.where(ones, flooded_img, 0.5)  # Actual
                    blue_preds = np.where(ones, prediction_img,
                                          0.5)  # Predictions
                    green_combo = np.minimum(red_actual, blue_preds)

                    # Saving FN/FP comparison image
                    comparison_img = np.dstack(
                        (red_actual, green_combo, blue_preds))
                    comparison_img_file = plot_path / '{}'.format(
                        'false_map' + str(pctl) + '_buff_' + str(buffer_iter) +
                        '.png')
                    print('Saving FN/FP image for buffer', str(buffer_iter),
                          'at',
                          str(pctl) + '{}'.format('%'))
                    matplotlib.image.imsave(comparison_img_file,
                                            comparison_img,
                                            dpi=300)

                    # Load comparison image
                    flood_overlay = Image.open(comparison_img_file)

                    # Convert black pixels to transparent in comparison image so it can overlay RGB
                    datas = flood_overlay.getdata()
                    newData = []
                    for item in datas:
                        if item[0] == 0 and item[1] == 0 and item[2] == 0:
                            newData.append((255, 255, 255, 0))
                        else:
                            newData.append(item)
                    flood_overlay.putdata(newData)

                    # Superimpose comparison image and RGB image, then save and close
                    rgb_img.paste(flood_overlay, (0, 0), flood_overlay)
                    plt.imshow(rgb_img)
                    print('Saving overlay image for buffer', str(buffer_iter),
                          'at',
                          str(pctl) + '{}'.format('%'))
                    rgb_img.save(
                        plot_path /
                        '{}'.format('false_map_overlay' + str(pctl) +
                                    '_buff_' + str(buffer_iter) + '.png'),
                        dpi=(300, 300))
                    plt.close('all')
Example No. 10
    def false_map(self, probs, save=True):
        """
        Creates map of FP/FNs overlaid on RGB image
        save : bool
        If true, saves RGB FP/FN overlay image. If false, just saves FP/FN overlay
        """
        plt.ioff()
        data_path = self.data_path
        for i, img in enumerate(self.img_list):
            print('Creating false map for {}'.format(img))
            plot_path = data_path / self.batch / 'plots' / img
            band_combo_dir = data_path / 'band_combos'
            bin_file = data_path / self.batch / 'predictions' / img / 'predictions.h5'
            stack_path = data_path / 'images' / img / 'stack' / 'stack.tif'

            try:
                plot_path.mkdir(parents=True)
            except FileExistsError:
                pass

            # Reshape predicted values back into image band
            # with rasterio.open(stack_path, 'r') as ds:
            #     shape = ds.read(1).shape  # Shape of full original image

            # Get RGB image
            rgb_file = band_combo_dir / '{}'.format(img + '_rgb_img' + '.png')
            rgb_img = Image.open(rgb_file)

            for j, pctl in enumerate(self.pctls):
                print('Fetching flood predictions for',
                      str(pctl) + '{}'.format('%'))
                # Read predictions
                with h5py.File(bin_file, 'r') as f:
                    if probs:
                        prediction_probs = f[str(pctl)]
                        prediction_probs = np.array(
                            prediction_probs)  # Copy h5 dataset to array
                        predictions = np.argmax(prediction_probs, axis=1)
                    else:
                        predictions = f[str(pctl)]
                        predictions = np.array(
                            predictions)  # Copy h5 dataset to array

                data_test, data_vector_test, data_ind_test, feat_keep = preprocessing(
                    data_path, img, pctl, self.feat_list_all, test=True)

                shape = data_test.shape[:2]

                # Add predicted values to cloud-covered pixel positions
                prediction_img = np.zeros(shape)
                prediction_img[:] = np.nan
                rows, cols = zip(data_ind_test)
                prediction_img[rows, cols] = predictions

                # Remove perm water from predictions and actual
                perm_index = feat_keep.index('GSWPerm')
                flood_index = feat_keep.index('flooded')
                data_vector_test[data_vector_test[:, perm_index] == 1,
                                 flood_index] = 0
                data_shape = data_vector_test.shape
                perm_feat = data_test[:, :, perm_index]
                prediction_img[((prediction_img == 1) & (perm_feat == 1))] = 0

                # Add actual flood values to cloud-covered pixel positions
                flooded_img = np.zeros(shape)
                flooded_img[:] = np.nan
                flooded_img[rows, cols] = data_vector_test[:,
                                                           data_shape[1] - 1]

                # Visualizing FNs/FPs
                ones = np.ones(shape=shape)
                red_actual = np.where(ones, flooded_img, 0.5)  # Actual
                blue_preds = np.where(ones, prediction_img, 0.5)  # Predictions
                green_combo = np.minimum(red_actual, blue_preds)
                alphas = np.ones(shape)

                # Convert black pixels to transparent in fpfn image so it can overlay RGB
                fpfn_img = np.dstack(
                    (red_actual, green_combo, blue_preds, alphas)) * 255
                fpfn_overlay_file = plot_path / '{}'.format('false_map' +
                                                            str(pctl) + '.png')
                indices = np.where((np.isnan(fpfn_img[:, :, 0]))
                                   & np.isnan(fpfn_img[:, :, 1])
                                   & np.isnan(fpfn_img[:, :, 2])
                                   & (fpfn_img[:, :, 3] == 255))
                fpfn_img[indices] = [255, 255, 255, 0]
                fpfn_overlay = Image.fromarray(np.uint8(fpfn_img), mode='RGBA')
                fpfn_overlay.save(fpfn_overlay_file, dpi=(300, 300))

                # Superimpose comparison image and RGB image, then save and close
                if save:
                    rgb_img.paste(fpfn_overlay, (0, 0), fpfn_overlay)
                    print('Saving overlay image for',
                          str(pctl) + '{}'.format('%'))
                    rgb_img.save(
                        plot_path /
                        '{}'.format('false_map_overlay' + str(pctl) + '.png'),
                        dpi=(300, 300))
                plt.close('all')
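
The color logic shared by both false_map() variants is compact: actual flooding fills the red channel, predictions fill the blue channel, and their element-wise minimum fills green, so true positives render white, false negatives red, false positives blue, and true negatives black (later made transparent). A small sketch of that encoding on made-up arrays:

import numpy as np

# Dummy 2x2 scene: 1 = flooded, 0 = dry
flooded_img = np.array([[1., 1.],
                        [0., 0.]])      # actual
prediction_img = np.array([[1., 0.],
                           [1., 0.]])   # predicted

red_actual = flooded_img
blue_preds = prediction_img
green_combo = np.minimum(red_actual, blue_preds)   # lights up only where actual and predicted agree

rgb = np.dstack((red_actual, green_combo, blue_preds))
# Pixel colors: top-left TP -> white, top-right FN -> red, bottom-left FP -> blue, bottom-right TN -> black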
Example No. 11
def log_reg_gen_prediction(img_list, pctls, feat_list_new, data_path, batch):
    for j, img in enumerate(img_list):
        times = []
        accuracy, precision, recall, f1, roc_auc = [], [], [], [], []
        preds_path = data_path / batch / 'predictions' / img
        bin_file = preds_path / 'predictions.h5'
        uncertainties_path = data_path / batch / 'uncertainties' / img
        se_lower_bin_file = uncertainties_path / 'se_lower.h5'
        se_upper_bin_file = uncertainties_path / 'se_upper.h5'
        metrics_path = data_path / batch / 'metrics' / 'testing' / img

        try:
            metrics_path.mkdir(parents=True)
        except FileExistsError:
            print('Metrics directory already exists')

        for i, pctl in enumerate(pctls):
            print('Preprocessing', img, pctl, '% cloud cover')
            data_test, data_vector_test, data_ind_test, feat_keep = preprocessing(
                data_path, img, pctl, feat_list_new, test=True)
            perm_index = feat_keep.index('GSW_perm')
            flood_index = feat_keep.index('flooded')
            data_vector_test[data_vector_test[:, perm_index] == 1,
                             flood_index] = 0
            data_vector_test = np.delete(data_vector_test, perm_index,
                                         axis=1)  # Remove GSW_perm column
            data_shape = data_vector_test.shape
            X_test = data_vector_test[:, 0:data_shape[1] - 1]
            y_test = data_vector_test[:, data_shape[1] - 1]

            print('Predicting for {} at {}% cloud cover'.format(img, pctl))
            start_time = time.time()
            model_path = data_path / batch / 'models' / 'gen_model.sav'
            trained_model = joblib.load(model_path)
            pred_probs = trained_model.predict_proba(X_test)
            preds = np.argmax(pred_probs, axis=1)

            try:
                preds_path.mkdir(parents=True)
            except FileExistsError:
                pass

            with h5py.File(bin_file, 'a') as f:
                if str(pctl) in f:
                    print('Deleting earlier mean predictions')
                    del f[str(pctl)]
                f.create_dataset(str(pctl), data=pred_probs)

            # Compute standard errors
            SE_est = get_se(X_test, y_test, trained_model)
            probs, upper, lower = get_probs(
                trained_model, X_test, SE_est,
                z=1.96)  # probs is redundant, predicted above

            try:
                uncertainties_path.mkdir(parents=True)
            except FileExistsError:
                pass

            with h5py.File(se_lower_bin_file, 'a') as f:
                if str(pctl) in f:
                    print('Deleting earlier lower SEs')
                    del f[str(pctl)]
                f.create_dataset(str(pctl), data=lower)

            with h5py.File(se_upper_bin_file, 'a') as f:
                if str(pctl) in f:
                    print('Deleting earlier upper SEs')
                    del f[str(pctl)]
                f.create_dataset(str(pctl), data=upper)

            times.append(timer(start_time, time.time(), False))

            print('Evaluating predictions')
            perm_mask = data_test[:, :, perm_index]
            perm_mask = perm_mask.reshape(
                [perm_mask.shape[0] * perm_mask.shape[1]])
            perm_mask = perm_mask[~np.isnan(perm_mask)]
            preds[perm_mask.astype('bool')] = 0
            y_test[perm_mask.astype('bool')] = 0

            accuracy.append(accuracy_score(y_test, preds))
            precision.append(precision_score(y_test, preds))
            recall.append(recall_score(y_test, preds))
            f1.append(f1_score(y_test, preds))
            roc_auc.append(roc_auc_score(y_test, pred_probs[:, 1]))

            del preds, probs, pred_probs, upper, lower, X_test, y_test, \
                trained_model, data_test, data_vector_test, data_ind_test

        metrics = pd.DataFrame(
            np.column_stack([pctls, accuracy, precision, recall, f1, roc_auc]),
            columns=[
                'cloud_cover', 'accuracy', 'precision', 'recall', 'f1', 'auc'
            ])
        metrics.to_csv(metrics_path / 'metrics.csv', index=False)
        times = [float(i) for i in times]  # Convert time objects to float, otherwise the CSV values will be non-numeric
        times_df = pd.DataFrame(np.column_stack([pctls, times]),
                                columns=['cloud_cover', 'testing_time'])
        times_df.to_csv(metrics_path / 'testing_times.csv', index=False)
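
get_se() and get_probs() are not shown in this listing. A common way to obtain the confidence band they produce for a fitted logistic regression is the delta method, which propagates the coefficient covariance through the predicted probabilities; the helpers below are an assumption about how that could be done, not the definitions used above.

import numpy as np

def get_se_sketch(X, model):
    """Delta-method standard errors for the predicted probabilities of a fitted
    sklearn LogisticRegression (illustrative sketch only)."""
    X1 = np.hstack([np.ones((X.shape[0], 1)), X])        # prepend an intercept column
    p = model.predict_proba(X)[:, 1]
    w = p * (1 - p)                                       # Bernoulli variance of each observation
    cov = np.linalg.inv((X1 * w[:, None]).T @ X1)         # approximate coefficient covariance (X'WX)^-1
    var_eta = np.einsum('ij,jk,ik->i', X1, cov, X1)       # variance of the linear predictor per row
    return np.sqrt(var_eta) * w                           # delta method: dp/d(eta) = p(1 - p)

def get_probs_sketch(model, X, se, z=1.96):
    """Predicted probability with a symmetric z * SE band (illustrative sketch only)."""
    p = model.predict_proba(X)[:, 1]
    return p, p + z * se, p - z * se                      # probs, upper, lower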
Example No. 12
# ======================================================================================================================

img = img_list[0]
pctl = 30
batch = 'test'
import statsmodels.api as sm

print(img + ': stacking tif, generating clouds')
times = []
tif_stacker(data_path, img, feat_list_new, features=True, overwrite=False)
cloud_generator(img, data_path, overwrite=False)

print(img, pctl, '% CLOUD COVER')
print('Preprocessing')
tf.keras.backend.clear_session()
data_train, data_vector_train, data_ind_train, feat_keep = preprocessing(
    data_path, img, pctl, feat_list_new, test=False)
perm_index = feat_keep.index('GSW_perm')
flood_index = feat_keep.index('flooded')
data_vector_train[data_vector_train[:, perm_index] == 1,
                  flood_index] = 0  # Remove flood water that is perm water
data_vector_train = np.delete(data_vector_train, perm_index,
                              axis=1)  # Remove perm water column
shape = data_vector_train.shape
X_train = data_vector_train[:, 0:shape[1] - 1]
y_train = data_vector_train[:, shape[1] - 1]

# # Logistic regression using sklearn
# model_path = data_path / batch / 'models' / img
# if not model_path.exists():
# model_path.mkdir(parents=True)
# model_path = model_path / '{}'.format(img + '_sklearn.sav')
Example No. 13
def NN_prediction(img_list, pctls, feat_list_all, data_path, batch,
                  **model_params):
    for j, img in enumerate(img_list):
        times = []
        accuracy, precision, recall, f1, roc_auc = [], [], [], [], []
        preds_path = data_path / batch / 'predictions' / img
        bin_file = preds_path / 'predictions.h5'
        metrics_path = data_path / batch / 'metrics' / 'testing' / img

        try:
            metrics_path.mkdir(parents=True)
        except FileExistsError:
            print('Metrics directory already exists')

        for i, pctl in enumerate(pctls):
            print('Preprocessing', img, pctl, '% cloud cover')
            data_test, data_vector_test, data_ind_test, feat_keep = preprocessing(
                data_path, img, pctl, feat_list_all, test=True)
            perm_index = feat_keep.index('GSW_perm')
            flood_index = feat_keep.index('flooded')
            data_vector_test[
                data_vector_test[:, perm_index] == 1,
                flood_index] = 0  # Remove flood water that is perm water
            data_vector_test = np.delete(data_vector_test, perm_index,
                                         axis=1)  # Remove GSW_perm column
            data_shape = data_vector_test.shape
            X_test = data_vector_test[:, 0:data_shape[1] - 1]
            y_test = data_vector_test[:, data_shape[1] - 1]

            print('Predicting for {} at {}% cloud cover'.format(img, pctl))
            start_time = time.time()
            model_path = data_path / batch / 'models' / img / '{}'.format(
                img + '_clouds_' + str(pctl) + '.h5')
            trained_model = load_macro_soft_f1_model(model_path)

            pred_probs = trained_model.predict(
                X_test,
                batch_size=model_params['batch_size'],
                use_multiprocessing=True)
            preds = np.argmax(pred_probs,
                              axis=1)  # Display most probable value

            try:
                preds_path.mkdir(parents=True)
            except FileExistsError:
                pass

            with h5py.File(bin_file, 'a') as f:
                if str(pctl) in f:
                    print('Deleting earlier mean predictions')
                    del f[str(pctl)]
                f.create_dataset(str(pctl), data=preds)

            times.append(timer(start_time, time.time(), False))  # Elapsed time for prediction

            print('Evaluating predictions')
            perm_mask = data_test[:, :, perm_index]
            perm_mask = perm_mask.reshape(
                [perm_mask.shape[0] * perm_mask.shape[1]])
            perm_mask = perm_mask[~np.isnan(perm_mask)]
            preds[perm_mask.astype('bool')] = 0
            y_test[perm_mask.astype('bool')] = 0

            accuracy.append(accuracy_score(y_test, preds))
            precision.append(precision_score(y_test, preds))
            recall.append(recall_score(y_test, preds))
            f1.append(f1_score(y_test, preds))
            roc_auc.append(roc_auc_score(y_test, pred_probs[:, 1]))

            del preds, pred_probs, X_test, y_test, trained_model, data_test, data_vector_test, data_ind_test

        metrics = pd.DataFrame(
            np.column_stack([pctls, accuracy, precision, recall, f1, roc_auc]),
            columns=[
                'cloud_cover', 'accuracy', 'precision', 'recall', 'f1', 'auc'
            ])
        metrics.to_csv(metrics_path / 'metrics.csv', index=False)
        times = [float(i) for i in times]  # Convert time objects to float, otherwise the CSV values will be non-numeric
        times_df = pd.DataFrame(np.column_stack([pctls, times]),
                                columns=['cloud_cover', 'testing_time'])
        times_df.to_csv(metrics_path / 'testing_times.csv', index=False)
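
Before scoring, NN_prediction() (like log_reg_gen_prediction above) flattens the permanent-water band, drops the NaN cloud pixels so the mask lines up with the test vector, and zeroes both predictions and labels over permanent water so those pixels cannot count as hits or misses. A toy version of that masking, assuming the same pixel ordering the preprocessing step produces:

import numpy as np

# 2x3 permanent-water band with one cloud (NaN) pixel, plus predictions/labels
# for the five non-cloud pixels (all values made up).
perm_band = np.array([[0., 1., np.nan],
                      [0., 0., 1.]])
preds = np.array([1, 1, 0, 1, 1])
y_test = np.array([1., 1., 1., 0., 1.])

perm_mask = perm_band.reshape(perm_band.shape[0] * perm_band.shape[1])
perm_mask = perm_mask[~np.isnan(perm_mask)]      # drop cloud pixels, as in the functions above

preds[perm_mask.astype('bool')] = 0              # permanent water can never be a predicted flood...
y_test[perm_mask.astype('bool')] = 0             # ...nor a labeled flood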
Example No. 14
def stack_all_uncertainties(model, batch, data_path, img_list, feat_list_all):
    uncertainty_all = []
    predictions_all = []
    tp_all = []
    tn_all = []
    fp_all = []
    fn_all = []
    if model == 'BNN':
        aleatoric_all = []
        epistemic_all = []
    plot_path = data_path / batch / 'plots'
    output_bin_file = data_path / batch / 'metrics' / 'uncertainty_fpfn.h5'
    for i, img in enumerate(img_list):
        print(img)
        preds_bin_file = data_path / batch / 'predictions' / img / 'predictions.h5'
        stack_path = data_path / 'images' / img / 'stack' / 'stack.tif'

        try:
            plot_path.mkdir(parents=True)
        except FileExistsError:
            pass

        # Reshape variance values back into image band
        with rasterio.open(stack_path, 'r') as ds:
            shape = ds.read(1).shape  # Shape of full original image

        for pctl in pctls:
            print(pctl)
            data_test, data_vector_test, data_ind_test, feat_keep = preprocessing(
                data_path, img, pctl, feat_list_all, test=True)
            perm_index = feat_keep.index('GSW_perm')
            flood_index = feat_keep.index('flooded')
            floods = data_test[:, :, flood_index]
            perm = data_test[:, :, perm_index]

            if model == 'LR':
                se_lower_bin_file = data_path / batch / 'uncertainties' / img / 'se_lower.h5'
                se_upper_bin_file = data_path / batch / 'uncertainties' / img / 'se_upper.h5'
                with h5py.File(se_lower_bin_file, 'r') as f:
                    lower = f[str(pctl)]
                    lower = np.array(lower)

                with h5py.File(se_upper_bin_file, 'r') as f:
                    upper = f[str(pctl)]
                    upper = np.array(upper)

                uncertainties = upper - lower

            if model == 'BNN':
                aleatoric_bin_file = data_path / batch / 'uncertainties' / img / 'aleatoric_uncertainties.h5'
                epistemic_bin_file = data_path / batch / 'uncertainties' / img / 'epistemic_uncertainties.h5'
                with h5py.File(aleatoric_bin_file, 'r') as f:
                    aleatoric = f[str(pctl)]
                    aleatoric = np.array(aleatoric)

                with h5py.File(epistemic_bin_file, 'r') as f:
                    epistemic = f[str(pctl)]
                    epistemic = np.array(epistemic)

                aleatoric_image = np.zeros(shape)
                aleatoric_image[:] = np.nan
                rows, cols = zip(data_ind_test)
                aleatoric_image[rows, cols] = aleatoric

                epistemic_image = np.zeros(shape)
                epistemic_image[:] = np.nan
                rows, cols = zip(data_ind_test)
                epistemic_image[rows, cols] = epistemic

                uncertainties = aleatoric + epistemic

            unc_image = np.zeros(shape)
            unc_image[:] = np.nan
            rows, cols = zip(data_ind_test)
            unc_image[rows, cols] = uncertainties

            # unc_image[perm == 1] = 0
            # cutoff_value = np.nanpercentile(unc_image, 99.99)  # Truncate values so outliers don't skew colorbar
            # unc_image[unc_image > cutoff_value] = np.round(cutoff_value, 0)

            with h5py.File(preds_bin_file, 'r') as f:
                predictions = f[str(pctl)]
                if model == 'LR':
                    predictions = np.argmax(np.array(predictions),
                                            axis=1)  # Copy h5 dataset to array
                if model == 'BNN':
                    predictions = np.array(predictions)

            prediction_img = np.zeros(shape)
            prediction_img[:] = np.nan
            rows, cols = zip(data_ind_test)
            prediction_img[rows, cols] = predictions

            floods = floods.reshape([
                floods.shape[0] * floods.shape[1],
            ])
            predictions_mask = prediction_img.reshape([
                prediction_img.shape[0] * prediction_img.shape[1],
            ])
            tp = np.logical_and(predictions_mask == 1,
                                floods == 1).astype('int')
            tn = np.logical_and(predictions_mask == 0,
                                floods == 0).astype('int')
            fp = np.logical_and(predictions_mask == 1,
                                floods == 0).astype('int')
            fn = np.logical_and(predictions_mask == 0,
                                floods == 1).astype('int')

            # Mask out clouds, etc.
            tp = tp[~np.isnan(predictions_mask)]
            tn = tn[~np.isnan(predictions_mask)]
            fp = fp[~np.isnan(predictions_mask)]
            fn = fn[~np.isnan(predictions_mask)]

            unc_image_mask = unc_image.reshape([
                unc_image.shape[0] * unc_image.shape[1],
            ])
            unc_image_mask = unc_image_mask[~np.isnan(predictions_mask)]

            if model == 'BNN':
                aleatoric_image_mask = aleatoric_image.reshape([
                    aleatoric_image.shape[0] * aleatoric_image.shape[1],
                ])
                aleatoric_image_mask = aleatoric_image_mask[
                    ~np.isnan(predictions_mask)]

                epistemic_image_mask = epistemic_image.reshape([
                    epistemic_image.shape[0] * epistemic_image.shape[1],
                ])
                epistemic_image_mask = epistemic_image_mask[
                    ~np.isnan(predictions_mask)]

                aleatoric_all.append(aleatoric_image_mask)
                epistemic_all.append(epistemic_image_mask)

            predictions_all.append(predictions)
            uncertainty_all.append(unc_image_mask)
            tp_all.append(tp)
            tn_all.append(tn)
            fp_all.append(fp)
            fn_all.append(fn)

    # data_vector_all = np.concatenate(data_vector_all, axis=0)  # Won't work because some features are missing
    predictions_all = np.concatenate(predictions_all, axis=0)
    uncertainty_all = np.concatenate(uncertainty_all, axis=0)
    tp_all = np.concatenate(tp_all, axis=0)
    tn_all = np.concatenate(tn_all, axis=0)
    fp_all = np.concatenate(fp_all, axis=0)
    fn_all = np.concatenate(fn_all, axis=0)

    if model == 'BNN':
        aleatoric_all = np.concatenate(aleatoric_all, axis=0)
        epistemic_all = np.concatenate(epistemic_all, axis=0)

    # df = np.column_stack((data_vector_all, predictions_all, uncertainty_all, tp_all, tn_all, fp_all, fn_all))
    if model == 'LR':
        df = np.column_stack(
            (predictions_all, uncertainty_all, tp_all, tn_all, fp_all, fn_all))

    if model == 'BNN':
        df = np.column_stack((predictions_all, uncertainty_all, aleatoric_all,
                              epistemic_all, tp_all, tn_all, fp_all, fn_all))

    with h5py.File(output_bin_file, 'a') as f:
        if 'uncertainty_fpfn' in f:
            print('Deleting earlier uncertainty/fpfn')
            del f['uncertainty_fpfn']
        f.create_dataset('uncertainty_fpfn', data=df)
Example No. 15
def false_map(probs, data_path, save=True):
    """
    Creates a map of false positives/false negatives overlaid on the RGB image.
    probs : bool
        If True, stored predictions are class probabilities and are argmax'd to hard labels.
    save : bool
        If True, saves the FP/FN overlay pasted onto the RGB image; if False, saves only the overlay.
    (img_list, pctls, batch, and feat_list_new are expected at module scope.)
    """
    plt.ioff()
    for i, img in enumerate(img_list):
        print('Creating FN/FP map for {}'.format(img))
        plot_path = data_path / batch / 'plots' / img
        bin_file = data_path / batch / 'predictions' / img / 'predictions.h5'

        stack_path = data_path / 'images' / img / 'stack' / 'stack.tif'

        # Get RGB image
        print('Stacking RGB image')
        band_list = ['B1', 'B2', 'B3', 'B4', 'B5', 'B6', 'B7']
        tif_stacker(data_path, img, band_list, features=False, overwrite=False)
        spectra_stack_path = data_path / 'images' / img / 'stack' / 'spectra_stack.tif'

        # Function to normalize the grid values
        def normalize(array):
            """Normalizes numpy arrays into scale 0.0 - 1.0"""
            array_min, array_max = np.nanmin(array), np.nanmax(array)
            return ((array - array_min) / (array_max - array_min))

        print('Processing RGB image')
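        # Bands 4/3/2 of the spectra stack are used as red/green/blue (Landsat 8 natural color assumed), with the
        # -999999 nodata value set to NaN before each band is normalized to 0-1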
        with rasterio.open(spectra_stack_path, 'r') as f:
            red, green, blue = f.read(4), f.read(3), f.read(2)
            red[red == -999999] = np.nan
            green[green == -999999] = np.nan
            blue[blue == -999999] = np.nan
            redn = normalize(red)
            greenn = normalize(green)
            bluen = normalize(blue)
            rgb = np.dstack((redn, greenn, bluen))

        # Convert to PIL image, enhance, and save
        rgb_img = Image.fromarray((rgb * 255).astype(np.uint8))
        rgb_img = ImageEnhance.Contrast(rgb_img).enhance(1.5)
        rgb_img = ImageEnhance.Sharpness(rgb_img).enhance(2)
        rgb_img = ImageEnhance.Brightness(rgb_img).enhance(2)

        print('Saving RGB image')
        rgb_file = plot_path / '{}'.format('rgb_img' + '.png')
        rgb_img.save(rgb_file, dpi=(300, 300))

        # Reshape predicted values back into image band
        with rasterio.open(stack_path, 'r') as ds:
            shape = ds.read(1).shape  # Shape of full original image

        for j, pctl in enumerate(pctls):
            print('Fetching flood predictions for {}%'.format(pctl))
            # Read predictions
            with h5py.File(bin_file, 'r') as f:
                if probs:
                    prediction_probs = f[str(pctl)]
                    prediction_probs = np.array(
                        prediction_probs)  # Copy h5 dataset to array
                    predictions = np.argmax(prediction_probs, axis=1)
                else:
                    predictions = f[str(pctl)]
                    predictions = np.array(
                        predictions)  # Copy h5 dataset to array

            data_test, data_vector_test, data_ind_test, feat_keep = preprocessing(
                data_path, img, pctl, feat_list_new, test=True)

            # Add predicted values to cloud-covered pixel positions
            prediction_img = np.zeros(shape)
            prediction_img[:] = np.nan
            rows, cols = zip(data_ind_test)
            prediction_img[rows, cols] = predictions

            # Remove perm water from predictions and actual
            perm_index = feat_keep.index('GSW_perm')
            flood_index = feat_keep.index('flooded')
            data_vector_test[
                data_vector_test[:, perm_index] == 1,
                flood_index] = 0  # Remove flood water that is perm water
            data_shape = data_vector_test.shape
            with rasterio.open(stack_path, 'r') as ds:
                perm_feat = ds.read(perm_index + 1)
                prediction_img[perm_feat == 1] = 0

            # Add actual flood values to cloud-covered pixel positions
            flooded_img = np.zeros(shape)
            flooded_img[:] = np.nan
            flooded_img[rows, cols] = data_vector_test[:, data_shape[1] - 1]

            # Visualizing FNs/FPs
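            # Actual flooding drives the red channel and predictions drive the blue channel, so reference-only
            # flood pixels render red (missed floods), prediction-only pixels render blue (false floods), and
            # pixels flooded in both render white; non-flooded (black) pixels are made transparent below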
            ones = np.ones(shape=shape)
            red_actual = np.where(ones, flooded_img, 0.5)  # Actual
            blue_preds = np.where(ones, prediction_img, 0.5)  # Predictions
            green_combo = np.minimum(red_actual, blue_preds)
            alphas = np.ones(shape) * 255

            # Convert black pixels to transparent in fpfn image so it can overlay RGB
            # (scale 0-1 RGB channels to 0-255, since PIL's RGBA mode requires uint8)
            fpfn_img = np.dstack((red_actual * 255, green_combo * 255, blue_preds * 255, alphas))
            fpfn_img = np.nan_to_num(fpfn_img)  # NaN (cloud) pixels become black and are made transparent below
            fpfn_overlay_file = plot_path / '{}'.format('false_map' + str(pctl) + '.png')
            indices = np.where((fpfn_img[:, :, 0] == 0)
                               & (fpfn_img[:, :, 1] == 0)
                               & (fpfn_img[:, :, 2] == 0)
                               & (fpfn_img[:, :, 3] == 255))
            fpfn_img[indices] = 0
            fpfn_overlay = Image.fromarray(fpfn_img.astype(np.uint8), mode='RGBA')
            fpfn_overlay.save(fpfn_overlay_file, dpi=(300, 300))

            # Superimpose comparison image and RGB image, then save and close
            if save:
                # Paste onto a copy so overlays from different cloud-cover percentiles don't accumulate
                overlay_rgb = rgb_img.copy()
                overlay_rgb.paste(fpfn_overlay, (0, 0), fpfn_overlay)
                print('Saving overlay image for {}%'.format(pctl))
                overlay_rgb.save(
                    plot_path /
                    '{}'.format('false_map_overlay' + str(pctl) + '.png'),
                    dpi=(300, 300))
            plt.close('all')
Example No. 16
def prediction_bnn(img_list, pctls, feat_list_new, data_path, batch, MC_passes):
    for j, img in enumerate(img_list):
        epistemic_times = []
        aleatoric_times = []
        accuracy, precision, recall, f1 = [], [], [], []
        preds_path = data_path / batch / 'predictions' / img
        bin_file = preds_path / 'predictions.h5'
        aleatoric_bin_file = preds_path / 'aleatoric_predictions.h5'
        uncertainties_path = data_path / batch / 'uncertainties' / img
        aleatoric_uncertainty_file = uncertainties_path / 'aleatoric_uncertainties.h5'
        epistemic_uncertainty_file = uncertainties_path / 'epistemic_uncertainties.h5'
        metrics_path = data_path / batch / 'metrics' / 'testing' / img

        try:
            metrics_path.mkdir(parents=True)
        except FileExistsError:
            print('Metrics directory already exists')

        for i, pctl in enumerate(pctls):
            print('Preprocessing', img, pctl, '% cloud cover')
            data_test, data_vector_test, data_ind_test, feat_keep = preprocessing(data_path, img, pctl, feat_list_new,
                                                                                  test=True)
            perm_index = feat_keep.index('GSW_perm')
            flood_index = feat_keep.index('flooded')
            data_vector_test[data_vector_test[:, perm_index] == 1, flood_index] = 0
            data_vector_test = np.delete(data_vector_test, perm_index, axis=1)
            data_shape = data_vector_test.shape
            X_test, y_test = data_vector_test[:, 0:data_shape[1] - 1], data_vector_test[:, data_shape[1] - 1]
            y_test = to_categorical(y_test)
            D = len(set(y_test[:, 0]))  # Target classes
            iterable = K.variable(np.ones(MC_passes))

            print('Predicting (aleatoric) for {} at {}% cloud cover'.format(img, pctl))
            model_path = data_path / batch / 'models' / img / '{}'.format(img + '_clouds_' + str(pctl) + '.h5')
            start_time = time.time()
            # aleatoric_model = tf.keras.models.load_model(model_path)
            aleatoric_model = load_bayesian_model(model_path, MC_passes, D, iterable)
            aleatoric_results = aleatoric_model.predict(X_test, verbose=1)
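            # Assumes the loss-attenuation model's first output concatenates the D class logits with a predicted
            # variance per sample, so columns D: hold the aleatoric uncertainty and columns 0:D the logits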
            aleatoric_uncertainties = np.reshape(aleatoric_results[0][:, D:], (-1))
            try:
                uncertainties_path.mkdir(parents=True)
            except FileExistsError:
                pass
            with h5py.File(aleatoric_uncertainty_file, 'a') as f:
                if str(pctl) in f:
                    print('Deleting earlier aleatoric uncertainties')
                    del f[str(pctl)]
                f.create_dataset(str(pctl), data=aleatoric_uncertainties)
            logits = aleatoric_results[0][:, 0:D]
            aleatoric_preds = np.argmax(aleatoric_results[1], axis=1)
            aleatoric_times.append(timer(start_time, time.time(), False))
            try:
                preds_path.mkdir(parents=True)
            except FileExistsError:
                pass
            with h5py.File(aleatoric_bin_file, 'a') as f:
                if str(pctl) in f:
                    print('Deleting earlier aleatoric predictions')
                    del f[str(pctl)]
                f.create_dataset(str(pctl), data=aleatoric_preds)

            print('Predicting (epistemic) for {} at {}% cloud cover'.format(img, pctl))
            start_time = time.time()
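            # get_epistemic_uncertainty_model presumably runs T Monte Carlo dropout passes and returns
            # (per-sample epistemic uncertainty, mean class probabilities)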
            epistemic_model = get_epistemic_uncertainty_model(model_path, T=MC_passes, D=D)
            epistemic_results = epistemic_model.predict(X_test, verbose=2, use_multiprocessing=True)
            epistemic_uncertainties = epistemic_results[0]
            with h5py.File(epistemic_uncertainty_file, 'a') as f:
                if str(pctl) in f:
                    print('Deleting earlier epistemic uncertainties')
                    del f[str(pctl)]
                f.create_dataset(str(pctl), data=epistemic_uncertainties)
            epistemic_preds = np.argmax(epistemic_results[1], axis=1)
            epistemic_times.append(timer(start_time, time.time(), False))
            with h5py.File(bin_file, 'a') as f:
                if str(pctl) in f:
                    print('Deleting earlier epistemic predictions')
                    del f[str(pctl)]
                f.create_dataset(str(pctl), data=epistemic_preds)

            print('Evaluating predictions')
            accuracy.append(accuracy_score(y_test[:, 1], epistemic_preds))
            precision.append(precision_score(y_test[:, 1], epistemic_preds))
            recall.append(recall_score(y_test[:, 1], epistemic_preds))
            f1.append(f1_score(y_test[:, 1], epistemic_preds))

            del aleatoric_model, aleatoric_results, aleatoric_uncertainties, logits, aleatoric_preds, \
                epistemic_model, epistemic_uncertainties, epistemic_preds, epistemic_results, \
                data_test, data_vector_test, data_ind_test

        metrics = pd.DataFrame(np.column_stack([pctls, accuracy, precision, recall, f1]),
                               columns=['cloud_cover', 'accuracy', 'precision', 'recall', 'f1'])
        metrics.to_csv(metrics_path / 'metrics.csv', index=False)
        epistemic_times = [float(i) for i in epistemic_times]
        aleatoric_times = [float(i) for i in aleatoric_times]
        times_df = pd.DataFrame(np.column_stack([pctls, epistemic_times, aleatoric_times]),
                                columns=['cloud_cover', 'epistemic_testing_time', 'aleatoric_testing_time'])
        times_df.to_csv(metrics_path / 'testing_times.csv', index=False)
Example No. 17
    def uncertainty_map_NN(self):
        data_path = self.data_path
        plt.ioff()
        my_dpi = 300

        # Get predictions and variances
        for img in self.img_list:
            print('Creating uncertainty map for {}'.format(img))
            plot_path = data_path / self.batch / 'plots' / img
            aleatoric_bin_file = data_path / self.batch / 'uncertainties' / img / 'aleatoric_uncertainties.h5'
            epistemic_bin_file = data_path / self.batch / 'uncertainties' / img / 'epistemic_uncertainties.h5'
            stack_path = data_path / 'images' / img / 'stack' / 'stack.tif'

            try:
                plot_path.mkdir(parents=True)
            except FileExistsError:
                pass

            # Reshape variance values back into image band
            with rasterio.open(stack_path, 'r') as ds:
                shape = ds.read(1).shape  # Shape of full original image

            for pctl in self.pctls:
                with h5py.File(aleatoric_bin_file, 'r') as f:
                    aleatoric = f[str(pctl)]
                    aleatoric = np.array(aleatoric)

                with h5py.File(epistemic_bin_file, 'r') as f:
                    epistemic = f[str(pctl)]
                    epistemic = np.array(epistemic)

                uncertainties = aleatoric + epistemic
                data_test, data_vector_test, data_ind_test, feat_keep = preprocessing(
                    data_path, img, pctl, self.feat_list_all, test=True)

                perm_index = feat_keep.index('GSWPerm')
                perm = data_test[:, :, perm_index]

                # Aleatoric + epistemic
                unc_image = np.zeros(shape)
                unc_image[:] = np.nan
                rows, cols = zip(data_ind_test)
                unc_image[rows, cols] = uncertainties

                unc_image[perm == 1] = 0

                fig, ax = plt.subplots()
                my_img = ax.imshow(unc_image, cmap='plasma')
                ax.get_xaxis().set_visible(False)
                ax.get_yaxis().set_visible(False)
                im_ratio = unc_image.shape[0] / unc_image.shape[1]
                fig.colorbar(my_img,
                             ax=ax,
                             fraction=0.02 * im_ratio,
                             pad=0.02 * im_ratio)
                plt.tight_layout()
                plt.savefig(
                    plot_path /
                    '{}'.format('map_uncertainty_' + str(pctl) + '.png'),
                    dpi=my_dpi,
                    pad_inches=0.0)

                # Aleatoric
                aleatoric_image = np.zeros(shape)
                aleatoric_image[:] = np.nan
                rows, cols = zip(data_ind_test)
                aleatoric_image[rows, cols] = aleatoric

                aleatoric_image[perm == 1] = 0

                fig, ax = plt.subplots()
                my_img = ax.imshow(aleatoric_image, cmap='plasma')
                ax.get_xaxis().set_visible(False)
                ax.get_yaxis().set_visible(False)
                im_ratio = aleatoric_image.shape[0] / aleatoric_image.shape[1]
                fig.colorbar(my_img,
                             ax=ax,
                             fraction=0.02 * im_ratio,
                             pad=0.02 * im_ratio)
                plt.tight_layout()
                plt.savefig(plot_path /
                            '{}'.format('map_aleatoric_' + str(pctl) + '.png'),
                            dpi=my_dpi,
                            pad_inches=0.0)

                # Epistemic
                epistemic_image = np.zeros(shape)
                epistemic_image[:] = np.nan
                rows, cols = zip(data_ind_test)
                epistemic_image[rows, cols] = epistemic

                epistemic_image[perm == 1] = 0

                fig, ax = plt.subplots()
                my_img = ax.imshow(epistemic_image, cmap='plasma')
                ax.get_xaxis().set_visible(False)
                ax.get_yaxis().set_visible(False)
                im_ratio = epistemic_image.shape[0] / epistemic_image.shape[1]
                fig.colorbar(my_img,
                             ax=ax,
                             fraction=0.02 * im_ratio,
                             pad=0.02 * im_ratio)
                plt.tight_layout()
                plt.savefig(plot_path /
                            '{}'.format('map_epistemic_' + str(pctl) + '.png'),
                            dpi=my_dpi,
                            pad_inches=0.0)
                plt.close('all')
Example No. 18
def NN_training(img_list, pctls, model_func, feat_list_new, data_path, batch,
                **model_params):
    get_model = model_func
    for j, img in enumerate(img_list):
        print(img + ': stacking tif, generating clouds')
        times = []
        lr_mins = []
        lr_maxes = []
        tif_stacker(data_path,
                    img,
                    feat_list_new,
                    features=True,
                    overwrite=False)
        cloud_generator(img, data_path, overwrite=False)

        for i, pctl in enumerate(pctls):
            print(img, pctl, '% CLOUD COVER')
            print('Preprocessing')
            tf.keras.backend.clear_session()
            data_train, data_vector_train, data_ind_train, feat_keep = preprocessing(
                data_path, img, pctl, feat_list_new, test=False)
            perm_index = feat_keep.index('GSW_perm')
            flood_index = feat_keep.index('flooded')
            # data_vector_train[data_vector_train[:, perm_index] == 1, flood_index] = 0
            data_vector_train = np.delete(data_vector_train,
                                          perm_index,
                                          axis=1)
            shape = data_vector_train.shape
            X_train, y_train = data_vector_train[:, 0:shape[1] - 1], data_vector_train[:, shape[1] - 1]
            INPUT_DIMS = X_train.shape[1]

            model_path = data_path / batch / 'models' / img
            metrics_path = data_path / batch / 'metrics' / 'training' / img / '{}'.format(
                img + '_clouds_' + str(pctl))

            lr_plots_path = metrics_path.parents[1] / 'lr_plots'
            lr_vals_path = metrics_path.parents[1] / 'lr_vals'
            try:
                metrics_path.mkdir(parents=True)
                model_path.mkdir(parents=True)
                lr_plots_path.mkdir(parents=True)
                lr_vals_path.mkdir(parents=True)
            except FileExistsError:
                pass

            # ---------------------------------------------------------------------------------------------------
            # Determine learning rate by finding max loss decrease during single epoch training
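            # The LrRangeFinder callback is assumed to ramp the learning rate from start_lr to end_lr over the
            # single epoch below while logging the loss; lr_plots then extracts the usable lr_min/lr_max band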
            lrRangeFinder = LrRangeFinder(start_lr=0.1, end_lr=2)

            lr_model_params = {
                'batch_size': model_params['batch_size'],
                'epochs': 1,
                'verbose': 2,
                'callbacks': [lrRangeFinder],
                'use_multiprocessing': True
            }

            model = model_func(INPUT_DIMS)

            print('Finding learning rate')
            model.fit(X_train, y_train, **lr_model_params)
            lr_min, lr_max, lr, losses = lr_plots(lrRangeFinder, lr_plots_path,
                                                  img, pctl)
            lr_mins.append(lr_min)
            lr_maxes.append(lr_max)
            # ---------------------------------------------------------------------------------------------------
            # Training the model with cyclical learning rate scheduler
            model_path = model_path / '{}'.format(img + '_clouds_' +
                                                  str(pctl) + '.h5')
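            # Cyclical learning rate with warm restarts (SGDR): presumably oscillates between lr_min and lr_max,
            # with cycles of cycle_length epochs that are stretched by mult_factor and damped by lr_decay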
            scheduler = SGDRScheduler(min_lr=lr_min,
                                      max_lr=lr_max,
                                      lr_decay=0.9,
                                      cycle_length=3,
                                      mult_factor=1.5)

            callbacks = [
                tf.keras.callbacks.EarlyStopping(
                    monitor='sparse_categorical_accuracy',
                    min_delta=0.0001,
                    patience=10),
                tf.keras.callbacks.ModelCheckpoint(filepath=str(model_path),
                                                   monitor='loss',
                                                   save_best_only=True),
                CSVLogger(metrics_path / 'training_log.log'), scheduler
            ]

            model = get_model(INPUT_DIMS)

            print('Training full model with best LR')
            start_time = time.time()
            model.fit(X_train, y_train, **model_params, callbacks=callbacks)
            end_time = time.time()
            times.append(timer(start_time, end_time, False))
            # model.save(model_path)

        metrics_path = metrics_path.parent
        times = [float(i) for i in times]
        times = np.column_stack([pctls, times])
        times_df = pd.DataFrame(times,
                                columns=['cloud_cover', 'training_time'])
        times_df.to_csv(metrics_path / 'training_times.csv', index=False)

        lr_range = np.column_stack([pctls, lr_mins, lr_maxes])
        lr_avg = np.mean(lr_range[:, 1:3], axis=1)  # Average of the lr_min and lr_max columns
        lr_range = np.column_stack([lr_range, lr_avg])
        lr_range_df = pd.DataFrame(
            lr_range, columns=['cloud_cover', 'lr_min', 'lr_max', 'lr_avg'])
        lr_range_df.to_csv((lr_vals_path / img).with_suffix('.csv'),
                           index=False)

        # lr and losses here hold values from the last cloud-cover percentile in the loop above
        losses_path = lr_vals_path / img / '{}'.format('losses_' + str(pctl) +
                                                       '.csv')
        try:
            losses_path.parent.mkdir(parents=True)
        except FileExistsError:
            pass
        lr_losses = np.column_stack([lr, losses])
        lr_losses = pd.DataFrame(lr_losses, columns=['lr', 'losses'])
        lr_losses.to_csv(losses_path, index=False)
Example No. 19
# =================================================================================

img = img_list[0]
pctl = pctls[0]

print(img + ': stacking tif, generating clouds')
times = []
tif_stacker(data_path, img, feat_list_new, features=True, overwrite=False)
cloud_generator(img, data_path, overwrite=False)


print(img, pctl, '% CLOUD COVER')
print('Preprocessing')
data_train, data_vector_train, data_ind_train, feat_keep = preprocessing(data_path, img, pctl,
                                                                         feat_list_all,
                                                                         test=False)
perm_index = feat_keep.index('GSW_perm')
flood_index = feat_keep.index('flooded')
# data_vector_train[data_vector_train[:, perm_index] == 1, flood_index] = 0
data_vector_train = np.delete(data_vector_train, perm_index, axis=1)
shape = data_vector_train.shape
X_train, y_train = data_vector_train[:, 0:shape[1] - 1], data_vector_train[:, shape[1] - 1]

model_path = data_path / batch / 'models' / img
metrics_path = data_path / batch / 'metrics' / 'training' / img / '{}'.format(
    img + '_clouds_' + str(pctl))

if not model_path.exists():
    model_path.mkdir(parents=True)
if not metrics_path.exists():
    metrics_path.mkdir(parents=True)
Example No. 20
def log_reg_training_buffer(img_list, pctls, feat_list_new, data_path, batch,
                            buffer_iters, buffer_flood_only):
    from imageio import imwrite

    for img in img_list:
        print(img + ': stacking tif, generating clouds')
        times = []
        tif_stacker(data_path,
                    img,
                    feat_list_new,
                    features=True,
                    overwrite=False)
        cloud_generator(img, data_path, overwrite=False)

        for pctl in pctls:
            print('Preprocessing')
            data_train_full, data_vector_train, data_ind_train, feat_keep = preprocessing(
                data_path, img, pctl, feat_list_new, test=False)
            for buffer_iter in buffer_iters:
                perm_index = feat_keep.index('GSW_perm')
                flood_index = feat_keep.index('flooded')
                data_train = data_train_full.copy()
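                # Buffering: dilate the flooded mask by buffer_iter pixels and set everything outside that buffer
                # to NaN, so training only uses pixels in and around observed flooding; buffer_flood_only controls
                # whether permanent water is zeroed out before or after the mask is built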
                if buffer_flood_only:
                    data_train[data_train[:, :, perm_index] == 1,
                               flood_index] = 0
                    mask = data_train[:, :, flood_index]
                    buffer_mask = np.invert(
                        binary_dilation(mask, iterations=buffer_iter))
                else:
                    mask = data_train[:, :, flood_index]
                    buffer_mask = np.invert(
                        binary_dilation(mask, iterations=buffer_iter))
                    data_train[data_train[:, :, perm_index] == 1,
                               flood_index] = 0
                data_train[buffer_mask] = np.nan

                data_vector_train = data_train.reshape([
                    data_train.shape[0] * data_train.shape[1],
                    data_train.shape[2]
                ])
                data_vector_train = data_vector_train[
                    ~np.isnan(data_vector_train).any(axis=1)]
                data_vector_train = np.delete(
                    data_vector_train, perm_index,
                    axis=1)  # Remove perm water column
                shape = data_vector_train.shape
                X_train, y_train = data_vector_train[:, 0:shape[
                    1] - 1], data_vector_train[:, shape[1] - 1]

                model_path = data_path / batch / 'models' / img
                metrics_path = data_path / batch / 'metrics' / 'training' / img / '{}'.format(
                    img + '_clouds_' + str(pctl))

                if not model_path.exists():
                    model_path.mkdir(parents=True)
                if not metrics_path.exists():
                    metrics_path.mkdir(parents=True)

                model_path = model_path / '{}'.format(img + '_clouds_' + str(
                    pctl) + 'buff' + str(buffer_iter) + '.sav')

                # Save data flooding image to check that buffering is working correctly
                # imwrite(model_path.parents[0] / '{}'.format('buff' + str(buffer_iter) + '.jpg'), data_train[:, :, 6])

                print('Training')
                start_time = time.time()
                logreg = LogisticRegression(n_jobs=-1, solver='sag')
                logreg.fit(X_train, y_train)
                end_time = time.time()
                times.append(timer(start_time, end_time, False))
                joblib.dump(logreg, model_path)

        metrics_path = metrics_path.parent
        times = [float(i) for i in times]
        times = np.column_stack([
            np.repeat(pctls, len(buffer_iters)),
            np.tile(buffer_iters, len(pctls)), times
        ])
        times_df = pd.DataFrame(
            times, columns=['cloud_cover', 'buffer_iters', 'training_time'])
        times_df.to_csv(metrics_path / 'training_times.csv', index=False)
Example No. 21
File: RF.py Project: moghimis/CPR
def rf_training(img_list, pctls, feat_list_new, data_path, batch, n_jobs):
    for j, img in enumerate(img_list):
        print(img + ': stacking tif, generating clouds')
        times = []
        tif_stacker(data_path, img, feat_list_new, features=True, overwrite=False)
        cloud_generator(img, data_path, overwrite=False)

        for i, pctl in enumerate(pctls):
            print(img, pctl, '% CLOUD COVER')
            print('Preprocessing')
            tf.keras.backend.clear_session()
            data_train, data_vector_train, data_ind_train, feat_keep = preprocessing(data_path, img, pctl,
                                                                                     feat_list_new,
                                                                                     test=False)
            perm_index = feat_keep.index('GSW_perm')
            flood_index = feat_keep.index('flooded')
            data_vector_train[
                data_vector_train[:, perm_index] == 1, flood_index] = 0  # Remove flood water that is perm water
            data_vector_train = np.delete(data_vector_train, perm_index, axis=1)  # Remove perm water column
            shape = data_vector_train.shape
            X_train, y_train = data_vector_train[:, 0:shape[1] - 1], data_vector_train[:, shape[1] - 1]

            model_path = data_path / batch / 'models' / img
            metrics_path = data_path / batch / 'metrics' / 'training' / img / '{}'.format(
                img + '_clouds_' + str(pctl))

            try:
                metrics_path.mkdir(parents=True)
                model_path.mkdir(parents=True)
            except FileExistsError:
                pass

            param_path = data_path / batch / 'models' / '4514_LC08_027033_20170826_1' / '{}'.format(
                '4514_LC08_027033_20170826_1_clouds_50params.pkl')
            model_path = model_path / '{}'.format(img + '_clouds_' + str(pctl) + '.sav')

            # # Hyperparameter optimization
            # print('Hyperparameter search')
            # base_rf = RandomForestClassifier(random_state=0, n_estimators=100, max_leaf_nodes=10)

            # space = [skopt.space.Integer(2, 1000, name="max_leaf_nodes"),
            # skopt.space.Integer(2, 200, name="n_estimators"),
            # skopt.space.Integer(2, 3000, name="max_depth")]

            # @use_named_args(space)
            # def objective(**params):
            # base_rf.set_params(**params)
            # return -np.mean(cross_val_score(base_rf, X_train, y_train, cv=5, n_jobs=n_jobs, scoring="f1"))

            # res_rf = forest_minimize(objective, space, base_estimator='RF', n_calls=11,
            # random_state=0, verbose=True, n_jobs=n_jobs)
            # print(type(res_rf))
            # skopt.utils.dump(res_rf, param_path, store_objective=False)

            res_rf = skopt.utils.load(param_path)
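            # res_rf.x holds the optimized [max_leaf_nodes, n_estimators, max_depth] from the saved skopt search
            # (the commented-out forest_minimize block above), so the search is not re-run for every image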
            # Training
            print('Training with optimized hyperparameters')
            start_time = time.time()
            rf = RandomForestClassifier(random_state=0,
                                        max_leaf_nodes=res_rf.x[0],
                                        n_estimators=res_rf.x[1],
                                        max_depth=res_rf.x[2],
                                        n_jobs=-1)
            rf.fit(X_train, y_train)
            end_time = time.time()
            times.append(timer(start_time, end_time, False))
            joblib.dump(rf, model_path)

        metrics_path = metrics_path.parent
        times = [float(i) for i in times]
        times = np.column_stack([pctls, times])
        times_df = pd.DataFrame(times, columns=['cloud_cover', 'training_time'])
        times_df.to_csv(metrics_path / 'training_times.csv', index=False)
Example No. 22
def prediction_BNN_gen_model(img_list, pctls, feat_list_new, data_path, batch,
                             MC_passes, **model_params):
    for j, img in enumerate(img_list):
        times = []
        accuracy, precision, recall, f1 = [], [], [], []
        preds_path = data_path / batch / 'predictions' / img
        bin_file = preds_path / 'predictions.h5'
        model_path = data_path / batch / 'models' / 'gen_model.h5'

        uncertainties_path = data_path / batch / 'uncertainties' / img
        aleatoric_bin_file = uncertainties_path / 'aleatoric_uncertainties.h5'
        epistemic_bin_file = uncertainties_path / 'epistemic_uncertainties.h5'
        metrics_path = data_path / batch / 'metrics' / 'testing' / img

        try:
            metrics_path.mkdir(parents=True)
        except FileExistsError:
            print('Metrics directory already exists')

        for i, pctl in enumerate(pctls):
            print('Preprocessing', img, pctl, '% cloud cover')
            data_test, data_vector_test, data_ind_test, feat_keep = preprocessing(
                data_path, img, pctl, feat_list_new, test=True)
            perm_index = feat_keep.index('GSW_perm')
            flood_index = feat_keep.index('flooded')
            data_vector_test[
                data_vector_test[:, perm_index] == 1,
                flood_index] = 0  # Remove flood water that is perm water
            data_vector_test = np.delete(data_vector_test, perm_index,
                                         axis=1)  # Remove GSW_perm column
            data_shape = data_vector_test.shape
            X_test, y_test = data_vector_test[:, 0:data_shape[
                1] - 1], data_vector_test[:, data_shape[1] - 1]

            print('Predicting for {} at {}% cloud cover'.format(img, pctl))
            start_time = time.time()
            model = tf.keras.models.load_model(model_path)
            p_hat = []
            for t in range(MC_passes):
                p_hat.append(
                    model.predict(X_test,
                                  batch_size=model_params['batch_size'],
                                  use_multiprocessing=True)[:, 1])
            p_hat = np.array(p_hat)
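            # MC dropout decomposition: the mean flood probability over MC_passes gives the prediction, the mean
            # of p*(1-p) approximates aleatoric uncertainty, and the variance of p across passes approximates
            # epistemic uncertainty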
            preds = np.round(np.mean(p_hat, axis=0))
            aleatoric = np.mean(p_hat * (1 - p_hat), axis=0)
            epistemic = np.mean(p_hat**2, axis=0) - np.mean(p_hat, axis=0)**2

            try:
                preds_path.mkdir(parents=True)
            except FileExistsError:
                pass

            with h5py.File(bin_file, 'a') as f:
                if str(pctl) in f:
                    print('Deleting earlier mean predictions')
                    del f[str(pctl)]
                f.create_dataset(str(pctl), data=preds)

            try:
                uncertainties_path.mkdir(parents=True)
            except FileExistsError:
                pass

            with h5py.File(epistemic_bin_file, 'a') as f:
                if str(pctl) in f:
                    print('Deleting earlier epistemic uncertainties')
                    del f[str(pctl)]
                f.create_dataset(str(pctl), data=epistemic)

            with h5py.File(aleatoric_bin_file, 'a') as f:
                if str(pctl) in f:
                    print('Deleting earlier aleatoric uncertainties')
                    del f[str(pctl)]
                f.create_dataset(str(pctl), data=aleatoric)

            times.append(timer(start_time, time.time(), False))

            print('Evaluating predictions')
            accuracy.append(accuracy_score(y_test, preds))
            precision.append(precision_score(y_test, preds))
            recall.append(recall_score(y_test, preds))
            f1.append(f1_score(y_test, preds))

            del preds, p_hat, aleatoric, epistemic, X_test, y_test, model, data_test, data_vector_test, data_ind_test

        metrics = pd.DataFrame(
            np.column_stack([pctls, accuracy, precision, recall, f1]),
            columns=['cloud_cover', 'accuracy', 'precision', 'recall', 'f1'])
        metrics.to_csv(metrics_path / 'metrics.csv', index=False)
        times = [float(i) for i in times]
        times_df = pd.DataFrame(np.column_stack([pctls, times]),
                                columns=['cloud_cover', 'testing_time'])
        times_df.to_csv(metrics_path / 'testing_times.csv', index=False)
Example No. 23
    def fpfn_map(self, probs):
        data_path = self.data_path
        plt.ioff()
        my_dpi = 300
        # Get predictions and variances
        for img in self.img_list:
            print('Creating FN/FP map for {}'.format(img))
            plot_path = data_path / self.batch / 'plots' / img
            preds_bin_file = data_path / self.batch / 'predictions' / img / 'predictions.h5'
            stack_path = data_path / 'images' / img / 'stack' / 'stack.tif'

            try:
                plot_path.mkdir(parents=True)
            except FileExistsError:
                pass

            # Reshape variance values back into image band
            with rasterio.open(stack_path, 'r') as ds:
                shape = ds.read(1).shape  # Shape of full original image

            for pctl in self.pctls:
                data_test, data_vector_test, data_ind_test, feat_keep = preprocessing(
                    data_path, img, pctl, self.feat_list_all, test=True)

                print('Fetching flood predictions for {}%'.format(pctl))
                with h5py.File(preds_bin_file, 'r') as f:
                    predictions = f[str(pctl)]
                    if probs:
                        predictions = np.argmax(
                            np.array(predictions),
                            axis=1)  # Copy h5 dataset to array
                    if not probs:
                        predictions = np.array(predictions)

                prediction_img = np.zeros(shape)
                prediction_img[:] = np.nan
                rows, cols = zip(data_ind_test)
                prediction_img[rows, cols] = predictions

                perm_index = feat_keep.index('GSWPerm')
                flood_index = feat_keep.index('flooded')
                floods = data_test[:, :, flood_index]
                perm_water = (data_test[:, :, perm_index] == 1)
                tp = np.logical_and(prediction_img == 1,
                                    floods == 1).astype('int')
                tn = np.logical_and(prediction_img == 0,
                                    floods == 0).astype('int')
                fp = np.logical_and(prediction_img == 1,
                                    floods == 0).astype('int')
                fn = np.logical_and(prediction_img == 0,
                                    floods == 1).astype('int')

                # Mask out clouds, etc.
                tp = ma.masked_array(tp, mask=np.isnan(prediction_img))
                fp = ma.masked_array(fp, mask=np.isnan(prediction_img))
                fn = ma.masked_array(fn, mask=np.isnan(prediction_img))

                true_false = fp + (fn * 2) + (tp * 3)
                true_false[perm_water] = -1
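                # Class codes: -1 = permanent water, 0 = true negative, 1 = false positive (false flood),
                # 2 = false negative (missed flood), 3 = true positive; the colors/labels below follow this order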

                colors = []
                class_labels = []
                if np.sum(perm_water) != 0:
                    colors.append('darkgrey')
                    class_labels.append('Permanent Water')
                if np.sum(tn) != 0:
                    colors.append('saddlebrown')
                    class_labels.append('True Negatives')
                if np.sum(fp) != 0:
                    colors.append('limegreen')
                    class_labels.append('False Floods')
                if np.sum(fn) != 0:
                    colors.append('red')
                    class_labels.append('Missed Floods')
                if np.sum(tp) != 0:
                    colors.append('blue')
                    class_labels.append('True Floods')

                legend_patches = [
                    Patch(color=icolor, label=label)
                    for icolor, label in zip(colors, class_labels)
                ]
                cmap = ListedColormap(colors)
                fig, ax = plt.subplots(figsize=(8, 5))
                ax.imshow(true_false, cmap=cmap)
                ax.get_xaxis().set_visible(False)
                ax.get_yaxis().set_visible(False)
                ax.legend(labels=class_labels,
                          handles=legend_patches,
                          loc='lower left',
                          bbox_to_anchor=(0, 1),
                          ncol=5,
                          borderaxespad=0,
                          frameon=False,
                          prop={'size': 7})
                plt.tight_layout()
                plt.savefig(plot_path /
                            '{}'.format('map_fpfn_' + str(pctl) + '.png'),
                            dpi=my_dpi,
                            pad_inches=0.0)

                plt.close('all')