def prediction(img_list, pctls, feat_list_new, data_path, batch, remove_perm): for j, img in enumerate(img_list): times = [] accuracy, precision, recall, f1 = [], [], [], [] preds_path = data_path / batch / 'predictions' / img bin_file = preds_path / 'predictions.h5' metrics_path = data_path / batch / 'metrics' / 'testing' / img try: metrics_path.mkdir(parents=True) except FileExistsError: print('Metrics directory already exists') for i, pctl in enumerate(pctls): print('Preprocessing', img, pctl, '% cloud cover') data_test, data_vector_test, data_ind_test, feat_keep = preprocessing(data_path, img, pctl, feat_list_new, test=True) perm_index = feat_keep.index('GSW_perm') flood_index = feat_keep.index('flooded') if remove_perm: data_vector_test[data_vector_test[:, perm_index] == 1, flood_index] = 0 # Remove flood water that is perm water data_vector_test = np.delete(data_vector_test, perm_index, axis=1) # Remove GSW_perm column data_shape = data_vector_test.shape X_test, y_test = data_vector_test[:, 0:data_shape[1]-1], data_vector_test[:, data_shape[1]-1] print('Predicting for {} at {}% cloud cover'.format(img, pctl)) start_time = time.time() model_path = data_path / batch / 'models' / img / '{}'.format(img + '_clouds_' + str(pctl) + '.sav') trained_model = joblib.load(model_path) pred_probs = trained_model.predict_proba(X_test) preds = np.argmax(pred_probs, axis=1) try: preds_path.mkdir(parents=True) except FileExistsError: pass with h5py.File(bin_file, 'a') as f: if str(pctl) in f: print('Deleting earlier mean predictions') del f[str(pctl)] f.create_dataset(str(pctl), data=pred_probs) times.append(timer(start_time, time.time(), False)) # Elapsed time for MC simulations print('Evaluating predictions') accuracy.append(accuracy_score(y_test, preds)) precision.append(precision_score(y_test, preds)) recall.append(recall_score(y_test, preds)) f1.append(f1_score(y_test, preds)) del preds, pred_probs, X_test, y_test, trained_model, data_test, data_vector_test, data_ind_test metrics = pd.DataFrame(np.column_stack([pctls, accuracy, precision, recall, f1]), columns=['cloud_cover', 'accuracy', 'precision', 'recall', 'f1']) metrics.to_csv(metrics_path / 'metrics.csv', index=False) times = [float(i) for i in times] # Convert time objects to float, otherwise valMetrics will be non-numeric times_df = pd.DataFrame(np.column_stack([pctls, times]), columns=['cloud_cover', 'testing_time']) times_df.to_csv(metrics_path / 'testing_times.csv', index=False)
def training_bnn(img_list, pctls, feat_list_new, data_path, batch, **model_params): for j, img in enumerate(img_list): print(img + ': stacking tif, generating clouds') times = [] tif_stacker(data_path, img, feat_list_new, features=True, overwrite=False) cloud_generator(img, data_path, overwrite=False) for i, pctl in enumerate(pctls): print(img, pctl, '% CLOUD COVER') print('Preprocessing') tf.keras.backend.clear_session() data_train, data_vector_train, data_ind_train, feat_keep = preprocessing( data_path, img, pctl, feat_list_new, test=False) perm_index = feat_keep.index('GSW_perm') flood_index = feat_keep.index('flooded') data_vector_train[data_vector_train[:, perm_index] == 1, flood_index] = 0 data_vector_train = np.delete(data_vector_train, perm_index, axis=1) shape = data_vector_train.shape X_train, y_train = data_vector_train[:, 0:shape[1] - 1], data_vector_train[:, shape[1] - 1] y_train = to_categorical(y_train) D = len(set(y_train[:, 0])) # Target classes model_path = data_path / batch / 'models' / img metrics_path = data_path / batch / 'metrics' / 'training' / img / '{}'.format( img + '_clouds_' + str(pctl)) try: metrics_path.mkdir(parents=True) model_path.mkdir(parents=True) except FileExistsError: pass model_path = model_path / '{}'.format(img + '_clouds_' + str(pctl) + '.h5') print('Training model') start_time = time.time() aleatoric_model = get_aleatoric_uncertainty_model(X_train, y_train, **model_params, D=D) end_time = time.time() times.append(timer(start_time, end_time, False)) aleatoric_model.save(model_path) metrics_path = metrics_path.parent times = [float(i) for i in times] times = np.column_stack([pctls, times]) times_df = pd.DataFrame(times, columns=['cloud_cover', 'training_time']) times_df.to_csv(metrics_path / 'training_times.csv', index=False)
def log_reg_training(img_list, pctls, feat_list_new, data_path, batch): for j, img in enumerate(img_list): print(img + ': stacking tif, generating clouds') times = [] tif_stacker(data_path, img, feat_list_new, features=True, overwrite=False) cloud_generator(img, data_path, overwrite=False) for i, pctl in enumerate(pctls): print(img, pctl, '% CLOUD COVER') print('Preprocessing') data_train, data_vector_train, data_ind_train, feat_keep = preprocessing( data_path, img, pctl, feat_list_new, test=False) perm_index = feat_keep.index('GSW_perm') flood_index = feat_keep.index('flooded') # data_vector_train[data_vector_train[:, perm_index] == 1, flood_index] = 0 data_vector_train = np.delete(data_vector_train, perm_index, axis=1) shape = data_vector_train.shape X_train, y_train = data_vector_train[:, 0:shape[1] - 1], data_vector_train[:, shape[1] - 1] model_path = data_path / batch / 'models' / img metrics_path = data_path / batch / 'metrics' / 'training' / img / '{}'.format( img + '_clouds_' + str(pctl)) if not model_path.exists(): model_path.mkdir(parents=True) if not metrics_path.exists(): metrics_path.mkdir(parents=True) model_path = model_path / '{}'.format(img + '_clouds_' + str(pctl) + '.sav') print('Training') start_time = time.time() logreg = LogisticRegression(n_jobs=-1, solver='sag') logreg.fit(X_train, y_train) end_time = time.time() times.append(timer(start_time, end_time, False)) joblib.dump(logreg, model_path) metrics_path = metrics_path.parent times = [float(i) for i in times] times = np.column_stack([pctls, times]) times_df = pd.DataFrame(times, columns=['cloud_cover', 'training_time']) times_df.to_csv(metrics_path / 'training_times.csv', index=False)
def prediction_with_uncertainty(img_list, pctls, feat_list_new, data_path, batch, DROPOUT_RATE, MC_PASSES, remove_perm, weight_decay=0.005, length_scale=0.00001, **model_params): for j, img in enumerate(img_list): times = [] accuracy, precision, recall, f1 = [], [], [], [] preds_path = data_path / batch / 'predictions' / img vars_path = data_path / batch / 'variances' / img mc_bin_file = preds_path / 'mc_preds.h5' preds_bin_file = preds_path / 'predictions.h5' vars_bin_file = vars_path / 'variances.h5' metrics_path = data_path / batch / 'metrics' / 'testing' / img try: metrics_path.mkdir(parents=True) except FileExistsError: print('Metrics directory already exists') try: preds_path.mkdir(parents=True) except FileExistsError: print('Metrics directory already exists') try: vars_path.mkdir(parents=True) except FileExistsError: print('Metrics directory already exists') for i, pctl in enumerate(pctls): print('Preprocessing', img, pctl, '% cloud cover') data_test, data_vector_test, data_ind_test, feat_keep = preprocessing( data_path, img, pctl, gaps=True) feat_list_keep = [feat_list_new[i] for i in feat_keep ] # Removed if feat was deleted in preprocessing if remove_perm: perm_index = feat_list_keep.index('GSW_perm') flood_index = feat_list_keep.index('flooded') data_vector_test[ data_vector_test[:, perm_index] == 1, flood_index] = 0 # Remove flood water that is perm water data_vector_test = np.delete(data_vector_test, perm_index, axis=1) # Remove GSW_perm column data_shape = data_vector_test.shape X_test, y_test = data_vector_test[:, 0:data_shape[ 1] - 1], data_vector_test[:, data_shape[1] - 1] # Initialize binary file to hold predictions with h5py.File(mc_bin_file, 'w') as f: f.create_dataset('mc_preds', shape=(X_test.shape[0], 1), maxshape=(X_test.shape[0], None), chunks=True, compression='gzip' ) # Create empty dataset with shape of data start_time = time.time() model_path = data_path / batch / 'models' / img / '{}'.format( img + '_clouds_' + str(pctl) + '.h5') trained_model = tf.keras.models.load_model(model_path) for k in range(MC_PASSES): if k % 10 == 0 or k == MC_PASSES - 1: print('Running MC {}/{} for {} at {}% cloud cover'.format( k, MC_PASSES, img, pctl)) flood_prob = trained_model.predict( X_test, batch_size=model_params['batch_size'], use_multiprocessing=True) # Predict flood_prob = flood_prob[:, 1] # Drop probability of not flooded (0) to save space with h5py.File(mc_bin_file, 'a') as f: f['mc_preds'][:, -1] = flood_prob # Append preds to h5 file if k < MC_PASSES - 1: # Resize to append next pass, if there is one f['mc_preds'].resize((f['mc_preds'].shape[1] + 1), axis=1) tf.keras.backend.clear_session() del flood_prob # Calculate MC statistics print('Calculating MC statistics for {} at {}% cloud cover'.format( img, pctl)) with h5py.File(mc_bin_file, 'r') as f: dset = f['mc_preds'] preds_da = da.from_array( dset, chunks="250 MiB") # Open h5 file as dask array means = preds_da.mean(axis=1) means = means.compute() variance = preds_da.var(axis=1) variance = variance.compute() tau = (length_scale**2 * (1 - DROPOUT_RATE)) / (2 * data_shape[0] * weight_decay) variance = variance + tau preds = means.round() del f, means, preds_da, dset os.remove(mc_bin_file) # Delete predictions to save space on disk print('Saving mean preds/vars for {} at {}% cloud cover'.format( img, pctl)) with h5py.File(preds_bin_file, 'a') as f: if str(pctl) in f: print('Deleting earlier mean predictions') del f[str(pctl)] f.create_dataset(str(pctl), data=preds) with h5py.File(vars_bin_file, 'a') as f: if str(pctl) in 
f: print('Deleting earlier variances') del f[str(pctl)] f.create_dataset(str(pctl), data=variance) times.append(timer(start_time, time.time(), False)) # Elapsed time for MC simulations print('Evaluating predictions for {} at {}% cloud cover'.format( img, pctl)) accuracy.append(accuracy_score(y_test, preds)) precision.append(precision_score(y_test, preds)) recall.append(recall_score(y_test, preds)) f1.append(f1_score(y_test, preds)) del preds, X_test, y_test, trained_model, data_test, data_vector_test, data_ind_test metrics = pd.DataFrame( np.column_stack([pctls, accuracy, precision, recall, f1]), columns=['cloud_cover', 'accuracy', 'precision', 'recall', 'f1']) metrics.to_csv(metrics_path / 'metrics.csv', index=False) times = [ float(i) for i in times ] # Convert time objects to float, otherwise valMetrics will be non-numeric times_df = pd.DataFrame(np.column_stack([pctls, times]), columns=['cloud_cover', 'testing_time']) times_df.to_csv(metrics_path / 'testing_times.csv', index=False)
def uncertainty_map_LR(self): data_path = self.data_path plt.ioff() my_dpi = 300 # Get predictions and variances for img in self.img_list: print('Creating uncertainty map for {}'.format(img)) plot_path = data_path / self.batch / 'plots' / img se_lower_bin_file = data_path / self.batch / 'uncertainties' / img / 'se_lower.h5' se_upper_bin_file = data_path / self.batch / 'uncertainties' / img / 'se_upper.h5' stack_path = data_path / 'images' / img / 'stack' / 'stack.tif' try: plot_path.mkdir(parents=True) except FileExistsError: pass # Reshape variance values back into image band with rasterio.open(stack_path, 'r') as ds: shape = ds.read(1).shape # Shape of full original image for pctl in self.pctls: with h5py.File(se_lower_bin_file, 'r') as f: lower = f[str(pctl)] lower = np.array(lower) # Copy h5 dataset to array with h5py.File(se_upper_bin_file, 'r') as f: upper = f[str(pctl)] upper = np.array(upper) # Copy h5 dataset to array data_test, data_vector_test, data_ind_test, feat_keep = preprocessing( data_path, img, pctl, self.feat_list_all, test=True) uncertainties = upper - lower perm_index = feat_keep.index('GSWPerm') perm = data_test[:, :, perm_index] unc_image = np.zeros(shape) unc_image[:] = np.nan rows, cols = zip(data_ind_test) unc_image[rows, cols] = uncertainties unc_image[perm == 1] = 0 cutoff_value = np.nanpercentile( unc_image, 99.99) # Truncate values so outliers don't skew colorbar unc_image[unc_image > cutoff_value] = np.round(cutoff_value, 0) fig, ax = plt.subplots() im = ax.imshow(unc_image, cmap='magma') ax.get_xaxis().set_visible(False) ax.get_yaxis().set_visible(False) im_ratio = unc_image.shape[0] / unc_image.shape[1] cbar = fig.colorbar(im, ax=ax, fraction=0.02 * im_ratio, pad=0.02 * im_ratio) # cbar_labels = [label.get_text() for label in cbar.ax.get_yticklabels()] # Add + to cbar max value # cbar_labels[-1] = cbar_labels[-1] + '+' # cbar.ax.set_yticklabels(cbar_labels) plt.tight_layout() plt.savefig( plot_path / '{}'.format('map_uncertainty_' + str(pctl) + '.png'), dpi=my_dpi, pad_inches=0.0) plt.close('all')
def training2(img_list, pctls, model_func, feat_list_new, data_path, batch, DROPOUT_RATE=0, HOLDOUT=0.3, **model_params): ''' Removes flood water that is permanent water ''' get_model = model_func for j, img in enumerate(img_list): times = [] tif_stacker(data_path, img, feat_list_new, features=True, overwrite=True) cloud_generator(img, data_path, overwrite=False) for i, pctl in enumerate(pctls): data_train, data_vector_train, data_ind_train = preprocessing( data_path, img, pctl, gaps=False) perm_index = feat_list_new.index('GSW_perm') flood_index = feat_list_new.index('flooded') data_vector_train[ data_vector_train[:, perm_index] == 1, flood_index] = 0 # Remove flood water that is perm water data_vector_train = np.delete(data_vector_train, perm_index, axis=1) # Remove perm water column training_data, validation_data = train_val(data_vector_train, holdout=HOLDOUT) X_train, y_train = training_data[:, 0:14], training_data[:, 14] X_val, y_val = validation_data[:, 0:14], validation_data[:, 14] INPUT_DIMS = X_train.shape[1] model_path = data_path / batch / 'models' / img metrics_path = data_path / batch / 'metrics' / 'training' / img / '{}'.format( img + '_clouds_' + str(pctl)) try: metrics_path.mkdir(parents=True) model_path.mkdir(parents=True) except FileExistsError: pass model_path = model_path / '{}'.format(img + '_clouds_' + str(pctl) + '.h5') csv_logger = CSVLogger(metrics_path / 'training_log.log') model_params['callbacks'].append(csv_logger) print('~~~~~', img, pctl, '% CLOUD COVER') model = get_model(INPUT_DIMS) start_time = time.time() model.fit(X_train, y_train, **model_params, validation_data=(X_val, y_val)) end_time = time.time() times.append(timer(start_time, end_time, False)) model.save(model_path) metrics_path = metrics_path.parent times = [float(i) for i in times] times = np.column_stack([pctls, times]) times_df = pd.DataFrame(times, columns=['cloud_cover', 'training_time']) times_df.to_csv(metrics_path / 'training_times.csv', index=False)
def prediction_gen_model(img_list, pctls, feat_list_new, data_path, batch, **model_params): model_path = data_path / batch / 'models' / 'gen_model.h5' for j, img in enumerate(img_list): times = [] accuracy, precision, recall, f1 = [], [], [], [] preds_path = data_path / batch / 'predictions' / img bin_file = preds_path / 'predictions.h5' metrics_path = data_path / batch / 'metrics' / 'testing' / img try: metrics_path.mkdir(parents=True) except FileExistsError: print('Metrics directory already exists') for i, pctl in enumerate(pctls): pretrained_model = tf.keras.models.load_model(model_path) for i in range(6): pretrained_model.layers[i].trainable = False pretrained_model.layers[6].trainable = True ll = pretrained_model.layers[6].output ll = tf.keras.layers.Dense(6)(ll) ll = tf.keras.layers.Dense(6)(ll) new_model = Model(pretrained_model.input, outputs=ll) print('Training') data_train, data_vector_train, data_ind_train, feat_keep = preprocessing( data_path, img, pctl, feat_list_new, test=False) perm_index = feat_keep.index('GSW_perm') flood_index = feat_keep.index('flooded') gsw_index = feat_keep.index('GSW_maxExtent') data_vector_train = np.delete(data_vector_train, perm_index, axis=1) # Remove GSW_perm column data_vector_train = np.delete(data_vector_train, gsw_index, axis=1) data_shape = data_vector_train.shape X_train, y_train = data_vector_train[:, 0:data_shape[ 1] - 1], data_vector_train[:, data_shape[1] - 1] trained_model = new_model.fit(X_train, y_train) print('Preprocessing', img, pctl, '% cloud cover') data_test, data_vector_test, data_ind_test, feat_keep = preprocessing( data_path, img, pctl, feat_list_new, test=True) perm_index = feat_keep.index('GSW_perm') flood_index = feat_keep.index('flooded') gsw_index = feat_list_new.index('GSW_maxExtent') data_vector_test[ data_vector_test[:, perm_index] == 1, flood_index] = 0 # Remove flood water that is perm water data_vector_test = np.delete(data_vector_test, perm_index, axis=1) # Remove GSW_perm column data_vector_test = np.delete(data_vector_test, gsw_index, axis=1) data_shape = data_vector_test.shape X_test, y_test = data_vector_test[:, 0:data_shape[ 1] - 1], data_vector_test[:, data_shape[1] - 1] print('Predicting for {} at {}% cloud cover'.format(img, pctl)) start_time = time.time() preds = trained_model.predict( X_test, batch_size=model_params['batch_size'], use_multiprocessing=True) preds = np.argmax(preds, axis=1) # Display most probable value try: preds_path.mkdir(parents=True) except FileExistsError: pass with h5py.File(bin_file, 'a') as f: if str(pctl) in f: print('Deleting earlier mean predictions') del f[str(pctl)] f.create_dataset(str(pctl), data=preds) times.append(timer(start_time, time.time(), False)) # Elapsed time for MC simulations print('Evaluating predictions') accuracy.append(accuracy_score(y_test, preds)) precision.append(precision_score(y_test, preds)) recall.append(recall_score(y_test, preds)) f1.append(f1_score(y_test, preds)) del preds, X_test, y_test, data_test, data_vector_test, data_ind_test metrics = pd.DataFrame( np.column_stack([pctls, accuracy, precision, recall, f1]), columns=['cloud_cover', 'accuracy', 'precision', 'recall', 'f1']) metrics.to_csv(metrics_path / 'metrics.csv', index=False) times = [ float(i) for i in times ] # Convert time objects to float, otherwise valMetrics will be non-numeric times_df = pd.DataFrame(np.column_stack([pctls, times]), columns=['cloud_cover', 'testing_time']) times_df.to_csv(metrics_path / 'testing_times.csv', index=False)
def training6(img_list, pctls, model_func, feat_list_new, data_path, batch, T, dropout_rate=0.2, **model_params): ''' 1. Removes ALL pixels that are over permanent water 2. Finds the optimum learning rate and uses cyclic LR scheduler to train the model 3. No validation set for training 4. ''' get_model = model_func for j, img in enumerate(img_list): print(img + ': stacking tif, generating clouds') times = [] tif_stacker(data_path, img, feat_list_new, features=True, overwrite=False) cloud_generator(img, data_path, overwrite=False) for i, pctl in enumerate(pctls): print(img, pctl, '% CLOUD COVER') print('Preprocessing') tf.keras.backend.clear_session() data_train, data_vector_train, data_ind_train, feat_keep = preprocessing( data_path, img, pctl, feat_list_new, gaps=False) perm_index = feat_keep.index('GSW_perm') flood_index = feat_keep.index('flooded') data_vector_train[ data_vector_train[:, perm_index] == 1, flood_index] = 0 # Remove flood water that is perm water data_vector_train = np.delete(data_vector_train, perm_index, axis=1) # Remove perm water column shape = data_vector_train.shape X_train, y_train = data_vector_train[:, 0:shape[1] - 1], data_vector_train[:, shape[1] - 1] y_train = to_categorical(y_train) INPUT_DIMS = X_train.shape[1] model_path = data_path / batch / 'models' / img metrics_path = data_path / batch / 'metrics' / 'training' / img / '{}'.format( img + '_clouds_' + str(pctl)) try: metrics_path.mkdir(parents=True) model_path.mkdir(parents=True) except FileExistsError: pass model_path = model_path / '{}'.format(img + '_clouds_' + str(pctl) + '.h5') callbacks = [ tf.keras.callbacks.EarlyStopping( monitor='softmax_output_categorical_accuracy', min_delta=0.005, patience=5), tf.keras.callbacks.ModelCheckpoint(filepath=str(model_path), monitor='loss', save_best_only=True), CSVLogger(metrics_path / 'training_log.log') ] start_time = time.time() model = get_model(model_params['epochs'], X_train, y_train, X_train.shape, T, D=2, batch_size=model_params['batch_size'], dropout_rate=dropout_rate, callbacks=callbacks) end_time = time.time() times.append(timer(start_time, end_time, False)) # model.save(model_path) metrics_path = metrics_path.parent times = [float(i) for i in times] times = np.column_stack([pctls, times]) times_df = pd.DataFrame(times, columns=['cloud_cover', 'training_time']) times_df.to_csv(metrics_path / 'training_times.csv', index=False)
def false_map(self): """ Creates map of FP/FNs overlaid on RGB image """ plt.ioff() data_path = self.data_path for i, img in enumerate(self.img_list): print('Creating FN/FP map for {}'.format(img)) plot_path = data_path / self.batch / 'plots' / img bin_file = data_path / self.batch / 'predictions' / img / 'predictions.h5' stack_path = data_path / 'images' / img / 'stack' / 'stack.tif' # Get RGB image print('Stacking RGB image') band_list = ['B1', 'B2', 'B3', 'B4', 'B5', 'B6', 'B7'] tif_stacker(data_path, img, band_list, features=False, overwrite=False) spectra_stack_path = data_path / 'images' / img / 'stack' / 'spectra_stack.tif' # Function to normalize the grid values def normalize(array): """Normalizes numpy arrays into scale 0.0 - 1.0""" array_min, array_max = np.nanmin(array), np.nanmax(array) return ((array - array_min) / (array_max - array_min)) print('Processing RGB image') with rasterio.open(spectra_stack_path, 'r') as f: red, green, blue = f.read(4), f.read(3), f.read(2) red[red == -999999] = np.nan green[green == -999999] = np.nan blue[blue == -999999] = np.nan redn = normalize(red) greenn = normalize(green) bluen = normalize(blue) rgb = np.dstack((redn, greenn, bluen)) # Convert to PIL image, enhance, and save rgb_img = Image.fromarray((rgb * 255).astype(np.uint8())) rgb_img = ImageEnhance.Contrast(rgb_img).enhance(1.5) rgb_img = ImageEnhance.Sharpness(rgb_img).enhance(2) rgb_img = ImageEnhance.Brightness(rgb_img).enhance(2) print('Saving RGB image') rgb_file = plot_path / '{}'.format('rgb_img' + '.png') rgb_img.save(rgb_file, dpi=(300, 300)) # Reshape predicted values back into image band with rasterio.open(stack_path, 'r') as ds: shape = ds.read(1).shape # Shape of full original image for pctl in self.pctls: data_test, data_vector_test, data_ind_test, feat_keep = preprocessing( data_path, img, pctl, self.feat_list_new, test=True) for buffer_iter in self.buffer_iters: print('Fetching flood predictions for buffer', buffer_iter, 'at', str(pctl) + '{}'.format('%')) # Read predictions with h5py.File(bin_file, 'r') as f: pred_name = str(pctl) + '_buff_' + str(buffer_iter) predictions = f[pred_name] predictions = np.array( predictions) # Copy h5 dataset to array # Add predicted values to cloud-covered pixel positions prediction_img = np.zeros(shape) prediction_img[:] = np.nan rows, cols = zip(data_ind_test) prediction_img[rows, cols] = predictions # Remove perm water from predictions and actual perm_index = feat_keep.index('GSW_perm') flood_index = feat_keep.index('flooded') data_vector_test[ data_vector_test[:, perm_index] == 1, flood_index] = 0 # Remove flood water that is perm water data_shape = data_vector_test.shape with rasterio.open(stack_path, 'r') as ds: perm_feat = ds.read(perm_index + 1) prediction_img[perm_feat == 1] = 0 # Add actual flood values to cloud-covered pixel positions flooded_img = np.zeros(shape) flooded_img[:] = np.nan flooded_img[rows, cols] = data_vector_test[:, data_shape[1] - 1] # Visualizing FNs/FPs ones = np.ones(shape=shape) red_actual = np.where(ones, flooded_img, 0.5) # Actual blue_preds = np.where(ones, prediction_img, 0.5) # Predictions green_combo = np.minimum(red_actual, blue_preds) # Saving FN/FP comparison image comparison_img = np.dstack( (red_actual, green_combo, blue_preds)) comparison_img_file = plot_path / '{}'.format( 'false_map' + str(pctl) + '_buff_' + str(buffer_iter) + '.png') print('Saving FN/FP image for buffer', str(buffer_iter), 'at', str(pctl) + '{}'.format('%')) matplotlib.image.imsave(comparison_img_file, comparison_img, 
dpi=300) # Load comparison image flood_overlay = Image.open(comparison_img_file) # Convert black pixels to transparent in comparison image so it can overlay RGB datas = flood_overlay.getdata() newData = [] for item in datas: if item[0] == 0 and item[1] == 0 and item[2] == 0: newData.append((255, 255, 255, 0)) else: newData.append(item) flood_overlay.putdata(newData) # Superimpose comparison image and RGB image, then save and close rgb_img.paste(flood_overlay, (0, 0), flood_overlay) plt.imshow(rgb_img) print('Saving overlay image for buffer', str(buffer_iter), 'at', str(pctl) + '{}'.format('%')) rgb_img.save( plot_path / '{}'.format('false_map_overlay' + str(pctl) + '_buff_' + str(buffer_iter) + '.png'), dpi=(300, 300)) plt.close('all')
def false_map(self, probs, save=True): """ Creates map of FP/FNs overlaid on RGB image save : bool If true, saves RGB FP/FN overlay image. If false, just saves FP/FN overlay """ plt.ioff() data_path = self.data_path for i, img in enumerate(self.img_list): print('Creating false map for {}'.format(img)) plot_path = data_path / self.batch / 'plots' / img band_combo_dir = data_path / 'band_combos' bin_file = data_path / self.batch / 'predictions' / img / 'predictions.h5' stack_path = data_path / 'images' / img / 'stack' / 'stack.tif' try: plot_path.mkdir(parents=True) except FileExistsError: pass # Reshape predicted values back into image band # with rasterio.open(stack_path, 'r') as ds: # shape = ds.read(1).shape # Shape of full original image # Get RGB image rgb_file = band_combo_dir / '{}'.format(img + '_rgb_img' + '.png') rgb_img = Image.open(rgb_file) for j, pctl in enumerate(self.pctls): print('Fetching flood predictions for', str(pctl) + '{}'.format('%')) # Read predictions with h5py.File(bin_file, 'r') as f: if probs: prediction_probs = f[str(pctl)] prediction_probs = np.array( prediction_probs) # Copy h5 dataset to array predictions = np.argmax(prediction_probs, axis=1) else: predictions = f[str(pctl)] predictions = np.array( predictions) # Copy h5 dataset to array data_test, data_vector_test, data_ind_test, feat_keep = preprocessing( data_path, img, pctl, self.feat_list_all, test=True) shape = data_test.shape[:2] # Add predicted values to cloud-covered pixel positions prediction_img = np.zeros(shape) prediction_img[:] = np.nan rows, cols = zip(data_ind_test) prediction_img[rows, cols] = predictions # Remove perm water from predictions and actual perm_index = feat_keep.index('GSWPerm') flood_index = feat_keep.index('flooded') data_vector_test[data_vector_test[:, perm_index] == 1, flood_index] = 0 data_shape = data_vector_test.shape perm_feat = data_test[:, :, perm_index] prediction_img[((prediction_img == 1) & (perm_feat == 1))] = 0 # Add actual flood values to cloud-covered pixel positions flooded_img = np.zeros(shape) flooded_img[:] = np.nan flooded_img[rows, cols] = data_vector_test[:, data_shape[1] - 1] # Visualizing FNs/FPs ones = np.ones(shape=shape) red_actual = np.where(ones, flooded_img, 0.5) # Actual blue_preds = np.where(ones, prediction_img, 0.5) # Predictions green_combo = np.minimum(red_actual, blue_preds) alphas = np.ones(shape) # Convert black pixels to transparent in fpfn image so it can overlay RGB fpfn_img = np.dstack( (red_actual, green_combo, blue_preds, alphas)) * 255 fpfn_overlay_file = plot_path / '{}'.format('false_map' + str(pctl) + '.png') indices = np.where((np.isnan(fpfn_img[:, :, 0])) & np.isnan(fpfn_img[:, :, 1]) & np.isnan(fpfn_img[:, :, 2]) & (fpfn_img[:, :, 3] == 255)) fpfn_img[indices] = [255, 255, 255, 0] fpfn_overlay = Image.fromarray(np.uint8(fpfn_img), mode='RGBA') fpfn_overlay.save(fpfn_overlay_file, dpi=(300, 300)) # Superimpose comparison image and RGB image, then save and close if save: rgb_img.paste(fpfn_overlay, (0, 0), fpfn_overlay) print('Saving overlay image for', str(pctl) + '{}'.format('%')) rgb_img.save( plot_path / '{}'.format('false_map_overlay' + str(pctl) + '.png'), dpi=(300, 300)) plt.close('all')
def log_reg_gen_prediction(img_list, pctls, feat_list_new, data_path, batch): for j, img in enumerate(img_list): times = [] accuracy, precision, recall, f1, roc_auc = [], [], [], [], [] preds_path = data_path / batch / 'predictions' / img bin_file = preds_path / 'predictions.h5' uncertainties_path = data_path / batch / 'uncertainties' / img se_lower_bin_file = uncertainties_path / 'se_lower.h5' se_upper_bin_file = uncertainties_path / 'se_upper.h5' metrics_path = data_path / batch / 'metrics' / 'testing' / img try: metrics_path.mkdir(parents=True) except FileExistsError: print('Metrics directory already exists') for i, pctl in enumerate(pctls): print('Preprocessing', img, pctl, '% cloud cover') data_test, data_vector_test, data_ind_test, feat_keep = preprocessing( data_path, img, pctl, feat_list_new, test=True) perm_index = feat_keep.index('GSW_perm') flood_index = feat_keep.index('flooded') data_vector_test[data_vector_test[:, perm_index] == 1, flood_index] = 0 data_vector_test = np.delete(data_vector_test, perm_index, axis=1) # Remove GSW_perm column data_shape = data_vector_test.shape X_test, y_test = data_vector_test[:, 0:data_shape[ 1] - 1], data_vector_test[:, data_shape[1] - 1] print('Predicting for {} at {}% cloud cover'.format(img, pctl)) start_time = time.time() model_path = data_path / batch / 'models' / 'gen_model.sav' trained_model = joblib.load(model_path) pred_probs = trained_model.predict_proba(X_test) preds = np.argmax(pred_probs, axis=1) try: preds_path.mkdir(parents=True) except FileExistsError: pass with h5py.File(bin_file, 'a') as f: if str(pctl) in f: print('Deleting earlier mean predictions') del f[str(pctl)] f.create_dataset(str(pctl), data=pred_probs) # Computer standard errors SE_est = get_se(X_test, y_test, trained_model) probs, upper, lower = get_probs( trained_model, X_test, SE_est, z=1.96) # probs is redundant, predicted above try: uncertainties_path.mkdir(parents=True) except FileExistsError: pass with h5py.File(se_lower_bin_file, 'a') as f: if str(pctl) in f: print('Deleting earlier lower SEs') del f[str(pctl)] f.create_dataset(str(pctl), data=lower) with h5py.File(se_upper_bin_file, 'a') as f: if str(pctl) in f: print('Deleting earlier upper SEs') del f[str(pctl)] f.create_dataset(str(pctl), data=upper) times.append(timer(start_time, time.time(), False)) print('Evaluating predictions') perm_mask = data_test[:, :, perm_index] perm_mask = perm_mask.reshape( [perm_mask.shape[0] * perm_mask.shape[1]]) perm_mask = perm_mask[~np.isnan(perm_mask)] preds[perm_mask.astype('bool')] = 0 y_test[perm_mask.astype('bool')] = 0 accuracy.append(accuracy_score(y_test, preds)) precision.append(precision_score(y_test, preds)) recall.append(recall_score(y_test, preds)) f1.append(f1_score(y_test, preds)) roc_auc.append(roc_auc_score(y_test, pred_probs[:, 1])) del preds, probs, pred_probs, upper, lower, X_test, y_test, \ trained_model, data_test, data_vector_test, data_ind_test metrics = pd.DataFrame( np.column_stack([pctls, accuracy, precision, recall, f1, roc_auc]), columns=[ 'cloud_cover', 'accuracy', 'precision', 'recall', 'f1', 'auc' ]) metrics.to_csv(metrics_path / 'metrics.csv', index=False) times = [ float(i) for i in times ] # Convert time objects to float, otherwise valMetrics will be non-numeric times_df = pd.DataFrame(np.column_stack([pctls, times]), columns=['cloud_cover', 'testing_time']) times_df.to_csv(metrics_path / 'testing_times.csv', index=False)
# ====================================================================================================================== img = img_list[0] pctl = 30 batch = 'test' import statsmodels.api as sm print(img + ': stacking tif, generating clouds') times = [] tif_stacker(data_path, img, feat_list_new, features=True, overwrite=False) cloud_generator(img, data_path, overwrite=False) print(img, pctl, '% CLOUD COVER') print('Preprocessing') tf.keras.backend.clear_session() data_train, data_vector_train, data_ind_train, feat_keep = preprocessing( data_path, img, pctl, feat_list_new, test=False) perm_index = feat_keep.index('GSW_perm') flood_index = feat_keep.index('flooded') data_vector_train[data_vector_train[:, perm_index] == 1, flood_index] = 0 # Remove flood water that is perm water data_vector_train = np.delete(data_vector_train, perm_index, axis=1) # Remove perm water column shape = data_vector_train.shape X_train, y_train = data_vector_train[:, 0:shape[1] - 1], data_vector_train[:, shape[1] - 1] # # Logistic regression using sklearn # model_path = data_path / batch / 'models' / img # if not model_path.exists(): # model_path.mkdir(parents=True) # model_path = model_path / '{}'.format(img + '_sklearn.sav')
def NN_prediction(img_list, pctls, feat_list_all, data_path, batch, **model_params): for j, img in enumerate(img_list): times = [] accuracy, precision, recall, f1, roc_auc = [], [], [], [], [] preds_path = data_path / batch / 'predictions' / img bin_file = preds_path / 'predictions.h5' metrics_path = data_path / batch / 'metrics' / 'testing' / img try: metrics_path.mkdir(parents=True) except FileExistsError: print('Metrics directory already exists') for i, pctl in enumerate(pctls): print('Preprocessing', img, pctl, '% cloud cover') data_test, data_vector_test, data_ind_test, feat_keep = preprocessing( data_path, img, pctl, feat_list_all, test=True) perm_index = feat_keep.index('GSW_perm') flood_index = feat_keep.index('flooded') data_vector_test[ data_vector_test[:, perm_index] == 1, flood_index] = 0 # Remove flood water that is perm water data_vector_test = np.delete(data_vector_test, perm_index, axis=1) # Remove GSW_perm column data_shape = data_vector_test.shape X_test, y_test = data_vector_test[:, 0:data_shape[ 1] - 1], data_vector_test[:, data_shape[1] - 1] print('Predicting for {} at {}% cloud cover'.format(img, pctl)) start_time = time.time() model_path = data_path / batch / 'models' / img / '{}'.format( img + '_clouds_' + str(pctl) + '.h5') trained_model = load_macro_soft_f1_model(model_path) pred_probs = trained_model.predict( X_test, batch_size=model_params['batch_size'], use_multiprocessing=True) preds = np.argmax(pred_probs, axis=1) # Display most probable value try: preds_path.mkdir(parents=True) except FileExistsError: pass with h5py.File(bin_file, 'a') as f: if str(pctl) in f: print('Deleting earlier mean predictions') del f[str(pctl)] f.create_dataset(str(pctl), data=preds) times.append(timer(start_time, time.time(), False)) # Elapsed time for MC simulations print('Evaluating predictions') perm_mask = data_test[:, :, perm_index] perm_mask = perm_mask.reshape( [perm_mask.shape[0] * perm_mask.shape[1]]) perm_mask = perm_mask[~np.isnan(perm_mask)] preds[perm_mask.astype('bool')] = 0 y_test[perm_mask.astype('bool')] = 0 accuracy.append(accuracy_score(y_test, preds)) precision.append(precision_score(y_test, preds)) recall.append(recall_score(y_test, preds)) f1.append(f1_score(y_test, preds)) roc_auc.append(roc_auc_score(y_test, pred_probs[:, 1])) del preds, pred_probs, X_test, y_test, trained_model, data_test, data_vector_test, data_ind_test metrics = pd.DataFrame( np.column_stack([pctls, accuracy, precision, recall, f1, roc_auc]), columns=[ 'cloud_cover', 'accuracy', 'precision', 'recall', 'f1', 'auc' ]) metrics.to_csv(metrics_path / 'metrics.csv', index=False) times = [ float(i) for i in times ] # Convert time objects to float, otherwise valMetrics will be non-numeric times_df = pd.DataFrame(np.column_stack([pctls, times]), columns=['cloud_cover', 'testing_time']) times_df.to_csv(metrics_path / 'testing_times.csv', index=False)
def stack_all_uncertainties(model, batch, data_path, img_list, feat_list_all): uncertainty_all = [] predictions_all = [] tp_all = [] tn_all = [] fp_all = [] fn_all = [] if model is 'BNN': aleatoric_all = [] epistemic_all = [] plot_path = data_path / batch / 'plots' output_bin_file = data_path / batch / 'metrics' / 'uncertainty_fpfn.h5' for i, img in enumerate(img_list): print(img) preds_bin_file = data_path / batch / 'predictions' / img / 'predictions.h5' stack_path = data_path / 'images' / img / 'stack' / 'stack.tif' try: plot_path.mkdir(parents=True) except FileExistsError: pass # Reshape variance values back into image band with rasterio.open(stack_path, 'r') as ds: shape = ds.read(1).shape # Shape of full original image for pctl in pctls: print(pctl) data_test, data_vector_test, data_ind_test, feat_keep = preprocessing( data_path, img, pctl, feat_list_all, test=True) perm_index = feat_keep.index('GSW_perm') flood_index = feat_keep.index('flooded') floods = data_test[:, :, flood_index] perm = data_test[:, :, perm_index] if model is 'LR': se_lower_bin_file = data_path / batch / 'uncertainties' / img / 'se_lower.h5' se_upper_bin_file = data_path / batch / 'uncertainties' / img / 'se_upper.h5' with h5py.File(se_lower_bin_file, 'r') as f: lower = f[str(pctl)] lower = np.array(lower) with h5py.File(se_upper_bin_file, 'r') as f: upper = f[str(pctl)] upper = np.array(upper) uncertainties = upper - lower if model is 'BNN': aleatoric_bin_file = data_path / batch / 'uncertainties' / img / 'aleatoric_uncertainties.h5' epistemic_bin_file = data_path / batch / 'uncertainties' / img / 'epistemic_uncertainties.h5' with h5py.File(aleatoric_bin_file, 'r') as f: aleatoric = f[str(pctl)] aleatoric = np.array(aleatoric) with h5py.File(epistemic_bin_file, 'r') as f: epistemic = f[str(pctl)] epistemic = np.array(epistemic) aleatoric_image = np.zeros(shape) aleatoric_image[:] = np.nan rows, cols = zip(data_ind_test) aleatoric_image[rows, cols] = aleatoric epistemic_image = np.zeros(shape) epistemic_image[:] = np.nan rows, cols = zip(data_ind_test) epistemic_image[rows, cols] = epistemic uncertainties = aleatoric + epistemic unc_image = np.zeros(shape) unc_image[:] = np.nan rows, cols = zip(data_ind_test) unc_image[rows, cols] = uncertainties # unc_image[perm == 1] = 0 # cutoff_value = np.nanpercentile(unc_image, 99.99) # Truncate values so outliers don't skew colorbar # unc_image[unc_image > cutoff_value] = np.round(cutoff_value, 0) with h5py.File(preds_bin_file, 'r') as f: predictions = f[str(pctl)] if model is 'LR': predictions = np.argmax(np.array(predictions), axis=1) # Copy h5 dataset to array if model is 'BNN': predictions = np.array(predictions) prediction_img = np.zeros(shape) prediction_img[:] = np.nan rows, cols = zip(data_ind_test) prediction_img[rows, cols] = predictions floods = floods.reshape([ floods.shape[0] * floods.shape[1], ]) predictions_mask = prediction_img.reshape([ prediction_img.shape[0] * prediction_img.shape[1], ]) tp = np.logical_and(predictions_mask == 1, floods == 1).astype('int') tn = np.logical_and(predictions_mask == 0, floods == 0).astype('int') fp = np.logical_and(predictions_mask == 1, floods == 0).astype('int') fn = np.logical_and(predictions_mask == 0, floods == 1).astype('int') # Mask out clouds, etc. 
tp = tp[~np.isnan(predictions_mask)] tn = tn[~np.isnan(predictions_mask)] fp = fp[~np.isnan(predictions_mask)] fn = fn[~np.isnan(predictions_mask)] unc_image_mask = unc_image.reshape([ unc_image.shape[0] * unc_image.shape[1], ]) unc_image_mask = unc_image_mask[~np.isnan(predictions_mask)] if model is 'BNN': aleatoric_image_mask = aleatoric_image.reshape([ aleatoric_image.shape[0] * aleatoric_image.shape[1], ]) aleatoric_image_mask = aleatoric_image_mask[ ~np.isnan(predictions_mask)] epistemic_image_mask = epistemic_image.reshape([ epistemic_image.shape[0] * epistemic_image.shape[1], ]) epistemic_image_mask = epistemic_image_mask[ ~np.isnan(predictions_mask)] aleatoric_all.append(aleatoric_image_mask) epistemic_all.append(epistemic_image_mask) predictions_all.append(predictions) uncertainty_all.append(unc_image_mask) tp_all.append(tp) tn_all.append(tn) fp_all.append(fp) fn_all.append(fn) # data_vector_all = np.concatenate(data_vector_all, axis=0) # Won't work because some features are missing predictions_all = np.concatenate(predictions_all, axis=0) uncertainty_all = np.concatenate(uncertainty_all, axis=0) tp_all = np.concatenate(tp_all, axis=0) tn_all = np.concatenate(tn_all, axis=0) fp_all = np.concatenate(fp_all, axis=0) fn_all = np.concatenate(fn_all, axis=0) if model is 'BNN': aleatoric_all = np.concatenate(aleatoric_all, axis=0) epistemic_all = np.concatenate(epistemic_all, axis=0) # df = np.column_stack((data_vector_all, predictions_all, uncertainty_all, tp_all, tn_all, fp_all, fn_all)) if model is 'LR': df = np.column_stack( (predictions_all, uncertainty_all, tp_all, tn_all, fp_all, fn_all)) if model is 'BNN': df = np.column_stack((predictions_all, uncertainty_all, aleatoric_all, epistemic_all, tp_all, tn_all, fp_all, fn_all)) with h5py.File(output_bin_file, 'a') as f: if 'uncertainty_fpfn' in f: print('Deleting earlier uncertainty/fpfn') del f['uncertainty_fpfn'] f.create_dataset('uncertainty_fpfn', data=df)
def false_map(probs, data_path, save=True): """ Creates map of FP/FNs overlaid on RGB image save : bool If true, saves RGB FP/FN overlay image. If false, just saves FP/FN overlay """ plt.ioff() for i, img in enumerate(img_list): print('Creating FN/FP map for {}'.format(img)) plot_path = data_path / batch / 'plots' / img bin_file = data_path / batch / 'predictions' / img / 'predictions.h5' stack_path = data_path / 'images' / img / 'stack' / 'stack.tif' # Get RGB image print('Stacking RGB image') band_list = ['B1', 'B2', 'B3', 'B4', 'B5', 'B6', 'B7'] tif_stacker(data_path, img, band_list, features=False, overwrite=False) spectra_stack_path = data_path / 'images' / img / 'stack' / 'spectra_stack.tif' # Function to normalize the grid values def normalize(array): """Normalizes numpy arrays into scale 0.0 - 1.0""" array_min, array_max = np.nanmin(array), np.nanmax(array) return ((array - array_min) / (array_max - array_min)) print('Processing RGB image') with rasterio.open(spectra_stack_path, 'r') as f: red, green, blue = f.read(4), f.read(3), f.read(2) red[red == -999999] = np.nan green[green == -999999] = np.nan blue[blue == -999999] = np.nan redn = normalize(red) greenn = normalize(green) bluen = normalize(blue) rgb = np.dstack((redn, greenn, bluen)) # Convert to PIL image, enhance, and save rgb_img = Image.fromarray((rgb * 255).astype(np.uint8())) rgb_img = ImageEnhance.Contrast(rgb_img).enhance(1.5) rgb_img = ImageEnhance.Sharpness(rgb_img).enhance(2) rgb_img = ImageEnhance.Brightness(rgb_img).enhance(2) print('Saving RGB image') rgb_file = plot_path / '{}'.format('rgb_img' + '.png') rgb_img.save(rgb_file, dpi=(300, 300)) # Reshape predicted values back into image band with rasterio.open(stack_path, 'r') as ds: shape = ds.read(1).shape # Shape of full original image for j, pctl in enumerate(pctls): print('Fetching flood predictions for', str(pctl) + '{}'.format('%')) # Read predictions with h5py.File(bin_file, 'r') as f: if probs: prediction_probs = f[str(pctl)] prediction_probs = np.array( prediction_probs) # Copy h5 dataset to array predictions = np.argmax(prediction_probs, axis=1) else: predictions = f[str(pctl)] predictions = np.array( predictions) # Copy h5 dataset to array data_test, data_vector_test, data_ind_test, feat_keep = preprocessing( data_path, img, pctl, feat_list_new, test=True) # Add predicted values to cloud-covered pixel positions prediction_img = np.zeros(shape) prediction_img[:] = np.nan rows, cols = zip(data_ind_test) prediction_img[rows, cols] = predictions # Remove perm water from predictions and actual perm_index = feat_keep.index('GSW_perm') flood_index = feat_keep.index('flooded') data_vector_test[ data_vector_test[:, perm_index] == 1, flood_index] = 0 # Remove flood water that is perm water data_shape = data_vector_test.shape with rasterio.open(stack_path, 'r') as ds: perm_feat = ds.read(perm_index + 1) prediction_img[perm_feat == 1] = 0 # Add actual flood values to cloud-covered pixel positions flooded_img = np.zeros(shape) flooded_img[:] = np.nan flooded_img[rows, cols] = data_vector_test[:, data_shape[1] - 1] # Visualizing FNs/FPs ones = np.ones(shape=shape) red_actual = np.where(ones, flooded_img, 0.5) # Actual blue_preds = np.where(ones, prediction_img, 0.5) # Predictions green_combo = np.minimum(red_actual, blue_preds) alphas = np.ones(shape) * 255 # Convert black pixels to transparent in fpfn image so it can overlay RGB fpfn_img = np.dstack((red_actual, green_combo, blue_preds, alphas)) fpfn_overlay_file = plot_path / '{}'.format('false_map' + str(pctl) + 
'.png') indices = np.where((fpfn_img[:, :, 0] == 0) & (fpfn_img[:, :, 1] == 0) & (fpfn_img[:, :, 2] == 0) & (fpfn_img[:, :, 3] == 255)) fpfn_img[indices] = 0 fpfn_overlay = Image.fromarray(fpfn_img, mode='RGBA') fpfn_overlay.save(fpfn_overlay_file, dpi=(300, 300)) # Superimpose comparison image and RGB image, then save and close if save: rgb_img.paste(fpfn_overlay, (0, 0), fpfn_overlay) print('Saving overlay image for', str(pctl) + '{}'.format('%')) rgb_img.save( plot_path / '{}'.format('false_map_overlay' + str(pctl) + '.png'), dpi=(300, 300)) plt.close('all')
def prediction_bnn(img_list, pctls, feat_list_new, data_path, batch, MC_passes): for j, img in enumerate(img_list): epistemic_times = [] aleatoric_times = [] accuracy, precision, recall, f1 = [], [], [], [] preds_path = data_path / batch / 'predictions' / img bin_file = preds_path / 'predictions.h5' aleatoric_bin_file = preds_path / 'aleatoric_predictions.h5' uncertainties_path = data_path / batch / 'uncertainties' / img aleatoric_uncertainty_file = uncertainties_path / 'aleatoric_uncertainties.h5' epistemic_uncertainty_file = uncertainties_path / 'epistemic_uncertainties.h5' metrics_path = data_path / batch / 'metrics' / 'testing' / img try: metrics_path.mkdir(parents=True) except FileExistsError: print('Metrics directory already exists') for i, pctl in enumerate(pctls): print('Preprocessing', img, pctl, '% cloud cover') data_test, data_vector_test, data_ind_test, feat_keep = preprocessing(data_path, img, pctl, feat_list_new, test=True) perm_index = feat_keep.index('GSW_perm') flood_index = feat_keep.index('flooded') data_vector_test[data_vector_test[:, perm_index] == 1, flood_index] = 0 data_vector_test = np.delete(data_vector_test, perm_index, axis=1) data_shape = data_vector_test.shape X_test, y_test = data_vector_test[:, 0:data_shape[1] - 1], data_vector_test[:, data_shape[1] - 1] y_test = to_categorical(y_test) D = len(set(y_test[:, 0])) # Target classes iterable = K.variable(np.ones(MC_passes)) print('Predicting (aleatoric) for {} at {}% cloud cover'.format(img, pctl)) model_path = data_path / batch / 'models' / img / '{}'.format(img + '_clouds_' + str(pctl) + '.h5') start_time = time.time() # aleatoric_model = tf.keras.models.load_model(model_path) aleatoric_model = load_bayesian_model(model_path, MC_passes, D, iterable) aleatoric_results = aleatoric_model.predict(X_test, verbose=1) aleatoric_uncertainties = np.reshape(aleatoric_results[0][:, D:], (-1)) try: uncertainties_path.mkdir(parents=True) except FileExistsError: pass with h5py.File(aleatoric_uncertainty_file, 'a') as f: if str(pctl) in f: print('Deleting earlier aleatoric uncertainties') del f[str(pctl)] f.create_dataset(str(pctl), data=aleatoric_uncertainties) logits = aleatoric_results[0][:, 0:D] aleatoric_preds = np.argmax(aleatoric_results[1], axis=1) aleatoric_times.append(timer(start_time, time.time(), False)) try: preds_path.mkdir(parents=True) except FileExistsError: pass with h5py.File(aleatoric_bin_file, 'a') as f: if str(pctl) in f: print('Deleting earlier aleatoric predictions') del f[str(pctl)] f.create_dataset(str(pctl), data=aleatoric_preds) print('Predicting (epistemic) for {} at {}% cloud cover'.format(img, pctl)) start_time = time.time() epistemic_model = get_epistemic_uncertainty_model(model_path, T=MC_passes, D=D) epistemic_results = epistemic_model.predict(X_test, verbose=2, use_multiprocessing=True) epistemic_uncertainties = epistemic_results[0] with h5py.File(epistemic_uncertainty_file, 'a') as f: if str(pctl) in f: print('Deleting earlier epistemic uncertainties') del f[str(pctl)] f.create_dataset(str(pctl), data=epistemic_uncertainties) epistemic_preds = np.argmax(epistemic_results[1], axis=1) epistemic_times.append(timer(start_time, time.time(), False)) with h5py.File(bin_file, 'a') as f: if str(pctl) in f: print('Deleting earlier epistemic predictions') del f[str(pctl)] f.create_dataset(str(pctl), data=epistemic_preds) print('Evaluating predictions') accuracy.append(accuracy_score(y_test[:, 1], epistemic_preds)) precision.append(precision_score(y_test[:, 1], epistemic_preds)) 
recall.append(recall_score(y_test[:, 1], epistemic_preds)) f1.append(f1_score(y_test[:, 1], epistemic_preds)) del aleatoric_model, aleatoric_results, aleatoric_uncertainties, logits, aleatoric_preds, \ epistemic_model, epistemic_uncertainties, epistemic_preds, epistemic_results, \ data_test, data_vector_test, data_ind_test metrics = pd.DataFrame(np.column_stack([pctls, accuracy, precision, recall, f1]), columns=['cloud_cover', 'accuracy', 'precision', 'recall', 'f1']) metrics.to_csv(metrics_path / 'metrics.csv', index=False) epistemic_times = [float(i) for i in epistemic_times] aleatoric_times = [float(i) for i in aleatoric_times] times_df = pd.DataFrame(np.column_stack([pctls, epistemic_times, aleatoric_times]), columns=['cloud_cover', 'epistemic_testing_time', 'aleatoric_testing_time']) times_df.to_csv(metrics_path / 'testing_times.csv', index=False)
def uncertainty_map_NN(self): data_path = self.data_path plt.ioff() my_dpi = 300 # Get predictions and variances for img in self.img_list: print('Creating uncertainty map for {}'.format(img)) plot_path = data_path / self.batch / 'plots' / img aleatoric_bin_file = data_path / self.batch / 'uncertainties' / img / 'aleatoric_uncertainties.h5' epistemic_bin_file = data_path / self.batch / 'uncertainties' / img / 'epistemic_uncertainties.h5' stack_path = data_path / 'images' / img / 'stack' / 'stack.tif' try: plot_path.mkdir(parents=True) except FileExistsError: pass # Reshape variance values back into image band with rasterio.open(stack_path, 'r') as ds: shape = ds.read(1).shape # Shape of full original image for pctl in self.pctls: with h5py.File(aleatoric_bin_file, 'r') as f: aleatoric = f[str(pctl)] aleatoric = np.array(aleatoric) with h5py.File(epistemic_bin_file, 'r') as f: epistemic = f[str(pctl)] epistemic = np.array(epistemic) uncertainties = aleatoric + epistemic data_test, data_vector_test, data_ind_test, feat_keep = preprocessing( data_path, img, pctl, self.feat_list_all, test=True) perm_index = feat_keep.index('GSWPerm') perm = data_test[:, :, perm_index] # Aleatoric + epistemic unc_image = np.zeros(shape) unc_image[:] = np.nan rows, cols = zip(data_ind_test) unc_image[rows, cols] = uncertainties unc_image[perm == 1] = 0 fig, ax = plt.subplots() my_img = ax.imshow(unc_image, cmap='plasma') ax.get_xaxis().set_visible(False) ax.get_yaxis().set_visible(False) im_ratio = unc_image.shape[0] / unc_image.shape[1] fig.colorbar(my_img, ax=ax, fraction=0.02 * im_ratio, pad=0.02 * im_ratio) plt.tight_layout() plt.savefig( plot_path / '{}'.format('map_uncertainty_' + str(pctl) + '.png'), dpi=my_dpi, pad_inches=0.0) # Aleatoric aleatoric_image = np.zeros(shape) aleatoric_image[:] = np.nan rows, cols = zip(data_ind_test) aleatoric_image[rows, cols] = aleatoric aleatoric_image[perm == 1] = 0 fig, ax = plt.subplots() my_img = ax.imshow(aleatoric_image, cmap='plasma') ax.get_xaxis().set_visible(False) ax.get_yaxis().set_visible(False) im_ratio = aleatoric_image.shape[0] / aleatoric_image.shape[1] fig.colorbar(my_img, ax=ax, fraction=0.02 * im_ratio, pad=0.02 * im_ratio) plt.tight_layout() plt.savefig(plot_path / '{}'.format('map_aleatoric_' + str(pctl) + '.png'), dpi=my_dpi, pad_inches=0.0) # Epistemic epistemic_image = np.zeros(shape) epistemic_image[:] = np.nan rows, cols = zip(data_ind_test) epistemic_image[rows, cols] = epistemic epistemic_image[perm == 1] = 0 fig, ax = plt.subplots() my_img = ax.imshow(epistemic_image, cmap='plasma') ax.get_xaxis().set_visible(False) ax.get_yaxis().set_visible(False) im_ratio = epistemic_image.shape[0] / epistemic_image.shape[1] fig.colorbar(my_img, ax=ax, fraction=0.02 * im_ratio, pad=0.02 * im_ratio) plt.tight_layout() plt.savefig(plot_path / '{}'.format('map_epistemic_' + str(pctl) + '.png'), dpi=my_dpi, pad_inches=0.0) plt.close('all')
def NN_training(img_list, pctls, model_func, feat_list_new, data_path, batch, **model_params): get_model = model_func for j, img in enumerate(img_list): print(img + ': stacking tif, generating clouds') times = [] lr_mins = [] lr_maxes = [] tif_stacker(data_path, img, feat_list_new, features=True, overwrite=False) cloud_generator(img, data_path, overwrite=False) for i, pctl in enumerate(pctls): print(img, pctl, '% CLOUD COVER') print('Preprocessing') tf.keras.backend.clear_session() data_train, data_vector_train, data_ind_train, feat_keep = preprocessing( data_path, img, pctl, feat_list_new, test=False) perm_index = feat_keep.index('GSW_perm') flood_index = feat_keep.index('flooded') # data_vector_train[data_vector_train[:, perm_index] == 1, flood_index] = 0 data_vector_train = np.delete(data_vector_train, perm_index, axis=1) shape = data_vector_train.shape X_train, y_train = data_vector_train[:, 0:shape[1] - 1], data_vector_train[:, shape[1] - 1] INPUT_DIMS = X_train.shape[1] model_path = data_path / batch / 'models' / img metrics_path = data_path / batch / 'metrics' / 'training' / img / '{}'.format( img + '_clouds_' + str(pctl)) lr_plots_path = metrics_path.parents[1] / 'lr_plots' lr_vals_path = metrics_path.parents[1] / 'lr_vals' try: metrics_path.mkdir(parents=True) model_path.mkdir(parents=True) lr_plots_path.mkdir(parents=True) lr_vals_path.mkdir(parents=True) except FileExistsError: pass # --------------------------------------------------------------------------------------------------- # Determine learning rate by finding max loss decrease during single epoch training lrRangeFinder = LrRangeFinder(start_lr=0.1, end_lr=2) lr_model_params = { 'batch_size': model_params['batch_size'], 'epochs': 1, 'verbose': 2, 'callbacks': [lrRangeFinder], 'use_multiprocessing': True } model = model_func(INPUT_DIMS) print('Finding learning rate') model.fit(X_train, y_train, **lr_model_params) lr_min, lr_max, lr, losses = lr_plots(lrRangeFinder, lr_plots_path, img, pctl) lr_mins.append(lr_min) lr_maxes.append(lr_max) # --------------------------------------------------------------------------------------------------- # Training the model with cyclical learning rate scheduler model_path = model_path / '{}'.format(img + '_clouds_' + str(pctl) + '.h5') scheduler = SGDRScheduler(min_lr=lr_min, max_lr=lr_max, lr_decay=0.9, cycle_length=3, mult_factor=1.5) callbacks = [ tf.keras.callbacks.EarlyStopping( monitor='sparse_categorical_accuracy', min_delta=0.0001, patience=10), tf.keras.callbacks.ModelCheckpoint(filepath=str(model_path), monitor='loss', save_best_only=True), CSVLogger(metrics_path / 'training_log.log'), scheduler ] model = get_model(INPUT_DIMS) print('Training full model with best LR') start_time = time.time() model.fit(X_train, y_train, **model_params, callbacks=callbacks) end_time = time.time() times.append(timer(start_time, end_time, False)) # model.save(model_path) metrics_path = metrics_path.parent times = [float(i) for i in times] times = np.column_stack([pctls, times]) times_df = pd.DataFrame(times, columns=['cloud_cover', 'training_time']) times_df.to_csv(metrics_path / 'training_times.csv', index=False) lr_range = np.column_stack([pctls, lr_mins, lr_maxes]) lr_avg = np.mean(lr_range[:, 1:2], axis=1) lr_range = np.column_stack([lr_range, lr_avg]) lr_range_df = pd.DataFrame( lr_range, columns=['cloud_cover', 'lr_min', 'lr_max', 'lr_avg']) lr_range_df.to_csv((lr_vals_path / img).with_suffix('.csv'), index=False) losses_path = lr_vals_path / img / '{}'.format('losses_' + str(pctl) + '.csv') 
try: losses_path.parent.mkdir(parents=True) except FileExistsError: pass lr_losses = np.column_stack([lr, losses]) lr_losses = pd.DataFrame(lr_losses, columns=['lr', 'losses']) lr_losses.to_csv(losses_path, index=False)
# ================================================================================= img = img_list[0] pctl = pctls[0] print(img + ': stacking tif, generating clouds') times = [] tif_stacker(data_path, img, feat_list_new, features=True, overwrite=False) cloud_generator(img, data_path, overwrite=False) print(img, pctl, '% CLOUD COVER') print('Preprocessing') data_train, data_vector_train, data_ind_train, feat_keep = preprocessing(data_path, img, pctl, feat_list_all, test=False) perm_index = feat_keep.index('GSW_perm') flood_index = feat_keep.index('flooded') # data_vector_train[data_vector_train[:, perm_index] == 1, flood_index] = 0 data_vector_train = np.delete(data_vector_train, perm_index, axis=1) shape = data_vector_train.shape X_train, y_train = data_vector_train[:, 0:shape[1] - 1], data_vector_train[:, shape[1] - 1] model_path = data_path / batch / 'models' / img metrics_path = data_path / batch / 'metrics' / 'training' / img / '{}'.format( img + '_clouds_' + str(pctl)) if not model_path.exists(): model_path.mkdir(parents=True) if not metrics_path.exists():
def log_reg_training_buffer(img_list, pctls, feat_list_new, data_path, batch, buffer_iters, buffer_flood_only):
    from imageio import imwrite
    for img in img_list:
        print(img + ': stacking tif, generating clouds')
        times = []
        tif_stacker(data_path, img, feat_list_new, features=True, overwrite=False)
        cloud_generator(img, data_path, overwrite=False)

        for pctl in pctls:
            print('Preprocessing')
            data_train_full, data_vector_train, data_ind_train, feat_keep = preprocessing(
                data_path, img, pctl, feat_list_new, test=False)

            for buffer_iter in buffer_iters:
                perm_index = feat_keep.index('GSW_perm')
                flood_index = feat_keep.index('flooded')
                data_train = data_train_full.copy()
                if buffer_flood_only:
                    data_train[data_train[:, :, perm_index] == 1, flood_index] = 0
                    mask = data_train[:, :, flood_index]
                    buffer_mask = np.invert(binary_dilation(mask, iterations=buffer_iter))
                else:
                    mask = data_train[:, :, flood_index]
                    buffer_mask = np.invert(binary_dilation(mask, iterations=buffer_iter))
                    data_train[data_train[:, :, perm_index] == 1, flood_index] = 0
                data_train[buffer_mask] = np.nan

                data_vector_train = data_train.reshape(
                    [data_train.shape[0] * data_train.shape[1], data_train.shape[2]])
                data_vector_train = data_vector_train[~np.isnan(data_vector_train).any(axis=1)]
                data_vector_train = np.delete(data_vector_train, perm_index, axis=1)  # Remove perm water column
                shape = data_vector_train.shape
                X_train, y_train = data_vector_train[:, 0:shape[1] - 1], data_vector_train[:, shape[1] - 1]

                model_path = data_path / batch / 'models' / img
                metrics_path = data_path / batch / 'metrics' / 'training' / img / '{}'.format(
                    img + '_clouds_' + str(pctl))

                if not model_path.exists():
                    model_path.mkdir(parents=True)
                if not metrics_path.exists():
                    metrics_path.mkdir(parents=True)

                model_path = model_path / '{}'.format(
                    img + '_clouds_' + str(pctl) + 'buff' + str(buffer_iter) + '.sav')

                # Save data flooding image to check that buffering is working correctly
                # imwrite(model_path.parents[0] / '{}'.format('buff' + str(buffer_iter) + '.jpg'), data_train[:, :, 6])

                print('Training')
                start_time = time.time()
                logreg = LogisticRegression(n_jobs=-1, solver='sag')
                logreg.fit(X_train, y_train)
                end_time = time.time()
                times.append(timer(start_time, end_time, False))
                joblib.dump(logreg, model_path)

        metrics_path = metrics_path.parent
        times = [float(i) for i in times]
        times = np.column_stack([np.repeat(pctls, len(buffer_iters)),
                                 np.tile(buffer_iters, len(pctls)),
                                 times])
        times_df = pd.DataFrame(times, columns=['cloud_cover', 'buffer_iters', 'training_time'])
        times_df.to_csv(metrics_path / 'training_times.csv', index=False)
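# A minimal, self-contained sketch of the dilation buffering used in log_reg_training_buffer:
# the flood mask is dilated by `iterations` pixels, inverted, and everything outside the
# buffer is set to NaN so only pixels near mapped flooding survive the NaN filter. The
# helper below is illustrative only; it operates on a toy boolean mask.
def _buffer_mask_sketch(mask, iterations):
    import numpy as np
    from scipy.ndimage import binary_dilation
    return np.invert(binary_dilation(mask, iterations=iterations))

# Example (assumed toy input): a single flooded pixel at the center of a 7x7 mask.
# toy_mask = np.zeros((7, 7), dtype=bool); toy_mask[3, 3] = True
# _buffer_mask_sketch(toy_mask, iterations=2)  # True = outside buffer (discarded), False = kept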
def rf_training(img_list, pctls, feat_list_new, data_path, batch, n_jobs):
    for j, img in enumerate(img_list):
        print(img + ': stacking tif, generating clouds')
        times = []
        tif_stacker(data_path, img, feat_list_new, features=True, overwrite=False)
        cloud_generator(img, data_path, overwrite=False)

        for i, pctl in enumerate(pctls):
            print(img, pctl, '% CLOUD COVER')
            print('Preprocessing')
            tf.keras.backend.clear_session()
            data_train, data_vector_train, data_ind_train, feat_keep = preprocessing(
                data_path, img, pctl, feat_list_new, test=False)
            perm_index = feat_keep.index('GSW_perm')
            flood_index = feat_keep.index('flooded')
            data_vector_train[data_vector_train[:, perm_index] == 1, flood_index] = 0  # Remove flood water that is perm water
            data_vector_train = np.delete(data_vector_train, perm_index, axis=1)  # Remove perm water column
            shape = data_vector_train.shape
            X_train, y_train = data_vector_train[:, 0:shape[1] - 1], data_vector_train[:, shape[1] - 1]

            model_path = data_path / batch / 'models' / img
            metrics_path = data_path / batch / 'metrics' / 'training' / img / '{}'.format(
                img + '_clouds_' + str(pctl))
            try:
                metrics_path.mkdir(parents=True)
                model_path.mkdir(parents=True)
            except FileExistsError:
                pass

            # Path to hyperparameters saved from an earlier search (see commented block below)
            param_path = data_path / batch / 'models' / '4514_LC08_027033_20170826_1' / '{}'.format(
                '4514_LC08_027033_20170826_1_clouds_50params.pkl')
            model_path = model_path / '{}'.format(img + '_clouds_' + str(pctl) + '.sav')

            # # Hyperparameter optimization
            # print('Hyperparameter search')
            # base_rf = RandomForestClassifier(random_state=0, n_estimators=100, max_leaf_nodes=10)
            # space = [skopt.space.Integer(2, 1000, name="max_leaf_nodes"),
            #          skopt.space.Integer(2, 200, name="n_estimators"),
            #          skopt.space.Integer(2, 3000, name="max_depth")]
            # @use_named_args(space)
            # def objective(**params):
            #     base_rf.set_params(**params)
            #     return -np.mean(cross_val_score(base_rf, X_train, y_train, cv=5, n_jobs=n_jobs, scoring="f1"))
            # res_rf = forest_minimize(objective, space, base_estimator='RF', n_calls=11,
            #                          random_state=0, verbose=True, n_jobs=n_jobs)
            # print(type(res_rf))
            # skopt.utils.dump(res_rf, param_path, store_objective=False)

            res_rf = skopt.utils.load(param_path)

            # Training
            print('Training with optimized hyperparameters')
            start_time = time.time()
            rf = RandomForestClassifier(random_state=0,
                                        max_leaf_nodes=res_rf.x[0],
                                        n_estimators=res_rf.x[1],
                                        max_depth=res_rf.x[2],
                                        n_jobs=-1)
            rf.fit(X_train, y_train)
            end_time = time.time()
            times.append(timer(start_time, end_time, False))
            joblib.dump(rf, model_path)

        metrics_path = metrics_path.parent
        times = [float(i) for i in times]
        times = np.column_stack([pctls, times])
        times_df = pd.DataFrame(times, columns=['cloud_cover', 'training_time'])
        times_df.to_csv(metrics_path / 'training_times.csv', index=False)
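# A small sketch of how the skopt result consumed above maps onto the random forest:
# forest_minimize returns an OptimizeResult whose .x lists the best value for each
# dimension in the same order as the search space, so res_rf.x[0] is max_leaf_nodes,
# res_rf.x[1] is n_estimators, and res_rf.x[2] is max_depth. The helper below is a
# hypothetical convenience, not part of the original code.
def _rf_from_skopt_result(res, n_jobs=-1, random_state=0):
    from sklearn.ensemble import RandomForestClassifier
    max_leaf_nodes, n_estimators, max_depth = res.x
    return RandomForestClassifier(random_state=random_state,
                                  max_leaf_nodes=max_leaf_nodes,
                                  n_estimators=n_estimators,
                                  max_depth=max_depth,
                                  n_jobs=n_jobs)

# Example: rf = _rf_from_skopt_result(skopt.utils.load(param_path))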
def prediction_BNN_gen_model(img_list, pctls, feat_list_new, data_path, batch, MC_passes, **model_params):
    for j, img in enumerate(img_list):
        times = []
        accuracy, precision, recall, f1 = [], [], [], []
        preds_path = data_path / batch / 'predictions' / img
        bin_file = preds_path / 'predictions.h5'
        model_path = data_path / batch / 'models' / 'gen_model.h5'
        uncertainties_path = data_path / batch / 'uncertainties' / img
        aleatoric_bin_file = uncertainties_path / 'aleatoric_uncertainties.h5'
        epistemic_bin_file = uncertainties_path / 'epistemic_uncertainties.h5'
        metrics_path = data_path / batch / 'metrics' / 'testing' / img

        try:
            metrics_path.mkdir(parents=True)
        except FileExistsError:
            print('Metrics directory already exists')

        for i, pctl in enumerate(pctls):
            print('Preprocessing', img, pctl, '% cloud cover')
            data_test, data_vector_test, data_ind_test, feat_keep = preprocessing(
                data_path, img, pctl, feat_list_new, test=True)
            perm_index = feat_keep.index('GSW_perm')
            flood_index = feat_keep.index('flooded')
            data_vector_test[data_vector_test[:, perm_index] == 1, flood_index] = 0  # Remove flood water that is perm water
            data_vector_test = np.delete(data_vector_test, perm_index, axis=1)  # Remove GSW_perm column
            data_shape = data_vector_test.shape
            X_test, y_test = data_vector_test[:, 0:data_shape[1] - 1], data_vector_test[:, data_shape[1] - 1]

            print('Predicting for {} at {}% cloud cover'.format(img, pctl))
            start_time = time.time()
            model = tf.keras.models.load_model(model_path)

            # Monte Carlo passes: stack the flood-class probability from each stochastic forward pass
            p_hat = []
            for t in range(MC_passes):
                p_hat.append(model.predict(X_test, batch_size=model_params['batch_size'],
                                           use_multiprocessing=True)[:, 1])
            p_hat = np.array(p_hat)
            preds = np.round(np.mean(p_hat, axis=0))
            aleatoric = np.mean(p_hat * (1 - p_hat), axis=0)
            epistemic = np.mean(p_hat ** 2, axis=0) - np.mean(p_hat, axis=0) ** 2

            try:
                preds_path.mkdir(parents=True)
            except FileExistsError:
                pass
            with h5py.File(bin_file, 'a') as f:
                if str(pctl) in f:
                    print('Deleting earlier mean predictions')
                    del f[str(pctl)]
                f.create_dataset(str(pctl), data=preds)

            try:
                uncertainties_path.mkdir(parents=True)
            except FileExistsError:
                pass
            with h5py.File(epistemic_bin_file, 'a') as f:
                if str(pctl) in f:
                    print('Deleting earlier epistemic uncertainties')
                    del f[str(pctl)]
                f.create_dataset(str(pctl), data=epistemic)
            with h5py.File(aleatoric_bin_file, 'a') as f:
                if str(pctl) in f:
                    print('Deleting earlier aleatoric uncertainties')
                    del f[str(pctl)]
                f.create_dataset(str(pctl), data=aleatoric)

            times.append(timer(start_time, time.time(), False))

            print('Evaluating predictions')
            accuracy.append(accuracy_score(y_test, preds))
            precision.append(precision_score(y_test, preds))
            recall.append(recall_score(y_test, preds))
            f1.append(f1_score(y_test, preds))

            del preds, p_hat, aleatoric, epistemic, X_test, y_test, model, data_test, data_vector_test, data_ind_test

        metrics = pd.DataFrame(np.column_stack([pctls, accuracy, precision, recall, f1]),
                               columns=['cloud_cover', 'accuracy', 'precision', 'recall', 'f1'])
        metrics.to_csv(metrics_path / 'metrics.csv', index=False)
        times = [float(i) for i in times]
        times_df = pd.DataFrame(np.column_stack([pctls, times]), columns=['cloud_cover', 'testing_time'])
        times_df.to_csv(metrics_path / 'testing_times.csv', index=False)
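# A minimal, self-contained sketch of the Monte Carlo uncertainty decomposition used in
# prediction_BNN_gen_model: given T stochastic forward passes of per-pixel flood
# probabilities p_hat (shape [T, n_pixels]), aleatoric uncertainty is the mean Bernoulli
# variance within passes and epistemic uncertainty is the variance of the mean prediction
# across passes. This helper is illustrative only; in the function above p_hat comes from
# repeated model.predict calls.
def _mc_uncertainty_sketch(p_hat):
    import numpy as np
    p_hat = np.asarray(p_hat)
    preds = np.round(np.mean(p_hat, axis=0))                 # Mean prediction, thresholded at 0.5
    aleatoric = np.mean(p_hat * (1 - p_hat), axis=0)         # Expected Bernoulli variance per pixel
    epistemic = np.mean(p_hat ** 2, axis=0) - np.mean(p_hat, axis=0) ** 2  # Variance across passes
    return preds, aleatoric, epistemic

# Example (synthetic probabilities): _mc_uncertainty_sketch(np.random.uniform(0, 1, size=(20, 5)))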
def fpfn_map(self, probs):
    data_path = self.data_path
    plt.ioff()
    my_dpi = 300

    # Get predictions and build false positive / false negative maps
    for img in self.img_list:
        print('Creating FN/FP map for {}'.format(img))
        plot_path = data_path / self.batch / 'plots' / img
        preds_bin_file = data_path / self.batch / 'predictions' / img / 'predictions.h5'
        stack_path = data_path / 'images' / img / 'stack' / 'stack.tif'

        try:
            plot_path.mkdir(parents=True)
        except FileExistsError:
            pass

        # Shape of full original image, used to reshape predictions back into an image band
        with rasterio.open(stack_path, 'r') as ds:
            shape = ds.read(1).shape

        for pctl in self.pctls:
            data_test, data_vector_test, data_ind_test, feat_keep = preprocessing(
                data_path, img, pctl, self.feat_list_all, test=True)
            print('Fetching flood predictions for', str(pctl) + '{}'.format('%'))
            with h5py.File(preds_bin_file, 'r') as f:
                predictions = f[str(pctl)]
                if probs:
                    predictions = np.argmax(np.array(predictions), axis=1)  # Copy h5 dataset to array
                if not probs:
                    predictions = np.array(predictions)

            prediction_img = np.zeros(shape)
            prediction_img[:] = np.nan
            rows, cols = zip(data_ind_test)
            prediction_img[rows, cols] = predictions

            perm_index = feat_keep.index('GSW_perm')
            flood_index = feat_keep.index('flooded')
            floods = data_test[:, :, flood_index]
            perm_water = (data_test[:, :, perm_index] == 1)
            tp = np.logical_and(prediction_img == 1, floods == 1).astype('int')
            tn = np.logical_and(prediction_img == 0, floods == 0).astype('int')
            fp = np.logical_and(prediction_img == 1, floods == 0).astype('int')
            fn = np.logical_and(prediction_img == 0, floods == 1).astype('int')

            # Mask out clouds, etc.
            tp = ma.masked_array(tp, mask=np.isnan(prediction_img))
            fp = ma.masked_array(fp, mask=np.isnan(prediction_img))
            fn = ma.masked_array(fn, mask=np.isnan(prediction_img))

            # Encode classes: 0 = TN, 1 = FP, 2 = FN, 3 = TP, -1 = permanent water
            true_false = fp + (fn * 2) + (tp * 3)
            true_false[perm_water] = -1

            # Build colormap and legend only for classes that are present
            colors = []
            class_labels = []
            if np.sum(perm_water) != 0:
                colors.append('darkgrey')
                class_labels.append('Permanent Water')
            if np.sum(tn) != 0:
                colors.append('saddlebrown')
                class_labels.append('True Negatives')
            if np.sum(fp) != 0:
                colors.append('limegreen')
                class_labels.append('False Floods')
            if np.sum(fn) != 0:
                colors.append('red')
                class_labels.append('Missed Floods')
            if np.sum(tp) != 0:
                colors.append('blue')
                class_labels.append('True Floods')
            legend_patches = [Patch(color=icolor, label=label)
                              for icolor, label in zip(colors, class_labels)]
            cmap = ListedColormap(colors)

            fig, ax = plt.subplots(figsize=(8, 5))
            ax.imshow(true_false, cmap=cmap)
            ax.get_xaxis().set_visible(False)
            ax.get_yaxis().set_visible(False)
            ax.legend(labels=class_labels, handles=legend_patches,
                      loc='lower left', bbox_to_anchor=(0, 1),
                      ncol=5, borderaxespad=0, frameon=False, prop={'size': 7})
            plt.tight_layout()
            plt.savefig(plot_path / '{}'.format('map_fpfn_' + str(pctl) + '.png'),
                        dpi=my_dpi, pad_inches=0.0)
            plt.close('all')
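# A minimal, self-contained sketch of the confusion-map encoding used in fpfn_map:
# 0 = true negative, 1 = false positive ("false flood"), 2 = false negative
# ("missed flood"), 3 = true positive, with permanent water later overwritten as -1.
# The helper is illustrative only and takes toy 2D arrays of 0/1 labels.
def _fpfn_encoding_sketch(prediction_img, floods):
    import numpy as np
    tp = np.logical_and(prediction_img == 1, floods == 1).astype(int)
    fp = np.logical_and(prediction_img == 1, floods == 0).astype(int)
    fn = np.logical_and(prediction_img == 0, floods == 1).astype(int)
    return fp + (fn * 2) + (tp * 3)

# Example (toy arrays):
# _fpfn_encoding_sketch(np.array([[1, 0], [1, 0]]), np.array([[1, 1], [0, 0]]))
# -> array([[3, 2], [1, 0]])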