def main():
    """Write one NDVI GeoTIFF per Montpellier image found in the dataset folder.

    Every file of the dataset folder whose name starts with ``Montpellier_``
    and ends with ``.TIF`` is opened with ``open_tiff``; a normalised
    difference of bands 3 and 2 is computed and written with ``create_tiff``
    as a single-band Float32 raster named ``NDVI_<date>.TIF`` in the results
    folder (created with ``create_dir``).

    The acquisition date is the run of digits following ``S2_`` in the file
    name; a name without that pattern raises ``AttributeError``.
    """
    folder_ndvi = "NDVI_results_S2"
    path_datasets = os.path.expanduser(
        '~/Desktop/Datasets/Montpellier_S2_Concatenated_1C_Clipped_norm_4096/')
    path_results = os.path.expanduser(
        '~/Desktop/Results/TS_clustering/') + folder_ndvi + "/"
    create_dir(path_results)

    for image_name_with_extention in os.listdir(path_datasets):
        if not (image_name_with_extention.startswith("Montpellier_")
                and image_name_with_extention.endswith(".TIF")):
            continue
        print(image_name_with_extention)
        image_date = (re.search("S2_([0-9]*).",
                                image_name_with_extention)).group(1)
        print(image_date)
        image_array, H, W, geo, proj, bands_nb = open_tiff(
            path_datasets, os.path.splitext(image_name_with_extention)[0])

        # Cast to float before the arithmetic: with an unsigned integer raster
        # the subtraction would silently wrap around instead of going negative.
        # NOTE(review): bands 3 and 2 are presumably NIR and red for the
        # Sentinel-2 band order; the original comment labelled this formula
        # "for SPOT-5" and suggested bands 2 and 1 for the other sensor.
        # Confirm the band order against the dataset before switching sensors.
        band_high = np.asarray(image_array[3], dtype=float)
        band_low = np.asarray(image_array[2], dtype=float)
        ndvi = (band_high - band_low) / (band_high + band_low)

        dst_ds = create_tiff(1,
                             path_results + "NDVI_" + str(image_date) + ".TIF",
                             W, H, gdal.GDT_Float32,
                             np.reshape(ndvi, (H, W)), geo, proj)
        # Dropping the last reference lets GDAL flush and close the dataset.
        dst_ds = None
def _first_matching_entry(directory, prefix, suffix=""):
    """Return the first entry of *directory* (in ``os.listdir`` order) whose
    name starts with *prefix* and ends with *suffix*.

    Raises ``IndexError`` (the exception the former ``list(...)[0]`` idiom
    produced) with an explanatory message when nothing matches.
    """
    for entry in os.listdir(directory):
        if entry.startswith(prefix) and entry.endswith(suffix):
            return entry
    raise IndexError("no entry starting with %r and ending with %r in %r" %
                     (prefix, suffix, directory))


def open_image(d1, d2, path, anomaly=False):
    """Locate and open the outlier raster produced for the couple (d1, d2).

    The raster lives in the first sub-folder whose name starts with
    ``Joint_AE_<d1>_<d2>``. With ``anomaly`` set, that sub-folder is looked up
    under ``path + "Anomaly/"`` and the file name starts with
    ``Anomaly_Outliers_average_<d1>_to_<d2>``; otherwise it is looked up
    directly under ``path`` and the file name starts with
    ``Outliers_average_<d1>_to_<d2>``.

    Args:
        d1, d2: date strings used to build the folder and file prefixes.
        path: root results folder, expected to end with a path separator.
        anomaly: select the ``Anomaly/`` variant of the results.

    Returns:
        Tuple ``(loss_folder, image_name, image_array_outliers, ds, H, W, geo,
        proj, bands_nb)``. ``loss_folder`` is the sub-folder name with a
        trailing "/" and without any ``Anomaly/`` prefix, ``image_name`` has no
        extension, ``ds`` is the dataset returned by ``gdal.Open`` and the
        remaining values come from ``open_tiff``.

    Raises:
        IndexError: if the folder or the raster cannot be found.
    """
    if anomaly:
        base_path = path + "Anomaly/"
        image_prefix = 'Anomaly_Outliers_average_' + d1 + '_to_' + d2
    else:
        base_path = path
        image_prefix = 'Outliers_average_' + d1 + '_to_' + d2

    loss_folder = _first_matching_entry(base_path,
                                        "Joint_AE_" + d1 + "_" + d2) + "/"
    image_name = _first_matching_entry(base_path + loss_folder, image_prefix,
                                       ".TIF")
    image_name = os.path.splitext(image_name)[0]

    # The raster is opened from the folder it was found in. The previous code
    # opened it from ``path + loss_folder`` and so lost the "Anomaly/" part.
    image_array_outliers, H, W, geo, proj, bands_nb = open_tiff(
        base_path + loss_folder, image_name)
    ds = gdal.Open(base_path + loss_folder + image_name + ".TIF")
    return loss_folder, image_name, image_array_outliers, ds, H, W, geo, proj, bands_nb
clustering_final_name = "Hierarchical_" + metric_type + "_n_GT" clustering_final_name_sp = clustering_final_name.replace( "Hierarchical_" + metric_type, "Spectral") print(folder_enc) if already_computed: from quality_stats import calculate_stats calculate_stats(folder_enc, segmentation_name, clustering_final_name, apply_mask_outliers=apply_mask_outliers, S2=S2) else: encoded_array, H, W, geo, proj, feat_nb = open_tiff( path_encoded, enc_name) encoded_array = np.asarray(encoded_array, dtype=float) # We normalize the data list_norm = [] for band in range(len(encoded_array)): all_images_band = encoded_array[band, :, :].flatten() min = np.min(all_images_band) max = np.max(all_images_band) mean = np.mean(all_images_band) std = np.std(all_images_band) list_norm.append([min, max, mean, std]) for band in range(len(encoded_array)): encoded_array[band] = (encoded_array[band] - list_norm[band][2]) / list_norm[band][3]
def otsu(image_array_loss1, image_array_loss2, H, W, geo, proj, path_results,
         images_date, threshold=0.995, changes=None, mask=None):
    """Threshold the mean reconstruction error with Otsu and export a change map.

    The two loss images are averaged, rescaled so that the maximum maps to
    256 and truncated to integers. Otsu's threshold is computed on the lowest
    ``threshold`` fraction of the sorted values. Pixels above the threshold
    are marked 1; when ``mask`` is given, pixels whose flat index is not in
    ``mask`` are forced back to 0. The map is written as a Byte GeoTIFF,
    sieve-filtered, vectorised, and optionally scored against a ground truth.

    Args:
        image_array_loss1, image_array_loss2: reconstruction-error arrays
            holding H * W values each.
        H, W: raster height and width.
        geo, proj: georeferencing objects forwarded to ``create_tiff``.
        path_results: output folder.
        images_date: string inserted in the output file name.
        threshold: fraction of the sorted values kept for the Otsu estimate.
        changes: name of a ground-truth set, or None to skip evaluation.
        mask: optional array of flat pixel indices that are valid.

    Raises:
        ValueError: if ``changes`` is not one of the known ground-truth sets
            (raised at evaluation time, after the rasters have been written).
    """
    image_array_loss = np.divide((image_array_loss1 + image_array_loss2), 2)

    # Rescale so the maximum becomes 256; an all-zero loss image is left as is
    # instead of being divided by zero.
    max_ = np.max(image_array_loss)
    coef = max_ / 256 if max_ != 0 else 1
    loss_flat = np.asarray(image_array_loss / coef, dtype=int).flatten()

    if mask is not None:
        kept_values = np.sort(loss_flat[mask])[0:int(len(mask) * threshold)]
    else:
        kept_values = np.sort(loss_flat)[0:int(H * W * threshold)]
    val = filters.threshold_otsu(kept_values)

    image_array_outliers = np.zeros(H * W)
    image_array_outliers[loss_flat > val] = 1
    if mask is not None:
        defected_mask = np.setdiff1d(np.arange(H * W), mask)
        image_array_outliers[defected_mask] = 0

    outliers_image_mean = "Outliers_average_" + images_date + "_" + str(
        threshold)
    dst_ds = create_tiff(1, path_results + "/" + outliers_image_mean + ".TIF",
                         W, H, gdal.GDT_Byte,
                         np.reshape(image_array_outliers, (H, W)), geo, proj)
    # Sieve the written band in place (size threshold 5, 4-connectedness).
    gdal.SieveFilter(dst_ds.GetRasterBand(1), None, dst_ds.GetRasterBand(1), 5,
                     4)
    dst_ds.FlushCache()
    vectorize_tiff(path_results, "/" + outliers_image_mean, dst_ds)
    dst_ds = None

    if changes is None:
        return

    # Ground-truth windows (row slice, column slice) inside the full raster.
    montpellier_windows = {
        "changes_2004_2005": (slice(0, 600), slice(600, 1400)),
        "changes_2006_2008": (slice(100, 370), slice(1000, 1320)),
    }
    rostov_windows = {
        "changes_Rostov_20150830_20150919": (slice(0, 700), slice(0, 900)),
        "changes_Rostov_20170918_20180111": (slice(2100, 2400),
                                             slice(900, 1400)),
    }
    if changes in montpellier_windows:
        path_cm = "/media/user/DATA/Results/RESULTS_CHANGE_DETECTION/GT_Montpellier/" + changes
        cm_truth_name = "mask_changes_small1"
        rows, cols = montpellier_windows[changes]
    elif changes in rostov_windows:
        path_cm = "/media/user/DATA/Results/RESULTS_CHANGE_DETECTION/GT_Rostov/"
        cm_truth_name = changes + "_1"
        rows, cols = rostov_windows[changes]
    else:
        raise ValueError("no ground truth is known for changes=%r" %
                         (changes,))

    # NOTE(review): the score uses the in-memory map, i.e. presumably the map
    # before sieve filtering; confirm this is the intended evaluation.
    cm_predicted = (np.reshape(image_array_outliers,
                               (H, W))[rows, cols]).flatten()
    print(cm_predicted.shape)

    cm_truth, _, _, _, _, _ = open_tiff(path_cm, cm_truth_name)
    cm_truth = cm_truth.flatten()
    print(cm_truth.shape)
    # 255 in the ground truth is counted as "no changes".
    cm_truth[cm_truth == 255] = 0

    print(
        classification_report(cm_truth,
                              cm_predicted,
                              target_names=["no changes", "changes"]))
    print(accuracy_score(cm_truth, cm_predicted))
    print(cohen_kappa_score(cm_truth, cm_predicted))
    conf = confusion_matrix(cm_truth, cm_predicted)
    print(conf)
    # Share of true change pixels that were predicted as "no changes".
    omission = conf[1][0] / sum(conf[1])
    print(omission)
def main():
    """Pretrain the sequence autoencoder on segment synopses and cluster them.

    Steps, in order:
      1. parse the command-line parameters and build the result folders;
      2. load the synopsis list (one row per segment) and the per-date
         segmentation rasters;
      3. zero-pad every synopsis sequence to the series length and pretrain
         the encoder/decoder with ``pretrain_lstm``;
      4. run ``encode_lstm`` for 5, 10, ..., 50 clusters, paint the labels on
         the segment footprints, write one K-means and one hierarchical
         raster (plus vector layer) per cluster count, and save the synopsis
         list extended with the hierarchical labels.

    Relies on the module-level settings ``alpha``, ``t1``, ``t2``, ``t3``,
    ``model`` and ``type`` that are defined outside this function.
    """
    gpu = on_gpu()
    print("ON GPU is " + str(gpu))

    # Parameters
    parser = argparse.ArgumentParser(description='train')
    parser.add_argument('--patch_size', default=1, type=int)
    parser.add_argument('--nb_features',
                        default=150,
                        type=int,
                        help="Number of hidden features in GRU.")
    parser.add_argument('--nb_features_final',
                        default=10,
                        type=int,
                        help="Number of final features of the encoder.")
    parser.add_argument(
        '--nb_clusters',
        default=15,
        type=int,
        help=
        "Number of desired clusters. In case if we do not compute for a range of clusters."
    )
    parser.add_argument('--batch_size', default=50, type=int)
    parser.add_argument('--epoch_nb', default=150, type=int)
    parser.add_argument('--learning_rate', default=0.001, type=float)
    args = parser.parse_args()

    # Start time
    start_time = time.time()
    run_name = "." + str(time.strftime("%Y-%m-%d_%H%M"))
    print(run_name)

    # Montpellier
    path_results_seg_series = os.path.expanduser(
        '~/Desktop/Results/Segmentation_outliers_upd_filled/Montpellier_SPOT5_graph_cut_series_2D/'
    )
    seg_folder_series = "series_sigma_0.3_k_6_min_10_bands_3_threshold_int_0.4/"
    folder_encoded = "patch_9_feat_5.2019-09-03_1619_noise1_mean_std"
    path_results = path_results_seg_series + seg_folder_series + "Graph_coverage_filtered/"
    params_tag = "alpha_" + str(alpha) + "_t1_" + str(t1) + "_t2_" + str(
        t2) + "_t3_" + str(t3)
    path_results_final = path_results + params_tag + "/"

    # We open the BB file that contains the synopses. Its rows hold
    # per-segment sequences, so the array is stored pickled and recent NumPy
    # versions refuse to load it without allow_pickle=True. The file is
    # produced by this project; do not point this at untrusted data.
    bb_final_list = np.load(path_results_final + "Graph_list_synopsys_" +
                            params_tag + "_" + folder_encoded + ".npy",
                            allow_pickle=True)
    # Eight empty columns are appended; columns 9-16 receive the cluster
    # labels further down.
    for _ in range(8):
        bb_final_list = np.c_[bb_final_list,
                              np.full(len(bb_final_list), None)]

    folder_results = "Synopsys_padding_feat_" + str(
        args.nb_features) + "_lr_" + str(args.learning_rate) + run_name

    # Folder with the results
    path_results_NN = path_results_final + model + "_" + type + "/" + folder_results + "/"
    create_dir(path_results_NN)
    stats_file = path_results_NN + 'stats.txt'
    path_model = path_results_NN + 'model' + run_name + "/"
    create_dir(path_model)

    # We add new arguments to the parser
    print_stats(stats_file, folder_encoded, print_to_console=False)
    print_stats(stats_file, str(args), print_to_console=False)
    parser.add_argument('--stats_file', default=stats_file)
    parser.add_argument('--path_results', default=path_results_NN)
    parser.add_argument('--path_model', default=path_model)
    parser.add_argument('--run_name', default=run_name)
    args = parser.parse_args()

    # We open segmentation rasters
    segm_array_list = []
    date_list = []
    image_name_segm_list = np.sort(
        list(
            filter(
                lambda f:
                (f.endswith(".TIF") and f.startswith("Segments_1D_20")),
                os.listdir(path_results))))
    print(image_name_segm_list)
    for image_name_segm in image_name_segm_list:
        date = (re.search("_([0-9]*).TIF", image_name_segm)).group(1)
        print(date)
        date_list.append(date)
        image_array_seg, H, W, geo, proj, bands_nb = open_tiff(
            path_results,
            os.path.splitext(image_name_segm)[0])
        segm_array_list.append(image_array_seg)
    # Series length as recorded in the first column of the synopsis list.
    nbr_images = np.max(bb_final_list[:, 0]) + 1

    # we get synopses
    if type == "mean":
        segments = bb_final_list[:, 8]
    else:
        segments = bb_final_list[:, 7]
    feat_nb = len(segments[0][0])

    # We zero-pad all the sequences, so they have the same length over the
    # dataset (equal to dataset length). See the article
    segments_padding = np.zeros((len(bb_final_list), nbr_images, feat_nb))
    for s, segment in enumerate(segments):
        segments_padding[s][:len(segment)] = segment
    print(segments_padding.shape)

    # We prepare the training dataset
    image = ImageDataset(segments_padding, args.patch_size, 0,
                         np.arange(len(segments)),
                         feat_nb)  # we create a dataset with tensor patches
    loader_pretrain = dsloader(image, gpu, args.batch_size, shuffle=True)
    loader_enc = dsloader(image, gpu, batch_size=1000, shuffle=False)

    # We initialize the model
    encoder = Encoder(feat_nb, args.nb_features,
                      args.nb_features_final)  # On CPU
    decoder = Decoder(feat_nb, args.nb_features,
                      args.nb_features_final)  # On CPU
    if gpu:
        encoder = encoder.cuda()  # On GPU
        decoder = decoder.cuda()  # On GPU
    print_stats(stats_file, str(encoder), print_to_console=False)

    # We pretrain the model
    pretrain_lstm(args.epoch_nb, encoder, decoder, loader_pretrain, args)

    # time.time() is used on both ends of the interval: time.clock() no longer
    # exists since Python 3.8 and never shared a reference with time.time().
    end_time = time.time()
    total_time_pretraining = end_time - start_time
    total_time_pretraining = str(
        datetime.timedelta(seconds=total_time_pretraining))
    print_stats(
        args.stats_file,
        "Total time pretraining =" + str(total_time_pretraining) + "\n")

    # We start encoding and clustering
    start_time = time.time()
    bb_final_list_flipped = np.flip(np.copy(bb_final_list), axis=0)
    print_stats(stats_file, 'Initializing clusters...')
    cl_nb = list(range(5, 51, 5))
    labels_list, labels_h_list, hidden_array = encode_lstm(
        encoder, W, loader_enc, cl_nb)

    # Column of the BB list that stores the hierarchical labels for a given
    # number of clusters; other cluster counts are not written to the list.
    label_column = {15: 9, 20: 10, 25: 11, 30: 12, 35: 13, 40: 14, 45: 15,
                    50: 16}
    # Flatten each segmentation raster once instead of once per segment.
    segm_flat_list = [segm.flatten() for segm in segm_array_list]

    for feat_cl, labels, labels_h in zip(cl_nb, labels_list, labels_h_list):
        print(feat_cl)
        labels, labels_h = np.flip(labels, axis=0), np.flip(labels_h, axis=0)
        new_labels = np.zeros((H * W))
        new_labels_h = np.zeros((H * W))
        column = label_column.get(feat_cl)

        for l in range(len(labels)):
            # We optionally write clustering results to the BB list
            if column is not None:
                bb_final_list_flipped[l, column] = labels_h[l] + 1
            img, ind = bb_final_list_flipped[l, 0:2]
            coverage_ind = np.where(segm_flat_list[img] == ind)[0]
            # Labels are shifted by one so that 0 stays the no-data value.
            new_labels[coverage_ind] = labels[l] + 1
            new_labels_h[coverage_ind] = labels_h[l] + 1

        # NOTE(review): the rasters go to args.path_results while
        # vectorize_tiff receives path_results (the coverage folder); confirm
        # the vector layers are meant to be written there.
        ds = create_tiff(
            1, args.path_results + "Kmeans_initial_clusters_" + str(feat_cl) +
            ".TIF", W, H, gdal.GDT_Int16, np.reshape(new_labels, (H, W)), geo,
            proj)
        ds.GetRasterBand(1).SetNoDataValue(0)
        vectorize_tiff(path_results, "Kmeans_initial_clusters_" + str(feat_cl),
                       ds)
        ds = None

        ds = create_tiff(
            1, args.path_results + "Hierarchical_initial_clusters_" +
            str(feat_cl) + ".TIF", W, H, gdal.GDT_Int16,
            np.reshape(new_labels_h, (H, W)), geo, proj)
        ds.GetRasterBand(1).SetNoDataValue(0)
        vectorize_tiff(path_results,
                       "Hierarchical_initial_clusters_" + str(feat_cl), ds)
        ds = None

    # The list is flipped back to its original row order before saving.
    np.save(
        args.path_results + "Graph_list_synopsys_clusters_" + params_tag,
        np.flip(np.copy(bb_final_list_flipped), axis=0))

    end_time = time.time()
    total_time_pretraining = end_time - start_time
    total_time_pretraining = str(
        datetime.timedelta(seconds=total_time_pretraining))
    print_stats(stats_file,
                "Total time encoding =" + str(total_time_pretraining) + "\n")
list_image_extended = [] list_image_date = [] new_images_list = [] if maskTrue: list_image_extended_temp = [] list_image_mask = [] for image_name_with_extention in images_list: # if image_name_with_extention.endswith(".TIF") and image_name_with_extention.startswith( # city_name) and not image_name_with_extention.endswith("band.TIF"): if image_name_with_extention.endswith( ".TIF") and not image_name_with_extention.endswith("band.TIF"): new_images_list.append(image_name_with_extention) img_path = path_datasets + image_name_with_extention path_list.append(img_path) image_array, H, W, geo, proj, bands_nb = open_tiff( path_datasets, os.path.splitext(image_name_with_extention)[0]) if satellite == "SPOT5": image_date = (re.search("XS_([0-9]*)_", image_name_with_extention)).group(1) if satellite == "S2": image_date = (re.search("S2_([0-9]*).", image_name_with_extention)).group(1) print(image_date) if bands_to_keep == 3: if satellite == "SPOT5": image_array = np.delete(image_array, 3, axis=0) bands_nb = 3 if satellite == "S2": image_array = np.delete(image_array, 0, axis=0) bands_nb = 3
lambda f: (f.endswith(".TIF") and f.startswith("Filled_grid_2")), os.listdir(path_results_final)))) covered_grids_flatten_to_be_filled_by_bb_list = np.sort( list( filter( lambda f: (f.endswith(".TIF") and f.startswith("Filled_grid_by_bb_")), os.listdir(path_results_final)))) nbr_img = len(covered_grids_flatten_to_be_filled_list) covered_grids_flatten_to_be_filled_by_bb = [] for i in range(nbr_img): grid_filled_name = covered_grids_flatten_to_be_filled_list[i] grid_filled_by_bb_name = covered_grids_flatten_to_be_filled_by_bb_list[i] grid_filled, H, W, geo, proj, bands_nb = open_tiff( path_results_final, os.path.splitext(grid_filled_name)[0]) grid_filled_by_bb, H, W, geo, proj, bands_nb = open_tiff( path_results_final, os.path.splitext(grid_filled_by_bb_name)[0]) covered_grids_flatten_to_be_filled_by_bb.append( np.transpose(np.reshape(grid_filled_by_bb, (bands_nb, H * W)))) unique, count = np.unique(grid_filled, return_counts=True) if 0 in unique: perc = int(count[1] / (count[1] + count[2]) * 100) else: perc = 0 print_stats( stats_file, ("Image " + str(i) + " has " + str(perc) + "% uncovered pixels"),
def otsu(image_array_loss1, image_array_loss2, H, W, geo, proj, path_results,
         images_date, changes=None, threshold=0.995):
    """Build a binary change map from the mean reconstruction error with Otsu.

    The two loss images are averaged, rescaled so that the maximum maps to
    256 and truncated to integers (so the skimage threshold function can be
    used). Otsu's threshold is estimated on the lowest ``threshold`` fraction
    of the sorted values: the highest reconstruction errors are treated as
    outliers and excluded. Pixels above the threshold are marked 1 (changes),
    the others 0. The map is written as an Int16 GeoTIFF and vectorised; when
    ``changes`` names a ground-truth set, classification scores are printed.

    Args:
        image_array_loss1, image_array_loss2: reconstruction-error arrays
            holding H * W values each.
        H, W: raster height and width.
        geo, proj: georeferencing objects forwarded to ``create_tiff``.
        path_results: output folder.
        images_date: string inserted in the output file name.
        changes: ``"changes_2004_2005"``, ``"changes_2006_2008"`` or None to
            skip the evaluation.
        threshold: fraction of the sorted values kept for the Otsu estimate
            (0.995 excludes the top 0.5 %). It is also part of the file name.

    Raises:
        ValueError: if ``changes`` is not a known ground-truth set (raised at
            evaluation time, after the rasters have been written).
    """
    # We calculate the average reconstruction error image
    image_array_loss = np.divide((image_array_loss1 + image_array_loss2), 2)

    # We rescale the values so the maximum becomes 256; an all-zero loss image
    # is left untouched instead of being divided by zero.
    max_ = np.max(image_array_loss)
    coef = max_ / 256 if max_ != 0 else 1
    loss_flat = np.asarray(image_array_loss / coef, dtype=int).flatten()

    # Obtained threshold value
    val = filters.threshold_otsu(
        np.sort(loss_flat)[0:int(H * W * threshold)])

    # Binary change map (1 - changes, 0 - no changes), written to tiff and shp
    image_array_outliers = np.zeros(H * W)
    image_array_outliers[loss_flat > val] = 1

    outliers_image_mean = "Outliers_average_" + images_date + "_" + str(
        threshold)
    dst_ds = create_tiff(1, path_results + "/" + outliers_image_mean + ".TIF",
                         W, H, gdal.GDT_Int16,
                         np.reshape(image_array_outliers, (H, W)), geo, proj)
    vectorize_tiff(path_results, "/" + outliers_image_mean, dst_ds)
    dst_ds = None

    # We calculate the stats only if a ground truth exists for this couple
    if changes is None:
        return

    # Ground-truth windows (row slice, column slice) inside the full raster.
    ground_truth_windows = {
        "changes_2004_2005": (slice(0, 600), slice(600, 1400)),
        "changes_2006_2008": (slice(100, 370), slice(1000, 1320)),
    }
    if changes not in ground_truth_windows:
        raise ValueError("no ground truth is known for changes=%r" %
                         (changes,))
    rows, cols = ground_truth_windows[changes]

    path_cm = '/home/user/Dropbox/IJCNN/images/' + changes
    cm_truth_name = "mask_changes_small1"
    cm_predicted = (np.reshape(image_array_outliers,
                               (H, W))[rows, cols]).flatten()

    cm_truth, _, _, _, _, _ = open_tiff(path_cm, cm_truth_name)
    cm_truth = cm_truth.flatten()
    # 255 in the ground truth is counted as "no changes".
    cm_truth[cm_truth == 255] = 0

    # Different stats taken from scikit
    print(
        classification_report(cm_truth,
                              cm_predicted,
                              target_names=["no changes", "changes"]))
    print(accuracy_score(cm_truth, cm_predicted))
    print(cohen_kappa_score(cm_truth, cm_predicted))
    conf = confusion_matrix(cm_truth, cm_predicted)
    print(conf)
    # Share of true change pixels that were predicted as "no changes".
    omission = conf[1][0] / sum(conf[1])
    print(omission)
def _otsu_exceedance(image_array_loss, H, W, threshold):
    """Return a flat boolean array that is True where the rescaled loss
    exceeds the Otsu threshold estimated on the lowest ``threshold`` fraction
    of its sorted values."""
    max_ = np.max(image_array_loss)
    # An all-zero loss image is left untouched instead of divided by zero.
    coef = max_ / 256 if max_ != 0 else 1
    loss_flat = np.asarray(image_array_loss / coef, dtype=int).flatten()
    val = filters.threshold_otsu(
        np.sort(loss_flat)[0:int(H * W * threshold)])
    return loss_flat > val


def otsu_independent(image_array_loss1, image_array_loss2, H, W, geo, proj,
                     path_results, images_date, changes=None,
                     threshold=0.995):
    """Threshold each reconstruction-error image separately and merge the maps.

    Each loss image is rescaled so that its maximum maps to 256, truncated to
    integers and thresholded with Otsu on the lowest ``threshold`` fraction of
    its sorted values. A pixel is marked 1 (change) when it exceeds the
    threshold in at least one of the two images. The map is written as an
    Int16 GeoTIFF and vectorised; when ``changes`` names a ground-truth set,
    classification scores are printed.

    Args:
        image_array_loss1, image_array_loss2: reconstruction-error arrays
            holding H * W values each.
        H, W: raster height and width.
        geo, proj: georeferencing objects forwarded to ``create_tiff``.
        path_results: output folder.
        images_date: string inserted in the output file name.
        changes: ``"changes_2004_2005"``, ``"changes_2006_2008"`` or None to
            skip the evaluation.
        threshold: fraction of the sorted values kept for each Otsu estimate;
            it is also part of the output file name.

    Raises:
        ValueError: if ``changes`` is not a known ground-truth set (raised at
            evaluation time, after the rasters have been written).
    """
    # Change pixels of the 2nd image are added to those of the 1st one.
    changed_in_first = _otsu_exceedance(image_array_loss1, H, W, threshold)
    changed_in_second = _otsu_exceedance(image_array_loss2, H, W, threshold)
    image_array_outliers = np.zeros(H * W)
    image_array_outliers[changed_in_first | changed_in_second] = 1

    # We write tiff and shp
    outliers_image_mean = "Outliers_average_" + images_date + "_independent_" + str(
        threshold)
    dst_ds = create_tiff(1, path_results + "/" + outliers_image_mean + ".TIF",
                         W, H, gdal.GDT_Int16,
                         np.reshape(image_array_outliers, (H, W)), geo, proj)
    vectorize_tiff(path_results, "/" + outliers_image_mean, dst_ds)
    dst_ds = None

    # We calculate the classification stats if the ground truth is available
    if changes is None:
        return

    # Ground-truth windows (row slice, column slice) inside the full raster.
    ground_truth_windows = {
        "changes_2004_2005": (slice(0, 600), slice(600, 1400)),
        "changes_2006_2008": (slice(100, 370), slice(1000, 1320)),
    }
    if changes not in ground_truth_windows:
        raise ValueError("no ground truth is known for changes=%r" %
                         (changes,))
    rows, cols = ground_truth_windows[changes]

    path_cm = '/home/user/Dropbox/IJCNN/images/' + changes
    cm_truth_name = "mask_changes_small1"
    print(image_array_outliers.shape)
    cm_predicted = (np.reshape(image_array_outliers,
                               (H, W))[rows, cols]).flatten()

    cm_truth, _, _, _, _, _ = open_tiff(path_cm, cm_truth_name)
    cm_truth = cm_truth.flatten()
    # 255 in the ground truth is counted as "no changes".
    cm_truth[cm_truth == 255] = 0

    # Different stats taken from scikit
    print(
        classification_report(cm_truth,
                              cm_predicted,
                              target_names=["no changes", "changes"]))
    print(accuracy_score(cm_truth, cm_predicted))
    print(cohen_kappa_score(cm_truth, cm_predicted))
    conf = confusion_matrix(cm_truth, cm_predicted)
    print(conf)
    # Share of true change pixels that were predicted as "no changes".
    omission = conf[1][0] / sum(conf[1])
    print(omission)
nmi_new_list = [] nmi_bi_list = [] iter = list(range(5, 51, 5)) for cl in iter: print("Clusters=" + str(cl)) path_results = os.path.expanduser( '~/Desktop/Results/Segmentation_outliers_upd_filled/Montpellier_SPOT5_graph_cut_series_2D/series_sigma_0.3_k_6_min_10_bands_3_threshold_int_0.4/Graph_coverage_filtered/alpha_0.4_t1_0.4_t2_0_t3_0.2/' ) loss_folder = "LSTM_linear_mean/Synopsys_padding_feat_150_lr_0.001.2019-10-04_1601/" image_name_loss = "Kmeans_initial_clusters_" + str(cl) image_name_loss = "Hierarchical_initial_clusters_" + str(cl) image_array_cl, H, W, geo, proj, bands_nb = open_tiff( path_results + loss_folder, image_name_loss) path_cm = '/home/user/Desktop/Results/Segmentation_outliers_upd_filled/' cm_truth_name = "GT_Classes_Montpellier1" cm_predicted = image_array_cl.flatten() ind = np.where(cm_predicted > 0)[0] # print(cm_predicted.shape) cm_truth, _, _, _, _, _ = open_tiff(path_cm, cm_truth_name) cm_truth = cm_truth.flatten() # cm_truth[cm_truth == 1] = 0 ind = np.intersect1d( np.where(cm_predicted > 0)[0], np.where(cm_truth > 0)[0])
def _str2bool(text):
    """argparse ``type`` for booleans: ``bool("False")`` is True, so the text
    is interpreted explicitly."""
    lowered = str(text).strip().lower()
    if lowered in ("1", "true", "t", "yes", "y"):
        return True
    if lowered in ("0", "false", "f", "no", "n"):
        return False
    raise argparse.ArgumentTypeError("expected a boolean, got %r" % (text,))


def _parse_layers(text):
    """argparse ``type`` for layer lists written as a Python literal, e.g.
    ``"[32, 32, 64, 64]"`` (``type=list`` would split the text into
    characters)."""
    import ast
    try:
        value = ast.literal_eval(text)
    except (ValueError, SyntaxError):
        raise argparse.ArgumentTypeError(
            "expected a list literal such as [16, 16, True], got %r" %
            (text,))
    if not isinstance(value, (list, tuple)):
        raise argparse.ArgumentTypeError(
            "expected a list literal such as [16, 16, True], got %r" %
            (text,))
    return list(value)


def _standardize_bands(stack):
    """Centre and scale, in place, every band of a (time, band, row, col)
    stack with the mean and std computed over the whole series."""
    for band in range(len(stack[0])):
        all_images_band = stack[:, band, :, :].flatten()
        mean = np.mean(all_images_band)
        std = np.std(all_images_band)
        stack[:, band] = (stack[:, band] - mean) / std


def _rescale_bands(stack):
    """Rescale, in place, every band of the stack to [0, 1] using the min and
    max computed over the whole series."""
    for band in range(len(stack[0])):
        all_images_band = stack[:, band, :, :].flatten()
        lowest = np.min(all_images_band)
        highest = np.max(all_images_band)
        stack[:, band] = (stack[:, band] - lowest) / (highest - lowest)


def _band_mean_std(stack):
    """Return ``[[mean, std], ...]`` with one entry per band of the stack."""
    stats = []
    for band in range(len(stack[0])):
        all_images_band = stack[:, band, :, :].flatten()
        stats.append([np.mean(all_images_band), np.std(all_images_band)])
    return stats


def main():
    """Train the two-branch (image + NDVI) 3D autoencoder and encode the SITS.

    Steps, in order:
      1. parse the command-line parameters and create the result folders;
      2. open, clip (saturated pixels), extend and sort by date the original
         images, then normalise them; the same is done for the NDVI images;
      3. build the patch dataset and pretrain encoder/decoder with
         ``pretrain``;
      4. encode every pixel, stretch each feature to [0, 255] and write the
         encoded image as a multi-band Int16 GeoTIFF.
    """
    gpu = on_gpu()
    print("ON GPU is " + str(gpu))

    # Parameters
    parser = argparse.ArgumentParser(description='train')
    parser.add_argument('--satellite',
                        default="SPOT5",
                        type=str,
                        help="choose from SPOT5 and S2")
    parser.add_argument('--patch_size', default=9, type=int)
    parser.add_argument('--patch_size_ndvi', default=5, type=int)
    parser.add_argument('--nb_features',
                        default=10,
                        type=int,
                        help="f parameter from the article")
    parser.add_argument('--batch_size', default=150, type=int)
    parser.add_argument(
        '--bands_to_keep',
        default=4,
        type=int,
        help=
        'whether we delete swir band for spot-5 or blue for S2, defauld - all 4 bands'
    )
    parser.add_argument('--epoch_nb', default=2, type=int)
    parser.add_argument('--learning_rate', default=0.0001, type=float)
    parser.add_argument('--noise_factor',
                        default=0.25,
                        type=float,
                        help='for denoising AE, original images')
    parser.add_argument('--noise_factor_ndvi',
                        default=None,
                        type=float,
                        help='for denoising AE, NDVI branch')
    parser.add_argument(
        '--centered',
        default=True,
        type=_str2bool,
        help='whether we center data with mean and std before training')
    parser.add_argument(
        '--original_layers',
        default=[32, 32, 64, 64],
        type=_parse_layers,
        help='Nb of conv. layers to build AE')  # Default article model
    parser.add_argument(
        '--ndvi_layers',
        default=[16, 16, True],
        type=_parse_layers,
        help='Nb of conv. layers to build AE and pooling option'
    )  # Default article model
    args = parser.parse_args()

    start_time = time.time()
    run_name = "." + str(time.strftime("%Y-%m-%d_%H%M%S"))
    print(run_name)

    # We define all the paths
    path_results_final = os.path.expanduser('~/Desktop/Results/TS_clustering/')
    folder_results = "Double_Trivial_feat_" + str(
        args.nb_features) + "_patch_" + str(args.patch_size) + run_name
    if args.satellite == "SPOT5":
        path_datasets = os.path.expanduser(
            '~/Desktop/Datasets/Montpellier_SPOT5_Clipped_relatively_normalized_03_02_mask_vegetation_water_mode_parts_2004_no_DOS1_/'
        )
        path_datasets_ndvi = os.path.expanduser(
            '~/Desktop/Results/TS_clustering/NDVI_results/NDVI_images/')
        path_results = path_results_final + "Conv_3D/" + folder_results + "/"
    else:
        path_datasets = os.path.expanduser(
            '~/Desktop/Datasets/Montpellier_S2_Concatenated_1C_Clipped_norm_4096/'
        )
        path_datasets_ndvi = os.path.expanduser(
            '~/Desktop/Results/TS_clustering/NDVI_results/NDVI_images_S2/')
        path_results = path_results_final + "Conv_3D_S2/" + folder_results + "/"
    create_dir(path_results)
    stats_file = path_results + 'stats.txt'
    path_model = path_results + 'model' + run_name + "/"
    create_dir(path_model)
    print_stats(stats_file, str(args), print_to_console=True)
    # The derived paths are registered as arguments so that they travel with
    # ``args`` into the training helpers.
    parser.add_argument('--stats_file', default=stats_file)
    parser.add_argument('--path_results', default=path_results)
    parser.add_argument('--path_model', default=path_model)
    parser.add_argument('--run_name', default=run_name)
    args = parser.parse_args()

    # This part opens and pre-processes the original images before creating
    # a dataset.
    list_image_extended = []
    list_image_date = []
    for image_name_with_extention in os.listdir(path_datasets):
        if not image_name_with_extention.endswith(".TIF"):
            continue
        if image_name_with_extention.endswith("band.TIF"):
            continue
        if args.satellite == "SPOT5":
            image_date = (re.search("_([0-9]*)_",
                                    image_name_with_extention)).group(1)
        else:
            image_date = (re.search("S2_([0-9]*).",
                                    image_name_with_extention)).group(1)
        image_array, H, W, geo, proj, bands_nb = open_tiff(
            path_datasets,
            os.path.splitext(image_name_with_extention)[0])
        if args.bands_to_keep == 3:
            if args.satellite == "SPOT5":
                image_array = np.delete(image_array, 3, axis=0)
            if args.satellite == "S2":
                image_array = np.delete(image_array, 0, axis=0)

        # We deal with all the saturated pixels: values above the sensor limit
        # are replaced by the highest non-saturated value of the band.
        saturation = {"S2": 4096, "SPOT5": 475}.get(args.satellite)
        if saturation is not None:
            for b in range(len(image_array)):
                image_array[b][image_array[b] > saturation] = np.max(
                    image_array[b][image_array[b] <= saturation])

        # We mirror border rows and columns so patches can be clipped for the
        # pixels lying on the image border.
        image_extended = extend(image_array, args.patch_size)
        list_image_extended.append(image_extended)
        list_image_date.append(image_date)

    sort_ind = np.argsort(
        list_image_date)  # we arrange images by date of acquisition
    list_image_extended = np.asarray(list_image_extended,
                                     dtype=float)[sort_ind]
    bands_nb = list_image_extended.shape[1]
    temporal_dim = list_image_extended.shape[0]
    list_image_date = np.asarray(list_image_date)[sort_ind]
    print(list_image_date)

    # Optional standardisation, then rescaling to [0, 1]; the mean and std of
    # the rescaled data are handed to the encoder.
    if args.centered is True:
        _standardize_bands(list_image_extended)
    _rescale_bands(list_image_extended)
    list_norm = _band_mean_std(list_image_extended)

    # We do exactly the same with the NDVI images.
    list_image_extended_ndvi = []
    list_image_date_ndvi = []
    for image_name_with_extention_ndvi in os.listdir(path_datasets_ndvi):
        if not (image_name_with_extention_ndvi.endswith(".TIF")
                and image_name_with_extention_ndvi.startswith("NDVI_")):
            continue
        image_date_ndvi = (re.search(
            "_([0-9]*).", image_name_with_extention_ndvi)).group(1)
        image_array_ndvi, H, W, geo, proj, _ = open_tiff(
            path_datasets_ndvi,
            os.path.splitext(image_name_with_extention_ndvi)[0])
        image_array_ndvi = np.reshape(image_array_ndvi, (1, H, W))
        image_extended_ndvi = extend(image_array_ndvi, args.patch_size_ndvi)
        list_image_extended_ndvi.append(image_extended_ndvi)
        list_image_date_ndvi.append(image_date_ndvi)

    sort_ind_ndvi = np.argsort(
        list_image_date_ndvi)  # we arrange images by date of acquisition
    list_image_extended_ndvi = np.asarray(list_image_extended_ndvi,
                                          dtype=float)[sort_ind_ndvi]
    list_image_date_ndvi = np.asarray(list_image_date_ndvi)[sort_ind_ndvi]
    print(list_image_date_ndvi)

    if args.centered is True:
        _standardize_bands(list_image_extended_ndvi)
    _rescale_bands(list_image_extended_ndvi)
    list_norm_ndvi = _band_mean_std(list_image_extended_ndvi)

    # We create a training dataset from our SITS; the band axis goes first.
    list_image_extended_tr = np.transpose(list_image_extended, (1, 0, 2, 3))
    list_image_extended_ndvi_tr = np.transpose(list_image_extended_ndvi,
                                               (1, 0, 2, 3))
    nbr_patches_per_image = H * W  # Nbr of training patches for the dataset
    print_stats(stats_file,
                "Nbr of training patches " + str(nbr_patches_per_image),
                print_to_console=True)
    image = ImageDataset(
        list_image_extended_tr, list_image_extended_ndvi_tr, args.patch_size,
        args.patch_size_ndvi,
        range(nbr_patches_per_image
              ))  # we create a dataset with tensor patches
    loader_pretrain = dsloader(image, gpu, args.batch_size, shuffle=True)
    image = None

    # We create encoder and decoder models
    if args.noise_factor is not None:
        encoder = Encoder(bands_nb, args.patch_size, args.patch_size_ndvi,
                          args.nb_features, temporal_dim,
                          args.original_layers, args.ndvi_layers,
                          np.asarray(list_norm), np.asarray(list_norm_ndvi),
                          args.noise_factor,
                          args.noise_factor_ndvi)  # On CPU
    else:
        encoder = Encoder(bands_nb, args.patch_size, args.patch_size_ndvi,
                          args.nb_features, temporal_dim,
                          args.original_layers, args.ndvi_layers)  # On CPU
    decoder = Decoder(bands_nb, args.patch_size, args.patch_size_ndvi,
                      args.nb_features, temporal_dim, args.original_layers,
                      args.ndvi_layers)  # On CPU
    if gpu:
        encoder = encoder.cuda()  # On GPU
        decoder = decoder.cuda()  # On GPU
    print_stats(stats_file, str(encoder), print_to_console=False)

    # We pretrain the model
    pretrain(args.epoch_nb, encoder, decoder, loader_pretrain, args)
    end_time = time.time()
    total_time_pretraining = end_time - start_time
    total_time_pretraining = str(
        datetime.timedelta(seconds=total_time_pretraining))
    print_stats(
        args.stats_file,
        "Total time pretraining =" + str(total_time_pretraining) + "\n")

    # We pass to the encoding part
    start_time = time.time()
    # We create a dataset for SITS encoding, its size depends on the
    # available memory
    image = None
    loader_pretrain = None
    image = ImageDataset(list_image_extended_tr, list_image_extended_ndvi_tr,
                         args.patch_size, args.patch_size_ndvi,
                         range(H * W))  # dataset with tensor patches
    # Smaller batches are tried when building the loader raises RuntimeError.
    try:
        batch_size = W
        loader_enc_final = dsloader(image,
                                    gpu,
                                    batch_size=batch_size,
                                    shuffle=False)
    except RuntimeError:
        try:
            batch_size = int(W / 5)
            loader_enc_final = dsloader(image,
                                        gpu,
                                        batch_size=batch_size,
                                        shuffle=False)
        except RuntimeError:
            batch_size = int(W / 20)
            loader_enc_final = dsloader(image,
                                        gpu,
                                        batch_size=batch_size,
                                        shuffle=False)
    image = None

    print_stats(stats_file, 'Encoding...')
    encoded_array = encoding(encoder, loader_enc_final, batch_size)

    # We stretch encoded images between 0 and 255
    for band in range(args.nb_features):
        lowest = np.min(encoded_array[:, band])
        highest = np.max(encoded_array[:, band])
        encoded_array[:, band] = 255 * (encoded_array[:, band] -
                                        lowest) / (highest - lowest)
    print(encoded_array.shape)

    # We write the image
    new_encoded_array = np.transpose(encoded_array, (1, 0))
    ds = create_tiff(
        encoded_array.shape[-1], args.path_results + "Encoded_3D_conv_" +
        str(encoded_array.shape[-1]) + ".TIF", W, H, gdal.GDT_Int16,
        np.reshape(new_encoded_array, (encoded_array.shape[-1], H, W)), geo,
        proj)
    ds.GetRasterBand(1).SetNoDataValue(-9999)
    ds = None

    end_time = time.time()
    total_time_pretraining = end_time - start_time
    total_time_pretraining = str(
        datetime.timedelta(seconds=total_time_pretraining))
    print_stats(stats_file,
                "Total time encoding =" + str(total_time_pretraining) + "\n")
def main():
    """Train a patch-wise autoencoder on an image time series, then encode every image.

    The function takes no arguments (hyper-parameters come from the command
    line) and returns nothing. In order, it:

    1. parses the hyper-parameters and creates the result and model folders;
    2. opens every ``.TIF`` image of the dataset folder (``*band.TIF`` files
       are skipped), optionally removes one band, extends the image borders
       and sorts the images by the date found in their file name;
    3. standardises every band with the statistics of the whole series,
       rescales it to [0, 1] and recomputes the per-band mean/std that is
       handed to the encoder;
    4. builds randomly sampled training and validation patch datasets;
    5. trains the encoder/decoder pair with early stopping, saving the models
       after every epoch;
    6. encodes every image of the series with the trained encoder.

    Raises:
        RuntimeError: if the dataset folder contains no usable ``.TIF`` image.
    """
    gpu = on_gpu()
    print("ON GPU is " + str(gpu))

    start_time = time.time()
    run_name = "." + str(time.strftime("%Y-%m-%d_%H%M"))
    print(run_name)

    # Parameters
    parser = argparse.ArgumentParser(description='train')
    parser.add_argument('--patch_size', default=9, type=int)
    parser.add_argument('--nb_features', default=5, type=int)
    parser.add_argument('--batch_size', default=150, type=int)
    parser.add_argument('--bands_to_keep', default=4, type=int)
    parser.add_argument('--epoch_nb', default=4, type=int)
    parser.add_argument('--satellite', default="SPOT5", type=str)
    parser.add_argument('--learning_rate', default=0.0001, type=float)
    args = parser.parse_args()

    # path with images to encode
    path_datasets = os.path.expanduser(
        '~/Desktop/Datasets/Montpellier_SPOT5_Clipped_relatively_normalized_03_02_mask_vegetation_water_mode_parts_2004_no_DOS1_/'
    )

    # folder and path to results
    folder_results = (
        "All_images_ep_" + str(args.epoch_nb)
        + "_patch_" + str(args.patch_size)
        + "_batch_" + str(args.batch_size)
        + "_feat_" + str(args.nb_features)
        + "_lr_" + str(args.learning_rate)
        + run_name + "_noise1"
    )
    path_results = os.path.expanduser('~/Desktop/Results/Encode_TS_noise/') + folder_results + "/"
    create_dir(path_results)

    # folder with AE models
    path_model = path_results + 'model' + run_name + "/"
    create_dir(path_model)

    # file with corresponding statistics
    stats_file = path_results + 'stats.txt'
    print_stats(stats_file, str(args), print_to_console=False)

    # The derived paths are registered as extra arguments and the command line
    # is parsed a second time, so that they are available through `args`.
    parser.add_argument('--stats_file', default=stats_file)
    parser.add_argument('--path_results', default=path_results)
    parser.add_argument('--path_model', default=path_model)
    parser.add_argument('--run_name', default=run_name)
    args = parser.parse_args()

    # We open images and "extend" them (we mirror border rows and columns for correct patch extraction)
    list_image_extended = []
    list_image_date = []
    for image_name_with_extention in os.listdir(path_datasets):
        if not image_name_with_extention.endswith(".TIF"):
            continue
        if image_name_with_extention.endswith("band.TIF"):
            continue
        image_date = (re.search("_([0-9]*)_", image_name_with_extention)).group(1)
        # we open images
        image_array, H, W, geo, proj, bands_nb = open_tiff(
            path_datasets, os.path.splitext(image_name_with_extention)[0])
        # we delete swir bands for spot-5 or blue for Sentinel-2 if needed
        if args.bands_to_keep == 3:
            if args.satellite == "SPOT5":
                image_array = np.delete(image_array, 3, axis=0)
            else:
                image_array = np.delete(image_array, 0, axis=0)
            bands_nb = args.bands_to_keep
        # we extend image
        list_image_extended.append(extend(image_array, args.patch_size))
        list_image_date.append(image_date)

    if not list_image_extended:
        raise RuntimeError("No .TIF images found in " + path_datasets)

    # we arrange images by date of acquisition
    sort_ind = np.argsort(list_image_date)
    list_image_extended = np.asarray(list_image_extended, dtype=float)[sort_ind]
    list_image_date = np.asarray(list_image_date)[sort_ind]

    nb_images = len(list_image_extended)
    nb_bands = len(list_image_extended[0])

    # We normalize all the images with dataset mean and std.
    # Statistics are taken over the flattened band of the whole series.
    for band in range(nb_bands):
        band_pixels = list_image_extended[:, band, :, :].flatten()
        band_mean = np.mean(band_pixels)
        band_std = np.std(band_pixels)
        list_image_extended[:, band] = (list_image_extended[:, band] - band_mean) / band_std

    # We rescale from 0 to 1
    for band in range(nb_bands):
        band_pixels = list_image_extended[:, band, :, :].flatten()
        band_min = np.min(band_pixels)
        band_max = np.max(band_pixels)
        list_image_extended[:, band] = (list_image_extended[:, band] - band_min) / (band_max - band_min)

    # We recompute mean and std to use them for creation of Gaussian noise later
    list_norm = []
    for band in range(nb_bands):
        band_pixels = list_image_extended[:, band, :, :].flatten()
        list_norm.append([np.mean(band_pixels), np.std(band_pixels)])

    # We create training and validation datasets with H*W/(SITS_length)*2 patches
    # by concatenating datasets created for every image.
    # The count is clamped to H*W: with a single image the formula asks for
    # 2*H*W samples, which random.sample() cannot draw without replacement.
    nb_pixels = H * W
    nbr_patches_per_image = min(int(H * W / nb_images * 2), nb_pixels)
    nbr_patches_valid = int(nbr_patches_per_image / 100)
    train_parts = []
    valid_parts = []
    for ii in range(nb_images):
        samples_list = np.sort(sample(range(nb_pixels), nbr_patches_per_image))
        samples_list_valid = np.sort(sample(range(nb_pixels), nbr_patches_valid))
        # we create datasets with tensor patches
        train_parts.append(
            ImageDataset(list_image_extended[ii], args.patch_size, ii, samples_list))
        valid_parts.append(
            ImageDataset(list_image_extended[ii], args.patch_size, ii, samples_list_valid))
    image = torch.utils.data.ConcatDataset(train_parts)
    image_valid = torch.utils.data.ConcatDataset(valid_parts)
    loader = dsloader(image, gpu, args.batch_size, shuffle=True)
    loader_valid = dsloader(image_valid, gpu, H, shuffle=False)

    # we create AE model
    encoder = Encoder(bands_nb, args.patch_size, args.nb_features, np.asarray(list_norm))  # On CPU
    decoder = Decoder(bands_nb, args.patch_size, args.nb_features)  # On CPU
    if gpu:
        encoder = encoder.cuda()  # On GPU
        decoder = decoder.cuda()  # On GPU

    optimizer_encoder = torch.optim.Adam(encoder.parameters(), lr=args.learning_rate)
    optimizer_decoder = torch.optim.Adam(decoder.parameters(), lr=args.learning_rate)
    criterion = nn.MSELoss()

    with open(path_results + "stats.txt", 'a') as f:
        f.write(str(encoder) + "\n")

    # Here we deploy early stopping algorithm taken from https://github.com/Bjarten/early-stopping-pytorch
    # to track the average training loss per epoch as the model trains
    avg_train_losses = []
    # to track the average validation loss per epoch as the model trains
    avg_valid_losses = []
    early_stopping = EarlyStopping(patience=1, verbose=True)

    total_train_patches = nbr_patches_per_image * nb_images

    # we train the model
    def train(epoch):
        """Run one training epoch followed by one validation pass."""
        encoder.train()
        decoder.train()
        train_loss_total = 0
        for batch_idx, (data, _, _) in enumerate(loader):
            if gpu:
                data = data.cuda()
            encoded, id1 = encoder(Variable(data))
            decoded = decoder(encoded, id1)
            loss = criterion(decoded, Variable(data))
            train_loss_total += loss.item()
            optimizer_encoder.zero_grad()
            optimizer_decoder.zero_grad()
            loss.backward()
            optimizer_encoder.step()
            optimizer_decoder.step()
            if (batch_idx + 1) % 200 == 0:
                print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                    (epoch), (batch_idx + 1) * args.batch_size,
                    total_train_patches,
                    100. * (batch_idx + 1) / len(loader), loss.item()))
        train_loss_total = train_loss_total / len(loader)
        epoch_stats = "Epoch {} Complete: Avg. Loss: {:.7f}".format(epoch, train_loss_total)
        print(epoch_stats)
        with open(path_results + "stats.txt", 'a') as f:
            f.write(epoch_stats + "\n")

        # We save trained model after each epoch. Optional
        model_suffix = (str(epoch + 1) + "_loss_" + str(round(train_loss_total, 5))
                        + run_name + '.pkl')
        torch.save([encoder, decoder], (path_model + 'ae-model_ep_' + model_suffix))
        torch.save([encoder.state_dict(), decoder.state_dict()],
                   (path_model + 'ae-dict_ep_' + model_suffix))

        # Validation part
        valid_loss_total = 0
        encoder.eval()
        decoder.eval()  # prep model for evaluation
        # No gradients are needed to measure the validation loss.
        with torch.no_grad():
            for batch_idx, (data, _, _) in enumerate(loader_valid):
                if gpu:
                    data = data.cuda()
                # forward pass: compute predicted outputs by passing inputs to the model
                encoded, id1 = encoder(Variable(data))
                decoded = decoder(encoded, id1)
                # calculate the loss
                loss = criterion(decoded, Variable(data))
                # record validation loss
                valid_loss_total += loss.item()
        valid_loss_total = valid_loss_total / len(loader_valid)

        avg_train_losses.append(train_loss_total)
        avg_valid_losses.append(valid_loss_total)

        epoch_len = len(str(args.epoch_nb))
        print_msg = (f'[{epoch:>{epoch_len}}/{args.epoch_nb:>{epoch_len}}] ' +
                     f'train_loss: {train_loss_total:.5f} ' +
                     f'valid_loss: {valid_loss_total:.5f}')
        print(print_msg)

        # We plot the loss
        if (epoch + 1) % 5 == 0:
            plotting(epoch, avg_train_losses, path_results)

        # early_stopping needs the validation loss to check if it has decreased,
        # and if it has, it will make a checkpoint of the current model
        early_stopping(valid_loss_total, [encoder, decoder])

    for epoch in range(1, args.epoch_nb + 1):
        train(epoch)
        if early_stopping.early_stop:
            print("Early stopping")
            break

    # time.time() is used everywhere so that the differences below compare
    # values from the same clock (time.clock() no longer exists in Python 3.8+).
    end_time_learning = time.time()
    total_time_learning = end_time_learning - start_time
    total_time_learning = str(datetime.timedelta(seconds=total_time_learning))
    print_stats(args.stats_file, "Total time pretraining =" + str(total_time_learning) + "\n")

    # We get the best model (here by default it is the last one)
    best_epoch = epoch
    best_epoch_loss = avg_train_losses[best_epoch - 1]
    print("best epoch " + str(best_epoch))
    print("best epoch loss " + str(best_epoch_loss))
    best_encoder = encoder
    if gpu:
        best_encoder = best_encoder.cuda()  # On GPU

    # ENCODING PART
    for ii in range(nb_images):
        print("Encoding " + str(list_image_date[ii]))
        samples_list = np.array(range(H * W))
        # we create a dataset with tensor patches covering every pixel
        image_encode = ImageDataset(list_image_extended[ii], args.patch_size, ii, samples_list)
        loader_encode = dsloader(image_encode, gpu, H, shuffle=False)
        name_results = list_image_date[ii]
        encode_image(best_encoder, loader_encode, H * 10, args.nb_features,
                     gpu, H, W, geo, proj, name_results, path_results)

    end_time_encoding = time.time()
    total_time_encoding = end_time_encoding - end_time_learning
    total_time_encoding = str(datetime.timedelta(seconds=total_time_encoding))
    print_stats(args.stats_file, "Total time encoding =" + str(total_time_encoding) + "\n")
def calculate_stats(folder_enc, segmentation_name, clustering_final_name, apply_mask_outliers=True, S2=False):
    """Score clustering maps against a custom Corine Land Cover ground truth.

    A ground-truth map is built from the level-1 and level-3 CLC rasters,
    written to disk and vectorized. Then, for both descriptor types ("mean"
    and "median") and for every cluster count from 8 to 15, the matching
    clustering raster is opened and compared with the ground truth using NMI
    and ARI. Rounded scores are appended to ``stats.txt`` in the encoding
    folder and echoed to the console.

    Relies on the module-level name ``path_main`` for the root results folder.

    Args:
        folder_enc: sub-folder of ``path_main`` holding the encoding results
            (used as a path prefix, so it is expected to end with a separator).
        segmentation_name: name of the segmentation sub-folder.
        clustering_final_name: name of the clustering sub-folder; also the
            prefix of the clustering raster file names.
        apply_mask_outliers: when truthy, only pixels whose value in the
            ``Outliers_total`` raster equals 1 are evaluated.
        S2: when truthy the 2017 CLC maps are used, otherwise the 2008 ones.
    """
    print("S2", S2)
    stats_file = path_main + folder_enc + 'stats.txt'
    path_cm = os.path.expanduser('~/Desktop/Datasets/occupation_des_sols/')

    # We open Corine Land Cover GT maps, they have 3 levels of precision.
    # We combine different classes to create a desired GT map.
    clc_year = "2017" if S2 else "2008"
    cm_truth_name = "clc_" + clc_year + "_lvl1"
    cm_truth_name3 = "clc_" + clc_year + "_lvl3"
    cm_truth, H, W, geo, proj, _ = open_tiff(path_cm, cm_truth_name)
    cm_truth3, _, _, _, _, _ = open_tiff(path_cm, cm_truth_name3)
    cm_truth = cm_truth.flatten()
    cm_truth3 = cm_truth3.flatten()

    # Level-1 classes 1 (city), 2 (industrial area) and 3 (material
    # extraction) are merged into label 1.
    for lvl1_code in (1, 2, 3):
        cm_truth[cm_truth == lvl1_code] = 1
    cm_truth[cm_truth == 4] = 6  # green spaces

    # Level-3 classes override the level-1 label.
    lvl3_relabelling = (
        (511, 6),    # allotment gardens
        (512, 6),    # urban open spaces
        (513, 513),  # annual crops
        (514, 514),  # meadows
        (521, 521),  # vineyards
        (522, 522),  # orchards
        (523, 523),  # olive groves
    )
    for lvl3_code, new_label in lvl3_relabelling:
        cm_truth[cm_truth3 == lvl3_code] = new_label
    # Level-1 labels 6 (wooded areas), 7 (non-wooded areas) and 8 (sea) are kept as they are.
    cm_truth[cm_truth3 == 240] = 0  # airport

    # Labels are replaced by their rank among the sorted unique labels.
    unique_labels, cm_truth_mod = np.unique(cm_truth, return_inverse=True)
    print(unique_labels)
    ds = create_tiff(1, path_cm + cm_truth_name + "_custom", W, H, gdal.GDT_Int16,
                     np.reshape(cm_truth_mod + 1, (H, W)), geo, proj)
    vectorize_tiff(path_cm, cm_truth_name + "_custom", ds)
    ds.FlushCache()
    ds = None

    # The flag is resolved once so that the pixel selection and the label
    # written to the stats file can never disagree.
    use_mask = bool(apply_mask_outliers)

    # Pixels kept for the evaluation: ground-truth rank > 0 and, if requested,
    # inside the outliers mask. This does not depend on the clustering map, so
    # it is computed once.
    ind = np.where(cm_truth_mod > 0)[0]
    if use_mask:
        outliers_total, _, _, _, _, _ = open_tiff(path_main, "Outliers_total")
        mask = np.where(outliers_total.flatten() == 1)[0]
        ind = np.intersect1d(mask, ind)
    cm_truth_eval = cm_truth_mod[ind]
    mask_label = " WITH MASK" if use_mask else " WITHOUT MASK"

    path_clustering = path_main + folder_enc + segmentation_name + "/" + clustering_final_name + "/"

    for mean_or_median in ["mean", "median"]:
        print("Descriptor type " + mean_or_median)
        nmi_list = []
        ari_list = []
        print_stats(stats_file, "\n " + str("New classes"), print_to_console=True)
        print_stats(stats_file, "\n " + str(segmentation_name) + "_" + str(clustering_final_name),
                    print_to_console=True)
        for cl in range(8, 16):
            print("Clusters=" + str(cl))
            image_name_clust = clustering_final_name + "_" + mean_or_median + "_" + str(cl)
            image_array_cl, _, _, _, _, _ = open_tiff(path_clustering, image_name_clust)
            cm_predicted = image_array_cl.flatten()

            # When exactly one pixel is negative, the last pixel is overwritten
            # with its neighbour. NOTE(review): presumably a lone no-data value
            # at the end of the raster - confirm.
            negative_pixels = np.where(cm_predicted < 0)[0]
            if len(negative_pixels) == 1:
                cm_predicted[-1] = cm_predicted[-2]

            cm_predicted = cm_predicted[ind]
            nmi = normalized_mutual_info_score(cm_truth_eval, cm_predicted)
            ari = adjusted_rand_score(cm_truth_eval, cm_predicted)
            print(nmi)
            print(ari)
            nmi_list.append(np.round(nmi, 2))
            ari_list.append(np.round(ari, 2))

        print_stats(stats_file, mean_or_median + mask_label, print_to_console=True)
        print_stats(stats_file, "NMI", print_to_console=True)
        print_stats(stats_file, str(nmi_list), print_to_console=True)
        print_stats(stats_file, "ARI", print_to_console=True)
        print_stats(stats_file, str(ari_list), print_to_console=True)
# We open encoded and segmented images.
# An encoded image is kept only if the segmentation of the same date can be opened.
path_list = []
list_image_encoded = []
list_image_date = []
segm_array_list = []
encoded_image_name_list = np.sort(
    list(
        filter(lambda f: (f.endswith(".TIF") and f.startswith("Encoded_")),
               os.listdir(path_encoded))))
for img, image_name in enumerate(encoded_image_name_list):
    image_name = os.path.splitext(image_name)[0]
    date = re.search("Encoded_([0-9]*)", str(image_name)).group(1)
    print(date)
    image_array, H, W, geo, proj, feat_nb = open_tiff(path_encoded, image_name)
    try:
        image_array_seg, H, W, geo, proj, _ = open_tiff(
            path_results, "Segments_1D_" + str(date))
    except Exception as err:
        # Best effort: a date without a readable segmentation is skipped, but
        # the reason is reported and Ctrl-C / SystemExit are no longer swallowed.
        print("No segmentation for " + str(date) + ", image skipped (" + str(err) + ")")
        continue
    segm_array_list.append(image_array_seg)
    list_image_encoded.append(image_array)
    list_image_date.append(date)

# We arrange everything by date of acquisition
sort_ind = np.argsort(list_image_date)
list_image_date = np.asarray(list_image_date)[sort_ind]
list_image_encoded = np.asarray(list_image_encoded, dtype=float)[sort_ind]
nbr_images = len(list_image_date)
print(nbr_images)
segm_array_list = np.asarray(segm_array_list)[sort_ind]
# Load every segmentation raster and derive its change mask.
segm_array_list = []  # one segmentation array per date
date_list = []  # acquisition date extracted from each file name
outliers_total_list = []  # per-date binary masks (0 - no changes, 1 - changes)
outliers_total = None  # running sum of the per-date masks

image_name_segm_list = np.sort([
    file_name for file_name in os.listdir(path_results)
    if file_name.startswith("Segments_1D_20") and file_name.endswith(".TIF")
])
nbr_images = len(image_name_segm_list)

for i, image_name_segm in enumerate(image_name_segm_list):
    date_match = re.search("_([0-9]*).TIF", image_name_segm)
    date = date_match.group(1)
    print(date)
    date_list.append(date)

    segm_base_name = os.path.splitext(image_name_segm)[0]
    image_array_seg, H, W, geo, proj, bands_nb = open_tiff(path_results, segm_base_name)
    segm_array_list.append(image_array_seg)

    # Binary mask of the segmented area: every non-zero pixel of the
    # segmentation is flagged as change (1), the rest stays at 0.
    outliers_array = np.zeros((H, W))
    outliers_array[image_array_seg != 0] = 1
    outliers_total_list.append(outliers_array)

    # Global change / no-change mask: accumulated over all the dates.
    if outliers_total is None:
        outliers_total = outliers_array.copy()
    else:
        outliers_total += outliers_array
create_dir(path_results)
path_model = path_results + 'model' + run_name + "/"  # we will save the pretrained encoder/decoder models here
create_dir(path_model)

# We open all the images of the time series and mirror the borders.
# Then we create 4D array with all the images of the dataset
images_list = os.listdir(path_datasets)
path_list = []
list_image_extended = []
for image_name_with_extention in images_list:
    if image_name_with_extention.endswith(
            ".TIF") and not image_name_with_extention.endswith("band.TIF"):
        img_path = path_datasets + image_name_with_extention
        path_list.append(img_path)
        image_array, H, W, geo, proj, bands_nb = open_tiff(
            path_datasets,
            os.path.splitext(image_name_with_extention)[0])
        # We keep only essential bands if needed
        if bands_to_keep == 3:
            if satellite == "SPOT5":
                if bands_nb == 4:
                    image_array = np.delete(image_array, 3, axis=0)
                    bands_nb = 3
                if bands_nb == 8:
                    image_array = np.delete(image_array, [3, 7], axis=0)
                    bands_nb = 6
            elif satellite == "S2":
                image_array = np.delete(image_array, 0, axis=0)
                # Keep the band count in sync with the array, as the SPOT-5
                # branches do; it used to keep the pre-deletion value here.
                bands_nb -= 1
        image_extended = extend(
            image_array, patch_size)  # We mirror the border rows and cols
        list_image_extended.append(image_extended)