def main():
    """Write one NDVI GeoTIFF per Montpellier image found in the dataset folder.

    Every file of ``path_datasets`` whose name starts with ``Montpellier_`` and
    ends with ``.TIF`` is opened with ``open_tiff``; a normalized difference of
    bands 3 and 2 is computed and saved as ``NDVI_<date>.TIF`` (Float32) in
    ``path_results``. The acquisition date is extracted from the file name.
    """
    folder_ndvi = "NDVI_results_S2"
    # A SPOT-5 dataset path
    # ('~/Desktop/Datasets/Montpellier_SPOT5_Clipped_relatively_normalized_03_02_mask_vegetation_water_mode_parts_2004_no_DOS1_/')
    # used to be assigned here and was immediately overwritten; only the
    # Sentinel-2 path below was ever effective.
    path_datasets = os.path.expanduser('~/Desktop/Datasets/Montpellier_S2_Concatenated_1C_Clipped_norm_4096/')
    path_results = os.path.expanduser('~/Desktop/Results/TS_clustering/') + folder_ndvi + "/"
    create_dir(path_results)

    # We open extended images
    for image_name_with_extention in os.listdir(path_datasets):
        if not (image_name_with_extention.startswith("Montpellier_")
                and image_name_with_extention.endswith(".TIF")):
            continue
        print(image_name_with_extention)

        date_match = re.search("S2_([0-9]*).", image_name_with_extention)
        if date_match is None:
            # Without a date we cannot name the output file; skip instead of
            # crashing on ``None.group``.
            print("No acquisition date found in " + image_name_with_extention + ", skipping")
            continue
        image_date = date_match.group(1)
        print(image_date)

        image_array, H, W, geo, proj, bands_nb = open_tiff(
            path_datasets, os.path.splitext(image_name_with_extention)[0])

        # Cast to float before the arithmetic: with an unsigned integer raster
        # the subtraction would silently wrap around instead of going negative.
        band_high = np.asarray(image_array[3], dtype=float)
        band_low = np.asarray(image_array[2], dtype=float)
        # Alternative band pair kept from the original code:
        # ndvi = (image_array[2]-image_array[1])/(image_array[2]+image_array[1])
        # NOTE(review): the original trailing comments tag the [2]/[1] pair
        # "for Sentinel-2" and the active [3]/[2] pair "for SPOT-5" although the
        # active dataset is Sentinel-2 -- confirm the band order of the input.
        # Pixels where both bands are 0 yield NaN (0/0), as before.
        ndvi = (band_high - band_low) / (band_high + band_low)

        dst_ds = create_tiff(1, path_results + "NDVI_" + str(image_date) + ".TIF", W, H,
                             gdal.GDT_Float32, np.reshape(ndvi, (H, W)), geo, proj)
        dst_ds = None  # dropping the reference releases the dataset handle
# For every requested number of clusters, cut both hierarchical trees (Z and
# Z_median), paint each segment with its cluster label and write the two label
# rasters (mean / median) as Int16 GeoTIFFs with -9999 as no-data value.
# The flattened segmentation does not change between iterations, so it is
# computed once instead of once per segment and per cluster count.
segmented_flat = segmented_array.flatten()
for cl in range(3, 16):
    print("Dealing with " + str(cl) + " clusters")
    labels = fcluster(Z, cl, criterion='maxclust')
    labels_median = fcluster(Z_median, cl, criterion='maxclust')

    # Pixels that belong to no listed segment keep the no-data value.
    new_labels = np.zeros((H * W)) - 9999
    new_labels_median = np.zeros((H * W)) - 9999
    for s in prange(len(segments)):
        ind_seg = np.where(segmented_flat == segments[s])[0]
        new_labels[ind_seg] = labels[s]
        new_labels_median[ind_seg] = labels_median[s]

    for suffix, label_image in (("_mean_", new_labels), ("_median_", new_labels_median)):
        ds = create_tiff(
            1, path_encoded + segmentation_name + "/" + clustering_final_name
            + "/" + clustering_final_name + suffix + str(cl) + ".TIF", W, H,
            gdal.GDT_Int16, np.reshape(label_image, (H, W)), geo, proj)
        # vectorize_tiff(path_encoded + segmentation_name + "/", "Hierarchical_" + str(cl), ds)
        ds.GetRasterBand(1).SetNoDataValue(-9999)
        ds.FlushCache()
        ds = None  # release the dataset handle before opening the next one
pixelHeight) clipped_anomaly = ds_clipped_anomaly.GetRasterBand(1).ReadAsArray() clipped_t_t3 = image_array_outliers_nn_t_t3[yOffset:yOffset + y_res, xOffset:xOffset + x_res] * clipped_anomaly size_clipped_anomaly = np.count_nonzero(clipped_anomaly) if (len(np.where((clipped_anomaly.flatten()+clipped_t_t3.flatten())==2)[0])/size_clipped_anomaly)>=thr_int: intersection_arr_anomaly[yOffset:yOffset + y_res, xOffset:xOffset + x_res] = clipped_anomaly else: layer_no_intersection.DeleteFeature(feature2.GetFID()) ds_clipped_anomaly = None ds_anomaly = create_tiff(1, path_results + "Anomaly/" + loss_folder_nn_t_t1 + "Anomaly_" +image_name_nn_t_t1 + ".TIF", W, H, gdal.GDT_Byte, intersection_arr_anomaly, geo, proj) source_no_intersection.Destroy() # gdal.SieveFilter(ds_anomaly.GetRasterBand(1), None, ds_anomaly.GetRasterBand(1), 3, 4) # ds_anomaly.FlushCache() # vectorize_tiff(path_results_nn + "Anomaly/" + loss_folder_nn_t_t1, "/" + "Anomaly_" +image_name_nn_t_t1, ds_anomaly) # intersection_arr_anomaly = ds_anomaly.GetRasterBand(1).ReadAsArray() intersection_arr_anomaly_list.append(intersection_arr_anomaly) ds_anomaly = None else: #we deal with the first CM. we cannot correct it, so we just copy it in the new folder date1, date2 = dates_couples[d] loss_folder_nn_t_t1, image_name_nn_t_t1, image_array_outliers_nn_t_t1, ds1, H, W, geo, proj, bands_nb = open_image(date1, date2, path_results_nn_t_t1) create_dir(path_results+loss_folder_nn_t_t1)
def otsu(image_array_loss1, image_array_loss2, H, W, geo, proj, path_results, images_date, threshold=0.995, changes=None, mask=None):
    """Binarize the average reconstruction error with Otsu's threshold.

    The two loss images are averaged, rescaled so that the maximum maps to 256
    and truncated to integers. Otsu's threshold is computed on the lowest
    ``threshold`` fraction of the sorted values, pixels above it are marked as
    changes (1), and the map is written as a Byte GeoTIFF, sieve-filtered and
    vectorized. When ``changes`` names a known ground-truth map, accuracy
    statistics are printed.

    Args:
        image_array_loss1, image_array_loss2: reconstruction error images
            that can be added element-wise and hold ``H * W`` values.
        H, W: raster height and width.
        geo, proj: passed through to ``create_tiff``.
        path_results: output folder.
        images_date: string used in the output file name.
        threshold: fraction of the sorted values kept for the Otsu estimate.
        changes: optional key of a ground-truth change map.
        mask: optional array of flat pixel indices; only these pixels feed the
            threshold estimate and every other pixel is forced to 0.

    Raises:
        ValueError: if ``changes`` is given but is not one of the known keys
            (previously this surfaced as a ``NameError``).
    """
    image_array_loss = np.divide((image_array_loss1 + image_array_loss2), 2)

    # Rescale to [0, 256]; an all-zero image is left untouched instead of
    # being divided by zero.
    max_ = np.max(image_array_loss)
    coef = max_ / 256 if max_ != 0 else 1.0
    image_array_loss = np.asarray(image_array_loss / coef, dtype=int)
    loss_flat = image_array_loss.flatten()

    # The highest (1 - threshold) share of the values is excluded from the
    # threshold estimate.
    if mask is not None:
        candidates = np.sort(loss_flat[mask])[0:int(len(mask) * threshold)]
    else:
        candidates = np.sort(loss_flat)[0:int(H * W * threshold)]
    val = filters.threshold_otsu(candidates)

    image_array_outliers = np.zeros(H * W)
    image_array_outliers[loss_flat > val] = 1
    if mask is not None:
        defected_mask = np.setdiff1d(np.arange(H * W), mask)
        image_array_outliers[defected_mask] = 0

    outliers_image_mean = "Outliers_average_" + images_date + "_" + str(threshold)
    dst_ds = create_tiff(1, path_results + "/" + outliers_image_mean + ".TIF",
                         W, H, gdal.GDT_Byte,
                         np.reshape(image_array_outliers, (H, W)), geo, proj)
    # In-place sieve: size threshold 5, 4-connectedness.
    gdal.SieveFilter(dst_ds.GetRasterBand(1), None, dst_ds.GetRasterBand(1), 5, 4)
    dst_ds.FlushCache()
    vectorize_tiff(path_results, "/" + outliers_image_mean, dst_ds)
    dst_ds = None

    if changes is None:
        return

    # Window (rows, columns) of the full change map covered by each GT raster.
    montpellier_windows = {
        "changes_2004_2005": (slice(0, 600), slice(600, 1400)),
        "changes_2006_2008": (slice(100, 370), slice(1000, 1320)),
    }
    rostov_windows = {
        "changes_Rostov_20150830_20150919": (slice(0, 700), slice(0, 900)),
        "changes_Rostov_20170918_20180111": (slice(2100, 2400), slice(900, 1400)),
    }
    if changes in montpellier_windows:
        # Earlier Windows / Dropbox locations were overwritten by this one.
        path_cm = "/media/user/DATA/Results/RESULTS_CHANGE_DETECTION/GT_Montpellier/" + changes
        cm_truth_name = "mask_changes_small1"
        rows, cols = montpellier_windows[changes]
    elif changes in rostov_windows:
        path_cm = "/media/user/DATA/Results/RESULTS_CHANGE_DETECTION/GT_Rostov/"
        cm_truth_name = changes + "_1"
        rows, cols = rostov_windows[changes]
    else:
        raise ValueError("Unknown ground-truth change map: " + str(changes))

    print(image_array_outliers.shape)
    cm_predicted = np.reshape(image_array_outliers, (H, W))[rows, cols].flatten()
    print(cm_predicted.shape)

    cm_truth, _, _, _, _, _ = open_tiff(path_cm, cm_truth_name)
    cm_truth = cm_truth.flatten()
    print(cm_truth.shape)
    cm_truth[cm_truth == 255] = 0  # value 255 in the GT is counted as "no changes"

    print(classification_report(cm_truth, cm_predicted,
                                target_names=["no changes", "changes"]))
    print(accuracy_score(cm_truth, cm_predicted))
    print(cohen_kappa_score(cm_truth, cm_predicted))
    conf = confusion_matrix(cm_truth, cm_predicted)
    print(conf)
    # Share of true change pixels that were predicted as "no changes".
    omission = conf[1][0] / sum(conf[1])
    print(omission)
def main():
    """Train the GRU auto-encoder on segment synopses, then encode and cluster them.

    Steps, as performed below: parse the command-line parameters, load the
    list of bounding boxes with their synopses, zero-pad every synopsis to a
    common length, pretrain the encoder/decoder, encode the synopses, and for
    each number of clusters in 5, 10, ..., 50 write the K-means and
    hierarchical label rasters and shapefiles. The bounding-box list enriched
    with the hierarchical labels is saved at the end.

    ``alpha``, ``t1``, ``t2``, ``t3``, ``model`` and ``type`` are read but not
    defined in this function; they are expected to be defined elsewhere in the
    file.
    """
    gpu = on_gpu()
    print("ON GPU is " + str(gpu))

    # Parameters
    parser = argparse.ArgumentParser(description='train')
    parser.add_argument('--patch_size', default=1, type=int)
    parser.add_argument('--nb_features', default=150, type=int,
                        help="Number of hidden features in GRU.")
    parser.add_argument('--nb_features_final', default=10, type=int,
                        help="Number of final features of the encoder.")
    parser.add_argument(
        '--nb_clusters', default=15, type=int,
        help="Number of desired clusters. In case if we do not compute for a range of clusters.")
    parser.add_argument('--batch_size', default=50, type=int)
    parser.add_argument('--epoch_nb', default=150, type=int)
    parser.add_argument('--learning_rate', default=0.001, type=float)
    args = parser.parse_args()

    # Start time
    start_time = time.time()
    run_name = "." + str(time.strftime("%Y-%m-%d_%H%M"))
    print(run_name)

    # Montpellier
    path_results_seg_series = os.path.expanduser(
        '~/Desktop/Results/Segmentation_outliers_upd_filled/Montpellier_SPOT5_graph_cut_series_2D/')
    seg_folder_series = "series_sigma_0.3_k_6_min_10_bands_3_threshold_int_0.4/"
    folder_encoded = "patch_9_feat_5.2019-09-03_1619_noise1_mean_std"
    path_results = path_results_seg_series + seg_folder_series + "Graph_coverage_filtered/"
    params_tag = ("alpha_" + str(alpha) + "_t1_" + str(t1) + "_t2_" + str(t2)
                  + "_t3_" + str(t3))
    path_results_final = path_results + params_tag + "/"

    # We open BB file that contains synopses.
    # The rows hold variable-length synopses, i.e. presumably an object array;
    # recent NumPy refuses to load those unless pickling is allowed explicitly.
    # The file is produced by this same pipeline -- never point this at
    # untrusted data.
    bb_final_list = np.load(
        path_results_final + "Graph_list_synopsys_" + params_tag + "_"
        + folder_encoded + ".npy", allow_pickle=True)
    # Eight empty columns are appended; columns 9..16 receive cluster labels below.
    for _ in range(8):
        bb_final_list = np.c_[bb_final_list, np.full(len(bb_final_list), None)]

    folder_results = ("Synopsys_padding_feat_" + str(args.nb_features) + "_lr_"
                      + str(args.learning_rate) + run_name)

    # Folder with the results
    path_results_NN = path_results_final + model + "_" + type + "/" + folder_results + "/"
    create_dir(path_results_NN)
    stats_file = path_results_NN + 'stats.txt'
    path_model = path_results_NN + 'model' + run_name + "/"
    create_dir(path_model)

    # We add new arguments to the parser
    print_stats(stats_file, folder_encoded, print_to_console=False)
    print_stats(stats_file, str(args), print_to_console=False)
    parser.add_argument('--stats_file', default=stats_file)
    parser.add_argument('--path_results', default=path_results_NN)
    parser.add_argument('--path_model', default=path_model)
    parser.add_argument('--run_name', default=run_name)
    args = parser.parse_args()

    # We open segmentation rasters
    segm_array_list = []
    date_list = []
    image_name_segm_list = np.sort(
        [f for f in os.listdir(path_results)
         if f.endswith(".TIF") and f.startswith("Segments_1D_20")])
    print(image_name_segm_list)
    for image_name_segm in image_name_segm_list:
        date = (re.search("_([0-9]*).TIF", image_name_segm)).group(1)
        print(date)
        date_list.append(date)
        image_array_seg, H, W, geo, proj, bands_nb = open_tiff(
            path_results, os.path.splitext(image_name_segm)[0])
        segm_array_list.append(image_array_seg)
    # Flattened once here; the labelling loop below looks segments up many times.
    segm_flat_list = [segm.flatten() for segm in segm_array_list]

    # Length of the padded sequences: highest image index found in column 0, plus one.
    nbr_images = np.max(bb_final_list[:, 0]) + 1

    # we get synopses
    if type == "mean":
        segments = bb_final_list[:, 8]
    else:
        segments = bb_final_list[:, 7]
    feat_nb = len(segments[0][0])

    # We zero-pad all the sequences, so they have the same length over the
    # dataset (equal to dataset length). See the article
    segments_padding = np.zeros((len(bb_final_list), nbr_images, feat_nb))
    for s, synopsis in enumerate(segments):
        segments_padding[s][:len(synopsis)] = synopsis
    print(segments_padding.shape)

    # We prepare the training dataset
    image = ImageDataset(segments_padding, args.patch_size, 0,
                         np.arange(len(segments)),
                         feat_nb)  # we create a dataset with tensor patches
    loader_pretrain = dsloader(image, gpu, args.batch_size, shuffle=True)
    loader_enc = dsloader(image, gpu, batch_size=1000, shuffle=False)

    # We initialize the model
    encoder = Encoder(feat_nb, args.nb_features, args.nb_features_final)  # On CPU
    decoder = Decoder(feat_nb, args.nb_features, args.nb_features_final)  # On CPU
    if gpu:
        encoder = encoder.cuda()  # On GPU
        decoder = decoder.cuda()  # On GPU
    print_stats(stats_file, str(encoder), print_to_console=False)

    # We pretrain the model
    pretrain_lstm(args.epoch_nb, encoder, decoder, loader_pretrain, args)
    # pretrain_lstm(0, encoder, decoder, loader_pretrain, args)
    # Same clock as start_time: the former time.clock() measured something
    # else (and no longer exists since Python 3.8), so the duration was wrong.
    end_time = time.time()
    total_time_pretraining = end_time - start_time
    total_time_pretraining = str(datetime.timedelta(seconds=total_time_pretraining))
    print_stats(args.stats_file,
                "Total time pretraining =" + str(total_time_pretraining) + "\n")

    # We start encoding and clustering
    start_time = time.time()
    bb_final_list_flipped = np.flip(np.copy(bb_final_list), axis=0)
    print_stats(stats_file, 'Initializing clusters...')
    cl_nb = list(range(5, 51, 5))
    labels_list, labels_h_list, hidden_array = encode_lstm(encoder, W, loader_enc, cl_nb)

    # Column of the BB list that stores the hierarchical labels for a given
    # number of clusters (5 and 10 clusters are not stored).
    label_column = {15: 9, 20: 10, 25: 11, 30: 12, 35: 13, 40: 14, 45: 15, 50: 16}

    for feat_cl, labels, labels_h in zip(cl_nb, labels_list, labels_h_list):
        print(feat_cl)
        labels, labels_h = np.flip(labels, axis=0), np.flip(labels_h, axis=0)
        new_labels = np.zeros((H * W))
        new_labels_h = np.zeros((H * W))
        column = label_column.get(feat_cl)
        for l in range(len(labels)):
            # We optionally write clustering results to the BB list
            if column is not None:
                bb_final_list_flipped[l, column] = labels_h[l] + 1
            img, ind = bb_final_list_flipped[l, 0:2]
            coverage_ind = np.where(segm_flat_list[img] == ind)[0]
            # Labels are shifted by one so that 0 can serve as no-data value.
            new_labels[coverage_ind] = labels[l] + 1
            new_labels_h[coverage_ind] = labels_h[l] + 1

        # NOTE(review): the rasters go to args.path_results while
        # vectorize_tiff receives path_results (the segmentation folder) --
        # confirm that this is intended.
        ds = create_tiff(
            1, args.path_results + "Kmeans_initial_clusters_" + str(feat_cl) + ".TIF",
            W, H, gdal.GDT_Int16, np.reshape(new_labels, (H, W)), geo, proj)
        ds.GetRasterBand(1).SetNoDataValue(0)
        vectorize_tiff(path_results, "Kmeans_initial_clusters_" + str(feat_cl), ds)
        ds = None

        ds = create_tiff(
            1, args.path_results + "Hierarchical_initial_clusters_" + str(feat_cl) + ".TIF",
            W, H, gdal.GDT_Int16, np.reshape(new_labels_h, (H, W)), geo, proj)
        ds.GetRasterBand(1).SetNoDataValue(0)
        vectorize_tiff(path_results, "Hierarchical_initial_clusters_" + str(feat_cl), ds)
        ds = None

    # The list is flipped back to its original order before being saved.
    np.save(
        args.path_results + "Graph_list_synopsys_clusters_alpha_" + str(alpha)
        + "_t1_" + str(t1) + "_t2_" + str(t2) + "_t3_" + str(t3),
        np.flip(np.copy(bb_final_list_flipped), axis=0))

    end_time = time.time()
    total_time_pretraining = end_time - start_time
    total_time_pretraining = str(datetime.timedelta(seconds=total_time_pretraining))
    print_stats(stats_file,
                "Total time encoding =" + str(total_time_pretraining) + "\n")
new_coordinates_loss_mean21 = np.asarray( new_coordinates_loss_mean21).flatten() if maskTrue: defected_mask = np.setdiff1d(np.arange(H * W), mask) new_coordinates_loss_mean12[defected_mask] = 0 new_coordinates_loss_mean21[defected_mask] = 0 else: defected_mask = None mask = None # We create a loss image in new coordinate system image_array_tr_mean = np.reshape(new_coordinates_loss_mean12, (H, W)) loss_image_name_mean = name_results12 loss_image_mean = path_results + "Loss_mean_" + loss_image_name_mean + ".TIF" dst_ds = create_tiff(1, loss_image_mean, W, H, gdal.GDT_Float32, image_array_tr_mean, geo, proj) dst_ds = None image_array_loss1 = image_array_tr_mean # We create a loss image in new coordinate system image_array_tr_mean = np.reshape(new_coordinates_loss_mean21, (H, W)) loss_image_name_mean = name_results21 loss_image_mean = path_results + "Loss_mean_" + loss_image_name_mean + ".TIF" dst_ds = create_tiff(1, loss_image_mean, W, H, gdal.GDT_Float32, image_array_tr_mean, geo, proj) dst_ds = None image_array_loss2 = image_array_tr_mean # we compute otsu thresholding for 2 different threshold paratemers 0.095 and 0.098 # the parameter "changes" is used only when we have a GT change map for this couple of images and we want to compute accuracy statistics otsu(image_array_loss1,
def otsu(image_array_loss1, image_array_loss2, H, W, geo, proj, path_results, images_date, changes=None):
    """Binarize the average reconstruction error with Otsu's threshold.

    The two loss images are averaged, rescaled so that the maximum maps to 256
    and truncated to integers. Otsu's threshold is computed after discarding
    the 0.5% highest values; pixels above it are marked as changes (1). The
    map is written as an Int16 GeoTIFF and vectorized. When ``changes`` names
    a known ground-truth map, accuracy statistics are printed.

    Args:
        image_array_loss1, image_array_loss2: reconstruction error images
            that can be added element-wise and hold ``H * W`` values.
        H, W: raster height and width.
        geo, proj: passed through to ``create_tiff``.
        path_results: output folder.
        images_date: string used in the output file name.
        changes: optional key of a ground-truth change map
            (``"changes_2004_2005"`` or ``"changes_2006_2008"``).

    Raises:
        ValueError: if ``changes`` is given but is not one of the known keys
            (previously this surfaced as a ``NameError``).
    """
    # We calculate the average reconstruction error image
    image_array_loss = np.divide((image_array_loss1 + image_array_loss2), 2)

    # We rescale the image values to 8 bits so it works with the functions
    # from skimage. An all-zero image is left untouched instead of being
    # divided by zero.
    max_ = np.max(image_array_loss)
    coef = max_ / 256 if max_ != 0 else 1.0
    image_array_loss = np.asarray(image_array_loss / coef, dtype=int)
    loss_flat = image_array_loss.flatten()

    # THIS IS VERY IMPORTANT VALUE
    # Otsu threshold is automatic, however before applying it, we exclude 0.5%
    # of the highest reconstruction error values as they are considered to be
    # outliers. This parameter can be modified if needed.
    threshold = 0.995
    val = filters.threshold_otsu(
        np.sort(loss_flat)[0:int(H * W * threshold)])  # Obtained threshold value

    # We get binary change map (1 - changes, 0 - no changes) using the
    # threshold and write it to tiff and shp
    image_array_outliers = np.zeros(H * W)
    image_array_outliers[loss_flat > val] = 1

    outliers_image_mean = "Outliers_average_" + images_date + "_" + str(threshold)
    dst_ds = create_tiff(1, path_results + "/" + outliers_image_mean + ".TIF",
                         W, H, gdal.GDT_Int16,
                         np.reshape(image_array_outliers, (H, W)), geo, proj)
    vectorize_tiff(path_results, "/" + outliers_image_mean, dst_ds)
    dst_ds = None

    # We calculate the stats if the ground truth is available for this couple
    # of images
    if changes is None:
        return

    # Window (rows, columns) of the full change map covered by each GT raster;
    # only 2 GT maps are available.
    gt_windows = {
        "changes_2004_2005": (slice(0, 600), slice(600, 1400)),
        "changes_2006_2008": (slice(100, 370), slice(1000, 1320)),
    }
    if changes not in gt_windows:
        raise ValueError("Unknown ground-truth change map: " + str(changes))
    rows, cols = gt_windows[changes]

    # path of ground truth image
    path_cm = '/home/user/Dropbox/IJCNN/images/' + changes
    cm_truth_name = "mask_changes_small1"
    cm_predicted = np.reshape(image_array_outliers, (H, W))[rows, cols].flatten()

    cm_truth, _, _, _, _, _ = open_tiff(path_cm, cm_truth_name)
    cm_truth = cm_truth.flatten()
    cm_truth[cm_truth == 255] = 0  # value 255 in the GT is counted as "no changes"

    # Different stats taken from scikit
    print(classification_report(cm_truth, cm_predicted,
                                target_names=["no changes", "changes"]))
    print(accuracy_score(cm_truth, cm_predicted))
    print(cohen_kappa_score(cm_truth, cm_predicted))
    conf = confusion_matrix(cm_truth, cm_predicted)
    print(conf)
    # Share of true change pixels that were predicted as "no changes".
    omission = conf[1][0] / sum(conf[1])
    print(omission)
def _otsu_change_pixels(image_array_loss, H, W, threshold):
    """Return a flat boolean array marking the pixels above Otsu's threshold.

    The loss image is rescaled so that its maximum maps to 256 and truncated
    to integers; the threshold is estimated on the lowest ``threshold``
    fraction of the sorted values.
    """
    max_ = np.max(image_array_loss)
    # An all-zero image is left untouched instead of being divided by zero.
    coef = max_ / 256 if max_ != 0 else 1.0
    loss_flat = np.asarray(image_array_loss / coef, dtype=int).flatten()
    val = filters.threshold_otsu(np.sort(loss_flat)[0:int(H * W * threshold)])
    return loss_flat > val


def otsu_independent(image_array_loss1, image_array_loss2, H, W, geo, proj, path_results, images_date, changes=None):
    """Threshold both reconstruction error images separately and merge the results.

    Each loss image is binarized on its own with Otsu's threshold (after
    discarding the 0.5% highest values); a pixel is marked as change (1) when
    it exceeds the threshold in at least one of the two images. The map is
    written as an Int16 GeoTIFF and vectorized. When ``changes`` names a known
    ground-truth map, accuracy statistics are printed.

    Args:
        image_array_loss1, image_array_loss2: reconstruction error images
            holding ``H * W`` values each.
        H, W: raster height and width.
        geo, proj: passed through to ``create_tiff``.
        path_results: output folder.
        images_date: string used in the output file name.
        changes: optional key of a ground-truth change map
            (``"changes_2004_2005"`` or ``"changes_2006_2008"``).

    Raises:
        ValueError: if ``changes`` is given but is not one of the known keys
            (previously this surfaced as a ``NameError``).
    """
    threshold = 0.995
    # Change pixels of the 1st image, to which those of the 2nd are added.
    changed = (_otsu_change_pixels(image_array_loss1, H, W, threshold)
               | _otsu_change_pixels(image_array_loss2, H, W, threshold))
    image_array_outliers = np.zeros(H * W)
    image_array_outliers[changed] = 1

    # We write tiff and shp
    outliers_image_mean = ("Outliers_average_" + images_date + "_independent_"
                           + str(threshold))
    dst_ds = create_tiff(1, path_results + "/" + outliers_image_mean + ".TIF",
                         W, H, gdal.GDT_Int16,
                         np.reshape(image_array_outliers, (H, W)), geo, proj)
    vectorize_tiff(path_results, "/" + outliers_image_mean, dst_ds)
    dst_ds = None

    # We calculate the classification stats if the ground truth is available
    if changes is None:
        return

    # Window (rows, columns) of the full change map covered by each GT raster.
    gt_windows = {
        "changes_2004_2005": (slice(0, 600), slice(600, 1400)),
        "changes_2006_2008": (slice(100, 370), slice(1000, 1320)),
    }
    if changes not in gt_windows:
        raise ValueError("Unknown ground-truth change map: " + str(changes))
    rows, cols = gt_windows[changes]

    # A Windows location used to be assigned first and was immediately
    # overwritten by this one.
    path_cm = '/home/user/Dropbox/IJCNN/images/' + changes
    cm_truth_name = "mask_changes_small1"
    print(image_array_outliers.shape)
    cm_predicted = np.reshape(image_array_outliers, (H, W))[rows, cols].flatten()

    cm_truth, _, _, _, _, _ = open_tiff(path_cm, cm_truth_name)
    cm_truth = cm_truth.flatten()
    cm_truth[cm_truth == 255] = 0  # value 255 in the GT is counted as "no changes"

    # Different stats taken from scikit
    print(classification_report(cm_truth, cm_predicted,
                                target_names=["no changes", "changes"]))
    print(accuracy_score(cm_truth, cm_predicted))
    print(cohen_kappa_score(cm_truth, cm_predicted))
    conf = confusion_matrix(cm_truth, cm_predicted)
    print(conf)
    # Share of true change pixels that were predicted as "no changes".
    omission = conf[1][0] / sum(conf[1])
    print(omission)
j_min, j_max = np.min(ind_seg_j), np.max(ind_seg_j) image_seg = encoded_array[:, i_min:i_max + 1, j_min:j_max + 1] # we change BB's indices into new "coordinate system" that starts with zero ind_seg_i_mod = ind_seg_i - i_min ind_seg_j_mod = ind_seg_j - j_min # we extract the mask that corresponds to the backgroung of the segment in this BB mask = np.zeros((image_seg.shape[1:]), dtype=int) mask[ind_seg_i_mod, ind_seg_j_mod] = 1 # we perform the segmentation of the whole BB labels = segmented_array_enc[ i_min:i_max + 1, j_min:j_max + 1] #we open segmentation of the encoded image # we apply mask to extract only the segment of the interest and we create a temporal file with it labels = labels * mask geo_seg = geo ds = create_tiff(1, "", labels.shape[1], labels.shape[0], gdal.GDT_Float32, labels, geo_seg, proj) ds_mask = create_tiff(1, "", labels.shape[1], labels.shape[0], gdal.GDT_Float32, mask, geo_seg, proj) gdal.SieveFilter(ds.GetRasterBand(1), ds_mask.GetRasterBand(1), ds.GetRasterBand(1), min_obj_size + 1, 4) # we filter out small objects ds.FlushCache() # We correct segmentation in case we have two separate segments with the same label, because of the mask application on the original segmentation of bb labels = ds.GetRasterBand(1).ReadAsArray().astype(int) # print(labels) labels_no_zero = np.delete(labels.flatten(), np.where(labels.flatten() == 0)[0]) unique_labels, unique_labels_size = np.unique(labels_no_zero, return_counts=True) if len(unique_labels) > 1:
def _parse_bool(value):
    """Convert a command-line string to a bool (``type=bool`` treats any non-empty string as True)."""
    if isinstance(value, bool):
        return value
    lowered = str(value).strip().lower()
    if lowered in ("true", "t", "yes", "y", "1"):
        return True
    if lowered in ("false", "f", "no", "n", "0", ""):
        return False
    raise argparse.ArgumentTypeError("boolean value expected, got " + repr(value))


def _normalize_series(series, centered):
    """Normalize an image series in place, band by band, and return its statistics.

    ``series`` is indexed as (image, band, row, column). When ``centered`` is
    ``True`` every band is first standardized with the mean and standard
    deviation computed over all images; every band is then rescaled to [0, 1]
    with its global minimum and maximum. The returned list holds
    ``[mean, std]`` of each band after these operations.

    NOTE: a constant band (zero standard deviation or max == min) produces
    NaN values, exactly as the inline code this helper replaces.
    """
    bands = range(len(series[0]))
    if centered is True:
        for band in bands:
            values = series[:, band, :, :].flatten()
            series[:, band] = (series[:, band] - np.mean(values)) / np.std(values)
    for band in bands:
        values = series[:, band, :, :].flatten()
        band_min, band_max = np.min(values), np.max(values)
        series[:, band] = (series[:, band] - band_min) / (band_max - band_min)
    list_norm = []
    for band in bands:
        values = series[:, band, :, :].flatten()
        list_norm.append([np.mean(values), np.std(values)])
    return list_norm


def main():
    """Train the two-branch (original bands + NDVI) auto-encoder and encode the series.

    Steps, as performed below: parse the command-line parameters, load and
    normalize the image series and the matching NDVI series, pretrain the
    encoder/decoder, encode every pixel of the series, stretch each encoded
    feature to [0, 255] and write the result as a multi-band Int16 GeoTIFF.
    """
    gpu = on_gpu()
    print("ON GPU is " + str(gpu))

    # Parameters
    parser = argparse.ArgumentParser(description='train')
    parser.add_argument('--satellite', default="SPOT5", type=str,
                        help="choose from SPOT5 and S2")
    parser.add_argument('--patch_size', default=9, type=int)
    parser.add_argument('--patch_size_ndvi', default=5, type=int)
    parser.add_argument('--nb_features', default=10, type=int,
                        help="f parameter from the article")
    parser.add_argument('--batch_size', default=150, type=int)
    parser.add_argument(
        '--bands_to_keep', default=4, type=int,
        help='whether we delete swir band for spot-5 or blue for S2, defauld - all 4 bands')
    parser.add_argument('--epoch_nb', default=2, type=int)
    parser.add_argument('--learning_rate', default=0.0001, type=float)
    parser.add_argument('--noise_factor', default=0.25, type=float,
                        help='for denoising AE, original images')
    parser.add_argument('--noise_factor_ndvi', default=None, type=float,
                        help='for denoising AE, NDVI branch')
    # A real converter is used here: with type=bool, "--centered False" was True.
    parser.add_argument(
        '--centered', default=True, type=_parse_bool,
        help='whether we center data with mean and std before training')
    # NOTE(review): type=list splits a command-line value into single
    # characters, so only the defaults of the two options below are usable.
    parser.add_argument(
        '--original_layers', default=[32, 32, 64, 64], type=list,
        help='Nb of conv. layers to build AE')  # Default article model
    parser.add_argument(
        '--ndvi_layers', default=[16, 16, True], type=list,
        help='Nb of conv. layers to build AE and pooling option')  # Default article model
    args = parser.parse_args()

    start_time = time.time()
    run_name = "." + str(time.strftime("%Y-%m-%d_%H%M%S"))
    print(run_name)

    # We define all the paths
    path_results_final = os.path.expanduser('~/Desktop/Results/TS_clustering/')
    folder_results = ("Double_Trivial_feat_" + str(args.nb_features) + "_patch_"
                      + str(args.patch_size) + run_name)
    if args.satellite == "SPOT5":
        path_datasets = os.path.expanduser(
            '~/Desktop/Datasets/Montpellier_SPOT5_Clipped_relatively_normalized_03_02_mask_vegetation_water_mode_parts_2004_no_DOS1_/')
        path_datasets_ndvi = os.path.expanduser(
            '~/Desktop/Results/TS_clustering/NDVI_results/NDVI_images/')
        path_results = path_results_final + "Conv_3D/" + folder_results + "/"
    else:
        path_datasets = os.path.expanduser(
            '~/Desktop/Datasets/Montpellier_S2_Concatenated_1C_Clipped_norm_4096/')
        path_datasets_ndvi = os.path.expanduser(
            '~/Desktop/Results/TS_clustering/NDVI_results/NDVI_images_S2/')
        path_results = path_results_final + "Conv_3D_S2/" + folder_results + "/"
    create_dir(path_results)
    stats_file = path_results + 'stats.txt'
    path_model = path_results + 'model' + run_name + "/"
    create_dir(path_model)

    print_stats(stats_file, str(args), print_to_console=True)
    parser.add_argument('--stats_file', default=stats_file)
    parser.add_argument('--path_results', default=path_results)
    parser.add_argument('--path_model', default=path_model)
    parser.add_argument('--run_name', default=run_name)
    args = parser.parse_args()

    # This part of the code opens and pre-processes the original images
    # before creating a dataset.
    # Highest valid pixel value per satellite; larger values are saturated.
    saturation_level = {"S2": 4096, "SPOT5": 475}.get(args.satellite)
    list_image_extended = []
    list_image_date = []
    for image_name_with_extention in os.listdir(path_datasets):
        if (not image_name_with_extention.endswith(".TIF")
                or image_name_with_extention.endswith("band.TIF")):
            continue
        if args.satellite == "SPOT5":
            image_date = (re.search("_([0-9]*)_", image_name_with_extention)).group(1)
        else:
            image_date = (re.search("S2_([0-9]*).", image_name_with_extention)).group(1)
        image_array, H, W, geo, proj, bands_nb = open_tiff(
            path_datasets, os.path.splitext(image_name_with_extention)[0])
        if args.bands_to_keep == 3:
            if args.satellite == "SPOT5":
                image_array = np.delete(image_array, 3, axis=0)
            if args.satellite == "S2":
                image_array = np.delete(image_array, 0, axis=0)
        # We deal with all the saturated pixels: they take the highest
        # non-saturated value of their band.
        if saturation_level is not None:
            for b in range(len(image_array)):
                saturated = image_array[b] > saturation_level
                image_array[b][saturated] = np.max(image_array[b][~saturated])
        # we mirror image border rows and columns so we would be able to clip
        # patches for the pixels from these rows and cols
        list_image_extended.append(extend(image_array, args.patch_size))
        list_image_date.append(image_date)

    sort_ind = np.argsort(list_image_date)  # we arrange images by date of acquisition
    list_image_extended = np.asarray(list_image_extended, dtype=float)[sort_ind]
    bands_nb = list_image_extended.shape[1]
    temporal_dim = list_image_extended.shape[0]
    list_image_date = np.asarray(list_image_date)[sort_ind]
    print(list_image_date)
    list_norm = _normalize_series(list_image_extended, args.centered)

    # We do exactly the same with NDVI images.
    list_image_extended_ndvi = []
    list_image_date_ndvi = []
    for image_name_with_extention_ndvi in os.listdir(path_datasets_ndvi):
        if not (image_name_with_extention_ndvi.endswith(".TIF")
                and image_name_with_extention_ndvi.startswith("NDVI_")):
            continue
        image_date_ndvi = (re.search("_([0-9]*).", image_name_with_extention_ndvi)).group(1)
        image_array_ndvi, H, W, geo, proj, _ = open_tiff(
            path_datasets_ndvi, os.path.splitext(image_name_with_extention_ndvi)[0])
        image_array_ndvi = np.reshape(image_array_ndvi, (1, H, W))
        list_image_extended_ndvi.append(extend(image_array_ndvi, args.patch_size_ndvi))
        list_image_date_ndvi.append(image_date_ndvi)

    sort_ind_ndvi = np.argsort(list_image_date_ndvi)  # we arrange images by date of acquisition
    list_image_extended_ndvi = np.asarray(list_image_extended_ndvi, dtype=float)[sort_ind_ndvi]
    list_image_date_ndvi = np.asarray(list_image_date_ndvi)[sort_ind_ndvi]
    print(list_image_date_ndvi)
    list_norm_ndvi = _normalize_series(list_image_extended_ndvi, args.centered)

    # We create a training dataset from our SITS
    list_image_extended_tr = np.transpose(list_image_extended, (1, 0, 2, 3))
    list_image_extended_ndvi_tr = np.transpose(list_image_extended_ndvi, (1, 0, 2, 3))
    nbr_patches_per_image = H * W  # Nbr of training patches for the dataset
    print_stats(stats_file, "Nbr of training patches " + str(nbr_patches_per_image),
                print_to_console=True)
    image = ImageDataset(list_image_extended_tr, list_image_extended_ndvi_tr,
                         args.patch_size, args.patch_size_ndvi,
                         range(nbr_patches_per_image))  # we create a dataset with tensor patches
    loader_pretrain = dsloader(image, gpu, args.batch_size, shuffle=True)
    image = None

    # We create encoder and decoder models
    if args.noise_factor is not None:
        encoder = Encoder(bands_nb, args.patch_size, args.patch_size_ndvi,
                          args.nb_features, temporal_dim, args.original_layers,
                          args.ndvi_layers, np.asarray(list_norm),
                          np.asarray(list_norm_ndvi), args.noise_factor,
                          args.noise_factor_ndvi)  # On CPU
    else:
        encoder = Encoder(bands_nb, args.patch_size, args.patch_size_ndvi,
                          args.nb_features, temporal_dim, args.original_layers,
                          args.ndvi_layers)  # On CPU
    decoder = Decoder(bands_nb, args.patch_size, args.patch_size_ndvi,
                      args.nb_features, temporal_dim, args.original_layers,
                      args.ndvi_layers)  # On CPU
    if gpu:
        encoder = encoder.cuda()  # On GPU
        decoder = decoder.cuda()  # On GPU
    print_stats(stats_file, str(encoder), print_to_console=False)

    # We pretrain the model
    pretrain(args.epoch_nb, encoder, decoder, loader_pretrain, args)
    end_time = time.time()
    total_time_pretraining = end_time - start_time
    total_time_pretraining = str(datetime.timedelta(seconds=total_time_pretraining))
    print_stats(args.stats_file,
                "Total time pretraining =" + str(total_time_pretraining) + "\n")

    # We pass to the encoding part
    start_time = time.time()
    # We create a dataset for SITS encoding, its size depends on the available memory
    image = None
    loader_pretrain = None
    image = ImageDataset(list_image_extended_tr, list_image_extended_ndvi_tr,
                         args.patch_size, args.patch_size_ndvi,
                         range(H * W))  # we create a dataset with tensor patches
    # Try progressively smaller batches; a RuntimeError on the smallest one
    # is propagated.
    divisors = (1, 5, 20)
    for divisor in divisors:
        batch_size = W if divisor == 1 else int(W / divisor)
        try:
            loader_enc_final = dsloader(image, gpu, batch_size=batch_size, shuffle=False)
            break
        except RuntimeError:
            if divisor == divisors[-1]:
                raise
    image = None

    print_stats(stats_file, 'Encoding...')
    encoded_array = encoding(encoder, loader_enc_final, batch_size)

    # We stretch encoded images between 0 and 255
    for band in range(args.nb_features):
        band_min = np.min(encoded_array[:, band])
        band_max = np.max(encoded_array[:, band])
        encoded_array[:, band] = 255 * (encoded_array[:, band] - band_min) / (band_max - band_min)
    print(encoded_array.shape)

    # We write the image
    new_encoded_array = np.transpose(encoded_array, (1, 0))
    ds = create_tiff(
        encoded_array.shape[-1],
        args.path_results + "Encoded_3D_conv_" + str(encoded_array.shape[-1]) + ".TIF",
        W, H, gdal.GDT_Int16,
        np.reshape(new_encoded_array, (encoded_array.shape[-1], H, W)), geo, proj)
    ds.GetRasterBand(1).SetNoDataValue(-9999)
    ds = None

    end_time = time.time()
    total_time_pretraining = end_time - start_time
    total_time_pretraining = str(datetime.timedelta(seconds=total_time_pretraining))
    print_stats(stats_file,
                "Total time encoding =" + str(total_time_pretraining) + "\n")
int(patch_size / 2)]), axis=0) if (batch_idx + 1) % 200 == 0: print('Encoding : [{}/{} ({:.0f}%)]'.format( (batch_idx + 1) * batch_size, len(loader.dataset), 100. * (batch_idx + 1) / len(loader))) new_coordinates_loss_mean12 = np.asarray( new_coordinates_loss_mean12).flatten() new_coordinates_loss_mean21 = np.asarray( new_coordinates_loss_mean21).flatten() # We create a loss image in new coordinate system for reconstruction of 2nd image from the 1st image_array_loss1 = np.reshape(new_coordinates_loss_mean12, (H, W)) loss_image_mean = path_results + "Loss_mean_" + name_results12 + ".TIF" dst_ds = create_tiff(1, loss_image_mean, W, H, gdal.GDT_Float32, image_array_loss1, geo, proj) dst_ds = None # We reconstruct the 2nd image from the 1st image_array_tr = np.reshape(new_coordinates_reconstructed12, (H, W, bands_nb)) image_array = np.transpose(image_array_tr, (2, 0, 1)) for b in range(len(list(image_array))): image_array[b] = image_array[b] * (list_norm[b][1] - list_norm[b][0]) + list_norm[b][0] reprojected_image = path_results + "Encoded_decoded_" + name_results12 + ".TIF" dst_ds = create_tiff(bands_nb, reprojected_image, W, H, gdal.GDT_Int16, image_array, geo, proj) dst_ds = None # We create a loss image in new coordinate system of 1st image from the 2nd
def calculate_stats(folder_enc, segmentation_name, clustering_final_name, apply_mask_outliers=True, S2=False):
    """Score clustering maps against a custom land-cover ground truth.

    Builds a ground-truth (GT) map from the level-1 and level-3 CORINE Land
    Cover rasters, writes it next to the source rasters (raster + vectorized
    version), then compares it with the clustering rasters obtained for 8 to
    15 clusters, for both the "mean" and the "median" descriptors. NMI and
    ARI scores (rounded to 2 decimals) are reported through ``print_stats``
    into ``path_main + folder_enc + 'stats.txt'``.

    Args:
        folder_enc: Sub-path of ``path_main`` that holds the clustering
            results and receives ``stats.txt`` (concatenated as-is, so it
            is expected to end with a path separator).
        segmentation_name: Name of the segmentation sub-folder.
        clustering_final_name: Name of the clustering sub-folder; also the
            prefix of the clustering raster names.
        apply_mask_outliers: When truthy, only pixels whose value in the
            ``Outliers_total`` raster equals 1 are evaluated.
        S2: When truthy, the 2017 land-cover maps are used instead of the
            2008 ones.

    Returns:
        None. Results are printed and written to the stats file.
    """
    print("S2", S2)
    stats_file = path_main + folder_enc + 'stats.txt'
    path_cm = os.path.expanduser('~/Desktop/Datasets/occupation_des_sols/')

    # We open the CORINE Land Cover GT maps. Only the level-1 and level-3 maps
    # are needed to build the custom GT map.
    year = "2017" if S2 else "2008"
    cm_truth_name = "clc_" + year + "_lvl1"
    cm_truth_name3 = "clc_" + year + "_lvl3"
    cm_truth, H, W, geo, proj, _ = open_tiff(path_cm, cm_truth_name)
    cm_truth3, *_ = open_tiff(path_cm, cm_truth_name3)
    cm_truth = cm_truth.flatten()
    cm_truth3 = cm_truth3.flatten()
    # NOTE(review): codes such as 513 are written into the level-1 array below;
    # this assumes open_tiff returns a dtype wide enough to hold them - confirm.

    # Level-1 classes merged into a coarser class. Classes that are kept
    # unchanged: 1 (city), 6 (wooded areas), 7 (non-wooded areas), 8 (sea).
    level1_merges = (
        (2, 1),  # industrial area -> city
        (3, 1),  # material extraction sites -> city
        (4, 6),  # green spaces -> wooded areas
    )
    for source_class, target_class in level1_merges:
        cm_truth[cm_truth == source_class] = target_class

    # Level-3 classes that override the level-1 label of the same pixel.
    level3_overrides = (
        (511, 6),    # allotment gardens
        (512, 6),    # urban open spaces
        (513, 513),  # annual crops
        (514, 514),  # grasslands
        (521, 521),  # vineyards
        (522, 522),  # orchards
        (523, 523),  # olive groves
        (240, 0),    # airport -> label 0 (excluded from the evaluation below)
    )
    for level3_class, target_class in level3_overrides:
        cm_truth[cm_truth3 == level3_class] = target_class

    # Relabel the classes with consecutive integers (rank among sorted labels).
    classes, cm_truth_mod = np.unique(cm_truth, return_inverse=True)
    print(classes)

    # We write the custom GT map (labels shifted by one) and vectorize it.
    # NOTE(review): unlike the other create_tiff calls, this path has no
    # ".TIF" extension - confirm whether that is intended.
    ds = create_tiff(1, path_cm + cm_truth_name + "_custom", W, H, gdal.GDT_Int16,
                     np.reshape(cm_truth_mod + 1, (H, W)), geo, proj)
    vectorize_tiff(path_cm, cm_truth_name + "_custom", ds)
    ds.FlushCache()
    ds = None

    # Pixels used for the evaluation do not depend on the clustering, so they
    # are selected once. Pixels carrying the smallest GT label (index 0) are
    # always left out.
    labelled_pixels = np.where(cm_truth_mod > 0)[0]
    if apply_mask_outliers:
        outliers_total, *_ = open_tiff(path_main, "Outliers_total")
        mask = np.where(outliers_total.flatten() == 1)[0]
        eval_pixels = np.intersect1d(mask, labelled_pixels)
        mask_label = " WITH MASK"
    else:
        eval_pixels = labelled_pixels
        mask_label = " WITHOUT MASK"
    cm_truth_eval = cm_truth_mod[eval_pixels]

    clustering_dir = path_main + folder_enc + segmentation_name + "/" + clustering_final_name + "/"
    for mean_or_median in ["mean", "median"]:
        print("Descriptor type " + mean_or_median)
        nmi_list = []
        ari_list = []
        print_stats(stats_file, "\n " + str("New classes"), print_to_console=True)
        print_stats(stats_file, "\n " + str(segmentation_name) + "_" + str(clustering_final_name),
                    print_to_console=True)
        for cl in range(8, 16):
            print("Clusters=" + str(cl))
            image_name_clust = clustering_final_name + "_" + mean_or_median + "_" + str(cl)
            image_array_cl, *_ = open_tiff(clustering_dir, image_name_clust)
            cm_predicted = image_array_cl.flatten()

            # When exactly one pixel has a negative label, the last pixel is
            # given the label of its predecessor.
            # NOTE(review): presumably the lone unlabelled pixel is always the
            # last one - confirm.
            negative_pixels = np.where(cm_predicted < 0)[0]
            if len(negative_pixels) == 1:
                cm_predicted[-1] = cm_predicted[-2]

            cm_predicted = cm_predicted[eval_pixels]
            nmi = normalized_mutual_info_score(cm_truth_eval, cm_predicted)
            ari = adjusted_rand_score(cm_truth_eval, cm_predicted)
            print(nmi)
            print(ari)
            nmi_list.append(np.round(nmi, 2))
            ari_list.append(np.round(ari, 2))

        # The label is derived from the same decision that selected the pixels,
        # so the report always matches what was actually evaluated.
        print_stats(stats_file, mean_or_median + mask_label, print_to_console=True)
        print_stats(stats_file, "NMI", print_to_console=True)
        print_stats(stats_file, str(nmi_list), print_to_console=True)
        print_stats(stats_file, "ARI", print_to_console=True)
        print_stats(stats_file, str(ari_list), print_to_console=True)
else: covered_grids_flatten_to_be_filled[image][coverage_ind] = 0 covered_grids_flatten_to_be_filled_by_bb[image, :, coverage_ind] = [0, 0] bb_final_list = np.delete(bb_final_list, to_delete, axis=0) np.save(path_results_final + "Graph_list_alpha_"+str(alpha)+"_t1_"+str(t1)+"_t2_"+str(t2) + "_t3_" + str(t3), bb_final_list) # We write the grids to rasters for i in range(nbr_images): grid = covered_grids_flatten[i] grid = np.reshape(grid, ((H, W))) ds = create_tiff(1, path_results_final + "BB_"+date_list[i]+"_alpha_"+str(alpha)+"_t1_"+str(t1)+"_t2_"+str(t2) + "_t3_" + str(t3) +".TIF", W, H, gdal.GDT_Int32, grid, geo, proj) ds.GetRasterBand(1).SetNoDataValue(-1) ds.FlushCache() # gdal.SieveFilter(ds.GetRasterBand(1), ds_outliers.GetRasterBand(1), ds.GetRasterBand(1), 4, 4) vectorize_tiff(path_results_final, "BB_"+date_list[i]+"_alpha_"+str(alpha)+"_t1_"+str(t1)+"_t2_"+str(t2) + "_t3_" + str(t3), ds) grid_filled = covered_grids_flatten_to_be_filled[i] grid_filled = np.reshape(grid_filled, ((H, W))) unique, count = np.unique(grid_filled, return_counts=True) # We compute the overall graph coverage if 0 in unique: perc =int(count[1]/(count[1]+count[2])*100) else: perc=0 print("Image " + str(i) + " has " + str(perc) + "% uncovered pixels")