def __init__(
        self,
        isExtract=True,
        root_dir="C:\\Users\\DELL\\Projects\\VHR_CD\\image-v2-timeseries\\newest",
        filename="4Band_Subtracted_20040514_20050427"):
    """Open an image, optionally keep only the masked pixels, and min-max normalize it.

    Args:
        isExtract (boolean): use the mask to extract changed area
        root_dir (string): the path of the file
        filename (string): the name of the picture

    After construction ``norm_data`` holds the per-column min-max
    normalized samples and ``target_data`` is an all-zero array of the
    same shape.
    """
    self.isExtract = isExtract
    self.root_dir = root_dir
    self.file_name = filename
    self.dataset = oi.open_tiff(root_dir, filename)
    self.H = self.dataset[1]
    self.W = self.dataset[2]
    self.n_bands = self.dataset[3]
    self.npdataset = art.tif2vec(
        self.dataset[0])  # flatten and transform the array

    if self.isExtract:
        # extract out the changed area
        self.select_path = "C:\\Users\\DELL\\Projects\\VHR_CD\\image-v2-timeseries\\EXTRACT"
        self.select_img = "SOMOCLU_20_20_HDBSCAN_cl_2_2004_2005_min_cluster_size_4_alg_best_"
        self.simg = oi.open_tiff(self.select_path, self.select_img)
        self.select = self.simg[0]  # (2720000)
        # Positions selected with value -1 are treated as "changed" and
        # positions selected with value 0 as "not changed" (presumably
        # the mask's label values -- confirm against selectArea).
        self.changePos = DataProcess.selectArea(
            self.select, self.n_bands, -1, isStack=True)
        self.ns_changePos = DataProcess.selectArea(
            self.select, self.n_bands, -1, isStack=False)
        self.ns_nonChangePos = DataProcess.selectArea(
            self.select, self.n_bands, 0, isStack=False)
        self.npdataset = self.npdataset[self.changePos].reshape(
            -1, self.n_bands)

    # normalization
    self.nmax = self.npdataset.max(axis=0)
    self.nmin = self.npdataset.min(axis=0)
    value_range = np.asarray(self.nmax - self.nmin)
    # A constant column has max == min; dividing by that zero range would
    # fill the column with NaN/inf. Divide by 1 instead so it maps to 0.
    safe_range = np.where(value_range == 0,
                          np.ones_like(value_range), value_range)
    self.norm_data = (self.npdataset - self.nmin) / safe_range

    # TODO:don't know what's for yet, add only to be compatible to TensorDataset
    self.target_data = np.zeros_like(self.norm_data)

    # clear the memory
    # self.simg=None
    self.dataset = None
def extract_compareClustering(clusterClass):
    """Fit each clustering model on the extracted pixels and record its result.

    For every ``name -> model`` entry the model is fitted on the samples
    selected by the extraction mask, its labels are written back into a
    full-size label map (non-selected positions get ``max(label) + 1``),
    a silhouette score is computed, the label map is visualized, and the
    timing plus score are appended to a text file.

    Args:
        clusterClass (dict): maps a display name to a clustering object
            exposing ``fit(X)`` and a ``labels_`` attribute.
    """
    # Get data, n_bands=4
    norm_img_path = "C:\\Users\\DELL\\Projects\\MLS_cluster\\image-v2-timeseries\\newest"
    img = "4Band_Subtracted_20040514_20050427"
    dataset = oi.open_tiff(norm_img_path, img)
    H = dataset[1]
    W = dataset[2]
    n_bands = dataset[3]
    org_data = art.tif2vec(dataset[0])  # NOTE: this step is really important

    select_path = "C:\\Users\\DELL\\Projects\\MLS_cluster\\image-v2-timeseries\\EXTRACT"
    select_img = "SOMOCLU_20_20_HDBSCAN_cl_2_2004_2005_min_cluster_size_4_alg_best_"
    simg = oi.open_tiff(select_path, select_img)
    select = simg[0]  # (2720000)
    changePos = DataProcess.selectArea(select, n_bands, -1, isStack=True)
    ns_changePos = DataProcess.selectArea(select, n_bands, -1, isStack=False)
    ns_nonChangePos = DataProcess.selectArea(select, n_bands, 0, isStack=False)
    X_train = org_data[changePos].reshape(-1, n_bands)

    result = np.zeros_like(select.reshape(-1, 1))
    # Output directory does not depend on the model, so build it once.
    save_path = "C:\\Users\\DELL\\Projects\\MLS_cluster\\image-v2-timeseries\\sklearn_clustering\\compare"

    for cls_name, cls_class in clusterClass.items():
        print("running", cls_name, "...")
        # time.clock() was removed in Python 3.8; perf_counter() is the
        # documented replacement for measuring elapsed time.
        t0 = time.perf_counter()
        cls_class.fit(X_train)
        usingTime = time.perf_counter() - t0

        # combine the result
        result[ns_changePos] = cls_class.labels_
        result[ns_nonChangePos] = np.max(cls_class.labels_) + 1

        # silhouette_score expects 1-D labels; passing the (N, 1) column
        # only works through an implicit conversion that emits a warning.
        evaluation = silhouette_score(X=org_data,
                                      labels=result.ravel(),
                                      metric='euclidean',
                                      sample_size=10000)

        DataProcess.visualize_class(
            result.reshape(H, W),
            save_path + '\\' + cls_name + "_change_area_class")

        # save using time
        print("save the information to txt file...")
        with open(
                save_path + '/' +
                "Outlier Detection Algorithms Running Time.txt",
                'a') as f:
            f.write("detetion algorithm: " + cls_name +
                    "\nsilhouette_score:" + str(evaluation) +
                    "\ndetection using time: " + str(usingTime))
            f.write("\n----------------------------------------------\n")
def __init__(
        self,
        filename,
        isExtract=True,
        root_dir="C:\\Users\\DELL\\Projects\\VHR_CD\\image-v2-timeseries\\Montpellier_SPOT5_Clipped_relatively_normalized_03_02_mask1"
):
    """Open one image and keep its normalized samples.

    Args:
        filename (string): the name of the picture
        isExtract (boolean): accepted but not used by this constructor
        root_dir (string): the path of the file
    """
    self.root_dir = root_dir
    self.file_name = filename

    opened = oi.open_tiff(root_dir, filename)
    self.dataset = opened

    # Copy the metadata fields out of the opened result, in index order.
    # (The original author noted H as 1700 and W as 1600.)
    for attr_name, position in (("H", 1), ("W", 2), ("n_bands", 3),
                                ("geo", 4), ("prj", 5)):
        setattr(self, attr_name, opened[position])

    normalized = self.normalization()
    self.norm_data = normalized

    # TODO:don't know what's for yet, add only to be compatible to TensorDataset
    self.target_data = np.zeros_like(normalized)

    # Drop the reference to the opened image to clear the memory.
    self.dataset = None
def RunPyodOutlier(classifiers, outlier_save_path, isExtract=True):
    """Fit each outlier detector on the image and save its labels and scores.

    For every ``name -> detector`` entry the detector is fitted, its
    labels and rescaled decision scores are collected, and the results
    are written out as CSV, SHP and a heat map. The fitting time is
    appended to a text file under ``outlier_save_path``.

    Args:
        classifiers (dict): maps a display name to a detector exposing
            ``fit(X)``, ``labels_`` and ``decision_scores_``.
        outlier_save_path (string): output location prefix for all
            saved files.
        isExtract (boolean): when True, fit only on the samples selected
            by the extraction mask and write the results back into
            full-size arrays (other positions are set to 0); when False,
            fit on all samples.
    """
    # Get data, n_bands=4
    norm_img_path = "C:\\Users\\DELL\\Projects\\MLS_cluster\\image-v2-timeseries\\newest"
    img = "4Band_Subtracted_20040514_20050427"
    dataset = oi.open_tiff(norm_img_path, img)
    H = dataset[1]
    W = dataset[2]
    n_bands = dataset[3]
    org_data = art.tif2vec(dataset[0])  # NOTE: this step is really important

    # NOTE: Normalize the scale of the original data
    org_data = org_data / org_data.max(axis=0)
    # TODO: normalize the data?

    if isExtract:
        # extract out the changed area
        select_path = "C:\\Users\\DELL\\Projects\\MLS_cluster\\image-v2-timeseries\\EXTRACT"
        select_img = "SOMOCLU_20_20_HDBSCAN_cl_2_2004_2005_min_cluster_size_4_alg_best_"
        simg = oi.open_tiff(select_path, select_img)
        select = simg[0]  # (2720000)
        changePos = DataProcess.selectArea(select, n_bands, -1, isStack=True)
        ns_changePos = DataProcess.selectArea(select, n_bands, -1,
                                              isStack=False)
        ns_nonChangePos = DataProcess.selectArea(select, n_bands, 0,
                                                 isStack=False)
        X_train = org_data[changePos].reshape(-1, n_bands)
        print("shape of original data: ", org_data.shape)
        print("shape of extracted data: ", X_train.shape)
        # to save the final result
        outlier_result = np.zeros_like(select.reshape(-1, 1))
        score_result = np.empty_like(select.reshape(-1, 1))
    else:
        X_train = org_data.reshape(-1, n_bands)
        print("shape of training data: ", X_train.shape)

    for clf_name, clf in classifiers.items():
        if not isExtract:
            clf_name = "no_extract_" + clf_name
        print("running " + clf_name + "...")

        # time.clock() was removed in Python 3.8; perf_counter() is the
        # documented replacement for measuring elapsed time.
        t0 = time.perf_counter()
        clf.fit(X_train)
        usingTime = time.perf_counter() - t0

        # get the prediction labels and outlier scores of the training data
        y_train_pred = clf.labels_  # binary labels (0: inliers, 1: outliers)
        y_train_scores = clf.decision_scores_  # raw outlier scores

        if isExtract:
            # combine the extraction non-changed label&&scores and the algorithm result
            outlier_result[ns_changePos] = y_train_pred
            outlier_result[ns_nonChangePos] = 0
            score_result[ns_changePos] = DataProcess.scaleNormalize(
                y_train_scores, (0, 500)).reshape(-1, )
            score_result[ns_nonChangePos] = 0
        else:
            # no extraction mask: the algorithm result covers every sample
            outlier_result = y_train_pred
            score_result = DataProcess.scaleNormalize(
                y_train_scores, (0, 500)).reshape(-1, )

        print("the scale of the y_train_score is:", y_train_scores.min(),
              y_train_scores.max())
        print("the scale of the score_result is:", score_result.min(),
              score_result.max())

        # save the outlier detection result as .tif and .shp file
        DataProcess.int_to_csv(outlier_save_path, img, outlier_result,
                               clf_name + "_outliers")
        GeoProcess.getSHP(norm_img_path, img, outlier_save_path,
                          clf_name + "_outliers", outlier_result)

        # save the outlier scores as heatmap
        DataProcess.saveHeatMap(score_result.reshape(H, W),
                                outlier_save_path + "\\" + clf_name)

        print("save the information to txt file...")
        with open(
                outlier_save_path + '/' +
                "Outlier Detection Algorithms Running Time.txt",
                'a') as f:
            f.write("detetion algorithm: " + clf_name +
                    "\ndetection using time: " + str(usingTime))
            f.write("\n----------------------------------------------\n")