def load_index_data():
    """Load precomputed train/test sample indices from the working directory
    and assemble the corresponding patch/label dictionaries.

    Returns:
        (TrainData, TestData, train_index, test_index) where the dicts hold
        the patch arrays and one-hot encoded labels.
    """
    cwd = os.getcwd()
    train_index = scipy.io.loadmat(
        os.path.join(cwd, 'TrainIndex.mat'))['TrainIndex'][0]
    test_index = scipy.io.loadmat(
        os.path.join(cwd, 'TestIndex.mat'))['TestIndex'][0]

    def gather(indices):
        # Collect the patches and one-hot labels for the given sample indices.
        # Labels are stored 1-based, hence the -1 before encoding.
        patches = np.array([All_data['patch'][i] for i in indices])
        raw = np.array([All_data['labels'][i] for i in indices])
        return patches, convertToOneHot(raw - 1, num_classes=Num_Classes)

    train_patch, train_labels = gather(train_index)
    test_patch, test_labels = gather(test_index)

    TrainData = {'train_patch': train_patch, 'train_labels': train_labels}
    TestData = {'test_patch': test_patch, 'test_labels': test_labels}
    return TrainData, TestData, train_index, test_index
def __init__(self, pathName, matName, patchSize, portionOrNum, ratio):
    """Load a hyperspectral cube and its label map, then build normalized,
    padded per-class patches split into train/test sets.

    Args:
        pathName: [dataMatPath, labelMatPath] — the two .mat files to load.
        matName: [dataKey, labelKey] — variable names inside each .mat file.
        patchSize: spatial side length of the square patch around each pixel.
        portionOrNum: < 1 → fraction of labeled samples used for training;
            >= 1 → absolute number of training samples (per the prepare
            methods; their exact split policy is defined elsewhere).
        ratio: data-augmentation ratio; 0 disables augmentation.
    """
    # load data
    self.data = scipy.io.loadmat(pathName[0])[matName[0]]
    self.label = scipy.io.loadmat(pathName[1])[matName[1]]
    # prepare some basic propertities
    self.patchSize = patchSize
    # label value 0 is excluded from the class count (background/unlabeled)
    self.numClasses = len(np.unique(self.label)) - 1
    self.height = self.data.shape[0]
    self.width = self.data.shape[1]
    self.bands = self.data.shape[2]
    # one bucket per class; assumes classPatches/classSpectrum/classIndex
    # are declared elsewhere on the class — TODO confirm
    for i in range(self.numClasses):
        self.classPatches.append([])
        self.classSpectrum.append([])
        self.classIndex.append([])
    # normalize each band independently to [0, 1], then pad
    self.data = self.data.astype(float)
    for band in range(self.bands):
        # print(np.min(self.data[:,:,band]))
        self.data[:, :, band] = (self.data[:, :, band] - np.min(self.data[:, :, band])) / \
            (np.max(self.data[:, :, band]) - np.min(self.data[:, :, band]))
    # symmetric padding so border pixels still get full-size patches
    padSize = patchSize // 2
    self.data = np.pad(self.data, ((padSize, padSize),
                                   (padSize, padSize), (0, 0)), "symmetric")
    self.__slice()
    if portionOrNum < 1:
        self.__prepareDataByPortion(portionOrNum)
    else:
        self.__prepareDataByNum(portionOrNum)
    if ratio != 0:
        self.__dataAugment(ratio)
    # materialize the prepared lists as arrays; spectra get a trailing
    # channel axis of size 1
    self.trainLabel = np.array(self.trainLabel)
    self.trainPatch = np.array(self.trainPatch)
    self.trainSpectrum = np.array(self.trainSpectrum)
    self.trainSpectrum = np.reshape(self.trainSpectrum, [-1, self.bands, 1])
    self.testLabel = np.array(self.testLabel)
    self.testPatch = np.array(self.testPatch)
    self.testSpectrum = np.array(self.testSpectrum)
    self.testSpectrum = np.reshape(self.testSpectrum, [-1, self.bands, 1])
    self.trainLabel = convertToOneHot(self.trainLabel,
                                      num_classes=self.numClasses)
    self.testLabel = convertToOneHot(self.testLabel,
                                     num_classes=self.numClasses)
    self.trainNum = self.trainLabel.shape[0]
    self.testNum = self.testLabel.shape[0]
def loadAllLabeledData(self, patchOnly=False):
    """Gather every labeled sample from the per-class stores.

    Args:
        patchOnly: if True, omit the spectrum array from the return value.

    Returns:
        (patch, label, index) when patchOnly is True, otherwise
        (patch, spectrum, label, index). Labels are one-hot encoded;
        spectrum is reshaped to (-1, bands, 1).
    """
    patch = []
    spectrum = []
    label = []
    index = []
    for i in range(self.numClasses):
        patch.extend(self.classPatches[i][j]
                     for j in range(self.numEachClass[i]))
        spectrum.extend(self.classSpectrum[i][j]
                        for j in range(self.numEachClass[i]))
        index.extend(self.classIndex[i][j]
                     for j in range(self.numEachClass[i]))
        label.extend(i for j in range(self.numEachClass[i]))
    patch = np.array(patch)
    spectrum = np.array(spectrum)
    # Pass num_classes explicitly, as every other convertToOneHot call in
    # this class does, so the one-hot width does not silently depend on
    # which class labels happen to be present.
    label = convertToOneHot(np.array(label), num_classes=self.numClasses)
    index = np.array(index)
    spectrum = np.reshape(spectrum, [-1, self.bands, 1])
    if patchOnly:
        return patch, label, index
    else:
        return patch, spectrum, label, index
def loadAllLabeledData(self, patchOnly=False):
    """Collect all labeled samples across every class.

    Args:
        patchOnly: if True, leave the spectrum array out of the result.

    Returns:
        (patch, label, index) when patchOnly is True, otherwise
        (patch, spectrum, label, index); labels are one-hot encoded and
        spectra are reshaped to (-1, bands, 1).
    """
    patches = []
    spectra = []
    labels = []
    indices = []
    for cls in range(self.numClasses):
        count = self.numEachClass[cls]
        for j in range(count):
            patches.append(self.classPatches[cls][j])
            spectra.append(self.classSpectrum[cls][j])
            indices.append(self.classIndex[cls][j])
        labels += [cls] * count
    patch = np.array(patches)
    label = convertToOneHot(np.array(labels))
    index = np.array(indices)
    # give each spectrum a trailing channel axis
    spectrum = np.reshape(np.array(spectra), [-1, self.bands, 1])
    if patchOnly:
        return patch, label, index
    return patch, spectrum, label, index
def __init__(self, pathName, matName, patchSize, portionOrNum, ratio):
    """Load a hyperspectral cube and its label map, normalize/pad it, and
    build train/test splits (variant with index tracking and leftover
    debug code).

    Args:
        pathName: [dataMatPath, labelMatPath] — the two .mat files to load.
        matName: [dataKey, labelKey] — variable names inside each .mat file.
        patchSize: spatial side length of the square patch around each pixel.
        portionOrNum: < 1 → training fraction; >= 1 → training sample count
            (split policy defined by the prepare methods elsewhere).
        ratio: data-augmentation ratio; 0 disables augmentation.
    """
    # load data
    self.data = scipy.io.loadmat(pathName[0])[matName[0]]
    self.label = scipy.io.loadmat(pathName[1])[matName[1]]
    # prepare some basic propertities
    self.patchSize = patchSize
    # label value 0 is excluded from the class count (background/unlabeled)
    self.numClasses = len(np.unique(self.label)) - 1
    self.height = self.data.shape[0]
    self.width = self.data.shape[1]
    self.bands = self.data.shape[2]
    # one bucket per class; assumes these lists are declared elsewhere on
    # the class — TODO confirm
    for i in range(self.numClasses):
        self.classPatches.append([])
        self.classSpectrum.append([])
        self.classIndex.append([])
    # normalize each band independently to [0, 1], then pad
    self.data = self.data.astype(float)
    for band in range(self.bands):
        # print(np.min(self.data[:,:,band]))
        self.data[:, :, band] = (self.data[:, :, band] - np.min(self.data[:, :, band])) / \
            (np.max(self.data[:, :, band]) - np.min(self.data[:, :, band]))
    padSize = patchSize // 2
    # print(np.shape(self.data))
    self.data = np.pad(self.data, ((padSize, padSize),
                                   (padSize, padSize), (0, 0)), "symmetric")
    # print(np.shape(self.data))
    # print(self.height,self.width,self.bands)
    self.__slice()
    # NOTE(review): leftover debug pass — opens seeData.txt but never
    # writes to it (the prints go to stdout); it reports any patch whose
    # shape differs from the previous one.
    with open("seeData.txt", "w+") as f:
        last = np.shape(self.allPatch[0])
        for a, i in enumerate(self.allPatch):
            if last != np.shape(i):
                print(a, "last: ", last, "i: ", np.shape(i))
            last = np.shape(i)
    if portionOrNum < 1:
        self.__prepareDataByRatio(portionOrNum)
    else:
        self.__prepareDataByNum(portionOrNum)
    if ratio != 0:
        self.dataAugment(ratio)
    # materialize the prepared lists as arrays; spectra get a trailing
    # channel axis of size 1
    self.trainLabel = np.array(self.trainLabel)
    self.trainPatch = np.array(self.trainPatch)
    self.trainSpectrum = np.array(self.trainSpectrum)
    self.trainIndex = np.array(self.trainIndex)
    self.trainSpectrum = np.reshape(self.trainSpectrum, [-1, self.bands, 1])
    self.testLabel = np.array(self.testLabel)
    self.testPatch = np.array(self.testPatch)
    self.testSpectrum = np.array(self.testSpectrum)
    self.testSpectrum = np.reshape(self.testSpectrum, [-1, self.bands, 1])
    # print(np.shape(self.trainLabel))
    self.trainLabel = convertToOneHot(self.trainLabel,
                                      num_classes=self.numClasses)
    self.testLabel = convertToOneHot(self.testLabel,
                                     num_classes=self.numClasses)
    self.trainNum = self.trainLabel.shape[0]
    self.testNum = self.testLabel.shape[0]
    # total number of labeled samples across all classes
    for i in range(self.numClasses):
        self.allLabeledNum += self.numEachClass[i]
OS_Aug_Num_Training_Each = Resample_Num_Count # release list to elements Temp1,Temp2 = [],[] for k in range(Num_Classes): Temp1.extend(Train_Patch[k]) Temp2.extend(Train_Label[k]) Train_Patch = Temp1 Train_Label = Temp2 Train_Patch = np.array(Train_Patch) # Convert the labels to One-Hot vector Train_Label = np.array(Train_Label) Test_Label = np.array(Test_Label) Train_Label = convertToOneHot(Train_Label,num_classes=Num_Classes) Test_Label = convertToOneHot(Test_Label,num_classes=Num_Classes) ## Save the patches in segments # Train Data train_dict = {} num_train = len(Train_Patch) num_train_file = 10 num_each_file = int(num_train / num_train_file) res_num = num_train - num_train_file * num_each_file Num_Each_File = num_each_file * np.ones((1,num_train_file),dtype=int) Num_Each_File = Num_Each_File[0] Num_Each_File[num_train_file-1] = Num_Each_File[num_train_file-1] + res_num start = 0 for i in range(num_train_file): file_name = 'Train_'+str(patch_size)+'_'+str(i+1)+'.mat'
def train(self):
    """Run k-fold training/evaluation, recording accuracy and weighted F1
    for train/val/test under both the best-F1 and best-loss checkpoints,
    and saving each result set under result_rollback/.

    Returns:
        dict of the metric arrays (as saved for the best-loss checkpoint).
    """
    n_folds = self._n_folds
    # per-fold metric accumulators
    self.train_accs = np.zeros((n_folds))
    self.val_accs = np.zeros((n_folds))
    self.test_accs = np.zeros((n_folds))
    self.train_f1s = np.zeros((n_folds))
    self.val_f1s = np.zeros((n_folds))
    self.test_f1s = np.zeros((n_folds))
    self.max_f1s = np.zeros((n_folds))
    y = convertToOneHot(self._labels)
    for fold, (train_idx, test_idx, val_idx) in enumerate(zip(*self._picked_k_fold)):
        # NOTE(review): only fold 1 is actually trained; every other fold
        # is skipped — looks like a temporary debugging restriction.
        if fold != 1:
            print("skip fold : {}".format(fold))
        else:
            # the three index sets must partition the whole dataset
            assert set.intersection(set(train_idx), set(test_idx)) == set()
            assert set.intersection(set(train_idx), set(val_idx)) == set()
            assert set.union(set(train_idx), set(test_idx), set(val_idx)) == set(np.arange(self._N))
            train_dataset = Dataset(self._graphs[train_idx], y[train_idx])
            test_dataset = Dataset(self._graphs[test_idx], y[test_idx])
            val_dataset = Dataset(self._graphs[val_idx], y[val_idx])
            print('Split size')
            print(' - Training : {}/{}'.format(train_dataset.N, self._N ))
            print(' - Testing : {}/{}'.format(test_dataset.N, self._N ))
            print(' - Validation : {}/{}'.format(val_dataset.N, self._N ))
            # Change the saved directory
            self.system._params['save_dir'] = self._save_dir + '_{}'.format(fold)
            self.system._params['summary_dir'] = self._summary_dir + '_{}'.format(fold)
            self.system.train(train_dataset, val_dataset, resume=False)
            checkpoint_f1, checkpoint_loss = self.system.best_checkpoint
            # Average over 10 realization to be even more precise.
            self.system.params['average_validation'] = 10
            # --- metrics at the best-F1 checkpoint ---
            pred_train, labels_train,_ = self.system.predict_dataset(train_dataset, checkpoint=checkpoint_f1)
            pred_valid, labels_valid,_ = self.system.predict_dataset(val_dataset, checkpoint=checkpoint_f1)
            pred_test, labels_test,_ = self.system.predict_dataset(test_dataset, checkpoint=checkpoint_f1)
            self.train_f1s[fold] = f1_score(labels_train, pred_train, average='weighted')
            self.train_accs[fold] = accuracy_score(labels_train, pred_train)
            self.val_f1s[fold] = f1_score(labels_valid, pred_valid, average='weighted')
            self.val_accs[fold] = accuracy_score(labels_valid, pred_valid)
            self.test_f1s[fold] = f1_score(labels_test, pred_test, average='weighted')
            self.test_accs[fold] = accuracy_score(labels_test, pred_test)
            self.max_f1s[fold] = self.system.max_f1
            ret = dict()
            ret['train_accs'] = self.train_accs
            ret['val_accs'] = self.val_accs
            ret['test_accs'] = self.test_accs
            ret['train_f1s'] = self.train_f1s
            ret['val_f1s'] = self.val_f1s
            ret['test_f1s'] = self.test_f1s
            ret['max_f1s'] = self.max_f1s
            np.savez('result_rollback/{}/f1_{}'.format(self._dbname, self._name), **ret, n_folds=self._n_folds, picked_k_fold=self._picked_k_fold)
            # --- metrics at the best-loss checkpoint (overwrites the same
            # accumulator slots) ---
            pred_train, labels_train,_ = self.system.predict_dataset(train_dataset, checkpoint=checkpoint_loss)
            pred_valid, labels_valid,_ = self.system.predict_dataset(val_dataset, checkpoint=checkpoint_loss)
            pred_test, labels_test,_ = self.system.predict_dataset(test_dataset, checkpoint=checkpoint_loss)
            self.train_f1s[fold] = f1_score(labels_train, pred_train, average='weighted')
            self.train_accs[fold] = accuracy_score(labels_train, pred_train)
            self.val_f1s[fold] = f1_score(labels_valid, pred_valid, average='weighted')
            self.val_accs[fold] = accuracy_score(labels_valid, pred_valid)
            self.test_f1s[fold] = f1_score(labels_test, pred_test, average='weighted')
            self.test_accs[fold] = accuracy_score(labels_test, pred_test)
            self.max_f1s[fold] = self.system.max_f1
            ret = dict()
            ret['train_accs'] = self.train_accs
            ret['val_accs'] = self.val_accs
            ret['test_accs'] = self.test_accs
            ret['train_f1s'] = self.train_f1s
            ret['val_f1s'] = self.val_f1s
            ret['test_f1s'] = self.test_f1s
            ret['max_f1s'] = self.max_f1s
            np.savez('result_rollback/{}/loss_{}'.format(self._dbname, self._name), **ret, n_folds=self._n_folds, picked_k_fold=self._picked_k_fold)
    # NOTE(review): ret is only bound when fold 1 was processed; a k-fold
    # setup that never reaches fold 1 would raise NameError here — confirm.
    return ret
def train(self):
    """K-fold training variant that supports optional node attributes and
    saves features/metrics under result_vis/. Deliberately aborts when it
    reaches fold 1 (debug guard), so only fold 0 is processed.

    Returns:
        dict of metric arrays plus save_info from the test prediction.
    """
    n_folds = self._n_folds
    # per-fold metric accumulators
    self.train_accs = np.zeros((n_folds))
    self.val_accs = np.zeros((n_folds))
    self.test_accs = np.zeros((n_folds))
    self.train_f1s = np.zeros((n_folds))
    self.val_f1s = np.zeros((n_folds))
    self.test_f1s = np.zeros((n_folds))
    self.max_f1s = np.zeros((n_folds))
    y = convertToOneHot(self._labels)
    for fold, (train_idx, test_idx, val_idx) in enumerate(zip(*self._picked_k_fold)):
        # NOTE(review): intentional hard stop after fold 0 — debugging guard.
        if fold == 1:
            raise ValueError('stop')
        # the three index sets must partition the whole dataset
        assert set.intersection(set(train_idx), set(test_idx)) == set()
        assert set.intersection(set(train_idx), set(val_idx)) == set()
        assert set.union(set(train_idx), set(test_idx), set(val_idx)) == set(np.arange(self._N))
        if self._node_attributes is not None:
            train_dataset = Dataset(self._graphs[train_idx], y[train_idx], node_attributes=self._node_attributes[train_idx])
            test_dataset = Dataset(self._graphs[test_idx], y[test_idx], node_attributes=self._node_attributes[test_idx])
            val_dataset = Dataset(self._graphs[val_idx], y[val_idx], node_attributes=self._node_attributes[val_idx])
            print("we use node attributes")
        else:
            train_dataset = Dataset(self._graphs[train_idx], y[train_idx])
            test_dataset = Dataset(self._graphs[test_idx], y[test_idx])
            val_dataset = Dataset(self._graphs[val_idx], y[val_idx])
            print("we don't use node attributes")
        print('Split size')
        print(' - Training : {}/{}'.format(train_dataset.N, self._N ))
        print(' - Testing : {}/{}'.format(test_dataset.N, self._N ))
        print(' - Validation : {}/{}'.format(val_dataset.N, self._N ))
        # Change the saved directory
        #embed()
        self.system._params['save_dir'] = self._save_dir + '_{}'.format(fold)
        self.system._params['summary_dir'] = self._summary_dir + '_{}'.format(fold)
        #embed()
        self.system.train(train_dataset, val_dataset, test_dataset, resume=False)
        #embed()
        #raise ValueError('he')
        checkpoint_f1, checkpoint_loss = self.system.best_checkpoint
        # Average over 10 realization to be even more precise.
        self.system.params['average_validation'] = 1
        #pred_train, labels_train,_,_,_ = self.system.predict_dataset(train_dataset, checkpoint=checkpoint_loss)
        #pred_valid, labels_valid,_,_,_ = self.system.predict_dataset(val_dataset, checkpoint=checkpoint_loss)
        #pred_test, labels_test,_,_,_ = self.system.predict_dataset(test_dataset, checkpoint=checkpoint_loss)
        #
        #self.train_f1s[fold] = f1_score(labels_train, pred_train, average='weighted')
        #self.train_accs[fold] = accuracy_score(labels_train, pred_train)
        #self.val_f1s[fold] = f1_score(labels_valid, pred_valid, average='weighted')
        #self.val_accs[fold] = accuracy_score(labels_valid, pred_valid)
        #self.test_f1s[fold] = f1_score(labels_test, pred_test, average='weighted')
        #self.test_accs[fold] = accuracy_score(labels_test, pred_test)
        #
        #self.max_f1s[fold] = self.system.max_f1
        #
        #ret = dict()
        #ret['train_accs'] = self.train_accs
        #ret['val_accs'] = self.val_accs
        #ret['test_accs'] = self.test_accs
        #ret['train_f1s'] = self.train_f1s
        #ret['val_f1s'] = self.val_f1s
        #ret['test_f1s'] = self.test_f1s
        #ret['max_f1s'] = self.max_f1s
        #
        #np.savez('result_vis/{}/f1_{}'.format(self._dbname, self._name), **ret, n_folds=self._n_folds, picked_k_fold=self._picked_k_fold)
        # --- metrics at the best-loss checkpoint; test prediction also
        # returns save_info (saved features) via savefeat=True ---
        pred_train, labels_train,_,_ = self.system.predict_dataset(train_dataset, checkpoint=checkpoint_loss)
        pred_valid, labels_valid,_,_ = self.system.predict_dataset(val_dataset, checkpoint=checkpoint_loss)
        pred_test, labels_test,_, save_info = self.system.predict_dataset(test_dataset, checkpoint=checkpoint_loss, savefeat=True)
        #embed()
        self.train_f1s[fold] = f1_score(labels_train, pred_train, average='weighted')
        self.train_accs[fold] = accuracy_score(labels_train, pred_train)
        self.val_f1s[fold] = f1_score(labels_valid, pred_valid, average='weighted')
        self.val_accs[fold] = accuracy_score(labels_valid, pred_valid)
        self.test_f1s[fold] = f1_score(labels_test, pred_test, average='weighted')
        self.test_accs[fold] = accuracy_score(labels_test, pred_test)
        self.max_f1s[fold] = self.system.max_f1
        ret = dict()
        ret['train_accs'] = self.train_accs
        ret['val_accs'] = self.val_accs
        ret['test_accs'] = self.test_accs
        ret['train_f1s'] = self.train_f1s
        ret['val_f1s'] = self.val_f1s
        ret['test_f1s'] = self.test_f1s
        ret['max_f1s'] = self.max_f1s
        ret['save_info'] = save_info
        #ret['node_embedding'] = node_embedding
        #ret['graph_embedding'] = graph_embedding
        #ret['test_idx'] = test_idx
        np.savez('result_vis/{}/loss_{}'.format(self._dbname, self._name), **ret, n_folds=self._n_folds, picked_k_fold=self._picked_k_fold)
    # NOTE(review): ret is only bound after a fold has been processed —
    # with the guard above this is only reached if fewer than 2 folds exist.
    return ret