def load_index_data(data_path=None):
    """Load the stored train/test split and gather the matching samples.

    Reads ``TrainIndex.mat`` and ``TestIndex.mat`` from ``data_path``, takes
    the first row of the ``TrainIndex`` / ``TestIndex`` variables as sample
    indices, and uses them to pick patches and labels out of the
    module-level ``All_data`` dictionary.  Labels have one subtracted from
    them and are then one-hot encoded with ``Num_Classes`` columns.

    Args:
        data_path: Directory holding the two ``.mat`` files.  Defaults to
            the current working directory, which is the location that was
            previously hard-coded.

    Returns:
        A tuple ``(TrainData, TestData, train_index, test_index)``.
        ``TrainData`` has the keys ``'train_patch'`` and ``'train_labels'``,
        ``TestData`` has the keys ``'test_patch'`` and ``'test_labels'``,
        and the two index arrays are the first rows read from the files.
    """
    if data_path is None:
        data_path = os.getcwd()

    def read_index(variable):
        # The file name and the variable stored inside it share one stem.
        mat = scipy.io.loadmat(os.path.join(data_path, variable + '.mat'))
        return mat[variable][0]

    def gather(indices):
        patches = np.array([All_data['patch'][i] for i in indices])
        labels = np.array([All_data['labels'][i] for i in indices])
        # NOTE(review): the "- 1" presumably maps 1-based class ids onto
        # 0-based one-hot columns -- confirm against how All_data is built.
        return patches, convertToOneHot(labels - 1, num_classes=Num_Classes)

    train_index = read_index('TrainIndex')
    test_index = read_index('TestIndex')

    train_patch, train_labels = gather(train_index)
    TrainData = {'train_patch': train_patch, 'train_labels': train_labels}

    test_patch, test_labels = gather(test_index)
    TestData = {'test_patch': test_patch, 'test_labels': test_labels}

    return TrainData, TestData, train_index, test_index
Esempio n. 2
0
    def __init__(self, pathName, matName, patchSize, portionOrNum, ratio):
        """Load a data cube and label map, then build train/test arrays.

        Args:
            pathName: Indexable pair; item 0 is the ``.mat`` path of the
                data, item 1 the ``.mat`` path of the labels.
            matName: Indexable pair with the variable name to read from
                each of the two files, in the same order.
            patchSize: Stored as ``self.patchSize``; half of it (integer
                division) is used as the spatial padding width.
            portionOrNum: Values below 1 select
                ``__prepareDataByPortion``; anything else selects
                ``__prepareDataByNum``.
            ratio: When non-zero, ``__dataAugment(ratio)`` is run after the
                split has been prepared.
        """
        data_file, label_file = pathName[0], pathName[1]
        self.data = scipy.io.loadmat(data_file)[matName[0]]
        self.label = scipy.io.loadmat(label_file)[matName[1]]

        # Basic properties.  One distinct label value is not counted as a
        # class (presumably the unlabeled/background value -- TODO confirm).
        self.patchSize = patchSize
        self.numClasses = len(np.unique(self.label)) - 1
        dims = self.data.shape
        self.height = dims[0]
        self.width = dims[1]
        self.bands = dims[2]

        # One empty bucket per class.  The three containers are not created
        # here, so they must already exist on the instance or its class.
        for _ in range(self.numClasses):
            self.classPatches.append([])
            self.classSpectrum.append([])
            self.classIndex.append([])

        # Min-max scale every band on its own, then pad the two spatial axes.
        # NOTE(review): a band whose values are all equal gives a zero
        # denominator here.
        self.data = self.data.astype(float)
        for b in range(self.bands):
            plane = self.data[:, :, b]
            lowest = np.min(plane)
            span = np.max(plane) - lowest
            self.data[:, :, b] = (plane - lowest) / span
        margin = patchSize // 2
        spatial = (margin, margin)
        self.data = np.pad(self.data, (spatial, spatial, (0, 0)),
                           mode="symmetric")

        self.__slice()
        if portionOrNum < 1:
            self.__prepareDataByPortion(portionOrNum)
        else:
            self.__prepareDataByNum(portionOrNum)
        if ratio != 0:
            self.__dataAugment(ratio)

        # Freeze the collected samples into arrays; spectra get a trailing
        # singleton axis and labels are one-hot encoded.
        self.trainPatch = np.array(self.trainPatch)
        self.trainSpectrum = np.array(self.trainSpectrum).reshape(
            -1, self.bands, 1)
        self.testPatch = np.array(self.testPatch)
        self.testSpectrum = np.array(self.testSpectrum).reshape(
            -1, self.bands, 1)

        self.trainLabel = convertToOneHot(np.array(self.trainLabel),
                                          num_classes=self.numClasses)
        self.testLabel = convertToOneHot(np.array(self.testLabel),
                                         num_classes=self.numClasses)
        self.trainNum = self.trainLabel.shape[0]
        self.testNum = self.testLabel.shape[0]
Esempio n. 3
0
 def loadAllLabeledData(self, patchOnly=False):
     """Collect the labeled samples of every class into flat arrays.

     For each class ``i``, in class order, the first
     ``self.numEachClass[i]`` entries of ``self.classPatches[i]``,
     ``self.classSpectrum[i]`` and ``self.classIndex[i]`` are taken.

     Args:
         patchOnly: When true, the spectrum array is left out of the
             returned tuple.

     Returns:
         ``(patch, label, index)`` when ``patchOnly`` is true, otherwise
         ``(patch, spectrum, label, index)``.  ``label`` is one-hot encoded
         with ``self.numClasses`` columns and ``spectrum`` is reshaped to
         ``(-1, self.bands, 1)``.
     """
     patch, spectrum, label, index = [], [], [], []
     for cls in range(self.numClasses):
         count = self.numEachClass[cls]
         patch.extend(self.classPatches[cls][k] for k in range(count))
         spectrum.extend(self.classSpectrum[cls][k] for k in range(count))
         index.extend(self.classIndex[cls][k] for k in range(count))
         label.extend(cls for _ in range(count))

     patch = np.array(patch)
     spectrum = np.reshape(np.array(spectrum), [-1, self.bands, 1])
     # Pin the one-hot width to the class count, as the other callers of
     # convertToOneHot do, so the label matrix keeps one column per class
     # even if the highest-numbered classes contribute no samples.
     label = convertToOneHot(np.array(label), num_classes=self.numClasses)
     index = np.array(index)

     if patchOnly:
         return patch, label, index
     return patch, spectrum, label, index
Esempio n. 4
0
 def loadAllLabeledData(self, patchOnly=False):
     """Return every labeled sample, concatenated class by class.

     Classes are visited in increasing order; for class ``i`` the first
     ``self.numEachClass[i]`` items of the per-class patch, spectrum and
     index lists are appended to the output.

     Args:
         patchOnly: If true, omit the spectrum array from the result.

     Returns:
         ``(patch, label, index)`` if ``patchOnly`` is true, else
         ``(patch, spectrum, label, index)``.  ``label`` is a one-hot matrix
         with ``self.numClasses`` columns; ``spectrum`` has shape
         ``(-1, self.bands, 1)``.
     """
     patch = []
     spectrum = []
     label = []
     index = []
     for i in range(self.numClasses):
         taken = range(self.numEachClass[i])
         patch.extend(self.classPatches[i][j] for j in taken)
         spectrum.extend(self.classSpectrum[i][j] for j in taken)
         index.extend(self.classIndex[i][j] for j in taken)
         label.extend(i for _ in taken)

     patch = np.array(patch)
     spectrum = np.array(spectrum)
     # Pass the class count explicitly (as the other convertToOneHot calls
     # do) so the number of label columns does not depend on which classes
     # happen to have samples.
     label = convertToOneHot(np.array(label), num_classes=self.numClasses)
     index = np.array(index)
     spectrum = np.reshape(spectrum, [-1, self.bands, 1])

     if patchOnly:
         return patch, label, index
     return patch, spectrum, label, index
Esempio n. 5
0
    def __init__(self, pathName, matName, patchSize, portionOrNum, ratio):
        """Load a data cube and label map, then build train/test arrays.

        Unlike the previous version, construction no longer creates (or
        truncates) an unused ``seeData.txt`` file in the working directory;
        the patch-shape diagnostic it wrapped is still printed.

        Args:
            pathName: Indexable pair; item 0 is the ``.mat`` path of the
                data, item 1 the ``.mat`` path of the labels.
            matName: Indexable pair with the variable name to read from
                each of the two files, in the same order.
            patchSize: Stored as ``self.patchSize``; half of it (integer
                division) is used as the spatial padding width.
            portionOrNum: Values below 1 select ``__prepareDataByRatio``;
                anything else selects ``__prepareDataByNum``.
            ratio: When non-zero, ``dataAugment(ratio)`` is run after the
                split has been prepared.
        """
        # load data
        self.data = scipy.io.loadmat(pathName[0])[matName[0]]
        self.label = scipy.io.loadmat(pathName[1])[matName[1]]

        # Basic properties.  One distinct label value is not counted as a
        # class (presumably the unlabeled/background value -- TODO confirm).
        self.patchSize = patchSize
        self.numClasses = len(np.unique(self.label)) - 1
        self.height = self.data.shape[0]
        self.width = self.data.shape[1]
        self.bands = self.data.shape[2]

        # One empty bucket per class.  The three containers are not created
        # here, so they must already exist on the instance or its class.
        for _ in range(self.numClasses):
            self.classPatches.append([])
            self.classSpectrum.append([])
            self.classIndex.append([])

        # Min-max scale every band on its own, then pad the two spatial axes.
        # NOTE(review): a band whose values are all equal gives a zero
        # denominator here.
        self.data = self.data.astype(float)
        for band in range(self.bands):
            plane = self.data[:, :, band]
            low = np.min(plane)
            self.data[:, :, band] = (plane - low) / (np.max(plane) - low)
        padSize = patchSize // 2
        self.data = np.pad(self.data,
                           ((padSize, padSize), (padSize, padSize), (0, 0)),
                           "symmetric")

        self.__slice()

        # Diagnostic: report every position where the shape of a patch
        # differs from the shape of the patch before it.
        last = np.shape(self.allPatch[0])
        for position, patch in enumerate(self.allPatch):
            shape = np.shape(patch)
            if last != shape:
                print(position, "last: ", last, "i: ", shape)
            last = shape

        if portionOrNum < 1:
            self.__prepareDataByRatio(portionOrNum)
        else:
            self.__prepareDataByNum(portionOrNum)
        if ratio != 0:
            self.dataAugment(ratio)

        # Freeze the collected samples into arrays; spectra get a trailing
        # singleton axis.
        self.trainLabel = np.array(self.trainLabel)
        self.trainPatch = np.array(self.trainPatch)
        self.trainSpectrum = np.array(self.trainSpectrum)
        self.trainIndex = np.array(self.trainIndex)
        self.trainSpectrum = np.reshape(self.trainSpectrum,
                                        [-1, self.bands, 1])
        self.testLabel = np.array(self.testLabel)
        self.testPatch = np.array(self.testPatch)
        self.testSpectrum = np.array(self.testSpectrum)
        self.testSpectrum = np.reshape(self.testSpectrum, [-1, self.bands, 1])

        self.trainLabel = convertToOneHot(self.trainLabel,
                                          num_classes=self.numClasses)
        self.testLabel = convertToOneHot(self.testLabel,
                                         num_classes=self.numClasses)
        self.trainNum = self.trainLabel.shape[0]
        self.testNum = self.testLabel.shape[0]

        # Add the per-class counts to the running total.  allLabeledNum is
        # not initialised here, so it must already exist on the instance or
        # its class.
        for i in range(self.numClasses):
            self.allLabeledNum += self.numEachClass[i]
    OS_Aug_Num_Training_Each = Resample_Num_Count
            
# Flatten the per-class lists: Train_Patch[k] / Train_Label[k] hold the
# entries of class k, and are merged here into one flat list each.
Temp1,Temp2 = [],[]
for k in range(Num_Classes):
    Temp1.extend(Train_Patch[k])
    Temp2.extend(Train_Label[k])
Train_Patch = Temp1
Train_Label = Temp2

Train_Patch = np.array(Train_Patch)

# Convert the train and test labels to one-hot vectors with Num_Classes
# columns.
Train_Label = np.array(Train_Label)
Test_Label = np.array(Test_Label)
Train_Label = convertToOneHot(Train_Label,num_classes=Num_Classes)
Test_Label = convertToOneHot(Test_Label,num_classes=Num_Classes)

## Save the patches in segments
# Train Data: split the training patches over num_train_file files.
train_dict = {}
num_train = len(Train_Patch)
num_train_file = 10
# Every file gets the same whole number of patches ...
num_each_file = int(num_train / num_train_file)
# ... and whatever is left over after that even split ...
res_num = num_train - num_train_file * num_each_file
Num_Each_File = num_each_file * np.ones((1,num_train_file),dtype=int)
# np.ones((1, n)) is two-dimensional; keep only its single row.
Num_Each_File = Num_Each_File[0]
# ... is added to the count of the last file.
Num_Each_File[num_train_file-1] = Num_Each_File[num_train_file-1] + res_num
start = 0
for i in range(num_train_file):
    # File names are numbered from 1: Train_<patch_size>_<i+1>.mat
    file_name = 'Train_'+str(patch_size)+'_'+str(i+1)+'.mat'
Esempio n. 7
0
    def train(self):
        """Train on fold 1 of the picked split and score two checkpoints.

        Every fold other than fold 1 is skipped with a message.  For fold 1
        the system is trained on the training split with the validation
        split, then the train/validation/test splits are predicted and
        scored (weighted F1 and accuracy) twice: first with the first item
        of ``self.system.best_checkpoint`` (results saved under
        ``result_rollback/<dbname>/f1_<name>``) and then with the second
        item (saved under ``result_rollback/<dbname>/loss_<name>``).  The
        second pass overwrites the first in the per-fold arrays, so the
        arrays left on ``self`` hold the scores of the second checkpoint.

        Returns:
            dict mapping ``'train_accs'``, ``'val_accs'``, ``'test_accs'``,
            ``'train_f1s'``, ``'val_f1s'``, ``'test_f1s'`` and ``'max_f1s'``
            to the per-fold arrays stored on ``self``.  The dict is returned
            (with all-zero arrays) even when fold 1 does not exist; the
            previous code raised ``UnboundLocalError`` in that case.

        Raises:
            AssertionError: If the splits of fold 1 overlap or do not cover
                all ``self._N`` samples.
        """
        n_folds = self._n_folds
        self.train_accs = np.zeros((n_folds))
        self.val_accs   = np.zeros((n_folds))
        self.test_accs  = np.zeros((n_folds))
        self.train_f1s  = np.zeros((n_folds))
        self.val_f1s    = np.zeros((n_folds))
        self.test_f1s   = np.zeros((n_folds))
        self.max_f1s    = np.zeros((n_folds))

        # Built once up front: the values are the arrays above, which are
        # filled in place, so the dict always reflects the latest scores and
        # is defined even if no fold is evaluated.
        ret = {
            'train_accs': self.train_accs,
            'val_accs': self.val_accs,
            'test_accs': self.test_accs,
            'train_f1s': self.train_f1s,
            'val_f1s': self.val_f1s,
            'test_f1s': self.test_f1s,
            'max_f1s': self.max_f1s,
        }

        def evaluate_and_save(fold, splits, checkpoint, tag):
            # Predict all three splits first, then score them, then save.
            outputs = []
            for dataset in splits:
                pred, labels, _ = self.system.predict_dataset(dataset, checkpoint=checkpoint)
                outputs.append((labels, pred))

            targets = (
                (self.train_f1s, self.train_accs),
                (self.val_f1s, self.val_accs),
                (self.test_f1s, self.test_accs),
            )
            for (labels, pred), (f1s, accs) in zip(outputs, targets):
                f1s[fold] = f1_score(labels, pred, average='weighted')
                accs[fold] = accuracy_score(labels, pred)

            self.max_f1s[fold] = self.system.max_f1

            np.savez('result_rollback/{}/{}_{}'.format(self._dbname, tag, self._name),
                     **ret, n_folds=self._n_folds, picked_k_fold=self._picked_k_fold)

        y = convertToOneHot(self._labels)
        for fold, (train_idx, test_idx, val_idx) in enumerate(zip(*self._picked_k_fold)):
            # NOTE(review): only fold 1 is ever trained; this looks like a
            # leftover experiment switch -- confirm it is intended.
            if fold != 1:
                print("skip fold : {}".format(fold))
                continue

            train_set, test_set = set(train_idx), set(test_idx)
            assert train_set.isdisjoint(test_set)
            val_set = set(val_idx)
            assert train_set.isdisjoint(val_set)
            assert train_set | test_set | val_set == set(np.arange(self._N))

            train_dataset = Dataset(self._graphs[train_idx], y[train_idx])
            test_dataset  = Dataset(self._graphs[test_idx], y[test_idx])
            val_dataset   = Dataset(self._graphs[val_idx], y[val_idx])
            print('Split size')
            print('  - Training   : {}/{}'.format(train_dataset.N, self._N ))
            print('  - Testing    : {}/{}'.format(test_dataset.N, self._N ))
            print('  - Validation : {}/{}'.format(val_dataset.N, self._N ))

            # Give every fold its own checkpoint and summary directory.
            self.system._params['save_dir'] = self._save_dir + '_{}'.format(fold)
            self.system._params['summary_dir'] = self._summary_dir + '_{}'.format(fold)

            self.system.train(train_dataset, val_dataset, resume=False)

            checkpoint_f1, checkpoint_loss = self.system.best_checkpoint
            # Average over 10 realization to be even more precise.
            self.system.params['average_validation'] = 10

            splits = (train_dataset, val_dataset, test_dataset)
            evaluate_and_save(fold, splits, checkpoint_f1, 'f1')
            evaluate_and_save(fold, splits, checkpoint_loss, 'loss')

        return ret
Esempio n. 8
0
    def train(self):
        """Train and score the model fold by fold, stopping at fold 1.

        For each fold the train/test/validation splits are checked, wrapped
        in ``Dataset`` objects (with node attributes when
        ``self._node_attributes`` is set), and the system is trained.  The
        three splits are then predicted with the second item of
        ``self.system.best_checkpoint``, scored with weighted F1 and
        accuracy, and the results are saved under
        ``result_vis/<dbname>/loss_<name>``.  The first item of
        ``best_checkpoint`` is unpacked but not used.

        Returns:
            dict with the per-fold arrays ``'train_accs'``, ``'val_accs'``,
            ``'test_accs'``, ``'train_f1s'``, ``'val_f1s'``, ``'test_f1s'``,
            ``'max_f1s'`` and the ``'save_info'`` value returned by the
            test-split prediction of the last processed fold.

        Raises:
            ValueError: With the message ``'stop'`` as soon as fold 1 is
                reached, so only fold 0 is ever processed.
            AssertionError: If the splits of a fold overlap or do not cover
                all ``self._N`` samples.
        """
        n_folds = self._n_folds
        (self.train_accs, self.val_accs, self.test_accs,
         self.train_f1s, self.val_f1s, self.test_f1s,
         self.max_f1s) = (np.zeros(n_folds) for _ in range(7))

        y = convertToOneHot(self._labels)

        def make_dataset(idx, with_attributes):
            # Wrap one split, attaching node attributes when requested.
            if with_attributes:
                return Dataset(self._graphs[idx], y[idx], node_attributes=self._node_attributes[idx])
            return Dataset(self._graphs[idx], y[idx])

        for fold, (train_idx, test_idx, val_idx) in enumerate(zip(*self._picked_k_fold)):
            # NOTE(review): hard stop at the second fold; looks like a
            # debugging switch -- confirm before relying on multi-fold runs.
            if fold == 1:
                raise ValueError('stop')

            train_set, test_set = set(train_idx), set(test_idx)
            assert train_set.isdisjoint(test_set)
            val_set = set(val_idx)
            assert train_set.isdisjoint(val_set)
            assert train_set | test_set | val_set == set(np.arange(self._N))

            with_attributes = self._node_attributes is not None
            train_dataset = make_dataset(train_idx, with_attributes)
            test_dataset = make_dataset(test_idx, with_attributes)
            val_dataset = make_dataset(val_idx, with_attributes)
            if with_attributes:
                print("we use node attributes")
            else:
                print("we don't use node attributes")

            print('Split size')
            print('  - Training   : {}/{}'.format(train_dataset.N, self._N ))
            print('  - Testing    : {}/{}'.format(test_dataset.N, self._N ))
            print('  - Validation : {}/{}'.format(val_dataset.N, self._N ))

            # Give every fold its own checkpoint and summary directory.
            suffix = '_{}'.format(fold)
            self.system._params['save_dir'] = self._save_dir + suffix
            self.system._params['summary_dir'] = self._summary_dir + suffix

            self.system.train(train_dataset, val_dataset, test_dataset, resume=False)

            checkpoint_f1, checkpoint_loss = self.system.best_checkpoint
            # A single validation realization is used for the final scoring.
            self.system.params['average_validation'] = 1

            # Only the test-split prediction asks for saved features.
            pred_train, labels_train, _, _ = self.system.predict_dataset(
                train_dataset, checkpoint=checkpoint_loss)
            pred_valid, labels_valid, _, _ = self.system.predict_dataset(
                val_dataset, checkpoint=checkpoint_loss)
            pred_test, labels_test, _, save_info = self.system.predict_dataset(
                test_dataset, checkpoint=checkpoint_loss, savefeat=True)

            scored = (
                (self.train_f1s, self.train_accs, labels_train, pred_train),
                (self.val_f1s, self.val_accs, labels_valid, pred_valid),
                (self.test_f1s, self.test_accs, labels_test, pred_test),
            )
            for f1s, accs, labels, pred in scored:
                f1s[fold] = f1_score(labels, pred, average='weighted')
                accs[fold] = accuracy_score(labels, pred)

            self.max_f1s[fold] = self.system.max_f1

            ret = {
                'train_accs': self.train_accs,
                'val_accs': self.val_accs,
                'test_accs': self.test_accs,
                'train_f1s': self.train_f1s,
                'val_f1s': self.val_f1s,
                'test_f1s': self.test_f1s,
                'max_f1s': self.max_f1s,
                'save_info': save_info,
            }

            np.savez('result_vis/{}/loss_{}'.format(self._dbname, self._name),
                     **ret, n_folds=self._n_folds, picked_k_fold=self._picked_k_fold)

        return ret