class PCA(Model):
    """Principal-component model over vectorized shape representations.

    Wraps an ``IncrementalPCA`` estimator and adapts it to project
    representation objects (anything exposing ``to_vector`` /
    ``from_vector`` / ``mesh``).
    """

    def __init__(self, fn=None, n_comp=None, batch_size=None):
        """Create the underlying estimator and record its configuration.

        :param fn: optional filename associated with this model.
        :param n_comp: number of components to keep (None = keep all).
        :param batch_size: incremental-fit batch size (None = sklearn default).
        """
        self.model = IncrementalPCA()
        self.fn = fn
        self.params = {"n_components": n_comp, "batch_size": batch_size}
        # Push self.params into the estimator (set_params comes from Model).
        self.set_params()

    def load(self, fn):
        """Load the model from *fn*, then refresh the cached parameter dict."""
        super().load(fn)
        self.params = self.model.get_params()

    def fit(self, reps):
        """Fit the PCA on a list of representation objects."""
        vectors = [rep.to_vector() for rep in reps]
        self.model.fit(vectors)

    def err(self, to_transform, to_check_against):
        """Mesh error between the PCA round-trip of *to_transform* and
        the mesh of *to_check_against*.

        *to_transform* is projected into PCA space and reconstructed; the
        reconstruction's mesh is compared against *to_check_against*'s mesh.
        """
        reduced = self.model.transform(to_transform.to_vector())
        restored = self.model.inverse_transform(reduced)
        reconstructed = to_transform.from_vector(restored)
        return representation.mesh_error(reconstructed.mesh(),
                                         to_check_against.mesh())
class IPCA(object):
    """Thin delegating wrapper around sklearn's ``IncrementalPCA``."""

    def __init__(self, n_components=None, whiten=False, copy=True,
                 batch_size=None):
        """
        :param n_components: int or None (default). Number of components to
            keep; when None, min(n_samples, n_features) are kept.
        :param whiten: bool, optional, default False. When True, the
            ``components_`` vectors are divided by n_samples * components_ so
            that outputs are uncorrelated with unit component-wise variance.
        :param copy: bool, default True. When False, x is overwritten in
            place — saves memory but destroys the caller's input.
        :param batch_size: int or None (default). Samples per batch; only
            used in ``fit``. When None, sklearn picks 5 * n_features to
            balance accuracy against memory cost.
        """
        self.model = IncrementalPCA(n_components=n_components, whiten=whiten,
                                    copy=copy, batch_size=batch_size)

    def fit(self, x, y=None):
        """Fit the estimator on x (y is ignored, kept for API symmetry)."""
        self.model.fit(X=x, y=y)

    def transform(self, x):
        """Project x onto the fitted components."""
        return self.model.transform(X=x)

    def fit_transform(self, x, y=None):
        """Fit on x, then return its projection."""
        return self.model.fit_transform(X=x, y=y)

    def get_params(self, deep=True):
        """Return the estimator's parameters."""
        return self.model.get_params(deep=deep)

    def set_params(self, **params):
        """Update the estimator's parameters."""
        self.model.set_params(**params)

    def inverse_transform(self, x):
        """Map x back from component space (inverse of ``transform``)."""
        return self.model.inverse_transform(X=x)

    def get_precision(self):
        """Precision matrix computed from the generative model."""
        return self.model.get_precision()

    def get_covariance(self):
        """Covariance matrix computed from the generative model."""
        return self.model.get_covariance()

    def partial_fit(self, x, y=None, check_input=True):
        """Incrementally fit on one batch x."""
        self.model.partial_fit(X=x, y=y, check_input=check_input)

    def get_attributes(self):
        """Return the fitted estimator's attributes as a 9-tuple:
        (components_, explained_variance_, explained_variance_ratio_,
        singular_values_, mean_, var_, noise_variance_, n_components_,
        n_samples_seen_).
        """
        est = self.model
        return (est.components_,
                est.explained_variance_,
                est.explained_variance_ratio_,
                est.singular_values_,
                est.mean_,              # per-feature mean
                est.var_,               # per-feature variance
                est.noise_variance_,    # estimated noise covariance
                est.n_components_,
                est.n_samples_seen_)
def btnConvert_click(self):
    """Validate all GUI inputs, run Incremental PCA on the loaded .mat data
    (either per-subject or on the whole matrix), and save the transformed
    data plus copied metadata variables to the output .mat file.

    Returns False on any validation failure (after showing a critical
    message box); returns None implicitly when data/subject loading fails
    or on success.
    """
    msgBox = QMessageBox()
    # OutFile — output .mat path must be non-empty.
    OutFile = ui.txtOutFile.text()
    if not len(OutFile):
        msgBox.setText("Please enter out file!")
        msgBox.setIcon(QMessageBox.Critical)
        msgBox.setStandardButtons(QMessageBox.Ok)
        msgBox.exec_()
        return False
    # InFile — input .mat path must be non-empty and exist on disk.
    InFile = ui.txtInFile.text()
    if not len(InFile):
        msgBox.setText("Please enter input file!")
        msgBox.setIcon(QMessageBox.Critical)
        msgBox.setStandardButtons(QMessageBox.Ok)
        msgBox.exec_()
        return False
    if not os.path.isfile(InFile):
        msgBox.setText("Input file not found!")
        msgBox.setIcon(QMessageBox.Critical)
        msgBox.setStandardButtons(QMessageBox.Ok)
        msgBox.exec_()
        return False
    # Subject-level scaling only makes sense with subject-level analysis.
    if ui.rbScale.isChecked() == True and ui.rbALScale.isChecked(
    ) == False:
        msgBox.setText(
            "Subject Level Normalization is just available for Subject Level Analysis!"
        )
        msgBox.setIcon(QMessageBox.Critical)
        msgBox.setStandardButtons(QMessageBox.Ok)
        msgBox.exec_()
        return False
    InData = io.loadmat(InFile)
    OutData = dict()
    # Image shape is always carried over unchanged.
    OutData["imgShape"] = InData["imgShape"]
    if not len(ui.txtData.currentText()):
        msgBox.setText("Please enter Data variable name!")
        msgBox.setIcon(QMessageBox.Critical)
        msgBox.setStandardButtons(QMessageBox.Ok)
        msgBox.exec_()
        return False
    try:
        X = InData[ui.txtData.currentText()]
        # Whole-data scaling only when subject-level scaling is NOT selected.
        if ui.cbScale.isChecked() and (not ui.rbScale.isChecked()):
            X = preprocessing.scale(X)
            print("Whole of data is scaled X~N(0,1).")
    # NOTE(review): bare except silently swallows any failure here.
    except:
        print("Cannot load data")
        return
    try:
        NumFea = np.int32(ui.txtNumFea.text())
    except:
        msgBox.setText("Number of features is wrong!")
        msgBox.setIcon(QMessageBox.Critical)
        msgBox.setStandardButtons(QMessageBox.Ok)
        msgBox.exec_()
        return False
    # NumFea must lie in [1, number of original features].
    if NumFea < 1:
        msgBox.setText("Number of features must be greater than zero!")
        msgBox.setIcon(QMessageBox.Critical)
        msgBox.setStandardButtons(QMessageBox.Ok)
        msgBox.exec_()
        return False
    if NumFea > np.shape(X)[1]:
        msgBox.setText("Number of features is wrong!")
        msgBox.setIcon(QMessageBox.Critical)
        msgBox.setStandardButtons(QMessageBox.Ok)
        msgBox.exec_()
        return False
    # Batch
    try:
        Batch = np.int32(ui.txtBatch.text())
    except:
        msgBox.setText("Size of batch is wrong!")
        msgBox.setIcon(QMessageBox.Critical)
        msgBox.setStandardButtons(QMessageBox.Ok)
        msgBox.exec_()
        return False
    # Batch == 0 means "let sklearn choose the batch size".
    if Batch == 0:
        Batch = None
    # Subject
    if not len(ui.txtSubject.currentText()):
        msgBox.setText("Please enter Subject variable name!")
        msgBox.setIcon(QMessageBox.Critical)
        msgBox.setStandardButtons(QMessageBox.Ok)
        msgBox.exec_()
        return False
    try:
        Subject = InData[ui.txtSubject.currentText()]
        OutData[ui.txtOSubject.text()] = Subject
    except:
        print("Cannot load Subject ID")
        return
    # Label — mandatory, copied through to the output.
    if not len(ui.txtLabel.currentText()):
        msgBox.setText("Please enter Label variable name!")
        msgBox.setIcon(QMessageBox.Critical)
        msgBox.setStandardButtons(QMessageBox.Ok)
        msgBox.exec_()
        return False
    OutData[ui.txtOLabel.text()] = InData[ui.txtLabel.currentText()]
    # The following metadata variables are optional; each is copied from
    # input to output only when its checkbox is ticked, and its variable
    # name must then be non-empty.
    # Task
    if ui.cbTask.isChecked():
        if not len(ui.txtTask.currentText()):
            msgBox.setText("Please enter Task variable name!")
            msgBox.setIcon(QMessageBox.Critical)
            msgBox.setStandardButtons(QMessageBox.Ok)
            msgBox.exec_()
            return False
        OutData[ui.txtOTask.text()] = InData[ui.txtTask.currentText()]
    # Run
    if ui.cbRun.isChecked():
        if not len(ui.txtRun.currentText()):
            msgBox.setText("Please enter Run variable name!")
            msgBox.setIcon(QMessageBox.Critical)
            msgBox.setStandardButtons(QMessageBox.Ok)
            msgBox.exec_()
            return False
        OutData[ui.txtORun.text()] = InData[ui.txtRun.currentText()]
    # Counter
    if ui.cbCounter.isChecked():
        if not len(ui.txtCounter.currentText()):
            msgBox.setText("Please enter Counter variable name!")
            msgBox.setIcon(QMessageBox.Critical)
            msgBox.setStandardButtons(QMessageBox.Ok)
            msgBox.exec_()
            return False
        OutData[ui.txtOCounter.text()] = InData[
            ui.txtCounter.currentText()]
    # Matrix Label
    if ui.cbmLabel.isChecked():
        if not len(ui.txtmLabel.currentText()):
            msgBox.setText("Please enter Matrix Label variable name!")
            msgBox.setIcon(QMessageBox.Critical)
            msgBox.setStandardButtons(QMessageBox.Ok)
            msgBox.exec_()
            return False
        OutData[ui.txtOmLabel.text()] = InData[ui.txtmLabel.currentText()]
    # Design
    if ui.cbDM.isChecked():
        if not len(ui.txtDM.currentText()):
            msgBox.setText("Please enter Design Matrix variable name!")
            msgBox.setIcon(QMessageBox.Critical)
            msgBox.setStandardButtons(QMessageBox.Ok)
            msgBox.exec_()
            return False
        OutData[ui.txtODM.text()] = InData[ui.txtDM.currentText()]
    # Coordinate
    if ui.cbCol.isChecked():
        if not len(ui.txtCol.currentText()):
            msgBox.setText("Please enter Coordinator variable name!")
            msgBox.setIcon(QMessageBox.Critical)
            msgBox.setStandardButtons(QMessageBox.Ok)
            msgBox.exec_()
            return False
        OutData[ui.txtOCol.text()] = InData[ui.txtCol.currentText()]
    # Condition
    if ui.cbCond.isChecked():
        if not len(ui.txtCond.currentText()):
            msgBox.setText("Please enter Condition variable name!")
            msgBox.setIcon(QMessageBox.Critical)
            msgBox.setStandardButtons(QMessageBox.Ok)
            msgBox.exec_()
            return False
        OutData[ui.txtOCond.text()] = InData[ui.txtCond.currentText()]
    # Number of Scan
    if ui.cbNScan.isChecked():
        if not len(ui.txtScan.currentText()):
            msgBox.setText("Please enter Number of Scan variable name!")
            msgBox.setIcon(QMessageBox.Critical)
            msgBox.setStandardButtons(QMessageBox.Ok)
            msgBox.exec_()
            return False
        OutData[ui.txtOScan.text()] = InData[ui.txtScan.currentText()]
    Models = dict()
    Models["Name"] = "IPCA"
    if ui.rbALScale.isChecked():
        # --- Subject-level analysis: fit one IPCA per subject. ---
        print("Partition data to subject level ...")
        SubjectUniq = np.unique(Subject)
        X_Sub = list()
        for subj in SubjectUniq:
            if ui.cbScale.isChecked() and ui.rbScale.isChecked():
                # np.where(...)[1]: Subject is presumably a 1xN row vector
                # from loadmat, so the column index selects samples —
                # TODO confirm against the input file layout.
                X_Sub.append(
                    preprocessing.scale(
                        X[np.where(Subject == subj)[1], :]))
                print("Data in subject level is scaled, X_" + str(subj) +
                      "~N(0,1).")
            else:
                X_Sub.append(X[np.where(Subject == subj)[1], :])
            print("Subject ", subj, " is extracted from data.")
        print("Running IPCA in subject level ...")
        X_Sub_PCA = list()
        lenPCA = len(X_Sub)
        for xsubindx, xsub in enumerate(X_Sub):
            model = IncrementalPCA(n_components=NumFea, batch_size=Batch)
            model.fit(xsub)
            X_Sub_PCA.append(model.transform(xsub))
            # Parameters of each per-subject model are stored as a string.
            Models["Model" + str(xsubindx + 1)] = str(
                model.get_params(deep=True))
            print("IPCA: ", xsubindx + 1, " of ", lenPCA, " is done.")
        print("Data integration ... ")
        # Stack the per-subject projected matrices back into one matrix.
        X_new = None
        for xsubindx, xsub in enumerate(X_Sub_PCA):
            X_new = np.concatenate(
                (X_new, xsub)) if X_new is not None else xsub
            print("Integration: ", xsubindx + 1, " of ", lenPCA,
                  " is done.")
        OutData[ui.txtOData.text()] = X_new
    else:
        # --- Whole-data analysis: a single IPCA over all samples. ---
        print("Running IPCA ...")
        model = IncrementalPCA(n_components=NumFea, batch_size=Batch)
        OutData[ui.txtOData.text()] = model.fit_transform(X)
        Models["Model"] = str(model.get_params(deep=True))
    OutData["ModelParameter"] = Models
    print("Saving ...")
    io.savemat(ui.txtOutFile.text(), mdict=OutData)
    print("DONE.")
    msgBox.setText("Incremental PCA is done.")
    msgBox.setIcon(QMessageBox.Information)
    msgBox.setStandardButtons(QMessageBox.Ok)
    msgBox.exec_()
# Report the shapes of the (externally prepared) train/test matrices.
print('#================================================================#')
print('\nshape of Training Matrix = ', Train_matrix.shape)
print('shape of Test Matrix = ', Test_matrix.shape,'\n')
print('#================================================================#')
#========================= Principal Component Analysis ==========================#
print ('\nRunning Incrmental PCA with 200 Componenets and 5000 batch size')
# Fit on the training data only; the test data is projected with the
# same fitted components (no leakage from test into the fit).
pca = IncrementalPCA(n_components=200, batch_size = 5000)
pca.fit(Train_matrix)
Train_matrix = pca.transform(Train_matrix)
Test_matrix = pca.transform(Test_matrix)
parameters = pca.get_params()
# Per-component and cumulative explained-variance ratios (diagnostics).
variance = pca.explained_variance_ratio_
cumvariance = pca.explained_variance_ratio_.cumsum()
# NOTE(review): the second commented-out line writes `variance`, not
# `cumvariance`, despite the "cum_variance" filename — fix before re-enabling.
#np.savetxt("pca_result_variance_200.csv", variance, delimiter=",")
#np.savetxt("pca_result_cum_variance_200.csv", variance, delimiter=",")
print ('\nPCA complete!\n')
print ('#================================================================#')
print('\nWriting transformed Train and Test matrices to CSV\n')
print('#================================================================#')
# Dump both transformed matrices via the project's CSV writer helper.
with open(csv_pca_train_out_path, 'w', newline='') as csvtrainoutfile:
    csv_matrix_writer(csvtrainoutfile , Train_matrix)
with open(csv_pca_test_out_path, 'w', newline='') as csvtestoutfile:
    csv_matrix_writer(csvtestoutfile , Test_matrix)
# In[ ]: # In[16]: # IncrementalPCA ipca = IncrementalPCA(n_components=3) print(ipca) ipca.fit(feature) x = ipca.transform(feature) print(ipca.explained_variance_ratio_) # In[17]: # ipca.get_covariance() ipca.get_precision() ipca.get_params(deep=True) # ## Gaussian random projection # In[27]: from sklearn import random_projection transform = random_projection.GaussianRandomProjection(n_components=5) feature_new = transform.fit_transform(feature) classifier_f = open('n_GaussianRandomProjection.pickle', 'wb') pickle.dump(feature_new, classifier_f) classifier_f.close() print(feature_new) # In[52]: