def btnConvert_click(self):
    """Validate the form, run Kernel PCA on the input file and save the result.

    Reads the KPCA hyper-parameters and the variable names from the ``ui``
    widgets, loads the input file with ``io.loadmat``, reduces the data
    matrix with ``KernelPCA`` (either on the whole matrix or separately per
    subject) and writes the output dictionary with ``io.savemat``.

    Every validation failure is reported in a critical message box.

    Returns:
        False when validation or loading fails; None after a successful run.
    """
    msgBox = QMessageBox()

    def fail(text):
        """Show ``text`` in a critical message box and return False."""
        msgBox.setText(text)
        msgBox.setIcon(QMessageBox.Critical)
        msgBox.setStandardButtons(QMessageBox.Ok)
        msgBox.exec_()
        return False

    # Kernel
    Kernel = ui.cbKernel.currentText()

    # Numeric hyper-parameters, validated in the order they are listed.
    # The builtin ``float`` replaces ``np.float``: the alias was removed in
    # NumPy 1.24, which made every float field fail validation.
    # NOTE(review): Alpha is parsed as an integer, so "1.0" is rejected --
    # confirm whether fractional values should be accepted.
    numeric_fields = (
        ("Gamma", float, ui.txtGamma, "Gamma is wrong!"),
        ("Degree", np.int32, ui.txtDegree, "Degree is wrong!"),
        ("Coef0", float, ui.txtCoef0, "Coef0 is wrong!"),
        ("Alpha", np.int32, ui.txtAlpha, "Alpha is wrong!"),
        ("Tol", float, ui.txtTole, "Tolerance is wrong!"),
        ("MaxIter", np.int32, ui.txtMaxIter,
         "Maximum number of iterations is wrong!"),
        ("NJob", np.int32, ui.txtJobs,
         "The number of parallel jobs is wrong!"),
    )
    params = {}
    for key, caster, widget, message in numeric_fields:
        try:
            params[key] = caster(widget.text())
        except (ValueError, TypeError, OverflowError):
            return fail(message)

    Gamma = params["Gamma"]
    Degree = params["Degree"]
    Coef0 = params["Coef0"]
    Alpha = params["Alpha"]
    Tol = params["Tol"]
    # A non-positive iteration count is passed to KernelPCA as None.
    MaxIter = params["MaxIter"] if params["MaxIter"] > 0 else None
    NJob = params["NJob"]
    if NJob < -1 or NJob == 0:
        return fail(
            "The number of parallel jobs must be -1 or greater than 0!")

    # OutFile
    OutFile = ui.txtOutFile.text()
    if not OutFile:
        return fail("Please enter out file!")

    # InFile
    InFile = ui.txtInFile.text()
    if not InFile:
        return fail("Please enter input file!")
    if not os.path.isfile(InFile):
        return fail("Input file not found!")

    if ui.rbScale.isChecked() and not ui.rbALScale.isChecked():
        return fail(
            "Subject Level Normalization is just available for Subject Level Analysis!"
        )

    InData = io.loadmat(InFile)
    OutData = dict()
    OutData["imgShape"] = InData["imgShape"]

    # Data
    data_name = ui.txtData.currentText()
    if not data_name:
        return fail("Please enter Data variable name!")
    try:
        X = InData[data_name]
        if ui.cbScale.isChecked() and (not ui.rbScale.isChecked()):
            X = preprocessing.scale(X)
            print("Whole of data is scaled X~N(0,1).")
    except Exception:
        # Broad on purpose: both a missing variable and a scaling failure
        # are reported as a load problem, now also shown in the GUI.
        print("Cannot load data")
        return fail("Cannot load data")

    # Number of features
    try:
        NumFea = np.int32(ui.txtNumFea.text())
    except (ValueError, TypeError, OverflowError):
        return fail("Number of features is wrong!")
    if NumFea < 1:
        return fail("Number of features must be greater than zero!")
    if NumFea > np.shape(X)[1]:
        return fail("Number of features is wrong!")

    # Subject
    subject_name = ui.txtSubject.currentText()
    if not subject_name:
        return fail("Please enter Subject variable name!")
    try:
        Subject = InData[subject_name]
        OutData[ui.txtOSubject.text()] = Subject
    except Exception:
        print("Cannot load Subject ID")
        return fail("Cannot load Subject ID")

    # Label
    label_name = ui.txtLabel.currentText()
    if not label_name:
        return fail("Please enter Label variable name!")
    OutData[ui.txtOLabel.text()] = InData[label_name]

    # Optional variables: (enable checkbox, input-name widget,
    # output-name widget, message shown when the input name is empty).
    optional_fields = (
        (ui.cbTask, ui.txtTask, ui.txtOTask,
         "Please enter Task variable name!"),
        (ui.cbRun, ui.txtRun, ui.txtORun,
         "Please enter Run variable name!"),
        (ui.cbCounter, ui.txtCounter, ui.txtOCounter,
         "Please enter Counter variable name!"),
        (ui.cbmLabel, ui.txtmLabel, ui.txtOmLabel,
         "Please enter Matrix Label variable name!"),
        (ui.cbDM, ui.txtDM, ui.txtODM,
         "Please enter Design Matrix variable name!"),
        (ui.cbCol, ui.txtCol, ui.txtOCol,
         "Please enter Coordinator variable name!"),
        (ui.cbCond, ui.txtCond, ui.txtOCond,
         "Please enter Condition variable name!"),
        (ui.cbNScan, ui.txtScan, ui.txtOScan,
         "Please enter Number of Scan variable name!"),
    )
    for checkbox, in_widget, out_widget, message in optional_fields:
        if not checkbox.isChecked():
            continue
        var_name = in_widget.currentText()
        if not var_name:
            return fail(message)
        OutData[out_widget.text()] = InData[var_name]

    def make_model():
        """Build a KernelPCA configured from the validated form values."""
        return KernelPCA(n_components=NumFea, kernel=Kernel, gamma=Gamma,
                         degree=Degree, coef0=Coef0, alpha=Alpha, tol=Tol,
                         max_iter=MaxIter, n_jobs=NJob)

    Models = dict()
    Models["Name"] = "KPCA"

    if ui.rbALScale.isChecked():
        print("Partition data to subject level ...")
        scale_per_subject = ui.cbScale.isChecked() and ui.rbScale.isChecked()
        X_Sub = list()
        for subj in np.unique(Subject):
            # Index [1] takes the column indices of the match, i.e. this
            # assumes Subject is a 2-D row vector -- TODO confirm.
            rows = X[np.where(Subject == subj)[1], :]
            if scale_per_subject:
                rows = preprocessing.scale(rows)
                print("Data in subject level is scaled, X_" + str(subj) +
                      "~N(0,1).")
            X_Sub.append(rows)
            print("Subject ", subj, " is extracted from data.")

        print("Running KPCA in subject level ...")
        X_Sub_PCA = list()
        lenPCA = len(X_Sub)
        for xsubindx, xsub in enumerate(X_Sub):
            model = make_model()
            X_Sub_PCA.append(model.fit_transform(xsub))
            Models["Model" + str(xsubindx + 1)] = str(
                model.get_params(deep=True))
            print("KPCA: ", xsubindx + 1, " of ", lenPCA, " is done.")

        print("Data integration ... ")
        X_new = None
        for xsubindx, xsub in enumerate(X_Sub_PCA):
            X_new = np.concatenate(
                (X_new, xsub)) if X_new is not None else xsub
            print("Integration: ", xsubindx + 1, " of ", lenPCA, " is done.")
        OutData[ui.txtOData.text()] = X_new
    else:
        print("Running KPCA ...")
        model = make_model()
        OutData[ui.txtOData.text()] = model.fit_transform(X)
        Models["Model"] = str(model.get_params(deep=True))

    OutData["ModelParameter"] = Models

    print("Saving ...")
    io.savemat(OutFile, mdict=OutData)
    print("DONE.")
    msgBox.setText("Kernel PCA is done.")
    msgBox.setIcon(QMessageBox.Information)
    msgBox.setStandardButtons(QMessageBox.Ok)
    msgBox.exec_()
# STAGE XX: APPLYING KERNEL PCA
# Block 01: Applying KernelPCA
# Variance caused by each of the principal components
from sklearn.decomposition import KernelPCA

kpca_linear = KernelPCA(kernel='linear')
kpca_linear.get_params().keys()

# Fit on the training matrix only, then project the test matrix with the
# same fitted model.
X_train_kpca_new = kpca_linear.fit_transform(X_train_kpca)
X_test_kpca_new = kpca_linear.transform(X_test_kpca)

print('Original Number of Features:', X_train_kpca.shape[1])
print('Reduced Number of Features:', X_train_kpca_new.shape[1])
print('Original Number of Features:', X_test_kpca.shape[1])
print('Reduced Number of Features:', X_test_kpca_new.shape[1])

from pprint import pprint

print('Parameters Currently In Use:\n')
pprint(kpca_linear.get_params())

# KernelPCA (unlike PCA) exposes no ``explained_variance_ratio_`` attribute,
# so the ratio is derived from the kernel-matrix eigenvalues instead: each
# component's share of the eigenvalue total. ``eigenvalues_`` is the current
# attribute name; ``lambdas_`` is its pre-1.0 scikit-learn spelling.
kpca_linear_eigenvalues = (
    kpca_linear.eigenvalues_
    if hasattr(kpca_linear, "eigenvalues_")
    else kpca_linear.lambdas_
)
explained_variance_kpca_linear = (
    kpca_linear_eigenvalues / kpca_linear_eigenvalues.sum()
)

kpca_rbf = KernelPCA(kernel='rbf')
# Alternative configuration kept for reference:
#kpca = KernelPCA(kernel = "rbf", gamma = 15, n_components = 1)
kpca_rbf.get_params().keys()
# (class value, colour, marker) used for every two-class scatter plot.
_CLASS_STYLES = ((0, 'red', '^'), (1, 'blue', 'o'))


def _kernel_eigenvalues(model):
    """Return the kernel-matrix eigenvalues of a fitted KernelPCA.

    ``eigenvalues_`` is the current attribute name; ``lambdas_`` is the
    spelling used by scikit-learn releases before 1.0.
    """
    if hasattr(model, "eigenvalues_"):
        return model.eigenvalues_
    return model.lambdas_


def _scatter_2d(points, labels):
    """Scatter the first two columns of ``points``, one series per class."""
    for cls, color, marker in _CLASS_STYLES:
        mask = labels == cls
        plt.scatter(
            points[mask, 0], points[mask, 1],
            color=color, marker=marker, label=str(cls), alpha=0.5
        )


def _scatter_1d(points, labels):
    """Scatter only the first column of ``points`` on a horizontal strip.

    The two classes are drawn slightly above / below y = 0 so they do not
    overlap. The y array is sized from the actual number of samples in each
    class, which also works for unbalanced data.
    """
    for (cls, color, marker), offset in zip(_CLASS_STYLES, (0.02, -0.02)):
        xs = points[labels == cls, 0]
        plt.scatter(
            xs, numpy.zeros(len(xs)) + offset,
            color=color, marker=marker, label=str(cls), alpha=0.5
        )


def _draw_reference_lines(levels):
    """Draw a dashed gray horizontal guide line at every value in ``levels``."""
    for level in levels:
        plt.axhline(level, color='gray', linestyle='--', linewidth=1)


def _plot_scatter_panels(dat_X, dat_y, projected, title_2d, title_1d):
    """Fill panels 1-3 of the 2x3 grid: raw data, 2-D and 1-D projection."""
    # Panel 1: the raw sample data.
    plt.subplot(2, 3, 1)
    plt.grid()
    _scatter_2d(dat_X, dat_y)
    # NOTE(review): the title mentions make_moons() although main() calls
    # generateCirclesDataSet() -- confirm which data set is intended.
    plt.title("verification data \n sklearn.datasets.make_moons() dataset")
    plt.legend(loc='best')

    # Panel 2: x axis = PC1, y axis = PC2 (2-D -> 2-D, nothing dropped).
    plt.subplot(2, 3, 2)
    plt.grid()
    _scatter_2d(projected, dat_y)
    plt.title(title_2d)
    plt.xlabel("PC1")
    plt.ylabel("PC2")
    plt.legend(loc='best')

    # Panel 3: x axis = PC1 only (2-D -> 1-D reduction).
    plt.subplot(2, 3, 3)
    _scatter_1d(projected, dat_y)
    plt.title(title_1d)
    plt.xlabel("PC1")
    plt.ylim([-1, 1])
    plt.axhline(0.0, color='gray', linestyle='--', linewidth=1)
    plt.yticks([])
    plt.legend(loc='best')


def main():
    """Compare PCA and RBF kernel PCA on a generated two-class data set.

    Fits scikit-learn ``PCA`` and ``KernelPCA`` on the data returned by
    ``DataPreProcess.DataPreProcess.generateCirclesDataSet()``, prints the
    fitted attributes, and saves/shows two figures:
    ``./kernelPCA_scikit-learn_1.png`` (PCA) and
    ``./kernelPCA_scikit-learn_2.png`` (kernel PCA).
    """
    print("Enter main()")

    #====================================================
    # Data preprocessing
    #====================================================
    # Generate the sample data set used for verification.
    #dat_X, dat_y = DataPreProcess.DataPreProcess.generateMoonsDataSet()    # half-moon data set
    dat_X, dat_y = DataPreProcess.DataPreProcess.generateCirclesDataSet()   # concentric-circle data set

    #====================================================
    # Learning process
    #====================================================
    # Plain PCA with scikit-learn.
    pca1 = PCA(n_components=2)       # PC1, PC2
    pca2 = PCA(n_components=None)    # for the eigenvalue analysis
    X_pca1 = pca1.fit_transform(dat_X)
    pca2.fit(dat_X)

    # Inspect the fitted pca2 object.
    pca_eigenvalues = numpy.linalg.eig(pca2.get_covariance())[0]
    pca_cumulative = numpy.cumsum(pca2.explained_variance_ratio_)
    print("pca2.explained_variance_ : \n", pca2.explained_variance_)
    print("pca2.explained_variance_ratio_ : \n", pca2.explained_variance_ratio_)   # variance ratio
    print("numpy.cumsum( pca2.explained_variance_ratio_ ) : \n", pca_cumulative)   # cumulative ratio
    print("pca2.components_ : \n", pca2.components_)                               # principal axes
    print("pca2.get_covariance() : \n", pca2.get_covariance())                     # covariance matrix
    print("numpy.linalg.eig( pca2.get_covariance() )[0] : \n", pca_eigenvalues)    # eigenvalues

    # Kernel PCA with scikit-learn (RBF kernel, gamma = 15, all CPU cores).
    scikit_kpca1 = KernelPCA(
        n_components=2,
        kernel='rbf',
        gamma=15,
        n_jobs=-1
    )
    scikit_kpca2 = KernelPCA(
        n_components=None,
        kernel='rbf',
        gamma=15,
        remove_zero_eig=True,    # drop components whose eigenvalue is zero
        n_jobs=-1
    )
    X_scikit_kpca1 = scikit_kpca1.fit_transform(dat_X)
    scikit_kpca2.fit(dat_X)

    # Inspect the fitted scikit_kpca2 object.
    kpca_eigenvalues = _kernel_eigenvalues(scikit_kpca2)
    top_eigenvalues = kpca_eigenvalues[0:40]
    print("scikit_kpca2.get_params() : \n", scikit_kpca2.get_params())
    print("scikit_kpca2.coef0 : \n", scikit_kpca2.coef0)
    print("scikit_kpca2.lambdas_ : \n", kpca_eigenvalues)
    print("scikit_kpca2.lambdas_[0:40] : \n", top_eigenvalues)

    #==========================================================
    # Figure 1: sample data projected with plain PCA
    #==========================================================
    plt.clf()
    _plot_scatter_panels(
        dat_X, dat_y, X_pca1,
        "transformed data (PCA) \n dimension is not deleted",
        "transformed data (PCA) \n dimension is deleted"
    )

    # Panel 4: eigenvalue of each principal component.
    plt.subplot(2, 3, 4)
    plt.bar(
        range(1, 3), pca_eigenvalues,
        alpha=1.0, align='center', label='Eigenvalues', color="blue"
    )
    _draw_reference_lines((0.2, 0.4, 0.6, 0.8, 1.0))
    # One label per tick: a tick/label count mismatch is rejected by
    # current matplotlib releases.
    plt.xticks(range(1, 3), ["lamda_1", "lamda_2"], rotation=90)
    plt.title("Principal components - Eigenvalues (PCA)")
    plt.xlabel('Principal components')
    plt.ylabel('Eigenvalues')
    plt.legend(loc='best')
    plt.tight_layout()

    # Panel 5: individual and cumulative explained-variance ratio.
    plt.subplot(2, 3, 5)
    plt.bar(
        range(1, 3), pca2.explained_variance_ratio_,
        alpha=1.0, align='center',
        label=' explained variance ratio(principal component 1 and 2)',
        color="blue"
    )
    plt.step(
        range(1, 3), pca_cumulative,
        where='mid',
        label='cumulative proportion of the variance',
        color="red"
    )
    _draw_reference_lines((0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0))
    plt.xticks(range(1, 3), range(1, 3))
    plt.title("Principal components - Proportion of the variance (PCA)")
    plt.xlabel('Principal components')
    plt.ylabel('Proportion of the variance \n individual explained variance')
    plt.legend(loc='best')
    plt.tight_layout()

    plt.savefig("./kernelPCA_scikit-learn_1.png", dpi=300, bbox_inches='tight')
    plt.show()

    #===========================================================
    # Figure 2: sample data projected with RBF kernel PCA
    #===========================================================
    plt.clf()
    _plot_scatter_panels(
        dat_X, dat_y, X_scikit_kpca1,
        "transformed data (RBF-kernel PCA) \n dimension is not deleted",
        "transformed data (RBF-kernelPCA) \n dimension is deleted"
    )

    # Panel 4: the largest kernel-matrix eigenvalues (at most 40). The x
    # positions follow the number of eigenvalues actually available.
    plt.subplot(2, 3, 4)
    positions = range(1, len(top_eigenvalues) + 1)
    plt.bar(
        positions, top_eigenvalues,
        alpha=1.0, align='center', label='Eigenvalues', color="blue"
    )
    plt.xticks(positions, rotation=90)
    plt.title("Eigenvalues (RBF-kernelPCA)")
    plt.xlabel('index of Eigenvalues')
    plt.ylabel('Eigenvalues')
    plt.legend(loc='best')
    plt.tight_layout()

    plt.savefig("./kernelPCA_scikit-learn_2.png", dpi=300, bbox_inches='tight')
    plt.show()

    print("Finish main()")
    return