def fit_kernel_pca(x, n_components, kernel='rbf', *args, **kwargs):
    """Fit a KernelPCA model on *x* and return the transformed coordinates.

    Parameters
    ----------
    x : array-like of shape (n_samples, n_features)
        Data to project.
    n_components : int
        Number of kernel principal components to keep.
    kernel : str, default 'rbf'
        Kernel name forwarded to sklearn's KernelPCA.
    **kwargs
        Additional keyword options forwarded to KernelPCA (gamma, degree, ...).

    Returns
    -------
    ndarray of shape (n_samples, n_components)
        The kernel-PCA projection of *x*.

    Raises
    ------
    TypeError
        If extra positional arguments are given. Forwarding ``*args`` after
        the keyword arguments (as the original code did) always collides with
        ``n_components`` in the KernelPCA constructor, so positional extras
        are rejected explicitly with a clear message instead.
    """
    if args:
        raise TypeError('fit_kernel_pca() accepts extra KernelPCA options as '
                        'keyword arguments only; got %d positional extras'
                        % len(args))
    x_new = KernelPCA(n_components=n_components, kernel=kernel,
                      **kwargs).fit_transform(x)
    return x_new
# Train/test split over the TF-IDF matrix; labels come from the second
# column of df.
from sklearn.model_selection import train_test_split

# Fixed: `df.ix` was deprecated and removed from pandas — use positional
# indexing with `df.iloc` instead.
X_train, X_test, y_train, y_test = train_test_split(
    sparse_corpus_tfidf_transpose, df.iloc[:, 1], test_size=0.2,
    random_state=seed)

from sklearn.decomposition import KernelPCA
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import learning_curve
import matplotlib.pyplot as plt

# reduce dimensions (cosine kernel suits L2-normalised TF-IDF vectors)
print('Starting dimensionality reduction')
reducer = KernelPCA(n_components=1500, kernel="cosine", random_state=seed)
corpus_train_tfidf_kpca = reducer.fit_transform(X_train)
corpus_test_tfidf_kpca = reducer.transform(X_test)
print('Finished dimensionality reduction')

# Initialize Logistic Regression and score it on the reduced features...
log_reg = LogisticRegression(C=1.0)
log_reg.fit(corpus_train_tfidf_kpca, y_train)
a = log_reg.score(corpus_test_tfidf_kpca, y_test)

# ...then on the raw TF-IDF features for comparison.
print('Starting logistic regression 2')
log_reg.fit(X_train, y_train)
b = log_reg.score(X_test, y_test)
# Fill the symmetric pairwise similarity matrix between time series.
for i in range(0, n):
    for j in range(i, n):
        sim_mat[i][j] = M2(ts_list[i], ts_list[j], delta, eps)
        sim_mat[j][i] = sim_mat[i][j]  # mirror: matrix is symmetric
        count = count + 1
        if count % 1000 == 0:
            # Fixed: Python 3 print function (was Python-2 `print count`,
            # a syntax error here; the rest of the file uses print(...)).
            print(count)

target_dim = 10
from sklearn.decomposition import KernelPCA
# Classical-MDS-style embedding: feed -0.5 * D**2 as a precomputed kernel
# (KernelPCA centers the Gram matrix internally).
kpca = KernelPCA(n_components=target_dim, kernel="precomputed",
                 eigen_solver="auto", tol=1e-9, max_iter=3000, n_jobs=-1)
feature_coords = kpca.fit_transform((sim_mat**2) * -0.5)

from statsmodels.nonparametric.kernel_regression import KernelReg
# Regress landfall (0/1) on the embedded coordinates, sorted by the first
# embedding dimension.
landfalls = np.array([float(h.made_landfall) for h in hurricane_list])
inds = np.argsort(feature_coords[:, 0])
feature_coords_sorted = feature_coords[inds]
landfalls_sorted = landfalls[inds]
vartypes = ''.join('c' * target_dim)  # one 'c' (continuous) per dimension
reg = KernelReg(landfalls_sorted, feature_coords_sorted, vartypes)
def embedDistanceMatrix(dmatDf, method='kpca', n_components=2, **kwargs):
    """Two-dimensional embedding of sequence distances in dmatDf, returning
    Nx2 x,y-coords: tsne, isomap, pca, mds, kpca, sklearn-tsne, lle, umap.

    Parameters
    ----------
    dmatDf : pd.DataFrame or ndarray
        Square pairwise-distance matrix.
    method : str
        One of the embedding methods listed above.
    n_components : int
        Number of embedding dimensions returned.
    **kwargs
        Method-specific options (e.g. 'perplexity' for the tsne variants;
        forwarded wholesale to umap).

    Returns
    -------
    pd.DataFrame (n_samples x n_components), indexed like dmatDf, or None
    when the method name is unknown.
    """
    if isinstance(dmatDf, pd.DataFrame):
        dmat = dmatDf.values
    else:
        dmat = dmatDf
    if method == 'tsne':
        xy = tsne.run_tsne(dmat, no_dims=n_components,
                           perplexity=kwargs['perplexity'])
    elif method == 'isomap':
        isoObj = Isomap(n_neighbors=10, n_components=n_components)
        xy = isoObj.fit_transform(dmat)
    elif method == 'mds':
        mds = MDS(n_components=n_components, max_iter=3000, eps=1e-9,
                  random_state=15, dissimilarity="precomputed", n_jobs=1)
        xy = mds.fit(dmat).embedding_
        # Rotate MDS output into its principal axes for a canonical layout.
        rot = PCA(n_components=n_components)
        xy = rot.fit_transform(xy)
    elif method == 'pca':
        pcaObj = PCA(n_components=None)
        xy = pcaObj.fit_transform(dmat)[:, :n_components]
    elif method == 'kpca':
        pcaObj = KernelPCA(n_components=dmat.shape[0], kernel='precomputed',
                           eigen_solver='dense')
        # Fixed: narrow the bare `except:` to Exception so KeyboardInterrupt
        # and SystemExit are not swallowed.
        try:
            gram = dist2kernel(dmat)
        except Exception:
            print(
                'Could not convert dmat to kernel for KernelPCA; using 1 - dmat/dmat.max() instead'
            )
            gram = 1 - dmat / dmat.max()
        xy = pcaObj.fit_transform(gram)[:, :n_components]
    elif method == 'lle':
        lle = manifold.LocallyLinearEmbedding(n_neighbors=30,
                                              n_components=n_components,
                                              method='standard')
        # Fixed: was `lle.fit_transform(dist)` — `dist` is undefined here;
        # the distance matrix is `dmat`.
        xy = lle.fit_transform(dmat)
    elif method == 'sklearn-tsne':
        tsneObj = TSNE(n_components=n_components, metric='precomputed',
                       random_state=0, perplexity=kwargs['perplexity'])
        xy = tsneObj.fit_transform(dmat)
    elif method == 'umap':
        umapObj = umap.UMAP(n_components=n_components, metric='precomputed',
                            random_state=110820, **kwargs)
        xy = umapObj.fit_transform(dmat)
    else:
        print('Method unknown: %s' % method)
        return
    assert xy.shape[0] == dmatDf.shape[0]
    xyDf = pd.DataFrame(xy[:, :n_components], index=dmatDf.index,
                        columns=np.arange(n_components))
    if method == 'kpca':
        # Not sure how negative eigenvalues should be handled here, but they
        # are usually small so it shouldn't make a big difference.
        # Fixed: `lambdas_` was renamed `eigenvalues_` in scikit-learn 1.0
        # and removed in 1.2; fall back for older versions.
        lam = getattr(pcaObj, 'eigenvalues_', None)
        if lam is None:
            lam = pcaObj.lambdas_  # scikit-learn < 1.0
        setattr(xyDf, 'explained_variance_',
                lam[:n_components] / lam[lam > 0].sum())
    return xyDf
# Hold out 25% of the samples as the test set.
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.25, random_state=0)

# Standardise the features; the scaler is fitted on the training data only.
from sklearn.preprocessing import StandardScaler
sc_X = StandardScaler()
X_train = sc_X.fit_transform(X_train)
X_test = sc_X.transform(X_test)

# Project both splits onto the two leading RBF-kernel principal components.
from sklearn.decomposition import KernelPCA
kpca = KernelPCA(n_components=2, kernel="rbf")
X_train = kpca.fit_transform(X_train)
X_test = kpca.transform(X_test)

# Train a logistic-regression classifier on the reduced features.
from sklearn.linear_model import LogisticRegression
classifier = LogisticRegression(random_state=0)
classifier.fit(X_train, y_train)

# Predict the held-out labels and summarise errors in a confusion matrix.
y_pred = classifier.predict(X_test)
from sklearn.metrics import confusion_matrix
cm = confusion_matrix(y_test, y_pred)
# Show the corrupted test images together with their MSE vs the clean set.
plot_digits(
    X_test_noisy,
    f"Noisy test images\nMSE: {np.mean((X_test - X_test_noisy) ** 2):.2f}")

# %%
# Learn the `PCA` basis
# ---------------------
#
# We can now learn our PCA basis using both a linear PCA and a kernel PCA that
# uses a radial basis function (RBF) kernel.
from sklearn.decomposition import PCA, KernelPCA

pca = PCA(n_components=32)
kernel_pca = KernelPCA(
    n_components=400,
    kernel="rbf",
    gamma=1e-3,
    fit_inverse_transform=True,
    alpha=5e-3)
pca.fit(X_train_noisy)
_ = kernel_pca.fit(X_train_noisy)

# %%
# Reconstruct and denoise test images
# -----------------------------------
#
# Now, we can transform and reconstruct the noisy test set. Since we used less
# components than the number of original features, we will get an approximation
# of the original set. Indeed, by dropping the components explaining variance
# in PCA the least, we hope to remove noise. Similar thinking happens in kernel
# PCA; however, we expect a better reconstruction because we use a non-linear
# Compare KernelPCA (linear kernel) against a plain PCA fitted elsewhere.
kernel_pca_linear_lambdas_, kernel_pca_full_linear_lambdas_, explained_variance = compare_KernelPCA(
)
print('Is equal singular values KernelPCA with linear kernel and PCA')
print(
    np.sqrt(kernel_pca_linear_lambdas_).round(3) ==
    pca_85.singular_values_.round(3))
# Notebook-style echo of the comparison results.
kernel_pca_linear_lambdas_, kernel_pca_full_linear_lambdas_, explained_variance

# Repeat the comparison for the other built-in kernels.
res_poly = compare_KernelPCA(kernel='poly')
res_rbf = compare_KernelPCA(kernel='rbf')
res_sigmoid = compare_KernelPCA(kernel='sigmoid')
res_cosine = compare_KernelPCA(kernel='cosine')

# Precomputed kernel: pass the Gram matrix X @ X.T directly.
kernel_pca_precomputed = KernelPCA(n_components=kernel_pca_n_comp,
                                   kernel='precomputed')
kernel_pca_precomputed_data = kernel_pca_precomputed.fit_transform(
    data.dot(data.T))
# Fixed: `lambdas_` was renamed `eigenvalues_` in scikit-learn 1.0 and
# removed in 1.2; fall back for older versions.
getattr(kernel_pca_precomputed, 'eigenvalues_',
        getattr(kernel_pca_precomputed, 'lambdas_', None)).round(3)

# ---
# ## Variants of principal component analysis
# ### SparsePCA
sparse_pca_lars = SparsePCA(2, method='lars')
sparse_pca_lars_data = sparse_pca_lars.fit_transform(data)
print("Sparse PCA with lars method components")
print(sparse_pca_lars.components_)
sparse_pca_cd = SparsePCA(2, method='cd')
def __init__(self, feature_extractor='tsne', perplexity=30, pixels=100,
             random_state=None, n_jobs=None):
    """Generate an ImageTransformer instance

    Args:
        feature_extractor: string of value ('tsne', 'pca', 'kpca') or a class
            instance with method `fit_transform` that returns a 2-dimensional
            array of extracted features.
        pixels: int (square matrix) or tuple of ints (height, width) that
            defines the size of the image matrix.
        random_state: int or RandomState. Determines the random number
            generator, if present, of a string defined feature_extractor.
        n_jobs: The number of parallel jobs to run for a string defined
            feature_extractor.
    """
    self.random_state = random_state
    self.n_jobs = n_jobs

    if isinstance(feature_extractor, str):
        # Case-insensitive lookup of the built-in extractors.
        name = feature_extractor.casefold()
        if name == 'tsne_exact':
            extractor = TSNE(n_components=2,
                             metric='cosine',
                             perplexity=perplexity,
                             n_iter=1000,
                             method='exact',
                             random_state=self.random_state,
                             n_jobs=self.n_jobs)
        elif name == 'tsne':
            extractor = TSNE(n_components=2,
                             metric='cosine',
                             perplexity=perplexity,
                             n_iter=1000,
                             method='barnes_hut',
                             random_state=self.random_state,
                             n_jobs=self.n_jobs)
        elif name == 'pca':
            extractor = PCA(n_components=2, random_state=self.random_state)
        elif name == 'kpca':
            extractor = KernelPCA(n_components=2,
                                  kernel='rbf',
                                  random_state=self.random_state,
                                  n_jobs=self.n_jobs)
        else:
            raise ValueError(("Feature extraction method '{}' not accepted"
                              ).format(feature_extractor))
        self._fe = extractor
    elif hasattr(feature_extractor, 'fit_transform') and inspect.ismethod(
            feature_extractor.fit_transform):
        # Duck-typed custom extractor: anything with a bound fit_transform.
        self._fe = feature_extractor
    else:
        raise TypeError('Parameter feature_extractor is not a '
                        'string nor has method "fit_transform"')

    if isinstance(pixels, int):
        # A single edge length means a square output image.
        pixels = (pixels, pixels)

    # The resolution of transformed image
    self._pixels = pixels
    self._xrot = None
def pca_transform(self,
                  nb_PC=4,
                  remove_mean0=False,
                  remove_mean1=False,
                  standard=False,
                  sklearn=False,
                  sklearn_kernel=False,
                  cov=True):
    """Perform a principal component analysis of ``self.df``.

    Parameters
    ----------
    nb_PC : int, default 4
        Number of principal components kept in ``self.scores`` and
        ``self.eigenvectors``.
    remove_mean0 : bool, default False
        Subtract the column means before the analysis.
    remove_mean1 : bool, default False
        Subtract the row means before the analysis.
    standard : bool, default False
        Standardise the dataframe (zero mean, unit variance per column).
        (The original docstring said "default = True" — the code default
        is False.)
    sklearn : bool, default False
        Use scikit-learn's SVD-based PCA (or KernelPCA with
        ``sklearn_kernel=True``).
    cov : bool, default True
        Diagonalise the covariance matrix with numpy (the default path).

    Stores on the object: ``scores``, ``eigenvalues``, ``eigenvectors``,
    ``eigpairs`` and ``var_exp`` (percent of variance explained).

    NOTE: By default sklearn removes the mean from the dataset, so that
    path cannot be used to perform the downscaling.

    References:
    http://sebastianraschka.com/Articles/2015_pca_in_3_steps.html#projection-onto-the-new-feature-space
    """
    df = self.df
    self.nb_PC = nb_PC
    if remove_mean0:
        print('remove_mean0')
        df = df.subtract(df.mean(axis=0), axis='columns')
    if remove_mean1:
        print('remove_mean1')
        df = df.subtract(df.mean(axis=1), axis='index')
        print(df)
    if standard:
        # standardize
        # df_std = StandardScaler().fit_transform(df)
        self.standard = True
        df = (df - df.mean(axis=0)) / df.std(
            axis=0)  # another way to standardise
    #=======================================================================
    # Sklearn
    #=======================================================================
    if sklearn:
        print("o" * 80)
        print("SVD sklearn used")
        print("o" * 80)
        if sklearn_kernel:
            print('sklearn_kernel')
            # RBF kernel PCA with nb_PC principal components.
            pca = KernelPCA(nb_PC,
                            kernel="rbf",
                            fit_inverse_transform=True,
                            gamma=10)
        else:
            pca = PCA(nb_PC)
        # fit data
        pca.fit(df)
        # Get the components from transforming the original data.
        scores = pca.transform(df)  # or PCs
        eigenvalues = pca.explained_variance_
        eigenvectors = pca.components_  # or loading
        # Make a list of (eigenvalue, eigenvector) tuples.
        # NOTE(review): this reads `self.eigenvalues` / `self.eigenvector`
        # BEFORE this method assigns them; `self.eigenvector` (singular) is
        # never set anywhere visible, so this line likely raises
        # AttributeError unless a previous call populated the attributes.
        # The local `eigenvalues` / `eigenvectors` were probably intended —
        # confirm before relying on this path.
        self.eigpairs = [(np.abs(self.eigenvalues[i]), self.eigenvector[i, :])
                         for i in range(len(self.eigenvalues))]
    #=======================================================================
    # Covariance Matrix
    #=======================================================================
    if cov:
        print("o" * 80)
        print("Covariance used")
        print("o" * 80)
        X = df.values
        cov_mat = np.cov(X.T)
        eigenvalues, eigenvectors = np.linalg.eig(cov_mat)
        scores = X.dot(eigenvectors)
        # Wrap the results in labelled pandas containers (components 1..N).
        scores = pd.DataFrame(scores,
                              columns=np.arange(1, len(df.columns) + 1),
                              index=df.index)
        eigenvalues = pd.Series(eigenvalues,
                                index=np.arange(1, len(df.columns) + 1))
        eigenvectors = pd.DataFrame(eigenvectors.T,
                                    columns=df.columns,
                                    index=np.arange(
                                        1, len(df.columns) + 1))
    # NOTE(review): `.iloc` assumes the covariance path ran (pandas objects);
    # with `sklearn=True, cov=False` the locals are numpy arrays and this
    # would fail — confirm the intended call pattern.
    self.scores = scores.iloc[:, 0:nb_PC]
    self.eigenvalues = eigenvalues  #[0:nb_PC]
    self.eigenvectors = eigenvectors[0:nb_PC]
    tot = sum(eigenvalues)
    # Percent of variance explained per component, largest first.
    self.var_exp = [(i / tot) * 100
                    for i in sorted(eigenvalues, reverse=True)]
# Target vector: all rows of the last column.
y = dataset.iloc[:, -1].values

# Split 75/25 into training and test sets.
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.25, random_state=0)

# Scale the features; fitting only on X_train keeps both splits on one scale.
from sklearn.preprocessing import StandardScaler
sc_X = StandardScaler()
X_train = sc_X.fit_transform(X_train)
X_test = sc_X.transform(X_test)

# Kernel PCA with two RBF components (n_components was None originally).
from sklearn.decomposition import KernelPCA
kpca = KernelPCA(n_components=2, kernel='rbf')
X_train = kpca.fit_transform(X_train)
X_test = kpca.transform(X_test)

# Logistic-regression classifier on the reduced features.
from sklearn.linear_model import LogisticRegression
clf = LogisticRegression(random_state=0)
clf.fit(X_train, y_train)

# Evaluate on the held-out split.
y_pred = clf.predict(X_test)

# Confusion matrix: 90 correct predictions; 10 incorrect.
from sklearn.metrics import confusion_matrix
cm = confusion_matrix(y_test, y_pred)
# Fixed: the script uses pd, KernelPCA and LogisticRegression but only
# imported StandardScaler and train_test_split — add the missing imports.
import pandas as pd

from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.decomposition import KernelPCA
from sklearn.linear_model import LogisticRegression

if __name__ == "__main__":
    # Load the heart-disease dataset and split features from the target.
    dt_heart = pd.read_csv('./data/heart.csv')
    print(dt_heart.head(5))

    dt_features = dt_heart.drop(['target'], axis=1)
    dt_target = dt_heart['target']

    # Standardise before the polynomial kernel PCA.
    dt_features = StandardScaler().fit_transform(dt_features)

    X_train, X_test, y_train, y_test = train_test_split(dt_features,
                                                        dt_target,
                                                        test_size=0.3,
                                                        random_state=42)

    # Reduce both splits to 4 polynomial-kernel components.
    kpca = KernelPCA(n_components=4, kernel='poly')
    kpca.fit(X_train)
    dt_train = kpca.transform(X_train)
    dt_test = kpca.transform(X_test)

    # Classify on the reduced features and report accuracy.
    logistic = LogisticRegression(solver='lbfgs')
    logistic.fit(dt_train, y_train)
    print("SCORE KPCA: ", logistic.score(dt_test, y_test))
    #kernels_y_kpca
# saving the model to disk for future use
corpora.MmCorpus.serialize('train_tfidf.mm', train_tfidf)

# convert to a sparse and compatible format for dimensionality reduction
# using sklearn
sparse_train_corpus_tfidf = matutils.corpus2csc(train_tfidf)
sparse_train_corpus_tfidf_transpose = sparse_train_corpus_tfidf.transpose()

#%%
# visualize the tf-idf corpus using kernel PCA
import numpy as np
import matplotlib.pyplot as plt
from sklearn.decomposition import KernelPCA
from mpl_toolkits.mplot3d import Axes3D
from sklearn.manifold import TSNE

# Three cosine-kernel components for a 3-D scatter of the corpus.
kpca2 = KernelPCA(n_components=3, kernel="cosine", random_state=seed)
corpus_train_tfidf_kpca2 = kpca2.fit_transform(
    sparse_train_corpus_tfidf_transpose)

# RENAMED FOR EASE
X = corpus_train_tfidf_kpca2

# Earlier experiments kept for reference:
#kpca = KernelPCA(n_components = 1000 , kernel="cosine", random_state=seed)
#corpus_train_tfidf_kpca = kpca.fit_transform(sparse_train_corpus_tfidf_transpose)
#reducer = TSNE(n_components = 3, perplexity=.0, early_exaggeration=4.0, learning_rate=30.0, n_iter=10000, metric='cosine')

# CREATE DICTIONARY TO ASSIGN COLORS
categories = train_category.unique()
# REINDEX OUTPUT TO COMPARE WITH LABELS
fpr, tpr, thresholds = roc_curve(y_train_5, y_scores)
score = roc_auc_score(y_test, y_hat)

### dimensionality reduction
from sklearn.decomposition import PCA, KernelPCA

pca = PCA(n_components=20)
# Fixed typo: np.concantenate -> np.concatenate (AttributeError otherwise).
X_all = np.concatenate([X_train, X_test])
pca.fit(X_all)
X_train_pca = pca.transform(X_train)
X_test_pca = pca.transform(X_test)
# Fixed: bare `components_` was an undefined name — it is an attribute of
# the fitted estimator.
pca.explained_variance_ratio_, pca.components_

rbf_pca = KernelPCA(n_components=2, kernel="rbf", gamma=0.04)
X_reduced = rbf_pca.fit_transform(X)

### clustering
from sklearn.cluster import KMeans, MeanShift, DBSCAN

k_means = KMeans(init="k-means++", n_clusters=4, n_init=12)
k_means.fit(X)
k_means.predict(X)
# Fixed: bare `cluster_centers_` was an undefined name — it is an attribute
# of the fitted estimator.
k_means.labels_, k_means.cluster_centers_

dbscan = DBSCAN(eps=0.05, min_samples=5)

### search for hyperparameters
X_errors_image, (X_errors_image.shape[0], ds.train.num_rows,
                 ds.train.num_columns))
# Montage of the characters the PCA-based classifier got wrong.
ocr_utils.montage(
    X_errors2D,
    title='PCA Error Characters, components={}'.format(n_components))

title = 'inverse transform errors'
# Map the reduced error samples back to image space for visual inspection.
X_inverse = pca.inverse_transform(X_errors_pca)
X2D = np.reshape(X_inverse, (X_inverse.shape[0], ds.train.num_rows,
                             ds.train.num_columns))
# Shift to a non-negative range before rendering.
X2D = X2D - np.min(X2D)
ocr_utils.montage(X2D, title=title)

########################################################################################
kernel = 'rbf'
# really slow
pca = KernelPCA(n_components=2, kernel=kernel, gamma=15)
X_train_pca = pca.fit_transform(X_train_image)
X_test_pca = pca.transform(X_test_image)
# Logistic regression on the two kernel components; report both accuracies.
lr = LogisticRegression()
logistic_fitted = lr.fit(X_train_pca, y_train)
y_train_pred = logistic_fitted.predict(X_train_pca)
y_test_pred = logistic_fitted.predict(X_test_pca)
print(
    '\nKernel PCA Train Accuracy: {:4.6f}, n_components={}, kernel={}'.format(
        accuracy_score(y_train, y_train_pred), pca.n_components, kernel))
print('Kernel PCA Test Accuracy: {:4.6f}, n_components={}, kernel={}'.format(
    accuracy_score(y_test, y_test_pred), pca.n_components, kernel))
import numpy  # hoisted: was imported mid-script, after first use of the data

# Elbow method: fit k-means for the current k and record the mean distortion.
# Fixed: the model was fitted twice in a row (`KMeans(...).fit(...)` followed
# by a redundant `kmeans.fit(...)`) — one fit is enough.
kmeans = KMeans(n_clusters=k).fit(delta_noname)
distortion.append(
    sum(
        numpy.min(cdist(delta_noname, kmeans.cluster_centers_, 'euclidean'),
                  axis=1)) / delta_noname.shape[0])
plt.plot(K, distortion, 'bx-')
plt.title('The Elbow Method showing the optimal k')
plt.show()

# In[277]:
#PCA with RBF
from sklearn.decomposition import PCA, KernelPCA
kpca = KernelPCA(kernel="rbf", fit_inverse_transform=True, gamma=10,
                 n_components=4)
kpca.fit(delta_noname)
delta_noname_components_rbf = kpca.transform(delta_noname)

# In[278]:
# Approximate share of variance captured by each kernel component.
explained_variance = numpy.var(delta_noname_components_rbf, axis=0)
explained_variance_ratio = explained_variance / numpy.sum(explained_variance)
print(explained_variance_ratio)

# In[279]:
#X_reduced = inc_pca.transform(X_train)
#You can also use the memmap class to do this same problem:
#look at page 217.

##Randomized PCA:
rnd_pca = PCA(n_components=153, svd_solver="randomized")
X_reduced = rnd_pca.fit_transform(X_train)

##Kernel PCA:
#It seems that this instance has the same charactistics as the support vector machine
#methods from the earlier chapters.
from sklearn.decomposition import KernelPCA
rbf_pca = KernelPCA(n_components=2, kernel="rbf", gamma=0.04)
# Fixed: `fit` returns the fitted estimator itself, not the projected data
# (which is why the console reported no `transform_fit` method); use
# fit_transform to obtain the reduced coordinates.
X_reduced = rbf_pca.fit_transform(X)

##Selecting a kernel and tuning hyperparameters.
#To find the best hyperparameters for this method (which is a unsupervised
#statistical training method) you can use the function GridSearchCV().
from sklearn.model_selection import GridSearchCV
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import Pipeline
from sklearn.datasets import make_moons

moons = make_moons(n_samples=1000, shuffle=True, noise=0.2, random_state=42)
X = moons[0]
def kpcaModel():
    """Build an unfitted KernelPCA: 5 components, degree-2 polynomial kernel."""
    model = KernelPCA(kernel="poly", degree=2, n_components=5)
    return model
#For predictiong on new data data_pred = pd.read_csv('data/pred_data.csv') data_for_pred = data_pred[["contr", "energ", "maxpr"]].values data_pred["pred_class"] = '' y_pred = kmeans.predict(data_for_pred) for i in range(0, len(y_pred)): data_pred["pred_class"][i] = label_encoder.classes_[original_labels[ y_pred[i]]] data_pred.to_csv("data/pred_data.csv", index=False) #Graphical Visulization of Data #Plot Code Starts from sklearn.decomposition import KernelPCA pca = KernelPCA(n_components=2) principalComponents = pca.fit_transform(x, y) principalDf = pd.DataFrame( data=principalComponents, columns=['principal component 1', 'principal component 2']) finalDf = pd.concat([principalDf, dataset[['CLASS']]], axis=1) fig = plt.figure(figsize=(8, 8)) ax = fig.add_subplot(1, 1, 1) ax.set_xlabel('Component 1', fontsize=15) ax.set_ylabel('Component 2', fontsize=15) ax.set_title('2 component PCA', fontsize=20) targets = [0, 1, 2, 3] colors = ['r', 'g', 'b', 'y'] for target, color in zip(targets, colors):
X_train, X_test = X[train_index], X[test_index]
y_train, y_test = y[train_index], y[test_index]

# Identify the minority/majority classes in the training fold.
value, counts = np.unique(y_train, return_counts=True)
minority_class = value[np.argmin(counts)]
majority_class = value[np.argmax(counts)]
idx_min = np.where(y_train == minority_class)[0]  # currently unused below
idx_maj = np.where(y_train == majority_class)[0]

# Pool of majority-class training samples plus the test fold.
# NOTE(review): mixing X_test into the data that is clustered appears
# intentional (test instances are removed per cluster below) — confirm.
full_X = np.concatenate((X_train[idx_maj], X_test))
full_y = np.concatenate((y_train[idx_maj], y_test))

# Adding PCA Method: reduce to roughly a third of the feature count.
transformer = KernelPCA(n_components=math.ceil(X_train.shape[1] / 3),
                        kernel='poly')
X_transformed = transformer.fit_transform(full_X)

# Training the kmean model on the reduced pool.
kmeans = KMeans(n_clusters=number_of_clusters)
kmeans.fit(X_transformed)

# Map each cluster label to the indices of its member points.
points_under_each_cluster = {
    i: np.where(kmeans.labels_ == i)[0]
    for i in range(kmeans.n_clusters)
}
# From each cluster removing the test instances
for i in points_under_each_cluster.keys():
    temp = []
plt.legend(loc='lower left')
plt.show()
print('LDA transform_support vector machines_training score: ',
      svm.score(X_train_lda, y_train))
print('LDA transform_support vector machines_testing score: ',
      svm.score(X_test_lda, y_test))

#kPCA
# Sweep the RBF kernel width over 10 gammas in [0.01, 1] and record
# train/test accuracy for logistic regression and a linear SVM.
gamma_space = np.logspace(-2, 0, 10)
lr_train = []
lr_test = []
svm_train = []
svm_test = []
for gamma in gamma_space:
    kPCA = KernelPCA(n_components=2, kernel='rbf', gamma=gamma)
    # y_train is accepted but ignored by the unsupervised fit_transform.
    X_train_kpca = kPCA.fit_transform(X_train_std, y_train)
    X_test_kpca = kPCA.transform(X_test_std)
    lr = LogisticRegression()
    lr = lr.fit(X_train_kpca, y_train)
    lr_train.append(lr.score(X_train_kpca, y_train))
    lr_test.append(lr.score(X_test_kpca, y_test))
    svm = SVC(kernel='linear', C=1.0, random_state=1)
    svm.fit(X_train_kpca, y_train)
    svm_train.append(svm.score(X_train_kpca, y_train))
    svm_test.append(svm.score(X_test_kpca, y_test))
# Tabulate the sweep results.
print("gamma lr_train lr_test svm_train svm_test")
for i in range(10):
    print('%.3f, %.3f, %.3f, %.3f, %.3f' % (
        gamma_space[i],
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2,
                                                    random_state=0)

#Feature Scaling
from sklearn.preprocessing import StandardScaler
sc_X = StandardScaler()
X_train = sc_X.fit_transform(X_train)
# Fixed: was sc_X.fit_transform(X_test) — re-fitting the scaler on the test
# set leaks test statistics and scales the two splits inconsistently; the
# test set must be transformed with the training-set parameters.
X_test = sc_X.transform(X_test)

#Applying Kernal PCA
from sklearn.decomposition import KernelPCA
kpca = KernelPCA(n_components=2, kernel='rbf')
X_train = kpca.fit_transform(X_train)
X_test = kpca.transform(X_test)

#Fitting the logistic regression to the training set
from sklearn.linear_model import LogisticRegression
classifier = LogisticRegression(random_state=0)
classifier.fit(X_train, y_train)

#Predicting the test set results
y_pred = classifier.predict(X_test)

#Making the confusion matrix
from sklearn.metrics import confusion_matrix
X[:, 2] = X[:, 0] * w1 + X[:, 1] * w2 + noise * np.random.randn(m)

# Manual PCA via SVD of the centred data: project onto the top-2 right
# singular vectors.
X_centered = X - X.mean(axis=0)
U, s, Vt = np.linalg.svd(X_centered)
W2 = Vt.T[:, :2]
X2D = X_centered.dot(W2)

pca = PCA(n_components=2)
x2D = pca.fit_transform(X)
print(x2D[0])

pca1 = PCA(n_components=2, svd_solver="randomized")
x2D1 = pca1.fit_transform(X)
print(x2D1[0])

pca2 = KernelPCA(n_components=2, kernel="sigmoid", gamma=0.04)
x2D2 = pca2.fit_transform(X)
print(x2D2[0])

pca3 = KernelPCA(n_components=2, kernel="rbf", gamma=0.04)
# Fixed copy-paste bug: was `pca2.fit_transform(X)`, so the RBF model
# defined on the line above was never actually used.
x2D3 = pca3.fit_transform(X)
print(x2D3[0])

pca4 = KernelPCA(n_components=2, kernel="linear")
x2D4 = pca4.fit_transform(X)
print(x2D4[0])

lle = LocallyLinearEmbedding(n_components=2, n_neighbors=10, random_state=40)
x2D5 = lle.fit_transform(X)
print(x2D5[0])
def btnConvert_click(self): msgBox = QMessageBox() # Batch try: Batch = np.int32(ui.txtBatch.text()) except: msgBox.setText("Size of batch is wrong!") msgBox.setIcon(QMessageBox.Critical) msgBox.setStandardButtons(QMessageBox.Ok) msgBox.exec_() return False if Batch == 0: Batch = None # Kernel Kernel = ui.cbKernel.currentText() # Method Method = ui.cbMethod.currentText() # Gamma try: Gamma = np.float(ui.txtGamma.text()) except: msgBox.setText("Gamma is wrong!") msgBox.setIcon(QMessageBox.Critical) msgBox.setStandardButtons(QMessageBox.Ok) msgBox.exec_() return False # Degree try: Degree = np.int32(ui.txtDegree.text()) except: msgBox.setText("Degree is wrong!") msgBox.setIcon(QMessageBox.Critical) msgBox.setStandardButtons(QMessageBox.Ok) msgBox.exec_() return False # Coef0 try: Coef0 = np.float(ui.txtCoef0.text()) except: msgBox.setText("Coef0 is wrong!") msgBox.setIcon(QMessageBox.Critical) msgBox.setStandardButtons(QMessageBox.Ok) msgBox.exec_() return False # Alpha try: Alpha = np.int32(ui.txtAlpha.text()) except: msgBox.setText("Alpha is wrong!") msgBox.setIcon(QMessageBox.Critical) msgBox.setStandardButtons(QMessageBox.Ok) msgBox.exec_() return False # Tol try: Tol = np.float(ui.txtTole.text()) except: msgBox.setText("Tolerance is wrong!") msgBox.setIcon(QMessageBox.Critical) msgBox.setStandardButtons(QMessageBox.Ok) msgBox.exec_() return False # MaxIte try: MaxIter = np.int32(ui.txtMaxIter.text()) except: msgBox.setText("Maximum number of iterations is wrong!") msgBox.setIcon(QMessageBox.Critical) msgBox.setStandardButtons(QMessageBox.Ok) msgBox.exec_() return False if MaxIter <= 0: MaxIter = None # Number of Job try: NJob = np.int32(ui.txtJobs.text()) except: msgBox.setText("The number of parallel jobs is wrong!") msgBox.setIcon(QMessageBox.Critical) msgBox.setStandardButtons(QMessageBox.Ok) msgBox.exec_() return False if NJob < -1 or NJob == 0: msgBox.setText( "The number of parallel jobs must be -1 or greater than 0!") msgBox.setIcon(QMessageBox.Critical) 
msgBox.setStandardButtons(QMessageBox.Ok) msgBox.exec_() return False TrFoldErr = list() TeFoldErr = list() try: FoldFrom = np.int32(ui.txtFoldFrom.text()) FoldTo = np.int32(ui.txtFoldTo.text()) except: print("Please check fold parameters!") return if FoldTo < FoldFrom: print("Please check fold parameters!") return for fold_all in range(FoldFrom, FoldTo + 1): # Regularization try: Regularization = np.float(ui.txtRegularization.text()) except: msgBox.setText("Regularization value is wrong!") msgBox.setIcon(QMessageBox.Critical) msgBox.setStandardButtons(QMessageBox.Ok) msgBox.exec_() return False # OutFile OutFile = ui.txtOutFile.text() OutFile = OutFile.replace("$FOLD$", str(fold_all)) if not len(OutFile): msgBox.setText("Please enter out file!") msgBox.setIcon(QMessageBox.Critical) msgBox.setStandardButtons(QMessageBox.Ok) msgBox.exec_() return False # InFile InFile = ui.txtInFile.text() InFile = InFile.replace("$FOLD$", str(fold_all)) if not len(InFile): msgBox.setText("Please enter input file!") msgBox.setIcon(QMessageBox.Critical) msgBox.setStandardButtons(QMessageBox.Ok) msgBox.exec_() return False if not os.path.isfile(InFile): msgBox.setText("Input file not found!") msgBox.setIcon(QMessageBox.Critical) msgBox.setStandardButtons(QMessageBox.Ok) msgBox.exec_() return False InData = io.loadmat(InFile) OutData = dict() OutData["imgShape"] = InData["imgShape"] # Data if not len(ui.txtITrData.currentText()): msgBox.setText("Please enter Input Train Data variable name!") msgBox.setIcon(QMessageBox.Critical) msgBox.setStandardButtons(QMessageBox.Ok) msgBox.exec_() return False if not len(ui.txtITeData.currentText()): msgBox.setText("Please enter Input Test Data variable name!") msgBox.setIcon(QMessageBox.Critical) msgBox.setStandardButtons(QMessageBox.Ok) msgBox.exec_() return False if not len(ui.txtOTrData.text()): msgBox.setText("Please enter Output Train Data variable name!") msgBox.setIcon(QMessageBox.Critical) msgBox.setStandardButtons(QMessageBox.Ok) 
msgBox.exec_() return False if not len(ui.txtOTeData.text()): msgBox.setText("Please enter Output Test Data variable name!") msgBox.setIcon(QMessageBox.Critical) msgBox.setStandardButtons(QMessageBox.Ok) msgBox.exec_() return False try: XTr = InData[ui.txtITrData.currentText()] XTe = InData[ui.txtITeData.currentText()] if ui.cbScale.isChecked() and not ui.rbScale.isChecked(): XTr = preprocessing.scale(XTr) XTe = preprocessing.scale(XTe) print("Whole of data is scaled X~N(0,1).") except: print("Cannot load data") return # NComponent try: NumFea = np.int32(ui.txtNumFea.text()) except: msgBox.setText("Number of features is wrong!") msgBox.setIcon(QMessageBox.Critical) msgBox.setStandardButtons(QMessageBox.Ok) msgBox.exec_() return False if NumFea < 1: msgBox.setText("Number of features must be greater than zero!") msgBox.setIcon(QMessageBox.Critical) msgBox.setStandardButtons(QMessageBox.Ok) msgBox.exec_() return False if NumFea > np.shape(XTr)[1]: msgBox.setText("Number of features is wrong!") msgBox.setIcon(QMessageBox.Critical) msgBox.setStandardButtons(QMessageBox.Ok) msgBox.exec_() return False # Label if not len(ui.txtITrLabel.currentText()): msgBox.setText("Please enter Train Input Label variable name!") msgBox.setIcon(QMessageBox.Critical) msgBox.setStandardButtons(QMessageBox.Ok) msgBox.exec_() return False if not len(ui.txtITeLabel.currentText()): msgBox.setText("Please enter Test Input Label variable name!") msgBox.setIcon(QMessageBox.Critical) msgBox.setStandardButtons(QMessageBox.Ok) msgBox.exec_() return False if not len(ui.txtOTrLabel.text()): msgBox.setText( "Please enter Train Output Label variable name!") msgBox.setIcon(QMessageBox.Critical) msgBox.setStandardButtons(QMessageBox.Ok) msgBox.exec_() return False if not len(ui.txtOTeLabel.text()): msgBox.setText("Please enter Test Output Label variable name!") msgBox.setIcon(QMessageBox.Critical) msgBox.setStandardButtons(QMessageBox.Ok) msgBox.exec_() return False try: OutData[ui.txtOTrLabel.text()] = 
InData[ ui.txtITrLabel.currentText()] OutData[ui.txtOTeLabel.text()] = InData[ ui.txtITeLabel.currentText()] except: print("Cannot load labels!") # Subject if not len(ui.txtITrSubject.currentText()): msgBox.setText( "Please enter Train Input Subject variable name!") msgBox.setIcon(QMessageBox.Critical) msgBox.setStandardButtons(QMessageBox.Ok) msgBox.exec_() return False if not len(ui.txtITeSubject.currentText()): msgBox.setText( "Please enter Test Input Subject variable name!") msgBox.setIcon(QMessageBox.Critical) msgBox.setStandardButtons(QMessageBox.Ok) msgBox.exec_() return False if not len(ui.txtOTrSubject.text()): msgBox.setText( "Please enter Train Output Subject variable name!") msgBox.setIcon(QMessageBox.Critical) msgBox.setStandardButtons(QMessageBox.Ok) msgBox.exec_() return False if not len(ui.txtOTeSubject.text()): msgBox.setText( "Please enter Test Output Subject variable name!") msgBox.setIcon(QMessageBox.Critical) msgBox.setStandardButtons(QMessageBox.Ok) msgBox.exec_() return False try: TrSubject = InData[ui.txtITrSubject.currentText()] OutData[ui.txtOTrSubject.text()] = TrSubject TeSubject = InData[ui.txtITeSubject.currentText()] OutData[ui.txtOTeSubject.text()] = TeSubject except: print("Cannot load Subject IDs") return # Task if ui.cbTask.isChecked(): if not len(ui.txtITrTask.currentText()): msgBox.setText( "Please enter Input Train Task variable name!") msgBox.setIcon(QMessageBox.Critical) msgBox.setStandardButtons(QMessageBox.Ok) msgBox.exec_() return False if not len(ui.txtITeTask.currentText()): msgBox.setText( "Please enter Input Test Task variable name!") msgBox.setIcon(QMessageBox.Critical) msgBox.setStandardButtons(QMessageBox.Ok) msgBox.exec_() return False if not len(ui.txtOTrTask.text()): msgBox.setText( "Please enter Output Train Task variable name!") msgBox.setIcon(QMessageBox.Critical) msgBox.setStandardButtons(QMessageBox.Ok) msgBox.exec_() return False if not len(ui.txtOTeTask.text()): msgBox.setText( "Please enter Output Test 
Task variable name!") msgBox.setIcon(QMessageBox.Critical) msgBox.setStandardButtons(QMessageBox.Ok) msgBox.exec_() return False try: TrTask = InData[ui.txtITrTask.currentText()] OutData[ui.txtOTrTask.text()] = TrTask TeTask = InData[ui.txtITeTask.currentText()] OutData[ui.txtOTeTask.text()] = TeTask TrTaskIndex = TrTask.copy() for tasindx, tas in enumerate(np.unique(TrTask)): TrTaskIndex[TrTask == tas] = tasindx + 1 TeTaskIndex = TeTask.copy() for tasindx, tas in enumerate(np.unique(TeTask)): TeTaskIndex[TeTask == tas] = tasindx + 1 except: print("Cannot load Tasks!") return # Run if ui.cbRun.isChecked(): if not len(ui.txtITrRun.currentText()): msgBox.setText( "Please enter Train Input Run variable name!") msgBox.setIcon(QMessageBox.Critical) msgBox.setStandardButtons(QMessageBox.Ok) msgBox.exec_() return False if not len(ui.txtITeRun.currentText()): msgBox.setText( "Please enter Test Input Run variable name!") msgBox.setIcon(QMessageBox.Critical) msgBox.setStandardButtons(QMessageBox.Ok) msgBox.exec_() return False if not len(ui.txtOTrRun.text()): msgBox.setText( "Please enter Train Output Run variable name!") msgBox.setIcon(QMessageBox.Critical) msgBox.setStandardButtons(QMessageBox.Ok) msgBox.exec_() return False if not len(ui.txtOTeRun.text()): msgBox.setText( "Please enter Test Output Run variable name!") msgBox.setIcon(QMessageBox.Critical) msgBox.setStandardButtons(QMessageBox.Ok) msgBox.exec_() return False try: TrRun = InData[ui.txtITrRun.currentText()] OutData[ui.txtOTrRun.text()] = TrRun TeRun = InData[ui.txtITeRun.currentText()] OutData[ui.txtOTeRun.text()] = TeRun except: print("Cannot load Runs!") return # Counter if ui.cbCounter.isChecked(): if not len(ui.txtITrCounter.currentText()): msgBox.setText( "Please enter Train Input Counter variable name!") msgBox.setIcon(QMessageBox.Critical) msgBox.setStandardButtons(QMessageBox.Ok) msgBox.exec_() return False if not len(ui.txtITeCounter.currentText()): msgBox.setText( "Please enter Test Input Counter 
variable name!") msgBox.setIcon(QMessageBox.Critical) msgBox.setStandardButtons(QMessageBox.Ok) msgBox.exec_() return False if not len(ui.txtOTrCounter.text()): msgBox.setText( "Please enter Train Output Counter variable name!") msgBox.setIcon(QMessageBox.Critical) msgBox.setStandardButtons(QMessageBox.Ok) msgBox.exec_() return False if not len(ui.txtOTeCounter.text()): msgBox.setText( "Please enter Test Output Counter variable name!") msgBox.setIcon(QMessageBox.Critical) msgBox.setStandardButtons(QMessageBox.Ok) msgBox.exec_() return False try: TrCounter = InData[ui.txtITrCounter.currentText()] OutData[ui.txtOTrCounter.text()] = TrCounter TeCounter = InData[ui.txtITeCounter.currentText()] OutData[ui.txtOTeCounter.text()] = TeCounter except: print("Cannot load Counters!") return # Matrix Label if ui.cbmLabel.isChecked(): if not len(ui.txtITrmLabel.currentText()): msgBox.setText( "Please enter Train Input Matrix Label variable name!") msgBox.setIcon(QMessageBox.Critical) msgBox.setStandardButtons(QMessageBox.Ok) msgBox.exec_() return False if not len(ui.txtITemLabel.currentText()): msgBox.setText( "Please enter Test Input Matrix Label variable name!") msgBox.setIcon(QMessageBox.Critical) msgBox.setStandardButtons(QMessageBox.Ok) msgBox.exec_() return False if not len(ui.txtOTrmLabel.text()): msgBox.setText( "Please enter Train Output Matrix Label variable name!" 
) msgBox.setIcon(QMessageBox.Critical) msgBox.setStandardButtons(QMessageBox.Ok) msgBox.exec_() return False if not len(ui.txtOTemLabel.text()): msgBox.setText( "Please enter Test Output Matrix Label variable name!") msgBox.setIcon(QMessageBox.Critical) msgBox.setStandardButtons(QMessageBox.Ok) msgBox.exec_() return False try: OutData[ui.txtOTrmLabel.text()] = InData[ ui.txtITrmLabel.currentText()] OutData[ui.txtOTemLabel.text()] = InData[ ui.txtITemLabel.currentText()] except: print("Cannot load matrix lables!") return # Design if ui.cbDM.isChecked(): if not len(ui.txtITrDM.currentText()): msgBox.setText( "Please enter Train Input Design Matrix variable name!" ) msgBox.setIcon(QMessageBox.Critical) msgBox.setStandardButtons(QMessageBox.Ok) msgBox.exec_() return False if not len(ui.txtITeDM.currentText()): msgBox.setText( "Please enter Test Input Design Matrix variable name!") msgBox.setIcon(QMessageBox.Critical) msgBox.setStandardButtons(QMessageBox.Ok) msgBox.exec_() return False if not len(ui.txtOTrDM.text()): msgBox.setText( "Please enter Train Output Design Matrix variable name!" ) msgBox.setIcon(QMessageBox.Critical) msgBox.setStandardButtons(QMessageBox.Ok) msgBox.exec_() return False if not len(ui.txtOTeDM.text()): msgBox.setText( "Please enter Test Output Design Matrix variable name!" 
) msgBox.setIcon(QMessageBox.Critical) msgBox.setStandardButtons(QMessageBox.Ok) msgBox.exec_() return False try: OutData[ui.txtOTrDM.text()] = InData[ ui.txtITrDM.currentText()] OutData[ui.txtOTeDM.text()] = InData[ ui.txtITeDM.currentText()] except: print("Cannot load design matrices!") return # Coordinate if ui.cbCol.isChecked(): if not len(ui.txtCol.currentText()): msgBox.setText("Please enter Coordinator variable name!") msgBox.setIcon(QMessageBox.Critical) msgBox.setStandardButtons(QMessageBox.Ok) msgBox.exec_() return False if not len(ui.txtOCol.text()): msgBox.setText("Please enter Coordinator variable name!") msgBox.setIcon(QMessageBox.Critical) msgBox.setStandardButtons(QMessageBox.Ok) msgBox.exec_() return False try: OutData[ui.txtOCol.text()] = InData[ ui.txtCol.currentText()] except: print("Cannot load coordinator!") return # Condition if ui.cbCond.isChecked(): if not len(ui.txtCond.currentText()): msgBox.setText("Please enter Condition variable name!") msgBox.setIcon(QMessageBox.Critical) msgBox.setStandardButtons(QMessageBox.Ok) msgBox.exec_() return False if not len(ui.txtOCond.text()): msgBox.setText("Please enter Condition variable name!") msgBox.setIcon(QMessageBox.Critical) msgBox.setStandardButtons(QMessageBox.Ok) msgBox.exec_() return False try: OutData[ui.txtOCond.text()] = InData[ ui.txtCond.currentText()] except: print("Cannot load conditions!") return # FoldID if ui.cbFoldID.isChecked(): if not len(ui.txtFoldID.currentText()): msgBox.setText("Please enter FoldID variable name!") msgBox.setIcon(QMessageBox.Critical) msgBox.setStandardButtons(QMessageBox.Ok) msgBox.exec_() return False if not len(ui.txtOFoldID.text()): msgBox.setText("Please enter FoldID variable name!") msgBox.setIcon(QMessageBox.Critical) msgBox.setStandardButtons(QMessageBox.Ok) msgBox.exec_() return False try: OutData[ui.txtOFoldID.text()] = InData[ ui.txtFoldID.currentText()] except: print("Cannot load Fold ID!") return # FoldInfo if ui.cbFoldInfo.isChecked(): if not 
len(ui.txtFoldInfo.currentText()): msgBox.setText("Please enter FoldInfo variable name!") msgBox.setIcon(QMessageBox.Critical) msgBox.setStandardButtons(QMessageBox.Ok) msgBox.exec_() return False if not len(ui.txtOFoldInfo.text()): msgBox.setText("Please enter FoldInfo variable name!") msgBox.setIcon(QMessageBox.Critical) msgBox.setStandardButtons(QMessageBox.Ok) msgBox.exec_() return False try: OutData[ui.txtOFoldInfo.text()] = InData[ ui.txtFoldInfo.currentText()] except: print("Cannot load Fold Info!") return pass # Number of Scan if ui.cbNScan.isChecked(): if not len(ui.txtITrScan.currentText()): msgBox.setText( "Please enter Number of Scan variable name for Input Train!" ) msgBox.setIcon(QMessageBox.Critical) msgBox.setStandardButtons(QMessageBox.Ok) msgBox.exec_() return False if not len(ui.txtITeScan.currentText()): msgBox.setText( "Please enter Number of Scan variable name for Input Test!" ) msgBox.setIcon(QMessageBox.Critical) msgBox.setStandardButtons(QMessageBox.Ok) msgBox.exec_() return False if not len(ui.txtOTrScan.text()): msgBox.setText( "Please enter Number of Scan variable name for Output Train!" ) msgBox.setIcon(QMessageBox.Critical) msgBox.setStandardButtons(QMessageBox.Ok) msgBox.exec_() return False if not len(ui.txtOTeScan.text()): msgBox.setText( "Please enter Number of Scan variable name for Output Test!" 
) msgBox.setIcon(QMessageBox.Critical) msgBox.setStandardButtons(QMessageBox.Ok) msgBox.exec_() return False try: OutData[ui.txtOTrScan.text()] = InData[ ui.txtITrScan.currentText()] OutData[ui.txtOTeScan.text()] = InData[ ui.txtITeScan.currentText()] except: print("Cannot load NScan!") return # Train Analysis Level print("Calculating Analysis Level for Training Set ...") TrGroupFold = None FoldStr = "" if ui.cbFSubject.isChecked(): if not ui.rbFRun.isChecked(): TrGroupFold = TrSubject FoldStr = "Subject" else: TrGroupFold = np.concatenate((TrSubject, TrRun)) FoldStr = "Subject+Run" if ui.cbFTask.isChecked(): TrGroupFold = np.concatenate( (TrGroupFold, TrTaskIndex)) if TrGroupFold is not None else TrTaskIndex FoldStr = FoldStr + "+Task" if ui.cbFCounter.isChecked(): TrGroupFold = np.concatenate( (TrGroupFold, TrCounter)) if TrGroupFold is not None else TrCounter FoldStr = FoldStr + "+Counter" TrGroupFold = np.transpose(TrGroupFold) TrUniqFold = np.array( list(set(tuple(i) for i in TrGroupFold.tolist()))) TrFoldIDs = np.arange(len(TrUniqFold)) + 1 TrListFold = list() for gfold in TrGroupFold: for ufoldindx, ufold in enumerate(TrUniqFold): if (ufold == gfold).all(): currentID = TrFoldIDs[ufoldindx] break TrListFold.append(currentID) TrListFold = np.int32(TrListFold) TrListFoldUniq = np.unique(TrListFold) # Test Analysis Level print("Calculating Analysis Level for Testing Set ...") TeGroupFold = None if ui.cbFSubject.isChecked(): if not ui.rbFRun.isChecked(): TeGroupFold = TeSubject else: TeGroupFold = np.concatenate((TeSubject, TeRun)) if ui.cbFTask.isChecked(): TeGroupFold = np.concatenate( (TeGroupFold, TeTaskIndex)) if TeGroupFold is not None else TeTaskIndex if ui.cbFCounter.isChecked(): TeGroupFold = np.concatenate( (TeGroupFold, TeCounter)) if TeGroupFold is not None else TeCounter TeGroupFold = np.transpose(TeGroupFold) TeUniqFold = np.array( list(set(tuple(i) for i in TeGroupFold.tolist()))) TeFoldIDs = np.arange(len(TeUniqFold)) + 1 TeListFold = list() for 
gfold in TeGroupFold: for ufoldindx, ufold in enumerate(TeUniqFold): if (ufold == gfold).all(): currentID = TeFoldIDs[ufoldindx] break TeListFold.append(currentID) TeListFold = np.int32(TeListFold) TeListFoldUniq = np.unique(TeListFold) # Train Partition print("Partitioning Training Data ...") TrX = list() TrShape = None if Method == "PCA": svdmodel = PCA(n_components=NumFea, copy=False, tol=Tol) elif Method == "Kernel PCA": svdmodel = KernelPCA(n_components=NumFea,kernel=Kernel,gamma=Gamma,degree=Degree,\ coef0=Coef0, alpha=Alpha, tol=Tol, max_iter=MaxIter, n_jobs=NJob,copy_X=False) else: svdmodel = IncrementalPCA(n_components=NumFea, copy=False, batch_size=Batch) for foldindx, fold in enumerate(TrListFoldUniq): dat = XTr[np.where(TrListFold == fold)] if ui.cbScale.isChecked() and ui.rbScale.isChecked(): dat = preprocessing.scale(dat) print("Data belong to View " + str(foldindx + 1) + " is scaled X~N(0,1).") dat = svdmodel.fit_transform(dat) TrX.append(dat) if TrShape is None: TrShape = np.shape(dat) else: if not (TrShape == np.shape(dat)): print("ERROR: Train, Reshape problem for Fold " + str(foldindx + 1) + ", Shape: " + str(np.shape(dat))) return print("Train: View " + str(foldindx + 1) + " is extracted. 
Shape: " + str(np.shape(dat))) print("Training Shape: " + str(np.shape(TrX))) # Test Partition print("Partitioning Testing Data ...") TeX = list() TeShape = None for foldindx, fold in enumerate(TeListFoldUniq): dat = XTe[np.where(TeListFold == fold)] if ui.cbScale.isChecked() and ui.rbScale.isChecked(): dat = preprocessing.scale(dat) print("Data belong to View " + str(foldindx + 1) + " is scaled X~N(0,1).") dat = svdmodel.fit_transform(dat) TeX.append(dat) if TeShape is None: TeShape = np.shape(dat) else: if not (TeShape == np.shape(dat)): print("Test: Reshape problem for Fold " + str(foldindx + 1)) return print("Test: View " + str(foldindx + 1) + " is extracted.") print("Testing Shape: " + str(np.shape(TeX))) model = RHA(Dim=NumFea, regularization=Regularization) print("Running Hyperalignment on Training Data ...") MappedXtr, G = model.train(TrX) print("Running Hyperalignment on Testing Data ...") MappedXte = model.test(TeX) # Train Dot Product print("Producting Training Data ...") TrHX = None TrErr = None for foldindx, fold in enumerate(TrListFoldUniq): TrErr = TrErr + ( G - MappedXtr[foldindx] ) if TrErr is not None else G - MappedXtr[foldindx] TrHX = np.concatenate( (TrHX, MappedXtr[foldindx] )) if TrHX is not None else MappedXtr[foldindx] OutData[ui.txtOTrData.text()] = TrHX foldindx = foldindx + 1 TrErr = TrErr / foldindx print("Train: alignment error ", np.linalg.norm(TrErr)) TrFoldErr.append(np.linalg.norm(TrErr)) # Train Dot Product print("Producting Testing Data ...") TeHX = None TeErr = None for foldindx, fold in enumerate(TeListFoldUniq): TeErr = TeErr + ( G - MappedXte[foldindx] ) if TeErr is not None else G - MappedXte[foldindx] TeHX = np.concatenate( (TeHX, MappedXte[foldindx] )) if TeHX is not None else MappedXte[foldindx] OutData[ui.txtOTeData.text()] = TeHX foldindx = foldindx + 1 TeErr = TeErr / foldindx print("Test: alignment error ", np.linalg.norm(TeErr)) TeFoldErr.append(np.linalg.norm(TeErr)) HAParam = dict() HAParam["Method"] = Method 
HAParam["Kernel"] = Kernel HAParam["Share"] = G HAParam["Level"] = FoldStr OutData["FunctionalAlignment"] = HAParam print("Saving ...") io.savemat(OutFile, mdict=OutData) print("Fold " + str(fold_all) + " is DONE: " + OutFile) print("Training -> Alignment Error: mean " + str(np.mean(TrFoldErr)) + " std " + str(np.std(TrFoldErr))) print("Testing -> Alignment Error: mean " + str(np.mean(TeFoldErr)) + " std " + str(np.std(TeFoldErr))) print("Kernel/SVD Hyperalignment is done.") msgBox.setText("Kernel/SVD Hyperalignment is done.") msgBox.setIcon(QMessageBox.Information) msgBox.setStandardButtons(QMessageBox.Ok) msgBox.exec_()
# params for fault detection-KNN with linear pca fd_knn_linear_pca = {'classifier': FaultDetectionKNN(), 'preprocessing_method': PCA(), 'model_name': 'Fault_Detection_KNN_linear_PCA', 'sampling_method': None, 'log_normalize': False, 'variables': ['k','alpha', 'n_components'], 'distributions': ['quniform','uniform', 'quniform'], 'arguments': [(2,200,1),(0,0.01),(1,139,1)], 'variable_type': {'k': 'estimator', 'alpha': 'estimator', 'n_components': 'preprocessor'}} # params for fault detection-KNN with radial PCA fd_knn_radial_pca = {'classifier': FaultDetectionKNN(), 'preprocessing_method': KernelPCA(kernel="rbf", eigen_solver = "arpack"), 'model_name': 'Fault_Detection_KNN_Radial_PCA', 'sampling_method': None, 'log_normalize': False, 'variables': ['k','alpha', 'n_components','gamma'], 'distributions': ['quniform','uniform', 'quniform','loguniform'], 'arguments': [(2,200,1),(0,0.01),(1,100,1),(1e-6,300)], 'variable_type': {'k': 'estimator', 'alpha': 'estimator', 'n_components': 'preprocessor', 'gamma': 'preprocessor'}} # params for adaptive Mahalanobis distance-KNN mad_knn = {'classifier': MahalanobisDistanceKNN(), 'preprocessing_method': None, 'model_name': 'Mahalanobis_Distance_KNN', 'sampling_method': None, 'log_normalize': False,
# _*_ coding: utf-8 _*_ import numpy as np import matplotlib.pyplot as plt from sklearn.decomposition import PCA, KernelPCA from sklearn.datasets import make_circles np.random.seed(0) X, y = make_circles(n_samples=400, factor=.3, noise=.05) kpca = KernelPCA(kernel='rbf', fit_inverse_transform=True, gamma=10) X_kpca = kpca.fit_transform(X) X_back = kpca.inverse_transform(X_kpca) pca = PCA() X_pca = pca.fit_transform(X) # plot results plt.figure() plt.subplot(2, 2, 1, aspect='equal') plt.title('Original space') reds = y == 0 blues = y == 1 plt.scatter(X[reds, 0], X[reds, 1], c='red', s=20, edgecolor='k') plt.scatter(X[blues, 0], X[blues, 1], c='blue', s=20, edgecolors='k') plt.xlabel('$x_1$') plt.ylabel('$x_2$')
time = 6.8710 audio = audio[phi:int(phi+audio_fs*time)] fs = eeg_fs #This time around we're going to be iterating through all the songs for one patient #instead of all patients for one song and traing i tlike that indices = [] songs = [] for i in range(len(meta_data)): if meta_data[i]['subject'] == 'P01' and meta_data[i]['trial_type']=='perception' : indices.append(i) songs.append(meta_data[i]['stimulus_id']) #%% #Dimensionality reduction k = 1 bumble = KernelPCA(n_components = k, kernel='linear') ###################################################################### # NN Training ###################################################################### test_audio = 24 nets = [] print('Total Trials = around ' + str(len(indices))) #Iterating through each patient who has listed to that one song: for i, trial in enumerate(indices): #Read in audio: audio_fs, audio = read_audio(os.path.join(cwd, 'Audio', str(songs[i]) + ".wav")) audio = audio[:,0] phi = 82500 time = 6.8710 audio = audio[phi:int(phi+audio_fs*time)]
# Scatter the class-1 points of the 1-D projection slightly below the axis
# (y = -0.02) so the two classes do not overprint (ax, x_kpca, y are defined
# earlier in the script; presumably 500 samples per class — TODO confirm).
ax[1].scatter(x_kpca[y == 1, 0], np.zeros((500, 1)) - 0.02, marker='^', alpha=0.5)
ax[0].set_xlabel('PC 1')
# BUG FIX: the original called ax[0].set_xlabel('PC 2') here, which merely
# overwrote the x-label set on the previous line; the second principal
# component belongs on the y-axis of the 2-D subplot.
ax[0].set_ylabel('PC 2')
ax[1].set_ylim([-1, 1])
ax[1].set_yticks([])
ax[1].set_xlabel('PC 1')
plt.show()

# Reference implementation: scikit-learn's KernelPCA on the same half-moon
# data, RBF kernel with gamma=15, keeping the top two components.
from sklearn.decomposition import KernelPCA
X, y = make_moons(n_samples=100, random_state=123)
scikit_kpca = KernelPCA(n_components=2, kernel='rbf', gamma=15)
X_skernpca = scikit_kpca.fit_transform(X)

plt.scatter(X_skernpca[y == 0, 0], X_skernpca[y == 0, 1], color='red', marker='^', alpha=0.5)
plt.scatter(X_skernpca[y == 1, 0], X_skernpca[y == 1, 1], color='blue', marker='o', alpha=0.5)
plt.xlabel('PC1')
plt.ylabel('PC2')
path='../../figs/out/%s/%s/scaled.png' % (scriptname, dataset)) #Initiate KPCAwith various kernels # As I'm using 500 variables, 0.002 is the default gamma (1/n_variables) # I only explicitly state it at this point so I can display it on graphs gamma = 10 kpcas = [] #Use standard PCA for comparison kpcas.append(('standard ', 'std_', PCA(n_components=2))) #Linear kernal has no need for gamma kpcas.append(('Linear K', 'lin_k', KernelPCA(n_components=2, kernel='linear'))) kpcas.append( ('RBF K', 'rbf_k', KernelPCA(n_components=2, kernel='rbf', gamma=gamma))) kpcas.append(('Polynomial K', 'ply_k', KernelPCA(n_components=2, kernel='poly', gamma=gamma))) kpcas.append(('Sigmoid K', 'sig_k', KernelPCA(n_components=2, kernel='sigmoid', gamma=gamma))) kpcas.append(('Cosine K', 'cos_k', KernelPCA(n_components=2, kernel='cosine', gamma=gamma))) for kernel, abbreviation, kpca in kpcas: X_kpca = kpca.fit_transform(X_scaled) plot_scatter(X_kpca, y,
# Stack the per-class similarity rows into a single matrix (sim, sim_n and
# category are defined earlier in the script).
for j in range(1, category):
    sim = np.vstack((sim, sim_n[j]))

# PCA / kernel-PCA dimensionality reduction.
# For plain PCA a float n_components keeps enough components to explain that
# fraction of the variance; KernelPCA does NOT accept a float n_components.
components = 0.99  # canshu
if PCAflag == 1:
    pca = PCA(n_components=components, svd_solver='full')
    pca.fit(train)
    train_new = pca.transform(train)
    sim_new = pca.transform(sim)
    print('pca.explained_variance_ratio_', pca.explained_variance_ratio_)
    print('sum(pca.explained_variance_ratio_)',
          sum(pca.explained_variance_ratio_))
    print(pca.singular_values_)
else:
    # BUG FIX: the original passed the fractional `components` to KernelPCA
    # (invalid — it must be an int or None) and then printed attributes of
    # the undefined `pca` object (NameError; KernelPCA has no
    # explained_variance_ratio_ / singular_values_ anyway). Keep all
    # components and report the kernel eigenvalues instead.
    kpca = KernelPCA(n_components=None, kernel="rbf",
                     fit_inverse_transform=True)
    kpca.fit(train)
    train_new = kpca.transform(train)
    sim_new = kpca.transform(sim)
    eigvals = getattr(kpca, 'eigenvalues_', None)
    if eigvals is None:
        eigvals = kpca.lambdas_  # attribute name on older scikit-learn
    print('kpca eigenvalues', eigvals)

print('train.shape', train.shape)
print('train_new.shape', train_new.shape)

if plotflag == 1:
    plt.figure(figsize=(10, 8))
    for i in range(0, category):
        plt.subplot(1, 2, 1)
# Build one network graph per SBML metabolic model in the library and record
# a class label for each (library_folder, cobra, gk, modelNet, etc. are
# defined/imported earlier in the script).
graphList = []
label = []
for model_name in os.listdir(library_folder):
    print('Loading', model_name)
    # The third underscore-separated token of the file name is used as the
    # class label — assumes a "<a>_<b>_<label>_..." naming scheme; TODO confirm.
    label.append(model_name.split('_')[2])
    model = cobra.io.read_sbml_model(library_folder + model_name)
    g = modelNet(model)
    graphList.append(g)
print('Done')

# Weisfeiler-Lehman graph kernel: K is the normalized pairwise similarity
# (Gram) matrix over all loaded graphs.
kernel = gk.WeisfeilerLehman(base_kernel=gk.VertexHistogram, normalize=True)
K = pd.DataFrame(kernel.fit_transform(graphList))

# 2-D scatterplot
# kernel="precomputed" lets KernelPCA embed directly from the Gram matrix.
kpca = KernelPCA(kernel="precomputed", n_components=2, n_jobs=-1)
X_kpca = kpca.fit_transform(K)
sns.scatterplot(x=X_kpca[:, 0], y=X_kpca[:, 1], hue=label)

# 3-D scatterplot
kpca = KernelPCA(kernel="precomputed", n_components=3, n_jobs=-1)
X_kpca = kpca.fit_transform(K)
fig = pyplot.figure(figsize=(8, 8))
ax = Axes3D(fig)

# make color label
# Maps each label (file-name suffix) to a fixed plotting color.
td = {'old.xml': 'red', 'young.xml': 'blue'}
hue = [td[l] for l in label]