def test_kernel_pca_sparse():
    """Test that kPCA works on a sparse data input.

    Same test as ``test_kernel_pca`` except inverse_transform, since it is
    not implemented for sparse matrices.
    """
    rng = np.random.RandomState(0)
    X_fit = sp.csr_matrix(rng.random_sample((5, 4)))
    X_pred = sp.csr_matrix(rng.random_sample((2, 4)))

    for eigen_solver in ("auto", "arpack", "randomized"):
        for kernel in ("linear", "rbf", "poly"):
            # transform fit data
            kpca = KernelPCA(
                4,
                kernel=kernel,
                eigen_solver=eigen_solver,
                fit_inverse_transform=False,
                random_state=0,
            )
            X_fit_transformed = kpca.fit_transform(X_fit)
            X_fit_transformed2 = kpca.fit(X_fit).transform(X_fit)
            assert_array_almost_equal(np.abs(X_fit_transformed),
                                      np.abs(X_fit_transformed2))

            # transform new data
            X_pred_transformed = kpca.transform(X_pred)
            assert X_pred_transformed.shape[1] == X_fit_transformed.shape[1]

            # inverse transform: not available for sparse matrices
            # XXX: should we raise another exception type here? For instance:
            # NotImplementedError.
            with pytest.raises(NotFittedError):
                kpca.inverse_transform(X_pred_transformed)
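# The test above relies on the fact that the inverse transform is not
# implemented for sparse input. A minimal workaround sketch, not part of the
# original test, under the assumption that densifying the data is affordable:
# fit on a dense copy when a pre-image is needed. `X_sparse` is a stand-in
# name for illustration.
import numpy as np
import scipy.sparse as sp
from sklearn.decomposition import KernelPCA

X_sparse = sp.csr_matrix(np.random.RandomState(0).random_sample((5, 4)))
kpca = KernelPCA(n_components=2, fit_inverse_transform=True)
# densify before fitting, since fit_inverse_transform=True rejects sparse X
X_back = kpca.inverse_transform(kpca.fit_transform(X_sparse.toarray()))
print(X_back.shape)    # (5, 4)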
def KPCA(self, x_train):
    # Reduce to 2 dimensions, then map back to the original space.
    m_d2 = KernelPCA(n_components=2, kernel="rbf",
                     fit_inverse_transform=True, gamma=10)
    x_d2 = m_d2.fit_transform(x_train)
    x_d2 = m_d2.inverse_transform(x_d2)

    # Same with 3 dimensions.
    m_d3 = KernelPCA(n_components=3, kernel="rbf",
                     fit_inverse_transform=True, gamma=10)
    x_d3 = m_d3.fit_transform(x_train)
    x_d3 = m_d3.inverse_transform(x_d3)
    return x_d2, x_d3
def test_kernel_pca():
    rng = np.random.RandomState(0)
    X_fit = rng.random_sample((5, 4))
    X_pred = rng.random_sample((2, 4))

    for eigen_solver in ("auto", "dense", "arpack"):
        for kernel in ("linear", "rbf", "poly"):
            # transform fit data
            kpca = KernelPCA(4, kernel=kernel, eigen_solver=eigen_solver,
                             fit_inverse_transform=True)
            X_fit_transformed = kpca.fit_transform(X_fit)
            X_fit_transformed2 = kpca.fit(X_fit).transform(X_fit)
            assert_array_almost_equal(np.abs(X_fit_transformed),
                                      np.abs(X_fit_transformed2))

            # transform new data
            X_pred_transformed = kpca.transform(X_pred)
            assert_equal(X_pred_transformed.shape[1],
                         X_fit_transformed.shape[1])

            # inverse transform
            X_pred2 = kpca.inverse_transform(X_pred_transformed)
            assert_equal(X_pred2.shape, X_pred.shape)
def kPCA_visualization1d(X, y):
    kpca = KernelPCA(kernel="linear", fit_inverse_transform=True, gamma=10,
                     n_components=2)
    X_kpca = kpca.fit_transform(X)
    X_back = kpca.inverse_transform(X_kpca)
    pca = PCA(n_components=1)
    X_pca = pca.fit_transform(X)

    # Split the first kPCA component by class label.
    class_1 = []
    class_0 = []
    for i in range(len(y)):
        if y[i] == 1:
            class_1.append(X_kpca[i][0])
        else:
            class_0.append(X_kpca[i][0])
    print("check")
    print(class_1[:10])

    from matplotlib import pyplot
    pyplot.hist(class_1, 50, alpha=0.5, label='class 1')
    pyplot.hist(class_0, 50, alpha=0.5, label='class 0')
    pyplot.legend(loc='upper right')
    pyplot.show()
def test_kernel_pca():
    rng = np.random.RandomState(0)
    X_fit = rng.random_sample((5, 4))
    X_pred = rng.random_sample((2, 4))

    for eigen_solver in ("auto", "dense", "arpack"):
        for kernel in ("linear", "rbf", "poly"):
            # transform fit data
            kpca = KernelPCA(4, kernel=kernel, eigen_solver=eigen_solver,
                             fit_inverse_transform=True)
            X_fit_transformed = kpca.fit_transform(X_fit)
            X_fit_transformed2 = kpca.fit(X_fit).transform(X_fit)
            assert_array_almost_equal(np.abs(X_fit_transformed),
                                      np.abs(X_fit_transformed2))

            # non-regression test: previously, gamma would be 0 by default,
            # forcing all eigenvalues to 0 under the poly kernel
            assert_not_equal(X_fit_transformed, [])

            # transform new data
            X_pred_transformed = kpca.transform(X_pred)
            assert_equal(X_pred_transformed.shape[1],
                         X_fit_transformed.shape[1])

            # inverse transform
            X_pred2 = kpca.inverse_transform(X_pred_transformed)
            assert_equal(X_pred2.shape, X_pred.shape)
def test_kernel_pca():
    rng = np.random.RandomState(0)
    X_fit = rng.random_sample((5, 4))
    X_pred = rng.random_sample((2, 4))

    def histogram(x, y, **kwargs):
        # Histogram kernel implemented as a callable.
        assert_equal(kwargs, {})    # no kernel_params that we didn't ask for
        return np.minimum(x, y).sum()

    for eigen_solver in ("auto", "dense", "arpack"):
        for kernel in ("linear", "rbf", "poly", histogram):
            # histogram kernel produces singular matrix inside linalg.solve
            # XXX use a least-squares approximation?
            inv = not callable(kernel)

            # transform fit data
            kpca = KernelPCA(4, kernel=kernel, eigen_solver=eigen_solver,
                             fit_inverse_transform=inv)
            X_fit_transformed = kpca.fit_transform(X_fit)
            X_fit_transformed2 = kpca.fit(X_fit).transform(X_fit)
            assert_array_almost_equal(np.abs(X_fit_transformed),
                                      np.abs(X_fit_transformed2))

            # non-regression test: previously, gamma would be 0 by default,
            # forcing all eigenvalues to 0 under the poly kernel
            assert_not_equal(X_fit_transformed.size, 0)

            # transform new data
            X_pred_transformed = kpca.transform(X_pred)
            assert_equal(X_pred_transformed.shape[1],
                         X_fit_transformed.shape[1])

            # inverse transform
            if inv:
                X_pred2 = kpca.inverse_transform(X_pred_transformed)
                assert_equal(X_pred2.shape, X_pred.shape)
def dimReduction_kpca(self, dataset, kernel, fit_inverse, gamma):
    kpca = KernelPCA(kernel=kernel, fit_inverse_transform=fit_inverse,
                     gamma=gamma)
    dataset_kpca = kpca.fit_transform(dataset)
    dataset_back = kpca.inverse_transform(dataset_kpca)
    return dataset_kpca
def ReconginitionVector(people_set):
    # step1: load the face image data, get the matrix consisting of all images
    FaceMat = people_set
    kpca = KernelPCA(kernel="poly", degree=1, fit_inverse_transform=True,
                     gamma=1)
    X_kpca = kpca.fit_transform(FaceMat)
    X_back = kpca.inverse_transform(X_kpca)
    # step2: average the FaceMat
    # avgImg = np.mean(FaceMat, 1)
    # step3: calculate the difference of avgImg and all image data (FaceMat)
    # diffTrain = FaceMat - avgImg
    # step4: calculate the eigenvectors of the covariance matrix (the full
    # covariance matrix would cause a memory error)
    # eigvals, eigVects = linal.eig(mat(diffTrain.T * diffTrain))
    # eigSortIndex = argsort(-eigvals)
    # for i in xrange(shape(FaceMat)[1]):
    #     if (eigvals[eigSortIndex[:i]] / eigvals.sum()).sum() >= selecthr:
    #         eigSortIndex = eigSortIndex[:i]
    #         break
    # covVects = diffTrain * eigVects[:, eigSortIndex]  # eigenvectors of the covariance matrix
    # avgImg is the mean image, covVects are the eigenvectors of the
    # covariance matrix, diffTrain is the deviation matrix
    covVects = kpca.alphas_
    return X_kpca, X_back, covVects, kpca
def reconstruct(recon_pc_num):
    transformer = KernelPCA(n_components=recon_pc_num, kernel=self.kernel,
                            gamma=self.gamma, fit_inverse_transform=True,
                            n_jobs=-1, random_state=self.random_state)
    X_transformed = transformer.fit_transform(self.matrix)
    recon_matrix = transformer.inverse_transform(X_transformed)
    assert_description = ('The shape of the reconstruction matrix should be '
                          'equal to that of the initial matrix.')
    assert recon_matrix.shape == self.matrix.shape, assert_description
    return recon_matrix
def reconstruct(recon_pc_num):
    transformer = KernelPCA(n_components=recon_pc_num, kernel=self.kernel,
                            gamma=self.gamma, fit_inverse_transform=True,
                            n_jobs=-1)
    X_transformed = transformer.fit_transform(self.matrix)
    # inverse_transform maps the reduced matrix back to the original
    # feature space
    recon_matrix = transformer.inverse_transform(X_transformed)
    assert recon_matrix.shape == self.matrix.shape, \
        'The shape of the reconstructed matrix should match that of the initial matrix'
    return recon_matrix
def test_kernel_pca_inverse_transform(kernel):
    X, *_ = make_blobs(n_samples=100, n_features=4, centers=[[1, 1, 1, 1]],
                       random_state=0)

    kp = KernelPCA(n_components=2, kernel=kernel, fit_inverse_transform=True)
    X_trans = kp.fit_transform(X)
    X_inv = kp.inverse_transform(X_trans)
    assert_allclose(X, X_inv)
def kpca(n, x, kernel):
    print('Running KPCA')
    t0 = time()
    kpca = KernelPCA(n_components=n, kernel=kernel,
                     fit_inverse_transform=True)  # , gamma=10)
    x_kpca = kpca.fit_transform(x)
    x_proj_kpca = kpca.inverse_transform(x_kpca)
    print("done in %0.3fs" % (time() - t0))
    print("(see results below)")
    return x_kpca, x_proj_kpca
def scikit_kpca(self, max_eigVec):
    kpca = KernelPCA(kernel='rbf', n_components=max_eigVec,
                     fit_inverse_transform=True, gamma=self.C)
    kpca.fit(self.training_data)
    # transform (not fit_transform) so the projection learned on the
    # training data is applied to the test data
    x_kpca = kpca.transform(self.test_data)
    x_inv = kpca.inverse_transform(x_kpca)
    return x_inv
def plot_kpca():
    X, t = make_swiss_roll(n_samples=1000, noise=0.2, random_state=42)

    lin_pca = KernelPCA(n_components=2, kernel="linear",
                        fit_inverse_transform=True)
    rbf_pca = KernelPCA(n_components=2, kernel="rbf", gamma=0.0433,
                        fit_inverse_transform=True)
    sig_pca = KernelPCA(n_components=2, kernel="sigmoid", gamma=0.001,
                        coef0=1, fit_inverse_transform=True)

    y = t > 6.9
    plt.figure(figsize=(11, 4))
    for subplot, pca, title in ((131, lin_pca, "Linear kernel"),
                                (132, rbf_pca, r"RBF kernel, $\gamma=0.04$"),
                                (133, sig_pca, r"Sigmoid kernel, $\gamma=10^{-3}, r=1$")):
        X_reduced = pca.fit_transform(X)
        if subplot == 132:
            X_reduced_rbf = X_reduced
        plt.subplot(subplot)
        # plt.plot(X_reduced[y, 0], X_reduced[y, 1], "gs")
        # plt.plot(X_reduced[~y, 0], X_reduced[~y, 1], "y^")
        plt.title(title, fontsize=14)
        plt.scatter(X_reduced[:, 0], X_reduced[:, 1], c=t, cmap=plt.cm.hot)
        plt.xlabel("$z_1$", fontsize=18)
        if subplot == 131:
            plt.ylabel("$z_2$", fontsize=18, rotation=0)
        plt.grid(True)
    save_fig("kernel_pca_plot")
    plt.show()

    # map the RBF projection back to the original space (pre-image)
    plt.figure(figsize=(6, 5))
    X_inverse = rbf_pca.inverse_transform(X_reduced_rbf)
    ax = plt.subplot(121, projection='3d')
    ax.view_init(10, -70)
    ax.scatter(X_inverse[:, 0], X_inverse[:, 1], X_inverse[:, 2], c=t,
               cmap=plt.cm.hot, marker="x")
    ax.set_xlabel("")
    ax.set_ylabel("")
    ax.set_zlabel("")
    ax.set_xticklabels([])
    ax.set_yticklabels([])
    ax.set_zticklabels([])
    save_fig("preimage_plot", tight_layout=False)
    plt.show()
def kpca_feature(rect_list):
    kpca = KernelPCA(n_components=10, kernel="rbf",
                     fit_inverse_transform=True)
    kpca_feature = kpca.fit_transform(np.reshape(rect_list, (503, -1)) / 256)
    print('Explained Variance Ratio: ',
          explained_variance_score(
              np.reshape(rect_list, (503, -1)) / 256,
              kpca.inverse_transform(kpca_feature)))  # 0.6577
    return kpca_feature
def WithKernelPCA(MaxK, data, label):
    X = data
    y = label
    kpca = KernelPCA(n_components=6, kernel='poly',
                     fit_inverse_transform=True, gamma=10)
    X_kpca = kpca.fit_transform(X)
    X_back = kpca.inverse_transform(X_kpca)
    accuracy = With_folds(MaxK, X_back, y)
    return accuracy
class KDEKPCAGen(GenBase):

    def __init__(self, kernel="gaussian", bandwidth=0.1, n_components=None,
                 kernel_pca="cosine"):
        super().__init__()
        # kernel_pca: "linear" | "poly" | "rbf" | "sigmoid" | "cosine"
        self.pca = KernelPCA(n_components=n_components, kernel=kernel_pca,
                             fit_inverse_transform=True)  # , gamma=10)
        self.bandwidth = bandwidth
        self.kernel = kernel
        self.manifold = None

    def fit(self, x):
        x_pca = self.pca.fit_transform(x)
        self.manifold = KDEGen(kernel=self.kernel,
                               bandwidth=self.bandwidth).fit(x_pca)
        return self

    def sample_radius(self, x_exp, n_min_kernels=20, r=None, n_samples=1,
                      random_state=None):
        x_exp_pca = self.pca.transform(x_exp)
        x_sample_pca = self.manifold.sample_radius(
            x_exp_pca, n_min_kernels=n_min_kernels, r=r,
            n_samples=n_samples, random_state=random_state)
        x_sample = self.pca.inverse_transform(x_sample_pca)
        return x_sample

    def sample(self, n_samples=1, random_state=None):
        x_sample_pca = self.manifold.sample(n_samples=n_samples,
                                            random_state=random_state)
        x_sample = self.pca.inverse_transform(x_sample_pca)
        return x_sample
def reduce_kpca(X, kern, retall=False):
    """
    reduce_kpca(X, kern, retall=False)

    Reduce dimensionality with Kernel PCA.
    """
    kpca = KernelPCA(kernel=kern, fit_inverse_transform=True)
    X_kpca = kpca.fit_transform(X)
    X_back = kpca.inverse_transform(X_kpca)
    if not retall:
        return X_kpca, X_back
    else:
        return X_kpca, X_back, kpca
def fit_kernel_pca(sz_array, alpha, gamma, nc, return_fit=True):
    kernel_pca = KernelPCA(alpha=alpha, gamma=gamma, n_components=nc,
                           kernel='rbf', fit_inverse_transform=True)
    transformed = kernel_pca.fit_transform(sz_array)
    inverse_transformed = kernel_pca.inverse_transform(transformed)
    losses = mse(sz_array.T, inverse_transformed.T,
                 multioutput='raw_values').T
    if return_fit:
        return (losses, alpha, gamma, transformed, inverse_transformed,
                kernel_pca)
    else:
        return losses, alpha, gamma, transformed, inverse_transformed
def test_kernel_pca_inverse_transform_reconstruction():
    # Test if the reconstruction is a good approximation.
    # Note that in general it is not possible to get an arbitrarily good
    # reconstruction because of kernel centering that does not
    # preserve all the information of the original data.
    X, *_ = make_blobs(n_samples=100, n_features=4, random_state=0)
    kpca = KernelPCA(n_components=20, kernel='rbf',
                     fit_inverse_transform=True, alpha=1e-3)
    X_trans = kpca.fit_transform(X)
    X_reconst = kpca.inverse_transform(X_trans)
    assert np.linalg.norm(X - X_reconst) / np.linalg.norm(X) < 1e-1
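# The test above sets `alpha`, the ridge penalty KernelPCA uses when it
# learns the inverse map. A minimal sketch of how `alpha` affects
# reconstruction error, assuming only numpy and scikit-learn; the data and
# the alpha values are illustrative, not taken from any snippet here.
import numpy as np
from sklearn.datasets import make_blobs
from sklearn.decomposition import KernelPCA

X, *_ = make_blobs(n_samples=100, n_features=4, random_state=0)
for alpha in (1e-3, 1e-1, 1e1):
    kpca = KernelPCA(n_components=20, kernel='rbf',
                     fit_inverse_transform=True, alpha=alpha)
    X_reconst = kpca.inverse_transform(kpca.fit_transform(X))
    # relative reconstruction error: smaller alpha fits the inverse map
    # more tightly to the training data
    rel_err = np.linalg.norm(X - X_reconst) / np.linalg.norm(X)
    print(f"alpha={alpha:g}: relative reconstruction error {rel_err:.3f}")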
def fit_kernel_pca(sz_array, alpha, gamma, nc):
    kernel_pca = KernelPCA(alpha=alpha, gamma=gamma, n_components=nc,
                           kernel='rbf', fit_inverse_transform=True)
    transformed = kernel_pca.fit_transform(sz_array)
    inverse_transformed = kernel_pca.inverse_transform(transformed)
    losses = []
    for i in range(transformed.shape[0]):
        losses.append(mse(sz_array[i, :], inverse_transformed[i, :]))
    return np.mean(losses), alpha, gamma, inverse_transformed
def plot_kpca_results(X, y):
    kpca = KernelPCA(kernel="rbf", fit_inverse_transform=True, gamma=10)
    X_kpca = kpca.fit_transform(X)
    X_back = kpca.inverse_transform(X_kpca)
    pca = PCA()
    X_pca = pca.fit_transform(X)

    plt.figure(figsize=(10, 10))
    plt.subplot(2, 2, 1, aspect='equal')
    plt.title("Original space")
    reds = y == 0
    blues = y == 1
    plt.scatter(X[reds, 0], X[reds, 1], c="red", s=1)
    plt.scatter(X[blues, 0], X[blues, 1], c="blue", s=1)
    plt.xlabel("$x_1$")
    plt.ylabel("$x_2$")

    X1, X2 = np.meshgrid(np.linspace(-1.5, 1.5, 50),
                         np.linspace(-1.5, 1.5, 50))
    X_grid = np.array([np.ravel(X1), np.ravel(X2)]).T
    # projection on the first principal component (in the phi space)
    Z_grid = kpca.transform(X_grid)[:, 0].reshape(X1.shape)
    plt.contour(X1, X2, Z_grid, colors='grey', linewidths=1, origin='lower')

    plt.subplot(2, 2, 2, aspect='equal')
    plt.scatter(X_pca[reds, 0], X_pca[reds, 1], c="red", s=1)
    plt.scatter(X_pca[blues, 0], X_pca[blues, 1], c="blue", s=1)
    plt.title("Projection by PCA")
    plt.xlabel("1st principal component")
    plt.ylabel("2nd component")

    plt.subplot(2, 2, 3, aspect='equal')
    plt.scatter(X_kpca[reds, 0], X_kpca[reds, 1], c="red", s=1)
    plt.scatter(X_kpca[blues, 0], X_kpca[blues, 1], c="blue", s=1)
    plt.title("Projection by KPCA")
    plt.xlabel(r"1st principal component in space induced by $\phi$")
    plt.ylabel("2nd component")

    plt.subplot(2, 2, 4, aspect='equal')
    plt.scatter(X_back[reds, 0], X_back[reds, 1], c="red", s=1)
    plt.scatter(X_back[blues, 0], X_back[blues, 1], c="blue", s=1)
    plt.title("Original space after inverse transform")
    plt.xlabel("$x_1$")
    plt.ylabel("$x_2$")

    plt.tight_layout()
    plt.show()
def main():
    # set the timer
    start = time.time()

    # load the data
    trainX = np.load('trainX.npy')
    testX = np.load('testX.npy')
    trainY = np.load('trainY.npy')
    testY = np.load('testY.npy')
    print('\n!!! Data Loading Completed !!!\n')

    # get the 1st digit zero and plot it
    zero = trainX[14].reshape(28, 28)
    plt.imshow(zero, cmap=cm.Greys_r)
    plt.savefig("original" + str(trainY[14]) + ".png")
    # plt.show()

    # apply kpca
    kpca = KernelPCA(kernel='rbf', gamma=1, fit_inverse_transform=True)
    kpca.fit(trainX[0:3000])
    trainX_kpca = kpca.transform(trainX)
    testX_kpca = kpca.transform(testX)

    # do inverse transform and plot the result
    orig = kpca.inverse_transform(trainX_kpca)
    img = orig[14].reshape(28, 28)
    plt.imshow(img, cmap=cm.Greys_r)
    plt.savefig("reconstructed" + str(trainY[14]) + ".png")
    # plt.show()

    selector = SelectPercentile(f_classif, percentile=5)
    selector.fit(trainX_kpca, trainY)
    trainX = selector.transform(trainX_kpca)
    testX = selector.transform(testX_kpca)

    # fit a classifier
    parameters = {'n_neighbors': list(np.arange(15) + 1)}
    clf = GridSearchCV(KNeighborsClassifier(weights='distance', n_jobs=-1),
                       parameters)
    clf.fit(trainX, trainY)
    pred = clf.predict(testX)
    print(accuracy_score(testY, pred))
    print(confusion_matrix(testY, pred))
    # print(clf.best_params_)
    print('total : %d, correct : %d, incorrect : %d\n'
          % (len(pred), np.sum(pred == testY), np.sum(pred != testY)))
    print('Test Time : %f Minutes\n' % ((time.time() - start) / 60))
def kpca(data, n_components, train, test, kernel='linear', gamma=None,
         degree=3, coef0=1, alpha=0.1, evaluation=False):
    # Kernel PCA
    kpca = KernelPCA(n_components, fit_inverse_transform=True, kernel=kernel,
                     gamma=gamma, degree=degree, coef0=coef0,
                     alpha=alpha).fit(data[train])
    data_reduced = kpca.transform(data)
    if evaluation:
        data_rec = kpca.inverse_transform(data_reduced)
        loss = mean_squared_error(data[test], data_rec[test])
        return loss

    # name = 'Kernel PCA (' + kernel + ')'
    name = 'Kernel PCA'
    return data_reduced, name, kpca.inverse_transform
def kernel_pca_fit(n_components, train, test, shape, kernel="linear"):
    # Available kernels:
    # "linear", "poly", "rbf", "sigmoid", "cosine", "precomputed"

    # Set and fit KernelPCA
    kpca = KernelPCA(n_components=n_components, kernel=kernel,
                     fit_inverse_transform=True)
    kpca.fit(train)

    # Reduce dimension
    test_reduced = kpca.transform(test)

    # Recover data from the lower dimension
    test_recovered = kpca.inverse_transform(test_reduced)

    # Calculate the MSE
    mse = np.mean((test_recovered - test) ** 2)

    # Reshape into a matrix
    test_recovered = test_recovered.reshape(shape)

    return kpca, test_recovered, mse
def main():
    mnisttrain = fetch_mldata('MNIST original')
    xtrain = mnisttrain.data
    ytrain = mnisttrain.target

    # Reduce to 16 dimensions in feature space
    n_components = 16
    time_start = time.time()

    # Kernel PCA on MNIST data
    kpca = KernelPCA(n_components=n_components, fit_inverse_transform=True,
                     kernel='rbf', eigen_solver='arpack', n_jobs=-1)
    xtrain_kpca = kpca.fit_transform(xtrain)
    time_end = time.time()
    print("done in %0.3fs" % (time.time() - time_start))

    xtrain_inv_proj = kpca.inverse_transform(xtrain_kpca)

    n = 10
    plt.figure(figsize=(20, 4))
    for i in range(n):
        # display the original image
        index = random.randint(1, 60000)
        ax = plt.subplot(2, n, i + 1)
        img = xtrain[index]
        plt.imshow(np.reshape(img, (28, 28)))
        plt.gray()
        ax.get_xaxis().set_visible(False)
        ax.get_yaxis().set_visible(False)

        # display the decoded image
        ax = plt.subplot(2, n, i + 1 + n)
        img = xtrain_inv_proj[index]
        plt.imshow(np.reshape(img, (28, 28)))
        plt.gray()
        ax.get_xaxis().set_visible(False)
        ax.get_yaxis().set_visible(False)
    plt.savefig('./kpca_results/kpca_mnist.png')

    # classify on svm with reduced data
    svm_classify(xtrain_kpca, ytrain)
def kPCA_visualization2d(X, y):
    kpca = KernelPCA(kernel="linear", fit_inverse_transform=True, gamma=10,
                     n_components=2)
    X_kpca = kpca.fit_transform(X)
    X_back = kpca.inverse_transform(X_kpca)
    pca = PCA(n_components=2)
    X_pca = pca.fit_transform(X)

    # Split the projected points by class label.
    class_1 = []
    class_0 = []
    for i in range(len(y)):
        if y[i] == 1:
            class_1.append(X_kpca[i])
        else:
            class_0.append(X_kpca[i])

    class_0_x = [x[0] for x in class_0]
    class_0_y = [x[1] for x in class_0]
    class_1_x = [x[0] for x in class_1]
    class_1_y = [x[1] for x in class_1]

    # Plot the first two principal components.
    plt.title("kPCA kernel = linear")
    plt.plot(class_0_x, class_0_y, "ro")
    plt.plot(class_1_x, class_1_y, "go")
    plt.xlabel("1st principal component")
    plt.ylabel("2nd component")
    plt.show()
def test_kernel_pca():
    """Nominal test for all solvers and all known kernels + a custom one

    It tests
     - that fit_transform is equivalent to fit + transform
     - that the shapes of transforms and inverse transforms are correct
    """
    rng = np.random.RandomState(0)
    X_fit = rng.random_sample((5, 4))
    X_pred = rng.random_sample((2, 4))

    def histogram(x, y, **kwargs):
        # Histogram kernel implemented as a callable.
        assert kwargs == {}    # no kernel_params that we didn't ask for
        return np.minimum(x, y).sum()

    for eigen_solver in ("auto", "dense", "arpack", "randomized"):
        for kernel in ("linear", "rbf", "poly", histogram):
            # histogram kernel produces singular matrix inside linalg.solve
            # XXX use a least-squares approximation?
            inv = not callable(kernel)

            # transform fit data
            kpca = KernelPCA(4, kernel=kernel, eigen_solver=eigen_solver,
                             fit_inverse_transform=inv)
            X_fit_transformed = kpca.fit_transform(X_fit)
            X_fit_transformed2 = kpca.fit(X_fit).transform(X_fit)
            assert_array_almost_equal(np.abs(X_fit_transformed),
                                      np.abs(X_fit_transformed2))

            # non-regression test: previously, gamma would be 0 by default,
            # forcing all eigenvalues to 0 under the poly kernel
            assert X_fit_transformed.size != 0

            # transform new data
            X_pred_transformed = kpca.transform(X_pred)
            assert X_pred_transformed.shape[1] == X_fit_transformed.shape[1]

            # inverse transform
            if inv:
                X_pred2 = kpca.inverse_transform(X_pred_transformed)
                assert X_pred2.shape == X_pred.shape
def nonLinearPCA():
    X, y = generateData(method='nonLinear')
    kpca = KernelPCA(kernel="rbf", fit_inverse_transform=True, gamma=10)
    X_kpca = kpca.fit_transform(X)
    X_back = kpca.inverse_transform(X_kpca)
    pca = PCA()    # pca = PCA(2) would project from 64 to 2 dimensions
    X_pca = pca.fit_transform(X)

    X1, X2 = np.meshgrid(np.linspace(-1.5, 1.5, 50),
                         np.linspace(-1.5, 1.5, 50))
    X_grid = np.array([np.ravel(X1), np.ravel(X2)]).T
    # projection on the first principal component (in the phi space)
    Z_grid = kpca.transform(X_grid)[:, 0].reshape(X1.shape)
    plt.contour(X1, X2, Z_grid, colors='grey', linewidths=1, origin='lower')

    reds = y == 0
    blues = y == 1
    visualizePCA(X1, X2, Z_grid, X_pca, reds, blues, X_kpca, X_back)
def pca_kernel(df, kernel='rbf'):
    if 'class' in df:
        X = df.drop(['class'], axis=1)
    else:
        X = df
    kpca = KernelPCA(kernel=kernel, fit_inverse_transform=True, gamma=10)
    X_kpca = kpca.fit_transform(X)
    X_back = kpca.inverse_transform(X_kpca)

    columns = []
    for i in range(1, len(X_back[0]) + 1):
        columns.append('pca-' + kernel + str(i))
    X_back = pd.DataFrame(data=X_back, columns=columns)

    if 'class' in df:
        new = pd.concat([X_back, df['class']], axis=1)
    else:
        new = X_back
    return new
def hyper_parameter_tuning_kernel():
    from sklearn.datasets import make_swiss_roll
    from sklearn.decomposition import KernelPCA
    from sklearn.model_selection import GridSearchCV
    from sklearn.linear_model import LogisticRegression
    from sklearn.pipeline import Pipeline

    X, t = make_swiss_roll(n_samples=1000, noise=0.2, random_state=42)
    y = t > 6.9
    n_component = 2

    clf = Pipeline([("kpca", KernelPCA(n_components=n_component)),
                    ("log_reg", LogisticRegression(solver="liblinear"))])
    param_grid = [{
        "kpca__kernel": ["rbf", "sigmoid"],
        "kpca__gamma": np.linspace(0.03, 0.05, 10)
    }]
    grid_cv = GridSearchCV(clf, param_grid, cv=3, n_jobs=-1)
    grid_cv.fit(X, y)

    best_param = grid_cv.best_params_
    best_kernel = best_param["kpca__kernel"]
    best_gamma = best_param["kpca__gamma"]
    best_pca = KernelPCA(n_components=n_component, kernel=best_kernel,
                         gamma=best_gamma, fit_inverse_transform=True)
    reduced_x = best_pca.fit_transform(X)
    preimage_x = best_pca.inverse_transform(reduced_x)

    from sklearn.metrics import mean_squared_error
    preimage_error = mean_squared_error(preimage_x, X)
    print("Best hyperparameter : {}".format(best_param))
    print("Preimage error of best model : {}".format(preimage_error))
def looking_for_param_by_grid_search():
    X, t = make_swiss_roll(n_samples=1000, noise=0.2, random_state=42)
    y = t > 6.9

    clf = Pipeline([("kpca", KernelPCA(n_components=2)),
                    ("log_reg", LogisticRegression())])
    param_grid = [{
        "kpca__gamma": np.linspace(0.03, 0.05, 10),
        "kpca__kernel": ["rbf", "sigmoid"]
    }]
    grid_search = GridSearchCV(clf, param_grid, cv=3)
    grid_search.fit(X, y)
    print(grid_search.best_params_)

    rbf_pca = KernelPCA(n_components=2, kernel="rbf", gamma=0.0433,
                        fit_inverse_transform=True)
    X_reduced = rbf_pca.fit_transform(X)
    X_preimage = rbf_pca.inverse_transform(X_reduced)
    print(mean_squared_error(X, X_preimage))
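# The two snippets above pick gamma with a supervised grid search and then
# report the preimage (reconstruction) error of one fixed model. A minimal
# unsupervised alternative, sketched under the assumption that only numpy and
# scikit-learn are available: select gamma by minimising the preimage error
# itself, with no labels involved.
import numpy as np
from sklearn.datasets import make_swiss_roll
from sklearn.decomposition import KernelPCA
from sklearn.metrics import mean_squared_error

X, _ = make_swiss_roll(n_samples=1000, noise=0.2, random_state=42)
errors = {}
for gamma in np.linspace(0.03, 0.05, 10):
    kpca = KernelPCA(n_components=2, kernel="rbf", gamma=gamma,
                     fit_inverse_transform=True)
    # round-trip: project to 2 components, then map back to 3-D
    X_preimage = kpca.inverse_transform(kpca.fit_transform(X))
    errors[gamma] = mean_squared_error(X, X_preimage)
best_gamma = min(errors, key=errors.get)
print(f"gamma with lowest preimage error: {best_gamma:.4f}")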
def test_kernel_pca():
    rng = np.random.RandomState(0)
    X_fit = rng.random_sample((5, 4))
    X_pred = rng.random_sample((2, 4))

    def histogram(x, y, **kwargs):
        """Histogram kernel implemented as a callable."""
        assert_equal(kwargs, {})    # no kernel_params that we didn't ask for
        return np.minimum(x, y).sum()

    for eigen_solver in ("auto", "dense", "arpack"):
        for kernel in ("linear", "rbf", "poly", histogram):
            # histogram kernel produces singular matrix inside linalg.solve
            # XXX use a least-squares approximation?
            inv = not callable(kernel)

            # transform fit data
            kpca = KernelPCA(4, kernel=kernel, eigen_solver=eigen_solver,
                             fit_inverse_transform=inv)
            X_fit_transformed = kpca.fit_transform(X_fit)
            X_fit_transformed2 = kpca.fit(X_fit).transform(X_fit)
            assert_array_almost_equal(np.abs(X_fit_transformed),
                                      np.abs(X_fit_transformed2))

            # non-regression test: previously, gamma would be 0 by default,
            # forcing all eigenvalues to 0 under the poly kernel
            assert_not_equal(X_fit_transformed, [])

            # transform new data
            X_pred_transformed = kpca.transform(X_pred)
            assert_equal(X_pred_transformed.shape[1],
                         X_fit_transformed.shape[1])

            # inverse transform
            if inv:
                X_pred2 = kpca.inverse_transform(X_pred_transformed)
                assert_equal(X_pred2.shape, X_pred.shape)
def main():
    xtrain, ytrain, filenames, label_names = get_cifar()
    n_components = 36
    time_start = time.time()

    kpca = KernelPCA(n_components=n_components, fit_inverse_transform=True,
                     kernel='rbf', eigen_solver='arpack', n_jobs=-1)
    xtrain_kpca = kpca.fit_transform(xtrain)
    time_end = time.time()
    print("done in %0.3fs" % (time.time() - time_start))

    xtrain_inv_proj = kpca.inverse_transform(xtrain_kpca)

    n = 10
    plt.figure(figsize=(20, 4))
    for i in range(n):
        # original image (first channel only)
        ax = plt.subplot(2, n, i + 1)
        img = xtrain[i, 0:1024]
        plt.imshow(np.reshape(img, (32, 32)))
        plt.gray()
        ax.get_xaxis().set_visible(False)
        ax.get_yaxis().set_visible(False)

        # reconstructed image
        ax = plt.subplot(2, n, i + 1 + n)
        img = xtrain_inv_proj[i, 0:1024]
        plt.imshow(np.reshape(img, (32, 32)))
        plt.gray()
        ax.get_xaxis().set_visible(False)
        ax.get_yaxis().set_visible(False)
    plt.savefig('./kpca_results/kpca_cifar_10.png')

    svm_classify(xtrain_kpca, ytrain)

    message = client.messages.create(
        body="Hello Good News! Your KPCA CIFAR-10 is done!",
        from_="+19733213685",
        to="+19173707991")
    print(message.sid)
def kpca(data, n_components, train, test, c=None, sample_weight=None,
         kernel='linear', gamma=None, degree=3, coef0=1, alpha=0.1,
         evaluation=False, overwrite=True):
    # Kernel PCA
    kpca = KernelPCA(n_components, fit_inverse_transform=True, kernel=kernel,
                     gamma=gamma, degree=degree, coef0=coef0,
                     alpha=alpha).fit(data[train])
    data_reduced = np.zeros((data.shape[0], n_components))
    data_reduced[train + test] = kpca.transform(data[train + test])
    if evaluation:
        data_rec = kpca.inverse_transform(data_reduced[test])
        loss = mean_squared_error(data[test], data_rec)
        return loss

    name = 'KPCA'
    if overwrite:
        # Save the model
        save_model(kpca, name, c)
    return data_reduced, name, kpca.inverse_transform
# Andreas Mueller
# License: BSD

import numpy as np
import pylab as pl

from sklearn.decomposition import PCA, KernelPCA
from sklearn.datasets import make_circles

np.random.seed(0)

X, y = make_circles(n_samples=400, factor=.3, noise=.05)

kpca = KernelPCA(kernel="rbf", fit_inverse_transform=True, gamma=10)
X_kpca = kpca.fit_transform(X)
X_back = kpca.inverse_transform(X_kpca)
pca = PCA()
X_pca = pca.fit_transform(X)

# Plot results
pl.figure()
pl.subplot(2, 2, 1, aspect='equal')
pl.title("Original space")
reds = y == 0
blues = y == 1
pl.plot(X[reds, 0], X[reds, 1], "ro")
pl.plot(X[blues, 0], X[blues, 1], "bo")
pl.xlabel("$x_1$")
pl.ylabel("$x_2$")
ax.set_zticks(np.arange(small_df.prof.min().astype('int'),
                        small_df.prof.max().astype('int') + 1, 10))
ax.set_xlabel('Lon')
ax.set_ylabel('Lat')
ax.set_zlabel('Prof')
plt.show()

# analysis
class_data_index = [0, 1, 2]    # which columns from the data frame?
small_data = small_df.iloc[:, class_data_index].values

# Principal component analysis: what is the data dimensionality?
kpca = KernelPCA(kernel="rbf", fit_inverse_transform=True, gamma=10)
small_data_kpca = kpca.fit_transform(small_data)
small_data_kpca_back = kpca.inverse_transform(small_data_kpca)
small_data_kpca.shape, small_data_kpca_back.shape

fig = plt.figure()
ax = fig.add_subplot(111, projection='3d')
ax.scatter(small_data_kpca_back[:, 0], small_data_kpca_back[:, 1],
           small_data_kpca_back[:, 2], c=small_df.mag.values,
           s=15 * small_df.mag.values, marker='o', cmap=plt.cm.RdYlBu_r)
ax.set_zlim3d(small_df.prof.max(), small_df.prof.min())
ax.set_xticks(np.linspace(ax.get_xlim()[0], ax.get_xlim()[1], 4))
ax.set_yticks(np.linspace(ax.get_ylim()[0], ax.get_ylim()[1], 4))
ax.set_zticks(np.arange(small_df.prof.min().astype('int'),
                        small_df.prof.max().astype('int') + 1, 10))
plt.title("KernelPCA reconstructed data")
plt.show()

pca = PCA(n_components='mle')
def RBF_kernel_PCA(xList, componentNum):
    kpca = KernelPCA(n_components=componentNum, kernel="rbf",
                     fit_inverse_transform=True, gamma=10)
    X_kpca = kpca.fit_transform(xList)
    X_back = kpca.inverse_transform(X_kpca)
    return X_back
from sklearn.datasets import make_circles

# Set the seed for the random number generator
np.random.seed(7)

# Generate samples
X, y = make_circles(n_samples=500, factor=0.2, noise=0.04)

# Perform PCA
pca = PCA()
X_pca = pca.fit_transform(X)

# Perform Kernel PCA
kernel_pca = KernelPCA(kernel="rbf", fit_inverse_transform=True, gamma=10)
X_kernel_pca = kernel_pca.fit_transform(X)
X_inverse = kernel_pca.inverse_transform(X_kernel_pca)

# Plot original data
class_0 = np.where(y == 0)
class_1 = np.where(y == 1)
plt.figure()
plt.title("Original data")
plt.plot(X[class_0, 0], X[class_0, 1], "ko", mfc='none')
plt.plot(X[class_1, 0], X[class_1, 1], "kx")
plt.xlabel("1st dimension")
plt.ylabel("2nd dimension")

# Plot PCA projection of the data
plt.figure()
plt.plot(X_pca[class_0, 0], X_pca[class_0, 1], "ko", mfc='none')
plt.plot(X_pca[class_1, 0], X_pca[class_1, 1], "kx")
def pca(self):
    kpca = KernelPCA(kernel="linear", fit_inverse_transform=True)
    utility_normal_kpca = kpca.fit_transform(self.ds.utility_normal)
    self.utility_normal_back = kpca.inverse_transform(utility_normal_kpca)