Example #1
def test_kernel_pca_sparse():
    """Test that kPCA works on a sparse data input.

    Same test as ``test_kernel_pca``, except inverse_transform since it's not
    implemented for sparse matrices.
    """
    rng = np.random.RandomState(0)
    X_fit = sp.csr_matrix(rng.random_sample((5, 4)))
    X_pred = sp.csr_matrix(rng.random_sample((2, 4)))

    for eigen_solver in ("auto", "arpack", "randomized"):
        for kernel in ("linear", "rbf", "poly"):
            # transform fit data
            kpca = KernelPCA(
                4,
                kernel=kernel,
                eigen_solver=eigen_solver,
                fit_inverse_transform=False,
                random_state=0,
            )
            X_fit_transformed = kpca.fit_transform(X_fit)
            X_fit_transformed2 = kpca.fit(X_fit).transform(X_fit)
            assert_array_almost_equal(np.abs(X_fit_transformed),
                                      np.abs(X_fit_transformed2))

            # transform new data
            X_pred_transformed = kpca.transform(X_pred)
            assert X_pred_transformed.shape[1] == X_fit_transformed.shape[1]

            # inverse transform: not available for sparse matrices
            # XXX: should we raise another exception type here? For instance:
            # NotImplementedError.
            with pytest.raises(NotFittedError):
                kpca.inverse_transform(X_pred_transformed)
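For reference, a minimal standalone sketch of the behaviour exercised above, assuming NumPy, SciPy and scikit-learn are installed: KernelPCA accepts sparse CSR input for fit and transform, but fit_inverse_transform must stay False because the inverse mapping is not implemented for sparse matrices.

import numpy as np
import scipy.sparse as sp
from sklearn.decomposition import KernelPCA

# Minimal sketch; the environment and parameter values are assumptions.
rng = np.random.RandomState(0)
X_sparse = sp.csr_matrix(rng.random_sample((5, 4)))

kpca = KernelPCA(n_components=2, kernel="rbf", fit_inverse_transform=False)
X_transformed = kpca.fit_transform(X_sparse)  # sparse input is accepted here
print(X_transformed.shape)  # (5, 2)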
Example #2
def KPCA(self, x_train):
    m_d2 = KernelPCA(n_components=2, kernel="rbf", fit_inverse_transform=True, gamma=10)
    x_d2 = m_d2.fit_transform(x_train)
    x_d2 = m_d2.inverse_transform(x_d2)
    m_d3 = KernelPCA(n_components=3, kernel="rbf", fit_inverse_transform=True, gamma=10)
    x_d3 = m_d3.fit_transform(x_train)
    x_d3 = m_d3.inverse_transform(x_d3)
    return x_d2, x_d3
Example #3
def test_kernel_pca():
    rng = np.random.RandomState(0)
    X_fit = rng.random_sample((5, 4))
    X_pred = rng.random_sample((2, 4))

    for eigen_solver in ("auto", "dense", "arpack"):
        for kernel in ("linear", "rbf", "poly"):
            # transform fit data
            kpca = KernelPCA(4,
                             kernel=kernel,
                             eigen_solver=eigen_solver,
                             fit_inverse_transform=True)
            X_fit_transformed = kpca.fit_transform(X_fit)
            X_fit_transformed2 = kpca.fit(X_fit).transform(X_fit)
            assert_array_almost_equal(np.abs(X_fit_transformed),
                                      np.abs(X_fit_transformed2))

            # transform new data
            X_pred_transformed = kpca.transform(X_pred)
            assert_equal(X_pred_transformed.shape[1],
                         X_fit_transformed.shape[1])

            # inverse transform
            X_pred2 = kpca.inverse_transform(X_pred_transformed)
            assert_equal(X_pred2.shape, X_pred.shape)
def kPCA_visualization1d(X, y):
   
    kpca = KernelPCA(kernel="linear", fit_inverse_transform=True, gamma=10, n_components=2)
    X_kpca = kpca.fit_transform(X)
    X_back = kpca.inverse_transform(X_kpca)
    pca = PCA(n_components=1)
    X_pca = pca.fit_transform(X)

    class_1 = []
    class_0 = []

    for i in range(0, len(y)):
        
        if y[i] == 1:
            class_1.append(  list( X_kpca[i] )[0] )
        else:
            class_0.append(  list( X_kpca[i] )[0] )
    print "check"
    print class_1[:10]
    import numpy
    from matplotlib import pyplot
    

    pyplot.hist(class_1, 50, alpha=0.5, label='class 1' )  
    pyplot.hist(class_0, 50, alpha=0.5, label='class 0')

    pyplot.legend(loc='upper right')
    pyplot.show()
Example #5
def test_kernel_pca():
    rng = np.random.RandomState(0)
    X_fit = rng.random_sample((5, 4))
    X_pred = rng.random_sample((2, 4))

    for eigen_solver in ("auto", "dense", "arpack"):
        for kernel in ("linear", "rbf", "poly"):
            # transform fit data
            kpca = KernelPCA(4, kernel=kernel, eigen_solver=eigen_solver,
                             fit_inverse_transform=True)
            X_fit_transformed = kpca.fit_transform(X_fit)
            X_fit_transformed2 = kpca.fit(X_fit).transform(X_fit)
            assert_array_almost_equal(np.abs(X_fit_transformed),
                                      np.abs(X_fit_transformed2))

            # non-regression test: previously, gamma would be 0 by default,
            # forcing all eigenvalues to 0 under the poly kernel
            assert_not_equal(X_fit_transformed, [])

            # transform new data
            X_pred_transformed = kpca.transform(X_pred)
            assert_equal(X_pred_transformed.shape[1],
                         X_fit_transformed.shape[1])

            # inverse transform
            X_pred2 = kpca.inverse_transform(X_pred_transformed)
            assert_equal(X_pred2.shape, X_pred.shape)
def test_kernel_pca():
    rng = np.random.RandomState(0)
    X_fit = rng.random_sample((5, 4))
    X_pred = rng.random_sample((2, 4))

    def histogram(x, y, **kwargs):
        # Histogram kernel implemented as a callable.
        assert_equal(kwargs, {})  # no kernel_params that we didn't ask for
        return np.minimum(x, y).sum()

    for eigen_solver in ("auto", "dense", "arpack"):
        for kernel in ("linear", "rbf", "poly", histogram):
            # histogram kernel produces singular matrix inside linalg.solve
            # XXX use a least-squares approximation?
            inv = not callable(kernel)

            # transform fit data
            kpca = KernelPCA(4, kernel=kernel, eigen_solver=eigen_solver, fit_inverse_transform=inv)
            X_fit_transformed = kpca.fit_transform(X_fit)
            X_fit_transformed2 = kpca.fit(X_fit).transform(X_fit)
            assert_array_almost_equal(np.abs(X_fit_transformed), np.abs(X_fit_transformed2))

            # non-regression test: previously, gamma would be 0 by default,
            # forcing all eigenvalues to 0 under the poly kernel
            assert_not_equal(X_fit_transformed.size, 0)

            # transform new data
            X_pred_transformed = kpca.transform(X_pred)
            assert_equal(X_pred_transformed.shape[1], X_fit_transformed.shape[1])

            # inverse transform
            if inv:
                X_pred2 = kpca.inverse_transform(X_pred_transformed)
                assert_equal(X_pred2.shape, X_pred.shape)
def dimReduction_kpca(self, dataset, kernel, fit_inverse, gamma):
    kpca = KernelPCA(kernel=kernel,
                     fit_inverse_transform=fit_inverse,
                     gamma=gamma)
    dataset_kpca = kpca.fit_transform(dataset)
    dataset_back = kpca.inverse_transform(dataset_kpca)
    return dataset_kpca
Example #8
def ReconginitionVector(people_set):
    # step1: load the face image data and build the matrix containing all images
    FaceMat = people_set
    kpca = KernelPCA(kernel="poly",
                     degree=1,
                     fit_inverse_transform=True,
                     gamma=1)
    X_kpca = kpca.fit_transform(FaceMat)
    X_back = kpca.inverse_transform(X_kpca)
    # step2: average the FaceMat
    #     avgImg = np.mean(FaceMat,1)
    # step3: calculate the difference of avgimg and all image data(FaceMat)
    #     diffTrain = FaceMat-avgImg
    #step4: calculate eigenvector of covariance matrix (because covariance matrix will cause memory error)
    #     eigvals,eigVects = linal.eig(mat(diffTrain.T*diffTrain))
    #     eigSortIndex = argsort(-eigvals)
    #     for i in xrange(shape(FaceMat)[1]):
    #         if (eigvals[eigSortIndex[:i]]/eigvals.sum()).sum() >= selecthr:
    #             eigSortIndex = eigSortIndex[:i]
    #             break
    #     covVects = diffTrain * eigVects[:,eigSortIndex] # covVects is the eigenvector of covariance matrix
    # avgImg is the mean image, covVects are the eigenvectors of the covariance matrix, diffTrain is the deviation matrix

    covVects = kpca.alphas_
    return X_kpca, X_back, covVects, kpca
def test_kernel_pca():
    rng = np.random.RandomState(0)
    X_fit = rng.random_sample((5, 4))
    X_pred = rng.random_sample((2, 4))

    for eigen_solver in ("auto", "dense", "arpack"):
        for kernel in ("linear", "rbf", "poly"):
            # transform fit data
            kpca = KernelPCA(4,
                             kernel=kernel,
                             eigen_solver=eigen_solver,
                             fit_inverse_transform=True)
            X_fit_transformed = kpca.fit_transform(X_fit)
            X_fit_transformed2 = kpca.fit(X_fit).transform(X_fit)
            assert_array_almost_equal(np.abs(X_fit_transformed),
                                      np.abs(X_fit_transformed2))

            # non-regression test: previously, gamma would be 0 by default,
            # forcing all eigenvalues to 0 under the poly kernel
            assert_not_equal(X_fit_transformed, [])

            # transform new data
            X_pred_transformed = kpca.transform(X_pred)
            assert_equal(X_pred_transformed.shape[1],
                         X_fit_transformed.shape[1])

            # inverse transform
            X_pred2 = kpca.inverse_transform(X_pred_transformed)
            assert_equal(X_pred2.shape, X_pred.shape)
def reconstruct(recon_pc_num):
    transformer = KernelPCA(n_components=recon_pc_num, kernel=self.kernel, gamma=self.gamma,
                            fit_inverse_transform=True, n_jobs=-1, random_state=self.random_state)
    X_transformed = transformer.fit_transform(self.matrix)
    recon_matrix = transformer.inverse_transform(X_transformed)
    assert_description = 'The shape of the reconstruction matrix should be equal to that of the initial matrix.'
    assert recon_matrix.shape == self.matrix.shape, assert_description
    return recon_matrix
Example #11
def reconstruct(recon_pc_num):
    transformer = KernelPCA(n_components=recon_pc_num, kernel=self.kernel,
                            gamma=self.gamma, fit_inverse_transform=True, n_jobs=-1)
    X_transformed = transformer.fit_transform(self.matrix)
    # inverse_transform maps the reduced matrix back into the original feature space
    recon_matrix = transformer.inverse_transform(X_transformed)
    assert recon_matrix.shape == self.matrix.shape, \
        'The shape of the reconstructed matrix should match that of the initial matrix'
    return recon_matrix
Example #12
def test_kernel_pca_inverse_transform(kernel):
    X, *_ = make_blobs(n_samples=100, n_features=4, centers=[[1, 1, 1, 1]],
                       random_state=0)

    kp = KernelPCA(n_components=2, kernel=kernel, fit_inverse_transform=True)
    X_trans = kp.fit_transform(X)
    X_inv = kp.inverse_transform(X_trans)
    assert_allclose(X, X_inv)
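The test above takes kernel as an argument; in the scikit-learn test suite this style of test is normally driven by pytest.mark.parametrize. A hedged sketch of how the argument could be supplied (the exact kernel list is an assumption):

import pytest

@pytest.mark.parametrize("kernel", ["linear", "poly", "rbf", "sigmoid", "cosine"])
def test_kernel_pca_inverse_transform(kernel):
    ...  # body as in the snippet above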
def kpca(n,x,kernel):
    print('Running KPCA') 
    t0 = time()
    kpca = KernelPCA(n_components=n, kernel=kernel, fit_inverse_transform=True)#, gamma=10)
    x_kpca = kpca.fit_transform(x)
    x_proj_kpca = kpca.inverse_transform(x_kpca)
    print("done in %0.3fs" % (time() - t0))  
    print("(see results below)")
    return x_kpca, x_proj_kpca
def scikit_kpca(self, max_eigVec):
    kpca = KernelPCA(kernel='rbf',
                     n_components=max_eigVec,
                     fit_inverse_transform=True,
                     gamma=self.C)
    kpca.fit(self.training_data)
    x_invkpca = kpca.fit_transform(self.test_data)
    x_inv = kpca.inverse_transform(x_invkpca)
    return x_inv
def plot_kpca():
    X, t = make_swiss_roll(n_samples=1000, noise=0.2, random_state=42)

    lin_pca = KernelPCA(n_components=2,
                        kernel="linear",
                        fit_inverse_transform=True)
    rbf_pca = KernelPCA(n_components=2,
                        kernel="rbf",
                        gamma=0.0433,
                        fit_inverse_transform=True)
    sig_pca = KernelPCA(n_components=2,
                        kernel="sigmoid",
                        gamma=0.001,
                        coef0=1,
                        fit_inverse_transform=True)
    y = t > 6.9
    plt.figure(figsize=(11, 4))
    for subplot, pca, title in ((131, lin_pca, "Linear kernel"),
                                (132, rbf_pca, r"RBF kernel, $\gamma=0.04$"),
                                (133, sig_pca,
                                 r"Sigmoid kernel, $\gamma=10^{-3}, r=1$")):
        X_reduced = pca.fit_transform(X)
        if subplot == 132:
            X_reduced_rbf = X_reduced

        plt.subplot(subplot)
        # plt.plot(X_reduced[y, 0], X_reduced[y, 1], "gs")
        # plt.plot(X_reduced[~y, 0], X_reduced[~y, 1], "y^")
        plt.title(title, fontsize=14)
        plt.scatter(X_reduced[:, 0], X_reduced[:, 1], c=t, cmap=plt.cm.hot)
        plt.xlabel("$z_1$", fontsize=18)
        if subplot == 131:
            plt.ylabel("$z_2$", fontsize=18, rotation=0)
        plt.grid(True)

    save_fig("kernel_pca_plot")
    plt.show()

    # Inverse transform: reconstruct the pre-images in the original space
    plt.figure(figsize=(6, 5))
    X_inverse = rbf_pca.inverse_transform(X_reduced_rbf)
    ax = plt.subplot(121, projection='3d')
    ax.view_init(10, -70)
    ax.scatter(X_inverse[:, 0],
               X_inverse[:, 1],
               X_inverse[:, 2],
               c=t,
               cmap=plt.cm.hot,
               marker="x")
    ax.set_xlabel("")
    ax.set_ylabel("")
    ax.set_zlabel("")
    ax.set_xticklabels([])
    ax.set_yticklabels([])
    ax.set_zticklabels([])
    save_fig("preimage_plot", tight_layout=False)
    plt.show()
Example #16
def kpca_feature(rect_list):
    kpca = KernelPCA(n_components=10, kernel="rbf", fit_inverse_transform=True)
    kpca_feature = kpca.fit_transform(np.reshape(rect_list, (503, -1)) / 256)
    print(
        'Explained Variance Ratio: ',
        explained_variance_score(
            np.reshape(rect_list, (503, -1)) / 256,
            kpca.inverse_transform(kpca_feature)))
    # 0.6577
    return kpca_feature
Example #17
def WithKernelPCA(MaxK, data, label):
    X = data
    y = label
    kpca = KernelPCA(n_components=6,
                     kernel='poly',
                     fit_inverse_transform=True,
                     gamma=10)
    X_kpca = kpca.fit_transform(X)
    X_back = kpca.inverse_transform(X_kpca)
    accuracy = With_folds(MaxK, X_back, y)
    return accuracy
Example #18
class KDEKPCAGen(GenBase):
    def __init__(self,
                 kernel="gaussian",
                 bandwidth=0.1,
                 n_components=None,
                 kernel_pca="cosine"):
        super().__init__()
        # kernel: "linear" | "poly" | "rbf" | "sigmoid" | "cosine" |
        self.pca = KernelPCA(n_components=n_components,
                             kernel=kernel_pca,
                             fit_inverse_transform=True)  # , gamma=10)
        self.bandwidth = bandwidth
        self.kernel = kernel
        self.manifold = None

    def fit(self, x):
        x_pca = self.pca.fit_transform(x)
        self.manifold = KDEGen(kernel=self.kernel,
                               bandwidth=self.bandwidth).fit(x_pca)
        return self

    def sample_radius(self,
                      x_exp,
                      n_min_kernels=20,
                      r=None,
                      n_samples=1,
                      random_state=None):
        x_exp_pca = self.pca.transform(x_exp)
        x_sample_pca = self.manifold.sample_radius(x_exp_pca,
                                                   n_min_kernels=n_min_kernels,
                                                   r=r,
                                                   n_samples=n_samples,
                                                   random_state=random_state)
        x_sample = self.pca.inverse_transform(x_sample_pca)
        return x_sample

    def sample(self, n_samples=1, random_state=None):
        x_sample_pca = self.manifold.sample(n_samples=n_samples,
                                            random_state=random_state)
        x_sample = self.pca.inverse_transform(x_sample_pca)
        return x_sample
Example #19
File: recog_exps.py  Project: makokal/pycss
def reduce_kpca(X, kern, retall=False):
    """ reduce_kpca(X, components, kern, retall=False)
    Reduce dim by Kernel PCA
    """

    kpca = KernelPCA(kernel=kern, fit_inverse_transform=True)
    X_kpca = kpca.fit_transform(X)
    X_back = kpca.inverse_transform(X_kpca)

    if not retall:
        return X_kpca, X_back
    else:
        return X_kpca, X_back, kpca
Example #20
def reduce_kpca(X, kern, retall=False):
    """ reduce_kpca(X, components, kern, retall=False)
    Reduce dim by Kernel PCA
    """

    kpca = KernelPCA(kernel=kern, fit_inverse_transform=True)
    X_kpca = kpca.fit_transform(X)
    X_back = kpca.inverse_transform(X_kpca)

    if not retall:
        return X_kpca, X_back
    else:
        return X_kpca, X_back, kpca
Example #21
def fit_kernel_pca(sz_array, alpha, gamma, nc, return_fit=True):
    kernel_pca = KernelPCA(alpha=alpha,
                           gamma=gamma,
                           n_components=nc,
                           kernel='rbf',
                           fit_inverse_transform=True)
    transformed = kernel_pca.fit_transform(sz_array)
    inverse_transformed = kernel_pca.inverse_transform(transformed)
    losses = mse(sz_array.T, inverse_transformed.T, multioutput='raw_values').T
    if return_fit:
        return losses, alpha, gamma, transformed, inverse_transformed, kernel_pca
    else:
        return losses, alpha, gamma, transformed, inverse_transformed
Example #22
def test_kernel_pca_inverse_transform_reconstruction():
    # Test if the reconstruction is a good approximation.
    # Note that in general it is not possible to get an arbitrarily good
    # reconstruction because of kernel centering that does not
    # preserve all the information of the original data.
    X, *_ = make_blobs(n_samples=100, n_features=4, random_state=0)

    kpca = KernelPCA(n_components=20,
                     kernel='rbf',
                     fit_inverse_transform=True,
                     alpha=1e-3)
    X_trans = kpca.fit_transform(X)
    X_reconst = kpca.inverse_transform(X_trans)
    assert np.linalg.norm(X - X_reconst) / np.linalg.norm(X) < 1e-1
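The comment above notes that kernel centering makes an exact reconstruction impossible; the alpha argument is the ridge penalty of the learned inverse map, and the test sets it to 1e-3 to keep the error small. A hedged sketch comparing two alpha values on the same data (the values are illustrative, not tuned):

import numpy as np
from sklearn.datasets import make_blobs
from sklearn.decomposition import KernelPCA

X, *_ = make_blobs(n_samples=100, n_features=4, random_state=0)

for alpha in (1.0, 1e-3):
    # a smaller alpha regularizes the learned inverse map less, so the
    # training reconstruction error is typically lower
    kpca = KernelPCA(n_components=20, kernel='rbf',
                     fit_inverse_transform=True, alpha=alpha)
    X_reconst = kpca.inverse_transform(kpca.fit_transform(X))
    rel_err = np.linalg.norm(X - X_reconst) / np.linalg.norm(X)
    print('alpha=%g: relative reconstruction error %.3f' % (alpha, rel_err))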
Example #23
def fit_kernel_pca(sz_array, alpha, gamma, nc):

    kernel_pca = KernelPCA(alpha=alpha,
                           gamma=gamma,
                           n_components=nc,
                           kernel='rbf',
                           fit_inverse_transform=True)
    transformed = kernel_pca.fit_transform(sz_array)
    inverse_transformed = kernel_pca.inverse_transform(transformed)
    losses = []
    for i in range(transformed.shape[0]):
        losses.append(mse(sz_array[i, :], inverse_transformed[i, :]))

    return np.mean(losses), alpha, gamma, inverse_transformed
Example #24
def plot_kpca_results(X, y):

    kpca = KernelPCA(kernel="rbf", fit_inverse_transform=True, gamma=10)
    X_kpca = kpca.fit_transform(X)
    X_back = kpca.inverse_transform(X_kpca)
    pca = PCA()
    X_pca = pca.fit_transform(X)

    plt.figure(figsize=(10, 10))
    plt.subplot(2, 2, 1, aspect='equal')
    plt.title("Original space")
    reds = y == 0
    blues = y == 1

    plt.scatter(X[reds, 0], X[reds, 1], c="red", s=1)
    plt.scatter(X[blues, 0], X[blues, 1], c="blue", s=1)
    plt.xlabel("$x_1$")
    plt.ylabel("$x_2$")

    X1, X2 = np.meshgrid(np.linspace(-1.5, 1.5, 50),
                         np.linspace(-1.5, 1.5, 50))
    X_grid = np.array([np.ravel(X1), np.ravel(X2)]).T
    # projection on the first principal component (in the phi space)
    Z_grid = kpca.transform(X_grid)[:, 0].reshape(X1.shape)
    plt.contour(X1, X2, Z_grid, colors='grey', linewidths=1, origin='lower')

    plt.subplot(2, 2, 2, aspect='equal')
    plt.scatter(X_pca[reds, 0], X_pca[reds, 1], c="red", s=1)
    plt.scatter(X_pca[blues, 0], X_pca[blues, 1], c="blue", s=1)
    plt.title("Projection by PCA")
    plt.xlabel("1st principal component")
    plt.ylabel("2nd component")

    plt.subplot(2, 2, 3, aspect='equal')
    plt.scatter(X_kpca[reds, 0], X_kpca[reds, 1], c="red", s=1)
    plt.scatter(X_kpca[blues, 0], X_kpca[blues, 1], c="blue", s=1)
    plt.title("Projection by KPCA")
    plt.xlabel(r"1st principal component in space induced by $\phi$")
    plt.ylabel("2nd component")

    plt.subplot(2, 2, 4, aspect='equal')
    plt.scatter(X_back[reds, 0], X_back[reds, 1], c="red", s=1)
    plt.scatter(X_back[blues, 0], X_back[blues, 1], c="blue", s=1)
    plt.title("Original space after inverse transform")
    plt.xlabel("$x_1$")
    plt.ylabel("$x_2$")

    plt.tight_layout()
    plt.show()
Example #25
def main():

	#set the timer
	start = time.time()

	#load the data
	trainX = np.load('trainX.npy')
	testX = np.load('testX.npy')
	trainY = np.load('trainY.npy')
	testY = np.load('testY.npy')
	print('\n!!! Data Loading Completed !!!\n')

	#get the 1st digit zero and plot it
	zero = trainX[14].reshape(28, 28)
	plt.imshow(zero, cmap=cm.Greys_r)
	plt.savefig("original"+str(trainY[14])+".png")
	#plt.show()

	#apply kpca
	kpca = KernelPCA(kernel='rbf', gamma=1, fit_inverse_transform=True)
	kpca.fit(trainX[0:3000])
	trainX_kpca = kpca.transform(trainX)
	testX_kpca = kpca.transform(testX)

	#do inverse transform and plot the result
	orig = kpca.inverse_transform(trainX_kpca)
	img = orig[14].reshape(28, 28)
	plt.imshow(img, cmap=cm.Greys_r)
	plt.savefig("reconstructed"+str(trainY[14])+".png")
	#plt.show()

	selector = SelectPercentile(f_classif, percentile=5)
	selector.fit(trainX_kpca, trainY)
	trainX = selector.transform(trainX_kpca)
	testX = selector.transform(testX_kpca)

	#fit a classifier
	parameters = {'n_neighbors' : list(np.arange(15)+1)}
	clf = GridSearchCV(KNeighborsClassifier(weights='distance', n_jobs=-1), parameters)
	clf.fit(trainX, trainY)

	pred = clf.predict(testX)
	print(accuracy_score(testY, pred))
	print(confusion_matrix(testY, pred))
	#print(clf.best_params_)
	print('total : %d, correct : %d, incorrect : %d\n' %(len(pred), np.sum(pred == testY), np.sum(pred != testY)))

	print('Test Time : %f Minutes\n' %((time.time()-start)/60))
def kpca(data, n_components, train, test, kernel='linear', gamma=None, degree=3, coef0=1, alpha=0.1, evaluation=False):
    # Kernel PCA
    
    kpca = KernelPCA(n_components, fit_inverse_transform=True, kernel=kernel, gamma=gamma, degree=degree, 
                     coef0=coef0, alpha=alpha).fit(data[train])
    
    data_reduced = kpca.transform(data)
    
    if evaluation:
        data_rec = kpca.inverse_transform(data_reduced)
        loss = mean_squared_error(data[test], data_rec[test])
        return loss
    
    #name = 'Kernel PCA ('+kernel+')'
    name = 'Kernel PCA'

    return data_reduced, name, kpca.inverse_transform
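A hedged usage sketch for the kpca helper above (the index lists and data are illustrative; KernelPCA and mean_squared_error are assumed to be imported in the same module):

import numpy as np

data = np.random.RandomState(0).random_sample((100, 10))
train, test = list(range(80)), list(range(80, 100))
loss = kpca(data, n_components=5, train=train, test=test,
            kernel='rbf', gamma=0.1, evaluation=True)
print('held-out reconstruction MSE:', loss)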
def kernel_pca_fit(n_components, train, test, shape, kernel="linear"):
    # Available kernels:
    # "linear", "poly", "rbf", "sigmoid", "cosine", "precomputed"
    # Set and fit KernelPCA
    kpca = KernelPCA(n_components=n_components,
                     kernel=kernel,
                     fit_inverse_transform=True)
    kpca.fit(train)
    # Reduce dimension
    test_reduced = kpca.transform(test)
    # Recover data from the lower dimension
    test_recovered = kpca.inverse_transform(test_reduced)
    # Calculate the MSE
    mse = np.mean((test_recovered - test)**2)
    # Reshape into a matrix
    test_recovered = test_recovered.reshape(shape)
    return kpca, test_recovered, mse
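A hedged usage sketch for kernel_pca_fit (the dataset, split and target shape are illustrative assumptions; numpy and KernelPCA are assumed to be imported where the helper lives):

from sklearn.datasets import load_digits
from sklearn.model_selection import train_test_split

digits = load_digits()
train, test = train_test_split(digits.data, test_size=0.2, random_state=0)
kpca_model, test_recovered, mse = kernel_pca_fit(
    n_components=16, train=train, test=test, shape=(-1, 8, 8), kernel='rbf')
print('reconstruction MSE:', mse)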
Example #28
def main():

    mnisttrain = fetch_mldata('MNIST original')
    xtrain = mnisttrain.data
    ytrain = mnisttrain.target

    # Reduce to 16 dimensions in feature
    n_components = 16
    time_start = time.time()

    # Kernel PCA on MNIST Data
    kpca = KernelPCA(n_components=n_components,
                     fit_inverse_transform=True,
                     kernel='rbf',
                     eigen_solver='arpack',
                     n_jobs=-1)
    xtrain_kpca = kpca.fit_transform(xtrain)
    time_end = time.time()
    print("done in %0.3fs" % (time.time() - time_start))

    xtrain_inv_proj = kpca.inverse_transform(xtrain_kpca)
    n = 10
    plt.figure(figsize=(20, 4))
    for i in range(n):
        # display the original image
        index = random.randint(1, 60000)
        ax = plt.subplot(2, n, i + 1)
        img = xtrain[index]
        plt.imshow(np.reshape(img, (28, 28)))
        plt.gray()
        ax.get_xaxis().set_visible(False)
        ax.get_yaxis().set_visible(False)

        # display the decoded image
        ax = plt.subplot(2, n, i + 1 + n)
        img = xtrain_inv_proj[index]
        plt.imshow(np.reshape(img, (28, 28)))
        plt.gray()
        ax.get_xaxis().set_visible(False)
        ax.get_yaxis().set_visible(False)

    plt.savefig('./kpca_results/kpca_mnist.png')

    #classify on svm with reduced data
    svm_classify(xtrain_kpca, ytrain)
def kPCA_visualization2d(X, y):
   
    kpca = KernelPCA(kernel="linear", fit_inverse_transform=True, gamma=10, n_components=2)
    X_kpca = kpca.fit_transform(X)
    X_back = kpca.inverse_transform(X_kpca)
    pca = PCA(n_components=2)
    X_pca = pca.fit_transform(X)

    class_1 = []
    class_0 = []
     
    for i in range(0, len(y)):
        
        if y[i] == 1:
            class_1.append( X_kpca[i] )
        else:
            class_0.append( X_kpca[i]  )
    
    class_0_x = []
    class_0_y = []
    class_1_x = []
    class_1_y = []
    for x in class_0:
        class_0_x.append( x[0] )
        class_0_y.append( x[1] )
        
    for x in class_1:
        class_1_x.append( x[0] )
        class_1_y.append( x[1] )
        

    # Plot
    # print principal component

    plt.title("kPCA kernel = linear")
    plt.plot( class_0_x, class_0_y, "ro")
    plt.plot( class_1_x, class_1_y, "go")
    plt.title("Projection by PCA")
    plt.xlabel("1st principal component")
    plt.ylabel("2nd component")
    

    
    plt.show()
Example #30
def test_kernel_pca():
    """Nominal test for all solvers and all known kernels + a custom one

    It tests
     - that fit_transform is equivalent to fit+transform
     - that the shapes of transforms and inverse transforms are correct
    """
    rng = np.random.RandomState(0)
    X_fit = rng.random_sample((5, 4))
    X_pred = rng.random_sample((2, 4))

    def histogram(x, y, **kwargs):
        # Histogram kernel implemented as a callable.
        assert kwargs == {}  # no kernel_params that we didn't ask for
        return np.minimum(x, y).sum()

    for eigen_solver in ("auto", "dense", "arpack", "randomized"):
        for kernel in ("linear", "rbf", "poly", histogram):
            # histogram kernel produces singular matrix inside linalg.solve
            # XXX use a least-squares approximation?
            inv = not callable(kernel)

            # transform fit data
            kpca = KernelPCA(4,
                             kernel=kernel,
                             eigen_solver=eigen_solver,
                             fit_inverse_transform=inv)
            X_fit_transformed = kpca.fit_transform(X_fit)
            X_fit_transformed2 = kpca.fit(X_fit).transform(X_fit)
            assert_array_almost_equal(np.abs(X_fit_transformed),
                                      np.abs(X_fit_transformed2))

            # non-regression test: previously, gamma would be 0 by default,
            # forcing all eigenvalues to 0 under the poly kernel
            assert X_fit_transformed.size != 0

            # transform new data
            X_pred_transformed = kpca.transform(X_pred)
            assert X_pred_transformed.shape[1] == X_fit_transformed.shape[1]

            # inverse transform
            if inv:
                X_pred2 = kpca.inverse_transform(X_pred_transformed)
                assert X_pred2.shape == X_pred.shape
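The histogram callable above is applied to pairs of sample vectors and must return a scalar; pairwise_kernels then assembles the full kernel matrix from it. A hedged sketch of using such a callable kernel outside the test (data and sizes are illustrative):

import numpy as np
from sklearn.decomposition import KernelPCA

def histogram(x, y):
    # intersection (histogram) kernel between two sample vectors
    return np.minimum(x, y).sum()

rng = np.random.RandomState(0)
X = rng.random_sample((10, 4))
kpca = KernelPCA(n_components=2, kernel=histogram, fit_inverse_transform=False)
print(kpca.fit_transform(X).shape)  # (10, 2)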
Example #31
def nonLinearPCA():

    X, y = generateData(method='nonLinear')

    kpca = KernelPCA(kernel="rbf", fit_inverse_transform=True, gamma=10)
    X_kpca = kpca.fit_transform(X)
    X_back = kpca.inverse_transform(X_kpca)
    pca = PCA()  # pca = PCA(2) project from 64 to 2 dimensions
    X_pca = pca.fit_transform(X)

    X1, X2 = np.meshgrid(np.linspace(-1.5, 1.5, 50),
                         np.linspace(-1.5, 1.5, 50))
    X_grid = np.array([np.ravel(X1), np.ravel(X2)]).T
    # projection on the first principal component (in the phi space)
    Z_grid = kpca.transform(X_grid)[:, 0].reshape(X1.shape)
    plt.contour(X1, X2, Z_grid, colors='grey', linewidths=1, origin='lower')

    reds = y == 0
    blues = y == 1

    visualizePCA(X1, X2, Z_grid, X_pca, reds, blues, X_kpca, X_back)
Example #32
def pca_kernel(df, kernel='rbf'):
    if 'class' in df:
        X = df.drop(['class'], axis=1)
    else:
        X = df
   
    kpca = KernelPCA(kernel=kernel, fit_inverse_transform=True, gamma=10)
    X_kpca = kpca.fit_transform(X)
    X_back = kpca.inverse_transform(X_kpca)
    
    columns = []
    for i in range(1, len(X_back[0])+1):
        columns.append('pca-'+ kernel +str(i))
    X_back = pd.DataFrame(data = X_back, columns = columns)
    
    if 'class' in df:
        new = pd.concat([X_back, df['class']], axis=1)
    else:
        new = X_back
    
    return new
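A hedged usage sketch for pca_kernel (column names and data are illustrative; pandas and KernelPCA are assumed to be imported in the surrounding module):

import pandas as pd
from sklearn.datasets import make_circles

X, y = make_circles(n_samples=200, factor=0.3, noise=0.05, random_state=0)
df = pd.DataFrame(X, columns=['f1', 'f2'])
df['class'] = y
df_back = pca_kernel(df, kernel='rbf')
print(df_back.head())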
Example #33
def hyper_parameter_tuning_kernel():
    from sklearn.datasets import make_swiss_roll
    from sklearn.decomposition import KernelPCA
    from sklearn.model_selection import GridSearchCV
    from sklearn.linear_model import LogisticRegression
    from sklearn.pipeline import Pipeline

    X, t = make_swiss_roll(n_samples=1000, noise=0.2, random_state=42)
    y = t > 6.9

    n_component = 2

    clf = Pipeline([("kpca", KernelPCA(n_components=n_component)),
                    ("log_reg", LogisticRegression(solver="liblinear"))])

    param_grid = [{
        "kpca__kernel": ["rbf", "sigmoid"],
        "kpca__gamma": np.linspace(0.03, 0.05, 10)
    }]

    grid_cv = GridSearchCV(clf, param_grid, cv=3, n_jobs=-1)
    grid_cv.fit(X, y)

    best_param = grid_cv.best_params_
    best_kernel = best_param["kpca__kernel"]
    best_gamma = best_param["kpca__gamma"]

    best_pca = KernelPCA(n_components=n_component,
                         kernel=best_kernel,
                         gamma=best_gamma,
                         fit_inverse_transform=True)

    reduced_x = best_pca.fit_transform(X)
    preimage_x = best_pca.inverse_transform(reduced_x)

    from sklearn.metrics import mean_squared_error
    preimage_error = mean_squared_error(preimage_x, X)

    print("Best hyperparameter : {}".format(best_param))
    print("Preimage error of best model : {}".format(preimage_error))
def looking_for_param_by_grid_search():
    X, t = make_swiss_roll(n_samples=1000, noise=0.2, random_state=42)
    y = t > 6.9

    clf = Pipeline([("kpca", KernelPCA(n_components=2)),
                    ("log_reg", LogisticRegression())])

    param_grid = [{
        "kpca__gamma": np.linspace(0.03, 0.05, 10),
        "kpca__kernel": ["rbf", "sigmoid"]
    }]

    grid_search = GridSearchCV(clf, param_grid, cv=3)
    grid_search.fit(X, y)
    print(grid_search.best_params_)
    rbf_pca = KernelPCA(n_components=2,
                        kernel="rbf",
                        gamma=0.0433,
                        fit_inverse_transform=True)
    X_reduced = rbf_pca.fit_transform(X)
    X_preimage = rbf_pca.inverse_transform(X_reduced)
    print(mean_squared_error(X, X_preimage))
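The two functions above choose gamma through a downstream classifier in a grid search; an alternative, sketched here under the same data assumptions, is to pick gamma by minimising the pre-image reconstruction error directly (the grid values are illustrative):

import numpy as np
from sklearn.datasets import make_swiss_roll
from sklearn.decomposition import KernelPCA
from sklearn.metrics import mean_squared_error

X, _ = make_swiss_roll(n_samples=1000, noise=0.2, random_state=42)

errors = {}
for gamma in np.linspace(0.03, 0.05, 10):
    rbf_pca = KernelPCA(n_components=2, kernel='rbf', gamma=gamma,
                        fit_inverse_transform=True)
    X_preimage = rbf_pca.inverse_transform(rbf_pca.fit_transform(X))
    errors[gamma] = mean_squared_error(X, X_preimage)

best_gamma = min(errors, key=errors.get)
print('best gamma: %.4f (pre-image MSE %.5f)' % (best_gamma, errors[best_gamma]))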
Example #35
def test_kernel_pca():
    rng = np.random.RandomState(0)
    X_fit = rng.random_sample((5, 4))
    X_pred = rng.random_sample((2, 4))

    def histogram(x, y, **kwargs):
        """Histogram kernel implemented as a callable."""
        assert_equal(kwargs, {})  # no kernel_params that we didn't ask for
        return np.minimum(x, y).sum()

    for eigen_solver in ("auto", "dense", "arpack"):
        for kernel in ("linear", "rbf", "poly", histogram):
            # histogram kernel produces singular matrix inside linalg.solve
            # XXX use a least-squares approximation?
            inv = not callable(kernel)

            # transform fit data
            kpca = KernelPCA(4,
                             kernel=kernel,
                             eigen_solver=eigen_solver,
                             fit_inverse_transform=inv)
            X_fit_transformed = kpca.fit_transform(X_fit)
            X_fit_transformed2 = kpca.fit(X_fit).transform(X_fit)
            assert_array_almost_equal(np.abs(X_fit_transformed),
                                      np.abs(X_fit_transformed2))

            # non-regression test: previously, gamma would be 0 by default,
            # forcing all eigenvalues to 0 under the poly kernel
            assert_not_equal(X_fit_transformed, [])

            # transform new data
            X_pred_transformed = kpca.transform(X_pred)
            assert_equal(X_pred_transformed.shape[1],
                         X_fit_transformed.shape[1])

            # inverse transform
            if inv:
                X_pred2 = kpca.inverse_transform(X_pred_transformed)
                assert_equal(X_pred2.shape, X_pred.shape)
Example #36
def main():
    xtrain, ytrain, filenames, label_names = get_cifar()

    n_components = 36
    time_start = time.time()
    kpca = KernelPCA(n_components=n_components,
                     fit_inverse_transform=True,
                     kernel='rbf',
                     eigen_solver='arpack',
                     n_jobs=-1)
    xtrain_kpca = kpca.fit_transform(xtrain)
    time_end = time.time()
    print("done in %0.3fs" % (time.time() - time_start))
    xtrain_inv_proj = kpca.inverse_transform(xtrain_kpca)

    n = 10
    plt.figure(figsize=(20, 4))
    for i in range(n):
        ax = plt.subplot(2, n, i + 1)
        img = xtrain[i, 0:1024]
        plt.imshow(np.reshape(img, (32, 32)))
        plt.gray()
        ax.get_xaxis().set_visible(False)
        ax.get_yaxis().set_visible(False)

        ax = plt.subplot(2, n, i + 1 + n)
        img = xtrain_inv_proj[i, 0:1024]
        plt.imshow(np.reshape(img, (32, 32)))
        plt.gray()
        ax.get_xaxis().set_visible(False)
        ax.get_yaxis().set_visible(False)

    plt.savefig('./kpca_results/kpca_cifar_10.png')
    svm_classify(xtrain_kpca, ytrain)
    message = client.messages.create(
        body="Hello Good News! Your KPCA CIFAR-10 is done!",
        from_="+19733213685",
        to="+19173707991")
    print(message.sid)
def kpca(data,
         n_components,
         train,
         test,
         c=None,
         sample_weight=None,
         kernel='linear',
         gamma=None,
         degree=3,
         coef0=1,
         alpha=0.1,
         evaluation=False,
         overwrite=True):
    # Kernel PCA

    kpca = KernelPCA(n_components,
                     fit_inverse_transform=True,
                     kernel=kernel,
                     gamma=gamma,
                     degree=degree,
                     coef0=coef0,
                     alpha=alpha).fit(data[train])

    data_reduced = np.zeros((data.shape[0], n_components))
    data_reduced[train + test] = kpca.transform(data[train + test])

    if evaluation:
        data_rec = kpca.inverse_transform(data_reduced[test])
        loss = mean_squared_error(data[test], data_rec)
        return loss

    name = 'KPCA'

    if overwrite:
        # Save the model
        save_model(kpca, name, c)

    return data_reduced, name, kpca.inverse_transform
Example #38
def test_kernel_pca():
    rng = np.random.RandomState(0)
    X_fit = rng.random_sample((5, 4))
    X_pred = rng.random_sample((2, 4))

    for eigen_solver in ("auto", "dense", "arpack"):
        for kernel in ("linear", "rbf", "poly"):
            # transform fit data
            kpca = KernelPCA(4, kernel=kernel, eigen_solver=eigen_solver,
                             fit_inverse_transform=True)
            X_fit_transformed = kpca.fit_transform(X_fit)
            X_fit_transformed2 = kpca.fit(X_fit).transform(X_fit)
            assert_array_almost_equal(np.abs(X_fit_transformed),
                                      np.abs(X_fit_transformed2))

            # transform new data
            X_pred_transformed = kpca.transform(X_pred)
            assert_equal(X_pred_transformed.shape[1],
                         X_fit_transformed.shape[1])

            # inverse transform
            X_pred2 = kpca.inverse_transform(X_pred_transformed)
            assert_equal(X_pred2.shape, X_pred.shape)
Example #39
#          Andreas Mueller
# License: BSD

import numpy as np
import pylab as pl

from sklearn.decomposition import PCA, KernelPCA
from sklearn.datasets import make_circles

np.random.seed(0)

X, y = make_circles(n_samples=400, factor=.3, noise=.05)

kpca = KernelPCA(kernel="rbf", fit_inverse_transform=True, gamma=10)
X_kpca = kpca.fit_transform(X)
X_back = kpca.inverse_transform(X_kpca)
pca = PCA()
X_pca = pca.fit_transform(X)

# Plot results

pl.figure()
pl.subplot(2, 2, 1, aspect='equal')
pl.title("Original space")
reds = y == 0
blues = y == 1

pl.plot(X[reds, 0], X[reds, 1], "ro")
pl.plot(X[blues, 0], X[blues, 1], "bo")
pl.xlabel("$x_1$")
pl.ylabel("$x_2$")
Example #40
ax.set_zticks(np.arange(small_df.prof.min().astype('int'),small_df.prof.max().astype('int')+1,10))

ax.set_xlabel('Lon')
ax.set_ylabel('Lat')
ax.set_zlabel('Prof')
plt.show()

# analysis
class_data_index = [0, 1, 2]  # which columns of the data frame?
small_data = small_df.iloc[:, class_data_index].values

# Principal components analysis : which is the data dimensionality?

kpca = KernelPCA(kernel="rbf", fit_inverse_transform=True, gamma=10)
small_data_kpca = kpca.fit_transform(small_data)
small_data_kpca_back = kpca.inverse_transform(small_data_kpca)
print(small_data_kpca.shape, small_data_kpca_back.shape)


fig = plt.figure()
ax = fig.add_subplot(111, projection='3d')
ax.scatter(small_data_kpca_back[:,0], small_data_kpca_back[:,1], small_data_kpca_back[:,2], c=small_df.mag.values, s=15*small_df.mag.values, marker='o',cmap=plt.cm.RdYlBu_r)
ax.set_zlim3d(small_df.prof.max(),small_df.prof.min())
ax.set_xticks(np.linspace(ax.get_xlim()[0],ax.get_xlim()[1],4))
ax.set_yticks(np.linspace(ax.get_ylim()[0],ax.get_ylim()[1],4))
ax.set_zticks(np.arange(small_df.prof.min().astype('int'),small_df.prof.max().astype('int')+1,10))

plt.title("KernelPCA reconstructed data")
plt.show()

pca = PCA(n_components = 'mle')
Example #41
def RBF_kernel_PCA(xList, componentNum):
	kpca = KernelPCA(kernel="rbf", fit_inverse_transform=True, gamma=10)
	X_kpca = kpca.fit_transform(xList)
	X_back = kpca.inverse_transform(X_kpca)

	return X_back
from sklearn.datasets import make_circles

# Set the seed for random number generator
np.random.seed(7)

# Generate samples
X, y = make_circles(n_samples=500, factor=0.2, noise=0.04)

# Perform PCA
pca = PCA()
X_pca = pca.fit_transform(X)

# Perform Kernel PCA
kernel_pca = KernelPCA(kernel="rbf", fit_inverse_transform=True, gamma=10)
X_kernel_pca = kernel_pca.fit_transform(X)
X_inverse = kernel_pca.inverse_transform(X_kernel_pca)

# Plot original data
class_0 = np.where(y == 0)
class_1 = np.where(y == 1)
plt.figure()
plt.title("Original data")
plt.plot(X[class_0, 0], X[class_0, 1], "ko", mfc='none')
plt.plot(X[class_1, 0], X[class_1, 1], "kx")
plt.xlabel("1st dimension")
plt.ylabel("2nd dimension")

# Plot PCA projection of the data
plt.figure()
plt.plot(X_pca[class_0, 0], X_pca[class_0, 1], "ko", mfc='none')
plt.plot(X_pca[class_1, 0], X_pca[class_1, 1], "kx")
def pca(self):
    kpca = KernelPCA(kernel="linear", fit_inverse_transform=True)
    utility_normal_kpca = kpca.fit_transform(self.ds.utility_normal)
    self.utility_normal_back = kpca.inverse_transform(utility_normal_kpca)