def KernelPCA(self, kernel, n_comps=None, sparseversion=False):
    # Default to the full input dimensionality when no count is given.
    if n_comps is None:
        n_comps = self.numdims
    # Note: kernel must be passed by keyword; scikit-learn's first
    # positional argument is n_components, not kernel. The original code
    # also computed n_comps but never passed it on -- both fixed here.
    self.kernelprincomp = decomp.KernelPCA(n_components=n_comps,
                                           kernel=kernel,
                                           remove_zero_eig=False)
    # Sparse inputs must be densified before KernelPCA can fit them.
    if sparseversion:
        datasets = [self.datatrain.toarray(), self.dataval.toarray(),
                    self.datatest.toarray()]
    else:
        datasets = [self.datatrain, self.dataval, self.datatest]
    self.kernelprincomp.fit(datasets[0])
    # With remove_zero_eig=False, (near-)zero eigenvalues can produce
    # non-finite scores, so every projection goes through nan_to_num.
    self.kernelprincompscores = [
        np.nan_to_num(self.kernelprincomp.transform(d)) for d in datasets
    ]
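
A minimal standalone sketch of why the nan_to_num guard above exists, assuming only NumPy and scikit-learn: with remove_zero_eig=False, (near-)zero eigenvalues can yield non-finite transform scores in some scikit-learn versions:

import numpy as np
from sklearn.decomposition import KernelPCA

rng = np.random.RandomState(0)
X = rng.rand(20, 5)
X[:, 4] = X[:, 3]   # duplicated column -> rank-deficient data, zero eigenvalues
kpca = KernelPCA(n_components=10, kernel='linear', remove_zero_eig=False).fit(X)
scores = np.nan_to_num(kpca.transform(X))   # the same guard as in the method
print(np.isfinite(scores).all())            # True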
Example #2
    def PCA(self, X, Y=None, ncomp=2, method='PCA'):
        """ Decompose a multivariate dataset into an orthogonal
            set of components that explain a maximum amount of the variance.

        @param X: input dataset

        Keyword Arguments:
        ncomp  -- number of components to be kept (Default: 2)
        method -- method to be used:
                  PCA (default) / Randomized / Sparse / rbf / linear /
                  sigmoid / SVD
        """
        from sklearn import decomposition
        if method == 'Randomized':
            # RandomizedPCA was removed from scikit-learn; the randomized
            # SVD solver of PCA is the modern equivalent.
            pca = decomposition.PCA(n_components=ncomp, svd_solver='randomized')
        elif method == 'Sparse':
            pca = decomposition.SparsePCA(n_components=ncomp)
        elif method == 'rbf':
            pca = decomposition.KernelPCA(n_components=ncomp,
                                          fit_inverse_transform=True,
                                          gamma=10,
                                          kernel="rbf")
        elif method == 'linear':
            pca = decomposition.KernelPCA(n_components=ncomp, kernel="linear")
        elif method == 'sigmoid':
            pca = decomposition.KernelPCA(n_components=ncomp, kernel="sigmoid")
        elif method == 'SVD':
            pca = decomposition.TruncatedSVD(n_components=ncomp)
        else:
            pca = decomposition.PCA(n_components=ncomp)
            method = 'PCA'
        print('[ML] Using %s method' % method)
        pca.fit(X)
        return pca.transform(X)
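
A quick standalone check of the estimators this dispatcher selects, using scikit-learn's bundled iris data; SparsePCA is omitted here only because it is slow:

from sklearn import datasets, decomposition

X, _ = datasets.load_iris(return_X_y=True)
for name, model in [
        ('PCA', decomposition.PCA(n_components=2)),
        ('Randomized', decomposition.PCA(n_components=2, svd_solver='randomized')),
        ('SVD', decomposition.TruncatedSVD(n_components=2)),
        ('rbf', decomposition.KernelPCA(n_components=2, kernel='rbf', gamma=10)),
]:
    print(name, model.fit_transform(X).shape)   # each yields (150, 2)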
Example #3
def PCdata(self, n_pcs=75):
    dist, ids = self.distmatrix()
    # Turn the pairwise distance matrix into a Gaussian similarity kernel.
    kernel = np.exp(-dist**2 / dist.max()**2)
    pcafunc = skdecomp.KernelPCA(n_components=n_pcs, kernel='precomputed')
    pcs = pcafunc.fit_transform(kernel)
    pcsframe = pandas.DataFrame(pcs, index=ids,
                                columns=["pc-" + str(i) for i in range(n_pcs)])
    return pcsframe
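
A standalone sketch of the precomputed-kernel trick used above, with synthetic points standing in for distmatrix(); the Gaussian transform turns distances into similarities that KernelPCA can eigendecompose:

import numpy as np
from scipy.spatial.distance import cdist
from sklearn.decomposition import KernelPCA

rng = np.random.RandomState(0)
pts = rng.rand(30, 4)
dist = cdist(pts, pts)                      # symmetric pairwise distances
kernel = np.exp(-dist**2 / dist.max()**2)   # same transform as in PCdata
pcs = KernelPCA(n_components=3, kernel='precomputed').fit_transform(kernel)
print(pcs.shape)                            # (30, 3)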
Example #4
def main():
    # Create random data.
    # https://jakevdp.github.io/PythonDataScienceHandbook/05.07-support-vector-machines.html
    n = 200  # number of samples.
    x, y = make_circles(n, factor=.1, noise=.1)

    # Scale data to reduce weights.
    # https://openclassrooms.com/fr/courses/4444646-entrainez-un-modele-predictif-lineaire/4507801-reduisez-l-amplitude-des-poids-affectes-a-vos-variables
    std_scale = preprocessing.StandardScaler().fit(x)
    x_scaled = std_scale.transform(x)

    for i, g in enumerate([1, 10, 100]):
        # Perform kernel PCA on scaled data.
        kpca = decomposition.KernelPCA(n_components=1, kernel='rbf', gamma=g)
        kpca.fit(x_scaled)

        # Project data on principal components.
        x_kpca = kpca.transform(x_scaled)

        # Plot.
        axis = plt.subplot(3, 2, 1 + 2 * i)
        # c= expects a 1-D array, so take the single KPCA column.
        axis.scatter(x_scaled[:, 0], x_scaled[:, 1], c=x_kpca[:, 0], s=50)
        axis.set_title('initial space, g %03d' % g)
        axis = plt.subplot(3, 2, 2 + 2 * i)
        axis.scatter(x_kpca[:, 0], [0] * n, c=x_kpca[:, 0], s=50)
        axis.set_title('redescription space, g %03d' % g)
    plt.subplots_adjust(left=0.1,
                        bottom=0.1,
                        right=0.9,
                        top=0.9,
                        wspace=0.3,
                        hspace=0.3)
    plt.suptitle('kPCA rbf')
    plt.show()
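
A hedged follow-on sketch: the same make_circles setup, but with fit_inverse_transform=True so KernelPCA can map the 1-D projection back to the input plane as an approximate pre-image:

import numpy as np
from sklearn.datasets import make_circles
from sklearn.decomposition import KernelPCA

x, _ = make_circles(200, factor=.1, noise=.1)
kpca = KernelPCA(n_components=1, kernel='rbf', gamma=10,
                 fit_inverse_transform=True)
x_back = kpca.inverse_transform(kpca.fit_transform(x))
print('reconstruction MSE: %.4f' % np.mean((x - x_back) ** 2))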
Example #5
def test_KPCA(*args):
    x, y = args
    kernels = ['linear', 'poly', 'rbf', 'sigmoid']
    for k in kernels:
        kpca = decomposition.KernelPCA(n_components=None, kernel=k)
        kpca.fit(x)  # y is ignored by KernelPCA
        # lambdas_ was renamed eigenvalues_ in scikit-learn 1.0.
        print('kernel=%s-->eigenvalues: %s' % (k, kpca.eigenvalues_))
Example #6
def test_pca(weight, components_num):
    # degree only applies to the poly kernel, so it is dropped for rbf.
    kpca = decomposition.KernelPCA(n_components=components_num,
                                   kernel='rbf',
                                   gamma=4)
    principal_weight = kpca.fit_transform(weight)
    return principal_weight
Example #7
def plot_KPCA_poly(*data):
    x, y = data
    fig = plt.figure()
    colors = ((1, 0, 0), (0, 1, 0), (0, 0, 1), (0.5, 0.5, 0), (0, 0.5, 0.5),
              (0.5, 0, 0.5), (0.4, 0.6, 0), (0.6, 0.4, 0), (0, 0.6, 0.4),
              (0.5, 0.3, 0.2))
    Params = [(3, 1, 1), (3, 10, 1), (3, 1, 10), (3, 10, 10), (10, 1, 1),
              (10, 10, 1), (10, 1, 10), (10, 10, 10)]
    for i, (p, gamma, r) in enumerate(Params):
        kpca = decomposition.KernelPCA(n_components=2,
                                       kernel='poly',
                                       gamma=gamma,
                                       degree=p,
                                       coef0=r)
        kpca.fit(x)
        x_r = kpca.transform(x)
        ax = fig.add_subplot(2, 4, i + 1)
        for label, color in zip(np.unique(y), colors):
            position = y == label  # boolean mask selecting the samples of this class
            ax.scatter(x_r[position, 0],
                       x_r[position, 1],
                       label="target=%d" % label,
                       color=color)
        ax.set_xlabel("x[0]")
        ax.set_xticks([])
        ax.set_yticks([])
        ax.set_ylabel("x[1]")
        ax.legend(loc="best")
        ax.set_title(r"$(%s (x \cdot z+1)+%s)^{%s}$" % (gamma, r, p))
    plt.suptitle("KPCA-Poly")
    plt.show()
Example #8
def plot_KPCA_rbf(*data):
    x, y = data
    fig = plt.figure()
    colors = ((1, 0, 0), (0, 1, 0), (0, 0, 1), (0.5, 0.5, 0), (0, 0.5, 0.5),
              (0.5, 0, 0.5), (0.4, 0.6, 0), (0.6, 0.4, 0), (0, 0.6, 0.4),
              (0.5, 0.3, 0.2))
    Gammas = [0.5, 1, 4, 10]
    for i, gamma in enumerate(Gammas):
        kpca = decomposition.KernelPCA(n_components=2,
                                       kernel='rbf',
                                       gamma=gamma)
        kpca.fit(x)
        x_r = kpca.transform(x)
        ax = fig.add_subplot(2, 2, i + 1)
        for label, color in zip(np.unique(y), colors):
            position = y == label  # boolean mask selecting the samples of this class
            ax.scatter(x_r[position, 0],
                       x_r[position, 1],
                       label="target=%d" % label,
                       color=color)
        ax.set_xlabel("x[0]")
        ax.set_xticks([])
        ax.set_yticks([])
        ax.set_ylabel("x[1]")
        ax.legend(loc="best")
        ax.set_title(r"$\exp(-%s||x-z||^2)$" % gamma)
    plt.suptitle("KPCA-rbf")
    plt.show()
Example #9
def plot_KPCA_rbf(*data):
    '''
    Plot the samples after reducing them to two dimensions with an
    rbf-kernel KernelPCA.

    :param data: variadic argument; a tuple of (training samples, training labels)
    :return: None
    '''
    X, y = data
    fig = plt.figure()
    # Colour palette: samples with different labels get different colours.
    colors = ((1, 0, 0), (0, 1, 0), (0, 0, 1), (0.5, 0.5, 0), (0, 0.5, 0.5),
              (0.5, 0, 0.5), (0.4, 0.6, 0), (0.6, 0.4, 0), (0, 0.6, 0.4),
              (0.5, 0.3, 0.2))
    Gammas = [0.5, 1, 4, 10]  # candidate gamma values for the rbf kernel
    for i, gamma in enumerate(Gammas):
        kpca = decomposition.KernelPCA(n_components=2, kernel='rbf', gamma=gamma)
        kpca.fit(X)
        X_r = kpca.transform(X)  # project the original data onto two dimensions
        # 2x2 grid: one panel per gamma value of the rbf-kernel KernelPCA.
        ax = fig.add_subplot(2, 2, i + 1)
        for label, color in zip(np.unique(y), colors):
            position = y == label
            ax.scatter(X_r[position, 0], X_r[position, 1],
                       label="target= %d" % label, color=color)
        ax.set_xlabel("X[0]")
        ax.set_xticks([])  # hide x-axis ticks
        ax.set_yticks([])  # hide y-axis ticks
        ax.set_ylabel("X[1]")
        ax.legend(loc="best")
        ax.set_title(r"$\exp(-%s||x-z||^2)$" % gamma)
    plt.suptitle("KPCA-rbf")
    plt.show()
Example #10
def plot_KPCA_poly(*data):
    '''
    Plot the samples after reducing them to two dimensions with a
    poly-kernel KernelPCA.

    :param data: variadic argument; a tuple of (training samples, training labels)
    :return: None
    '''
    X, y = data
    fig = plt.figure()
    # Colour palette: samples with different labels get different colours.
    colors = ((1, 0, 0), (0, 1, 0), (0, 0, 1), (0.5, 0.5, 0), (0, 0.5, 0.5),
              (0.5, 0, 0.5), (0.4, 0.6, 0), (0.6, 0.4, 0), (0, 0.6, 0.4),
              (0.5, 0.3, 0.2))
    # Poly-kernel parameter combinations, each a tuple (p, gamma, r):
    # p in {3, 10}, gamma in {1, 10}, r in {1, 10} -- 8 combinations in all.
    Params = [(3, 1, 1), (3, 10, 1), (3, 1, 10), (3, 10, 10),
              (10, 1, 1), (10, 10, 1), (10, 1, 10), (10, 10, 10)]
    for i, (p, gamma, r) in enumerate(Params):
        kpca = decomposition.KernelPCA(n_components=2, kernel='poly',
                                       gamma=gamma, degree=p, coef0=r)
        kpca.fit(X)
        X_r = kpca.transform(X)  # project the original data onto two dimensions
        # 2x4 grid: one panel per parameter combination of the poly kernel.
        ax = fig.add_subplot(2, 4, i + 1)
        for label, color in zip(np.unique(y), colors):
            position = y == label
            ax.scatter(X_r[position, 0], X_r[position, 1],
                       label="target= %d" % label, color=color)
        ax.set_xlabel("X[0]")
        ax.set_xticks([])  # hide x-axis ticks
        ax.set_yticks([])  # hide y-axis ticks
        ax.set_ylabel("X[1]")
        ax.legend(loc="best")
        ax.set_title(r"$(%s (x \cdot z)+%s)^{%s}$" % (gamma, r, p))
    plt.suptitle("KPCA-Poly")
    plt.show()
Example #11
def plot_KPCA(*data):
    '''
    Plot the samples after reducing them to two dimensions with KernelPCA.

    :param data: variadic argument; a tuple of (training samples, training labels)
    :return: None
    '''
    X, y = data
    kernels = ['linear', 'poly', 'rbf', 'sigmoid']
    fig = plt.figure()
    # Colour palette: samples with different labels get different colours.
    colors = ((1, 0, 0), (0, 1, 0), (0, 0, 1), (0.5, 0.5, 0), (0, 0.5, 0.5),
              (0.5, 0, 0.5), (0.4, 0.6, 0), (0.6, 0.4, 0), (0, 0.6, 0.4),
              (0.5, 0.3, 0.2))

    for i, kernel in enumerate(kernels):
        kpca = decomposition.KernelPCA(n_components=2, kernel=kernel)
        kpca.fit(X)
        X_r = kpca.transform(X)  # project the original data onto two dimensions
        ax = fig.add_subplot(2, 2, i + 1)  # 2x2 grid: one panel per kernel
        for label, color in zip(np.unique(y), colors):
            position = y == label
            ax.scatter(X_r[position, 0], X_r[position, 1],
                       label="target= %d" % label, color=color)
        ax.set_xlabel("X[0]")
        ax.set_ylabel("X[1]")
        ax.legend(loc="best")
        ax.set_title("kernel=%s" % kernel)
    plt.suptitle("KPCA")
    plt.show()
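
A hedged driver for the plotting helpers in these examples, assuming the definitions above are in scope; iris is used purely for illustration and any small labelled dataset would do:

import numpy as np
import matplotlib.pyplot as plt
from sklearn import datasets, decomposition

X, y = datasets.load_iris(return_X_y=True)
plot_KPCA(X, y)       # one panel per kernel
plot_KPCA_rbf(X, y)   # one panel per rbf gamma value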
Example #12
def plot_KPCA_sigmoid(*data):
    '''
    Plot the samples after reducing them to two dimensions with a
    sigmoid-kernel KernelPCA.

    :param data: variadic argument; a tuple of (training samples, training labels)
    :return: None
    '''
    X, y = data
    fig = plt.figure()
    # Colour palette: samples with different labels get different colours.
    colors = ((1, 0, 0), (0, 1, 0), (0, 0, 1), (0.5, 0.5, 0), (0, 0.5, 0.5),
              (0.5, 0, 0.5), (0.4, 0.6, 0), (0.6, 0.4, 0), (0, 0.6, 0.4),
              (0.5, 0.3, 0.2))
    # Sigmoid-kernel parameter combinations, each a tuple (gamma, coef0):
    # gamma in {0.01, 0.1, 0.2}, coef0 in {0.1, 0.2} -- 6 combinations in all.
    Params = [(0.01, 0.1), (0.01, 0.2), (0.1, 0.1), (0.1, 0.2), (0.2, 0.1), (0.2, 0.2)]
    for i, (gamma, r) in enumerate(Params):
        kpca = decomposition.KernelPCA(n_components=2, kernel='sigmoid',
                                       gamma=gamma, coef0=r)
        kpca.fit(X)
        X_r = kpca.transform(X)  # project the original data onto two dimensions
        # 3x2 grid: one panel per parameter combination of the sigmoid kernel.
        ax = fig.add_subplot(3, 2, i + 1)
        for label, color in zip(np.unique(y), colors):
            position = y == label
            ax.scatter(X_r[position, 0], X_r[position, 1],
                       label="target= %d" % label, color=color)
        ax.set_xlabel("X[0]")
        ax.set_xticks([])  # hide x-axis ticks
        ax.set_yticks([])  # hide y-axis ticks
        ax.set_ylabel("X[1]")
        ax.legend(loc="best")
        ax.set_title(r"$\tanh(%s(x\cdot z)+%s)$" % (gamma, r))
    plt.suptitle("KPCA-sigmoid")
    plt.show()
Example #13
def plot_KPCA_rbf(*data):
    '''
    Plot after KPCA with an rbf kernel.
    :param data: train_data, train_value
    :return: None
    '''
    X,y=data
    fig=plt.figure()
    colors = ((1, 0, 0), (0, 1, 0), (0, 0, 1), (0.5, 0.5, 0), (0, 0.5, 0.5),
              (0.5, 0, 0.5), (0.4, 0.6, 0), (0.6, 0.4, 0), (0, 0.6, 0.4),
              (0.5, 0.3, 0.2))
    Gammas=[0.5,1,4,10]
    for i,gamma in enumerate(Gammas):
        kpca=decomposition.KernelPCA(n_components=2,kernel='rbf',gamma=gamma)
        kpca.fit(X)
        X_r=kpca.transform(X)
        ax=fig.add_subplot(2,2,i+1)
        for label ,color in zip( np.unique(y),colors):
            position=y==label
            ax.scatter(X_r[position,0],X_r[position,1],label="target= {0}".format(label),
            color=color)
        ax.set_xlabel("X[0]")
        ax.set_xticks([])
        ax.set_yticks([])
        ax.set_ylabel("X[1]")
        ax.legend(loc="best")
        ax.set_title(r"$\exp(-{0}||x-z||^2)$".format(gamma))
    plt.suptitle("KPCA-rbf")
    plt.show()
Example #14
def plot_KPCA_poly(*data):
    '''
    Plot after KPCA with a poly kernel.
    :param data: train_data, train_value
    :return: None
    '''
    X,y=data
    fig=plt.figure()
    colors = ((1, 0, 0), (0, 1, 0), (0, 0, 1), (0.5, 0.5, 0), (0, 0.5, 0.5),
              (0.5, 0, 0.5), (0.4, 0.6, 0), (0.6, 0.4, 0), (0, 0.6, 0.4),
              (0.5, 0.3, 0.2))
    # Poly-kernel parameter combinations, each a tuple (p, gamma, r):
    # p in {3, 10}, gamma in {1, 10}, r in {1, 10} -- 8 combinations.
    Params = [(3, 1, 1), (3, 10, 1), (3, 1, 10), (3, 10, 10),
              (10, 1, 1), (10, 10, 1), (10, 1, 10), (10, 10, 10)]
    for i,(p,gamma,r) in enumerate(Params):
        kpca = decomposition.KernelPCA(n_components=2, kernel='poly',
                                       gamma=gamma, degree=p, coef0=r)
        kpca.fit(X)
        X_r=kpca.transform(X)
        ax=fig.add_subplot(2,4,i+1)
        for label ,color in zip( np.unique(y),colors):
            position=y==label
            ax.scatter(X_r[position,0],X_r[position,1],label="target= %d"%label,
            color=color)
        ax.set_xlabel("X[0]")
        ax.set_xticks([])
        ax.set_yticks([])
        ax.set_ylabel("X[1]")
        ax.legend(loc="best")
        ax.set_title(r"$ ({0} (x \cdot z+1)+{1})^{{2}}$".format(gamma,r,p))
    plt.suptitle("KPCA-Poly")
    plt.show()
Example #15
def plot_KPCA(*data):
    '''
    Plot after KPCA with each of several kernels.
    :param data: train_data, train_value
    :return: None
    '''
    X,y=data
    kernels=['linear','poly','rbf','sigmoid']
    fig=plt.figure()
    colors = ((1, 0, 0), (0, 1, 0), (0, 0, 1), (0.5, 0.5, 0), (0, 0.5, 0.5),
              (0.5, 0, 0.5), (0.4, 0.6, 0), (0.6, 0.4, 0), (0, 0.6, 0.4),
              (0.5, 0.3, 0.2))

    for i,kernel in enumerate(kernels):
        kpca=decomposition.KernelPCA(n_components=2,kernel=kernel)
        kpca.fit(X)
        X_r=kpca.transform(X)
        ax=fig.add_subplot(2,2,i+1)
        for label ,color in zip( np.unique(y),colors):
            position=y==label
            ax.scatter(X_r[position,0],X_r[position,1],label="target= %d"%label,
            color=color)
        ax.set_xlabel("X[0]")
        ax.set_ylabel("X[1]")
        ax.legend(loc="best")
        ax.set_title("kernel={0}".format(kernel))
    plt.suptitle("KPCA")
    plt.show()
Example #16
def compare_KPCA():
    data, target = generate_circle_data3()
    pca = decomposition.PCA(n_components=2)
    data1 = pca.fit_transform(data)
    try:
        figure1 = plt.figure(1)
        ax = ax3(figure1)
        ax.scatter3D(data[:, 0],
                     data[:, 1],
                     data[:, 2],
                     c=[color[i] for i in target],
                     alpha=0.5)
        plt.title('Origin Data')
    except Exception:
        # Skip the 3-D view when 3-D plotting support is unavailable.
        pass

    figure2 = plt.figure(2)
    k = 1
    for kernel in ['linear', 'rbf', 'poly', 'sigmoid']:  # the linear kernel is just ordinary PCA
        plt.subplot(1, 4, k)
        k += 1
        kpca = decomposition.KernelPCA(n_components=2, kernel=kernel)
        data_reduced = kpca.fit_transform(data)
        plt.scatter(data_reduced[:, 0],
                    data_reduced[:, 1],
                    c=[color[i] for i in target])
        plt.title(kernel)
    plt.suptitle('The Comparison Between KPCA Kernels')
    plt.show()
Example #17
def plot_KPCA_sigmoid(*data):
    """
    Plot the samples after reducing them to two dimensions with a
    sigmoid-kernel KernelPCA.
    """
    x, y = data
    fig = plt.figure()
    colors = ((1, 0, 0), (0, 1, 0), (0, 0, 1), (0.5, 0.5, 0), (0.5, 0, 0.5),
              (0, 0.5, 0.5), (0.4, 0.6, 0), (0.6, 0.4, 0), (0, 0.6, 0.4),
              (0.5, 0.3, 0.2))
    # Each element of Params is one combination of the sigmoid-kernel
    # parameters (gamma, coef0): gamma in {0.01, 0.1, 0.2} and
    # coef0 in {0.1, 0.2}, giving 6 combinations.
    Params = [(0.01, 0.1), (0.01, 0.2), (0.1, 0.1), (0.1, 0.2), (0.2, 0.1),
              (0.2, 0.2)]
    for i, (gamma, r) in enumerate(Params):
        kpca = decomposition.KernelPCA(n_components=2, kernel="sigmoid",
                                       gamma=gamma, coef0=r)
        kpca.fit(x)
        x_r = kpca.transform(x)
        ax = fig.add_subplot(3, 2, i + 1)
        for label, color in zip(np.unique(y), colors):
            position = y == label
            ax.scatter(x_r[position, 0], x_r[position, 1],
                       label="target=%d" % label, color=color)
        ax.set_xlabel("x[0]")
        ax.set_xticks([])
        ax.set_yticks([])
        ax.set_ylabel("x[1]")
        ax.legend(loc="best")
        ax.set_title(r"$\tanh(%s(x\cdot z)+%s)$" % (gamma, r))
    plt.suptitle("KPCA-sigmoid")
    plt.show()
Example #18
def plot_KPCA_sigmoid(*data):
    x, y = data
    fig = plt.figure()
    colors = ((1, 0, 0), (0, 1, 0), (0, 0, 1), (0.5, 0.5, 0), (0, 0.5, 0.5),
              (0.5, 0, 0.5), (0.4, 0.6, 0), (0.6, 0.4, 0), (0, 0.6, 0.4),
              (0.5, 0.3, 0.2))
    Params = [(0.01, 0.1), (0.01, 0.2), (0.1, 0.1), (0.1, 0.2), (0.2, 0.1),
              (0.2, 0.2)]
    for i, (gamma, r) in enumerate(Params):
        kpca = decomposition.KernelPCA(n_components=2,
                                       kernel='sigmoid',
                                       gamma=gamma,
                                       coef0=r)
        kpca.fit(x)
        x_r = kpca.transform(x)
        ax = fig.add_subplot(3, 2, i + 1)
        for label, color in zip(np.unique(y), colors):
            position = y == label  # boolean mask selecting the samples of this class
            ax.scatter(x_r[position, 0],
                       x_r[position, 1],
                       label="target=%d" % label,
                       color=color)
        ax.set_xlabel("x[0]")
        ax.set_xticks([])  # hide the axis ticks
        ax.set_yticks([])
        ax.set_ylabel("x[1]")
        ax.legend(loc="best")
        ax.set_title(r"$\tanh(%s(x\cdot z)+%s)$" % (gamma, r))
    plt.suptitle("KPCA-sigmoid")
    plt.show()
Example #19
def plot_KPCA_rbf(*data):
    """
    绘制经过使用rbf核的kernelPCA降维到二维之后的样本点
    """
    x,y = data
    fig = plt.figure()
    colors = ((1,0,0),(0,1,0),(0,0,1),(0.5,0.5,0),(0.5,0,0.5),\
            (0,0.5,0.5),(0.4,0.6,0),(0.6,0.4,0),(0,0.6,0.4),\
            (0.5,0.3,0.2),)
    Gammas=[0.5,1,4,10] #rbf核的参数组成的列表,每个参数就是gamma值
    for i,gamma in enumerate(Gammas):
        kpca = decomposition.KernelPCA(n_components=2,kernel="rbf",\
                gamma=gamma)
        kpca.fit(x)
        x_r = kpca.transform(x)  #将原始数据降到二维
        ax = fig.add_subplot(2,2,i+1)
        for label,color in zip(np.unique(y),colors):
            position=y==label
            ax.scatter(x_r[position,0],x_r[position,1],\
                    label="target=%d" % label,color=color)
        ax.set_xlabel("x[0]")
        ax.set_xticks([])
        ax.set_yticks([])
        ax.set_ylabel("x[1]")
        ax.legend(loc="best")
        ax.set_title(r"$\exp(-%s||x-z||^2)$"%gamma)
    plt.suptitle("KPCA-rbf")
    plt.show()
Example #20
def feature_analysis(data=None,
                     feature=None,
                     pca_components=None,
                     graph=False,
                     start=None,
                     end=None):
    X = data[feature].values.reshape(-1, len(feature))
    X_train = data[feature].loc[start:end].values.reshape(-1, len(feature))  # .ix was removed from pandas; .loc is the replacement
    pca = decomposition.KernelPCA(n_components=pca_components)
    pca.fit(X_train)
    pcaresult = pca.transform(X)
    # print(pca.components_)
    ica = decomposition.FastICA(n_components=pca_components)
    ica.fit(X_train)
    icaresult = ica.transform(X)
    pcaresult = (pcaresult.T.reshape(pca_components, -1))
    icaresult = (icaresult.T.reshape(pca_components, -1))
    for n in range(pca_components):
        data['%s-pcomponent' % str(n + 1)] = pcaresult[n]
        data['%s-icomponent' % str(n + 1)] = icaresult[n]
    # print(pca.explained_variance_ratio_.cumsum())
    if graph is True:
        for j in range(1, pca_components + 1):
            plt.clf()
            data['%i-pcomponent' % j].plot()
            plt.legend()
            plt.plot()
            plt.show()

    return data
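
One hedged caveat about the commented-out prints above: KernelPCA exposes neither components_ nor explained_variance_ratio_; its kernel-space spectrum is available as eigenvalues_ (lambdas_ before scikit-learn 1.0), which can serve as a rough analogue:

import numpy as np
from sklearn import decomposition

X = np.random.RandomState(0).rand(50, 6)
kp = decomposition.KernelPCA(n_components=None).fit(X)  # keep the full spectrum
lam = kp.eigenvalues_   # use kp.lambdas_ on scikit-learn < 1.0
print(np.cumsum(lam) / lam.sum())   # cumulative share of kernel-space variance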
Example #21
    def dimension_redu(_data, _method):
        '''
        A subfunction to transform a multi-dimensional dataset from the high-dimensional space to a one-dimensional space
        :param _data: a multi-dimensional dataset
        :param _method: one or multiple dimensionality-reduction techniques
        :return: a one-dimensional vector
        '''
        min_max_scaler = preprocessing.MinMaxScaler()
        # print(_data[:, :-2])

        z_data = min_max_scaler.fit_transform(_data)
        # print(z_data)
        from sklearn import decomposition
        # Choose one method
        if _method == 'PCA':
            dim_redu_method = decomposition.PCA(n_components=1)
        elif _method == 'FA':
            dim_redu_method = decomposition.FactorAnalysis(n_components=1, max_iter=5000)
        elif _method == 'KernelPCA':
            dim_redu_method = decomposition.KernelPCA(kernel='cosine', n_components=1)
        elif _method == 'TruncatedSVD':
            dim_redu_method = decomposition.TruncatedSVD(n_components=1)
        else:
            # Fail fast rather than hitting a NameError on the line below.
            raise ValueError('unknown dimensionality-reduction method: %s' % _method)

        dimension_redu_vector = dim_redu_method.fit_transform(z_data)

        z_dimension_redu_vector = np.ndarray.tolist(min_max_scaler.fit_transform(dimension_redu_vector))
        return z_dimension_redu_vector
Example #22
def preprocess(normalization=False, standardization=False, pca=False, whitening=False, kpca=False):
    global train_inputs, test_inputs, input_size

    if normalization:
        train_inputs = preprocessing.normalize(train_inputs)
        test_inputs = preprocessing.normalize(test_inputs)
    if standardization:
        # Fit each transformer on the training data only, then apply it to
        # both sets, so train and test end up in the same space (the original
        # fitted a separate transformer per set, which is inconsistent).
        scaler = preprocessing.StandardScaler().fit(train_inputs)
        train_inputs = scaler.transform(train_inputs)
        test_inputs = scaler.transform(test_inputs)
    if pca:
        pca_model = decomposition.PCA(n_components='mle', whiten=whitening,
                                      svd_solver='full').fit(train_inputs)
        train_inputs = pca_model.transform(train_inputs)
        test_inputs = pca_model.transform(test_inputs)
    if kpca:
        kpca_model = decomposition.KernelPCA(kernel='rbf',
                                             remove_zero_eig=True).fit(train_inputs)
        train_inputs = kpca_model.transform(train_inputs)
        test_inputs = kpca_model.transform(test_inputs)
    input_size = len(train_inputs[0])
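
An equivalent, hedged alternative to the globals-based version above: a scikit-learn Pipeline keeps the fit-on-train / transform-both discipline automatic:

from sklearn.pipeline import make_pipeline
from sklearn import preprocessing, decomposition

pipe = make_pipeline(preprocessing.StandardScaler(),
                     decomposition.KernelPCA(kernel='rbf', remove_zero_eig=True))
# pipe.fit(train_inputs); test_reduced = pipe.transform(test_inputs)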
Example #23
def plot_KPCA_sigmoid(*data):
    '''
    graph with sigmoid kernel
    :param data: train_data, train_value
    :return: None
    '''
    X,y=data
    fig=plt.figure()
    colors = ((1, 0, 0), (0, 1, 0), (0, 0, 1), (0.5, 0.5, 0), (0, 0.5, 0.5),
              (0.5, 0, 0.5), (0.4, 0.6, 0), (0.6, 0.4, 0), (0, 0.6, 0.4),
              (0.5, 0.3, 0.2))
    # Sigmoid-kernel parameter combinations, each a tuple (gamma, coef0):
    # gamma in {0.01, 0.1, 0.2}, coef0 in {0.1, 0.2} -- 6 combinations.
    Params = [(0.01, 0.1), (0.01, 0.2), (0.1, 0.1), (0.1, 0.2), (0.2, 0.1),
              (0.2, 0.2)]
    for i,(gamma,r) in enumerate(Params):
        kpca=decomposition.KernelPCA(n_components=2,kernel='sigmoid',gamma=gamma,coef0=r)
        kpca.fit(X)
        X_r=kpca.transform(X)
        ax=fig.add_subplot(3,2,i+1)
        for label ,color in zip( np.unique(y),colors):
            position=y==label
            ax.scatter(X_r[position,0],X_r[position,1],label="target= {0}".format(label),
            color=color)
        ax.set_xlabel("X[0]")
        ax.set_xticks([])
        ax.set_yticks([])
        ax.set_ylabel("X[1]")
        ax.legend(loc="best")
        ax.set_title(r"$\tanh({0}(x\cdot z)+{1})$".format(gamma,r))
    plt.suptitle("KPCA-sigmoid")
    plt.show()
Example #24
def pca(self, X):
    # Every argument of the original call was the KernelPCA default, so this
    # is simply a linear-kernel KernelPCA, i.e. plain PCA in kernel space.
    pca = decomposition.KernelPCA(n_components=None, kernel='linear')
    pca.fit(X)
    X = pca.transform(X)
    return X
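
A small sketch backing the comment above: a linear-kernel KernelPCA yields the same scores as plain PCA up to a per-component sign flip, so the method is effectively ordinary PCA:

import numpy as np
from sklearn import datasets, decomposition

X, _ = datasets.load_iris(return_X_y=True)
a = decomposition.PCA(n_components=2).fit_transform(X)
b = decomposition.KernelPCA(n_components=2, kernel='linear').fit_transform(X)
print(np.allclose(np.abs(a), np.abs(b), atol=1e-6))   # should print True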
Example #25
def plot_kpca(*data):
    x, y = data
    fig = plt.figure()
    colors = ((1, 0, 0), (0, 1, 0), (0, 0, 1))
    gammas = [0.5, 1, 4, 10]
    for i, gamma in enumerate(gammas):
        print("-" * 50)
        print(gamma)
        kpca = decomposition.KernelPCA(n_components=2,
                                       kernel="rbf",
                                       gamma=gamma)
        kpca.fit(x)
        x_r = kpca.transform(x)

        # plot this gamma's projection
        ax = fig.add_subplot(2, 2, i + 1)
        for label, color in zip(np.unique(y), colors):
            print(label, " ", color)
            position = y == label
            ax.scatter(x_r[position, 0],
                       x_r[position, 1],
                       label="target=%d" % label,
                       color=color)
        ax.set_xlabel("x[0]")
        ax.set_ylabel("x[1]")
        ax.set_xticks([])
        ax.set_yticks([])
        ax.legend(loc="best")
        ax.set_title(r"$\exp(-%s||x-z||^2)$" % gamma)

    plt.suptitle("KPCA-rbf")
    plt.show()
Example #26
def plot_KPCA(*data):
    X, Y = data
    kernels = ['linear', 'poly', 'rbf', 'sigmoid']
    fig = plt.figure()
    colors = (
        (1, 0, 0),
        (0, 1, 0),
        (0, 0, 1),
        (0.5, 0.5, 0),
        (0, 0.5, 0.5),
        (0.5, 0, 0.5),
        (0.4, 0.6, 0),
        (0.6, 0.4, 0),
        (0, 0.6, 0.4),
        (0.5, 0.3, 0.2),
    )
    for i, kernel in enumerate(kernels):
        kpca = decomposition.KernelPCA(n_components=2, kernel=kernel)
        kpca.fit(X)
        X_r = kpca.transform(X)
        ax = fig.add_subplot(2, 2, i + 1)
        for label, color in zip(np.unique(Y), colors):
            position = Y == label
            ax.scatter(X_r[position, 0],
                       X_r[position, 1],
                       label="target=%d" % label,
                       color=color)
            ax.set_xlabel("X[0]")
            ax.set_ylabel("X[1]")
            ax.legend(loc="best")
            ax.set_title("kernel=%s" % kernel)
    plt.suptitle("KPCA")
    plt.show()
Example #27
def test_KPCA(*data):
    X, Y = data
    kernels = ['linear', 'poly', 'rbf', 'sigmoid']
    for kernel in kernels:
        kpca = decomposition.KernelPCA(n_components=None, kernel=kernel)
        kpca.fit(X)
        # lambdas_ was renamed eigenvalues_ in scikit-learn 1.0.
        print("kernel=%s-->eigenvalues:%s" % (kernel, kpca.eigenvalues_))
Example #28
def plot_kpca(*data):
    x, y = data
    fig = plt.figure()
    colors = ((1, 0, 0), (0, 1, 0), (0, 0, 1))
    kernels = ["linear", "poly", "rbf", "sigmoid"]
    for i, kernel in enumerate(kernels):
        print("-" * 50)
        kpca = decomposition.KernelPCA(n_components=None, kernel=kernel)
        kpca.fit(x)
        x_r = kpca.transform(x)

        ax = fig.add_subplot(2, 2, i + 1)

        for label, color in zip(np.unique(y), colors):
            print(label, " ", color)
            position = y == label
            ax.scatter(x_r[position, 0],
                       x_r[position, 1],
                       label="target=%d" % label,
                       color=color)

        ax.set_xlabel("x[0]")
        ax.set_ylabel("y[0]")
        ax.legend(loc="best")
        ax.set_title("kernel=%s" % kernel)

    plt.suptitle("KPCA")
    plt.show()
Example #29
    def _gen_gbdt_features(self, pool):
        final = []
        col_gbdts = ['gbdt_' + str(i) for i in range(self.n_tree)]
        for g in pool.groupby(self.kcols):
            (keys, df) = g

            train = df[df.time_window_start < pd.Timestamp(2016, 10, 18)]  # pd.datetime was removed from pandas
            train_x = train.drop([*self.timecols, *self.kcols, self.vcol], axis=1)
            train_y = train[self.vcol]

            regor = GradientBoostingRegressor(
                loss='huber',
                n_estimators=self.n_tree, max_leaf_nodes=10)
            model = regor.fit(train_x, train_y)

            gbdt_features = []
            df_x = df.drop([*self.timecols, *self.kcols, self.vcol], axis=1)
            assert len(model.estimators_) == self.n_tree, 'n_tree is not match!'
            for tree in model.estimators_:
                gbdt_features.append(tree[0].apply(df_x))

            gbdt_features = np.array(gbdt_features).T
            df_gbdts = pd.DataFrame(gbdt_features, columns=col_gbdts)

            # ignore_index will drop column names, so we do not use it here
            df.reset_index(drop=True, inplace=True)
            df_gbdts.reset_index(drop=True, inplace=True)
            final.append(pd.concat([df, df_gbdts], axis=1))

        final = pd.concat(final)

        # one-hot code
        origs = final[list(set(final.columns) - set(col_gbdts))]
        gbdts = final[col_gbdts].copy()  # copy so the astype below does not warn on a view

        for gbdtcol in col_gbdts:
            gbdts[gbdtcol] = gbdts[gbdtcol].astype('category')

        gbdts_dummies = pd.get_dummies(gbdts).reset_index(drop=True)

        n_feature = int(origs.shape[1] / 4)

        # remove duplicated gbdt features
        gbdts = gbdts_dummies.T.drop_duplicates().T
        #gbdts = preprocessing.RobustScaler().fit_transform(gbdts)
        #gbdts = decomposition.TruncatedSVD(
        #    n_components=int(origs.shape[1] / 4), n_iter=20
        gbdts = decomposition.KernelPCA(
            n_components=n_feature
        ).fit_transform(gbdts)
        gbdts = pd.DataFrame(
            gbdts, columns=['gbdt_{0}'.format(i) for i in range(n_feature)])

        print('GBDT Feature Shape:', gbdts.shape, origs.shape)

        origs.reset_index(drop=True, inplace=True)
        gbdts.reset_index(drop=True, inplace=True)

        return pd.concat([origs, gbdts], axis=1)
Example #30
def add_kernel_pca_reduction(self, n_components: int = None, kernel: str = 'linear',
                             gamma: float = None, degree: int = 3, coef0: float = 1, n_jobs: int = -1,
                             remove_zero_eig: bool = False, fit_inverse_transform: bool = False, **kwargs):
    # kernel is one of 'linear', 'poly', 'rbf', 'sigmoid', 'cosine' or
    # 'precomputed' (the original used typing.Union as a default value,
    # which is not a valid default). Unlike PCA, KernelPCA requires
    # n_components to be an int or None; 'mle' and fractions are not accepted.
    self._pca.append(decomposition.KernelPCA(n_components=n_components, kernel=kernel, gamma=gamma, degree=degree,
                                             coef0=coef0, fit_inverse_transform=fit_inverse_transform, n_jobs=n_jobs,
                                             remove_zero_eig=remove_zero_eig, **kwargs))