def KernelPCA(self, kernel, n_comps=None, sparseversion=False):
    # Default to the full dimensionality; avoid a mutable default argument.
    if n_comps is None:
        n_comps = self.numdims
    if sparseversion:
        # Densify the sparse matrices before fitting.
        train = self.datatrain.toarray()
        val = self.dataval.toarray()
        test = self.datatest.toarray()
    else:
        train, val, test = self.datatrain, self.dataval, self.datatest
    # n_components is the first positional argument of KernelPCA, so the
    # kernel must be passed by keyword.
    self.kernelprincomp = decomp.KernelPCA(n_components=n_comps, kernel=kernel,
                                           remove_zero_eig=False)
    self.kernelprincomp.fit(train)
    # Replace any NaNs produced by the projection with zeros.
    self.kernelprincompscores = [
        np.nan_to_num(self.kernelprincomp.transform(train)),
        np.nan_to_num(self.kernelprincomp.transform(val)),
        np.nan_to_num(self.kernelprincomp.transform(test)),
    ]
def PCA(self, X, Y=None, ncomp=2, method='PCA'):
    """
    Decompose a multivariate dataset into an orthogonal set of components
    that explains a maximum amount of the variance.

    @param X: input dataset

    Keyword Arguments:
    ncomp  -- number of components to keep (default: 2)
    method -- method to use: PCA (default) / Randomized / Sparse /
              rbf / linear / sigmoid / SVD
    """
    from sklearn import decomposition
    if method == 'Randomized':
        # RandomizedPCA was removed from scikit-learn; use the randomized
        # SVD solver of PCA instead.
        pca = decomposition.PCA(n_components=ncomp, svd_solver='randomized')
    elif method == 'Sparse':
        pca = decomposition.SparsePCA(n_components=ncomp)
    elif method == 'rbf':
        pca = decomposition.KernelPCA(n_components=ncomp, fit_inverse_transform=True,
                                      gamma=10, kernel="rbf")
    elif method == 'linear':
        pca = decomposition.KernelPCA(n_components=ncomp, kernel="linear")
    elif method == 'sigmoid':
        pca = decomposition.KernelPCA(n_components=ncomp, kernel="sigmoid")
    elif method == 'SVD':
        pca = decomposition.TruncatedSVD(n_components=ncomp)
    else:
        pca = decomposition.PCA(n_components=ncomp)
        method = 'PCA'
    print('[ML] Using %s method' % method)
    pca.fit(X)
    return pca.transform(X)
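# A minimal usage sketch for the dispatch method above; since `self` is unused
# in its body, it can be called directly with None. The iris dataset and the
# chosen methods here are purely illustrative.
from sklearn import datasets

if __name__ == '__main__':
    X_iris = datasets.load_iris().data
    for m in ['PCA', 'Randomized', 'SVD', 'rbf']:
        X_2d = PCA(None, X_iris, ncomp=2, method=m)
        print(m, X_2d.shape)  # each method returns an (n_samples, 2) projection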
def PCdata(self, n_pcs=75):
    # Turn the pairwise distance matrix into an RBF-style similarity kernel
    # and extract principal components from the precomputed kernel.
    dist, ids = self.distmatrix()
    kernel = np.exp(-dist**2 / dist.max()**2)
    pcafunc = skdecomp.KernelPCA(n_pcs, kernel='precomputed')
    pcs = pcafunc.fit_transform(kernel)
    pcsframe = pandas.DataFrame(pcs, index=ids,
                                columns=["pc-" + str(i) for i in range(n_pcs)])
    return pcsframe
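# A self-contained sketch of the same precomputed-kernel pattern, usable with
# any symmetric distance matrix; the random data below is illustrative only.
import numpy as np
from scipy.spatial.distance import pdist, squareform
from sklearn import decomposition

if __name__ == '__main__':
    rng = np.random.default_rng(0)
    points = rng.normal(size=(50, 8))
    dist = squareform(pdist(points))           # (50, 50) pairwise distances
    kernel = np.exp(-dist**2 / dist.max()**2)  # similarity kernel in (0, 1]
    pcs = decomposition.KernelPCA(n_components=5,
                                  kernel='precomputed').fit_transform(kernel)
    print(pcs.shape)  # (50, 5)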
def main():
    # Create random data.
    # https://jakevdp.github.io/PythonDataScienceHandbook/05.07-support-vector-machines.html
    n = 200  # number of points on the circles
    x, y = make_circles(n, factor=.1, noise=.1)

    # Scale data to reduce weights.
    # https://openclassrooms.com/fr/courses/4444646-entrainez-un-modele-predictif-lineaire/4507801-reduisez-l-amplitude-des-poids-affectes-a-vos-variables
    std_scale = preprocessing.StandardScaler().fit(x)
    x_scaled = std_scale.transform(x)

    for i, g in enumerate([1, 10, 100]):
        # Perform kernel PCA on scaled data.
        kpca = decomposition.KernelPCA(n_components=1, kernel='rbf', gamma=g)
        kpca.fit(x_scaled)

        # Project data on the principal component.
        x_kpca = kpca.transform(x_scaled)

        # Plot; ravel the (n, 1) projection so matplotlib accepts it as colors.
        axis = plt.subplot(3, 2, 1 + 2 * i)
        axis.scatter(x_scaled[:, 0], x_scaled[:, 1], c=x_kpca.ravel(), s=50)
        axis.set_title('initial space, g %03d' % g)
        axis = plt.subplot(3, 2, 2 + 2 * i)
        axis.scatter(x_kpca, [0] * n, c=x_kpca.ravel(), s=50)
        axis.set_title('redescription space, g %03d' % g)

    plt.subplots_adjust(left=0.1, bottom=0.1, right=0.9, top=0.9,
                        wspace=0.3, hspace=0.3)
    plt.suptitle('kPCA rbf')
    plt.show()
def test_KPCA(*args):
    x, y = args
    kernels = ['linear', 'poly', 'rbf', 'sigmoid']
    for k in kernels:
        kpca = decomposition.KernelPCA(n_components=None, kernel=k)
        kpca.fit(x, y)  # y is ignored by KernelPCA.fit
        # lambdas_ was renamed eigenvalues_ in scikit-learn 1.0.
        print('kernel=%s-->eigenvalues: %s' % (k, kpca.eigenvalues_))
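# A quick way to exercise test_KPCA; the circles dataset is illustrative only.
from sklearn.datasets import make_circles

if __name__ == '__main__':
    x_demo, y_demo = make_circles(n_samples=100, factor=0.3, noise=0.05)
    test_KPCA(x_demo, y_demo)  # prints the eigenvalue spectrum per kernel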
def test_pca(weight, components_num):
    # degree is ignored by the rbf kernel, so it is omitted here.
    kpca = decomposition.KernelPCA(components_num, kernel='rbf', gamma=4)
    principal_weight = kpca.fit_transform(weight)
    return principal_weight
def plot_KPCA_poly(*data):
    x, y = data
    fig = plt.figure()
    colors = ((1, 0, 0), (0, 1, 0), (0, 0, 1), (0.5, 0.5, 0), (0, 0.5, 0.5),
              (0.5, 0, 0.5), (0.4, 0.6, 0), (0.6, 0.4, 0), (0, 0.6, 0.4), (0.5, 0.3, 0.2))
    # Each tuple is one poly-kernel parameter set: (degree p, gamma, coef0 r).
    Params = [(3, 1, 1), (3, 10, 1), (3, 1, 10), (3, 10, 10),
              (10, 1, 1), (10, 10, 1), (10, 1, 10), (10, 10, 10)]
    for i, (p, gamma, r) in enumerate(Params):
        kpca = decomposition.KernelPCA(n_components=2, kernel='poly',
                                       gamma=gamma, degree=p, coef0=r)
        kpca.fit(x)
        x_r = kpca.transform(x)
        ax = fig.add_subplot(2, 4, i + 1)
        for label, color in zip(np.unique(y), colors):
            position = y == label  # boolean mask selecting one class
            ax.scatter(x_r[position, 0], x_r[position, 1],
                       label="target=%d" % label, color=color)
        ax.set_xlabel("x[0]")
        ax.set_xticks([])
        ax.set_yticks([])
        ax.set_ylabel("x[1]")
        ax.legend(loc="best")
        # The poly kernel is (gamma * (x . z) + r) ** p.
        ax.set_title(r"$(%s (x \cdot z)+%s)^{%s}$" % (gamma, r, p))
    plt.suptitle("KPCA-Poly")
    plt.show()
def plot_KPCA_rbf(*data):
    x, y = data
    fig = plt.figure()
    colors = ((1, 0, 0), (0, 1, 0), (0, 0, 1), (0.5, 0.5, 0), (0, 0.5, 0.5),
              (0.5, 0, 0.5), (0.4, 0.6, 0), (0.6, 0.4, 0), (0, 0.6, 0.4), (0.5, 0.3, 0.2))
    Gammas = [0.5, 1, 4, 10]
    for i, gamma in enumerate(Gammas):
        kpca = decomposition.KernelPCA(n_components=2, kernel='rbf', gamma=gamma)
        kpca.fit(x)
        x_r = kpca.transform(x)
        ax = fig.add_subplot(2, 2, i + 1)
        for label, color in zip(np.unique(y), colors):
            position = y == label  # boolean mask selecting one class
            ax.scatter(x_r[position, 0], x_r[position, 1],
                       label="target=%d" % label, color=color)
        ax.set_xlabel("x[0]")
        ax.set_xticks([])
        ax.set_yticks([])
        ax.set_ylabel("x[1]")
        ax.legend(loc="best")
        ax.set_title(r"$\exp(-%s||x-z||^2)$" % gamma)
    plt.suptitle("KPCA-rbf")
    plt.show()
def plot_KPCA_rbf(*data):
    '''
    Plot the samples after KernelPCA with an rbf kernel reduces them to two dimensions.
    :param data: variadic argument; a tuple of (training set, training labels)
    :return: None
    '''
    X, y = data
    fig = plt.figure()
    # Color set: samples with different labels are drawn in different colors.
    colors = ((1, 0, 0), (0, 1, 0), (0, 0, 1), (0.5, 0.5, 0), (0, 0.5, 0.5), (0.5, 0, 0.5),
              (0.4, 0.6, 0), (0.6, 0.4, 0), (0, 0.6, 0.4), (0.5, 0.3, 0.2),)
    Gammas = [0.5, 1, 4, 10]  # list of rbf kernel parameters; each entry is one gamma value
    for i, gamma in enumerate(Gammas):
        kpca = decomposition.KernelPCA(n_components=2, kernel='rbf', gamma=gamma)
        kpca.fit(X)
        X_r = kpca.transform(X)  # project the original data to two dimensions
        # 2x2 grid: each cell shows the rbf KernelPCA for one parameter setting.
        ax = fig.add_subplot(2, 2, i + 1)
        for label, color in zip(np.unique(y), colors):
            position = y == label
            ax.scatter(X_r[position, 0], X_r[position, 1],
                       label="target= %d" % label, color=color)
        ax.set_xlabel("X[0]")
        ax.set_xticks([])  # hide x-axis ticks
        ax.set_yticks([])  # hide y-axis ticks
        ax.set_ylabel("X[1]")
        ax.legend(loc="best")
        ax.set_title(r"$\exp(-%s||x-z||^2)$" % gamma)
    plt.suptitle("KPCA-rbf")
    plt.show()
def plot_KPCA_poly(*data):
    '''
    Plot the samples after KernelPCA with a poly kernel reduces them to two dimensions.
    :param data: variadic argument; a tuple of (training set, training labels)
    :return: None
    '''
    X, y = data
    fig = plt.figure()
    # Color set: samples with different labels are drawn in different colors.
    colors = ((1, 0, 0), (0, 1, 0), (0, 0, 1), (0.5, 0.5, 0), (0, 0.5, 0.5), (0.5, 0, 0.5),
              (0.4, 0.6, 0), (0.6, 0.4, 0), (0, 0.6, 0.4), (0.5, 0.3, 0.2),)
    # List of poly kernel parameters. Each element is a tuple (p, gamma, r):
    #   p takes values 3 and 10, gamma takes 1 and 10, r takes 1 and 10,
    #   giving 8 combinations in total.
    Params = [(3, 1, 1), (3, 10, 1), (3, 1, 10), (3, 10, 10),
              (10, 1, 1), (10, 10, 1), (10, 1, 10), (10, 10, 10)]
    for i, (p, gamma, r) in enumerate(Params):
        kpca = decomposition.KernelPCA(n_components=2, kernel='poly',
                                       gamma=gamma, degree=p, coef0=r)  # poly kernel, 2 target dimensions
        kpca.fit(X)
        X_r = kpca.transform(X)  # project the original data to two dimensions
        # 2x4 grid: each cell shows the poly KernelPCA for one parameter setting.
        ax = fig.add_subplot(2, 4, i + 1)
        for label, color in zip(np.unique(y), colors):
            position = y == label
            ax.scatter(X_r[position, 0], X_r[position, 1],
                       label="target= %d" % label, color=color)
        ax.set_xlabel("X[0]")
        ax.set_xticks([])  # hide x-axis ticks
        ax.set_yticks([])  # hide y-axis ticks
        ax.set_ylabel("X[1]")
        ax.legend(loc="best")
        # The poly kernel is (gamma * (x . z) + r) ** p.
        ax.set_title(r"$(%s (x \cdot z)+%s)^{%s}$" % (gamma, r, p))
    plt.suptitle("KPCA-Poly")
    plt.show()
def plot_KPCA(*data):
    '''
    Plot the samples after KernelPCA reduces them to two dimensions.
    :param data: variadic argument; a tuple of (training set, training labels)
    :return: None
    '''
    X, y = data
    kernels = ['linear', 'poly', 'rbf', 'sigmoid']
    fig = plt.figure()
    # Color set: samples with different labels are drawn in different colors.
    colors = ((1, 0, 0), (0, 1, 0), (0, 0, 1), (0.5, 0.5, 0), (0, 0.5, 0.5), (0.5, 0, 0.5),
              (0.4, 0.6, 0), (0.6, 0.4, 0), (0, 0.6, 0.4), (0.5, 0.3, 0.2),)
    for i, kernel in enumerate(kernels):
        kpca = decomposition.KernelPCA(n_components=2, kernel=kernel)
        kpca.fit(X)
        X_r = kpca.transform(X)  # project the original data to two dimensions
        # 2x2 grid: each cell shows the KernelPCA for one kernel.
        ax = fig.add_subplot(2, 2, i + 1)
        for label, color in zip(np.unique(y), colors):
            position = y == label
            ax.scatter(X_r[position, 0], X_r[position, 1],
                       label="target= %d" % label, color=color)
        ax.set_xlabel("X[0]")
        ax.set_ylabel("X[1]")
        ax.legend(loc="best")
        ax.set_title("kernel=%s" % kernel)
    plt.suptitle("KPCA")
    plt.show()
def plot_KPCA_sigmoid(*data):
    '''
    Plot the samples after KernelPCA with a sigmoid kernel reduces them to two dimensions.
    :param data: variadic argument; a tuple of (training set, training labels)
    :return: None
    '''
    X, y = data
    fig = plt.figure()
    # Color set: samples with different labels are drawn in different colors.
    colors = ((1, 0, 0), (0, 1, 0), (0, 0, 1), (0.5, 0.5, 0), (0, 0.5, 0.5), (0.5, 0, 0.5),
              (0.4, 0.6, 0), (0.6, 0.4, 0), (0, 0.6, 0.4), (0.5, 0.3, 0.2),)
    # List of sigmoid kernel parameters. Each element is one (gamma, coef0) pair:
    #   gamma takes 0.01, 0.1 and 0.2; coef0 takes 0.1 and 0.2,
    #   giving 6 combinations in total.
    Params = [(0.01, 0.1), (0.01, 0.2), (0.1, 0.1), (0.1, 0.2), (0.2, 0.1), (0.2, 0.2)]
    for i, (gamma, r) in enumerate(Params):
        kpca = decomposition.KernelPCA(n_components=2, kernel='sigmoid', gamma=gamma, coef0=r)
        kpca.fit(X)
        X_r = kpca.transform(X)  # project the original data to two dimensions
        # 3x2 grid: each cell shows the sigmoid KernelPCA for one parameter setting.
        ax = fig.add_subplot(3, 2, i + 1)
        for label, color in zip(np.unique(y), colors):
            position = y == label
            ax.scatter(X_r[position, 0], X_r[position, 1],
                       label="target= %d" % label, color=color)
        ax.set_xlabel("X[0]")
        ax.set_xticks([])  # hide x-axis ticks
        ax.set_yticks([])  # hide y-axis ticks
        ax.set_ylabel("X[1]")
        ax.legend(loc="best")
        ax.set_title(r"$\tanh(%s(x\cdot z)+%s)$" % (gamma, r))
    plt.suptitle("KPCA-sigmoid")
    plt.show()
def plot_KPCA_rbf(*data):
    '''
    graph with kernel of rbf
    :param data: train_data, train_value
    :return: None
    '''
    X, y = data
    fig = plt.figure()
    colors = ((1, 0, 0), (0, 1, 0), (0, 0, 1), (0.5, 0.5, 0), (0, 0.5, 0.5), (0.5, 0, 0.5),
              (0.4, 0.6, 0), (0.6, 0.4, 0), (0, 0.6, 0.4), (0.5, 0.3, 0.2),)
    Gammas = [0.5, 1, 4, 10]
    for i, gamma in enumerate(Gammas):
        kpca = decomposition.KernelPCA(n_components=2, kernel='rbf', gamma=gamma)
        kpca.fit(X)
        X_r = kpca.transform(X)
        ax = fig.add_subplot(2, 2, i + 1)
        for label, color in zip(np.unique(y), colors):
            position = y == label
            ax.scatter(X_r[position, 0], X_r[position, 1],
                       label="target= {0}".format(label), color=color)
        ax.set_xlabel("X[0]")
        ax.set_xticks([])
        ax.set_yticks([])
        ax.set_ylabel("X[1]")
        ax.legend(loc="best")
        ax.set_title(r"$\exp(-{0}||x-z||^2)$".format(gamma))
    plt.suptitle("KPCA-rbf")
    plt.show()
def plot_KPCA_poly(*data):
    '''
    graph after KPCA with poly kernel
    :param data: train_data, train_value
    :return: None
    '''
    X, y = data
    fig = plt.figure()
    colors = ((1, 0, 0), (0, 1, 0), (0, 0, 1), (0.5, 0.5, 0), (0, 0.5, 0.5), (0.5, 0, 0.5),
              (0.4, 0.6, 0), (0.6, 0.4, 0), (0, 0.6, 0.4), (0.5, 0.3, 0.2),)
    # Poly kernel parameters (p, gamma, r):
    #   p: 3, 10; gamma: 1, 10; r: 1, 10 -- 8 combinations.
    Params = [(3, 1, 1), (3, 10, 1), (3, 1, 10), (3, 10, 10),
              (10, 1, 1), (10, 10, 1), (10, 1, 10), (10, 10, 10)]
    for i, (p, gamma, r) in enumerate(Params):
        kpca = decomposition.KernelPCA(n_components=2, kernel='poly',
                                       gamma=gamma, degree=p, coef0=r)
        kpca.fit(X)
        X_r = kpca.transform(X)
        ax = fig.add_subplot(2, 4, i + 1)
        for label, color in zip(np.unique(y), colors):
            position = y == label
            ax.scatter(X_r[position, 0], X_r[position, 1],
                       label="target= %d" % label, color=color)
        ax.set_xlabel("X[0]")
        ax.set_xticks([])
        ax.set_yticks([])
        ax.set_ylabel("X[1]")
        ax.legend(loc="best")
        # The exponent needs triple braces so the degree p is substituted
        # instead of a literal "2"; the poly kernel is (gamma*(x.z)+r)**p.
        ax.set_title(r"$({0} (x \cdot z)+{1})^{{{2}}}$".format(gamma, r, p))
    plt.suptitle("KPCA-Poly")
    plt.show()
def plot_KPCA(*data):
    '''
    graph after KPCA
    :param data: train_data, train_value
    :return: None
    '''
    X, y = data
    kernels = ['linear', 'poly', 'rbf', 'sigmoid']
    fig = plt.figure()
    colors = ((1, 0, 0), (0, 1, 0), (0, 0, 1), (0.5, 0.5, 0), (0, 0.5, 0.5), (0.5, 0, 0.5),
              (0.4, 0.6, 0), (0.6, 0.4, 0), (0, 0.6, 0.4), (0.5, 0.3, 0.2),)
    for i, kernel in enumerate(kernels):
        kpca = decomposition.KernelPCA(n_components=2, kernel=kernel)
        kpca.fit(X)
        X_r = kpca.transform(X)
        ax = fig.add_subplot(2, 2, i + 1)
        for label, color in zip(np.unique(y), colors):
            position = y == label
            ax.scatter(X_r[position, 0], X_r[position, 1],
                       label="target= %d" % label, color=color)
        ax.set_xlabel("X[0]")
        ax.set_ylabel("X[1]")
        ax.legend(loc="best")
        ax.set_title("kernel={0}".format(kernel))
    plt.suptitle("KPCA")
    plt.show()
def compare_KPCA():
    data, target = generate_circle_data3()
    pca = decomposition.PCA(n_components=2)
    data1 = pca.fit_transform(data)
    try:
        figure1 = plt.figure(1)
        ax = ax3(figure1)
        ax.scatter3D(data[:, 0], data[:, 1], data[:, 2],
                     c=[color[i] for i in target], alpha=0.5)
        plt.title('Origin Data')
    except Exception:
        # Skip the 3D preview if the 3D toolkit is unavailable.
        pass
    figure2 = plt.figure(2)
    k = 1
    for kernel in ['linear', 'rbf', 'poly', 'sigmoid']:  # the linear kernel reproduces plain PCA
        plt.subplot(1, 4, k)
        k += 1
        kpca = decomposition.KernelPCA(n_components=2, kernel=kernel)
        data_reduced = kpca.fit_transform(data)
        plt.scatter(data_reduced[:, 0], data_reduced[:, 1],
                    c=[color[i] for i in target])
        plt.title(kernel)
    plt.suptitle('Comparison Between KPCA Kernels')
    plt.show()
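# The snippet above relies on three external names. A minimal sketch of what
# they might look like (all hypothetical, for illustration only):
# `generate_circle_data3` returning 3-D concentric-circle data, `color`
# mapping labels to colors, and `ax3` building a 3-D axes from a figure.
import numpy as np
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D  # registers the '3d' projection
from sklearn.datasets import make_circles

color = {0: 'red', 1: 'blue'}

def ax3(figure):
    # Attach a 3-D subplot to the given figure.
    return figure.add_subplot(111, projection='3d')

def generate_circle_data3(n=300, noise=0.05):
    # Lift the 2-D circles dataset into 3-D by adding a small z coordinate.
    xy, target = make_circles(n, factor=0.3, noise=noise)
    z = np.random.normal(scale=noise, size=(n, 1))
    return np.hstack([xy, z]), target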
def plot_KPCA_sigmoid(*data):
    """
    Plot the samples after KernelPCA with a sigmoid kernel reduces them to two dimensions.
    """
    x, y = data
    fig = plt.figure()
    colors = ((1, 0, 0), (0, 1, 0), (0, 0, 1), (0.5, 0.5, 0), (0.5, 0, 0.5),
              (0, 0.5, 0.5), (0.4, 0.6, 0), (0.6, 0.4, 0), (0, 0.6, 0.4),
              (0.5, 0.3, 0.2),)
    # Each element of Params is one combination of sigmoid kernel parameters:
    #   gamma takes 0.01, 0.1 and 0.2; coef0 takes 0.1 and 0.2,
    #   so there are 6 combinations.
    Params = [(0.01, 0.1), (0.01, 0.2), (0.1, 0.1), (0.1, 0.2), (0.2, 0.1), (0.2, 0.2)]
    for i, (gamma, r) in enumerate(Params):
        kpca = decomposition.KernelPCA(n_components=2, kernel="sigmoid",
                                       gamma=gamma, coef0=r)
        kpca.fit(x)
        x_r = kpca.transform(x)
        ax = fig.add_subplot(3, 2, i + 1)
        for label, color in zip(np.unique(y), colors):
            position = y == label
            ax.scatter(x_r[position, 0], x_r[position, 1],
                       label="target=%d" % label, color=color)
        ax.set_xlabel("x[0]")
        ax.set_xticks([])
        ax.set_yticks([])
        ax.set_ylabel("x[1]")
        ax.legend(loc="best")
        ax.set_title(r"$\tanh(%s(x\cdot z)+%s)$" % (gamma, r))
    plt.suptitle("KPCA-sigmoid")
    plt.show()
def plot_KPCA_sigmoid(*data):
    x, y = data
    fig = plt.figure()
    colors = ((1, 0, 0), (0, 1, 0), (0, 0, 1), (0.5, 0.5, 0), (0, 0.5, 0.5),
              (0.5, 0, 0.5), (0.4, 0.6, 0), (0.6, 0.4, 0), (0, 0.6, 0.4), (0.5, 0.3, 0.2))
    Params = [(0.01, 0.1), (0.01, 0.2), (0.1, 0.1), (0.1, 0.2), (0.2, 0.1), (0.2, 0.2)]
    for i, (gamma, r) in enumerate(Params):
        kpca = decomposition.KernelPCA(n_components=2, kernel='sigmoid', gamma=gamma, coef0=r)
        kpca.fit(x)
        x_r = kpca.transform(x)
        ax = fig.add_subplot(3, 2, i + 1)
        for label, color in zip(np.unique(y), colors):
            position = y == label  # boolean mask selecting one class
            ax.scatter(x_r[position, 0], x_r[position, 1],
                       label="target=%d" % label, color=color)
        ax.set_xlabel("x[0]")
        ax.set_xticks([])  # hide axis ticks
        ax.set_yticks([])
        ax.set_ylabel("x[1]")
        ax.legend(loc="best")
        ax.set_title(r"$\tanh(%s(x\cdot z)+%s)$" % (gamma, r))
    plt.suptitle("KPCA-sigmoid")
    plt.show()
def plot_KPCA_rbf(*data):
    """
    Plot the samples after KernelPCA with an rbf kernel reduces them to two dimensions.
    """
    x, y = data
    fig = plt.figure()
    colors = ((1, 0, 0), (0, 1, 0), (0, 0, 1), (0.5, 0.5, 0), (0.5, 0, 0.5),
              (0, 0.5, 0.5), (0.4, 0.6, 0), (0.6, 0.4, 0), (0, 0.6, 0.4),
              (0.5, 0.3, 0.2),)
    Gammas = [0.5, 1, 4, 10]  # list of rbf kernel parameters; each entry is one gamma value
    for i, gamma in enumerate(Gammas):
        kpca = decomposition.KernelPCA(n_components=2, kernel="rbf", gamma=gamma)
        kpca.fit(x)
        x_r = kpca.transform(x)  # project the original data to two dimensions
        ax = fig.add_subplot(2, 2, i + 1)
        for label, color in zip(np.unique(y), colors):
            position = y == label
            ax.scatter(x_r[position, 0], x_r[position, 1],
                       label="target=%d" % label, color=color)
        ax.set_xlabel("x[0]")
        ax.set_xticks([])
        ax.set_yticks([])
        ax.set_ylabel("x[1]")
        ax.legend(loc="best")
        ax.set_title(r"$\exp(-%s||x-z||^2)$" % gamma)
    plt.suptitle("KPCA-rbf")
    plt.show()
def feature_analysis(data=None, feature=None, pca_components=None, graph=False,
                     start=None, end=None):
    X = data[feature].values.reshape(-1, len(feature))
    # .ix was removed from pandas; use label-based .loc slicing instead.
    X_train = data[feature].loc[start:end].values.reshape(-1, len(feature))

    pca = decomposition.KernelPCA(n_components=pca_components)
    pca.fit(X_train)
    pcaresult = pca.transform(X)

    ica = decomposition.FastICA(n_components=pca_components)
    ica.fit(X_train)
    icaresult = ica.transform(X)

    # Store each component as a new column of the input frame.
    pcaresult = pcaresult.T.reshape(pca_components, -1)
    icaresult = icaresult.T.reshape(pca_components, -1)
    for n in range(pca_components):
        data['%s-pcomponent' % str(n + 1)] = pcaresult[n]
        data['%s-icomponent' % str(n + 1)] = icaresult[n]

    if graph is True:
        for j in range(1, pca_components + 1):
            plt.clf()
            data['%i-pcomponent' % j].plot()
            plt.legend()
            plt.plot()
            plt.show()
    return data
def dimension_redu(_data, _method):
    '''
    A helper to transform a multi-dimensional dataset from the
    high-dimensional space to a one-dimensional space.
    :param _data: a multi-dimensional dataset
    :param _method: one of several dimensionality-reduction techniques
    :return: a one-dimensional vector
    '''
    from sklearn import decomposition

    min_max_scaler = preprocessing.MinMaxScaler()
    z_data = min_max_scaler.fit_transform(_data)

    # Choose one method.
    if _method == 'PCA':
        dim_redu_method = decomposition.PCA(n_components=1)
    elif _method == 'FA':
        dim_redu_method = decomposition.FactorAnalysis(n_components=1, max_iter=5000)
    elif _method == 'KernelPCA':
        dim_redu_method = decomposition.KernelPCA(kernel='cosine', n_components=1)
    elif _method == 'TruncatedSVD':
        dim_redu_method = decomposition.TruncatedSVD(1)
    else:
        raise ValueError('unknown method: %s' % _method)

    dimension_redu_vector = dim_redu_method.fit_transform(z_data)
    # Rescale the reduced vector to [0, 1] and return it as a list.
    z_dimension_redu_vector = np.ndarray.tolist(
        min_max_scaler.fit_transform(dimension_redu_vector))
    return z_dimension_redu_vector
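# A quick illustrative call of dimension_redu, assuming the module-level
# imports the function relies on (numpy as np, sklearn preprocessing).
# The random matrix is for demonstration only.
import numpy as np
from sklearn import preprocessing

if __name__ == '__main__':
    demo = np.random.default_rng(1).normal(size=(20, 6))
    for m in ['PCA', 'FA', 'KernelPCA', 'TruncatedSVD']:
        vec = dimension_redu(demo, m)
        print(m, len(vec))  # 20 rows, each a one-element list scaled to [0, 1]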
def preprocess(normalization=False, standarization=False, pca=False,
               whitening=False, kpca=False):
    global train_inputs, test_inputs, input_size
    if normalization:
        train_inputs = preprocessing.normalize(train_inputs)
        test_inputs = preprocessing.normalize(test_inputs)
    if standarization:
        # Fit the scaler on the training set only, then apply it to both sets,
        # so train and test live in the same standardized space.
        scaler = preprocessing.StandardScaler().fit(train_inputs)
        train_inputs = scaler.transform(train_inputs)
        test_inputs = scaler.transform(test_inputs)
    if pca:
        # Likewise, fit PCA on the training set and reuse it for the test set.
        pca_model = decomposition.PCA(n_components='mle', whiten=whitening,
                                      svd_solver='full').fit(train_inputs)
        train_inputs = pca_model.transform(train_inputs)
        test_inputs = pca_model.transform(test_inputs)
    if kpca:
        kpca_model = decomposition.KernelPCA(kernel='rbf',
                                             remove_zero_eig=True).fit(train_inputs)
        train_inputs = kpca_model.transform(train_inputs)
        test_inputs = kpca_model.transform(test_inputs)
    input_size = len(train_inputs[0])
def plot_KPCA_sigmoid(*data):
    '''
    graph with sigmoid kernel
    :param data: train_data, train_value
    :return: None
    '''
    X, y = data
    fig = plt.figure()
    colors = ((1, 0, 0), (0, 1, 0), (0, 0, 1), (0.5, 0.5, 0), (0, 0.5, 0.5), (0.5, 0, 0.5),
              (0.4, 0.6, 0), (0.6, 0.4, 0), (0, 0.6, 0.4), (0.5, 0.3, 0.2),)
    # Sigmoid kernel parameters (gamma, coef0):
    #   gamma: 0.01, 0.1, 0.2; coef0: 0.1, 0.2 -- 6 combinations.
    Params = [(0.01, 0.1), (0.01, 0.2), (0.1, 0.1), (0.1, 0.2), (0.2, 0.1), (0.2, 0.2)]
    for i, (gamma, r) in enumerate(Params):
        kpca = decomposition.KernelPCA(n_components=2, kernel='sigmoid', gamma=gamma, coef0=r)
        kpca.fit(X)
        X_r = kpca.transform(X)
        ax = fig.add_subplot(3, 2, i + 1)
        for label, color in zip(np.unique(y), colors):
            position = y == label
            ax.scatter(X_r[position, 0], X_r[position, 1],
                       label="target= {0}".format(label), color=color)
        ax.set_xlabel("X[0]")
        ax.set_xticks([])
        ax.set_yticks([])
        ax.set_ylabel("X[1]")
        ax.legend(loc="best")
        ax.set_title(r"$\tanh({0}(x\cdot z)+{1})$".format(gamma, r))
    plt.suptitle("KPCA-sigmoid")
    plt.show()
def pca(self, X):
    # All arguments below are the scikit-learn defaults, spelled out
    # explicitly; with a linear kernel this is equivalent to ordinary PCA
    # keeping every non-trivial component.
    pca = decomposition.KernelPCA(n_components=None, kernel='linear', gamma=None,
                                  degree=3, coef0=1, kernel_params=None, alpha=1.0,
                                  fit_inverse_transform=False, eigen_solver='auto',
                                  tol=0, max_iter=None, remove_zero_eig=False,
                                  random_state=None, copy_X=True, n_jobs=1)
    pca.fit(X)
    X = pca.transform(X)
    return X
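# A small sketch (illustrative random data) checking the equivalence noted
# above: linear-kernel KernelPCA and plain PCA should yield projections that
# match up to per-component sign flips.
import numpy as np
from sklearn import decomposition

if __name__ == '__main__':
    X_demo = np.random.default_rng(2).normal(size=(30, 4))
    a = decomposition.KernelPCA(n_components=3, kernel='linear').fit_transform(X_demo)
    b = decomposition.PCA(n_components=3).fit_transform(X_demo)
    print(np.allclose(np.abs(a), np.abs(b), atol=1e-6))  # expected: True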
def plot_kpca(*data):
    x, y = data
    fig = plt.figure()
    colors = ((1, 0, 0), (0, 1, 0), (0, 0, 1))
    gammas = [0.5, 1, 4, 10]
    for i, gamma in enumerate(gammas):
        print("-" * 50)
        print(gamma)
        kpca = decomposition.KernelPCA(n_components=2, kernel="rbf", gamma=gamma)
        kpca.fit(x)
        x_r = kpca.transform(x)
        # Plot the projected samples.
        ax = fig.add_subplot(2, 2, i + 1)
        for label, color in zip(np.unique(y), colors):
            print(label, " ", color)
            position = y == label
            ax.scatter(x_r[position, 0], x_r[position, 1],
                       label="target=%d" % label, color=color)
        ax.set_xlabel("x[0]")
        ax.set_ylabel("x[1]")
        ax.set_xticks([])
        ax.set_yticks([])
        ax.legend(loc="best")
        ax.set_title(r"$\exp(-%s||x-z||^2)$" % gamma)
    plt.suptitle("KPCA-rbf")
    plt.show()
def plot_KPCA(*data):
    X, Y = data
    kernels = ['linear', 'poly', 'rbf', 'sigmoid']
    fig = plt.figure()
    colors = ((1, 0, 0), (0, 1, 0), (0, 0, 1), (0.5, 0.5, 0), (0, 0.5, 0.5),
              (0.5, 0, 0.5), (0.4, 0.6, 0), (0.6, 0.4, 0), (0, 0.6, 0.4), (0.5, 0.3, 0.2),)
    for i, kernel in enumerate(kernels):
        kpca = decomposition.KernelPCA(n_components=2, kernel=kernel)
        kpca.fit(X)
        X_r = kpca.transform(X)
        ax = fig.add_subplot(2, 2, i + 1)
        for label, color in zip(np.unique(Y), colors):
            position = Y == label
            ax.scatter(X_r[position, 0], X_r[position, 1],
                       label="target=%d" % label, color=color)
        ax.set_xlabel("X[0]")
        ax.set_ylabel("X[1]")
        ax.legend(loc="best")
        ax.set_title("kernel=%s" % kernel)
    plt.suptitle("KPCA")
    plt.show()
def test_KPCA(*data):
    X, Y = data
    kernels = ['linear', 'poly', 'rbf', 'sigmoid']
    for kernel in kernels:
        kpca = decomposition.KernelPCA(n_components=None, kernel=kernel)
        kpca.fit(X)
        # lambdas_ was renamed eigenvalues_ in scikit-learn 1.0.
        print("kernel=%s-->eigenvalues:%s" % (kernel, kpca.eigenvalues_))
def plot_kpca(*data):
    x, y = data
    fig = plt.figure()
    colors = ((1, 0, 0), (0, 1, 0), (0, 0, 1))
    kernels = ["linear", "poly", "rbf", "sigmoid"]
    for i, kernel in enumerate(kernels):
        print("-" * 50)
        kpca = decomposition.KernelPCA(n_components=None, kernel=kernel)
        kpca.fit(x)
        x_r = kpca.transform(x)
        ax = fig.add_subplot(2, 2, i + 1)
        for label, color in zip(np.unique(y), colors):
            print(label, " ", color)
            position = y == label
            ax.scatter(x_r[position, 0], x_r[position, 1],
                       label="target=%d" % label, color=color)
        ax.set_xlabel("x[0]")
        ax.set_ylabel("x[1]")  # the second projected coordinate, not "y[0]"
        ax.legend(loc="best")
        ax.set_title("kernel=%s" % kernel)
    plt.suptitle("KPCA")
    plt.show()
def _gen_gbdt_features(self, pool):
    final = []
    col_gbdts = ['gbdt_' + str(i) for i in range(self.n_tree)]
    for g in pool.groupby(self.kcols):
        (keys, df) = g
        # pd.datetime was removed from pandas; use pd.Timestamp instead.
        train = df[df.time_window_start < pd.Timestamp(2016, 10, 18)]
        train_x = train.drop([*self.timecols, *self.kcols, self.vcol], axis=1)
        train_y = train[self.vcol]
        regor = GradientBoostingRegressor(
            loss='huber', n_estimators=self.n_tree, max_leaf_nodes=10)
        model = regor.fit(train_x, train_y)
        gbdt_features = []
        df_x = df.drop([*self.timecols, *self.kcols, self.vcol], axis=1)
        assert len(model.estimators_) == self.n_tree, 'n_tree does not match!'
        for tree in model.estimators_:
            gbdt_features.append(tree[0].apply(df_x))
        gbdt_features = np.array(gbdt_features).T
        df_gbdts = pd.DataFrame(gbdt_features, columns=col_gbdts)
        # ignore_index would drop column names, so we do not use it here.
        df.reset_index(drop=True, inplace=True)
        df_gbdts.reset_index(drop=True, inplace=True)
        final.append(pd.concat([df, df_gbdts], axis=1))
    final = pd.concat(final)

    # One-hot encode the leaf indices.
    origs = final[list(set(final.columns) - set(col_gbdts))]
    gbdts = final[col_gbdts].copy()  # copy to avoid SettingWithCopyWarning
    for gbdtcol in col_gbdts:
        gbdts[gbdtcol] = gbdts[gbdtcol].astype('category')
    gbdts_dummies = pd.get_dummies(gbdts).reset_index(drop=True)
    n_feature = int(origs.shape[1] / 4)

    # Remove duplicated GBDT features.
    gbdts = gbdts_dummies.T.drop_duplicates().T
    #gbdts = preprocessing.RobustScaler().fit_transform(gbdts)
    #gbdts = decomposition.TruncatedSVD(
    #    n_components=int(origs.shape[1] / 4), n_iter=20
    gbdts = decomposition.KernelPCA(
        n_components=n_feature
    ).fit_transform(gbdts)
    gbdts = pd.DataFrame(
        gbdts, columns=['gbdt_{0}'.format(i) for i in range(n_feature)])
    print('GBDT Feature Shape:', gbdts.shape, origs.shape)
    origs.reset_index(drop=True, inplace=True)
    gbdts.reset_index(drop=True, inplace=True)
    return pd.concat([origs, gbdts], axis=1)
def add_kernel_pca_reduction(self, n_components: int = None,
                             kernel: str = 'linear',  # one of: 'linear', 'poly', 'rbf', 'sigmoid', 'cosine', 'precomputed'
                             gamma: float = None, degree: int = 3, coef0: float = 1,
                             n_jobs: int = -1, remove_zero_eig: bool = False,
                             fit_inverse_transform: bool = False, **kwargs):
    # Note: unlike PCA, KernelPCA does not accept 'mle' or a variance fraction
    # for n_components; it must be None or an int.
    self._pca.append(decomposition.KernelPCA(n_components=n_components,
                                             kernel=kernel,
                                             gamma=gamma,
                                             degree=degree,
                                             coef0=coef0,
                                             fit_inverse_transform=fit_inverse_transform,
                                             n_jobs=n_jobs,
                                             remove_zero_eig=remove_zero_eig,
                                             **kwargs))
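# A minimal usage sketch for the method above, assuming a hypothetical host
# class with a `_pca` list that the method appends to.
from sklearn import datasets, decomposition

class Pipeline:
    def __init__(self):
        self._pca = []

Pipeline.add_kernel_pca_reduction = add_kernel_pca_reduction

if __name__ == '__main__':
    p = Pipeline()
    p.add_kernel_pca_reduction(n_components=2, kernel='rbf', gamma=0.5)
    X_demo = datasets.load_iris().data
    print(p._pca[0].fit_transform(X_demo).shape)  # (150, 2)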