# Classification with the kernel trick: RBF-kernel SVM on two iris features,
# showing how the decision regions change with the gamma hyperparameter.
from sklearn import datasets
from sklearn.svm import SVC
import numpy as np
import matplotlib.pyplot as plt
# fix: plot_decision_regions lives in mlxtend.plotting, not the package root
from mlxtend.plotting import plot_decision_regions

# Build the dataset: petal length and petal width only.
iris = datasets.load_iris()
X = iris.data[:, [2, 3]]
y = iris.target

# Standardize each feature separately (axis=0). The original used the global
# mean/std over the flattened array, which mixes the two features' scales.
X_std = (X - X.mean(axis=0)) / X.std(axis=0)

# Sweep gamma and plot the resulting decision regions.
params = [0.10, 1.0, 10.0, 100.0]
labels = ['0.1', '1.0', '10.0', '100.0']
for gamma, label in zip(params, labels):
    # RBF-kernel SVM instance (C fixed, gamma varied).
    svm = SVC(kernel='rbf', C=1.0, gamma=gamma, random_state=0)
    svm.fit(X_std, y)
    # Plot the decision boundary for this gamma.
    plot_decision_regions(X_std, y, clf=svm)
    plt.xlabel("petal length")
    plt.ylabel("petal width")
    plt.legend(loc="upper left")
    plt.title("Gausian Kernel gamma= " + label)
    plt.show()
# Analyze the Wine dataset: reduce to 2 principal components with PCA,
# then check linear separability with logistic regression.
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.decomposition import PCA
from sklearn.linear_model import LogisticRegression
from mlxtend.plotting import plot_decision_regions
import matplotlib.pyplot as plt

wine = datasets.load_wine()
feature_names = wine.feature_names
X = wine.data      # feature matrix
y = wine.target    # class labels
y = y + 1          # shift labels to 1..3 (kept to match the original behavior)

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.1, random_state=0)

# PCA restricted to 2 components so the projection can be plotted in 2-D.
# NOTE(review): the original called pca.tot(...) and pca.plot(...), which do
# not exist on sklearn's PCA (they appear to belong to a custom class from the
# source notebook) — removed, along with the redundant fit() before
# fit_transform(). PCA is unsupervised, so y is not passed.
pca = PCA(n_components=2)
X_train_pca = pca.fit_transform(X_train)

# Linearly separate the dimensionality-reduced data.
lrGD = LogisticRegression()
lrGD.fit(X_train_pca, y_train)
# fix: mlxtend's keyword is clf=, not classifier=
plot_decision_regions(X_train_pca, y_train, clf=lrGD)
plt.xlabel('PC1')
plt.ylabel('PC2')
plt.legend(loc="lower left")
plt.show()
# Analyze the Wine dataset: project onto 2 linear discriminants with LDA,
# then fit an L2-regularized logistic regression on the projection.
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis as LDA
from sklearn.linear_model import LogisticRegression
from mlxtend.plotting import plot_decision_regions
import matplotlib.pyplot as plt

wine = datasets.load_wine()
feature_names = wine.feature_names
X = wine.data      # feature matrix (the original's no-op "X = X" removed)
y = wine.target    # class labels
y = y + 1          # shift labels to 1..3 (kept to match the original behavior)

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=1)

# LDA instance; n_components=2 so the projection can be plotted in 2-D.
# NOTE(review): the original called lda.plot(...), which sklearn's LDA does
# not have (likely from a custom class) — removed, along with the redundant
# fit() before fit_transform().
lda = LDA(n_components=2)
X_train_lda = lda.fit_transform(X_train, y_train)

# Linear separation on the LDA-projected training data.
Lr = LogisticRegression(penalty='l2')
Lr.fit(X_train_lda, y_train)
# fix: mlxtend's keyword is clf=, not classifier=
plot_decision_regions(X_train_lda, y_train, clf=Lr)
plt.xlabel('LD1')
plt.ylabel('LD2')
plt.legend(loc="lower right")
plt.show()
# Keep only iris classes 0 and 1 (a linearly separable pair) and fit a custom
# gradient-descent logistic regression (LrGD, defined elsewhere in the project).
# fix: the original referenced X_raw without ever defining it (NameError);
# columns [0, 2] (sepal length, petal length) are chosen to match the axis
# labels below — TODO confirm against the original notebook.
X_raw = iris.data[:, [0, 2]]
y_raw = iris.target

# Boolean masking replaces the original element-by-element index loop.
mask = (y_raw == 0) | (y_raw == 1)
X = X_raw[mask]
y = y_raw[mask]

# Split the dataset, stratified so both classes keep their proportions.
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=1, stratify=y)

# Fit the custom classifier and plot its decision boundary.
lrgd = LrGD(eta=0.05, n_iter=1000, random_state=1)
lrgd.fit(X_train, y_train)
# fix: plot_decision_regions is not exposed at the mlxtend package root
from mlxtend.plotting import plot_decision_regions
plot_decision_regions(X_train, y_train, clf=lrgd)
plt.xlabel("Sepal Length (train data)")
plt.ylabel("Petal Length (train data)")  # fix: the plot shows training data, not test data
plt.title("Logistic Regression - Gradient Decent")
plt.legend(loc="upper left")
plt.show()
# Project the data with kernel PCA and visualize the result, then check that
# the projected data is linearly separable with logistic regression.
# NOTE(review): Kernel_PCA is not sklearn's KernelPCA — presumably a custom
# class defined elsewhere in the project; verify its fit_transform signature.
kpca = Kernel_PCA(n_components=2, gamma=15)
# Single fit_transform replaces the original's redundant fit() + fit_transform().
X_kpca = kpca.fit_transform(X, y)

fig, ax = plt.subplots(1, 2, figsize=(7, 3))
neg = (y == 0)
pos = (y == 1)
# Left panel: both principal components.
ax[0].scatter(X_kpca[neg, 0], X_kpca[neg, 1], c='red', marker='o', edgecolor='k', label='0')
ax[0].scatter(X_kpca[pos, 0], X_kpca[pos, 1], c='blue', marker='o', edgecolor='k', label='1')
# Right panel: first component only, with a small vertical offset per class.
# fix: derive each class's size from its mask instead of the hard-coded 50,
# which crashed whenever a class did not have exactly 50 samples.
ax[1].scatter(X_kpca[neg, 0], np.zeros(neg.sum()) + 0.02, c='red', marker='o', edgecolor='k', label='0')
ax[1].scatter(X_kpca[pos, 0], np.zeros(pos.sum()) - 0.02, c='blue', marker='o', edgecolor='k', label='1')
ax[0].set_xlabel('PC1')
ax[0].set_ylabel('PC2')
ax[1].set_xlabel('PC1')
ax[1].set_ylabel('PC2')
ax[1].set_ylim([-1, 1])
plt.show()

# Verify that the transformed data is linearly separable.
from sklearn.linear_model import LogisticRegression
lr = LogisticRegression(penalty='l2')
lr.fit(X_kpca, y)
# fix: plot_decision_regions lives in mlxtend.plotting; keyword is clf=
from mlxtend.plotting import plot_decision_regions
plot_decision_regions(X_kpca, y, clf=lr)
plt.xlabel("PC1")
plt.ylabel("PC2")
plt.title("Linear seperation - Kernel PCA")
plt.legend(loc="upper right")
plt.show()