Example #1
#Classification using the kernel trick
from sklearn import datasets 
from sklearn.svm import SVC
import numpy as np
import matplotlib.pyplot as plt 

#Load the dataset
iris = datasets.load_iris()
X = iris.data[:,[2,3]]
y = iris.target

#Standardize X feature by feature
X_std = (X - X.mean(axis=0)) / X.std(axis=0)

#See how the decision regions change with the hyperparameter gamma
param = [0.10,1.0,10.0,100.0]
label = ['0.1','1.0','10.0','100.0']
for i in range(0,len(param)):
    #Create an RBF-kernel SVM instance
    svm = SVC(kernel='rbf', C=1.0, gamma=param[i], random_state=0)
    svm.fit(X_std,y)

    #Plot the decision boundary
    from mlxtend.plotting import plot_decision_regions
    plot_decision_regions(X_std, y, clf=svm)
    plt.xlabel("petal length")
    plt.ylabel("petal width")
    plt.legend(loc="upper left")
    plt.title("Gausian Kernel gamma= " + label[i])
    plt.show()
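The growing complexity of the boundaries can also be read off numerically. A minimal sketch, reusing X_std, y, and param from above, that reports the training accuracy for each gamma (larger gamma fits the training set more and more tightly):

#Training accuracy per gamma (sketch; reuses X_std, y, param from above)
for g in param:
    clf = SVC(kernel='rbf', C=1.0, gamma=g, random_state=0)
    clf.fit(X_std, y)
    print(f"gamma={g}: train accuracy={clf.score(X_std, y):.3f}")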
Example #2

#Analyze the Wine dataset
wine = datasets.load_wine()
feature_names = wine.feature_names

X = wine.data  #features
y = wine.target  #class labels
y = y + 1  #shift the labels from 0..2 to 1..3

from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X,
                                                    y,
                                                    test_size=0.1,
                                                    random_state=0)

#Create the PCA instance (keep the first two principal components)
from sklearn.decomposition import PCA
pca = PCA(n_components=2)
X_train_pca = pca.fit_transform(X_train)
print(pca.explained_variance_ratio_)  #variance captured by PC1 and PC2
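The original pca.plot call has no scikit-learn counterpart; a sketch of what it presumably intended, plotting the cumulative explained variance of a full PCA fit on the same X_train:

pca_full = PCA().fit(X_train)  #all components, just to inspect the variance profile
plt.plot(np.cumsum(pca_full.explained_variance_ratio_), marker='o')
plt.xlabel('Number of components')
plt.ylabel('Cumulative explained variance')
plt.show()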

#Next, linearly separate the dimension-reduced data
from mlxtend.plotting import plot_decision_regions
from sklearn.linear_model import LogisticRegression

lrGD = LogisticRegression()
lrGD.fit(X_train_pca, y_train)
plot_decision_regions(X_train_pca, y_train, clf=lrGD)
plt.xlabel('PC1')
plt.ylabel('PC2')
plt.legend(loc="lower left")
plt.show()
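It is worth checking how this PCA + logistic-regression pipeline generalizes. A minimal sketch that projects the held-out split with the already-fitted pca and scores it:

X_test_pca = pca.transform(X_test)  #project the test split onto the same PCs
print(f"test accuracy: {lrGD.score(X_test_pca, y_test):.3f}")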
Example #3

#Analyze the Wine dataset
wine = datasets.load_wine()
feature_names = wine.feature_names

X = wine.data  #features
y = wine.target  #class labels
y = y + 1  #shift the labels from 0..2 to 1..3

X_train, X_test, y_train, y_test = train_test_split(X,
                                                    y,
                                                    test_size=0.3,
                                                    random_state=1)

#Create the LDA instance (two discriminants, for plotting)
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis as LDA
lda = LDA(n_components=2)

#Linearly separate the LDA-projected data
X_train_lda = lda.fit_transform(X_train, y_train)
from mlxtend.plotting import plot_decision_regions
Lr = LogisticRegression(penalty='l2')
Lr.fit(X_train_lda, y_train)
plot_decision_regions(X_train_lda, y_train, clf=Lr)
plt.xlabel('LD1')
plt.ylabel('LD2')
plt.legend(loc="lower right")
plt.show()
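As with the PCA pipeline, a minimal sketch scoring the LDA + logistic-regression model on the held-out split:

X_test_lda = lda.transform(X_test)  #project the test split with the fitted LDA
print(f"test accuracy: {Lr.score(X_test_lda, y_test):.3f}")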
Example #4

#Binary classification on Iris, keeping only classes 0 and 1
iris = datasets.load_iris()
X_raw = iris.data[:, [0, 2]]  #sepal length and petal length, matching the axis labels below
y_raw = iris.target
mask = (y_raw == 0) | (y_raw == 1)  #keep only the first two classes
X = X_raw[mask]
y = y_raw[mask]

#Split the dataset
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X,
                                                    y,
                                                    test_size=0.3,
                                                    random_state=1,
                                                    stratify=y)
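
LrGD is used below but never defined in this section. A minimal sketch of such a class, assuming the usual textbook full-batch gradient-descent logistic regression (this whole class is an assumption, reconstructed so the code below can run):

class LrGD:
    """Hypothetical gradient-descent logistic regression (binary, full-batch)."""
    def __init__(self, eta=0.05, n_iter=100, random_state=1):
        self.eta = eta              #learning rate
        self.n_iter = n_iter        #number of passes over the training data
        self.random_state = random_state

    def fit(self, X, y):
        rgen = np.random.RandomState(self.random_state)
        self.w_ = rgen.normal(loc=0.0, scale=0.01, size=X.shape[1] + 1)
        for _ in range(self.n_iter):
            output = self.activation(self.net_input(X))
            errors = y - output                        #gradient of the log-likelihood
            self.w_[1:] += self.eta * X.T.dot(errors)  #update weights
            self.w_[0] += self.eta * errors.sum()      #update bias
        return self

    def net_input(self, X):
        return np.dot(X, self.w_[1:]) + self.w_[0]

    def activation(self, z):
        return 1.0 / (1.0 + np.exp(-np.clip(z, -250, 250)))  #sigmoid

    def predict(self, X):
        return np.where(self.net_input(X) >= 0.0, 1, 0)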

#Train the classifier and plot the decision boundary
lrgd = LrGD(eta=0.05, n_iter=1000, random_state=1)
lrgd.fit(X_train, y_train)

from mlxtend.plotting import plot_decision_regions
plot_decision_regions(X_train, y_train, clf=lrgd)
plt.xlabel("Sepal Length (train data)")
plt.ylabel("Petal Length (test data)")
plt.title("Logistic Regression - Gradient Decent")
plt.legend(loc="upper left")
plt.show()
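A quick check of the trained model on the held-out split, using the predict method of the LrGD sketch above:

y_pred = lrgd.predict(X_test)
print(f"test accuracy: {np.mean(y_pred == y_test):.3f}")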
Example #5
#Kernel PCA on the two-class Iris subset from Example #4 (X, y before the split);
#the Kernel_PCA class used originally is not defined here, so scikit-learn's KernelPCA stands in
from sklearn.decomposition import KernelPCA
kpca = KernelPCA(n_components=2, kernel='rbf', gamma=15)
X_kpca = kpca.fit_transform(X)

fig,ax = plt.subplots(1,2,figsize=(7,3))

ax[0].scatter(X_kpca[y==0,0],X_kpca[y==0,1],c='red',marker='o',edgecolor='k',label='0')
ax[0].scatter(X_kpca[y==1,0],X_kpca[y==1,1],c='blue',marker='o',edgecolor='k',label='1')
ax[1].scatter(X_kpca[y==0,0],np.zeros((50,1))+0.02,c='red',marker='o',edgecolor='k',label='0')
ax[1].scatter(X_kpca[y==1,0],np.zeros((50,1))-0.02,c='blue',marker='o',edgecolor='k',label='1')
ax[0].set_xlabel('PC1')
ax[0].set_ylabel('PC2')
ax[1].set_xlabel('PC1')
ax[1].set_yticks([])  #the right panel shows the PC1 projection only, so no PC2 axis
ax[1].set_ylim([-1,1])
plt.show()

#Check whether the transformed data is linearly separable
from sklearn.linear_model import LogisticRegression

lr = LogisticRegression(penalty='l2')
lr.fit(X_kpca,y)

from mlxtend.plotting import plot_decision_regions

plot_decision_regions(X_kpca, y, clf=lr)
plt.xlabel("PC1")
plt.ylabel("PC2")
plt.title("Linear seperation - Kernel PCA")
plt.legend(loc="upper right")
plt.show()
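Finally, a minimal check that the kernel-PCA features really are linearly separable, scoring the logistic regression on its own training data:

print(f"training accuracy on kernel-PCA features: {lr.score(X_kpca, y):.3f}")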