def s3_7():
    """Fit a 5-nearest-neighbors classifier on the standardized Iris
    features and plot its decision regions over the combined data.

    Relies on module-level globals: X_train_std, y_train,
    X_combined_std, y_combined.
    """
    # p=2 with the minkowski metric is plain Euclidean distance.
    knn = KNeighborsClassifier(n_neighbors=5, p=2, metric="minkowski")
    knn.fit(X_train_std, y_train)

    # Indices 105-149 are highlighted as the test portion of the
    # combined array by plot_decision_regions.
    plot_decision_regions(X_combined_std, y_combined, classifier=knn,
                          test_idx=range(105, 150))
    plt.xlabel("petal length [standardized]")
    plt.ylabel("petal width [standardized]")
    plt.legend(loc="upper left")
    plt.show()
def s3_6_3():
    """Train a 10-tree random forest on the raw (unscaled) Iris data
    and visualize its decision regions.

    Relies on module-level globals: X_train, y_train, X_combined,
    y_combined.
    """
    # Trees are scale-invariant, so the unstandardized features are used.
    rf = RandomForestClassifier(criterion="entropy", n_estimators=10,
                                random_state=1, n_jobs=2)
    rf.fit(X_train, y_train)

    plot_decision_regions(X_combined, y_combined, classifier=rf,
                          test_idx=range(105, 150))
    plt.xlabel("petal length [cm]")
    plt.ylabel("petal width [cm]")
    plt.legend(loc="upper left")
    plt.show()
def s3_6_2():
    """Train a depth-3 decision tree on the raw Iris data, plot its
    decision regions, and export the tree to ``tree.dot`` for Graphviz.

    Relies on module-level globals: X_train, y_train, X_combined,
    y_combined.
    """
    tree = DecisionTreeClassifier(criterion="entropy", max_depth=3,
                                  random_state=0)
    tree.fit(X_train, y_train)

    plot_decision_regions(X_combined, y_combined, classifier=tree,
                          test_idx=range(105, 150))
    plt.xlabel("petal length [cm]")
    plt.ylabel("petal width [cm]")
    plt.legend(loc="upper left")
    plt.show()

    # Side effect: writes a Graphviz dot file to the working directory.
    export_graphviz(tree, out_file="tree.dot",
                    feature_names=["petal length", "petal width"])
def s5_2_4():
    """Project the standardized data onto two linear discriminants,
    fit logistic regression in that space, and plot decision regions
    for the training set and then the test set.

    Relies on module-level globals: X_train_std, y_train, X_test_std,
    y_test.
    """
    lda = LinearDiscriminantAnalysis(n_components=2)
    X_train_lda = lda.fit_transform(X_train_std, y_train)

    lr = LogisticRegression()
    lr = lr.fit(X_train_lda, y_train)

    # Training-set regions in LDA space.
    plot_decision_regions(X_train_lda, y_train, classifier=lr)
    plt.xlabel("LD 1")
    plt.ylabel("LD 2")
    plt.legend(loc="lower left")
    plt.show()

    # Test-set regions, using the projection fitted on the training set.
    X_test_lda = lda.transform(X_test_std)
    plot_decision_regions(X_test_lda, y_test, classifier=lr)
    plt.xlabel("LD 1")
    plt.ylabel("LD 2")
    plt.legend(loc="lower left")
    plt.show()
def s5_1_3():
    """Reduce the standardized data to two principal components, fit
    logistic regression there, plot train/test decision regions, and
    finally print the full explained-variance-ratio spectrum.

    Relies on module-level globals: X_train_std, y_train, X_test_std,
    y_test.
    """
    pca = PCA(n_components=2)
    lr = LogisticRegression()
    X_train_pca = pca.fit_transform(X_train_std)
    X_test_pca = pca.transform(X_test_std)
    lr.fit(X_train_pca, y_train)

    # Training-set regions in PCA space.
    plot_decision_regions(X_train_pca, y_train, classifier=lr)
    plt.xlabel("PC1")
    plt.ylabel("PC2")
    plt.legend(loc="lower left")
    plt.show()

    # Test-set regions.
    plot_decision_regions(X_test_pca, y_test, classifier=lr)
    plt.xlabel("PC1")
    plt.ylabel("PC2")
    plt.legend(loc="lower left")
    plt.show()

    # Re-fit PCA keeping every component so the variance ratios of all
    # dimensions can be inspected.
    pca = PCA(n_components=None)
    X_train_pca = pca.fit_transform(X_train_std)
    print(pca.explained_variance_ratio_)
# Split the data, standardize the features, train a Perceptron, report
# its test accuracy, and plot the decision regions.
# Relies on X, y, and the imports appearing earlier in the file.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=0)

# Standardize with statistics estimated on the training set only.
sc = StandardScaler()
sc.fit(X_train)
X_train_std = sc.transform(X_train)
X_test_std = sc.transform(X_test)

ppn = Perceptron(max_iter=40, eta0=0.1, random_state=0, shuffle=True)
ppn.fit(X_train_std, y_train)

y_pred = ppn.predict(X_test_std)
print("Misclassified samples: {0:d}".format((y_test != y_pred).sum()))
print("Accuracy: {0:.2f}".format(accuracy_score(y_test, y_pred)))

# Plot over the combined train+test arrays; indices 105-149 (the test
# portion) are highlighted by plot_decision_regions.
X_combined_std = np.vstack((X_train_std, X_test_std))
y_combined = np.hstack((y_train, y_test))
plot_decision_regions(X=X_combined_std, y=y_combined, classifier=ppn,
                      test_idx=range(105, 150))
plt.xlabel("petal length [standardized]")
plt.ylabel("petal width [standardized]")
plt.legend(loc="upper left")
plt.show()
"""Train a 5-NN classifier on two standardized Iris features and plot
its decision regions."""
import numpy as np
# BUG FIX: plt.show() was called below, but matplotlib.pyplot was never
# imported, which raised NameError at runtime.
import matplotlib.pyplot as plt
from sklearn.neighbors import KNeighborsClassifier
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

from util import plot_decision_regions

# Keep only petal length and petal width (columns 2 and 3).
iris = datasets.load_iris()
x = iris.data[:, [2, 3]]
y = iris.target

x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=0.3, random_state=0)

# Standardize with training-set statistics only.
sc = StandardScaler()
sc.fit(x_train)
x_train_std = sc.transform(x_train)
x_test_std = sc.transform(x_test)

# p=2 with the minkowski metric is plain Euclidean distance.
knn = KNeighborsClassifier(n_neighbors=5, p=2, metric='minkowski')
knn.fit(x_train_std, y_train)

x_combined_std = np.vstack((x_train_std, x_test_std))
y_combined = np.hstack((y_train, y_test))
plot_decision_regions(x_combined_std, y_combined, knn)
plt.show()
# Scatter the class -1 samples of the XOR data (X_xor/y_xor are built
# earlier in this script).
plt.scatter(X_xor[y_xor == -1, 0], X_xor[y_xor == -1, 1],
            c="r", marker="s", label="-1")
plt.xlim([-3, 3])
plt.ylim([-3, 3])
plt.legend(loc="best")
plt.show()
# ----
# RBF-kernel SVM can separate the non-linearly-separable XOR data.
svm = SVC(kernel="rbf", random_state=0, gamma=0.10, C=10.0)
svm.fit(X_xor, y_xor)
# BUG FIX: the XOR data is never split into train/test sets, so passing
# test_idx=range(105, 150) (copied from the Iris demos) wrongly
# highlighted 45 arbitrary samples as a "test set"; it is dropped.
plot_decision_regions(X_xor, y_xor, classifier=svm)
plt.legend(loc="upper left")
plt.show()
# ----
# Reload Iris (petal length/width) and start standardizing for the
# section that continues below.
iris = datasets.load_iris()
X = iris.data[:, [2, 3]]
y = iris.target
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=0)
sc = StandardScaler()
sc.fit(X_train)
# Binary targets in {-1, +1}: setosa -> -1, everything else -> +1.
y = np.where(y == 'Iris-setosa', -1, 1)
# Column 0 is sepal length, column 2 is petal length.
X = df.iloc[0:100, [0, 2]].values

# Figure 100: raw scatter of the two classes.
plt.figure(100)
plt.scatter(X[:50, 0], X[:50, 1], color='red', marker='o', label='setosa')
plt.scatter(X[50:100, 0], X[50:100, 1], color='blue', marker='x',
            label='versicolor')
# BUG FIX: the axis labels were swapped — the x-axis plots X[:, 0]
# (sepal length) and the y-axis X[:, 1] (petal length), matching the
# correctly-labeled figure 300 below.
plt.xlabel('sepal length')
plt.ylabel('petal length')
plt.legend(loc="upper left")

ppn = SimplePerceptron(learning_rate=10, n_iterations=100)
ppn.fit(X, y)

# Figure 200: training curve (misclassifications per epoch).
plt.figure(200)
plt.plot(range(1, len(ppn.errors) + 1), ppn.errors, marker='o')
plt.xlabel('epochs')
plt.ylabel('number of misclassifications')

# Figure 300: learned decision regions.
plt.figure(300)
plot_decision_regions(X, y, classifier=ppn)
plt.xlabel('sepal length [cm]')
plt.ylabel('petal length [cm]')
plt.legend(loc='upper left')
plt.show()
"""Train a linear SVM on two standardized Iris features and plot its
decision regions."""
# BUG FIX: np.vstack/np.hstack and plt.show() were used below, but
# numpy and matplotlib.pyplot were never imported (NameError at runtime).
import numpy as np
import matplotlib.pyplot as plt
from sklearn.svm import SVC
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

from util import plot_decision_regions

# Keep only petal length and petal width (columns 2 and 3).
iris = datasets.load_iris()
x = iris.data[:, [2, 3]]
y = iris.target

x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=0.3, random_state=0)

# Standardize with training-set statistics only.
sc = StandardScaler()
sc.fit(x_train)
x_train_std = sc.transform(x_train)
x_test_std = sc.transform(x_test)

svm = SVC(kernel='linear', C=10.0, random_state=0)
svm.fit(x_train_std, y_train)

x_combined_std = np.vstack((x_train_std, x_test_std))
y_combined = np.hstack((y_train, y_test))
plot_decision_regions(x_combined_std, y_combined, svm)
plt.show()
# Class balance of the held-out labels (y_test etc. are defined earlier
# in this script).
print('Labels counts in y_test:', np.bincount(y_test))

# GaussianNB
# https://scikit-learn.org/stable/modules/generated/sklearn.naive_bayes.GaussianNB.html
#
gnb_model = GaussianNB()
gnb_model.fit(X_train, y_train)

# test prediction
y_pred = gnb_model.predict(X_test)
print('Misclassified samples: %d' % (y_test != y_pred).sum())
print('Accuracy: %.2f%%' % (100.0 * gnb_model.score(X_test, y_test)))

# decision boundary: plot over the combined train+test data and
# highlight the test samples (indices past the training-set length).
X_combined = np.vstack((X_train, X_test))
y_combined = np.hstack((y_train, y_test))
train_len = X_train.shape[0]
combined_len = X_combined.shape[0]

plt.figure(figsize=(3, 3), dpi=300)
plot_decision_regions(X=X_combined, y=y_combined, classifier=gnb_model,
                      test_idx=range(train_len, combined_len))
plt.xlabel('petal length [cm]')
plt.ylabel('petal width [cm]')
plt.legend(loc='upper left')
plt.tight_layout()
#plt.savefig('images/03_01.png', dpi=300)
plt.show()
"""Compare RBF-SVM decision regions on XOR data for a small and a very
large gamma."""
import matplotlib
matplotlib.use('TkAgg')
import matplotlib.pyplot as plt
import numpy as np
from sklearn.svm import SVC

from util import plot_decision_regions

# Synthetic XOR data: 200 points, label +1 when exactly one coordinate
# is positive, -1 otherwise (not linearly separable).
np.random.seed(0)
x_xor = np.random.randn(200, 2)
y_xor = np.logical_xor(x_xor[:, 0] > 0, x_xor[:, 1] > 0)
y_xor = np.where(y_xor, 1, -1)

# BUG FIX: both decision-region plots were drawn without switching
# figures, so the gamma=100 regions were painted over the gamma=0.1
# ones and only a single merged plot was shown. Give each its own
# figure. (Assumes plot_decision_regions draws on the current axes,
# consistent with its use alongside plt.xlabel elsewhere in this file.)
plt.figure(1)
svm = SVC(kernel='rbf', random_state=0, gamma=0.1, C=10.0)
svm.fit(x_xor, y_xor)
plot_decision_regions(x_xor, y_xor, classifier=svm)

# A huge gamma overfits: regions hug the individual training points.
plt.figure(2)
svm = SVC(kernel='rbf', random_state=0, gamma=100.0, C=10.0)
svm.fit(x_xor, y_xor)
plot_decision_regions(x_xor, y_xor, classifier=svm)

plt.show()
"""Train a 10-tree random forest on two standardized Iris features and
plot its decision regions."""
# BUG FIX: this script used datasets, train_test_split,
# RandomForestClassifier, np and plt without importing any of them,
# raising NameError at runtime.
import numpy as np
import matplotlib.pyplot as plt
from sklearn import datasets
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

from util import plot_decision_regions

# Keep only petal length and petal width (columns 2 and 3).
iris = datasets.load_iris()
x = iris.data[:, [2, 3]]
y = iris.target

x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=0.3, random_state=0)

# Standardize with training-set statistics only (kept for parity with
# the other scripts, although trees are scale-invariant).
sc = StandardScaler()
sc.fit(x_train)
x_train_std = sc.transform(x_train)
x_test_std = sc.transform(x_test)

forest = RandomForestClassifier(criterion='entropy', n_estimators=10,
                                random_state=1, n_jobs=2)
forest.fit(x_train_std, y_train)

x_combined_std = np.vstack((x_train_std, x_test_std))
y_combined = np.hstack((y_train, y_test))
plot_decision_regions(x_combined_std, y_combined, forest)
plt.show()
"""Train an Adaline perceptron on two standardized Iris features and
plot its cost curve and decision regions."""
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from .perceptron import AdalinePreceptron
from util import plot_decision_regions

# Fetch the Iris data (first 100 rows are setosa then versicolor).
df = pd.read_csv(
    'https://archive.ics.uci.edu/ml/'
    'machine-learning-databases/iris/iris.data',
    header=None)

# Binary targets in {-1, +1}; features: sepal length (col 0) and
# petal length (col 2).
y = df.iloc[0:100, 4].values
y = np.where(y == 'Iris-setosa', -1, 1)
X = df.iloc[0:100, [0, 2]].values

# Standardize each feature column (zero mean, unit variance).
X_std = np.copy(X)
X_std[:, 0] = (X[:, 0] - X[:, 0].mean()) / X[:, 0].std()
X_std[:, 1] = (X[:, 1] - X[:, 1].mean()) / X[:, 1].std()

ada = AdalinePreceptron(n_iter=15)
ada = ada.fit(X_std, y)

# BUG FIX: the cost curve and the decision regions were both drawn into
# the same (current) figure, so the second plot painted over the first;
# give each its own figure. (Assumes plot_decision_regions draws on the
# current axes, consistent with its use elsewhere in this file.)
plt.figure(1)
plt.plot(range(1, len(ada.cost) + 1), ada.cost, marker='o')

plt.figure(2)
plot_decision_regions(X_std, y, classifier=ada)
plt.title('Adaline - Gradient Descent')
plt.xlabel('sepal length [standardized]')
plt.ylabel('petal length [standardized]')
plt.legend(loc='upper left')
plt.tight_layout()
plt.show()
"""Train a depth-3 decision tree on two standardized Iris features and
plot its decision regions."""
import numpy as np
# BUG FIX: plt.show() was called below, but matplotlib.pyplot was never
# imported, which raised NameError at runtime.
import matplotlib.pyplot as plt
from sklearn.tree import DecisionTreeClassifier
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

from util import plot_decision_regions

# Keep only petal length and petal width (columns 2 and 3).
iris = datasets.load_iris()
x = iris.data[:, [2, 3]]
y = iris.target

x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=0.3, random_state=0)

# Standardize with training-set statistics only (kept for parity with
# the other scripts, although trees are scale-invariant).
sc = StandardScaler()
sc.fit(x_train)
x_train_std = sc.transform(x_train)
x_test_std = sc.transform(x_test)

tree = DecisionTreeClassifier(criterion='entropy', max_depth=3,
                              random_state=0)
tree.fit(x_train_std, y_train)

x_combined_std = np.vstack((x_train_std, x_test_std))
y_combined = np.hstack((y_train, y_test))
plot_decision_regions(x_combined_std, y_combined, tree)
plt.show()