def plot_knn_classification(n_neighbors=1):
    """Illustrate k-NN classification on the forge dataset.

    Draws the training points, a diagonal line of synthetic test points
    (starred, colored by their predicted class), and a line segment from
    each test point to each of its ``n_neighbors`` closest training points.
    """
    X, y = make_forge()

    # Synthetic test points along a diagonal through feature space.
    grid = np.arange(8, 12, 0.5)
    X_test = np.column_stack((grid, (grid - 8) * 1.4))

    model = KNeighborsClassifier(n_neighbors=n_neighbors).fit(X, y)

    # Test points first (stars, colored by prediction), then training data.
    test_points = discrete_scatter(X_test[:, 0], X_test[:, 1],
                                   model.predict(X_test), markers="*")
    training_points = discrete_scatter(X[:, 0], X[:, 1], y)
    plt.legend(training_points + test_points,
               ["training class 0", "training class 1",
                "test pred 0", "test pred 1"])

    # Column j of `order` ranks all training points by distance to test point j.
    order = np.argsort(euclidean_distances(X, X_test), axis=0)
    for point, ranked in zip(X_test, order.T):
        for idx in ranked[:n_neighbors]:
            # head_width=0 turns the arrow into a plain connecting segment.
            plt.arrow(point[0], point[1],
                      X[idx, 0] - point[0], X[idx, 1] - point[1],
                      head_width=0, fc='k', ec='k')
def create_forge():
    """Scatter-plot the forge toy dataset and print its array shapes."""
    features, labels = datasets.make_forge()

    # Colored scatter of the two classes over both features.
    mglearn.discrete_scatter(features[:, 0], features[:, 1], labels)
    plt.legend(['Class 0', 'Class 1'], loc=4)
    plt.xlabel('First feature')
    plt.ylabel('Second feature')

    print('X.shape: {}'.format(features.shape))
    print('y.shape: {}'.format(labels.shape))

    # Overlay every sample as an 'x' marker on top of the class scatter.
    plt.plot(features[:, 0], features[:, 1], 'x')
    plt.suptitle("图2-2:forge数据集的散点图")
"""Train and score a k-nearest-neighbors classifier on the forge dataset."""
from sklearn.model_selection import train_test_split
from mglearn.datasets import make_forge
from sklearn.neighbors import KNeighborsClassifier
import matplotlib.pyplot as plt

# Synthetic two-class dataset with a deterministic train/test split.
X, y = make_forge()
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)

# n_neighbors passed by keyword: scikit-learn deprecates positional
# estimator hyper-parameters (the original used `KNeighborsClassifier(5)`).
clf = KNeighborsClassifier(n_neighbors=5).fit(X_train, y_train)

# The original computed `preds` and never used it; print it so the
# prediction step is actually visible in the demo output.
preds = clf.predict(X_test)
print("Predictions {}".format(preds))

# Accuracy on the held-out split; scores 0.86 with random_state=0.
print("Score {:.2f}".format(clf.score(X_test, y_test)))
def main():  # TEST
    """Smoke-test the hand-rolled KnnClassifier/KnnRegressor against scikit-learn.

    Runs four comparisons:
      1. KnnClassifier vs KNeighborsClassifier on the forge dataset.
      2. KnnClassifier vs KNeighborsClassifier on random integer data.
      3. Visual overlay of KnnRegressor vs KNeighborsRegressor on wave data.
      4. KnnRegressor vs KNeighborsRegressor on random integer data.
    Parity checks 1, 2 and 4 each print True/False; check 3 shows a plot.
    """
    # All imports hoisted here once — the original re-imported numpy twice
    # and scattered the remaining imports through the body.
    import numpy as np
    from pylab import scatter, plot, show
    from mglearn.datasets import make_forge, make_wave
    from sklearn.model_selection import train_test_split
    from sklearn.neighbors import KNeighborsClassifier, KNeighborsRegressor

    # --- 1. Classifier parity on the forge dataset ---------------------
    X, y = make_forge()
    X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)
    n = 5
    md = KnnClassifier(n_neighbors=n)
    md.fit(X_train, y_train)
    y_pred_my = md.predict(X_test)
    md = KNeighborsClassifier(n_neighbors=n)
    md.fit(X_train, y_train)
    y_pred_sk = md.predict(X_test)
    print((y_pred_my == y_pred_sk).all())

    # --- 2. Classifier parity on random 3-class integer data -----------
    X = np.random.randint(0, 100, size=(50, 3))
    y = np.random.randint(0, 3, size=len(X))
    X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)
    n = 7
    md = KnnClassifier(n_neighbors=n)
    md.fit(X_train, y_train)
    y_pred_my = md.predict(X_test)
    md = KNeighborsClassifier(n_neighbors=n)
    md.fit(X_train, y_train)
    y_pred_sk = md.predict(X_test)
    print((y_pred_my == y_pred_sk).all())

    # --- 3. Regressor comparison, plotted on the wave dataset ----------
    X, y = make_wave()
    X_test = np.arange(-3, 3, 0.001).reshape(-1, 1)
    n = 4
    md = KNeighborsRegressor(n_neighbors=n).fit(X, y)
    scatter(X.ravel(), y, marker='.', color='blue')
    plot(X_test.ravel(), md.predict(X_test), linewidth=0.4)
    md = KnnRegressor(n_neighbors=n).fit(X, y)
    plot(X_test.ravel(), md.predict(X_test),
         linewidth=0.6, linestyle='--', alpha=0.5, color='red')
    show()

    # --- 4. Regressor parity on random integer data --------------------
    # Split a (10, 5) matrix into 4 feature columns and 1 target column.
    X, y = np.split(np.random.randint(-10, 10, size=(10, 5)),
                    axis=1, indices_or_sections=[4])
    y = y.ravel()
    n = 5
    md = KnnRegressor(n_neighbors=n).fit(X, y)
    X_test = np.random.randint(-10, 10, size=(25, 4))
    y_pred_my = md.predict(X_test)
    md = KNeighborsRegressor(n_neighbors=n).fit(X, y)
    y_pred_sk = md.predict(X_test)
    # allclose, not exact equality: averaged float predictions.
    print(np.allclose(y_pred_my, y_pred_sk))
"""Score a k-NN classifier on forge, then plot decision boundaries for k=1,3,9."""
from sklearn.model_selection import train_test_split
from mglearn import datasets
from sklearn.neighbors import KNeighborsClassifier
import matplotlib.pyplot as plt
import mglearn

X, y = datasets.make_forge()
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)

clf = KNeighborsClassifier(n_neighbors=5)
clf.fit(X_train, y_train)
# Fixed typo in the user-facing output: "predicitons" -> "predictions".
print(f"Test set predictions: {clf.predict(X_test)}")
print(f"Test set accuracy: {clf.score(X_test, y_test)}")

# One subplot per neighborhood size; each model is refit on the full dataset.
fig, axes = plt.subplots(1, 3, figsize=(10, 3))
for n_neighbors, ax in zip([1, 3, 9], axes):
    clf = KNeighborsClassifier(n_neighbors=n_neighbors).fit(X, y)
    mglearn.plots.plot_2d_separator(clf, X, fill=True, eps=0.5, ax=ax, alpha=.4)
    # discrete_scatter requires both coordinate columns; the original passed
    # only X[:, 0], which raises TypeError. Also draw into the current axes.
    mglearn.discrete_scatter(X[:, 0], X[:, 1], y, ax=ax)
#!/usr/bin/env python # -*- coding: utf-8 -*- # @Time : 2019-08-17 18:10 # @Author : liupan # @Site : # @File : test_make_forge.py # @Software: PyCharm from mglearn.datasets import make_forge make_forge()
@author: Sven ''' import pandas as pd import numpy as np import matplotlib.pyplot as plt import mglearn import mglearn.datasets as mgd #from IPython.display import display if __name__ == '__main__': pass # generate dataset X, y = mgd.make_forge() print(X) print(y) print("X.shape: {}".format(X.shape)) # plot dataset mglearn.discrete_scatter(X[:, 0], X[:, 1], y) plt.legend(["Class 0", "Class 1"], loc=4) plt.xlabel("First feature") plt.ylabel("Second feature") # KNN from sklearn.model_selection import train_test_split