def plot_knn_classification(n_neighbors=1):
    """Visualize k-NN classification on the forge dataset.

    Draws the training points, a diagonal line of synthetic test points
    colored by their predicted class, and an arrow from each test point
    to each of its ``n_neighbors`` closest training points.

    Parameters
    ----------
    n_neighbors : int, default=1
        Number of neighbors used both for prediction and for the arrows.
    """
    X, y = make_forge()

    # Synthetic test inputs along a diagonal line through feature space.
    xs = np.arange(8, 12, 0.5)
    X_test = np.c_[xs, (xs - 8) * 1.4]

    model = KNeighborsClassifier(n_neighbors=n_neighbors).fit(X, y)
    predicted_points = discrete_scatter(X_test[:, 0],
                                        X_test[:, 1],
                                        model.predict(X_test),
                                        markers="*")
    trained_points = discrete_scatter(X[:, 0], X[:, 1], y)
    plt.legend(
        trained_points + predicted_points,
        ["training class 0", "training class 1", "test pred 0", "test pred 1"])

    # Column j of `order` ranks training indices by distance to test point j.
    order = np.argsort(euclidean_distances(X, X_test), axis=0)

    for point, ranked in zip(X_test, order.T):
        for idx in ranked[:n_neighbors]:
            # Headless arrow from the test point to one of its neighbors.
            plt.arrow(point[0],
                      point[1],
                      X[idx, 0] - point[0],
                      X[idx, 1] - point[1],
                      head_width=0,
                      fc='k',
                      ec='k')
def create_forge():
    """Generate the forge dataset, report its shape, and scatter-plot it."""
    X, y = datasets.make_forge()

    # Class-colored scatter plus axis labels and a legend.
    mglearn.discrete_scatter(X[:, 0], X[:, 1], y)
    plt.legend(['Class 0', 'Class 1'], loc=4)
    plt.xlabel('First feature')
    plt.ylabel('Second feature')

    print(f'X.shape: {X.shape}')
    print(f'y.shape: {y.shape}')

    # Overlay every sample as an 'x' marker on the same axes.
    plt.plot(X[:, 0], X[:, 1], 'x')
    plt.suptitle("图2-2:forge数据集的散点图")
Exemple #3
0
from sklearn.model_selection import train_test_split
from mglearn.datasets import make_forge
from sklearn.neighbors import KNeighborsClassifier
import matplotlib.pyplot as plt

# Split the forge data, fit a 5-NN classifier, and report test accuracy.
X, y = make_forge()

X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)

# Keyword argument instead of positional: scikit-learn estimators take
# hyperparameters keyword-only in recent releases, and the sibling
# snippets in this file already use n_neighbors=.
clf = KNeighborsClassifier(n_neighbors=5).fit(X_train, y_train)
preds = clf.predict(X_test)

# Scores 0.86
print("Score {:.2f}".format(clf.score(X_test, y_test)))
Exemple #4
0
def main():     #TEST
    """Compare hand-rolled k-NN models against scikit-learn's.

    Runs four experiments back to back:
      1. classification on the 2-D forge dataset (k=5),
      2. classification on random 3-feature integer data (k=7),
      3. regression on the 1-D wave dataset, plotted (k=4),
      4. regression on random 4-feature integer data (k=5).
    Experiments 1, 2 and 4 print True when the custom model's
    predictions match scikit-learn's.

    NOTE(review): KnnClassifier / KnnRegressor are defined elsewhere in
    the project; experiments 2 and 4 use unseeded np.random, so their
    data differs between runs.
    """
    from mglearn.datasets import make_forge
    from sklearn.neighbors import KNeighborsClassifier
    from sklearn.model_selection import train_test_split

    X, y = make_forge()
    X_train,X_test, y_train, y_test = train_test_split(X,y, random_state=0)

    # Experiment 1: custom vs sklearn classifier on forge.
    n = 5
    md = KnnClassifier(n_neighbors=n)
    md.fit(X_train,y_train)
    y_pred_my = md.predict(X_test)

    md = KNeighborsClassifier(n_neighbors=n)
    md.fit(X_train,y_train)
    y_pred_sk = md.predict(X_test)

    # True iff every prediction agrees element-wise.
    ab = y_pred_my == y_pred_sk
    print(ab.all())

    ########################
    # Experiment 2: same comparison on random 3-feature data.
    import numpy as np
    X = np.random.randint(0,100, size=(50,3))
    y = np.random.randint(0,3, size=len(X))

    X_train,X_test, y_train, y_test = train_test_split(X,y, random_state=0)

    n = 7
    md = KnnClassifier(n_neighbors=n)
    md.fit(X_train,y_train)
    y_pred_my = md.predict(X_test)

    md = KNeighborsClassifier(n_neighbors=n)
    md.fit(X_train,y_train)
    y_pred_sk = md.predict(X_test)

    ab = y_pred_my == y_pred_sk
    print(ab.all())

    ############################################
    # Experiment 3: overlay custom and sklearn regression curves on wave.

    from pylab import scatter, plot, show
    from sklearn.neighbors import KNeighborsRegressor
    from mglearn.datasets import make_wave
    X,y = make_wave()
    import numpy as np
    # Dense 1-D grid of test inputs for a smooth prediction curve.
    X_test = np.arange(-3,3,0.001).reshape(-1,1)

    n=4
    md = KNeighborsRegressor(n_neighbors=n).fit(X,y)
    scatter(X.ravel(), y, marker='.', color='blue')

    y_pred = md.predict(X_test)
    plot(X_test.ravel(), y_pred, linewidth=0.4)

    # Dashed red curve: the custom regressor, for visual comparison.
    md = KnnRegressor(n_neighbors=n).fit(X,y)
    y_pred = md.predict(X_test)
    plot(X_test.ravel(), y_pred, linewidth=0.6, linestyle='--', alpha=0.5, color='red')

    show()

    #------------------------------------
    # Experiment 4: regression on random data; split the 5 columns into
    # 4 feature columns (X) and the last column as the target (y).

    X,y = np.split(np.random.randint(-10,10, size=(10,5)), axis=1, indices_or_sections=[4])
    y = y.ravel()

    n = 5
    md = KnnRegressor(n_neighbors=n).fit(X,y)
    X_test = np.random.randint(-10,10,size=(25,4))
    y_pred_my = md.predict(X_test)

    md = KNeighborsRegressor(n_neighbors=n).fit(X,y)
    y_pred_sk = md.predict(X_test)

    # allclose rather than exact equality: float averaging may differ slightly.
    b = np.allclose(y_pred_my, y_pred_sk)
    print(b)
Exemple #5
0
from sklearn.model_selection import train_test_split
from mglearn import datasets
from sklearn.neighbors import KNeighborsClassifier
import matplotlib.pyplot as plt
import mglearn

# Evaluate a 5-NN classifier on a forge train/test split, then visualize
# how the decision boundary changes with the number of neighbors.
X, y = datasets.make_forge()
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)
clf = KNeighborsClassifier(n_neighbors=5)

clf.fit(X_train, y_train)
# Fixed typo in the printed message: "predicitons" -> "predictions".
print(f"Test set predictions: {clf.predict(X_test)}")
print(f"Test set accuracy: {clf.score(X_test, y_test)}")

fig, axes = plt.subplots(1, 3, figsize=(10, 3))

for n_neighbors, ax in zip([1, 3, 9], axes):
    # Fit on the full dataset so the boundary reflects every point.
    clf = KNeighborsClassifier(n_neighbors=n_neighbors).fit(X, y)
    mglearn.plots.plot_2d_separator(clf,
                                    X,
                                    fill=True,
                                    eps=0.5,
                                    ax=ax,
                                    alpha=.4)
    # Bug fix: the original call passed only X[:, 0]; discrete_scatter
    # needs both coordinates, the class labels, and the target axes
    # (same pattern as the other snippets in this file).
    mglearn.discrete_scatter(X[:, 0], X[:, 1], y, ax=ax)
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# @Time    : 2019-08-17 18:10
# @Author  : liupan
# @Site    :
# @File    : test_make_forge.py
# @Software: PyCharm

from mglearn.datasets import make_forge

# Smoke test: just generate the dataset; the (X, y) return value is discarded.
make_forge()
@author: Sven
'''

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import mglearn
import mglearn.datasets as mgd
#from IPython.display import display

# NOTE(review): this guard is a no-op — the example below runs at import time.
if __name__ == '__main__':
    pass

# Generate the forge dataset and echo its contents and shape.
X, y = mgd.make_forge()
print(X, y, sep="\n")

print(f"X.shape: {X.shape}")

# Scatter both classes and label the axes.
mglearn.discrete_scatter(X[:, 0], X[:, 1], y)

plt.legend(["Class 0", "Class 1"], loc=4)
plt.xlabel("First feature")
plt.ylabel("Second feature")

# KNN

from sklearn.model_selection import train_test_split