def ejemplo_clasificacion():
    """Run a k-nearest-neighbours classification example on synthetic data.

    Generates a 300-sample, two-feature, two-informative-feature dataset
    with ``make_classification`` (``flip_y=0.1``, ``class_sep=0.5``,
    ``random_state=0``), splits it with ``train_test_split`` using its
    default proportions and ``random_state=0``, and hands both halves to
    ``plot_two_class_knn`` together with the arguments ``3`` and
    ``'uniform'``.

    Returns:
        None. The function is called for the side effects of
        ``plot_two_class_knn``.
    """
    # Dependencies are imported locally so the example is self-contained.
    # ``train_test_split`` used to be called without being imported here.
    from sklearn.datasets import make_classification
    from sklearn.model_selection import train_test_split

    from adspy_shared_utilities import plot_two_class_knn

    X, y = make_classification(
        n_samples=300,
        n_features=2,
        n_redundant=0,
        n_informative=2,
        n_clusters_per_class=1,
        flip_y=0.1,
        class_sep=0.5,
        random_state=0,
    )
    X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)

    # NOTE(review): 3 and 'uniform' are presumably the neighbour count and
    # the weighting scheme -- confirm against adspy_shared_utilities.
    plot_two_class_knn(X_train, y_train, 3, 'uniform', X_test, y_test)
def binary_data():
    """Show a synthetic two-class dataset and k-NN plots for three settings.

    Opens a new figure, titles it, scatters 100 samples produced by
    ``make_classification`` (two informative features, ``random_state=0``)
    coloured by label, and shows it. The data is then split with
    ``train_test_split(random_state=0)`` and ``plot_two_class_knn`` is
    called once for each of 1, 3 and 11 before a final ``plt.show()``.

    Relies on ``plt``, ``make_classification``, ``cmap_bold``,
    ``train_test_split`` and ``plot_two_class_knn`` being available in the
    enclosing module. Returns ``None``.
    """
    plt.figure()
    plt.title(
        'Sample binary classification problem with two informative features')

    features, labels = make_classification(
        n_samples=100,
        n_features=2,
        n_redundant=0,
        n_informative=2,
        n_clusters_per_class=1,
        flip_y=0.1,
        class_sep=0.5,
        random_state=0,
    )

    first_feature = features[:, 0]
    second_feature = features[:, 1]
    plt.scatter(first_feature, second_feature, c=labels, marker='o', s=50,
                cmap=cmap_bold)
    plt.show()

    split = train_test_split(features, labels, random_state=0)
    train_features, test_features, train_labels, test_labels = split

    # One plot per value, in the same order as before: 1, 3, 11.
    for neighbour_count in (1, 3, 11):
        plot_two_class_knn(train_features, train_labels, neighbour_count,
                           'uniform', test_features, test_labels)
    plt.show()
# Breast cancer dataset, loaded as a (features, labels) pair.
X_cancer, y_cancer = load_breast_cancer(return_X_y=True)

# Communities and Crime dataset
X_crime, y_crime = load_crime_dataset()


# ## K-Nearest Neighbors

# ### Classification

# In[ ]:

from adspy_shared_utilities import plot_two_class_knn

X_train, X_test, y_train, y_test = train_test_split(X_C2, y_C2, random_state=0)

# Same train/test split each time; only the third argument changes
# (1, then 3, then 11).
for n_neighbors in (1, 3, 11):
    plot_two_class_knn(X_train, y_train, n_neighbors, 'uniform', X_test, y_test)


# ### Regression

# In[ ]:

from sklearn.neighbors import KNeighborsRegressor

X_train, X_test, y_train, y_test = train_test_split(X_R1, y_R1, random_state=0)

# Build the 5-neighbour regressor, then fit it on the training split.
knnreg = KNeighborsRegressor(n_neighbors=5)
knnreg.fit(X_train, y_train)

# Print the test-set predictions, then the regressor's score on that split.
print(knnreg.predict(X_test))
print('R-squared test score: {:.3f}'.format(knnreg.score(X_test, y_test)))
# Breast cancer dataset for classification: once as the full object and
# once as a (features, labels) pair.
cancer = load_breast_cancer()
(X_cancer, y_cancer) = load_breast_cancer(return_X_y=True)

# Communities and Crime dataset
# Target value to predict: per capita violent crime rate.
(X_crime, y_crime) = load_crime_dataset()

""" ====K-Nearest Neighbors==== """

# Classification
from adspy_shared_utilities import plot_two_class_knn

X_train, X_test, y_train, y_test = train_test_split(X_C2, y_C2, random_state=0)

# Figures 5-7. Original author's notes on the three calls:
#   1  -> overfitting for a complex model because of too much variance.
#   3  -> general trend more properly captured; less accuracy on the
#         training set,
#   11 -> but more accuracy on the test set.
for n_neighbors in (1, 3, 11):
    plot_two_class_knn(X_train, y_train, n_neighbors, "uniform", X_test, y_test)

# Regression
from sklearn.neighbors import KNeighborsRegressor

X_train, X_test, y_train, y_test = train_test_split(X_R1, y_R1, random_state=0)

# Build the 5-neighbour regressor, then fit it on the training split.
knnreg = KNeighborsRegressor(n_neighbors=5)
knnreg.fit(X_train, y_train)

print(knnreg.predict(X_test))
print("R-squared test score:{:.3f}\n".format(knnreg.score(X_test, y_test)))

fig, subaxes = plt.subplots(1, 2, figsize=(8, 4))

# linspace takes a number of samples (50) rather than a step size; the
# result is reshaped into a single column with the row count inferred.
X_predict_input = np.linspace(-3, 3, 50).reshape(-1, 1)

# Keep only every fifth sample of the data before splitting.
X_train, X_test, y_train, y_test = train_test_split(
    X_R1[::5], y_R1[::5], random_state=0
)