def test():
    """Smoke-test GaussianNB on a toy data set and report the terrain test-set size.

    Fits a Gaussian Naive Bayes classifier on six hand-written 2-D points
    (three per class), prints the predicted class for one new point, then
    prints the length of the fourth element returned by ``makeTerrainData()``.

    NOTE(review): the original source was collapsed onto a single line, so it
    is not possible to tell whether the ``makeTerrainData`` statements were
    meant to live inside this function or at module level -- confirm.
    """
    import numpy as np
    x = np.array([[-1, 1], [-2, 1], [-3, -2], [1, 1], [2, 1], [3, 2]])
    y = np.array([1, 1, 1, 2, 2, 2])

    from sklearn.naive_bayes import GaussianNB
    clf = GaussianNB()
    clf.fit(x, y)
    # predict() takes a 2-D array of shape (n_samples, n_features); a bare
    # 1-D sample is rejected by current scikit-learn, so wrap it in a list.
    print(clf.predict([[-0.8, -1]]))

    from prep_terrain_data import makeTerrainData
    a = makeTerrainData()
    # Function-call form of print works on both Python 2 and Python 3.
    print(len(a[3]))
def classify(features_train, labels_train):
    """Fit a Gaussian Naive Bayes classifier on the supplied training data.

    Args:
        features_train: training feature vectors, one row per sample.
        labels_train: class label for each row of ``features_train``.

    Returns:
        The fitted ``GaussianNB`` classifier.
    """
    from sklearn.naive_bayes import GaussianNB

    clf = GaussianNB()
    # Train on the data the caller passed in. The previous version ignored
    # both parameters and fitted on a fresh makeTerrainData() sample instead.
    clf.fit(features_train, labels_train)
    return clf
#!/usr/bin/python

import matplotlib.pyplot as plt
from prep_terrain_data import makeTerrainData
from class_vis import prettyPicture

features_train, labels_train, features_test, labels_test = makeTerrainData()


### the training data (features_train, labels_train) have both "fast" and "slow"
### points mixed in together--separate them so we can give them different colors
### in the scatterplot, and visually identify them.
### Label 0 is plotted as "fast", label 1 as "slow"; feature 0 is stored as
### grade and feature 1 as bumpiness.
grade_fast = [point[0] for point, label in zip(features_train, labels_train) if label == 0]
bumpy_fast = [point[1] for point, label in zip(features_train, labels_train) if label == 0]
grade_slow = [point[0] for point, label in zip(features_train, labels_train) if label == 1]
bumpy_slow = [point[1] for point, label in zip(features_train, labels_train) if label == 1]


#### initial visualization
plt.xlim(0.0, 1.0)
plt.ylim(0.0, 1.0)
# Both classes must use the same axis order (x = bumpiness, y = grade) to
# agree with the axis labels below; the "slow" points previously had their
# two coordinates swapped.
plt.scatter(bumpy_fast, grade_fast, color="b", label="fast")
plt.scatter(bumpy_slow, grade_slow, color="r", label="slow")
plt.legend()
plt.xlabel("bumpiness")
plt.ylabel("grade")
plt.show()
################################################################################


### your code here!  name your classifier object clf if you want the
### visualization code (prettyPicture) to show you the decision boundary
# Display names, listed in the same order as the estimators in `classifiers`.
names = [
    "Nearest Neighbors",
    "Linear SVM",
    "RBF SVM",
    "Decision Tree",
    "Random Forest",
    "AdaBoost",
    "Naive Bayes",
    "Linear Discriminant Analysis",
    "Quadratic Discriminant Analysis",
]

# One unfitted estimator per entry in `names`.
classifiers = [
    KNeighborsClassifier(3),
    SVC(kernel="linear", C=0.025),
    SVC(gamma=2, C=1),
    DecisionTreeClassifier(max_depth=5),
    RandomForestClassifier(max_depth=5, n_estimators=10, max_features=1),
    AdaBoostClassifier(),
    GaussianNB(),
    LinearDiscriminantAnalysis(),
    QuadraticDiscriminantAnalysis(),
]

# Only the first two values returned by makeTerrainData() are used below.
X, y, a, b = makeTerrainData()
datasets = [[X, y]]

figure = plt.figure(figsize=(27, 9))
i = 1

# iterate over datasets
for ds in datasets:
    # preprocess dataset, split into training and test part
    X, y = ds
    X = StandardScaler().fit_transform(X)
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4)

    # Bounds of each standardized feature, padded by 0.5 on both sides.
    x_min = X[:, 0].min() - 0.5
    x_max = X[:, 0].max() + 0.5
    y_min = X[:, 1].min() - 0.5
    y_max = X[:, 1].max() + 0.5
#!/usr/bin/python
from __future__ import print_function

import matplotlib.pyplot as plt
from time import time
from prep_terrain_data import makeTerrainData
from class_vis import prettyPicture
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score

features_train, labels_train, features_test, labels_test = makeTerrainData()


### the training data (features_train, labels_train) have both "fast" and "slow"
### points mixed together--separate them so we can give them different colors
### in the scatterplot and identify them visually.
### Label 0 is plotted as "fast", label 1 as "slow"; feature 0 is stored as
### grade and feature 1 as bumpiness.
grade_fast = [point[0] for point, label in zip(features_train, labels_train) if label == 0]
bumpy_fast = [point[1] for point, label in zip(features_train, labels_train) if label == 0]
grade_slow = [point[0] for point, label in zip(features_train, labels_train) if label == 1]
bumpy_slow = [point[1] for point, label in zip(features_train, labels_train) if label == 1]


#### initial visualization
plt.xlim(0.0, 1.0)
plt.ylim(0.0, 1.0)
# Both classes must use the same axis order (x = bumpiness, y = grade) to
# agree with the axis labels below; the "slow" points previously had their
# two coordinates swapped.
plt.scatter(bumpy_fast, grade_fast, color="b", label="fast")
plt.scatter(bumpy_slow, grade_slow, color="r", label="slow")
plt.legend()
plt.xlabel("bumpiness")
plt.ylabel("grade")
plt.show()
################################################################################
def __init__(self, N_points):
    """Generate terrain data and keep its four parts on the instance.

    Calls ``makeTerrainData(N_points)`` and stores the four returned values,
    in order, as ``features_train``, ``labels_train``, ``features_test`` and
    ``labels_test``.
    """
    terrain = makeTerrainData(N_points)
    (
        self.features_train,
        self.labels_train,
        self.features_test,
        self.labels_test,
    ) = terrain
import numpy as np
from prep_terrain_data import makeTerrainData
from matplotlib import pyplot as plt
from sklearn import svm
import pandas
from class_vis import prettyPicture

x_train, y_train, x_test, y_test = makeTerrainData()

# Put the test points in a two-column frame and attach their labels so the
# two classes can be selected with boolean masks.
df = pandas.DataFrame(data=x_test, columns=['f_1', 'f_2'])
df['label'] = pandas.Series(y_test)

is_positive = df['label'] == 1
is_negative = df['label'] == 0
dat_pos = df[is_positive]
dat_neg = df[is_negative]

# Label-1 points are drawn in blue, label-0 points in red.
for subset, colour in ((dat_pos, 'blue'), (dat_neg, 'red')):
    plt.scatter(subset['f_1'], subset['f_2'], c=colour)
#plt.show()

# RBF-kernel SVM trained on the training split, visualized on the test split.
clf = svm.SVC(kernel='rbf', C=0.01)
clf.fit(x_train, y_train)

prettyPicture(clf, x_test, y_test)

# Disabled sketch for drawing a straight-line decision boundary from the
# fitted coefficients:
#w = clf.coef_[0]
#a = -w[0]/w[1]
#m = clf.intercept_[0] / w[1]
#x_ = np.linspace(0.0,1.0)
#y_ = a*x_ - m
#plt.plot(x_,y_)
import numpy as np
from prep_terrain_data import makeTerrainData
from matplotlib import pyplot as plt
from sklearn import svm
import pandas
from class_vis import prettyPicture

x_train, y_train, x_test, y_test = makeTerrainData()

# Put the test points in a two-column frame and attach their labels so the
# two classes can be selected with boolean masks.
df = pandas.DataFrame(data=x_test, columns=['f_1', 'f_2'])
df['label'] = pandas.Series(y_test)

is_positive = df['label'] == 1
is_negative = df['label'] == 0
dat_pos = df[is_positive]
dat_neg = df[is_negative]

# Label-1 points are drawn in blue, label-0 points in red.
for subset, colour in ((dat_pos, 'blue'), (dat_neg, 'red')):
    plt.scatter(subset['f_1'], subset['f_2'], c=colour)
#plt.show()

# RBF-kernel SVM trained on the training split, visualized on the test split.
clf = svm.SVC(kernel='rbf', C=0.01)
clf.fit(x_train, y_train)

prettyPicture(clf, x_test, y_test)

# Disabled sketch for deriving a straight-line decision boundary from the
# fitted coefficients:
#w = clf.coef_[0]
#a = -w[0]/w[1]
#m = clf.intercept_[0] / w[1]
#x_ = np.linspace(0.0,1.0)
#y_ = a*x_ - m
def classify(features_train, labels_train):
    """Fit a Gaussian Naive Bayes classifier on the supplied training data.

    The original stub contained only instructional comments and no body, so
    the call further down had nothing to run. This implements the steps those
    comments describe.

    Args:
        features_train: training feature vectors, one row per sample.
        labels_train: class label for each row of ``features_train``.

    Returns:
        The fitted ``GaussianNB`` classifier.
    """
    ### import the sklearn module for GaussianNB
    from sklearn.naive_bayes import GaussianNB

    ### create classifier
    clf = GaussianNB()

    ### fit the classifier on the training features and labels
    clf.fit(features_train, labels_train)

    ### return the fit classifier
    return clf


import numpy as np
import pylab as pl


features_train, labels_train, features_test, labels_test = makeTerrainData()

### the training data (features_train, labels_train) have both "fast" and "slow" points mixed
### in together--separate them so we can give them different colors in the scatterplot,
### and visually identify them (label 0 is "fast", label 1 is "slow")
grade_fast = [point[0] for point, label in zip(features_train, labels_train) if label == 0]
bumpy_fast = [point[1] for point, label in zip(features_train, labels_train) if label == 0]
grade_slow = [point[0] for point, label in zip(features_train, labels_train) if label == 1]
bumpy_slow = [point[1] for point, label in zip(features_train, labels_train) if label == 1]


# Train the Naive Bayes classifier with the classify() function defined above.
clf = classify(features_train, labels_train)


### draw the decision boundary with the test points overlaid
prettyPicture(clf, features_test, labels_test)

# Read the rendered image through a context manager so the file handle is
# closed once its bytes have been handed to output_image().
with open("test.png", "rb") as image_file:
    output_image("test.png", "png", image_file.read())