def ranforest(n_estimators, min_samples_split):
    from sklearn.ensemble import RandomForestClassifier
    from sklearn.metrics import accuracy_score
    clf = RandomForestClassifier(n_estimators=n_estimators,
                                 min_samples_split=min_samples_split,
                                 bootstrap=True)
    t_fit = time()
    clf.fit(features_train, labels_train)
    print "training time:", round(time()-t_fit, 3), "s"
    t_pred = time()
    pred = clf.predict(features_test)
    print "predict time:", round(time()-t_pred, 3), "s"
    print accuracy_score(pred, labels_test)
    try:
        prettyPicture(clf, features_test, labels_test)
    except NameError:
        pass
def supportvector(C, gamma='default'):
    from sklearn.svm import SVC
    from sklearn.metrics import accuracy_score
    if gamma == 'default':
        clf = SVC(kernel="rbf", C=C)
    else:
        clf = SVC(kernel="rbf", C=C, gamma=gamma)
    t_fit = time()
    clf.fit(features_train, labels_train)
    print "training time:", round(time()-t_fit, 3), "s"
    t_pred = time()
    pred = clf.predict(features_test)
    print "predict time:", round(time()-t_pred, 3), "s"
    print accuracy_score(pred, labels_test)
    try:
        prettyPicture(clf, features_test, labels_test)
    except NameError:
        pass
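# A minimal usage sketch for the two helpers above, assuming the train/test
# split and time() are already in scope; the hyperparameter values here are
# illustrative, not tuned:
ranforest(n_estimators=100, min_samples_split=20)
supportvector(C=1000.0, gamma=10.0)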
def process(method, name, param_grid, **argument):
    t0 = time()
    clf = grid_search.GridSearchCV(method(**argument), param_grid)
    clf.fit(features_train, labels_train)
    pred = clf.predict(features_test)
    print(clf.best_estimator_)
    print("accuracy", metrics.accuracy_score(labels_test, pred))
    print("done in %0.3fs" % (time()-t0))
    prettyPicture(clf, features_test, labels_test, name)
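# A hedged usage sketch for process(); `grid_search` is the legacy
# sklearn.grid_search module (moved to sklearn.model_selection in later
# releases), and the grid values below are illustrative assumptions:
from sklearn.svm import SVC
param_grid = {"C": [1, 10, 100, 1000], "gamma": [0.001, 0.01, 0.1, 1]}
process(SVC, "svc_grid", param_grid, kernel="rbf")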
def runClassifier(clf, clfName):
    from time import time
    t0 = time()
    clf.fit(features_train, labels_train)
    print clfName, "training time:", round(time()-t0, 3), "s"
    ### measure the accuracy
    accuracy = clf.score(features_test, labels_test)
    print "accuracy:", accuracy*100, "%"
    ### visualization code (prettyPicture) to show you the decision boundary
    try:
        prettyPicture(clf, features_test, labels_test, clfName)
    except NameError:
        print "no pic"
def k_nearest_neighbours():
    from sklearn.neighbors import KNeighborsClassifier
    from sklearn.metrics import accuracy_score
    from time import time
    cls = KNeighborsClassifier(n_neighbors=8)
    t0 = time()
    cls.fit(features_train, labels_train)
    print "training time:", round(time()-t0, 3), "s"
    t0 = time()
    pred = cls.predict(features_test)
    print "prediction time:", round(time()-t0, 3), "s"
    print "accuracy: ", accuracy_score(labels_test, pred)
    try:
        prettyPicture(cls, features_test, labels_test)
    except NameError:
        pass
def KNearestNeigh(k):
    from sklearn.neighbors import KNeighborsClassifier
    clf = KNeighborsClassifier(n_neighbors=k)
    start_time = time()
    clf.fit(features_train, labels_train)
    elapsed = time() - start_time
    text = "Training time (kNearestNeigh:{0}): {1}s".format(k, round(elapsed, 3))
    writeToFile("ChooseYourOwn_output.txt", text, "a")
    acc = clf.score(features_test, labels_test)
    text = "Accuracy (kNearestNeigh:{0}): {1}".format(k, round(acc, 3))
    writeToFile("ChooseYourOwn_output.txt", text, "a")
    try:
        prettyPicture(clf, features_test, labels_test)
    except NameError:
        pass
def RandomForest(k):
    from sklearn.ensemble import RandomForestClassifier
    clf = RandomForestClassifier(n_estimators=k)
    start_time = time()
    clf.fit(features_train, labels_train)
    elapsed = time() - start_time
    text = "Training time (RandomForest:{0}): {1}s".format(k, round(elapsed, 3))
    writeToFile("ChooseYourOwn_output.txt", text, "a")
    acc = clf.score(features_test, labels_test)
    text = "Accuracy (RandomForest:{0}): {1}".format(k, round(acc, 3))
    writeToFile("ChooseYourOwn_output.txt", text, "a")
    try:
        prettyPicture(clf, features_test, labels_test)
    except NameError:
        pass
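# KNearestNeigh() and RandomForest() above assume a writeToFile() utility
# whose definition is not shown; a minimal sketch of what it might look like
# (hypothetical, not the original helper):
def writeToFile(filename, text, mode):
    with open(filename, mode) as f:
        f.write(text + "\n")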
def adaboost(n_estimators, learning_rate):
    from sklearn.ensemble import AdaBoostClassifier
    from sklearn.metrics import accuracy_score
    clf = AdaBoostClassifier(n_estimators=n_estimators, learning_rate=learning_rate)
    t_fit = time()
    clf.fit(features_train, labels_train)
    print "training time:", round(time()-t_fit, 3), "s"
    t_pred = time()
    pred = clf.predict(features_test)
    print "predict time:", round(time()-t_pred, 3), "s"
    print accuracy_score(pred, labels_test)
    try:
        prettyPicture(clf, features_test, labels_test)
    except NameError:
        pass
def testClassifier(clf):
    print "number of features in train=", len(features_train[0])
    t0 = time()
    clf.fit(features_train, labels_train)
    tt = time()
    prettyPicture(clf, features_test, labels_test)
    #output_image("test.png", "png", open("test.png", "rb").read())
    display(Image("test.png"))
    print "training time:", round(tt-t0, 3), "s"
    t1 = time()
    pred = clf.predict(features_test)
    print "predict time:", round(time()-t1, 3), "s"
    #print "answer10=", pred[10]
    #print "answer26=", pred[26]
    #print "answer50=", pred[50]
    import numpy as np
    #print "# of 1's(sum)=", np.sum(pred)
    print "# of 1's(count_nonzero)=", np.count_nonzero(pred)
    import collections
    print "# of 1's(Counter)=", collections.Counter(pred)  # e.g. Counter({0: 881, 1: 877})
    print "len(pred)=", len(pred), " len(labels_test)=", len(labels_test)
    from sklearn.metrics import accuracy_score
    print "accuracy_score:", accuracy_score(labels_test, pred)
    return
def drawDecisionBoundary(clf):
    print 'Classifier: ', clf.__class__.__name__
    t0 = time()
    clf.fit(features_train, labels_train)
    print 'time training: ', time()-t0
    t1 = time()
    pred = clf.predict(features_test)
    print 'time predicting: ', time()-t1
    accuracy = accuracy_score(labels_test, pred)
    print 'accuracy: ', accuracy
    try:
        plot = prettyPicture(clf, features_test, labels_test)
        plot.show()
    except NameError:
        print 'something went wrong'
grade_fast = [features_train[ii][0] for ii in range(0, len(features_train)) if labels_train[ii] == 0]
bumpy_fast = [features_train[ii][1] for ii in range(0, len(features_train)) if labels_train[ii] == 0]
grade_slow = [features_train[ii][0] for ii in range(0, len(features_train)) if labels_train[ii] == 1]
bumpy_slow = [features_train[ii][1] for ii in range(0, len(features_train)) if labels_train[ii] == 1]

def classify(clf, features_train, labels_train, **kwargs):
    clf = clf(**kwargs)
    clf.fit(features_train, labels_train)
    return clf

def classifyAdaboost(features_train, labels_train, n_estimators=100):
    return classify(AdaBoostClassifier, features_train, labels_train, n_estimators=n_estimators)

def classifyKNN(features_train, labels_train, n_neighbors=8):
    return classify(KNeighborsClassifier, features_train, labels_train, n_neighbors=n_neighbors)

def classifyRF(features_train, labels_train, n_estimators=100):
    return classify(RandomForestClassifier, features_train, labels_train, n_estimators=n_estimators)

if __name__ == "__main__":
    clf_dict = {"knn": classifyKNN, "adaboost": classifyAdaboost, "randomforest": classifyRF}
    for name, clf in clf_dict.items():
        print(name, ":")
        clf_fitted = clf(features_train, labels_train)
        pred = clf_fitted.predict(features_test)
        print("Accuracy:", accuracy_score(labels_test, pred))
        prettyPicture(clf_fitted, features_test, labels_test)
x_train, y_train, x_test, y_test = makeTerrainData()

df = pandas.DataFrame(data=x_test, columns=['f_1', 'f_2'])
df['label'] = pandas.Series(y_test)
dat_pos = df[df.label == 1]
dat_neg = df[df.label == 0]

plt.scatter(dat_pos['f_1'], dat_pos['f_2'], c='blue')
plt.scatter(dat_neg['f_1'], dat_neg['f_2'], c='red')
#plt.show()

clf = svm.SVC(kernel='rbf', C=0.01)
clf.fit(x_train, y_train)
prettyPicture(clf, x_test, y_test)

# the coefficient-based boundary below only applies to a linear kernel
#w = clf.coef_[0]
#a = -w[0]/w[1]
#m = clf.intercept_[0] / w[1]
#x_ = np.linspace(0.0, 1.0)
#y_ = a*x_ - m
#plt.plot(x_, y_)

plt.show()

#feature_1 = [x[0] for x in x_train]
#feature_2 = [x[1] for x in x_train]
#pos = [i for i, x in enumerate(y_train) if x == 1]
#neg = [i for i, x in enumerate(y_train) if x != 1]
title = "Learning Curves (Naive Bayes)" # Cross validation with 100 iterations to get smoother mean test and train # score curves, each time with 20% data randomly selected as a validation set. cv = ShuffleSplit(n_splits=100, test_size=0.2, random_state=0) estimator = GaussianNB() plot_learning_curve(estimator, title, nfeatures, nlabels, ylim=(0.7, 1.01), cv=cv, n_jobs=4) try: prettyPicture(clf, features_test, labels_test, feature_1, feature_2, "naive.png") except NameError: pass # decision tree from sklearn import tree clf = tree.DecisionTreeClassifier() clf = clf.fit(features_train, labels_train) pred = clf.predict(features_test) print "decision tree: ", clf.score(features_test, labels_test) print "decision tree: precision score: ", metrics.precision_score( labels_test, pred) print "decision tree: recall score: ", metrics.recall_score(labels_test, pred) title = "Learning Curves (Decision tree)" # Cross validation with 100 iterations to get smoother mean test and train
#### initial visualization
plt.xlim(0.0, 1.0)
plt.ylim(0.0, 1.0)
plt.scatter(bumpy_fast, grade_fast, color="b", label="fast")
plt.scatter(grade_slow, bumpy_slow, color="r", label="slow")
plt.legend()
plt.xlabel("bumpiness")
plt.ylabel("grade")
plt.show()

################################################################################
### your code here! name your classifier object clf if you want the
### visualization code (prettyPicture) to show you the decision boundary
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score as acc

clf = RandomForestClassifier(n_jobs=-1, criterion="gini", n_estimators=100,
                             min_samples_leaf=5, max_features=1)
clf.fit(features_train, labels_train)
pred = clf.predict(features_test)
print "Accuracy", acc(pred, labels_test)

try:
    prettyPicture(clf, features_train, labels_train)
except NameError:
    pass
plt.xlabel("bumpiness") plt.ylabel("grade") #plt.show() ################################################################################# ### your code here! name your classifier object clf if you want the ### visualization code (prettyPicture) to show you the decision boundary from sklearn.neighbors import KNeighborsClassifier from sklearn.metrics import accuracy_score clf = KNeighborsClassifier(n_neighbors=100) print "Start training" clf.fit(features_train, labels_train) print "End training" print "Beging prediction" pred = clf.predict(features_test) print "End prediction" acc = accuracy_score(labels_test, pred) print acc #print "{0} {1} {2}".format(len(clf.predict(features_test)),len(features_test), len(labels_test)) try: my_return = prettyPicture(clf, features_test, labels_test) print my_return except NameError: print "Oops!"
# k-nearest neighbor
from sklearn.neighbors import KNeighborsClassifier
neigh = KNeighborsClassifier(n_neighbors=1)
neigh.fit(features_train, labels_train)
print "KNN Accuracy:", neigh.score(features_test, labels_test)

# Random Forest
from sklearn.ensemble import RandomForestClassifier
rfc = RandomForestClassifier(n_estimators=200)
rfc.fit(features_train, labels_train)
print "Random Forest Accuracy:", rfc.score(features_test, labels_test)

# AdaBoost
from sklearn.ensemble import AdaBoostClassifier
abc = AdaBoostClassifier()
abc.fit(features_train, labels_train)
print "AdaBoost Accuracy:", abc.score(features_test, labels_test)

prettyPicture(neigh, features_test, labels_test, "neigh.png")
prettyPicture(rfc, features_test, labels_test, "rfc.png")
prettyPicture(abc, features_test, labels_test, "abc.png")

# for clf in [neigh, rfc, abc]:
#     try:
#         print "plotting"
#         prettyPicture(clf, features_test, labels_test)
#     except NameError:
#         print "passed"
#         pass
def show_plot(clf, features_test, labels_test):
    try:
        prettyPicture(clf, features_test, labels_test)
    except NameError:
        pass
### visualization code (prettyPicture) to show you the decision boundary
for algo in ["adaboost", "random_forest", "KNN"]:
    clf = 0
    if algo == "adaboost":
        from sklearn.ensemble import AdaBoostClassifier
        clf = AdaBoostClassifier().fit(features_train, labels_train)
    if algo == "random_forest":
        from sklearn.ensemble import RandomForestClassifier
        clf = RandomForestClassifier().fit(features_train, labels_train)
    if algo == "KNN":
        from sklearn.neighbors import KNeighborsClassifier
        clf = KNeighborsClassifier(n_neighbors=8).fit(features_train, labels_train)
    try:
        prettyPicture(clf, features_test, labels_test, name=algo)
    except NameError:
        pass
    from sklearn.metrics import accuracy_score
    print("%s accuracy: %f" % (algo, accuracy_score(labels_test, clf.predict(features_test))))

# for n_estimators in [50, 100, 150, 200, 250, 300, 350, 400, 460, 500]:
#     clf = 0
#     from sklearn.ensemble import AdaBoostClassifier
#     clf = AdaBoostClassifier(n_estimators=n_estimators).fit(features_train, labels_train)
#     print("%d adaBoost accuracy: %f" % (n_estimators, accuracy_score(labels_test, clf.predict(features_test))))

# k-selection loop (continued in the sketch below)
maxAccuracy = 0
for k in range(2, 200):
    clf = 0
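    # hedged continuation sketch: the original loop body is cut off above;
    # this mirrors the commented n_estimators sweep and is an assumption,
    # not the original code
    clf = KNeighborsClassifier(n_neighbors=k).fit(features_train, labels_train)
    acc = accuracy_score(labels_test, clf.predict(features_test))
    if acc > maxAccuracy:
        maxAccuracy = acc
print("best KNN accuracy: %f" % maxAccuracy)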
    'n_estimators': param_estimators,
    'accuracy': acc_ada
}

#import pandas as pd
#df = pd.DataFrame(many_ada_versions())
#df_pivot = df.pivot(index='n_estimators', columns='learning_rate', values='accuracy')
#df_pivot.plot()
#print(df_pivot.max())

# the best score: ADA : n=20, rate=2
ada = getAdaBoost(features_train, labels_train, n_estimators=20, learning_rate=2)
y_pred_ada = ada.predict(features_test)
acc_ada = accuracy_score(labels_test, y_pred_ada)

acc = {
    # "acc_kmeans": round(acc_kmeans, 3),
    # "acc_forest": round(acc_forest, 3),
    "acc_ada": round(acc_ada, 3),
}
print(acc)

try:
    prettyPicture(ada, features_test, labels_test)
    plt.show()
except NameError:
    pass
#################################################################################
### your code here! name your classifier object clf if you want the
### visualization code (prettyPicture) to show you the decision boundary
n_neighbors = 15
print "Loading %iNN library" % n_neighbors
from sklearn.neighbors import KNeighborsClassifier
clf = KNeighborsClassifier(n_neighbors)

print "Training algorithm"
clf.fit(features_train, labels_train)

print "Predicting results"
pred = clf.predict(features_test)

print "Computing algorithm accuracy"
from sklearn.metrics import accuracy_score
acc = accuracy_score(pred, labels_test)
print "Accuracy: %.4f" % acc
# Accuracy 93.6% for 3NN
# Accuracy 94.0% for 4NN, though even k values can produce voting ties -- why is it better here?
# Accuracy 92.0% for 5NN
# Accuracy 93.6% for 7NN

outputfile = "test_%iNN.png" % n_neighbors
print "Saving output plot as %s" % outputfile
prettyPicture(clf, features_test, labels_test, outputfile)
output_image(outputfile, "png", open(outputfile, "rb").read())
                                 learning_rate=1.0, algorithm='SAMME.R', random_state=None)
clfAdaBoost.fit(features_train, labels_train)
predAdaBoost = clfAdaBoost.predict(features_test)

from sklearn.svm import SVC
clfSVM = SVC(C=1000.0, kernel='rbf')
clfSVM.fit(features_train, labels_train)
predSVM_rbf = clfSVM.predict(features_test)

clfSVM = SVC(C=1000.0, kernel='poly', degree=1)
clfSVM.fit(features_train, labels_train)
predSVM_polyFirst = clfSVM.predict(features_test)

try:
    prettyPicture(clfKNN, features_test, labels_test)
    prettyPicture(clfRandomForest, features_test, labels_test)
    prettyPicture(clfAdaBoost, features_test, labels_test)
except NameError:
    pass

from sklearn.metrics import accuracy_score
print "KNN-Accuracy: ", accuracy_score(predKnn, labels_test)
print "Random Forest-Accuracy: ", accuracy_score(predRandomForest, labels_test)
print "AdaBoost-Accuracy: ", accuracy_score(predAdaBoost, labels_test)
print "SVM-RBF Kernel: ", accuracy_score(predSVM_rbf, labels_test)
print "SVM-Poly Kernel: ", accuracy_score(predSVM_polyFirst, labels_test)
# assumes min_samples (e.g. [2, 50]) and acc_samples = {} are defined above,
# along with tree, accuracy_score, prettyPicture, output_image, and pp
for sample in min_samples:
    clf = tree.DecisionTreeClassifier(min_samples_split=sample)
    clf = clf.fit(features_train, labels_train)
    # clf = classify(features_train, labels_train)
    pred = clf.predict(features_test)
    accuracy = accuracy_score(pred, labels_test)
    acc_samples[f'acc_min_samples_split_{sample}'] = accuracy
    print(f'Accuracy for min_samples_split = {sample}: {accuracy}')
    prettyPicture(clf, features_test, labels_test, pic_name=f'test_{sample}')
    output_image(f"test_{sample}.png", "png", open(f"test_{sample}.png", "rb").read())
    print('\n')

def submit_accuracies():
    return acc_samples

if __name__ == "__main__":
    pp(submit_accuracies())
### your code here! name your classifier object clf if you want the
### visualization code (prettyPicture) to show you the decision boundary
clf = AdaBoostClassifier(DecisionTreeClassifier(max_depth=10), n_estimators=10)
clf = clf.fit(features_train, labels_train)
pred = clf.predict(features_test)
print "AdaBoost+tree_accuracy"
print accuracy_score(labels_test, pred)

clf = AdaBoostClassifier(base_estimator=SVC(random_state=1), algorithm="SAMME", n_estimators=1)
clf = clf.fit(features_train, labels_train)
pred = clf.predict(features_test)
print "svc_accuracy"
print accuracy_score(labels_test, pred)

clf_randomforest = RandomForestClassifier(n_estimators=100)
clf_randomforest.fit(features_train, labels_train)
prettyPicture(clf_randomforest, features_test, labels_test)
#score.append(["randomforest", clf_randomforest.score(features_test, labels_test)])
pred = clf_randomforest.predict(features_test)
print "randomforest_accuracy"
print accuracy_score(labels_test, pred)

try:
    prettyPicture(clf, features_test, labels_test)
except NameError:
    pass
features_train, labels_train, features_test, labels_test = makeTerrainData()

########################## SVM #################################
# we handle the import statement and SVC creation for you here
from sklearn.svm import SVC
clf = SVC(kernel="linear")

# now your job is to fit the classifier
# using the training features/labels, and to
# make a set of predictions on the test data
clf.fit(features_train, labels_train)

# store your predictions in a list named pred
pred = clf.predict(features_test)

prettyPicture(clf, features_test, labels_test, f_name="svm_lin.png")
Image.open('svm_lin.png').show()

acc = accuracy_score(pred, labels_test)
print "SVM accuracy: %r" % acc

clf = SVC(kernel="rbf")
clf.fit(features_train, labels_train)
pred = clf.predict(features_test)
prettyPicture(clf, features_test, labels_test, f_name="svm_rbf.png")

def submitAccuracy():
    return acc
features_train, labels_train, features_test, labels_test = makeTerrainData()

# the classify() function in classifyDT is where the magic happens
clf = classify(features_train, labels_train)

# predict
pred = clf.predict(features_test)

# compute accuracy
acc = clf.score(features_test, labels_test)
print "accuracy for min_sample_split=2:", acc

# build and save the scatter plot to the file
prettyPicture(clf, features_test, labels_test, "test_min_sample_split2.png")
output_image("test_min_sample_split2.png", "png", open("test_min_sample_split2.png", "rb").read())

# get classifier with higher min_sample_split
clf = classify(features_train, labels_train, 50)

# predict
pred = clf.predict(features_test)

# compute accuracy
acc = clf.score(features_test, labels_test)
print "accuracy for min_sample_split=50:", acc

# build and save the scatter plot to the file
prettyPicture(clf, features_test, labels_test)
print "features_train: {}".format(len(features_train)) print "features_test: {}".format(len(features_test)) print "labels_train: {}".format(len(labels_train)) print "labels_test: {}".format(len(labels_test)) from sklearn.ensemble import AdaBoostClassifier, RandomForestClassifier, GradientBoostingClassifier from sklearn.neighbors import KNeighborsClassifier from sklearn.metrics import accuracy_score try: clf = AdaBoostClassifier(n_estimators=100) clf = clf.fit(features_train, labels_train) accuracy = accuracy_score(labels_test, clf.predict(features_test)) print "AdaBoostClassifier accuracy: {}".format(accuracy) prettyPicture(clf, features_test, labels_test, "adaboost_test.png") except NameError: pass try: clf = RandomForestClassifier() clf = clf.fit(features_train, labels_train) accuracy = accuracy_score(labels_test, clf.predict(features_test)) print "RandomForestClassifier accuracy: {}".format(accuracy) prettyPicture(clf, features_test, labels_test, "randomforest_test.png") except NameError: pass try: clf = GradientBoostingClassifier() clf = clf.fit(features_train, labels_train)
#!/usr/bin/python

""" lecture and example code for decision tree unit """

import sys
sys.path.insert(1, '../naive_bayes')

import class_vis
from prep_terrain_data import makeTerrainData
# import matplotlib.pyplot as plt
# import numpy as np
# import pylab as pl
from classifyDT import classify

features_train, labels_train, features_test, labels_test = makeTerrainData()

### the classify() function in classifyDT is where the magic
### happens--fill in this function in the file 'classifyDT.py'!
clf = classify(features_train, labels_train, features_test, labels_test)

#### grader code, do not modify below this line
class_vis.prettyPicture(clf, features_test, labels_test, "test.png")
#output_image("test.png", "png", open("test.png", "rb").read())

# import the Image class from the PIL package,
# then open and display the saved plot
from PIL import Image
im = Image.open("test.png")
im.show()
#!/usr/bin/python

""" lecture and example code for decision tree unit """

import sys
sys.path.append('../Lesson2_Naive_Bayes')

from class_vis import prettyPicture, output_image
from prep_terrain_data import makeTerrainData

import matplotlib.pyplot as plt
import numpy as np
import pylab as pl

from classifyDT import classify

features_train, labels_train, features_test, labels_test = makeTerrainData()

### the classify() function in classifyDT is where the magic
### happens--fill in this function in the file 'classifyDT.py'!
from sklearn import tree
from sklearn.metrics import accuracy_score

clf = tree.DecisionTreeClassifier(min_samples_split=40, criterion='gini')
clf.fit(features_train, labels_train)
pred = clf.predict(features_test)
acc = accuracy_score(pred, labels_test)

'''
#### grader code, do not modify below this line
prettyPicture(clf, features_test, labels_test)
output_image("test.png", "png", open("test.png", "rb").read())
'''
from class_vis import prettyPicture, show_img
from clear import clear

features_train, labels_train, features_test, labels_test = makeTerrainData()

#########################################################
### your code goes here ###
from sklearn.svm import SVC
from sklearn import metrics

clear()

# C trades off misclassifying training points against a simpler decision surface;
# gamma controls how near or far the influence of a single training example reaches
classifier = SVC(kernel="linear", C=1)
classifier.fit(features_train, labels_train)
prediction = classifier.predict(features_test)
accuracy = metrics.accuracy_score(labels_test, prediction)
print(accuracy)

pictureName = "svm.png"
prettyPicture(classifier, features_test, labels_test, pictureName)
show_img(pictureName)
#########################################################
import sys
sys.path.append("../choose_your_own/")
from class_vis import prettyPicture

import numpy as np

x = np.array([[-1, -1], [-2, -1], [1, 1], [2, 1]])
print x
y = np.array([1, 1, 2, 2])
print y

from sklearn.svm import SVC
clf = SVC()
clf.fit(x, y)  # fit() returns the fitted estimator itself
print clf

test_data = clf.predict([[-0.8, -1]])
print test_data

prettyPicture(clf, x, y)
t0 = time()
adaBoostClf = AdaBoostClassifier(n_estimators=30, learning_rate=0.4)
adaBoostClf.fit(features_train, labels_train)
print "default adaBoost training time:", round(time()-t0, 3), "s"

#t0 = time()
#rfClf = RandomForestClassifier()
#rfClf.fit(features_train, labels_train)
#print "default randomForest training time:", round(time()-t0, 3), "s"

#knnPred = knnClf.predict(features_test)
#knnacc = accuracy_score(knnPred, labels_test)
adaBoostPred = adaBoostClf.predict(features_test)
adaBoostacc = accuracy_score(adaBoostPred, labels_test)
#rfPred = rfClf.predict(features_test)
#rfacc = accuracy_score(rfPred, labels_test)

# print "default knn accuracy:", knnacc
print "default adaBoost accuracy:", adaBoostacc
# print "default rf accuracy:", rfacc

try:
    prettyPicture(adaBoostClf, features_test, labels_test)
except NameError:
    print "unable to produce boundary"
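# A minimal sweep sketch for how values like n_estimators=30 and
# learning_rate=0.4 above could be chosen; the grids here are illustrative
# assumptions, not the original tuning run:
for n in [10, 30, 50, 100]:
    for rate in [0.1, 0.4, 1.0]:
        sweepClf = AdaBoostClassifier(n_estimators=n, learning_rate=rate)
        sweepClf.fit(features_train, labels_train)
        print n, rate, accuracy_score(sweepClf.predict(features_test), labels_test)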
import sys
sys.path.append("../JumpToMachineLearning/Helpers/")
from prep_data import makeTerrainData
from class_vis import prettyPicture, Show_Image
from ClassifyHelper import Accuracy

picture_name = "SVMclf.png"
features_train, labels_train, features_test, labels_test = makeTerrainData()

########################## SVM #################################
from sklearn.svm import SVC
# a large C means more training points classified correctly;
# for kernel options, see the sklearn documentation;
# gamma defines how far the influence of a single training example reaches,
# so a high gamma gives a very curvy decision boundary
clf = SVC(C=1000.0, kernel="rbf", gamma=10)
clf.fit(features_train, labels_train)
pred = clf.predict(features_test)

prettyPicture(clf, features_test, labels_test, picture_name)
Show_Image(picture_name)

accuracy = Accuracy(clf, features_test, labels_test)
print("Accuracy score of svm for terrain data is : {}".format(accuracy))
#### initial visualization
plt.xlim(0.0, 1.0)
plt.ylim(0.0, 1.0)
plt.scatter(bumpy_fast, grade_fast, color="b", label="fast")
plt.scatter(grade_slow, bumpy_slow, color="r", label="slow")
plt.legend()
plt.xlabel("bumpiness")
plt.ylabel("grade")
# plt.show()

################################################################################
### your code here! name your classifier object clf if you want the
### visualization code (prettyPicture) to show you the decision boundary
#########################################################
### your code goes here ###

# from sklearn.ensemble import RandomForestClassifier
# clf = RandomForestClassifier()
# clf.fit(features_train, labels_train)
# print " acc: ", clf.score(features_test, labels_test)

from sklearn.neighbors import KNeighborsClassifier
clf = KNeighborsClassifier(n_neighbors=4, algorithm='auto', weights='distance')
clf.fit(features_train, labels_train)
print "acc: ", clf.score(features_test, labels_test)

try:
    prettyPicture(clf, features_test, labels_test)
except NameError:
    pass
features_train, labels_train, features_test, labels_test = makeTerrainData()

### the training data (features_train, labels_train) have both "fast" and "slow" points mixed
### in together--separate them so we can give them different colors in the scatterplot,
### and visually identify them
grade_fast = [features_train[ii][0] for ii in range(0, len(features_train)) if labels_train[ii]==0]
bumpy_fast = [features_train[ii][1] for ii in range(0, len(features_train)) if labels_train[ii]==0]
grade_slow = [features_train[ii][0] for ii in range(0, len(features_train)) if labels_train[ii]==1]
bumpy_slow = [features_train[ii][1] for ii in range(0, len(features_train)) if labels_train[ii]==1]

#### initial visualization
plt.xlim(0.0, 1.0)
plt.ylim(0.0, 1.0)
plt.scatter(bumpy_fast, grade_fast, color="b", label="fast")
plt.scatter(grade_slow, bumpy_slow, color="r", label="slow")
plt.legend()
plt.xlabel("bumpiness")
plt.ylabel("grade")
plt.show()

clf = AdaBoostClassifier()
clf.fit(features_train, labels_train)
pred = clf.predict(features_test)
acc = accuracy_score(pred, labels_test)
print "AdaBoost accuracy: %r" % acc

try:
    prettyPicture(clf, features_test, labels_test, f_name="ada_boost.png")
except NameError:
    pass
plt.xlabel("bumpiness") plt.ylabel("grade") plt.show() ################################################################################ ### your code here! name your classifier object clf if you want the ### visualization code (prettyPicture) to show you the decision boundary ## K Nearest Neighbors knnclf = KNeighborsClassifier() knnclf.fit(features_train, labels_train) pred = knnclf.predict(features_test) knnacc = accuracy_score(labels_test, pred) print(knnacc) prettyPicture(knnclf, features_test, pred) # accuracy = 0.92 rfclf = RandomForestClassifier() rfclf.fit(features_train, labels_train) pred = rfclf.predict(features_test) rfacc = accuracy_score(labels_test, pred) print(rfacc) prettyPicture(rfclf, features_test, pred) #accuracy = 0.92 abclf = AdaBoostClassifier() abclf.fit(features_train, labels_train) pred = abclf.predict(features_test) abacc = accuracy_score(labels_test, pred) print(abacc)
    clf = AdaBoostClassifier(n_estimators=n)
    clf = clf.fit(features_train, labels_train)
    pred = clf.predict(features_test)
    return accuracy_score(pred, labels_test)

def optimal_adaboost():
    print("optimal AdaBoost")
    optimal_n = 0
    optimal_accuracy = 0
    for i in range(1, 30):
        acc = classify_adaboost(i)
        if acc > optimal_accuracy:
            optimal_accuracy = acc
            optimal_n = i
    print("estimators: " + str(optimal_n))
    print("accuracy: " + str(optimal_accuracy))
    print("")
    clf = AdaBoostClassifier(n_estimators=optimal_n)
    clf.fit(features_train, labels_train)
    return clf

clf = optimal_KNN()
prettyPicture(clf, features_test, labels_test, 'optimal_knn.png')
clf = optimal_forest()
prettyPicture(clf, features_test, labels_test, 'optimal_forest.png')
clf = optimal_adaboost()
prettyPicture(clf, features_test, labels_test, 'optimal_adaboost.png')
features_train, labels_train, features_test, labels_test = makeTerrainData()

clf = DecisionTreeClassifier(min_samples_split=50)

# now your job is to fit the classifier
# using the training features/labels, and to
# make a set of predictions on the test data
clf.fit(features_train, labels_train)

# store your predictions in a list named pred
pred = clf.predict(features_test)

prettyPicture(clf, features_test, labels_test, f_name="dec_tree.png")
Image.open('dec_tree.png').show()

acc = accuracy_score(pred, labels_test)
print "Decision Tree accuracy: %r" % acc

"""
clf = DecisionTreeClassifier(min_samples_split=2)
clf.fit(features_train, labels_train)
pred = clf.predict(features_test)
acc_min_samples_split_2 = accuracy_score(pred, labels_test)

clf = DecisionTreeClassifier(min_samples_split=50)
clf.fit(features_train, labels_train)
pred = clf.predict(features_test)
acc_min_samples_split_50 = accuracy_score(pred, labels_test)
### Predicting
print 'start predicting...'
t2 = time()
labels_pred = clf.predict(features_test)
str2 = "predicting time: " + str(round(time() - t2, 3)) + "s"
print str2 + '\n'

### Accuracy
from sklearn.metrics import accuracy_score
accuracy = accuracy_score(labels_test, labels_pred)
print 'accuracy: ' + str(accuracy)

### Write to Log
file = open('log.txt', 'a')
file.write('******************************************************\n\n')
file.write('DateTime: ' + date_time.strftime("%m/%d/%y %H:%M:%S") + '\n\n')
file.write('Method: ' + str(clf) + '\n\n')
file.write(str1 + '\n')
file.write(str2 + '\n')
file.write('Accuracy: ' + str(accuracy) + '\n\n\n')
file.close()

### Predicted boundary
try:
    prettyPicture(clf, features_test, labels_test, date_time.strftime("%m%d%y_%H%M%S"))
except NameError:
    pass
import pylab as pl

from classifyDT import classify

features_train, labels_train, features_test, labels_test = makeTerrainData()

from sklearn import tree
from sklearn.metrics import accuracy_score

clf_split_2 = tree.DecisionTreeClassifier(min_samples_split=2)
clf_split_50 = tree.DecisionTreeClassifier(min_samples_split=50)

clf_split_2.fit(features_train, labels_train)
clf_split_50.fit(features_train, labels_train)

pred_split_2 = clf_split_2.predict(features_test)
pred_split_50 = clf_split_50.predict(features_test)

acc_min_samples_split_2 = accuracy_score(pred_split_2, labels_test)
acc_min_samples_split_50 = accuracy_score(pred_split_50, labels_test)

### be sure to compute the accuracy on the test set
def submitAccuracies():
    return {"acc_min_samples_split_2": round(acc_min_samples_split_2, 3),
            "acc_min_samples_split_50": round(acc_min_samples_split_50, 3)}

#### grader code, do not modify below this line
prettyPicture(clf_split_2, features_test, labels_test)
output_image("test.png", "png", open("test.png", "rb").read())
prettyPicture(clf_split_50, features_test, labels_test)
output_image("test.png", "png", open("test.png", "rb").read())
clf = DecisionTreeClassifier(min_samples_split=40)

t0 = time()
### features_train = features_train[:int(len(features_train)/100)]
### labels_train = labels_train[:int(len(labels_train)/100)]
clf.fit(features_train, labels_train)
print("Time to train:", round(time() - t0, 3), "s")

t0 = time()
pred = clf.predict(features_test)
print("Time to make prediction:", round(time() - t0, 3), "s")

### calculate and return the accuracy on the test data
from sklearn.metrics import accuracy_score
accuracy = accuracy_score(labels_test, pred)
print("Accuracy of Decision Tree predictor is: {}".format(accuracy))

##################################################################
### draw the decision boundary with the test points overlaid
### we only take the first two features
new_features_train = features_train[:, :2]
new_features_test = features_test[:, :2]
new_clf = DecisionTreeClassifier(min_samples_split=40)
new_clf.fit(new_features_train, labels_train)

import matplotlib.pyplot as plt
plt = prettyPicture(new_clf, new_features_test, labels_test)
plt.show()
###output_image("test.png", "png", open("test.png", "rb").read())
#########################################################
#########################################################
y_fit = clf.fit(features_train, labels_train)
pred = y_fit.predict(features_test)
#### store your predictions in a list named pred

### the training data (features_train, labels_train) have both "fast" and "slow" points mixed
### in together--separate them so we can give them different colors in the scatterplot,
### and visually identify them
grade_fast = [features_train[ii][0] for ii in range(0, len(features_train)) if labels_train[ii]==0]
bumpy_fast = [features_train[ii][1] for ii in range(0, len(features_train)) if labels_train[ii]==0]
grade_slow = [features_train[ii][0] for ii in range(0, len(features_train)) if labels_train[ii]==1]
bumpy_slow = [features_train[ii][1] for ii in range(0, len(features_train)) if labels_train[ii]==1]

### draw the decision boundary with the test points overlaid
image_name = "SVM_" + kernel_type + "_cval_" + str(cval)
prettyPicture(clf, features_test, labels_test, image_name)
#output_image(image_name, "png", open(image_name + ".png", "rb").read())

from sklearn.metrics import accuracy_score
acc = accuracy_score(pred, labels_test)
print acc

def submitAccuracy():
    return acc
for algorithmParam in algorithmParamArray:
    # Train Data
    print "*******Algorithm used:", algorithmParam
    print "----------Training Phase (KNeighbors)---------"
    t0 = time()
    clfKNN = KNeighborsClassifier(n_neighbors=10, algorithm=algorithmParam)
    clfKNN.fit(features_train, labels_train)
    print "training time (KNeighbors):", round(time()-t0, 3), "s"

    # Test Data
    print "----------Testing Phase (KNeighbors)---------"
    accuracy = clfKNN.score(features_test, labels_test)
    print "Accuracy (KNeighbors):", accuracy, '\n'

    try:
        prettyPicture(clfKNN, features_test, labels_test)
    except NameError:
        pass

## 2.- Using Random Forest
# Parameters
#algorithmParamArray = ['auto', 'ball_tree', 'kd_tree', 'brute']
algorithmParam = 'Random Forest'

# Train Data
print "*******Algorithm used:", algorithmParam
print "----------Training Phase (Random Forest)----"
t0 = time()
clfRF = RandomForestClassifier(n_estimators=10)
clfRF.fit(features_train, labels_train)
print "training time (Random Forest):", round(time()-t0, 3), "s"
features_train, labels_train, features_test, labels_test = makeTerrainData()

### the training data (features_train, labels_train) have both "fast" and "slow" points mixed
### in together--separate them so we can give them different colors in the scatterplot,
### and visually identify them
grade_fast = [features_train[ii][0] for ii in range(0, len(features_train)) if labels_train[ii]==0]
bumpy_fast = [features_train[ii][1] for ii in range(0, len(features_train)) if labels_train[ii]==0]
grade_slow = [features_train[ii][0] for ii in range(0, len(features_train)) if labels_train[ii]==1]
bumpy_slow = [features_train[ii][1] for ii in range(0, len(features_train)) if labels_train[ii]==1]

#### initial visualization
plt.xlim(0.0, 1.0)
plt.ylim(0.0, 1.0)
plt.scatter(bumpy_fast, grade_fast, color="b", label="fast")
plt.scatter(grade_slow, bumpy_slow, color="r", label="slow")
plt.legend()
plt.xlabel("bumpiness")
plt.ylabel("grade")
plt.show()

clf = KNeighborsClassifier()
clf.fit(features_train, labels_train)
pred = clf.predict(features_test)
acc = accuracy_score(pred, labels_test)
print "KNN accuracy: %r" % acc

try:
    prettyPicture(clf, features_test, labels_test, f_name="knn.png")
except NameError:
    pass
print("tempo de predição:", round(time() - t0, 3), "s") acc = accuracy_score(labels_test, pred_adaboost) print(acc) print("RandomForest =======================") from sklearn.ensemble import RandomForestClassifier rnd_clf = RandomForestClassifier(n_estimators=100, max_leaf_nodes=4, n_jobs=2, random_state=0) t0 = time() rnd_clf = rnd_clf.fit(features_train, labels_train) print("tempo de treinamento:", round(time() - t0, 3), "s") t0 = time() pred_rnd = rnd_clf.predict(features_test) print("tempo de predição:", round(time() - t0, 3), "s") #print(rnd_clf.predict_proba(features_test))[0:10] acc = accuracy_score(labels_test, pred_rnd) print(acc) try: #prettyPicture(clf, features_test, labels_test) prettyPicture(clf_adaboost, features_test, labels_test) except NameError: pass
### visualization code (prettyPicture) to show you the decision boundary
from sklearn.ensemble import AdaBoostClassifier
from sklearn.ensemble import AdaBoostRegressor
from sklearn.metrics import accuracy_score
from sklearn.metrics import r2_score

# abc = AdaBoostClassifier(base_estimator=None, n_estimators=50, learning_rate=1.0,
#                          algorithm='SAMME.R', random_state=None)
# abc.fit(features_train, labels_train)
# predicted = abc.predict(features_test)
# accuracy = accuracy_score(labels_test, predicted)
# print accuracy

abr = AdaBoostRegressor(base_estimator=None, n_estimators=500, learning_rate=1.0,
                        loss='linear', random_state=None)
abr.fit(features_train, labels_train)
predicted_test = abr.predict(features_test)
test_score = r2_score(labels_test, predicted_test)
print test_score

try:
    prettyPicture(abr, features_test, labels_test)
except NameError:
    pass
### and testing datasets, respectively
### labels_train and labels_test are the corresponding item labels
features_train, features_test, labels_train, labels_test = preprocess()

#########################################################
### your code goes here ###
from sklearn import tree

clf = tree.DecisionTreeClassifier(min_samples_split=40)

t0 = time()
clf.fit(features_train, labels_train)
print "training time:", round(time() - t0, 3), "s"  # round() rounds the time to 3 decimal places

t0 = time()
pred = clf.predict(features_test)
print "predicting time:", round(time() - t0, 3), "s"

from sklearn.metrics import accuracy_score
acc = accuracy_score(labels_test, pred)
print(acc)
print(len(features_train[0]))

# visualization of the decision tree
from class_vis import output_image, prettyPicture
prettyPicture(clf, features_train, labels_train)
output_image("test.png", "png", open("test.png", "rb").read())
#########################################################
def display_picture(clf):
    try:
        prettyPicture(clf, features_test, labels_test)
    except NameError:
        pass
              if labels_train[ii] == 1]

#### initial visualization
plt.xlim(0.0, 1.0)
plt.ylim(0.0, 1.0)
plt.scatter(bumpy_fast, grade_fast, color="b", label="fast")
plt.scatter(grade_slow, bumpy_slow, color="r", label="slow")
plt.legend()
plt.xlabel("bumpiness")
plt.ylabel("grade")
plt.show()

################################################################################
### your code here! name your classifier object clf if you want the
### visualization code (prettyPicture) to show you the decision boundary

##### K NEAREST NEIGHBOR ALGORITHM
function_name = "KNeighborsRegressor"
#clf = KNeighborsRegressor(n_neighbors=2, leaf_size=1)
clf = KNeighborsRegressor()
clf.fit(features_train, labels_train)
pred = clf.predict(features_test)
pred_mat = [round(p) for p in pred]  # round regression outputs to class labels
pred_rate = accuracy_score(pred_mat, labels_test)
print pred_rate

### draw the decision boundary with the test points overlaid
prettyPicture(function_name, clf, features_test, labels_test)
from sklearn.naive_bayes import GaussianNB
from sklearn import metrics
from prep_terrain_data import makeTerrainData
from class_vis import prettyPicture, output_image

features_train, labels_train, features_test, labels_test = makeTerrainData()

### the training data (features_train, labels_train) have both "fast" and "slow" points mixed
### in together--separate them so we can give them different colors in the scatterplot,
### and visually identify them
grade_fast = [features_train[ii][0] for ii in range(0, len(features_train)) if labels_train[ii]==0]
bumpy_fast = [features_train[ii][1] for ii in range(0, len(features_train)) if labels_train[ii]==0]
grade_slow = [features_train[ii][0] for ii in range(0, len(features_train)) if labels_train[ii]==1]
bumpy_slow = [features_train[ii][1] for ii in range(0, len(features_train)) if labels_train[ii]==1]

NBclassifier = GaussianNB()
NBclassifier.fit(features_train, labels_train)
NBprediction = NBclassifier.predict(features_test)
print(metrics.accuracy_score(labels_test, NBprediction))
#print NBclassifier.score(features_test, labels_test)

### draw the decision boundary with the test points overlaid
prettyPicture(NBclassifier, features_test, labels_test, "naive_bayes/naive_bayes.png")
output_image("naive_bayes/naive_bayes.png", "png", open("naive_bayes/naive_bayes.png", "rb").read())
def print_picture(prefix, clf):
    try:
        filename = prefix + ".png"
        prettyPicture(clf, features_test, labels_test, filename)
    except NameError:
        pass
from prep_terrain_data import makeTerrainData
from class_vis import prettyPicture, output_image
from ClassifyNB import classify

import numpy as np
import pylab as pl

features_train, labels_train, features_test, labels_test = makeTerrainData()

### the training data (features_train, labels_train) have both "fast" and "slow" points mixed
### in together--separate them so we can give them different colors in the scatterplot,
### and visually identify them
grade_fast = [features_train[ii][0] for ii in range(0, len(features_train)) if labels_train[ii] == 0]
bumpy_fast = [features_train[ii][1] for ii in range(0, len(features_train)) if labels_train[ii] == 0]
grade_slow = [features_train[ii][0] for ii in range(0, len(features_train)) if labels_train[ii] == 1]
bumpy_slow = [features_train[ii][1] for ii in range(0, len(features_train)) if labels_train[ii] == 1]

# You will need to complete this function imported from the ClassifyNB script.
# Be sure to change to that code tab to complete this quiz.
classifier = classify(features_train, labels_train)
print classifier.score(features_test, labels_test)

### draw the decision boundary with the test points overlaid
prettyPicture(classifier, features_test, labels_test)
# output_image("test.png", "png", open("test.png", "rb").read())
              if labels_train[ii] == 1]

#### initial visualization
plt.xlim(0.0, 1.0)
plt.ylim(0.0, 1.0)
plt.scatter(bumpy_fast, grade_fast, color="b", label="fast")
plt.scatter(grade_slow, bumpy_slow, color="r", label="slow")
plt.legend()
plt.xlabel("bumpiness")
plt.ylabel("grade")
plt.show()

################################################################################
### your code here! name your classifier object clf if you want the
### visualization code (prettyPicture) to show you the decision boundary
from sklearn.neighbors import KDTree

kdt = KDTree(features_train, leaf_size=30, metric='euclidean')
t0 = time()
kdt.query(features_train, k=2, return_distance=False)
print("training time:", round(time() - t0, 3), "s")

# KDTree is a neighbor-search structure, not a classifier: it has no score()
# or predict(), so the original kdt.score(features_test) call cannot work
# (see the classification sketch below this snippet)
#t0 = time()
#print(kdt.score(features_test))
try:
    # prettyPicture(clf, features_test, labels_test)
    prettyPicture(kdt, features_test, labels_test)  # fails for the same reason
except (NameError, AttributeError):
    pass
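# KDTree only returns neighbor indices; a hedged sketch of turning those
# queries into majority-vote class predictions (the helper name and k value
# are illustrative assumptions, not part of the original snippet):
import numpy as np

def kdtree_predict(kdt, labels_train, features, k=3):
    # look up the k nearest training points for each query point
    ind = kdt.query(features, k=k, return_distance=False)
    labels = np.asarray(labels_train)
    # majority vote over the neighbors' labels (binary 0/1 labels assumed)
    return (labels[ind].mean(axis=1) > 0.5).astype(int)

pred = kdtree_predict(kdt, labels_train, features_test)
print("KDTree kNN accuracy:", np.mean(pred == np.asarray(labels_test)))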
clf = KNeighborsClassifier(n_neighbors=1)
t0 = time()
clf.fit(features_train, labels_train)
print "training time:", round(time()-t0, 3), "s"

t0 = time()
prediction = clf.predict(features_test)
print "prediction time:", round(time()-t0, 3), "s"

from sklearn.metrics import accuracy_score
accuracy = accuracy_score(prediction, labels_test)
print "Accuracy:", accuracy

try:
    prettyPicture(clf, features_test, labels_test, "KNN.png")
except NameError:
    pass

### Adaboost
print "Adaboost:"
from sklearn.ensemble import AdaBoostClassifier
clf = AdaBoostClassifier()
t0 = time()
clf.fit(features_train, labels_train)
print "training time:", round(time()-t0, 3), "s"

t0 = time()
prediction = clf.predict(features_test)
print "prediction time:", round(time()-t0, 3), "s"
# sklearn.metrics.accuracy_score(pred, labels_test), accuracy method used by instructor in video
# accuracy of Naive Bayes Terrain Classifier, method 3 of 3
myGaussianNBTerrainClassifierAAccuracy = sklearn.metrics.accuracy_score(pred, labels_test)
print("\tmyGaussianNBTerrainClassifierAAccuracy - {}".format(myGaussianNBTerrainClassifierAAccuracy))
# print("\ttype(myGaussianNB_Classifier_Accuracy) - {}\n".format(type(myGaussianNB_Classifier_Accuracy)))

# sklearn.metrics.accuracy_score(pred, labels_test), accuracy method used by instructor in video
# accuracy of Support Vector Machines (SVM) Terrain Classifier, method 3 of 3
SupportVectorMachinesSVMTerraiClassifieAccuracy = sklearn.metrics.accuracy_score(SVMpred, labels_test)
print("\tSupportVectorMachinesSVMTerraiClassifieAccuracy - {}\n".format(SupportVectorMachinesSVMTerraiClassifieAccuracy))
# print("\ttype(SupportVectorMachinesSVMTerraiClassifieAccuracy) - {}\n".format(type(SupportVectorMachinesSVMTerraiClassifieAccuracy)))

### draw the Naive Bayes Gaussian classifier decision boundary
### with the test points overlaid
myPrettyPicture = prettyPicture(clf, features_test, labels_test)

### draw the SVM classifier decision boundary
### with the test points overlaid
myPrettyPicture = prettyPicture(SVMclf, features_test, labels_test)
# print("\ttype(myPrettyPicture) - {}\n".format(type(myPrettyPicture)))
# output_image("test.png", "png", open('/Users/Menfi/Documents/workspace/zzzzz/src/test.png', "rb").read())

print('End studentMain.py')
plt.xlabel("bumpiness") plt.ylabel("grade") plt.show() ################################################################################# ### your code here! name your classifier object clf if you want the ### visualization code (prettyPicture) to show you the decision boundary ### importing random forest classfier and accuracy from sklearn.ensemble import RandomForestClassifier from sklearn.metrics import accuracy_score ### declare, train and predict classifier clf = RandomForestClassifier(n_estimators = 5, min_samples_split = 30, random_state = 90) clf.fit(features_train,labels_train) pred = clf.predict(features_test) ### print accuracy acc = accuracy_score(labels_test,pred) print acc try: prettyPicture(clf, features_test, labels_test) except NameError: pass
from class_vis import prettyPicture, output_image
from prep_terrain_data import makeTerrainData

import matplotlib.pyplot as plt
import numpy as np
import pylab as pl

features_train, labels_train, features_test, labels_test = makeTerrainData()

from sklearn.svm import SVC
clf = SVC(C=1.0, kernel="rbf")
clf.fit(features_train, labels_train)
pred = clf.predict(features_test)

from sklearn.metrics import accuracy_score
acc = accuracy_score(pred, labels_test)
print "accuracy: ", acc

prettyPicture(clf, features_test, labels_test, "test1.png")
from prep_terrain_data import makeTerrainData
from class_vis import prettyPicture, output_image
from ClassifyNB import classify

import numpy as np
import pylab as pl

features_train, labels_train, features_test, labels_test = makeTerrainData()

### the training data (features_train, labels_train) have both "fast" and "slow" points mixed
### in together--separate them so we can give them different colors in the scatterplot,
### and visually identify them
grade_fast = [features_train[ii][0] for ii in range(0, len(features_train)) if labels_train[ii]==0]
bumpy_fast = [features_train[ii][1] for ii in range(0, len(features_train)) if labels_train[ii]==0]
grade_slow = [features_train[ii][0] for ii in range(0, len(features_train)) if labels_train[ii]==1]
bumpy_slow = [features_train[ii][1] for ii in range(0, len(features_train)) if labels_train[ii]==1]

# You will need to complete this function imported from the ClassifyNB script.
# Be sure to change to that code tab to complete this quiz.
clf = classify(features_train, labels_train)

### draw the decision boundary with the test points overlaid
prettyPicture(clf, features_test, labels_test, "test_100.png")
#output_image("test.png", "png", open("test.png", "rb").read())
def classify(features_train, labels_train):
    ### import the sklearn module for GaussianNB
    ### create classifier
    ### fit the classifier on the training features and labels
    ### return the fit classifier
    ### your code goes here!
    from sklearn.naive_bayes import GaussianNB
    clf = GaussianNB()
    clf.fit(features_train, labels_train)
    return clf

import numpy as np
import pylab as pl

features_train, labels_train, features_test, labels_test = makeTerrainData()

### the training data (features_train, labels_train) have both "fast" and "slow" points mixed
### in together--separate them so we can give them different colors in the scatterplot,
### and visually identify them
grade_fast = [features_train[ii][0] for ii in range(0, len(features_train)) if labels_train[ii]==0]
bumpy_fast = [features_train[ii][1] for ii in range(0, len(features_train)) if labels_train[ii]==0]
grade_slow = [features_train[ii][0] for ii in range(0, len(features_train)) if labels_train[ii]==1]
bumpy_slow = [features_train[ii][1] for ii in range(0, len(features_train)) if labels_train[ii]==1]

# You will need to complete this function imported from the ClassifyNB script.
# Be sure to change to that code tab to complete this quiz.
clf = classify(features_train, labels_train)

### draw the decision boundary with the text points overlaid
prettyPicture(clf, features_test, labels_test)
output_image("test.png", "png", open("test.png", "rb").read())
### your code here! name your classifier object clf if you want the
### visualization code (prettyPicture) to show you the decision boundary
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import AdaBoostClassifier, RandomForestClassifier

clfKNN = KNeighborsClassifier(n_neighbors=9, weights='uniform', algorithm='auto', leaf_size=30,
                              p=1, metric='minkowski', metric_params=None, n_jobs=1)
clfADA = AdaBoostClassifier(base_estimator=None, n_estimators=50, learning_rate=0.1,
                            algorithm='SAMME.R', random_state=None)
clfRFC = RandomForestClassifier(n_estimators=10, criterion='gini', max_depth=None,
                                min_samples_split=2, min_samples_leaf=1,
                                min_weight_fraction_leaf=0.0, max_features='auto',
                                max_leaf_nodes=None, bootstrap=True, oob_score=False,
                                n_jobs=1, random_state=None, verbose=0, warm_start=False,
                                class_weight=None)

which = raw_input("Enter the classifier to use: ")
#which = "ADA"
if which == "KNN":
    clfKNN.fit(features_train, labels_train)
    print "KNN Accuracy = ", clfKNN.score(features_test, labels_test)
    clf = clfKNN
    prettyPicture(clfKNN, features_test, labels_test)
elif which == "ADA":
    clfADA.fit(features_train, labels_train)
    clf = clfADA
    print "Adaboost Accuracy = ", clfADA.score(features_test, labels_test)
    prettyPicture(clfADA, features_test, labels_test)
else:
    clfRFC.fit(features_train, labels_train)
    clf = clfRFC
    print "RandomForestClassifier Accuracy = ", clfRFC.score(features_test, labels_test)
    prettyPicture(clfRFC, features_test, labels_test)

try:
    prettyPicture(clf, features_test, labels_test)
except NameError:
    pass