예제 #1
0
def test():
    """Fit Gaussian Naive Bayes on a toy data set and print one prediction.

    Afterwards the terrain data is loaded with ``makeTerrainData`` and the
    length of the fourth element of its return value is printed.
    """
    import numpy as np
    from sklearn.naive_bayes import GaussianNB
    from prep_terrain_data import makeTerrainData

    # Six two-feature samples, three for each of the labels 1 and 2.
    x = np.array([[-1, 1], [-2, 1], [-3, -2], [1, 1], [2, 1], [3, 2]])
    y = np.array([1, 1, 1, 2, 2, 2])

    clf = GaussianNB()
    clf.fit(x, y)

    # predict() takes a 2-D array of shape (n_samples, n_features); a single
    # query point therefore has to be wrapped in an outer list.
    print(clf.predict([[-0.8, -1]]))

    a = makeTerrainData()
    # Call form of print works identically on Python 2 and Python 3 here.
    print(len(a[3]))
예제 #2
0
def classify(features_train, labels_train):
    """Train a Gaussian Naive Bayes classifier on the supplied training data.

    Args:
        features_train: training feature vectors, handed to ``GaussianNB.fit``.
        labels_train: training labels matching ``features_train``.

    Returns:
        The fitted ``GaussianNB`` classifier.
    """
    from sklearn.naive_bayes import GaussianNB

    clf = GaussianNB()
    # Train on the data the caller passed in rather than loading a data set
    # here, so the function honours its own parameters.
    clf.fit(features_train, labels_train)

    return clf
예제 #3
0
#!/usr/bin/python

import matplotlib.pyplot as plt
from prep_terrain_data import makeTerrainData
from class_vis import prettyPicture
features_train, labels_train, features_test, labels_test = makeTerrainData()


### The training data (features_train, labels_train) has the points plotted as
### "fast" (label 0) and "slow" (label 1) mixed together -- separate them so
### each group gets its own colour in the scatterplot.
### Feature index 0 is stored as "grade", index 1 as "bumpy".
grade_fast = [point[0] for point, label in zip(features_train, labels_train) if label == 0]
bumpy_fast = [point[1] for point, label in zip(features_train, labels_train) if label == 0]
grade_slow = [point[0] for point, label in zip(features_train, labels_train) if label == 1]
bumpy_slow = [point[1] for point, label in zip(features_train, labels_train) if label == 1]


#### initial visualization
plt.xlim(0.0, 1.0)
plt.ylim(0.0, 1.0)
# Both groups are drawn as (bumpiness, grade) so they agree with the axis
# labels set below.
plt.scatter(bumpy_fast, grade_fast, color="b", label="fast")
plt.scatter(bumpy_slow, grade_slow, color="r", label="slow")
plt.legend()
plt.xlabel("bumpiness")
plt.ylabel("grade")
plt.show()
#################################################################################


### your code here!  name your classifier object clf if you want the 
### visualization code (prettyPicture) to show you the decision boundary
# The estimators and helpers used in this section are not imported by the
# visible header of the script, so bring them into scope here.
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.discriminant_analysis import (
    LinearDiscriminantAnalysis,
    QuadraticDiscriminantAnalysis,
)
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split

# Display names, listed in the same order as the entries of `classifiers`.
names = ["Nearest Neighbors", "Linear SVM", "RBF SVM", "Decision Tree",
         "Random Forest", "AdaBoost", "Naive Bayes", "Linear Discriminant Analysis",
         "Quadratic Discriminant Analysis"]
classifiers = [
    KNeighborsClassifier(3),
    SVC(kernel="linear", C=0.025),
    SVC(gamma=2, C=1),
    DecisionTreeClassifier(max_depth=5),
    RandomForestClassifier(max_depth=5, n_estimators=10, max_features=1),
    AdaBoostClassifier(),
    GaussianNB(),
    LinearDiscriminantAnalysis(),
    QuadraticDiscriminantAnalysis()]

# Only the first two values returned by makeTerrainData() are used below.
X, y, a, b = makeTerrainData()

datasets = [[X, y]]

figure = plt.figure(figsize=(27, 9))
i = 1
# iterate over datasets
for ds in datasets:
    # preprocess dataset, split into training and test part

    X, y = ds
    X = StandardScaler().fit_transform(X)
    # 40% of the samples are held out as the test part.
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=.4)

    # Range of each scaled feature, widened by 0.5 on both sides.
    x_min, x_max = X[:, 0].min() - .5, X[:, 0].max() + .5
    y_min, y_max = X[:, 1].min() - .5, X[:, 1].max() + .5
예제 #5
0
#!/usr/bin/python
from __future__ import print_function
import matplotlib.pyplot as plt
from time import time
from prep_terrain_data import makeTerrainData
from class_vis import prettyPicture
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score

features_train, labels_train, features_test, labels_test = makeTerrainData()


### the training data (features_train, labels_train) have both "fast" and "slow"
### points mixed together--separate them so we can give them different colors
### in the scatterplot and identify them visually
### (label 0 is drawn as "fast", label 1 as "slow"; feature 0 is kept as
### "grade" and feature 1 as "bumpy")
fast_points = [row for row, label in zip(features_train, labels_train) if label == 0]
slow_points = [row for row, label in zip(features_train, labels_train) if label == 1]
grade_fast = [row[0] for row in fast_points]
bumpy_fast = [row[1] for row in fast_points]
grade_slow = [row[0] for row in slow_points]
bumpy_slow = [row[1] for row in slow_points]


#### initial visualization
plt.xlim(0.0, 1.0)
plt.ylim(0.0, 1.0)
# x is bumpiness and y is grade for BOTH groups, matching the axis labels.
plt.scatter(bumpy_fast, grade_fast, color="b", label="fast")
plt.scatter(bumpy_slow, grade_slow, color="r", label="slow")
plt.legend()
plt.xlabel("bumpiness")
plt.ylabel("grade")
plt.show()
################################################################################
 def __init__(self, N_points):
     """Load the terrain data and keep its four parts on the instance.

     N_points is forwarded unchanged to makeTerrainData; the four returned
     values are stored as features_train, labels_train, features_test and
     labels_test, in that order.
     """
     terrain_parts = makeTerrainData(N_points)
     (self.features_train,
      self.labels_train,
      self.features_test,
      self.labels_test) = terrain_parts
예제 #7
0
import numpy as np
from prep_terrain_data import makeTerrainData
from matplotlib import pyplot as plt
from sklearn import svm
import pandas
from class_vis import prettyPicture

x_train, y_train, x_test, y_test = makeTerrainData()

# Collect the test points in a DataFrame: two feature columns plus the label.
df = pandas.DataFrame(data=x_test, columns=['f_1', 'f_2'])
df['label'] = pandas.Series(y_test)

# Split the rows by label value.
dat_pos = df[df['label'] == 1]
dat_neg = df[df['label'] == 0]

# Label-1 rows are drawn in blue first, then label-0 rows in red.
for subset, colour in ((dat_pos, 'blue'), (dat_neg, 'red')):
    plt.scatter(subset['f_1'], subset['f_2'], c=colour)
#plt.show()

# RBF-kernel support vector classifier trained on the training split.
clf = svm.SVC(C=0.01, kernel='rbf')
clf.fit(x_train, y_train)

# Hand the fitted classifier and the test split to the plotting helper.
prettyPicture(clf, x_test, y_test)

#w = clf.coef_[0]
#a = -w[0]/w[1]
#m = clf.intercept_[0] / w[1]
#x_ = np.linspace(0.0,1.0)
#y_ = a*x_ - m

#plt.plot(x_,y_)
예제 #8
0
import numpy as np
from prep_terrain_data import makeTerrainData
from matplotlib import pyplot as plt
from sklearn import svm
import pandas
from class_vis import prettyPicture


x_train, y_train, x_test, y_test = makeTerrainData()


# Test features as a two-column frame, with the test labels attached.
feature_columns = ['f_1', 'f_2']
df = pandas.DataFrame(data=x_test, columns=feature_columns)
df['label'] = pandas.Series(y_test)

# Boolean masks selecting the rows of each label value.
positive_rows = df.label == 1
negative_rows = df.label == 0
dat_pos = df[positive_rows]
dat_neg = df[negative_rows]

# Scatter the two groups in separate colours (label 1 blue, label 0 red).
plt.scatter(dat_pos['f_1'], dat_pos['f_2'], c='blue')
plt.scatter(dat_neg['f_1'], dat_neg['f_2'], c='red')
#plt.show()

# Train an RBF-kernel SVC on the training split.
clf = svm.SVC(kernel='rbf', C=0.01)
clf.fit(x_train, y_train)

# Pass the fitted classifier and the test split to the plotting helper.
prettyPicture(clf, x_test, y_test)

#w = clf.coef_[0]
#a = -w[0]/w[1]
#m = clf.intercept_[0] / w[1]
#x_ = np.linspace(0.0,1.0)
#y_ = a*x_ - m
예제 #9
-1
def classify(features_train, labels_train):
    """Create a Gaussian Naive Bayes classifier and fit it on the training data.

    Args:
        features_train: training feature vectors, handed to ``GaussianNB.fit``.
        labels_train: training labels matching ``features_train``.

    Returns:
        The fitted ``GaussianNB`` classifier.
    """
    ### import the sklearn module for GaussianNB
    from sklearn.naive_bayes import GaussianNB

    ### create classifier
    clf = GaussianNB()

    ### fit the classifier on the training features and labels
    clf.fit(features_train, labels_train)

    ### return the fit classifier
    return clf
import numpy as np
import pylab as pl


# NOTE(review): makeTerrainData, prettyPicture and output_image are not
# imported in the visible lines of this script -- confirm they are brought
# into scope elsewhere.
features_train, labels_train, features_test, labels_test = makeTerrainData()

### the training data (features_train, labels_train) have both "fast" and "slow" points mixed
### in together--separate them so we can give them different colors in the scatterplot,
### and visually identify them
grade_fast = [features_train[ii][0] for ii in range(0, len(features_train)) if labels_train[ii]==0]
bumpy_fast = [features_train[ii][1] for ii in range(0, len(features_train)) if labels_train[ii]==0]
grade_slow = [features_train[ii][0] for ii in range(0, len(features_train)) if labels_train[ii]==1]
bumpy_slow = [features_train[ii][1] for ii in range(0, len(features_train)) if labels_train[ii]==1]


# You will need to complete this function imported from the ClassifyNB script.
# Be sure to change to that code tab to complete this quiz.
clf = classify(features_train, labels_train)



### draw the decision boundary with the test points overlaid
prettyPicture(clf, features_test, labels_test)
# Read the image through a context manager so the file handle is closed
# deterministically instead of being left to the garbage collector.
with open("test.png", "rb") as image_file:
    image_bytes = image_file.read()
output_image("test.png", "png", image_bytes)