Example #1
0
### The training data (features_train, labels_train) have both "fast" and "slow"
### points mixed in together--separate them so we can give them different colors
### in the scatterplot, and visually identify them.
### Column 0 of each feature row is stored as "grade" and column 1 as "bumpy";
### rows labelled 0 go to the "fast" lists and rows labelled 1 to the "slow" lists.
grade_fast = [point[0] for point, label in zip(features_train, labels_train) if label == 0]
bumpy_fast = [point[1] for point, label in zip(features_train, labels_train) if label == 0]
grade_slow = [point[0] for point, label in zip(features_train, labels_train) if label == 1]
bumpy_slow = [point[1] for point, label in zip(features_train, labels_train) if label == 1]

#### initial visualization
plt.xlim(0.0, 1.0)
plt.ylim(0.0, 1.0)
# Both classes must use the same axis order (x = bumpiness, y = grade) so the
# points line up with the axis labels set below.
plt.scatter(bumpy_fast, grade_fast, color="b", label="fast")
plt.scatter(bumpy_slow, grade_slow, color="r", label="slow")
plt.legend()
plt.xlabel("bumpiness")
plt.ylabel("grade")
plt.show()


# You will need to complete this function imported from the ClassifyNB script.
# Be sure to change to that code tab to complete this quiz.
clf = classify(features_train, labels_train)
accu = submitAccuracy(clf, features_test, labels_test)
# Single-argument print with %-formatting emits the same text on Python 2 and 3;
# the one-element tuple keeps the formatting safe even if accu is itself a tuple.
print("Accuracy: %s" % (accu,))


### draw the decision boundary with the test points overlaid
prettyPicture(clf, features_test, labels_test)
#output_image("test.png", "png", open("test.png", "rb").read())

Example #2
0
### The training data (features_train, labels_train) have both "fast" and "slow"
### points mixed in together--separate them so we can give them different colors
### in the scatterplot, and visually identify them.
### Column 0 of each feature row is stored as "grade" and column 1 as "bumpy";
### rows labelled 0 go to the "fast" lists and rows labelled 1 to the "slow" lists.
grade_fast = [
    point[0] for point, label in zip(features_train, labels_train)
    if label == 0
]
bumpy_fast = [
    point[1] for point, label in zip(features_train, labels_train)
    if label == 0
]
grade_slow = [
    point[0] for point, label in zip(features_train, labels_train)
    if label == 1
]
bumpy_slow = [
    point[1] for point, label in zip(features_train, labels_train)
    if label == 1
]

# You will need to complete this function imported from the ClassifyNB script.
# Be sure to change to that code tab to complete this quiz.
clf = classify(features_train, labels_train)
pred = clf.predict(features_test)

# Fraction of test points whose predicted label matches the true label.
# float() forces true division: under Python 2 an int / int quotient would be
# floored, collapsing the accuracy to 0 or 1.
# NOTE(review): the elementwise `pred == labels_test` assumes pred is an
# array-like that compares elementwise (e.g. a numpy array) -- confirm.
accuracy = float(sum(pred == labels_test)) / len(pred)
# print clf.score(features_test, labels_test)

### draw the decision boundary with the test points overlaid
prettyPicture(clf, features_test, labels_test)
# output_image("test.png", "png", open("test.png", "rb").read())
Example #3
0
#!/usr/bin/python

import pickle

from classifyNB import classify

### Task 1: Select what features you'll use.
### features_list is a list of strings, each of which is a feature name.
### The first feature must be "poi".
labels_list = ['poi']
features_list = ['poi', 'salary',
                 'total_payments']  # You will need to use more features

### Load the dictionary containing the dataset.
### Pickle data is binary, so the file is opened in "rb" mode, and the
### with-block guarantees the handle is closed once loading finishes.
### NOTE: pickle.load can execute arbitrary code while unpickling -- only load
### dataset files that come from a trusted source.
with open("final_project_dataset.pkl", "rb") as dataset_file:
    data_dict = pickle.load(dataset_file)

### Task 2: Remove outliers
### Task 3: Create new feature(s)
### Store to my_dataset for easy export below.
my_dataset = data_dict

# NOTE(review): classify is called here with the raw dataset dict and the list
# of feature names; confirm this matches the signature of classifyNB.classify.
clf = classify(my_dataset, features_list)