### The training data (features_train, labels_train) have both "fast" and
### "slow" points mixed in together--separate them so we can give them
### different colors in the scatterplot and visually identify them.
### As used below, column 0 of each feature row is read as the grade and
### column 1 as the bumpiness; label 0 selects "fast", label 1 selects "slow".
grade_fast = [point[0] for point, label in zip(features_train, labels_train) if label == 0]
bumpy_fast = [point[1] for point, label in zip(features_train, labels_train) if label == 0]
grade_slow = [point[0] for point, label in zip(features_train, labels_train) if label == 1]
bumpy_slow = [point[1] for point, label in zip(features_train, labels_train) if label == 1]

#### initial visualization
plt.xlim(0.0, 1.0)
plt.ylim(0.0, 1.0)
# Both classes are drawn with bumpiness on x and grade on y, so the two
# scatter calls agree with each other and with the axis labels set below.
plt.scatter(bumpy_fast, grade_fast, color="b", label="fast")
plt.scatter(bumpy_slow, grade_slow, color="r", label="slow")
plt.legend()
plt.xlabel("bumpiness")
plt.ylabel("grade")
plt.show()

# You will need to complete this function imported from the ClassifyNB script.
# Be sure to change to that code tab to complete this quiz.
clf = classify(features_train, labels_train)

accu = submitAccuracy(clf, features_test, labels_test)
# Pre-formatted, parenthesised print: emits the same text as the Python 2
# statement `print "Accuracy:", accu` and is also valid Python 3 syntax.
# The one-element tuple keeps %-formatting safe whatever type accu has.
print("Accuracy: %s" % (accu,))

### draw the decision boundary with the test points overlaid
prettyPicture(clf, features_test, labels_test)
# output_image("test.png", "png", open("test.png", "rb").read())
### The training data (features_train, labels_train) have both "fast" and
### "slow" points mixed in together--separate them so we can give them
### different colors in the scatterplot and visually identify them.
### As used below, column 0 of each feature row is read as the grade and
### column 1 as the bumpiness; label 0 selects "fast", label 1 selects "slow".
grade_fast = [point[0] for point, label in zip(features_train, labels_train) if label == 0]
bumpy_fast = [point[1] for point, label in zip(features_train, labels_train) if label == 0]
grade_slow = [point[0] for point, label in zip(features_train, labels_train) if label == 1]
bumpy_slow = [point[1] for point, label in zip(features_train, labels_train) if label == 1]

# You will need to complete this function imported from the ClassifyNB script.
# Be sure to change to that code tab to complete this quiz.
clf = classify(features_train, labels_train)

pred = clf.predict(features_test)
# Accuracy = fraction of test points whose predicted label equals the true
# label. Matches are counted pair by pair (so plain lists work as well as
# arrays) and the count is converted to float before dividing, so the result
# is a true fraction even where `/` on two integers floors (Python 2).
accuracy = float(
    sum(1 for predicted, actual in zip(pred, labels_test) if predicted == actual)
) / len(pred)
# print clf.score(features_test, labels_test)

### draw the decision boundary with the test points overlaid
prettyPicture(clf, features_test, labels_test)
# output_image("test.png", "png", open("test.png", "rb").read())
#!/usr/bin/python

import pickle

from classifyNB import classify

### Task 1: Select what features you'll use.
### features_list is a list of strings, each of which is a feature name.
### The first feature must be "poi".
labels_list = ['poi']
features_list = ['poi', 'salary', 'total_payments']  # You will need to use more features

### Load the dictionary containing the dataset
# The file is opened in binary mode, as pickle requires, and inside a `with`
# block so the handle is closed as soon as loading finishes.
# NOTE: unpickling can execute arbitrary code -- only load a dataset file
# that comes from a trusted source.
with open("final_project_dataset.pkl", "rb") as dataset_file:
    data_dict = pickle.load(dataset_file)

### Task 2: Remove outliers
### Task 3: Create new feature(s)
### Store to my_dataset for easy export below.
my_dataset = data_dict

clf = classify(my_dataset, features_list)