def main():
    """Fit a Gaussian naive Bayes model and print timings plus accuracy.

    The data comes from ``preprocess()``, which yields the training
    features, test features, training labels and test labels, in that
    order. Fitting, predicting and scoring are each wrapped in the same
    ``timing.Timer`` context manager, and the value of calling the timer
    is printed after each stage.

    NOTE(review): calling the timer presumably returns the duration of the
    most recently completed ``with`` block -- confirm against
    ``timing.Timer``.
    """
    train_x, test_x, train_y, test_y = preprocess()

    model = GaussianNB()
    stopwatch = timing.Timer()

    # Stage 1: training.
    with stopwatch:
        model.fit(train_x, train_y)
    print('fit took', stopwatch())

    # Stage 2: prediction on the held-out features.
    with stopwatch:
        predicted = model.predict(test_x)
    print('predict took', stopwatch())

    # Stage 3: accuracy of the predictions against the true test labels.
    with stopwatch:
        accuracy = metrics.accuracy_score(test_y, predicted)
    print('score took', stopwatch())

    print('SCORE', accuracy)
Chris has label 1 """ import sys from time import time sys.path.append("../tools/") from tools.email_preprocess import preprocess from sklearn import tree from sklearn.metrics import accuracy_score ### features_train and features_test are the features for the training ### and testing datasets, respectively ### labels_train and labels_test are the corresponding item labels features_train, features_test, labels_train, labels_test = preprocess() ######################################################### ### your code goes here ### clf = tree.DecisionTreeClassifier(min_samples_split=40) clf = clf.fit(features_train, labels_train) pred = clf.predict(features_test) acc = accuracy_score(labels_test, pred) print 'number of features:{}, accuracy:{}'.format(len(features_train[0]), acc) #########################################################