Ejemplo n.º 1
0
def main():
    """Fit a Gaussian naive Bayes model, predict on the test split, and
    print how long each phase took together with the accuracy score.

    The data comes from ``preprocess()``, which returns the training
    features, test features, training labels and test labels in that order.
    A single ``timing.Timer`` is reused for all three phases; it is used as
    a context manager and then called to obtain the value that is printed
    (presumably the elapsed time of the last ``with`` block -- confirm
    against the ``timing`` module).
    """
    train_x, test_x, train_y, test_y = preprocess()

    model = GaussianNB()
    stopwatch = timing.Timer()

    # Phase 1: training.
    with stopwatch:
        model.fit(train_x, train_y)
    print('fit took', stopwatch())

    # Phase 2: inference on the held-out features.
    with stopwatch:
        predicted = model.predict(test_x)
    print('predict took', stopwatch())

    # Phase 3: scoring the predictions against the true test labels.
    with stopwatch:
        accuracy = metrics.accuracy_score(test_y, predicted)
    print('score took', stopwatch())

    print('SCORE', accuracy)
Ejemplo n.º 2
0
    Chris has label 1
"""
    
import sys
from time import time
sys.path.append("../tools/")
from tools.email_preprocess import preprocess

from sklearn import tree
from sklearn.metrics import accuracy_score


### features_train and features_test are the features for the training
### and testing datasets, respectively
### labels_train and labels_test are the corresponding item labels
features_train, features_test, labels_train, labels_test = preprocess()


#########################################################
### your code goes here ###

# Decision tree that only splits an internal node when it holds at least
# 40 samples (scikit-learn's ``min_samples_split`` parameter).
clf = tree.DecisionTreeClassifier(min_samples_split=40)
clf = clf.fit(features_train, labels_train)

# Predict on the held-out features and compare with the true test labels.
pred = clf.predict(features_test)
acc = accuracy_score(labels_test, pred)

# len(features_train[0]) is the length of the first training row, i.e. the
# number of features per sample.
# A parenthesized single-argument print produces the same output under
# Python 2 and Python 3; the bare print statement is a SyntaxError on 3.
print('number of features:{}, accuracy:{}'.format(len(features_train[0]), acc))

#########################################################