def test():
    """Compare the SVM's cross-validation error with the keyword classifier.

    Loads the feature vectors from the database, runs S-fold cross
    validation with ``S == N`` (each fold holds out a single sample) and
    prints the number of held-out samples the SVM misclassifies. It then
    runs ``keyword_classify`` over every stored vote and prints how many of
    those disagree with the recorded vote.

    All results are reported on stdout; nothing is returned.

    NOTE(review): ``X`` is indexed as ``X[start:end, :]`` and passed to
    ``np.concatenate``, so it is assumed to be a 2-D NumPy array with one
    row per sample, and ``Y`` the matching 1-D label array -- confirm
    against ``get_feature_vecs``.
    """
    with closing(connect_db()) as db:
        svm = SVM(C=1)
        fill_negative_votes()
        X, Y = get_feature_vecs(db)

        # Divide up X into S chunks; S == N means one sample per chunk.
        N = len(X)
        S = N

        # Round up so that S folds always cover all N samples. The result
        # is converted to int because math.ceil returns a float on
        # Python 2 and NumPy rejects non-integer slice bounds. The guard
        # keeps an empty data set from dividing by zero.
        size_of_fold = int(math.ceil(1.0 * N / S)) if S else 0

        # Cross validation: accumulate the number of misclassified
        # held-out samples over all folds.
        total_incorrect = 0
        for s in range(S):
            print("iter: %s" % (s,))
            start = s * size_of_fold
            end = start + size_of_fold
            if end > N:
                # The last fold may be shorter than the others.
                print("end > N")
                end = N
            print("range: %s %s" % (start, end))

            # Rows [start, end) are held out; everything else is training.
            holdoutX = X[start:end, :]
            trainingX = np.concatenate((X[0:start], X[end:N]))
            holdoutY = Y[start:end]
            trainingY = np.concatenate((Y[0:start], Y[end:N]))
            print("len holdoutX: %s" % (len(holdoutX),))
            print("len trainingX; %s" % (len(trainingX),))
            print("len holdoutY: %s" % (len(holdoutY),))
            print("len trainingY; %s" % (len(trainingY),))

            svm.train_dual(trainingX, trainingY)
            num_misclass = svm.num_incorrect(holdoutX, holdoutY)
            total_incorrect += num_misclass
            print("Num incorrect: %s" % (num_misclass,))

        print("Total misclassified with SVM: %s" % (1.0 * total_incorrect,))

        # Now use keyword classification on every recorded vote.
        votes = get_all_votes()
        num_incorrect = 0
        for vote, event, doc in votes:
            classify = keyword_classify(event, doc)
            # Every prediction is logged, whether or not it matches.
            print("Classified as: %s actual: %s" % (classify, vote))
            if classify != vote:
                num_incorrect += 1

        print("Total misclassified with keyword approach: %s"
              % (num_incorrect,))