Esempio n. 1
0
def test():
    """Compare SVM cross-validation errors against the keyword classifier.

    Loads the feature vectors from the database, runs S-fold cross
    validation of an ``SVM(C=1)`` (S is set to the number of samples, so
    every fold holds out exactly one sample), and prints the total number
    of misclassified hold-out samples.  Then runs ``keyword_classify`` over
    every vote returned by ``get_all_votes`` and prints how many of those
    disagree with the recorded vote.

    All results are printed; nothing is returned.
    """
    with closing(connect_db()) as db:
        svm = SVM(C=1)
        fill_negative_votes()
        X, Y = get_feature_vecs(db)

        # Divide up X into S chunks (S == N: one sample per fold).
        N = len(X)
        S = N

        # The fold size does not change between iterations, so compute it
        # once.  It must be an int: math.ceil returns a float on Python 2,
        # and float values are not valid slice bounds for the arrays below.
        # The guard keeps an empty data set from dividing by zero (the loop
        # simply does not run in that case, as before).
        size_of_fold = int(math.ceil(1.0 * N / S)) if S else 0

        # Cross validation: accumulate the number of misclassified samples
        # over all hold-out folds.
        total_incorrect = 0
        for s in range(S):
            print("iter: %s" % (s,))
            start = s * size_of_fold
            end = start + size_of_fold
            if end > N:
                # Last fold may be short; clamp it to the data.
                print("end > N")
                end = N
            print("range: %s %s" % (start, end))

            # Hold out [start, end) and train on everything else.
            holdoutX = X[start:end, :]
            trainingX = np.concatenate((X[0:start], X[end:N]))

            holdoutY = Y[start:end]
            trainingY = np.concatenate((Y[0:start], Y[end:N]))

            print("len holdoutX: %s" % (len(holdoutX),))
            print("len trainingX; %s" % (len(trainingX),))
            print("len holdoutY: %s" % (len(holdoutY),))
            print("len trainingY; %s" % (len(trainingY),))

            svm.train_dual(trainingX, trainingY)

            num_misclass = svm.num_incorrect(holdoutX, holdoutY)
            total_incorrect += num_misclass
            print("Num incorrect: %s" % (num_misclass,))

        print("Total misclassified with SVM: %s" % (1.0 * total_incorrect,))

        # Now use keyword classification
        votes = get_all_votes()
        num_incorrect = 0
        for vote, event, doc in votes:
            classify = keyword_classify(event, doc)
            # The same line is reported for hits and misses; only the
            # counter depends on whether the prediction matched.
            print("Classified as: %s actual: %s" % (classify, vote))
            if classify != vote:
                num_incorrect += 1

        print("Total misclassified with keyword approach: %s"
              % (num_incorrect,))