def rate(review):
    """Return the combined classifier's prediction for a single review.

    The review text is normalised with ``SVM.asciify`` and scored by five
    base models, in this fixed order:

    1. Bayes classifier loaded with the "U" (unigram) data
    2. Bayes classifier loaded with the "A" (adjective) data
    3. Bayes classifier loaded with the "P" (part-of-speech) data
    4. SVM loaded with the "U" (unigram) model and word list
    5. SVM loaded with the "A" (adjective) model and word list

    The five scores are handed to ``Tree.predict`` and the first element
    of its result is returned.

    Side effects: the module-level state of ``bayesClassifier`` and ``SVM``
    is reloaded several times, and ``bayesClassifier.partOfSpeech`` is left
    set to ``True`` when the function returns.
    """
    text = SVM.asciify(review)
    features = []

    # Bayes scores: unigram, adjective, then part-of-speech.
    for mode in ("U", "A", "P"):
        bayesClassifier.loadData(mode)
        if mode == "P":
            # The flag is switched on only after the "P" data is loaded.
            # NOTE(review): it is never switched off in this function;
            # whether loadData() resets it is not visible here -- confirm
            # before calling rate() more than once per process.
            bayesClassifier.partOfSpeech = True
        features.append(bayesClassifier.percentPositive(text))

    # The SVM models work on the tokenised review, not the raw string.
    tokens = text.split()

    # SVM predictions: unigram first, then adjective.
    for mode in ("U", "A"):
        SVM.loadModule(mode)
        SVM.loadWords(mode)
        vector = SVM.intersection(SVM.wordList, tokens)
        features.append(SVM.movieReviewer.predict(vector)[0])

    return Tree.predict(features)[0]
# --- Example no. 2 (separator left by the code-sample aggregator) ---
# 0
#!/usr/bin/python

import bayesClassifier
import os
from sys import argv

# Evaluation script: loads the Bayes model selected by the first command-line
# argument, scores the review files found in ./test/pos, prints every score
# and counts a review as correctly classified when its score is above 0.5.

total = 0
done = 0
accurate = 0
keywordList = {}

bayesClassifier.loadData(argv[1])

files = os.listdir('./test/pos')

# comment this line to increase sample size
files = files[:3000]

total = len(files)

# Last 10%-step that has been passed; only bookkeeping at the moment because
# the progress-bar output below is disabled.
progress = 0

for i in files:
    done += 1
    if ((done * 100 / float(total)) > (progress + 10)):
        progress += 10
        #os.system("echo -n '='")
    # Close the file deterministically instead of leaking the handle.
    with open('./test/pos/' + i, 'r') as handle:
        f = bayesClassifier.asciify(handle.read())
    # Score the review once and reuse the value for both output and counting.
    score = bayesClassifier.percentPositive(f)
    # Parenthesised form prints the same single value on Python 2 and 3.
    print(score)
    if score > 0.5:
        accurate += 1