def _format_average(values):
    """Return the arithmetic mean of *values*, or 'N/A' when it is empty."""
    if not values:
        return 'N/A'
    # float() keeps the division exact under Python 2 even for integer inputs.
    return sum(values) / float(len(values))


def sentiment_test(lt, nb, directory, output):
    """Run every numbered train/test pair under a prefix and write an HTML report.

    Runs are numbered from 1.  For run N the training file is the single match
    of ``directory + 'N_tr*'`` and the test file is the single match of
    ``directory + 'N_test*'``.  Iteration stops at the first N that has no
    training file.  A fresh PostClassifier is trained and tested for each run.

    Args:
        lt: Forwarded unchanged as the second argument of
            ``PostClassifier.sentiment_train`` (presumably a lower threshold;
            confirm against the classifier).
        nb: Forwarded unchanged as the third argument of
            ``PostClassifier.sentiment_train`` (presumably the negative-bucket
            flag; confirm against the classifier).
        directory: String prefix the run number is appended to.  It is
            concatenated verbatim, so it must already end with a path
            separator when it names a directory.
        output: Path of the HTML report to create or overwrite.

    The report holds one table row per run (false positive rate, false
    negative rate, accuracy, each multiplied by 100) followed by an AVG row.
    When no run is found the AVG cells contain ``N/A``.

    Raises:
        SystemExit: if ``output`` cannot be opened for writing.
        AssertionError: if a run does not have exactly one training file and
            exactly one test file.
    """
    try:
        f = open(output, 'w')
    except IOError:
        print('Failed to open output file')
        sys.exit()

    fp = []
    fn = []
    acc = []

    # The with-block closes the report even when training, testing or one of
    # the asserts below raises part-way through.
    with f:
        f.write('<html><body><table border="1" cellpadding="2"><tbody>')
        f.write('<tr><td>Run</td>')
        f.write('<td>False Positive Rate (%)</td>')
        f.write('<td>False Negative Rate (%)</td>')
        f.write('<td>Accuracy (%)</td></tr>')

        run = 1
        while True:
            train_glob = directory + str(run) + '_tr*'
            print('train_glob: ' + train_glob)
            names = glob.glob(train_glob)
            if not names:
                # No training file for this run number: all runs are done.
                break
            assert len(names) == 1, 'invalid naming schema in dir'

            # Only build a classifier once we know there is data to train on.
            classifier = PostClassifier()
            classifier.sentiment_train(names[0], lt, nb)

            test_glob = directory + str(run) + '_test*'
            names = glob.glob(test_glob)
            assert len(names) == 1, 'invalid naming schema in dir'

            # result[0..2] are written under the FP / FN / accuracy columns;
            # result[3] is iterated and printed as the list of errors.
            result = classifier.sentiment_test(names[0])

            print('Run {0}'.format(run))
            print('Errors: ')
            for elem in result[3]:
                print(elem)
            print('\n\n\n')

            fp_pct = result[0] * 100
            fn_pct = result[1] * 100
            acc_pct = result[2] * 100

            f.write('<tr><td>{0}</td>'.format(run))
            f.write('<td>{0}</td>'.format(fp_pct))
            f.write('<td>{0}</td>'.format(fn_pct))
            f.write('<td>{0}</td></tr>'.format(acc_pct))

            fp.append(fp_pct)
            fn.append(fn_pct)
            acc.append(acc_pct)
            run += 1

        f.write('<tr><td>AVG</td>')
        f.write('<td>{0}</td>'.format(_format_average(fp)))
        f.write('<td>{0}</td>'.format(_format_average(fn)))
        f.write('<td>{0}</td></tr>'.format(_format_average(acc)))
        f.write('</tbody></table></body></html>')
from classify import PostClassifier

# Interactive sentiment demo: train one classifier from user-supplied settings,
# then classify sentences typed at the prompt until the user ends the session.

train_path = raw_input("Path to training data: ")
lower_threshold = int(raw_input("Lower threshold: "))
# Only the exact text "True" enables the feature; any other answer disables it.
negative_bucket = raw_input("Use negative bucket feature? (True/False): ") == "True"

classifier = PostClassifier()
classifier.sentiment_train(train_path, lower_threshold, negative_bucket)

try:
    while True:
        sentence = raw_input("Sentence to classify: ")
        print("Guess: " + classifier.classify_sentiment(sentence))
except (EOFError, KeyboardInterrupt):
    # Ctrl-D / Ctrl-C is the only way out of the prompt loop, so treat it as a
    # normal exit instead of letting a traceback reach the user.
    print("")