Ejemplo n.º 1
0
def sentiment_test(lt, nb, directory, output):
	"""Evaluate every numbered train/test pair and write an HTML report.

	For run = 1, 2, ... the training file is looked up with the glob
	`directory + str(run) + '_tr*'` and the test file with
	`directory + str(run) + '_test*'`. A fresh PostClassifier is trained on
	the former and tested on the latter. Iteration stops at the first run
	number for which no training file exists.

	Args:
		lt: passed through as the second argument of
			PostClassifier.sentiment_train.
		nb: passed through as the third argument of
			PostClassifier.sentiment_train.
		directory: prefix the run number is appended to directly; no path
			separator is inserted, so it normally ends with one.
		output: path of the HTML report; it is opened with mode 'w'.

	Raises:
		SystemExit: if `output` cannot be opened for writing.
		AssertionError: if a training glob matches more than one file, or a
			test glob does not match exactly one file.
	"""
	try:
		f = open(output, 'w')
	except IOError:
		print('Failed to open output file')
		# Non-zero status so callers/shells can see that the run failed.
		sys.exit(1)

	# try/finally: the report must be flushed and closed even when training
	# or testing raises half-way through.
	try:
		_write_sentiment_report(f, lt, nb, directory)
	finally:
		f.close()


def _mean_or_na(values):
	"""Return the mean of `values`, or 'N/A' when there is nothing to average."""
	if not values:
		return 'N/A'
	return sum(values) / len(values)


def _write_sentiment_report(f, lt, nb, directory):
	"""Write the header, one row per run and the AVG row of the table to `f`."""
	f.write('<html><body><table border="1" cellpadding="2"><tbody>')
	f.write('<tr><td>Run</td>')
	f.write('<td>False Positive Rate (%)</td>')
	f.write('<td>False Negative Rate (%)</td>')
	f.write('<td>Accuracy (%)</td></tr>')

	run = 1
	fp = []
	fn = []
	acc = []
	while True:
		train_glob = directory + str(run) + '_tr*'
		print('train_glob: ' + train_glob)
		names = glob.glob(train_glob)
		if not names:
			# No training file for this run number: all runs are processed.
			break
		# Explicit raise instead of `assert` so the check survives `python -O`.
		if len(names) != 1:
			raise AssertionError('invalid naming schema in dir')
		# Built only once the run is known to exist.
		classifier = PostClassifier()
		classifier.sentiment_train(names[0], lt, nb)

		test_glob = directory + str(run) + '_test*'
		names = glob.glob(test_glob)
		if len(names) != 1:
			raise AssertionError('invalid naming schema in dir')
		# result[0..2] are reported below as false-positive rate,
		# false-negative rate and accuracy (scaled by 100); result[3] is
		# iterated and printed as the list of errors.
		result = classifier.sentiment_test(names[0])

		print('Run {0}'.format(run))
		print('Errors: ')
		for elem in result[3]:
			print(elem)
			print('\n\n\n')

		fp_pct = result[0] * 100
		fn_pct = result[1] * 100
		acc_pct = result[2] * 100

		f.write('<tr><td>{0}</td>'.format(run))
		f.write('<td>{0}</td>'.format(fp_pct))
		f.write('<td>{0}</td>'.format(fn_pct))
		f.write('<td>{0}</td></tr>'.format(acc_pct))

		fp.append(fp_pct)
		fn.append(fn_pct)
		acc.append(acc_pct)
		run += 1

	# With zero runs there is nothing to average; emit 'N/A' instead of
	# dividing by zero and leaving a truncated report behind.
	f.write('<tr><td>AVG</td>')
	f.write('<td>{0}</td>'.format(_mean_or_na(fp)))
	f.write('<td>{0}</td>'.format(_mean_or_na(fn)))
	f.write('<td>{0}</td></tr>'.format(_mean_or_na(acc)))
	f.write('</tbody></table></body></html>')
Ejemplo n.º 2
0
def classify_and_write_results(posts):
	"""Classify each post and dump the results to a file or to stdout.

	The user is prompted for an output filename. An empty answer, or a
	filename that cannot be opened for writing, sends the dump to stdout.

	Each entry in `posts` is modified in place:
	  * `entry.sentences` becomes the zip of the sentences obtained from
	    `Format.split_sentences(entry.body)` with the values returned by
	    `PostClassifier.classify_sentiment` for them;
	  * `entry.label` becomes the result of
	    `PostClassifier.classify_topic_unsupervised(entry.body)`.

	Args:
		posts: iterable of objects exposing `body` and `user_int_id`.
	"""
	formatter = Format()
	classifier = PostClassifier()
	output_file = raw_input("Output file (empty defaults to stdout): ")

	f = None
	# An empty answer is the documented way to request stdout, so do not
	# attempt to open '' and then report it as an invalid filename.
	if output_file:
		try:
			f = open(output_file, 'w')
		except IOError:
			print('Invalid filename entered. Printing dumped content ... ')

	# try/finally: close the output file even if classification raises.
	try:
		for entry in posts:
			entry.sentences = formatter.split_sentences(entry.body)
			entry.sentences = zip(
				entry.sentences, classifier.classify_sentiment(entry.sentences))
			entry.label = classifier.classify_topic_unsupervised(entry.body)
			# NOTE(review): ' : '.join(pair) requires both the sentence and
			# its sentiment to be strings - confirm against PostClassifier.
			output = '---POST---' + '\nauthor id: ' + str(entry.user_int_id) + \
				'\nclass: ' + entry.label + '\nsentences and sentiments:\n' + \
					'\n'.join(' : '.join(pair) for pair in entry.sentences)
			if f is not None:
				# print terminates each post with a newline; do the same in
				# the file so consecutive posts do not run together.
				f.write(output + '\n')
			else:
				print(output)
	finally:
		if f is not None:
			f.close()
Ejemplo n.º 3
0
from classify import PostClassifier

# Interactive driver: train a PostClassifier from user-supplied settings, then
# classify sentences typed at the prompt until input ends.
train_path = raw_input("Path to training data: ")
lower_threshold = int(raw_input("Lower threshold: "))
# Only the exact answer "True" enables the feature; any other text is False.
negative_bucket  = raw_input("Use negative bucket feature? (True/False): ") == "True"

classifier = PostClassifier()
classifier.sentiment_train(train_path, lower_threshold, negative_bucket)
while True:
	try:
		sentence = raw_input("Sentence to classify: ")
	except (EOFError, KeyboardInterrupt):
		# Ctrl-D / Ctrl-C at the prompt: finish quietly instead of ending
		# the session with a traceback.
		print("")
		break
	# NOTE(review): the concatenation assumes classify_sentiment returns a
	# str when given a single sentence - confirm against PostClassifier.
	print("Guess: " + classifier.classify_sentiment(sentence))