Example #1
0
def begin_testing(filename, classifier):
	print "\nLoading the test data..."
	test_docs = docR.get_list(filename)
	data = []
	target = []
	for doc in test_docs:
		data.append(doc.vector[:-1])
		target.append(doc.vector[-1])
	np_data = np.array(data)
	np_target = np.array(target)
	results = classifier.predict(np_data)
	kp = kappa(np_target, results)
	print "\nThe Average Quadratic Weighted Kappa obtained is: ", kp, "\n"
	print "="*50
Example #2
0
	np_data = np.array(data)
	np_target = np.array(target)
	results = classifier.predict(np_data)
	kp = kappa(np_target, results)
	print "\nThe Average Quadratic Weighted Kappa obtained is: ", kp, "\n"
	print "="*50

# Command-line entry point. Only the "-n" (train new model) branch is
# visible in this chunk; the "-o" option and the "-t test_file" handling
# advertised in the usage string presumably follow below -- TODO confirm.
if __name__=="__main__":
	# Expected invocation: run.py [-n | -o] input_file model_file data_file -t test_file
	if len(sys.argv) < 5:
		print "USAGE: $ python run.py [-n | -o] input_file model_file data_file -t test_file"
		sys.exit(0)
	print "\n"
	print "="*50
	if sys.argv[1] == "-n":
		# "-n": train a fresh SVR model from the input file.
		print "\nTraining the model..."
		docs_list = docR.get_list(sys.argv[2])
		classifier = svm.SVR()
		data = []
		target = []
		for doc in docs_list:
			# doc.vector = feature values followed by the true score.
			data.append(doc.vector[:-1])
			target.append(doc.vector[-1])
		np_data = np.array(data)
		np_target = np.array(target)
		classifier.fit(np_data, np_target)
		# Persist the trained model to model_file (argv[3]).
		joblib.dump(classifier, sys.argv[3])
		# NOTE(review): save_data aliases `data` (no copy), so the append
		# below also mutates `data` -- verify this is intentional.
		save_data = data
		save_data.append(target)
		string = pickle.dumps(save_data)
		ofp = open(sys.argv[4], 'w')
		# NOTE(review): ofp is never closed in the visible lines; a close()
		# may follow outside this view -- confirm.
		ofp.write(string)
Example #3
0
Module to extract the bag of words and
generate the term-document matrix.

'''

import textmining as txtm
import fileparse as docR


def make_tdm(docs_list, num_sets=9):
	"""Build one term-document matrix per essay set.

	Args:
		docs_list: iterable of documents; each must expose `essay_set`
			(int-convertible, used as an index in [0, num_sets)) and
			`essay` (the document text).
		num_sets: number of essay sets / matrices to build. Defaults to 9,
			matching the previous hard-coded value, so existing callers
			are unaffected.

	Returns:
		A list of `num_sets` txtm.TermDocumentMatrix objects, where
		matrix i holds the essays whose essay_set converts to i.
	"""
	# One empty matrix per essay set (was a manual append loop over
	# xrange(0, 9) with an unused loop variable).
	textMatrices = [txtm.TermDocumentMatrix() for _ in xrange(num_sets)]

	for doc in docs_list:
		# essay_set may not already be an int; convert before indexing.
		textMatrices[int(doc.essay_set)].add_doc(doc.essay)

	return textMatrices


if __name__ == "__main__":
	docs_list = docR.get_list()
	tdMatrices = make_tdm(docs_list)
	for tdm in tdMatrices:
		for row in tdm.rows(cutoff=1):			# Here, cutoff means the number of documents in which this word has to occur for it to be placed in the 'bag of words'.
			print row