Esempio n. 1
0
File: sms2.py Progetto: fvdsn/NLP
def main():
	if len(sys.argv) < 2:
		exit()
	
	argsNb = len(sys.argv)
	trainfile = open(sys.argv[argsNb-1])
	testfile = open("smstest.txt")
	
	print "Tokenize..."
	traintokenlines = token.smartTokenizer(trainfile)
	print "Done"
	#traintokenlines = token.preprocessTraining(traintokenlines,mod.lexicon)
	#print traintokenlines
	
	#mod = mlaplace.Model(traintokenlines,3)
	for order in range(2,4):
		print "-------------------------------------------------------"
		print "ORDER:",order
		print "Token Counting ..."
		mod = mkn.Model(traintokenlines,order)
		print "Done"
		
		if(order > 1):
			print "Checking probabilities ..."
			print mod.check_proba(order,0.01)
			print "Done"
	
		print "Perplexity ..."
		print mod.perplexity(token.smartTokenizer(testfile))
		print "Done"
		
		if(order > 1):
			print "Shannon Game..."
			message = ("I","LL","LET","BOTH","OF","MY")
			test = message[-(order-1):]
			game =  mod.shannon_game(test)
			print "Done"
			i = 0
			for g in game:
				if g[1] == 'PARENTS':
					print i
				else:
					i += 1
			print "PARENT FOUND AT POS:",i
			print "TOP 10:"
			print game[:10]
Esempio n. 2
0
File: sms.py Progetto: fvdsn/NLP
def main():
	if len(sys.argv) < 2:
		exit()
	
	argsNb = len(sys.argv)
	trainfile = open(sys.argv[argsNb-1])
	#testfile = open("../smstest.txt")
	
	traintokenlines = token.smartTokenizer(trainfile)
	print traintokenlines
	
	#mod = mlaplace.Model(traintokenlines,3)
	mod = mkn.Model(traintokenlines,3)
	
	#print mod.perplexity(token.smartTokenizer(testfile))
	print mod.shannon_game(("BOTH","OF"))