def main():
    """Build and evaluate ``mkn.Model`` language models of order 2 and 3.

    The training corpus is read from the path given as the last command-line
    argument; the evaluation corpus is always ``smstest.txt`` in the current
    directory.  For each order the script prints the model's ``check_proba``
    result, its perplexity on the evaluation corpus, and the outcome of a
    Shannon game played on a fixed message.

    Every ``print`` is called with a single, pre-formatted argument so the
    output is the same under Python 2's print statement and Python 3's
    print function.

    Raises:
        SystemExit: if no training file is given on the command line.
    """
    if len(sys.argv) < 2:
        # sys.exit (not the interactive ``exit`` helper) so the failure is
        # reported on stderr with a non-zero status.
        sys.exit("usage: %s TRAINFILE" % sys.argv[0])

    # Both corpora are opened in one ``with`` so the handles are closed even
    # if tokenizing or model building raises.  The models are built inside
    # the block in case the tokenizer reads its file lazily.
    with open(sys.argv[-1]) as trainfile, open("smstest.txt") as testfile:
        print("Tokenize...")
        traintokenlines = token.smartTokenizer(trainfile)
        print("Done")

        # Tokenize the evaluation corpus once: re-tokenizing the same open
        # handle for every order would read from an exhausted file on the
        # second pass.  Assumes smartTokenizer returns a reusable sequence,
        # as the training tokens are already reused across orders.
        testtokenlines = token.smartTokenizer(testfile)

        for order in range(2, 4):
            _evaluate_order(traintokenlines, testtokenlines, order)


def _evaluate_order(traintokenlines, testtokenlines, order):
    """Train one model of the given order and print its evaluation report."""
    print("-------------------------------------------------------")
    print("ORDER: %s" % order)

    print("Token Counting ...")
    mod = mkn.Model(traintokenlines, order)
    print("Done")

    # The order > 1 guards are kept from the original so the loop range can
    # be widened to include order 1 without touching this function.
    if order > 1:
        print("Checking probabilities ...")
        print(mod.check_proba(order, 0.01))
        print("Done")

    print("Perplexity ...")
    print(mod.perplexity(testtokenlines))
    print("Done")

    if order > 1:
        _play_shannon_game(mod, order)


def _play_shannon_game(mod, order):
    """Run the Shannon game on the fixed message and report where 'PARENTS' ranks."""
    print("Shannon Game...")
    message = ("I", "LL", "LET", "BOTH", "OF", "MY")
    # Only the last (order - 1) words are passed to the model as context.
    context = message[-(order - 1):]
    game = mod.shannon_game(context)
    print("Done")

    position = _position_of(game, 'PARENTS')
    if position is None:
        print("PARENT NOT FOUND")
    else:
        print("PARENT FOUND AT POS: %s" % position)

    print("TOP 10:")
    print(game[:10])


def _position_of(game, word):
    """Return the index of the first entry whose second field equals word, or None."""
    for position, entry in enumerate(game):
        if entry[1] == word:
            return position
    return None
def main():
    """Train an order-3 ``mkn.Model`` and print one Shannon-game result.

    The training corpus is read from the path given as the last command-line
    argument.  The tokenized corpus is printed first, followed by the result
    of ``shannon_game`` for the context ``("BOTH", "OF")``.

    Every ``print`` is called with a single argument so the output is the
    same under Python 2's print statement and Python 3's print function.

    Raises:
        SystemExit: if no training file is given on the command line.
    """
    if len(sys.argv) < 2:
        # sys.exit (not the interactive ``exit`` helper) so the failure is
        # reported on stderr with a non-zero status.
        sys.exit("usage: %s TRAINFILE" % sys.argv[0])

    # ``with`` closes the training file even if tokenizing or model building
    # raises.  The model is built inside the block in case the tokenizer
    # reads its file lazily.
    with open(sys.argv[-1]) as trainfile:
        traintokenlines = token.smartTokenizer(trainfile)
        print(traintokenlines)

        # Alternative model (disabled): mlaplace.Model(traintokenlines, 3)
        mod = mkn.Model(traintokenlines, 3)
        print(mod.shannon_game(("BOTH", "OF")))