def main():
    """Prepare the Dataset 4 model and test tokens, then run extendedMain.

    Prints a banner, stores the result of ``task_one.generateNgram`` for
    ``task_one.corpus4`` in the module-level ``nGramList``, stores the
    processed contents of ``task_one.corpus4test`` in the module-level
    ``tokenList``, and finally calls ``extendedMain()``.

    Reads the module-level ``N`` as the first argument to generateNgram.
    """
    global nGramList
    global tokenList

    print("Perplexity for Dataset 4 using Laplace Smoothing")

    # Model built from the training corpus.
    nGramList = task_one.generateNgram(N, task_one.corpus4)

    # Test file is read first, then passed through modifyFile.
    tokenList = task_one.modifyFile(task_one.readFile(task_one.corpus4test))

    extendedMain()
def main():
    """Prepare the Dataset 3 model and test tokens, then run extendedMain.

    Side effects, in order:
      1. prints the Dataset 3 banner;
      2. rebinds the module-level ``nGramList`` to
         ``task_one.generateNgram(N, task_one.corpus3)``;
      3. rebinds the module-level ``tokenList`` to the result of
         ``task_one.modifyFile`` applied to the text read from
         ``task_one.corpus3test``;
      4. calls ``extendedMain()``.

    ``N`` is read from module scope.
    """
    global tokenList, nGramList

    print("Perplexity for Dataset 3 using Laplace Smoothing")

    nGramList = task_one.generateNgram(N, task_one.corpus3)

    rawTestText = task_one.readFile(task_one.corpus3test)
    tokenList = task_one.modifyFile(rawTestText)

    extendedMain()
def main():
    """Run extendedmain over Dataset3 and then Dataset4.

    The module-level ``N`` is set from the first command-line argument
    (converted with ``int``) when one is given, otherwise to 2.  For each
    dataset the module-level ``nGramList`` is rebuilt with
    ``task_one.generateNgram``, a label is printed, and ``extendedmain()``
    is called.

    Raises:
        ValueError: if the first command-line argument is not an integer.
    """
    global N, nGramList

    # Optional CLI override; falls back to 2 when no argument is supplied.
    N = int(sys.argv[1]) if len(sys.argv) > 1 else 2

    nGramList = task_one.generateNgram(N, task_one.corpus3)
    print("Dataset3")
    extendedmain()

    # Release the Dataset3 model before the Dataset4 one is built.
    nGramList = []
    nGramList = task_one.generateNgram(N, task_one.corpus4)
    print("Dataset4")
    extendedmain()
def main():
    """Evaluate Dataset3 followed by Dataset4 via extendedmain.

    Sets the module-level ``N`` (2 unless ``sys.argv[1]`` is present, in
    which case its ``int`` value is used), then for each dataset rebinds
    the module-level ``nGramList`` to the output of
    ``task_one.generateNgram``, prints the dataset label and calls
    ``extendedmain()``.

    Raises:
        ValueError: if ``sys.argv[1]`` cannot be parsed as an integer.
    """
    global N
    global nGramList

    cliArgs = sys.argv[1:]
    if cliArgs:
        N = int(cliArgs[0])
    else:
        N = 2

    # --- Dataset3 ---
    nGramList = task_one.generateNgram(N, task_one.corpus3)
    print("Dataset3")
    extendedmain()

    # --- Dataset4 ---
    # The previous model is dropped first so it is not kept alive while
    # the next one is being generated.
    nGramList = []
    nGramList = task_one.generateNgram(N, task_one.corpus4)
    print("Dataset4")
    extendedmain()
# Task 3: Handle unknown words and implement smoothing
from __future__ import division
import math
import task_one

# Module-level state shared by the functions below.
N = 2
print("Perplexity for Dataset 3 using Laplace Smoothing")
# Model built from the Dataset 3 training corpus; findVocabCount reads
# index 0 of this as the unigram table.
nGramList = task_one.generateNgram(N, task_one.corpus3)
laplaceList = []
# Test text is read and then passed through task_one.modifyFile.
fileText = task_one.readFile(task_one.corpus3test)
tokenList = task_one.modifyFile(fileText)
nGramTestList = []


def findVocabCount():
    """Return the sum of every value stored in ``nGramList[0]``.

    ``nGramList[0]`` is treated as the unigram table and is indexed by
    each key it yields on iteration.

    NOTE(review): this adds up the stored values (presumably occurrence
    counts) rather than counting distinct keys -- confirm this is the
    quantity the smoothing formula expects.
    """
    unigram = nGramList[0]
    return sum(unigram[token] for token in unigram)


def combineTokens(listWithUnknown):
    """Merge a sequence of dicts into one new dict.

    Args:
        listWithUnknown: iterable of dicts to merge.

    Returns:
        A new dict holding every key from every input dict.  When a key
        occurs in more than one input, the value from the later dict wins.
        An empty input yields an empty dict.
    """
    dictTokensWithUnknown = dict()
    for tokenCounts in listWithUnknown:
        # In-place update keeps the merge linear; rebuilding the dict from
        # concatenated item lists on every pass was quadratic and fails on
        # Python 3, where items() returns a view instead of a list.
        dictTokensWithUnknown.update(tokenCounts)
    return dictTokensWithUnknown