예제 #1
0
def main():
    """Run the Laplace-smoothing perplexity evaluation for Dataset 4.

    Prints the banner line, rebinds the module-level ``nGramList`` to the
    result of ``task_one.generateNgram(N, task_one.corpus4)``, rebinds the
    module-level ``tokenList`` to the processed contents of
    ``task_one.corpus4test``, and then hands off to ``extendedMain()``.
    """
    global nGramList, tokenList

    # A parenthesised single string prints identically under the Python 2
    # print statement.
    print("Perplexity for Dataset 4 using Laplace Smoothing")

    nGramList = task_one.generateNgram(N, task_one.corpus4)
    # Read the test file and pass its text straight to modifyFile.
    tokenList = task_one.modifyFile(task_one.readFile(task_one.corpus4test))

    extendedMain()
def main():
  """Run the Laplace-smoothing perplexity evaluation for Dataset 3.

  Prints the banner line, stores the result of
  ``task_one.generateNgram(N, task_one.corpus3)`` in the module-level
  ``nGramList``, stores the processed contents of ``task_one.corpus3test``
  in the module-level ``tokenList``, and finally calls ``extendedMain()``.
  """
  global nGramList, tokenList

  # A parenthesised single string prints identically under the Python 2
  # print statement.
  print("Perplexity for Dataset 3 using Laplace Smoothing")

  nGramList = task_one.generateNgram(N, task_one.corpus3)

  rawTestText = task_one.readFile(task_one.corpus3test)
  tokenList = task_one.modifyFile(rawTestText)

  extendedMain()
예제 #3
0
def main():
  """Run ``extendedmain()`` once for corpus3 and once for corpus4.

  The module-level ``N`` is taken from the first command-line argument
  (converted with ``int``) when one is supplied, and defaults to 2
  otherwise. For each corpus the module-level ``nGramList`` is rebuilt with
  ``task_one.generateNgram`` before the dataset label is printed and
  ``extendedmain()`` is invoked.
  """
  global N, nGramList

  N = int(sys.argv[1]) if len(sys.argv) > 1 else 2

  # First pass: corpus3.
  nGramList = task_one.generateNgram(N, task_one.corpus3)
  print("Dataset3")
  extendedmain()

  # Second pass: corpus4. The list is emptied before it is regenerated,
  # exactly as in the first pass's aftermath.
  nGramList = []
  nGramList = task_one.generateNgram(N, task_one.corpus4)
  print("Dataset4")
  extendedmain()
예제 #4
0
def main():
    """Evaluate corpus3 and then corpus4 with the configured value of N.

    Sets the module-level ``N`` to 2 unless a command-line argument is
    present, in which case ``int(sys.argv[1])`` is used. The module-level
    ``nGramList`` is then regenerated through ``task_one.generateNgram`` for
    each corpus in turn; after each regeneration the dataset label is
    printed and ``extendedmain()`` is called.
    """
    global N, nGramList

    # Fall back to 2 when no argument was given on the command line.
    cliArgs = sys.argv
    if len(cliArgs) <= 1:
        N = 2
    else:
        N = int(cliArgs[1])

    nGramList = task_one.generateNgram(N, task_one.corpus3)
    print("Dataset3")
    extendedmain()

    # Clear the previous result before building the list for corpus4.
    nGramList = []
    nGramList = task_one.generateNgram(N, task_one.corpus4)
    print("Dataset4")
    extendedmain()
예제 #5
0
#Task 3 : Handle unknown words and implement smoothing

from __future__ import division
import math

import task_one

# Module-level state shared by the functions below.
# N is the value handed to task_one.generateNgram.
N = 2

# Both lists start out empty; nothing in this part of the file fills them.
laplaceList = []
nGramTestList = []

print("Perplexity for Dataset 3 using Laplace Smoothing")

# Data built from corpus3.
nGramList = task_one.generateNgram(N, task_one.corpus3)

# Test data: raw text of corpus3test and the result of running it through
# task_one.modifyFile.
fileText = task_one.readFile(task_one.corpus3test)
tokenList = task_one.modifyFile(fileText)

def findVocabCount():
  """Return the sum of every value stored in ``nGramList[0]``.

  The first element of the module-level ``nGramList`` is iterated and the
  value looked up for each key is added to a running total that starts at 0.

  NOTE(review): this adds up the stored values rather than counting the
  distinct keys -- confirm that is the quantity callers expect.
  """
  firstTable = nGramList[0]
  return sum(firstTable[key] for key in firstTable)

def combineTokens(listWithUnknown):
  """Merge a sequence of dictionaries into a single new dictionary.

  Args:
    listWithUnknown: iterable of dictionaries to merge, processed in order.

  Returns:
    A new dict holding every key found in the inputs. When the same key
    occurs in several inputs, the value from the last one wins. An empty
    input yields an empty dict. The input dictionaries are not modified.
  """
  dictTokensWithUnknown = {}
  for n in listWithUnknown:
    # Merge in place instead of rebuilding the whole dict from concatenated
    # item lists on every iteration: that was quadratic in the total number
    # of entries and fails where items() returns a view rather than a list.
    dictTokensWithUnknown.update(n)

  return dictTokensWithUnknown