Beispiel #1
0
class KneserNeyModel(BaseNgramModel):
    """
    N-gram language model whose probabilities come from NLTK's
    KneserNeyProbDist, built over the counts held in ``self.ngrams``.
    """

    def __init__(self, *args):
        """Forward every argument to the base model, then build the distribution."""
        super(KneserNeyModel, self).__init__(*args)
        # Presumably self.ngrams is populated by the base-class initializer;
        # it is not assigned anywhere in this class.
        self.model = KneserNeyProbDist(self.ngrams)

    def score(self, word, context):
        """
        Return the Kneser-Ney probability of ``word`` given ``context``.

        Only the first two items of ``context`` are read; together with
        ``word`` they form the trigram handed to the distribution.
        """
        first, second = context[0], context[1]
        return self.model.prob((first, second, word))

    def samples(self):
        """Return the samples exposed by the underlying distribution."""
        return self.model.samples()

    def prob(self, sample):
        """Return the underlying distribution's probability for ``sample``."""
        return self.model.prob(sample)
 def model_KN(self, contents):
     """Build a Kneser-Ney smoothed n-gram model from ``contents``.

     The n-grams of order ``self.N`` found in ``contents`` are counted with
     FreqDist and handed to NLTK's KneserNeyProbDist.

     Parameters:
     - contents : list containing the repaired contents of the file whose
       n-gram model is to be created

     Side effect: the discount value reported by the distribution is stored
     on ``self.discount_KN``.

     Returns a dict mapping every sample of the distribution to its
     Kneser-Ney smoothed probability.

     NOTE(review): NLTK documents KneserNeyProbDist as a trigram-only
     estimator, so ``self.N`` is presumably 3 -- confirm against callers.
     """
     # FreqDist consumes any iterable, so the n-grams need not be copied
     # into an intermediate list first.
     fdist = FreqDist(ngrams(contents, self.N))
     kn_prob_dist = KneserNeyProbDist(fdist)
     self.discount_KN = kn_prob_dist.discount()
     return {
         sample: kn_prob_dist.prob(sample)
         for sample in kn_prob_dist.samples()
     }