class KneserNeyModel(BaseNgramModel):
    """
    N-gram language model scored with Kneser-Ney smoothing.

    Delegates all probability queries to an NLTK ``KneserNeyProbDist`` built
    from ``self.ngrams``.
    NOTE(review): ``self.ngrams`` is presumably populated by
    ``BaseNgramModel.__init__`` -- confirm against the base class.
    """

    def __init__(self, *args):
        """Forward ``args`` to the base model, then build the distribution."""
        super(KneserNeyModel, self).__init__(*args)
        self.model = KneserNeyProbDist(self.ngrams)

    def score(self, word, context):
        """
        Return the Kneser-Ney probability of ``word`` given ``context``.

        Only ``context[0]`` and ``context[1]`` are read; together with
        ``word`` they form the trigram handed to the distribution.
        """
        first, second = context[0], context[1]
        return self.model.prob((first, second, word))

    def samples(self):
        """Return the samples held by the underlying distribution."""
        return self.model.samples()

    def prob(self, sample):
        """Return the probability the underlying distribution gives ``sample``."""
        return self.model.prob(sample)
def model_KN(self, contents):
    """
    Build a Kneser-Ney smoothed n-gram probability table for ``contents``.

    Parameters:
    - contents : list containing the repaired contents of the file whose
      n-gram model is to be created

    The n-grams of order ``self.N`` are counted and passed to NLTK's
    ``KneserNeyProbDist``. As a side effect the distribution's discount is
    stored in ``self.discount_KN``.

    Returns a dict mapping every sample of the distribution to its
    probability.
    """
    counts = FreqDist(ngrams(contents, self.N))
    smoothed = KneserNeyProbDist(counts)
    # Record the discount before building the table, as callers may read it.
    self.discount_KN = smoothed.discount()
    return {gram: smoothed.prob(gram) for gram in smoothed.samples()}