def from_train(cls, sequences, n=3):
    """Construct an instance from training sequences.

    A vocabulary is built over all of ``sequences``, n-grams of order ``n``
    are counted with left padding only, and the counts are wrapped in an
    ``MLENgramModel``.  The new instance is ``cls(model, actions)``, where
    ``actions`` is the key view of the vocabulary.

    Args:
        sequences: Collection of token sequences.  It is unpacked for
            ``build_vocabulary`` and afterwards handed over whole to
            ``count_ngrams``, so a one-shot iterator would already be
            exhausted by the second use.
        n: Order of the n-gram model (defaults to 3).

    Returns:
        Whatever ``cls(model, actions)`` produces.
    """
    # NOTE(review): the leading 1 is presumably a minimum-count cutoff --
    # confirm against build_vocabulary.
    vocabulary = build_vocabulary(1, *sequences)
    ngram_counts = count_ngrams(
        n,
        vocabulary,
        sequences,
        pad_left=True,
        pad_right=False,
    )
    return cls(MLENgramModel(ngram_counts), vocabulary.keys())
def build_lm(self, sequences, n):
    """Fit and return an ``MLENgramModel`` of order ``n`` on ``sequences``.

    Args:
        sequences: Collection of token sequences.  It is unpacked for
            ``build_vocabulary`` and afterwards handed over whole to
            ``count_ngrams``, so a one-shot iterator would already be
            exhausted by the second use.
        n: Order of the n-gram model.

    Returns:
        The ``MLENgramModel`` built from the n-gram counts.
    """
    # NOTE(review): the leading 1 is presumably a minimum-count cutoff --
    # confirm against build_vocabulary.
    vocabulary = build_vocabulary(1, *sequences)
    # Sequences are padded on the left only.
    ngram_counts = count_ngrams(
        n,
        vocabulary,
        sequences,
        pad_left=True,
        pad_right=False,
    )
    return MLENgramModel(ngram_counts)
def score_templates(self):
    """Score every template with a trigram model and store it as ``logp``.

    Each entry of ``self.templates.template`` is split on whitespace into
    tokens.  A trigram ``MLENgramModel`` is fitted on all of those token
    lists (left padding only), and each template is scored as
    ``-entropy(tokens) * len(tokens)``.  The scores are written to the
    ``logp`` column of ``self.templates``, creating the column first if it
    does not exist.

    NOTE(review): the score is presumably the template's total
    log-probability under the model (entropy being a per-token average) --
    confirm against ``MLENgramModel.entropy``.

    Returns:
        None.  ``self.templates`` is modified in place.
    """
    sequences = [s.split() for s in self.templates.template.values]
    # NOTE(review): the leading 1 is presumably a minimum-count cutoff --
    # confirm against build_vocabulary.
    vocab = build_vocabulary(1, *sequences)
    counter = count_ngrams(3, vocab, sequences, pad_left=True, pad_right=False)
    model = MLENgramModel(counter)
    scores = [-1. * model.entropy(s) * len(s) for s in sequences]
    # Create the column explicitly at position 0 when it is missing; the
    # placeholder value is overwritten by the assignment just below.
    if 'logp' not in self.templates.columns:
        self.templates.insert(0, 'logp', 0)
    self.templates['logp'] = scores