class MockLM(Feature):
    """A stand-in LM feature that scores every string as 0.

    Mirrors the real LM feature's interface (same weight/state-elision
    protocol via NgramEnumerator) without loading an actual language
    model, so the decoder can be exercised cheaply.
    """

    def __init__(self, m, lmfile):
        """m is the n-gram order; lmfile is accepted for interface
        parity with LM but is not read."""
        Feature.__init__(self)
        self.stateless = False  # LM features carry state between items
        self.ngram_enum = NgramEnumerator(m)

    def weight(self, deduction):
        """Return (cost, elided LM state) for a deduction, like the real LM."""
        vars = [item.state[self.i] for item in deduction.tail]
        s = tuple(deduction.rule.rewrite(vars))
        return self.ngram_cost(s), self.ngram_enum.elide(s)

    def ngram_cost(self, s):
        """Mock n-gram cost: always 0.

        The original iterated over ngram_enum.ngrams(s) adding 0 for
        each n-gram — a dead loop doing O(n) work for a constant
        result — so the loop is removed.
        """
        return 0  # LM returns neg logprob; the mock's logprob is 0
def count(self, line):
    """Record the n-gram counts of one reference line.

    When several reference lines are fed to the counter, the stored
    count for each n-gram is the maximum seen in any single line
    (clipped reference counts, BLEU-style).
    """
    tokens = line.split()
    self.lengths.append(len(tokens))
    # Tally every n-gram of this line, for all orders 1..max_n.
    line_counts = {}
    for order in range(1, self.max_n + 1):
        enumerator = NgramEnumerator(order)
        for ngram in enumerator.ngrams(tokens):
            key = tuple(ngram)
            line_counts[key] = line_counts.get(key, 0) + 1
    # Keep the per-line maximum for each n-gram across all calls.
    for key, c in line_counts.items():
        self[key] = max(c, self[key])
class LM(Feature):
    """Language-model feature: scores hypotheses by n-gram neg-logprob.

    Wraps either a pure-Python LM or a SWIG-wrapped one, selected by
    FLAGS.use_python_lm; both are accessed through self.getcost so the
    rest of the feature is backend-agnostic.
    """

    def __init__(self, m, lmfile):
        """m: n-gram order; lmfile: path of the LM file to load."""
        Feature.__init__(self)
        self.stateless = False  # LM state is carried between items
        self.m = m
        self.lmfile = lmfile
        self.ngram_enum = NgramEnumerator(self.m)
        if FLAGS.use_python_lm:
            from python_lm import LanguageModel
        else:
            from swig_lm import LanguageModel
        logger.writeln('reading LM: %s' % self.lmfile)
        if FLAGS.use_python_lm:
            self.lm = LanguageModel(self.lmfile)
            # Python backend scores via a .get method.
            self.getcost = self.lm.get
        else:
            self.lm = LanguageModel(self.m, self.lmfile)
            # SWIG backend is itself callable.
            self.getcost = self.lm

    def weight(self, deduction):
        """Return (cost, elided LM state) for a deduction."""
        vars = [item.state[self.i] for item in deduction.tail]
        s = tuple(deduction.rule.rewrite(vars))
        return self.ngram_cost(s), self.ngram_enum.elide(s)

    def ngram_cost(self, s):
        """Sum LM scores of all full n-grams in s, negated."""
        cost = 0
        for ngram in self.ngram_enum.ngrams(s):
            cost += self.getcost(ngram)
        return -cost  # LM returns neg logprob

    def heuristic(self, item):
        """Outside estimate for an item's LM state: pad the boundary
        words with <s>/</s> (or <unk> in mid-sentence) and score the
        resulting n-grams."""
        s = item.state[self.i]
        if item.i == 0:
            prefix = ('<s>',) * (self.m - 1)
        else:
            prefix = ('<unk>',) * (self.m - 1)
        if item.rightmost:
            suffix = ('</s>',) * (self.m - 1)
        else:
            suffix = ()
        s = prefix + s + suffix
        h = 0
        for ngram in self.ngram_enum.ngrams(s):
            # BUG FIX: was self.lm(ngram), which breaks when
            # FLAGS.use_python_lm is set (that backend scores via .get,
            # not by calling the object). Use the backend-agnostic
            # self.getcost, consistent with ngram_cost above.
            h += self.getcost(ngram)
        return -h
def __init__(self, m, lmfile):
    """Load an order-m language model from lmfile.

    The backend is chosen by FLAGS.use_python_lm; either way, scoring
    goes through self.getcost so callers need not care which backend
    is active.
    """
    Feature.__init__(self)
    self.stateless = False  # LM state is carried between items
    self.m = m
    self.lmfile = lmfile
    self.ngram_enum = NgramEnumerator(self.m)
    # Import the chosen backend and remember its constructor arguments;
    # the Python LM takes only the file, the SWIG LM also takes the order.
    if FLAGS.use_python_lm:
        from python_lm import LanguageModel
        ctor_args = (self.lmfile,)
    else:
        from swig_lm import LanguageModel
        ctor_args = (self.m, self.lmfile)
    logger.writeln('reading LM: %s' % self.lmfile)
    self.lm = LanguageModel(*ctor_args)
    # Python backend scores via .get; the SWIG backend is callable itself.
    self.getcost = self.lm.get if FLAGS.use_python_lm else self.lm
def __init__(self, m, lmfile):
    """Set up a mock LM feature of order m.

    lmfile is accepted only for interface parity with the real LM
    feature; no file is read.
    """
    Feature.__init__(self)
    self.ngram_enum = NgramEnumerator(m)
    self.stateless = False  # LM features carry state between items
def __init__(self, max_n):
    """Keep one NgramEnumerator per order, for orders 1 through max_n.

    self.enums[k] enumerates (k+1)-grams.
    """
    self.max_n = max_n
    self.enums = [NgramEnumerator(order) for order in range(1, max_n + 1)]