예제 #1
0
class MockLM(Feature):
    """Placeholder language-model feature whose n-gram cost is always zero.

    It mirrors the shape of a real LM feature (same constructor arguments,
    same ``weight`` / ``ngram_cost`` methods) but never consults a model.
    """

    def __init__(self, m, lmfile):
        """Initialise the feature with an ``NgramEnumerator`` built from ``m``.

        ``lmfile`` is accepted but not read by this constructor.
        """
        Feature.__init__(self)
        self.stateless = False
        self.ngram_enum = NgramEnumerator(m)

    def weight(self, deduction):
        """Return ``(cost, state)`` for ``deduction``.

        The states of the tail items (indexed by ``self.i``, which is not set
        in this class -- presumably assigned by the framework; confirm) are
        substituted into the rule via ``rule.rewrite``. The cost comes from
        ``ngram_cost`` and the state from ``ngram_enum.elide``.
        """
        tail_states = [antecedent.state[self.i] for antecedent in deduction.tail]
        rewritten = tuple(deduction.rule.rewrite(tail_states))
        cost = self.ngram_cost(rewritten)
        return cost, self.ngram_enum.elide(rewritten)

    def ngram_cost(self, s):
        """Walk every n-gram of ``s`` and return the negated total, i.e. 0."""
        # Each n-gram contributes 0; the enumerator is still fully consumed.
        total = sum(0 for _ in self.ngram_enum.ngrams(s))
        # Negated to match the sign convention of the real LM feature.
        return -total
예제 #2
0
 def count(self, line):
     """Record the n-grams of one reference line.

     The line is split on whitespace and its token count is appended to
     ``self.lengths``. N-grams of every order from 1 to ``self.max_n`` are
     tallied for this line; each stored count is then replaced by the larger
     of the stored value and this line's tally, so over several reference
     lines the maximum per-line count of each n-gram is kept.
     """
     tokens = line.split()
     self.lengths.append(len(tokens))

     # Per-line tally, keyed by the n-gram as a tuple.
     line_counts = {}
     for order in range(1, self.max_n + 1):
         for gram in NgramEnumerator(order).ngrams(tokens):
             key = tuple(gram)
             line_counts[key] = line_counts.get(key, 0) + 1

     # NOTE(review): reading self[key] for an unseen n-gram assumes the
     # container supplies a default value -- confirm against the class.
     for key, seen in line_counts.items():
         self[key] = max(seen, self[key])
예제 #3
0
class LM(Feature):
    """Language-model feature backed by either a Python or a SWIG model.

    The backend is chosen by ``FLAGS.use_python_lm``. Whichever backend is
    loaded, ``self.getcost`` is the callable used to score a single n-gram.
    """

    def __init__(self, m, lmfile):
        """Load the language model.

        Args:
            m: passed to ``NgramEnumerator`` and, for the SWIG backend, to
                ``LanguageModel``; also used as the padding width (``m - 1``)
                in ``heuristic``.
            lmfile: path handed to the ``LanguageModel`` constructor.
        """
        Feature.__init__(self)
        self.stateless = False
        self.m = m
        self.lmfile = lmfile
        self.ngram_enum = NgramEnumerator(self.m)

        # Imported lazily so that only the selected backend has to be present.
        if FLAGS.use_python_lm:
            from python_lm import LanguageModel
        else:
            from swig_lm import LanguageModel

        logger.writeln('reading LM: %s' % self.lmfile)
        if FLAGS.use_python_lm:
            self.lm = LanguageModel(self.lmfile)
            # The Python backend is queried through its ``get`` method.
            self.getcost = self.lm.get
        else:
            self.lm = LanguageModel(self.m, self.lmfile)
            # The SWIG backend object is itself called with the n-gram.
            self.getcost = self.lm

    def weight(self, deduction):
        """Return ``(cost, state)`` for ``deduction``.

        The states of the tail items (indexed by ``self.i``, which is not set
        in this class -- presumably assigned by the framework; confirm) are
        substituted into the rule via ``rule.rewrite``. The cost comes from
        ``ngram_cost`` and the state from ``ngram_enum.elide``.
        """
        tail_states = [item.state[self.i] for item in deduction.tail]
        s = tuple(deduction.rule.rewrite(tail_states))
        return self.ngram_cost(s), self.ngram_enum.elide(s)

    def ngram_cost(self, s):
        """Return the negated sum of ``getcost`` over every n-gram of ``s``."""
        cost = sum(self.getcost(ngram) for ngram in self.ngram_enum.ngrams(s))
        return -cost  # LM returns neg logprob

    def heuristic(self, item):
        """Return the negated n-gram score of ``item``'s padded state.

        The state is prefixed with ``m - 1`` copies of ``'<s>'`` when
        ``item.i == 0`` and of ``'<unk>'`` otherwise; ``m - 1`` copies of
        ``'</s>'`` are appended when ``item.rightmost`` is true.
        """
        s = item.state[self.i]
        pad = self.m - 1
        prefix = (('<s>',) if item.i == 0 else ('<unk>',)) * pad
        suffix = ('</s>',) * pad if item.rightmost else ()
        s = prefix + s + suffix
        # Score through self.getcost, exactly as ngram_cost does, so the
        # Python backend is queried via its ``get`` method instead of the
        # model object being called directly.
        h = sum(self.getcost(ngram) for ngram in self.ngram_enum.ngrams(s))
        return -h
예제 #4
0
    def __init__(self, m, lmfile):
        """Initialise the feature and load the language model from ``lmfile``.

        ``FLAGS.use_python_lm`` selects the backend module. Afterwards
        ``self.lm`` holds the model and ``self.getcost`` the callable used to
        score one n-gram with it.
        """
        Feature.__init__(self)
        self.stateless = False
        self.m, self.lmfile = m, lmfile
        self.ngram_enum = NgramEnumerator(m)

        # Resolve the backend class first, then announce the load, then build.
        pure_python = FLAGS.use_python_lm
        if pure_python:
            from python_lm import LanguageModel
        else:
            from swig_lm import LanguageModel

        logger.writeln('reading LM: %s' % lmfile)

        if not pure_python:
            # SWIG model: constructed from (m, lmfile); the object is called.
            self.lm = LanguageModel(m, lmfile)
            self.getcost = self.lm
        else:
            # Python model: constructed from lmfile only; scored via ``get``.
            self.lm = LanguageModel(lmfile)
            self.getcost = self.lm.get
예제 #5
0
 def __init__(self, m, lmfile):
     """Initialise the feature with an ``NgramEnumerator`` built from ``m``.

     ``lmfile`` is accepted but not read by this constructor.
     """
     Feature.__init__(self)
     # Both attributes are assigned after the base initialiser has run.
     self.ngram_enum = NgramEnumerator(m)
     self.stateless = False
 def __init__(self, max_n):
     """Store ``max_n`` and build one ``NgramEnumerator`` per value 1..max_n.

     ``self.enums[k]`` is the enumerator constructed with ``k + 1``.
     """
     self.max_n = max_n
     self.enums = list(map(NgramEnumerator, range(1, max_n + 1)))