def make(self, vocabulary, counts, order):
    """Build and return an ``Lm`` of the given order from ``counts``.

    The vocabulary and the highest order are recorded on ``self`` first.
    Counts-of-counts are then computed for every order from 0 up to and
    including ``order`` and handed to ``self.estimateDiscounts``.  Finally
    ``counts`` is passed through ``contract`` and ``store`` and
    ``self.build`` fills a fresh ``Lm(order)`` from the result.

    vocabulary -- forwarded unchanged to ``self.setVocabulary``
    counts     -- counts object accepted by ``mGramCounts.mGramReduceToOrder``
                  and by ``contract``; it is read once per order and once
                  more for the final build
    order      -- highest order; also the constructor argument of ``Lm``

    Returns the newly created ``Lm`` after ``self.build`` has processed it.
    """
    self.setVocabulary(vocabulary)
    self.setHighestOrder(order)

    # The loop variable has its own name on purpose: reusing ``order`` would
    # shadow the parameter and, under Python 2, rebind it, because
    # list-comprehension variables leak into the enclosing scope.
    countsOfCounts = [
        mGramCounts.countsOfCounts(
            mGramCounts.mGramReduceToOrder(counts, reducedOrder))
        for reducedOrder in range(order + 1)
    ]
    self.estimateDiscounts(countsOfCounts)

    result = Lm(order)
    self.build(store(contract(counts)), result)
    return result
def loadCounts(fname, vocabulary, binaryCountFile=None):
    """Load counts from the text file ``fname`` and return them in stored form.

    The file is opened with ``TextStorage`` using ``vocabulary.index``,
    wrapped in ``SentenceStartRemover`` (marked as a work-around in the
    code), contracted, and finally handed to
    ``store(..., big=True, filename=binaryCountFile)``.

    fname           -- passed to ``TextStorage``
    vocabulary      -- must provide ``index`` and ``symbol``
    binaryCountFile -- forwarded to ``store`` as ``filename``; may be None

    Returns the object produced by ``store``.

    Raises NonMonotonousHistoriesError -- re-raised unchanged after the two
    values carried in ``exc.args`` (presumably the conflicting histories)
    have been printed, each followed by its ``vocabulary.symbol`` mapping.
    """
    try:
        counts = TextStorage(fname, vocabulary.index)
        ### work around
        counts = SentenceStartRemover(vocabulary, counts)
        counts = contract(counts)
        counts = store(counts, big=True, filename=binaryCountFile)
    except NonMonotonousHistoriesError as exc:
        h1, h2 = exc.args
        for history in (h1, h2):
            # One pre-formatted string keeps the output identical whether
            # ``print`` is the Python 2 statement or the Python 3 function.
            print("%s %s" % (history, list(map(vocabulary.symbol, history))))
        raise
    # Hand the result back to the caller; without this the loaded counts
    # would be discarded and the function would return None.
    return counts