def __init__(self):
    """Set up the n-gram helper, word list, confusion matrix and dictionary.

    Creates an ``nGram`` instance, keeps the sorted unique words it exposes
    from index 3246 onward, calls ``loadConfusionMatrix`` and stores the
    result of ``loadDict`` on ``self.dict``.
    """
    self.ng = nGram(True, True, False, False, False)

    # De-duplicate and sort the vocabulary, then skip its first 3246 entries.
    unique_sorted_words = sorted(set(self.ng.words))
    self.words = unique_sorted_words[3246:]

    self.loadConfusionMatrix()
    self.dict = self.loadDict()
def __init__(self):
    """Load the trigram table and create the helper objects.

    Reads ``3gram.csv`` with pandas, instantiates ``pinyin`` and ``nGram``,
    stores a list of punctuation marks on ``self.segment`` and prints a
    completion message.
    """
    self.grams = pd.read_csv('3gram.csv')
    self.py = pinyin()
    self.ng = nGram()

    # Punctuation marks kept on the instance as ``segment``.
    punctuation_marks = [
        ',', '。', '?', '!', ':', ';', '……', '【', '】',
        '(', ')', '“', '”', "《", '》', '、',
    ]
    self.segment = punctuation_marks

    print('init over')
for i, key in enumerate(self.dic.keys()): f_csv.writerow({ 'one': key[0], 'two': key[1], 'three': key[2], 'num': self.dic[key] }) bar.bar(i, length, "Preprocessed ") print("\nfinish write: " + self.path) if __name__ == '__main__': from ngram import nGram from visualization import Progress_bar ng = nGram() ngg = NGramGenerator(3) file_dir = 'raw_data' for files in os.listdir(file_dir)[-10:]: b = Progress_bar() with open(os.path.join(file_dir, files), 'r', encoding='utf-8') as f: data = f.read().split() l = len(data) for index, d in enumerate(data): gs = ng.ngram(d) ngg.generate(gs) b.bar(index, l, "Preprocessed " + files) print("\nfinish index: " + files) ngg.save()
# Command-line script: build an n-gram model of order N and save it.
#
# Expected arguments: ModuleFilename, N
#   ModuleFilename -- path handed to ngram.saveObject as the output target
#   N              -- integer passed to ngram.nGram
import ngram
import sys
import pickle

if len(sys.argv) < 3:
    # Report the usage error on stderr and exit non-zero so that calling
    # shells and scripts can detect the failure.
    print("Error: Expected arguments: ModuleFilename, N", file=sys.stderr)
    sys.exit(1)

moduleFilename = sys.argv[1]
n = int(sys.argv[2])  # raises ValueError if N is not an integer literal

model = ngram.nGram(n)

# Create/truncate the output file up front, as the original did, but close
# the handle straight away instead of leaking it.
with open(moduleFilename, 'w'):
    pass

model.printInfo()
ngram.saveObject(model, moduleFilename)
import unittest from ngram import nGram ng = nGram(n=5, corpus_file=None, cache=False) class TestNgram(unittest.TestCase): def test_uni_log(self): probability = ng.sentence_probability(sentence='hold your horses', n=1, form='log') self.assertAlmostEqual(probability, -24.9337710989) def test_uni_antilog(self): probability = ng.sentence_probability(sentence='hold your horses', n=1, form='antilog') self.assertAlmostEqual(probability, 1.48388689281e-11) def test_bi_log(self): probability = ng.sentence_probability(sentence='hold your horses', n=2, form='log') self.assertAlmostEqual(probability, -18.655540764) def test_bi_antilog(self): probability = ng.sentence_probability(sentence='hold your horses', n=2, form='antilog') self.assertAlmostEqual(probability, 7.90681521418e-09) def test_tri_log(self):