def NGramLangModel():
    """Train a 2-gram language model on the ``tb_kota_bywiki.txt`` corpus.

    The corpus file is opened through a ``Loader`` rooted at
    ``MAIN_DIR + DS_DIR``, passed through ``processRaw`` (``to_lower=True``)
    and ``rawForLangmodel`` (``punct_remove=True``, ``to_token=True``), and
    the result is handed to ``NGramModels.train`` with the ``'modkn'``
    optimizer.  A separator line is printed afterwards.  Nothing is returned.
    """
    corpus_loader = Loader(MAIN_DIR + DS_DIR)

    # Corpus files that have been used here: tb_berita_onlinemedia,
    # tb_kota_bywiki
    raw_corpus = corpus_loader.loadLarge('tb_kota_bywiki.txt', lazy_load=True)
    normalized = corpus_loader.processRaw(raw_corpus, to_lower=True)
    training_data = corpus_loader.rawForLangmodel(
        normalized,
        punct_remove=True,
        to_token=True,
    )

    trainer = NGramModels(ngram=2)
    # The njump parameter cannot be used with the modkn optimizer yet.
    trained_models = trainer.train(
        training_data,
        optimizer='modkn',
        separate=False,
        njump=0,
        verbose=False,
    )

    # Single parenthesised argument: prints the same text whether ``print``
    # is the Python 2 statement or the Python 3 function.
    print("##########################################################")
def NGramLangModel():
    """Train a 2-gram language model on a small inline sample dataset.

    The training data is a hard-coded list of token lists, so the function
    needs no corpus file on disk.  The model is trained through
    ``NGramModels.train`` with the ``'modkn'`` optimizer and ``verbose=True``;
    a separator line is printed afterwards.  Nothing is returned.

    To train on a corpus file instead, build the token data with
    ``Loader.loadLarge`` / ``processRaw`` / ``rawForLangmodel`` and pass
    that to ``train`` in place of ``dataset``.
    """
    # Each inner list is one sequence of word tokens.
    dataset = [
        ['saya', 'suka', 'kamu'],
        ['kamu', 'suka', 'saya'],
        ['saya', 'tidak', 'suka', 'jika', 'kamu', 'pergi', 'dengan', 'dia'],
    ]

    lms = NGramModels(ngram=2)
    # The njump parameter cannot be used with the modkn optimizer yet.
    lms.train(
        dataset,
        optimizer='modkn',
        separate=False,
        njump=0,
        verbose=True,
    )

    # Single parenthesised argument: prints the same text whether ``print``
    # is the Python 2 statement or the Python 3 function.
    print("##########################################################")