def evaulate_trf(model, vocab, read_model, tsize, fres):
    """Rescore the n-best list with a stored model and log WER, LL and PPL.

    Args:
        model: object exposing ``use(config)``, ``ppl(...)`` and a
            ``workdir`` attribute that is used as a path prefix.
        vocab: vocabulary path, passed to ``-vocab`` and to ``model.ppl``.
        read_model: model path prefix; ``.model`` and ``.lmscore`` are
            appended to it.
        tsize: task size; ``'<tsize>/'`` is the task directory handed to
            the module-level ``data`` helper.
        fres: result collector exposing ``Add`` and ``AddWER``.
    """
    label = '{}:{}'.format(int(tsize), os.path.basename(read_model))
    task_dir = '{}/'.format(tsize)

    # Assemble the option string and hand it to the model.
    # NOTE(review): presumably this run writes '<read_model>.lmscore',
    # which is loaded below -- confirm against model.use().
    options = [
        ' -vocab {} '.format(vocab),
        ' -read {}.model '.format(read_model),
        ' -nbest {} '.format(data(task_dir)[3]),
        ' -lmscore {0}.lmscore'.format(read_model),
    ]
    model.use(''.join(options))

    # Tune the score scales for the best WER. The fourth entry of the slice
    # is ignored: the LM score file is always the one next to the model.
    nbest_path, templ_path, acscore_path, _unused_lmscore = \
        data(task_dir)[3:7]
    lmscore_path = read_model + '.lmscore'
    lm_scores = wb.LoadScore(lmscore_path)
    ac_scores = wb.LoadScore(acscore_path)
    wer, lmscale, acscale = wb.TuneWER(
        nbest_path, templ_path, lm_scores, ac_scores,
        np.linspace(0.1, 0.9, 9))
    print('wer={:.4f} lmscale={:.2f} acscale={:.2f}'.format(
        wer, lmscale, acscale))

    # Perplexity / log-likelihood on the label-stripped template file.
    plain_templ = model.workdir + os.path.basename(templ_path) + '.rmlabel'
    wb.file_rmlabel(templ_path, plain_templ)
    ppl = model.ppl(vocab, read_model + '.model', plain_templ)
    loglik = -wb.PPL2LL(ppl, plain_templ)

    # Record everything under the '<tsize>:<model name>' row.
    fres.Add(label, ['LL-wsj', 'PPL-wsj'], [loglik, ppl])
    fres.AddWER(label, wer)
def main():
    """Run the KN n-gram pipeline for task sizes 1, 2 and 4.

    Stages are selected by command-line flags; ``-all`` enables every
    stage: ``-train``, ``-test``, ``-rescore`` and ``-wer``. Results are
    collected through ``wb.FRes('result.txt')``.
    """
    print(sys.argv)
    if len(sys.argv) == 1:
        print('\"python run_ngram.py -train\" train \n',
              '\"python run_ngram.py -rescore\" rescore nbest\n',
              '\"python run_ngram.py -wer\" compute WER')

    # Resolve the stage switches once; sys.argv does not change afterwards.
    run_all = '-all' in sys.argv
    do_train = '-train' in sys.argv or run_all
    do_test = '-test' in sys.argv or run_all
    do_rescore = '-rescore' in sys.argv or run_all
    do_wer = '-wer' in sys.argv or run_all

    srilm_bin = '../../tools/srilm/'
    results = wb.FRes('result.txt')  # the result file
    orders = [5]

    for tsize in (1, 2, 4):
        task_dir = '{}/'.format(tsize)
        work_dir = task_dir + 'ngramlm/'
        model = ngram.model(srilm_bin, work_dir)

        for position, order in enumerate(orders):
            lm_path = work_dir + '{}gram.lm'.format(order)
            tag = '{}:KN{}'.format(tsize, order)
            # Score file sits next to the LM: drop '.lm', add '.lmscore'.
            lmscore_path = lm_path[:-3] + '.lmscore'
            print(lm_path)

            if do_train:
                # Data preparation is only needed before the first order.
                if position == 0:
                    model.prepare(data(task_dir)[0],
                                  data(task_dir)[1],
                                  data(task_dir)[2])
                model.train(order, lm_path)

            if do_test:
                # Perplexity on the first three data files.
                ppls = [model.ppl(lm_path, order, data(task_dir)[k])
                        for k in range(3)]
                results.AddPPL(tag, ppls, data(task_dir)[0:3])

            if do_rescore:
                model.rescore(lm_path, order, data(task_dir)[3],
                              lmscore_path)

            if do_wer:
                nbest, templ = data(task_dir)[3:5]
                lm_scores = wb.LoadScore(lmscore_path)
                ac_scores = wb.LoadScore(data(task_dir)[5])
                wer, lmscale, acscale = wb.TuneWER(
                    nbest, templ, lm_scores, ac_scores,
                    np.linspace(0.1, 0.9, 9))
                print('wer={} lmscale={} acscale={}'.format(
                    wer, lmscale, acscale))
                results.AddWER(tag, wer)

                # LL / PPL on the label-stripped template file.
                plain_txt = work_dir + os.path.basename(templ) + '.txt'
                wb.file_rmlabel(templ, plain_txt)
                ppl = model.ppl(lm_path, order, plain_txt)
                loglik = -wb.PPL2LL(ppl, plain_txt)
                results.Add(tag, ['LL-wsj', 'PPL-wsj'], [loglik, ppl])
def main():
    """Train, rescore and score KN n-gram models of order 2 to 5.

    All paths are anchored at the current working directory. Stages are
    selected by the ``-train``, ``-rescore`` and ``-wer`` command-line
    flags; WER / LL / PPL go to ``models_ppl.txt`` through ``wb.FRes``.
    """
    print(sys.argv)
    if len(sys.argv) == 1:
        print('\"python run_ngram.py -train\" train \n',
              '\"python run_ngram.py -rescore\" rescore nbest\n',
              '\"python run_ngram.py -wer\" compute WER')

    do_train = '-train' in sys.argv
    do_rescore = '-rescore' in sys.argv
    do_wer = '-wer' in sys.argv

    root = os.getcwd() + '/'
    srilm_bin = root + '../../tools/srilm/'
    work_dir = root + 'ngramlm/'
    wb.mkdir(work_dir)
    # Prefix every path returned by data() with the working directory.
    datas = [root + rel_path for rel_path in data()]
    result_file = root + 'models_ppl.txt'  # the result file
    model = ngram.model(srilm_bin, work_dir)

    orders = [2, 3, 4, 5]
    for position, order in enumerate(orders):
        lm_path = work_dir + '{}gram.lm'.format(order)
        tag = 'KN{}'.format(order)
        # Score file sits next to the LM: drop '.lm', add '.lmscore'.
        lmscore_path = lm_path[:-3] + '.lmscore'
        print(lm_path)

        if do_train:
            # Data preparation is only needed before the first order.
            if position == 0:
                model.prepare(datas[0], datas[1], datas[2])
            model.train(order, lm_path, result_file)

        if do_rescore:
            model.rescore(lm_path, order, datas[3], lmscore_path)

        if do_wer:
            nbest, templ = datas[3:5]
            lm_scores = wb.LoadScore(lmscore_path)
            ac_scores = wb.LoadScore(datas[5])
            wer, lmscale, acscale = wb.TuneWER(
                nbest, templ, lm_scores, ac_scores,
                np.linspace(0.1, 0.9, 9))
            print('wer={} lmscale={} acscale={}'.format(
                wer, lmscale, acscale))

            # The result handle is opened only when there is a WER to log.
            results = wb.FRes(result_file)
            results.AddWER(tag, wer)

            # LL / PPL on the label-stripped template file.
            plain_txt = work_dir + os.path.basename(templ) + '.txt'
            wb.file_rmlabel(templ, plain_txt)
            ppl = model.ppl(lm_path, order, plain_txt)
            loglik = -wb.PPL2LL(ppl, plain_txt)
            results.Add(tag, ['LL-wsj', 'PPL-wsj'], [loglik, ppl])
model.prepare(data(tskdir)[0], data(tskdir)[1], data(tskdir)[2], data(tskdir)[3]) model.train(config) else: print('exist model: ' + write_model) if '-test' in sys.argv or '-all' in sys.argv: PPL = [0]*3 PPL[0] = model.ppl(write_model, data(tskdir)[0]) PPL[1] = model.ppl(write_model, data(tskdir)[1]) PPL[2] = model.ppl(write_model, data(tskdir)[2]) fres.AddPPL(write_name, PPL, data(tskdir)[0:3]) if '-rescore' in sys.argv or '-all' in sys.argv: write_lmscore = write_model[0:-4] + '.lmscore' model.rescore(write_model, data(tskdir)[3], write_lmscore) if '-wer' in sys.argv or '-all' in sys.argv: [read_nbest, read_templ, read_acscore, read_lmscore] = data(tskdir)[3:7] read_lmscore = write_model[0:-4] + '.lmscore' [wer, lmscale, acscale] = wb.TuneWER(read_nbest, read_templ, read_lmscore, read_acscore, np.linspace(0.1, 0.9, 9)) print('wer={:.4f} lmscale={:.2f} acscale={:.2f}'.format(wer, lmscale, acscale)) fres.AddWER(write_name, wer) write_templ_rm = workdir + os.path.split(read_templ)[1] + '.rmlabel' rnn.Nbest_rmlabel(read_templ, write_templ_rm) PPL_templ = model.ppl(write_model, write_templ_rm) LL_templ = -wb.PPL2LL(PPL_templ, write_templ_rm) fres.Add(write_name, ['LL-wsj', 'PPL-wsj'], [LL_templ, PPL_templ])