def main():
    """Train n-gram models of order 3, 4 and 5 and rescore an n-best list.

    The corpus comes from ``reader.ptb_raw_dir()`` and the n-best list from
    ``reader.wsj0_nbest()``. For each order the model is created in a work
    directory named after the current config, trained, used to rescore the
    n-best list, and the LM scores are written to ``nbest.lmscore`` in that
    directory. The WER is printed and passed to ``fres.AddWER``.

    NOTE(review): ``res_file`` and ``fres`` are not bound inside this
    function; presumably they are module-level names -- confirm.
    """
    report = 'wer={} lmscale={}, acscale={}'

    corpus = reader.Data()
    data = corpus.load_raw_data(
        reader.ptb_raw_dir(),
        add_beg_token='<s>',
        add_end_token='</s>',
    )
    nbest = reader.NBest(*reader.wsj0_nbest())

    config = ngramlm.Config(data)
    config.res_file = 'results.txt'

    # Location of the external tool binaries depends on the platform check.
    bindir = 'd:\\wangbin\\tools' if wb.is_window() else '../../tools/srilm'

    for ngram_order in (3, 4, 5):
        config.order = ngram_order
        workdir = 'ngramlm/' + str(config)
        model = ngramlm.Model(config, data, bindir, workdir, name=str(config))

        print('train...')
        model.train(write_to_res=(res_file, str(config)))

        print('rescore...')
        hypotheses = nbest.get_nbest_list(data)
        nbest.lmscore = model.rescore(hypotheses)
        score_path = os.path.join(workdir, 'nbest.lmscore')
        nbest.write_lmscore(score_path)

        wer = nbest.wer()
        print(report.format(wer, nbest.lmscale, nbest.acscale))
        fres.AddWER(str(config), wer)
def main():
    """Train n-gram models of order 3 to 6 and report the WER of each.

    Data is loaded from the ``train``, ``valid`` and ``test`` names and the
    n-best list from ``nbest_files``. ``config.discount`` is set to
    ``'-wbdiscount'``. Each model is trained, used to rescore the n-best
    list, and its WER is printed and passed to ``fres.AddWER``.

    NOTE(review): ``train``, ``valid``, ``test``, ``nbest_files``,
    ``res_file`` and ``fres`` are not bound inside this function; presumably
    they are module-level names -- confirm.
    """
    report = 'wer={} lmscale={}, acscale={}'

    corpus = reader.Data()
    data = corpus.load_raw_data(
        [train, valid, test],
        add_beg_token='<s>',
        add_end_token='</s>',
    )
    nbest = reader.NBest(*nbest_files)

    config = ngramlm.Config(data)
    config.res_file = 'results.txt'
    config.discount = '-wbdiscount'

    # Location of the external tool binaries depends on the platform check.
    bindir = 'd:\\wangbin\\tools' if wb.is_window() else '../../tools/srilm'

    # NOTE(review): the work directory is derived from the config *before*
    # any order is assigned, so every order below uses this same directory
    # while the model name changes per order -- confirm this is intended.
    workdir = 'ngramlm/' + str(config)

    for ngram_order in (3, 4, 5, 6):
        config.order = ngram_order
        model = ngramlm.Model(config, data, bindir, workdir, name=str(config))

        print('train...')
        model.train(write_to_res=(res_file, str(config)))

        print('rescore...')
        hypotheses = nbest.get_nbest_list(data)
        nbest.lmscore = model.rescore(hypotheses)

        wer = nbest.wer()
        print(report.format(wer, nbest.lmscale, nbest.acscale))
        fres.AddWER(str(config), wer)
def main():
    """Train n-gram models of order 5 and 6 and evaluate them via nbest_eval.

    Data is loaded from ``train_files``. For each order a model is created in
    a work directory named after the current config, trained, and then handed
    to ``nbest_eval`` together with the data, work directory, ``fres`` and the
    config string.

    NOTE(review): ``train_files``, ``res_file``, ``fres`` and ``nbest_eval``
    are not bound inside this function; presumably they are defined at module
    level -- confirm.
    """
    corpus = reader.Data()
    data = corpus.load_raw_data(
        train_files,
        add_beg_token='<s>',
        add_end_token='</s>',
    )

    # nbest_real = reader.NBest(*reader.wsj0_nbest())
    # nbest_fake = reader.NBest(*nbest_files)

    config = ngramlm.Config(data)
    config.res_file = 'results.txt'

    # Location of the external tool binaries depends on the platform check.
    bindir = 'd:\\wangbin\\tools' if wb.is_window() else '../../tools/srilm'

    for ngram_order in (5, 6):
        config.order = ngram_order
        workdir = 'ngramlm/' + str(config)
        model = ngramlm.Model(config, data, bindir, workdir, name=str(config))

        print('train...')
        model.train(write_to_res=(res_file, str(config)))

        print('rescore...')
        nbest_eval(model, data, workdir, fres, str(config))
def main():
    """Train n-gram models of order 3 to 6 on ``seq.Data`` and report WER.

    The data set is built from the ``vocab``, ``train``, ``valid`` and
    ``test`` entries of ``data_info`` and converted with ``create_data()``;
    the n-best list is built from ``data_info['nbest']``. Each model is
    trained, used to rescore the n-best list, and its WER is printed and
    passed to ``fres.AddWER``.

    NOTE(review): ``data_info``, ``res_file`` and ``fres`` are not bound
    inside this function; presumably they are module-level names -- confirm.
    """
    report = 'wer={} lmscale={}, acscale={}'

    seq_data = seq.Data(
        vocab_files=data_info['vocab'],
        train_list=data_info['train'],
        valid_list=data_info['valid'],
        test_list=data_info['test'],
    )
    data = seq_data.create_data()
    nbest = reader.NBest(*data_info['nbest'])

    config = ngramlm.Config(data)
    config.res_file = res_file

    # Location of the external tool binaries depends on the platform check.
    bindir = 'd:\\wangbin\\tools' if wb.is_window() else '../../../tools/srilm'

    # NOTE(review): the work directory is derived from the config *before*
    # any order is assigned, so every order below uses this same directory
    # while the model name changes per order -- confirm this is intended.
    workdir = 'ngramlm/' + str(config)

    for ngram_order in (3, 4, 5, 6):
        config.order = ngram_order
        model = ngramlm.Model(config, data, bindir, workdir, name=str(config))

        print('train...')
        model.train(write_to_res=(res_file, str(config)))

        print('rescore...')
        hypotheses = nbest.get_nbest_list(data)
        nbest.lmscore = model.rescore(hypotheses)

        wer = nbest.wer()
        print(report.format(wer, nbest.lmscale, nbest.acscale))
        fres.AddWER(str(config), wer)
def main(): train_files = 100 data = reader.LargeData().dynamicly_load_raw_data( sorted_vocab_file=data_info['vocab_cut3'], train_list=data_info['train_all'][0:train_files], valid_file=data_info['valid'], test_file=data_info['test'], add_beg_token='<s>', add_end_token='</s>', add_unknwon_token='<unk>', vocab_max_size=None) nbest = reader.NBest(*reader.wsj0_nbest()) config = ngramlm.Config(data) config.res_file = 'results.txt' order_reg = [5] for order in order_reg: config.order = order config.cutoff = [0, 0, 2, 2, 5] model_name = 't{}_'.format(train_files) + str(config) workdir = 'ngramlm/' + model_name sys.stdout = wb.std_log(os.path.join(workdir, 'ngram.log')) datadir = 'ngramlm/data/' m = ngramlm.Model(config, data, bindir, workdir, datadir, name=model_name) print('train...') with wb.processing('training'): m.train(write_to_res=(res_file, model_name)) print('rescore...') with wb.processing('rescoring'): nbest.lmscore = m.rescore(nbest.get_nbest_list(data)) nbest.write_lmscore(os.path.join(workdir, 'nbest.lmscore')) wer = nbest.wer() print('wer={} lmscale={}, acscale={}'.format(wer, nbest.lmscale, nbest.acscale)) fres.AddWER(model_name, wer)