import sys

# repo-local imports assumed: seq, ngramlm, and the wb utility module;
# info, bindir, get_name and nbest_eval_chr are defined elsewhere in this script


def main():
    # PKU corpus with two parallel streams (characters and POS tags);
    # create_data(0) selects stream 0, the character stream
    data = seq.DataX(2,
                     vocab_files=[info['pku_vocab_chr'], info['pku_vocab_pos']],
                     train_list=info['pku_train'],
                     valid_list=info['pku_valid'],
                     test_list=info['pku_test'])
    data = data.create_data(0)

    config = ngramlm.Config(data)

    # train one n-gram model for each order
    order_reg = [3, 4, 5]
    for order in order_reg:
        config.order = order
        config.cutoff = [0] * order

        workdir = wb.mkdir('ngramlm/' + get_name(config), is_recreate=False)
        sys.stdout = wb.std_log(workdir + '/ngram.log')
        print(workdir)

        m = ngramlm.Model(config, data, bindir, workdir)

        # train
        with wb.processing('train...'):
            m.train(write_to_res=('results.txt', get_name(config)))

        # wer
        nbest_eval_chr(m, data, workdir,
                       res_file='results.txt',
                       res_name=get_name(config))
def main():
    # HKUST character data; the valid list is reused as the test set here
    data = reader.Data().load_raw_data(file_list=info['hkust_train_chr'] +
                                                 info['hkust_valid_chr'] +
                                                 info['hkust_valid_chr'],
                                       add_beg_token='<s>',
                                       add_end_token='</s>')

    config = ngramlm.Config(data)

    # train one n-gram model for each order
    order_reg = [3, 4, 5]
    for order in order_reg:
        config.order = order
        config.cutoff = [0] * order

        workdir = wb.mkdir('ngramlm/' + get_name(config), is_recreate=False)
        sys.stdout = wb.std_log(workdir + '/ngram.log')
        print(workdir)

        m = ngramlm.Model(config, data, bindir, workdir)

        # train
        with wb.processing('train...'):
            m.train(write_to_res=('results.txt', get_name(config)))

        # wer
        nbest_eval_chr(m, data, workdir,
                       res_file='results.txt',
                       res_name=get_name(config))
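# For reference, a minimal sketch of what the get_name() helper used above is
# assumed to produce: a tag built from the n-gram order and cutoffs that names
# the working directory and the result row. The exact format is an assumption;
# the repo's own get_name() may differ.
def get_name_sketch(config):
    # e.g. order=3, cutoff=[0, 0, 0] -> "KN3_000"
    return 'KN{}_{}'.format(config.order,
                            ''.join(str(c) for c in config.cutoff))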
def perform(self, step, epoch):
    # per-epoch WER of the sampler on the n-best lists; log-probs are
    # negated so that a lower score marks a better hypothesis
    nbest_eval_chr(self.sampler, self.data, self.logdir,
                   res_file=os.path.join(self.logdir, 'sampler_wer_per_epoch.log'),
                   res_name='epoch%.2f' % epoch,
                   rescore_fun=lambda x: -self.sampler.get_log_probs(x))
def perform(self, step, epoch):
    # if the model carries an auxiliary sampler, also track its WER per epoch
    if 'sampler' in self.m.__dict__:
        nbest_eval_chr(self.m, self.m.data, self.m.logdir,
                       res_file=os.path.join(self.m.logdir, 'sampler_wer_per_epoch.log'),
                       res_name='epoch%.2f' % epoch,
                       rescore_fun=lambda x: -self.m.sampler.get_log_probs(x))
    super().perform(step, epoch)
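# Hedged sketch of the callback contract the two perform() overrides above
# are assumed to follow (class and attribute names here are hypothetical):
# the trainer keeps a list of operations and invokes perform(step, epoch)
# on each one at a fixed step interval.
class Operation(object):
    def __init__(self, per_steps=100):
        self.per_steps = per_steps

    def perform(self, step, epoch):
        pass  # subclasses run per-epoch evaluation or logging here


def run_operations(operations, step, epoch):
    for op in operations:
        if step % op.per_steps == 0:
            op.perform(step, epoch)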
import os

import tensorflow as tf

# repo-local imports assumed: reader, lstmlm and wb; info, small_config,
# medium_config, large_config, create_name and nbest_eval_chr are defined
# elsewhere in this script


def main(_):
    # HKUST character data; the valid list is reused as the test set here
    data = reader.Data().load_raw_data(file_list=info['hkust_train_chr'] +
                                                 info['hkust_valid_chr'] +
                                                 info['hkust_valid_chr'],
                                       add_beg_token=None,
                                       add_end_token='</s>',
                                       add_unknwon_token='<unk>')  # parameter name as spelled in reader

    config = small_config(data)
    # config = medium_config(data)
    # config = large_config(data)

    work_dir = './lstm/' + create_name(config)
    wb.prepare_log_dir(work_dir, 'lstm.log')
    config.print()

    # dump the vocabulary and the id-encoded train/valid/test sets
    data.write_vocab(work_dir + '/vocab.txt')
    data.write_data(data.datas[0], work_dir + '/train.id')
    data.write_data(data.datas[1], work_dir + '/valid.id')
    data.write_data(data.datas[2], work_dir + '/test.id')

    write_model = os.path.join(work_dir, 'model.ckpt')

    # lm = lstmlm.FastLM(config, device_list=['/gpu:0', '/gpu:0'])
    lm = lstmlm.LM(config, data, device='/gpu:0')

    sv = tf.train.Supervisor(logdir=os.path.join(work_dir, 'logs'),
                             summary_op=None,
                             global_step=lm.global_step())
    sv.summary_writer.add_graph(tf.get_default_graph())  # write the graph to logs

    session_config = tf.ConfigProto(allow_soft_placement=True,
                                    log_device_placement=False)
    session_config.gpu_options.allow_growth = True
    with sv.managed_session(config=session_config) as session:
        lm.train(session, data,
                 write_model=write_model,
                 write_to_res=('results.txt', create_name(config)),
                 is_shuffle=False)

        print('compute the WER...')
        nbest_eval_chr(lm, data, work_dir, 'results.txt', create_name(config),
                       rescore_fun=lambda x: lm.rescore(session, x))
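# Minimal sketch of the n-best rescoring that nbest_eval_chr() is assumed to
# perform (the helper itself lives elsewhere in this repo; the n-best tuple
# format and the lmscale weight below are assumptions for illustration):
# rescore every hypothesis with the LM, interpolate with the acoustic score,
# and keep the lowest-scoring hypothesis per utterance before computing CER.
def rescore_nbest(nbest, rescore_fun, lmscale=1.0):
    best = {}
    for utt_id, acoustic_score, hypothesis in nbest:
        # rescore_fun returns one score per sequence; lower is better,
        # matching the -get_log_probs(x) convention used above
        total = acoustic_score + lmscale * rescore_fun([hypothesis])[0]
        if utt_id not in best or total < best[utt_id][0]:
            best[utt_id] = (total, hypothesis)
    return {utt_id: hyp for utt_id, (_, hyp) in best.items()}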