def main(_):
    config = get_config()
    q_config = run_lstmlm_withBegToken.small_config(data)
    name = create_name(config, q_config)
    logdir = wb.mkdir('./trf_nn/' + name, is_recreate=True)
    sys.stdout = wb.std_log(logdir + '/trf.log')
    config.pprint()
    print(logdir)

    # write data
    data.write_vocab(logdir + '/vocab.txt')
    data.write_data(data.datas[0], logdir + '/train.id')
    data.write_data(data.datas[1], logdir + '/valid.id')
    data.write_data(data.datas[2], logdir + '/test.id')

    m = trf.TRF(config, data, logdir=logdir,
                device='/gpu:0',
                simulater_device='/gpu:0',
                q_model=lstmlm.LM(run_lstmlm_withBegToken.small_config(data), device='/gpu:0'))
    ops = Ops(m)

    sv = tf.train.Supervisor(logdir=logdir + '/logs',
                             summary_op=None,
                             global_step=m._global_step)
    # sv.summary_writer.add_graph(tf.get_default_graph())  # write the graph to logs
    session_config = tf.ConfigProto(allow_soft_placement=True, log_device_placement=False)
    session_config.gpu_options.allow_growth = True
    with sv.managed_session(config=session_config) as session:
        m.set_session(session)

        print('load lstmlm q(x)')
        m.q_model.restore(session,
                          './lstm/' + run_lstmlm_withBegToken.create_name(m.q_model.config) + '/model.ckpt')

        m.train(sv, session,
                print_per_epoch=0.05,
                operation=ops,
                model_per_epoch=None)
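
# `Ops` is not defined in this excerpt. Below is a minimal sketch of the kind of
# operation hook that m.train(..., operation=ops) appears to expect: an object
# invoked periodically during training. The run(step, epoch) interface and the
# perform_next_epoch field are assumptions for illustration, not the actual API.
class OpsSketch(object):
    def __init__(self, m):
        self.m = m                      # the TRF model being trained
        self.perform_next_epoch = 1.0   # (assumed) next epoch at which to fire

    def run(self, step, epoch):
        # e.g. periodically report progress; replace with n-best rescoring or
        # extra checkpointing as needed
        if epoch >= self.perform_next_epoch:
            print('epoch={:.2f} step={}'.format(epoch, step))
            self.perform_next_epoch += 1.0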
def main(_):
    data = reader.Data().load_raw_data(reader.ptb_raw_dir(),
                                       add_beg_token=None,
                                       add_end_token='</s>',
                                       add_unknwon_token='<unk>')
    nbest = reader.NBest(*reader.wsj0_nbest())
    nbest_list = data.load_data(reader.wsj0_nbest()[0], is_nbest=True)

    config = small_config(data)
    # config = medium_config(data)
    # config = large_config(data)

    work_dir = './lstm/' + create_name(config)
    wb.mkdir(work_dir, is_recreate=True)
    sys.stdout = wb.std_log(os.path.join(work_dir, 'lstm.log'))
    print(work_dir)
    wb.pprint_dict(config.__dict__)

    data.write_vocab(work_dir + '/vocab.txt')
    data.write_data(data.datas[0], work_dir + '/train.id')
    data.write_data(data.datas[1], work_dir + '/valid.id')
    data.write_data(data.datas[2], work_dir + '/test.id')
    data.write_data(nbest_list, work_dir + '/nbest.id')

    write_model = os.path.join(work_dir, 'model.ckpt')

    with tf.Graph().as_default():
        # lm = lstmlm.FastLM(config, device_list=['/gpu:0', '/gpu:0'])
        lm = blocklm.LM(config, device='/gpu:0')
        param_num = tf.add_n([tf.size(v) for v in tf.trainable_variables()])

        for v in lm.train_net.variables:
            print(v.name)

        save = tf.train.Saver()

        # used to write ppl on valid/test set
        summ_bank = blocklm.layers.SummaryScalarBank(['ppl_valid', 'ppl_test'])
        summ_var = blocklm.layers.SummaryVariables()

        sv = tf.train.Supervisor(logdir=os.path.join(work_dir, 'logs'),
                                 summary_op=None,
                                 global_step=lm.global_step())
        sv.summary_writer.add_graph(tf.get_default_graph())  # write the graph to logs
        session_config = tf.ConfigProto(allow_soft_placement=True, log_device_placement=False)
        session_config.gpu_options.allow_growth = True
        with sv.managed_session(config=session_config) as session:
            print('param_num={:,}'.format(session.run(param_num)))

            lm.train(sv, session, data.datas[0], data.datas[1], data.datas[2])

            save.save(session, write_model)
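
# These scripts are launched through tf.app.run, which parses flags and then
# calls main(_) with the remaining argv. The excerpt omits the entry point; the
# standard TF 1.x boilerplate assumed here is:
if __name__ == '__main__':
    tf.app.run(main)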
def main(_):
    config = get_config()
    name = create_name(config)
    logdir = wb.mkdir('./trf_nn_char/' + name, is_recreate=False)
    sys.stdout = wb.std_log(logdir + '/trf.log')
    config.pprint()
    print(logdir)

    # word-embedding
    # config.config_trf.load_embedding_path = os.path.join(logdir, 'word_emb.txt')
    # config.config_trf.update_embedding = False
    # data.word2vec(config.config_trf.load_embedding_path, config.config_trf.embedding_dim, 0)

    # write data
    data.write_vocab(logdir + '/vocab.txt')
    data.write_char_vocab(logdir + '/vocab_char.txt', logdir + '/vocab_w2c.txt')
    data.write_data(data.datas[0], logdir + '/train.id')
    data.write_data(data.datas[1], logdir + '/valid.id')
    data.write_data(data.datas[2], logdir + '/test.id')
    data.write_data(nbest_list, logdir + '/nbest.id')

    with tf.Graph().as_default():
        m = trf.TRF(config, data, logdir=logdir, device=['/gpu:0'])

        sv = tf.train.Supervisor(logdir=logdir + '/logs',
                                 summary_op=None,
                                 global_step=m.global_steps)
        # sv.summary_writer.add_graph(tf.get_default_graph())  # write the graph to logs
        session_config = tf.ConfigProto(allow_soft_placement=True, log_device_placement=False)
        session_config.gpu_options.allow_growth = True
        with sv.managed_session(config=session_config) as session:
            m.set_session(session)

            # print(m.get_log_probs(data.datas[1][1]))
            # train_seqs = data.datas[0][0: config.config_trf.train_batch_size]
            # sample_seqs = data.datas[0][0: config.config_trf.sample_batch_size]
            # inputs, lengths = reader.produce_data_to_trf(train_seqs + sample_seqs)
            # m.net_trf.run_train(session, inputs, lengths, len(train_seqs))

            # m.pre_train(sv, session, batch_size=20, max_epoch=10, lr=1e-3)
            m.train(sv, session,
                    print_per_epoch=0.1,
                    nbest=nbest,
                    nbest_list=nbest_list)
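
# create_name is defined elsewhere in each script; it turns the config into the
# experiment directory name. A minimal sketch of that convention, assuming the
# name is assembled from the structure fields the configs set explicitly (the
# exact fields and format string are assumptions, not the repository's code):
def create_name_sketch(config):
    return 'trf_e{}_cnn{}_h{}'.format(config.embedding_dim,
                                      config.cnn_layers,
                                      config.cnn_hidden)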
def main(_):
    # print(data.datas[0][0: 10])
    # print(data.get_max_len())
    # return

    config = get_config()
    name = create_name(config)
    logdir = wb.mkdir('./trf_cnn_new/' + name, is_recreate=True)
    sys.stdout = wb.std_log(logdir + '/trf.log')
    config.pprint()
    print(logdir)

    # word-embedding
    # config.config_trf.load_embedding_path = os.path.join(logdir, 'word_emb.txt')
    # config.config_trf.update_embedding = False
    # data.word2vec(config.config_trf.load_embedding_path, config.config_trf.embedding_dim, 0)

    # write data
    data.write_vocab(logdir + '/vocab.txt')
    data.write_data(data.datas[0], logdir + '/train.id')
    data.write_data(data.datas[1], logdir + '/valid.id')
    data.write_data(data.datas[2], logdir + '/test.id')
    data.write_data(nbest_list, logdir + '/nbest.id')

    with tf.Graph().as_default():
        m = trf.TRF(config, data, logdir=logdir, device='/gpu:0')

        sv = tf.train.Supervisor(logdir=logdir + '/logs',
                                 summary_op=None,
                                 global_step=m.global_steps)
        # sv.summary_writer.add_graph(tf.get_default_graph())  # write the graph to logs
        session_config = tf.ConfigProto(allow_soft_placement=True, log_device_placement=False)
        session_config.gpu_options.allow_growth = True
        with sv.managed_session(config=session_config) as session:
            m.set_session(session)

            # m.pre_train(sv, session, batch_size=100, max_epoch=3, lr=1.)
            m.train(sv, session,
                    print_per_epoch=0.1,
                    nbest=nbest,
                    nbest_list=nbest_list)
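
# get_config() is likewise defined elsewhere. A minimal sketch of what it
# plausibly returns for this CNN-based TRF script; the field names follow the
# configs set explicitly in the NCE scripts below, but the values here are
# assumptions:
def get_config_sketch():
    config = trf.trfbase.Config(data)
    config.embedding_dim = 200
    config.cnn_filters = [(i, 100) for i in range(1, 11)]
    config.cnn_layers = 3
    config.cnn_hidden = 200
    return config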
def main(_):
    config = get_config()
    # config.auxiliary_shortlist = [4000, config.vocab_size]
    # config.sample_sub = 100
    # config.multiple_trial = 10
    name = create_name(config)
    logdir = wb.mkdir('./trf_rnn/' + name, is_recreate=True)
    sys.stdout = wb.std_log(logdir + '/trf.log')
    config.pprint()
    print(logdir)

    # write data
    data.write_vocab(logdir + '/vocab.txt')
    data.write_data(data.datas[0], logdir + '/train.id')
    data.write_data(data.datas[1], logdir + '/valid.id')
    data.write_data(data.datas[2], logdir + '/test.id')
    data.write_data(nbest_list, logdir + '/nbest.id')

    with tf.Graph().as_default():
        m = trf.TRF(config, data, logdir=logdir,
                    device='/gpu:0',
                    simulater_device='/gpu:0')

        sv = tf.train.Supervisor(logdir=logdir + '/logs',
                                 summary_op=None,
                                 global_step=m._global_step)
        # sv.summary_writer.add_graph(tf.get_default_graph())  # write the graph to logs
        session_config = tf.ConfigProto(allow_soft_placement=True, log_device_placement=False)
        session_config.gpu_options.allow_growth = True
        with sv.managed_session(config=session_config) as session:
            m.set_session(session)
            m.train(sv, session,
                    print_per_epoch=0.1,
                    nbest=nbest,
                    nbest_list=nbest_list)
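
# Passing nbest/nbest_list into m.train makes the trainer rescore the WSJ0
# n-best lists as it goes. Standalone, the rescoring pattern used across these
# scripts looks like the sketch below (get_nbest_list, lmscore and wer follow
# the NBest usage in the n-gram script; a rescore method on the TRF model is
# an assumption here):
def rescore_sketch(m, nbest, data):
    nbest.lmscore = m.rescore(nbest.get_nbest_list(data))  # one LM score per hypothesis
    print('wer={:.2f}'.format(nbest.wer()))                # WER after rescoring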
def main(_):
    config = get_config()
    name = create_name(config)
    logdir = wb.mkdir('./trf_nn_char/' + name, is_recreate=True)
    sys.stdout = wb.std_log(logdir + '/trf.log')
    config.pprint()
    print(logdir)

    # write data
    data.write_vocab(logdir + '/vocab.txt')
    data.write_char_vocab(logdir + '/vocab_char.txt', logdir + '/vocab_w2c.txt')
    data.write_data(data.datas[0], logdir + '/train.id')
    data.write_data(data.datas[1], logdir + '/valid.id')
    data.write_data(data.datas[2], logdir + '/test.id')
    data.write_data(nbest_list, logdir + '/nbest.id')

    with tf.Graph().as_default():
        m = trf.TRF(config, data, logdir=logdir, device=['/gpu:0'])

        sv = tf.train.Supervisor(logdir=logdir + '/logs',
                                 summary_op=None,
                                 global_step=m.global_steps)
        # sv.summary_writer.add_graph(tf.get_default_graph())  # write the graph to logs
        session_config = tf.ConfigProto(allow_soft_placement=True, log_device_placement=False)
        session_config.gpu_options.allow_growth = True
        with sv.managed_session(config=session_config) as session:
            m.set_session(session)
            # print(m.get_log_probs(data.datas[1][1]))
            m.train(sv, session,
                    print_per_epoch=0.1,
                    nbest=nbest,
                    nbest_list=nbest_list)
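
# wb.std_log replaces sys.stdout so that everything printed also lands in the
# experiment's log file. A minimal tee-style sketch of the idea (the real wb
# helper may differ in details):
class StdLogSketch(object):
    def __init__(self, log_path):
        self.terminal = sys.stdout
        self.fp = open(log_path, 'a')

    def write(self, s):
        self.terminal.write(s)
        self.fp.write(s)
        self.fp.flush()

    def flush(self):
        self.terminal.flush()

# usage, mirroring the scripts: sys.stdout = StdLogSketch(logdir + '/trf.log')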
def main():
    nbest_cmp = task.NBestComputer()
    data = reader.Data().load_raw_data([task.train, task.valid, task.valid],
                                       add_beg_token='<s>',
                                       add_end_token='</s>')

    config = ngramlm.Config(data)
    config.res_file = 'results.txt'

    order_reg = [4, 5, 6]
    for order in order_reg:
        config.order = order
        config.cutoff = [0] * order

        workdir = wb.mkdir('ngramlm/' + str(config), is_recreate=False)
        sys.stdout = wb.std_log(workdir + '/ngram.log')
        print(workdir)

        m = ngramlm.Model(config, data, bindir, workdir)

        # train
        print('training...')
        m.train()

        # rescore
        print('rescoring...')
        time_beg = time.time()
        for nbest in nbest_cmp.nbests:
            nbest.lmscore = m.rescore(nbest.get_nbest_list(data))
            # print(len(nbest.lmscore))
        print('rescore time={:.2f}m'.format((time.time() - time_beg) / 60))
        nbest_cmp.write_lmscore(workdir + '/model')

        # tune lm-scale
        print('computing wer...')
        nbest_cmp.cmp_wer()
        nbest_cmp.write_to_res(config.res_file, str(config))
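
# nbest_cmp.cmp_wer() tunes the language-model scale: hypotheses are ranked by
# total = acscore + scale * lmscore over a grid of scales, keeping the scale
# with the lowest WER. A sketch of that search (the lmscale attribute and the
# 1..20 grid are assumptions; lmscore and wer() follow the usage above):
def tune_lmscale_sketch(nbest):
    best_scale, best_wer = None, float('inf')
    for scale in range(1, 21):
        nbest.lmscale = scale          # (assumed attribute) weight on lmscore
        wer = nbest.wer()              # WER at this scale
        if wer < best_wer:
            best_scale, best_wer = scale, wer
    return best_scale, best_wer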
def main(_):
    data = reader.Data().load_raw_data(reader.ptb_raw_dir(),
                                       add_beg_token='<s>',
                                       add_end_token='</s>',
                                       add_unknwon_token='<unk>')
    nbest = reader.NBest(*reader.wsj0_nbest())
    nbest_list = data.load_data(nbest.nbest, is_nbest=True)
    print('nbest list info=', wb.TxtInfo(nbest.nbest))

    config = trfnce.Config(data)
    config.structure_type = 'rnn'
    config.embedding_dim = 200
    config.rnn_hidden_layers = 2
    config.rnn_hidden_size = 200
    config.batch_size = 20
    config.noise_factor = 100
    config.noise_sampler = 2
    config.init_weight = 0.1
    config.lr_param = trfbase.LearningRateTime(1e-3)
    config.max_epoch = 100
    # config.dropout = 0.75
    # config.init_zeta = config.get_initial_logz(20)
    config.update_zeta = False
    config.write_dbg = False
    config.pprint()

    name = create_name(config)
    logdir = 'trf_nce/' + name
    wb.mkdir(logdir, is_recreate=True)
    sys.stdout = wb.std_log(os.path.join(logdir, 'trf.log'))
    print(logdir)

    data.write_vocab(logdir + '/vocab.txt')
    data.write_data(data.datas[1], logdir + '/valid.id')
    data.write_data(data.datas[2], logdir + '/test.id')
    data.write_data(nbest_list, logdir + '/nbest.id')

    # wb.rmdir(logdirs)
    with tf.Graph().as_default():
        m = trfnce.TRF(config, data, logdir=logdir, device='/gpu:0')
        # noise_lstm = lstmlm.LM(run_lstmlm_withBegToken.small_config(data), device='/gpu:1')
        # m.lstm = noise_lstm

        sv = tf.train.Supervisor(logdir=os.path.join(logdir, 'logs'),
                                 global_step=m.train_net.global_step)
        sv.summary_writer.add_graph(tf.get_default_graph())  # write the graph to logs
        session_config = tf.ConfigProto(allow_soft_placement=True, log_device_placement=False)
        session_config.gpu_options.allow_growth = True
        with sv.managed_session(config=session_config) as session:
            m.set_session(session)

            # print('load lstmlm for noise generator')
            # noise_lstm.restore(session,
            #                    './lstm/' + run_lstmlm_withBegToken.create_name(noise_lstm.config) + '/model.ckpt')

            m.train(sv, session,
                    print_per_epoch=0.1,
                    nbest=nbest,
                    nbest_list=nbest_list)
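
# NCE with noise factor k = config.noise_factor trains a binary classifier to
# separate each data sample from k noise samples. The posterior the loss is
# built from is P(data | x) = p_m(x) / (p_m(x) + k * p_n(x)), computed in log
# space for stability. A self-contained sketch of that formula (standard NCE,
# not lifted from trfnce itself):
import numpy as np

def nce_data_posterior(logp_model, logp_noise, noise_factor):
    # log P(data|x) = logp_m - logaddexp(logp_m, log(k) + logp_n)
    return logp_model - np.logaddexp(logp_model,
                                     np.log(noise_factor) + logp_noise)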
def main(_):
    config = trfnce.Config(data)
    config.structure_type = 'mix'
    config.embedding_dim = 128
    config.cnn_filters = [(i, 128) for i in range(1, 5)]
    config.cnn_hidden = 128
    config.cnn_layers = 1
    config.cnn_skip_connection = False
    config.cnn_residual = True
    config.cnn_activation = 'relu'
    config.rnn_hidden_layers = 1
    config.rnn_hidden_size = 128
    config.attention = True
    config.batch_size = 100
    config.noise_factor = 2
    config.noise_sampler = 2
    config.init_weight = 0.1
    config.optimize_method = ['sgd', 'sgd']
    config.lr_param = trfbase.LearningRateEpochDelay(1e-2, 0.5)
    config.lr_zeta = trfbase.LearningRateEpochDelay(1e-2, 0.5)
    config.max_epoch = 10
    # config.dropout = 0.75
    # config.init_zeta = config.get_initial_logz(0)
    config.update_zeta = True
    config.write_dbg = False
    config.pprint()

    q_config = run_lstmlm.small_config(data)
    # q_config = None

    name = create_name(config, q_config)
    logdir = 'trf_nce/' + name
    wb.mkdir(logdir, is_recreate=True)
    sys.stdout = wb.std_log(os.path.join(logdir, 'trf.log'))
    print(logdir)

    data.write_vocab(logdir + '/vocab.txt')
    data.write_data(data.datas[1], logdir + '/valid.id')
    data.write_data(data.datas[2], logdir + '/test.id')

    # wb.rmdir(logdirs)
    with tf.Graph().as_default():
        if q_config is None:
            m = trfnce.TRF(config, data, logdir=logdir, device='/gpu:0')
        else:
            m = trfnce.TRF(config, data, logdir=logdir, device='/gpu:1',
                           q_model=lstmlm.LM(q_config, device='/gpu:1'))
        # noise_lstm = lstmlm.LM(run_lstmlm_withBegToken.small_config(data), device='/gpu:1')
        # m.lstm = noise_lstm

        sv = tf.train.Supervisor(logdir=os.path.join(logdir, 'logs'),
                                 global_step=m.train_net.global_step)
        sv.summary_writer.add_graph(tf.get_default_graph())  # write the graph to logs
        session_config = tf.ConfigProto(allow_soft_placement=True, log_device_placement=False)
        session_config.gpu_options.allow_growth = True
        with sv.managed_session(config=session_config) as session:
            m.set_session(session)

            if m.q_model is not None:
                print('load lstmlm for q model')
                m.q_model.restore(session,
                                  './lstm/' + run_lstmlm.create_name(q_config) + '/model.ckpt')

            m.train(sv, session,
                    print_per_epoch=0.1,
                    operation=task.Ops(m),
                    # nbest=nbest,
                    # lmscale_vec=np.linspace(1, 20, 20)
                    )
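
# LearningRateEpochDelay(init, delay) is constructed from an initial rate and a
# per-epoch decay factor. Assuming the obvious schedule (not verified against
# trfbase), the rate after `epoch` full epochs would be:
def lr_epoch_delay_sketch(init, delay, epoch):
    return init * (delay ** int(epoch))   # e.g. init=1e-2, delay=0.5 -> 1e-2, 5e-3, 2.5e-3, ...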
def main(_):
    data = reader.Data().load_raw_data(corpus.char_raw_dir(),
                                       add_beg_token='<s>',
                                       add_end_token='</s>',
                                       add_unknwon_token=None,
                                       max_length=1000)
    nbest = reader.NBest(*reader.wsj0_nbest())
    print(nbest.wer())

    config = trf.trfbase.Config(data)
    config.embedding_dim = 12
    config.cnn_filters = [(i, 12) for i in range(1, 11)]
    config.cnn_layers = 3
    config.cnn_hidden = 12
    config.cnn_shared_over_layers = False
    config.cnn_residual = True
    config.cnn_skip_connection = True
    config.max_epoch = 1000
    config.sample_sub = 100
    config.jump_width = 10
    config.init_weight = 0.1
    config.opt_method = 'adam'
    config.lr_cnn = trf.trfbase.LearningRateTime(1, 1.5, tc=1e4)
    config.lr_zeta = trf.trfbase.LearningRateTime(1.0, 0.2)
    config.load_embedding_path = './embedding/ptb_{}x{}.emb'.format(config.vocab_size, config.embedding_dim)
    config.auxiliary_hidden = 12
    config.auxiliary_lr = 1.0

    name = create_name(config)
    logdir = name
    wb.mkdir(logdir, is_recreate=True)
    sys.stdout = wb.std_log(logdir + '/trf.log')
    print(logdir)
    config.pprint()

    # prepare embedding
    if (wb.is_linux() and config.load_embedding_path is not None) or \
            (config.feat_type_file and config.feat_cluster > 0):
        if config.load_embedding_path is None:
            fvectors = './embedding/ptb_{}x{}.emb'.format(config.vocab_size, config.embedding_dim)
        else:
            fvectors = config.load_embedding_path
        data.word2vec(fvectors, dim=config.embedding_dim, cnum=config.feat_cluster)
    else:
        config.load_embedding_path = None

    # write data
    data.write_vocab(logdir + '/vocab.txt')
    data.write_data(data.datas[0], logdir + '/train.id')
    data.write_data(data.datas[1], logdir + '/valid.id')
    data.write_data(data.datas[2], logdir + '/test.id')

    nbest_char_txt = logdir + '/nbest.char.txt'
    corpus.word_text_to_char_text(reader.wsj0_nbest()[0], nbest_char_txt, is_nbest=True)
    nbest_list = data.load_data(nbest_char_txt, is_nbest=False)
    data.write_data(nbest_list, logdir + '/nbest.id')

    with tf.Graph().as_default():
        m = trf.TRF(config, data, logdir=logdir,
                    device='/gpu:2',
                    simulater_device='/gpu:1')

        sv = tf.train.Supervisor(logdir=logdir + '/logs',
                                 summary_op=None,
                                 global_step=m._global_step)
        # sv.summary_writer.add_graph(tf.get_default_graph())  # write the graph to logs
        session_config = tf.ConfigProto(allow_soft_placement=True, log_device_placement=False)
        session_config.gpu_options.allow_growth = True
        with sv.managed_session(config=session_config) as session:
            # s = ['it was not black monday', 'we did n\'t even get a chance']
            # eval_list = data.load_data([[data.beg_token_str] + w.split() + [data.end_token_str] for w in s])
            # print(eval_list)

            # import sampling as sp
            # x_batch = [x for x in sp.SeqIter(3, config.vocab_size,
            #                                  beg_token=config.beg_token,
            #                                  end_token=config.end_token)]
            # logprobs = m.get_log_probs(x_batch, False)
            # logz = sp.log_sum(logprobs)
            # print(logprobs)
            # print(logz)

            m.train(session, sv,
                    print_per_epoch=0.1,
                    nbest=nbest,
                    nbest_list=nbest_list)
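
# The commented-out debugging block above checks the normalizing constant by
# enumerating every length-3 sequence and log-sum-exp-ing the model scores;
# sp.log_sum is presumably that reduction. A numerically stable sketch of it:
import numpy as np

def log_sum_sketch(logprobs):
    m = np.max(logprobs)
    return m + np.log(np.sum(np.exp(np.asarray(logprobs) - m)))  # log sum_i exp(logprobs[i])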
def main(_):
    config = trfnce.Config(data)
    config.structure_type = 'cnn'
    config.embedding_dim = 200
    config.cnn_filters = [(i, 100) for i in range(1, 11)]
    config.cnn_width = 3
    config.cnn_layers = 3
    config.cnn_hidden = 200
    config.rnn_hidden_layers = 2
    config.rnn_hidden_size = 200
    config.rnn_predict = True
    config.batch_size = 10
    config.noise_factor = 10
    config.noise_sampler = 'lstm:lstm/lstm_e200_h200x2/model.ckpt'
    config.init_weight = 0.1
    config.optimize_method = ['adam', 'adam']
    config.lr_param = trfbase.LearningRateEpochDelay(0.001)
    config.lr_zeta = trfbase.LearningRateEpochDelay(0.01)
    config.max_epoch = 100
    # config.dropout = 0.75
    # config.init_zeta = config.get_initial_logz(20)
    config.update_zeta = True
    config.write_dbg = False
    config.pprint()

    # q_config = run_lstmlm.small_config(data)
    q_config = None

    name = create_name(config, q_config)
    logdir = 'trf_nce/' + name
    wb.mkdir(logdir, is_recreate=True)
    sys.stdout = wb.std_log(os.path.join(logdir, 'trf.log'))
    print(logdir)

    data.write_vocab(logdir + '/vocab.txt')
    data.write_data(data.datas[1], logdir + '/valid.id')
    data.write_data(data.datas[2], logdir + '/test.id')

    # wb.rmdir(logdirs)
    with tf.Graph().as_default():
        if q_config is None:
            m = trfnce.TRF(config, data, logdir=logdir, device='/gpu:0')
        else:
            m = trfnce.TRF(config, data, logdir=logdir, device='/gpu:0',
                           q_model=lstmlm.LM(q_config, device='/gpu:0'))
        # s1 = trfnce.NoiseSamplerNgram(config, data, 2)
        # s2 = trfnce.NoiseSamplerLSTMEval(config, data, config.noise_sampler.split(':')[-1])

        sv = tf.train.Supervisor(logdir=os.path.join(logdir, 'logs'),
                                 global_step=m.train_net.global_step)
        sv.summary_writer.add_graph(tf.get_default_graph())  # write the graph to logs
        session_config = tf.ConfigProto(allow_soft_placement=True, log_device_placement=False)
        session_config.gpu_options.allow_growth = True
        with sv.managed_session(config=session_config) as session:
            with session.as_default():
                if m.q_model is not None:
                    print('load lstmlm for q model')
                    m.q_model.restore(session,
                                      './lstm/' + run_lstmlm.create_name(q_config) + '/model.ckpt')

                m.train(sv, session,
                        print_per_epoch=0.1,
                        operation=task.Ops(m),
                        )
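
# config.noise_sampler is either an n-gram order (an int, as in the scripts
# above) or an 'lstm:<checkpoint>' spec; the commented lines show the checkpoint
# being recovered with split(':')[-1]. A sketch of dispatching on the spec (the
# class names come from those commented lines; treating the int as the n-gram
# order is an assumed interpretation):
def build_noise_sampler_sketch(config, data):
    if isinstance(config.noise_sampler, int):
        return trfnce.NoiseSamplerNgram(config, data, config.noise_sampler)
    method, ckpt = config.noise_sampler.split(':', 1)  # ('lstm', 'lstm/lstm_e200_h200x2/model.ckpt')
    return trfnce.NoiseSamplerLSTMEval(config, data, ckpt)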