# (imports assumed; the original excerpt starts at main(_))
import sys
import os
import tensorflow as tf
from model import reader
from model import blocklm
from model import wblib as wb


def main(_):
    # small_config / medium_config / large_config and create_name are defined
    # elsewhere in this script (not shown in this excerpt).
    data = reader.Data().load_raw_data(reader.ptb_raw_dir(),
                                       add_beg_token=None,
                                       add_end_token='</s>',
                                       add_unknwon_token='<unk>')
    nbest = reader.NBest(*reader.wsj0_nbest())
    nbest_list = data.load_data(reader.wsj0_nbest()[0], is_nbest=True)

    config = small_config(data)
    # config = medium_config(data)
    # config = large_config(data)

    work_dir = './lstm/' + create_name(config)
    wb.mkdir(work_dir, is_recreate=True)
    sys.stdout = wb.std_log(os.path.join(work_dir, 'lstm.log'))
    print(work_dir)
    wb.pprint_dict(config.__dict__)

    data.write_vocab(work_dir + '/vocab.txt')
    data.write_data(data.datas[0], work_dir + '/train.id')
    data.write_data(data.datas[1], work_dir + '/valid.id')
    data.write_data(data.datas[2], work_dir + '/test.id')
    data.write_data(nbest_list, work_dir + '/nbest.id')

    write_model = os.path.join(work_dir, 'model.ckpt')

    with tf.Graph().as_default():
        # lm = lstmlm.FastLM(config, device_list=['/gpu:0', '/gpu:0'])
        lm = blocklm.LM(config, device='/gpu:0')
        param_num = tf.add_n([tf.size(v) for v in tf.trainable_variables()])

        for v in lm.train_net.variables:
            print(v.name)

        save = tf.train.Saver()

        # used to write ppl on valid/test set
        summ_bank = blocklm.layers.SummaryScalarBank(['ppl_valid', 'ppl_test'])
        summ_var = blocklm.layers.SummaryVariables()

        sv = tf.train.Supervisor(logdir=os.path.join(work_dir, 'logs'),
                                 summary_op=None,
                                 global_step=lm.global_step())
        sv.summary_writer.add_graph(tf.get_default_graph())  # write the graph to logs

        session_config = tf.ConfigProto(allow_soft_placement=True,
                                        log_device_placement=False)
        session_config.gpu_options.allow_growth = True
        with sv.managed_session(config=session_config) as session:
            print('param_num={:,}'.format(session.run(param_num)))

            lm.train(sv, session, data.datas[0], data.datas[1], data.datas[2])

            save.save(session, write_model)
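# Standard TF1 entry point, assumed here since the excerpt defines main(_)
# in the tf.app.run style used across this repo.
if __name__ == '__main__':
    tf.app.run(main=main)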
import tensorflow as tf
import sys
import os
import numpy as np
import time

from model import reader
from model import trfnnbase as trf
from model import wblib as wb

# [data]
data = reader.Data().load_raw_data(reader.ptb_raw_dir(),
                                   add_beg_token='<s>',
                                   add_end_token='</s>')
data.build_char_vocab(add_beg_end_tokens=True)  # build char vocabulary
nbest = reader.NBest(*reader.wsj0_nbest())
nbest_list = data.load_data(nbest.nbest, is_nbest=True)


def create_name(config):
    return 'trf_' + str(config.config_trf) + '_maxlen{}'.format(config.max_len)


def get_config():
    config = trf.Config(data, 'rnn_char')
    config.jump_width = 2
    config.chain_num = 10
    config.batch_size = 100
    config.lr_cnn = trf.trfbase.LearningRateTime(beta=1.0, tc=1e4)
    config.lr_zeta = trf.trfbase.LearningRateTime(1.0, 0.2)
    config.max_epoch = 1000
    return config
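# A minimal sketch of a training entry point, by analogy with the TRF-NCE
# training script below. trf.TRF and its train(...) signature are assumptions
# about the trfnnbase API, and 'trf_char/' is a hypothetical output directory;
# adjust both before use.
def main(_):
    config = get_config()
    logdir = 'trf_char/' + create_name(config)
    wb.mkdir(logdir, is_recreate=True)
    sys.stdout = wb.std_log(os.path.join(logdir, 'trf.log'))

    with tf.Graph().as_default():
        m = trf.TRF(config, data, logdir=logdir, device='/gpu:0')
        sv = tf.train.Supervisor(logdir=os.path.join(logdir, 'logs'))
        session_config = tf.ConfigProto(allow_soft_placement=True)
        session_config.gpu_options.allow_growth = True
        with sv.managed_session(config=session_config) as session:
            m.set_session(session)
            m.train(sv, session, nbest=nbest, nbest_list=nbest_list)


if __name__ == '__main__':
    tf.app.run(main=main)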
# (imports assumed; the original excerpt starts at main(_))
import numpy as np
import tensorflow as tf
from model import reader
from model import lstmlm
from model import blocklm
from model import wblib as wb


def main(_):
    # [data]
    data = reader.Data().load_raw_data(reader.ptb_raw_dir(),
                                       add_beg_token=None,
                                       add_end_token='</s>')

    config = blocklm.Config()
    config.vocab_size = data.get_vocab_size()
    config.block_size = 5
    config.hidden_layers = 1

    m = lstmlm.LM(config)
    m2 = blocklm.LM(config)
    for v in m2.train_net.variables:
        print(v.name)

    wb.pprint_dict(config.__dict__)

    recoder = wb.clock()
    sv = tf.train.Supervisor()
    session_config = tf.ConfigProto(allow_soft_placement=True,
                                    log_device_placement=False)
    session_config.gpu_options.allow_growth = True
    with sv.managed_session(config=session_config) as session:
        batch = m.sample_net.config.batch_size
        length = 1000
        initial_seqs = np.random.choice(config.vocab_size, size=(batch, 1))

        with recoder.recode('sample_block'):
            final_seqs = m2.simulate(session, initial_seqs, length, True)
            # for i in range(length // config.block_size):
            #     append_seqs, _ = m2.sample_net.run_predict(session, initial_seqs[:, -1:], m2.sample_net.draw)

        with recoder.recode('sample_lstm'):
            final_seqs = m.simulate(session, initial_seqs, length, True)
            # for i in range(length):
            #     append_seqs, _ = m.sample_net.run_predict(session, initial_seqs[:, -1:], m.sample_net.draw)

        # with recoder.recode('sample'):
        #     m.sample_net.set_zero_state(session)
        #     for i in range(length):
        #         append_seqs, _ = m.sample_net.run_predict(session, initial_seqs[:, -1:], m.sample_net.draw)
        #         initial_seqs = np.concatenate([initial_seqs, append_seqs], axis=-1)
        #     # print(initial_seqs)
        #
        # with recoder.recode('probs'):
        #     m.sample_net.set_zero_state(session)
        #     for i in range(length):
        #         probs = m.sample_net.run_predict(session, initial_seqs[:, i:i+1], [m.sample_net.softmax.probs])
        #     # print(initial_seqs)
        #
        # with recoder.recode('condition'):
        #     m.sample_net.set_zero_state(session)
        #     for i in range(length):
        #         m.sample_net.run(session, initial_seqs[:, i:i+1], initial_seqs[:, i+1:i+2], [m.sample_net.cost])
        #     # print(initial_seqs)

        for key, t in sorted(recoder.items(), key=lambda x: x[0]):
            print('{}={:.2f}'.format(key, t * 60))
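# Assumed entry point for this sampling benchmark, following the same
# tf.app.run convention as the other scripts in this repo.
if __name__ == '__main__':
    tf.app.run(main=main)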
# (imports assumed; the original excerpt starts at main(_))
import sys
import os
import tensorflow as tf
from model import reader
from model import trfbase
from model import trfnce
from model import wblib as wb


def main(_):
    # create_name is defined elsewhere in this script (not shown in this excerpt).
    data = reader.Data().load_raw_data(reader.ptb_raw_dir(),
                                       add_beg_token='<s>',
                                       add_end_token='</s>',
                                       add_unknwon_token='<unk>')
    nbest = reader.NBest(*reader.wsj0_nbest())
    nbest_list = data.load_data(nbest.nbest, is_nbest=True)
    print('nbest list info=', wb.TxtInfo(nbest.nbest))

    config = trfnce.Config(data)
    config.structure_type = 'rnn'
    config.embedding_dim = 200
    config.rnn_hidden_layers = 2
    config.rnn_hidden_size = 200
    config.batch_size = 20
    config.noise_factor = 100
    config.noise_sampler = 2
    config.init_weight = 0.1
    config.lr_param = trfbase.LearningRateTime(1e-3)
    config.max_epoch = 100
    # config.dropout = 0.75
    # config.init_zeta = config.get_initial_logz(20)
    config.update_zeta = False
    config.write_dbg = False
    config.pprint()

    name = create_name(config)
    logdir = 'trf_nce/' + name
    wb.mkdir(logdir, is_recreate=True)
    sys.stdout = wb.std_log(os.path.join(logdir, 'trf.log'))
    print(logdir)

    data.write_vocab(logdir + '/vocab.txt')
    data.write_data(data.datas[1], logdir + '/valid.id')
    data.write_data(data.datas[2], logdir + '/test.id')
    data.write_data(nbest_list, logdir + '/nbest.id')

    # wb.rmdir(logdirs)
    with tf.Graph().as_default():
        m = trfnce.TRF(config, data, logdir=logdir, device='/gpu:0')
        # noise_lstm = lstmlm.LM(run_lstmlm_withBegToken.small_config(data), device='/gpu:1')
        # m.lstm = noise_lstm

        sv = tf.train.Supervisor(logdir=os.path.join(logdir, 'logs'),
                                 global_step=m.train_net.global_step)
        sv.summary_writer.add_graph(tf.get_default_graph())  # write the graph to logs

        session_config = tf.ConfigProto(allow_soft_placement=True,
                                        log_device_placement=False)
        session_config.gpu_options.allow_growth = True
        with sv.managed_session(config=session_config) as session:
            m.set_session(session)

            # print('load lstmlm for noise generator')
            # noise_lstm.restore(session,
            #                    './lstm/' + run_lstmlm_withBegToken.create_name(noise_lstm.config) + '/model.ckpt')

            m.train(sv, session,
                    print_per_epoch=0.1,
                    nbest=nbest,
                    nbest_list=nbest_list)
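# Assumed entry point, as in the other training scripts.
if __name__ == '__main__':
    tf.app.run(main=main)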
import tensorflow as tf
import sys
import os
import numpy as np
import time

from model import reader
from model import trfnnbase as trf
from model import wblib as wb

# [data]
data = reader.Data().load_raw_data(reader.ptb_raw_dir(),
                                   add_beg_token='<s>',
                                   add_end_token='</s>')
# data.cut_train_to_length(50)
nbest = reader.NBest(*reader.wsj0_nbest())
nbest_list = data.load_data(nbest.nbest, is_nbest=True)


def create_name(config):
    return 'trf_' + str(config.config_trf) + '_maxlen{}'.format(config.max_len)


def get_config():
    config = trf.Config(data, 'cnn')
    config.jump_width = 2
    config.chain_num = 20
    config.batch_size = 100
    config.lr_cnn = trf.trfbase.LearningRateTime(beta=1.0, tc=1e4)
    config.lr_zeta = trf.trfbase.LearningRateTime(1.0, 0.2)

    config_trf = config.config_trf
    config_trf.opt_method = 'adam'
    return config