def main(_):
    """Train a char-augmented TRF model and evaluate it on the n-best list.

    Builds the working directory (reused if it already exists), redirects
    stdout to a log file, dumps the vocabularies and id-mapped datasets,
    then runs TRF training under a tf.train.Supervisor-managed session.
    """
    cfg = get_config()
    run_name = create_name(cfg)
    work_dir = wb.mkdir('./trf_nn_char/' + run_name, is_recreate=False)

    # Mirror all console output into the run's log file.
    sys.stdout = wb.std_log(work_dir + '/trf.log')
    cfg.pprint()
    print(work_dir)

    # Persist vocabularies and the id-encoded corpora next to the model.
    data.write_vocab(work_dir + '/vocab.txt')
    data.write_char_vocab(work_dir + '/vocab_char.txt', work_dir + '/vocab_w2c.txt')
    for corpus, tag in zip(data.datas[:3], ('train', 'valid', 'test')):
        data.write_data(corpus, work_dir + '/' + tag + '.id')
    data.write_data(nbest_list, work_dir + '/nbest.id')

    with tf.Graph().as_default():
        model = trf.TRF(cfg, data, logdir=work_dir, device=['/gpu:0'])
        supervisor = tf.train.Supervisor(logdir=work_dir + '/logs',
                                         summary_op=None,
                                         global_step=model.global_steps)
        sess_cfg = tf.ConfigProto(allow_soft_placement=True,
                                  log_device_placement=False)
        sess_cfg.gpu_options.allow_growth = True
        with supervisor.managed_session(config=sess_cfg) as sess:
            model.set_session(sess)
            model.train(supervisor, sess,
                        print_per_epoch=0.1,
                        nbest=nbest,
                        nbest_list=nbest_list)
def main(_):
    """Train a CNN-based TRF model and evaluate it on the n-best list.

    Recreates the working directory from scratch, redirects stdout to a
    log file, dumps the vocabulary and id-mapped datasets, then runs TRF
    training inside a tf.train.Supervisor-managed session.
    """
    cfg = get_config()
    run_name = create_name(cfg)
    work_dir = wb.mkdir('./trf_cnn_new/' + run_name, is_recreate=True)

    # Mirror all console output into the run's log file.
    sys.stdout = wb.std_log(work_dir + '/trf.log')
    cfg.pprint()
    print(work_dir)

    # Persist the vocabulary and the id-encoded corpora next to the model.
    data.write_vocab(work_dir + '/vocab.txt')
    for corpus, tag in zip(data.datas[:3], ('train', 'valid', 'test')):
        data.write_data(corpus, work_dir + '/' + tag + '.id')
    data.write_data(nbest_list, work_dir + '/nbest.id')

    with tf.Graph().as_default():
        model = trf.TRF(cfg, data, logdir=work_dir, device='/gpu:0')
        supervisor = tf.train.Supervisor(logdir=work_dir + '/logs',
                                         summary_op=None,
                                         global_step=model.global_steps)
        sess_cfg = tf.ConfigProto(allow_soft_placement=True,
                                  log_device_placement=False)
        sess_cfg.gpu_options.allow_growth = True
        with supervisor.managed_session(config=sess_cfg) as sess:
            model.set_session(sess)
            model.train(supervisor, sess,
                        print_per_epoch=0.1,
                        nbest=nbest,
                        nbest_list=nbest_list)
def main(_):
    """Train a char-augmented TRF model and evaluate it on the n-best list.

    Recreates the working directory from scratch, redirects stdout to a
    log file, dumps the word/char vocabularies and id-mapped datasets,
    then runs TRF training under a tf.train.Supervisor-managed session.
    """
    cfg = get_config()
    run_name = create_name(cfg)
    work_dir = wb.mkdir('./trf_nn_char/' + run_name, is_recreate=True)

    # Mirror all console output into the run's log file.
    sys.stdout = wb.std_log(work_dir + '/trf.log')
    cfg.pprint()
    print(work_dir)

    # Persist vocabularies and the id-encoded corpora next to the model.
    data.write_vocab(work_dir + '/vocab.txt')
    data.write_char_vocab(work_dir + '/vocab_char.txt', work_dir + '/vocab_w2c.txt')
    for corpus, tag in zip(data.datas[:3], ('train', 'valid', 'test')):
        data.write_data(corpus, work_dir + '/' + tag + '.id')
    data.write_data(nbest_list, work_dir + '/nbest.id')

    with tf.Graph().as_default():
        model = trf.TRF(cfg, data, logdir=work_dir, device=['/gpu:0'])
        supervisor = tf.train.Supervisor(logdir=work_dir + '/logs',
                                         summary_op=None,
                                         global_step=model.global_steps)
        sess_cfg = tf.ConfigProto(allow_soft_placement=True,
                                  log_device_placement=False)
        sess_cfg.gpu_options.allow_growth = True
        with supervisor.managed_session(config=sess_cfg) as sess:
            model.set_session(sess)
            model.train(supervisor, sess,
                        print_per_epoch=0.1,
                        nbest=nbest,
                        nbest_list=nbest_list)