def dynamic_rnn():
    # load data
    train_data = read_dataset(os.path.join(FLAGS.data_path, 'penn.train.pos'))
    dev_data = read_dataset(os.path.join(FLAGS.data_path, 'penn.devel.pos'))
    embedding = tf.get_variable("embedding",
                                [FLAGS.emb_size, FLAGS.word_dim], tf.float32)
    with tf.name_scope('placeholder'):
        x_ = tf.placeholder(tf.int32, [FLAGS.batch_size, None])
        y_ = tf.placeholder(tf.int32, [None])
        mask = tf.placeholder(tf.int32, [None])
        output_keep_prob = tf.placeholder(tf.float32)
    # x: [batch_size, n_steps, word_dim]
    x = tf.nn.embedding_lookup(embedding, x_)
    lstm_cell = tf.nn.rnn_cell.BasicLSTMCell(FLAGS.n_hidden,
                                             state_is_tuple=True,
                                             activation=tf.nn.relu)
    # dropout on the cell outputs; the keep probability is fed at run time
    lstm_cell = tf.nn.rnn_cell.DropoutWrapper(
        lstm_cell, output_keep_prob=output_keep_prob)
    # get LSTM outputs for every time step
    outputs, _ = tf.nn.dynamic_rnn(lstm_cell, x, dtype=tf.float32)
    outputs = tf.reshape(outputs, [-1, FLAGS.n_hidden])
    # weights and biases of the logistic (softmax) layer
    with tf.variable_scope('linear'):
        weights = tf.get_variable("weight",
                                  [FLAGS.n_hidden, FLAGS.n_classes], tf.float32)
        biases = tf.get_variable("biases", [FLAGS.n_classes], tf.float32)
    logits = tf.matmul(outputs, weights) + biases
    # the masked loss is left commented out; padding positions are not excluded here
    # loss = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(logits, y_) * tf.cast(mask, tf.float32))
    loss = tf.reduce_mean(
        tf.nn.sparse_softmax_cross_entropy_with_logits(logits, y_))
    train_op = tf.train.AdamOptimizer(FLAGS.learning_rate).minimize(loss)
    # count correct predictions on non-padding positions only
    y = tf.cast(tf.nn.in_top_k(logits, y_, 1), tf.int32) * mask
    correct = tf.reduce_sum(y)
    with tf.Session(config=util.gpu_config()) as sess:
        sess.run(tf.initialize_all_variables())
        FLAGS.epoch_size = train_data.numbers() // FLAGS.batch_size
        for step in range(FLAGS.epoch_size * FLAGS.epoch_step):
            batch_x, batch_y, mask_feed = util.padding(
                *train_data.next_batch(FLAGS.batch_size))
            sess.run(train_op,
                     feed_dict={
                         x_: batch_x,
                         y_: batch_y,
                         output_keep_prob: 1 - FLAGS.dropout,
                         mask: mask_feed
                     })
            # evaluate on the dev set once per epoch
            if step % FLAGS.epoch_size == 0:
                evalution(sess, correct, x_, y_, mask, output_keep_prob, dev_data)
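# Hedged sketch (assumption): the `evalution` helper called above is not defined in
# this snippet. A minimal version consistent with the call site might look like the
# following; it iterates over the dev set, sums the `correct` counter over non-padding
# tokens, and prints token-level accuracy. `numpy as np`, `util.padding`, and
# `dev_data.numbers()` are assumed to behave as in the training loop.
def evalution(sess, correct, x_, y_, mask, output_keep_prob, dev_data):
    total_correct, total_tokens = 0, 0
    for _ in range(dev_data.numbers() // FLAGS.batch_size):
        batch_x, batch_y, mask_feed = util.padding(
            *dev_data.next_batch(FLAGS.batch_size))
        total_correct += sess.run(correct,
                                  feed_dict={
                                      x_: batch_x,
                                      y_: batch_y,
                                      output_keep_prob: 1.0,  # no dropout at eval time
                                      mask: mask_feed
                                  })
        total_tokens += int(np.sum(mask_feed))
    print('dev accuracy: %.4f' % (total_correct / float(total_tokens)))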
def train():
    global max_f1
    with tf.Session(config=util.gpu_config()) as session:
        session.run(tf.global_variables_initializer())
        model.start_enqueue_thread(session)
        accumulated_loss = 0.0
        # resume from the latest checkpoint if one exists
        ckpt = tf.train.get_checkpoint_state(log_dir)
        if ckpt and ckpt.model_checkpoint_path:
            print("Restoring from: {}".format(ckpt.model_checkpoint_path))
            saver.restore(session, ckpt.model_checkpoint_path)
        initial_time = time.time()
        while True:
            tf_loss, tf_global_step, _ = session.run(
                [model.loss, model.global_step, model.train_op])
            accumulated_loss += tf_loss
            if tf_global_step % report_frequency == 0:
                total_time = time.time() - initial_time
                steps_per_second = tf_global_step / total_time
                average_loss = accumulated_loss / report_frequency
                print("[{}] loss={:.2f}, steps/s={:.2f}".format(
                    tf_global_step, average_loss, steps_per_second))
                writer.add_summary(
                    util.make_summary({"loss": average_loss}), tf_global_step)
                accumulated_loss = 0.0
            if tf_global_step % save_frequency == 0:
                saver.save(session, os.path.join(log_dir, "model"),
                           global_step=tf_global_step)
            if tf_global_step % eval_frequency == 0:
                eval_summary, eval_f1 = model.evaluate(session)
                # keep a copy of the checkpoint with the best dev F1 so far
                if eval_f1 > max_f1:
                    max_f1 = eval_f1
                    util.copy_checkpoint(
                        os.path.join(log_dir, "model-{}".format(tf_global_step)),
                        os.path.join(log_dir, "model.max.ckpt"))
                writer.add_summary(eval_summary, tf_global_step)
                writer.add_summary(
                    util.make_summary({"max_eval_f1": max_f1}), tf_global_step)
                print("[{}] eval_f1={:.2f}, max_f1={:.2f}".format(
                    tf_global_step, eval_f1, max_f1))
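# Hedged sketch (assumption): `util.make_summary` is used above but not shown. A
# common implementation simply wraps a dict of scalar values into a tf.Summary
# protobuf so it can be written with the same FileWriter as the graph summaries.
def make_summary(value_dict):
    return tf.Summary(value=[
        tf.Summary.Value(tag=tag, simple_value=value)
        for tag, value in value_dict.items()
    ])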
def eval(model, vocab):
    # evaluate on the test split corresponding to the training data path
    datapath = model.hps.data_path.replace('train', 'test')
    data_loader = Batcher(vocab, datapath, args)
    assert 'eval' in model.hps.mode
    assert data_loader.single_pass
    acc_list = []
    with tf.Session(config=util.gpu_config()) as sess:
        util.load_ckpt(model.hps, model.saver, sess)
        print("Running evaluation...\n")
        while True:
            batch = data_loader.next_batch()
            if batch == 'FINISH':
                break
            res = model.run_eval(batch, sess)
            acc = float(res['accuracy'])
            acc_list.append(acc)
            # running progress: number of batches seen and mean accuracy so far
            print(len(acc_list), np.mean(acc_list))
        print("FINAL ACCURACY: {}".format(
            round(100 * sum(acc_list) / len(acc_list), 2)))
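# Hedged sketch (assumption): every session above is created with
# `util.gpu_config()`, which is not shown in this snippet. A typical version returns
# a ConfigProto that allows soft placement and grows GPU memory on demand instead of
# reserving the whole device up front.
def gpu_config():
    config = tf.ConfigProto(allow_soft_placement=True)
    config.gpu_options.allow_growth = True
    return config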
def bi_lstm():
    tf.set_random_seed(1)
    # load data
    train_data = read_dataset(os.path.join(FLAGS.data_path, 'penn.train.pos'))
    dev_data = read_dataset(os.path.join(FLAGS.data_path, 'penn.devel.pos'))
    # keep the embedding matrix on the CPU to save GPU memory
    with tf.device('/cpu:0'):
        embedding = tf.get_variable("embedding",
                                    [FLAGS.emb_size, FLAGS.word_dim], tf.float32)
    with tf.name_scope('placeholder'):
        x_ = tf.placeholder(tf.int32, [FLAGS.batch_size, None])
        y_ = tf.placeholder(tf.int32, [None])
        mask = tf.placeholder(tf.int32, [None])
        output_keep_prob = tf.placeholder(tf.float32)
        seq_len = tf.placeholder(tf.int32, [None])
    # x: [batch_size, n_steps, word_dim]
    x = tf.nn.embedding_lookup(embedding, x_)
    with tf.device('/gpu:2'):
        # forward and backward LSTM cells
        lstm_cell_fw = tf.nn.rnn_cell.BasicLSTMCell(FLAGS.n_hidden)
        lstm_cell_bw = tf.nn.rnn_cell.BasicLSTMCell(FLAGS.n_hidden)
        # dropout on the cell outputs; the keep probability is fed at run time
        lstm_cell_fw = tf.nn.rnn_cell.DropoutWrapper(
            lstm_cell_fw, output_keep_prob=output_keep_prob)
        lstm_cell_bw = tf.nn.rnn_cell.DropoutWrapper(
            lstm_cell_bw, output_keep_prob=output_keep_prob)
        # bidirectional LSTM outputs, concatenated along the hidden dimension
        outputs, _ = tf.nn.bidirectional_dynamic_rnn(
            lstm_cell_fw, lstm_cell_bw, x,
            sequence_length=seq_len, dtype=tf.float32)
        outputs = tf.concat(2, outputs)
        outputs = tf.reshape(outputs, [-1, 2 * FLAGS.n_hidden])
    # weights and biases of the logistic (softmax) layer
    with tf.variable_scope('linear'):
        weights = tf.get_variable("weight",
                                  [2 * FLAGS.n_hidden, FLAGS.n_classes], tf.float32)
        biases = tf.get_variable("biases", [FLAGS.n_classes], tf.float32)
    logits = tf.matmul(outputs, weights) + biases
    # mask out padding positions so they do not contribute to the loss
    loss = tf.reduce_mean(
        tf.nn.sparse_softmax_cross_entropy_with_logits(logits, y_) *
        tf.cast(mask, tf.float32))
    train_op = tf.train.AdamOptimizer(FLAGS.learning_rate).minimize(loss)
    # count correct predictions on non-padding positions only
    y = tf.cast(tf.nn.in_top_k(logits, y_, 1), tf.int32) * mask
    correct = tf.reduce_sum(y)
    with tf.Session(config=util.gpu_config()) as sess:
        sess.run(tf.global_variables_initializer())
        FLAGS.epoch_size = train_data.numbers() // FLAGS.batch_size
        for step in range(FLAGS.epoch_size * FLAGS.epoch_step):
            batch_x, batch_y, mask_feed = util.padding(
                *train_data.next_batch(FLAGS.batch_size))
            # every sentence in the batch is padded to the same length
            sequence_length = batch_x.shape[1] * np.ones(
                [FLAGS.batch_size], np.int32)
            sess.run(train_op,
                     feed_dict={
                         x_: batch_x,
                         y_: batch_y,
                         output_keep_prob: 1 - FLAGS.dropout,
                         mask: mask_feed,
                         seq_len: sequence_length
                     })
            if step % 100 == 0:
                evalution(sess, correct, x_, y_, mask, output_keep_prob,
                          seq_len, dev_data)
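# Hedged sketch (assumption): `util.padding` is used by all of the training loops
# above but is not shown. A minimal version consistent with the feeds (a padded
# [batch_size, max_len] word matrix, flattened labels, and a flattened 0/1 mask,
# with 0 as the padding id) could look like this; the (sentences, tags) argument
# order is an assumption about what `next_batch` returns.
def padding(sentences, tags):
    max_len = max(len(s) for s in sentences)
    batch_x = np.zeros([len(sentences), max_len], np.int32)
    batch_y = np.zeros([len(sentences), max_len], np.int32)
    mask = np.zeros([len(sentences), max_len], np.int32)
    for i, (words, labels) in enumerate(zip(sentences, tags)):
        batch_x[i, :len(words)] = words
        batch_y[i, :len(labels)] = labels
        mask[i, :len(words)] = 1
    # labels and mask are flattened to match the 1-D placeholders y_ and mask
    return batch_x, batch_y.reshape([-1]), mask.reshape([-1])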
def train(model, vocab, pretrain_vardicts=None):
    print('train function called.')
    print(model.hps.data_path)
    devpath = model.hps.data_path.replace('train', 'dev')
    assert model.hps.data_path != devpath and os.path.exists(
        model.hps.data_path) and os.path.exists(devpath)
    train_data_loader = Batcher(vocab, model.hps.data_path, args)
    valid_data_loader = Batcher(vocab, devpath, args)
    print(train_data_loader.example_queue.qsize())
    print(valid_data_loader.example_queue.qsize())
    print(train_data_loader.batch_queue.qsize())
    print(valid_data_loader.batch_queue.qsize())
    with tf.Session(config=util.gpu_config()) as sess:
        train_logdir = os.path.join(args.model_path, 'logdir/train')
        dev_logdir = os.path.join(args.model_path, 'logdir/dev')
        train_savedir = os.path.join(args.model_path, 'train/')
        print("[*] Train save directory is: {}".format(train_savedir))
        if not os.path.exists(train_logdir):
            os.makedirs(train_logdir)
        if not os.path.exists(dev_logdir):
            os.makedirs(dev_logdir)
        if not os.path.exists(train_savedir):
            os.makedirs(train_savedir)
        summary_writer1 = tf.summary.FileWriter(train_logdir, sess.graph)
        summary_writer2 = tf.summary.FileWriter(dev_logdir, sess.graph)
        # initialize with pretrained variables where available
        if 'esim' not in model.hps.mode:
            assign_ops, uninitialized_varlist = util.assign_pretrain_weights(
                pretrain_vardicts)
            sess.run(assign_ops)
            sess.run(tf.initialize_variables(uninitialized_varlist))
        else:
            sess.run(tf.global_variables_initializer())
        print("Variable initialization end.")
        step = 0
        while True:
            beg_time = time()
            batch = train_data_loader.next_batch()
            sample_per_epoch = 550153 if 'esim' in model.hps.mode else 1211
            res = model.run_step(batch, sess,
                                 fcn_keeprate=model.hps.fcn_keep_rate,
                                 rnn_keeprate=model.hps.rnn_keep_rate,
                                 is_train=True)
            loss, summaries, step = res['loss'], res['summaries'], res['global_step']
            end_time = time()
            print("{} epoch, {} step, {}sec, {} loss".format(
                int(step * model.hps.batch_size / sample_per_epoch), step,
                round(end_time - beg_time, 3), round(float(loss), 3)))
            summary_writer1.add_summary(summaries, step)
            if step % 5 == 0:
                dev_batch = valid_data_loader.next_batch()
                res = model.run_step(dev_batch, sess, fcn_keeprate=-1,
                                     rnn_keeprate=-1, is_train=False)
                loss, summaries, step = res['loss'], res['summaries'], res['global_step']
                assert step % 5 == 0
                print("[VALID] {} loss".format(round(loss, 3)))
                summary_writer2.add_summary(summaries, step)
            if step == 10 or step % 10000 == 0:
                model.saver.save(sess, train_savedir, global_step=step)
            if int(step * model.hps.batch_size / sample_per_epoch) > model.hps.max_epoch:
                model.saver.save(sess, train_savedir, global_step=step)
                print("training end")
                break
def bi_lstm_crf():
    # load data
    print('start read dataset')
    train_data = read_dataset(os.path.join(FLAGS.data_path, 'penn.train.pos'))
    dev_data = read_dataset(os.path.join(FLAGS.data_path, 'penn.devel.pos'))
    dev_data.fake_data(FLAGS.batch_size)
    print('stop read dataset')
    tf.set_random_seed(1)
    # keeping the embedding matrix on the CPU saves GPU memory
    with tf.device('/cpu:0'):
        with tf.variable_scope('embedding') as scope:
            random_embedding = tf.get_variable(
                name="random_embedding",
                shape=[FLAGS.emb_size, FLAGS.word_dim],
                dtype=tf.float32)
    with tf.name_scope('placeholder'):
        x_ = tf.placeholder(tf.int32, [FLAGS.batch_size, None])
        y_ = tf.placeholder(tf.int32, [FLAGS.batch_size, None])
        output_keep_prob = tf.placeholder(tf.float32)
        # actual (unpadded) length of each sentence: padding ids are 0
        sequence_length = tf.reduce_sum(tf.sign(x_), reduction_indices=1)
        sequence_length = tf.cast(sequence_length, tf.int32)
    with tf.device('/gpu:2'):
        with tf.variable_scope('input_layer'):
            # x: [batch_size, n_steps, word_dim]
            x = tf.nn.embedding_lookup(random_embedding, x_)
        # bidirectional LSTM layer
        with tf.name_scope('bi_lstm_layer'):
            lstm_cell_fw = tf.nn.rnn_cell.BasicLSTMCell(FLAGS.n_hidden)
            lstm_cell_bw = tf.nn.rnn_cell.BasicLSTMCell(FLAGS.n_hidden)
            outputs, _ = tf.nn.bidirectional_dynamic_rnn(
                cell_fw=lstm_cell_fw,
                cell_bw=lstm_cell_bw,
                inputs=x,
                sequence_length=sequence_length,
                dtype=tf.float32)
            outputs = tf.concat(2, outputs)
            outputs = tf.reshape(outputs, [-1, 2 * FLAGS.n_hidden])
            outputs = tf.nn.dropout(outputs, keep_prob=output_keep_prob)
        # projection to per-token unary scores for the CRF
        with tf.variable_scope('Softmax'):
            weights = tf.get_variable(
                name="weights",
                shape=[2 * FLAGS.n_hidden, FLAGS.n_classes],
                dtype=tf.float32,
                initializer=tf.truncated_normal_initializer(stddev=0.01))
            biases = tf.get_variable(name="biases",
                                     shape=[FLAGS.n_classes],
                                     dtype=tf.float32)
            matricized_unary_scores = tf.matmul(outputs, weights) + biases
            unary_scores = tf.reshape(matricized_unary_scores,
                                      [FLAGS.batch_size, -1, FLAGS.n_classes])
        # linear-chain CRF log-likelihood plus L2 regularization on the weights
        log_likelihood, transition_params = tf.contrib.crf.crf_log_likelihood(
            unary_scores, y_, sequence_length)
        l2_loss = tf.nn.l2_loss(weights) * FLAGS.beta
        loss = tf.reduce_mean(-log_likelihood) + l2_loss
        train_op = tf.train.AdamOptimizer(FLAGS.learning_rate).minimize(loss)
    saver = tf.train.Saver()
    best_acc = 0
    if FLAGS.is_training == 1:
        with tf.Session(config=util.gpu_config()) as sess:
            sess.run(tf.global_variables_initializer())
            epoch_size = train_data.numbers() // FLAGS.batch_size
            for step in range(epoch_size * FLAGS.epoch_step):
                batch_x, batch_y, _ = util.padding(
                    *train_data.next_batch(FLAGS.batch_size))
                sess.run(
                    [l2_loss, loss, train_op],
                    feed_dict={
                        x_: batch_x,
                        y_: batch_y.reshape([FLAGS.batch_size, -1]),
                        output_keep_prob: 1 - FLAGS.dropout
                    })
                if step % 100 == 0:
                    cur_acc = evalution(sess, transition_params, dev_data, x_,
                                        y_, output_keep_prob, unary_scores,
                                        sequence_length)
                    if cur_acc > best_acc:
                        best_acc = cur_acc
                        # saver.save(sess, 'best.model')
                        print('best_acc: ' + str(best_acc))
    else:
        pass
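# Hedged sketch (assumption): the CRF `evalution` helper called above is not shown in
# this snippet. A minimal version consistent with the call site could run Viterbi
# decoding per sentence with tf.contrib.crf.viterbi_decode and report token-level
# accuracy over the real (non-padding) positions. `numpy as np`, `util.padding`, and
# `dev_data.numbers()` are assumptions carried over from the training loop.
def evalution(sess, transition_params, dev_data, x_, y_, output_keep_prob,
              unary_scores, sequence_length):
    num_correct, num_tokens = 0, 0
    for _ in range(dev_data.numbers() // FLAGS.batch_size):
        batch_x, batch_y, _ = util.padding(*dev_data.next_batch(FLAGS.batch_size))
        batch_y = batch_y.reshape([FLAGS.batch_size, -1])
        scores, trans, lengths = sess.run(
            [unary_scores, transition_params, sequence_length],
            feed_dict={
                x_: batch_x,
                y_: batch_y,
                output_keep_prob: 1.0  # disable dropout at evaluation time
            })
        for score, gold, length in zip(scores, batch_y, lengths):
            # Viterbi decoding over the unpadded part of the sentence
            viterbi_seq, _ = tf.contrib.crf.viterbi_decode(score[:length], trans)
            num_correct += np.sum(np.equal(viterbi_seq, gold[:length]))
            num_tokens += length
    acc = num_correct / float(num_tokens)
    print('dev accuracy: ' + str(round(acc, 4)))
    return acc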
#!/usr/bin/env python
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import os
import time

import tensorflow as tf

import coref_model as cm
import util

if __name__ == "__main__":
    config = util.initialize_from_env()
    model = cm.CorefModel(config)
    # retry evaluation after a short pause if the GPU runs out of memory
    while True:
        try:
            with tf.Session(config=util.gpu_config()) as session:
                model.restore(session)
                model.evaluate(session, official_stdout=True)
        except tf.errors.ResourceExhaustedError:
            print("OOM")
            time.sleep(3)
            continue
        break