import datetime
import os
import time

import numpy as np
import tensorflow as tf

import datahelper
from mv_rnn import MV_RNN  # module name assumed; import from wherever MV_RNN is defined


def train(x_train1, x_dev1, x_train2, x_dev2, y_train, y_dev,
          word_embedding, max_len, vocab_size):
    with tf.Graph().as_default():
        session_conf = tf.ConfigProto(
            allow_soft_placement=FLAGS.allow_soft_placement,
            log_device_placement=FLAGS.log_device_placement)
        sess = tf.Session(config=session_conf)
        with sess.as_default():
            rnn = MV_RNN(FLAGS.learning_rate, FLAGS.batch_size,
                         FLAGS.decay_steps, FLAGS.decay_rate, max_len,
                         FLAGS.hidden_size, vocab_size, FLAGS.embedding_dim)

            # Exponentially decayed learning rate, Adam, and gradient
            # clipping to [-5, 5] to guard against exploding gradients.
            learning_rate = tf.train.exponential_decay(
                rnn.learning_rate, rnn.global_step,
                rnn.decay_steps, rnn.decay_rate, staircase=True)
            optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)
            grads_and_vars = optimizer.compute_gradients(rnn.loss_val)
            # Skip None gradients (variables not on the loss path) so
            # tf.clip_by_value does not fail on them.
            grads_and_vars_clip = [(tf.clip_by_value(g, -5.0, 5.0), v)
                                   for g, v in grads_and_vars
                                   if g is not None]
            train_op = optimizer.apply_gradients(
                grads_and_vars_clip, global_step=rnn.global_step)

            # Keep track of gradient values and sparsity.
            grad_summaries = []
            for g, v in grads_and_vars:
                if g is not None:
                    grad_summaries.append(tf.summary.histogram(
                        "{}/grad/hist".format(v.name), g))
                    grad_summaries.append(tf.summary.scalar(
                        "{}/grad/sparsity".format(v.name),
                        tf.nn.zero_fraction(g)))
            grad_summaries_merged = tf.summary.merge(grad_summaries)

            timestamp = str(int(time.time()))
            out_dir = os.path.abspath(
                os.path.join(os.path.curdir, "runs", timestamp))

            loss_summary = tf.summary.scalar("loss", rnn.loss_val)
            train_summary_op = tf.summary.merge(
                [loss_summary, grad_summaries_merged])
            train_summary_dir = os.path.join(out_dir, "summaries", "train")
            train_summary_writer = tf.summary.FileWriter(
                train_summary_dir, sess.graph)

            dev_summary_op = tf.summary.merge([loss_summary])
            dev_summary_dir = os.path.join(out_dir, "summaries", "dev")
            dev_summary_writer = tf.summary.FileWriter(
                dev_summary_dir, sess.graph)

            # Initialize variables *before* loading the pretrained
            # embeddings; initializing afterwards would overwrite them.
            sess.run(tf.global_variables_initializer())
            embedding = tf.constant(word_embedding, tf.float32)
            t_assign_embedding = tf.assign(rnn.Embedding, embedding)
            sess.run(t_assign_embedding)

            checkpoint_dir = os.path.abspath(
                os.path.join(os.path.curdir, "checkpoint",
                             timestamp + "_cnn"))
            if not os.path.exists(checkpoint_dir):
                os.makedirs(checkpoint_dir)
            # os.path.join is portable; the original hard-coded a
            # Windows-style "\\" separator.
            log_file = os.path.join(checkpoint_dir, "log.txt")
            log_write = open(log_file, 'w', encoding='utf-8')
            checkpoint_prefix = os.path.join(checkpoint_dir, "model")
            saver = tf.train.Saver(max_to_keep=FLAGS.num_checkpoints)

            def train_step(x_batch1, x_batch2, y_batch):
                # Uniform example weights; adjust here if the positive
                # class should be up-weighted (the original if/else
                # appended 1.0 and 1, i.e. the same weight either way).
                weights_t = [1.0] * len(y_batch)
                feed_dict = {
                    rnn.is_training: True,
                    rnn.input_x1: x_batch1,
                    rnn.input_x2: x_batch2,
                    rnn.input_y: y_batch,
                    rnn.dropout_keep_prob: FLAGS.dropout_keep_prob,
                    rnn.weights: weights_t
                }
                summaries, _, step, loss = sess.run(
                    [train_summary_op, train_op, rnn.global_step,
                     rnn.loss_val], feed_dict)
                time_str = datetime.datetime.now().isoformat()
                print("Train: {}: step {}, loss {:g}.".format(
                    time_str, step, loss))
                log_write.write("Train: {}: step {}, loss {:g}.\n".format(
                    time_str, step, loss))
                log_write.flush()
                train_summary_writer.add_summary(summaries, step)

            def dev_step(x_batch1, x_batch2, y_batch):
                # Evaluate on the full dev set in mini-batches (a single
                # epoch, dropout disabled) and report the mean loss.
                total_loss = []
                step = 0
                test_batch = datahelper.batch_iter(
                    list(zip(x_batch1, x_batch2, y_batch)),
                    FLAGS.batch_size, 1)
                for test_data in test_batch:
                    x_dev_batch1, x_dev_batch2, y_dev_batch = zip(*test_data)
                    weights_d = [1.0] * len(y_dev_batch)
                    feed_dict = {
                        rnn.is_training: False,
                        rnn.input_x1: x_dev_batch1,
                        rnn.input_x2: x_dev_batch2,
                        rnn.input_y: y_dev_batch,
                        rnn.dropout_keep_prob: 1.0,
                        rnn.weights: weights_d
                    }
                    summaries, step, loss = sess.run(
                        [dev_summary_op, rnn.global_step, rnn.loss_val],
                        feed_dict)
                    total_loss.append(loss)
                loss = np.mean(total_loss)
                time_str = datetime.datetime.now().isoformat()
                print("Test: {}: step {}, loss {:g}.".format(
                    time_str, step, loss))
                log_write.write("Test: {}: step {}, loss {:g}.\n".format(
                    time_str, step, loss))
                log_write.flush()
                # Only the last batch's summary is written; the logged
                # loss above is the mean over all dev batches.
                dev_summary_writer.add_summary(summaries, step)

            # Main training loop: iterate over shuffled mini-batches,
            # evaluating and checkpointing on a fixed step schedule.
            batches = datahelper.batch_iter(
                list(zip(x_train1, x_train2, y_train)),
                FLAGS.batch_size, FLAGS.num_epochs)
            for batch in batches:
                x_batch1, x_batch2, y_batch = zip(*batch)
                train_step(x_batch1, x_batch2, y_batch)
                current_step = tf.train.global_step(sess, rnn.global_step)
                if current_step % FLAGS.evaluate_every == 0:
                    print("\nEvaluation:")
                    dev_step(x_dev1, x_dev2, y_dev)
                if current_step % FLAGS.checkpoint_every == 0:
                    path = saver.save(sess, checkpoint_prefix,
                                      global_step=current_step)
                    print("Saved model checkpoint to {}\n".format(path))
            log_write.close()