def train(x_train, y_train, vocab_processor, x_dev, y_dev):
    with tf.Graph().as_default():
        session_conf = tf.ConfigProto(
            allow_soft_placement=allow_soft_placement,
            log_device_placement=log_device_placement)
        sess = tf.Session(config=session_conf)
        with sess.as_default():
            cnn = TextCNN(sequence_length=x_train.shape[1],
                          num_classes=y_train.shape[1],
                          vocab_size=len(vocab_processor.vocabulary_),
                          embedding_size=embedding_dim,
                          filter_sizes=list(map(int, filter_sizes.split(","))),
                          num_filters=FLAGS.num_filters,
                          l2_reg_lambda=FLAGS.l2_reg_lambda)

            global_step = tf.Variable(0, name="global_step", trainable=False)
            optimizer = tf.train.AdamOptimizer(1e-3)
            grads_and_vars = optimizer.compute_gradients(cnn.loss)
            train_op = optimizer.apply_gradients(grads_and_vars, global_step=global_step)

            sess.run(tf.global_variables_initializer())

            def train_step(x_batch, y_batch):
                feed_dict = {
                    cnn.input_x: x_batch,
                    cnn.input_y: y_batch,
                    cnn.drop_out_keep: dropout_keep_prob
                }
                _, step, loss, accuracy = sess.run(
                    [train_op, global_step, cnn.loss, cnn.accuracy], feed_dict)
                time_str = datetime.datetime.now().isoformat()
                print("{}: step {}, loss {:g}, acc {:g}".format(time_str, step, loss, accuracy))

            def dev_step(x_batch, y_batch):
                feed_dict = {
                    cnn.input_x: x_batch,
                    cnn.input_y: y_batch,
                    cnn.drop_out_keep: 1.0
                }
                step, loss, accuracy = sess.run(
                    [global_step, cnn.loss, cnn.accuracy], feed_dict)
                time_str = datetime.datetime.now().isoformat()
                print("{}: step {}, loss {:g}, acc {:g}".format(time_str, step, loss, accuracy))

            # mini_batch
            batches = data_helpers.batch_iter(list(zip(x_train, y_train)),
                                              batch_size, num_epochs)
            for batch in batches:
                x_batch, y_batch = zip(*batch)
                train_step(x_batch, y_batch)
                current_step = tf.train.global_step(sess, global_step)
                # Evaluate on the dev set every 100 steps
                if current_step % 100 == 0:
                    print("\nEvaluation:")
                    dev_step(x_dev, y_dev)
                    print("")
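# ----------------------------------------------------------------------
# Note: every training loop in this file calls data_helpers.batch_iter,
# which is never shown here. Below is a minimal sketch of that helper,
# assuming the conventional dennybritz cnn-text-classification-tf
# interface (a data list, batch_size, num_epochs, and an optional
# shuffle flag); treat it as illustrative, not as the exact helper
# these scripts ship with.
import numpy as np

def batch_iter(data, batch_size, num_epochs, shuffle=True):
    """Yield mini-batches over `data` for `num_epochs` epochs."""
    data = np.array(data)
    data_size = len(data)
    num_batches_per_epoch = int((data_size - 1) / batch_size) + 1
    for epoch in range(num_epochs):
        # Reshuffle once per epoch so batches differ between epochs
        if shuffle:
            shuffle_indices = np.random.permutation(np.arange(data_size))
            shuffled_data = data[shuffle_indices]
        else:
            shuffled_data = data
        for batch_num in range(num_batches_per_epoch):
            start_index = batch_num * batch_size
            end_index = min((batch_num + 1) * batch_size, data_size)
            yield shuffled_data[start_index:end_index]
# ----------------------------------------------------------------------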
print("Train/Dev split: {:d}/{:d}".format(len(y_train), len(y_dev))) print("Sequnence Length: {:d}".format(sequence_length)) # Training # ================================================== with tf.Graph().as_default(): session_conf = tf.ConfigProto( allow_soft_placement=FLAGS.allow_soft_placement, log_device_placement=FLAGS.log_device_placement) sess = tf.Session(config=session_conf) with sess.as_default(): cnn = TextCNN(sequence_length=sequence_length, num_classes=2, vocab_size=len(vocabulary), embedding_size=FLAGS.embedding_dim, filter_sizes=map(int, FLAGS.filter_sizes.split(",")), num_filters=map(int, FLAGS.num_filters.split(",")), l2_reg_lambda=FLAGS.l2_reg_lambda) # Define Training procedure global_step = tf.Variable(0, name="global_step", trainable=False) optimizer = tf.train.AdamOptimizer(1e-4) grads_and_vars = optimizer.compute_gradients(cnn.loss, aggregation_method=2) train_op = optimizer.apply_gradients(grads_and_vars, global_step=global_step) # Keep track of gradient values and sparsity (optional) grad_summaries = [] for g, v in grads_and_vars:
# Training
# ==================================================
with tf.Graph().as_default():
    session_conf = tf.ConfigProto(
        allow_soft_placement=FLAGS.allow_soft_placement,
        log_device_placement=FLAGS.log_device_placement,
        gpu_options=tf.GPUOptions(per_process_gpu_memory_fraction=0.8))
    sess = tf.Session(config=session_conf)
    with sess.as_default():
        cnn = TextCNN(
            sequence_length=FLAGS.max_sequence_length,  # number of words per sentence
            num_classes=y_train_shuffled.shape[1],
            vocab_size=len(vocabulary),
            embedding_size=embedding_dim,
            filter_sizes=list(map(int, FLAGS.filter_sizes.split(","))),
            num_filters=FLAGS.num_filters,
            l2_reg_lambda=FLAGS.l2_reg_lambda
        )
        if FLAGS.use_pretrain:
            cnn.assign_embedding(sess, pretrained_embedding)

        # Define Training procedure
        global_step = tf.Variable(0, name="global_step", trainable=False)  # records the global step
        optimizer = tf.train.AdamOptimizer(1e-3)
        grads_and_vars = optimizer.compute_gradients(cnn.loss)  # compute gradients of the loss
        train_op = optimizer.apply_gradients(grads_and_vars, global_step=global_step,
                                             name="train_op")

        # Output directory for models and summaries
# TODO: This is very crude, should use cross-validation
dev_sample_index = -1 * int(0.1 * float(len(y)))
x_train, x_dev = x_shuffled[:dev_sample_index], x_shuffled[dev_sample_index:]
y_train, y_dev = y_shuffled[:dev_sample_index], y_shuffled[dev_sample_index:]
print("Vocabulary Size: {:d}".format(len(vocab_processor.vocabulary_)))
print("Train/Dev split: {:d}/{:d}".format(len(y_train), len(y_dev)))

with tf.Graph().as_default():
    session_conf = tf.ConfigProto(allow_soft_placement=True,
                                  log_device_placement=False)
    sess = tf.Session(config=session_conf)
    with sess.as_default():
        cnn = TextCNN(sequence_length=x_train.shape[1],
                      num_classes=y_train.shape[1],
                      vocab_size=len(vocab_processor.vocabulary_),
                      embedding_size=embedding_dimension,
                      filter_sizes=list(map(int, params['filter_sizes'].split(","))),
                      num_filters=params['num_filters'],
                      l2_reg_lambda=params['l2_reg_lambda'])

        # Define Training procedure
        global_step = tf.Variable(0, name="global_step", trainable=False)
        optimizer = tf.train.AdamOptimizer(1e-3)
        grads_and_vars = optimizer.compute_gradients(cnn.loss)
        train_op = optimizer.apply_gradients(grads_and_vars, global_step=global_step)

        # Keep track of gradient values and sparsity (optional)
        grad_summaries = []
        for g, v in grads_and_vars:
            if g is not None:
print("Train/Dev split: {:d}/{:d}".format(len(y_train), len(y_dev))) print("Sequnence Length: {:d}".format(sequence_length)) # Training # ================================================== with tf.Graph().as_default(): session_conf = tf.ConfigProto( allow_soft_placement=FLAGS.allow_soft_placement, log_device_placement=FLAGS.log_device_placement) sess = tf.Session(config=session_conf) with sess.as_default(): cnn = TextCNN(sequence_length=x_train.shape[1], num_classes=y_train.shape[1], word_size=FLAGS.word2vec_size, filter_sizes=list(map(int, FLAGS.filter_sizes.split(","))), num_filters=FLAGS.num_filters, l2_reg_lambda=FLAGS.l2_reg_lambda) # Define Training procedure global_step = tf.Variable(0, name="global_step", trainable=False) optimizer = tf.train.AdamOptimizer(1e-3) grads_and_vars = optimizer.compute_gradients(cnn.loss) train_op = optimizer.apply_gradients(grads_and_vars, global_step=global_step) # Keep track of gradient values and sparsity (optional) grad_summaries = [] for g, v in grads_and_vars: if g is not None:
def train(x_train, y_train, vocab_processor, x_dev, y_dev):
    # Training
    # ==================================================
    ste = []
    lo = []
    with tf.Graph().as_default():
        session_conf = tf.ConfigProto(
            allow_soft_placement=FLAGS.allow_soft_placement,
            log_device_placement=FLAGS.log_device_placement)
        sess = tf.Session(config=session_conf)
        with sess.as_default():
            cnn = TextCNN(sequence_length=x_train.shape[1],
                          num_classes=y_train.shape[1],
                          vocab_size=len(vocab_processor.vocabulary_),
                          embedding_size=FLAGS.embedding_dim,
                          filter_sizes=list(map(int, FLAGS.filter_sizes.split(","))),
                          num_filters=FLAGS.num_filters,
                          l2_reg_lambda=FLAGS.l2_reg_lambda)

            # Define Training procedure
            global_step = tf.Variable(0, name="global_step", trainable=False)
            optimizer = tf.train.AdamOptimizer(1e-3)
            grads_and_vars = optimizer.compute_gradients(cnn.loss)
            train_op = optimizer.apply_gradients(grads_and_vars, global_step=global_step)

            # Keep track of gradient values and sparsity (optional)
            grad_summaries = []
            for g, v in grads_and_vars:
                if g is not None:
                    grad_hist_summary = tf.summary.histogram("{}/grad/hist".format(v.name), g)
                    sparsity_summary = tf.summary.scalar("{}/grad/sparsity".format(v.name),
                                                         tf.nn.zero_fraction(g))
                    grad_summaries.append(grad_hist_summary)
                    grad_summaries.append(sparsity_summary)
            grad_summaries_merged = tf.summary.merge(grad_summaries)

            # Output directory for models and summaries
            timestamp = str(int(time.time()))
            out_dir = os.path.abspath(os.path.join(os.path.curdir, "runs", timestamp))
            print("Writing to {}\n".format(out_dir))

            # Summaries for loss and accuracy
            loss_summary = tf.summary.scalar("loss", cnn.loss)
            acc_summary = tf.summary.scalar("accuracy", cnn.accuracy)

            # Train Summaries
            train_summary_op = tf.summary.merge([loss_summary, acc_summary, grad_summaries_merged])
            train_summary_dir = os.path.join(out_dir, "summaries", "train")
            train_summary_writer = tf.summary.FileWriter(train_summary_dir, sess.graph)

            # Dev summaries
            dev_summary_op = tf.summary.merge([loss_summary, acc_summary])
            dev_summary_dir = os.path.join(out_dir, "summaries", "dev")
            dev_summary_writer = tf.summary.FileWriter(dev_summary_dir, sess.graph)

            # Checkpoint directory. Tensorflow assumes this directory already exists so we need to create it
            checkpoint_dir = os.path.abspath(os.path.join(out_dir, "checkpoints"))
            checkpoint_prefix = os.path.join(checkpoint_dir, "model")
            if not os.path.exists(checkpoint_dir):
                os.makedirs(checkpoint_dir)
            saver = tf.train.Saver(tf.global_variables(), max_to_keep=FLAGS.num_checkpoints)

            # Write vocabulary
            vocab_processor.save(os.path.join(out_dir, "vocab"))

            # Initialize all variables
            sess.run(tf.global_variables_initializer())

            def train_step(x_batch, y_batch):
                """
                A single training step
                """
                feed_dict = {
                    cnn.input_x: x_batch,
                    cnn.input_y: y_batch,
                    cnn.dropout_keep_prob: FLAGS.dropout_keep_prob
                }
                _, step, summaries, loss, accuracy = sess.run(
                    [train_op, global_step, train_summary_op, cnn.loss, cnn.accuracy],
                    feed_dict)
                time_str = datetime.datetime.now().isoformat()
                print("{}: step {}, loss {:g}, acc {:g}".format(time_str, step, loss, accuracy))
                train_summary_writer.add_summary(summaries, step)

            def dev_step(x_batch, y_batch, writer=None):
                """
                Evaluates model on a dev set
                """
                feed_dict = {
                    cnn.input_x: x_batch,
                    cnn.input_y: y_batch,
                    cnn.dropout_keep_prob: 1.0
                }
                step, summaries, loss, accuracy = sess.run(
                    [global_step, dev_summary_op, cnn.loss, cnn.accuracy],
                    feed_dict)
                time_str = datetime.datetime.now().isoformat()
                print("{}: step {}, loss {:g}, acc {:g}".format(time_str, step, loss, accuracy))
                if writer:
                    writer.add_summary(summaries, step)
                return loss

            # Generate batches
            batches = data_helpers.batch_iter(list(zip(x_train, y_train)),
                                              FLAGS.batch_size, FLAGS.num_epochs)
            # Training loop. For each batch...
            for batch in batches:
                x_batch, y_batch = zip(*batch)
                train_step(x_batch, y_batch)
                current_step = tf.train.global_step(sess, global_step)
                if current_step % FLAGS.evaluate_every == 0:
                    print("\nEvaluation:")
                    dev_loss = dev_step(x_dev, y_dev, writer=dev_summary_writer)
                    ste.append(current_step)
                    lo.append(dev_loss)  # was `lo.append(loss)`, which is undefined in this scope
                    print("")
                if current_step % FLAGS.checkpoint_every == 0:
                    path = saver.save(sess, checkpoint_prefix, global_step=current_step)
                    print("Saved model checkpoint to {}\n".format(path))

            plt.plot(ste, lo)
            plt.xlabel("step")
            plt.ylabel("loss")
            plt.title("Training Loss")
            plt.savefig("loss.png")
print("Train/Dev split: {:d}/{:d}".format(len(y_train), len(y_dev))) # Training # ================================================== with tf.Graph().as_default(): session_conf = tf.ConfigProto( allow_soft_placement=FLAGS.allow_soft_placement, log_device_placement=FLAGS.log_device_placement) sess = tf.Session(config=session_conf) with sess.as_default(): cnn = TextCNN( title_length=FLAGS.title_length, sequence_length=FLAGS.window_length, num_classes=y_train.shape[1], dictionary=dictionary, embedding_size=FLAGS.embedding_dim, num_convlayers=FLAGS.num_convlayers, filter_sizes=list(map(int, FLAGS.filter_sizes.split(","))), num_filters=FLAGS.num_filters, l2_reg_lambda=FLAGS.l2_reg_lambda) # Define Training procedure global_step = tf.Variable(0, name="global_step", trainable=False) learning_rate = tf.placeholder(tf.float32, shape=[]) optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate) grads_and_vars = optimizer.compute_gradients(cnn.loss) # update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS) # with tf.control_dependencies(update_ops): train_op = optimizer.apply_gradients(grads_and_vars, global_step=global_step) # Keep track of gradient values and sparsity (optional)
# ==================================================
with tf.Graph().as_default():
    session_conf = tf.ConfigProto(
        allow_soft_placement=FLAGS.allow_soft_placement,
        log_device_placement=FLAGS.log_device_placement)
    sess = tf.Session(config=session_conf)
    with sess.as_default():
        cnn = TextCNN(
            input_length=x_train.shape[1],
            transitions_number=y_train.shape[1],
            words_vocab_size=data['words_vocab_size'],
            tags_vocab_size=data['tags_vocab_size'],
            labels_vocab_size=data['labels_vocab_size'],
            embedding_size=FLAGS.embedding_dim,
            input_cnt_words=data['input_cnt_words'],
            input_cnt_tags=data['input_cnt_tags'],
            input_cnt_labels=data['input_cnt_labels'],
            l2_reg_lambda=FLAGS.l2_reg_lambda,
            hidden_layer_size=64,
        )

        # Define Training procedure
        global_step = tf.Variable(0, name="global_step", trainable=False)
        optimizer = tf.train.AdamOptimizer(1e-3)
        grads_and_vars = optimizer.compute_gradients(cnn.loss)
        train_op = optimizer.apply_gradients(grads_and_vars, global_step=global_step)

        # Keep track of gradient values and sparsity (optional)
def train(x_train, y_train, vocab_processor, x_dev, y_dev, is_baseline, checkpoint_root):
    # Training
    # ==================================================
    with tf.Graph().as_default():
        session_conf = tf.ConfigProto(
            allow_soft_placement=FLAGS.allow_soft_placement,
            log_device_placement=FLAGS.log_device_placement)
        sess = tf.Session(config=session_conf)
        with sess.as_default():
            with tf.device('/device:GPU:0'):
                cnn = TextCNN(sequence_length=x_train.shape[1],
                              num_classes=y_train.shape[1],
                              vocab_size=len(vocab_processor.vocabulary_),
                              embedding_size=FLAGS.embedding_dim,
                              filter_sizes=list(map(int, FLAGS.filter_sizes.split(","))),
                              num_filters=FLAGS.num_filters,
                              l2_reg_lambda=FLAGS.l2_reg_lambda)

            # Define Training procedure
            global_step = tf.Variable(0, name="global_step", trainable=False)
            # learning_rate = tf.train.exponential_decay(FLAGS.learning_rate, global_step,
            #                                            1000, 0.96, staircase=True)
            optimizer = tf.train.AdamOptimizer(FLAGS.learning_rate)  # 1e-3
            grads_and_vars = optimizer.compute_gradients(cnn.loss)
            train_op = optimizer.apply_gradients(grads_and_vars, global_step=global_step)

            # Keep track of gradient values and sparsity (optional)
            grad_summaries = []
            for g, v in grads_and_vars:
                if g is not None:
                    grad_hist_summary = tf.summary.histogram("{}/grad/hist".format(v.name), g)
                    sparsity_summary = tf.summary.scalar("{}/grad/sparsity".format(v.name),
                                                         tf.nn.zero_fraction(g))
                    grad_summaries.append(grad_hist_summary)
                    grad_summaries.append(sparsity_summary)
            grad_summaries_merged = tf.summary.merge(grad_summaries)

            # Output directory for models and summaries
            # timestamp = str(int(time.time()))
            out_dir = os.path.abspath(os.path.join(os.path.curdir, "runs", checkpoint_root))  # timestamp))
            print("Writing to {}\n".format(out_dir))

            # Summaries for loss and accuracy
            loss_summary = tf.summary.scalar("loss", cnn.loss)
            acc_summary = tf.summary.scalar("accuracy", cnn.accuracy)

            # Train Summaries
            train_summary_op = tf.summary.merge([loss_summary, acc_summary, grad_summaries_merged])
            train_summary_dir = os.path.join(out_dir, "summaries", "train")
            train_summary_writer = tf.summary.FileWriter(train_summary_dir, sess.graph)

            # Dev summaries
            dev_summary_op = tf.summary.merge([loss_summary, acc_summary])
            dev_summary_dir = os.path.join(out_dir, "summaries", "dev")
            dev_summary_writer = tf.summary.FileWriter(dev_summary_dir, sess.graph)

            # Checkpoint directory. Tensorflow assumes this directory already exists so we need to create it
            checkpoint_dir = os.path.abspath(os.path.join(out_dir, "checkpoints"))
            checkpoint_prefix = os.path.join(checkpoint_dir, "model")
            if not os.path.exists(checkpoint_dir):
                os.makedirs(checkpoint_dir)
            saver = tf.train.Saver(tf.global_variables(), max_to_keep=FLAGS.num_checkpoints)

            # Write vocabulary
            vocab_processor.save(os.path.join(out_dir, "vocab"))
            extract_dict(vocab_processor, out_dir)

            # Initialize all variables
            sess.run(tf.global_variables_initializer())

            def train_step(x_batch, y_batch):
                """
                A single training step
                """
                feed_dict = {
                    cnn.input_x: x_batch,
                    cnn.input_y: y_batch,
                    cnn.dropout_keep_prob: FLAGS.dropout_keep_prob
                }
                _, step, summaries, loss, accuracy = sess.run(
                    [train_op, global_step, train_summary_op, cnn.loss, cnn.accuracy],
                    feed_dict)
                # L2 norm constraint (too slow) https://github.com/dennybritz/cnn-text-classification-tf/issues/88
                # sess.run(cnn.output_W.assign(tf.clip_by_norm(cnn.output_W, 1.0)))
                time_str = datetime.datetime.now().isoformat()
                print("{}: step {}, loss {:g}, acc {:g}".format(time_str, step, loss, accuracy))
                train_summary_writer.add_summary(summaries, step)

            def dev_step(x_batch, y_batch, writer=None):
                """
                Evaluates model on a dev set
                """
                feed_dict = {
                    cnn.input_x: x_batch,
                    cnn.input_y: y_batch,
                    cnn.dropout_keep_prob: 1.0
                }
                step, summaries, loss, accuracy = sess.run(
                    [global_step, dev_summary_op, cnn.loss, cnn.accuracy],
                    feed_dict)
                time_str = datetime.datetime.now().isoformat()
                print("{}: step {}, loss {:g}, acc {:g}".format(time_str, step, loss, accuracy))
                if writer:
                    writer.add_summary(summaries, step)
                return loss, accuracy

            # Generate batches
            batches = data_helpers.batch_iter(list(zip(x_train, y_train)),
                                              FLAGS.batch_size, FLAGS.num_epochs)
            # Training loop. For each batch...
            best_metric_perf = 0.0  # best dev accuracy so far
            cur_metric_perf = 0.0
            for batch in batches:
                x_batch, y_batch = zip(*batch)
                train_step(x_batch, y_batch)
                current_step = tf.train.global_step(sess, global_step)
                if current_step % FLAGS.evaluate_every == 0:
                    print("\nEvaluation:")
                    _, cur_metric_perf = dev_step(x_dev, y_dev, writer=dev_summary_writer)
                    print("")
                # Only checkpoint when dev accuracy improves
                if cur_metric_perf > best_metric_perf:
                    best_metric_perf = cur_metric_perf
                else:
                    continue
                if current_step % FLAGS.checkpoint_every == 0:
                    path = saver.save(sess, checkpoint_prefix, global_step=current_step)
                    print("Saved model checkpoint to {}\n".format(path))
def train(x_train, y_train, vocab_processor, x_dev, y_dev, x_real_len_train,
          x_real_len_dev, sorted_label, max_document_length):
    # Training
    # ==================================================
    with tf.Graph().as_default():
        session_conf = tf.ConfigProto(
            allow_soft_placement=FLAGS.allow_soft_placement,
            log_device_placement=FLAGS.log_device_placement)
        sess = tf.Session(config=session_conf)
        with sess.as_default():
            if FLAGS.model_type == "cnnrnn":
                obj = TextCNNRNN(
                    sequence_length=max_document_length,
                    num_classes=y_train.shape[1],
                    vocab_size=len(vocab_processor.vocabulary_),
                    hidden_unit=FLAGS.hidden_unit,
                    embedding_size=FLAGS.embedding_dim,
                    filter_sizes=list(map(int, FLAGS.filter_sizes.split(","))),
                    num_filters=FLAGS.num_filters,
                    l2_reg_lambda=FLAGS.l2_reg_lambda)
            elif FLAGS.model_type == "rnncnn":
                obj = TextRNNCNN(
                    sequence_length=max_document_length,
                    num_classes=y_train.shape[1],
                    vocab_size=len(vocab_processor.vocabulary_),
                    hidden_unit=FLAGS.hidden_unit,
                    embedding_size=FLAGS.embedding_dim,
                    filter_sizes=list(map(int, FLAGS.filter_sizes.split(","))),
                    num_filters=FLAGS.num_filters,
                    l2_reg_lambda=FLAGS.l2_reg_lambda)
            elif FLAGS.model_type == "rnnandcnn":
                obj = TextRNNandCNN(
                    sequence_length=max_document_length,
                    num_classes=y_train.shape[1],
                    vocab_size=len(vocab_processor.vocabulary_),
                    hidden_unit=FLAGS.hidden_unit,
                    embedding_size=FLAGS.embedding_dim,
                    filter_sizes=list(map(int, FLAGS.filter_sizes.split(","))),
                    num_filters=FLAGS.num_filters,
                    l2_reg_lambda=FLAGS.l2_reg_lambda)
            elif FLAGS.model_type == "rnn":
                obj = TextRNN(
                    sequence_length=max_document_length,
                    num_classes=y_train.shape[1],
                    vocab_size=len(vocab_processor.vocabulary_),
                    hidden_unit=FLAGS.hidden_unit,
                    embedding_size=FLAGS.embedding_dim,
                    l2_reg_lambda=FLAGS.l2_reg_lambda)
            else:
                obj = TextCNN(
                    sequence_length=max_document_length,
                    num_classes=y_train.shape[1],
                    vocab_size=len(vocab_processor.vocabulary_),
                    embedding_size=FLAGS.embedding_dim,
                    filter_sizes=list(map(int, FLAGS.filter_sizes.split(","))),
                    num_filters=FLAGS.num_filters,
                    l2_reg_lambda=FLAGS.l2_reg_lambda)

            # Define Training procedure
            global_step = tf.Variable(0, name="global_step", trainable=False)
            optimizer = tf.train.AdamOptimizer(1e-3)
            update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
            with tf.control_dependencies(update_ops):
                grads_and_vars = optimizer.compute_gradients(obj.loss)
                train_op = optimizer.apply_gradients(grads_and_vars, global_step=global_step)

            # Keep track of gradient values and sparsity (optional)
            grad_summaries = []
            for g, v in grads_and_vars:
                if g is not None:
                    grad_hist_summary = tf.summary.histogram("{}/grad/hist".format(v.name), g)
                    sparsity_summary = tf.summary.scalar("{}/grad/sparsity".format(v.name),
                                                         tf.nn.zero_fraction(g))
                    grad_summaries.append(grad_hist_summary)
                    grad_summaries.append(sparsity_summary)
            grad_summaries_merged = tf.summary.merge(grad_summaries)

            # Output directory for models and summaries
            timestamp = str(int(time.time()))
            out_dir = os.path.abspath(os.path.join(os.path.curdir, "runs", FLAGS.model_version))

            # Summaries for loss and accuracy
            loss_summary = tf.summary.scalar("loss", obj.loss)
            acc_summary = tf.summary.scalar("accuracy", obj.accuracy)

            # Train Summaries
            train_summary_op = tf.summary.merge([loss_summary, acc_summary, grad_summaries_merged])
            train_summary_dir = os.path.join(out_dir, "summaries", "train")
            train_summary_writer = tf.summary.FileWriter(train_summary_dir, sess.graph)

            # Dev summaries
            dev_summary_op = tf.summary.merge([loss_summary, acc_summary])
            dev_summary_dir = os.path.join(out_dir, "summaries", "dev")
            dev_summary_writer = tf.summary.FileWriter(dev_summary_dir, sess.graph)

            # Checkpoint directory. Tensorflow assumes this directory already exists so we need to create it
            checkpoint_dir = os.path.abspath(os.path.join(out_dir, "checkpoints"))
            checkpoint_prefix = os.path.join(checkpoint_dir, "model")
            if not os.path.exists(checkpoint_dir):
                os.makedirs(checkpoint_dir)
            saver = tf.train.Saver(tf.global_variables(), max_to_keep=FLAGS.num_checkpoints)

            # Initialize all variables
            sess.run(tf.global_variables_initializer())

            # Restore domain model variables
            domain_out_dir = os.path.abspath(os.path.join(os.path.curdir, "runs_base",
                                                          FLAGS.domain_model_version))
            domain_checkpoint_dir = os.path.abspath(os.path.join(domain_out_dir, "checkpoints"))
            # The last fully connected layer will not be restored because
            # different tasks have different numbers of classes
            domain_not_restore_var = [u'output']
            domain_restore_var = [v for v in tf.global_variables()
                                  if v.name.split('/')[0] not in domain_not_restore_var]
            restored_saver = tf.train.Saver(domain_restore_var)
            ckpt = tf.train.get_checkpoint_state(domain_checkpoint_dir)
            if ckpt:
                print("Reading model parameters from %s" % ckpt.model_checkpoint_path)
                restored_saver.restore(sess, ckpt.model_checkpoint_path)
            else:
                print("Writing to {}\n".format(out_dir))

            # Save train params since eval.py needs them
            trained_dir = os.path.abspath(os.path.join(out_dir, "trained_results"))
            if not os.path.exists(trained_dir):
                os.makedirs(trained_dir)
            with open(trained_dir + '/sorted_label.json', 'w') as outfile:
                json.dump(sorted_label, outfile, indent=4, ensure_ascii=False)
            with open(trained_dir + '/train_params.json', 'w') as outfile:
                json.dump({"max_document_length": max_document_length},
                          outfile, indent=4, ensure_ascii=False)

            # Write vocabulary
            vocab_processor.save(os.path.join(out_dir, "vocab"))

            def train_step(x_batch, y_batch, x_real_len_batch):
                """
                A single training step
                """
                if FLAGS.model_type == "cnn":
                    feed_dict = {
                        obj.input_x: x_batch,
                        obj.input_y: y_batch,
                        obj.dropout_keep_prob: FLAGS.dropout_keep_prob,
                        obj.is_training: True
                    }
                else:
                    feed_dict = {
                        obj.input_x: x_batch,
                        obj.input_y: y_batch,
                        obj.dropout_keep_prob: FLAGS.dropout_keep_prob,
                        obj.real_len: x_real_len_batch
                    }
                _, step, summaries, loss, accuracy = sess.run(
                    [train_op, global_step, train_summary_op, obj.loss, obj.accuracy],
                    feed_dict)
                time_str = datetime.datetime.now().isoformat()
                print("{}: step {}, loss {:g}, acc {:g}".format(time_str, step, loss, accuracy))
                train_summary_writer.add_summary(summaries, step)

            def overfit(dev_acc_history, eva_num=3):
                """Return True when the last `eva_num` dev scores are non-increasing."""
                # Parameter renamed from `dev_loss`: the caller passes dev accuracies
                n = len(dev_acc_history)
                if n < eva_num:
                    return False
                for i in range(n - eva_num + 1, n):  # range, not xrange, under Python 3
                    if dev_acc_history[i] > dev_acc_history[i - 1]:
                        return False
                return True

            def dev_step(x_batch, y_batch, x_real_len_batch, writer=None):
                """
                Evaluates model on a dev set
                """
                dev_batches = data_helpers.batch_iter(
                    list(zip(x_batch, y_batch, x_real_len_batch)),
                    FLAGS.batch_size, 1, shuffle=False)
                all_pred = []
                correct_total_num = 0
                for batch in dev_batches:
                    x_dev_batch, y_dev_batch, x_real_len_dev_batch = zip(*batch)
                    if FLAGS.model_type == "cnn":
                        feed_dict = {
                            obj.input_x: x_dev_batch,
                            obj.input_y: y_dev_batch,
                            obj.dropout_keep_prob: 1.0,
                            obj.is_training: False
                        }
                    else:
                        feed_dict = {
                            obj.input_x: x_dev_batch,
                            obj.input_y: y_dev_batch,
                            obj.dropout_keep_prob: 1.0,
                            obj.real_len: x_real_len_dev_batch
                        }
                    step, summaries, pred, correct_pred_num = sess.run(
                        [global_step, dev_summary_op, obj.predictions, obj.correct_pred_num],
                        feed_dict)
                    all_pred = np.concatenate([all_pred, pred])
                    correct_total_num += correct_pred_num
                    if writer:
                        writer.add_summary(summaries, step)
                dev_acc = 1.0 * correct_total_num / len(y_batch)
                print("right_sample {}, dev_sample {}, dev_acc {:g}".format(
                    correct_total_num, len(y_batch), dev_acc))
                return dev_acc

            # Generate batches
            batches = data_helpers.batch_iter(
                list(zip(x_train, y_train, x_real_len_train)),
                FLAGS.batch_size, FLAGS.num_epochs)
            # Training loop. For each batch...
            dev_acc = []
            for batch in batches:
                x_batch, y_batch, x_real_len_batch = zip(*batch)
                train_step(x_batch, y_batch, x_real_len_batch)
                current_step = tf.train.global_step(sess, global_step)
                if current_step % FLAGS.evaluate_every == 0:
                    print("\nEvaluation:", current_step)
                    cur_acc = dev_step(x_dev, y_dev, x_real_len_dev, writer=dev_summary_writer)
                    path = saver.save(sess, checkpoint_prefix, global_step=current_step)
                    print("Saved model checkpoint to {}\n".format(path))
                    dev_acc.append(cur_acc)
                    if overfit(dev_acc):
                        print("dev accuracy stopped improving; stopping training..\n")
                        sys.exit(0)
                    print("")
def train_model(allow_save_model=True, print_intermediate_results=True,
                d_lossweight=None, pname=None):
    if d_lossweight is None:
        print("The End!!")
    a = 0
    b = 0
    print("\nParameters:")
    for attr, value in sorted(FLAGS.__flags.items()):
        print("{}={}".format(attr.upper(), value))
    print("")

    # Data Preparation
    # ==================================================

    # Load data
    print("Loading data...")
    # Load each source project
    source_text = np.array([])
    source_y0 = np.array([])
    source_file_path = "./within_project/" + pname + '/train/'
    source_files = list()
    for class_name in class_names:
        source_files.append(source_file_path + class_name)
    tmp_text, tmp_y = data_helpers.load_data_and_labels(source_files)
    print(pname + ": " + str(len(tmp_text)) + " sentences")
    source_text = np.concatenate([source_text, tmp_text], 0)
    if len(source_y0) == 0:
        source_y0 = np.array(tmp_y)
    else:
        source_y0 = np.concatenate([source_y0, tmp_y], 0)

    # Build vocabulary; important: cap the document length at 100 tokens
    max_document_length = min(100, max([len(x.split(" ")) for x in source_text]))
    vocab_processor = learn.preprocessing.VocabularyProcessor(max_document_length)
    source_x0 = np.array(list(vocab_processor.fit_transform(source_text)))
    # target_x = np.array(list(vocab_processor.fit_transform(target_text)))

    np.random.seed(10)
    shuffle_indices = np.random.permutation(np.arange(len(source_y0)))
    x_shuffled = source_x0[shuffle_indices]
    y_shuffled = source_y0[shuffle_indices]

    # Split train/test set
    # TODO: This is very crude, should use cross-validation
    dev_sample_index = -1 * int(FLAGS.dev_sample_percentage * float(len(source_y0)))
    source_x, target_x = x_shuffled[:dev_sample_index], x_shuffled[dev_sample_index:]
    source_y, target_y = y_shuffled[:dev_sample_index], y_shuffled[dev_sample_index:]
    print("Vocabulary Size: {:d}".format(len(vocab_processor.vocabulary_)))
    print("Train/Dev split: {:d}/{:d}".format(len(source_y), len(target_y)))

    pro = []
    num = []
    if print_intermediate_results:
        print('data distribution in source dataset')
        pro, num = sa.print_data_distribution(source_y, class_names)
        if pro[0] == "nonSATD":
            a = num[0]
        if pro[1] == "SATD":
            b = num[1]
        print('data distribution in target dataset')
        sa.print_data_distribution(target_y, class_names)
        print("Max Document Length: {:d}".format(max_document_length))
        print("Vocabulary Size: {:d}".format(len(vocab_processor.vocabulary_)))
        print("Train/Test size: {:d}/{:d}".format(len(source_y), len(target_y)))

    # Training
    # ==================================================
    min_loss = 100000000
    max_f1 = 0.0
    predictions_at_min_loss = None
    steps_per_epoch = int(len(source_y) / FLAGS.batch_size) + 1
    with tf.Graph().as_default():
        session_conf = tf.ConfigProto(
            allow_soft_placement=FLAGS.allow_soft_placement,
            log_device_placement=FLAGS.log_device_placement)
        session_conf.gpu_options.allow_growth = True
        sess = tf.Session(config=session_conf)
        with sess.as_default():
            cnn = TextCNN(
                a0=a,
                b0=b,
                d_lossweight=a / b,
                sequence_length=max_document_length,
                num_classes=source_y.shape[1],
                vocab_size=len(vocab_processor.vocabulary_),
                embedding_size=FLAGS.embedding_dim,
                filter_sizes=list(map(int, FLAGS.filter_sizes.split(","))),
                num_filters=FLAGS.num_filters,
                l2_reg_lambda=FLAGS.l2_reg_lambda)

            # Define Training procedure
            global_step = tf.Variable(0, name="global_step", trainable=False)
            learning_rate = tf.train.polynomial_decay(
                2 * 1e-3, global_step, steps_per_epoch * FLAGS.num_epochs, 1e-4, power=1)
            optimizer = tf.train.AdamOptimizer(learning_rate)
            grads_and_vars = optimizer.compute_gradients(cnn.loss)
            train_op = optimizer.apply_gradients(grads_and_vars, global_step=global_step)
            if allow_save_model:
                print("!!!!!!os.path")

            # Keep track of gradient values and sparsity (optional)
            grad_summaries = []
            for g, v in grads_and_vars:
                if g is not None:
                    grad_hist_summary = tf.summary.histogram("{}/grad/hist".format(v.name), g)
                    sparsity_summary = tf.summary.scalar("{}/grad/sparsity".format(v.name),
                                                         tf.nn.zero_fraction(g))
                    grad_summaries.append(grad_hist_summary)
                    grad_summaries.append(sparsity_summary)
            grad_summaries_merged = tf.summary.merge(grad_summaries)

            # Output directory for models and summaries
            timestamp = str(int(time.time()))
            out_dir = os.path.abspath(os.path.join(os.path.curdir, "runs-RQ1", pname))
            print("Writing to {}\n".format(out_dir))

            # Summaries for loss, f1, auc
            loss_summary = tf.summary.scalar("loss", cnn.loss)
            acc_summary = tf.summary.scalar("accuracy", cnn.accuracy)
            precision_summary = tf.summary.scalar("precision", cnn.precision)
            recall_summary = tf.summary.scalar("recall", cnn.recall)
            f1_summary = tf.summary.scalar("f1", cnn.f1)
            auc_summary = tf.summary.scalar("auc", cnn.auc)

            # Train Summaries
            train_summary_op = tf.summary.merge(
                [loss_summary, acc_summary, precision_summary, recall_summary,
                 f1_summary, auc_summary, grad_summaries_merged])
            train_summary_dir = os.path.join(out_dir, 'summaries', 'train')
            train_summary_writer = tf.summary.FileWriter(train_summary_dir)
            train_summary_writer.add_graph(sess.graph)

            # Dev summaries
            dev_summary_op = tf.summary.merge(
                [loss_summary, acc_summary, auc_summary, precision_summary,
                 recall_summary, f1_summary, grad_summaries_merged])
            dev_summary_dir = os.path.join(out_dir, 'summaries', 'dev')
            dev_summary_writer = tf.summary.FileWriter(dev_summary_dir)
            dev_summary_writer.add_graph(sess.graph)

            # Checkpoint directory. Tensorflow assumes this directory already exists so we need to create it
            checkpoint_dir_name = "checkpoint-" + pname
            checkpoint_dir = os.path.abspath(os.path.join(out_dir, checkpoint_dir_name))
            # checkpoint_prefix = os.path.join(checkpoint_dir, "model")
            if not os.path.exists(checkpoint_dir):
                os.makedirs(checkpoint_dir)
            saver = tf.train.Saver(tf.all_variables())
            vocab_dir_name = os.path.join(checkpoint_dir, "vocab")

            # Write vocabulary
            vocab_processor.save(vocab_dir_name)

            # Initialize all variables
            sess.run(tf.global_variables_initializer(), feed_dict={cnn.phase_train: True})
            sess.run(tf.local_variables_initializer())  # this is for version r0.12

            def train_step(x_batch, y_batch):
                """
                A single training step
                """
                sess.run(tf.local_variables_initializer())
                feed_dict = {
                    cnn.input_x: x_batch,
                    cnn.input_y: y_batch,
                    cnn.dropout_keep_prob: FLAGS.dropout_keep_prob,
                    cnn.phase_train: True
                }
                _, step, summaries, loss, mean_loss, l2_loss, accuracy, precision, recall, f1, auc = sess.run(
                    [train_op, global_step, train_summary_op, cnn.loss, cnn.mean_loss,
                     cnn.l2_loss, cnn.accuracy, cnn.precision, cnn.recall, cnn.f1, cnn.auc],
                    feed_dict)
                time_str = datetime.datetime.now().isoformat()
                train_summary_writer.add_summary(summaries, step)
                return accuracy, precision, recall, f1, auc

            def dev_step(x_batch, y_batch, writer=None):
                """
                Evaluates model on a dev set
                """
                feed_dict = {
                    cnn.input_x: x_batch,
                    cnn.input_y: y_batch,
                    cnn.dropout_keep_prob: 1.0,
                    cnn.phase_train: False
                }
                summaries, step, loss, mean_loss, l2_loss, accuracy, precision, recall, f1, auc, predictions = sess.run(
                    [dev_summary_op, global_step, cnn.loss, cnn.mean_loss, cnn.l2_loss,
                     cnn.accuracy, cnn.precision, cnn.recall, cnn.f1, cnn.auc, cnn.predictions],
                    feed_dict)
                time_str = datetime.datetime.now().isoformat()
                if print_intermediate_results:
                    print("{}: epoch {}, step {}, loss {:g}, acc {:g}, precision {:g}, "
                          "recall {:g}, f1 {:g}, auc {:g}, mean_loss {}, l2_loss {}".format(
                              time_str, step / steps_per_epoch, step, loss, accuracy,
                              precision, recall, f1, auc, mean_loss, l2_loss))
                    tp, fp, fn, tn, precision, recall, f1, auc2 = sa.calculate_IR_metrics(
                        y_batch, predictions, class_names, None)
                    print("^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^")
                    print(precision[1], recall[1], f1[1], auc2[1])
                if writer is not None:
                    print("devWrite!!!!")
                    writer.add_summary(summaries, step)
                return accuracy, precision, recall, f1, auc, loss, predictions

            # Generate batches
            batches = data_helpers.batch_iter(
                list(zip(source_x, source_y)), FLAGS.batch_size, FLAGS.num_epochs)
            for batch in batches:
                x_batch, y_batch = zip(*batch)
                train_accuracy, train_precision, train_recall, train_f1, train_auc = train_step(
                    x_batch, y_batch)
                current_step = tf.train.global_step(sess, global_step)
                current_epoch = current_step / steps_per_epoch
                if current_step % steps_per_epoch == 0 and current_epoch % FLAGS.evaluate_every == 0:
                    if print_intermediate_results:
                        print("Current train accuracy: %s" % train_accuracy)
                        print("Current train precision: %s" % train_precision)
                        print("Current train recall: %s" % train_recall)
                        print("Current train f1: %s" % train_f1)
                        print("Current train auc: %s" % train_auc)
                    print("~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~")
                    fold_accuracy, precision, recall, f1, auc, loss, predictions = dev_step(
                        target_x, target_y, writer=dev_summary_writer)
                    tp, fp, fn, tn, precision, recall, f1, auc2 = sa.calculate_IR_metrics(
                        target_y, predictions, class_names, None)
                    for i in range(len(class_names)):
                        print(class_names[i], precision[i], recall[i], f1[i], auc2[i])
                    if loss < min_loss:
                        if f1[1] > max_f1:
                            min_loss = loss
                            max_f1 = f1[1]
                            predictions_at_min_loss = predictions
                            if allow_save_model:
                                save_path = saver.save(sess, checkpoint_dir,
                                                       global_step=current_step)
                                if print_intermediate_results:
                                    print("Model saved in file: %s" % save_path)

            # Final result
            output_file = open(source_file_path + 'fp_sentences1', 'a', encoding='utf-8')
            print('Final result:')
            fold_accuracy, precision, recall, f1, auc, loss, predictions = dev_step(
                target_x, target_y)
            print("ACC: %s" % fold_accuracy)
            print(precision, recall, f1, auc)
            tp, fp, fn, tn, precision, recall, f1, auc2 = sa.calculate_IR_metrics(
                target_y, predictions, class_names, output_file)
            for i in range(len(class_names)):
                print(class_names[i], precision[i], recall[i], f1[i], auc2[i])
    return min_loss, predictions_at_min_loss, target_y
def main(unused_argv):
    if FLAGS.job_name is None or FLAGS.job_name == '':
        raise ValueError('Must specify an explicit job_name!')
    else:
        print('job_name : ' + FLAGS.job_name)
    if FLAGS.task_index is None or FLAGS.task_index == '':
        raise ValueError('Must specify an explicit task_index!')
    else:
        print('task_index : ' + str(FLAGS.task_index))

    ps_spec = FLAGS.ps_hosts.split(',')
    worker_spec = FLAGS.worker_hosts.split(',')
    num_worker = len(worker_spec)
    print("Number of worker = " + str(num_worker))
    print("ps_spec = ")
    print(*ps_spec)
    print("worker_spec = ")
    print(*worker_spec)
    cluster = tf.train.ClusterSpec({'ps': ps_spec, 'worker': worker_spec})
    print("After defining Cluster")
    print("Job name = " + FLAGS.job_name)
    print("task index = " + str(FLAGS.task_index))

    # try:
    server = tf.train.Server(cluster, job_name=FLAGS.job_name, task_index=FLAGS.task_index)
    print("After defining server")

    if FLAGS.job_name == 'ps':
        print("Parameter Server is executed")
        server.join()
    elif FLAGS.job_name == "worker":
        print("Worker is executed")  # was mislabeled "Parameter Server is executed"
        with tf.device(
                tf.train.replica_device_setter(
                    worker_device="/job:worker/task:%d" % FLAGS.task_index,
                    cluster=cluster)):
            is_chief = (FLAGS.task_index == 0)

            # Data Preparation
            # ==================================================
            # Load data
            print("Loading data...")
            x_text, y_label = data_helpers.load_data_and_labels(FLAGS.data_file)

            # Build vocabulary
            max_document_length = max([len(x.split(" ")) for x in x_text])
            vocab_processor = learn.preprocessing.VocabularyProcessor(max_document_length)
            x = np.array(list(vocab_processor.fit_transform(x_text)))
            y = np.array(y_label)

            # Randomly shuffle data
            np.random.seed(10)
            shuffle_indices = np.random.permutation(np.arange(len(y)))
            print(type(x), type(y))
            x_shuffled = x[shuffle_indices]
            y_shuffled = y[shuffle_indices]

            # Split train/test set
            # TODO: This is very crude, should use cross-validation
            dev_sample_index = -1 * int(FLAGS.dev_sample_percentage * float(len(y)))
            x_train, x_dev = x_shuffled[:dev_sample_index], x_shuffled[dev_sample_index:]
            y_train, y_dev = y_shuffled[:dev_sample_index], y_shuffled[dev_sample_index:]
            print(y_train.shape)
            print("Vocabulary Size: {:d}".format(len(vocab_processor.vocabulary_)))
            print("Train/Dev split: {:d}/{:d}".format(len(y_train), len(y_dev)))

            # Training
            # ==================================================
            tf.MaxAcc = 0.1  # track the best dev accuracy seen so far

            def copymax(path):
                shutil.copy(path, "{}.backup".format(path))

            cnn = TextCNN(sequence_length=x_train.shape[1],
                          num_classes=y_train.shape[1],
                          vocab_size=len(vocab_processor.vocabulary_),
                          embedding_size=FLAGS.embedding_dim,
                          filter_sizes=list(map(int, FLAGS.filter_sizes.split(","))),
                          num_filters=FLAGS.num_filters,
                          l2_reg_lambda=FLAGS.l2_reg_lambda)

            # Define Training procedure
            global_step = tf.train.get_or_create_global_step()
            optimizer = tf.train.AdamOptimizer(1e-3)
            grads_and_vars = optimizer.compute_gradients(cnn.loss)
            train_op = optimizer.apply_gradients(grads_and_vars, global_step=global_step)
            # Keep track of gradient values and sparsity (optional)

            # Output directory for models and summaries
            timestamp = str(int(time.time()))
            out_dir = FLAGS.out_dir
            print("Writing to {}\n".format(out_dir))

            # Checkpoint directory. Tensorflow assumes this directory already exists so we need to create it
            checkpoint_dir = os.path.abspath(os.path.join(out_dir, "checkpoints"))
            checkpoint_prefix = os.path.join(checkpoint_dir, "model")
            MaxAcc_prefi = os.path.join(checkpoint_dir, "MAXACCmodel")
            if not os.path.exists(checkpoint_dir):
                os.makedirs(checkpoint_dir)
            saver = tf.train.Saver(tf.global_variables(), max_to_keep=FLAGS.num_checkpoints)

            # Write vocabulary
            vocab_processor.save(os.path.join(out_dir, "vocab"))

            # Initialize all variables
            session_conf = tf.ConfigProto(
                allow_soft_placement=FLAGS.allow_soft_placement,
                log_device_placement=FLAGS.log_device_placement)
            init_op = tf.global_variables_initializer()
            sv = tf.train.Supervisor(is_chief=(FLAGS.task_index == 0),
                                     logdir=out_dir,
                                     init_op=init_op,
                                     saver=saver,
                                     global_step=global_step)
            sess = sv.prepare_or_wait_for_session(server.target, config=session_conf)

            # Generate batches
            batches = data_helpers.batch_iter(list(zip(x_train, y_train)),
                                              FLAGS.batch_size, FLAGS.num_epochs)
            # Training loop. For each batch...
            ifsave = False  # initialize so checkpointing cannot hit an unbound name
            for batch in batches:
                x_batch, y_batch = zip(*batch)
                _, current_step, loss, accuracy = sess.run(
                    [train_op, global_step, cnn.loss, cnn.accuracy],
                    feed_dict={
                        cnn.input_x: x_batch,
                        cnn.input_y: y_batch,
                        cnn.dropout_keep_prob: FLAGS.dropout_keep_prob
                    })
                time_str = datetime.datetime.now().isoformat()
                if current_step % 100 == 0:
                    print("{}: step {}, loss {:g}, acc {:g}".format(
                        time_str, current_step, loss, accuracy))
                if current_step % FLAGS.evaluate_every == 0:
                    print("\nEvaluation:")
                    loss, accuracy = sess.run(
                        [cnn.loss, cnn.accuracy],
                        feed_dict={
                            cnn.input_x: x_batch,
                            cnn.input_y: y_batch,
                            cnn.dropout_keep_prob: 1.0
                        })
                    time_str = datetime.datetime.now().isoformat()
                    result = "{}: step {}, loss {:g}, acc {:g}".format(
                        time_str, current_step, loss, accuracy)
                    print(result)
                    with open(os.path.join(out_dir, "result"), 'a+') as f:
                        f.write("{}\n".format(result))
                    if tf.MaxAcc < accuracy:
                        tf.MaxAcc = accuracy
                        ifsave = True
                    else:
                        ifsave = False
                    print("Max acc:{}".format(tf.MaxAcc))
                    print("")
                if current_step % FLAGS.checkpoint_every == 0:
                    path = saver.save(sess, checkpoint_prefix, global_step=current_step)
                    print("Saved model checkpoint to {}\n".format(path))
                    if ifsave:
                        path = saver.save(sess, MaxAcc_prefi, None)
                        copymax("{}.data-00000-of-00001".format(path))
                        copymax("{}.index".format(path))
                        copymax("{}.meta".format(path))
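# Example launch for the distributed script above. The host:port values and
# the script name (train_distributed.py) are hypothetical; what matters is
# one process per cluster member, matching the ps_hosts / worker_hosts /
# job_name / task_index flags the script parses:
#
#   python train_distributed.py --ps_hosts=localhost:2222 \
#       --worker_hosts=localhost:2223,localhost:2224 --job_name=ps --task_index=0
#   python train_distributed.py --ps_hosts=localhost:2222 \
#       --worker_hosts=localhost:2223,localhost:2224 --job_name=worker --task_index=0
#   python train_distributed.py --ps_hosts=localhost:2222 \
#       --worker_hosts=localhost:2223,localhost:2224 --job_name=worker --task_index=1
#
# The ps process blocks in server.join(); the task_index=0 worker acts as
# chief and initializes variables via the Supervisor.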
def train():
    with tf.device('/cpu:0'):
        x_text, pos1, pos2, y = data_helpers.load_data_and_labels(FLAGS.train_dir)

    # Build vocabulary
    # Example: x_text[3] = "A misty <e1>ridge</e1> uprises from the <e2>surge</e2>."
    # ['a misty ridge uprises from the surge <UNK> <UNK> ... <UNK>']
    # => [27 39 40 41 42 1 43 0 0 ... 0]
    # dimension = FLAGS.max_sentence_length
    text_vocab_processor = tf.contrib.learn.preprocessing.VocabularyProcessor(
        FLAGS.max_sentence_length)
    text_vec = np.array(list(text_vocab_processor.fit_transform(x_text)))
    print("Text Vocabulary Size: {:d}".format(len(text_vocab_processor.vocabulary_)))

    # Example: pos1[3] = [-2 -1 0 1 2 3 4 999 999 999 ... 999]
    #                    [95 96 97 98 99 100 101 999 999 999 ... 999]
    # => [11 12 13 14 15 16 21 17 17 17 ... 17]
    # dimension = MAX_SENTENCE_LENGTH
    pos_vocab_processor = tf.contrib.learn.preprocessing.VocabularyProcessor(
        FLAGS.max_sentence_length)
    pos_vocab_processor.fit(pos1 + pos2)
    pos1_vec = np.array(list(pos_vocab_processor.transform(pos1)))
    pos2_vec = np.array(list(pos_vocab_processor.transform(pos2)))
    print("Position Vocabulary Size: {:d}".format(len(pos_vocab_processor.vocabulary_)))

    x = np.array([list(i) for i in zip(text_vec, pos1_vec, pos2_vec)])
    print("x = {0}".format(x.shape))
    print("y = {0}".format(y.shape))
    print("")

    # Randomly shuffle data
    np.random.seed(10)
    shuffle_indices = np.random.permutation(np.arange(len(y)))
    x_shuffled = x[shuffle_indices]
    y_shuffled = y[shuffle_indices]

    # Split train/test set
    # TODO: This is very crude, should use cross-validation
    dev_sample_index = -1 * int(FLAGS.dev_sample_percentage * float(len(y)))
    x_train, x_dev = x_shuffled[:dev_sample_index], x_shuffled[dev_sample_index:]
    x_dev = np.array(x_dev).transpose((1, 0, 2))
    y_train, y_dev = y_shuffled[:dev_sample_index], y_shuffled[dev_sample_index:]
    print("Train/Dev split: {:d}/{:d}\n".format(len(y_train), len(y_dev)))

    with tf.Graph().as_default():
        session_conf = tf.ConfigProto(
            allow_soft_placement=FLAGS.allow_soft_placement,
            log_device_placement=FLAGS.log_device_placement)
        sess = tf.Session(config=session_conf)
        with sess.as_default():
            cnn = TextCNN(
                sequence_length=x_train.shape[2],
                num_classes=y_train.shape[1],
                text_vocab_size=len(text_vocab_processor.vocabulary_),
                text_embedding_size=FLAGS.text_embedding_dim,
                pos_vocab_size=len(pos_vocab_processor.vocabulary_),
                pos_embedding_size=FLAGS.position_embedding_dim,
                filter_sizes=list(map(int, FLAGS.filter_sizes.split(","))),
                num_filters=FLAGS.num_filters,
                l2_reg_lambda=FLAGS.l2_reg_lambda)

            # Define Training procedure
            global_step = tf.Variable(0, name="global_step", trainable=False)
            train_op = tf.train.AdamOptimizer(FLAGS.learning_rate).minimize(
                cnn.loss, global_step=global_step)

            # Output directory for models and summaries
            timestamp = str(int(time.time()))
            out_dir = os.path.abspath(os.path.join(os.path.curdir, "runs", timestamp))
            print("Writing to {}\n".format(out_dir))

            # Summaries for loss and accuracy
            loss_summary = tf.summary.scalar("loss", cnn.loss)
            acc_summary = tf.summary.scalar("accuracy", cnn.accuracy)

            # Train Summaries
            train_summary_op = tf.summary.merge([loss_summary, acc_summary])
            train_summary_dir = os.path.join(out_dir, "summaries", "train")
            train_summary_writer = tf.summary.FileWriter(train_summary_dir, sess.graph)

            # Dev summaries
            dev_summary_op = tf.summary.merge([loss_summary, acc_summary])
            dev_summary_dir = os.path.join(out_dir, "summaries", "dev")
            dev_summary_writer = tf.summary.FileWriter(dev_summary_dir, sess.graph)

            # Checkpoint directory. Tensorflow assumes this directory already exists so we need to create it
            checkpoint_dir = os.path.abspath(os.path.join(out_dir, "checkpoints"))
            checkpoint_prefix = os.path.join(checkpoint_dir, "model")
            if not os.path.exists(checkpoint_dir):
                os.makedirs(checkpoint_dir)
            saver = tf.train.Saver(tf.global_variables(), max_to_keep=FLAGS.num_checkpoints)

            # Write vocabulary
            text_vocab_processor.save(os.path.join(out_dir, "text_vocab"))
            pos_vocab_processor.save(os.path.join(out_dir, "position_vocab"))

            # Initialize all variables
            sess.run(tf.global_variables_initializer())

            # Pre-trained word2vec
            if FLAGS.word2vec:
                # initial matrix with random uniform
                initW = np.random.uniform(
                    -0.25, 0.25,
                    (len(text_vocab_processor.vocabulary_), FLAGS.text_embedding_dim))
                # load any vectors from the word2vec file
                print("Load word2vec file {0}".format(FLAGS.word2vec))
                with open(FLAGS.word2vec, "rb") as f:
                    header = f.readline()
                    vocab_size, layer1_size = map(int, header.split())
                    binary_len = np.dtype('float32').itemsize * layer1_size
                    for line in range(vocab_size):
                        word = []
                        while True:
                            ch = f.read(1).decode('latin-1')
                            if ch == ' ':
                                word = ''.join(word)
                                break
                            if ch != '\n':
                                word.append(ch)
                        idx = text_vocab_processor.vocabulary_.get(word)
                        if idx != 0:
                            initW[idx] = np.fromstring(f.read(binary_len), dtype='float32')
                        else:
                            f.read(binary_len)
                sess.run(cnn.W_text.assign(initW))
                print("Successfully loaded the pre-trained word2vec model!\n")

            # Generate batches
            batches = data_helpers.batch_iter(list(zip(x_train, y_train)),
                                              FLAGS.batch_size, FLAGS.num_epochs)
            # Training loop. For each batch...
            for batch in batches:
                x_batch, y_batch = zip(*batch)
                x_batch = np.array(x_batch).transpose((1, 0, 2))

                # Train
                feed_dict = {
                    cnn.input_text: x_batch[0],
                    cnn.input_pos1: x_batch[1],
                    cnn.input_pos2: x_batch[2],
                    cnn.input_y: y_batch,
                    cnn.dropout_keep_prob: FLAGS.dropout_keep_prob
                }
                _, step, summaries, loss, accuracy = sess.run(
                    [train_op, global_step, train_summary_op, cnn.loss, cnn.accuracy],
                    feed_dict)
                train_summary_writer.add_summary(summaries, step)

                # Training log display
                if step % FLAGS.display_every == 0:
                    time_str = datetime.datetime.now().isoformat()
                    print("{}: step {}, loss {:g}, acc {:g}".format(time_str, step, loss, accuracy))

                # Evaluation
                if step % FLAGS.evaluate_every == 0:
                    print("\nEvaluation:")
                    feed_dict = {
                        cnn.input_text: x_dev[0],
                        cnn.input_pos1: x_dev[1],
                        cnn.input_pos2: x_dev[2],
                        cnn.input_y: y_dev,
                        cnn.dropout_keep_prob: 1.0
                    }
                    summaries, loss, accuracy, predictions = sess.run(
                        [dev_summary_op, cnn.loss, cnn.accuracy, cnn.predictions],
                        feed_dict)
                    dev_summary_writer.add_summary(summaries, step)
                    time_str = datetime.datetime.now().isoformat()
                    print("{}: step {}, loss {:g}, acc {:g}".format(time_str, step, loss, accuracy))
                    print("(2*9+1)-Way Macro-Average F1 Score (excluding Other): {:g}\n".format(
                        f1_score(np.argmax(y_dev, axis=1), predictions,
                                 labels=np.array(range(1, 19)), average="macro")))

                # Model checkpoint
                if step % FLAGS.checkpoint_every == 0:
                    path = saver.save(sess, checkpoint_prefix, global_step=step)
                    print("Saved model checkpoint to {}\n".format(path))
def train(x_train, y_train, vocab_processor, x_dev, y_dev):
    # Training — define the training procedure
    # ==================================================
    with tf.Graph().as_default():  # set this Graph() as the default; see function.md
        session_conf = tf.ConfigProto(
            # allow TensorFlow to fall back to another device when the requested
            # one does not exist; see function.md
            allow_soft_placement=FLAGS.allow_soft_placement,
            log_device_placement=FLAGS.log_device_placement)
        sess = tf.Session(config=session_conf)
        # A Session runs TensorFlow operations; it can also be used as a context manager (with)
        with sess.as_default():
            cnn = TextCNN(  # instantiate the TextCNN model; see function.md
                # number of words per sentence (all sentences padded to a uniform length)
                sequence_length=x_train.shape[1],
                # number of classes, taken from the labels
                num_classes=y_train.shape[1],
                # total number of words used for training
                vocab_size=len(vocab_processor.vocabulary_),
                # embedding layer output size
                embedding_size=FLAGS.embedding_dim,
                # filter heights; each filter size yields a feature map of a different length
                filter_sizes=list(map(int, FLAGS.filter_sizes.split(","))),
                # number of filters per size
                num_filters=FLAGS.num_filters,
                # L2 regularization strength
                l2_reg_lambda=FLAGS.l2_reg_lambda)

            # Define Training procedure
            global_step = tf.Variable(0, name="global_step", trainable=False)  # counts training steps
            optimizer = tf.train.AdamOptimizer(1e-3)  # use the Adam optimizer
            grads_and_vars = optimizer.compute_gradients(cnn.loss)  # gradients of the loss, the optimization target
            train_op = optimizer.apply_gradients(grads_and_vars, global_step=global_step)
            # train_op applies the gradient updates to the parameters;
            # each run of train_op is one training step

            # Keep track of gradient values and sparsity (optional)
            grad_summaries = []
            for g, v in grads_and_vars:
                if g is not None:
                    grad_hist_summary = tf.summary.histogram("{}/grad/hist".format(v.name), g)
                    sparsity_summary = tf.summary.scalar("{}/grad/sparsity".format(v.name),
                                                         tf.nn.zero_fraction(g))
                    grad_summaries.append(grad_hist_summary)
                    grad_summaries.append(sparsity_summary)
            grad_summaries_merged = tf.summary.merge(grad_summaries)

            # Output directory for models and summaries
            timestamp = str(int(time.time()))
            out_dir = os.path.abspath(os.path.join(os.path.curdir, "runs", timestamp))
            print("Writing to {}\n".format(out_dir))

            # Summaries for loss and accuracy
            loss_summary = tf.summary.scalar("loss", cnn.loss)
            acc_summary = tf.summary.scalar("accuracy", cnn.accuracy)

            # Train Summaries
            train_summary_op = tf.summary.merge([loss_summary, acc_summary, grad_summaries_merged])
            train_summary_dir = os.path.join(out_dir, "summaries", "train")
            train_summary_writer = tf.summary.FileWriter(train_summary_dir, sess.graph)

            # Dev summaries
            dev_summary_op = tf.summary.merge([loss_summary, acc_summary])
            dev_summary_dir = os.path.join(out_dir, "summaries", "dev")
            dev_summary_writer = tf.summary.FileWriter(dev_summary_dir, sess.graph)

            # Checkpoint directory. Tensorflow assumes this directory already exists
            # so we need to create it. Checkpoints store the model parameters.
            checkpoint_dir = os.path.abspath(os.path.join(out_dir, "checkpoints"))  # i.e. out_dir/checkpoints
            checkpoint_prefix = os.path.join(checkpoint_dir, "model")
            if not os.path.exists(checkpoint_dir):
                os.makedirs(checkpoint_dir)
            saver = tf.train.Saver(tf.global_variables(), max_to_keep=FLAGS.num_checkpoints)

            # Write vocabulary
            vocab_processor.save(os.path.join(out_dir, "vocab"))

            # Initialize all variables
            sess.run(tf.global_variables_initializer())

            # Define a single training step
            def train_step(x_batch, y_batch):
                """
                A single training step
                """
                # Data is fed to the network through placeholder nodes; every placeholder
                # must receive a value or TensorFlow will raise an error
                feed_dict = {
                    cnn.input_x: x_batch,
                    cnn.input_y: y_batch,
                    cnn.dropout_keep_prob: FLAGS.dropout_keep_prob
                }
                # session.run() executes train_op and returns the values of the requested
                # ops. Note that train_op itself returns nothing; it only updates the
                # network parameters.
                _, step, summaries, loss, accuracy = sess.run(
                    [train_op, global_step, train_summary_op, cnn.loss, cnn.accuracy],
                    feed_dict)
                time_str = datetime.datetime.now().isoformat()
                print("{}: step {}, loss {:g}, acc {:g}".format(time_str, step, loss, accuracy))
                train_summary_writer.add_summary(summaries, step)

            # A similar function evaluates loss and accuracy on an arbitrary dataset,
            # e.g. the dev set or the whole training set. It is essentially the same
            # as train_step, but without the training op and with dropout disabled.
            def dev_step(x_batch, y_batch, writer=None):
                """
                Evaluates model on a dev set
                """
                feed_dict = {
                    cnn.input_x: x_batch,
                    cnn.input_y: y_batch,
                    cnn.dropout_keep_prob: 1.0
                }
                step, summaries, loss, accuracy = sess.run(
                    [global_step, dev_summary_op, cnn.loss, cnn.accuracy],
                    feed_dict)
                time_str = datetime.datetime.now().isoformat()
                print("{}: step {}, loss {:g}, acc {:g}".format(time_str, step, loss, accuracy))
                if writer:
                    writer.add_summary(summaries, step)

            # Generate batches
            batches = data_helpers.batch_iter(list(zip(x_train, y_train)),
                                              FLAGS.batch_size, FLAGS.num_epochs)
            # Training loop. For each batch...
            for batch in batches:
                x_batch, y_batch = zip(*batch)
                train_step(x_batch, y_batch)
                current_step = tf.train.global_step(sess, global_step)
                if current_step % FLAGS.evaluate_every == 0:
                    print("\nEvaluation:")
                    dev_step(x_dev, y_dev, writer=dev_summary_writer)
                    print("")
                if current_step % FLAGS.checkpoint_every == 0:
                    path = saver.save(sess, checkpoint_prefix, global_step=current_step)
                    print("Saved model checkpoint to {}\n".format(path))
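# ----------------------------------------------------------------------
# The TextCNN class itself is never defined in this section. Below is a
# minimal sketch of the graph these scripts assume — the placeholders
# (input_x, input_y, dropout_keep_prob) and ops (loss, accuracy,
# predictions) match the call sites above, following the
# embedding -> convolution -> max-pooling -> dropout -> softmax layout
# described in the comments; exact filter shapes and initializers are
# assumptions, not the authors' code.
import tensorflow as tf

class TextCNN(object):
    """Minimal sketch: embedding -> conv/max-pool per filter size -> dropout -> softmax."""

    def __init__(self, sequence_length, num_classes, vocab_size,
                 embedding_size, filter_sizes, num_filters, l2_reg_lambda=0.0):
        self.input_x = tf.placeholder(tf.int32, [None, sequence_length], name="input_x")
        self.input_y = tf.placeholder(tf.float32, [None, num_classes], name="input_y")
        self.dropout_keep_prob = tf.placeholder(tf.float32, name="dropout_keep_prob")

        # Embedding layer: ids -> dense vectors, with a channel axis for conv2d
        W_emb = tf.Variable(tf.random_uniform([vocab_size, embedding_size], -1.0, 1.0),
                            name="W_emb")
        embedded = tf.expand_dims(tf.nn.embedding_lookup(W_emb, self.input_x), -1)

        # One convolution + max-pool per filter size
        pooled = []
        for filter_size in filter_sizes:
            W = tf.Variable(tf.truncated_normal(
                [filter_size, embedding_size, 1, num_filters], stddev=0.1))
            b = tf.Variable(tf.constant(0.1, shape=[num_filters]))
            conv = tf.nn.conv2d(embedded, W, strides=[1, 1, 1, 1], padding="VALID")
            h = tf.nn.relu(tf.nn.bias_add(conv, b))
            # Max-pool over the whole feature map length
            pooled.append(tf.nn.max_pool(
                h, ksize=[1, sequence_length - filter_size + 1, 1, 1],
                strides=[1, 1, 1, 1], padding="VALID"))
        num_filters_total = num_filters * len(filter_sizes)
        h_pool_flat = tf.reshape(tf.concat(pooled, 3), [-1, num_filters_total])
        h_drop = tf.nn.dropout(h_pool_flat, self.dropout_keep_prob)

        # Final scores, loss, and accuracy
        W_out = tf.get_variable("W_out", shape=[num_filters_total, num_classes],
                                initializer=tf.contrib.layers.xavier_initializer())
        b_out = tf.Variable(tf.constant(0.1, shape=[num_classes]))
        scores = tf.nn.xw_plus_b(h_drop, W_out, b_out, name="scores")
        self.predictions = tf.argmax(scores, 1, name="predictions")
        losses = tf.nn.softmax_cross_entropy_with_logits(logits=scores, labels=self.input_y)
        l2_loss = tf.nn.l2_loss(W_out) + tf.nn.l2_loss(b_out)
        self.loss = tf.reduce_mean(losses) + l2_reg_lambda * l2_loss
        correct = tf.equal(self.predictions, tf.argmax(self.input_y, 1))
        self.accuracy = tf.reduce_mean(tf.cast(correct, "float"), name="accuracy")
# ----------------------------------------------------------------------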
# split it
np.random.seed(12345)
ind = np.random.permutation(x.shape[0])
shuffled_x = x[ind, :]
shuffled_y = y[ind, :]
idx = int(FLAGS.dev_sample_percentage * x.shape[0])
train_x, train_y = shuffled_x[idx:, :], shuffled_y[idx:, :]
dev_x, dev_y = shuffled_x[:idx, :], shuffled_y[:idx, :]

with tf.Session() as sess:
    cnn = TextCNN(max_doc_len, len(mb.classes_), FLAGS.embedding_dim,
                  len(text_processor.vocabulary_),
                  list(map(int, FLAGS.filter_sizes.split(','))),
                  FLAGS.num_filters)

    # train operation
    global_step = tf.Variable(0, name="global_step", trainable=False)
    optimizer = tf.train.AdamOptimizer(1e-5)
    grads_and_vars = optimizer.compute_gradients(cnn.loss)
    train_op = optimizer.apply_gradients(grads_and_vars, global_step=global_step)

    # I/O directory setup
    timestamp = str(int(time.time()))
    out_dir = os.path.abspath(os.path.join(os.path.curdir, "runs", timestamp))

    # summary writer
    train_summary_dir = os.path.join(out_dir, "summary/train")
    train_summary_writer = tf.summary.FileWriter(train_summary_dir, sess.graph)
from text_cnn import TextCNN
from text_cnn import TextNN
from text_cnn import plot_accuracy
# from main import get_data
import numpy as np

NUM_FILTERS = 3
FILTER_SIZES = [2, 3, 5]
MAX_WORD = 40
EMBEDDING_LENGTH = 300
NUM_CLASSES = 2

model = TextCNN(NUM_FILTERS, FILTER_SIZES, MAX_WORD, EMBEDDING_LENGTH, NUM_CLASSES)
nn = TextNN(MAX_WORD, EMBEDDING_LENGTH, NUM_CLASSES)

# pos_path = "./review_polarity/txt_sentoken/pos/*.txt"
# neg_path = "./review_polarity/txt_sentoken/neg/*.txt"
# pos_data_train, pos_data_test = get_data(pos_path)
# neg_data_train, neg_data_test = get_data(neg_path)


def read_data_from_csv(path):
    res = []
    with open(path, 'r') as f:  # text mode so line.split(',') works on str under Python 3
        review = []
        for line in f:
            nums = line.split(',')
            newWord = [float(num) for num in nums]
            review.append(newWord)
            if len(review) == 40:
                res.append(review)
                review = []  # reset so the next 40-row block starts a new review
    return res
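# Example usage of the reader above (file layout inferred from MAX_WORD and
# EMBEDDING_LENGTH: each CSV row is one 300-dim word vector and every
# consecutive block of 40 rows forms one review; the path is hypothetical):
#
#   reviews = read_data_from_csv("./data/pos_vectors.csv")
#   X = np.array(reviews)   # shape: (num_reviews, 40, 300)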
def train():
    with tf.device('/cpu:0'):
        x_text, y, pos1, pos2 = data_helpers.load_data_and_labels(FLAGS.train_path)

    # Build vocabulary
    # Example: x_text[3] = "A misty <e1>ridge</e1> uprises from the <e2>surge</e2>."
    # ['a misty ridge uprises from the surge <UNK> <UNK> ... <UNK>']
    # => [27 39 40 41 42 1 43 0 0 ... 0]
    # dimension = FLAGS.max_sentence_length
    text_vocab_processor = tf.contrib.learn.preprocessing.VocabularyProcessor(FLAGS.max_sentence_length)
    x = np.array(list(text_vocab_processor.fit_transform(x_text)))
    print("Text Vocabulary Size: {:d}".format(len(text_vocab_processor.vocabulary_)))
    print("x = {0}".format(x.shape))
    print("y = {0}".format(y.shape))
    print("")

    # Example: pos1[3] = [-2 -1 0 1 2 3 4 999 999 999 ... 999]
    #                    [95 96 97 98 99 100 101 999 999 999 ... 999]
    # => [11 12 13 14 15 16 21 17 17 17 ... 17]
    # dimension = MAX_SENTENCE_LENGTH
    pos_vocab_processor = tf.contrib.learn.preprocessing.VocabularyProcessor(FLAGS.max_sentence_length)
    pos_vocab_processor.fit(pos1 + pos2)
    p1 = np.array(list(pos_vocab_processor.transform(pos1)))
    p2 = np.array(list(pos_vocab_processor.transform(pos2)))
    print("Position Vocabulary Size: {:d}".format(len(pos_vocab_processor.vocabulary_)))
    print("position_1 = {0}".format(p1.shape))
    print("position_2 = {0}".format(p2.shape))
    print("")

    # Randomly shuffle data to split into train and test (dev)
    np.random.seed(10)
    shuffle_indices = np.random.permutation(np.arange(len(y)))
    x_shuffled = x[shuffle_indices]
    p1_shuffled = p1[shuffle_indices]
    p2_shuffled = p2[shuffle_indices]
    y_shuffled = y[shuffle_indices]

    # Split train/test set
    # TODO: This is very crude; should use cross-validation
    dev_sample_index = -1 * int(FLAGS.dev_sample_percentage * float(len(y)))
    x_train, x_dev = x_shuffled[:dev_sample_index], x_shuffled[dev_sample_index:]
    p1_train, p1_dev = p1_shuffled[:dev_sample_index], p1_shuffled[dev_sample_index:]
    p2_train, p2_dev = p2_shuffled[:dev_sample_index], p2_shuffled[dev_sample_index:]
    y_train, y_dev = y_shuffled[:dev_sample_index], y_shuffled[dev_sample_index:]
    print("Train/Dev split: {:d}/{:d}\n".format(len(y_train), len(y_dev)))

    with tf.Graph().as_default():
        session_conf = tf.ConfigProto(
            allow_soft_placement=FLAGS.allow_soft_placement,
            log_device_placement=FLAGS.log_device_placement)
        session_conf.gpu_options.allow_growth = FLAGS.gpu_allow_growth
        sess = tf.Session(config=session_conf)
        with sess.as_default():
            cnn = TextCNN(
                sequence_length=x_train.shape[1],
                num_classes=y_train.shape[1],
                text_vocab_size=len(text_vocab_processor.vocabulary_),
                text_embedding_size=FLAGS.text_embedding_dim,
                pos_vocab_size=len(pos_vocab_processor.vocabulary_),
                pos_embedding_size=FLAGS.pos_embedding_dim,
                filter_sizes=list(map(int, FLAGS.filter_sizes.split(","))),
                num_filters=FLAGS.num_filters,
                l2_reg_lambda=FLAGS.l2_reg_lambda)

            # Define Training procedure
            global_step = tf.Variable(0, name="global_step", trainable=False)
            optimizer = tf.train.AdadeltaOptimizer(FLAGS.learning_rate, FLAGS.decay_rate, 1e-6)
            gvs = optimizer.compute_gradients(cnn.loss)
            # Clip each gradient to [-1, 1] to keep updates stable
            capped_gvs = [(tf.clip_by_value(grad, -1.0, 1.0), var) for grad, var in gvs]
            train_op = optimizer.apply_gradients(capped_gvs, global_step=global_step)

            # Output directory for models and summaries
            timestamp = str(int(time.time()))
            out_dir = os.path.abspath(os.path.join(os.path.curdir, "runs", timestamp))
            print("Writing to {}\n".format(out_dir))

            # Summaries for loss and accuracy
            loss_summary = tf.summary.scalar("loss", cnn.loss)
            acc_summary = tf.summary.scalar("accuracy", cnn.accuracy)

            # Train summaries
            train_summary_op = tf.summary.merge([loss_summary, acc_summary])
            train_summary_dir = os.path.join(out_dir, "summaries", "train")
            train_summary_writer = tf.summary.FileWriter(train_summary_dir, sess.graph)

            # Dev summaries
            dev_summary_op = tf.summary.merge([loss_summary, acc_summary])
            dev_summary_dir = os.path.join(out_dir, "summaries", "dev")
            dev_summary_writer = tf.summary.FileWriter(dev_summary_dir, sess.graph)

            # Checkpoint directory. TensorFlow assumes this directory already exists, so we need to create it
            checkpoint_dir = os.path.abspath(os.path.join(out_dir, "checkpoints"))
            checkpoint_prefix = os.path.join(checkpoint_dir, "model")
            if not os.path.exists(checkpoint_dir):
                os.makedirs(checkpoint_dir)
            saver = tf.train.Saver(tf.global_variables(), max_to_keep=FLAGS.num_checkpoints)

            # Write vocabulary
            text_vocab_processor.save(os.path.join(out_dir, "text_vocab"))
            pos_vocab_processor.save(os.path.join(out_dir, "pos_vocab"))

            # Initialize all variables
            sess.run(tf.global_variables_initializer())

            # Pre-trained word2vec
            if FLAGS.embedding_path:
                pretrain_W = utils.load_word2vec(FLAGS.embedding_path, FLAGS.text_embedding_dim,
                                                 text_vocab_processor)
                sess.run(cnn.W_text.assign(pretrain_W))
                print("Successfully loaded the pre-trained word2vec model!\n")

            # Generate batches
            batches = data_helpers.batch_iter(
                list(zip(x_train, p1_train, p2_train, y_train)),
                FLAGS.batch_size, FLAGS.num_epochs)
            # Training loop. For each batch...
            best_f1 = 0.0  # best dev F1 so far; used to decide when to save a checkpoint
            for batch in batches:
                x_batch, p1_batch, p2_batch, y_batch = zip(*batch)
                # Train
                feed_dict = {
                    cnn.input_text: x_batch,
                    cnn.input_p1: p1_batch,
                    cnn.input_p2: p2_batch,
                    cnn.input_y: y_batch,
                    cnn.dropout_keep_prob: FLAGS.dropout_keep_prob
                }
                _, step, summaries, loss, accuracy = sess.run(
                    [train_op, global_step, train_summary_op, cnn.loss, cnn.accuracy], feed_dict)
                train_summary_writer.add_summary(summaries, step)

                # Training log display
                if step % FLAGS.display_every == 0:
                    time_str = datetime.datetime.now().isoformat()
                    print("{}: step {}, loss {:g}, acc {:g}".format(time_str, step, loss, accuracy))

                # Evaluation
                if step % FLAGS.evaluate_every == 0:
                    print("\nEvaluation:")
                    feed_dict = {
                        cnn.input_text: x_dev,
                        cnn.input_p1: p1_dev,
                        cnn.input_p2: p2_dev,
                        cnn.input_y: y_dev,
                        cnn.dropout_keep_prob: 1.0
                    }
                    summaries, loss, accuracy, predictions = sess.run(
                        [dev_summary_op, cnn.loss, cnn.accuracy, cnn.predictions], feed_dict)
                    dev_summary_writer.add_summary(summaries, step)
                    time_str = datetime.datetime.now().isoformat()
                    f1 = f1_score(np.argmax(y_dev, axis=1), predictions,
                                  labels=np.array(range(1, 19)), average="macro")
                    print("{}: step {}, loss {:g}, acc {:g}".format(time_str, step, loss, accuracy))
                    print("[UNOFFICIAL] (2*9+1)-Way Macro-Average F1 Score (excluding Other): {:g}\n".format(f1))

                    # Model checkpoint
                    if best_f1 < f1:
                        best_f1 = f1
                        path = saver.save(sess, checkpoint_prefix + "-{:.3g}".format(best_f1),
                                          global_step=step)
                        print("Saved model checkpoint to {}\n".format(path))
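# ---------------------------------------------------------------------------
# A minimal sketch (not from the script above) of how relative-position
# features like pos1/pos2 are typically built for relation extraction: each
# token gets its signed offset to the marked entity, and padding positions get
# a sentinel value (999, matching the example in the comments above). The
# helper name and signature are assumptions for illustration.
def relative_positions(seq_len, entity_index, max_sentence_length, pad_value=999):
    """E.g. seq_len=7, entity_index=2 -> "-2 -1 0 1 2 3 4 999 ... 999"."""
    offsets = [str(i - entity_index) for i in range(seq_len)]
    offsets += [str(pad_value)] * (max_sentence_length - seq_len)
    # Returned as a space-separated string so a VocabularyProcessor can map
    # each offset token to an id, exactly like the pos1/pos2 example above.
    return " ".join(offsets)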
def train_cnn(): """Training CNN model.""" # Load sentences, labels, and training parameters logger.info('✔︎ Loading data...') logger.info('✔︎ Training data processing...') train_data = dh.load_data_and_labels(FLAGS.training_data_file, FLAGS.embedding_dim) logger.info('✔︎ Validation data processing...') validation_data = dh.load_data_and_labels(FLAGS.validation_data_file, FLAGS.embedding_dim) logger.info('Recommended padding Sequence length is: {0}'.format( FLAGS.pad_seq_len)) logger.info('✔︎ Training data padding...') x_train_front, x_train_behind, y_train = dh.pad_data( train_data, FLAGS.pad_seq_len) logger.info('✔︎ Validation data padding...') x_validation_front, x_validation_behind, y_validation = dh.pad_data( validation_data, FLAGS.pad_seq_len) # Build vocabulary VOCAB_SIZE = dh.load_vocab_size(FLAGS.embedding_dim) pretrained_word2vec_matrix = dh.load_word2vec_matrix( VOCAB_SIZE, FLAGS.embedding_dim) # Build a graph and cnn object with tf.Graph().as_default(): session_conf = tf.ConfigProto( allow_soft_placement=FLAGS.allow_soft_placement, log_device_placement=FLAGS.log_device_placement) session_conf.gpu_options.allow_growth = FLAGS.gpu_options_allow_growth sess = tf.Session(config=session_conf) with sess.as_default(): cnn = TextCNN(sequence_length=FLAGS.pad_seq_len, num_classes=y_train.shape[1], vocab_size=VOCAB_SIZE, fc_hidden_size=FLAGS.fc_hidden_size, embedding_size=FLAGS.embedding_dim, embedding_type=FLAGS.embedding_type, filter_sizes=list( map(int, FLAGS.filter_sizes.split(","))), num_filters=FLAGS.num_filters, l2_reg_lambda=FLAGS.l2_reg_lambda, pretrained_embedding=pretrained_word2vec_matrix) # Define training procedure with tf.control_dependencies( tf.get_collection(tf.GraphKeys.UPDATE_OPS)): learning_rate = tf.train.exponential_decay( learning_rate=FLAGS.learning_rate, global_step=cnn.global_step, decay_steps=FLAGS.decay_steps, decay_rate=FLAGS.decay_rate, staircase=True) optimizer = tf.train.AdamOptimizer(learning_rate) grads, vars = zip(*optimizer.compute_gradients(cnn.loss)) grads, _ = tf.clip_by_global_norm(grads, clip_norm=FLAGS.norm_ratio) train_op = optimizer.apply_gradients( zip(grads, vars), global_step=cnn.global_step, name="train_op") # Keep track of gradient values and sparsity (optional) grad_summaries = [] for g, v in zip(grads, vars): if g is not None: grad_hist_summary = tf.summary.histogram( "{0}/grad/hist".format(v.name), g) sparsity_summary = tf.summary.scalar( "{0}/grad/sparsity".format(v.name), tf.nn.zero_fraction(g)) grad_summaries.append(grad_hist_summary) grad_summaries.append(sparsity_summary) grad_summaries_merged = tf.summary.merge(grad_summaries) # Output directory for models and summaries if FLAGS.train_or_restore == 'R': MODEL = input( "☛ Please input the checkpoints model you want to restore, " "it should be like(1490175368): " ) # The model you want to restore while not (MODEL.isdigit() and len(MODEL) == 10): MODEL = input( '✘ The format of your input is illegal, please re-input: ' ) logger.info( '✔︎ The format of your input is legal, now loading to next step...' 
) checkpoint_dir = 'runs/' + MODEL + '/checkpoints/' out_dir = os.path.abspath( os.path.join(os.path.curdir, "runs", MODEL)) logger.info("✔︎ Writing to {0}\n".format(out_dir)) else: timestamp = str(int(time.time())) out_dir = os.path.abspath( os.path.join(os.path.curdir, "runs", timestamp)) logger.info("✔︎ Writing to {0}\n".format(out_dir)) # Summaries for loss and accuracy loss_summary = tf.summary.scalar("loss", cnn.loss) acc_summary = tf.summary.scalar("accuracy", cnn.accuracy) # Train summaries train_summary_op = tf.summary.merge( [loss_summary, acc_summary, grad_summaries_merged]) train_summary_dir = os.path.join(out_dir, "summaries", "train") train_summary_writer = tf.summary.FileWriter( train_summary_dir, sess.graph) # Validation summaries validation_summary_op = tf.summary.merge( [loss_summary, acc_summary]) validation_summary_dir = os.path.join(out_dir, "summaries", "validation") validation_summary_writer = tf.summary.FileWriter( validation_summary_dir, sess.graph) saver = tf.train.Saver(tf.global_variables(), max_to_keep=FLAGS.num_checkpoints) if FLAGS.train_or_restore == 'R': # Load cnn model logger.info("✔ Loading model...") checkpoint_file = tf.train.latest_checkpoint(checkpoint_dir) logger.info(checkpoint_file) # Load the saved meta graph and restore variables saver = tf.train.import_meta_graph( "{0}.meta".format(checkpoint_file)) saver.restore(sess, checkpoint_file) else: checkpoint_dir = os.path.abspath( os.path.join(out_dir, "checkpoints")) if not os.path.exists(checkpoint_dir): os.makedirs(checkpoint_dir) sess.run(tf.global_variables_initializer()) sess.run(tf.local_variables_initializer()) # Embedding visualization config config = projector.ProjectorConfig() embedding_conf = config.embeddings.add() embedding_conf.tensor_name = 'embedding' embedding_conf.metadata_path = FLAGS.metadata_file projector.visualize_embeddings(train_summary_writer, config) projector.visualize_embeddings(validation_summary_writer, config) # Save the embedding visualization saver.save( sess, os.path.join(out_dir, 'embedding', 'embedding.ckpt')) current_step = sess.run(cnn.global_step) def train_step(x_batch_front, x_batch_behind, y_batch): """A single training step""" feed_dict = { cnn.input_x_front: x_batch_front, cnn.input_x_behind: x_batch_behind, cnn.input_y: y_batch, cnn.dropout_keep_prob: FLAGS.dropout_keep_prob, cnn.is_training: True } _, step, summaries, loss, accuracy = sess.run([ train_op, cnn.global_step, train_summary_op, cnn.loss, cnn.accuracy ], feed_dict) logger.info("step {0}: loss {1:g}, acc {2:g}".format( step, loss, accuracy)) train_summary_writer.add_summary(summaries, step) def validation_step(x_batch_front, x_batch_behind, y_batch, writer=None): """Evaluates model on a validation set""" feed_dict = { cnn.input_x_front: x_batch_front, cnn.input_x_behind: x_batch_behind, cnn.input_y: y_batch, cnn.dropout_keep_prob: 1.0, cnn.is_training: False } step, summaries, loss, accuracy, recall, precision, f1, auc = sess.run( [ cnn.global_step, validation_summary_op, cnn.loss, cnn.accuracy, cnn.recall, cnn.precision, cnn.F1, cnn.AUC ], feed_dict) logger.info( "step {0}: loss {1:g}, acc {2:g}, recall {3:g}, precision {4:g}, f1 {5:g}, AUC {6}" .format(step, loss, accuracy, recall, precision, f1, auc)) if writer: writer.add_summary(summaries, step) # Generate batches batches = dh.batch_iter( list(zip(x_train_front, x_train_behind, y_train)), FLAGS.batch_size, FLAGS.num_epochs) num_batches_per_epoch = int( (len(x_train_front) - 1) / FLAGS.batch_size) + 1 # Training loop. For each batch... 
for batch in batches: x_batch_front, x_batch_behind, y_batch = zip(*batch) train_step(x_batch_front, x_batch_behind, y_batch) current_step = tf.train.global_step(sess, cnn.global_step) if current_step % FLAGS.evaluate_every == 0: logger.info("\nEvaluation:") validation_step(x_validation_front, x_validation_behind, y_validation, writer=validation_summary_writer) if current_step % FLAGS.checkpoint_every == 0: checkpoint_prefix = os.path.join(checkpoint_dir, "model") path = saver.save(sess, checkpoint_prefix, global_step=current_step) logger.info( "✔︎ Saved model checkpoint to {0}\n".format(path)) if current_step % num_batches_per_epoch == 0: current_epoch = current_step // num_batches_per_epoch logger.info( "✔︎ Epoch {0} has finished!".format(current_epoch)) logger.info("✔︎ Done.")
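# ---------------------------------------------------------------------------
# The training loops in these scripts all call a batch_iter helper from their
# data module, but none of the snippets show it. A minimal sketch, assuming
# the conventional shuffle-then-slice generator (the exact behavior and
# signature are assumptions, not any one repo's code):
import numpy as np

def batch_iter(data, batch_size, num_epochs, shuffle=True):
    """Yield mini-batches over `data` for `num_epochs` epochs."""
    data = np.array(data, dtype=object)
    num_batches_per_epoch = int((len(data) - 1) / batch_size) + 1
    for _ in range(num_epochs):
        # Reshuffle once per epoch so batches differ across epochs
        order = np.random.permutation(len(data)) if shuffle else np.arange(len(data))
        for batch_num in range(num_batches_per_epoch):
            start = batch_num * batch_size
            end = min((batch_num + 1) * batch_size, len(data))
            yield data[order[start:end]]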
X_dev, Y_dev, Xsymbol_dev = dataprocessor.FormatVecForCNN(dev_featureVec, dev_labelVec, dev_symbolVec)
dataprocessor.dumpPlain("data/train.feature.vid.txt", "data/train.feature.idv.txt",
                        "data/train.label.vid.txt", "data/train.label.idv.txt",
                        "data/train.symbol.vid.txt", "data/train.symbol.idv.txt")
dataprocessor.dump("data/vocab_all.pkl")
print(np.array(X_train).shape)
print(np.array(Y_train).shape)
print(np.array(Xsymbol_train).shape)

with tf.Session() as sess:
    cnn = TextCNN(
        sequence_length=FLAGS.sequence_length,
        num_classes=FLAGS.num_classes,
        dim=FLAGS.dim,
        filter_sizes=list(map(int, FLAGS.filter_sizes.split(","))),
        num_filters=FLAGS.num_filters,
        l2_reg_lambda=FLAGS.l2_reg_lambda)
    sinfo = sess.run(cnn.shapeinfo)
    print("sinfo")
    print(sinfo)

    global_step = tf.Variable(0, name="global_step", trainable=False)
    optimizer = tf.train.AdamOptimizer(1e-3)
    grads_and_vars = optimizer.compute_gradients(cnn.loss)
    train_op = optimizer.apply_gradients(grads_and_vars, global_step=global_step)

    grad_summaries = []
    for g, v in grads_and_vars:
        if g is not None:
            grad_hist_summary = tf.summary.histogram("{}/grad/hist".format(v.name), g)
with tf.Graph().as_default():
    session_conf = tf.ConfigProto(
        allow_soft_placement=FLAGS.allow_soft_placement,
        log_device_placement=FLAGS.log_device_placement,
        intra_op_parallelism_threads=2,
        inter_op_parallelism_threads=2)
    sess = tf.Session(config=session_conf)
    with sess.as_default():
        cnn = TextCNN(
            sequence_length=text_x.shape[1],
            vocab_size=vocab_size,
            embedding_size=FLAGS.embedding_dim,
            # list() so the sizes can be iterated more than once under Python 3
            filter_sizes=list(map(int, FLAGS.filter_sizes.split(","))),
            num_filters=FLAGS.num_filters,
            num_ratings=ratings.shape[1],
            num_locations=locations.shape[1],
            num_genders=genders.shape[1],
            num_ages=ages.shape[1],
            hidden_size=300,
            l2_reg_lambda=FLAGS.l2_reg_lambda)

        # Define Training procedure
        learning_rate = tf.placeholder(tf.float32, shape=[], name="learning_rate")
        adv_lambda = tf.placeholder(tf.float32, shape=[], name="adversarial_lambda")
        global_step = tf.Variable(0, name="global_step", trainable=False)
        all_var_list = tf.trainable_variables()
        optimizer_n = tf.train.AdamOptimizer(
gc.collect()

# savepath = '/Users/bong/works/tfen/w2v_cnn/runs/backup/checkpoints/model-2900'
savepath = '/Users/bong/works/tfen/w2v_cnn/runs/1464326614/checkpoints/model-6700'

with tf.Graph().as_default():
    session_conf = tf.ConfigProto(
        allow_soft_placement=FLAGS.allow_soft_placement,
        log_device_placement=FLAGS.log_device_placement)
    sess = tf.Session(config=session_conf)
    with sess.as_default():
        cnn = TextCNN(sequence_length=x_test.shape[1],
                      num_classes=3,
                      embedding_size=FLAGS.embedding_dim,
                      filter_sizes=list(map(int, FLAGS.filter_sizes.split(","))),
                      num_filters=FLAGS.num_filters,
                      l2_reg_lambda=FLAGS.l2_reg_lambda)

        # tf.all_variables() is deprecated; tf.global_variables() is its replacement
        saver = tf.train.Saver(tf.global_variables())
        saver.restore(sess, savepath)

        def test_step(x_batch, y_batch, writer=None):
            """Evaluates the model on a test set."""
            feed_dict = {
                cnn.input_x: x_batch,
                cnn.input_y: y_batch,
                cnn.dropout_keep_prob: 1.0
# print("Train/Dev split: {:d}/{:d}".format(len(y_train), len(y_dev))) # print x_train.shape, y_train.shape x_train, y_train = x_shuffled, y_shuffled # Training # ================================================== with tf.Graph().as_default(): session_conf = tf.ConfigProto(allow_soft_placement=True, log_device_placement=False) sess = tf.Session(config=session_conf) with sess.as_default(): cnn = TextCNN(sequence_length=x_train.shape[1], num_classes=y_train.shape[1], vocab_size=len(vocab_processor.vocabulary_), embedding_size=embedding_dim, filter_sizes=filter_sizes, num_filters=num_filters, l2_reg_lambda=l2_reg_lambda) # Define Training procedure global_step = tf.Variable(0, name="global_step", trainable=False) optimizer = tf.train.AdamOptimizer(1e-3) grads_and_vars = optimizer.compute_gradients(cnn.loss) train_op = optimizer.apply_gradients(grads_and_vars, global_step=global_step) # Initialize all variables sess.run(tf.global_variables_initializer()) def train_step(x_batch, y_batch):
def train_cnn(): """Step 0: load sentences, labels, and training parameters""" train_file = sys.argv[1] x_raw, y_raw, df, labels = data_helper.load_data_and_labels(train_file) parameter_file = sys.argv[2] params = json.loads(open(parameter_file).read()) """Step 1: pad each sentence to the same length and map each word to an id""" max_document_length = max([len(x.split(' ')) for x in x_raw]) logging.info( 'The maximum length of all sentences: {}'.format(max_document_length)) vocab_processor = learn.preprocessing.VocabularyProcessor( max_document_length) x = np.array(list(vocab_processor.fit_transform(x_raw))) y = np.array(y_raw) ''' dev_file = sys.argv[3] x_raw1, y_raw1, df, labels = data_helper.load_data_and_labels(dev_file) """Step 1: pad each sentence to the same length and map each word to an id""" max_document_length = max([len(x.split(' ')) for x in x_raw1]) logging.info('The maximum length of all sentences: {}'.format(max_document_length)) vocab_processor1 = learn.preprocessing.VocabularyProcessor(max_document_length) x_dev = np.array(list(vocab_processor1.fit_transform(x_raw1))) y_dev = np.array(y_raw1) ''' """Step 2: split the original dataset into train and test sets""" #x_, x_test, y_, y_test = train_test_split(x, y, test_size=0.13756, random_state=42) x_train, x_dev, y_train, y_dev = train_test_split(x, y, test_size=0.1557971014) """Step 3: shuffle the train set and split the train set into train and dev sets""" #shuffle_indices = np.random.permutation(np.arange(len(y_))) #x_shuffled = x_[shuffle_indices] #y_shuffled = y_[shuffle_indices] #x_train, x_dev, y_train, y_dev = train_test_split(x_shuffled, y_shuffled, test_size=0.1) """Step 4: save the labels into labels.json since predict.py needs it""" with open('./labels.json', 'w') as outfile: json.dump(labels, outfile, indent=4) #logging.info('x_train: {}, x_dev: {}, x_test: {}'.format(len(x_train), len(x_dev), len(x_test))) #logging.info('y_train: {}, y_dev: {}, y_test: {}'.format(len(y_train), len(y_dev), len(y_test))) logging.info('x_train: {}, x_dev: {}'.format(len(x_train), len(x_dev))) logging.info('y_train: {}, y_dev: {}'.format(len(y_train), len(y_dev))) """Step 5: build a graph and cnn object""" graph = tf.Graph() with graph.as_default(): session_conf = tf.ConfigProto(allow_soft_placement=True, log_device_placement=False) sess = tf.Session(config=session_conf) with sess.as_default(): cnn = TextCNN(sequence_length=x_train.shape[1], num_classes=y_train.shape[1], vocab_size=len(vocab_processor.vocabulary_), embedding_size=params['embedding_dim'], filter_sizes=list( map(int, params['filter_sizes'].split(","))), num_filters=params['num_filters'], l2_reg_lambda=params['l2_reg_lambda']) global_step = tf.Variable(0, name="global_step", trainable=False) optimizer = tf.train.AdamOptimizer(1e-3) grads_and_vars = optimizer.compute_gradients(cnn.loss) train_op = optimizer.apply_gradients(grads_and_vars, global_step=global_step) timestamp = str(int(time.time())) out_dir = os.path.abspath( os.path.join(os.path.curdir, "trained_model_" + timestamp)) checkpoint_dir = os.path.abspath( os.path.join(out_dir, "checkpoints")) checkpoint_prefix = os.path.join(checkpoint_dir, "model") if not os.path.exists(checkpoint_dir): os.makedirs(checkpoint_dir) saver = tf.train.Saver(tf.all_variables()) # One training step: train the model with one batch def train_step(x_batch, y_batch): feed_dict = { cnn.input_x: x_batch, cnn.input_y: y_batch, cnn.dropout_keep_prob: params['dropout_keep_prob'] } _, step, loss, acc = sess.run( [train_op, global_step, 
cnn.loss, cnn.accuracy], feed_dict) # One evaluation step: evaluate the model with one batch def dev_step(x_batch, y_batch): feed_dict = { cnn.input_x: x_batch, cnn.input_y: y_batch, cnn.dropout_keep_prob: 1.0 } step, loss, acc, num_correct = sess.run( [global_step, cnn.loss, cnn.accuracy, cnn.num_correct], feed_dict) return num_correct # Save the word_to_id map since predict.py needs it vocab_processor.save(os.path.join(out_dir, "vocab.pickle")) sess.run(tf.initialize_all_variables()) # Training starts here train_batches = data_helper.batch_iter(list(zip(x_train, y_train)), params['batch_size'], params['num_epochs']) best_accuracy, best_at_step = 0, 0 """Step 6: train the cnn model with x_train and y_train (batch by batch)""" for train_batch in train_batches: x_train_batch, y_train_batch = zip(*train_batch) train_step(x_train_batch, y_train_batch) current_step = tf.train.global_step(sess, global_step) """Step 6.1: evaluate the model with x_dev and y_dev (batch by batch)""" if current_step % params['evaluate_every'] == 0: dev_batches = data_helper.batch_iter( list(zip(x_dev, y_dev)), params['batch_size'], 1) total_dev_correct = 0 for dev_batch in dev_batches: x_dev_batch, y_dev_batch = zip(*dev_batch) num_dev_correct = dev_step(x_dev_batch, y_dev_batch) total_dev_correct += num_dev_correct dev_accuracy = float(total_dev_correct) / len(y_dev) logging.critical( 'Accuracy on dev set: {}'.format(dev_accuracy)) """Step 6.2: save the model if it is the best based on accuracy of the dev set""" if dev_accuracy >= best_accuracy: best_accuracy, best_at_step = dev_accuracy, current_step path = saver.save(sess, checkpoint_prefix, global_step=current_step) logging.critical('Saved model {} at step {}'.format( path, best_at_step)) logging.critical('Best accuracy {} at step {}'.format( best_accuracy, best_at_step)) ''' """Step 7: predict x_test (batch by batch)""" test_batches = data_helper.batch_iter(list(zip(x_test, y_test)), params['batch_size'], 1) total_test_correct = 0 for test_batch in test_batches: x_test_batch, y_test_batch = zip(*test_batch) num_test_correct = dev_step(x_test_batch, y_test_batch) total_test_correct += num_test_correct test_accuracy = float(total_test_correct) / len(y_test) logging.critical('Accuracy on test set is {} based on the best model {}'.format(test_accuracy, path)) ''' logging.critical('The training is complete')
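# ---------------------------------------------------------------------------
# Note on the dev loop above: it sums cnn.num_correct per batch instead of
# averaging per-batch accuracies, which stays exact when the last batch is
# smaller than batch_size. A sketch of how such a node is commonly defined
# inside the model class (assumed; this snippet does not show TextCNN's body):
#
#   correct_predictions = tf.equal(self.predictions, tf.argmax(self.input_y, 1))
#   self.accuracy = tf.reduce_mean(tf.cast(correct_predictions, tf.float32), name="accuracy")
#   self.num_correct = tf.reduce_sum(tf.cast(correct_predictions, tf.float32), name="num_correct")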
def train(x_train, y_train, vocab_processor, x_dev, y_dev):
    # Training
    # ==================================================
    with tf.Graph().as_default():
        session_conf = tf.ConfigProto(
            allow_soft_placement=FLAGS.allow_soft_placement,
            log_device_placement=FLAGS.log_device_placement)
        # Consider adding gpu_options.allow_growth = True to session_conf;
        # check how the GPU program we ran earlier did it:
        # session_conf = tf.ConfigProto()
        # session_conf.allow_soft_placement = True
        # session_conf.log_device_placement = True
        # session_conf.gpu_options.allow_growth = True
        """
        Parameter notes:
        sequence_length: sentence length
        num_classes: number of classes in the classification task
        vocab_size: number of words in the vocabulary
        embedding_size: word vector dimension
        filter_sizes: filter sizes, as a list
        num_filters: number of filters
        l2_reg_lambda: regularization weight
        """
        sess = tf.Session(config=session_conf)
        with sess.as_default():
            cnn = TextCNN(sequence_length=x_train.shape[1],
                          num_classes=y_train.shape[1],
                          vocab_size=len(vocab_processor.vocabulary_),
                          embedding_size=FLAGS.embedding_dim,
                          filter_sizes=list(map(int, FLAGS.filter_sizes.split(","))),
                          num_filters=FLAGS.num_filters,
                          l2_reg_lambda=FLAGS.l2_reg_lambda)

            # Define Training procedure
            global_step = tf.Variable(0, name="global_step", trainable=False)
            optimizer = tf.train.AdamOptimizer(1e-3)
            grads_and_vars = optimizer.compute_gradients(cnn.loss)
            train_op = optimizer.apply_gradients(grads_and_vars, global_step=global_step)

            # Keep track of gradient values and sparsity (optional)
            grad_summaries = []
            for g, v in grads_and_vars:
                if g is not None:
                    grad_hist_summary = tf.summary.histogram("{}/grad/hist".format(v.name), g)
                    sparsity_summary = tf.summary.scalar("{}/grad/sparsity".format(v.name),
                                                         tf.nn.zero_fraction(g))
                    grad_summaries.append(grad_hist_summary)
                    grad_summaries.append(sparsity_summary)
            grad_summaries_merged = tf.summary.merge(grad_summaries)

            # Output directory for models and summaries
            timestamp = str(int(time.time()))
            out_dir = os.path.abspath(os.path.join(os.path.curdir, "runs", timestamp))
            print("Writing to {}\n".format(out_dir))

            # Summaries for loss and accuracy
            loss_summary = tf.summary.scalar("loss", cnn.loss)
            acc_summary = tf.summary.scalar("accuracy", cnn.accuracy)

            # Train Summaries
            train_summary_op = tf.summary.merge([loss_summary, acc_summary, grad_summaries_merged])
            train_summary_dir = os.path.join(out_dir, "summaries", "train")
            train_summary_writer = tf.summary.FileWriter(train_summary_dir, sess.graph)

            # Dev summaries
            dev_summary_op = tf.summary.merge([loss_summary, acc_summary])
            dev_summary_dir = os.path.join(out_dir, "summaries", "dev")
            dev_summary_writer = tf.summary.FileWriter(dev_summary_dir, sess.graph)

            # Checkpoint directory. TensorFlow assumes this directory already exists, so we need to create it
            checkpoint_dir = os.path.abspath(os.path.join(out_dir, "checkpoints"))
            checkpoint_prefix = os.path.join(checkpoint_dir, "model")
            if not os.path.exists(checkpoint_dir):
                os.makedirs(checkpoint_dir)
            saver = tf.train.Saver(tf.global_variables(), max_to_keep=FLAGS.num_checkpoints)

            # Write vocabulary
            vocab_processor.save(os.path.join(out_dir, "vocab"))

            # Initialize all variables
            sess.run(tf.global_variables_initializer())

            def train_step(x_batch, y_batch):
                """
                A single training step.
                x_batch: a batch of sentences; y_batch: the corresponding labels
                """
                feed_dict = {
                    cnn.input_x: x_batch,
                    cnn.input_y: y_batch,
                    cnn.dropout_keep_prob: FLAGS.dropout_keep_prob
                }
                _, step, summaries, loss, accuracy = sess.run(
                    [train_op, global_step, train_summary_op, cnn.loss, cnn.accuracy], feed_dict)
                time_str = datetime.datetime.now().isoformat()
                print("{}: step {}, loss {:g}, acc {:g}".format(time_str, step, loss, accuracy))
                train_summary_writer.add_summary(summaries, step)

            def dev_step(x_batch, y_batch, writer=None):
                """
                Evaluates the model on a dev set.
                """
                feed_dict = {
                    cnn.input_x: x_batch,
                    cnn.input_y: y_batch,
                    cnn.dropout_keep_prob: 1.0
                }
                step, summaries, loss, accuracy = sess.run(
                    [global_step, dev_summary_op, cnn.loss, cnn.accuracy], feed_dict)
                time_str = datetime.datetime.now().isoformat()
                print("{}: step {}, loss {:g}, acc {:g}".format(time_str, step, loss, accuracy))
                if writer:
                    writer.add_summary(summaries, step)

            # Generate batches: batch_size = 64; num_epochs = 1 (originally 200)
            batches = data_helpers.batch_iter(list(zip(x_train, y_train)),
                                              FLAGS.batch_size, FLAGS.num_epochs)
            # Training loop. For each batch...
            # (batches yields data on demand, since data_helpers.batch_iter uses yield)
            for batch in batches:
                x_batch, y_batch = zip(*batch)  # unpack the batch
                train_step(x_batch, y_batch)
                # global_step holds the number of training steps taken so far
                current_step = tf.train.global_step(sess, global_step)
                if current_step % FLAGS.evaluate_every == 0:
                    print("\nEvaluation:")
                    dev_step(x_dev, y_dev, writer=dev_summary_writer)
                    print("")
                if current_step % FLAGS.checkpoint_every == 0:
                    path = saver.save(sess, checkpoint_prefix, global_step=current_step)
                    print("Saved model checkpoint to {}\n".format(path))
def train(x_train, y_train, vocab_processor, x_dev, y_dev,
          x_real_len_train, x_real_len_dev, sorted_label):
    # Training
    # ==================================================
    with tf.Graph().as_default():
        session_conf = tf.ConfigProto(
            allow_soft_placement=FLAGS.allow_soft_placement,
            log_device_placement=FLAGS.log_device_placement)
        sess = tf.Session(config=session_conf)
        with sess.as_default():
            if FLAGS.model_type == "cnnrnn":
                obj = TextCNNRNN(
                    sequence_length=FLAGS.max_document_length,
                    num_classes=[tmp_y.shape[1] for tmp_y in y_train],
                    vocab_size=len(vocab_processor.vocabulary_),
                    hidden_unit=FLAGS.hidden_unit,
                    embedding_size=FLAGS.embedding_dim,
                    filter_sizes=list(map(int, FLAGS.filter_sizes.split(","))),
                    num_filters=FLAGS.num_filters,
                    l2_reg_lambda=FLAGS.l2_reg_lambda)
            elif FLAGS.model_type == "rnncnn":
                obj = TextRNNCNN(
                    sequence_length=FLAGS.max_document_length,
                    num_classes=[tmp_y.shape[1] for tmp_y in y_train],
                    vocab_size=len(vocab_processor.vocabulary_),
                    hidden_unit=FLAGS.hidden_unit,
                    embedding_size=FLAGS.embedding_dim,
                    filter_sizes=list(map(int, FLAGS.filter_sizes.split(","))),
                    num_filters=FLAGS.num_filters,
                    l2_reg_lambda=FLAGS.l2_reg_lambda)
            elif FLAGS.model_type == "rnnandcnn":
                obj = TextRNNandCNN(
                    sequence_length=FLAGS.max_document_length,
                    num_classes=[tmp_y.shape[1] for tmp_y in y_train],
                    vocab_size=len(vocab_processor.vocabulary_),
                    hidden_unit=FLAGS.hidden_unit,
                    embedding_size=FLAGS.embedding_dim,
                    filter_sizes=list(map(int, FLAGS.filter_sizes.split(","))),
                    num_filters=FLAGS.num_filters,
                    l2_reg_lambda=FLAGS.l2_reg_lambda)
            elif FLAGS.model_type == "rnn":
                obj = TextRNN(
                    sequence_length=FLAGS.max_document_length,
                    num_classes=[tmp_y.shape[1] for tmp_y in y_train],
                    vocab_size=len(vocab_processor.vocabulary_),
                    hidden_unit=FLAGS.hidden_unit,
                    embedding_size=FLAGS.embedding_dim,
                    l2_reg_lambda=FLAGS.l2_reg_lambda)
            else:
                obj = TextCNN(
                    sequence_length=FLAGS.max_document_length,
                    num_classes=[tmp_y.shape[1] for tmp_y in y_train],
                    vocab_size=len(vocab_processor.vocabulary_),
                    task_num=FLAGS.task_num,
                    embedding_size=FLAGS.embedding_dim,
                    filter_sizes=list(map(int, FLAGS.filter_sizes.split(","))),
                    num_filters=FLAGS.num_filters,
                    l2_reg_lambda=FLAGS.l2_reg_lambda)

            # Define Training procedure
            global_step = tf.Variable(0, name="global_step", trainable=False)
            optimizer = tf.train.AdamOptimizer(1e-3)
            update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
            with tf.control_dependencies(update_ops):
                grads_and_vars = optimizer.compute_gradients(obj.loss)
                train_op = optimizer.apply_gradients(grads_and_vars, global_step=global_step)

            out_dir = os.path.abspath(os.path.join(os.path.curdir, "runs", FLAGS.model_version))
            print("Writing to {}\n".format(out_dir))

            # Checkpoint directory. TensorFlow assumes this directory already exists, so we need to create it
            checkpoint_dir = os.path.abspath(os.path.join(out_dir, "checkpoints"))
            checkpoint_prefix = os.path.join(checkpoint_dir, "model")
            if not os.path.exists(checkpoint_dir):
                os.makedirs(checkpoint_dir)
            saver = tf.train.Saver(tf.global_variables(), max_to_keep=FLAGS.num_checkpoints)

            # Save train params since eval.py needs them
            # (cPickle was renamed to pickle in Python 3; pickle files need binary mode)
            trained_dir = os.path.abspath(os.path.join(out_dir, "trained_results"))
            if not os.path.exists(trained_dir):
                os.makedirs(trained_dir)
            with open(trained_dir + "/sorted_label", "wb") as f:
                pickle.dump(sorted_label, f)

            # Write vocabulary
            vocab_processor.save(os.path.join(out_dir, "vocab"))

            # Initialize all variables
            sess.run(tf.global_variables_initializer())

            def train_step(x_batch, y_batch, x_real_len_batch):
                """A single training step."""
                if FLAGS.model_type == "cnn":
                    feed_dict = {
                        obj.input_x: x_batch,
                        obj.dropout_keep_prob: FLAGS.dropout_keep_prob,
                        obj.is_training: True
                    }
                else:
                    feed_dict = {
                        obj.input_x: x_batch,
                        obj.dropout_keep_prob: FLAGS.dropout_keep_prob,
                        obj.real_len: x_real_len_batch
                    }
                for i in range(FLAGS.task_num):
                    feed_dict[obj.input_y[i]] = y_batch[i]
                _, step, loss, accuracy = sess.run(
                    [train_op, global_step, obj.loss, obj.accuracy], feed_dict)
                time_str = datetime.datetime.now().isoformat()
                for i in range(FLAGS.task_num):
                    print("{}: step {}, loss {:g}, acc_{:d} {:g}".format(
                        time_str, step, loss, i, accuracy[i]))

            def overfit(dev_acc_history, eva_num=3):
                """True if the dev metric has not improved over the last `eva_num` evaluations.
                (The argument is the list of dev accuracies collected so far.)"""
                n = len(dev_acc_history)
                if n < eva_num:
                    return False
                for i in range(n - eva_num + 1, n):
                    if dev_acc_history[i] > dev_acc_history[i - 1]:
                        return False
                return True

            def dev_step(x_test, y_test, x_real_len_test):
                """Evaluates the model on a dev set."""
                all_pred = [[] for i in range(FLAGS.task_num)]
                all_act = [[] for i in range(FLAGS.task_num)]
                zip_list = []
                for i in range(FLAGS.task_num):
                    zip_list.append(list(zip(x_test, y_test[i], x_real_len_test)))
                batches, total_batch_num = data_helpers.multi_task_batch_iter(
                    zip_list, FLAGS.batch_size, 1, shuffle=False)
                for i in range(total_batch_num):
                    y_batch = []
                    for j in range(FLAGS.task_num):
                        tmp_batch = next(batches[j])
                        x_batch, y_tmp_batch, x_real_len_batch = zip(*tmp_batch)
                        y_batch.append(y_tmp_batch)
                    if FLAGS.model_type == "cnn":
                        feed_dict = {
                            obj.input_x: x_batch,
                            obj.dropout_keep_prob: 1.0,
                            obj.is_training: False
                        }
                    else:
                        feed_dict = {
                            obj.input_x: x_batch,
                            obj.dropout_keep_prob: 1.0,
                            obj.real_len: x_real_len_batch
                        }
                    for j in range(FLAGS.task_num):
                        feed_dict[obj.input_y[j]] = y_batch[j]
                    step, pred = sess.run([global_step, obj.predictions], feed_dict)
                    for j in range(FLAGS.task_num):
                        all_pred[j] = np.concatenate([all_pred[j], pred[j]])
                        all_act[j] = np.concatenate([all_act[j], np.argmax(y_batch[j], axis=1)])
                # A sample counts as an error if any task's prediction is wrong
                err_cnt = 0
                for i in range(len(x_test)):
                    for j in range(FLAGS.task_num):
                        if all_pred[j][i] != all_act[j][i]:
                            err_cnt += 1
                            break
                dev_acc = 1.0 * (len(x_test) - err_cnt) / len(x_test)
                print("dev_sample {}, dev_acc {:g}".format(len(x_test), dev_acc))
                return dev_acc

            # Generate batches
            zip_list = []
            for i in range(FLAGS.task_num):
                zip_list.append(list(zip(x_train, y_train[i], x_real_len_train)))
            batches, total_batch_num = data_helpers.multi_task_batch_iter(
                zip_list, FLAGS.batch_size, FLAGS.num_epochs)

            # Training loop. For each batch...
            dev_acc = []
            for i in range(total_batch_num):
                y_batch = []
                for j in range(FLAGS.task_num):
                    tmp_batch = next(batches[j])
                    x_batch, tmp_y_batch, x_real_len_batch = zip(*tmp_batch)
                    y_batch.append(tmp_y_batch)
                train_step(x_batch, y_batch, x_real_len_batch)
                current_step = tf.train.global_step(sess, global_step)
                if current_step % FLAGS.evaluate_every == 0:
                    print("\nEvaluation:", current_step)
                    cur_acc = dev_step(x_dev, y_dev, x_real_len_dev)
                    path = saver.save(sess, checkpoint_prefix, global_step=current_step)
                    print("Saved model checkpoint to {}\n".format(path))
                    dev_acc.append(cur_acc)
                    if overfit(dev_acc):
                        print("Dev accuracy stopped improving; stopping training.\n")
                        sys.exit(0)
                    print("")
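# ---------------------------------------------------------------------------
# Behavior of the early-stopping rule above, on illustrative values (overfit
# is nested inside train(), so this is a walk-through, not runnable as-is):
#   overfit([0.71, 0.74, 0.74, 0.73], eva_num=3) -> True   # no gain in last 3 checks -> stop
#   overfit([0.71, 0.74, 0.76],       eva_num=3) -> False  # still improving -> keep training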
def architecture(self):
    ####### DEFINITIONS #######
    multiplier = len(FLAGS.filter_sizes_cnn1.split(","))

    # create system parameters
    weights = {
        'fc1': tf.Variable(
            tf.random_normal([(multiplier * FLAGS.num_filters), FLAGS.num_classes]),
            name="fc1-weights"),
        'att2-W-char': tf.Variable(
            tf.random_normal([multiplier * FLAGS.num_filters, multiplier * FLAGS.num_filters]),
            name='att2-weights-W-char'),
        'att2-v-char': tf.Variable(
            tf.random_normal([multiplier * FLAGS.num_filters]),
            name='att2-weights-v-char'),
    }
    bias = {
        'fc1': tf.Variable(tf.random_normal([FLAGS.num_classes]), name="fc1-bias-noreg"),
        'att2-W-char': tf.Variable(tf.random_normal([multiplier * FLAGS.num_filters]),
                                   name="att2-char-bias-noreg")
    }

    # cnn initialization
    cnn = TextCNN(sequence_length=FLAGS.sequence_length,
                  num_classes=FLAGS.num_classes,
                  embedding_size=FLAGS.embedding_dim,
                  filter_sizes=list(map(int, FLAGS.filter_sizes_cnn1.split(","))),
                  num_filters=FLAGS.num_filters,
                  vocab_size=self.char_embeddings.shape[0],
                  l2_reg_lambda=FLAGS.l2_reg_lambda)
    del self.char_embeddings

    # ARCHITECTURE
    # forward pass
    cnn_output = cnn.h_pool_flat

    # attention on char - user level
    att_context_vector_char = tf.tanh(
        tf.matmul(cnn_output, weights["att2-W-char"]) + bias["att2-W-char"])
    attentions_char = tf.nn.softmax(
        tf.matmul(att_context_vector_char, tf.expand_dims(weights["att2-v-char"], -1)),
        axis=0)  # softmax over the batch axis: one weight per message of the user
    attention_output_char = tf.reduce_sum(cnn_output * attentions_char, 0)
    attention_output_char = tf.reshape(attention_output_char,
                                       [1, multiplier * FLAGS.num_filters])

    # final fully connected layer (logits)
    logits = tf.matmul(attention_output_char, weights['fc1']) + bias['fc1']
    prediction = tf.nn.softmax(logits)

    if self.mode != "Test":
        loss_op = tf.reduce_mean(
            tf.nn.softmax_cross_entropy_with_logits_v2(logits=logits, labels=self.Y))
        # add L2 regularization (variables tagged "noreg" and biases are excluded)
        l2 = self.tf_ideal_l2_reg_parameter * sum(
            tf.nn.l2_loss(tf_var) for tf_var in tf.trainable_variables()
            if not ("noreg" in tf_var.name or "Bias" in tf_var.name))
        loss_op += l2

        # optimizer
        optimizer = tf.train.AdamOptimizer(learning_rate=FLAGS.learning_rate)
        train_op = optimizer.minimize(loss_op)

        # calculate training accuracy for checking correctness
        correct_pred = tf.equal(tf.argmax(prediction, 1), tf.argmax(self.Y, 1))
        accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))
        return accuracy, train_op, loss_op, prediction, cnn
    else:
        return None, None, None, prediction, cnn
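# ---------------------------------------------------------------------------
# To make the attention shapes above concrete, a small numpy sketch of the
# same computation (dimensions are illustrative: n messages from one user,
# d = multiplier * num_filters pooled CNN features per message):
import numpy as np

n, d = 5, 300
cnn_output = np.random.randn(n, d)
W, b, v = np.random.randn(d, d), np.random.randn(d), np.random.randn(d)

context = np.tanh(cnn_output @ W + b)          # (n, d)
scores = context @ v[:, None]                  # (n, 1): one score per message
att = np.exp(scores) / np.exp(scores).sum(0)   # softmax over the n messages
user_vector = (cnn_output * att).sum(0)        # (d,): a single user-level vector
assert user_vector.shape == (d,)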
def train(w2v_model):
    # Training
    # ==================================================
    x_train, x_dev, y_train, y_dev, vocab_size = load_data(w2v_model)
    with tf.Graph().as_default():
        session_conf = tf.ConfigProto(
            allow_soft_placement=FLAGS.allow_soft_placement,
            log_device_placement=FLAGS.log_device_placement)
        sess = tf.Session(config=session_conf)
        with sess.as_default():
            cnn = TextCNN(w2v_model,
                          sequence_length=x_train.shape[1],
                          num_classes=y_train.shape[1],
                          vocab_size=vocab_size,
                          embedding_size=FLAGS.embedding_dim,
                          filter_sizes=list(map(int, FLAGS.filter_sizes.split(","))),
                          num_filters=FLAGS.num_filters,
                          l2_reg_lambda=FLAGS.l2_reg_lambda)

            # Define training procedure
            global_step = tf.Variable(0, name="global_step", trainable=False)
            optimizer = tf.train.AdamOptimizer(1e-3)
            grads_and_vars = optimizer.compute_gradients(cnn.loss)
            train_op = optimizer.apply_gradients(grads_and_vars, global_step=global_step)

            # Keep track of gradient values and sparsity (optional)
            grad_summaries = []
            for g, v in grads_and_vars:
                if g is not None:
                    grad_hist_summary = tf.summary.histogram("{}/grad/hist".format(v.name), g)
                    sparsity_summary = tf.summary.scalar("{}/grad/sparsity".format(v.name),
                                                         tf.nn.zero_fraction(g))
                    grad_summaries.append(grad_hist_summary)
                    grad_summaries.append(sparsity_summary)
            grad_summaries_merged = tf.summary.merge(grad_summaries)

            # Summaries
            timestamp = str(int(time.time()))
            out_dir = os.path.abspath(os.path.join(os.path.curdir, "runs", timestamp))
            print("Writing to {}\n".format(out_dir))

            loss_summary = tf.summary.scalar("loss", cnn.loss)
            acc_summary = tf.summary.scalar("accuracy", cnn.accuracy)

            train_summary_op = tf.summary.merge([loss_summary, acc_summary, grad_summaries_merged])
            train_summary_dir = os.path.join(out_dir, "summaries", "train")
            train_summary_writer = tf.summary.FileWriter(train_summary_dir, sess.graph)

            dev_summary_op = tf.summary.merge([loss_summary, acc_summary])
            dev_summary_dir = os.path.join(out_dir, "summaries", "dev")
            dev_summary_writer = tf.summary.FileWriter(dev_summary_dir, sess.graph)

            # Checkpoint saving
            checkpoint_dir = os.path.abspath(os.path.join(out_dir, "checkpoints"))
            checkpoint_prefix = os.path.join(checkpoint_dir, "model")
            if not os.path.exists(checkpoint_dir):
                os.makedirs(checkpoint_dir)
            saver = tf.train.Saver(tf.global_variables(), max_to_keep=FLAGS.num_checkpoints)

            sess.run(tf.global_variables_initializer())

            def train_step(x_batch, y_batch):
                """A single training step."""
                feed_dict = {
                    cnn.input_x: x_batch,
                    cnn.input_y: y_batch,
                    cnn.dropout_keep_prob: FLAGS.dropout_keep_prob
                }
                _, step, summaries, loss, accuracy = sess.run(
                    [train_op, global_step, train_summary_op, cnn.loss, cnn.accuracy], feed_dict)
                time_str = datetime.datetime.now().isoformat()
                print("{}: step {}, loss {:g}, acc {:g}".format(time_str, step, loss, accuracy))
                train_summary_writer.add_summary(summaries, step)

            def dev_step(x_batch, y_batch, writer=None):
                """Evaluates the model on a dev set."""
                feed_dict = {
                    cnn.input_x: x_batch,
                    cnn.input_y: y_batch,
                    cnn.dropout_keep_prob: 1.0
                }
                step, summaries, loss, accuracy = sess.run(
                    [global_step, dev_summary_op, cnn.loss, cnn.accuracy], feed_dict)
                time_str = datetime.datetime.now().isoformat()
                print("{}: step {}, loss {:g}, acc {:g}".format(time_str, step, loss, accuracy))
                if writer:
                    writer.add_summary(summaries, step)

            # Generate batches
            batches = data_helpers.batch_iter(list(zip(x_train, y_train)),
                                              FLAGS.batch_size, FLAGS.num_epochs)

            def dev_test():
                # Evaluate the dev set batch by batch rather than in a single feed
                batches_dev = data_helpers.batch_iter(list(zip(x_dev, y_dev)), FLAGS.batch_size, 1)
                for batch_dev in batches_dev:
                    x_batch_dev, y_batch_dev = zip(*batch_dev)
                    dev_step(x_batch_dev, y_batch_dev, writer=dev_summary_writer)

            for batch in batches:
                x_batch, y_batch = zip(*batch)
                train_step(x_batch, y_batch)
                current_step = tf.train.global_step(sess, global_step)
                if current_step % FLAGS.evaluate_every == 0:
                    print("\nEvaluation:")
                    dev_test()
                if current_step % FLAGS.checkpoint_every == 0:
                    path = saver.save(sess, checkpoint_prefix, global_step=current_step)
                    print("Saved model checkpoint to {}\n".format(path))
# Training
# ==================================================
with tf.Graph().as_default():
    session_conf = tf.ConfigProto(
        allow_soft_placement=FLAGS.allow_soft_placement,
        log_device_placement=FLAGS.log_device_placement)
    sess = tf.Session(config=session_conf)
    with sess.as_default():
        cnn = TextCNN(
            sequence_length=x_train.shape[1],
            num_classes=6,
            vocab_size=len(vocabulary),
            # embedding_size=FLAGS.embedding_dim,
            embedding_size=400,
            filter_sizes=list(map(int, FLAGS.filter_sizes.split(","))),
            num_filters=FLAGS.num_filters,
            l2_reg_lambda=FLAGS.l2_reg_lambda,
            embedding_maxtrix=Word2V)  # [sic] keyword kept misspelled to match the TextCNN signature

        # Define Training procedure
        global_step = tf.Variable(0, name="global_step", trainable=False)
        optimizer = tf.train.AdamOptimizer(1e-4)
        # optimizer = tf.train.GradientDescentOptimizer(0.1)
        # optimizer = tf.train.MomentumOptimizer(0.01, 0.9)
        grads_and_vars = optimizer.compute_gradients(cnn.loss)
        train_op = optimizer.apply_gradients(grads_and_vars, global_step=global_step)

        # Keep track of gradient values and sparsity (optional)
        grad_summaries = []
print("Vocabulary Size: {:d}".format(len(vocab_processor.vocabulary_))) print("Train/Dev split: {:d}/{:d}".format(len(y_train), len(y_dev))) # Training # ================================================== with tf.Graph().as_default(): session_conf = tf.ConfigProto( allow_soft_placement=FLAGS.allow_soft_placement, log_device_placement=FLAGS.log_device_placement) sess = tf.Session(config=session_conf) with sess.as_default(): cnn = TextCNN(sequence_length=x_train.shape[1], num_classes=y_train.shape[1], vocab_size=len(vocab_processor.vocabulary_), embedding_size=FLAGS.embedding_dim, filter_sizes=list(map(int, FLAGS.filter_sizes.split(","))), num_filters=FLAGS.num_filters, l2_reg_lambda=FLAGS.l2_reg_lambda) # Define Training procedure global_step = tf.Variable(0, name="global_step", trainable=False) optimizer = tf.train.AdamOptimizer(.5) grads_and_vars = optimizer.compute_gradients(cnn.loss) train_op = optimizer.apply_gradients(grads_and_vars, global_step=global_step) # Keep track of gradient values and sparsity (optional) grad_summaries = [] for g, v in grads_and_vars: if g is not None:
with tf.Graph().as_default():
    # A Graph contains operations and tensors.
    # A Session is the environment in which graph operations execute,
    # and it holds state for Variables and queues.
    gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.7)
    sess = tf.Session(config=tf.ConfigProto(allow_soft_placement=True, gpu_options=gpu_options))
    with sess.as_default():
        cnn = TextCNN(
            sequence_length=x.shape[1],  # the length of the sentence
            num_classes=FLAGS.num_classes,  # how many classes as output
            vocab_size=len(vocabulary),  # total vocabulary
            embedding_size=FLAGS.embedding_dim,  # vector length
            # map applies int() to every item of the iterable; list() materializes the result.
            filter_sizes=list(map(int, FLAGS.filter_sizes.split(","))),
            # A conv layer has k filters (kernels) of size n x n x q, where n is smaller than
            # the input dimension and q can equal the number of channels r or be smaller, and
            # may vary per kernel. Each feature map is then subsampled, typically with mean or
            # max pooling over p x p contiguous regions, where p ranges from 2 (small images,
            # e.g. MNIST) up to about 5 for larger inputs.
            num_filters=FLAGS.num_filters,  # how many filters per filter size
            l2_reg_lambda=FLAGS.l2_reg_lambda)

        # Define Training procedure
        # Let TensorFlow handle the counting of training steps for us
        global_step = tf.Variable(0, name="global_step", trainable=False)
        optimizer = tf.train.AdamOptimizer(1e-3)
        grads_and_vars = optimizer.compute_gradients(cnn.loss)
        train_op = optimizer.apply_gradients(grads_and_vars, global_step=global_step)
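# ---------------------------------------------------------------------------
# Every snippet in this collection instantiates a TextCNN class without
# showing its body. For orientation, a minimal sketch of the usual Kim-style
# architecture these constructors imply (embedding -> parallel conv/max-pool
# branches -> concat -> dropout -> softmax). Attribute names mirror what the
# training loops touch (input_x, loss, accuracy, h_pool_flat, predictions),
# but this is an assumed reconstruction, not any one repo's class:
import tensorflow as tf

class TextCNN(object):
    """Minimal sketch of the TextCNN these training scripts drive (assumed)."""

    def __init__(self, sequence_length, num_classes, vocab_size,
                 embedding_size, filter_sizes, num_filters, l2_reg_lambda=0.0):
        self.input_x = tf.placeholder(tf.int32, [None, sequence_length], name="input_x")
        self.input_y = tf.placeholder(tf.float32, [None, num_classes], name="input_y")
        self.dropout_keep_prob = tf.placeholder(tf.float32, name="dropout_keep_prob")

        # Embedding lookup: (batch, seq_len) -> (batch, seq_len, emb, 1)
        W_emb = tf.Variable(tf.random_uniform([vocab_size, embedding_size], -1.0, 1.0))
        embedded = tf.expand_dims(tf.nn.embedding_lookup(W_emb, self.input_x), -1)

        # One conv + max-pool branch per filter size, then concatenate
        pooled = []
        for fs in filter_sizes:
            W = tf.Variable(tf.truncated_normal([fs, embedding_size, 1, num_filters], stddev=0.1))
            b = tf.Variable(tf.constant(0.1, shape=[num_filters]))
            conv = tf.nn.conv2d(embedded, W, strides=[1, 1, 1, 1], padding="VALID")
            h = tf.nn.relu(tf.nn.bias_add(conv, b))
            pooled.append(tf.nn.max_pool(h, ksize=[1, sequence_length - fs + 1, 1, 1],
                                         strides=[1, 1, 1, 1], padding="VALID"))
        num_filters_total = num_filters * len(filter_sizes)
        self.h_pool_flat = tf.reshape(tf.concat(pooled, 3), [-1, num_filters_total])

        # Dropout, then a final affine layer producing the class scores
        h_drop = tf.nn.dropout(self.h_pool_flat, self.dropout_keep_prob)
        W_fc = tf.Variable(tf.truncated_normal([num_filters_total, num_classes], stddev=0.1))
        b_fc = tf.Variable(tf.constant(0.1, shape=[num_classes]))
        scores = tf.nn.xw_plus_b(h_drop, W_fc, b_fc, name="scores")
        self.predictions = tf.argmax(scores, 1, name="predictions")

        losses = tf.nn.softmax_cross_entropy_with_logits_v2(logits=scores, labels=self.input_y)
        self.loss = tf.reduce_mean(losses) + l2_reg_lambda * (tf.nn.l2_loss(W_fc) + tf.nn.l2_loss(b_fc))
        correct = tf.equal(self.predictions, tf.argmax(self.input_y, 1))
        self.accuracy = tf.reduce_mean(tf.cast(correct, tf.float32), name="accuracy")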