train_set, dev_set, vocab_processor, sum_no_of_batches = inpH.getDataSets(
    FLAGS.training_files, max_document_length, 10, FLAGS.batch_size)

# Training
# ==================================================
print("starting graph def")
with tf.Graph().as_default():
    session_conf = tf.ConfigProto(
        allow_soft_placement=FLAGS.allow_soft_placement,
        log_device_placement=FLAGS.log_device_placement)
    sess = tf.Session(config=session_conf)
    print("started session")
    with sess.as_default():
        siameseModel = SiameseLSTM(
            sequence_length=max_document_length,
            vocab_size=len(vocab_processor.vocabulary_),
            embedding_size=FLAGS.embedding_dim,
            hidden_units=FLAGS.hidden_units,
            l2_reg_lambda=FLAGS.l2_reg_lambda,
            batch_size=FLAGS.batch_size)

        # Define Training procedure
        global_step = tf.Variable(0, name="global_step", trainable=False)
        optimizer = tf.train.AdamOptimizer(1e-3)
        print("initialized siameseModel object")
        grads_and_vars = optimizer.compute_gradients(siameseModel.loss)
        tr_op_set = optimizer.apply_gradients(grads_and_vars, global_step=global_step)
        print("defined training_ops")

        # Keep track of gradient values and sparsity (optional)
        grad_summaries = []
        for g, v in grads_and_vars:
            if g is not None:
                grad_hist_summary = tf.summary.histogram("{}/grad/hist".format(v.name), g)
                sparsity_summary = tf.summary.scalar("{}/grad/sparsity".format(v.name),
                                                     tf.nn.zero_fraction(g))
                grad_summaries.append(grad_hist_summary)
                grad_summaries.append(sparsity_summary)
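# The snippet above assumes `inpH.getDataSets` returns an already-fitted vocabulary
# processor (its `vocabulary_` is used for the vocab size). A minimal sketch of how
# such a processor could be built with the TF 1.x contrib API; the actual helper in
# InputHelper may construct it differently.
from tensorflow.contrib import learn


def build_vocab_processor(texts, max_document_length):
    # Maps each token to an integer id; sequences are padded/truncated
    # to max_document_length when transformed.
    vocab_processor = learn.preprocessing.VocabularyProcessor(max_document_length)
    vocab_processor.fit(texts)
    return vocab_processor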
def train_network(config=global_config.FLAGS):
    embedding_matrix = load_embeddings("embedding_matrix.p")
    txt_suffix = (
        """layers_%(layers)s-dense_units_%(dense_units)s-hidden_%(hidden)s-l2_%(l2)s-dropout_%(dropout)s-multiply%(multiply)s-basiclstm_%(basic_lstm)s-ignore_%(ignore)s"""
        % {
            "layers": config.num_layers,
            "dense_units": config.dense_units,
            "hidden": config.hidden_units,
            "l2": config.l2_reg_lambda,
            "dropout": config.siamese_keep_prob,
            "multiply": config.multiply,
            "basic_lstm": config.basic_lstm,
            "ignore": config.ignore_one_in_every
        }).replace('\n', ' ').replace('\r', '')
    txt_suffix = txt_suffix + "-" + str(datetime.datetime.now().isoformat()) + ".txt"
    print("Text file name: ", txt_suffix)
    txt_file = open(txt_suffix, 'w')

    print("starting graph def")
    with tf.Graph().as_default(), tf.device("/gpu:0"):
        sess = tf.Session(config=tf.ConfigProto(allow_soft_placement=True,
                                                log_device_placement=True))
        print("started session")
        with sess.as_default():
            siamese_model = SiameseLSTM(config, vocab_size=len(embedding_matrix))

            # Define Training procedure
            global_step = tf.Variable(0, name="global_step", trainable=False)
            optimizer = tf.train.AdamOptimizer(config.lr)
            print("initialized siameseModel object")
            grads_and_vars = optimizer.compute_gradients(siamese_model.loss)
            tr_op_set = optimizer.apply_gradients(grads_and_vars,
                                                  global_step=global_step)
            print("defined training_ops")

            # Keep track of gradient values and sparsity (optional)
            grad_summaries = []
            for g, v in grads_and_vars:
                if g is not None:
                    grad_hist_summary = tf.summary.histogram(
                        "{}/grad/hist".format(v.name), g)
                    sparsity_summary = tf.summary.scalar(
                        "{}/grad/sparsity".format(v.name),
                        tf.nn.zero_fraction(g))
                    grad_summaries.append(grad_hist_summary)
                    grad_summaries.append(sparsity_summary)
            grad_summaries_merged = tf.summary.merge(grad_summaries)

            # Output directory for models and summaries
            timestamp = str(int(time.time()))
            out_dir = os.path.abspath(
                os.path.join(os.path.curdir, "runs", timestamp))
            print("Writing to {}\n".format(out_dir))

            # Summaries for loss and accuracy
            loss_summary = tf.summary.scalar("loss", siamese_model.loss)
            # acc_summary = tf.summary.scalar("accuracy", cnn.accuracy)

            # Train Summaries
            train_summary_op = tf.summary.merge(
                [loss_summary, grad_summaries_merged])
            train_summary_dir = os.path.join(out_dir, "summaries", "train")
            train_summary_writer = tf.summary.FileWriter(train_summary_dir,
                                                         sess.graph)

            # Dev summaries
            dev_summary_op = tf.summary.merge([loss_summary])
            dev_summary_dir = os.path.join(out_dir, "summaries", "dev")
            dev_summary_writer = tf.summary.FileWriter(dev_summary_dir,
                                                       sess.graph)

            # Checkpoint directory. TensorFlow assumes this directory already
            # exists, so we need to create it.
            checkpoint_dir = os.path.abspath(os.path.join(out_dir, "checkpoints"))
            checkpoint_prefix = os.path.join(checkpoint_dir, "model")
            if not os.path.exists(checkpoint_dir):
                os.makedirs(checkpoint_dir)
            saver = tf.train.Saver(tf.global_variables(), max_to_keep=1000)

            # Initialize all variables
            sess.run(tf.global_variables_initializer())
            print("init all variables")
            graph_def = tf.get_default_graph().as_graph_def()
            graphpb_txt = str(graph_def)
            with open(os.path.join(checkpoint_dir, "graphpb.txt"), 'w') as f:
                f.write(graphpb_txt)

            last_validation_loss = 1000
            checkpoint_model = ''
            # Initialize so early stopping on the first evaluation cannot hit an
            # undefined name.
            last_checkpoint_model = ''
            for epoch in range(config.num_epochs):
                batches = train_batch_iter(config.batch_size)
                loss = 0.0
                num = 0
                i = 0
                print('Starting epoch: {} at {}'.format(
                    epoch, datetime.datetime.now().isoformat()))
                for batch in tqdm(batches):
                    i += 1
                    if i % config.ignore_one_in_every == 0:
                        continue
                    x1_batch, x2_batch, x1_len, x2_len, y_batch, ids = zip(*batch)
                    step_loss = step(x1_batch, x2_batch, x1_len, x2_len, y_batch,
                                     siamese_model, sess, global_step, tr_op_set,
                                     config.dropout_keep_prob, embedding_matrix,
                                     train_summary_writer, train_summary_op)
                    loss += step_loss
                    num += len(batch) / config.batch_size
                    txt_file.write('Train loss at iteration {} is {}'.format(
                        i, loss / num))
                    txt_file.write("\n")
                    txt_file.flush()
                    if num % 100 == 0:
                        print('Train [{}] loss at step {} is {}'.format(
                            datetime.datetime.now().isoformat(), num, loss / num))
                current_step = tf.train.global_step(sess, global_step)
                print("Train [{}]: after epoch {} loss is {}".format(
                    datetime.datetime.now().isoformat(), epoch, loss / num))

                if epoch % config.evaluate_every == 0:
                    print("\n Evaluation after epoch: {}".format(epoch))
                    dev_batches = val_batch_iter(config.batch_size)
                    loss = 0.0
                    num = 0
                    i = 0
                    for db in tqdm(dev_batches):
                        if len(db) < 1:
                            continue
                        x1_dev, x2_dev, x1_len_dev, x2_len_dev, y_dev, id_dev = zip(*db)
                        if len(y_dev) < 1:
                            continue
                        step_loss = step(x1_dev, x2_dev, x1_len_dev, x2_len_dev,
                                         y_dev, siamese_model, sess, global_step,
                                         tr_op_set, config.siamese_keep_prob,
                                         embedding_matrix, dev_summary_writer,
                                         dev_summary_op, evaluate=True)
                        loss += step_loss
                        num += len(db) / config.batch_size
                        txt_file.write(
                            'Validation loss at iteration {} is {}'.format(
                                i, loss / num))
                        txt_file.write("\n")
                        txt_file.flush()
                        i += 1
                    print("Validation [{}]: after epoch {} loss is {}".format(
                        datetime.datetime.now().isoformat(), epoch, loss / num))
                    saver.save(sess, checkpoint_prefix, global_step=current_step)
                    tf.train.write_graph(sess.graph.as_graph_def(),
                                         checkpoint_prefix,
                                         "graph" + str(epoch) + ".pb",
                                         as_text=False)
                    print("Saved model {} with validation loss ={} checkpoint to {}\n"
                          .format(epoch, loss / num, checkpoint_prefix))
                    checkpoint_model = checkpoint_prefix + "-" + str(current_step)
                    if loss > last_validation_loss:
                        if config.early_stopping:
                            return last_checkpoint_model, last_validation_loss
                    last_validation_loss = loss
                    last_checkpoint_model = checkpoint_model

            print("Done!!!")
            txt_file.close()
            return checkpoint_model, last_validation_loss
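# Hedged usage sketch: train_network reads its hyperparameters from a flag-like
# config object. The field names below mirror exactly what the function accesses
# above; the values are illustrative assumptions, not the project's actual settings.
from types import SimpleNamespace

example_config = SimpleNamespace(
    num_layers=1, dense_units=128, hidden_units=50, l2_reg_lambda=0.0,
    siamese_keep_prob=1.0, dropout_keep_prob=0.8, multiply=False,
    basic_lstm=True, ignore_one_in_every=10, lr=1e-3, num_epochs=10,
    batch_size=64, evaluate_every=1, early_stopping=True)

# best_ckpt, best_val_loss = train_network(example_config)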
def __launch_from_build(self, vocab_processor, trainableEmbeddings, out_dir,
                        checkpoint_dir_abs, initW):
    # ==================================================
    print("starting graph def")
    graph = tf.Graph()
    with graph.as_default():
        # The session is bound to the current default graph, which is `graph` here.
        sess = tf.Session(graph=graph, config=self.session_conf)
        print("started session")
        with sess.as_default():
            if self.FLAGS.is_char_based:
                siameseModel = SiameseLSTM(
                    sequence_length=self.FLAGS.max_document_length,
                    vocab_size=len(vocab_processor.vocabulary_),
                    embedding_size=self.FLAGS.embedding_dim,
                    hidden_units=self.FLAGS.hidden_units,
                    l2_reg_lambda=self.FLAGS.l2_reg_lambda,
                    batch_size=self.FLAGS.batch_size)
            else:
                siameseModel = SiameseLSTMw2v(
                    sequence_length=self.FLAGS.max_document_length,
                    vocab_size=len(vocab_processor.vocabulary_),
                    embedding_size=self.FLAGS.embedding_dim,
                    hidden_units=self.FLAGS.hidden_units,
                    l2_reg_lambda=self.FLAGS.l2_reg_lambda,
                    batch_size=self.FLAGS.batch_size,
                    trainableEmbeddings=trainableEmbeddings)

            # Define Training procedure
            global_step = tf.Variable(0, name="global_step", trainable=False)
            optimizer = tf.train.AdamOptimizer(1e-3)
            print("initialized siameseModel object")
            grads_and_vars = optimizer.compute_gradients(siameseModel.loss)
            tr_op_set = optimizer.apply_gradients(grads_and_vars,
                                                  global_step=global_step,
                                                  name='tr_op_set')
            print("defined training_ops")

            # Keep track of gradient values and sparsity (optional)
            grad_summaries = []
            for g, v in grads_and_vars:
                if g is not None:
                    grad_hist_summary = tf.summary.histogram(
                        "{}/grad/hist".format(v.name), g)
                    sparsity_summary = tf.summary.scalar(
                        "{}/grad/sparsity".format(v.name),
                        tf.nn.zero_fraction(g))
                    grad_summaries.append(grad_hist_summary)
                    grad_summaries.append(sparsity_summary)
            grad_summaries_merged = tf.summary.merge(grad_summaries)
            print("defined gradient summaries")

            # Summaries for loss and accuracy
            loss_summary = tf.summary.scalar("loss", siameseModel.loss)
            acc_summary = tf.summary.scalar("accuracy", siameseModel.accuracy)

            # Train Summaries
            train_summary_op = tf.summary.merge(
                [loss_summary, acc_summary, grad_summaries_merged])
            train_summary_op = tf.identity(train_summary_op, 'train_summary_op')
            train_summary_dir = os.path.join(out_dir, "summaries", "train")
            train_summary_writer = tf.summary.FileWriter(train_summary_dir,
                                                         sess.graph)

            # Dev summaries
            dev_summary_op = tf.summary.merge([loss_summary, acc_summary])
            dev_summary_op = tf.identity(dev_summary_op, 'dev_summary_op')
            dev_summary_dir = os.path.join(out_dir, "summaries", "dev")
            dev_summary_writer = tf.summary.FileWriter(dev_summary_dir,
                                                       sess.graph)

            saver = tf.train.Saver(tf.global_variables(), max_to_keep=100)
            sess.run(tf.global_variables_initializer())
            if initW is not None:
                sess.run(siameseModel.W.assign(initW))

            graphpb_txt = str(graph.as_graph_def())
            with open(os.path.join(checkpoint_dir_abs, "graphpb.txt"), 'w') as f:
                f.write(graphpb_txt)

            input_tensors = InputTensors(siameseModel.input_x1,
                                         siameseModel.input_x2,
                                         siameseModel.input_y,
                                         siameseModel.dropout_keep_prob)
            result_tensors = ResultTensors(global_step, siameseModel.loss,
                                           siameseModel.accuracy,
                                           siameseModel.distance,
                                           siameseModel.temp_sim)
            metric_ops = MetricOps(tr_op_set, train_summary_op, dev_summary_op,
                                   train_summary_writer, dev_summary_writer)
            return saver, sess, input_tensors, result_tensors, metric_ops
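# The return values above are bundled into lightweight containers defined elsewhere
# in this project. A minimal namedtuple sketch consistent with the constructor calls
# in __launch_from_build; the field names are assumptions, only the field order is
# taken from the calls above.
from collections import namedtuple

InputTensors = namedtuple(
    "InputTensors", ["input_x1", "input_x2", "input_y", "dropout_keep_prob"])
ResultTensors = namedtuple(
    "ResultTensors", ["global_step", "loss", "accuracy", "distance", "temp_sim"])
MetricOps = namedtuple(
    "MetricOps", ["tr_op_set", "train_summary_op", "dev_summary_op",
                  "train_summary_writer", "dev_summary_writer"])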
# train_set, dev_set, vocab_processor, sum_no_of_batches = inpH.getDataSets(
#     FLAGS.training_files, max_document_length, 10, FLAGS.batch_size)

# Training
# ==================================================
print("starting graph def")
with tf.Graph().as_default():
    session_conf = tf.ConfigProto(
        allow_soft_placement=FLAGS.allow_soft_placement,
        log_device_placement=FLAGS.log_device_placement)
    sess = tf.Session(config=session_conf)
    print("started session")
    with sess.as_default():
        siameseModel = SiameseLSTM(
            sequence_length=max_document_length,
            embedding_size=FLAGS.embedding_dim,
            hidden_units=FLAGS.hidden_units,
            l2_reg_lambda=FLAGS.l2_reg_lambda,
            batch_size=FLAGS.batch_size,
            word_embeddings=word_embeddings)

        # Define Training procedure
        global_step = tf.Variable(0, name="global_step", trainable=False)
        optimizer = tf.train.AdamOptimizer(1e-3)
        print("initialized siameseModel object")
        grads_and_vars = optimizer.compute_gradients(siameseModel.loss)
        tr_op_set = optimizer.apply_gradients(grads_and_vars, global_step=global_step)
        print("defined training_ops")

        # Keep track of gradient values and sparsity (optional)
        grad_summaries = []
        for g, v in grads_and_vars:
            if g is not None:
                grad_hist_summary = tf.summary.histogram("{}/grad/hist".format(v.name), g)
                sparsity_summary = tf.summary.scalar("{}/grad/sparsity".format(v.name),
                                                     tf.nn.zero_fraction(g))
                grad_summaries.append(grad_hist_summary)
                grad_summaries.append(sparsity_summary)
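        # Hedged sketch of the training step that usually follows this setup: feed the
        # two sentence batches and labels into the model's placeholders and run the
        # train op. The placeholder names follow those used in the __launch_from_build
        # variant above and are assumed to match this model; FLAGS.dropout_keep_prob is
        # an assumed flag, and x1_batch/x2_batch/y_batch are assumed batch arrays.
        def train_step(x1_batch, x2_batch, y_batch):
            feed_dict = {
                siameseModel.input_x1: x1_batch,
                siameseModel.input_x2: x2_batch,
                siameseModel.input_y: y_batch,
                siameseModel.dropout_keep_prob: FLAGS.dropout_keep_prob,
            }
            # Run one optimizer update and return the batch loss.
            _, step_num, batch_loss = sess.run(
                [tr_op_set, global_step, siameseModel.loss], feed_dict)
            return batch_loss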
# ==================================================
print("starting graph def")
with tf.Graph().as_default():
    session_conf = tf.ConfigProto(
        allow_soft_placement=FLAGS.allow_soft_placement,
        log_device_placement=FLAGS.log_device_placement)
    sess = tf.Session(config=session_conf)
    print("started session")
    with sess.as_default():
        siameseModel = SiameseLSTM(
            sequence_length=max_document_length,
            vocab_size=len(vocab_processor.vocabulary_),
            embedding_size=FLAGS.embedding_dim,
            hidden_units=FLAGS.hidden_units,
            l2_reg_lambda=FLAGS.l2_reg_lambda,
            batch_size=FLAGS.batch_size,
            embedding_matrix=embedding_matrix,
            entity_embedding_matrix=entity_embedding_matrix,
            entity_embedding_dim=FLAGS.entity_embedding_dim,
            entity_vocab_size=entity_vocab_size,
            n_entity=n_entity,
            mode=mode)

        # Define Training procedure
        global_step = tf.Variable(0, name="global_step", trainable=False)
        optimizer = tf.train.AdamOptimizer(5e-4)
        print("initialized siameseModel object")
        grads_and_vars = optimizer.compute_gradients(siameseModel.loss)
        tr_op_set = optimizer.apply_gradients(grads_and_vars, global_step=global_step)
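        # Hedged note on the optimizer API: because the gradients above are applied
        # unmodified, the compute_gradients / apply_gradients pair is equivalent to a
        # single minimize() call. The two-step form is kept so per-variable gradient
        # summaries can be added, as the other variants in this file do.
        #
        #     tr_op_set = optimizer.minimize(siameseModel.loss, global_step=global_step)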