def check(estring):
    x_raw = [estring]
    x_test = np.array(list(vocab_processor.transform(x_raw)))

    with graph.as_default():
        session_conf = tf.ConfigProto(
            allow_soft_placement=FLAGS.allow_soft_placement,
            log_device_placement=FLAGS.log_device_placement)
        sess = tf.Session(config=session_conf)
        with sess.as_default():
            # Load the saved meta graph and restore variables
            saver = tf.train.import_meta_graph("{}.meta".format(checkpoint_file))
            saver.restore(sess, checkpoint_file)

            # Get the placeholders from the graph by name
            input_x = graph.get_operation_by_name("input_x").outputs[0]
            # input_y = graph.get_operation_by_name("input_y").outputs[0]
            dropout_keep_prob = graph.get_operation_by_name("dropout_keep_prob").outputs[0]

            # Tensors we want to evaluate
            predictions = graph.get_operation_by_name("output/predictions").outputs[0]

            # Generate batches for one epoch
            batches = data_helpers.batch_iter(list(x_test), 1, 1, shuffle=False)

            # Collect the predictions here
            all_predictions = []
            for x_test_batch in batches:
                batch_predictions = sess.run(predictions,
                                             {input_x: x_test_batch, dropout_keep_prob: 1.0})
                all_predictions = np.concatenate([all_predictions, batch_predictions])

            if all_predictions[0] == 1:
                return "L"
            else:
                return "C"
def batch_dev_step(x_batch, y_batch, writer=None):
    """
    Evaluates model on a dev set
    """
    batches = data_helpers.batch_iter(
        zip(x_batch, y_batch), FLAGS.batch_size, 1)
    t_acc = 0.0
    t_loss = 0.0
    t = 0
    step1 = 0
    f_r = open(file_name, "a+")
    for batch in batches:
        x_batch, y_batch = zip(*batch)
        feed_dict = {
            cnn.input_x: x_batch,
            cnn.input_y: y_batch,
            cnn.dropout_keep_prob: 1.0
        }
        step, summaries, loss, accuracy = sess.run(
            [global_step, dev_summary_op, cnn.loss, cnn.accuracy],
            feed_dict)
        time_str = datetime.datetime.now().isoformat()
        print("{}: step {}, loss {:g}, acc {:g}".format(time_str, step, loss, accuracy))
        t_acc += accuracy
        t_loss += loss
        t += 1
        step1 = step
        if writer:
            writer.add_summary(summaries, step)
    f_r.write(str(step1) + " step " + " accuracy " + str(t_acc / t) +
              " loss " + str(t_loss / t) + "\n")
    f_r.close()
    print("total", t_acc / t, "loss", t_loss / t)
def run(self):
    input_data, label_data = data_helpers.load_data()

    n_steps = 3
    n_input = 1
    n_classes = 1
    n_hidden = self.n_hidden
    batch_size = self.batch_size
    training_iters = self.training_iters
    display_step = self.display_step
    checkpoint_step = self.checkpoint_step

    # batches = data_helpers.batch_gen(zip(input_data, label_data), 2)
    # for batch in batches:
    #     x_batch, y_batch = zip(*batch)
    #     print('-' * 50)
    #     print(x_batch)
    #     print(y_batch)

    new_rnn = RNN(n_steps=n_steps, n_input=n_input, n_hidden=self.n_hidden, n_classes=n_classes)

    global_step = tf.Variable(0, name="global_step", trainable=False)
    # Adam Optimizer
    optimizer = tf.train.AdamOptimizer(learning_rate=self.learning_rate).minimize(
        new_rnn.cost, global_step=global_step)
    # optimizer = tf.train.AdamOptimizer(self.learning_rate)
    # grads_and_vars = optimizer.compute_gradients(new_rnn.cost)
    # train_op = optimizer.apply_gradients(grads_and_vars, global_step=global_step)

    # Checkpoint directory. Tensorflow assumes this directory already exists, so we need to create it
    checkpoint_dir = os.path.join(self.out_dir, "checkpoints")
    checkpoint_prefix = os.path.join(checkpoint_dir, "model")
    if not os.path.exists(checkpoint_dir):
        os.makedirs(checkpoint_dir)
    saver = tf.train.Saver(tf.all_variables(), max_to_keep=1)

    init = tf.initialize_all_variables()

    with tf.Session() as sess:
        sess.run(init)

        def train_step(_x_batch, _y_batch):
            feed_dict = {new_rnn.x: _x_batch,
                         new_rnn.y: _y_batch,
                         new_rnn.istate: np.zeros((batch_size, 2 * n_hidden))}
            _, step = sess.run([optimizer, global_step], feed_dict=feed_dict)
            step = tf.train.global_step(sess, global_step)

            if step % display_step == 0:
                # Calculate batch accuracy
                acc = sess.run(new_rnn.accuracy, feed_dict=feed_dict)
                # Calculate batch loss
                loss = sess.run(new_rnn.cost, feed_dict=feed_dict)
                # print("Iter " + str(step * batch_size) + ", Minibatch Loss= " + "{:.6f}".format(loss) +
                #       ", Training Accuracy= " + "{:.5f}".format(acc))
                print("Iter " + str(step * batch_size) + ", Minibatch Loss= " + str(loss) +
                      ", Training Accuracy= " + str(acc))
            if step % checkpoint_step == 0:
                path = saver.save(sess, checkpoint_prefix, global_step=step)
                print("Saved model checkpoint to {}\n".format(path))

        batches = data_helpers.batch_iter(zip(input_data, label_data), batch_size, training_iters)
        for batch in batches:
            x_batch, y_batch = zip(*batch)
            x_batch = np.array(x_batch).reshape((batch_size, n_steps, n_input))
            train_step(x_batch, y_batch)
def dev_step(x_dev, y_dev, test_set="dev", writer=None):
    dev_baches = data_helpers.batch_iter(list(zip(x_dev, y_dev)),
                                         batch_size=FLAGS.batch_size,
                                         seq_length=seq_max_len,
                                         emmbedding_size=FLAGS.embedding_dim,
                                         word2vec_vocab=word2vec_vocab,
                                         word2vec_vec=word2vec_vec,
                                         is_shuffle=False)
    total_loss = 0
    total_acc = 0
    index = 0
    total_correct_predictions = 0
    total_dev_data = 0
    for batch in dev_baches:
        if len(batch[0]) == 0:
            continue
        x_batch, y_batch = zip(*batch[0])
        batch_seq_len = batch[1]
        total_dev_data += len(x_batch)

        myfeed_dict = {
            input_x: x_batch,
            input_y: y_batch,
            seq_len_list: batch_seq_len,
            # myLSTM.istate: np.zeros((FLAGS.batch_size, 2 * FLAGS.n_hidden)),
            dropout_keep_prob: 1
        }
        acc, cost, correct_predict, summaries = sess.run(
            [accuracy, loss, correct_predictions, dev_summary_op],
            feed_dict=myfeed_dict)

        if test_set == "dev":
            dev_summary_writer.add_summary(summaries, index + my_dev_step * 250)

        print("on {}, test index: {:g}, Minibatch Loss: {:.6f}, acc: {:g}".format(test_set, index, cost, acc))
        destfile.write(
            "on {}, test index: {:g}, Minibatch Loss: {:.6f}, acc: {:g}\n".format(test_set, index, cost, acc))
        destfile.write('\n')

        total_loss += cost
        total_acc += acc
        index += 1
        total_correct_predictions += np.sum(correct_predict)

    print("#################################################################\n")
    destfile.write("####################################################################\n\n")
    avg_loss = total_loss / index
    avg_acc = total_acc / index
    real_acc = (total_correct_predictions * 1.0) / total_dev_data

    print("on {}, average_Loss: {:g}, average_acc: {:.6f}, real_acc: {:g}\n".format(
        test_set, avg_loss, avg_acc, real_acc))
    destfile.write("on {}, average_Loss: {:g}, average_acc: {:.6f}, real_acc: {:g}\n\n".format(
        test_set, avg_loss, avg_acc, real_acc))
    return avg_loss, real_acc
def dev_step(x_batch, y_batch, test_set="dev"):
    """
    Evaluates model on a dev set
    """
    dev_baches = data_helpers.batch_iter(list(zip(x_batch, y_batch)),
                                         batch_size=FLAGS.batch_size,
                                         seq_length=seq_max_len,
                                         emmbedding_size=FLAGS.embedding_dim,
                                         word2vec_vocab=word2vec_vocab,
                                         word2vec_vec=word2vec_vec,
                                         is_shuffle=False)
    total_loss = 0
    total_acc = 0
    index = 0
    total_correct_predictions = 0
    total_dev_data = 0
    for batch in dev_baches:
        if len(batch[0]) == 0:
            continue
        x_batch, y_batch = zip(*batch[0])
        total_dev_data += len(x_batch)

        feed_dict = {
            cnn.input_x: x_batch,
            cnn.input_y: y_batch,
            cnn.dropout_keep_prob: 1.0
        }
        step, summaries, loss, accuracy, correct_predict = sess.run(
            [global_step, dev_summary_op, cnn.loss, cnn.accuracy, cnn.correct_predictions],
            feed_dict)

        print("on {}, test index: {:g}, Minibatch Loss: {:.6f}, acc: {:.5f}".format(
            test_set, index, loss, accuracy))
        destfile.write("on {}, test index: {:g}, Minibatch Loss: {:.6f}, acc: {:.5f}\n".format(
            test_set, index, loss, accuracy))

        total_loss += loss
        total_acc += accuracy
        index += 1
        total_correct_predictions += np.sum(correct_predict)

    print("#################################################################\n")
    destfile.write("####################################################################\n\n")
    avg_loss = total_loss / index
    avg_acc = total_acc / index
    real_acc = (total_correct_predictions * 1.0) / total_dev_data

    print("on {}, average_Loss: {:.6f}, average_acc: {:.5f}, real_acc: {:.5f}\n".format(
        test_set, avg_loss, avg_acc, real_acc))
    destfile.write("on {}, average_Loss: {:.6f}, average_acc: {:.5f}, real_acc: {:.5f}\n\n".format(
        test_set, avg_loss, avg_acc, real_acc))

    if test_set == "dev":
        dev_summary_writer.add_summary(summaries, step)
    return avg_loss, real_acc
def eval_item(item_text):
    x_raw = [item_text]
    y_test = None  # [1, 0]

    # Map data into vocabulary
    vocab_path = os.path.join(FLAGS.checkpoint_dir, "..", "vocab")
    vocab_processor = learn.preprocessing.VocabularyProcessor.restore(vocab_path)
    x_test = np.array(list(vocab_processor.transform(x_raw)))

    print("\nEvaluating...\n")

    # Evaluation
    # ==================================================
    checkpoint_file = tf.train.latest_checkpoint(FLAGS.checkpoint_dir)
    graph = tf.Graph()
    with graph.as_default():
        session_conf = tf.ConfigProto(
            allow_soft_placement=FLAGS.allow_soft_placement,
            log_device_placement=FLAGS.log_device_placement)
        sess = tf.Session(config=session_conf)
        with sess.as_default():
            # Load the saved meta graph and restore variables
            saver = tf.train.import_meta_graph("{}.meta".format(checkpoint_file))
            saver.restore(sess, checkpoint_file)

            # Get the placeholders from the graph by name
            input_x = graph.get_operation_by_name("input_x").outputs[0]
            # input_y = graph.get_operation_by_name("input_y").outputs[0]
            dropout_keep_prob = graph.get_operation_by_name("dropout_keep_prob").outputs[0]

            # Tensors we want to evaluate
            predictions = graph.get_operation_by_name("output/predictions").outputs[0]

            # Generate batches for one epoch
            batches = data_helpers.batch_iter(list(x_test), FLAGS.batch_size, 1, shuffle=False)

            # Collect the predictions here
            all_predictions = []
            for x_test_batch in batches:
                batch_predictions = sess.run(predictions,
                                             {input_x: x_test_batch, dropout_keep_prob: 1.0})
                all_predictions = np.concatenate([all_predictions, batch_predictions])

    predictions_human_readable = np.column_stack((np.array(x_raw), all_predictions))
    print(predictions_human_readable[0][1])
    return predictions_human_readable[0][1]
def batch_dev_step(x_batch, y_batch, writer=None):
    """
    Evaluates model on a dev set
    """
    batches = data_helpers.batch_iter(
        zip(x_batch, y_batch), FLAGS.batch_size, 1)
    for batch in batches:
        x_batch, y_batch = zip(*batch)
        feed_dict = {
            cnn.input_x: x_batch,
            cnn.input_y: y_batch,
            cnn.dropout_keep_prob: 1.0
        }
        step, summaries, loss, accuracy = sess.run(
            [global_step, dev_summary_op, cnn.loss, cnn.accuracy],
            feed_dict)
        time_str = datetime.datetime.now().isoformat()
        print("{}: step {}, loss {:g}, acc {:g}".format(time_str, step, loss, accuracy))
        if writer:
            writer.add_summary(summaries, step)
def test_multiple_flags(flags):
    # Map data into vocabulary
    x_tests = [np.array(list(vocab_processor.transform(x_raw))) for x_raw in flags]

    checkpoint_file = tf.train.latest_checkpoint(FLAGS.checkpoint_dir)
    graph = tf.Graph()
    with graph.as_default():
        session_conf = tf.ConfigProto(
            allow_soft_placement=FLAGS.allow_soft_placement,
            log_device_placement=FLAGS.log_device_placement)
        sess = tf.Session(config=session_conf)
        with sess.as_default():
            # Load the saved meta graph and restore variables
            saver = tf.train.import_meta_graph("{}.meta".format(checkpoint_file))
            saver.restore(sess, checkpoint_file)

            # Get the placeholders from the graph by name
            input_x = graph.get_operation_by_name("input_x").outputs[0]
            # input_y = graph.get_operation_by_name("input_y").outputs[0]
            dropout_keep_prob = graph.get_operation_by_name("dropout_keep_prob").outputs[0]

            # Tensors we want to evaluate
            predictions = graph.get_operation_by_name("output/scores").outputs[0]

            # Generate batches for one epoch
            batches = [data_helpers.batch_iter(list(x_test), FLAGS.batch_size, 1, shuffle=False)
                       for x_test in x_tests]

            # Collect the predictions here
            all_predictions = []
            raw_results = []
            softmax_scores = []
            for batch in batches:
                for x_test_batch in batch:
                    raw_results.append(sess.run(predictions,
                                                {input_x: x_test_batch, dropout_keep_prob: 1.0}))
            return raw_results
sess.run(tf.initialize_all_variables())

# Summaries for loss and accuracy
loss_summary = tf.scalar_summary("Training loss", cross_entropy)
valid_loss_summary = tf.scalar_summary("Validation loss", valid_mean_loss)
valid_accuracy_summary = tf.scalar_summary("Validation accuracy", valid_mean_accuracy)
summary_writer = tf.train.SummaryWriter(SUMMARY_DIR, sess.graph)
tf.merge_all_summaries()

log("=======================================================")

# Training
if FLAGS.train:
    log("Starting training...")

    # Batches
    batches = data_helpers.batch_iter(zip(x_train, y_train), FLAGS.batch_size, FLAGS.epochs)
    test_batches = list(data_helpers.batch_iter(zip(x_test, y_test), FLAGS.batch_size, 1))
    my_batch = next(batches)  # To use with human_readable_output()

    # Pretty-printing variables
    global_step = 0
    batches_in_epoch = len(y_train) / FLAGS.batch_size
    # Prevent division by zero if the dataset is smaller than batch_size
    batches_in_epoch = batches_in_epoch if batches_in_epoch != 0 else 1
    total_num_step = FLAGS.epochs * batches_in_epoch

    for batch in batches:
        global_step += 1
        x_batch, y_batch = zip(*batch)
        # Run the training step
        train_result, loss_summary_result = sess.run(
            [train_step, loss_summary],
            feed_dict={data_in: x_batch, data_out: y_batch, dropout_keep_prob: 0.5})
def My_main():
    if FLAGS.is_load_model == False:
        os.system("rm -r runs")
    x_train, x_dev, y_train, y_dev, seq_max_len, vocabulary, vocabulary_inv, word2vec_vocab, word2vec_vec = read_from_dataset(
        FLAGS.input_dataset_path, FLAGS.word2vec_model_path, FLAGS.n_classes, FLAGS.max_seq_len_cutoff)

    with tf.Graph().as_default():
        session_conf = tf.ConfigProto(
            allow_soft_placement=FLAGS.allow_soft_placement,
            log_device_placement=FLAGS.log_device_placement)
        sess = tf.Session(config=session_conf)
        with sess.as_default():
            cnn = TextCNN(
                sequence_length=seq_max_len,
                num_classes=FLAGS.n_classes,
                embedding_size=FLAGS.embedding_dim,
                filter_sizes=list(map(int, FLAGS.filter_sizes.split(","))),
                num_filters=FLAGS.num_filters,
                l2_reg_lambda=FLAGS.l2_reg_lambda,
                n_hidden_attention=FLAGS.n_hidden_attention)

            # Define Training procedure
            global_step = tf.Variable(0, name="global_step", trainable=False)
            optimizer = tf.train.AdamOptimizer(0.001)
            train_op = optimizer.minimize(cnn.loss, var_list=tf.trainable_variables())
            train_attention = optimizer.minimize(cnn.attention_loss,
                                                 var_list=[cnn.W_word_attention,
                                                           cnn.b_word_attention,
                                                           cnn.attention_vector])

            # Output directory for models and summaries
            timestamp = str(int(time.time()))
            out_dir = os.path.abspath(os.path.join(os.path.curdir, "runs", timestamp))
            print("Writing to {}\n".format(out_dir))

            # Summaries for loss and accuracy
            loss_summary = tf.summary.scalar("loss", cnn.loss)
            acc_summary = tf.summary.scalar("accuracy", cnn.accuracy)

            # Train Summaries
            train_summary_op = tf.summary.merge([loss_summary, acc_summary])
            train_summary_dir = os.path.join(out_dir, "summaries", "train")
            train_summary_writer = tf.summary.FileWriter(train_summary_dir, sess.graph_def)

            # Dev summaries
            dev_summary_op = tf.summary.merge([loss_summary, acc_summary])
            dev_summary_dir = os.path.join(out_dir, "summaries", "dev")
            dev_summary_writer = tf.summary.FileWriter(dev_summary_dir, sess.graph_def)

            # Checkpoint directory. Tensorflow assumes this directory already exists, so we need to create it
            out_dir = os.path.abspath(os.path.join(os.path.curdir, "runs"))
            checkpoint_dir = os.path.abspath(os.path.join(out_dir, "checkpoints"))
            checkpoint_prefix = os.path.join(checkpoint_dir, "model")
            if not os.path.exists(checkpoint_dir):
                os.makedirs(checkpoint_dir)
            # saver = tf.train.Saver(tf.all_variables())
            saver = tf.train.Saver()

            # Initialize all variables
            sess.run(tf.global_variables_initializer())

            if FLAGS.is_load_model == True:
                suse = tf.report_uninitialized_variables()
                print("loading pretrained model...")
                destfile.write("loading pretrained model...\n")
                # checkpoint_file = '/home/ippr/roshanfekr/IMDB_Sentiment/input_attention_cnn/pr_runs/runs/checkpoints/model-50'
                checkpoint_file = tf.train.latest_checkpoint(FLAGS.checkpoint_dir)
                load_path = saver.restore(sess, checkpoint_file)
                print("pretrained model loaded from " + str(load_path))
                destfile.write("pretrained model loaded from " + str(load_path) + "\n")
                # path = saver.save(sess, checkpoint_prefix, global_step=FLAGS.num_epochs)

            list_of_trainable_variables = sess.run(tf.trainable_variables())
            print("number of trainable variables is: ", len(list_of_trainable_variables))
            destfile.write("number of trainable variables is: " + str(len(list_of_trainable_variables)) + "\n")

            def train_step(x_batch, y_batch, epoch_num):
                """
                A single training step
                """
                feed_dict = {
                    cnn.input_x: x_batch,
                    cnn.input_y: y_batch,
                    cnn.dropout_keep_prob: FLAGS.dropout_keep_prob,
                }
                att_vector, doc_alfa, attention_output, filter = sess.run(
                    [cnn.myatt, cnn.doc_alfa, cnn.attention_h_pool_flat, cnn.filter_W], feed_dict)

                tp = ""
                if (epoch_num + 1) % 4 == 0:
                    _, step, summaries, loss, accuracy, correct_predict, weigth_norm = sess.run(
                        [train_attention, global_step, train_summary_op, cnn.attention_loss,
                         cnn.attention_accuracy, cnn.attention_correct_predictions, cnn.attention_weigth_norm],
                        feed_dict)
                    tp = "attention "
                else:
                    _, step, summaries, loss, accuracy, correct_predict, weigth_norm = sess.run(
                        [train_op, global_step, train_summary_op, cnn.loss,
                         cnn.accuracy, cnn.correct_predictions, cnn.weigth_norm],
                        feed_dict)
                    tp = "conv "

                print(tp + "epoch: {:g}, iteration: {:g}, weigth_norm: {:.6f}, loss: {:.4f}, acc: {:.4f}".format(
                    epoch_num, step, weigth_norm, loss, accuracy))
                destfile.write(tp + "epoch: {:g}, iteration: {:g}, weigth_norm: {:.6f}, loss: {:.4f}, acc: {:.4f}\n".format(
                    epoch_num, step, weigth_norm, loss, accuracy))
                train_summary_writer.add_summary(summaries, step)
                '''
                print("saving..")
                path = saver.save(sess, checkpoint_prefix, global_step=0)
                print("saved")
                '''

            def dev_step(x_batch, y_batch, test_set="dev"):
                """
                Evaluates model on a dev set
                """
                dev_baches = data_helpers.batch_iter(list(zip(x_batch, y_batch)),
                                                     batch_size=FLAGS.batch_size,
                                                     seq_length=seq_max_len,
                                                     emmbedding_size=FLAGS.embedding_dim,
                                                     word2vec_vocab=word2vec_vocab,
                                                     word2vec_vec=word2vec_vec,
                                                     is_shuffle=False)
                total_loss = 0
                total_acc = 0
                index = 0
                total_correct_predictions = 0
                total_dev_data = 0
                for batch in dev_baches:
                    if len(batch) == 0:
                        continue
                    x_batch, y_batch = zip(*batch[0])
                    total_dev_data += len(x_batch)

                    feed_dict = {
                        cnn.input_x: x_batch,
                        cnn.input_y: y_batch,
                        cnn.dropout_keep_prob: 1.0,
                        cnn.istrue: True
                    }
                    step, summaries, loss, accuracy, correct_predict = sess.run(
                        [global_step, dev_summary_op, cnn.attention_loss,
                         cnn.attention_accuracy, cnn.attention_correct_predictions],
                        feed_dict)

                    print("on {}, test index: {:g}, Minibatch Loss: {:.6f}, acc: {:.5f}".format(
                        test_set, index, loss, accuracy))
                    destfile.write("on {}, test index: {:g}, Minibatch Loss: {:.6f}, acc: {:.5f}\n".format(
                        test_set, index, loss, accuracy))

                    total_loss += loss
                    total_acc += accuracy
                    index += 1
                    total_correct_predictions += np.sum(correct_predict)

                print("#################################################################\n")
                destfile.write("####################################################################\n\n")
                avg_loss = total_loss / index
                avg_acc = total_acc / index
                real_acc = (total_correct_predictions * 1.0) / total_dev_data

                print("on {}, average_Loss: {:.6f}, average_acc: {:.5f}, real_acc: {:.5f}\n".format(
                    test_set, avg_loss, avg_acc, real_acc))
                destfile.write("on {}, average_Loss: {:.6f}, average_acc: {:.5f}, real_acc: {:.5f}\n\n".format(
                    test_set, avg_loss, avg_acc, real_acc))

                if test_set == "dev":
                    dev_summary_writer.add_summary(summaries, step)

            for epoch in range(FLAGS.num_epochs):
                batches = data_helpers.batch_iter(list(zip(x_train, y_train)),
                                                  batch_size=FLAGS.batch_size,
                                                  seq_length=seq_max_len,
                                                  emmbedding_size=FLAGS.embedding_dim,
                                                  word2vec_vocab=word2vec_vocab,
                                                  word2vec_vec=word2vec_vec)
                # Training loop. For each batch...
                for batch in batches:
                    if len(batch) == 0:
                        continue
                    x_batch, y_batch = zip(*batch[0])
                    current_step = tf.train.global_step(sess, global_step)
                    train_step(x_batch, y_batch, epoch)

                if (epoch + 1) % FLAGS.checkpoint_every == 0:
                    path = saver.save(sess, checkpoint_prefix, global_step=(epoch + 1))
                    print("Saved model checkpoint to {}\n".format(path))

                if (epoch + 1) % FLAGS.evaluate_every == 0:
                    print("testing on dev set: ")
                    destfile.write("testing on dev set:\n")
                    dev_step(x_dev, y_dev)

                if (epoch + 1) % (FLAGS.evaluate_every * 3) == 0:
                    print("###############################################")
                    destfile.write("###############################################\n")
                    print("testing on train set: ")
                    destfile.write("testing on train set: \n")
                    dev_step(x_train, y_train, test_set="train")

            path = saver.save(sess, checkpoint_prefix, global_step=FLAGS.num_epochs)
            print("Saved model checkpoint to {}\n".format(path))
            print("Optimization Finished!")

            print("\nEvaluation:")
            dev_step(x_dev, y_dev)
            print("")
def train(x_train, y_train, vocab_processor, x_dev, y_dev):
    # Training
    # ==================================================
    with tf.Graph().as_default():
        session_conf = tf.ConfigProto(
            allow_soft_placement=FLAGS.allow_soft_placement,
            log_device_placement=FLAGS.log_device_placement)
        sess = tf.Session(config=session_conf)
        with sess.as_default():
            cnn = TextCNN(sequence_length=x_train.shape[1],
                          num_classes=y_train.shape[1],
                          vocab_size=len(vocab_processor.vocabulary_),
                          embedding_size=FLAGS.embedding_dim,
                          filter_sizes=list(map(int, FLAGS.filter_sizes.split(","))),
                          num_filters=FLAGS.num_filters,
                          l2_reg_lambda=FLAGS.l2_reg_lambda)

            # Define Training procedure
            global_step = tf.Variable(0, name="global_step", trainable=False)
            optimizer = tf.train.AdamOptimizer(1e-3)
            grads_and_vars = optimizer.compute_gradients(cnn.loss)
            train_op = optimizer.apply_gradients(grads_and_vars, global_step=global_step)

            # Keep track of gradient values and sparsity (optional)
            grad_summaries = []
            for g, v in grads_and_vars:
                if g is not None:
                    grad_hist_summary = tf.summary.histogram("{}/grad/hist".format(v.name), g)
                    sparsity_summary = tf.summary.scalar("{}/grad/sparsity".format(v.name),
                                                         tf.nn.zero_fraction(g))
                    grad_summaries.append(grad_hist_summary)
                    grad_summaries.append(sparsity_summary)
            grad_summaries_merged = tf.summary.merge(grad_summaries)

            # Output directory for models and summaries
            timestamp = str(int(time.time()))
            out_dir = os.path.abspath(os.path.join(os.path.curdir, "runs", timestamp))
            print("Writing to {}\n".format(out_dir))

            # Summaries for loss and accuracy
            loss_summary = tf.summary.scalar("loss", cnn.loss)
            acc_summary = tf.summary.scalar("accuracy", cnn.accuracy)

            # Train Summaries
            train_summary_op = tf.summary.merge([loss_summary, acc_summary, grad_summaries_merged])
            train_summary_dir = os.path.join(out_dir, "summaries", "train")
            train_summary_writer = tf.summary.FileWriter(train_summary_dir, sess.graph)

            # Dev summaries
            dev_summary_op = tf.summary.merge([loss_summary, acc_summary])
            dev_summary_dir = os.path.join(out_dir, "summaries", "dev")
            dev_summary_writer = tf.summary.FileWriter(dev_summary_dir, sess.graph)

            # Checkpoint directory. Tensorflow assumes this directory already exists, so we need to create it
            checkpoint_dir = os.path.abspath(os.path.join(out_dir, "checkpoints"))
            checkpoint_prefix = os.path.join(checkpoint_dir, "model")
            if not os.path.exists(checkpoint_dir):
                os.makedirs(checkpoint_dir)
            saver = tf.train.Saver(tf.global_variables(), max_to_keep=FLAGS.num_checkpoints)

            # Write vocabulary
            vocab_processor.save(os.path.join(out_dir, "vocab"))

            # Initialize all variables
            sess.run(tf.global_variables_initializer())

            def train_step(x_batch, y_batch):
                """
                A single training step
                """
                feed_dict = {
                    cnn.input_x: x_batch,
                    cnn.input_y: y_batch,
                    cnn.dropout_keep_prob: FLAGS.dropout_keep_prob
                }
                _, step, summaries, loss, accuracy = sess.run(
                    [train_op, global_step, train_summary_op, cnn.loss, cnn.accuracy],
                    feed_dict)
                time_str = datetime.datetime.now().isoformat()
                print("{}: step {}, loss {:g}, acc {:g}".format(time_str, step, loss, accuracy))
                train_summary_writer.add_summary(summaries, step)

            def dev_step(x_batch, y_batch, writer=None):
                """
                Evaluates model on a dev set
                """
                feed_dict = {
                    cnn.input_x: x_batch,
                    cnn.input_y: y_batch,
                    cnn.dropout_keep_prob: 1.0
                }
                step, summaries, loss, accuracy = sess.run(
                    [global_step, dev_summary_op, cnn.loss, cnn.accuracy],
                    feed_dict)
                time_str = datetime.datetime.now().isoformat()
                print("{}: step {}, loss {:g}, acc {:g}".format(time_str, step, loss, accuracy))
                if writer:
                    writer.add_summary(summaries, step)

            # Generate batches
            batches = data_helpers.batch_iter(list(zip(x_train, y_train)),
                                              FLAGS.batch_size, FLAGS.num_epochs)
            # Training loop. For each batch...
            for batch in batches:
                x_batch, y_batch = zip(*batch)
                train_step(x_batch, y_batch)
                current_step = tf.train.global_step(sess, global_step)
                if current_step % FLAGS.evaluate_every == 0:
                    print("\nEvaluation:")
                    dev_step(x_dev, y_dev, writer=dev_summary_writer)
                    print("")
                if current_step % FLAGS.checkpoint_every == 0:
                    path = saver.save(sess, checkpoint_prefix, global_step=current_step)
                    print("Saved model checkpoint to {}\n".format(path))
def train(args):
    # start logger
    time_str = datetime.datetime.now().isoformat()
    logger = Logger('log_{}'.format(time_str))

    # load data
    print("Loading data ...")
    logger.write("Loading data ...")
    x, y, vocab, vocab_inv, emb_vocab, num_classes = data_helpers.load_twitter_rnn()

    # split into k folds
    x_folds, y_folds = split_into_k_folds(x, y, args.num_folds)

    # fill out missing arg values
    args.vocab_size = len(vocab)
    args.seq_length = len(x[0])
    args.num_classes = num_classes
    if emb_vocab is not None:
        args.emb_vocab = emb_vocab
        args.rnn_size = len(emb_vocab[list(emb_vocab.keys())[0]][1])
    else:
        args.emb_vocab = None

    # report
    print("Vocabulary Size: {:d}".format(len(vocab)))
    print("Total/fold size: {:d}/{:d}".format(len(y), len(x_folds[0])))
    print("Sequence Length: {:d}".format(len(y[0])))
    logger.write("Vocabulary Size: {:d}".format(len(vocab)))
    logger.write("Total/fold size: {:d}/{:d}".format(len(y), len(x_folds[0])))
    logger.write("Sequence Length: {:d}".format(len(y[0])))

    # initialize a rnn model
    model = JinRNN(args)

    # define output directory
    time_str = datetime.datetime.now().isoformat()
    out_dir = os.path.abspath(os.path.join(os.path.curdir, args.save_dir, time_str))

    # prepare saver
    checkpoint_dir = os.path.join(out_dir, 'checkpoints')
    if not os.path.exists(checkpoint_dir):
        os.makedirs(checkpoint_dir)
    checkpoint_prefix = os.path.join(checkpoint_dir, "model")
    saver = tf.train.Saver(tf.all_variables())

    # start a session
    sess_conf = tf.ConfigProto(
        allow_soft_placement=args.allow_soft_placement,
        log_device_placement=args.log_device_placement)
    sess = tf.Session(config=sess_conf)

    with sess.as_default():
        # final results
        test_token_acc_list = []
        test_sentence_acc_list = []

        # for each fold
        for i in range(args.num_folds):
            # initialize
            tf.initialize_all_variables().run()

            # use ith fold as a testset, and the rest as a trainset
            x_test, y_test = x_folds[i], y_folds[i]
            x_train, y_train = np.array([]), np.array([])
            for j in range(args.num_folds):
                if j != i:
                    x_train = x_folds[j] if len(x_train) == 0 else np.concatenate((x_train, x_folds[j]), axis=0)
                    y_train = y_folds[j] if len(y_train) == 0 else np.concatenate((y_train, y_folds[j]), axis=0)

            print("Fold #{} Train/Test Size: {}/{}".format(i, len(y_train), len(y_test)))
            logger.write("Fold #{} Train/Test Size: {}/{}".format(i, len(y_train), len(y_test)))

            # generate train batches
            train_batches = data_helpers.batch_iter(x_train, y_train, args.batch_size, args.num_epochs)

            # current fold results
            curr_best_sentence_acc = 0.0
            curr_best_token_acc = 0.0

            # for each batch
            for x_train_batch, y_train_batch in train_batches:
                # obtain start time
                time_str = datetime.datetime.now().isoformat()

                # train
                feed = {model.inputs: x_train_batch,
                        model.targets: y_train_batch,
                        model.dropout_keep_prob: args.dropout_keep_prob}
                current_step, train_loss, _ = sess.run([model.global_step, model.cost, model.train_op], feed)
                sess.run(model.weight_clipping_op, feed)  # rescale weight
                # print("{}: step {}, loss {:g}".format(time_str, current_step, train_loss))

                # evaluate with test set
                if current_step % args.evaluate_every == 0:
                    print("\nEvaluation")
                    logger.write("\nEvaluation")

                    sum_accuracy = 0.0
                    sum_accuracy_sentence = 0.0
                    num_batches = 0
                    test_batches = data_helpers.batch_iter(x_test, y_test, args.batch_size, 1)
                    for x_test_batch, y_test_batch in test_batches:
                        feed = {model.inputs: x_test_batch,
                                model.targets: y_test_batch,
                                model.dropout_keep_prob: 1.0}
                        current_step, accuracy, accuracy_sentence, predictions_sentence, loss = sess.run(
                            [model.global_step, model.accuracy, model.accuracy_sentence,
                             model.predictions_sentence, model.cost], feed)

                        # for i in range(len(y_dev_batch)):
                        #     curr_sentence = x_dev_batch[i]
                        #     curr_target_codes = y_dev_batch[i]
                        #     curr_predicted_codes = predictions_sentence[i]
                        #
                        #     # to see if the model predicts some difficult examples correctly
                        #     if ((1 in list(curr_predicted_codes) or 2 in list(curr_predicted_codes))
                        #             and list(curr_predicted_codes) == list(curr_target_codes)):
                        #         print(' '.join([vocab_inv[e] for e in curr_sentence]))
                        #         print(curr_target_codes)
                        #         print(curr_predicted_codes)

                        # print("{}: step {}, token-accuracy {:g}, sentence-accuracy {:g}"
                        #       .format(time_str, current_step, accuracy, accuracy_sentence))

                        sum_accuracy += accuracy
                        sum_accuracy_sentence += accuracy_sentence
                        num_batches += 1

                    print("{}: step {}, token-accuracy {:g}, sentence-accuracy {:g}, loss {:g}\n".format(
                        time_str, current_step, sum_accuracy / num_batches,
                        sum_accuracy_sentence / num_batches, loss))
                    logger.write("{}: step {}, token-accuracy {:g}, sentence-accuracy {:g}, loss {:g}\n".format(
                        time_str, current_step, sum_accuracy / num_batches,
                        sum_accuracy_sentence / num_batches, loss))

                    # set the best result for the current fold
                    curr_best_sentence_acc = max(curr_best_sentence_acc, sum_accuracy_sentence / num_batches)
                    curr_best_token_acc = max(curr_best_token_acc, sum_accuracy / num_batches)

                # save the model
                if current_step % args.save_every == 0:
                    path = saver.save(sess, checkpoint_prefix, global_step=current_step)
                    print("Saved model checkpoint to {}\n".format(path))
                    logger.write("Saved model checkpoint to {}\n".format(path))

            print("-------------------------------------------------------------")
            print("Fold #{} RESULTS: token-accuracy {:g}, sentence-accuracy {:g}"
                  .format(i, curr_best_token_acc, curr_best_sentence_acc))
            print("-------------------------------------------------------------")
            logger.write("-------------------------------------------------------------")
            logger.write("Fold #{} RESULTS: token-accuracy {:g}, sentence-accuracy {:g}"
                         .format(i, curr_best_token_acc, curr_best_sentence_acc))
            logger.write("-------------------------------------------------------------")

            # add to the results list
            test_sentence_acc_list.append(curr_best_sentence_acc)
            test_token_acc_list.append(curr_best_token_acc)

        print("==========================================================")
        print("FINAL RESULTS: token-accuracy {:g}, sentence-accuracy {:g}"
              .format(np.mean(test_token_acc_list), np.mean(test_sentence_acc_list)))
        print("==========================================================")
        logger.write("==========================================================")
        logger.write("FINAL RESULTS: token-accuracy {:g}, sentence-accuracy {:g}"
                     .format(np.mean(test_token_acc_list), np.mean(test_sentence_acc_list)))
        logger.write("==========================================================")
def test_cnn():
    """Test CNN model."""
    # Load data
    logger.info("✔ Loading data...")
    logger.info('Recommended padding sequence length is: {}'.format(FLAGS.pad_seq_len))

    logger.info('✔︎ Test data processing...')
    test_data = data_helpers.load_data_and_labels(FLAGS.test_data_file,
                                                  FLAGS.num_classes,
                                                  FLAGS.embedding_dim)

    logger.info('✔︎ Test data padding...')
    x_test, y_test = data_helpers.pad_data(test_data, FLAGS.pad_seq_len)
    y_test_bind = test_data.labels_bind

    # Build vocabulary
    VOCAB_SIZE = data_helpers.load_vocab_size(FLAGS.embedding_dim)
    pretrained_word2vec_matrix = data_helpers.load_word2vec_matrix(VOCAB_SIZE, FLAGS.embedding_dim)

    # Load cnn model
    logger.info("✔ Loading model...")
    checkpoint_file = tf.train.latest_checkpoint(FLAGS.checkpoint_dir)
    logger.info(checkpoint_file)

    graph = tf.Graph()
    with graph.as_default():
        session_conf = tf.ConfigProto(
            allow_soft_placement=FLAGS.allow_soft_placement,
            log_device_placement=FLAGS.log_device_placement)
        session_conf.gpu_options.allow_growth = FLAGS.gpu_options_allow_growth
        sess = tf.Session(config=session_conf)
        with sess.as_default():
            # Load the saved meta graph and restore variables
            saver = tf.train.import_meta_graph("{}.meta".format(checkpoint_file))
            saver.restore(sess, checkpoint_file)

            # Get the placeholders from the graph by name
            input_x = graph.get_operation_by_name("input_x").outputs[0]
            # input_y = graph.get_operation_by_name("input_y").outputs[0]
            dropout_keep_prob = graph.get_operation_by_name("dropout_keep_prob").outputs[0]

            # pre-trained word2vec
            pretrained_embedding = graph.get_operation_by_name("embedding/W").outputs[0]

            # Tensors we want to evaluate
            logits = graph.get_operation_by_name("output/logits").outputs[0]

            # Generate batches for one epoch
            batches = data_helpers.batch_iter(list(zip(x_test, y_test, y_test_bind)),
                                              FLAGS.batch_size, 1, shuffle=False)

            # Collect the predictions here
            all_predictions = []
            eval_loss, eval_rec, eval_acc, eval_counter = 0.0, 0.0, 0.0, 0
            for batch_test in batches:
                x_batch_test, y_batch_test, y_batch_test_bind = zip(*batch_test)
                feed_dict = {input_x: x_batch_test, dropout_keep_prob: 1.0}
                batch_logits = sess.run(logits, feed_dict)

                if FLAGS.use_classbind_or_not == 'Y':
                    predicted_labels = data_helpers.get_label_using_logits_and_classbind(
                        batch_logits, y_batch_test_bind, top_number=FLAGS.top_num)
                if FLAGS.use_classbind_or_not == 'N':
                    predicted_labels = data_helpers.get_label_using_logits(
                        batch_logits, top_number=FLAGS.top_num)

                all_predictions = np.append(all_predictions, predicted_labels)
                cur_rec, cur_acc = 0.0, 0.0
                for index, predicted_label in enumerate(predicted_labels):
                    rec_inc, acc_inc = data_helpers.cal_rec_and_acc(predicted_label, y_batch_test[index])
                    cur_rec, cur_acc = cur_rec + rec_inc, cur_acc + acc_inc

                cur_rec = cur_rec / len(y_batch_test)
                cur_acc = cur_acc / len(y_batch_test)

                eval_rec, eval_acc, eval_counter = eval_rec + cur_rec, eval_acc + cur_acc, eval_counter + 1
                logger.info("✔︎ validation batch {} finished.".format(eval_counter))

            eval_rec = float(eval_rec / eval_counter)
            eval_acc = float(eval_acc / eval_counter)
            logger.info("☛ Recall {:g}, Accuracy {:g}".format(eval_rec, eval_acc))
            np.savetxt(SAVE_FILE, list(zip(all_predictions)), fmt='%s')

    logger.info("✔ Done.")
def My_main():
    x_train, x_dev, y_train, y_dev, seq_max_len, vocabulary, vocabulary_inv, word2vec_vocab, word2vec_vec = read_from_dataset(
        FLAGS.input_dataset_path, FLAGS.word2vec_model_path, FLAGS.n_classes, FLAGS.max_seq_len_cutoff)

    with tf.Graph().as_default():
        session_conf = tf.ConfigProto(
            allow_soft_placement=FLAGS.allow_soft_placement,
            log_device_placement=FLAGS.log_device_placement)
        sess = tf.Session(config=session_conf)
        with sess.as_default():
            cnn = TextCNN(
                sequence_length=seq_max_len,
                num_classes=FLAGS.n_classes,
                embedding_size=FLAGS.embedding_dim,
                filter_sizes=list(map(int, FLAGS.filter_sizes.split(","))),
                num_filters=FLAGS.num_filters,
                vocab_size=len(vocabulary),
                l2_reg_lambda=FLAGS.l2_reg_lambda)

            # Define Training procedure
            global_step = tf.Variable(0, name="global_step", trainable=False)
            optimizer = tf.train.AdamOptimizer(1e-3)
            grads_and_vars = optimizer.compute_gradients(cnn.loss)
            train_op = optimizer.apply_gradients(grads_and_vars, global_step=global_step)

            # Keep track of gradient values and sparsity
            grad_summaries = []
            for g, v in grads_and_vars:
                if g is not None:
                    grad_hist_summary = tf.summary.histogram("{}/grad/hist".format(v.name), g)
                    sparsity_summary = tf.summary.scalar("{}/grad/sparsity".format(v.name),
                                                         tf.nn.zero_fraction(g))
                    grad_summaries.append(grad_hist_summary)
                    grad_summaries.append(sparsity_summary)
            grad_summaries_merged = tf.summary.merge(grad_summaries)

            # Output directory for models and summaries
            timestamp = str(int(time.time()))
            out_dir = os.path.abspath(os.path.join(os.path.curdir, "runs", timestamp))
            print("Writing to {}\n".format(out_dir))

            # Summaries for loss and accuracy
            loss_summary = tf.summary.scalar("loss", cnn.loss)
            acc_summary = tf.summary.scalar("accuracy", cnn.accuracy)

            # Train Summaries
            train_summary_op = tf.summary.merge([loss_summary, acc_summary, grad_summaries_merged])
            train_summary_dir = os.path.join(out_dir, "summaries", "train")
            train_summary_writer = tf.summary.FileWriter(train_summary_dir, sess.graph)

            # Dev summaries
            dev_summary_op = tf.summary.merge([loss_summary, acc_summary])
            dev_summary_dir = os.path.join(out_dir, "summaries", "dev")
            dev_summary_writer = tf.summary.FileWriter(dev_summary_dir, sess.graph)

            # Checkpoint directory. Tensorflow assumes this directory already exists, so we need to create it
            checkpoint_dir = os.path.abspath(os.path.join(out_dir, "checkpoints"))
            checkpoint_prefix = os.path.join(checkpoint_dir, "model")
            if not os.path.exists(checkpoint_dir):
                os.makedirs(checkpoint_dir)
            saver = tf.train.Saver(tf.global_variables(), max_to_keep=50)

            sess.run(tf.global_variables_initializer())

            def train_step(x_batch, y_batch, epoch_num):
                """
                A single training step
                """
                feed_dict = {
                    cnn.input_x: x_batch,
                    cnn.input_y: y_batch,
                    cnn.dropout_keep_prob: FLAGS.dropout_keep_prob
                }
                _, step, summaries, loss, accuracy, correct_predict, weigth_norm = sess.run(
                    [train_op, global_step, train_summary_op, cnn.loss,
                     cnn.accuracy, cnn.correct_predictions, cnn.weigth_norm],
                    feed_dict)

                print("epoch: {:g}, iteration: {:g}, weigth_norm: {:.6f}, loss: {:.4f}, acc: {:.4f}".format(
                    epoch_num, step, weigth_norm, loss, accuracy))
                destfile.write("epoch: {:g}, iteration: {:g}, weigth_norm: {:.6f}, loss: {:.4f}, acc: {:.4f}\n".format(
                    epoch_num, step, weigth_norm, loss, accuracy))
                train_summary_writer.add_summary(summaries, step)

            def dev_step(x_batch, y_batch, test_set="dev"):
                """
                Evaluates model on a dev set
                """
                dev_baches = data_helpers.batch_iter(list(zip(x_batch, y_batch)),
                                                     batch_size=FLAGS.batch_size,
                                                     seq_length=seq_max_len,
                                                     emmbedding_size=FLAGS.embedding_dim,
                                                     word2vec_vocab=word2vec_vocab,
                                                     word2vec_vec=word2vec_vec,
                                                     is_shuffle=False)
                total_loss = 0
                total_acc = 0
                index = 0
                total_correct_predictions = 0
                total_dev_data = 0
                for batch in dev_baches:
                    if len(batch[0]) == 0:
                        continue
                    x_batch, y_batch = zip(*batch[0])
                    total_dev_data += len(x_batch)

                    feed_dict = {
                        cnn.input_x: x_batch,
                        cnn.input_y: y_batch,
                        cnn.dropout_keep_prob: 1.0
                    }
                    step, summaries, loss, accuracy, correct_predict = sess.run(
                        [global_step, dev_summary_op, cnn.loss, cnn.accuracy, cnn.correct_predictions],
                        feed_dict)

                    print("on {}, test index: {:g}, Minibatch Loss: {:.6f}, acc: {:.5f}".format(
                        test_set, index, loss, accuracy))
                    destfile.write("on {}, test index: {:g}, Minibatch Loss: {:.6f}, acc: {:.5f}\n".format(
                        test_set, index, loss, accuracy))

                    total_loss += loss
                    total_acc += accuracy
                    index += 1
                    total_correct_predictions += np.sum(correct_predict)

                print("#################################################################\n")
                destfile.write("####################################################################\n\n")
                avg_loss = total_loss / index
                avg_acc = total_acc / index
                real_acc = (total_correct_predictions * 1.0) / total_dev_data

                print("on {}, average_Loss: {:.6f}, average_acc: {:.5f}, real_acc: {:.5f}\n".format(
                    test_set, avg_loss, avg_acc, real_acc))
                destfile.write("on {}, average_Loss: {:.6f}, average_acc: {:.5f}, real_acc: {:.5f}\n\n".format(
                    test_set, avg_loss, avg_acc, real_acc))

                if test_set == "dev":
                    dev_summary_writer.add_summary(summaries, step)
                return avg_loss, real_acc

            train_loss_list = []
            train_acc_list = []
            test_loss_list = []
            test_acc_list = []

            for epoch in range(FLAGS.num_epochs):
                batches = data_helpers.batch_iter(list(zip(x_train, y_train)),
                                                  batch_size=FLAGS.batch_size,
                                                  seq_length=seq_max_len,
                                                  emmbedding_size=FLAGS.embedding_dim,
                                                  word2vec_vocab=word2vec_vocab,
                                                  word2vec_vec=word2vec_vec)
                # Training loop. For each batch...
                for batch in batches:
                    if len(batch[0]) == 0:
                        continue
                    x_batch, y_batch = zip(*batch[0])
                    current_step = tf.train.global_step(sess, global_step)
                    train_step(x_batch, y_batch, epoch)

                if (epoch + 1) % FLAGS.checkpoint_every == 0:
                    path = saver.save(sess, checkpoint_prefix, global_step=(epoch + 1))
                    print("Saved model checkpoint to {}\n".format(path))

                if (epoch + 1) % FLAGS.evaluate_every == 0:
                    print("testing on dev set: ")
                    destfile.write("testing on dev set:\n")
                    avg_loss, real_acc = dev_step(x_dev, y_dev)
                    test_loss_list.append(avg_loss)
                    test_acc_list.append(real_acc)

                    if FLAGS.is_evaluation == True:
                        print("###############################################")
                        destfile.write("###############################################\n")
                        print("testing on train set: ")
                        destfile.write("testing on train set: \n")
                        avg_loss, real_acc = dev_step(x_train, y_train, test_set="train")
                        train_loss_list.append(avg_loss)
                        train_acc_list.append(real_acc)

            path = saver.save(sess, checkpoint_prefix, global_step=FLAGS.num_epochs)
            print("Saved model checkpoint to {}\n".format(path))
            print("Optimization Finished!")

            print("\nEvaluation:")
            dev_step(x_dev, y_dev)
            print("")

    return train_acc_list, train_loss_list, test_acc_list, test_loss_list
else:
    log('Data processing OK, creating network...')

sess.run(tf.global_variables_initializer())

# Summaries for loss and accuracy
loss_summary = tf.summary.scalar('Training loss', cross_entropy)
valid_loss_summary = tf.summary.scalar('Validation loss', valid_mean_loss)
valid_accuracy_summary = tf.summary.scalar('Validation accuracy', valid_mean_accuracy)
summary_writer = tf.summary.FileWriter(SUMMARY_DIR, sess.graph)
tf.summary.merge_all()

# Training
if FLAGS.train:
    # Batches
    batches = batch_iter(zip(x_train, y_train), FLAGS.batch_size, FLAGS.epochs)
    test_batches = list(batch_iter(zip(x_test, y_test), FLAGS.batch_size, 1))
    my_batch = next(batches)  # To use with human_readable_output()

    # Pretty-printing variables
    global_step = 0
    batches_in_epoch = len(y_train) / FLAGS.batch_size
    batches_in_epoch = batches_in_epoch if batches_in_epoch != 0 else 1
    total_num_step = FLAGS.epochs * batches_in_epoch

    batches_progressbar = tqdm(batches, total=total_num_step, desc='Starting training...')

    for batch in batches_progressbar:
        global_step += 1
def evaluate():
    # parse arguments
    FLAGS(sys.argv)
    print(FLAGS.batch_size)

    # map data into vocabulary
    vocab_path = os.path.join(FLAGS.checkpoint_dir, "..", "vocab")
    print(vocab_path)
    vocab_processor = learn.preprocessing.VocabularyProcessor.restore(vocab_path)

    # CHANGE THIS: Load data. Load your own data here
    if FLAGS.eval_train:
        x_raw, y_test = data_helpers.load_data_and_labels(FLAGS.positive_data_file, FLAGS.negative_data_file)
        y_test = np.argmax(y_test, axis=1)
    else:
        x_raw = ["a masterpiece four years in the making", "everything is off."]
        y_test = [1, 0]

    x_test = np.array(list(vocab_processor.transform(x_raw)))

    print("\nEvaluating...\n")

    # Evaluation
    # ==================================================
    checkpoint_file = tf.train.latest_checkpoint(FLAGS.checkpoint_dir)
    graph = tf.Graph()
    with graph.as_default():
        session_conf = tf.ConfigProto(
            allow_soft_placement=FLAGS.allow_soft_placement,
            log_device_placement=FLAGS.log_device_placement)
        sess = tf.Session(config=session_conf)
        with sess.as_default():
            # Load the saved meta graph and restore variables
            saver = tf.train.import_meta_graph("{}.meta".format(checkpoint_file))
            saver.restore(sess, checkpoint_file)

            # Get the placeholders from the graph by name
            input_x = graph.get_operation_by_name("input_x").outputs[0]
            # input_y = graph.get_operation_by_name("input_y").outputs[0]
            dropout_keep_prob = graph.get_operation_by_name("dropout_keep_prob").outputs[0]

            # Tensors we want to evaluate
            predictions = graph.get_operation_by_name("cnn_output/predictions").outputs[0]

            # Generate batches for one epoch
            batches = data_helpers.batch_iter(list(x_test), FLAGS.batch_size, 1, shuffle=False)

            # Collect the predictions here
            all_predictions = []
            for x_test_batch in batches:
                batch_predictions = sess.run(predictions,
                                             {input_x: x_test_batch, dropout_keep_prob: 1.0})
                all_predictions = np.concatenate([all_predictions, batch_predictions])

    # Print accuracy if y_test is defined
    if y_test is not None:
        correct_predictions = float(sum(all_predictions == y_test))
        print("Total number of test examples: {}".format(len(y_test)))
        print("Accuracy: {:g}".format(correct_predictions / float(len(y_test))))

    # Save the evaluation to a csv
    predictions_human_readable = np.column_stack((np.array(x_raw), all_predictions))
    out_path = os.path.join(FLAGS.checkpoint_dir, "..", "prediction.csv")
    print("Saving evaluation to {0}".format(out_path))
    import csv
    with open(out_path, 'w') as f:
        csv.writer(f).writerows(predictions_human_readable)
def train():
    with tf.device('/cpu:0'):
        x_text, y = data_helpers.load_data_and_labels(FLAGS.pos_dir, FLAGS.neg_dir)

    text_vocab_processor = tf.contrib.learn.preprocessing.VocabularyProcessor(
        FLAGS.max_sentence_length, min_frequency=FLAGS.min_frequency)
    x = np.array(list(text_vocab_processor.fit_transform(x_text)))
    print("Text Vocabulary Size: {:d}".format(len(text_vocab_processor.vocabulary_)))

    print("x = {0}".format(x.shape))
    print("y = {0}".format(y.shape))
    print("")

    # Randomly shuffle data
    np.random.seed(10)
    shuffle_indices = np.random.permutation(np.arange(len(y)))
    x_shuffled = x[shuffle_indices]
    y_shuffled = y[shuffle_indices]

    # Split train/test set
    # TODO: This is very crude, should use cross-validation
    dev_sample_index = -1 * int(FLAGS.dev_sample_percentage * float(len(y)))
    x_train, x_dev = x_shuffled[:dev_sample_index], x_shuffled[dev_sample_index:]
    y_train, y_dev = y_shuffled[:dev_sample_index], y_shuffled[dev_sample_index:]
    print("Train/Dev split: {:d}/{:d}\n".format(len(y_train), len(y_dev)))

    with tf.Graph().as_default():
        session_conf = tf.ConfigProto(
            allow_soft_placement=FLAGS.allow_soft_placement,
            log_device_placement=FLAGS.log_device_placement)
        sess = tf.Session(config=session_conf)
        with sess.as_default():
            rcnn = HAN(sequence_length=x_train.shape[1],
                       num_classes=y_train.shape[1],
                       vocab_size=len(text_vocab_processor.vocabulary_),
                       word_embedding_size=FLAGS.word_embedding_dim,
                       context_embedding_size=FLAGS.context_embedding_dim,
                       attention_size=FLAGS.attention_size,
                       hidden_size=FLAGS.hidden_size,
                       l2_reg_lambda=FLAGS.l2_reg_lambda)

            # Define Training procedure
            global_step = tf.Variable(0, name="global_step", trainable=False)
            train_op = tf.train.AdamOptimizer(FLAGS.learning_rate).minimize(
                rcnn.loss, global_step=global_step)

            # Output directory for models and summaries
            timestamp = str(int(time.time()))
            out_dir = os.path.abspath(os.path.join(os.path.curdir, "runs", timestamp))
            print("Writing to {}\n".format(out_dir))

            # Summaries for loss and accuracy
            loss_summary = tf.summary.scalar("loss", rcnn.loss)
            acc_summary = tf.summary.scalar("accuracy", rcnn.accuracy)

            # Train Summaries
            train_summary_op = tf.summary.merge([loss_summary, acc_summary])
            train_summary_dir = os.path.join(out_dir, "summaries", "train")
            train_summary_writer = tf.summary.FileWriter(train_summary_dir, sess.graph)

            # Dev summaries
            dev_summary_op = tf.summary.merge([loss_summary, acc_summary])
            dev_summary_dir = os.path.join(out_dir, "summaries", "dev")
            dev_summary_writer = tf.summary.FileWriter(dev_summary_dir, sess.graph)

            # Checkpoint directory. Tensorflow assumes this directory already exists, so we need to create it
            checkpoint_dir = os.path.abspath(os.path.join(out_dir, "checkpoints"))
            checkpoint_prefix = os.path.join(checkpoint_dir, "model")
            if not os.path.exists(checkpoint_dir):
                os.makedirs(checkpoint_dir)
            saver = tf.train.Saver(tf.global_variables(), max_to_keep=FLAGS.num_checkpoints)

            # Write vocabulary
            text_vocab_processor.save(os.path.join(out_dir, "text_vocab"))

            # Initialize all variables
            sess.run(tf.global_variables_initializer())

            # Pre-trained word2vec
            if FLAGS.word2vec:
                # initial matrix with random uniform
                initW = np.random.uniform(-0.25, 0.25,
                                          (len(text_vocab_processor.vocabulary_), FLAGS.word_embedding_dim))
                # load any vectors from the word2vec
                print("Load word2vec file {0}".format(FLAGS.word2vec))
                with open(FLAGS.word2vec, "rb") as f:
                    header = f.readline()
                    vocab_size, layer1_size = map(int, header.split())
                    binary_len = np.dtype('float32').itemsize * layer1_size
                    for line in range(vocab_size):
                        word = []
                        while True:
                            ch = f.read(1).decode('latin-1')
                            if ch == ' ':
                                word = ''.join(word)
                                break
                            if ch != '\n':
                                word.append(ch)
                        idx = text_vocab_processor.vocabulary_.get(word)
                        if idx != 0:
                            initW[idx] = np.fromstring(f.read(binary_len), dtype='float32')
                        else:
                            f.read(binary_len)
                sess.run(rcnn.W_text.assign(initW))
                print("Successfully loaded the pre-trained word2vec model!\n")

            # Generate batches
            batches = data_helpers.batch_iter(list(zip(x_train, y_train)),
                                              FLAGS.batch_size, FLAGS.num_epochs)
            # Training loop. For each batch...
            for batch in batches:
                x_batch, y_batch = zip(*batch)

                # Train
                feed_dict = {
                    rcnn.input_text: x_batch,
                    rcnn.input_y: y_batch,
                    rcnn.dropout_keep_prob: FLAGS.dropout_keep_prob
                }
                _, step, summaries, loss, accuracy = sess.run(
                    [train_op, global_step, train_summary_op, rcnn.loss, rcnn.accuracy],
                    feed_dict)
                train_summary_writer.add_summary(summaries, step)

                # Training log display
                if step % FLAGS.display_every == 0:
                    time_str = datetime.datetime.now().isoformat()
                    print("{}: step {}, loss {:g}, acc {:g}".format(time_str, step, loss, accuracy))

                # Evaluation
                if step % FLAGS.evaluate_every == 0:
                    print("\nEvaluation:")
                    feed_dict_dev = {
                        rcnn.input_text: x_dev,
                        rcnn.input_y: y_dev,
                        rcnn.dropout_keep_prob: 1.0
                    }
                    summaries_dev, loss, accuracy = sess.run(
                        [dev_summary_op, rcnn.loss, rcnn.accuracy], feed_dict_dev)
                    dev_summary_writer.add_summary(summaries_dev, step)

                    time_str = datetime.datetime.now().isoformat()
                    print("{}: step {}, loss {:g}, acc {:g}\n".format(time_str, step, loss, accuracy))

                # Model checkpoint
                if step % FLAGS.checkpoint_every == 0:
                    path = saver.save(sess, checkpoint_prefix, global_step=step)
                    print("Saved model checkpoint to {}\n".format(path))
def eval():
    with tf.device('/gpu:0'):
        x_text, y, desc1, desc2, wType, type_index = data_helpers.load_data_and_labels(FLAGS.test_path)

    text_path = os.path.join(FLAGS.checkpoint_dir, "..", "vocab")
    text_vocab_processor = tf.contrib.learn.preprocessing.VocabularyProcessor.restore(text_path)
    x = np.array(list(text_vocab_processor.transform(x_text)))

    checkpoint_file = tf.train.latest_checkpoint(FLAGS.checkpoint_dir)

    graph = tf.Graph()
    with graph.as_default():
        session_conf = tf.ConfigProto(
            allow_soft_placement=FLAGS.allow_soft_placement,
            log_device_placement=FLAGS.log_device_placement)
        session_conf.gpu_options.allow_growth = FLAGS.gpu_allow_growth
        sess = tf.Session(config=session_conf)
        with sess.as_default():
            # Load the saved meta graph and restore variables
            saver = tf.train.import_meta_graph("{}.meta".format(checkpoint_file))
            saver.restore(sess, checkpoint_file)

            # Get the placeholders from the graph by name
            input_text = graph.get_operation_by_name("input_text").outputs[0]
            # input_y = graph.get_operation_by_name("input_y").outputs[0]
            emb_dropout_keep_prob = graph.get_operation_by_name("emb_dropout_keep_prob").outputs[0]
            rnn_dropout_keep_prob = graph.get_operation_by_name("rnn_dropout_keep_prob").outputs[0]
            dropout_keep_prob = graph.get_operation_by_name("dropout_keep_prob").outputs[0]

            # Tensors we want to evaluate
            predictions = graph.get_operation_by_name("output/predictions").outputs[0]

            # Generate batches for one epoch
            batches = data_helpers.batch_iter(list(x), FLAGS.batch_size, 1, shuffle=False)

            # Collect the predictions here
            preds = []
            for x_batch in batches:
                pred = sess.run(predictions, {
                    input_text: x_batch,
                    emb_dropout_keep_prob: 1.0,
                    rnn_dropout_keep_prob: 1.0,
                    dropout_keep_prob: 1.0
                })
                preds.append(pred)
            preds = np.concatenate(preds)
            truths = np.argmax(y, axis=1)
            print(truths)

            result = [[0, 0, 0, 0, 0, 0],
                      [0, 0, 0, 0, 0, 0],
                      [0, 0, 0, 0, 0, 0],
                      [0, 0, 0, 0, 0, 0],
                      [0, 0, 0, 0, 0, 0],
                      [0, 0, 0, 0, 0, 0]]
            # result = [[0,0,0,0],[0,0,0,0],[0,0,0,0],[0,0,0,0]]
            for i in range(len(preds)):
                result[truths[i]][preds[i]] += 1

            print("===the prediction result===")
            print("\t0\t1\t2\t3\t4\t5")
            count = 0
            for i in range(len(result)):
                print(str(count) + "\t" + str(result[i][0]) + "\t" + str(result[i][1]) + "\t" +
                      str(result[i][2]) + "\t" + str(result[i][3]) + "\t" +
                      str(result[i][4]) + "\t" + str(result[i][5]))
                count += 1

            precision = []
            recall = []
            for j in range(len(result)):
                p = round(result[j][j] / sum(result[j]), 3) * 100
                col = [x[j] for x in result]
                r = round(result[j][j] / sum(col), 3) * 100
                precision.append(p)
                recall.append(r)

            f1_scores = []
            for k in range(len(precision)):
                if (precision[k] + recall[k]) == 0:
                    f1_scores.append(0)
                else:
                    f1 = round((2 * precision[k] * recall[k]) / (precision[k] + recall[k]), 1)
                    f1_scores.append(f1)

            print(precision, recall, f1_scores)
            relationName = ["before", "after", "simultaneous", "include", "be_included", "vague"]
            for l in range(6):
                print(relationName[l] + " acc:" + str(precision[l]) + "%, recall:" + str(recall[l]) +
                      "%, f1:" + str(f1_scores[l]) + "%")

            precision_ave = round(sum(precision) / 6, 1)
            recall_ave = round(sum(recall) / 6, 1)
            # f1_score_ave = round(sum(f1_scores) / 6, 1)
            f1_score_ave = f1_score(truths, preds, labels=np.array(range(6)), average="micro")
            print("acc_avg:" + str(precision_ave) + "%, recall_avg:" + str(recall_ave) +
                  "%, f1:" + str(f1_score_ave) + "%")
            print("modelFile:" + str(FLAGS.checkpoint_dir))
print("{}: step {}, loss {:g}, acc {:g}".format( time_str, step, loss, accuracy)) if writer: writer.add_summary(summaries, step) # Generate batches data = np.array(list(zip(x_train, y_train))) data_size = len(data) num_batches_per_epoch = int((len(data) - 1) / FLAGS.batch_size) + 1 for epoch in range(FLAGS.num_epochs): # Shuffle the data at each epoch shuffle_indices = np.random.permutation(np.arange(data_size)) shuffled_data = data[shuffle_indices] batches = data_helpers.batch_iter(data, FLAGS.batch_size, data_size, num_batches_per_epoch) # Training loop. For each batch... epoch_size = 0 for batch in batches: # if epoch_size >= 100: # break x_batch, y_batch = zip(*batch) # print(x_batch) # print(y_batch) train_step(x_batch, y_batch, epoch_size, num_batches_per_epoch) epoch_size += 1 current_step = tf.train.global_step(sess, global_step) print("\nEvaluation:") #dev_step(x_dev, y_dev, writer=dev_summary_writer) print("") if FLAGS.save_path:
def main(_):
    # FLAGS._parse_flags()
    # print("\nParameters:")
    # for attr, value in sorted(FLAGS.items()):
    #     print("{}={}".format(attr.upper(), value))
    # print("")

    # Data Preparation
    # ==================================================
    # Load data
    print("Loading data...")
    x_text, y = data_helpers.load_data_and_labels(FLAGS.train_file, FLAGS.num_class)

    # Build vocabulary
    if FLAGS.embedding_type == "random":
        vocab_processor = learn.preprocessing.VocabularyProcessor(FLAGS.max_length)
        x = np.array(list(vocab_processor.fit_transform(x_text)))
        print("Vocabulary Size: {:d}".format(len(vocab_processor.vocabulary_)))
    elif FLAGS.embedding_type == "none-static":
        x, w2v = [], KeyedVectors.load_word2vec_format(FLAGS.word2vec_model, binary=False)
        vocab, embeddings = w2v.vocab, np.zeros((len(w2v.index2word), w2v.vector_size), dtype=np.float32)
        for k, v in vocab.items():
            embeddings[v.index] = w2v[k]
        for item in x_text:
            x.append([
                w2v.vocab[word].index if word in w2v.vocab else w2v.vocab["__UNK__"].index
                for word in item.split(" ")
            ])
        x = np.array(x, dtype=np.int32)
        print("Vocabulary Size: {:d}".format(len(w2v.vocab)))
    else:
        raise RuntimeError("embedding_type must be 'random' or 'none-static'")

    # Randomly shuffle data
    np.random.seed(10)
    shuffle_indices = np.random.permutation(np.arange(len(y)))
    x_shuffled = x[shuffle_indices]
    y_shuffled = y[shuffle_indices]

    # Split train/test set
    # TODO: This is very crude, should use cross-validation
    dev_sample_index = -1 * int(FLAGS.dev_sample_percentage * float(len(y)))
    x_train, x_dev = x_shuffled[:dev_sample_index], x_shuffled[dev_sample_index:]
    y_train, y_dev = y_shuffled[:dev_sample_index], y_shuffled[dev_sample_index:]
    del x, y, x_shuffled, y_shuffled
    print("Train/Dev split: {:d}/{:d}".format(len(y_train), len(y_dev)))

    # Training
    # ==================================================
    with tf.Graph().as_default():
        session_conf = tf.ConfigProto(
            allow_soft_placement=FLAGS.allow_soft_placement,
            log_device_placement=FLAGS.log_device_placement)
        sess = tf.Session(config=session_conf)
        with sess.as_default():
            if FLAGS.embedding_type == "random":
                cnn = TextCNN(sequence_length=FLAGS.max_length,
                              num_classes=FLAGS.num_class,
                              vocab_size=len(vocab_processor.vocabulary_),
                              embedding_size=FLAGS.embedding_dim,
                              filter_sizes=list(map(int, FLAGS.filter_sizes.split(","))),
                              num_filters=FLAGS.num_filters,
                              l2_reg_lambda=FLAGS.l2_reg_lambda)
            elif FLAGS.embedding_type == "none-static":
                cnn = TextCNN(sequence_length=FLAGS.max_length,
                              num_classes=FLAGS.num_class,
                              embedding=embeddings,
                              embedding_size=embeddings.shape[1],
                              filter_sizes=list(map(int, FLAGS.filter_sizes.split(","))),
                              num_filters=FLAGS.num_filters,
                              l2_reg_lambda=FLAGS.l2_reg_lambda)

            # Define Training procedure
            global_step = tf.Variable(0, name="global_step", trainable=False)
            lr = tf.train.exponential_decay(FLAGS.learning_rate, global_step, 2500, 0.8, staircase=True)
            optimizer = tf.train.GradientDescentOptimizer(lr)
            grads_and_vars = optimizer.compute_gradients(cnn.loss)
            train_op = optimizer.apply_gradients(grads_and_vars, global_step=global_step)

            # Keep track of gradient values and sparsity (optional)
            grad_summaries = []
            for g, v in grads_and_vars:
                if g is not None:
                    grad_hist_summary = tf.summary.histogram("{}/grad/hist".format(v.name), g)
                    sparsity_summary = tf.summary.scalar("{}/grad/sparsity".format(v.name),
                                                         tf.nn.zero_fraction(g))
                    grad_summaries.append(grad_hist_summary)
                    grad_summaries.append(sparsity_summary)
            grad_summaries_merged = tf.summary.merge(grad_summaries)

            # Output directory for models and summaries
            timestamp = str(int(time.time()))
            out_dir = os.path.abspath(os.path.join(os.path.curdir, "runs", timestamp))
            print("Writing to {}\n".format(out_dir))

            # Summaries for loss and accuracy
            loss_summary = tf.summary.scalar("loss", cnn.loss)
            acc_summary = tf.summary.scalar("accuracy", cnn.accuracy)

            # Train Summaries
            train_summary_op = tf.summary.merge([loss_summary, acc_summary, grad_summaries_merged])
            train_summary_dir = os.path.join(out_dir, "summaries", "train")
            train_summary_writer = tf.summary.FileWriter(train_summary_dir, sess.graph)

            # Dev summaries
            dev_summary_op = tf.summary.merge([loss_summary, acc_summary])
            dev_summary_dir = os.path.join(out_dir, "summaries", "dev")
            dev_summary_writer = tf.summary.FileWriter(dev_summary_dir, sess.graph)

            # Checkpoint directory. Tensorflow assumes this directory already exists so we need to create it
            checkpoint_dir = os.path.abspath(os.path.join(out_dir, "checkpoints"))
            checkpoint_prefix = os.path.join(checkpoint_dir, "model")
            if not os.path.exists(checkpoint_dir):
                os.makedirs(checkpoint_dir)
            saver = tf.train.Saver(tf.global_variables(),
                                   max_to_keep=FLAGS.num_checkpoints,
                                   save_relative_paths=True)

            # Write vocabulary
            if FLAGS.embedding_type == "random":
                vocab_processor.save(os.path.join(out_dir, "vocab"))

            # Initialize all variables
            sess.run(tf.global_variables_initializer())

            def train_step(x_batch, y_batch):
                """
                A single training step
                """
                feed_dict = {
                    cnn.input_x: x_batch,
                    cnn.input_y: y_batch,
                    cnn.dropout_keep_prob: FLAGS.dropout_keep_prob
                }
                _, step, summaries, loss, accuracy = sess.run(
                    [train_op, global_step, train_summary_op, cnn.loss, cnn.accuracy],
                    feed_dict)
                time_str = datetime.datetime.now().isoformat()
                print("{}: step {}, loss {:g}, acc {:g}".format(time_str, step, loss, accuracy))
                train_summary_writer.add_summary(summaries, step)

            def dev_step(x_batch, y_batch, writer=None):
                """
                Evaluates model on a dev set
                """
                feed_dict = {
                    cnn.input_x: x_batch,
                    cnn.input_y: y_batch,
                    cnn.dropout_keep_prob: 1.0
                }
                step, summaries, loss, accuracy = sess.run(
                    [global_step, dev_summary_op, cnn.loss, cnn.accuracy],
                    feed_dict)
                time_str = datetime.datetime.now().isoformat()
                print("{}: step {}, loss {:g}, acc {:g}".format(time_str, step, loss, accuracy))
                if writer:
                    writer.add_summary(summaries, step)

            # Generate batches
            batches = data_helpers.batch_iter(list(zip(x_train, y_train)),
                                              FLAGS.batch_size, FLAGS.num_epochs)
            # Training loop. For each batch...
            for batch in batches:
                x_batch, y_batch = zip(*batch)
                train_step(x_batch, y_batch)
                current_step = tf.train.global_step(sess, global_step)
                if current_step % FLAGS.evaluate_every == 0:
                    print("\nEvaluation:")
                    dev_step(x_dev, y_dev, writer=dev_summary_writer)
                    print("")
                if current_step % FLAGS.checkpoint_every == 0:
                    path = saver.save(sess, checkpoint_prefix, global_step=current_step)
                    print("Saved model checkpoint to {}\n".format(path))
feed_dict = { cnn.input_x: x_batch, cnn.input_y: y_batch, cnn.dropout_keep_prob: 1.0 } step, summaries, loss, accuracy = sess.run( [global_step, dev_summary_op, cnn.loss, cnn.accuracy], feed_dict) time_str = datetime.datetime.now().isoformat() print("{}: step {}, loss {:g}, acc {:g}".format(time_str, step, loss, accuracy)) if writer: writer.add_summary(summaries, step) return step, loss, accuracy # Generate batches batches = data_helpers.batch_iter( list(zip(x_train, y_train)), FLAGS.batch_size, FLAGS.num_epochs) # Training loop. For each batch... best_dev_loss = float("inf") for batch in batches: x_batch, y_batch = zip(*batch) train_step(x_batch, y_batch) current_step = tf.train.global_step(sess, global_step) if current_step % FLAGS.evaluate_every == 0: print("\nEvaluating dev set:") _, dev_loss, _ = dev_step(x_dev, y_dev, writer=dev_summary_writer) print("") ## early-stopping if (dev_loss < best_dev_loss):
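The snippet above breaks off inside the early-stopping check. A sketch of how such a branch is commonly completed follows; the patience counter (patience, num_bad_evals) is an assumption for illustration and not part of the original code, while saver, checkpoint_prefix, sess and current_step come from the surrounding training loop.

# Assumed initialization before the batch loop:
#   patience = 5
#   num_bad_evals = 0
if dev_loss < best_dev_loss:
    # new best model: remember the loss and keep a checkpoint of it
    best_dev_loss = dev_loss
    num_bad_evals = 0
    path = saver.save(sess, checkpoint_prefix, global_step=current_step)
    print("New best dev loss {:g}; saved checkpoint to {}".format(dev_loss, path))
else:
    # dev loss did not improve; stop once it has stagnated for `patience` evaluations
    num_bad_evals += 1
    if num_bad_evals >= patience:
        print("Dev loss has not improved in {} evaluations; stopping early.".format(patience))
        break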
def initiate(args): # define output directory time_str = datetime.datetime.now().isoformat() out_dir = os.path.abspath(os.path.join(os.path.curdir, args.save_dir, time_str)) if not os.path.exists(out_dir): os.makedirs(out_dir) # initiate logger log_file_path = os.path.join(out_dir, 'log') logger = Logger(log_file_path) analysis_file_path = os.path.join(out_dir, 'analysis') analysis_logger = Logger(analysis_file_path) # report parameters logger.write("\nParameters:") for arg in args.__dict__: logger.write("{}={}".format(arg.upper(), args.__dict__[arg])) logger.write("") # load data logger.write("Loading data...") if args.data == 'gameforum': x_train, y_train, x_dev, y_dev, vocabulary, vocabulary_inv, vocabulary_embedding = data_helpers.load_data_gameforum_only(args.use_pretrained_embedding); elif args.data == 'semeval': x_train, y_train, x_dev, y_dev, vocabulary, vocabulary_inv, vocabulary_embedding = data_helpers.load_data_semeval_only(args.use_pretrained_embedding) else: x_train, y_train, x_dev, y_dev, vocabulary, vocabulary_inv, vocabulary_embedding = data_helpers.load_data(args.use_pretrained_embedding) num_classes = len(y_train[0]) # report logger.write("Vocabulary Size: {:d}".format(len(vocabulary))) logger.write("Train/Dev split: {:d}/{:d}".format(len(y_train), len(y_dev))) # fill out missing arg values args.seq_length = x_train.shape[1] args.vocab_size = len(vocabulary) args.filter_sizes = map(int, args.filter_sizes.split(",")) args.vocabulary_embedding = vocabulary_embedding args.num_classes = num_classes # initialize a model if args.model == 'deep': model = DeepCNN(args) elif args.model == 'basic': model = BasicCNN(args) else: logger.write("Invalid model") sys.exit() # for train summary grad_summaries = [] for g, v in model.grads_and_vars: if g is not None: grad_hist_summary = tf.histogram_summary("{}/grad/hist".format(v.name), g) sparsity_summary = tf.scalar_summary("{}/grad/sparsity".format(v.name), tf.nn.zero_fraction(g)) grad_summaries.append(grad_hist_summary) grad_summaries.append(sparsity_summary) grad_summaries_merged = tf.merge_summary(grad_summaries) loss_summary = tf.scalar_summary("loss", model.loss) acc_summary = tf.scalar_summary("accuracy", model.accuracy) train_summary_op = tf.merge_summary([loss_summary, acc_summary, grad_summaries_merged]) train_summary_dir = os.path.join(out_dir, "summaries", "train") # prepare saver checkpoint_dir = os.path.join(out_dir, 'checkpoints') if not os.path.exists(checkpoint_dir): os.makedirs(checkpoint_dir) checkpoint_prefix = os.path.join(checkpoint_dir, "model") saver = tf.train.Saver(tf.all_variables()) # generate batches batches = data_helpers.batch_iter(x_train, y_train, args.batch_size, args.num_epochs) # define train / test methods def train_model(x, y, dropout_prob, writer, log=False): feed_dict = { model.input_x: x, model.input_y: y, model.dropout_keep_prob: dropout_prob } _, step, loss, accuracy, summaries = sess.run( [model.train_op, model.global_step, model.loss, model.accuracy, train_summary_op], feed_dict) sess.run(model.weight_rescaling_op) # l2 norm rescaling writer.add_summary(summaries, step) if log: time_str = datetime.datetime.now().isoformat() logger.write("{}: step {}, loss {:g}, acc {:g}".format(time_str, step-1, loss, accuracy)) def test_model(x, y): logger.write("\nEvaluate:") feed_dict = { model.input_x: x, model.input_y: y, model.dropout_keep_prob: 1.0 } step, loss, accuracy, predictions, targets = sess.run( [model.global_step, model.loss, model.accuracy, model.predictions, model.targets], feed_dict) 
time_str = datetime.datetime.now().isoformat() logger.write("{}: step {}, loss {:g}, acc {:g}".format(time_str, step, loss, accuracy)) logger.write("") return accuracy, predictions, targets # start a session sess_conf = tf.ConfigProto( allow_soft_placement=args.allow_soft_placement, log_device_placement=args.log_device_placement) sess = tf.Session(config=sess_conf) with sess.as_default(): # initialize tf.initialize_all_variables().run() train_summary_writer = tf.train.SummaryWriter(train_summary_dir, sess.graph_def) current_step = 0 if args.train: # train the model from scratch best_test_accuracy = 0.0 for x_batch, y_batch in batches: # train train_model(x_batch, y_batch, args.dropout_keep_prob, train_summary_writer, current_step % (args.evaluate_every/4) == 0) current_step = tf.train.global_step(sess, model.global_step) # evaluate with dev set if current_step % args.evaluate_every == 0: accuracy, predictions, targets = test_model(x_dev, y_dev) # Conduct analysis if the current model is the best so far if accuracy > best_test_accuracy: best_test_accuracy = accuracy analysis_logger.write("Analysis at {}: acc={}".format(current_step, accuracy), begin=True) analysis_logger.write("Tweet\tPred\tTrue (0=Positive, 1=Neutral, 2=Negative)") for i in range(len(x_dev)): tweet_idx = x_dev[i] prediction, true_label = predictions[i], targets[i] try: tweet = " ".join([vocabulary_inv[word_idx] for word_idx in tweet_idx if word_idx != 0]) analysis_logger.write("{}\t{}\t{}".format(tweet, prediction, true_label)) except UnicodeEncodeError: analysis_logger.write("{}\t{}\t{}".format("ENCODING ERROR", prediction, true_label)) analysis_logger.write("\n") # save model if current_step % args.checkpoint_every == 0: path = saver.save(sess, checkpoint_prefix, global_step=current_step) logger.write("Saved model checkpoint to {}\n".format(path)) else: # load the model logger.write("Loading the model...")
saver = tf.train.import_meta_graph("{}.meta".format(checkpoint_file)) saver.restore(sess, checkpoint_file) # Get the placeholders from the graph by name input_x = graph.get_operation_by_name("input_x").outputs[0] # input_y = graph.get_operation_by_name("input_y").outputs[0] dropout_keep_prob = graph.get_operation_by_name( "dropout_keep_prob").outputs[0] # Tensors we want to evaluate predictions = graph.get_operation_by_name( "output/predictions").outputs[0] # Generate batches for one epoch batches = data_helpers.batch_iter(list(x_test), params['batch_size'], 1, shuffle=False) # Collect the predictions here all_predictions = [] for x_test_batch in batches: batch_predictions = sess.run(predictions, { input_x: x_test_batch, dropout_keep_prob: 1.0 }) all_predictions = np.concatenate( [all_predictions, batch_predictions]) # Print accuracy if y_test is defined d = ['all', 'an', 'ben', 'call', 'claims', 'hr']
def My_main(): x_train, x_dev, y_train, y_dev, seq_max_len, vocabulary, vocabulary_inv, word2vec_vocab, word2vec_vec = read_from_dataset( FLAGS.input_dataset_path, FLAGS.word2vec_model_path, FLAGS.n_classes, FLAGS.max_seq_len_cutoff) ############################## Variable Definition ############################################## input_x = tf.placeholder(tf.float32, [None, seq_max_len, FLAGS.embedding_dim], name="input_x") input_y = tf.placeholder(tf.float32, [None, FLAGS.n_classes], name='input_y') seq_len_list = tf.placeholder(tf.int64, [None], name='seq_len_list') dropout_keep_prob = tf.placeholder(tf.float32, name="dropout_keep_prob") weights = { 'W_out': tf.Variable(tf.random_normal([2 * FLAGS.n_hidden, FLAGS.n_classes]), name='W_out') } biases = { 'B_out': tf.Variable(tf.random_normal([FLAGS.n_classes]), name='B_out') } with tf.name_scope('Model'): myLSTM_outputs, my_variables_value = LSTM_class.dynamic_bidirectional_LSTM(input_x, seq_len_list, FLAGS.n_hidden, weights, biases, dropout_keep_prob) with tf.name_scope('Predict'): predictions = tf.argmax(myLSTM_outputs, 1, name='predictions') with tf.name_scope("loss"): weight_amount = tf.nn.l2_loss(weights['W_out']) + tf.nn.l2_loss(biases['B_out']) loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=myLSTM_outputs, labels=input_y)) \ + FLAGS.l2_reg_lambda * weight_amount with tf.name_scope('Accuracy'): correct_predictions = tf.equal(predictions, tf.argmax(input_y, 1), name='correct_prediction') accuracy = tf.reduce_mean(tf.cast(correct_predictions, tf.float32), name='accuracy') with tf.name_scope('Optimizer'): optimize = tf.train.AdamOptimizer(learning_rate=FLAGS.learning_rate).minimize(loss) # Adam Optimizer ################################################################################################## ################################# make summary ################################################### out_dir = os.path.abspath(os.path.join(os.path.curdir, "runs")) checkpoint_dir = os.path.abspath(os.path.join(out_dir, "checkpoints")) checkpoint_prefix = os.path.join(checkpoint_dir, "mymodel") if not os.path.exists(checkpoint_dir): os.makedirs(checkpoint_dir) saver = tf.train.Saver(max_to_keep=50) # Summaries for loss and accuracy loss_summary = tf.summary.scalar("loss", loss) acc_summary = tf.summary.scalar("accuracy", accuracy) # Train Summaries train_summary_op = tf.summary.merge([loss_summary, acc_summary]) train_summary_dir = os.path.join(out_dir, "summaries", "train") # Dev summaries dev_summary_op = tf.summary.merge([loss_summary, acc_summary]) dev_summary_dir = os.path.join(out_dir, "summaries", "dev") all_summary_dir = os.path.join(out_dir, "summaries", "all_summaries") for var in tf.trainable_variables(): tf.summary.histogram(var.name, var) # Summarize all gradients # Merge all summaries into a single op merged_summary_op = tf.summary.merge_all() ############################# end summary ######################################################## ################################################################################################## if (FLAGS.is_load_model == True): checkpoint_file = tf.train.latest_checkpoint(FLAGS.checkpoint_dir) print("starting from check point...") init = tf.global_variables_initializer() with tf.Session() as sess: my_train_step = 0 my_dev_step = 0 sess.run(init) if (FLAGS.is_load_model == True): print("loading from checkpoint...") load_path = saver.restore(sess, checkpoint_file) print("model loaded from checkpoint.") # op to write logs to Tensorboard all_summary_writer = 
tf.summary.FileWriter(all_summary_dir, graph=tf.get_default_graph()) train_summary_writer = tf.summary.FileWriter(train_summary_dir, graph=tf.get_default_graph()) dev_summary_writer = tf.summary.FileWriter(dev_summary_dir, graph=tf.get_default_graph()) def dev_step(x_dev, y_dev, test_set="dev", writer=None): dev_batches = data_helpers.batch_iter(list(zip(x_dev, y_dev)), batch_size=FLAGS.batch_size, seq_length=seq_max_len, emmbedding_size=FLAGS.embedding_dim, word2vec_vocab=word2vec_vocab, word2vec_vec=word2vec_vec, is_shuffle=False) total_loss = 0 total_acc = 0 index = 0 total_correct_predictions = 0 total_dev_data = 0 for batch in dev_batches: if (len(batch[0]) == 0): continue x_batch, y_batch = zip(*batch[0]) batch_seq_len = batch[1] total_dev_data += len(x_batch) myfeed_dict = { input_x: x_batch, input_y: y_batch, seq_len_list: batch_seq_len, # myLSTM.istate: np.zeros((FLAGS.batch_size, 2 * FLAGS.n_hidden)), dropout_keep_prob: 1 } acc, cost, correct_predict, summaries = sess.run( [accuracy, loss, correct_predictions, dev_summary_op], feed_dict=myfeed_dict) if (test_set == "dev"): dev_summary_writer.add_summary(summaries, index + my_dev_step * 250) print("on {}, test index: {:g}, Minibatch Loss: {:.6f}, acc: {:g}".format(test_set, index, cost, acc)) destfile.write( "on {}, test index: {:g}, Minibatch Loss: {:.6f}, acc: {:g}\n".format(test_set, index, cost, acc)) destfile.write('\n') total_loss += cost total_acc += acc index += 1 total_correct_predictions += np.sum(correct_predict) print("#################################################################\n") destfile.write("####################################################################\n\n") avg_loss = total_loss / index avg_acc = total_acc / index real_acc = (total_correct_predictions * 1.0) / (total_dev_data) print("on {}, average_loss: {:g}, average_acc: {:.6f}, real_acc: {:g}\n".format(test_set, avg_loss, avg_acc, real_acc)) destfile.write( "on {}, average_loss: {:g}, average_acc: {:.6f}, real_acc: {:g}\n\n".format(test_set, avg_loss, avg_acc, real_acc)) return avg_loss, real_acc def train_step(x_batch, y_batch, batch_seq_len, my_train_step, epoch_num): myfeed_dict = { input_x: x_batch, input_y: y_batch, seq_len_list: batch_seq_len, dropout_keep_prob: FLAGS.dropout_keep_prob } _, cost, acc, correct_predict, weight_norm, summaries, all_summaries, myvars = sess.run( [optimize, loss, accuracy, correct_predictions, weight_amount, train_summary_op, merged_summary_op, my_variables_value], feed_dict=myfeed_dict) train_summary_writer.add_summary(summaries, my_train_step) all_summary_writer.add_summary(all_summaries, my_train_step) weight_norm = weight_norm * FLAGS.l2_reg_lambda print("epoch: {:g}, iteration: {:g}, weight_norm: {:.6f}, loss: {:.4f}, acc: {:.4f}".format(epoch_num, my_train_step, weight_norm, cost, acc)) destfile.write( "epoch: {:g}, iteration: {:g}, weight_norm: {:.6f}, loss: {:.4f}, acc: {:.4f}\n".format(epoch_num, my_train_step, weight_norm, cost, acc)) train_loss_list = [] train_acc_list = [] test_loss_list = [] test_acc_list = [] for epoch in range(FLAGS.num_epochs): batches = data_helpers.batch_iter(list(zip(x_train, y_train)), batch_size=FLAGS.batch_size, seq_length=seq_max_len, emmbedding_size=FLAGS.embedding_dim, word2vec_vocab=word2vec_vocab, word2vec_vec=word2vec_vec) for batch in batches: my_train_step += 1 if (len(batch[0]) == 0): continue batch_xs, batch_ys = zip(*batch[0]) batch_seq_len = batch[1] # print(my_train_step) train_step(batch_xs, batch_ys, batch_seq_len, my_train_step, epoch) if ((epoch + 1) % 
FLAGS.checkpoint_every == 0): path = saver.save(sess, checkpoint_prefix, global_step=(epoch + 1)) print("Saved model checkpoint to {}\n".format(path)) if ((epoch + 1) % FLAGS.evaluate_every == 0): print("testing on dev set: ") destfile.write("testing on dev set:\n") avg_loss, real_acc = dev_step(x_dev, y_dev) test_loss_list.append(avg_loss) test_acc_list.append(real_acc) if (FLAGS.is_evaluation == True): my_dev_step += 1 print("###############################################") destfile.write("###############################################\n") print("testing on train set: ") destfile.write("testing on train set: \n") avg_loss, real_acc = dev_step(x_train, y_train, test_set="train") train_loss_list.append(avg_loss) train_acc_list.append(real_acc) path = saver.save(sess, checkpoint_prefix, global_step=50) print("Saved model checkpoint to {}\n".format(path)) print("Optimization Finished!") print("\nEvaluation:") dev_step(x_dev, y_dev) print("") return train_acc_list,train_loss_list,test_acc_list,test_loss_list
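My_main returns the per-evaluation train/dev loss and accuracy lists. A hedged usage sketch for inspecting them is shown below; it assumes matplotlib is available and that the global destfile log handle used inside My_main has been opened elsewhere.

import matplotlib.pyplot as plt

# Hypothetical caller: plot the curves returned by My_main().
train_acc, train_loss, dev_acc, dev_loss = My_main()
evals = range(1, len(dev_loss) + 1)
plt.plot(evals, dev_loss, label="dev loss")
plt.plot(evals, dev_acc, label="dev accuracy")
if train_loss:  # only populated when FLAGS.is_evaluation is True
    plt.plot(range(1, len(train_loss) + 1), train_loss, label="train loss")
plt.xlabel("evaluation (every FLAGS.evaluate_every epochs)")
plt.legend()
plt.savefig("training_curves.png")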
print('Test:') else: print('Dev') return y_pred, y_true #train loop best_accuracy = [0.0] * 2 best_step = [0] * 2 done = False #p, r, f = 0.0, 0.0, 0.0 for i in range(FLAGS.num_epochs): if done: break print('Episode: ', i) print('Training teacher...') batches_t = data_helpers.batch_iter( list(zip(train_x_t, train_label_t)), FLAGS.batch_size, 1) for batch in batches_t: x_batch, y_batch = zip(*batch) seq_len_batch = [int(len(x)) for x in x_batch] y_class_t = [0] * len(seq_len_batch) x_batch = np.asarray(x_batch, dtype=np.float32) current_step = train_step_teacher(x_batch, y_batch, y_class_t, seq_len_batch) if current_step % FLAGS.evaluate_every == 0: yp, yt = final_test_step_teacher(dev_x_t, dev_label_t, epoch=1, bigram=BI_GRAM) print('Teacher:') tmpacc = evaluate_word_PRF(yp, yt) if best_accuracy[0] < tmpacc:
cnn.input_x: x_batch, cnn.input_y: y_batch, cnn.dropout_keep_prob: 1.0, cnn.pad: np.zeros([len(x_batch), 1, FLAGS.embedding_dim, 1]), cnn.ind: range(len(x_batch)) # index placeholder; its value does not matter at eval time } step, summaries, loss, accuracy = sess.run( [global_step, dev_summary_op, cnn.loss, cnn.accuracy], feed_dict) print("VALID step {}, loss {:g}, acc {:g}".format(step, loss, accuracy)) if writer: writer.add_summary(summaries, step) return accuracy, loss # Generate batches batches = data_helpers.batch_iter( list(zip(x_train, y_train, range(x_train.shape[0]))), FLAGS.batch_size, FLAGS.num_epochs) # Training loop. For each batch... max_acc = 0 best_at_step = 0 for batch in batches: x_batch, y_batch, ind_batch = zip(*batch) train_step(x_batch, y_batch, ind_batch) current_step = tf.train.global_step(sess, global_step) if current_step % FLAGS.evaluate_every == 0: acc, loss = dev_step(x_dev, y_dev, writer=dev_summary_writer) if acc >= max_acc: max_acc = acc best_at_step = current_step path = saver.save(sess, checkpoint_prefix, global_step=current_step) if current_step % FLAGS.checkpoint_every == 0:
cnn.input_x: x_batch, cnn.input_y: y_batch, cnn.dropout_keep_prob: 1.0 } step, summaries, loss, accuracy = sess.run( [global_step, test_summary_op, cnn.loss, cnn.accuracy], feed_dict) time_str = datetime.datetime.now().isoformat() print("{}: step {}, loss {:g}, acc {:g}".format( time_str, step, loss, accuracy)) if writer: writer.add_summary(summaries, step) print("generate batches...") # Generate batches batches = dh.batch_iter(list(zip(x_train, y_train)), FLAGS.batch_size, FLAGS.num_epochs) # Training loop. For each batch... for batch in batches: print("inside loop...") # print('batch',batch[1,1]) x_batch, y_batch = zip(*batch) # print('x_batch, y_batch',x_batch, y_batch) train_step(x_batch, y_batch) current_step = tf.train.global_step(sess, global_step) if current_step % FLAGS.evaluate_every == 0: print("inside if1...") print("\nEvaluation:") test_step(x_test, y_test, writer=test_summary_writer) print("") if current_step % FLAGS.checkpoint_every == 0: print("inside if2...")
allow_soft_placement=FLAGS.allow_soft_placement, log_device_placement=FLAGS.log_device_placement) sess = tf.Session(config=session_conf) with sess.as_default(): # Load the saved meta graph and restore variables saver = tf.train.import_meta_graph("{}.meta".format(checkpoint_file)) saver.restore(sess, checkpoint_file) # Get the placeholders from the graph by name input_x = graph.get_operation_by_name("input_x").outputs[0] # input_y = graph.get_operation_by_name("input_y").outputs[0] dropout_keep_prob = graph.get_operation_by_name("dropout_keep_prob").outputs[0] # Tensor we want to evaluate predictions = graph.get_operation_by_name("output/predictions").outputs[0] # Generate batches for one epoch batches = data_helpers.batch_iter(x_test, FLAGS.batch_size, 1, shuffle=False) # Collect the predictions here all_predictions = [] for x_test_batch in batches: batch_predictions = sess.run(predictions, {input_x: x_test_batch, dropout_keep_prob: 1.0}) all_predictions = np.concatenate([all_predictions, batch_predictions]) # Print the accuracy correct_predictions = float(sum(all_predictions == y_test)) print("Total number of test examples: {}".format(len(y_test))) print("Accuracy: {:g}".format(correct_predictions/float(len(y_test))))
def evalNewData(): with tf.device('/gpu:0'): x_text = data_helpers.load_data(FLAGS.test_path) text_path = os.path.join(FLAGS.checkpoint_dir, "..", "vocab") text_vocab_processor = tf.contrib.learn.preprocessing.VocabularyProcessor.restore( text_path) x = np.array(list(text_vocab_processor.transform(x_text))) checkpoint_file = tf.train.latest_checkpoint(FLAGS.checkpoint_dir) graph = tf.Graph() with graph.as_default(): session_conf = tf.ConfigProto( allow_soft_placement=FLAGS.allow_soft_placement, log_device_placement=FLAGS.log_device_placement) session_conf.gpu_options.allow_growth = FLAGS.gpu_allow_growth sess = tf.Session(config=session_conf) with sess.as_default(): # Load the saved meta graph and restore variables saver = tf.train.import_meta_graph( "{}.meta".format(checkpoint_file)) saver.restore(sess, checkpoint_file) # Get the placeholders from the graph by name input_text = graph.get_operation_by_name("input_text").outputs[0] # input_y = graph.get_operation_by_name("input_y").outputs[0] emb_dropout_keep_prob = graph.get_operation_by_name( "emb_dropout_keep_prob").outputs[0] rnn_dropout_keep_prob = graph.get_operation_by_name( "rnn_dropout_keep_prob").outputs[0] dropout_keep_prob = graph.get_operation_by_name( "dropout_keep_prob").outputs[0] # Tensors we want to evaluate predictions = graph.get_operation_by_name( "output/predictions").outputs[0] # Generate batches for one epoch batches = data_helpers.batch_iter(list(x), FLAGS.batch_size, 1, shuffle=False) # Collect the predictions here preds = [] for x_batch in batches: pred = sess.run( predictions, { input_text: x_batch, emb_dropout_keep_prob: 1.0, rnn_dropout_keep_prob: 1.0, dropout_keep_prob: 1.0 }) preds.append(pred) preds = np.concatenate(preds) with open(os.path.join("predict_result", "1_300_model_1572577057_1105.txt"), 'a', encoding="utf-8") as resultFile: for i in range(0, len(x_text)): resultFile.write(x_text[i] + "\n") resultFile.write(str(preds[i] + 1) + "\n\n")
# Get the placeholders from the graph by name input_x = graph.get_operation_by_name("input_x").outputs[0] # input_y = graph.get_operation_by_name("input_y").outputs[0] dropout_keep_prob = graph.get_operation_by_name( "dropout_keep_prob").outputs[0] phase_train = graph.get_operation_by_name("phase_train").outputs[0] # Tensors we want to evaluate predictions = graph.get_operation_by_name( "output/predictions").outputs[0] scores = graph.get_operation_by_name("output/scores").outputs[0] # scores = tf.nn.softmax(scores) # Generate batches for one epoch batches = data_helpers.batch_iter(list(x_test), FLAGS.batch_size, 1, shuffle=False) # Collect the predictions here all_predictions = [] all_scores = [] for x_test_batch in batches: batch_predictions, batch_scores = sess.run([predictions, scores], { input_x: x_test_batch, dropout_keep_prob: 1.0, phase_train: False }) all_predictions = np.concatenate( [all_predictions, batch_predictions]) all_scores.extend(batch_scores)
def train(x_train, y_train, x_dev, y_dev, feature_dim_dict, hidden_size): with tf.Graph().as_default(): session_conf = tf.ConfigProto( allow_soft_placement=FLAGS.allow_soft_placement, log_device_placement=FLAGS.log_device_placement) sess = tf.Session(config=session_conf) with sess.as_default(): dcn = deep_cross_network(feature_dim_dict=feature_dim_dict, hidden_size=hidden_size) # Define training procedure global_step = tf.Variable(0, name="global_step", trainable=False) optimizer = tf.train.AdamOptimizer(1e-3) grads_and_vars = optimizer.compute_gradients(dcn.loss) train_op = optimizer.apply_gradients(grads_and_vars, global_step=global_step) grad_summaries = [] for g, v in grads_and_vars: if g is not None: grad_hist_summary = tf.summary.histogram( "{}/grad/hist".format(v.name), g) sparsity_summary = tf.summary.scalar( "{}/grad/sparsity".format(v.name), tf.nn.zero_fraction(g)) grad_summaries.append(grad_hist_summary) grad_summaries.append(sparsity_summary) grad_summaries_merged = tf.summary.merge(grad_summaries) # Output directory for models and summaries timestamp = str(int(time.time())) out_dir = os.path.abspath( os.path.join(os.path.curdir, "runs", timestamp)) print("Writing to {}\n".format(out_dir)) # Summaries for loss log_loss_summary = tf.summary.scalar("loss", dcn.loss) # Train Summaries train_summary_op = tf.summary.merge( [log_loss_summary, grad_summaries_merged]) train_summary_dir = os.path.join(out_dir, "summaries", "train") train_summary_writer = tf.summary.FileWriter( train_summary_dir, sess.graph) # Dev summaries dev_summary_op = tf.summary.merge([log_loss_summary]) dev_summary_dir = os.path.join(out_dir, "summaries", "dev") dev_summary_writer = tf.summary.FileWriter(dev_summary_dir, sess.graph) # Checkpoint directory. Tensorflow assumes this directory already exists so we need to create it checkpoint_dir = os.path.abspath( os.path.join(out_dir, "checkpoints")) checkpoint_prefix = os.path.join(checkpoint_dir, "model") if not os.path.exists(checkpoint_dir): os.makedirs(checkpoint_dir) saver = tf.train.Saver(tf.global_variables(), max_to_keep=FLAGS.num_checkpoints) sess.run(tf.global_variables_initializer()) def compute_sample_weight(labels, class_weight=None, sample_weight=None): if class_weight is None and sample_weight is None: return np.ones(len(labels)) if class_weight is None: return np.array(sample_weight) sample_weight = np.array(labels) for label, weight in class_weight.items(): sample_weight[sample_weight == label] = weight return sample_weight def train_step(x_batch, y_batch, class_weight=None, sample_weight=None): """ A single training step. :param x_batch: :param y_batch: :param class_weight: :param sample_weight: :return: """ feed_dict = { dcn.input_x: x_batch, dcn.input_y: y_batch, dcn.dropout_keep_prob: 0.5, dcn.sample_weight: compute_sample_weight(y_batch, class_weight, sample_weight) } _, step, summaries, train_loss, logits = sess.run([ train_op, global_step, train_summary_op, dcn.loss, dcn.logit ], feed_dict) train_summary_writer.add_summary(summaries, step) def dev_step(x_batch, y_batch, class_weight=None, sample_weight=None, writer=None): """ Evaluates the model on a dev set. :param x_batch: :param y_batch: :param class_weight: :param sample_weight: :param writer: :return: """ feed_dict = { dcn.input_x: x_batch, dcn.input_y: y_batch, dcn.dropout_keep_prob: 1.0, dcn.sample_weight: compute_sample_weight(y_batch, class_weight, sample_weight) } step, summaries, loss = sess.run( [global_step, dev_summary_op, dcn.loss], feed_dict) if writer: writer.add_summary(summaries, step) # Generate batches batches = data_helpers.batch_iter(list(zip(x_train, y_train)), 
FLAGS.batch_size, FLAGS.num_epochs) # Training loop. For each batch... for batch in batches: x_batch, y_batch = zip(*batch) train_step(x_batch, y_batch) current_step = tf.train.global_step(sess, global_step) if current_step % 1000 == 0: print("\nEvaluation:") dev_step(x_dev, y_dev, writer=dev_summary_writer) print("") if current_step % 1000 == 0: path = saver.save(sess, checkpoint_prefix, global_step=current_step) print("Saved model checkpoint to {}\n".format(path))
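train_step and dev_step accept an optional class_weight dict that compute_sample_weight turns into per-example weights. A hedged usage sketch of that path is shown below; the 1:9 weights are invented for illustration and assume integer 0/1 labels for a rare positive class.

# Hypothetical: up-weight the minority class inside the training loop above.
class_weight = {0: 1.0, 1: 9.0}
for batch in batches:
    x_batch, y_batch = zip(*batch)
    # compute_sample_weight maps each label to its class weight for this batch
    train_step(x_batch, y_batch, class_weight=class_weight)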
def train(): # parse arguments FLAGS(sys.argv) print(FLAGS.batch_size) # This is not working any more, because api has been changed!!! print("\nParameters:") for attr, value in sorted(FLAGS.__flags.items()): print("{}={}".format(attr.upper(), value)) print("") # Data Preparation # ================================================== # Load data print("Loading data...") x_text, y = data_helpers.load_data_and_labels(FLAGS.positive_data_file, FLAGS.negative_data_file) # Build vocabulary max_document_length = max([len(x.split(" ")) for x in x_text]) vocab_processor = learn.preprocessing.VocabularyProcessor(max_document_length) x = np.array(list(vocab_processor.fit_transform(x_text))) # Randomly shuffle data np.random.seed(10) shuffle_indices = np.random.permutation(np.arange(len(y))) x_shuffled = x[shuffle_indices] y_shuffled = y[shuffle_indices] # Split train/test set # TODO: This is very crude, should use cross-validation dev_sample_index = -1 * int(FLAGS.dev_sample_percentage * float(len(y))) x_train, x_dev = x_shuffled[:dev_sample_index], x_shuffled[dev_sample_index:] y_train, y_dev = y_shuffled[:dev_sample_index], y_shuffled[dev_sample_index:] del x, y, x_shuffled, y_shuffled print("Vocabulary Size: {:d}".format(len(vocab_processor.vocabulary_))) print("Train/Dev split: {:d}/{:d}".format(len(y_train), len(y_dev))) # Training # ================================================== with tf.Graph().as_default(): sequence_length = x_train.shape[1] num_classes = y_train.shape[1] input_x = tf.placeholder(tf.int32, [None, sequence_length], name="input_x") input_y = tf.placeholder(tf.float32, [None, num_classes], name="input_y") session_conf = tf.ConfigProto( allow_soft_placement=FLAGS.allow_soft_placement, log_device_placement=FLAGS.log_device_placement) with tf.Session(config=session_conf) as sess: cnn_text = TextModel( input_x, input_y, max_sequence_len=sequence_length, num_classes=num_classes, vocab_size=len(vocab_processor.vocabulary_), embedding_size=FLAGS.embedding_dim, filter_sizes=list(map(int, FLAGS.filter_sizes.split(","))), num_filters=FLAGS.num_filters, l2_reg_lambda=FLAGS.l2_reg_lambda) prediction, loss, optimize, accuracy = cnn_text.get_model_variables() # Output directory for models and summaries timestamp = str(int(time.time())) out_dir = os.path.abspath(os.path.join(os.path.curdir, "runs", timestamp)) print("Writing to {}\n".format(out_dir)) # summaries for loss and accuracy loss_summary = tf.summary.scalar("loss", loss) acc_summary = tf.summary.scalar("accuracy", accuracy) # train summaries train_summary_op = tf.summary.merge([loss_summary, acc_summary]) train_summary_dir = os.path.join(out_dir, "summaries", "train") train_summary_writer = tf.summary.FileWriter(train_summary_dir, sess.graph) # eval summaries eval_summary_op = tf.summary.merge([loss_summary, acc_summary]) eval_summary_dir = os.path.join(out_dir, "summaries", "eval") eval_summary_writer = tf.summary.FileWriter(eval_summary_dir, sess.graph) # checkpoint directory checkpoint_dir = os.path.abspath(os.path.join(out_dir, "checkpoints")) checkpoint_prefix = os.path.join(checkpoint_dir, "model") # tensorflow assumes this directory already exists, so we need to create it if it not exists if not os.path.exists(checkpoint_dir): os.makedirs(checkpoint_dir) saver = tf.train.Saver(tf.global_variables(), max_to_keep=FLAGS.num_checkpoints) # write vocabulary vocab_processor.save(os.path.join(out_dir, "vocab")) # initialize all variables init_g = tf.global_variables_initializer() init_l = tf.local_variables_initializer() 
sess.run(init_l) sess.run(init_g) def train_step(x_batch, y_batch): """ A single training step """ feed_dict = { cnn_text.data: x_batch, cnn_text.target: y_batch, cnn_text.dropout_keep_prob: FLAGS.dropout_keep_prob } _, summaries, train_loss, train_accuracy = sess.run( [optimize, train_summary_op, loss, accuracy], feed_dict) time_str = datetime.datetime.now().isoformat() current_step = tf.train.global_step(sess, cnn_text.global_step) print("{0}: step {1}, loss {2:g}, acc {3:g}".format(time_str, current_step, train_loss, train_accuracy)) train_summary_writer.add_summary(summaries, current_step) def eval_step(x_batch, y_batch): """ Evaluates the model on the dev set """ feed_dict = { cnn_text.data: x_batch, cnn_text.target: y_batch, cnn_text.dropout_keep_prob: 1.0 } step, summaries, eval_loss, eval_accuracy = sess.run( [cnn_text.global_step, eval_summary_op, loss, accuracy], feed_dict) time_str = datetime.datetime.now().isoformat() print("evaluation {0}: step {1}, loss {2:g}, acc {3:g}".format(time_str, step, eval_loss, eval_accuracy)) eval_summary_writer.add_summary(summaries, step) # generate batches batches = data_helpers.batch_iter( list(zip(x_train, y_train)), FLAGS.batch_size, FLAGS.num_epochs) # training loop, for each batch ... for batch in batches: x_batch, y_batch = zip(*batch) train_step(x_batch, y_batch) current_step = tf.train.global_step(sess, cnn_text.global_step) if 0 == current_step % FLAGS.evaluate_every: eval_step(x_dev, y_dev) if 0 == current_step % FLAGS.checkpoint_every: path = saver.save(sess, checkpoint_prefix, global_step=current_step) print("Saved model checkpoint to {0}".format(path))
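The parameter dump at the top of this train() notes that iterating FLAGS.__flags no longer works with newer flag APIs. With the absl-based flags shipped in later TF 1.x releases, the same listing can be written as below (a sketch, assuming FLAGS has already been parsed; flag_values_dict() is also what the later snippets in this collection use when pickling the configuration).

# Print all parsed flags, upper-cased, one per line.
print("\nParameters:")
for attr, value in sorted(FLAGS.flag_values_dict().items()):
    print("{}={}".format(attr.upper(), value))
print("")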
""" feed_dict = { cnn.input_x: x_batch, cnn.input_y: y_batch, cnn.dropout_keep_prob: 1.0 } step, summaries, loss, accuracy = sess.run( [global_step, dev_summary_op, cnn.loss, cnn.accuracy], feed_dict) time_str = datetime.datetime.now().isoformat() print("{}: step {}, loss {:g}, acc {:g}".format(time_str, step, loss, accuracy)) if writer: writer.add_summary(summaries, step) # Generate batches batches = data_helpers.batch_iter( list(zip(x_train, y_train)), FLAGS.batch_size, FLAGS.num_epochs) # Training loop. For each batch... for batch in batches: x_batch, y_batch = zip(*batch) train_step(x_batch, y_batch) current_step = tf.train.global_step(sess, global_step) if current_step % FLAGS.evaluate_every == 0: print("\nEvaluation:") dev_step(x_dev, y_dev, writer=dev_summary_writer) print("") if current_step % FLAGS.checkpoint_every == 0: path = saver.save(sess, checkpoint_prefix, global_step=current_step) print("Saved model checkpoint to {}\n".format(path))
def __init__(self): x_text, y = data_helpers.load_data_and_labels("./train/pos","./train/neg") # Build vocabulary x_list = [x.split(" ") for x in x_text] vocab_processor = data_helpers.n_grams(x_list, max_word_cnt, n_gram) print('feed finished') x = np.array(data_helpers.fit_transform(vocab_processor, x_list, max_document_length, n_gram)) # print(x[0]) print('fit transform finished') # Randomly shuffle data np.random.seed(10) shuffle_indices = np.random.permutation(np.arange(len(y))) x_shuffled = x[shuffle_indices] y_shuffled = y[shuffle_indices] # Split train/test set # TODO: This is very crude, should use cross-validation x_train, x_dev = x_shuffled[:-1000], x_shuffled[-1000:] y_train, y_dev = y_shuffled[:-1000], y_shuffled[-1000:] print("Vocabulary Size: {:d}".format(len(vocab_processor))) print("Train/Dev split: {:d}/{:d}".format(len(y_train), len(y_dev))) # Training # ================================================== with tf.Graph().as_default(): session_conf = tf.ConfigProto( allow_soft_placement=FLAGS.allow_soft_placement, log_device_placement=FLAGS.log_device_placement) sess = tf.Session(config=session_conf) with sess.as_default(): cnn = MLP( sequence_length=x_train.shape[1], num_classes=2, vocab_size=len(vocab_processor), embedding_size=FLAGS.embedding_dim, filter_sizes=list(map(int, FLAGS.filter_sizes.split(","))), num_filters=FLAGS.num_filters, l2_reg_lambda=FLAGS.l2_reg_lambda) # Define Training procedure global_step = tf.Variable(0, name="global_step", trainable=False) starter_learning_rate = 1e-3 learning_rate = tf.train.exponential_decay(starter_learning_rate, global_step, 3000, 0.96, staircase=True) optimizer = tf.train.AdamOptimizer(learning_rate) # use the decayed learning rate grads_and_vars = optimizer.compute_gradients(cnn.loss) train_op = optimizer.apply_gradients(grads_and_vars, global_step=global_step) # Keep track of gradient values and sparsity (optional) grad_summaries = [] for g, v in grads_and_vars: if g is not None: grad_hist_summary = tf.histogram_summary("{}/grad/hist".format(v.name), g) sparsity_summary = tf.scalar_summary("{}/grad/sparsity".format(v.name), tf.nn.zero_fraction(g)) grad_summaries.append(grad_hist_summary) grad_summaries.append(sparsity_summary) grad_summaries_merged = tf.merge_summary(grad_summaries) # Output directory for models and summaries timestamp = str(int(time.time())) self.out_dir = os.path.abspath(os.path.join(os.path.curdir, "runs", timestamp)) print("Writing to {}\n".format(self.out_dir)) # Summaries for loss and accuracy loss_summary = tf.scalar_summary("loss", cnn.loss) acc_summary = tf.scalar_summary("accuracy", cnn.accuracy) # Train Summaries train_summary_op = tf.merge_summary([loss_summary, acc_summary, grad_summaries_merged]) train_summary_dir = os.path.join(self.out_dir, "summaries", "train") train_summary_writer = tf.train.SummaryWriter(train_summary_dir, sess.graph) # Dev summaries dev_summary_op = tf.merge_summary([loss_summary, acc_summary]) dev_summary_dir = os.path.join(self.out_dir, "summaries", "dev") dev_summary_writer = tf.train.SummaryWriter(dev_summary_dir, sess.graph) # Checkpoint directory. 
Tensorflow assumes this directory already exists so we need to create it self.checkpoint_dir = os.path.abspath(os.path.join(self.out_dir, "checkpoints")) checkpoint_prefix = os.path.join(self.checkpoint_dir, "model") if not os.path.exists(self.checkpoint_dir): os.makedirs(self.checkpoint_dir) saver = tf.train.Saver(tf.all_variables()) # Write vocabulary pickle.dump(vocab_processor, open(os.path.join(self.out_dir,"vocab"), "wb" ) ) # Initialize all variables sess.run(tf.initialize_all_variables()) def train_step(x_batch, y_batch): """ A single training step """ feed_dict = { cnn.input_x: x_batch, cnn.input_y: y_batch, cnn.dropout_keep_prob: FLAGS.dropout_keep_prob } _, step, summaries, loss, accuracy = sess.run( [train_op, global_step, train_summary_op, cnn.loss, cnn.accuracy], feed_dict) time_str = datetime.datetime.now().isoformat() print("{}: step {}, loss {:g}, acc {:g}".format(time_str, step, loss, accuracy)) train_summary_writer.add_summary(summaries, step) def dev_step(x_batch, y_batch, writer=None): """ Evaluates model on a dev set """ feed_dict = { cnn.input_x: x_batch, cnn.input_y: y_batch, cnn.dropout_keep_prob:1 } step, summaries, loss, accuracy = sess.run( [global_step, dev_summary_op, cnn.loss, cnn.accuracy], feed_dict) time_str = datetime.datetime.now().isoformat() print("{}: step {}, loss {:g}, acc {:g}".format(time_str, step, loss, accuracy)) if writer: writer.add_summary(summaries, step) # Generate batches t = list(zip(x_train, y_train)) batches = data_helpers.batch_iter( t, FLAGS.batch_size, FLAGS.num_epochs) # Training loop. For each batch... for batch in batches: x_batch, y_batch = zip(*batch) train_step(x_batch, y_batch) current_step = tf.train.global_step(sess, global_step) if current_step % FLAGS.evaluate_every == 0: print("\nEvaluation:") dev_step(x_dev, y_dev, writer=dev_summary_writer) print("") if current_step % FLAGS.checkpoint_every == 0: path = saver.save(sess, checkpoint_prefix, global_step=current_step) print("Saved model checkpoint to {}\n".format(path))
def train(x_train, y_train, vocab_processor, x_dev, y_dev): # Training # ================================================== with tf.Graph().as_default(): session_conf = tf.ConfigProto( allow_soft_placement=FLAGS.allow_soft_placement, log_device_placement=FLAGS.log_device_placement) sess = tf.Session(config=session_conf) with sess.as_default(): cnn = TextCNN( sequence_length=x_train.shape[1], num_classes=y_train.shape[1], vocab_size=len(vocab_processor.vocabulary_), embedding_size=FLAGS.embedding_dim, filter_sizes=list(map(int, FLAGS.filter_sizes.split(","))), num_filters=FLAGS.num_filters, l2_reg_lambda=FLAGS.l2_reg_lambda) # Define Training procedure global_step = tf.Variable(0, name="global_step", trainable=False) optimizer = tf.train.AdamOptimizer(1e-3) grads_and_vars = optimizer.compute_gradients(cnn.loss) train_op = optimizer.apply_gradients(grads_and_vars, global_step=global_step) # Keep track of gradient values and sparsity (optional) grad_summaries = [] for g, v in grads_and_vars: if g is not None: grad_hist_summary = tf.summary.histogram("{}/grad/hist".format(v.name), g) sparsity_summary = tf.summary.scalar("{}/grad/sparsity".format(v.name), tf.nn.zero_fraction(g)) grad_summaries.append(grad_hist_summary) grad_summaries.append(sparsity_summary) grad_summaries_merged = tf.summary.merge(grad_summaries) # Output directory for models and summaries timestamp = str(int(time.time())) out_dir = os.path.abspath(os.path.join(os.path.curdir, "runs", timestamp)) print("Writing to {}\n".format(out_dir)) # Summaries for loss and accuracy loss_summary = tf.summary.scalar("loss", cnn.loss) acc_summary = tf.summary.scalar("accuracy", cnn.accuracy) # Train Summaries train_summary_op = tf.summary.merge([loss_summary, acc_summary, grad_summaries_merged]) train_summary_dir = os.path.join(out_dir, "summaries", "train") train_summary_writer = tf.summary.FileWriter(train_summary_dir, sess.graph) # Dev summaries dev_summary_op = tf.summary.merge([loss_summary, acc_summary]) dev_summary_dir = os.path.join(out_dir, "summaries", "dev") dev_summary_writer = tf.summary.FileWriter(dev_summary_dir, sess.graph) # Checkpoint directory. 
Tensorflow assumes this directory already exists so we need to create it checkpoint_dir = os.path.abspath(os.path.join(out_dir, "checkpoints")) checkpoint_prefix = os.path.join(checkpoint_dir, "model") if not os.path.exists(checkpoint_dir): os.makedirs(checkpoint_dir) saver = tf.train.Saver(tf.global_variables(), max_to_keep=FLAGS.num_checkpoints) # Write vocabulary vocab_processor.save(os.path.join(out_dir, "vocab")) # Initialize all variables sess.run(tf.global_variables_initializer()) def train_step(x_batch, y_batch): """ A single training step """ feed_dict = { cnn.input_x: x_batch, cnn.input_y: y_batch, cnn.dropout_keep_prob: FLAGS.dropout_keep_prob } _, step, summaries, loss, accuracy = sess.run( [train_op, global_step, train_summary_op, cnn.loss, cnn.accuracy], feed_dict) time_str = datetime.datetime.now().isoformat() print("{}: step {}, loss {:g}, acc {:g}".format(time_str, step, loss, accuracy)) train_summary_writer.add_summary(summaries, step) def dev_step(x_batch, y_batch, writer=None): """ Evaluates model on a dev set """ feed_dict = { cnn.input_x: x_batch, cnn.input_y: y_batch, cnn.dropout_keep_prob: 1.0 } step, summaries, loss, accuracy = sess.run( [global_step, dev_summary_op, cnn.loss, cnn.accuracy], feed_dict) time_str = datetime.datetime.now().isoformat() print("{}: step {}, loss {:g}, acc {:g}".format(time_str, step, loss, accuracy)) if writer: writer.add_summary(summaries, step) # Generate batches batches = data_helpers.batch_iter( list(zip(x_train, y_train)), FLAGS.batch_size, FLAGS.num_epochs) # Training loop. For each batch... for batch in batches: x_batch, y_batch = zip(*batch) train_step(x_batch, y_batch) current_step = tf.train.global_step(sess, global_step) if current_step % FLAGS.evaluate_every == 0: print("\nEvaluation:") dev_step(x_dev, y_dev, writer=dev_summary_writer) print("") if current_step % FLAGS.checkpoint_every == 0: path = saver.save(sess, checkpoint_prefix, global_step=current_step) print("Saved model checkpoint to {}\n".format(path))
def dev_step(x_batch, y_batch, writer=None): """ Evaluates model on a dev set """ feed_dict = { cnn.input_x: x_batch, cnn.input_y: y_batch, cnn.dropout_keep_prob: 1.0 } step, summaries, loss, accuracy = sess.run( [global_step, dev_summary_op, cnn.loss, cnn.accuracy], feed_dict) time_str = datetime.datetime.now().isoformat() print("step {}: accuracy {:g}".format(step, accuracy)) if writer: writer.add_summary(summaries, step) # Generate batches batches = data_helpers.batch_iter( list(zip(x_train, y_train)), FLAGS.batch_size, FLAGS.num_epochs) #len(batches) # Training loop. For each batch... i=1 for batch in batches: i+=1 x_batch, y_batch = zip(*batch) train_step(x_batch, y_batch) current_step = tf.train.global_step(sess, global_step) #
def train(x_train, y_train, word_id_dict, x_dev, y_dev): # Training # ================================================== with tf.Graph().as_default(): session_conf = tf.ConfigProto( allow_soft_placement=FLAGS.allow_soft_placement, log_device_placement=FLAGS.log_device_placement) sess = tf.Session(config=session_conf) with sess.as_default(): cnn = TextCNN(FLAGS.flag_values_dict()) # Define Training procedure global_step = tf.Variable(0, name="global_step", trainable=False) # lr decay decayed_lr = tf.train.exponential_decay(FLAGS.lr, global_step, 1000, FLAGS.lr_decay, staircase=True) optimizer = tf.train.AdadeltaOptimizer(decayed_lr) grads_and_vars = optimizer.compute_gradients(cnn.loss) train_op = optimizer.apply_gradients(grads_and_vars, global_step=global_step) # Keep track of gradient values and sparsity (optional) grad_summaries = [] for g, v in grads_and_vars: if g is not None: grad_hist_summary = tf.summary.histogram( "{}/grad/hist".format(v.name), g) sparsity_summary = tf.summary.scalar( "{}/grad/sparsity".format(v.name), tf.nn.zero_fraction(g)) grad_summaries.append(grad_hist_summary) grad_summaries.append(sparsity_summary) grad_summaries_merged = tf.summary.merge(grad_summaries) # Output directory for models and summaries timestamp = str(int(time.time())) out_dir = os.path.abspath( os.path.join(os.path.curdir, "runs", "text_cnn_multichannel_MR_3,4,5,6")) print("Writing to {}\n".format(out_dir)) # Summaries for loss and accuracy loss_summary = tf.summary.scalar("loss", cnn.loss) acc_summary = tf.summary.scalar("accuracy", cnn.accuracy) # Train Summaries train_summary_op = tf.summary.merge( [loss_summary, acc_summary, grad_summaries_merged]) train_summary_dir = os.path.join(out_dir, "summaries", "train") train_summary_writer = tf.summary.FileWriter( train_summary_dir, sess.graph) # Dev summaries dev_summary_op = tf.summary.merge([loss_summary, acc_summary]) dev_summary_dir = os.path.join(out_dir, "summaries", "dev") dev_summary_writer = tf.summary.FileWriter(dev_summary_dir, sess.graph) # Checkpoint directory. 
Tensorflow assumes this directory already exists so we need to create it checkpoint_dir = os.path.abspath( os.path.join(out_dir, "checkpoints")) checkpoint_prefix = os.path.join(checkpoint_dir, "model") if not os.path.exists(checkpoint_dir): os.makedirs(checkpoint_dir) saver = tf.train.Saver(tf.global_variables(), max_to_keep=FLAGS.num_checkpoints) # Write vocabulary, configuration with smart_open.smart_open(os.path.join(out_dir, "vocab"), 'wb') as f: pickle.dump(word_id_dict, f) with smart_open.smart_open(os.path.join(out_dir, "config"), 'wb') as f: pickle.dump(FLAGS.flag_values_dict(), f) # Initialize all variables sess.run(tf.global_variables_initializer()) if FLAGS.word2vec: # when a pretrained word2vec model is used print("Loading W2V data...") pre_emb = KeyedVectors.load_word2vec_format( FLAGS.word2vec, binary=True) # load the pre-trained word2vec model pre_emb.init_sims(replace=True) num_keys = len(pre_emb.vocab) print("loaded word2vec len ", num_keys) # initialize the weight matrix with random uniform values; rows for words found in the pretrained word2vec model are overwritten below initW = np.random.uniform( -0.25, 0.25, (FLAGS.vocab_size, FLAGS.embedding_dim)) # load any vectors from the word2vec print("init initW cnn.W in FLAG") for w in word_id_dict.keys(): arr = [] s = re.sub('[^0-9a-zA-Z]+', '', w) if w in pre_emb: # the vocabulary word exists in the Google word2vec model arr = pre_emb[w] # take its word2vec vector elif w.lower() in pre_emb: # also try the lowercase form arr = pre_emb[w.lower()] elif s in pre_emb: # try the stripped form arr = pre_emb[s] elif s.isdigit(): # the word is a number arr = pre_emb['1'] if len(arr) > 0: # a pretrained vector was found for this vocabulary word idx = word_id_dict[w] # word index initW[idx] = np.asarray(arr).astype( np.float32) # assign the word2vec vector to the corresponding row print("assigning initW to cnn. len=" + str(len(initW))) sess.run(cnn.W.assign(initW)) # assign initW to cnn.W if FLAGS.is_multi_channel: print("assigning initW to cnn.2 len=" + str(len(initW))) sess.run(cnn.W2.assign(initW)) def train_step(x_batch, y_batch): feed_dict = { cnn.input_x: x_batch, cnn.input_y: y_batch, cnn.dropout_keep_prob: FLAGS.dropout_keep_prob } _, step, lr, summaries, loss, accuracy = sess.run([ train_op, global_step, decayed_lr, train_summary_op, cnn.loss, cnn.accuracy ], feed_dict) time_str = datetime.datetime.now().isoformat() print("{}: step {}, loss {:g}, lr {:g}, acc {:g}".format( time_str, step, loss, lr, accuracy)) train_summary_writer.add_summary(summaries, step) def dev_step(x_batch, y_batch, writer=None): feed_dict = { cnn.input_x: x_batch, cnn.input_y: y_batch, cnn.dropout_keep_prob: 1.0 } step, summaries, loss, accuracy = sess.run( [global_step, dev_summary_op, cnn.loss, cnn.accuracy], feed_dict) time_str = datetime.datetime.now().isoformat() print("{}: step {}, loss {:g}, acc {:g}".format( time_str, step, loss, accuracy)) if writer: writer.add_summary(summaries, step) return accuracy # Generate batches batches = dh.batch_iter(list(zip(x_train, y_train)), FLAGS.batch_size, FLAGS.num_epochs) # Training loop. For each batch... max_acc = 0 for batch in batches: x_batch, y_batch = zip(*batch) train_step(x_batch, y_batch) current_step = tf.train.global_step(sess, global_step) if current_step % FLAGS.evaluate_every == 0: print("\nEvaluation:") accuracy = dev_step(x_dev, y_dev, writer=dev_summary_writer) print("") if accuracy > max_acc: max_acc = accuracy path = saver.save(sess, checkpoint_prefix, global_step=current_step) print("Saved model checkpoint to {}\n".format(path))