"dropout_keep_prob").outputs[0] # Tensors we want to evaluate predictions = graph.get_operation_by_name( "output/predictions").outputs[0] # Generate batches for one epoch #x_batches = data_helpers.batch_iter(list(x_test), FLAGS.batch_size, 1, shuffle=False) #m_batches = data_helpers.batch_iter3(list(m), FLAGS.batch_size, 1, shuffle=False) #x_batches, m_batches = data_helpers.batch_iter2(list(x_test), list(m), FLAGS.batch_size, 1, shuffle=False) #m_batches = data_helpers.batch_iter2(list(m), FLAGS.batch_size, 1, shuffle=False) # Collect the predictions here all_predictions = [] for x_test_batch, m_test_batch in data_helpers.batch_iter2( list(x_test), list(m), FLAGS.batch_size, 1, shuffle=False): batch_predictions = sess.run( predictions, { input_x: x_test_batch, input_m: m_test_batch, dropout_keep_prob: 1.0 }) all_predictions = np.concatenate( [all_predictions, batch_predictions]) # Print accuracy if y_test is defined if y_test is not None: correct_predictions = float(sum(all_predictions == y_test)) print("Total number of test examples: {}".format(len(y_test))) print("Accuracy: {:g}".format(correct_predictions / float(len(y_test))))
def train(Train, Test, word_embedding):
    """Train the model named by ``use_model`` and track the best test scores.

    A fresh graph and session are created, the model is optimised with Adam
    on shuffled batches of ``Train``, and every ``FLAGS.evaluate_every``
    steps the whole of ``Test`` is scored (accuracy and macro F1).  When an
    evaluation step is also a multiple of ``FLAGS.checkpoint_every`` and the
    test accuracy improved, a checkpoint is written under
    ``runs/<use_data>/<use_model>/checkpoints``.

    Args:
        Train: mapping with the keys 'x', 'T', 'Ts', 'x_len', 'T_len',
            'Ts_len', 'R_Self', 'R_Cross', 'T_W', 'T_P', 'Ts_P' and 'y'.
            The entries 'x', 'T', 'Ts' and 'y' must expose ``.shape[1]``,
            which is used to size the model.
        Test: mapping with the same keys, used only for evaluation.
        word_embedding: not referenced inside this function; kept so the
            signature stays compatible with existing callers.

    Returns:
        A 7-tuple ``(train_acc, dev_acc, max_test_acc, max_test_F1_macro,
        max_test_step, train_all_softmax, test_all_softmax)``.  The four
        list-valued entries are returned empty because nothing in this
        function appends to them.  ``max_test_step`` is 0 when no
        checkpoint was ever saved.
    """
    # Column order shared by the zipped training rows, the zipped test rows
    # and the unpacking in ``make_feed_dict``.
    field_names = ('x', 'T', 'Ts', 'x_len', 'T_len', 'Ts_len', 'R_Self',
                   'R_Cross', 'T_W', 'T_P', 'Ts_P', 'y')
    eval_batch_size = 256

    train_acc, dev_acc, test_acc = [], [], []
    train_all_softmax, test_all_softmax = [], []
    max_test_acc = 0
    max_test_F1_macro = 0
    # Bound up front: the original only assigned this when a checkpoint was
    # saved, so a run without any improvement crashed at the final print.
    max_test_step = 0

    # Training
    # ==================================================
    with tf.Graph().as_default():
        session_conf = tf.ConfigProto(
            allow_soft_placement=FLAGS.allow_soft_placement,
            log_device_placement=FLAGS.log_device_placement)
        # Used as a context manager the session becomes the default session
        # and is closed on exit instead of being leaked.
        with tf.Session(config=session_conf) as sess:
            # NOTE(review): eval() resolves the model class from the
            # module-level string ``use_model``; that value must never come
            # from untrusted input.
            model = eval(use_model)(
                sequence_length=Train['x'].shape[1],
                target_sequence_length=Train['T'].shape[1],
                targets_num_max=Train['Ts'].shape[1],
                num_classes=Train['y'].shape[1],
                word_embedding_dim=FLAGS.word_embedding_dim,
                l2_reg_lambda=FLAGS.l2_reg_lambda)
            graph_writer = tf.summary.FileWriter("logs/LSTM_GCN3", sess.graph)

            vs = tf.trainable_variables()
            print('There are %d train_able_variables in the Graph: ' % len(vs))
            for v in vs:
                print(v)

            # Define Training procedure
            global_step = tf.Variable(0, name="global_step", trainable=False)
            optimizer = tf.train.AdamOptimizer(FLAGS.learning_rate)
            grads_and_vars = optimizer.compute_gradients(model.loss)
            train_op = optimizer.apply_gradients(grads_and_vars,
                                                 global_step=global_step)

            # Keep track of gradient values and sparsity (optional)
            grad_summaries = []
            for g, v in grads_and_vars:
                if g is None:
                    continue
                grad_summaries.append(tf.summary.histogram(
                    "{}/grad/hist".format(v.name), g))
                grad_summaries.append(tf.summary.scalar(
                    "{}/grad/sparsity".format(v.name),
                    tf.nn.zero_fraction(g)))
            grad_summaries_merged = tf.summary.merge(grad_summaries)

            # Output directory for models and summaries
            out_dir = os.path.abspath(
                os.path.join(os.path.curdir, "runs", use_data, use_model))
            print("Writing to {}\n".format(out_dir))

            # Summaries for loss and accuracy
            loss_summary = tf.summary.scalar("loss", model.loss)
            acc_summary = tf.summary.scalar("accuracy", model.accuracy)

            # Train summaries
            train_summary_op = tf.summary.merge(
                [loss_summary, acc_summary, grad_summaries_merged])
            train_summary_dir = os.path.join(out_dir, "summaries", "train")
            train_summary_writer = tf.summary.FileWriter(
                train_summary_dir, sess.graph)

            # Test summaries
            test_summary_op = tf.summary.merge([loss_summary, acc_summary])
            test_summary_dir = os.path.join(out_dir, "summaries", "test")
            test_summary_writer = tf.summary.FileWriter(
                test_summary_dir, sess.graph)

            # Checkpoint directory. Tensorflow assumes this directory already
            # exists so we need to create it.
            checkpoint_dir = os.path.abspath(
                os.path.join(out_dir, "checkpoints"))
            checkpoint_prefix = os.path.join(checkpoint_dir, "model")
            if not os.path.exists(checkpoint_dir):
                os.makedirs(checkpoint_dir)
            saver = tf.train.Saver(tf.global_variables(),
                                   max_to_keep=FLAGS.num_checkpoints)

            # Initialize all variables
            sess.run(tf.global_variables_initializer())

            def make_feed_dict(fields, keep_prob):
                """Map one batch (12 per-field sequences) onto the model inputs."""
                (x_batch, T_batch, Ts_batch, x_len_batch, T_len_batch,
                 Ts_len_batch, R_Self_batch, R_Cross_batch, T_W_batch,
                 T_P_batch, Ts_P_batch, y_batch) = fields
                return {
                    model.input_x: x_batch,
                    model.input_target: T_batch,
                    model.input_targets_all: Ts_batch,
                    model.sen_len: x_len_batch,
                    model.target_len: T_len_batch,
                    model.targets_all_len_a: Ts_len_batch,
                    model.relate_self: R_Self_batch,
                    model.relate_cross: R_Cross_batch,
                    model.target_which: T_W_batch,
                    model.target_position: T_P_batch,
                    model.targets_all_position_a: Ts_P_batch,
                    model.input_y: y_batch,
                    model.dropout_keep_prob: keep_prob
                }

            def train_step(fields):
                """
                A single training step
                """
                feed_dict = make_feed_dict(fields, FLAGS.dropout_keep_prob)
                _, step, summaries, loss, accuracy = sess.run([
                    train_op, global_step, train_summary_op, model.loss,
                    model.accuracy
                ], feed_dict)
                time_str = datetime.datetime.now().isoformat()
                print("{}: step {}, loss {:g}, acc {:g}".format(
                    time_str, step, loss, accuracy))
                train_summary_writer.add_summary(summaries, step)

            def test_step(fields, summary=None, writer=None):
                """
                Evaluates model on one batch with dropout disabled
                """
                feed_dict = make_feed_dict(fields, 1.0)
                step, summaries, loss, accuracy, softmax, true_y, predictions = sess.run(
                    [
                        global_step, summary, model.loss, model.accuracy,
                        model.softmax, model.true_y, model.predictions
                    ], feed_dict)
                if writer:
                    writer.add_summary(summaries, step)
                return step, loss, true_y, predictions

            # Generate batches
            batches = data_helpers.batch_iter(
                list(zip(*(Train[name] for name in field_names))),
                FLAGS.batch_size, FLAGS.num_epochs)

            # Training loop. For each batch...
            for batch in batches:
                train_step(tuple(zip(*batch)))
                current_step = tf.train.global_step(sess, global_step)
                if current_step % FLAGS.evaluate_every != 0:
                    continue

                print('\nBy now ,the max test acc is: ', max_test_acc)
                print(' the max F1 score is: ', max_test_F1_macro)
                print("\nEvaluation Text:")
                loss = 0
                true_y = np.array([])
                predictions = np.array([])
                batches_test = data_helpers.batch_iter2(
                    list(zip(*(Test[name] for name in field_names))),
                    eval_batch_size, 1, shuffle=False)
                for batch_test in batches_test:
                    step_i, loss_i, true_y_i, predictions_i = test_step(
                        tuple(zip(*batch_test)),
                        summary=test_summary_op,
                        writer=test_summary_writer)
                    # The reported loss is the sum over test batches.
                    loss += loss_i
                    true_y = np.concatenate([true_y, true_y_i])
                    predictions = np.concatenate(
                        [predictions, predictions_i])
                accuracy = metrics.accuracy_score(true_y, predictions)
                test_F1_i = metrics.f1_score(true_y, predictions,
                                             average='macro')
                time_str = datetime.datetime.now().isoformat()
                print("{}: step {}, loss {:g}, acc {:g}, F1 {:g}".format(
                    time_str, current_step, loss, accuracy, test_F1_i))
                test_acc_i = accuracy
                test_acc.append(test_acc_i)
                print(
                    '----------------------------------------------------------'
                )
                print("")

                # Checkpointing is decided only right after an evaluation, so
                # the metrics compared here always describe the current
                # weights and can never be unbound or stale.
                if current_step % FLAGS.checkpoint_every == 0:
                    if test_acc_i > max_test_acc:
                        path = saver.save(sess, checkpoint_prefix,
                                          global_step=current_step)
                        print("Saved model checkpoint to {}\n".format(path))
                        print('->>>>>>>>>>>>>>>>>>>>>>>')
                        max_test_step = current_step
                        max_test_acc = test_acc_i
                    if test_F1_i > max_test_F1_macro:
                        max_test_F1_macro = test_F1_i

            print('max_test_step: ', max_test_step)
            print('max_test_acc: ', max_test_acc)
            print('max_test_F1_macro: ', max_test_F1_macro)

            # Flush pending events and release the event-file handles.
            for summary_writer in (graph_writer, train_summary_writer,
                                   test_summary_writer):
                summary_writer.close()

    return train_acc, dev_acc, max_test_acc, max_test_F1_macro, max_test_step, train_all_softmax, test_all_softmax
def classify_query(self, query, passage):
    """Classify one query against a passage with the restored checkpoint.

    The query is converted with ``self.vocab_processor``, a match vector is
    obtained from ``data_helpers.checkMatch(passage, query)`` and cut or
    zero-padded to the length of the transformed query, and both are fed to
    the graph restored from ``self.checkpoint_file``.

    Args:
        query: the query passed to ``self.vocab_processor.transform`` and
            ``data_helpers.checkMatch``.
        passage: the passage passed to ``data_helpers.checkMatch``.

    Returns:
        The first prediction produced by the ``output/predictions`` op,
        converted to ``int``.

    Side effects:
        Rebinds ``self.session_conf`` and ``self.sess`` on every call, and
        clears ``CUDA_VISIBLE_DEVICES`` in ``os.environ`` when ``self.cuda``
        is falsy.
    """
    with self.graph.as_default():
        if self.cuda:
            gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.2)
            self.session_conf = tf.ConfigProto(
                allow_soft_placement=True,
                log_device_placement=False,
                gpu_options=gpu_options)
        else:
            os.environ['CUDA_VISIBLE_DEVICES'] = ''
            self.session_conf = tf.ConfigProto(
                allow_soft_placement=True, device_count={'GPU': 0})

        # NOTE(review): every call opens a new session and re-imports the
        # meta graph into self.graph without closing the previous session.
        # If this method is called repeatedly, caching the restored session
        # looks worthwhile -- confirm with the callers that use self.sess.
        self.sess = tf.Session(config=self.session_conf)
        m_a = []
        with self.sess.as_default():
            # Transform data
            x_test = np.array(list(self.vocab_processor.transform([query])))
            m_data = data_helpers.checkMatch(passage, query)

            # Fit the match vector to the length of the transformed query:
            # truncate when it is longer, zero-pad otherwise.
            seq_len = np.size(x_test[0])
            if seq_len < len(m_data):
                m_temp = m_data[0:seq_len]
            else:
                # Builtin float instead of the np.float alias, which was
                # removed in NumPy 1.24; the resulting dtype is the same.
                m_zero = np.zeros(seq_len - len(m_data), dtype=float)
                m_temp = np.concatenate([m_data, m_zero], 0)
            m_a.append(m_temp)
            m = np.array(m_a)

            # Load the saved meta graph and restore variables
            saver = tf.train.import_meta_graph("{}.meta".format(
                self.checkpoint_file))
            saver.restore(self.sess, self.checkpoint_file)

            # Get the placeholders from the graph by name
            input_x = self.graph.get_operation_by_name("input_x").outputs[0]
            input_m = self.graph.get_operation_by_name("input_m").outputs[0]
            dropout_keep_prob = self.graph.get_operation_by_name(
                "dropout_keep_prob").outputs[0]

            # Tensors we want to evaluate
            predictions = self.graph.get_operation_by_name(
                "output/predictions").outputs[0]

            # Collect the predictions here
            all_predictions = []
            for x_test_batch, m_test_batch in data_helpers.batch_iter2(
                    list(x_test), list(m), 1, 1, shuffle=False):
                batch_predictions = self.sess.run(
                    predictions, {
                        input_x: x_test_batch,
                        input_m: m_test_batch,
                        # Dropout is disabled at inference time.
                        dropout_keep_prob: 1.0
                    })
                all_predictions = np.concatenate(
                    [all_predictions, batch_predictions])
            return int(all_predictions[0])