def evaluate(q1_dev, q2_dev, y_dev, sess, model):
    """Evaluate the model on a dev set.

    Runs the model over the whole dev set in batches and returns loss and
    accuracy averaged per example (each batch is weighted by its size, so
    a smaller final batch does not skew the mean).

    :param q1_dev: padded id sequences for the first questions
    :param q2_dev: padded id sequences for the second questions
    :param y_dev: labels aligned with q1_dev/q2_dev
    :param sess: active TensorFlow session with variables initialized/restored
    :param model: model object exposing `loss` and `accuracy` tensors
    :return: tuple (mean_loss, mean_accuracy) over the whole dev set
    """
    data_len = len(y_dev)
    if data_len == 0:
        # Guard: the original raised ZeroDivisionError on an empty dev set.
        return 0.0, 0.0
    total_loss = 0.0
    total_acc = 0.0
    batch_eval = batch_iter_per_epoch(q1_dev, q2_dev, y_dev)
    for q1_batch_eval, q2_batch_eval, y_batch_eval in batch_eval:
        batch_len = len(y_batch_eval)
        # keep_prob=1.0: dropout is disabled during evaluation.
        feed_dict = feed_data(q1_batch_eval,
                              q2_batch_eval,
                              y_batch_eval,
                              keep_prob=1.0,
                              model=model)
        loss, accuracy = sess.run([model.loss, model.accuracy], feed_dict)
        # Weight each batch by its length so the average is per-example.
        total_loss += loss * batch_len
        total_acc += accuracy * batch_len
    return total_loss / data_len, total_acc / data_len
def predict():
    """Restore the best checkpoint and evaluate it on the test set.

    Prints test loss/accuracy, a precision/recall/F1 report and the
    confusion matrix, then writes the per-example predicted class
    probabilities to a CSV file under ``args.save_dir``.

    Relies on module-level ``args`` and ``save_path``.
    """
    print('Loading test data ...')
    start_time = time.time()
    q1_train, q2_train, y_train, q1_dev, q2_dev, y_dev, q1_test, q2_test, y_test, vocab_size = load_pkl_set(
        args.pkl_files)
    # Only the test split is needed here; free the rest immediately.
    del q1_train, q2_train, y_train, q1_dev, q2_dev, y_dev

    # MVLSTM model init
    model = MVLSTM(sequence_length=args.max_q_len,
                   num_classes=args.num_classes,
                   embedding_dim=args.embedding_dim,
                   vocab_size=vocab_size,
                   max_length=args.max_q_len,
                   hidden_dim=args.hidden_size,
                   learning_rate=args.learning_rate)

    session = tf.Session()
    session.run(tf.global_variables_initializer())
    saver = tf.train.Saver()
    # Overwrite the freshly-initialized variables with the best checkpoint.
    saver.restore(session, save_path=save_path)

    print('Testing ...')
    loss_test, acc_test = evaluate(q1_test, q2_test, y_test, session, model=model)
    print('Test loss:{0:>6.2}, Test acc:{1:7.2%}'.format(loss_test, acc_test))

    # shuffle=False so predictions stay aligned with y_test for the report.
    test_batches = batch_iter_per_epoch(q1_test, q2_test, y_test, shuffle=False)
    all_predictions = []
    # np.concatenate cannot start from an empty list for 2-D probability
    # arrays, so use a None sentinel for the first batch.
    all_predict_prob = None
    for q1_test_batch, q2_test_batch, y_test_batch in test_batches:
        batch_predictions, batch_predict_probs = session.run(
            [model.y_pred, model.probs],
            feed_dict={
                model.input_q1: q1_test_batch,
                model.input_q2: q2_test_batch,
                model.dropout_keep_prob: 1.0
            })
        all_predictions = np.concatenate([all_predictions, batch_predictions])
        if all_predict_prob is None:
            all_predict_prob = batch_predict_probs
        else:
            all_predict_prob = np.concatenate(
                [all_predict_prob, batch_predict_probs])
    y_test = [float(temp) for temp in y_test]

    # Evaluation indices
    print('Precision, Recall, F1-Score ...')
    print(
        metrics.classification_report(y_test,
                                      all_predictions,
                                      target_names=['not match', 'match']))

    # Confusion Matrix
    print('Confusion Matrix ...')
    print(metrics.confusion_matrix(y_test, all_predictions))

    # Write probability to csv.
    out_dir = os.path.join(args.save_dir, 'predict_prob_csv')
    print('Saving evaluation to \n{0}'.format(out_dir))
    # newline='' is required by the csv module to avoid blank rows on Windows.
    with open(out_dir, 'w', newline='') as f:
        csv.writer(f).writerows(all_predict_prob)

    time_dif = get_time_dif(start_time)
    print('Time usage:', time_dif)
def train():
    """Train the MVLSTM model with periodic evaluation and early stopping.

    Loads the pickled train/dev splits, builds the model, logs scalar
    summaries to TensorBoard, saves the best checkpoint (by dev accuracy)
    to ``save_path``, and stops early when dev accuracy has not improved
    for ``require_improvement`` batches.

    Relies on module-level ``args`` and ``save_path``.
    """
    # Load data
    print('Loading data ...')
    start_time = time.time()
    q1_train, q2_train, y_train, q1_dev, q2_dev, y_dev, q1_test, q2_test, y_test, vocab_size = load_pkl_set(
        args.pkl_files)
    # The test split is not used during training; free it immediately.
    del q1_test, q2_test, y_test
    time_dif = get_time_dif(start_time)
    print('Time usage:', time_dif)

    print('Configuring TensorBoard and Saver ...')
    tensorboard_dir = args.tensorboard_dir
    if not os.path.exists(tensorboard_dir):
        os.makedirs(tensorboard_dir)

    # MVLSTM model init
    model = MVLSTM(sequence_length=args.max_q_len,
                   num_classes=args.num_classes,
                   embedding_dim=args.embedding_dim,
                   vocab_size=vocab_size,
                   max_length=args.max_q_len,
                   hidden_dim=args.hidden_size,
                   learning_rate=args.learning_rate)
    tf.summary.scalar('loss', model.loss)
    tf.summary.scalar('accuracy', model.accuracy)
    merged_summary = tf.summary.merge_all()
    writer = tf.summary.FileWriter(tensorboard_dir)

    # Configuring Saver
    saver = tf.train.Saver()
    if not os.path.exists(args.save_dir):
        os.makedirs(args.save_dir)

    # Create Session
    session = tf.Session()
    session.run(tf.global_variables_initializer())
    writer.add_graph(session.graph)

    print('Training and Deviation ...')
    start_time = time.time()
    total_batch = 0
    best_acc_dev = 0.0
    last_improved = 0
    require_improvement = 30000  # Early stopping patience, in batches.
    tag = False
    for epoch in range(args.epochs):
        print('Epoch:', epoch + 1)
        batch_train = batch_iter_per_epoch(q1_train, q2_train, y_train,
                                           args.batch_size)
        for q1_batch, q2_batch, y_batch in batch_train:
            feed_dict = feed_data(q1_batch,
                                  q2_batch,
                                  y_batch,
                                  args.dropout_keep_prob,
                                  model=model)
            if total_batch % args.checkpoint_every == 0:
                # Write scalar summaries to TensorBoard.
                summary = session.run(merged_summary, feed_dict)
                writer.add_summary(summary, total_batch)
            if total_batch % args.evaluate_every == 0:
                # Report performance on the current train batch and dev set.
                feed_dict[model.dropout_keep_prob] = 1.0
                loss_train, acc_train = session.run(
                    [model.loss, model.accuracy], feed_dict=feed_dict)
                loss_dev, acc_dev = evaluate(q1_dev, q2_dev, y_dev, session,
                                             model=model)
                if acc_dev > best_acc_dev:
                    # Save best result
                    best_acc_dev = acc_dev
                    last_improved = total_batch
                    saver.save(sess=session, save_path=save_path)
                    improved_str = '*'
                else:
                    improved_str = ''
                time_dif = get_time_dif(start_time)
                print(
                    'Iter: {0:>6}, Train Loss: {1:>6.2}, Train Acc: {2:7.2%}, Val Loss: {3:>6.2}, Val Acc:'
                    '{4:>7.2%}, Time:{5}{6}'.format(total_batch, loss_train,
                                                    acc_train, loss_dev,
                                                    acc_dev, time_dif,
                                                    improved_str))
                # BUG FIX: restore training-time dropout before the optimizer
                # step below. The original left keep_prob at 1.0 here, which
                # silently disabled dropout on every evaluation iteration.
                feed_dict[model.dropout_keep_prob] = args.dropout_keep_prob
            session.run(model.optim, feed_dict)
            total_batch += 1
            if total_batch - last_improved > require_improvement:
                # No improvement for a long time: stop early.
                print('No optimization for a long time, auto-stopping ...')
                tag = True
                break
        if tag:  # early stopping
            break