Example #1
from collections import defaultdict

import data_helpers  # project-local batching helpers (not shown here)
import metrics       # project-local metric implementations (not shown here)


# FLAGS, charVocab, and valid_dataset are defined by the enclosing script.
def dev_step(sess, model, target_loss_weight):
    results = defaultdict(list)
    num_test = 0
    num_correct = 0.0
    test_batches = data_helpers.batch_iter(valid_dataset,
                                           FLAGS.batch_size,
                                           1,
                                           target_loss_weight,
                                           FLAGS.max_sequence_length,
                                           charVocab,
                                           FLAGS.max_word_length,
                                           shuffle=True)
    for test_batch in test_batches:
        (x_question, x_answer, x_question_len, x_answer_len, x_target,
         x_target_weight, id_pairs, x_q_char, x_q_len, x_a_char,
         x_a_len) = test_batch
        feed_dict = {
            model.question: x_question,
            model.answer: x_answer,
            model.question_len: x_question_len,
            model.answer_len: x_answer_len,
            model.target: x_target,
            model.target_loss_weight: x_target_weight,
            model.dropout_keep_prob: 1.0,
            model.q_charVec: x_q_char,
            model.q_charLen: x_q_len,
            model.a_charVec: x_a_char,
            model.a_charLen: x_a_len
        }
        batch_accuracy, predicted_prob = sess.run(
            [model.accuracy, model.probs], feed_dict)
        num_test += len(predicted_prob)
        if num_test % 1000 == 0:
            print(num_test)

        # Recover the number of correct predictions from the batch mean accuracy.
        num_correct += len(predicted_prob) * batch_accuracy
        # Group each (answer_id, label, score) under its question for ranking.
        for i, prob_score in enumerate(predicted_prob):
            question_id, answer_id, label = id_pairs[i]
            results[question_id].append((answer_id, label, prob_score))

    # Overall accuracy, classification metrics, and ranking metrics.
    print('num_test_samples: {}  test_accuracy: {}'.format(
        num_test, num_correct / num_test))
    accu, precision, recall, f1, loss = metrics.classification_metrics(results)
    print('Accuracy: {}, Precision: {}  Recall: {}  F1: {} Loss: {}'.format(
        accu, precision, recall, f1, loss))

    mrr = metrics.mean_reciprocal_rank(results)
    top_1_precision = metrics.top_k_precision(results, k=1)
    top_2_precision = metrics.top_k_precision(results, k=2)
    top_5_precision = metrics.top_k_precision(results, k=5)
    top_10_precision = metrics.top_k_precision(results, k=10)
    total_valid_query = metrics.get_num_valid_query(results)

    print(
        'MRR (mean reciprocal rank): {}\tTop-1 precision: {}\tNum_query: {}\n'.
        format(mrr, top_1_precision, total_valid_query))
    print('Top-2 precision: {}'.format(top_2_precision))
    print('Top-5 precision: {}'.format(top_5_precision))
    print('Top-10 precision: {}'.format(top_10_precision))

    return mrr
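
The metrics module is not shown in this example. For context, here is a minimal sketch consistent with the calls above. The function names come from the code, but the implementations are assumptions: results is taken to map each question_id to a list of (answer_id, label, prob_score) tuples, ranked by descending probability, with label 1 marking a relevant answer.

def get_num_valid_query(results):
    # A query counts as valid only if it has at least one relevant candidate.
    return sum(1 for cands in results.values()
               if any(label == 1 for _, label, _ in cands))


def mean_reciprocal_rank(results):
    # Mean of 1/rank of the first relevant answer, over valid queries.
    total_rr, num_valid = 0.0, 0
    for cands in results.values():
        ranked = sorted(cands, key=lambda c: c[2], reverse=True)
        for rank, (_, label, _) in enumerate(ranked, start=1):
            if label == 1:
                total_rr += 1.0 / rank
                num_valid += 1
                break
    return total_rr / num_valid if num_valid else 0.0


def top_k_precision(results, k=1):
    # Fraction of valid queries with a relevant answer in the top k.
    num_hit, num_valid = 0.0, 0
    for cands in results.values():
        if not any(label == 1 for _, label, _ in cands):
            continue
        ranked = sorted(cands, key=lambda c: c[2], reverse=True)
        if any(label == 1 for _, label, _ in ranked[:k]):
            num_hit += 1
        num_valid += 1
    return num_hit / num_valid if num_valid else 0.0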
Example #2
            def dev_step():
                results = defaultdict(list)
                num_test = 0
                num_correct = 0.0
                test_batches = data_helpers.batch_iter(test_dataset,
                                                       FLAGS.batch_size,
                                                       1,
                                                       target_loss_weight,
                                                       idf,
                                                       SEQ_LEN,
                                                       charVocab,
                                                       FLAGS.max_word_length,
                                                       shuffle=True)
                for test_batch in test_batches:
                    (x_question, x_answer, x_question_len, x_answer_len,
                     x_target, x_target_weight, id_pairs, extra_feature,
                     q_feature, a_feature, x_q_char, x_q_len, x_a_char,
                     x_a_len) = test_batch
                    feed_dict = {
                        esim.question: x_question,
                        esim.answer: x_answer,
                        esim.question_len: x_question_len,
                        esim.answer_len: x_answer_len,
                        esim.target: x_target,
                        esim.target_loss_weight: x_target_weight,
                        esim.dropout_keep_prob: 1.0,
                        esim.extra_feature: extra_feature,
                        esim.q_word_feature: q_feature,
                        esim.a_word_feature: a_feature,
                        esim.q_charVec: x_q_char,
                        esim.q_charLen: x_q_len,
                        esim.a_charVec: x_a_char,
                        esim.a_charLen: x_a_len
                    }
                    batch_accuracy, predicted_prob = sess.run(
                        [esim.accuracy, esim.probs], feed_dict)
                    num_test += len(predicted_prob)
                    if num_test % 1000 == 0:
                        print(num_test)

                    num_correct += len(predicted_prob) * batch_accuracy
                    for i, prob_score in enumerate(predicted_prob):
                        question_id, answer_id, label = id_pairs[i]
                        results[question_id].append(
                            (answer_id, label, prob_score))

                # Overall accuracy, classification metrics, and ranking metrics.
                print('num_test_samples: {}  test_accuracy: {}'.format(
                    num_test, num_correct / num_test))
                accu, precision, recall, f1, loss = metrics.classification_metrics(
                    results)
                print(
                    'Accuracy: {}, Precision: {}  Recall: {}  F1: {} Loss: {}'.
                    format(accu, precision, recall, f1, loss))

                mean_ap = metrics.mean_average_precision(results)
                mrr = metrics.mean_reciprocal_rank(results)
                top_1_precision = metrics.top_1_precision(results)
                total_valid_query = metrics.get_num_valid_query(results)
                print(
                    'MAP (mean average precision): {}\tMRR (mean reciprocal rank): {}\tTop-1 precision: {}\tNum_query: {}'
                    .format(mean_ap, mrr, top_1_precision, total_valid_query))

                return mrr
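
Example #2 additionally reports MAP. A sketch of mean_average_precision under the same assumed results structure (candidates sorted by descending score; queries with no relevant answer are skipped) might look like this:

def mean_average_precision(results):
    total_ap, num_valid = 0.0, 0
    for cands in results.values():
        ranked = sorted(cands, key=lambda c: c[2], reverse=True)
        num_rel, precision_sum = 0, 0.0
        for rank, (_, label, _) in enumerate(ranked, start=1):
            if label == 1:
                num_rel += 1
                # Precision at this relevant position.
                precision_sum += float(num_rel) / rank
        if num_rel:
            total_ap += precision_sum / num_rel
            num_valid += 1
    return total_ap / num_valid if num_valid else 0.0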
Example #3
                q_char_len: x_q_len,
                a_char_feature: x_a_char,
                a_char_len: x_a_len
            }
            predicted_prob = sess.run(prob, feed_dict)
            num_test += len(predicted_prob)
            print('num_test_sample={}'.format(num_test))
            for i, prob_score in enumerate(predicted_prob):
                qid, aid, label = batch_id_pairs[i]
                results[qid].append((aid, label, prob_score))

accu, precision, recall, f1, loss = metrics.classification_metrics(results)
print('Accuracy: {}, Precision: {}  Recall: {}  F1: {} Loss: {}'.format(
    accu, precision, recall, f1, loss))

mrr = metrics.mean_reciprocal_rank(results)
top_1_precision = metrics.top_k_precision(results, k=1)
top_2_precision = metrics.top_k_precision(results, k=2)
top_5_precision = metrics.top_k_precision(results, k=5)
top_10_precision = metrics.top_k_precision(results, k=10)
total_valid_query = metrics.get_num_valid_query(results)
print('MRR (mean reciprocal rank): {}\tTop-1 precision: {}\tNum_query: {}'.format(
    mrr, top_1_precision, total_valid_query))
print('Top-2 precision: {}'.format(top_2_precision))
print('Top-5 precision: {}'.format(top_5_precision))
print('Top-10 precision: {}'.format(top_10_precision))

out_path = FLAGS.output_file
print("Saving evaluation to {}".format(out_path))
with open(out_path, 'w') as f:
    f.write("query_id\tdocument_id\tscore\trank\trelevance\n")
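    # A plausible continuation, not part of the excerpt: rank each query's
    # candidates by predicted score (descending) and write one TSV row per
    # candidate, matching the header columns written above.
    for qid, cands in results.items():
        ranked = sorted(cands, key=lambda c: c[2], reverse=True)
        for rank, (aid, label, score) in enumerate(ranked, start=1):
            f.write('{}\t{}\t{}\t{}\t{}\n'.format(qid, aid, score, rank, label))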