Example #1
        def test_step():
            results = defaultdict(list)
            num_test = 0
            num_correct = 0.0
            test_batches = data_helpers.batch_iter(test_dataset,
                                                   FLAGS.batch_size,
                                                   1,
                                                   target_loss_weight,
                                                   FLAGS.max_utter_len,
                                                   FLAGS.max_utter_num,
                                                   FLAGS.max_response_len,
                                                   shuffle=False)
            for test_batch in test_batches:
                x_utterances, x_response, x_utterances_len, x_response_len, x_utters_num, x_target, x_target_weight, id_pairs = test_batch
                feed_dict = {
                    imn.utterances: x_utterances,
                    imn.response: x_response,
                    imn.utterances_len: x_utterances_len,
                    imn.response_len: x_response_len,
                    imn.utters_num: x_utters_num,
                    imn.target: x_target,
                    imn.target_loss_weight: x_target_weight,
                    imn.dropout_keep_prob: 1.0
                }
                batch_accuracy, predicted_prob = sess.run(
                    [imn.accuracy, imn.probs], feed_dict)
                num_test += len(predicted_prob)
                if num_test % 1000 == 0:
                    print(num_test)

                # batch_accuracy is a per-batch mean, so weight it by the batch size
                num_correct += len(predicted_prob) * batch_accuracy
                for i, prob_score in enumerate(predicted_prob):
                    question_id, response_id, label = id_pairs[i]
                    results[question_id].append(
                        (response_id, label, prob_score))

            # compute accuracy, classification and ranking metrics
            print('num_test_samples: {}  test_accuracy: {}'.format(
                num_test, num_correct / num_test))
            accu, precision, recall, f1, loss = metrics.classification_metrics(
                results)
            print('Accuracy: {}, Precision: {}  Recall: {}  F1: {} Loss: {}'.
                  format(accu, precision, recall, f1, loss))

            mvp = metrics.mean_average_precision(results)
            mrr = metrics.mean_reciprocal_rank(results)
            top_1_precision = metrics.top_1_precision(results)
            total_valid_query = metrics.get_num_valid_query(results)
            print(
                'MAP (mean average precision): {}\tMRR (mean reciprocal rank): {}\tTop-1 precision: {}\tNum_query: {}'
                .format(mvp, mrr, top_1_precision, total_valid_query))

            return mrr
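
Note: every example on this page builds the same `results` structure, a dict mapping each query id to a list of (candidate_id, label, prob_score) tuples, which the metrics module then turns into ranking scores. The metrics module itself is not shown here, so the following is only a minimal sketch of how a mean reciprocal rank over that structure could look, not the actual metrics.mean_reciprocal_rank implementation:

def mean_reciprocal_rank_sketch(results):
    # results[query_id] = [(candidate_id, label, prob_score), ...]
    reciprocal_ranks = []
    for candidates in results.values():
        # Rank candidates by predicted probability, highest first.
        ranked = sorted(candidates, key=lambda rec: rec[2], reverse=True)
        for rank, (_, label, _) in enumerate(ranked, start=1):
            if int(label) == 1:
                reciprocal_ranks.append(1.0 / rank)
                break
    return sum(reciprocal_ranks) / len(reciprocal_ranks) if reciprocal_ranks else 0.0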
Example #2
def run_test(dir_path, op_name, sess, training, accuracy, prob, pair_ids, output_layer):
    results = defaultdict(list)
    num_test = 0
    num_correct = 0.0
    n_updates = 0
    mrr = 0
    t0 = time()
    try:
        while True:
            n_updates += 1

            batch_accuracy, predicted_prob, pair_ = sess.run([accuracy, prob, pair_ids], feed_dict={training: False})
            question_id, answer_id, label = pair_
            
            num_test += len(predicted_prob)
            num_correct += len(predicted_prob) * batch_accuracy
            for i, prob_score in enumerate(predicted_prob):
                # question_id, answer_id, label = pair_id[i]
                results[question_id[i]].append((answer_id[i], label[i], prob_score[0]))

            if n_updates % 2000 == 0:
                tf.logging.info("n_update %d , %s: Mins Used: %.2f" %
                                (n_updates, op_name, (time() - t0) / 60.0))

    except tf.errors.OutOfRangeError:
        # compute accuracy, classification and ranking metrics
        print('num_test_samples: {}  test_accuracy: {}'.format(num_test, num_correct / num_test))
        accu, precision, recall, f1, loss = metrics.classification_metrics(results)
        print('Accuracy: {}, Precision: {}  Recall: {}  F1: {} Loss: {}'.format(accu, precision, recall, f1, loss))

        mvp = metrics.mean_average_precision(results)
        mrr = metrics.mean_reciprocal_rank(results)
        top_1_precision = metrics.top_1_precision(results)
        total_valid_query = metrics.get_num_valid_query(results)
        print('MAP (mean average precision): {}\tMRR (mean reciprocal rank): {}\tTop-1 precision: {}\tNum_query: {}'.format(
            mvp, mrr, top_1_precision, total_valid_query))

        out_path = os.path.join(dir_path, "output_test.txt")
        print("Saving evaluation to {}".format(out_path))
        with open(out_path, 'w') as f:
            f.write("query_id\tdocument_id\tscore\trank\trelevance\n")
            for us_id, v in results.items():
                v.sort(key=operator.itemgetter(2), reverse=True)
                for i, rec in enumerate(v):
                    r_id, label, prob_score = rec
                    rank = i + 1
                    f.write('{}\t{}\t{}\t{}\t{}\n'.format(us_id, r_id, prob_score, rank, label))
    return mrr
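
Example #2 also reports top-1 precision over the number of valid queries. Assuming the same results layout as above, a top-1 precision could be computed along these lines (a sketch, not the real metrics.top_1_precision / metrics.get_num_valid_query):

def top_1_precision_sketch(results):
    # A query is "valid" if it has at least one relevant candidate (label == 1).
    num_valid = 0
    num_hit_at_1 = 0
    for candidates in results.values():
        if not any(int(label) == 1 for _, label, _ in candidates):
            continue
        num_valid += 1
        # Pick the candidate with the highest predicted probability.
        _, top_label, _ = max(candidates, key=lambda rec: rec[2])
        if int(top_label) == 1:
            num_hit_at_1 += 1
    return num_hit_at_1 / num_valid if num_valid else 0.0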
Example #3
                response_len: x_response_len,
                utterances_num: x_utters_num,
                dropout_keep_prob: 1.0,
                u_char_feature: x_u_char,
                u_char_len: x_u_char_len,
                r_char_feature: x_r_char,
                r_char_len: x_r_char_len
            }
            predicted_prob = sess.run(prob, feed_dict)
            num_test += len(predicted_prob)
            print('num_test_sample={}'.format(num_test))
            for i, prob_score in enumerate(predicted_prob):
                us_id, r_id, label = id_pairs[i]
                results[us_id].append((r_id, label, prob_score))

accu, precision, recall, f1, loss = metrics.classification_metrics(results)
print('Accuracy: {}, Precision: {}  Recall: {}  F1: {} Loss: {}'.format(
    accu, precision, recall, f1, loss))

mvp = metrics.mean_average_precision(results)
mrr = metrics.mean_reciprocal_rank(results)
top_1_precision = metrics.top_1_precision(results)
total_valid_query = metrics.get_num_valid_query(results)
print(
    'MAP (mean average precision): {}\tMRR (mean reciprocal rank): {}\tTop-1 precision: {}\tNum_query: {}'
    .format(mvp, mrr, top_1_precision, total_valid_query))

out_path = FLAGS.output_file
print("Saving evaluation to {}".format(out_path))
with open(out_path, 'w') as f:
    f.write("query_id\tdocument_id\tscore\trank\trelevance\n")
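
The snippet above is cut off right after the header row is written. Examples #2 and #4 fill the same TSV with one row per candidate, sorted by score, so the missing body presumably continues the with-block along these lines (a sketch assuming results and f are still in scope, not the original continuation):

    for us_id, v in results.items():
        # Highest-scoring candidate first; rank is 1-based.
        v.sort(key=lambda rec: rec[2], reverse=True)
        for i, (r_id, label, prob_score) in enumerate(v):
            f.write('{}\t{}\t{}\t{}\t{}\n'.format(us_id, r_id, prob_score, i + 1, label))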
Example #4
def run_test(epoch_no, dir_path, op_name, sess, training, accuracy, prob,
             pair_ids):
    results = defaultdict(list)
    num_test = 0
    num_correct = 0.0
    n_updates = 0
    mrr = 0
    t0 = time()
    try:
        while True:
            n_updates += 1

            batch_accuracy, predicted_prob, pair_ = sess.run(
                [accuracy, prob, pair_ids], feed_dict={training: False})
            question_id, answer_id, label = pair_

            # question_id = question_id.eval()
            # answer_id = answer_id.eval()
            # label = label.eval()
            num_test += len(predicted_prob)
            # if num_test % 1000 == 0:
            #     print(num_test)

            num_correct += len(predicted_prob) * batch_accuracy
            for i, prob_score in enumerate(predicted_prob):
                # question_id, answer_id, label = pair_id[i]
                results[question_id[i]].append(
                    (answer_id[i], label[i], prob_score[0]))

            if n_updates % 2000 == 0:
                tf.logging.info(
                    "epoch: %i  n_update %d , %s: Mins Used: %.2f" %
                    (epoch_no, n_updates, op_name, (time() - t0) / 60.0))

    except tf.errors.OutOfRangeError:

        # For queries with no relevant candidate, append a synthetic candidate
        # labelled relevant at score `threshold` (labelled non-relevant otherwise),
        # so scoring every real candidate below 0.95 counts as answering "none".
        threshold = 0.95
        none_id = 10000000
        print("threshold: {}".format(threshold))
        for q_id, a_list in results.items():
            correct_flag = 0
            for (a_id, label, score) in a_list:
                if int(label) == 1:
                    correct_flag = 1
            if correct_flag == 0:
                results[q_id].append((none_id, 1, threshold))
            else:
                results[q_id].append((none_id, 0, threshold))
        # compute accuracy, classification and ranking metrics
        print('num_test_samples: {}  test_accuracy: {}'.format(
            num_test, num_correct / num_test))
        accu, precision, recall, f1, loss = metrics.classification_metrics(
            results)
        print(
            'Accuracy: {}, Precision: {}  Recall: {}  F1: {} Loss: {}'.format(
                accu, precision, recall, f1, loss))

        mvp = metrics.mean_average_precision(results)
        mrr = metrics.mean_reciprocal_rank(results)
        top_1_precision = metrics.top_1_precision(results)
        total_valid_query = metrics.get_num_valid_query(results)
        print(
            'MAP (mean average precision): {}\tMRR (mean reciprocal rank): {}\tTop-1 precision: {}\tNum_query: {}'
            .format(mvp, mrr, top_1_precision, total_valid_query))

        out_path = os.path.join(dir_path,
                                "ubuntu_output_epoch_{}.txt".format(epoch_no))
        print("Saving evaluation to {}".format(out_path))
        with open(out_path, 'w') as f:
            f.write("query_id\tdocument_id\tscore\trank\trelevance\n")
            for us_id, v in results.items():
                v.sort(key=operator.itemgetter(2), reverse=True)
                for i, rec in enumerate(v):
                    r_id, label, prob_score = rec
                    rank = i + 1
                    f.write('{}\t{}\t{}\t{}\t{}\n'.format(
                        us_id, r_id, prob_score, rank, label))

        global best_score
        if op_name == 'valid' and mrr > best_score:
            best_score = mrr
            saver = tf.train.Saver()
            dir_path = os.path.join(dir_path, "epoch {}".format(epoch_no))
            if not os.path.exists(dir_path):
                os.makedirs(dir_path)
            saver.save(sess, dir_path)
            tf.logging.info(">> save model!")

    return mrr
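
Example #4 additionally checkpoints the session whenever the validation MRR improves. Restoring that checkpoint later uses the matching tf.train.Saver.restore call with the same path prefix that was passed to saver.save(); the sketch below assumes the same graph has been rebuilt first and that model_dir and best_epoch are supplied by the caller (both are hypothetical names, not part of the original code):

saver = tf.train.Saver()
with tf.Session() as sess:
    # The prefix must match what was passed to saver.save() above.
    saver.restore(sess, os.path.join(model_dir, "epoch {}".format(best_epoch)))
    mrr = run_test(best_epoch, model_dir, 'test', sess, training, accuracy, prob, pair_ids)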
Example #5
        def dev_step():
            results = defaultdict(list)
            num_test = 0
            num_correct = 0.0
            valid_batches = data_helpers.batch_iter(valid_dataset,
                                                    FLAGS.batch_size,
                                                    1,
                                                    target_loss_weight,
                                                    FLAGS.max_utter_len,
                                                    FLAGS.max_utter_num,
                                                    FLAGS.max_response_len,
                                                    charVocab,
                                                    FLAGS.max_word_length,
                                                    shuffle=False)
            for valid_batch in valid_batches:
                x_utterances, x_response, x_utterances_len, x_response_len, x_utters_num, x_responses_num, x_dist, x_target, x_target_weight, id_pairs, x_u_char, x_u_char_len, x_r_char, x_r_char_len = valid_batch
                feed_dict = {
                    u2u_imn.utterances: x_utterances,
                    u2u_imn.response: x_response,
                    u2u_imn.utterances_len: x_utterances_len,
                    u2u_imn.response_len: x_response_len,
                    u2u_imn.utters_num: x_utters_num,
                    u2u_imn.responses_num: x_responses_num,
                    u2u_imn.distance: x_dist,
                    u2u_imn.target: x_target,
                    u2u_imn.target_loss_weight: x_target_weight,
                    u2u_imn.dropout_keep_prob: 1.0,
                    u2u_imn.u_charVec: x_u_char,
                    u2u_imn.u_charLen: x_u_char_len,
                    u2u_imn.r_charVec: x_r_char,
                    u2u_imn.r_charLen: x_r_char_len
                }
                batch_accuracy, predicted_prob = sess.run(
                    [u2u_imn.accuracy, u2u_imn.probs], feed_dict)
                num_test += len(predicted_prob)
                if num_test % 1000 == 0:
                    print(num_test)

                num_correct += len(predicted_prob) * batch_accuracy
                for i, prob_score in enumerate(predicted_prob):
                    question_id, response_id, label = id_pairs[i]
                    results[question_id].append(
                        (response_id, label, prob_score))

            # compute accuracy, classification and ranking metrics
            print('num_test_samples: {}  test_accuracy: {}'.format(
                num_test, num_correct / num_test))
            accu, precision, recall, f1, loss = metrics.classification_metrics(
                results)
            print('Accuracy: {}, Precision: {}  Recall: {}  F1: {} Loss: {}'.
                  format(accu, precision, recall, f1, loss))

            mvp = metrics.mean_average_precision(results)
            mrr = metrics.mean_reciprocal_rank(results)
            top_1_precision = metrics.top_1_precision(results)
            total_valid_query = metrics.get_num_valid_query(results)
            print(
                'MAP (mean average precision): {}\tMRR (mean reciprocal rank): {}\tTop-1 precision: {}\tNum_query: {}'
                .format(mvp, mrr, top_1_precision, total_valid_query))

            all_preds = []
            for i in range(len(results)):
                all_preds.append([r[2] for r in results[str(i)]])
            df = pd.DataFrame(all_preds,
                              columns=[
                                  'prediction_' + str(i)
                                  for i in range(len(all_preds[0]))
                              ])
            if not os.path.isdir(FLAGS.output_predictions_folder):
                os.makedirs(FLAGS.output_predictions_folder)
            with open(
                    os.path.join(FLAGS.output_predictions_folder,
                                 'config.json'), 'w') as f:
                conf = {}
                for k, v in FLAGS.__dict__['__flags'].items():
                    conf[k] = v
                conf['ranker'] = "U2U"
                conf['seed'] = str(conf['random_seed'])
                args_dict = {}
                args_dict['args'] = conf

                f.write(json.dumps(args_dict, indent=4, sort_keys=True))
            df.to_csv(FLAGS.output_predictions_folder + "/predictions.csv",
                      index=False)

            return mrr
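
The predictions written by dev_step() can be loaded back for inspection with pandas (used as pd above, though the import is not shown in the snippet); a minimal sketch:

preds = pd.read_csv(os.path.join(FLAGS.output_predictions_folder, 'predictions.csv'))
# Row i holds the scores for query id str(i); column prediction_j is the score of
# that query's j-th candidate in the original (unsorted) candidate order.
print(preds.shape)
print(preds.iloc[0].values)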