Esempio n. 1
0
def predict(test_exe,
            test_program,
            test_pyreader,
            fetch_list,
            processor,
            test_path=None):
    """Run the test program until the reader is exhausted and score the output.

    Every batch fetched from ``test_exe.run`` is converted into ``RawResult``
    records, which are then handed to ``write_predictions`` together with the
    features built by ``processor``.  Output files are placed in
    ``args.checkpoints`` (created if it does not exist).

    Relies on the module-level ``args`` namespace for all configuration.

    Args:
        test_exe: Object whose ``run(fetch_list=..., program=...)`` is called
            once per batch.
        test_program: Program forwarded to ``test_exe.run``.
        test_pyreader: Reader that is started before the loop and reset once
            ``fluid.core.EOFException`` is raised.
        fetch_list: Fetch targets forwarded to ``test_exe.run``; the call must
            return exactly four arrays, in order: unique ids, start logits,
            end logits, and a fourth value that is not used here.
        processor: Provides ``predict_examples`` and ``get_features``.
        test_path: Passed as the last argument to ``write_predictions``.  When
            ``None``, ``args.predict_file`` is used instead.

    Returns:
        Whatever ``write_predictions`` returns (also printed to stdout).
    """
    if not os.path.exists(args.checkpoints):
        os.makedirs(args.checkpoints)
    output_prediction_file = os.path.join(args.checkpoints, "predictions.json")
    output_nbest_file = os.path.join(args.checkpoints,
                                     "nbest_predictions.json")
    output_null_log_odds_file = os.path.join(args.checkpoints,
                                             "null_odds.json")

    test_pyreader.start()
    all_results = []
    while True:
        # Only the run() call signals end-of-data, so keep the try body
        # limited to it.
        try:
            np_unique_ids, np_start_logits, np_end_logits, _ = test_exe.run(
                fetch_list=fetch_list, program=test_program)
        except fluid.core.EOFException:
            test_pyreader.reset()
            break

        for idx in range(np_unique_ids.shape[0]):
            if len(all_results) % 1000 == 0:
                print("Processing example: %d" % len(all_results))
            # Convert numpy values to plain Python int/float lists.
            all_results.append(
                RawResult(
                    unique_id=int(np_unique_ids[idx]),
                    start_logits=[float(x) for x in np_start_logits[idx].flat],
                    end_logits=[float(x) for x in np_end_logits[idx].flat]))

    features = processor.get_features(processor.predict_examples,
                                      is_training=False)

    # The two original call sites differed only in this last argument.
    eval_path = args.predict_file if test_path is None else test_path
    adv_f1 = write_predictions(
        processor.predict_examples, features, all_results,
        args.n_best_size, args.max_answer_length, args.do_lower_case,
        output_prediction_file, output_nbest_file,
        output_null_log_odds_file, args.version_2_with_negative,
        args.null_score_diff_threshold, args.verbose, eval_path)

    print(adv_f1)
    return adv_f1
Esempio n. 2
0
def predict(test_exe, test_program, test_data_loader, fetch_list, processor,
            name):
    """Run the test program until the loader is exhausted and log the metrics.

    Every batch fetched from ``test_exe.run`` is converted into ``RawResult``
    records.  Those records, the features built by ``processor`` and the
    ``"data"`` entry of the JSON file at ``args.predict_file`` are passed to
    ``write_predictions``; the returned mapping is printed as a single
    ``Result | key value | ...`` line between two separator lines.

    Relies on the module-level ``args`` namespace for all configuration.

    Args:
        test_exe: Object whose ``run(fetch_list=..., program=...)`` is called
            once per batch.
        test_program: Program forwarded to ``test_exe.run``.
        test_data_loader: Loader that is started before the loop and reset
            once ``fluid.core.EOFException`` is raised.
        fetch_list: Fetch targets forwarded to ``test_exe.run``.  The first six
            outputs are consumed, in order: unique ids, start top log-probs,
            start top indices, end top log-probs, end top indices, cls logits.
        processor: Provides ``predict_examples`` and ``get_features``.
        name: String prepended verbatim (no separator is inserted) to each of
            the three output file names written under ``args.checkpoints``.

    Returns:
        None.  The metrics are only printed.
    """
    if not os.path.exists(args.checkpoints):
        os.makedirs(args.checkpoints)
    output_prediction_file = os.path.join(args.checkpoints,
                                          name + "predictions.json")
    output_nbest_file = os.path.join(args.checkpoints,
                                     name + "nbest_predictions.json")
    output_null_log_odds_file = os.path.join(args.checkpoints,
                                             name + "null_odds.json")

    test_data_loader.start()
    all_results = []
    while True:
        # Only the run() call signals end-of-data, so keep the try body
        # limited to it.
        try:
            outputs = test_exe.run(fetch_list=fetch_list, program=test_program)
        except fluid.core.EOFException:
            test_data_loader.reset()
            break

        # Anything fetched beyond the first six outputs is ignored.
        (np_unique_ids, np_start_top_log_probs, np_start_top_index,
         np_end_top_log_probs, np_end_top_index,
         np_cls_logits) = outputs[0:6]

        for idx in range(np_unique_ids.shape[0]):
            if len(all_results) % 1000 == 0:
                print("Processing example: %d" % len(all_results))
            # Convert numpy values to plain Python scalars / lists.
            all_results.append(
                RawResult(
                    unique_id=int(np_unique_ids[idx]),
                    start_top_log_probs=[
                        float(x) for x in np_start_top_log_probs[idx].flat
                    ],
                    start_top_index=[
                        int(x) for x in np_start_top_index[idx].flat
                    ],
                    end_top_log_probs=[
                        float(x) for x in np_end_top_log_probs[idx].flat
                    ],
                    end_top_index=[
                        int(x) for x in np_end_top_index[idx].flat
                    ],
                    # Only the first element of the row is kept.
                    cls_logits=float(np_cls_logits[idx].flat[0])))

    with io.open(args.predict_file, "r", encoding="utf8") as f:
        orig_data = json.load(f)["data"]

    features = processor.get_features(processor.predict_examples,
                                      is_training=False)
    ret = write_predictions(processor.predict_examples, features, all_results,
                            args.n_best_size, args.max_answer_length,
                            output_prediction_file, output_nbest_file,
                            output_null_log_odds_file, orig_data, args)

    # Log current result
    separator = "=" * 80
    print(separator)
    print("Result | " + "".join(
        "{} {} | ".format(key, val) for key, val in ret.items()))
    print(separator)