Example 1
def just_korquad_eval(args):
    expected_version = 'KorQuAD_v1.0'
    with open(args.predict_file) as dataset_file:
        dataset_json = json.load(dataset_file)
        read_version = "_".join(dataset_json['version'].split("_")[:-1])
        if read_version != expected_version:
            logger.warning('Evaluation expects %s, but got dataset with %s',
                           expected_version, read_version)
        dataset = dataset_json['data']
    with open(
            os.path.join(args.output_dir, "predictions_{}.json".format(
                args.eda_type))) as prediction_file:
        predictions = json.load(prediction_file)
    logger.info(json.dumps(korquad_eval(dataset, predictions)))
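For quick reference, just_korquad_eval only needs an args object carrying predict_file, output_dir, and eda_type. A minimal invocation sketch, with a SimpleNamespace standing in for the parsed arguments (the eda_type value is illustrative; the paths match the defaults used in Example 3):

from types import SimpleNamespace

# Illustrative values: adjust predict_file/output_dir to your setup and make
# sure predictions_<eda_type>.json already exists in output_dir.
args = SimpleNamespace(predict_file='data/KorQuAD_v1.0_dev.json',
                       output_dir='debug',
                       eda_type='sr')
just_korquad_eval(args)  # logs the scores returned by korquad_eval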
Example 2
def evaluate(args, model, eval_examples, eval_features):
    """ Eval """
    all_input_ids = torch.tensor([f.input_ids for f in eval_features],
                                 dtype=torch.long)
    all_input_mask = torch.tensor([f.input_mask for f in eval_features],
                                  dtype=torch.long)
    all_segment_ids = torch.tensor([f.segment_ids for f in eval_features],
                                   dtype=torch.long)
    all_example_index = torch.arange(all_input_ids.size(0), dtype=torch.long)
    dataset = TensorDataset(all_input_ids, all_input_mask, all_segment_ids,
                            all_example_index)
    sampler = SequentialSampler(dataset)
    dataloader = DataLoader(dataset,
                            sampler=sampler,
                            batch_size=args.batch_size)

    logger.info("***** Evaluating *****")
    logger.info("  Num features = %d", len(dataset))
    logger.info("  Batch size = %d", args.batch_size)

    model.eval()
    all_results = []
    set_seed(args)  # Added here for reproducibility (even between Python 2 and 3)
    logger.info("Start evaluating!")
    for input_ids, input_mask, segment_ids, example_indices in tqdm(
            dataloader, desc="Evaluating"):
        input_ids = input_ids.to(args.device)
        input_mask = input_mask.to(args.device)
        segment_ids = segment_ids.to(args.device)
        with torch.no_grad():
            batch_start_logits, batch_end_logits = model(
                input_ids, segment_ids, input_mask)
        for i, example_index in enumerate(example_indices):
            start_logits = batch_start_logits[i].detach().cpu().tolist()
            end_logits = batch_end_logits[i].detach().cpu().tolist()
            eval_feature = eval_features[example_index.item()]
            unique_id = int(eval_feature.unique_id)
            all_results.append(
                RawResult(unique_id=unique_id,
                          start_logits=start_logits,
                          end_logits=end_logits))
    output_prediction_file = os.path.join(args.output_dir, "predictions.json")
    output_nbest_file = os.path.join(args.output_dir, "nbest_predictions.json")
    write_predictions(eval_examples, eval_features, all_results,
                      args.n_best_size, args.max_answer_length, False,
                      output_prediction_file, output_nbest_file, None, False,
                      False, 0.0)

    expected_version = 'KorQuAD_v1.0'
    with open(args.predict_file) as dataset_file:
        dataset_json = json.load(dataset_file)
        read_version = "_".join(dataset_json['version'].split("_")[:-1])
        if read_version != expected_version:
            logger.warning('Evaluation expects %s, but got dataset with %s',
                           expected_version, read_version)
        dataset = dataset_json['data']
    with open(os.path.join(args.output_dir,
                           "predictions.json")) as prediction_file:
        predictions = json.load(prediction_file)
    logger.info(json.dumps(korquad_eval(dataset, predictions)))

    # main() consumes the per-question n-best candidates when ensembling,
    # so hand back the contents of nbest_predictions.json.
    with open(output_nbest_file) as nbest_file:
        return json.load(nbest_file)
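evaluate accumulates RawResult records before handing them to write_predictions. The container itself is not shown in these snippets; in the standard BERT SQuAD utilities this code follows, it is a plain namedtuple, and a minimal sketch under that assumption is:

import collections

# Assumed definition, matching how evaluate() builds each record above:
# one entry per feature, keyed by the feature's unique_id.
RawResult = collections.namedtuple(
    "RawResult", ["unique_id", "start_logits", "end_logits"])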
Example 3
def main():
    parser = argparse.ArgumentParser()

    parser.add_argument("--checkpoint",
                        default='output/korquad_3.bin',
                        type=str,
                        help="checkpoint")
    parser.add_argument(
        "--output_dir",
        default='debug',
        type=str,
        help=
        "The output directory where the model checkpoints and predictions will be written."
    )
    parser.add_argument("--model_config", type=str)
    parser.add_argument("--vocab", type=str)

    ## Other parameters
    parser.add_argument(
        "--predict_file",
        default='data/KorQuAD_v1.0_dev.json',
        type=str,
        help="SQuAD json for predictions. E.g., dev-v1.1.json or test-v1.1.json"
    )

    parser.add_argument(
        "--max_seq_length",
        default=512,
        type=int,
        help=
        "The maximum total input sequence length after WordPiece tokenization. Sequences "
        "longer than this will be truncated, and sequences shorter than this will be padded."
    )
    parser.add_argument(
        "--doc_stride",
        default=64,
        type=int,
        help=
        "When splitting up a long document into chunks, how much stride to take between chunks."
    )
    parser.add_argument(
        "--max_query_length",
        default=96,
        type=int,
        help=
        "The maximum number of tokens for the question. Questions longer than this will "
        "be truncated to this length.")
    parser.add_argument(
        "--do_lower_case",
        action='store_true',
        help="Set this flag if you are using an uncased model.")

    parser.add_argument("--batch_size",
                        default=16,
                        type=int,
                        help="Batch size per GPU/CPU for evaluation.")
    parser.add_argument(
        "--n_best_size",
        default=20,
        type=int,
        help=
        "The total number of n-best predictions to generate in the nbest_predictions.json output file."
    )
    parser.add_argument(
        "--max_answer_length",
        default=30,
        type=int,
        help=
        "The maximum length of an answer that can be generated. This is needed because the start "
        "and end predictions are not conditioned on one another.")
    parser.add_argument(
        "--verbose_logging",
        action='store_true',
        help=
        "If true, all of the warnings related to data processing will be printed. "
        "A number of warnings are expected for a normal SQuAD evaluation.")

    parser.add_argument("--no_cuda",
                        action='store_true',
                        help="Whether not to use CUDA when available")
    parser.add_argument('--seed',
                        type=int,
                        default=42,
                        help="random seed for initialization")
    parser.add_argument(
        '--fp16',
        action='store_true',
        help=
        "Whether to use 16-bit (mixed) precision (through NVIDIA apex) instead of 32-bit"
    )
    parser.add_argument('--ensemble', default='false', type=str)
    args = parser.parse_args()
    args.ensemble = args.ensemble.lower() == 'true'

    # Setup CUDA, GPU & distributed training
    device = torch.device(
        "cuda" if torch.cuda.is_available() and not args.no_cuda else "cpu")
    args.n_gpu = torch.cuda.device_count()
    args.device = device

    # Setup logging
    logging.basicConfig(
        format='%(asctime)s - %(levelname)s - %(name)s -   %(message)s',
        datefmt='%m/%d/%Y %H:%M:%S',
        level=logging.INFO)
    logger.info("device: %s, n_gpu: %s, 16-bits training: %s", device,
                args.n_gpu, args.fp16)

    # Set seed
    set_seed(args)

    tokenizer = BertTokenizer(vocab_file=args.vocab,
                              do_basic_tokenize=True,
                              max_len=args.max_seq_length)
    config = Config.from_json_file(args.model_config)
    model = QuestionAnswering(config)

    # Evaluate
    examples, features = load_and_cache_examples(args, tokenizer)

    if not args.ensemble:
        logger.info(" Load Model: %s ", args.checkpoint)
        # map_location lets a checkpoint saved on GPU load on a CPU-only machine.
        model.load_state_dict(
            torch.load(args.checkpoint, map_location=args.device))
        num_params = count_parameters(model)
        logger.info("Total parameters: %d", num_params)
        if args.fp16:
            model.half()
        model.to(args.device)

        logger.info("Evaluation parameters %s", args)
        results = evaluate(args, model, examples, features)
    else:
        list_ckpts = []
        with open(os.path.join(args.output_dir, "ckpt_list.txt"), 'r') as f:
            for line in f:
                list_ckpts.append(line.strip())

        list_results = []
        for i, ckpt in enumerate(list_ckpts):
            fn = os.path.join(args.output_dir, ckpt)
            logger.info(" Load Model: %s ", fn)
            model.load_state_dict(torch.load(fn, map_location=args.device))
            num_params = count_parameters(model)
            logger.info("Total parameters: %d", num_params)
            if args.fp16:
                model.half()
            model.to(args.device)

            logger.info("Evaluation parameters %s", args)
            results = evaluate(args, model, examples, features)
            list_results.append(results)

        list_qid = []
        for example in examples:
            list_qid.append(example.qas_id)

        all_predictions = collections.OrderedDict()
        # For each question, take each checkpoint's first non-empty n-best
        # candidate and keep the most confident answer across checkpoints.
        for qid in list_qid:
            max_prob, answer = 0.0, ""
            for results in list_results:
                prob, text = 0.0, None
                for output in results[qid]:
                    if output["text"]:
                        prob = output["probability"]
                        text = output["text"]
                        break
                if prob > max_prob:
                    max_prob = prob
                    answer = text

            all_predictions[qid] = answer

        output_prediction_file = os.path.join(args.output_dir,
                                              "predictions.json")
        with open(output_prediction_file, "w") as writer:
            writer.write(json.dumps(all_predictions, indent=4) + "\n")

        expected_version = 'KorQuAD_v1.0'
        with open(args.predict_file) as dataset_file:
            dataset_json = json.load(dataset_file)
            read_version = "_".join(dataset_json['version'].split("_")[:-1])
            if read_version != expected_version:
                logger.warning('Evaluation expects %s, but got dataset with %s',
                               expected_version, read_version)
            dataset = dataset_json['data']
        with open(os.path.join(args.output_dir,
                               "predictions.json")) as prediction_file:
            predictions = json.load(prediction_file)
        logger.info(json.dumps(korquad_eval(dataset, predictions)))
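The ensemble branch reads ckpt_list.txt from --output_dir and expects one checkpoint filename per line, each resolved relative to that directory. A small sketch of how such a file could be prepared (the checkpoint names are placeholders):

import os

output_dir = 'debug'  # matches the --output_dir default above
with open(os.path.join(output_dir, 'ckpt_list.txt'), 'w') as f:
    # One checkpoint filename per line; these names are illustrative only.
    f.write('korquad_1.bin\n')
    f.write('korquad_2.bin\n')
    f.write('korquad_3.bin\n')

Running the script with --ensemble true then loads each listed checkpoint in turn and keeps, per question, the most confident non-empty answer across checkpoints.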