Example #1
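# Collects start/end logits over the eval loader, decodes answer spans with
# compute_prediction (presumably paddlenlp.metrics.squad), dumps them to
# prediction.json, and reports SQuAD-style EM/F1 via squad_evaluate.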
def evaluate(model, data_loader, raw_dataset, args):
    model.eval()

    all_start_logits = []
    all_end_logits = []
    tic_eval = time.time()

    for batch in data_loader:
        input_ids, _ = batch
        start_logits_tensor, end_logits_tensor = model(input_ids)

        for idx in range(start_logits_tensor.shape[0]):
            if len(all_start_logits) % 1000 == 0 and len(all_start_logits):
                print("Processing example: %d" % len(all_start_logits))
                print('time per 1000:', time.time() - tic_eval)
                tic_eval = time.time()

            all_start_logits.append(start_logits_tensor.numpy()[idx])
            all_end_logits.append(end_logits_tensor.numpy()[idx])

    all_predictions, all_nbest_json, scores_diff_json = compute_prediction(
        raw_dataset, data_loader.dataset, (all_start_logits, all_end_logits),
        args.version_2_with_negative, args.n_best_size, args.max_answer_length,
        args.null_score_diff_threshold)

    # Can also write all_nbest_json and scores_diff_json files if needed
    with open('prediction.json', "w", encoding='utf-8') as writer:
        writer.write(
            json.dumps(all_predictions, ensure_ascii=False, indent=4) + "\n")

    squad_evaluate(examples=[raw_data for raw_data in raw_dataset],
                   preds=all_predictions,
                   na_probs=scores_diff_json)

    model.train()
Example #2
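# Same evaluation loop as Example #1, but the predictions are only written to
# "<global_step>_prediction.json" when write_predictions=True; raw examples and
# tokenized features come from data_loader.dataset.data / .new_data.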
def evaluate(model, data_loader, args, global_step, write_predictions=False):
    model.eval()

    all_start_logits = []
    all_end_logits = []

    for batch in data_loader:
        input_ids = batch[0]
        start_logits_tensor, end_logits_tensor = model(input_ids)

        for idx in range(start_logits_tensor.shape[0]):
            all_start_logits.append(start_logits_tensor.numpy()[idx])
            all_end_logits.append(end_logits_tensor.numpy()[idx])

    all_predictions, all_nbest_json, scores_diff_json = compute_prediction(
        data_loader.dataset.data, data_loader.dataset.new_data,
        (all_start_logits, all_end_logits), args.version_2_with_negative,
        args.n_best_size, args.max_answer_length,
        args.null_score_diff_threshold)

    # Can also write all_nbest_json and scores_diff_json files if needed
    if write_predictions:
        with open(f'{str(global_step)}_prediction.json', "w",
                  encoding='utf-8') as writer:
            writer.write(
                json.dumps(all_predictions, ensure_ascii=False, indent=4) +
                "\n")

    squad_evaluate(examples=data_loader.dataset.data,
                   preds=all_predictions,
                   na_probs=scores_diff_json)

    model.train()
Example #3
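 # Builds a non-shuffled DataLoader over `dataset`, runs predict_batch on every
 # batch, and, when do_eval=True, decodes and scores the answers against
 # `raw_dataset` with compute_prediction / squad_evaluate.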
 def predict(self, dataset, raw_dataset, collate_fn, args, do_eval=True):
     batch_sampler = paddle.io.BatchSampler(dataset,
                                            batch_size=args.batch_size,
                                            shuffle=False)
     data_loader = paddle.io.DataLoader(dataset=dataset,
                                        batch_sampler=batch_sampler,
                                        collate_fn=collate_fn,
                                        num_workers=0,
                                        return_list=True)
     outputs = []
     all_start_logits = []
     all_end_logits = []
     for data in data_loader:
         output = self.predict_batch(data)
         outputs.append(output)
         if do_eval:
             all_start_logits.extend(list(output[0]))
             all_end_logits.extend(list(output[1]))
     if do_eval:
         all_predictions, all_nbest_json, scores_diff_json = compute_prediction(
             raw_dataset, data_loader.dataset,
             (all_start_logits, all_end_logits),
             args.version_2_with_negative, args.n_best_size,
             args.max_answer_length, args.null_score_diff_threshold)
         squad_evaluate(examples=[raw_data for raw_data in raw_dataset],
                        preds=all_predictions,
                        na_probs=scores_diff_json)
     return outputs
Example #4
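# Evaluation helper with hard-coded decoding settings (n_best_size=20,
# max_answer_length=30): writes prediction.json in test mode, otherwise scores
# with squad_evaluate, then prints the first five question/context/answer
# triples as a quick sanity check.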
def evaluate(model, data_loader, is_test=False):
    model.eval()

    all_start_logits = []
    all_end_logits = []
    tic_eval = time.time()

    for batch in data_loader:
        input_ids, token_type_ids = batch
        start_logits_tensor, end_logits_tensor = model(input_ids,
                                                       token_type_ids)

        for idx in range(start_logits_tensor.shape[0]):
            if len(all_start_logits) % 10 == 0 and len(all_start_logits):
                print("Processing example: %d" % len(all_start_logits))
                print('time per 10:', time.time() - tic_eval)
                tic_eval = time.time()

            all_start_logits.append(start_logits_tensor.numpy()[idx])
            all_end_logits.append(end_logits_tensor.numpy()[idx])

    all_predictions, _, _ = compute_prediction(
        data_loader.dataset.data, data_loader.dataset.new_data,
        (all_start_logits, all_end_logits), False, 20, 30)

    if is_test:
        # Can also write all_nbest_json and scores_diff_json files if needed
        with open('prediction.json', "w", encoding='utf-8') as writer:
            writer.write(
                json.dumps(
                    all_predictions, ensure_ascii=False, indent=4) + "\n")
    else:
        squad_evaluate(
            examples=data_loader.dataset.data,
            preds=all_predictions,
            is_whitespace_splited=False)

    count = 0
    for example in data_loader.dataset.data:
        count += 1
        print()
        print('Question:', example['question'])
        print('Context:', ''.join(example['context']))
        print('Answer:', all_predictions[example['id']])
        if count >= 5:
            break

    model.train()
Example #5
File: run_du.py  Project: jandyu/models-1
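# Variant built on the feature-level API: logits are wrapped in RawResult
# namedtuples keyed by unique_id and decoded with compute_predictions (note the
# trailing "s"), then either dumped to prediction.json or scored in place.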
def evaluate(model, data_loader, args, tokenizer, do_pred=False):
    model.eval()

    RawResult = collections.namedtuple(
        "RawResult", ["unique_id", "start_logits", "end_logits"])

    all_results = []
    tic_eval = time.time()

    for batch in data_loader:
        input_ids, segment_ids, unique_ids = batch
        start_logits_tensor, end_logits_tensor = model(input_ids, segment_ids)

        for idx in range(unique_ids.shape[0]):
            if len(all_results) % 1000 == 0 and len(all_results):
                print("Processing example: %d" % len(all_results))
                print('time per 1000:', time.time() - tic_eval)
                tic_eval = time.time()
            unique_id = int(unique_ids[idx])
            start_logits = [float(x) for x in start_logits_tensor.numpy()[idx]]
            end_logits = [float(x) for x in end_logits_tensor.numpy()[idx]]
            all_results.append(
                RawResult(unique_id=unique_id,
                          start_logits=start_logits,
                          end_logits=end_logits))

    all_predictions, _, _ = compute_predictions(data_loader.dataset.examples,
                                                data_loader.dataset.features,
                                                all_results, args.n_best_size,
                                                args.max_answer_length,
                                                args.do_lower_case, False, 0.0,
                                                args.verbose, tokenizer, False)
    if do_pred:
        with open('prediction.json', "w", encoding='utf-8') as writer:
            writer.write(
                json.dumps(all_predictions, ensure_ascii=False, indent=4) +
                "\n")
    else:
        squad_evaluate(examples=data_loader.dataset.examples,
                       preds=all_predictions,
                       is_whitespace_splited=False)

    model.train()
Example #6
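# SQuAD-v1-style evaluation (version_2_with_negative fixed to False): writes
# prediction.json and scores with is_whitespace_splited=False, which suits
# character-based (e.g. Chinese) datasets.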
def evaluate(model, data_loader, args):
    model.eval()

    all_start_logits = []
    all_end_logits = []
    tic_eval = time.time()

    for batch in data_loader:
        input_ids, token_type_ids = batch
        start_logits_tensor, end_logits_tensor = model(input_ids,
                                                       token_type_ids)

        for idx in range(start_logits_tensor.shape[0]):
            if len(all_start_logits) % 1000 == 0 and len(all_start_logits):
                print("Processing example: %d" % len(all_start_logits))
                print('time per 1000:', time.time() - tic_eval)
                tic_eval = time.time()

            all_start_logits.append(start_logits_tensor.numpy()[idx])
            all_end_logits.append(end_logits_tensor.numpy()[idx])

    all_predictions, _, _ = compute_prediction(
        data_loader.dataset.data, data_loader.dataset.new_data,
        (all_start_logits, all_end_logits), False, args.n_best_size,
        args.max_answer_length)

    # Can also write all_nbest_json and scores_diff_json files if needed
    with open('prediction.json', "w", encoding='utf-8') as writer:
        writer.write(
            json.dumps(
                all_predictions, ensure_ascii=False, indent=4) + "\n")

    squad_evaluate(
        examples=data_loader.dataset.data,
        preds=all_predictions,
        is_whitespace_splited=False)

    model.train()
Example #7
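# Same RawResult/compute_predictions pattern as Example #5, but negative-answer
# handling comes from args (version_2_with_negative, null_score_diff_threshold)
# and the no-answer score differences are passed on to squad_evaluate.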
def evaluate(model, data_loader, args):
    model.eval()

    RawResult = collections.namedtuple(
        "RawResult", ["unique_id", "start_logits", "end_logits"])

    all_results = []
    tic_eval = time.time()

    for batch in data_loader:
        input_ids, segment_ids, unique_ids = batch
        start_logits_tensor, end_logits_tensor = model(input_ids, segment_ids)

        for idx in range(unique_ids.shape[0]):
            if len(all_results) % 1000 == 0 and len(all_results):
                print("Processing example: %d" % len(all_results))
                print('time per 1000:', time.time() - tic_eval)
                tic_eval = time.time()
            unique_id = int(unique_ids[idx])
            start_logits = [float(x) for x in start_logits_tensor.numpy()[idx]]
            end_logits = [float(x) for x in end_logits_tensor.numpy()[idx]]
            all_results.append(
                RawResult(unique_id=unique_id,
                          start_logits=start_logits,
                          end_logits=end_logits))

    all_predictions, all_nbest_json, scores_diff_json = compute_predictions(
        data_loader.dataset.examples, data_loader.dataset.features,
        all_results, args.n_best_size, args.max_answer_length,
        args.do_lower_case, args.version_2_with_negative,
        args.null_score_diff_threshold, args.verbose,
        data_loader.dataset.tokenizer)

    squad_evaluate(data_loader.dataset.examples, all_predictions,
                   scores_diff_json, 1.0)

    model.train()
Example #8
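# CMRC2018-style evaluation: feeds whole batches as keyword arguments, writes
# the decoded answers under args.output_dir, and returns (exact, f1) when
# do_eval=True so the caller can track the best checkpoint.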
def evaluate(model, raw_dataset, dataset, data_loader, args, do_eval=True):
    model.eval()

    all_start_logits = []
    all_end_logits = []
    tic_eval = time.time()
    for batch in data_loader:
        start_logits, end_logits = model(**batch)
        for idx in range(start_logits.shape[0]):
            if len(all_start_logits) % 1000 == 0 and len(all_start_logits):
                logger.info("Processing example: %d" % len(all_start_logits))
                logger.info('time per 1000: %s' % (time.time() - tic_eval))
                tic_eval = time.time()

            all_start_logits.append(start_logits.numpy()[idx])
            all_end_logits.append(end_logits.numpy()[idx])

    all_predictions, _, _ = compute_prediction(
        raw_dataset, dataset, (all_start_logits, all_end_logits), False,
        args.n_best_size, args.max_answer_length)

    mode = 'validation' if do_eval else 'test'
    if not os.path.exists(args.output_dir):
        os.makedirs(args.output_dir)
    if do_eval:
        filename = os.path.join(args.output_dir, 'prediction_validation.json')
    else:
        filename = os.path.join(args.output_dir, 'cmrc2018_predict.json')
    with open(filename, "w", encoding='utf-8') as writer:
        writer.write(
            json.dumps(all_predictions, ensure_ascii=False, indent=4) + "\n")
    if do_eval:
        res = squad_evaluate(examples=[raw_data for raw_data in raw_dataset],
                             preds=all_predictions,
                             is_whitespace_splited=False)
        model.train()
        return res['exact'], res['f1']

    model.train()
Example #9
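    # Closure used while evaluating slimmed sub-networks: width_mult selects the
    # sub-network (100 denotes the teacher). It branches between span decoding
    # for cmrc2018, chunk-based F1 for msra_ner, and plain accuracy otherwise,
    # and relies on task_name, metric, self.eval_examples and self.eval_dataset
    # from the enclosing scope.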
    def evaluate(model, criterion, data_loader, width_mult=1.0):
        model.eval()
        all_start_logits = []
        all_end_logits = []
        metric.reset()
        for batch in data_loader:
            if "cmrc2018" in task_name:
                input_ids, token_type_ids = batch['input_ids'], batch[
                    'token_type_ids']
                logits = model(
                    input_ids, token_type_ids, attention_mask=[None, None])
                if width_mult == 100:
                    start_logits_tensor, end_logits_tensor = logits
                else:
                    start_logits_tensor, end_logits_tensor = logits[0]
                for idx in range(start_logits_tensor.shape[0]):
                    if len(all_start_logits) % 1000 == 0 and len(
                            all_start_logits):
                        logger.info("Processing example: %d" %
                                    len(all_start_logits))
                    all_start_logits.append(start_logits_tensor.numpy()[idx])
                    all_end_logits.append(end_logits_tensor.numpy()[idx])

            else:
                input_ids, segment_ids, labels = batch['input_ids'], batch[
                    'token_type_ids'], batch['labels']
                logits = model(
                    input_ids, segment_ids, attention_mask=[None, None])
                if isinstance(logits, tuple):
                    logits = logits[0]
                loss = criterion(logits, labels)
                if task_name == "msra_ner":
                    preds = logits.argmax(axis=2)
                    num_infer_chunks, num_label_chunks, num_correct_chunks = metric.compute(
                        batch['seq_len'], preds, batch['labels'])
                    metric.update(num_infer_chunks.numpy(),
                                  num_label_chunks.numpy(),
                                  num_correct_chunks.numpy())
                else:
                    correct = metric.compute(logits, labels)
                    metric.update(correct)
        if "cmrc2018" in task_name:
            n_best_size = 20
            max_answer_length = 50
            all_predictions, _, _ = compute_prediction(
                self.eval_examples, self.eval_dataset,
                (all_start_logits, all_end_logits), False, n_best_size,
                max_answer_length)
            res = squad_evaluate(
                examples=[raw_data for raw_data in self.eval_examples],
                preds=all_predictions,
                is_whitespace_splited=False)
            if width_mult == 100:
                logger.info("teacher model, EM: %f, F1: %f" %
                            (res['exact'], res['f1']))
            else:
                logger.info("width_mult: %s, EM: %f, F1: %f, " %
                            (str(width_mult), res['exact'], res['f1']))
            res = res['exact']
        else:
            res = metric.accumulate()
            # Teacher model's evaluation
            if task_name == "msra_ner":
                if width_mult == 100:
                    logger.info(
                        "teacher model, eval loss: %f, precision: %f, recall: %f, f1_score: %f"
                        % (paddle.mean(loss).numpy(), res[0], res[1], res[2]))
                else:
                    logger.info(
                        "width_mult: %s, eval loss: %f, precision: %f, recall: %f, f1_score: %f"
                        % (str(width_mult), paddle.mean(loss).numpy(), res[0],
                           res[1], res[2]))
                res = res[2]
            else:
                if width_mult == 100:
                    logger.info("teacher model, eval loss: %f, acc: %s, " %
                                (loss.numpy(), res))
                else:
                    logger.info("width_mult: %s, eval loss: %f, acc: %s, " %
                                (str(width_mult), loss.numpy(), res))
        model.train()
        return res
Example #10
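# Script entry point: for Chinese data it computes F1/EM with evaluate_ch and
# prints them as JSON; otherwise it reshapes the reference answers into
# SQuAD-style examples (id / answers / is_impossible) and calls squad_evaluate.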

if __name__ == '__main__':
    args = get_args()

    if args.language == 'ch':
        ref_ans = read_dataset(args.golden_path)
        pred_ans = read_model_prediction(args.pred_file)
        F1, EM, TOTAL, SKIP = evaluate_ch(ref_ans, pred_ans)

        output_result = OrderedDict()
        output_result['F1'] = '%.3f' % F1
        output_result['EM'] = '%.3f' % EM
        output_result['TOTAL'] = TOTAL
        output_result['SKIP'] = SKIP
        print(json.dumps(output_result))
    else:
        ref_ans = read_dataset(args.golden_path)
        pred_ans = read_temp(args.pred_file)
        res = []
        for i in ref_ans:
            ins = ref_ans[i]
            ins['id'] = str(ins['sent_id'])
            ins['answers'] = [ins['sent_label']]
            if ins['answers'] == [""]:
                ins['is_impossible'] = True
            else:
                ins['is_impossible'] = False
            res.append(ins)
        squad_evaluate(examples=res, preds=pred_ans)
Example #11
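# End-to-end static-graph SQuAD script for IPUs: shards the 12 encoder layers
# across two devices, optionally trains with Adam plus linear warmup, and in
# inference mode pads the last partial batch, gathers the logits, then decodes
# and scores them with compute_prediction / squad_evaluate.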
def main(args):
    paddle.enable_static()
    place = paddle.set_device('ipu')
    set_seed(args.seed)
    main_program = paddle.static.default_main_program()
    startup_program = paddle.static.default_startup_program()

    # The sharding of encoder layers
    if args.num_hidden_layers == 12:
        attn_ipu_index = [0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1]
        ff_ipu_index = [0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1]
    else:
        raise Exception("Only support num_hidden_layers = 12")

    bert_config = {
        k: getattr(args, k)
        for k in IpuBertConfig._fields if hasattr(args, k)
    }
    bert_config['embeddings_scope'] = DeviceScope(0, 0, "Embedding")
    bert_config['attn_scopes'] = [
        DeviceScope(attn_ipu_index[i], attn_ipu_index[i])
        for i in range(args.num_hidden_layers)
    ]
    bert_config['ff_scopes'] = [
        DeviceScope(ff_ipu_index[i], ff_ipu_index[i])
        for i in range(args.num_hidden_layers)
    ]
    bert_config['layers_per_ipu'] = [6, 6]

    config = IpuBertConfig(**bert_config)

    # custom_ops
    custom_ops = load_custom_ops()

    logging.info("building model")

    if args.is_training:
        [indices, segments, positions, input_mask, start_labels,
         end_labels] = create_data_holder(args)
    else:
        [indices, segments, positions, input_mask] = create_data_holder(args)

    # Encoder Layers
    bert_model = BertModel(config, custom_ops)
    encoders, _ = bert_model(indices, segments, positions, input_mask)

    squad_scope = DeviceScope(args.num_ipus - 1, args.num_ipus - 1, "squad")
    with squad_scope:
        qa_cls = IpuBertForQuestionAnswering(args.hidden_size, args.seq_len)
        start_logits, end_logits = qa_cls(encoders)

        if args.is_training:
            acc_loss = IpuBertQAAccAndLoss(custom_ops)
            acc0, acc1, loss = acc_loss(start_logits, end_logits, start_labels,
                                        end_labels)

    # load squad dataset
    raw_dataset, data_loader = load_squad_dataset(args)

    total_samples = len(data_loader.dataset)
    max_steps = total_samples // args.batch_size * args.epochs
    logging.info("total samples: %d, total batch_size: %d, max steps: %d" %
                 (total_samples, args.batch_size, max_steps))

    if args.is_training:
        lr_scheduler = LinearDecayWithWarmup(args.learning_rate, max_steps,
                                             args.warmup_steps)
        optimizer = paddle.optimizer.Adam(
            learning_rate=lr_scheduler,
            weight_decay=args.weight_decay,
            beta1=args.beta1,
            beta2=args.beta2,
            epsilon=args.adam_epsilon)
        optimizer.minimize(loss)

    # Static executor
    exe = paddle.static.Executor(place)
    exe.run(startup_program)

    # Set initial weights
    state_dict = main_program.state_dict()
    reset_state_dict = reset_program_state_dict(state_dict)
    paddle.static.set_program_state(main_program, reset_state_dict)

    if args.enable_load_params:
        logging.info(f'loading weights from: {args.load_params_path}')
        if not args.load_params_path.endswith('pdparams'):
            raise Exception('need pdparams file')
        with open(args.load_params_path, 'rb') as file:
            params = pickle.load(file)
        # Delete mlm and nsp weights
        if args.is_training and 'linear_72.w_0' in params:
            params.pop("linear_72.w_0")
            params.pop("linear_72.b_0")
        paddle.static.set_program_state(main_program, params)

    if args.tf_checkpoint:
        from load_tf_ckpt import load_initializers_from_tf
        logging.info(f'loading weights from: {args.tf_checkpoint}')
        initializers, _ = load_initializers_from_tf(args.tf_checkpoint, args)
        paddle.static.set_program_state(main_program, initializers)

    # Create ipu_strategy
    ipu_strategy = create_ipu_strategy(args)

    if args.is_training:
        feed_list = [
            "indices", "segments", "positions", "input_mask", "start_labels",
            "end_labels"
        ]
        fetch_list = [loss.name, acc0.name, acc1.name]
    else:
        feed_list = ["indices", "segments", "positions", "input_mask"]
        fetch_list = [start_logits.name, end_logits.name]

    ipu_compiler = paddle.static.IpuCompiledProgram(
        main_program, ipu_strategy=ipu_strategy)
    logging.info('start compiling, this may take a few minutes')
    cur_time = time.time()
    main_program = ipu_compiler.compile(feed_list, fetch_list)
    time_cost = time.time() - cur_time
    logging.info(f'finish compiling! time cost: {time_cost}')

    if args.is_training:
        global_step = 0
        batch_start = time.time()
        for epoch in range(args.epochs):
            for batch in data_loader:
                global_step += 1

                feed = {
                    "indices": batch[0],
                    "segments": batch[1],
                    "positions": batch[2],
                    "input_mask": batch[3],
                    "start_labels": batch[4],
                    "end_labels": batch[5],
                }
                lr_scheduler.step()

                train_start = time.time()
                outputs = exe.run(main_program,
                                  feed=feed,
                                  fetch_list=fetch_list,
                                  use_program_cache=True)
                train_cost = time.time() - train_start
                total_cost = time.time() - batch_start

                tput = args.batch_size / total_cost
                if args.wandb:
                    wandb.log({
                        "epoch": epoch,
                        "global_step": global_step,
                        "loss": np.mean(outputs[0]),
                        "accuracy": np.mean(outputs[1:]),
                        "train_cost": train_cost,
                        "total_cost": total_cost,
                        "throughput": tput,
                        "learning_rate": lr_scheduler(),
                    })

                if global_step % args.logging_steps == 0:
                    logging.info({
                        "epoch": epoch,
                        "global_step": global_step,
                        "loss": np.mean(outputs[0]),
                        "accuracy": np.mean(outputs[1:]),
                        "train_cost": train_cost,
                        "total_cost": total_cost,
                        "throughput": tput,
                        "learning_rate": lr_scheduler(),
                    })

                batch_start = time.time()

        # save final state
        ipu_compiler._backend.weights_to_host()
        paddle.static.save(main_program.org_program,
                           os.path.join(args.output_dir, 'Final_model'))

    if not args.is_training:
        all_start_logits = []
        all_end_logits = []
        for step, batch in enumerate(data_loader):
            if step % args.logging_steps == 0:
                logging.info(f'running step: {step}')

            real_len = np.array(batch[0]).shape[0]
            # pad the last partial batch with zeros if needed
            if real_len < args.batch_size:
                batch = [np.asarray(x) for x in batch]
                pad0 = np.zeros([args.batch_size - real_len,
                                 args.seq_len]).astype(batch[0].dtype)
                batch[0] = np.vstack((batch[0], pad0))
                batch[1] = np.vstack((batch[1], pad0))
                batch[2] = np.vstack((batch[2], pad0))
                pad1 = np.zeros(
                    [args.batch_size - real_len, 1, 1, args.seq_len]) - 1e3
                pad1 = pad1.astype(batch[3].dtype)
                batch[3] = np.vstack((batch[3], pad1))

            feed = {
                "indices": batch[0],
                "segments": batch[1],
                "positions": batch[2],
                "input_mask": batch[3],
            }
            start_logits, end_logits = exe.run(main_program,
                                               feed=feed,
                                               fetch_list=fetch_list)

            start_logits = start_logits.reshape([-1, args.seq_len])
            end_logits = end_logits.reshape([-1, args.seq_len])
            for idx in range(real_len):
                all_start_logits.append(start_logits[idx])
                all_end_logits.append(end_logits[idx])

        # evaluate results
        all_predictions, all_nbest_json, scores_diff_json = compute_prediction(
            raw_dataset, data_loader.dataset,
            (all_start_logits, all_end_logits))
        squad_evaluate(
            examples=[raw_data for raw_data in raw_dataset],
            preds=all_predictions,
            na_probs=scores_diff_json)
        # write results to file
        with open('squad_prediction.json', "w", encoding='utf-8') as writer:
            writer.write(
                json.dumps(
                    all_predictions, ensure_ascii=False, indent=4) + "\n")
Example #12
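# Standalone evaluation for dureader_robust with ERNIE: restores weights from
# args.model_path, tokenizes the test split on the fly, decodes answers, and
# either writes prediction.json (test mode) or scores with squad_evaluate,
# before printing the first five predictions.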
def evaluate(args, is_test=True):
    # Load the model
    model_state = paddle.load(args.model_path)
    model = ErnieForQuestionAnswering.from_pretrained(args.model_name)
    model.load_dict(model_state)
    model.eval()

    # Load the data
    train_ds, dev_ds, test_ds = load_dataset('dureader_robust',
                                             splits=('train', 'dev', 'test'))
    tokenizer = paddlenlp.transformers.ErnieTokenizer.from_pretrained(
        args.model_name)
    test_trans_func = partial(prepare_validation_features,
                              max_seq_length=args.max_seq_length,
                              doc_stride=args.doc_stride,
                              tokenizer=tokenizer)
    test_ds.map(test_trans_func, batched=True, num_workers=4)
    test_batch_sampler = paddle.io.BatchSampler(test_ds,
                                                batch_size=args.batch_size,
                                                shuffle=False)

    test_batchify_fn = lambda samples, fn=Dict(
        {
            "input_ids": Pad(axis=0, pad_val=tokenizer.pad_token_id),
            "token_type_ids": Pad(axis=0, pad_val=tokenizer.pad_token_type_id)
        }): fn(samples)

    test_data_loader = paddle.io.DataLoader(dataset=test_ds,
                                            batch_sampler=test_batch_sampler,
                                            collate_fn=test_batchify_fn,
                                            return_list=True)

    all_start_logits = []
    all_end_logits = []
    tic_eval = time.time()

    for batch in test_data_loader:
        input_ids, token_type_ids = batch
        start_logits_tensor, end_logits_tensor = model(input_ids,
                                                       token_type_ids)

        for idx in range(start_logits_tensor.shape[0]):
            if len(all_start_logits) % 10 == 0 and len(all_start_logits):
                print("Processing example: %d" % len(all_start_logits))
                print('time per 10:', time.time() - tic_eval)
                tic_eval = time.time()

            all_start_logits.append(start_logits_tensor.numpy()[idx])
            all_end_logits.append(end_logits_tensor.numpy()[idx])

    all_predictions, _, _ = compute_prediction(
        test_data_loader.dataset.data, test_data_loader.dataset.new_data,
        (all_start_logits, all_end_logits), False, 20, 30)

    if is_test:
        # Can also write all_nbest_json and scores_diff_json files if needed
        with open('prediction.json', "w", encoding='utf-8') as writer:
            writer.write(
                json.dumps(all_predictions, ensure_ascii=False, indent=4) +
                "\n")
    else:
        squad_evaluate(examples=test_data_loader.dataset.data,
                       preds=all_predictions,
                       is_whitespace_splited=False)

    count = 0
    for example in test_data_loader.dataset.data:
        count += 1
        print()
        print('Question:', example['question'])
        print('Context:', ''.join(example['context']))
        print('Answer:', all_predictions[example['id']])
        if count >= 5:
            break

    model.train()
Example #13
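    # Inference-side predict(): batches the dataset manually, supports a pure
    # throughput (QPS) mode via args.perf, and otherwise branches per task:
    # chunk F1 for msra_ner, span decoding plus squad_evaluate for cmrc2018,
    # and the metric registered in METRIC_CLASSES for the remaining tasks.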
    def predict(self,
                dataset,
                tokenizer,
                batchify_fn,
                args,
                dev_example=None,
                dev_ds_ori=None):
        if args.collect_shape:
            self.set_dynamic_shape(args.max_seq_length, args.batch_size)
        if args.task_name == "cmrc2018":
            dataset_removed = dataset.remove_columns(
                ["offset_mapping", "attention_mask", "example_id"])
            sample_num = len(dataset)
            batches = []
            for i in range(0, sample_num, args.batch_size):
                batch_size = min(args.batch_size, sample_num - i)
                batch = [dataset_removed[i + j] for j in range(batch_size)]
                batches.append(batch)
        else:
            sample_num = len(dataset)
            batches = []
            for i in range(0, sample_num, args.batch_size):
                batch_size = min(args.batch_size, sample_num - i)
                batch = [dataset[i + j] for j in range(batch_size)]
                batches.append(batch)
        if args.perf:
            for i, batch in enumerate(batches):
                batch = batchify_fn(batch)
                input_ids, segment_ids = batch["input_ids"].numpy(
                ), batch["token_type_ids"].numpy()
                output = self.predict_batch([input_ids, segment_ids])
                if i > args.perf_warmup_steps:
                    break
            time1 = time.time()
            nums = 0
            for batch in batches:
                batch = batchify_fn(batch)
                input_ids, segment_ids = batch["input_ids"].numpy(
                ), batch["token_type_ids"].numpy()
                nums = nums + input_ids.shape[0]
                output = self.predict_batch([input_ids, segment_ids])
            total_time = time.time() - time1
            print("task name: %s, sample nums: %s, time: %s, QPS: %s " %
                  (args.task_name, nums, total_time, nums / total_time))

        else:
            if args.task_name == "msra_ner":
                metric = ChunkEvaluator(label_list=args.label_list)
                metric.reset()
                all_predictions = []
                batch_num = len(dataset['input_ids'])
                for batch in batches:
                    batch = batchify_fn(batch)
                    input_ids, segment_ids = batch["input_ids"].numpy(
                    ), batch["token_type_ids"].numpy()
                    output = self.predict_batch([input_ids, segment_ids])[0]
                    preds = np.argmax(output, axis=2)
                    all_predictions.append(preds.tolist())
                    num_infer_chunks, num_label_chunks, num_correct_chunks = metric.compute(
                        batch["seq_len"], paddle.to_tensor(preds),
                        batch["labels"])
                    metric.update(num_infer_chunks.numpy(),
                                  num_label_chunks.numpy(),
                                  num_correct_chunks.numpy())
                res = metric.accumulate()
                print("task name: %s, (precision, recall, f1): %s, " %
                      (args.task_name, res))
            elif args.task_name == "cmrc2018":
                all_start_logits = []
                all_end_logits = []
                for batch in batches:
                    batch = batchify_fn(batch)
                    input_ids, segment_ids = batch["input_ids"].numpy(
                    ), batch["token_type_ids"].numpy()
                    start_logits, end_logits = self.predict_batch(
                        [input_ids, segment_ids])
                    for idx in range(start_logits.shape[0]):
                        if len(all_start_logits) % 1000 == 0 and len(
                                all_start_logits):
                            print("Processing example: %d" %
                                  len(all_start_logits))
                        all_start_logits.append(start_logits[idx])
                        all_end_logits.append(end_logits[idx])
                all_predictions, _, _ = compute_prediction(
                    dev_example, dataset, (all_start_logits, all_end_logits),
                    False, args.n_best_size, args.max_answer_length)
                res = squad_evaluate(
                    examples=[raw_data for raw_data in dev_example],
                    preds=all_predictions,
                    is_whitespace_splited=False)
                print("task name: %s, EM: %s, F1: %s" %
                      (args.task_name, res['exact'], res['f1']))
                return all_predictions
            else:
                all_predictions = []
                metric = METRIC_CLASSES[args.task_name]()
                metric.reset()
                for i, batch in enumerate(batches):
                    batch = batchify_fn(batch)
                    output = self.predict_batch([
                        batch["input_ids"].numpy(),
                        batch["token_type_ids"].numpy()
                    ])[0]
                    preds = np.argmax(output, axis=1)
                    all_predictions.append(preds.tolist())
                    correct = metric.compute(paddle.to_tensor(output),
                                             batch["labels"])
                    metric.update(correct)
                res = metric.accumulate()

                print("task name: %s, acc: %s, " % (args.task_name, res))
                return all_predictions
Example #14
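 # Minimal compute_metrics hook for a Trainer-style loop: label_ids carries the
 # raw examples and predictions the decoded answers, so squad_evaluate's result
 # can be returned directly as the metrics dict.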
 def compute_metrics(p: EvalPrediction):
     ret = squad_evaluate(examples=p.label_ids,
                          preds=p.predictions,
                          is_whitespace_splited=False)
     return dict(ret)