Example 1
    # Assumed module-level imports for this snippet (it is a method of a larger
    # model class, so the imports are not shown here):
    #   import json, os
    #   import torch
    #   from torch.utils.data import DataLoader, SequentialSampler
    #   from tqdm import tqdm
    # RawResult, RawResultExtended, to_list, write_predictions, and
    # write_predictions_extended come from the accompanying SQuAD utilities.
    def evaluate(self, eval_data, output_dir):
        """
        Evaluates the model on eval_data.

        Utility function to be used by the eval_model() method. Not intended to be used directly.
        """
        tokenizer = self.tokenizer
        device = self.device
        model = self.model
        args = self.args

        if isinstance(eval_data, str):
            with open(eval_data, 'r') as f:
                eval_examples = json.load(f)
        else:
            eval_examples = eval_data

        eval_dataset, examples, features = self.load_and_cache_examples(
            eval_examples, evaluate=True, output_examples=True)

        eval_sampler = SequentialSampler(eval_dataset)
        eval_dataloader = DataLoader(eval_dataset,
                                     sampler=eval_sampler,
                                     batch_size=args["eval_batch_size"])

        model.eval()

        all_results = []
        for batch in tqdm(eval_dataloader, disable=args['silent']):
            batch = tuple(t.to(device) for t in batch)
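            # Expected batch layout from load_and_cache_examples:
            #   batch[0]=input_ids, batch[1]=attention_mask, batch[2]=token_type_ids,
            #   batch[3]=example indices; XLNet/XLM batches additionally carry
            #   batch[4]=cls_index and batch[5]=p_mask.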

            with torch.no_grad():
                inputs = {
                    'input_ids': batch[0],
                    'attention_mask': batch[1],
                }

                if args['model_type'] != 'distilbert':
                    inputs['token_type_ids'] = None if args['model_type'] == 'xlm' else batch[2]

                example_indices = batch[3]

                if args['model_type'] in ['xlnet', 'xlm']:
                    inputs.update({'cls_index': batch[4], 'p_mask': batch[5]})

                outputs = model(**inputs)

                for i, example_index in enumerate(example_indices):
                    eval_feature = features[example_index.item()]
                    unique_id = int(eval_feature.unique_id)
                    if args['model_type'] in ['xlnet', 'xlm']:
                        # XLNet and XLM use a more complex post-processing procedure
                        result = RawResultExtended(
                            unique_id=unique_id,
                            start_top_log_probs=to_list(outputs[0][i]),
                            start_top_index=to_list(outputs[1][i]),
                            end_top_log_probs=to_list(outputs[2][i]),
                            end_top_index=to_list(outputs[3][i]),
                            cls_logits=to_list(outputs[4][i]))
                    else:
                        result = RawResult(unique_id=unique_id,
                                           start_logits=to_list(outputs[0][i]),
                                           end_logits=to_list(outputs[1][i]))
                    all_results.append(result)

        prefix = 'test'
        os.makedirs(output_dir, exist_ok=True)

        output_prediction_file = os.path.join(
            output_dir, "predictions_{}.json".format(prefix))
        output_nbest_file = os.path.join(
            output_dir, "nbest_predictions_{}.json".format(prefix))
        output_null_log_odds_file = os.path.join(
            output_dir, "null_odds_{}.json".format(prefix))

        if args['model_type'] in ['xlnet', 'xlm']:
            # XLNet and XLM use a more complex post-processing procedure
            all_predictions, all_nbest_json, scores_diff_json = write_predictions_extended(
                examples, features, all_results, args['n_best_size'],
                args['max_answer_length'], output_prediction_file,
                output_nbest_file, output_null_log_odds_file, eval_data,
                model.config.start_n_top, model.config.end_n_top, True,
                tokenizer, not args['silent'])
        else:
            all_predictions, all_nbest_json, scores_diff_json = write_predictions(
                examples, features, all_results, args['n_best_size'],
                args['max_answer_length'], False, output_prediction_file,
                output_nbest_file, output_null_log_odds_file,
                not args['silent'], True, args['null_score_diff_threshold'])

        return all_predictions, all_nbest_json, scores_diff_json
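
For context, a minimal usage sketch: as the docstring notes, this method is reached through the public eval_model() call rather than invoked directly. The sketch below assumes a simpletransformers-style QuestionAnsweringModel API and SQuAD-format data; the model name and example data are illustrative only.

    from simpletransformers.question_answering import QuestionAnsweringModel

    # Illustrative model choice; any supported (model_type, model_name) pair works.
    model = QuestionAnsweringModel("bert", "bert-base-cased")

    # SQuAD-style eval data: a list of dicts with 'context' and 'qas' entries.
    eval_data = [
        {
            "context": "The tower is 324 metres tall.",
            "qas": [
                {
                    "id": "00001",
                    "question": "How tall is the tower?",
                    "answers": [{"text": "324 metres", "answer_start": 13}],
                    "is_impossible": False,
                }
            ],
        }
    ]

    # eval_model() wraps evaluate(); predictions land in output_dir as
    # predictions_test.json, nbest_predictions_test.json, and null_odds_test.json.
    result, texts = model.eval_model(eval_data, output_dir="outputs/")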
Example 2
    # Same assumed module-level imports as in Example 1.
    def evaluate(self, eval_data, output_dir):
        """
        Evaluates the model on eval_data.

        Utility function to be used by the eval_model() method. Not intended to be used directly.
        """
        tokenizer = self.tokenizer
        device = self.device
        model = self.model
        args = self.args

        if isinstance(eval_data, str):
            with open(eval_data, "r") as f:
                eval_examples = json.load(f)
        else:
            eval_examples = eval_data

        eval_dataset, examples, features = self.load_and_cache_examples(
            eval_examples, evaluate=True, output_examples=True
        )

        eval_sampler = SequentialSampler(eval_dataset)
        eval_dataloader = DataLoader(eval_dataset, sampler=eval_sampler, batch_size=args["eval_batch_size"])

        model.eval()

        all_results = []
        for batch in tqdm(eval_dataloader, disable=args["silent"]):
            batch = tuple(t.to(device) for t in batch)

            with torch.no_grad():
                inputs = {
                    "input_ids": batch[0],
                    "attention_mask": batch[1],
                }

                if args["model_type"] != "distilbert":
                    inputs["token_type_ids"] = None if args["model_type"] == "xlm" else batch[2]

                example_indices = batch[3]

                if args["model_type"] in ["xlnet", "xlm"]:
                    inputs.update({"cls_index": batch[4], "p_mask": batch[5]})

                outputs = model(**inputs)
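                # Output ordering differs by architecture: BERT-style models
                # return (start_logits, end_logits), while XLNet/XLM return
                # (start_top_log_probs, start_top_index, end_top_log_probs,
                #  end_top_index, cls_logits), handled by the branch below.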

                for i, example_index in enumerate(example_indices):
                    eval_feature = features[example_index.item()]
                    unique_id = int(eval_feature.unique_id)
                    if args["model_type"] in ["xlnet", "xlm"]:
                        # XLNet and XLM use a more complex post-processing procedure
                        result = RawResultExtended(
                            unique_id=unique_id,
                            start_top_log_probs=to_list(outputs[0][i]),
                            start_top_index=to_list(outputs[1][i]),
                            end_top_log_probs=to_list(outputs[2][i]),
                            end_top_index=to_list(outputs[3][i]),
                            cls_logits=to_list(outputs[4][i]),
                        )
                    else:
                        result = RawResult(
                            unique_id=unique_id,
                            start_logits=to_list(outputs[0][i]),
                            end_logits=to_list(outputs[1][i]),
                        )
                    all_results.append(result)

        prefix = "test"
        os.makedirs(output_dir, exist_ok=True)

        output_prediction_file = os.path.join(output_dir, "predictions_{}.json".format(prefix))
        output_nbest_file = os.path.join(output_dir, "nbest_predictions_{}.json".format(prefix))
        output_null_log_odds_file = os.path.join(output_dir, "null_odds_{}.json".format(prefix))

        if args["model_type"] in ["xlnet", "xlm"]:
            # XLNet and XLM use a more complex post-processing procedure
            all_predictions, all_nbest_json, scores_diff_json = write_predictions_extended(
                examples,
                features,
                all_results,
                args["n_best_size"],
                args["max_answer_length"],
                output_prediction_file,
                output_nbest_file,
                output_null_log_odds_file,
                eval_data,
                model.config.start_n_top,
                model.config.end_n_top,
                True,
                tokenizer,
                not args["silent"],
            )
        else:
            all_predictions, all_nbest_json, scores_diff_json = write_predictions(
                examples,
                features,
                all_results,
                args["n_best_size"],
                args["max_answer_length"],
                False,
                output_prediction_file,
                output_nbest_file,
                output_null_log_odds_file,
                not args["silent"],
                True,
                args["null_score_diff_threshold"],
            )

        return all_predictions, all_nbest_json, scores_diff_json
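
The returned all_predictions and scores_diff_json are also persisted to disk; a small sketch of reading the written files back (assuming output_dir was "outputs/", as in the sketch after Example 1):

    import json

    with open("outputs/predictions_test.json") as f:
        predictions = json.load(f)   # {question id: best answer text}

    with open("outputs/null_odds_test.json") as f:
        null_odds = json.load(f)     # {question id: no-answer score difference}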