Example #1
    def predict(self, to_predict, n_best_size=None):
        """
        Performs predictions on a list of python dicts containing contexts and qas.

        Args:
            to_predict: A python list of python dicts containing contexts and questions to be sent to the model for prediction.
                        E.g: predict([
                            {
                                'context': "Some context as a demo",
                                'qas': [
                                    {'id': '0', 'question': 'What is the context here?'},
                                    {'id': '1', 'question': 'What is this for?'}
                                ]
                            }
                        ])
            n_best_size (Optional): Number of predictions to return. args['n_best_size'] will be used if not specified.

        Returns:
            preds: A python list containing the predicted answer and id for each question in to_predict.
        """
        tokenizer = self.tokenizer
        device = self.device
        model = self.model
        args = self.args

        if not n_best_size:
            n_best_size = args['n_best_size']

        self._move_model_to_device()

        eval_examples = build_examples(to_predict)
        eval_dataset, examples, features = self.load_and_cache_examples(
            eval_examples, evaluate=True, output_examples=True, no_cache=True)

        eval_sampler = SequentialSampler(eval_dataset)
        eval_dataloader = DataLoader(eval_dataset,
                                     sampler=eval_sampler,
                                     batch_size=args["eval_batch_size"])

        model.eval()

        all_results = []
        for batch in tqdm(eval_dataloader, disable=args['silent']):
            batch = tuple(t.to(device) for t in batch)

            with torch.no_grad():
                inputs = {
                    'input_ids': batch[0],
                    'attention_mask': batch[1],
                }

                if args['model_type'] != 'distilbert':
                    inputs['token_type_ids'] = None if args['model_type'] == 'xlm' else batch[2]

                example_indices = batch[3]

                if args['model_type'] in ['xlnet', 'xlm']:
                    inputs.update({'cls_index': batch[4], 'p_mask': batch[5]})

                outputs = model(**inputs)

                for i, example_index in enumerate(example_indices):
                    eval_feature = features[example_index.item()]
                    unique_id = int(eval_feature.unique_id)
                    if args['model_type'] in ['xlnet', 'xlm']:
                        # XLNet uses a more complex post-processing procedure
                        result = RawResultExtended(
                            unique_id=unique_id,
                            start_top_log_probs=to_list(outputs[0][i]),
                            start_top_index=to_list(outputs[1][i]),
                            end_top_log_probs=to_list(outputs[2][i]),
                            end_top_index=to_list(outputs[3][i]),
                            cls_logits=to_list(outputs[4][i]))
                    else:
                        result = RawResult(unique_id=unique_id,
                                           start_logits=to_list(outputs[0][i]),
                                           end_logits=to_list(outputs[1][i]))
                    all_results.append(result)

        if args['model_type'] in ['xlnet', 'xlm']:
            answers = get_best_predictions_extended(
                examples, features, all_results, n_best_size,
                args['max_answer_length'], model.config.start_n_top,
                model.config.end_n_top, True, tokenizer,
                args['null_score_diff_threshold'])
        else:
            answers = get_best_predictions(examples, features, all_results,
                                           n_best_size,
                                           args['max_answer_length'], False,
                                           False, True, False)

        return answers
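
A minimal usage sketch for the predict() method above, reusing the demo input from its docstring. The model instance (here called `model`) is an assumption; it just stands for an instance of the question-answering model class that defines this method.

    # Hypothetical usage sketch: `model` is assumed to be an instance of the
    # class that defines the predict() method shown above.
    to_predict = [
        {
            'context': "Some context as a demo",
            'qas': [
                {'id': '0', 'question': 'What is the context here?'},
                {'id': '1', 'question': 'What is this for?'}
            ]
        }
    ]

    answers = model.predict(to_predict, n_best_size=3)
    for answer in answers:
        print(answer)  # each entry carries a question id and its predicted answer(s)
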
Example #2
    def evaluate(self, eval_data, output_dir):
        """
        Evaluates the model on eval_data.

        Utility function to be used by the eval_model() method. Not intended to be used directly.
        """
        tokenizer = self.tokenizer
        device = self.device
        model = self.model
        args = self.args

        if isinstance(eval_data, str):
            with open(eval_data, "r") as f:
                eval_examples = json.load(f)
        else:
            eval_examples = eval_data

        eval_dataset, examples, features = self.load_and_cache_examples(
            eval_examples, evaluate=True, output_examples=True
        )

        eval_sampler = SequentialSampler(eval_dataset)
        eval_dataloader = DataLoader(eval_dataset, sampler=eval_sampler, batch_size=args["eval_batch_size"])

        model.eval()

        all_results = []
        for batch in tqdm(eval_dataloader, disable=args["silent"]):
            batch = tuple(t.to(device) for t in batch)

            with torch.no_grad():
                inputs = {
                    "input_ids": batch[0],
                    "attention_mask": batch[1],
                }

                if args["model_type"] != "distilbert":
                    inputs["token_type_ids"] = None if args["model_type"] == "xlm" else batch[2]

                example_indices = batch[3]

                if args["model_type"] in ["xlnet", "xlm"]:
                    inputs.update({"cls_index": batch[4], "p_mask": batch[5]})

                outputs = model(**inputs)

                for i, example_index in enumerate(example_indices):
                    eval_feature = features[example_index.item()]
                    unique_id = int(eval_feature.unique_id)
                    if args["model_type"] in ["xlnet", "xlm"]:
                        # XLNet uses a more complex post-processing procedure
                        result = RawResultExtended(
                            unique_id=unique_id,
                            start_top_log_probs=to_list(outputs[0][i]),
                            start_top_index=to_list(outputs[1][i]),
                            end_top_log_probs=to_list(outputs[2][i]),
                            end_top_index=to_list(outputs[3][i]),
                            cls_logits=to_list(outputs[4][i]),
                        )
                    else:
                        result = RawResult(
                            unique_id=unique_id, start_logits=to_list(outputs[0][i]), end_logits=to_list(outputs[1][i]),
                        )
                    all_results.append(result)

        prefix = "test"
        os.makedirs(output_dir, exist_ok=True)

        output_prediction_file = os.path.join(output_dir, "predictions_{}.json".format(prefix))
        output_nbest_file = os.path.join(output_dir, "nbest_predictions_{}.json".format(prefix))
        output_null_log_odds_file = os.path.join(output_dir, "null_odds_{}.json".format(prefix))

        if args["model_type"] in ["xlnet", "xlm"]:
            # XLNet uses a more complex post-processing procedure
            (all_predictions, all_nbest_json, scores_diff_json,) = write_predictions_extended(
                examples,
                features,
                all_results,
                args["n_best_size"],
                args["max_answer_length"],
                output_prediction_file,
                output_nbest_file,
                output_null_log_odds_file,
                eval_data,
                model.config.start_n_top,
                model.config.end_n_top,
                True,
                tokenizer,
                not args["silent"],
            )
        else:
            all_predictions, all_nbest_json, scores_diff_json = write_predictions(
                examples,
                features,
                all_results,
                args["n_best_size"],
                args["max_answer_length"],
                False,
                output_prediction_file,
                output_nbest_file,
                output_null_log_odds_file,
                not args["silent"],
                True,
                args["null_score_diff_threshold"],
            )

        return all_predictions, all_nbest_json, scores_diff_json
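
For reference, a hedged sketch of the kind of eval_data this evaluate() method consumes. It mirrors the predict() input format from Example #1, extended with SQuAD-style gold answers; the 'answers' and 'is_impossible' fields are an assumption here. As the docstring notes, this method is normally reached through the public eval_model() wrapper rather than called directly.

    # Assumed SQuAD-style evaluation records; fields beyond the predict()
    # docstring ('answers', 'is_impossible') are an assumption.
    eval_data = [
        {
            "context": "Some context as a demo",
            "qas": [
                {
                    "id": "0",
                    "question": "What is the context here?",
                    "is_impossible": False,
                    "answers": [{"text": "a demo", "answer_start": 16}],
                }
            ],
        }
    ]

    # The same structure can be saved to a JSON file and passed as a path,
    # since evaluate() json.load()s string inputs before building features.
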
Example #3
    def evaluate(self, eval_data, output_dir):
        """
        Evaluates the model on eval_data.

        Utility function to be used by the eval_model() method. Not intended to be used directly.
        """
        tokenizer = self.tokenizer
        device = self.device
        model = self.model
        args = self.args
        eval_output_dir = output_dir

        results = {}

        if isinstance(eval_data, str):
            with open(eval_data, 'r') as f:
                eval_examples = json.load(f)
        else:
            eval_examples = eval_data

        eval_dataset, examples, features = self.load_and_cache_examples(
            eval_examples, evaluate=True, output_examples=True)

        eval_sampler = SequentialSampler(eval_dataset)
        eval_dataloader = DataLoader(eval_dataset,
                                     sampler=eval_sampler,
                                     batch_size=args["eval_batch_size"])

        model.eval()

        all_results = []
        for batch in tqdm(eval_dataloader, disable=args['silent']):
            batch = tuple(t.to(device) for t in batch)

            with torch.no_grad():
                inputs = {
                    'input_ids': batch[0],
                    'attention_mask': batch[1],
                }

                if args['model_type'] != 'distilbert':
                    inputs['token_type_ids'] = None if args['model_type'] == 'xlm' else batch[2]

                example_indices = batch[3]

                if args['model_type'] in ['xlnet', 'xlm']:
                    inputs.update({'cls_index': batch[4], 'p_mask': batch[5]})

                outputs = model(**inputs)

                for i, example_index in enumerate(example_indices):
                    eval_feature = features[example_index.item()]
                    unique_id = int(eval_feature.unique_id)
                    if args['model_type'] in ['xlnet', 'xlm']:
                        # XLNet uses a more complex post-processing procedure
                        result = RawResultExtended(
                            unique_id=unique_id,
                            start_top_log_probs=to_list(outputs[0][i]),
                            start_top_index=to_list(outputs[1][i]),
                            end_top_log_probs=to_list(outputs[2][i]),
                            end_top_index=to_list(outputs[3][i]),
                            cls_logits=to_list(outputs[4][i]))
                    else:
                        result = RawResult(unique_id=unique_id,
                                           start_logits=to_list(outputs[0][i]),
                                           end_logits=to_list(outputs[1][i]))
                    all_results.append(result)

        prefix = 'test'
        if not os.path.isdir(output_dir):
            os.mkdir(output_dir)

        output_prediction_file = os.path.join(
            output_dir, "predictions_{}.json".format(prefix))
        output_nbest_file = os.path.join(
            output_dir, "nbest_predictions_{}.json".format(prefix))
        output_null_log_odds_file = os.path.join(
            output_dir, "null_odds_{}.json".format(prefix))

        if args['model_type'] in ['xlnet', 'xlm']:
            # XLNet uses a more complex post-processing procedure
            all_predictions, all_nbest_json, scores_diff_json = write_predictions_extended(
                examples, features, all_results, args['n_best_size'],
                args['max_answer_length'], output_prediction_file,
                output_nbest_file, output_null_log_odds_file, eval_data,
                model.config.start_n_top, model.config.end_n_top, True,
                tokenizer, not args['silent'])
        else:
            all_predictions, all_nbest_json, scores_diff_json = write_predictions(
                examples, features, all_results, args['n_best_size'],
                args['max_answer_length'], False, output_prediction_file,
                output_nbest_file, output_null_log_odds_file,
                not args['silent'], True, args['null_score_diff_threshold'])

        return all_predictions, all_nbest_json, scores_diff_json

    def predict(self, to_predict, n_best_size=None):
        """
        Performs predictions on a list of python dicts containing contexts and qas.

        Args:
            to_predict: A python list of python dicts containing contexts and questions to be sent to the model for prediction.
                        E.g: predict([
                            {
                                'context': "Some context as a demo",
                                'qas': [
                                    {'id': '0', 'question': 'What is the context here?'},
                                    {'id': '1', 'question': 'What is this for?'}
                                ]
                            }
                        ])
            n_best_size (Optional): Number of predictions to return. args.n_best_size will be used if not specified.

        Returns:
            answer_list: A python list of dicts containing the predicted answer and id for each question in to_predict.
            probability_list: A python list of dicts containing the probability of the predicted answer and id for each question in to_predict.
        """  # noqa: ignore flake8
        tokenizer = self.tokenizer
        device = self.device
        model = self.model
        args = self.args

        if not n_best_size:
            n_best_size = args.n_best_size

        self._move_model_to_device()

        eval_examples = build_examples(to_predict)
        eval_dataset, examples, features = self.load_and_cache_examples(
            eval_examples, evaluate=True, output_examples=True, no_cache=True
        )

        eval_sampler = SequentialSampler(eval_dataset)
        eval_dataloader = DataLoader(eval_dataset, sampler=eval_sampler, batch_size=args.eval_batch_size)

        model.eval()

        all_results = []
        for batch in tqdm(eval_dataloader, disable=args.silent, desc="Running Prediction"):
            batch = tuple(t.to(device) for t in batch)

            with torch.no_grad():
                inputs = {
                    "input_ids": batch[0],
                    "attention_mask": batch[1],
                    "token_type_ids": batch[2],
                }

                if self.args.model_type in ["xlm", "roberta", "distilbert", "camembert", "electra", "xlmroberta"]:
                    del inputs["token_type_ids"]

                example_indices = batch[3]

                if args.model_type in ["xlnet", "xlm"]:
                    inputs.update({"cls_index": batch[4], "p_mask": batch[5]})

                outputs = model(**inputs)

                for i, example_index in enumerate(example_indices):
                    eval_feature = features[example_index.item()]
                    unique_id = int(eval_feature.unique_id)
                    if args.model_type in ["xlnet", "xlm"]:
                        # XLNet uses a more complex post-processing procedure
                        result = RawResultExtended(
                            unique_id=unique_id,
                            start_top_log_probs=to_list(outputs[0][i]),
                            start_top_index=to_list(outputs[1][i]),
                            end_top_log_probs=to_list(outputs[2][i]),
                            end_top_index=to_list(outputs[3][i]),
                            cls_logits=to_list(outputs[4][i]),
                        )
                    else:
                        result = RawResult(
                            unique_id=unique_id,
                            start_logits=to_list(outputs[0][i]),
                            end_logits=to_list(outputs[1][i]),
                        )
                    all_results.append(result)

        if args.model_type in ["xlnet", "xlm"]:
            answers = get_best_predictions_extended(
                examples,
                features,
                all_results,
                n_best_size,
                args.max_answer_length,
                model.config.start_n_top,
                model.config.end_n_top,
                True,
                tokenizer,
                args.null_score_diff_threshold,
            )
        else:
            answers = get_best_predictions(
                examples, features, all_results, n_best_size, args.max_answer_length, False, False, True, False,
            )

        answer_list = [{"id": answer["id"], "answer": answer["answer"]} for answer in answers]
        probability_list = [{"id": answer["id"], "probability": answer["probability"]} for answer in answers]

        return answer_list, probability_list
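
Unlike Example #1, this version of predict() returns two parallel lists, one with answers and one with their probabilities, both keyed by question id. A small consumption sketch (assuming `model` and `to_predict` as in Example #1) that merges them:

    # Hypothetical post-processing: pair each predicted answer with its
    # probability, keyed by question id.
    answer_list, probability_list = model.predict(to_predict)

    merged = {
        answer["id"]: {
            "answer": answer["answer"],
            "probability": probability["probability"],
        }
        for answer, probability in zip(answer_list, probability_list)
    }
    print(merged)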