def predict(self, to_predict, n_best_size=None):
    """
    Performs predictions on a list of python dicts containing contexts and qas.

    Args:
        to_predict: A python list of python dicts containing contexts and questions to be sent
            to the model for prediction.
            E.g: predict([
                {
                    'context': "Some context as a demo",
                    'qas': [
                        {'id': '0', 'question': 'What is the context here?'},
                        {'id': '1', 'question': 'What is this for?'}
                    ]
                }
            ])
        n_best_size (Optional): Number of predictions to return. args['n_best_size'] will be used if not specified.

    Returns:
        preds: A python list containing the predicted answer, and id for each question in to_predict.
    """
    tokenizer = self.tokenizer
    device = self.device
    model = self.model
    args = self.args

    if not n_best_size:
        n_best_size = args['n_best_size']

    self._move_model_to_device()

    eval_examples = build_examples(to_predict)
    eval_dataset, examples, features = self.load_and_cache_examples(
        eval_examples, evaluate=True, output_examples=True, no_cache=True)

    eval_sampler = SequentialSampler(eval_dataset)
    eval_dataloader = DataLoader(eval_dataset, sampler=eval_sampler, batch_size=args["eval_batch_size"])

    eval_loss = 0.0
    nb_eval_steps = 0
    preds = None
    out_label_ids = None
    model.eval()

    all_results = []
    for batch in tqdm(eval_dataloader, disable=args['silent']):
        batch = tuple(t.to(device) for t in batch)

        with torch.no_grad():
            inputs = {
                'input_ids': batch[0],
                'attention_mask': batch[1],
            }

            if args['model_type'] != 'distilbert':
                inputs['token_type_ids'] = None if args['model_type'] == 'xlm' else batch[2]

            example_indices = batch[3]

            if args['model_type'] in ['xlnet', 'xlm']:
                inputs.update({'cls_index': batch[4], 'p_mask': batch[5]})

            outputs = model(**inputs)

            for i, example_index in enumerate(example_indices):
                eval_feature = features[example_index.item()]
                unique_id = int(eval_feature.unique_id)
                if args['model_type'] in ['xlnet', 'xlm']:
                    # XLNet uses a more complex post-processing procedure
                    result = RawResultExtended(
                        unique_id=unique_id,
                        start_top_log_probs=to_list(outputs[0][i]),
                        start_top_index=to_list(outputs[1][i]),
                        end_top_log_probs=to_list(outputs[2][i]),
                        end_top_index=to_list(outputs[3][i]),
                        cls_logits=to_list(outputs[4][i]))
                else:
                    result = RawResult(
                        unique_id=unique_id,
                        start_logits=to_list(outputs[0][i]),
                        end_logits=to_list(outputs[1][i]))
                all_results.append(result)

    if args['model_type'] in ['xlnet', 'xlm']:
        answers = get_best_predictions_extended(
            examples, features, all_results, n_best_size,
            args['max_answer_length'], model.config.start_n_top,
            model.config.end_n_top, True, tokenizer,
            args['null_score_diff_threshold'])
    else:
        answers = get_best_predictions(
            examples, features, all_results, n_best_size,
            args['max_answer_length'], False, False, True, False)

    return answers
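# The predict() above converts model outputs with a to_list() helper before building
# RawResult / RawResultExtended tuples. A minimal sketch of such a helper, assuming the
# outputs are torch tensors (this mirrors the common SQuAD utility; the project's own
# implementation may differ):
def to_list(tensor):
    # Detach from the computation graph, move to CPU, and convert to a plain
    # Python list so the values can be serialised and post-processed.
    return tensor.detach().cpu().tolist()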
def evaluate(self, eval_data, output_dir):
    """
    Evaluates the model on eval_data.

    Utility function to be used by the eval_model() method. Not intended to be used directly.
    """
    tokenizer = self.tokenizer
    device = self.device
    model = self.model
    args = self.args

    if isinstance(eval_data, str):
        with open(eval_data, "r") as f:
            eval_examples = json.load(f)
    else:
        eval_examples = eval_data

    eval_dataset, examples, features = self.load_and_cache_examples(
        eval_examples, evaluate=True, output_examples=True
    )

    eval_sampler = SequentialSampler(eval_dataset)
    eval_dataloader = DataLoader(eval_dataset, sampler=eval_sampler, batch_size=args["eval_batch_size"])

    model.eval()

    all_results = []
    for batch in tqdm(eval_dataloader, disable=args["silent"]):
        batch = tuple(t.to(device) for t in batch)

        with torch.no_grad():
            inputs = {
                "input_ids": batch[0],
                "attention_mask": batch[1],
            }

            if args["model_type"] != "distilbert":
                inputs["token_type_ids"] = None if args["model_type"] == "xlm" else batch[2]

            example_indices = batch[3]

            if args["model_type"] in ["xlnet", "xlm"]:
                inputs.update({"cls_index": batch[4], "p_mask": batch[5]})

            outputs = model(**inputs)

            for i, example_index in enumerate(example_indices):
                eval_feature = features[example_index.item()]
                unique_id = int(eval_feature.unique_id)
                if args["model_type"] in ["xlnet", "xlm"]:
                    # XLNet uses a more complex post-processing procedure
                    result = RawResultExtended(
                        unique_id=unique_id,
                        start_top_log_probs=to_list(outputs[0][i]),
                        start_top_index=to_list(outputs[1][i]),
                        end_top_log_probs=to_list(outputs[2][i]),
                        end_top_index=to_list(outputs[3][i]),
                        cls_logits=to_list(outputs[4][i]),
                    )
                else:
                    result = RawResult(
                        unique_id=unique_id,
                        start_logits=to_list(outputs[0][i]),
                        end_logits=to_list(outputs[1][i]),
                    )
                all_results.append(result)

    prefix = "test"
    os.makedirs(output_dir, exist_ok=True)

    output_prediction_file = os.path.join(output_dir, "predictions_{}.json".format(prefix))
    output_nbest_file = os.path.join(output_dir, "nbest_predictions_{}.json".format(prefix))
    output_null_log_odds_file = os.path.join(output_dir, "null_odds_{}.json".format(prefix))

    if args["model_type"] in ["xlnet", "xlm"]:
        # XLNet uses a more complex post-processing procedure
        (all_predictions, all_nbest_json, scores_diff_json,) = write_predictions_extended(
            examples,
            features,
            all_results,
            args["n_best_size"],
            args["max_answer_length"],
            output_prediction_file,
            output_nbest_file,
            output_null_log_odds_file,
            eval_data,
            model.config.start_n_top,
            model.config.end_n_top,
            True,
            tokenizer,
            not args["silent"],
        )
    else:
        all_predictions, all_nbest_json, scores_diff_json = write_predictions(
            examples,
            features,
            all_results,
            args["n_best_size"],
            args["max_answer_length"],
            False,
            output_prediction_file,
            output_nbest_file,
            output_null_log_odds_file,
            not args["silent"],
            True,
            args["null_score_diff_threshold"],
        )

    return all_predictions, all_nbest_json, scores_diff_json
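# A sketch of the eval_data structure evaluate() accepts when it is passed a Python list
# instead of a file path. The 'context'/'qas' layout comes from the predict() docstring;
# the 'answers', 'answer_start', and 'is_impossible' fields follow the SQuAD-style schema
# the feature-building code assumes, so treat the exact field names as illustrative.
eval_data = [
    {
        "context": "Some context as a demo",
        "qas": [
            {
                "id": "0",
                "question": "What is the context here?",
                "is_impossible": False,
                "answers": [{"text": "Some context", "answer_start": 0}],
            }
        ],
    }
]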
def evaluate(self, eval_data, output_dir):
    """
    Evaluates the model on eval_data.

    Utility function to be used by the eval_model() method. Not intended to be used directly.
    """
    tokenizer = self.tokenizer
    device = self.device
    model = self.model
    args = self.args

    eval_output_dir = output_dir

    results = {}

    if isinstance(eval_data, str):
        with open(eval_data, 'r') as f:
            eval_examples = json.load(f)
    else:
        eval_examples = eval_data

    eval_dataset, examples, features = self.load_and_cache_examples(
        eval_examples, evaluate=True, output_examples=True)

    eval_sampler = SequentialSampler(eval_dataset)
    eval_dataloader = DataLoader(eval_dataset, sampler=eval_sampler, batch_size=args["eval_batch_size"])

    eval_loss = 0.0
    nb_eval_steps = 0
    preds = None
    out_label_ids = None
    model.eval()

    all_results = []
    for batch in tqdm(eval_dataloader, disable=args['silent']):
        batch = tuple(t.to(device) for t in batch)

        with torch.no_grad():
            inputs = {
                'input_ids': batch[0],
                'attention_mask': batch[1],
            }

            if args['model_type'] != 'distilbert':
                inputs['token_type_ids'] = None if args['model_type'] == 'xlm' else batch[2]

            example_indices = batch[3]

            if args['model_type'] in ['xlnet', 'xlm']:
                inputs.update({'cls_index': batch[4], 'p_mask': batch[5]})

            outputs = model(**inputs)

            for i, example_index in enumerate(example_indices):
                eval_feature = features[example_index.item()]
                unique_id = int(eval_feature.unique_id)
                if args['model_type'] in ['xlnet', 'xlm']:
                    # XLNet uses a more complex post-processing procedure
                    result = RawResultExtended(
                        unique_id=unique_id,
                        start_top_log_probs=to_list(outputs[0][i]),
                        start_top_index=to_list(outputs[1][i]),
                        end_top_log_probs=to_list(outputs[2][i]),
                        end_top_index=to_list(outputs[3][i]),
                        cls_logits=to_list(outputs[4][i]))
                else:
                    result = RawResult(
                        unique_id=unique_id,
                        start_logits=to_list(outputs[0][i]),
                        end_logits=to_list(outputs[1][i]))
                all_results.append(result)

    prefix = 'test'
    if not os.path.isdir(output_dir):
        os.mkdir(output_dir)

    output_prediction_file = os.path.join(output_dir, "predictions_{}.json".format(prefix))
    output_nbest_file = os.path.join(output_dir, "nbest_predictions_{}.json".format(prefix))
    output_null_log_odds_file = os.path.join(output_dir, "null_odds_{}.json".format(prefix))

    if args['model_type'] in ['xlnet', 'xlm']:
        # XLNet uses a more complex post-processing procedure
        all_predictions, all_nbest_json, scores_diff_json = write_predictions_extended(
            examples, features, all_results, args['n_best_size'],
            args['max_answer_length'], output_prediction_file,
            output_nbest_file, output_null_log_odds_file, eval_data,
            model.config.start_n_top, model.config.end_n_top, True,
            tokenizer, not args['silent'])
    else:
        all_predictions, all_nbest_json, scores_diff_json = write_predictions(
            examples, features, all_results, args['n_best_size'],
            args['max_answer_length'], False, output_prediction_file,
            output_nbest_file, output_null_log_odds_file,
            not args['silent'], True, args['null_score_diff_threshold'])

    return all_predictions, all_nbest_json, scores_diff_json
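# evaluate() writes its prediction, n-best, and null-odds output as JSON files in
# output_dir (filenames taken from the code above). A minimal sketch of inspecting
# those files after a run, assuming output_dir was "outputs" (hypothetical path) and
# that the files follow the standard SQuAD prediction-writer layout:
import json
import os

output_dir = "outputs"
with open(os.path.join(output_dir, "predictions_test.json")) as f:
    predictions = json.load(f)  # expected: question id -> best answer text
with open(os.path.join(output_dir, "nbest_predictions_test.json")) as f:
    nbest = json.load(f)  # expected: question id -> list of candidate answers with probabilities
with open(os.path.join(output_dir, "null_odds_test.json")) as f:
    null_odds = json.load(f)  # expected: question id -> null score difference

print(len(predictions), "questions evaluated")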
def predict(self, to_predict, n_best_size=None):
    """
    Performs predictions on a list of python dicts containing contexts and qas.

    Args:
        to_predict: A python list of python dicts containing contexts and questions to be sent
            to the model for prediction.
            E.g: predict([
                {
                    'context': "Some context as a demo",
                    'qas': [
                        {'id': '0', 'question': 'What is the context here?'},
                        {'id': '1', 'question': 'What is this for?'}
                    ]
                }
            ])
        n_best_size (Optional): Number of predictions to return. args.n_best_size will be used if not specified.

    Returns:
        answer_list: A python list of dicts containing the predicted answer and id for each question in to_predict.
        probability_list: A python list of dicts containing the predicted probability and id for each question in to_predict.
    """  # noqa: ignore flake8
    tokenizer = self.tokenizer
    device = self.device
    model = self.model
    args = self.args

    if not n_best_size:
        n_best_size = args.n_best_size

    self._move_model_to_device()

    eval_examples = build_examples(to_predict)
    eval_dataset, examples, features = self.load_and_cache_examples(
        eval_examples, evaluate=True, output_examples=True, no_cache=True
    )

    eval_sampler = SequentialSampler(eval_dataset)
    eval_dataloader = DataLoader(eval_dataset, sampler=eval_sampler, batch_size=args.eval_batch_size)

    model.eval()

    all_results = []
    for batch in tqdm(eval_dataloader, disable=args.silent, desc="Running Prediction"):
        batch = tuple(t.to(device) for t in batch)

        with torch.no_grad():
            inputs = {
                "input_ids": batch[0],
                "attention_mask": batch[1],
                "token_type_ids": batch[2],
            }

            if self.args.model_type in ["xlm", "roberta", "distilbert", "camembert", "electra", "xlmroberta"]:
                del inputs["token_type_ids"]

            example_indices = batch[3]

            if args.model_type in ["xlnet", "xlm"]:
                inputs.update({"cls_index": batch[4], "p_mask": batch[5]})

            outputs = model(**inputs)

            for i, example_index in enumerate(example_indices):
                eval_feature = features[example_index.item()]
                unique_id = int(eval_feature.unique_id)
                if args.model_type in ["xlnet", "xlm"]:
                    # XLNet uses a more complex post-processing procedure
                    result = RawResultExtended(
                        unique_id=unique_id,
                        start_top_log_probs=to_list(outputs[0][i]),
                        start_top_index=to_list(outputs[1][i]),
                        end_top_log_probs=to_list(outputs[2][i]),
                        end_top_index=to_list(outputs[3][i]),
                        cls_logits=to_list(outputs[4][i]),
                    )
                else:
                    result = RawResult(
                        unique_id=unique_id,
                        start_logits=to_list(outputs[0][i]),
                        end_logits=to_list(outputs[1][i]),
                    )
                all_results.append(result)

    if args.model_type in ["xlnet", "xlm"]:
        answers = get_best_predictions_extended(
            examples,
            features,
            all_results,
            n_best_size,
            args.max_answer_length,
            model.config.start_n_top,
            model.config.end_n_top,
            True,
            tokenizer,
            args.null_score_diff_threshold,
        )
    else:
        answers = get_best_predictions(
            examples, features, all_results, n_best_size, args.max_answer_length, False, False, True, False,
        )

    answer_list = [{"id": answer["id"], "answer": answer["answer"]} for answer in answers]
    probability_list = [{"id": answer["id"], "probability": answer["probability"]} for answer in answers]

    return answer_list, probability_list
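# A minimal usage sketch for the predict() above, assuming a trained question answering
# model instance named `qa_model` (hypothetical name) that exposes this method. Both
# returned lists are built from the same answers in the same order, so they can be
# zipped together per question id.
to_predict = [
    {
        "context": "Some context as a demo",
        "qas": [
            {"id": "0", "question": "What is the context here?"},
            {"id": "1", "question": "What is this for?"},
        ],
    }
]

answers, probabilities = qa_model.predict(to_predict, n_best_size=5)
for ans, prob in zip(answers, probabilities):
    print(ans["id"], ans["answer"], prob["probability"])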