def post_processing_function(examples, features, predictions, stage="eval"):
    # Post-processing: we match the start logits and end logits to answers in
    # the original context.
    predictions, scores_diff_json = postprocess_qa_predictions_with_beam_search(
        examples=examples,
        features=features,
        predictions=predictions,
        version_2_with_negative=data_args.version_2_with_negative,
        n_best_size=data_args.n_best_size,
        max_answer_length=data_args.max_answer_length,
        start_n_top=model.config.start_n_top,
        end_n_top=model.config.end_n_top,
        output_dir=training_args.output_dir,
        is_world_process_zero=trainer.is_world_process_zero(),
        prefix=stage,
    )
    # Format the result to the format the metric expects.
    if data_args.version_2_with_negative:
        formatted_predictions = [
            {"id": k, "prediction_text": v, "no_answer_probability": scores_diff_json[k]}
            for k, v in predictions.items()
        ]
    else:
        formatted_predictions = [{"id": k, "prediction_text": v} for k, v in predictions.items()]
    references = [{"id": ex["id"], "answers": ex[answer_column_name]} for ex in examples]
    return EvalPrediction(predictions=formatted_predictions, label_ids=references)
def post_processing_function(examples, features, predictions, stage="eval"):
    # Post-processing: we match the start logits and end logits to
    # answers in the original context.
    if data_args.beam_search:
        predictions, scores_diff_json = postprocess_qa_predictions_with_beam_search(
            examples=examples,
            features=features,
            predictions=predictions,
            version_2_with_negative=data_args.version_2_with_negative,
            n_best_size=data_args.n_best_size,
            max_answer_length=data_args.max_answer_length,
            start_n_top=model.config.start_n_top,
            end_n_top=model.config.end_n_top,
            output_dir=training_args.output_dir,
            # log_level=log_level,
            prefix=stage,
        )
    else:
        predictions = postprocess_qa_predictions(
            examples=examples,
            features=features,
            predictions=predictions,
            version_2_with_negative=data_args.version_2_with_negative,
            n_best_size=data_args.n_best_size,
            max_answer_length=data_args.max_answer_length,
            output_dir=training_args.output_dir,
            prefix=stage,
        )

    # Format the result to the format the metric expects.
    if data_args.version_2_with_negative:
        if data_args.beam_search:
            formatted_predictions = [
                {"id": k, "prediction_text": v, "no_answer_probability": scores_diff_json[k]}
                for k, v in predictions.items()
            ]
        else:
            formatted_predictions = [
                {"id": k, "prediction_text": v, "no_answer_probability": 0.0}
                for k, v in predictions.items()
            ]
    else:
        formatted_predictions = [
            {"id": k, "prediction_text": v} for k, v in predictions.items()
        ]
    references = [
        {"id": ex["id"], "answers": ex[answer_column_name]} for ex in examples
    ]
    return EvalPrediction(predictions=formatted_predictions, label_ids=references)
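# A minimal usage sketch, not part of the original file: in the Hugging Face
# question-answering examples, `post_processing_function` is handed to the
# examples' `QuestionAnsweringTrainer` (defined in trainer_qa.py), which calls
# it on the raw model outputs before computing metrics. The surrounding names
# assumed here (`eval_dataset`, `eval_examples`, `tokenizer`, `data_collator`)
# are defined earlier in the full script; the `evaluate` library is one common
# way to load the SQuAD metric.

import evaluate

metric = evaluate.load("squad_v2" if data_args.version_2_with_negative else "squad")


def compute_metrics(p: EvalPrediction):
    # `p.predictions` and `p.label_ids` are the formatted predictions and
    # references produced by `post_processing_function` above.
    return metric.compute(predictions=p.predictions, references=p.label_ids)


trainer = QuestionAnsweringTrainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset if training_args.do_train else None,
    eval_dataset=eval_dataset if training_args.do_eval else None,
    eval_examples=eval_examples if training_args.do_eval else None,
    tokenizer=tokenizer,
    data_collator=data_collator,
    post_process_function=post_processing_function,
    compute_metrics=compute_metrics,
)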