def concat_all(self, tensor: Union[Tensor, List[Tensor]], padding_index: int = -100):
    """Accumulate ``tensor`` into ``self.collector``.

    A list holding exactly one element is unwrapped to that element first.
    The first value received becomes the collector as-is; every later value is
    merged into the existing collector through ``nested_concat``.

    Args:
        tensor: A tensor, or a list of tensors, to add to the collector.
        padding_index: Forwarded to ``nested_concat`` as its third argument.
    """
    # isinstance (rather than an exact type comparison) so that list
    # subclasses holding a single element are unwrapped as well.
    if isinstance(tensor, list) and len(tensor) == 1:
        tensor = tensor[0]

    if self.collector is None:
        self.collector = tensor
    else:
        self.collector = nested_concat(self.collector, tensor, padding_index)
def prediction_loop(self, data_loader, world_size):
    """Run ``self.model`` over ``data_loader`` and return a single score.

    Losses, logits and labels produced by ``self.prediction_step`` are pushed
    into three ``DistributedTensorGatherer`` instances after every batch, then
    finalized once the loader is exhausted.

    Args:
        data_loader: Loader exposing ``dataset`` (with ``__len__``) and
            ``batch_size``.
        world_size: Forwarded to every ``DistributedTensorGatherer``.

    Returns:
        The result of ``generate_per_score`` when ``self.type_score`` is
        ``"PER"``, the ``"wer"`` entry of the computed metrics when it is
        ``"WER"``, and ``None`` for any other value.
    """
    dataset_size = len(data_loader.dataset)
    per_batch = data_loader.batch_size

    loss_gatherer = DistributedTensorGatherer(
        world_size, dataset_size, make_multiple_of=per_batch)
    prediction_gatherer = DistributedTensorGatherer(world_size, dataset_size)
    label_gatherer = DistributedTensorGatherer(world_size, dataset_size)

    loss_buffer = preds_buffer = labels_buffer = None

    self.model.eval()

    for inputs in data_loader:
        loss, logits, labels = self.prediction_step(inputs)

        # The scalar loss is repeated so it contributes one entry per sample slot.
        repeated_loss = loss.repeat(per_batch)
        if loss_buffer is None:
            loss_buffer = repeated_loss
        else:
            loss_buffer = torch.cat((loss_buffer, repeated_loss), dim=0)

        if preds_buffer is None:
            preds_buffer = logits
        else:
            preds_buffer = trainer_pt_utils.nested_concat(
                preds_buffer, logits, padding_index=-100)

        if labels_buffer is None:
            labels_buffer = labels
        else:
            labels_buffer = trainer_pt_utils.nested_concat(
                labels_buffer, labels, padding_index=-100)

        # Hand the buffers to the gatherers, then start a fresh accumulation.
        loss_gatherer.add_arrays(trainer_pt_utils.nested_numpify(loss_buffer))
        prediction_gatherer.add_arrays(trainer_pt_utils.nested_numpify(preds_buffer))
        label_gatherer.add_arrays(trainer_pt_utils.nested_numpify(labels_buffer))
        loss_buffer = preds_buffer = labels_buffer = None

    eval_loss = loss_gatherer.finalize()
    preds = prediction_gatherer.finalize()
    labels_ids = label_gatherer.finalize()

    if self.type_score == "PER":
        best_ids = np.argmax(preds, axis=-1)
        hypothesis = self.processor.batch_decode(torch.from_numpy(best_ids))
        reference = self.processor.batch_decode(torch.from_numpy(labels_ids))
        return generate_per_score(reference, hypothesis)

    if self.type_score == "WER":
        pred = EvalPrediction(predictions=preds, label_ids=labels_ids)
        best_ids = np.argmax(pred.predictions, axis=-1)

        # Positions marked -100 are rewritten (in place) to the pad token id
        # before the labels are decoded.
        pred.label_ids[pred.label_ids == -100] = self.processor.tokenizer.pad_token_id

        hypothesis = self.processor.batch_decode(best_ids)
        # we do not want to group tokens when computing the metrics
        reference = self.processor.batch_decode(pred.label_ids, group_tokens=False)

        metrics = denumpify_detensorize(compute_wer(hypothesis, reference))
        metrics["t_loss"] = eval_loss.mean().item()

        output = PredictionOutput(preds, labels_ids, metrics)
        return output.metrics["wer"]

    return None
start_logits, end_logits = outputs start_logits = torch.tensor(start_logits) end_logits = torch.tensor(end_logits) # necessary to pad predictions and labels for being gathered start_logits = accelerator.pad_across_processes(start_logits, dim=1, pad_index=-100) end_logits = accelerator.pad_across_processes(end_logits, dim=1, pad_index=-100) logits = (accelerator.gather(start_logits).cpu().numpy(), accelerator.gather(end_logits).cpu().numpy()) all_preds = logits if all_preds is None else nested_concat( all_preds, logits, padding_index=-100) if all_preds is not None: all_preds = nested_truncate(all_preds, len(eval_dataset)) evalTime = timeit.default_timer() - start_time logger.info(" Evaluation done in total %f secs (%f sec per example)", evalTime, evalTime / len(eval_dataset)) # Inference time from TRT logger.info("Average Inference Time = {:.3f} ms".format(total_time * 1000 / niter)) logger.info("Total Inference Time = {:.3f} ms".format(total_time * 1000)) logger.info("Total Number of Inference = %d", niter) prediction = post_processing_function(eval_examples, eval_dataset, all_preds) eval_metric = metric.compute(predictions=prediction.predictions,
def prediction_loop(
    self,
    dataloader: DataLoader,
    description: str,
    prediction_loss_only: Optional[bool] = None,
    ignore_keys: Optional[List[str]] = None,
    metric_key_prefix: str = "eval",
) -> PredictionOutput:
    """
    Prediction/evaluation loop, shared by :obj:`Trainer.evaluate()` and :obj:`Trainer.predict()`.

    Works both with or without labels. Logits are reduced with ``argmax(-1)`` before being
    accumulated, so the returned predictions (and what ``compute_metrics`` receives) are the
    argmax results rather than the raw logits.

    Args:
        dataloader: Loader to iterate over; its dataset must implement ``__len__``.
        description: Label used in the "Running" log line.
        prediction_loss_only: When ``None``, falls back to ``self.args.prediction_loss_only``.
            When truthy, predictions and labels are not gathered and are returned as ``None``.
        ignore_keys: Forwarded to ``self.prediction_step``.
        metric_key_prefix: Every metric key that does not already start with
            ``"<prefix>_"`` is renamed to carry that prefix.

    Returns:
        A ``PredictionOutput`` holding the predictions, the label ids and the metrics dict.

    Raises:
        ValueError: If ``dataloader.dataset`` is not ``Sized``.
    """
    if not isinstance(dataloader.dataset, collections.abc.Sized):
        raise ValueError("dataset must implement __len__")
    prediction_loss_only = (
        prediction_loss_only if prediction_loss_only is not None else self.args.prediction_loss_only
    )

    model = self.model
    # multi-gpu eval
    if self.args.n_gpu > 1:
        model = torch.nn.DataParallel(model)
    # Note: in torch.distributed mode, there's no point in wrapping the model
    # inside a DistributedDataParallel as we'll be under `no_grad` anyways.

    batch_size = dataloader.batch_size
    num_examples = self.num_examples(dataloader)
    logger.info("***** Running %s *****", description)
    logger.info(" Num examples = %d", num_examples)
    logger.info(" Batch size = %d", batch_size)

    losses_host: Optional[torch.Tensor] = None
    preds_host: Optional[Union[torch.Tensor, List[torch.Tensor]]] = None
    labels_host: Optional[Union[torch.Tensor, List[torch.Tensor]]] = None

    world_size = 1
    if is_torch_tpu_available():
        world_size = xm.xrt_world_size()
    elif self.args.local_rank != -1:
        world_size = torch.distributed.get_world_size()
    world_size = max(1, world_size)

    eval_losses_gatherer = DistributedTensorGatherer(world_size, num_examples, make_multiple_of=batch_size)
    if not prediction_loss_only:
        preds_gatherer = DistributedTensorGatherer(world_size, num_examples)
        labels_gatherer = DistributedTensorGatherer(world_size, num_examples)

    model.eval()

    if is_torch_tpu_available():
        dataloader = pl.ParallelLoader(dataloader, [self.args.device]).per_device_loader(self.args.device)

    if self.args.past_index >= 0:
        self._past = None

    self.callback_handler.eval_dataloader = dataloader

    for step, inputs in enumerate(dataloader):
        loss, logits, labels = self.prediction_step(model, inputs, prediction_loss_only, ignore_keys=ignore_keys)
        if loss is not None:
            # The batch loss is repeated so it occupies one slot per sample of the batch.
            losses = loss.repeat(batch_size)
            losses_host = losses if losses_host is None else torch.cat((losses_host, losses), dim=0)
        if logits is not None:
            # Keep only the argmax over the last dimension instead of the full logits.
            logits_reduced = logits.argmax(-1)
            preds_host = (
                logits_reduced
                if preds_host is None
                else nested_concat(preds_host, logits_reduced, padding_index=-100)
            )
        if labels is not None:
            labels_host = labels if labels_host is None else nested_concat(labels_host, labels, padding_index=-100)
        self.control = self.callback_handler.on_prediction_step(self.args, self.state, self.control)

        # Gather all tensors and put them back on the CPU if we have done enough accumulation steps.
        if self.args.eval_accumulation_steps is not None and (step + 1) % self.args.eval_accumulation_steps == 0:
            eval_losses_gatherer.add_arrays(self._gather_and_numpify(losses_host, "eval_losses"))
            if not prediction_loss_only:
                preds_gatherer.add_arrays(self._gather_and_numpify(preds_host, "eval_preds"))
                labels_gatherer.add_arrays(self._gather_and_numpify(labels_host, "eval_label_ids"))

            # Set back to None to begin a new accumulation
            losses_host, preds_host, labels_host = None, None, None

    # Clean the state at the end of the evaluation loop. Only the attribute's
    # presence is checked: the previous truthiness test on `past_index` skipped
    # the cleanup for `past_index == 0`, although `_past` is created above for
    # every `past_index >= 0`.
    if hasattr(self, "_past"):
        delattr(self, "_past")

    # Gather all remaining tensors and put them back on the CPU
    eval_losses_gatherer.add_arrays(self._gather_and_numpify(losses_host, "eval_losses"))
    if not prediction_loss_only:
        preds_gatherer.add_arrays(self._gather_and_numpify(preds_host, "eval_preds"))
        labels_gatherer.add_arrays(self._gather_and_numpify(labels_host, "eval_label_ids"))

    eval_loss = eval_losses_gatherer.finalize()
    preds = preds_gatherer.finalize() if not prediction_loss_only else None
    label_ids = labels_gatherer.finalize() if not prediction_loss_only else None

    if self.compute_metrics is not None and preds is not None and label_ids is not None:
        metrics = self.compute_metrics(EvalPrediction(predictions=preds, label_ids=label_ids))
    else:
        metrics = {}

    if eval_loss is not None:
        metrics[f"{metric_key_prefix}_loss"] = eval_loss.mean().item()

    # Prefix all keys with metric_key_prefix + '_'
    for key in list(metrics.keys()):
        if not key.startswith(f"{metric_key_prefix}_"):
            metrics[f"{metric_key_prefix}_{key}"] = metrics.pop(key)

    return PredictionOutput(predictions=preds, label_ids=label_ids, metrics=metrics)
def evaluate(self, dataset, data_collator=None, description="", metric_key_prefix="eval", compute_metrics=None):
    """Run a sequential evaluation pass over ``dataset`` and collect metrics.

    A ``DataLoader`` with a ``SequentialSampler`` is built around ``dataset``, each batch is
    sent through ``self.prediction_step`` and the losses / logits / labels are accumulated
    and handed to ``DistributedTensorGatherer`` instances once the loop is over.

    Args:
        dataset: Dataset to evaluate; must support ``len()``.
        data_collator: Collate function; ``self.data_collator`` is used when ``None``.
        description: Label used in the "Running" log line.
        metric_key_prefix: Every metric key that does not already start with
            ``"<prefix>_"`` is renamed to carry that prefix.
        compute_metrics: Metric callable; ``self.compute_metrics`` is used when ``None``.
            It is called with an ``EvalPrediction`` and an ``all_example_ids`` keyword.

    Returns:
        A ``PredictionOutput`` with predictions, label ids, metrics and the collected
        example ids (``None`` when no batch carried an ``'example_ids'`` entry).
    """
    # prediction with single device
    eval_sampler = SequentialSampler(dataset)
    eval_dataloader = DataLoader(
        dataset,
        sampler=eval_sampler,
        batch_size=self.args.eval_batch_size,
        collate_fn=self.data_collator if data_collator is None else data_collator,
        num_workers=self.args.dataloader_num_workers)

    batch_size = eval_dataloader.batch_size
    num_examples = len(eval_dataloader.dataset)
    logger.info("***** Running %s *****", description)
    logger.info(" Num examples = %d", len(dataset))
    logger.info(" Batch size = %d", self.args.eval_batch_size)

    losses_host: Optional[torch.Tensor] = None
    preds_host: Optional[Union[torch.Tensor, List[torch.Tensor]]] = None
    labels_host: Optional[Union[torch.Tensor, List[torch.Tensor]]] = None

    world_size = max(1, self.args.world_size)

    compute_metrics = self.compute_metrics if compute_metrics is None else compute_metrics
    # Either True or None; None is falsy, so predictions are gathered whenever
    # a metric function is available.
    prediction_loss_only = True if compute_metrics is None else None

    eval_losses_gatherer = DistributedTensorGatherer(
        world_size, num_examples, make_multiple_of=batch_size)
    if not prediction_loss_only:
        # The actual number of eval_sample can be greater than num_examples in distributed settings (when we pass
        # a batch size to the sampler)
        make_multiple_of = None
        if hasattr(eval_dataloader, "sampler") and isinstance(
                eval_dataloader.sampler, SequentialDistributedSampler):
            make_multiple_of = eval_dataloader.sampler.batch_size
        preds_gatherer = DistributedTensorGatherer(
            world_size, num_examples, make_multiple_of=make_multiple_of)
        labels_gatherer = DistributedTensorGatherer(
            world_size, num_examples, make_multiple_of=make_multiple_of)

    model = self._wrap_model(self.model)
    model.eval()

    all_example_ids = []
    start_time = timeit.default_timer()
    for inputs in tqdm(eval_dataloader):
        # Example ids are bookkeeping only: strip them before the model sees the batch.
        if "example_ids" in inputs:
            example_ids = inputs.pop("example_ids")
            all_example_ids += example_ids

        loss, logits, labels = self.prediction_step(model, inputs, prediction_loss_only)
        if loss is not None:
            # The batch loss is repeated so it occupies one slot per sample of the batch.
            losses = loss.repeat(eval_dataloader.batch_size)
            losses_host = losses if losses_host is None else torch.cat(
                (losses_host, losses), dim=0)
        if logits is not None:
            preds_host = logits if preds_host is None else nested_concat(
                preds_host, logits, padding_index=-100)
        if labels is not None:
            labels_host = labels if labels_host is None else nested_concat(
                labels_host, labels, padding_index=-100)

    # Gather all remaining tensors and put them back on the CPU.
    # A buffer that never received data is still None (no loss / no labels /
    # empty loader); skip it instead of handing None to nested_numpify.
    # NOTE(review): relies on finalize() returning None when nothing was added,
    # which the None checks below already anticipate - confirm.
    if losses_host is not None:
        eval_losses_gatherer.add_arrays(nested_numpify(losses_host))
    if not prediction_loss_only:
        if preds_host is not None:
            preds_gatherer.add_arrays(nested_numpify(preds_host))
        if labels_host is not None:
            labels_gatherer.add_arrays(nested_numpify(labels_host))

    eval_loss = eval_losses_gatherer.finalize()
    preds = preds_gatherer.finalize() if not prediction_loss_only else None
    label_ids = labels_gatherer.finalize() if not prediction_loss_only else None

    if compute_metrics is not None and preds is not None and label_ids is not None:
        metrics = compute_metrics(
            EvalPrediction(predictions=preds, label_ids=label_ids),
            all_example_ids=all_example_ids or None)
    else:
        metrics = {}

    # To be JSON-serializable, we need to remove numpy types or zero-d tensors
    metrics = denumpify_detensorize(metrics)

    eval_time = timeit.default_timer() - start_time
    # max(..., 1) keeps the per-example figure from dividing by zero on an empty dataset.
    logger.info(" Evaluation done in total %f secs (%f sec per example)",
                eval_time, eval_time / max(len(dataset), 1))

    if eval_loss is not None:
        metrics[f"{metric_key_prefix}_loss"] = eval_loss.mean().item()

    # Prefix all keys with metric_key_prefix + '_'
    for key in list(metrics.keys()):
        if not key.startswith(f"{metric_key_prefix}_"):
            metrics[f"{metric_key_prefix}_{key}"] = metrics.pop(key)

    return PredictionOutput(
        predictions=preds,
        label_ids=label_ids,
        metrics=metrics,
        example_ids=all_example_ids or None)
def prediction_loop(
    self,
    dataloader: DataLoader,
    description: str,
    prediction_loss_only: Optional[bool] = None,
    ignore_keys: Optional[List[str]] = None,
    metric_key_prefix: str = "eval",
) -> PredictionOutput:
    """
    Prediction/evaluation loop, shared by :obj:`Trainer.evaluate()` and :obj:`Trainer.predict()`.

    Works both with or without labels. Besides loss, logits and labels, ``self.prediction_step``
    is expected to return three more values (gumbel output, sentence labels, sentence indicator);
    each one is accumulated and gathered like the predictions, and all of them are passed
    positionally to ``self.compute_metrics``.

    Args:
        dataloader: Loader to iterate over; its dataset must implement ``__len__``.
        description: Label used in the "Running" log line.
        prediction_loss_only: When ``None``, falls back to ``self.args.prediction_loss_only``.
            When truthy, nothing but the loss is gathered.
        ignore_keys: Forwarded to ``self.prediction_step``.
        metric_key_prefix: Every metric key that does not already start with
            ``"<prefix>_"`` is renamed to carry that prefix.

    Returns:
        A ``PredictionOutput`` holding the predictions, the label ids and the metrics dict.

    Raises:
        ValueError: If ``dataloader.dataset`` is not ``Sized``.
    """
    if not isinstance(dataloader.dataset, collections.abc.Sized):
        raise ValueError("dataset must implement __len__")
    prediction_loss_only = (
        prediction_loss_only if prediction_loss_only is not None else self.args.prediction_loss_only
    )

    if self.args.deepspeed and not self.args.do_train:
        # no harm, but flagging to the user that deepspeed config is ignored for eval
        # flagging only for when --do_train wasn't passed as only then it's redundant
        logger.info("Detected the deepspeed argument but it will not be used for evaluation")

    model = self.model

    # multi-gpu eval
    if self.args.n_gpu > 1:
        model = torch.nn.DataParallel(model)
    # Note: in torch.distributed mode, there's no point in wrapping the model
    # inside a DistributedDataParallel as we'll be under `no_grad` anyways

    batch_size = dataloader.batch_size
    num_examples = self.num_examples(dataloader)
    logger.info("***** Running %s *****", description)
    logger.info(" Num examples = %d", num_examples)
    logger.info(" Batch size = %d", batch_size)

    losses_host: Optional[torch.Tensor] = None
    preds_host: Optional[Union[torch.Tensor, List[torch.Tensor]]] = None
    labels_host: Optional[Union[torch.Tensor, List[torch.Tensor]]] = None
    gumbel_host: Optional[Union[torch.Tensor, List[torch.Tensor]]] = None
    sentence_labels_host: Optional[Union[torch.Tensor, List[torch.Tensor]]] = None
    sentence_indicator_host: Optional[Union[torch.Tensor, List[torch.Tensor]]] = None

    world_size = 1
    if is_torch_tpu_available():
        world_size = xm.xrt_world_size()
    elif self.args.local_rank != -1:
        world_size = dist.get_world_size()
    world_size = max(1, world_size)

    eval_losses_gatherer = DistributedTensorGatherer(world_size, num_examples, make_multiple_of=batch_size)
    if not prediction_loss_only:
        preds_gatherer = DistributedTensorGatherer(world_size, num_examples)
        labels_gatherer = DistributedTensorGatherer(world_size, num_examples)
        gumbel_gatherer = DistributedTensorGatherer(world_size, num_examples)
        sentence_labels_gatherer = DistributedTensorGatherer(world_size, num_examples)
        sentence_indicator_gatherer = DistributedTensorGatherer(world_size, num_examples)

    model.eval()

    if self.args.past_index >= 0:
        self._past = None

    self.callback_handler.eval_dataloader = dataloader

    for step, inputs in enumerate(dataloader):
        loss, logits, labels, gumbel_output, sentence_labels, sentence_indicator = self.prediction_step(
            model, inputs, prediction_loss_only, ignore_keys=ignore_keys
        )
        if loss is not None:
            # The batch loss is repeated so it occupies one slot per sample of the batch.
            losses = loss.repeat(batch_size)
            losses_host = losses if losses_host is None else torch.cat((losses_host, losses), dim=0)
        if logits is not None:
            preds_host = logits if preds_host is None else nested_concat(preds_host, logits, padding_index=-100)
        if labels is not None:
            labels_host = labels if labels_host is None else nested_concat(labels_host, labels, padding_index=-100)
        # Note the different padding values: -1 for gumbel output and sentence
        # labels, -100 for everything else.
        if gumbel_output is not None:
            gumbel_host = (
                gumbel_output
                if gumbel_host is None
                else nested_concat(gumbel_host, gumbel_output, padding_index=-1)
            )
        if sentence_labels is not None:
            sentence_labels_host = (
                sentence_labels
                if sentence_labels_host is None
                else nested_concat(sentence_labels_host, sentence_labels, padding_index=-1)
            )
        if sentence_indicator is not None:
            sentence_indicator_host = (
                sentence_indicator
                if sentence_indicator_host is None
                else nested_concat(sentence_indicator_host, sentence_indicator, padding_index=-100)
            )
        self.control = self.callback_handler.on_prediction_step(self.args, self.state, self.control)

        # Gather all tensors and put them back on the CPU if we have done enough accumulation steps.
        if self.args.eval_accumulation_steps is not None and (step + 1) % self.args.eval_accumulation_steps == 0:
            eval_losses_gatherer.add_arrays(self._gather_and_numpify(losses_host, "eval_losses"))
            if not prediction_loss_only:
                preds_gatherer.add_arrays(self._gather_and_numpify(preds_host, "eval_preds"))
                labels_gatherer.add_arrays(self._gather_and_numpify(labels_host, "eval_label_ids"))
                gumbel_gatherer.add_arrays(self._gather_and_numpify(gumbel_host, "eval_gumbel_output"))
                sentence_labels_gatherer.add_arrays(
                    self._gather_and_numpify(sentence_labels_host, "eval_sentence_idxs")
                )
                sentence_indicator_gatherer.add_arrays(
                    self._gather_and_numpify(sentence_indicator_host, "eval_sentence_indicator")
                )

            # Set back to None to begin a new accumulation
            losses_host, preds_host, labels_host = None, None, None
            gumbel_host, sentence_labels_host, sentence_indicator_host = None, None, None

    # Clean the state at the end of the evaluation loop. Only the attribute's
    # presence is checked: the previous truthiness test on `past_index` skipped
    # the cleanup for `past_index == 0`, although `_past` is created above for
    # every `past_index >= 0`.
    if hasattr(self, "_past"):
        delattr(self, "_past")

    # Gather all remaining tensors and put them back on the CPU
    eval_losses_gatherer.add_arrays(self._gather_and_numpify(losses_host, "eval_losses"))
    if not prediction_loss_only:
        preds_gatherer.add_arrays(self._gather_and_numpify(preds_host, "eval_preds"))
        labels_gatherer.add_arrays(self._gather_and_numpify(labels_host, "eval_label_ids"))
        gumbel_gatherer.add_arrays(self._gather_and_numpify(gumbel_host, "eval_gumbel_output"))
        sentence_labels_gatherer.add_arrays(self._gather_and_numpify(sentence_labels_host, "eval_sentence_idxs"))
        sentence_indicator_gatherer.add_arrays(
            self._gather_and_numpify(sentence_indicator_host, "eval_sentence_indicator")
        )

    eval_loss = eval_losses_gatherer.finalize()
    preds = preds_gatherer.finalize() if not prediction_loss_only else None
    label_ids = labels_gatherer.finalize() if not prediction_loss_only else None
    gumbel_outputs = gumbel_gatherer.finalize() if not prediction_loss_only else None
    sentence_idxs = sentence_labels_gatherer.finalize() if not prediction_loss_only else None
    sentence_indicators = sentence_indicator_gatherer.finalize() if not prediction_loss_only else None

    # Formerly an unconditional debug print to stdout; kept available at debug level.
    logger.debug("sentence_idxs: %s", sentence_idxs)

    if self.compute_metrics is not None and preds is not None and label_ids is not None:
        metrics = self.compute_metrics(preds, label_ids, gumbel_outputs, sentence_idxs, sentence_indicators)
    else:
        metrics = {}

    if eval_loss is not None:
        metrics[f"{metric_key_prefix}_loss"] = eval_loss.mean().item()

    # Prefix all keys with metric_key_prefix + '_'
    for key in list(metrics.keys()):
        if not key.startswith(f"{metric_key_prefix}_"):
            metrics[f"{metric_key_prefix}_{key}"] = metrics.pop(key)

    return PredictionOutput(predictions=preds, label_ids=label_ids, metrics=metrics)