Esempio n. 1
0
 def concat_all(self,
                tensor: Union[Tensor, List[Tensor]],
                padding_index: int = -100):
     if type(tensor) is list and len(tensor) == 1:
         tensor = tensor[0]
     self.collector = tensor if self.collector is None else nested_concat(
         self.collector, tensor, padding_index)
Esempio n. 2
0
    def prediction_loop(self, data_loader, world_size):
        num_examples = len(data_loader.dataset)
        batch_size = data_loader.batch_size
        eval_losses_gatherer = DistributedTensorGatherer(
            world_size, num_examples, make_multiple_of=batch_size)
        preds_gatherer = DistributedTensorGatherer(world_size, num_examples)
        labels_gatherer = DistributedTensorGatherer(world_size, num_examples)
        losses_host, preds_host, labels_host = None, None, None
        self.model.eval()

        for step, inputs in enumerate(data_loader):
            loss, logits, labels = self.prediction_step(inputs)
            losses = loss.repeat(batch_size)
            losses_host = losses if losses_host is None else torch.cat(
                (losses_host, losses), dim=0)
            preds_host = logits if preds_host is None else trainer_pt_utils.nested_concat(
                preds_host, logits, padding_index=-100)
            labels_host = labels if labels_host is None else trainer_pt_utils.nested_concat(
                labels_host, labels, padding_index=-100)
            eval_losses_gatherer.add_arrays(
                trainer_pt_utils.nested_numpify(losses_host))
            preds_gatherer.add_arrays(
                trainer_pt_utils.nested_numpify(preds_host))
            labels_gatherer.add_arrays(
                trainer_pt_utils.nested_numpify(labels_host))
            losses_host, preds_host, labels_host = None, None, None

        eval_loss = eval_losses_gatherer.finalize()
        preds = preds_gatherer.finalize()
        labels_ids = labels_gatherer.finalize()

        if self.type_score == "PER":
            preds_ids = np.argmax(preds, axis=-1)

            predicted_phonemes = self.processor.batch_decode(
                torch.from_numpy(preds_ids))
            true_phonemes = self.processor.batch_decode(
                torch.from_numpy(labels_ids))

            per = generate_per_score(true_phonemes, predicted_phonemes)

            return per

        elif self.type_score == "WER":
            pred = EvalPrediction(predictions=preds, label_ids=labels_ids)
            pred_logits = pred.predictions
            pred_ids = np.argmax(pred_logits, axis=-1)

            pred.label_ids[pred.label_ids ==
                           -100] = self.processor.tokenizer.pad_token_id

            pred_str = self.processor.batch_decode(pred_ids)

            # we do not want to group tokens when computing the metrics
            label_str = self.processor.batch_decode(pred.label_ids,
                                                    group_tokens=False)

            metrics = compute_wer(pred_str, label_str)
            metrics = denumpify_detensorize(metrics)
            metrics["t_loss"] = eval_loss.mean().item()
            wer = PredictionOutput(preds, labels_ids, metrics).metrics["wer"]

            return wer
        start_logits, end_logits = outputs
        start_logits = torch.tensor(start_logits)
        end_logits = torch.tensor(end_logits)

        # necessary to pad predictions and labels for being gathered
        start_logits = accelerator.pad_across_processes(start_logits,
                                                        dim=1,
                                                        pad_index=-100)
        end_logits = accelerator.pad_across_processes(end_logits,
                                                      dim=1,
                                                      pad_index=-100)

        logits = (accelerator.gather(start_logits).cpu().numpy(),
                  accelerator.gather(end_logits).cpu().numpy())
        all_preds = logits if all_preds is None else nested_concat(
            all_preds, logits, padding_index=-100)

    if all_preds is not None:
        all_preds = nested_truncate(all_preds, len(eval_dataset))

    evalTime = timeit.default_timer() - start_time
    logger.info("  Evaluation done in total %f secs (%f sec per example)",
                evalTime, evalTime / len(eval_dataset))
    # Inference time from TRT
    logger.info("Average Inference Time = {:.3f} ms".format(total_time * 1000 /
                                                            niter))
    logger.info("Total Inference Time =  {:.3f} ms".format(total_time * 1000))
    logger.info("Total Number of Inference =  %d", niter)

prediction = post_processing_function(eval_examples, eval_dataset, all_preds)
eval_metric = metric.compute(predictions=prediction.predictions,
Esempio n. 4
0
    def prediction_loop(
        self,
        dataloader: DataLoader,
        description: str,
        prediction_loss_only: Optional[bool] = None,
        ignore_keys: Optional[List[str]] = None,
        metric_key_prefix: str = "eval",
    ) -> PredictionOutput:
        """
        Prediction/evaluation loop, shared by :obj:`Trainer.evaluate()` and :obj:`Trainer.predict()`.

        Works both with or without labels.
        """
        if not isinstance(dataloader.dataset, collections.abc.Sized):
            raise ValueError("dataset must implement __len__")
        prediction_loss_only = (
            prediction_loss_only if prediction_loss_only is not None else self.args.prediction_loss_only
        )

        model = self.model
        # multi-gpu eval
        if self.args.n_gpu > 1:
            model = torch.nn.DataParallel(model)
        # Note: in torch.distributed mode, there's no point in wrapping the model
        # inside a DistributedDataParallel as we'll be under `no_grad` anyways.

        batch_size = dataloader.batch_size
        num_examples = self.num_examples(dataloader)
        logger.info("***** Running %s *****", description)
        logger.info("  Num examples = %d", num_examples)
        logger.info("  Batch size = %d", batch_size)
        losses_host: torch.Tensor = None
        preds_host: Union[torch.Tensor, List[torch.Tensor]] = None
        labels_host: Union[torch.Tensor, List[torch.Tensor]] = None

        world_size = 1
        if is_torch_tpu_available():
            world_size = xm.xrt_world_size()
        elif self.args.local_rank != -1:
            world_size = torch.distributed.get_world_size()
        world_size = max(1, world_size)

        eval_losses_gatherer = DistributedTensorGatherer(world_size, num_examples, make_multiple_of=batch_size)
        if not prediction_loss_only:
            preds_gatherer = DistributedTensorGatherer(world_size, num_examples)
            labels_gatherer = DistributedTensorGatherer(world_size, num_examples)

        model.eval()

        if is_torch_tpu_available():
            dataloader = pl.ParallelLoader(dataloader, [self.args.device]).per_device_loader(self.args.device)

        if self.args.past_index >= 0:
            self._past = None

        self.callback_handler.eval_dataloader = dataloader

        for step, inputs in enumerate(dataloader):
            loss, logits, labels = self.prediction_step(model, inputs, prediction_loss_only, ignore_keys=ignore_keys)
            if loss is not None:
                losses = loss.repeat(batch_size)
                losses_host = losses if losses_host is None else torch.cat((losses_host, losses), dim=0)
            if logits is not None:
                # preds_host = logits if preds_host is None else nested_concat(preds_host, logits, padding_index=-100)
                logits_reduced = logits.argmax(-1)
                preds_host = logits_reduced if preds_host is None else nested_concat(preds_host, logits_reduced, padding_index=-100)
            if labels is not None:
                labels_host = labels if labels_host is None else nested_concat(labels_host, labels, padding_index=-100)
            self.control = self.callback_handler.on_prediction_step(self.args, self.state, self.control)

            # Gather all tensors and put them back on the CPU if we have done enough accumulation steps.
            if self.args.eval_accumulation_steps is not None and (step + 1) % self.args.eval_accumulation_steps == 0:
                eval_losses_gatherer.add_arrays(self._gather_and_numpify(losses_host, "eval_losses"))
                if not prediction_loss_only:
                    preds_gatherer.add_arrays(self._gather_and_numpify(preds_host, "eval_preds"))
                    labels_gatherer.add_arrays(self._gather_and_numpify(labels_host, "eval_label_ids"))

                # Set back to None to begin a new accumulation
                losses_host, preds_host, labels_host = None, None, None

        if self.args.past_index and hasattr(self, "_past"):
            # Clean the state at the end of the evaluation loop
            delattr(self, "_past")

        # Gather all remaining tensors and put them back on the CPU
        eval_losses_gatherer.add_arrays(self._gather_and_numpify(losses_host, "eval_losses"))
        if not prediction_loss_only:
            preds_gatherer.add_arrays(self._gather_and_numpify(preds_host, "eval_preds"))
            labels_gatherer.add_arrays(self._gather_and_numpify(labels_host, "eval_label_ids"))

        eval_loss = eval_losses_gatherer.finalize()
        preds = preds_gatherer.finalize() if not prediction_loss_only else None
        label_ids = labels_gatherer.finalize() if not prediction_loss_only else None

        if self.compute_metrics is not None and preds is not None and label_ids is not None:
            metrics = self.compute_metrics(EvalPrediction(predictions=preds, label_ids=label_ids))
        else:
            metrics = {}

        if eval_loss is not None:
            metrics[f"{metric_key_prefix}_loss"] = eval_loss.mean().item()

        # Prefix all keys with metric_key_prefix + '_'
        for key in list(metrics.keys()):
            if not key.startswith(f"{metric_key_prefix}_"):
                metrics[f"{metric_key_prefix}_{key}"] = metrics.pop(key)

        return PredictionOutput(predictions=preds, label_ids=label_ids, metrics=metrics)
Esempio n. 5
0
    def evaluate(self,
                 dataset,
                 data_collator=None,
                 description="",
                 metric_key_prefix="eval",
                 compute_metrics=None):
        # predicition with single device

        eval_sampler = SequentialSampler(dataset)
        eval_dataloader = DataLoader(
            dataset,
            sampler=eval_sampler,
            batch_size=self.args.eval_batch_size,
            collate_fn=self.data_collator
            if data_collator is None else data_collator,
            num_workers=self.args.dataloader_num_workers)

        batch_size = eval_dataloader.batch_size
        num_examples = len(eval_dataloader.dataset)
        logger.info("***** Running {} *****".format(description))
        logger.info("  Num examples = %d", len(dataset))
        logger.info("  Batch size = %d", self.args.eval_batch_size)
        losses_host: torch.Tensor = None
        preds_host: Union[torch.Tensor, List[torch.Tensor]] = None
        labels_host: Union[torch.Tensor, List[torch.Tensor]] = None

        world_size = max(1, self.args.world_size)
        compute_metrics = self.compute_metrics if compute_metrics is None else compute_metrics
        prediction_loss_only = True if compute_metrics is None else None

        eval_losses_gatherer = DistributedTensorGatherer(
            world_size, num_examples, make_multiple_of=batch_size)
        if not prediction_loss_only:
            # The actual number of eval_sample can be greater than num_examples in distributed settings (when we pass
            # a batch size to the sampler)
            make_multiple_of = None
            if hasattr(eval_dataloader, "sampler") and isinstance(
                    eval_dataloader.sampler, SequentialDistributedSampler):
                make_multiple_of = eval_dataloader.sampler.batch_size
            preds_gatherer = DistributedTensorGatherer(
                world_size, num_examples, make_multiple_of=make_multiple_of)
            labels_gatherer = DistributedTensorGatherer(
                world_size, num_examples, make_multiple_of=make_multiple_of)

        model = self._wrap_model(self.model)
        model.eval()

        all_example_ids = []
        start_time = timeit.default_timer()
        for step, inputs in enumerate(tqdm(eval_dataloader)):
            if 'example_ids' in inputs.keys():
                example_ids = inputs.pop('example_ids')
                all_example_ids += example_ids
            loss, logits, labels = self.prediction_step(
                model, inputs, prediction_loss_only)

            if loss is not None:
                losses = loss.repeat(eval_dataloader.batch_size)
                losses_host = losses if losses_host is None else torch.cat(
                    (losses_host, losses), dim=0)
            if logits is not None:
                preds_host = logits if preds_host is None else nested_concat(
                    preds_host, logits, padding_index=-100)
            if labels is not None:
                labels_host = labels if labels_host is None else nested_concat(
                    labels_host, labels, padding_index=-100)

            # Gather all remaining tensors and put them back on the CPU
        eval_losses_gatherer.add_arrays(nested_numpify(losses_host))
        if not prediction_loss_only:
            preds_gatherer.add_arrays(nested_numpify(preds_host))
            labels_gatherer.add_arrays(nested_numpify(labels_host))

        eval_loss = eval_losses_gatherer.finalize()
        preds = preds_gatherer.finalize() if not prediction_loss_only else None
        label_ids = labels_gatherer.finalize(
        ) if not prediction_loss_only else None

        if compute_metrics is not None and preds is not None and label_ids is not None:
            metrics = compute_metrics(EvalPrediction(predictions=preds,
                                                     label_ids=label_ids),
                                      all_example_ids=all_example_ids
                                      if len(all_example_ids) > 0 else None)
        else:
            metrics = {}

        # To be JSON-serializable, we need to remove numpy types or zero-d tensors
        metrics = denumpify_detensorize(metrics)

        eval_time = timeit.default_timer() - start_time
        logger.info("  Evaluation done in total %f secs (%f sec per example)",
                    eval_time, eval_time / len(dataset))

        if eval_loss is not None:
            metrics[f"{metric_key_prefix}_loss"] = eval_loss.mean().item()

        # Prefix all keys with metric_key_prefix + '_'
        for key in list(metrics.keys()):
            if not key.startswith(f"{metric_key_prefix}_"):
                metrics[f"{metric_key_prefix}_{key}"] = metrics.pop(key)

        return PredictionOutput(
            predictions=preds,
            label_ids=label_ids,
            metrics=metrics,
            example_ids=None if len(all_example_ids) == 0 else all_example_ids)
    def prediction_loop(
            self,
            dataloader: DataLoader,
            description: str,
            prediction_loss_only: Optional[bool] = None,
            ignore_keys: Optional[List[str]] = None,
            metric_key_prefix: str = "eval",
    ) -> PredictionOutput:
        """
        Prediction/evaluation loop, shared by :obj:`Trainer.evaluate()` and :obj:`Trainer.predict()`.
        Works both with or without labels.
        """
        if not isinstance(dataloader.dataset, collections.abc.Sized):
            raise ValueError("dataset must implement __len__")
        prediction_loss_only = (
            prediction_loss_only if prediction_loss_only is not None else self.args.prediction_loss_only
        )

        if self.args.deepspeed and not self.args.do_train:
            # no harm, but flagging to the user that deepspeed config is ignored for eval
            # flagging only for when --do_train wasn't passed as only then it's redundant
            logger.info("Detected the deepspeed argument but it will not be used for evaluation")

        model = self.model
        # multi-gpu eval
        if self.args.n_gpu > 1:
            model = torch.nn.DataParallel(model)
            # Note: in torch.distributed mode, there's no point in wrapping the model
            # inside a DistributedDataParallel as we'll be under `no_grad` anyways

        # if full fp16 is wanted on eval and this ``evaluation`` or ``predict`` isn't called while
        # ``train`` is running, half it first and then put on device

        batch_size = dataloader.batch_size
        num_examples = self.num_examples(dataloader)
        logger.info("***** Running %s *****", description)
        logger.info("  Num examples = %d", num_examples)
        logger.info("  Batch size = %d", batch_size)
        losses_host: torch.Tensor = None
        preds_host: Union[torch.Tensor, List[torch.Tensor]] = None
        labels_host: Union[torch.Tensor, List[torch.Tensor]] = None
        gumbel_host: Union[torch.Tensor, List[torch.Tensor]] = None
        sentence_labels_host: Union[torch.Tensor, List[torch.Tensor]] = None
        sentence_indicator_host: Union[torch.Tensor, List[torch.Tensor]] = None

        world_size = 1
        if is_torch_tpu_available():
            world_size = xm.xrt_world_size()
        elif self.args.local_rank != -1:
            world_size = dist.get_world_size()
        world_size = max(1, world_size)

        eval_losses_gatherer = DistributedTensorGatherer(world_size, num_examples, make_multiple_of=batch_size)
        if not prediction_loss_only:
            preds_gatherer = DistributedTensorGatherer(world_size, num_examples)
            labels_gatherer = DistributedTensorGatherer(world_size, num_examples)
            gumbel_gatherer = DistributedTensorGatherer(world_size, num_examples)
            sentence_labels_gatherer = DistributedTensorGatherer(world_size, num_examples)
            sentence_indicator_gatherer = DistributedTensorGatherer(world_size, num_examples)

        model.eval()

        if self.args.past_index >= 0:
            self._past = None

        self.callback_handler.eval_dataloader = dataloader

        for step, inputs in enumerate(dataloader):
            loss, logits, labels, gumbel_output, sentence_labels, sentence_indicator = self.prediction_step(model, inputs, prediction_loss_only, ignore_keys=ignore_keys)

            if loss is not None:
                losses = loss.repeat(batch_size)
                losses_host = losses if losses_host is None else torch.cat((losses_host, losses), dim=0)
            if logits is not None:
                preds_host = logits if preds_host is None else nested_concat(preds_host, logits, padding_index=-100)
            if labels is not None:
                labels_host = labels if labels_host is None else nested_concat(labels_host, labels, padding_index=-100)
            if gumbel_output is not None:
                gumbel_host = gumbel_output if gumbel_host is None else nested_concat(gumbel_host, gumbel_output, padding_index=-1)
            if sentence_labels is not None:
                sentence_labels_host = sentence_labels if sentence_labels_host is None else nested_concat(sentence_labels_host, sentence_labels, padding_index=-1)
            if sentence_indicator is not None:
                sentence_indicator_host = sentence_indicator if sentence_indicator_host is None else nested_concat(sentence_indicator_host, sentence_indicator, padding_index=-100)

            self.control = self.callback_handler.on_prediction_step(self.args, self.state, self.control)

            # Gather all tensors and put them back on the CPU if we have done enough accumulation steps.
            if self.args.eval_accumulation_steps is not None and (step + 1) % self.args.eval_accumulation_steps == 0:
                eval_losses_gatherer.add_arrays(self._gather_and_numpify(losses_host, "eval_losses"))
                if not prediction_loss_only:
                    preds_gatherer.add_arrays(self._gather_and_numpify(preds_host, "eval_preds"))
                    labels_gatherer.add_arrays(self._gather_and_numpify(labels_host, "eval_label_ids"))
                    gumbel_gatherer.add_arrays(self._gather_and_numpify(gumbel_host, "eval_gumbel_output"))
                    sentence_labels_gatherer.add_arrays(self._gather_and_numpify(sentence_labels_host, "eval_sentence_idxs"))
                    sentence_indicator_gatherer.add_arrays(self._gather_and_numpify(sentence_indicator_host, "eval_sentence_indicator"))

                # Set back to None to begin a new accumulation
                losses_host, preds_host, labels_host, gumbel_host, sentence_labels_host, sentence_indicator_host = None, None, None, None, None, None

        if self.args.past_index and hasattr(self, "_past"):
            # Clean the state at the end of the evaluation loop
            delattr(self, "_past")

        # Gather all remaining tensors and put them back on the CPU
        eval_losses_gatherer.add_arrays(self._gather_and_numpify(losses_host, "eval_losses"))
        if not prediction_loss_only:
            preds_gatherer.add_arrays(self._gather_and_numpify(preds_host, "eval_preds"))
            labels_gatherer.add_arrays(self._gather_and_numpify(labels_host, "eval_label_ids"))
            gumbel_gatherer.add_arrays(self._gather_and_numpify(gumbel_host, "eval_gumbel_output"))
            sentence_labels_gatherer.add_arrays(self._gather_and_numpify(sentence_labels_host, "eval_sentence_idxs"))
            sentence_indicator_gatherer.add_arrays(self._gather_and_numpify(sentence_indicator_host, "eval_sentence_indicator"))

        eval_loss = eval_losses_gatherer.finalize()
        preds = preds_gatherer.finalize() if not prediction_loss_only else None
        label_ids = labels_gatherer.finalize() if not prediction_loss_only else None
        gumbel_outputs = gumbel_gatherer.finalize() if not prediction_loss_only else None
        sentence_idxs = sentence_labels_gatherer.finalize() if not prediction_loss_only else None
        sentence_indicators = sentence_indicator_gatherer.finalize() if not prediction_loss_only else None
        print(sentence_idxs, 'test')

        if self.compute_metrics is not None and preds is not None and label_ids is not None:
            metrics = self.compute_metrics(preds, label_ids, gumbel_outputs, sentence_idxs, sentence_indicators)
        else:
            metrics = {}

        if eval_loss is not None:
            metrics[f"{metric_key_prefix}_loss"] = eval_loss.mean().item()

        # Prefix all keys with metric_key_prefix + '_'
        for key in list(metrics.keys()):
            if not key.startswith(f"{metric_key_prefix}_"):
                metrics[f"{metric_key_prefix}_{key}"] = metrics.pop(key)

        return PredictionOutput(predictions=preds, label_ids=label_ids, metrics=metrics)