Example 1
    def evaluate_by_epochs(self, dataloader):
        """Evaluate dataset using the averaged models.

        In each epoch each process loads models and averages them. The averaged model is
        used to evaluate train / validation dataset.

        Args:
            dataloader (:obj:`torch.utils.data.DataLoader`): The dataset to be evaluated.

        Returns:
            list: list of stats of models in each epoch.
        """
        stats_list = []
        for epoch in range(self.epochs):
            # Same model for all workers.
            model = self._load_model(epoch)
            model.eval()

            stats = {"epoch": epoch, "count": 0, "total_loss": 0}
            for metric in self.metrics:
                stats['total_' + metric.name] = 0

            data_iter = iterate_dataloader(
                dataloader,
                self.dtype,
                self.max_batch_per_epoch,
                self.use_cuda)

            with torch.no_grad():
                for i, (data, target) in enumerate(data_iter):
                    output = model(data)

                    # Compute loss and metrics.
                    count = len(target)
                    stats["count"] += count
                    stats['total_loss'] += self.loss_function(
                        output, target) * count
                    for metric in self.metrics:
                        stats['total_' +
                              metric.name] += metric(output, target) * count

                    logger.info("E{:4}B{:4}: total loss={:10.3e}"
                                .format(epoch, i, stats['total_loss'] / stats['count']))

            # Keep globally averaged loss / metrics, etc.
            stats["loss"] = global_average(
                stats["total_loss"], stats["count"]).item()
            for metric in self.metrics:
                stats[metric.name] = global_average(
                    stats['total_' + metric.name], stats['count']).item()
                del stats['total_' + metric.name]
            del stats['count'], stats['total_loss']

            stats_list.append(stats)
        return stats_list
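Every example in this collection funnels its worker-local sums through `global_average`. The helper below is only a minimal sketch of what such a function is assumed to do (all-reduce the summed value and the count across workers, then divide); the real mlbench_core implementation may differ.

import torch
import torch.distributed as dist


def global_average_sketch(total, count):
    """Hypothetical stand-in for global_average: all-reduce sums, then divide."""
    t = torch.tensor([float(total), float(count)], dtype=torch.float64)
    if dist.is_available() and dist.is_initialized():
        # Sum the local totals and counts over all workers.
        dist.all_reduce(t, op=dist.ReduceOp.SUM)
    return t[0] / t[1]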
Example 2
def _validate(
    dataloader,
    model,
    loss_function,
    metrics,
    dtype,
    transform_target_type=None,
    use_cuda=False,
    max_batch_per_epoch=None,
):
    """Evaluate the model on the test dataset.

    Args:
        dataloader (:obj:`torch.utils.data.DataLoader`): The validation set
        model (`obj`:torch.nn.Module): The model to train
        loss_function (`obj`:torch.nn.Module): The loss function
        metrics (list): List of metrics to track
        dtype (str): The datatype to use, one of `fp32` or `fp64`
        transform_target_type (str): Datatype to convert data to, default: `None`
        use_cuda (bool): Whether to use GPU for training, default: `False`
        max_batch_per_epoch (int): Maximum number of batches per epoch,
                                   default: `None` (all batches)
    """
    # Initialize the accumulators for loss and metrics
    losses = AverageMeter()
    for metric in metrics:
        metric.reset()

    # Each worker computes its own losses and metrics
    with torch.no_grad():
        data_iter = iterate_dataloader(
            dataloader, dtype, max_batch_per_epoch, use_cuda, transform_target_type
        )

        for data, target in data_iter:
            # Inference
            output = model(data)

            # Compute loss
            loss = loss_function(output, target)

            # Update loss
            losses.update(loss.item(), data.size(0))

            # Update metrics
            for metric in metrics:
                metric_value = metric(loss, output, target)
                metric.update(metric_value, data.size(0))

    # Aggregate metrics and loss for all workers
    metrics_averages = {metric: metric.average().item() for metric in metrics}
    loss_average = global_average(losses.sum, losses.count).item()
    return metrics_averages, loss_average
Example 3
def validation_round(
    val_loader,
    metrics,
    model,
    loss_func,
    iter_size,
    translator,
    tracker=None,
    use_cuda=False,
):
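    """Performs one round of validation for a translation model.

    Args:
        val_loader (:obj:`torch.utils.data.DataLoader`): The validation set
        metrics (list): List of metrics to compute
        model (:obj:`torch.nn.Module`): Model to evaluate
        loss_func (:obj:`torch.nn.Module`): Loss function
        iter_size (int): Scaling factor passed to `compute_loss`
        translator: Object whose `translate(data, target)` yields the
            translations and targets used to update the metrics
        tracker (:obj:`mlbench_core.utils.Tracker`, optional): Tracker object
        use_cuda (bool): Use GPU acceleration. Default: `False`

    Returns:
        (dict, float): Metric averages and loss average
    """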
    # Set tracker and model in eval mode
    model.eval()
    if tracker:
        tracker.validation()
        tracker.validation_start()

    losses = AverageMeter()

    # Reset metrics
    for metric in metrics:
        metric.reset()

    with torch.no_grad():
        for (data, target) in val_loader:
            data, target = prepare_batch(data, target, use_cuda=use_cuda)
            output = compute_model_output(model, data, target)

            # Compute loss
            loss, loss_per_token = compute_loss(data, target, output,
                                                loss_func, iter_size)

            # Update loss
            losses.update(loss_per_token, 1)

            # Update metrics
            translated, targets = translator.translate(data, target)
            for metric in metrics:
                metric_value = metric(translated, targets)
                size = data[0].shape[1]

                metric.update(metric_value, size)

    metrics_averages = {metric: metric.average().item() for metric in metrics}
    loss_average = global_average(losses.sum, losses.count).item()

    if tracker:
        tracker.validation_end()
    return metrics_averages, loss_average
Example 4
def record_validation_stats(metrics_values, loss, tracker=None, rank=0):
    """Records the stats of a previously run validation

    Args:
        metrics_values (dict): Dictionary of each metric's average.
        loss (float): Validation loss
        tracker (`obj`:mlbench_core.utils.Tracker, optional): Tracker object to use.
        rank (int): Current distributed rank

    Returns:
        (bool): Whether this validation round is the best
    """
    if len(metrics_values) > 0:
        # Save
        if tracker:
            for metric, value in metrics_values.items():
                tracker.record_metric(metric, value, log_to_api=rank == 0)

                tracker.record_stat(
                    "global_{}".format(metric.name),
                    value,
                    log_to_api=rank == 0,
                )

        if rank == 0 and tracker:
            logger.info(
                "{} for rank {}:(best epoch {}, current epoch {}): {:.3f}".
                format(
                    tracker.primary_metric.name,
                    tracker.rank,
                    tracker.best_epoch,
                    tracker.current_epoch,
                    tracker.best_metric_value,
                ))
    else:
        if rank == 0:
            logger.info("Validation loss={:.3f}".format(loss))

    if tracker:
        tracker.record_loss(loss, log_to_api=True)

        global_loss = global_average(loss, 1).item()

        if rank == 0:
            tracker.record_stat("global_loss", global_loss, log_to_api=True)

    return tracker.is_best() if tracker else False
Example 5
def validation_round(loader, metrics, criterion, translator, tracker, use_cuda=False):
    """Performs one round of validation for the Transformer model

    Args:
        loader (:obj:`torch.utils.data.DataLoader`): Data loader
        metrics (list): List of metrics for evaluation
        criterion (:obj:`torch.nn.Module`): Loss function
        translator (:obj:`mlbench_core.models.pytorch.transformer.SequenceGenerator`): Translator module
        tracker (:obj:`mlbench_core.utils.Tracker`): Current Tracker
        use_cuda (bool): Use GPU acceleration. Default: `False`.

    Returns:
        (dict of :obj:`mlbench_core.evaluation.pytorch.MLBenchMetric`: float, float):
            The metrics averages over all workers, and the loss average.
    """
    model = translator.model
    model.eval()
    tracker.validation()
    tracker.validation_start()

    losses = AverageMeter()
    for metric in metrics:
        metric.reset()

    with torch.no_grad():
        for batch in loader:
            batch = prepare_batch(batch, use_cuda=use_cuda)
            output = model(**batch["net_input"])

            loss, sample_size = compute_loss(batch, output, criterion)

            losses.update(loss.item() / sample_size, 1)

            translated, targets = translator.translate_batch(batch)
            for metric in metrics:
                metric_value = metric(translated, targets)
                size = batch["target"].size(0)  # Number of translated sentences
                metric.update(metric_value, size)

    metric_averages = {metric: metric.average().item() for metric in metrics}
    loss_average = global_average(losses.sum, losses.count)

    tracker.validation_end()

    return metric_averages, loss_average
Example 6
    def validate(self, dataloader):
        r"""Validate the quality of the model in terms of loss and metrics.

        Args:
            dataloader (:obj:`torch.utils.data.DataLoader`): The validation set
        """
        # Turn on evaluation mode for the model
        self.model.eval()

        # Initialize the accumulators for loss and metrics
        losses = AverageMeter()
        for metric in self.metrics:
            metric.reset()

        # Each worker computes its own losses and metrics
        with torch.no_grad():
            data_iter = iterate_dataloader(dataloader, self.dtype,
                                           self.max_batch_per_epoch,
                                           self.use_cuda,
                                           self.transform_target_type)

            for data, target in data_iter:
                # Inference
                output = self.model(data)

                # Compute loss
                loss = self.loss_function(output, target)

                # Update loss
                losses.update(loss.item(), data.size(0))

                # Update metrics
                for metric in self.metrics:
                    metric_value = metric(output, target)
                    metric.update(metric_value, data.size(0))

        # Aggregate metrics and loss for all workers
        metrics_averages = {
            metric.name: metric.average().item()
            for metric in self.metrics
        }
        loss_average = global_average(losses.sum, losses.count).item()
        return metrics_averages, loss_average
Example 7
def validation_round(loader,
                     metrics,
                     criterion,
                     translator,
                     tracker=None,
                     use_cuda=False):
    model = translator.model
    model.eval()
    if tracker:
        tracker.validation()
        tracker.validation_start()

    losses = AverageMeter()
    for metric in metrics:
        metric.reset()

    with torch.no_grad():
        for batch in loader:
            batch = prepare_batch(batch, use_cuda=use_cuda)
            output = model(**batch["net_input"])

            loss, sample_size = compute_loss(batch, output, criterion)

            losses.update(loss.item() / sample_size, 1)

            translated, targets = translator.translate_batch(batch)
            for metric in metrics:
                metric_value = metric(loss.item(), translated, targets)
                size = batch["target"].size(0)  # Number of translated sentences
                metric.update(metric_value, size)

    metric_averages = {metric: metric.average().item() for metric in metrics}
    loss_average = global_average(losses.sum, losses.count)

    if tracker:
        tracker.validation_end()

    return metric_averages, loss_average
Example 8
    def average(self):
        """Average stats."""
        return global_average(self.top.sum, self.top.count)
Example 9
    def average(self):
        return global_average(self.average_meter.sum, self.average_meter.count)
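The `average()` methods in Examples 8 and 9 combine an accumulator's running `sum` and `count` with `global_average`. For orientation, a minimal accumulator along the lines of the `AverageMeter` used throughout these examples might look as follows; this is an illustrative sketch, not the actual mlbench_core class.

class AverageMeterSketch:
    """Hypothetical stand-in for AverageMeter: running sum, count, and average."""

    def __init__(self):
        self.reset()

    def reset(self):
        self.sum = 0.0
        self.count = 0
        self.avg = 0.0

    def update(self, val, n=1):
        # `val` is a per-sample value; `n` is the number of samples it covers.
        self.sum += val * n
        self.count += n
        self.avg = self.sum / self.count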
Example 10
def validation_round(
    dataloader,
    model,
    loss_function,
    metrics,
    run_id,
    rank,
    dtype,
    transform_target_type=None,
    use_cuda=False,
    max_batch_per_epoch=None,
    tracker=None,
):
    """ Handles one full iteration of validation on the whole validation set.

    Args:
        dataloader (:obj:`torch.utils.data.DataLoader`): The validation set
        model (`obj`:torch.nn.Module): The model to train
        loss_function (`obj`:torch.nn.Module): The loss function
        metrics (list): List of metrics to track
        run_id (int): The id of the current run
        rank (int): The rank of the current worker node
        dtype (str): The datatype to use, one of `fp32` or `fp64`
        transform_target_type (str): Datatype to convert data to, default: `None`
        use_cuda (bool): Whether to use GPU for training, default: `False`
        max_batch_per_epoch (int): Maximum number of batches per epoch,
                                   default: `None` (all batches)
        tracker (`obj`:mlbench_core.utils.Tracker): Tracker object to use, default: `None`
    """
    model.eval()

    if tracker:
        tracker.validation()

        tracker.validation_start()

    metrics_values, loss = _validate(
        dataloader,
        model,
        loss_function,
        metrics,
        dtype,
        transform_target_type,
        use_cuda,
        max_batch_per_epoch,
    )
    if tracker:
        tracker.validation_end()

    if len(metrics_values) > 0:
        # Save
        if tracker:
            for metric, value in metrics_values.items():
                tracker.record_metric(metric, value, log_to_api=True)

                global_metric_value = global_average(value, 1).item()

                if rank == 0:
                    tracker.record_stat(
                        "global_{}".format(metric.name),
                        global_metric_value,
                        log_to_api=True,
                    )

        if rank == 0 and tracker:
            logger.info(
                "{} for rank {}:(best epoch {}, current epoch {}): {:.3f}".
                format(
                    tracker.primary_metric.name,
                    tracker.rank,
                    tracker.best_epoch,
                    tracker.current_epoch,
                    tracker.best_metric_value,
                ))
    else:
        if rank == 0:
            logger.info("Validation loss={:.3f}".format(loss))

    if tracker:
        tracker.record_loss(loss, log_to_api=True)

        global_loss = global_average(loss, 1).item()

        if rank == 0:
            tracker.record_stat("global_loss", global_loss, log_to_api=True)

    return tracker.is_best() if tracker else False
Example 11
def validation_round(
    dataloader,
    model,
    loss_function,
    metrics,
    dtype,
    tracker=None,
    transform_target_type=False,
    use_cuda=False,
    max_batches=None,
):
    """Evaluate the model on the test dataset.

    Args:
        dataloader (`obj`:torch.utils.data.DataLoader): The validation set
        model (`obj`:torch.nn.Module): The model to train
        loss_function (`obj`:torch.nn.Module): The loss function
        metrics (list): List of metrics to track
        dtype (str): The datatype to use, one of `fp32` or `fp64`
        tracker (`obj`:mlbench_core.utils.Tracker | None): Tracker object to use.
        transform_target_type (bool): Convert target to `dtype`. Default `False`
        use_cuda (bool): Whether to use GPU for training, default: `False`
        max_batches (int | None): Maximum number of batches to validate on

    Returns:
          (dict, float): Dictionary of average of each metric, and average validation loss
    """

    model.eval()
    if tracker:
        tracker.validation()
        tracker.validation_start()

    # Initialize the accumulators for loss and metrics
    losses = AverageMeter()
    for metric in metrics:
        metric.reset()

    # Each worker computes its own losses and metrics
    with torch.no_grad():

        data_iter = iterate_dataloader(dataloader, dtype, max_batches,
                                       use_cuda, transform_target_type)

        for data, target in data_iter:
            output = model(data)

            # Compute loss
            loss = loss_function(output, target)

            # Update loss
            losses.update(loss.item(), data.size(0))

            # Update metrics
            for metric in metrics:
                metric_value = metric(output, target)
                metric.update(metric_value, data.size(0))

    # Aggregate metrics and loss for all workers
    metrics_averages = {metric: metric.average().item() for metric in metrics}
    loss_average = global_average(losses.sum, losses.count).item()

    if tracker:
        tracker.validation_end()
    return metrics_averages, loss_average
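The pieces above can be combined: the metric dictionary and loss returned by this `validation_round` (Example 11) match what `record_validation_stats` (Example 4) expects. A usage sketch, in which the dataloader, model, loss function, metrics, tracker, rank, and checkpoint hook are all placeholders:

# Hypothetical wiring of Example 11 and Example 4.
metrics_values, loss = validation_round(
    val_dataloader,        # placeholder torch.utils.data.DataLoader
    model,                 # placeholder torch.nn.Module
    loss_function,         # placeholder torch.nn.Module loss
    metrics,               # placeholder list of metric objects
    dtype="fp32",
    tracker=tracker,
    use_cuda=True,
)

is_best = record_validation_stats(metrics_values, loss, tracker=tracker, rank=rank)
if is_best and rank == 0:
    save_checkpoint(model)  # placeholder checkpoint hook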
Example 12
def validation_round(
    val_set, model, batch_size, metrics, loss_function, tracker, use_cuda=False
):
    """Performs a validation round

    Args:
        val_set (:obj:): Validation set
        model (:obj:`torch.nn.Module`): Model to evaluate
        batch_size (int): Validation batch size
        metrics (list): List of metrics to compute
        loss_function (:obj:`torch.nn.Module`): Loss function
        tracker (:obj:`mlbench_core.utils.Tracker`): Tracker object
        use_cuda (bool): Use GPU acceleration

    Returns:
        (dict, float): Metric averages and total loss average
    """
    # Finished one epoch of training; decide whether to validate the model.
    tracker.validation()
    tracker.validation_start()

    # Each worker has finished one epoch of training.
    model.eval()

    losses = AverageMeter()
    for metric in metrics:
        metric.reset()

    # Each worker computes its own losses and metrics
    with torch.no_grad():
        hidden = model.init_hidden(batch_size)

        num_batches = val_set.num_batches()
        for batch_idx in range(num_batches):
            data, target = val_set.get_batch(batch_idx, cuda=use_cuda)
            batch_seq_len = data.size(0)
            # Inference
            output, hidden = model(data, hidden)

            # Compute loss
            loss = loss_function(output, target)

            # Update loss
            losses.update(loss.item(), batch_seq_len)

            hidden = repackage_hidden(hidden)

            # Update metrics
            for metric in metrics:
                metric_value = metric(output, target)
                metric.update(metric_value, 1)

    # Aggregate metrics and loss for all workers
    loss_average = global_average(losses.sum, losses.count)
    metrics_averages = {
        metric: torch.exp(loss_average).item()
        if metric.name == "Perplexity"
        else metric.average().item()
        for metric in metrics
    }

    logger.info(
        "Got loss {}, avg metric={}".format(
            loss_average,
            [m.average().item() for m in metrics if m.name == "Perplexity"][0],
        )
    )
    tracker.validation_end()

    return metrics_averages, loss_average.item()
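A note on Example 12: Perplexity is taken as `torch.exp` of the globally averaged loss rather than from the metric's own accumulator, because perplexity is defined as the exponential of the mean cross-entropy per token. A tiny check with made-up numbers:

import math

mean_cross_entropy = 4.2                   # hypothetical average loss, nats per token
perplexity = math.exp(mean_cross_entropy)
print(round(perplexity, 1))                # 66.7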