Code example #1
    def evaluate_by_epochs(self, dataloader):
        """Evaluate dataset using the averaged models.

        In each epoch, each process loads the saved models and averages them. The
        averaged model is then used to evaluate the train / validation dataset.

        Args:
            dataloader (:obj:`torch.utils.data.DataLoader`): The dataset to be evaluated.

        Returns:
            list: list of stats of models in each epoch.
        """
        stats_list = []
        for epoch in range(self.epochs):
            # Same model for all workers.
            model = self._load_model(epoch)
            model.eval()

            stats = {"epoch": epoch, "count": 0, "total_loss": 0}
            for metric in self.metrics:
                stats['total_' + metric.name] = 0

            data_iter = iterate_dataloader(
                dataloader,
                self.dtype,
                self.max_batch_per_epoch,
                self.use_cuda)

            with torch.no_grad():
                for i, (data, target) in enumerate(data_iter):
                    output = model(data)

                    # Compute loss and metrics.
                    count = len(target)
                    stats["count"] += count
                    stats['total_loss'] += self.loss_function(
                        output, target) * count
                    for metric in self.metrics:
                        stats['total_' +
                              metric.name] += metric(output, target) * count

                    logger.info("E{:4}B{:4}: mean loss={:10.3e}"
                                .format(epoch, i, stats['total_loss'] / stats['count']))

            # Keep globally averaged loss / metrics, etc.
            stats["loss"] = global_average(
                stats["total_loss"], stats["count"]).item()
            for metric in self.metrics:
                stats[metric.name] = global_average(
                    stats['total_' + metric.name], stats['count']).item()
                del stats['total_' + metric.name]
            del stats['count'], stats['total_loss']

            stats_list.append(stats)
        return stats_list
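
Note: `global_average` (used here and in the examples below) reduces local sums and counts across all workers. A minimal sketch of the idea, assuming a `torch.distributed` process group is already initialized (the actual mlbench-core implementation may differ):

import torch
import torch.distributed as dist


def global_average(total, count):
    # Sketch: pack the local sum and count into one tensor and all-reduce
    # them, so every worker ends up with the same global mean.
    pair = torch.tensor([float(total), float(count)])
    dist.all_reduce(pair, op=dist.ReduceOp.SUM)
    return pair[0] / pair[1]  # returns a tensor, hence .item() at call sites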
Code example #2
File: controlflow.py Project: ineiti/mlbench-core
def _validate(
    dataloader,
    model,
    loss_function,
    metrics,
    dtype,
    transform_target_type=None,
    use_cuda=False,
    max_batch_per_epoch=None,
):
    """Evaluate the model on the test dataset.

    Args:
        dataloader (:obj:`torch.utils.data.DataLoader`): The validation set
        model (`obj`:torch.nn.Module): The model to train
        loss_function (`obj`:torch.nn.Module): The loss function
        metrics (list): List of metrics to track
        dtype (str): The datatype to use, one of `fp32`or `fp64`
        transform_target_type (str): Datatype to convert data to, default: `None`
        use_cuda (bool): Whether to use GPU for training, default: `False`
        max_batch_per_epoch (int): Maximum number of batches tot rain for per epoch,
                                   default: `None` (all batches)
        """
    # Initialize the accumulators for loss and metrics
    losses = AverageMeter()
    for metric in metrics:
        metric.reset()

    # Each worker computes its own losses and metrics
    with torch.no_grad():
        data_iter = iterate_dataloader(
            dataloader, dtype, max_batch_per_epoch, use_cuda, transform_target_type
        )

        for data, target in data_iter:
            # Inference
            output = model(data)

            # Compute loss
            loss = loss_function(output, target)

            # Update loss
            losses.update(loss.item(), data.size(0))

            # Update metrics
            for metric in metrics:
                metric_value = metric(loss, output, target)
                metric.update(metric_value, data.size(0))

    # Aggregate metrics and loss for all workers
    metrics_averages = {metric: metric.average().item() for metric in metrics}
    loss_average = global_average(losses.sum, losses.count).item()
    return metrics_averages, loss_average
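
A hypothetical call site for `_validate` (names like `val_loader` and `top1_accuracy` are illustrative, not part of the source):

# Hypothetical usage; assumes a validation DataLoader, a trained model, and
# mlbench-style metric objects (reset() / update() / average()) already exist.
model.eval()  # note: unlike example #4, _validate does not switch modes itself
metrics_avg, loss_avg = _validate(
    val_loader,
    model,
    torch.nn.CrossEntropyLoss(),
    metrics=[top1_accuracy],
    dtype="fp32",
    use_cuda=torch.cuda.is_available(),
)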
Code example #3
    def train_epoch(self, dataloader):
        """Train model for one epoch of data.

        Args:
            dataloader (:obj:`torch.utils.data.DataLoader`): The train set
        """
        self.tracker.epoch_stats = {
            k: AverageMeter()
            for k in ["loss"] + [m.name for m in self.metrics]
        }
        # switch to train mode
        self.model.train()
        data_iter = iterate_dataloader(dataloader, self.dtype,
                                       self.max_batch_per_epoch, self.use_cuda,
                                       self.transform_target_type)

        for batch_idx, (data, target) in enumerate(data_iter):
            self.tracker.batch_stats = [("start", time.time())]

            if self.schedule_per == 'batch':
                self.scheduler.step()

            # Clear gradients in the optimizer.
            self.optimizer.zero_grad()
            self.tracker.batch_stats.append(('init', time.time()))

            # Compute the output
            output = self.model(data)
            self.tracker.batch_stats.append(('fwd_pass', time.time()))

            # Compute the loss
            loss = self.loss_function(output, target)
            self.tracker.batch_stats.append(('comp_loss', time.time()))

            # Backprop
            loss.backward()
            self.tracker.batch_stats.append(('backprop', time.time()))

            # Aggregate gradients/parameters from all workers and apply updates to model
            self.optimizer.step()
            self.tracker.batch_stats.append(('opt_step', time.time()))

            self.record_train_batch_stats(batch_idx, loss.item(), output,
                                          target)
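
Examples #2-#4 accumulate per-batch values with `AverageMeter`. A minimal sketch of that accumulator pattern (hypothetical; the class in `mlbench_core.utils` may carry more state, and the metric objects additionally average across workers):

class AverageMeter:
    """Tracks a batch-size-weighted running sum so average() is a weighted mean."""

    def __init__(self):
        self.reset()

    def reset(self):
        self.sum = 0.0
        self.count = 0

    def update(self, value, n=1):
        # `value` is a per-batch mean, so weight it by the batch size `n`.
        self.sum += value * n
        self.count += n

    def average(self):
        # Local mean only; the mlbench metrics also reduce across workers.
        return self.sum / self.count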
Code example #4
    def validate(self, dataloader):
        r"""Validate the quality of the model in terms of loss and metrics.

        Args:
            dataloader (:obj:`torch.utils.data.DataLoader`): The validation set
        """
        # Turn on evaluation mode for the model
        self.model.eval()

        # Initialize the accumulators for loss and metrics
        losses = AverageMeter()
        for metric in self.metrics:
            metric.reset()

        # Each worker computes its own losses and metrics
        with torch.no_grad():
            data_iter = iterate_dataloader(dataloader, self.dtype,
                                           self.max_batch_per_epoch,
                                           self.use_cuda,
                                           self.transform_target_type)

            for data, target in data_iter:
                # Inference
                output = self.model(data)

                # Compute loss
                loss = self.loss_function(output, target)

                # Update loss
                losses.update(loss.item(), data.size(0))

                # Update metrics
                for metric in self.metrics:
                    metric_value = metric(output, target)
                    metric.update(metric_value, data.size(0))

        # Aggregate metrics and loss for all workers
        metrics_averages = {
            metric.name: metric.average().item()
            for metric in self.metrics
        }
        loss_average = global_average(losses.sum, losses.count).item()
        return metrics_averages, loss_average
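
The return value pairs a metric-name dict with the globally averaged loss. A hypothetical caller (here `trainer` stands in for whatever object owns validate()):

metrics_avg, loss_avg = trainer.validate(val_loader)  # hypothetical names
for name, value in metrics_avg.items():
    print("val {}: {:.4f}".format(name, value))
print("val loss: {:.4f}".format(loss_avg))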
Code example #5
def test_iterate_dataloader(mocker):
    dataloader = [(torch.IntTensor([0]), torch.IntTensor([1])),
                  (torch.IntTensor([2]), torch.IntTensor([3]))]

    it = iterate_dataloader(dataloader,
                            'fp32',
                            max_batch_per_epoch=2,
                            transform_target_type=True)

    first = next(it)

    assert first[0].dtype == torch.float32
    assert first[1].dtype == torch.float32
    assert first[0].item() == 0.0
    assert first[1].item() == 1.0

    second = next(it)

    assert second[0].dtype == torch.float32
    assert second[1].dtype == torch.float32
    assert second[0].item() == 2.0
    assert second[1].item() == 3.0
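
This test pins down the contract of `iterate_dataloader`: cast inputs to the requested dtype, optionally cast the target as well, and stop after `max_batch_per_epoch` batches. A minimal sketch consistent with that contract (assumed, not the actual mlbench-core helper; it treats any truthy `transform_target_type` as "cast the target to the input dtype"):

import torch


def iterate_dataloader(dataloader, dtype, max_batch_per_epoch=None,
                       use_cuda=False, transform_target_type=None):
    # Sketch: yield (data, target) batches with dtype/device handling applied.
    torch_dtype = torch.float32 if dtype == "fp32" else torch.float64
    for batch_idx, (data, target) in enumerate(dataloader):
        if max_batch_per_epoch is not None and batch_idx >= max_batch_per_epoch:
            break
        data = data.to(dtype=torch_dtype)
        if transform_target_type:
            target = target.to(dtype=torch_dtype)
        if use_cuda:
            data, target = data.cuda(), target.cuda()
        yield data, target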
Code example #6
def train_round(
    dataloader,
    model,
    optimizer,
    loss_function,
    metrics,
    scheduler,
    dtype,
    schedule_per="epoch",
    transform_target_type=None,
    use_cuda=False,
    max_batch_per_epoch=None,
    tracker=None,
):
    """ Performs max_batch_per_epoch batches of training (or full trainset if
    not specified)

    Args:
        dataloader (:obj:`torch.utils.data.DataLoader`): The train set
        model (:obj:`torch.nn.Module`): The model to train
        optimizer (:obj:`torch.optim.Optimizer`): The optimizer
        loss_function (:obj:`torch.nn.Module`): The loss function
        metrics (list): List of metrics to track
        scheduler (:obj:`torch.optim.lr_scheduler`): Learning Rate scheduler
        dtype (str): The datatype to use, one of `fp32` or `fp64`
        schedule_per (str): Learning Rate scheduler mode, one of `batch` or `epoch`
        transform_target_type (str): Datatype to convert the target to, default: `None`
        use_cuda (bool): Whether to use GPU for training, default: `False`
        max_batch_per_epoch (int): Maximum number of batches to train per epoch,
                                   default: `None` (all batches)
        tracker (:obj:`mlbench_core.utils.Tracker`): Tracker object to use.
    """
    model.train()

    if tracker:
        tracker.train()

    data_iter = iterate_dataloader(dataloader, dtype, max_batch_per_epoch,
                                   use_cuda, transform_target_type)

    num_batches_per_device_train = len(dataloader)

    for batch_idx, (data, target) in enumerate(data_iter):
        if tracker:
            tracker.batch_start()

        # Clear gradients in the optimizer.
        optimizer.zero_grad()
        if tracker:
            tracker.record_batch_step("init")

        # Compute the output
        output = model(data)
        if tracker:
            tracker.record_batch_step("fwd_pass")

        # Compute the loss
        loss = loss_function(output, target)
        if tracker:
            tracker.record_batch_step("comp_loss")

        # Backprop
        loss.backward()
        if tracker:
            tracker.record_batch_step("backprop")

        # Aggregate gradients/parameters from all workers and apply updates to model
        optimizer.step()
        if tracker:
            tracker.record_batch_step("opt_step")

        if schedule_per == "batch":
            scheduler.step()

        if tracker:
            tracker.batch_end()

        _record_train_batch_stats(
            batch_idx,
            loss.item(),
            output,
            target,
            metrics,
            tracker,
            num_batches_per_device_train,
        )
    if schedule_per == "epoch":
        scheduler.step()
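
A hypothetical epoch loop driving `train_round` (all names here are illustrative; it assumes the model, optimizer, scheduler, and metrics are already built and, for multi-worker runs, that the distributed backend is initialized):

for epoch in range(num_epochs):  # num_epochs is a hypothetical config value
    train_round(
        train_loader,
        model,
        optimizer,
        loss_function,
        metrics,
        scheduler,
        dtype="fp32",
        schedule_per="epoch",  # scheduler.step() then runs once per epoch
        use_cuda=torch.cuda.is_available(),
        tracker=tracker,       # optional; pass None to skip timing stats
    )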