Exemple #1
0
def create_multigpu_supervised_trainer(
    net: torch.nn.Module,
    optimizer: Optimizer,
    loss_fn: Callable,
    devices: Optional[Sequence[torch.device]] = None,
    non_blocking: bool = False,
    prepare_batch: Callable = _prepare_batch,
    output_transform: Callable = _default_transform,
    distributed: bool = False,
):
    """
    Derived from `create_supervised_trainer` in Ignite.

    Factory function for creating a trainer for supervised models.

    Args:
        net: the network to train.
        optimizer: the optimizer to use.
        loss_fn: the loss function to use.
        devices: device(s) type specification (default: None).
            Applies to both model and batches. None is all devices used, empty list is CPU only.
        non_blocking: if True and this copy is between CPU and GPU, the copy may occur asynchronously
            with respect to the host. For other cases, this argument has no effect.
        prepare_batch: function that receives `batch`, `device`, `non_blocking` and outputs
            tuple of tensors `(batch_x, batch_y)`.
        output_transform: function that receives 'x', 'y', 'y_pred', 'loss' and returns value
            to be assigned to engine's state.output after each iteration. Default is returning `loss.item()`.
        distributed: whether convert model to `DistributedDataParallel`, if `True`, `devices` must contain
            only 1 GPU or CPU for current distributed rank.

    Returns:
        Engine: a trainer engine with supervised update function.

    Raises:
        ValueError: when `distributed` is `True` and `devices` resolves to more than one device.

    Note:
        `engine.state.output` for this engine is defined by `output_transform` parameter and is the loss
        of the processed batch by default.
    """

    devices_ = get_devices_spec(devices)
    if distributed:
        # Validate before wrapping so the caller gets this message rather than an
        # error raised from inside the DistributedDataParallel constructor.
        if len(devices_) > 1:
            raise ValueError(
                f"for distributed training, `devices` must contain only 1 GPU or CPU, but got {devices_}."
            )
        # `device_ids` is only valid for CUDA modules; PyTorch requires it to be None
        # for CPU modules, so it is omitted when the single target device is the CPU.
        if torch.device(devices_[0]).type == "cpu":
            net = DistributedDataParallel(net)
        else:
            net = DistributedDataParallel(net, device_ids=devices_)
    elif len(devices_) > 1:
        net = DataParallel(net)

    return create_supervised_trainer(net, optimizer, loss_fn, devices_[0],
                                     non_blocking, prepare_batch,
                                     output_transform)
Exemple #2
0
def create_multigpu_supervised_evaluator(
    net: torch.nn.Module,
    metrics: Optional[Dict[str, Metric]] = None,
    devices: Optional[Sequence[torch.device]] = None,
    non_blocking: bool = False,
    prepare_batch: Callable = _prepare_batch,
    output_transform: Callable = _default_eval_transform,
    distributed: bool = False,
):
    """
    Derived from `create_supervised_evaluator` in Ignite.

    Factory function for creating an evaluator for supervised models.

    Args:
        net: the model to evaluate.
        metrics: a map of metric names to Metrics.
        devices: device(s) type specification (default: None).
            Applies to both model and batches. None is all devices used, empty list is CPU only.
        non_blocking: if True and this copy is between CPU and GPU, the copy may occur asynchronously
            with respect to the host. For other cases, this argument has no effect.
        prepare_batch: function that receives `batch`, `device`, `non_blocking` and outputs
            tuple of tensors `(batch_x, batch_y)`.
        output_transform: function that receives 'x', 'y', 'y_pred' and returns value
            to be assigned to engine's state.output after each iteration. Default is returning `(y_pred, y,)`
            which fits output expected by metrics. If you change it you should use `output_transform` in metrics.
        distributed: whether convert model to `DistributedDataParallel`, if `True`, `devices` must contain
            only 1 GPU or CPU for current distributed rank.

    Note:
        `engine.state.output` for this engine is defined by `output_transform` parameter and is
        a tuple of `(batch_pred, batch_y)` by default.

    Returns:
        Engine: an evaluator engine with supervised inference function.

    Raises:
        ValueError: when `distributed` is `True` and `devices` resolves to more than one device.
    """

    devices_ = get_devices_spec(devices)

    if distributed:
        # Validate first: wrapping the model before this check could fail inside the
        # DistributedDataParallel constructor and hide the intended error message.
        if len(devices_) > 1:
            raise ValueError(
                f"for distributed evaluation, `devices` must contain only 1 GPU or CPU, but got {devices_}."
            )
        # `device_ids` is only valid for CUDA modules; PyTorch requires it to be None
        # for CPU modules, so it is omitted when the single target device is the CPU.
        if torch.device(devices_[0]).type == "cpu":
            net = DistributedDataParallel(net)
        else:
            net = DistributedDataParallel(net, device_ids=devices_)
    elif len(devices_) > 1:
        net = DataParallel(net)

    return create_supervised_evaluator(net, metrics, devices_[0], non_blocking,
                                       prepare_batch, output_transform)
def create_multigpu_supervised_trainer(
    net: torch.nn.Module,
    optimizer,
    loss_fn,
    devices=None,
    non_blocking: bool = False,
    prepare_batch: Callable = _prepare_batch,
    output_transform: Callable = _default_transform,
):
    """
    Build a supervised training engine, wrapping the network for multi-device use when needed.

    Derived from `create_supervised_trainer` in Ignite: the device specification is resolved
    with `get_devices_spec`, the network is wrapped in `torch.nn.parallel.DataParallel` when
    more than one device results, and the first resolved device is handed to Ignite.

    Args:
        net (`torch.nn.Module`): the network to train.
        optimizer (`torch.optim.Optimizer`): the optimizer to use.
        loss_fn (`torch.nn` loss function): the loss function to use.
        devices (list, optional): device(s) type specification (default: None).
            Applies to both model and batches. None is all devices used, empty list is CPU only.
        non_blocking: if True and this copy is between CPU and GPU, the copy may occur asynchronously
            with respect to the host. For other cases, this argument has no effect.
        prepare_batch: function that receives `batch`, `device`, `non_blocking` and outputs
            tuple of tensors `(batch_x, batch_y)`.
        output_transform: function that receives 'x', 'y', 'y_pred', 'loss' and returns value
            to be assigned to engine's state.output after each iteration. Default is returning `loss.item()`.

    Returns:
        Engine: a trainer engine with supervised update function.

    Note:
        `engine.state.output` for this engine is defined by `output_transform` parameter and is the loss
        of the processed batch by default.
    """

    device_list = get_devices_spec(devices)

    # Only wrap for data parallelism when more than one device was resolved.
    model = torch.nn.parallel.DataParallel(net) if len(device_list) > 1 else net

    # The first resolved device is the one passed on to the Ignite factory.
    primary_device = device_list[0]
    return create_supervised_trainer(
        model,
        optimizer,
        loss_fn,
        primary_device,
        non_blocking,
        prepare_batch,
        output_transform,
    )
def create_multigpu_supervised_evaluator(
    net: torch.nn.Module,
    metrics=None,
    devices=None,
    non_blocking: bool = False,
    prepare_batch: Callable = _prepare_batch,
    output_transform: Callable = _default_eval_transform,
):
    """
    Build a supervised evaluation engine, wrapping the model for multi-device use when needed.

    Derived from `create_supervised_evaluator` in Ignite: the device specification is resolved
    with `get_devices_spec`, the model is wrapped in `torch.nn.parallel.DataParallel` when
    more than one device results, and the first resolved device is handed to Ignite.

    Args:
        net (`torch.nn.Module`): the model to evaluate.
        metrics (dict of str - :class:`~ignite.metrics.Metric`): a map of metric names to Metrics.
        devices (list, optional): device(s) type specification (default: None).
            Applies to both model and batches. None is all devices used, empty list is CPU only.
        non_blocking: if True and this copy is between CPU and GPU, the copy may occur asynchronously
            with respect to the host. For other cases, this argument has no effect.
        prepare_batch: function that receives `batch`, `device`, `non_blocking` and outputs
            tuple of tensors `(batch_x, batch_y)`.
        output_transform: function that receives 'x', 'y', 'y_pred' and returns value
            to be assigned to engine's state.output after each iteration. Default is returning `(y_pred, y,)`
            which fits output expected by metrics. If you change it you should use `output_transform` in metrics.

    Note:
        `engine.state.output` for this engine is defined by `output_transform` parameter and is
        a tuple of `(batch_pred, batch_y)` by default.

    Returns:
        Engine: an evaluator engine with supervised inference function.
    """

    device_list = get_devices_spec(devices)

    # Only wrap for data parallelism when more than one device was resolved.
    model = torch.nn.parallel.DataParallel(net) if len(device_list) > 1 else net

    # The first resolved device is the one passed on to the Ignite factory.
    primary_device = device_list[0]
    return create_supervised_evaluator(
        model,
        metrics,
        primary_device,
        non_blocking,
        prepare_batch,
        output_transform,
    )