def create_multigpu_supervised_trainer(
    net: torch.nn.Module,
    optimizer: Optimizer,
    loss_fn: Callable,
    devices: Optional[Sequence[torch.device]] = None,
    non_blocking: bool = False,
    prepare_batch: Callable = _prepare_batch,
    output_transform: Callable = _default_transform,
    distributed: bool = False,
):
    """
    Build a trainer engine for supervised models, optionally spanning several devices.

    Derived from `create_supervised_trainer` in Ignite. The network is wrapped in
    `DistributedDataParallel` when `distributed` is True, or in `DataParallel` when more
    than one device is resolved; the resulting module is then handed to Ignite's
    `create_supervised_trainer` together with the first resolved device.

    Args:
        net: the network to train.
        optimizer: the optimizer to use.
        loss_fn: the loss function to use.
        devices: device(s) type specification (default: None), resolved through
            `get_devices_spec`. Applies to both model and batches.
            None is all devices used, empty list is CPU only.
        non_blocking: if True and this copy is between CPU and GPU, the copy may occur
            asynchronously with respect to the host. For other cases, this argument has no effect.
        prepare_batch: function that receives `batch`, `device`, `non_blocking` and outputs
            tuple of tensors `(batch_x, batch_y)`.
        output_transform: function that receives 'x', 'y', 'y_pred', 'loss' and returns value
            to be assigned to engine's state.output after each iteration. Default is returning
            `loss.item()`.
        distributed: whether to convert the model to `DistributedDataParallel`; if `True`,
            `devices` must contain only 1 GPU or CPU for the current distributed rank.

    Returns:
        Engine: a trainer engine with supervised update function.

    Raises:
        ValueError: if `distributed` is True and more than one device is resolved.

    Note:
        `engine.state.output` for this engine is defined by `output_transform` parameter and is
        the loss of the processed batch by default.
    """
    resolved = get_devices_spec(devices)
    has_many_devices = len(resolved) > 1

    # Distributed mode drives exactly one device per rank, so reject anything wider up front.
    if distributed and has_many_devices:
        raise ValueError(
            f"for distributed training, `devices` must contain only 1 GPU or CPU, but got {resolved}."
        )

    if distributed:
        net = DistributedDataParallel(net, device_ids=resolved)
    elif has_many_devices:
        net = DataParallel(net)

    primary_device = resolved[0]
    return create_supervised_trainer(
        net, optimizer, loss_fn, primary_device, non_blocking, prepare_batch, output_transform
    )
def create_multigpu_supervised_evaluator(
    net: torch.nn.Module,
    metrics: Optional[Dict[str, Metric]] = None,
    devices: Optional[Sequence[torch.device]] = None,
    non_blocking: bool = False,
    prepare_batch: Callable = _prepare_batch,
    output_transform: Callable = _default_eval_transform,
    distributed: bool = False,
):
    """
    Derived from `create_supervised_evaluator` in Ignite.

    Factory function for creating an evaluator for supervised models. The network is wrapped in
    `DistributedDataParallel` when `distributed` is True, or in `DataParallel` when more than one
    device is resolved, and then passed to Ignite's `create_supervised_evaluator` together with
    the first resolved device.

    Args:
        net: the model to evaluate.
        metrics: a map of metric names to Metrics.
        devices: device(s) type specification (default: None), resolved through
            `get_devices_spec`. Applies to both model and batches.
            None is all devices used, empty list is CPU only.
        non_blocking: if True and this copy is between CPU and GPU, the copy may occur
            asynchronously with respect to the host. For other cases, this argument has no effect.
        prepare_batch: function that receives `batch`, `device`, `non_blocking` and outputs
            tuple of tensors `(batch_x, batch_y)`.
        output_transform: function that receives 'x', 'y', 'y_pred' and returns value to be
            assigned to engine's state.output after each iteration. Default is returning
            `(y_pred, y,)` which fits output expected by metrics. If you change it you should
            use `output_transform` in metrics.
        distributed: whether to convert the model to `DistributedDataParallel`; if `True`,
            `devices` must contain only 1 GPU or CPU for the current distributed rank.

    Note:
        `engine.state.output` for this engine is defined by `output_transform` parameter and is
        a tuple of `(batch_pred, batch_y)` by default.

    Returns:
        Engine: an evaluator engine with supervised inference function.

    Raises:
        ValueError: if `distributed` is True and more than one device is resolved.
    """
    devices_ = get_devices_spec(devices)
    if distributed:
        # Validate BEFORE wrapping: the device list must be rejected before it is ever
        # handed to the DistributedDataParallel constructor.
        if len(devices_) > 1:
            raise ValueError(
                f"for distributed evaluation, `devices` must contain only 1 GPU or CPU, but got {devices_}."
            )
        net = DistributedDataParallel(net, device_ids=devices_)
    elif len(devices_) > 1:
        net = DataParallel(net)

    return create_supervised_evaluator(
        net, metrics, devices_[0], non_blocking, prepare_batch, output_transform
    )
def create_multigpu_supervised_trainer(
    net: torch.nn.Module,
    optimizer,
    loss_fn,
    devices=None,
    non_blocking: bool = False,
    prepare_batch: Callable = _prepare_batch,
    output_transform: Callable = _default_transform,
    distributed: bool = False,
):
    """
    Derived from `create_supervised_trainer` in Ignite.

    Factory function for creating a trainer for supervised models. The network is wrapped in
    `torch.nn.parallel.DistributedDataParallel` when `distributed` is True, or in
    `torch.nn.parallel.DataParallel` when more than one device is resolved, and then passed to
    Ignite's `create_supervised_trainer` together with the first resolved device.

    Args:
        net (`torch.nn.Module`): the network to train.
        optimizer (`torch.optim.Optimizer`): the optimizer to use.
        loss_fn (`torch.nn` loss function): the loss function to use.
        devices (list, optional): device(s) type specification (default: None), resolved through
            `get_devices_spec`. Applies to both model and batches.
            None is all devices used, empty list is CPU only.
        non_blocking: if True and this copy is between CPU and GPU, the copy may occur
            asynchronously with respect to the host. For other cases, this argument has no effect.
        prepare_batch: function that receives `batch`, `device`, `non_blocking` and outputs
            tuple of tensors `(batch_x, batch_y)`.
        output_transform: function that receives 'x', 'y', 'y_pred', 'loss' and returns value
            to be assigned to engine's state.output after each iteration. Default is returning
            `loss.item()`.
        distributed: whether to convert the model to `DistributedDataParallel`; if `True`,
            `devices` must contain only 1 GPU or CPU for the current distributed rank.
            Defaults to False, which keeps the non-distributed behaviour.

    Returns:
        Engine: a trainer engine with supervised update function.

    Raises:
        ValueError: if `distributed` is True and more than one device is resolved.

    Note:
        `engine.state.output` for this engine is defined by `output_transform` parameter and is
        the loss of the processed batch by default.
    """
    # Keep the resolved list in its own name instead of rebinding the `devices` argument.
    devices_ = get_devices_spec(devices)
    if distributed:
        # One device per distributed rank: validate before constructing the DDP wrapper.
        if len(devices_) > 1:
            raise ValueError(
                f"for distributed training, `devices` must contain only 1 GPU or CPU, but got {devices_}."
            )
        net = torch.nn.parallel.DistributedDataParallel(net, device_ids=devices_)
    elif len(devices_) > 1:
        net = torch.nn.parallel.DataParallel(net)

    return create_supervised_trainer(
        net, optimizer, loss_fn, devices_[0], non_blocking, prepare_batch, output_transform
    )
def create_multigpu_supervised_evaluator(
    net: torch.nn.Module,
    metrics=None,
    devices=None,
    non_blocking: bool = False,
    prepare_batch: Callable = _prepare_batch,
    output_transform: Callable = _default_eval_transform,
    distributed: bool = False,
):
    """
    Derived from `create_supervised_evaluator` in Ignite.

    Factory function for creating an evaluator for supervised models. The network is wrapped in
    `torch.nn.parallel.DistributedDataParallel` when `distributed` is True, or in
    `torch.nn.parallel.DataParallel` when more than one device is resolved, and then passed to
    Ignite's `create_supervised_evaluator` together with the first resolved device.

    Args:
        net (`torch.nn.Module`): the model to evaluate.
        metrics (dict of str - :class:`~ignite.metrics.Metric`): a map of metric names to Metrics.
        devices (list, optional): device(s) type specification (default: None), resolved through
            `get_devices_spec`. Applies to both model and batches.
            None is all devices used, empty list is CPU only.
        non_blocking: if True and this copy is between CPU and GPU, the copy may occur
            asynchronously with respect to the host. For other cases, this argument has no effect.
        prepare_batch: function that receives `batch`, `device`, `non_blocking` and outputs
            tuple of tensors `(batch_x, batch_y)`.
        output_transform: function that receives 'x', 'y', 'y_pred' and returns value to be
            assigned to engine's state.output after each iteration. Default is returning
            `(y_pred, y,)` which fits output expected by metrics. If you change it you should
            use `output_transform` in metrics.
        distributed: whether to convert the model to `DistributedDataParallel`; if `True`,
            `devices` must contain only 1 GPU or CPU for the current distributed rank.
            Defaults to False, which keeps the non-distributed behaviour.

    Note:
        `engine.state.output` for this engine is defined by `output_transform` parameter and is
        a tuple of `(batch_pred, batch_y)` by default.

    Returns:
        Engine: an evaluator engine with supervised inference function.

    Raises:
        ValueError: if `distributed` is True and more than one device is resolved.
    """
    # Keep the resolved list in its own name instead of rebinding the `devices` argument.
    devices_ = get_devices_spec(devices)
    if distributed:
        # One device per distributed rank: validate before constructing the DDP wrapper.
        if len(devices_) > 1:
            raise ValueError(
                f"for distributed evaluation, `devices` must contain only 1 GPU or CPU, but got {devices_}."
            )
        net = torch.nn.parallel.DistributedDataParallel(net, device_ids=devices_)
    elif len(devices_) > 1:
        net = torch.nn.parallel.DataParallel(net)

    return create_supervised_evaluator(
        net, metrics, devices_[0], non_blocking, prepare_batch, output_transform
    )