def run(self, func: Callable, *args, **kwargs):
    """Execute ``func`` with provided arguments in distributed context.

    Example

    .. code-block:: python

        def training(local_rank, config, **kwargs):
            # ...
            print(idist.get_rank(), ": run with config:", config, "- backend=", idist.backend())
            # ...

    Args:
        func (Callable): function to execute. First argument of the function should be `local_rank` - local
            process index.
        *args: positional arguments of ``func`` (without `local_rank`).
        **kwargs: keyword arguments of ``func``.
    """
    if self._spawn_params is not None:
        self.logger.info("Spawn function '{}' in {} processes".format(func, self._spawn_params["nproc_per_node"]))
        idist.spawn(self.backend, func, args=args, kwargs_dict=kwargs, **self._spawn_params)
    else:
        self.logger.info("- Run '{}' in {} processes".format(func, idist.get_world_size()))
        local_rank = idist.get_local_rank()
        func(local_rank, *args, **kwargs)

    self.logger.info("End of run")
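# --- Usage sketch (not part of the original source). A minimal, hedged example of
# driving ``run`` through ``idist.Parallel``; the "gloo" backend and
# ``nproc_per_node=2`` are assumptions, adjust them to your environment.
def _example_parallel_run() -> None:
    import ignite.distributed as idist

    def training(local_rank, config, **kwargs):
        # local_rank is always passed first; remaining args/kwargs come from run()
        print(idist.get_rank(), ": run with config:", config, "- backend=", idist.backend())

    config = {"lr": 0.01}
    with idist.Parallel(backend="gloo", nproc_per_node=2) as parallel:
        parallel.run(training, config, a=1, b=2)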
def auto_model(model: nn.Module) -> nn.Module:
    """Helper method to adapt provided model for non-distributed and distributed configurations (supporting
    all available backends from :meth:`~ignite.distributed.utils.available_backends()`).

    Internally, we perform the following:

    - send model to current :meth:`~ignite.distributed.utils.device()`.
    - wrap the model to `torch DistributedDataParallel`_ for native torch distributed if world size is larger
      than 1.
    - wrap the model to `torch DataParallel`_ if no distributed context is found and more than one CUDA device
      is available.

    Examples:

    .. code-block:: python

        import ignite.distributed as idist

        model = idist.auto_model(model)

    Args:
        model (torch.nn.Module): model to adapt.

    Returns:
        torch.nn.Module

    .. _torch DistributedDataParallel: https://pytorch.org/docs/stable/nn.html#torch.nn.parallel.DistributedDataParallel
    .. _torch DataParallel: https://pytorch.org/docs/stable/nn.html#torch.nn.DataParallel
    """
    logger = setup_logger(__name__ + ".auto_model")

    model.to(idist.device())

    # distributed data parallel model
    if idist.get_world_size() > 1:
        if idist.backend() == idist_native.NCCL:
            lrank = idist.get_local_rank()
            logger.info("Apply torch DistributedDataParallel on model, device id: {}".format(lrank))
            model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[lrank])
        elif idist.backend() == idist_native.GLOO:
            logger.info("Apply torch DistributedDataParallel on model")
            model = torch.nn.parallel.DistributedDataParallel(model)

    # not distributed, but multiple GPUs are reachable, so use a data parallel model
    elif torch.cuda.device_count() > 1 and "cuda" in idist.device().type:
        logger.info("Apply torch DataParallel on model")
        model = torch.nn.parallel.DataParallel(model)

    return model
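# --- Usage sketch (not part of the original source). ``auto_model`` is backend-agnostic:
# the same call works in single-process, native DDP, and multi-GPU DataParallel setups.
# The toy model below is illustrative only.
def _example_auto_model() -> None:
    import torch.nn as nn
    import ignite.distributed as idist

    model = nn.Linear(10, 2)  # replace with your real model
    model = idist.auto_model(model)  # moved to idist.device(), wrapped if needed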
def run(self, func: Callable, *args: Any, **kwargs: Any) -> None:
    """Execute ``func`` with provided arguments in distributed context.

    Example

    .. code-block:: python

        def training(local_rank, config, **kwargs):
            # ...
            print(idist.get_rank(), ": run with config:", config, "- backend=", idist.backend())
            # ...

        with idist.Parallel(backend=backend) as parallel:
            parallel.run(training, config, a=1, b=2)

    Args:
        func: function to execute. First argument of the function should be `local_rank` - local process index.
        args: positional arguments of ``func`` (without `local_rank`).
        kwargs: keyword arguments of ``func``.
    """
    if self._spawn_params is not None and self.backend is not None:
        self._logger.info(  # type: ignore[attr-defined]
            f"Spawn function '{func}' in {self._spawn_params['nproc_per_node']} processes"
        )
        idist.spawn(self.backend, func, args=args, kwargs_dict=kwargs, **self._spawn_params)
    else:
        self._logger.info(f"- Run '{func}' in {idist.get_world_size()} processes")  # type: ignore[attr-defined]
        local_rank = idist.get_local_rank()
        func(local_rank, *args, **kwargs)

    self._logger.info("End of run")  # type: ignore[attr-defined]
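# --- Usage sketch (not part of the original source). With ``backend=None`` (the default),
# no processes are spawned and ``func`` runs serially in the current process, which is a
# convenient fallback for locally debugging the same training function.
def _example_parallel_run_no_backend() -> None:
    import ignite.distributed as idist

    def training(local_rank, config, **kwargs):
        print(idist.get_rank(), ": run with config:", config)

    with idist.Parallel(backend=None) as parallel:  # single-process, non-distributed
        parallel.run(training, {"lr": 0.01})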
def auto_model(model: nn.Module, sync_bn: bool = False, **kwargs: Any) -> nn.Module:
    """Helper method to adapt provided model for non-distributed and distributed configurations (supporting
    all available backends from :meth:`~ignite.distributed.utils.available_backends()`).

    Internally, we perform the following:

    - send model to current :meth:`~ignite.distributed.utils.device()` if model's parameters are not on the device.
    - wrap the model to `torch DistributedDataParallel`_ for native torch distributed if world size is larger
      than 1.
    - wrap the model to `torch DataParallel`_ if no distributed context is found and more than one CUDA device
      is available.
    - broadcast the initial variable states from rank 0 to all other processes if Horovod distributed framework
      is used.

    Examples:

    .. code-block:: python

        import ignite.distributed as idist

        model = idist.auto_model(model)

    In addition, with NVidia/Apex, it can be used in the following way:

    .. code-block:: python

        import ignite.distributed as idist

        model, optimizer = amp.initialize(model, optimizer, opt_level=opt_level)
        model = idist.auto_model(model)

    Args:
        model: model to adapt.
        sync_bn: if True, applies `torch convert_sync_batchnorm`_ to the model for native torch
            distributed only. Default, False. Note, if using Nvidia/Apex, batchnorm conversion should be
            applied before calling ``amp.initialize``.
        kwargs: kwargs to model's wrapping class: `torch DistributedDataParallel`_ or `torch DataParallel`_
            if applicable. Please make sure to use acceptable kwargs for the given backend.

    Returns:
        torch.nn.Module

    .. _torch DistributedDataParallel: https://pytorch.org/docs/stable/generated/torch.nn.parallel.DistributedDataParallel.html
    .. _torch DataParallel: https://pytorch.org/docs/stable/generated/torch.nn.DataParallel.html
    .. _torch convert_sync_batchnorm: https://pytorch.org/docs/stable/generated/torch.nn.SyncBatchNorm.html#torch.nn.SyncBatchNorm.convert_sync_batchnorm

    .. versionchanged:: 0.4.2

        - Added Horovod distributed framework.
        - Added ``sync_bn`` argument.

    .. versionchanged:: 0.4.3
        Added kwargs to ``idist.auto_model``.
    """
    logger = setup_logger(__name__ + ".auto_model")

    # Put model's parameters to device if its parameters are not on the device
    device = idist.device()
    if not all([p.device == device for p in model.parameters()]):
        model.to(device)

    # distributed data parallel model
    if idist.get_world_size() > 1:
        bnd = idist.backend()
        if idist.has_native_dist_support and bnd in (idist_native.NCCL, idist_native.GLOO, idist_native.MPI):
            if sync_bn:
                logger.info("Convert batch norm to sync batch norm")
                model = nn.SyncBatchNorm.convert_sync_batchnorm(model)

            if torch.cuda.is_available():
                if "device_ids" in kwargs:
                    raise ValueError(f"Argument kwargs should not contain 'device_ids', but got {kwargs}")

                lrank = idist.get_local_rank()
                logger.info(f"Apply torch DistributedDataParallel on model, device id: {lrank}")
                kwargs["device_ids"] = [lrank]
            else:
                logger.info("Apply torch DistributedDataParallel on model")

            model = torch.nn.parallel.DistributedDataParallel(model, **kwargs)
        elif idist.has_hvd_support and bnd == idist_hvd.HOROVOD:
            import horovod.torch as hvd

            logger.info("Broadcast the initial variable states from rank 0 to all other processes")
            hvd.broadcast_parameters(model.state_dict(), root_rank=0)

    # not distributed, but multiple GPUs are reachable, so use a data parallel model
    elif torch.cuda.device_count() > 1 and "cuda" in idist.device().type:
        logger.info("Apply torch DataParallel on model")
        model = torch.nn.parallel.DataParallel(model, **kwargs)

    return model
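# --- Usage sketch (not part of the original source). Extra kwargs are forwarded to the
# wrapping class, so DDP options such as ``find_unused_parameters`` can be passed through;
# note that ``device_ids`` is reserved and raises ``ValueError`` if supplied. Intended for
# a native torch distributed context; in a single process the kwargs are simply unused.
def _example_auto_model_kwargs() -> None:
    import torch.nn as nn
    import ignite.distributed as idist

    model = nn.Sequential(nn.Conv2d(3, 8, 3), nn.BatchNorm2d(8), nn.ReLU())
    model = idist.auto_model(model, sync_bn=True, find_unused_parameters=True)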
def auto_model(model: nn.Module, sync_bn: bool = False) -> nn.Module:
    """Helper method to adapt provided model for non-distributed and distributed configurations (supporting
    all available backends from :meth:`~ignite.distributed.utils.available_backends()`).

    Internally, we perform the following:

    - send model to current :meth:`~ignite.distributed.utils.device()` if model's parameters are not on the device.
    - wrap the model to `torch DistributedDataParallel`_ for native torch distributed if world size is larger
      than 1.
    - wrap the model to `torch DataParallel`_ if no distributed context is found and more than one CUDA device
      is available.
    - broadcast the initial variable states from rank 0 to all other processes if Horovod distributed framework
      is used.

    Examples:

    .. code-block:: python

        import ignite.distributed as idist

        model = idist.auto_model(model)

    In addition, with NVidia/Apex, it can be used in the following way:

    .. code-block:: python

        import ignite.distributed as idist

        model, optimizer = amp.initialize(model, optimizer, opt_level=opt_level)
        model = idist.auto_model(model)

    Args:
        model (torch.nn.Module): model to adapt.
        sync_bn (bool): if True, applies `torch convert_sync_batchnorm`_ to the model for native torch
            distributed only. Default, False. Note, if using Nvidia/Apex, batchnorm conversion should be
            applied before calling ``amp.initialize``.

    Returns:
        torch.nn.Module

    .. _torch DistributedDataParallel: https://pytorch.org/docs/stable/generated/torch.nn.parallel.DistributedDataParallel.html
    .. _torch DataParallel: https://pytorch.org/docs/stable/generated/torch.nn.DataParallel.html
    .. _torch convert_sync_batchnorm: https://pytorch.org/docs/stable/generated/torch.nn.SyncBatchNorm.html#torch.nn.SyncBatchNorm.convert_sync_batchnorm
    """
    logger = setup_logger(__name__ + ".auto_model")

    # Put model's parameters to device if its parameters are not on the device
    device = idist.device()
    if not all([p.device == device for p in model.parameters()]):
        model.to(device)

    # distributed data parallel model
    if idist.get_world_size() > 1:
        bnd = idist.backend()
        if idist.has_native_dist_support and bnd == idist_native.NCCL:
            if sync_bn:
                logger.info("Convert batch norm to sync batch norm")
                model = nn.SyncBatchNorm.convert_sync_batchnorm(model)

            lrank = idist.get_local_rank()
            logger.info("Apply torch DistributedDataParallel on model, device id: {}".format(lrank))
            model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[lrank])
        elif idist.has_native_dist_support and bnd == idist_native.GLOO:
            if sync_bn:
                logger.info("Convert batch norm to sync batch norm")
                model = nn.SyncBatchNorm.convert_sync_batchnorm(model)

            logger.info("Apply torch DistributedDataParallel on model")
            model = torch.nn.parallel.DistributedDataParallel(model)
        elif idist.has_hvd_support and bnd == idist_hvd.HOROVOD:
            import horovod.torch as hvd

            logger.info("Broadcast the initial variable states from rank 0 to all other processes")
            hvd.broadcast_parameters(model.state_dict(), root_rank=0)

    # not distributed, but multiple GPUs are reachable, so use a data parallel model
    elif torch.cuda.device_count() > 1 and "cuda" in idist.device().type:
        logger.info("Apply torch DataParallel on model")
        model = torch.nn.parallel.DataParallel(model)

    return model
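# --- Usage sketch (not part of the original source). The ``sync_bn`` flag: the
# BatchNorm -> SyncBatchNorm conversion only takes effect under native torch distributed
# with world size > 1; in a single process the flag is effectively a no-op and the model
# is simply moved to ``idist.device()``.
def _example_auto_model_sync_bn() -> None:
    import torch.nn as nn
    import ignite.distributed as idist

    model = nn.Sequential(nn.Conv2d(3, 8, 3), nn.BatchNorm2d(8), nn.ReLU())
    model = idist.auto_model(model, sync_bn=True)
    # Outside a distributed context the model is never wrapped in DDP:
    assert not isinstance(model, nn.parallel.DistributedDataParallel)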