def __init__(
    self,
    backend: Optional[str] = None,
    nproc_per_node: Optional[int] = None,
    nnodes: Optional[int] = None,
    node_rank: Optional[int] = None,
    master_addr: Optional[str] = None,
    master_port: Optional[str] = None,
) -> None:
    """Validate the launcher configuration and prepare spawn parameters.

    Args:
        backend: name of the distributed backend; must be one of
            ``idist.available_backends()`` when given. ``None`` means no
            distributed configuration, in which case every other argument
            must be ``None`` as well.
        nproc_per_node: when given together with a backend, spawn
            parameters are built through ``self._setup_spawn_params``.
        nnodes: forwarded unchanged to ``self._setup_spawn_params``.
        node_rank: forwarded unchanged to ``self._setup_spawn_params``.
        master_addr: forwarded unchanged to ``self._setup_spawn_params``.
        master_port: forwarded unchanged to ``self._setup_spawn_params``.

    Raises:
        ValueError: if ``backend`` is not an available backend, or if
            ``backend`` is ``None`` while any other argument is set.
    """
    if backend is not None:
        # Query the backends once and reuse the result in the error message.
        known_backends = idist.available_backends()
        if backend not in known_backends:
            raise ValueError(
                f"Unknown backend '{backend}'. Available backends: {known_backends}"
            )
    else:
        # Without a backend none of the distributed settings make sense;
        # reject the first one that was supplied (checked in signature order).
        distributed_args = {
            "nproc_per_node": nproc_per_node,
            "nnodes": nnodes,
            "node_rank": node_rank,
            "master_addr": master_addr,
            "master_port": master_port,
        }
        for name, value in distributed_args.items():
            if value is not None:
                raise ValueError(
                    f"If backend is None, argument '{name}' should be also None, but given {value}"
                )

    self.backend = backend
    self._spawn_params = None
    # distributed_rank=0 is passed explicitly so that the logger setup does
    # not have to call into idist. Per the original note this is critical for
    # TPU on Colab, where the distributed context must not be set up here.
    self.logger = setup_logger(
        __name__ + "." + self.__class__.__name__, distributed_rank=0
    )

    if self.backend is not None and nproc_per_node is not None:
        self._spawn_params = self._setup_spawn_params(
            nproc_per_node, nnodes, node_rank, master_addr, master_port
        )

    if self._spawn_params is not None:
        self.logger.info(
            f"Initialized distributed launcher with backend: '{self.backend}'"
        )
        # Only report the parameters that were actually set.
        msg = "\n\t".join(
            f"{k}: {v}" for k, v in self._spawn_params.items() if v is not None
        )
        self.logger.info(f"- Parameters to spawn processes: \n\t{msg}")
def __init__(
    self,
    backend: Optional[str] = None,
    nproc_per_node: Optional[int] = None,
    nnodes: Optional[int] = None,
    node_rank: Optional[int] = None,
    master_addr: Optional[str] = None,
    master_port: Optional[int] = None,
    init_method: Optional[str] = None,
    **spawn_kwargs: Any,
) -> None:
    """Check the launcher arguments, create the logger and build spawn params.

    Args:
        backend: distributed backend name, validated against
            ``idist.available_backends()``. With ``None`` the five
            node/process arguments below must all be ``None``.
        nproc_per_node: triggers ``self._setup_spawn_params`` when set
            together with a backend.
        nnodes: passed through to ``self._setup_spawn_params``.
        node_rank: passed through to ``self._setup_spawn_params``.
        master_addr: passed through to ``self._setup_spawn_params``.
        master_port: passed through to ``self._setup_spawn_params``.
        init_method: stored on ``self.init_method`` and passed through to
            ``self._setup_spawn_params``. It is not part of the
            ``backend is None`` check.
        **spawn_kwargs: extra keyword arguments passed through to
            ``self._setup_spawn_params``; unused when no spawn parameters
            are built.

    Raises:
        ValueError: for an unknown backend, or when ``backend`` is ``None``
            but one of ``nproc_per_node``, ``nnodes``, ``node_rank``,
            ``master_addr``, ``master_port`` is given.
    """
    if backend is None:
        # No backend: the node/process settings must all be left unset.
        # The first offending one (in signature order) is reported.
        must_be_unset = (
            ("nproc_per_node", nproc_per_node),
            ("nnodes", nnodes),
            ("node_rank", node_rank),
            ("master_addr", master_addr),
            ("master_port", master_port),
        )
        for arg_name, arg_value in must_be_unset:
            if arg_value is None:
                continue
            raise ValueError(
                f"If backend is None, argument '{arg_name}' should be also None, but given {arg_value}"
            )
    elif backend not in idist.available_backends():
        raise ValueError(
            f"Unknown backend '{backend}'. Available backends: {idist.available_backends()}"
        )

    self.backend = backend
    self._spawn_params = None
    self.init_method = init_method
    # distributed_rank=0 is given explicitly so the logger setup does not
    # call into idist. Per the original note this is critical for TPU on
    # Colab, to avoid setting up the context.
    logger_name = __name__ + "." + self.__class__.__name__
    self.logger = setup_logger(logger_name, distributed_rank=0)

    wants_spawn = self.backend is not None and nproc_per_node is not None
    if wants_spawn:
        self._spawn_params = self._setup_spawn_params(
            nproc_per_node,
            nnodes,
            node_rank,
            master_addr,
            master_port,
            init_method,
            **spawn_kwargs,
        )

    if self._spawn_params is None:
        return

    self.logger.info(
        f"Initialized distributed launcher with backend: '{self.backend}'"
    )
    # List only the spawn parameters that carry a value.
    shown = [
        f"{key}: {val}"
        for key, val in self._spawn_params.items()
        if val is not None
    ]
    msg = "\n\t".join(shown)
    self.logger.info(f"- Parameters to spawn processes: \n\t{msg}")
def __init__(
    self,
    backend: Optional[str] = None,
    nproc_per_node: Optional[int] = None,
    nnodes: Optional[int] = None,
    node_rank: Optional[int] = None,
    master_addr: Optional[str] = None,
    master_port: Optional[int] = None,
    init_method: Optional[str] = None,
    **spawn_kwargs: Any,
) -> None:
    """Check the launcher arguments and build spawn params; defer the logger.

    Args:
        backend: distributed backend name, validated against
            ``idist.available_backends()``. With ``None`` the five
            node/process arguments below must all be ``None``.
        nproc_per_node: triggers ``self._setup_spawn_params`` when set
            together with a backend.
        nnodes: passed through to ``self._setup_spawn_params``.
        node_rank: passed through to ``self._setup_spawn_params``.
        master_addr: passed through to ``self._setup_spawn_params``.
        master_port: passed through to ``self._setup_spawn_params``.
        init_method: stored on ``self.init_method`` and passed through to
            ``self._setup_spawn_params``. It is not part of the
            ``backend is None`` check.
        **spawn_kwargs: extra keyword arguments passed through to
            ``self._setup_spawn_params``; unused when no spawn parameters
            are built.

    Raises:
        ValueError: for an unknown backend, or when ``backend`` is ``None``
            but one of ``nproc_per_node``, ``nnodes``, ``node_rank``,
            ``master_addr``, ``master_port`` is given.
    """
    if backend is None:
        # No backend: the node/process settings must all be left unset.
        # The first offending one (in signature order) is reported.
        must_be_unset = (
            ("nproc_per_node", nproc_per_node),
            ("nnodes", nnodes),
            ("node_rank", node_rank),
            ("master_addr", master_addr),
            ("master_port", master_port),
        )
        for arg_name, arg_value in must_be_unset:
            if arg_value is None:
                continue
            raise ValueError(
                f"If backend is None, argument '{arg_name}' should be also None, but given {arg_value}"
            )
    elif backend not in idist.available_backends():
        raise ValueError(
            f"Unknown backend '{backend}'. Available backends: {idist.available_backends()}"
        )

    self.backend = backend
    self._spawn_params = None
    self.init_method = init_method

    wants_spawn = self.backend is not None and nproc_per_node is not None
    if wants_spawn:
        self._spawn_params = self._setup_spawn_params(
            nproc_per_node,
            nnodes,
            node_rank,
            master_addr,
            master_port,
            init_method,
            **spawn_kwargs,
        )

    # No logger is created here: it will be set up after the
    # idist.initialize() call.
    self._logger = None