def test_pick_multiple_gpus(nb, expected_gpu_idxs, expected_error):
    if expected_error:
        with pytest.raises(
            expected_error,
            match=re.escape(
                "auto_select_gpus=True, gpus=0 is not a valid configuration."
                " Please select a valid number of GPU resources when using auto_select_gpus."
            ),
        ):
            pick_multiple_gpus(nb)
    else:
        assert expected_gpu_idxs == pick_multiple_gpus(nb)
def _parse_devices(
    gpus: Optional[Union[List[int], str, int]],
    auto_select_gpus: bool,
    tpu_cores: Optional[Union[List[int], str, int]],
) -> Tuple[Optional[List[int]], Optional[Union[List[int], int]]]:
    if auto_select_gpus and isinstance(gpus, int):
        gpus = pick_multiple_gpus(gpus)

    # TODO (@seannaren, @kaushikb11): Include IPU parsing logic here
    gpu_ids = parse_gpu_ids(gpus)
    tpu_cores = parse_tpu_cores(tpu_cores)
    return gpu_ids, tpu_cores
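# Hypothetical usage sketch of _parse_devices (not taken from the source); it assumes
# parse_gpu_ids expands an integer count into the first N visible device indices and
# parse_tpu_cores leaves a None TPU spec as None.
example_gpu_ids, example_tpu_cores = _parse_devices(gpus=2, auto_select_gpus=False, tpu_cores=None)
# On a machine with at least two visible GPUs this is expected to give
# example_gpu_ids == [0, 1] and example_tpu_cores is None.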
def __init__(
    self,
    num_processes,
    tpu_cores,
    ipus,
    distributed_backend,
    auto_select_gpus,
    gpus,
    num_nodes,
    sync_batchnorm,
    benchmark,
    replace_sampler_ddp,
    deterministic,
    precision,
    amp_type,
    amp_level,
    plugins,
):
    # initialization
    self._device_type = DeviceType.CPU
    self._distrib_type = None

    self.num_processes = num_processes
    self.tpu_cores = device_parser.parse_tpu_cores(tpu_cores)
    self.ipus = ipus
    self.distributed_backend = distributed_backend
    self.auto_select_gpus = auto_select_gpus
    self.gpus = gpus
    self.num_nodes = num_nodes
    self.sync_batchnorm = sync_batchnorm
    self.benchmark = benchmark
    self.replace_sampler_ddp = replace_sampler_ddp
    self.deterministic = deterministic
    self.precision = precision
    self.amp_type = amp_type.lower() if isinstance(amp_type, str) else None
    self.amp_level = amp_level
    self.is_slurm_managing_tasks = False

    self._precision_plugin: Optional[PrecisionPlugin] = None
    self._training_type_plugin: Optional[TrainingTypePlugin] = None
    self._cluster_environment: Optional[ClusterEnvironment] = None

    plugins = plugins if plugins is not None else []

    if isinstance(plugins, str):
        plugins = [plugins]

    if not isinstance(plugins, Sequence):
        plugins = [plugins]

    self.plugins = plugins

    # for gpus allow int, string and gpu list
    if auto_select_gpus and isinstance(gpus, int):
        self.gpus = pick_multiple_gpus(gpus)

    self.parallel_device_ids = device_parser.parse_gpu_ids(self.gpus)

    self.set_distributed_mode()
    self.configure_slurm_ddp()

    self.handle_given_plugins()

    self._training_type_plugin_resolved = False
    self.accelerator = self.select_accelerator()

    # override dist backend when using tpus
    if self.on_tpu:
        self.distributed_backend = "tpu"

    # init flags for SLURM+DDP to work
    self.world_size = 1
    self.interactive_ddp_procs = []
    self.global_rank = 0

    # benchmarking
    # TODO: should this be moved to GPU accelerator?
    torch.backends.cudnn.benchmark = self.benchmark

    # determinism for cudnn
    # TODO: should this be moved to GPU accelerator?
    torch.backends.cudnn.deterministic = deterministic
    if deterministic:
        # fixing non-deterministic part of horovod
        # https://github.com/PyTorchLightning/pytorch-lightning/pull/1572/files#r420279383
        os.environ["HOROVOD_FUSION_THRESHOLD"] = str(0)

    self.replace_sampler_ddp = replace_sampler_ddp
def __init__(
    self,
    num_processes,
    tpu_cores,
    distributed_backend,
    auto_select_gpus,
    gpus,
    num_nodes,
    sync_batchnorm,
    benchmark,
    replace_sampler_ddp,
    deterministic,
    precision,
    amp_type,
    amp_level,
    cluster_environment,
):
    # initialization
    self._device_type = DeviceType.CPU
    self._distrib_type = None

    self.num_processes = num_processes
    self.tpu_cores = device_parser.parse_tpu_cores(tpu_cores)
    self.distributed_backend = distributed_backend
    self.auto_select_gpus = auto_select_gpus
    self.gpus = gpus
    self.num_nodes = num_nodes
    self.sync_batchnorm = sync_batchnorm
    self.benchmark = benchmark
    self.replace_sampler_ddp = replace_sampler_ddp
    self.deterministic = deterministic
    self.precision = precision
    self.amp_type = amp_type.lower() if isinstance(amp_type, str) else None
    self.amp_level = amp_level
    self.cluster_environment = cluster_environment
    self.is_slurm_managing_tasks = False

    # init the default rank if exists
    # we need to call this here or NVIDIA flags and other messaging in init will show on all ranks
    # this way we only show it on rank 0
    if "LOCAL_RANK" in os.environ:
        rank_zero_only.rank = int(os.environ["LOCAL_RANK"])

    # for gpus allow int, string and gpu list
    if auto_select_gpus and isinstance(gpus, int):
        self.gpus = pick_multiple_gpus(gpus)

    self.parallel_device_ids = device_parser.parse_gpu_ids(self.gpus)
    self.root_gpu = device_parser.determine_root_gpu_device(self.parallel_device_ids)

    self.set_distributed_mode()
    self.configure_slurm_ddp()

    self.accelerator = self.select_accelerator()

    # override dist backend when using tpus
    if self.on_tpu:
        self.distributed_backend = "tpu"
        self.use_tpu = True

    # init flags for SLURM+DDP to work
    self.world_size = 1
    self.interactive_ddp_procs = []
    self.global_rank = 0

    # NVIDIA setup
    # self.set_nvidia_flags(self.trainer.is_slurm_managing_tasks, self.trainer.data_parallel_device_ids)

    # benchmarking
    # TODO: should this be moved to GPU accelerator?
    torch.backends.cudnn.benchmark = self.benchmark

    # determinism for cudnn
    # TODO: should this be moved to GPU accelerator?
    torch.backends.cudnn.deterministic = deterministic
    if deterministic:
        # fixing non-deterministic part of horovod
        # https://github.com/PyTorchLightning/pytorch-lightning/pull/1572/files#r420279383
        os.environ["HOROVOD_FUSION_THRESHOLD"] = str(0)

    # TODO: move this to TPU accelerator/plugin
    self.on_colab_kaggle = os.getenv("COLAB_GPU") or os.getenv("KAGGLE_URL_BASE")

    self.replace_sampler_ddp = replace_sampler_ddp
def pick_multiple_gpus(self, num_gpus: int):
    return pick_multiple_gpus(num_gpus)
def test_pick_multiple_gpus_more_than_available(*_):
    with pytest.raises(
        MisconfigurationException,
        match="You requested 3 GPUs but your machine only has 1 GPUs",
    ):
        pick_multiple_gpus(3)
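# The `*_` signature above implies the CUDA queries are mocked by decorators that are
# not included in this snippet. A hedged reconstruction follows (the exact patch targets
# are an assumption, not copied from the source): with torch.cuda.device_count patched
# to return 1, requesting 3 GPUs exceeds what is available and should raise the
# MisconfigurationException matched above.
from unittest import mock


@mock.patch("torch.cuda.is_available", return_value=True)
@mock.patch("torch.cuda.device_count", return_value=1)
def test_pick_multiple_gpus_more_than_available_sketch(*_):
    with pytest.raises(MisconfigurationException, match="You requested 3 GPUs"):
        pick_multiple_gpus(3)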
def _set_devices_flag_if_auto_select_gpus_passed(self) -> None:
    if self._auto_select_gpus and isinstance(self._gpus, int) and isinstance(self.accelerator, GPUAccelerator):
        self._devices_flag = pick_multiple_gpus(self._gpus)
        log.info(f"Auto select gpus: {self._devices_flag}")
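# Hypothetical end-to-end usage (the Trainer argument names are inferred from the
# attributes referenced above, not from this snippet): with auto_select_gpus=True and an
# integer gpus count, the connector resolves the count into concrete device indices via
# pick_multiple_gpus, preferring unoccupied GPUs rather than always using [0, 1].
trainer = Trainer(gpus=2, auto_select_gpus=True)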