def machine_params(self, mode="train", **kwargs):
    """Assemble the ``MachineParams`` used to run the given ``mode``.

    Without CUDA, every mode runs a single process with an empty device
    list. With CUDA, training uses ``self.TRAINING_GPUS`` and splits
    ``self.NUM_PROCESSES`` across them via
    ``evenly_distribute_count_into_bins``; validation and testing each run
    one process on ``self.VALIDATION_GPUS`` / ``self.TESTING_GPUS``.

    Args:
        mode: One of ``"train"``, ``"valid"`` or ``"test"``.
        **kwargs: Accepted for interface compatibility; not used here.

    Returns:
        A ``MachineParams`` holding the process count(s), the devices and
        the sensor preprocessor graph (or ``None`` when no graph is built).

    Raises:
        NotImplementedError: If ``mode`` is not one of the three modes.
    """
    # Reject unknown modes before touching torch or any instance state.
    if mode not in ("train", "valid", "test"):
        raise NotImplementedError(
            "mode must be 'train', 'valid', or 'test'.")

    has_cuda = torch.cuda.is_available()

    if mode == "train":
        workers_per_device = 1
        if has_cuda:
            devices = self.TRAINING_GPUS * workers_per_device
            process_counts = evenly_distribute_count_into_bins(
                self.NUM_PROCESSES, len(devices))
        else:
            devices = []
            process_counts = 1
    else:
        process_counts = 1
        if not has_cuda:
            devices = []
        elif mode == "valid":
            devices = self.VALIDATION_GPUS
        else:
            devices = self.TESTING_GPUS

    # The graph is always built for training; otherwise only when at least
    # one process will actually be started.
    build_graph = mode == "train"
    if not build_graph:
        if isinstance(process_counts, int):
            build_graph = process_counts > 0
        else:
            build_graph = (isinstance(process_counts, Sequence)
                           and sum(process_counts) > 0)

    preprocessor_graph = None
    if build_graph:
        observation_spaces = SensorSuite(self.SENSORS).observation_spaces
        preprocessor_graph = SensorPreprocessorGraph(
            source_observation_spaces=observation_spaces,
            preprocessors=self.PREPROCESSORS,
        )

    return MachineParams(
        nprocesses=process_counts,
        devices=devices,
        sensor_preprocessor_graph=preprocessor_graph,
    )
def machine_params(self, mode="train", **kwargs):
    """Assemble the ``MachineParams`` used to run the given ``mode``.

    Without CUDA, every mode runs a single process with an empty device
    list. With CUDA, training uses ``self.TRAIN_GPU_IDS`` and splits
    ``self.NUM_PROCESSES`` across them via
    ``evenly_distribute_count_into_bins``; validation runs one process on
    ``self.VALID_GPU_IDS`` and testing runs five on ``self.TEST_GPU_IDS``.

    Sampler devices are ``self.SAMPLER_GPU_IDS`` for training and the
    mode's own devices otherwise. ``ExpertActionSensor`` instances are
    removed from the sensor list outside of training.

    Args:
        mode: One of ``"train"``, ``"valid"`` or ``"test"``.
        **kwargs: Accepted for interface compatibility; not used here.

    Returns:
        A ``MachineParams`` holding the process count(s), devices, sampler
        devices and the sensor preprocessor graph (or ``None`` when no
        graph is built).

    Raises:
        NotImplementedError: If ``mode`` is not one of the three modes.
    """
    # Reject unknown modes before touching torch or any instance state.
    if mode not in ("train", "valid", "test"):
        raise NotImplementedError("mode must be 'train', 'valid', or 'test'.")

    is_training = mode == "train"
    has_cuda = torch.cuda.is_available()

    if is_training:
        workers_per_device = 1
        if has_cuda:
            devices = self.TRAIN_GPU_IDS * workers_per_device
            process_counts = evenly_distribute_count_into_bins(
                self.NUM_PROCESSES, len(devices)
            )
        else:
            devices = []
            process_counts = 1
        sampler_devices = self.SAMPLER_GPU_IDS
    else:
        if mode == "valid":
            process_counts = 1
            cuda_devices_attr = "VALID_GPU_IDS"
        else:
            process_counts = 5 if has_cuda else 1
            cuda_devices_attr = "TEST_GPU_IDS"
        devices = getattr(self, cuda_devices_attr) if has_cuda else []
        # Outside of training the samplers share the mode's own devices.
        sampler_devices = devices

    # Keep every sensor when training; drop expert-action sensors otherwise.
    sensors = [
        sensor
        for sensor in self.SENSORS
        if is_training or not isinstance(sensor, ExpertActionSensor)
    ]

    # The graph is always built for training; otherwise only when at least
    # one process will actually be started.
    build_graph = is_training
    if not build_graph:
        if isinstance(process_counts, int):
            build_graph = process_counts > 0
        else:
            build_graph = (
                isinstance(process_counts, Sequence) and sum(process_counts) > 0
            )

    preprocessor_graph = None
    if build_graph:
        observation_spaces = SensorSuite(sensors).observation_spaces
        preprocessor_graph = SensorPreprocessorGraph(
            source_observation_spaces=observation_spaces,
            preprocessors=self.preprocessors(),
        )

    return MachineParams(
        nprocesses=process_counts,
        devices=devices,
        sampler_devices=sampler_devices,  # ignored with > 1 gpu_ids
        sensor_preprocessor_graph=preprocessor_graph,
    )
def machine_params(self, mode="train", **kwargs):
    """Assemble the ``MachineParams`` used to run the given ``mode``.

    Without CUDA, every mode uses a single-element ``[torch.device("cpu")]``
    device list. With CUDA, training uses ``self.train_gpu_ids``,
    validation ``self.val_gpu_ids`` and testing ``self.test_gpu_ids``.

    Training splits ``self.num_train_processes`` across its devices via
    ``evenly_distribute_count_into_bins``; validation runs one process and
    testing runs ten with CUDA, one without.

    Sampler devices are ``self.sampler_devices`` for training and the
    mode's own devices otherwise. ``ExpertActionSensor`` instances are
    removed from the sensor list outside of training.

    Args:
        mode: One of ``"train"``, ``"valid"`` or ``"test"``.
        **kwargs: Accepted for interface compatibility; not used here.

    Returns:
        A ``MachineParams`` holding the process count(s), devices, sampler
        devices and the sensor preprocessor graph (or ``None`` when no
        graph is built).

    Raises:
        NotImplementedError: If ``mode`` is not one of the three modes.
    """
    # Reject unknown modes before touching torch or any instance state.
    if mode not in ("train", "valid", "test"):
        raise NotImplementedError(
            "mode must be 'train', 'valid', or 'test'.")

    is_training = mode == "train"
    has_cuda = torch.cuda.is_available()

    devices: Sequence[torch.device]
    if is_training:
        workers_per_device = 1
        if has_cuda:
            devices = cast(Tuple, self.train_gpu_ids) * workers_per_device
        else:
            devices = [torch.device("cpu")]
        # Always distribute over at least one bin, even if no devices
        # were configured.
        process_counts = evenly_distribute_count_into_bins(
            self.num_train_processes, max(len(devices), 1))
        sampler_devices = self.sampler_devices
    else:
        if mode == "valid":
            process_counts = 1
            cuda_devices_attr = "val_gpu_ids"
        else:
            process_counts = 10 if has_cuda else 1
            cuda_devices_attr = "test_gpu_ids"
        if has_cuda:
            devices = getattr(self, cuda_devices_attr)
        else:
            devices = [torch.device("cpu")]
        # Outside of training the samplers share the mode's own devices.
        sampler_devices = devices

    # Keep every sensor when training; drop expert-action sensors otherwise.
    sensors = [
        sensor for sensor in self.SENSORS
        if is_training or not isinstance(sensor, ExpertActionSensor)
    ]

    # The graph is always built for training; otherwise only when at least
    # one process will actually be started.
    build_graph = is_training
    if not build_graph:
        if isinstance(process_counts, int):
            build_graph = process_counts > 0
        else:
            build_graph = (isinstance(process_counts, Sequence)
                           and sum(process_counts) > 0)

    preprocessor_graph = None
    if build_graph:
        observation_spaces = SensorSuite(sensors).observation_spaces
        preprocessor_graph = SensorPreprocessorGraph(
            source_observation_spaces=observation_spaces,
            preprocessors=self.preprocessors(),
        )

    return MachineParams(
        nprocesses=process_counts,
        devices=devices,
        sampler_devices=sampler_devices,  # ignored with > 1 gpu_ids
        sensor_preprocessor_graph=preprocessor_graph,
    )