def update_model_for_multiple_gpus(model_and_info: ModelAndInfo, args: ModelConfigBase, execution_mode: ModelExecutionMode = ModelExecutionMode.TRAIN) -> \ ModelAndInfo: """ Updates a given torch model as such input mini-batches are parallelized across the batch dimension to utilise multiple gpus. If model parallel is set to True and execution is in test mode, then model is partitioned to perform full volume inference. :param model_and_info: The torch module object representing the network and the optimizer. :param args: The arguments object with attributes used to enable amp training and create the parallel model. :param execution_mode: mode, i.e. train or test :return: Updated torch model and optimizer. """ if model_and_info.is_adjusted: logging.debug("model_and_info.is_adjusted is already True") return model_and_info if args.use_gpu: # In the normal training codepath, the model should already be on the GPU, but in some tests not. model_and_info.to_cuda() logging.info("Adjusting the model to use mixed precision training.") # If model parallel is set to True, then partition the network across all available gpus. if args.use_model_parallel: devices = args.get_cuda_devices() assert devices is not None # for mypy model_and_info.model.partition_model( devices=devices) # type: ignore else: logging.info( "Making no adjustments to the model because no GPU was found.") # Update model related config attributes (After Model Parallel Activated) args.adjust_after_mixed_precision_and_parallel(model_and_info.model) # DataParallel enables running the model with multiple gpus by splitting samples across GPUs # If the model is used in training mode, data parallel is activated by default. # Similarly, if model parallel is not activated, data parallel is used as a backup option use_data_parallel = (execution_mode == ModelExecutionMode.TRAIN) or ( not args.use_model_parallel) if args.use_gpu and use_data_parallel: logging.info("Adjusting the model to use DataParallel") # Move all layers to the default GPU before activating data parallel. # This needs to happen even though we put the model to the GPU at the beginning of the method, # but we may have spread it across multiple GPUs later. model_and_info.to_cuda() model_and_info.set_data_parallel(device_ids=args.get_cuda_devices()) model_and_info.is_adjusted = True logging.debug("model_and_info.is_adjusted set to True") return model_and_info
def _adjust_for_gpus( cls, model: DeviceAwareModule, config: ModelConfigBase, model_execution_mode: ModelExecutionMode) -> DeviceAwareModule: """ Updates a torch model so that input mini-batches are parallelized across the batch dimension to utilise multiple gpus. If model parallel is set to True and execution is in test mode, then model is partitioned to perform full volume inference. This assumes the model has been created, that the optimizer has not yet been created, and the the model has not been adjusted twice. This method should not be called externally. Use instead adjust_model_for_gpus or adjust_mean_teacher_model_for_gpus :returns Adjusted model """ if config.use_gpu: model = model.cuda() logging.info( "Adjusting the model to use mixed precision training.") # If model parallel is set to True, then partition the network across all available gpus. if config.use_model_parallel: devices = config.get_cuda_devices() assert devices is not None # for mypy model.partition_model(devices=devices) # type: ignore else: logging.info( "Making no adjustments to the model because no GPU was found.") # Update model related config attributes (After Model Parallel Activated) config.adjust_after_mixed_precision_and_parallel(model) # DataParallel enables running the model with multiple gpus by splitting samples across GPUs # If the model is used in training mode, data parallel is activated by default. # Similarly, if model parallel is not activated, data parallel is used as a backup option use_data_parallel = (model_execution_mode == ModelExecutionMode.TRAIN ) or (not config.use_model_parallel) if config.use_gpu and use_data_parallel: logging.info("Adjusting the model to use DataParallel") # Move all layers to the default GPU before activating data parallel. # This needs to happen even though we put the model to the GPU at the beginning of the method, # but we may have spread it across multiple GPUs later. model = model.cuda() model = DataParallelModel(model, device_ids=config.get_cuda_devices()) return model