def profile(
    model,
    batchsize_per_replica=32,
    input_shape=(3, 224, 224),
    use_nvprof=False,
    input_key=None,
):
    """
    Profile a single forward pass of ``model`` on a generated dummy input.

    Args:
        model: the model to profile (a callable; presumably an ``nn.Module`` —
            TODO confirm against callers).
        batchsize_per_replica: batch size of the generated dummy input.
        input_shape: per-sample shape of the dummy input.
        use_nvprof: nvprof-based CUDA profiling; currently unsupported.
        input_key: optional key(s) forwarded to ``get_model_dummy_input`` for
            models that expect keyed/dict inputs.

    Returns:
        A ``torch.autograd.profiler.profile`` object holding the trace.

    Raises:
        NotImplementedError: if ``use_nvprof`` is True.
    """
    if use_nvprof:
        raise NotImplementedError
        # FIXME (mannatsingh): in case of use_nvprof, exit() is called at the end
        # and we do not return a profile.
        # NOTE(review): the statements below are unreachable because of the
        # raise above; retained from the original nvprof flow for reference.
        assert is_on_gpu(model), "can only nvprof model that lives on GPU"
        logging.info("CUDA profiling: Make sure you are running under nvprof!")

    # Build a dummy batch matching the model's expected input.
    # (renamed from `input` to avoid shadowing the builtin)
    dummy_input = get_model_dummy_input(
        model,
        input_shape,
        input_key,
        batchsize=batchsize_per_replica,
        non_blocking=False,
    )
    # perform profiling:
    with torch.no_grad():
        model(dummy_input)  # warm up CUDA memory allocator and profiler
        if use_nvprof:  # nvprof profiling (TODO: Can we infer this?)
            # NOTE(review): unreachable — use_nvprof=True raises above.
            cudart().cudaProfilerStart()
            model(dummy_input)
            cudart().cudaProfilerStop()
            exit()  # exit gracefully
        else:  # regular profiling
            with torch.autograd.profiler.profile(use_cuda=True) as profiler:
                model(dummy_input)
                return profiler
def profile(
    model: nn.Module,
    batchsize_per_replica: int = 32,
    # NOTE(review): was Tuple[int], which types a 1-tuple; the default is a
    # 3-tuple, so Tuple[int, ...] is the correct (compatible) annotation.
    input_shape: Tuple[int, ...] = (3, 224, 224),
    use_nvprof: bool = False,
    input_key: Optional[Union[str, List[str]]] = None,
):
    """
    Profile a single forward pass of ``model`` on a generated dummy input.

    The forward pass is run under ``eval_model`` (eval mode) and
    ``torch.no_grad()``.

    Args:
        model: the model to profile.
        batchsize_per_replica: batch size of the generated dummy input.
        input_shape: per-sample shape of the dummy input.
        use_nvprof: nvprof-based CUDA profiling; currently unsupported.
        input_key: optional key(s) forwarded to ``get_model_dummy_input`` for
            models that expect keyed/dict inputs.

    Returns:
        A ``torch.autograd.profiler.profile`` object holding the trace.

    Raises:
        ClassyProfilerError: if ``use_nvprof`` is True.
    """
    if use_nvprof:
        raise ClassyProfilerError("Profiling not supported with nvprof")
        # FIXME (mannatsingh): in case of use_nvprof, exit() is called at the end
        # and we do not return a profile.
        # NOTE(review): the statements below are unreachable because of the
        # raise above; retained from the original nvprof flow for reference.
        assert is_on_gpu(model), "can only nvprof model that lives on GPU"
        logging.info("CUDA profiling: Make sure you are running under nvprof!")

    # Build a dummy batch matching the model's expected input.
    # (renamed from `input` to avoid shadowing the builtin)
    dummy_input = get_model_dummy_input(
        model,
        input_shape,
        input_key,
        batchsize=batchsize_per_replica,
        non_blocking=False,
    )
    # perform profiling in eval mode
    with eval_model(model), torch.no_grad():
        model(dummy_input)  # warm up CUDA memory allocator and profiler
        if use_nvprof:  # nvprof profiling (TODO: Can we infer this?)
            # NOTE(review): unreachable — use_nvprof=True raises above.
            cudart().cudaProfilerStart()
            model(dummy_input)
            cudart().cudaProfilerStop()
            exit()  # exit gracefully
        else:  # regular profiling
            with torch.autograd.profiler.profile(use_cuda=True) as profiler:
                model(dummy_input)
                return profiler