def visit(path, module):
    """Recursively yield ``(dotted_path, module)`` for *module* and every submodule.

    FSDP-wrapped modules have their full parameters summoned (via
    ``cls._summon_params``) for the duration of their own yield and the
    traversal of their subtree; other modules are visited under a no-op
    context.

    NOTE(review): relies on ``cls``, ``null_context`` and
    ``FullyShardedDataParallel`` from the enclosing scope — presumably this
    is a closure inside a classmethod; confirm against the surrounding file.
    """
    # Pick the context up front: summon params only for FSDP wrappers.
    ctx = (
        cls._summon_params(module)
        if isinstance(module, FullyShardedDataParallel)
        else null_context()
    )
    with ctx:
        yield path, module
        # Recurse while the (possibly summoned) parameters are in scope.
        for child_name, child in module._modules.items():
            child_path = f"{path}.{child_name}" if path else child_name
            yield from visit(child_path, child)
def record_function(name: str, with_tag: str = "##"): """ Context manager to annotate a scope with meta data used for profiling. The tag is used to surround the name. """ import torch.autograd.profiler as profiler if with_tag: name = " ".join([with_tag, name, with_tag]) if is_nvtx_available(): import nvtx nvtx_context = nvtx.annotate(message=name) else: nvtx_context = null_context() with profiler.record_function(name), nvtx_context: yield
def create_runtime_profiler(
    enabled: bool,
    use_cpu: bool,
    use_cuda: bool,
    wait: int,
    warmup: int,
    active: int,
    legacy_profiler: bool,
):
    """
    Create a runtime profiler with the provided options.

    The type of runtime profiler depends on the pytorch version: newer
    version (above 1.8.1) will use "torch.profiler" instead of
    "torch.autograd.profiler".

    Returns a no-op context when profiling is disabled, a ``Profiler``
    wrapping ``torch.profiler.profile`` when the new API is available and
    not explicitly bypassed, and an ``AutoGradProfiler`` otherwise.
    """
    if not enabled:
        # Profiling disabled: hand back a context manager that does nothing.
        return null_context()

    if is_pytorch_profiler_available() and not legacy_profiler:
        import torch.profiler

        activities = []
        if use_cpu:
            activities.append(torch.profiler.ProfilerActivity.CPU)
        if use_cuda:
            activities.append(torch.profiler.ProfilerActivity.CUDA)
        return Profiler(
            torch.profiler.profile(
                activities=activities,
                schedule=torch.profiler.schedule(
                    wait=wait, warmup=warmup, active=active
                ),
            )
        )

    # Fall back to the legacy autograd profiler; the wait/warmup/active
    # scheduling is emulated by the AutoGradProfiler wrapper.
    import torch.autograd.profiler

    legacy = torch.autograd.profiler.profile(
        enabled=enabled, use_cuda=use_cuda, profile_memory=False
    )
    return AutoGradProfiler(legacy, wait=wait, warmup=warmup, active=active)