def with_profiler():
    """Run a single ``call_model()`` pass under the PyTorch profiler.

    The trace (with stacks and FLOP counts) is handed to TensorBoard's
    trace handler, which writes it into the ``derp`` directory.
    """
    profiler_ctx = torch.profiler.profile(
        on_trace_ready=tensorboard_trace_handler("derp"),
        with_stack=True,
        with_flops=True,
    )
    with profiler_ctx as prof:
        call_model()
        # Advance the profiler past the single recorded step so the
        # trace is flushed to the handler.
        prof.step()
def on_trace_ready(profiler):
    """Trace-ready callback: export the finished profiler trace.

    Writes a Chrome/TensorBoard trace and/or a flame-graph ``.stack``
    file into ``self.dirpath``, or warns if no directory was configured.

    NOTE(review): this function reads ``self`` and ``action_name`` as
    free variables — it is only valid as a closure inside a method that
    has both in scope.
    """
    trace_name = f"{action_name}_{self.local_rank}"
    # Guard clause: without a target directory there is nowhere to export.
    if self.dirpath is None:
        rank_zero_warn("The PyTorchProfiler failed to export trace as `dirpath` is None")
        return
    if self._export_to_chrome:
        # tensorboard_trace_handler both builds the handler and is invoked
        # immediately on the finished profiler.
        tensorboard_trace_handler(self.dirpath, trace_name)(profiler)
    if self._export_to_flame_graph:
        stack_path = os.path.join(self.dirpath, self._prepare_filename(extension=".stack"))
        profiler.export_stacks(stack_path, metric=self._metric)
def __init__(self, record_func_name='inference', activities=None,
             record_shapes=False, profile_memory=True,
             scheduler=None, trace_handler=None):
    """Configure a PyTorch profiler plus a named record_function scope.

    Parameters
    ----------
    record_func_name : str
        Label for the ``record_function`` region (default ``'inference'``).
    activities : list[ProfilerActivity] | None
        Activities to profile; defaults to CPU + CUDA when ``None``.
    record_shapes : bool
        Forwarded to ``torch.profiler.profile``.
    profile_memory : bool
        Forwarded to ``torch.profiler.profile``.
    scheduler : callable | None
        Profiler schedule; defaults to ``schedule(wait=1, warmup=1, active=2)``.
    trace_handler : callable | None
        ``on_trace_ready`` callback; defaults to a TensorBoard handler
        writing into ``./log``.

    Notes
    -----
    The original signature used mutable/side-effectful defaults
    (``activities=[...]``, ``schedule(...)``, ``tensorboard_trace_handler('./log')``)
    which are evaluated once at import time and shared across every call.
    They are replaced with ``None`` sentinels resolved per call — same
    behavior for callers that pass nothing, but no shared state and no
    filesystem side effect at definition time.
    """
    if activities is None:
        activities = [ProfilerActivity.CPU, ProfilerActivity.CUDA]
    if scheduler is None:
        scheduler = schedule(wait=1, warmup=1, active=2)
    if trace_handler is None:
        trace_handler = tensorboard_trace_handler('./log')
    self.activities = activities
    self.profile = profile(activities=activities,
                           record_shapes=record_shapes,
                           profile_memory=profile_memory,
                           with_flops=True,
                           schedule=scheduler,
                           on_trace_ready=trace_handler)
    self.record_function = record_function(record_func_name)
# Inference benchmark: run 10 forward passes of the scripted model on CUDA
# under the PyTorch profiler and dump a TensorBoard trace into "foobar".
device = "cuda"

model = GoogleModel(64, 4, 8, 64, True)
model = torch.jit.script(model)
model.eval()
model.to(device)

data = load_data("../../data/esat/test_data.csv")
batch = data.pick_batch(torch.arange(batch_size)).to(device)
o = o_tensor(device)

# Kineto is the backend that produces the Chrome/TensorBoard trace.
print("Kineto available:", torch.autograd.kineto_available())

with torch.profiler.profile(
    on_trace_ready=tensorboard_trace_handler("foobar"),
) as profiler:
    for _ in range(10):
        _ = model(o, batch.mask_o, batch.tiles_o, batch.macros)
        profiler.step()
        print("step done")

print("done")