def test_kineto_profiler_api(self):
    called_num = [0]
    use_cuda = torch.profiler.ProfilerActivity.CUDA in supported_activities()
    with profile(activities=supported_activities()):
        self.payload(use_cuda=use_cuda)

    def trace_handler(p):
        output = p.key_averages().table(
            sort_by="self_cuda_time_total" if use_cuda else "self_cpu_time_total",
            row_limit=-1)
        # print(output)
        # p.export_chrome_trace("/tmp/test_trace_" + str(called_num[0]) + ".json")
        called_num[0] += 1

    with profile(
        activities=supported_activities(),
        schedule=torch.profiler.schedule(
            wait=1,
            warmup=1,
            active=2),
        on_trace_ready=trace_handler
    ) as p:
        for idx in range(8):
            self.payload(use_cuda=use_cuda)
            p.step()

    # wait=1 + warmup=1 + active=2 is a 4-step cycle, so 8 steps complete
    # exactly two cycles; trace_handler fires once per completed cycle.
    self.assertEqual(called_num[0], 2)

    # case without schedule
    with profile(activities=supported_activities()) as p:
        self.payload(use_cuda=use_cuda)
        self.payload(use_cuda=use_cuda)

    output = p.key_averages().table(
        sort_by="self_cuda_time_total" if use_cuda else "self_cpu_time_total",
        row_limit=-1)
def _test_profiler_tracing(self, use_kineto):
    with _profile(use_kineto=use_kineto) as prof:
        t1, t2 = torch.ones(1), torch.ones(1)
        torch.add(t1, t2)

    with TemporaryFileName(mode="w+") as fname:
        prof.export_chrome_trace(fname)
        # Read the trace and expect valid json;
        # if the JSON generated by export_chrome_trace is not valid,
        # this will throw and fail the test.
        with io.open(fname, 'r') as f:
            json.load(f)

    # test empty trace
    with _profile(use_kineto=use_kineto) as prof:
        pass
    # saving an empty trace
    with TemporaryFileName(mode="w+") as fname:
        prof.export_chrome_trace(fname)

    # Same test but for cuda.
    use_cuda = torch.profiler.ProfilerActivity.CUDA in supported_activities()
    if not use_cuda:
        return
    device = torch.device("cuda:0")
    with _profile(use_cuda=True, use_kineto=use_kineto) as prof:
        t1, t2 = torch.ones(1, device=device), torch.ones(1, device=device)
        torch.add(t1, t2)

    with TemporaryFileName(mode="w+") as fname:
        prof.export_chrome_trace(fname)
        # Now validate the json
        with io.open(fname, 'r') as f:
            json.load(f)
def test_kineto(self):
    use_cuda = torch.profiler.ProfilerActivity.CUDA in supported_activities()
    with _profile(use_cuda=use_cuda, use_kineto=True):
        self.payload(use_cuda=use_cuda)

    # rerun to avoid initial start overhead
    with _profile(use_cuda=use_cuda, use_kineto=True) as p:
        self.payload(use_cuda=use_cuda)
    output = p.key_averages().table(
        sort_by="self_cuda_time_total" if use_cuda else "self_cpu_time_total",
        row_limit=-1)
    # print(output)
    found_gemm = False
    found_memcpy = False
    found_mm = False
    for e in p.function_events:
        if "aten::mm" in e.name:
            found_mm = True
        if "gemm" in e.name:
            found_gemm = True
        if "Memcpy" in e.name or "memcpy" in e.name:
            found_memcpy = True
    if use_cuda:
        self.assertTrue(found_gemm)
        self.assertTrue(found_memcpy)
    else:
        self.assertTrue(found_mm)
def test_execution_graph_start_stop(self):
    use_cuda = torch.profiler.ProfilerActivity.CUDA in supported_activities()
    # Create a temp file to save execution graph data.
    fp = tempfile.NamedTemporaryFile('w+t', suffix='.json', delete=False)
    fp.close()
    expected_loop_events = 0
    eg = ExecutionGraphObserver()
    eg.register_callback(fp.name)
    for idx in range(10):
        if idx == 3:
            eg.start()
        elif idx == 5:
            eg.stop()
        elif idx == 8:
            eg.start()
        elif idx == 9:
            eg.stop()
            eg.unregister_callback()
        # Only loops executed while the observer is running should
        # appear in the captured graph.
        if eg._execution_graph_running:
            expected_loop_events += 1
        with record_function(f"## LOOP {idx} ##"):
            self.payload(use_cuda=use_cuda)

    assert fp.name == eg.get_output_file_path()
    nodes = self.get_execution_graph_root(fp.name)
    loop_count = 0
    found_root_node = False
    for n in nodes:
        assert "name" in n
        if "[pytorch|profiler|execution_graph|process]" in n["name"]:
            found_root_node = True
        if n["name"].startswith("## LOOP "):
            loop_count += 1
    assert found_root_node
    assert loop_count == expected_loop_events
def test_tensorboard_trace_handler(self):
    use_cuda = torch.profiler.ProfilerActivity.CUDA in supported_activities()
    with _profile(use_cuda=use_cuda, use_kineto=True):
        self.payload(use_cuda=use_cuda)

    with TemporaryDirectoryName() as dname:
        with profile(
            activities=[torch.profiler.ProfilerActivity.CPU] + (
                [torch.profiler.ProfilerActivity.CUDA] if use_cuda else []),
            schedule=torch.profiler.schedule(
                wait=1, warmup=1, active=2, repeat=3),
            on_trace_ready=torch.profiler.tensorboard_trace_handler(dname)
        ) as p:
            for _ in range(18):
                self.payload(use_cuda=use_cuda)
                p.step()

        self.assertTrue(os.path.exists(dname))
        # repeat=3 caps the 4-step wait/warmup/active cycle at three
        # repetitions, so exactly three trace files are expected.
        file_num = 0
        for file_name in os.listdir(dname):
            parts = file_name.split('.')
            self.assertTrue(len(parts) > 4)
            self.assertTrue(parts[-4].isdigit() and int(parts[-4]) > 0,
                            "Wrong tracing file name pattern")
            self.assertEqual(parts[-3:], ['pt', 'trace', 'json'])
            file_num += 1
        self.assertEqual(file_num, 3)

    # test case for gzip file format
    with TemporaryDirectoryName() as dname:
        p = profile(
            activities=[torch.profiler.ProfilerActivity.CPU] + (
                [torch.profiler.ProfilerActivity.CUDA] if use_cuda else []),
            schedule=torch.profiler.schedule(
                wait=1, warmup=1, active=2, repeat=3),
            on_trace_ready=torch.profiler.tensorboard_trace_handler(
                dname, use_gzip=True))
        p.start()
        for _ in range(18):
            self.payload(use_cuda=use_cuda)
            p.step()
        p.stop()

        self.assertTrue(os.path.exists(dname))
        file_num = 0
        for file_name in os.listdir(dname):
            parts = file_name.split('.')
            self.assertTrue(len(parts) > 4)
            self.assertTrue(parts[-5].isdigit() and int(parts[-5]) > 0,
                            "Wrong tracing file name pattern")
            self.assertEqual(parts[-4:], ['pt', 'trace', 'json', 'gz'])
            file_num += 1
        self.assertEqual(file_num, 3)
def test_execution_graph_with_kineto(self):
    trace_called_num = 0

    def trace_handler(p):
        nonlocal trace_called_num
        trace_called_num += 1

    use_cuda = torch.profiler.ProfilerActivity.CUDA in supported_activities()
    # Create a temp file to save execution graph data.
    fp = tempfile.NamedTemporaryFile('w+t', suffix='.json', delete=False)
    fp.close()
    expected_loop_events = 0
    eg = ExecutionGraphObserver()
    eg.register_callback(fp.name)
    with profile(
        activities=supported_activities(),
        schedule=torch.profiler.schedule(
            skip_first=3,
            wait=1,
            warmup=1,
            active=2),
        on_trace_ready=trace_handler,
    ) as p:
        eg.start()
        for idx in range(10):
            expected_loop_events += 1
            with record_function(f"## LOOP {idx} ##"):
                self.payload(use_cuda=use_cuda)
            p.step()
        eg.stop()
    eg.unregister_callback()

    # skip_first=3 leaves 7 profiled steps: one full wait/warmup/active cycle
    # saves a trace, and stopping the profiler while the second cycle is still
    # recording fires the handler once more.
    assert trace_called_num == 2
    assert fp.name == eg.get_output_file_path()
    nodes = self.get_execution_graph_root(fp.name)
    loop_count = 0
    found_root_node = False
    for n in nodes:
        assert "name" in n
        if "[pytorch|profiler|execution_graph|process]" in n["name"]:
            found_root_node = True
        if n["name"].startswith("## LOOP "):
            loop_count += 1
    assert found_root_node
    assert loop_count == expected_loop_events