def _test_profiler_tracing(self, use_kineto):
    """Export chrome traces for a CPU run, an empty run and a CUDA run.

    The CPU and CUDA traces are parsed back with ``json.load``; invalid JSON
    raises and thereby fails the test. The empty trace is only exported.
    The CUDA part is skipped when ``supported_activities()`` does not list
    the CUDA profiler activity.
    """

    def export_trace(profiler, validate=True):
        # Write the trace to a scratch file and optionally parse it back.
        with TemporaryFileName(mode="w+") as trace_path:
            profiler.export_chrome_trace(trace_path)
            if validate:
                with io.open(trace_path, 'r') as trace_file:
                    json.load(trace_file)

    # Plain CPU workload.
    with _profile(use_kineto=use_kineto) as cpu_prof:
        lhs, rhs = torch.ones(1), torch.ones(1)
        torch.add(lhs, rhs)
    export_trace(cpu_prof)

    # A profile that recorded nothing must still be exportable.
    with _profile(use_kineto=use_kineto) as empty_prof:
        pass
    export_trace(empty_prof, validate=False)

    # Same workload on CUDA, when the profiler supports it.
    if torch.profiler.ProfilerActivity.CUDA not in supported_activities():
        return

    cuda_device = torch.device("cuda:0")
    with _profile(use_cuda=True, use_kineto=use_kineto) as cuda_prof:
        lhs, rhs = torch.ones(1, device=cuda_device), torch.ones(1, device=cuda_device)
        torch.add(lhs, rhs)
    export_trace(cuda_prof)
def test_profiler_tracing(self):
    """Profile a tiny add on CPU (and CUDA if available) and validate the trace.

    The exported chrome trace is read back with ``json.load``; malformed
    JSON raises and fails the test.
    """

    def profile_and_validate(profiler_kwargs, tensor_kwargs):
        with _profile(use_kineto=kineto_available(), **profiler_kwargs) as prof:
            lhs, rhs = torch.ones(1, **tensor_kwargs), torch.ones(1, **tensor_kwargs)
            torch.add(lhs, rhs)

        with TemporaryFileName(mode="w+") as trace_path:
            prof.export_chrome_trace(trace_path)
            # Parsing is the actual check: invalid JSON throws here.
            with io.open(trace_path, 'r') as trace_file:
                json.load(trace_file)

    profile_and_validate({}, {})

    # Same test but for cuda.
    if torch.cuda.is_available():
        profile_and_validate({"use_cuda": True}, {"device": torch.device("cuda:0")})
def _test_pickle_checkpoint_qtensor(self, device):
    """Check that quantized tensors saved from a ScriptModule load back equal.

    A script module whose ``forward`` calls ``torch.save`` on its two inputs
    is run with a per-tensor and a per-channel quantized tensor placed on
    ``device``; the saved file is then loaded and compared with the inputs.

    Args:
        device: device the quantized tensors are moved to before saving.
    """
    with TemporaryFileName() as fname:
        class M(torch.jit.ScriptModule):
            __constants__ = ['fname']

            def __init__(self):
                super(M, self).__init__()
                self.fname = fname

            @torch.jit.script_method
            def forward(self, x, y):
                torch.save((x, y), self.fname)
                return y

        q = torch.quantize_per_tensor(
            torch.rand(2, 3, dtype=torch.float),
            scale=0.1,
            zero_point=10,
            dtype=torch.quint8).to(device)
        qc = torch.quantize_per_channel(
            torch.rand(2, 3, dtype=torch.float),
            scales=torch.tensor([0.1, 0.5, 0.01]),
            zero_points=torch.tensor([10, 0, 20]),
            axis=1,
            dtype=torch.quint8).to(device)
        m = M()
        m(q, qc)

        # Load by file name, exactly as before; the extra, never-used
        # open(fname, "rb") handle around this call has been dropped.
        loaded_q, loaded_qc = torch.load(fname)
        self.assertEqual(loaded_q, q)
        self.assertEqual(loaded_qc, qc)
def _test_serialization(self, module, inputs):
    """Save ``module`` with ``torch.jit.save``, reload it and compare outputs.

    Both the original and the reloaded module are called with ``*inputs``
    and their results are compared after ``to_dense()``.
    """
    with TemporaryFileName() as path:
        torch.jit.save(module, path)
        reloaded = torch.jit.load(path)
        expected = module(*inputs).to_dense()
        actual = reloaded(*inputs).to_dense()
        self.assertEqual(expected, actual)
def test_serialization_zipfile_utils(self):
    """Round-trip raw records through the zipfile writer/reader helpers.

    The same records are written and read back using an open temporary file
    object, a temporary file name and an in-memory ``BytesIO`` buffer.
    """
    records = {
        'a': b'12039810948234589',
        'b': b'1239081209484958',
        'c/d': b'94589480984058',
    }

    def round_trip(target):
        with torch.serialization._open_zipfile_writer(target) as archive:
            for name, payload in records.items():
                archive.write_record(name, payload, len(payload))

        # File-like targets have to be rewound before they can be read.
        if hasattr(target, 'seek'):
            target.seek(0)

        with torch.serialization._open_zipfile_reader(target) as archive:
            for name, expected in records.items():
                actual = archive.get_record(name)
                self.assertEqual(expected, actual)

    with tempfile.NamedTemporaryFile() as tmp:
        round_trip(tmp)

    with TemporaryFileName() as path:
        round_trip(path)

    round_trip(io.BytesIO())
def test_pathlike_serialization(self):
    """``torch.save`` / ``torch.load`` must accept a ``pathlib.Path`` target."""
    conv = torch.nn.Conv2d(20, 3200, kernel_size=3)

    with TemporaryFileName() as raw_name:
        target = pathlib.Path(raw_name)
        torch.save(conv, target)
        torch.load(target)
def test_conv2d_legacy_jit_model(self):
    """
    MKLDNN integration used to serialize models with 5d weight for grouped
    convolutions, we'd like to preserve this behavior
    """
    groups = 4
    conv2d = torch.nn.Conv2d(16, 16, 3, groups=groups)
    conv2d_mkldnn = torch.utils.mkldnn.to_mkldnn(conv2d)

    # Fabricate a legacy module by swapping in a 5-d view of the weight.
    out_ch, in_ch, k_h, k_w = conv2d.weight.shape
    legacy_shape = (groups, out_ch // groups, in_ch, k_h, k_w)
    conv2d_mkldnn.weight = conv2d.weight.reshape(legacy_shape).to_mkldnn()

    x = torch.randn(1, 16, 8, 8)

    with TemporaryFileName() as path:
        torch.jit.save(conv2d_mkldnn, path)
        conv2d_loaded = torch.jit.load(path)

        # The in-memory module keeps the 5-d weight; the reloaded one has 4 dims.
        self.assertEqual(conv2d_mkldnn.weight.ndimension(), 5)
        self.assertEqual(conv2d_loaded.weight.ndimension(), 4)

        reference = conv2d(x)
        reloaded_out = conv2d_loaded(x.to_mkldnn()).to_dense()
        self.assertEqual(reference, reloaded_out)
def test_rref_jit_pickle_not_supported(self):
    """``save_rref`` on an RRef outside an RPC call must raise ``RuntimeError``."""
    dst_rank = (self.rank + 1) % self.world_size
    remote_ref = rpc_return_rref(worker_name(dst_rank))
    expected_msg = "RRef jit pickling is only allowed inside RPC calls"

    with TemporaryFileName() as path, self.assertRaisesRegex(RuntimeError, expected_msg):
        save_rref(remote_ref, path)
def test_remote_module_py_pickle_not_supported_script(self):
    """``torch.save`` on an interface-created remote module raises ``pickle.PickleError``.

    Only rank 0 runs the check; every other rank returns immediately.
    """
    if self.rank != 0:
        return

    next_rank = (self.rank + 1) % self.world_size
    dst_worker_name = dist_utils.worker_name(next_rank)
    creation_modes = [ModuleCreationMode.MODULE_CTOR_WITH_INTERFACE]

    remote_modules = self._create_remote_module_iter(dst_worker_name, modes=creation_modes)
    for remote_module in remote_modules:
        with TemporaryFileName() as path, self.assertRaises(pickle.PickleError):
            torch.save(remote_module, path)
def getExportImportCopy(self, m, also_test_file=True, map_location=None):
    """Return a copy of ``m`` obtained by a ``torch.jit`` save/load round trip.

    The module is first round-tripped through an in-memory buffer. When
    ``also_test_file`` is true, that copy is additionally saved to a
    temporary file and the module loaded from the file is returned instead.

    Args:
        m: module to export.
        also_test_file: also round-trip through a temporary file.
        map_location: forwarded to every ``torch.jit.load`` call.
    """
    in_memory = io.BytesIO()
    torch.jit.save(m, in_memory)
    in_memory.seek(0)
    result = torch.jit.load(in_memory, map_location=map_location)

    if also_test_file:
        with TemporaryFileName() as path:
            torch.jit.save(result, path)
            # Load while the temporary file still exists.
            result = torch.jit.load(path, map_location=map_location)

    return result
def test_save_load_with_extra_files(self):
    """Extra files passed at save time must be returned by ``torch.jit.load``.

    Covers saving through the module method and through ``torch.jit.save``,
    to a file name and to an in-memory buffer, with binary and ``str``
    payloads, and finally requests an entry that was never saved.
    """
    class MyMod(torch.jit.ScriptModule):
        @torch.jit.script_method
        def forward(self, a):
            return a

    # specifically test binary data
    binary_value = b"bar\x00\xffbaz"

    # 'foo2' verifies that str to bytes conversion also works
    saved_extras = {'foo': binary_value, 'foo2': "bar"}
    module = MyMod()

    # Save to file.
    with TemporaryFileName() as path:
        module.save(path, _extra_files=saved_extras)
        # The placeholder values don't matter; load overwrites them.
        requested = {'foo': '', 'foo2': None}
        torch.jit.load(path, _extra_files=requested)
        self.assertEqual(binary_value, requested['foo'])
        # results come back always as bytes
        self.assertEqual(b"bar", requested['foo2'])

        # Use torch.jit API
        torch.jit.save(module, path, _extra_files=saved_extras)
        requested['foo'] = ''
        torch.jit.load(path, _extra_files=requested)
        self.assertEqual(binary_value, requested['foo'])

    # Save to buffer.
    stream = io.BytesIO(module.save_to_buffer(_extra_files=saved_extras))
    requested = {'foo': ''}
    torch.jit.load(stream, _extra_files=requested)
    self.assertEqual(binary_value, requested['foo'])

    # Use torch.jit API
    stream = io.BytesIO()
    torch.jit.save(module, stream, _extra_files=saved_extras)
    stream.seek(0)
    requested = {'foo': ''}
    torch.jit.load(stream, _extra_files=requested)
    self.assertEqual(binary_value, requested['foo'])

    # Non-existent file 'bar'
    # NOTE(review): the buffer is not rewound before this load, so the
    # RuntimeError may stem from the exhausted stream rather than from the
    # missing 'bar' entry -- confirm which one is intended.
    requested['bar'] = ''
    with self.assertRaises(RuntimeError):
        torch.jit.load(stream, _extra_files=requested)
def test_remote_module_py_pickle_not_supported(self):
    """``torch.save`` on a constructor-created remote module raises ``RuntimeError``.

    Only rank 0 runs the check; every other rank returns immediately.
    """
    if self.rank != 0:
        return

    next_rank = (self.rank + 1) % self.world_size
    dst_worker_name = dist_utils.worker_name(next_rank)
    expected_msg = "Cannot pickle RemoteModule in python pickler. RemoteModule can only be pickled when using RPC"

    remote_modules = self._create_remote_module_iter(
        dst_worker_name, modes=[ModuleCreationMode.MODULE_CTOR])
    for remote_module in remote_modules:
        with TemporaryFileName() as path, self.assertRaisesRegex(RuntimeError, expected_msg):
            torch.save(remote_module, path)
def compare_enabled_disabled(self, src):
    """
    Runs the script in `src` with PYTORCH_JIT enabled and disabled and
    compares their stdout for equality.

    Args:
        src: source text of the script to execute.

    Raises:
        subprocess.CalledProcessError: if either run exits with a non-zero
            status (raised by ``subprocess.check_output``).
    """
    # Write `src` out to a temporary so our source inspection logic works
    # correctly.
    with TemporaryFileName() as fname:
        with open(fname, 'w') as f:
            f.write(src)
        # The file is closed (and therefore flushed) at this point, so the
        # child interpreters are guaranteed to see the complete script.

        command = [sys.executable, fname]
        with _jit_disabled():
            out_disabled = subprocess.check_output(command)
        out_enabled = subprocess.check_output(command)
        self.assertEqual(out_disabled, out_enabled)
def test_profiler_metadata(self):
    """Metadata added during profiling must appear in the exported chrome trace.

    One entry is added with ``add_metadata`` (plain string) and one with
    ``add_metadata_json`` (JSON text); both are looked up as top-level keys
    of the parsed trace.
    """
    lhs, rhs = torch.ones(1), torch.ones(1)
    with profile() as prof:
        torch.add(lhs, rhs)
        prof.add_metadata("test_key1", "test_value1")
        prof.add_metadata_json("test_key2", "[1,2,3]")

    expected_entries = (
        ("test_key1", "test_value1"),
        ("test_key2", [1, 2, 3]),
    )
    with TemporaryFileName(mode="w+") as trace_path:
        prof.export_chrome_trace(trace_path)
        with io.open(trace_path, 'r') as trace_file:
            trace = json.load(trace_file)
            for key, expected in expected_entries:
                assert key in trace
                assert trace[key] == expected
def test_save_load_using_pathlib(self):
    """A script module saved to a ``pathlib.Path`` reloads with equal output."""
    class MyMod(torch.jit.ScriptModule):
        @torch.jit.script_method
        def forward(self, a):
            return 2 * a

    original = MyMod()

    # Save then load.
    with TemporaryFileName() as raw_name:
        target = pathlib.Path(raw_name)
        original.save(target)
        reloaded = torch.jit.load(target)

    sample = torch.tensor([1., 2., 3., 4.])
    outputs_match = torch.equal(original(sample), reloaded(sample))
    self.assertTrue(outputs_match)
def test_save_load_with_extra_files(self):
    """Extra files stored in an ``ExtraFilesMap`` must be returned on load.

    Covers saving through the module method and through ``torch.jit.save``,
    to a file name and to an in-memory buffer, and finally requests an entry
    that was never saved.
    """
    class MyMod(torch.jit.ScriptModule):
        @torch.jit.script_method
        def forward(self, a):
            return a

    def blank_request():
        # A fresh map asking for 'foo'; the placeholder value is overwritten on load.
        request = torch._C.ExtraFilesMap()
        request['foo'] = ''
        return request

    saved_extras = torch._C.ExtraFilesMap()
    saved_extras['foo'] = 'bar'
    module = MyMod()

    # Save to file.
    with TemporaryFileName() as path:
        module.save(path, _extra_files=saved_extras)
        requested = blank_request()
        torch.jit.load(path, _extra_files=requested)
        self.assertEqual('bar', requested['foo'])

        # Use torch.jit API
        torch.jit.save(module, path, _extra_files=saved_extras)
        requested['foo'] = ''
        torch.jit.load(path, _extra_files=requested)
        self.assertEqual('bar', requested['foo'])

    # Save to buffer.
    stream = io.BytesIO(module.save_to_buffer(_extra_files=saved_extras))
    requested = blank_request()
    torch.jit.load(stream, _extra_files=requested)
    self.assertEqual('bar', requested['foo'])

    # Use torch.jit API
    stream = io.BytesIO()
    torch.jit.save(module, stream, _extra_files=saved_extras)
    stream.seek(0)
    requested = blank_request()
    torch.jit.load(stream, _extra_files=requested)
    self.assertEqual('bar', requested['foo'])

    # Non-existent file 'bar'
    # NOTE(review): the buffer is not rewound before this load, so the
    # RuntimeError may stem from the exhausted stream rather than from the
    # missing 'bar' entry -- confirm which one is intended.
    requested['bar'] = ''
    with self.assertRaises(RuntimeError):
        torch.jit.load(stream, _extra_files=requested)
def test_profiler_fwd_bwd_link(self):
    """Check the forward/backward flow events in an exported chrome trace.

    After profiling a small forward + backward pass, the trace must contain
    exactly two "fwd_bwd" flow starts and two flow finishes with ids 1 and 2.
    Every flow timestamp must coincide with a complete ("X") event, and the
    two flow starts must sit on the BCE-with-logits and add operators.
    """
    with _profile(use_kineto=True) as prof:
        t1, t2 = torch.ones(1, requires_grad=True), torch.ones(1, requires_grad=True)
        z = torch.add(t1, t2)
        y = torch.ones(1)
        loss = torch.nn.functional.binary_cross_entropy_with_logits(z, y)
        loss.backward()

    with TemporaryFileName(mode="w+") as trace_path:
        prof.export_chrome_trace(trace_path)
        with io.open(trace_path, 'r') as trace_file:
            trace = json.load(trace_file)
            name_at_ts = {}
            flow_start_ts = {}
            flow_finish_ts = {}
            for event in trace["traceEvents"]:
                phase = event["ph"]
                if phase == "X":
                    name_at_ts[event["ts"]] = event["name"]
                is_fwd_bwd_flow = (
                    event.get("cat") == "forward_backward"
                    and event.get("name") == "fwd_bwd"
                )
                if not is_fwd_bwd_flow:
                    continue
                if phase == "s":
                    flow_start_ts[event["id"]] = event["ts"]
                elif phase == "f":
                    flow_finish_ts[event["id"]] = event["ts"]

            self.assertTrue(len(flow_start_ts) == 2)
            self.assertTrue(len(flow_finish_ts) == 2)
            for flow_id in (1, 2):
                self.assertTrue(flow_id in flow_start_ts)
                self.assertTrue(flow_id in flow_finish_ts)

            s_ts_1, f_ts_1 = flow_start_ts[1], flow_finish_ts[1]
            s_ts_2, f_ts_2 = flow_start_ts[2], flow_finish_ts[2]
            # Each flow endpoint has to line up with a complete event.
            endpoints = (s_ts_1, f_ts_1, s_ts_2, f_ts_2)
            self.assertTrue(all(ts in name_at_ts for ts in endpoints))
            self.assertTrue(name_at_ts[s_ts_1] == "aten::binary_cross_entropy_with_logits")
            self.assertTrue(name_at_ts[s_ts_2] == "aten::add")
def test_serialization_zipfile(self):
    """Round-trip the shared serialization data through three kinds of target.

    Targets are an open temporary file object, a temporary file name and an
    in-memory ``BytesIO`` buffer.
    """
    payload = self._test_serialization_data()

    def save_then_load(target):
        torch.save(payload, target)
        # File-like targets have to be rewound before loading.
        if hasattr(target, 'seek'):
            target.seek(0)
        restored = torch.load(target)
        self.assertEqual(restored, payload)

    with tempfile.NamedTemporaryFile() as tmp:
        save_then_load(tmp)

    with TemporaryFileName() as path:
        save_then_load(path)

    save_then_load(io.BytesIO())
def test_export_stacks(self):
    """Exported stack records must be non-empty and end in a positive integer.

    Each line of the file written by ``export_stacks`` is split on spaces
    and its last field has to parse as an integer greater than zero.
    """
    with profile(with_stack=True, use_kineto=kineto_available()) as p:
        x = torch.randn(10, 10)
        y = torch.randn(10, 10)
        z = torch.mm(x, y)
        z = z + y

    with TemporaryFileName(mode="w+") as stacks_path:
        p.export_stacks(stacks_path)
        with io.open(stacks_path, 'r') as stacks_file:
            records = stacks_file.readlines()

        assert records, "Empty stacks file"
        for record in records:
            try:
                trailing_value = int(record.split(" ")[-1])
            except ValueError:
                # A non-numeric last field is reported as an invalid record below.
                trailing_value = None
            assert trailing_value is not None, "Invalid stacks record"
            assert trailing_value > 0, "Invalid stacks record"
def test_serialization(self):
    """Round-trip the shared test data and load a legacy non-ASCII payload.

    The data from ``_test_serialization_data`` is saved to and loaded from
    an open temporary file and a temporary file name. Afterwards a fixed
    byte string is loaded with ``encoding='utf-8'`` and ``encoding='bytes'``.
    """
    # Test serialization with a real file
    original = self._test_serialization_data()
    with tempfile.NamedTemporaryFile() as tmp:
        torch.save(original, tmp)
        tmp.seek(0)
        restored = torch.load(tmp)
        self._test_serialization_assert(original, restored)

    with TemporaryFileName() as path:
        torch.save(original, path)
        restored = torch.load(path)
        self._test_serialization_assert(original, restored)

    # test non-ascii encoding of bytes arrays/strings
    # The following bytes are produced by serializing
    #   [b'\xc5\xbc\xc4\x85\xc4\x85\xc3\xb3\xc5\xbc\xc4\x85\xc5\xbc', torch.zeros(1, dtype=torch.float), 2]
    # in Python 2.7.12 and PyTorch 0.4.1, where the first element contains
    # bytes of some utf-8 characters (i.e., `utf8_str.encode('utf-8')`).
    legacy_payload = (
        b'\x80\x02\x8a\nl\xfc\x9cF\xf9 j\xa8P\x19.\x80\x02M\xe9\x03.'
        b'\x80\x02}q\x01(U\x10protocol_versionq\x02M\xe9\x03U\n'
        b'type_sizesq\x03}q\x04(U\x03intq\x05K\x04U\x05shortq\x06K\x02U'
        b'\x04longq\x07K\x04uU\rlittle_endianq\x08\x88u.\x80\x02]q'
        b'\x01(U\x0e\xc5\xbc\xc4\x85\xc4\x85\xc3\xb3\xc5\xbc\xc4\x85'
        b'\xc5\xbcq\x02ctorch._utils\n_rebuild_tensor_v2\nq\x03((U'
        b'\x07storageq\x04ctorch\nFloatStorage\nq\x05U\x0845640624q'
        b'\x06U\x03cpuq\x07\x8a\x01\x01NtQK\x00K\x01\x85K\x01\x85'
        b'\x89NtRq\x08K\x02e.\x80\x02]q\x01U\x0845640624q\x02a.\x01\x00'
        b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
    legacy_stream = io.BytesIO(legacy_payload)
    utf8_bytes = b'\xc5\xbc\xc4\x85\xc4\x85\xc3\xb3\xc5\xbc\xc4\x85\xc5\xbc'
    utf8_str = utf8_bytes.decode('utf-8')

    # The first element comes back as str or bytes depending on `encoding`.
    for encoding, first_item in (('utf-8', utf8_str), ('bytes', utf8_bytes)):
        legacy_stream.seek(0)
        decoded = torch.load(legacy_stream, encoding=encoding)
        self.assertEqual(decoded, [first_item, torch.zeros(1, dtype=torch.float), 2])
def test_pickle_future(self):
    """``torch.save`` on a ``Future`` must raise ``RuntimeError``."""
    pending = Future()
    expected_msg = "Can not pickle torch.futures.Future"
    with TemporaryFileName() as path, self.assertRaisesRegex(RuntimeError, expected_msg):
        torch.save(pending, path)
def test_module_hierarchy(self):
    """Check the "Module Hierarchy" annotation of ops in a scripted model trace.

    A small scripted model with nested submodules is profiled with
    ``with_modules=True``. For every trace event that carries a
    "Module Hierarchy" argument and whose name is one of the listed
    operators, the recorded hierarchy must be one of the expected strings.
    """
    class A(nn.Module):
        def __init__(self):
            super(A, self).__init__()

        def my_new_method(self, x):
            return x * 3

        def forward_impl_(self, x, y):
            return self.my_new_method(x) + y

        def forward(self, x, y):
            y = y - 2
            return self.forward_impl_(x, y)

    class B(nn.Module):
        def __init__(self):
            super(B, self).__init__()

        def forward(self, x):
            return x + 2

    class C(nn.Module):
        def __init__(self):
            super(C, self).__init__()
            self.A0 = A()
            self.B0 = B()

        def call_b(self, x):
            return self.B0.forward(x)

        def forward(self, x, y):
            return self.A0.forward(x, y) + self.call_b(x)

    model = C()
    model = torch.jit.script(model)
    input_a = torch.rand(128, 128)
    input_b = torch.rand(128, 128)

    # Operator name -> hierarchy strings accepted for that operator.
    op_to_module_hierarchy = {
        "aten::sub": [
            "TOP(C)::forward.A0(A)::forward.",
        ],
        "aten::mul": [
            "TOP(C)::forward.A0(A)::forward.SELF(A)::forward_impl_.SELF(A)::my_new_method.",
        ],
        "aten::add": [
            "TOP(C)::forward.A0(A)::forward.SELF(A)::forward_impl_.",
            "TOP(C)::forward.SELF(C)::call_b.B0(B)::forward.",
            "TOP(C)::forward.",
        ],
    }

    with TemporaryFileName(mode="w+") as fname:
        with profile(activities=[torch.profiler.ProfilerActivity.CPU], with_modules=True) as prof:
            model(input_a, input_b)
        prof.export_chrome_trace(fname)
        with io.open(fname, 'r') as f:
            trace = json.load(f)

        assert "traceEvents" in trace
        for evt in trace["traceEvents"]:
            assert "name" in evt
            # Events without a hierarchy annotation, or for operators that
            # are not listed above, are not checked.
            if "args" not in evt or "Module Hierarchy" not in evt["args"]:
                continue
            op_name = evt["name"]
            if op_name in op_to_module_hierarchy:
                hierarchy = evt["args"]["Module Hierarchy"]
                assert hierarchy in op_to_module_hierarchy[op_name]
def test_memory_profiler(self):
    """Check the memory statistics reported by the profiler.

    Tensors are created and deleted inside named ``record_function`` scopes
    and the per-key memory metrics are checked for their sign: listed
    allocating keys must be positive, listed deallocating keys negative.
    CPU is always exercised; CUDA and MKLDNN only when available. When
    kineto is available, the exported chrome trace is additionally checked
    for "[memory]" events.
    """
    def run_profiler(tensor_creation_fn):
        # Profile one allocation and one deallocation, each inside its own
        # named user scope, and return the averaged statistics.
        # collecting allocs / deallocs
        with _profile(profile_memory=True, record_shapes=True, use_kineto=kineto_available()) as prof:
            x = None
            with record_function("test_user_scope_alloc"):
                x = tensor_creation_fn()
            with record_function("test_user_scope_dealloc"):
                del x
        return prof.key_averages(group_by_input_shape=True)

    def check_metrics(stats, metric, allocs=None, deallocs=None):
        # Map each stat key to the requested metric, then require every key
        # in `allocs` to be present with a positive value and every key in
        # `deallocs` to be present with a negative value.
        stat_metrics = {}
        for stat in stats:
            stat_metrics[stat.key] = getattr(stat, metric)
        if allocs is not None:
            for alloc_fn in allocs:
                self.assertTrue(alloc_fn in stat_metrics)
                self.assertTrue(stat_metrics[alloc_fn] > 0)
        if deallocs is not None:
            for dealloc_fn in deallocs:
                self.assertTrue(dealloc_fn in stat_metrics)
                self.assertTrue(stat_metrics[dealloc_fn] < 0)

    def create_cpu_tensor():
        return torch.rand(10, 10)

    def create_cuda_tensor():
        return torch.rand(10, 10).cuda()

    def create_mkldnn_tensor():
        return torch.rand(10, 10, dtype=torch.float32).to_mkldnn()

    # CPU allocations and deallocations.
    stats = run_profiler(create_cpu_tensor)
    check_metrics(
        stats,
        "cpu_memory_usage",
        allocs=[
            "aten::empty",
            "aten::rand",
            "test_user_scope_alloc",
        ],
        deallocs=[
            "test_user_scope_dealloc",
        ])

    # With kineto, the chrome trace must contain "[memory]" events that
    # carry device and size arguments.
    if kineto_available():
        with TemporaryFileName(mode="w+") as fname:
            with profile(profile_memory=True) as prof:
                x = None
                with record_function("test_user_scope_alloc"):
                    x = create_cpu_tensor()
                with record_function("test_user_scope_dealloc"):
                    del x
            prof.export_chrome_trace(fname)
            with io.open(fname, 'r') as f:
                trace = json.load(f)
                assert "traceEvents" in trace
                events = trace["traceEvents"]
                found_memory_events = False
                for evt in events:
                    assert "name" in evt
                    if evt["name"] == "[memory]":
                        found_memory_events = True
                        assert "args" in evt
                        assert "Device Type" in evt["args"]
                        assert "Device Id" in evt["args"]
                        assert "Bytes" in evt["args"]
                assert found_memory_events

    if torch.cuda.is_available():
        # One un-profiled call first -- presumably a warm-up so one-time
        # setup allocations are not attributed to the profiled run (TODO confirm).
        create_cuda_tensor()
        stats = run_profiler(create_cuda_tensor)
        check_metrics(
            stats,
            "cuda_memory_usage",
            allocs=[
                "test_user_scope_alloc",
                "aten::to",
                "aten::empty_strided",
            ],
            deallocs=[
                "test_user_scope_dealloc",
            ])
        check_metrics(
            stats,
            "cpu_memory_usage",
            allocs=[
                "aten::rand",
                "aten::empty",
            ])

    if torch._C.has_mkldnn:
        # Un-profiled call first, mirroring the CUDA branch above.
        create_mkldnn_tensor()
        stats = run_profiler(create_mkldnn_tensor)
        check_metrics(
            stats,
            "cpu_memory_usage",
            allocs=[
                "test_user_scope_alloc",
                "aten::rand",
                "aten::empty",
                "aten::to_mkldnn",
            ],
            deallocs=[
                "test_user_scope_dealloc",
            ])

    # check top-level memory events
    with _profile(profile_memory=True, use_kineto=kineto_available()) as prof:
        x = torch.rand(10, 10)
        del x
        if torch.cuda.is_available():
            y = torch.rand(10, 10).cuda()
            del y
        gc.collect()
    stats = prof.key_averages(group_by_input_shape=True)
    # Deallocations made outside any user scope are expected under "[memory]".
    check_metrics(
        stats,
        "cpu_memory_usage",
        allocs=["aten::rand", "aten::empty"],
        deallocs=["[memory]"])
    if torch.cuda.is_available():
        check_metrics(stats, "cuda_memory_usage", deallocs=["[memory]"])
def test_profiling(self):
    """Run ``linear_test`` on ``TwoLayerNetModule`` with profiler output sent to a temporary file."""
    with TemporaryFileName() as trace_path:
        self.linear_test(
            TwoLayerNetModule,
            profiler_output_path=trace_path,
        )
def test_source(self):
    """Checks that source code attribution works for eager, TS and autograd mode

    Graph-executor optimization is switched off for the duration of the test
    and restored to its previous value afterwards, even when an assertion
    fails, so the changed global setting cannot leak into later tests.
    """
    # avoid automatic inlining
    prev_opt = torch._C._get_graph_executor_optimize()
    torch._C._set_graph_executor_optimize(False)
    try:
        @torch.jit.script
        def ts_method_2(x, y):
            return torch.matmul(x, y)

        @torch.jit.script
        def ts_method_1(x, y, z):
            a = x + z
            w = ts_method_2(x, y) + a
            return w.sum()

        class DummyModule(nn.Module):
            def __init__(self):
                super(DummyModule, self).__init__()
                self.conv = torch.nn.Conv2d(3, 2, kernel_size=1, stride=2, padding=3, bias=False)

            def forward(self, x):
                return self.conv(x)

        mod = DummyModule()

        def call_module(x):
            return mod(x)

        # The profiled statements stay inline in this method because the
        # stack checks below look for "test_source" in the recorded frames.
        with _profile(with_stack=True, use_kineto=kineto_available()) as p:
            x = torch.randn(10, 10, requires_grad=True)
            y = torch.randn(10, 10, requires_grad=True)
            z = x + y
            w = ts_method_1(x, y, z)
            v = 2 * w
            v.backward()
            a = torch.randn(2, 3, 2, 2, requires_grad=True)
            b = call_module(a)
            c = b.sum()
            c.backward()

        # Every add (forward or backward) must be attributed to this test
        # file and to either this method or one of the scripted helpers.
        for e in p.function_events:
            if "aten::add" in e.name or "AddBackward" in e.name:
                self.assertTrue(any("test_profiler" in entry for entry in e.stack))
                self.assertTrue(any(
                    "test_source" in entry
                    or "ts_method_1" in entry
                    or "ts_method_2" in entry
                    for entry in e.stack))

        # TODO: https://github.com/pytorch/kineto/issues/617
        if kineto_available() and not IS_WINDOWS:
            with TemporaryFileName(mode="w+") as fname:
                p.export_chrome_trace(fname)
                with io.open(fname, 'r') as f:
                    events = json.load(f)["traceEvents"]

                def extract(pattern: str):
                    # Return the single trace event whose name matches `pattern`.
                    matches = [e for e in events if re.search(pattern, e["name"])]
                    self.assertEqual(len(matches), 1, repr([e["name"] for e in matches]))
                    return matches[0]

                module_event = extract(r"DummyModule_0")
                wrapper_event = extract(r"call_module")
                # The module call must be recorded as a child of its wrapper.
                self.assertEqual(
                    module_event["args"]["Python parent id"],
                    wrapper_event["args"]["Python id"])
    finally:
        torch._C._set_graph_executor_optimize(prev_opt)