def _test_output_bf16(self, model, x):
    modelName = model.__class__.__name__
    core.enable_auto_dnnl()
    core.enable_jit()
    core.disable_mix_bf16_fp32()
    model = model.to('dpcpp').eval()
    x = x.to('dpcpp')
    x2 = x.clone()

    fused_model = torch.jit.script(copy.deepcopy(model))
    # bn folding; remove this after the issue is solved (use mix precision to check)
    core.disable_auto_dnnl()
    fused_model = wrap_cpp_module(
        torch._C._jit_pass_fold_convbn(fused_model._c))
    core.enable_auto_dnnl()

    core.enable_mix_bf16_fp32()
    # prepack convolution weight; the weight will be a bf16 tensor
    fused_model = wrap_cpp_module(
        core._jit_prepack_conv_weight(fused_model._c))
    with torch.no_grad():
        # bf16, native path
        result = model(x)
        # bf16, jit path
        fresult = fused_model(x2)

    # print(result)
    # print(fresult)
    self.assertEqual(fresult, result)
def _convert_script(model, is_dynamic, debug=False):
    _check_is_script_module(model)
    model.eval()
    model = wrap_cpp_module(torch._C._jit_pass_insert_quant_dequant(model._c, 'forward', False, is_dynamic))
    if not debug:
        model = wrap_cpp_module(torch._C._jit_pass_quant_finalize(model._c, is_dynamic))
    return model
def convert_script(model, inplace=False, debug=False):
    _check_is_script_module(model)
    if not inplace:
        model = model.copy()
    model.eval()
    model = wrap_cpp_module(torch._C._jit_pass_insert_quant_dequant(model._c, 'forward', False))
    if not debug:
        model = wrap_cpp_module(torch._C._jit_pass_quant_finalize(model._c))
    return model
def _convert_script(model, inplace=False, debug=False, quant_type=QuantType.STATIC):
    assert not inplace, "The inplace support is still in development"
    _check_is_script_module(model)
    model.eval()
    model = wrap_cpp_module(torch._C._jit_pass_insert_quant_dequant(model._c, 'forward', inplace, debug, quant_type))
    if not debug:
        # Moving model parameters to CPU since quantized operators
        # are only supported on CPU right now
        model.cpu()
        model = wrap_cpp_module(torch._C._jit_pass_quant_finalize(model._c, quant_type))
    return model
def _convert_script(model, inplace=False, debug=False, is_dynamic=False):
    assert not inplace, "The inplace support is still in development"
    _check_is_script_module(model)
    model.eval()
    model = wrap_cpp_module(
        torch._C._jit_pass_insert_quant_dequant(model._c, 'forward', inplace, is_dynamic))
    if not debug:
        model = wrap_cpp_module(
            torch._C._jit_pass_quant_finalize(model._c, is_dynamic))
    return model
def _prepare_script(model, qconfig_dict, inplace=False, quant_type=QuantType.STATIC):
    assert not inplace, "The inplace support is still in development"
    _check_is_script_module(model)
    _check_forward_method(model)
    if not all(isinstance(x, str) for x in qconfig_dict.keys()):
        raise ValueError('qconfig_dict should only contain names(str) as keys.')
    scripted_qconfig_dict = script_qconfig_dict(qconfig_dict)
    model = wrap_cpp_module(torch._C._jit_pass_fold_convbn(model._c))
    return wrap_cpp_module(torch._C._jit_pass_insert_observers(model._c,
                                                               'forward',
                                                               scripted_qconfig_dict,
                                                               inplace,
                                                               quant_type))
def _prepare_script(model, qconfig_dict, is_dynamic):
    _check_is_script_module(model)
    _check_forward_method(model)
    if not all(isinstance(x, str) for x in qconfig_dict.keys()):
        raise ValueError(
            'qconfig_dict should only contain names(str) as keys.')
    scripted_qconfig_dict = script_qconfig_dict(qconfig_dict)
    torch._C._jit_pass_dedup_module_uses(model._c)
    model = wrap_cpp_module(torch._C._jit_pass_fold_convbn(model._c))
    return wrap_cpp_module(
        torch._C._jit_pass_insert_observers(model._c, 'forward',
                                            scripted_qconfig_dict, False,
                                            is_dynamic))
def jit_module_from_flatbuffer(f):
    ff = get_ff_module()
    if isinstance(f, string_classes):
        if not os.path.exists(f):  # type: ignore[type-var]
            raise ValueError("The provided filename {} does not exist".format(f))  # type: ignore[str-bytes-safe]
        if os.path.isdir(f):
            raise ValueError("The provided filename {} is a directory".format(f))  # type: ignore[str-bytes-safe]
    if isinstance(f, str) or isinstance(f, pathlib.Path):
        f = str(f)
        return wrap_cpp_module(ff._load_jit_module_from_file(f))
    else:
        return wrap_cpp_module(ff._load_jit_module_from_bytes(f.read()))
def _convert_jit(model, inplace=False, debug=False, quant_type=QuantType.STATIC,
                 preserved_attrs=None):
    _check_is_script_module(model)
    model.eval()
    model_c = model._c
    model_c = torch._C._jit_pass_insert_quant_dequant(model_c, 'forward', inplace, debug, quant_type)
    if not debug:
        is_xpu = all(p.device.type == 'xpu' for p in model.parameters())
        if not is_xpu:
            # Moving model parameters to CPU since quantized operators
            # are only supported on CPU and XPU right now
            model.cpu()
        if preserved_attrs is None:
            preserved_attrs = []
        model_c = torch._C._jit_pass_quant_finalize(model_c, quant_type, preserved_attrs)
    if inplace:
        model._reconstruct(model_c)
    else:
        model = wrap_cpp_module(model_c)
    torch._C._jit_pass_constant_propagation(model.graph)
    torch._C._jit_pass_dce(model.graph)
    return model
def _prepare_script(model, qconfig_dict, is_dynamic):
    _check_is_script_module(model)
    scripted_qconfig_dict = get_scripted_qconfig_dict(qconfig_dict)
    return wrap_cpp_module(
        torch._C._jit_pass_insert_observers(model._c, 'forward',
                                            scripted_qconfig_dict, False,
                                            is_dynamic))
def _reconstruct(self, cpp_module):
    """
    Re-construct an instance of RecursiveScriptModule using an instance of a C++ module.

    Args:
        cpp_module: The C++ module that this RecursiveScriptModule will be rebuilt around.
    """
    self.__init__(cpp_module)  # type: ignore

    # Copy the concrete type from the C++ module to this ScriptModule.
    self._concrete_type = torch._C.ConcreteModuleType.from_jit_type(
        self._c._type())

    # Copy submodules from the C++ module to this ScriptModule.
    modules = {}
    for name, cpp_module in torch._C.ModuleDict(self._c).items():
        modules[name] = wrap_cpp_module(cpp_module)
    self._modules = OrderedModuleDict(self._c, modules)

    # Copy parameters and buffers.
    self._parameters = OrderedDictWrapper(
        torch._C.ParameterDict(self._c))
    self._buffers = OrderedDictWrapper(torch._C.BufferDict(self._c))

    # Get rid of the functions from the old C++ module.
    self.__dict__ = {
        k: v
        for k, v in self.__dict__.items()
        if not isinstance(v, torch._C.ScriptMethod)
    }
    self.__dict__["_initializing"] = False
def prepare_script(model, qconfig_dict, inplace=False):
    _check_is_script_module(model)
    if not inplace:
        model = model.copy()
    model = wrap_cpp_module(
        torch._C._jit_pass_insert_observers(model._c, 'forward',
                                            qconfig_dict, False))
    return model
def prepare_dynamic_script(model, qconfig_dict):
    _check_is_script_module(model)
    scripted_qconfig_dict = {k: script_qconfig(v) for k, v in qconfig_dict.items()}
    model = wrap_cpp_module(torch._C._jit_pass_insert_observers(model._c,
                                                                'forward',
                                                                scripted_qconfig_dict,
                                                                False,
                                                                True))
    return model
def convert_script(model, inplace=False):
    _check_is_script_module(model)
    if not inplace:
        model = model.copy()
    model = wrap_cpp_module(
        torch._C._jit_pass_insert_quant_dequant(model._c, 'forward', False))
    if 'fbgemm' in torch.backends.quantized.supported_engines:
        torch._C._jit_pass_insert_prepack_unpack(model._c)
    return model
def _prepare_script(model, qconfig_dict, is_dynamic):
    _check_is_script_module(model)
    if any(map(lambda x: not isinstance(x, str), qconfig_dict.keys())):
        raise ValueError('qconfig_dict should contain names(str) as keys.')
    scripted_qconfig_dict = get_scripted_qconfig_dict(qconfig_dict)
    return wrap_cpp_module(
        torch._C._jit_pass_insert_observers(model._c, 'forward',
                                            scripted_qconfig_dict, False,
                                            is_dynamic))
def prepare_script(model, qconfig_dict, inplace=False):
    _check_is_script_module(model)
    scripted_qconfig_dict = {k: script_qconfig(v) if v else None
                             for k, v in qconfig_dict.items()}
    if not inplace:
        model = model.copy()
    model = wrap_cpp_module(torch._C._jit_pass_insert_observers(model._c,
                                                                'forward',
                                                                scripted_qconfig_dict,
                                                                False))
    return model
def jit_module_from_flatbuffer(f):
    try:
        import torch._C_flatbuffer as ff
    except ImportError:
        print("Please include //caffe2:_C_flatbuffer as dependency.")
        raise
    if isinstance(f, string_classes):
        if not os.path.exists(f):  # type: ignore[type-var]
            raise ValueError("The provided filename {} does not exist".format(f))  # type: ignore[str-bytes-safe]
        if os.path.isdir(f):
            raise ValueError("The provided filename {} is a directory".format(f))  # type: ignore[str-bytes-safe]
    if isinstance(f, str) or isinstance(f, pathlib.Path):
        f = str(f)
        return wrap_cpp_module(ff._load_jit_module_from_file(f))
    else:
        return wrap_cpp_module(ff._load_jit_module_from_bytes(f.read()))
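# Minimal usage sketch for the flatbuffer loader above, assuming "model.ff" already
# holds a module serialized in flatbuffer format (the path is an illustrative
# assumption, not taken from these snippets).
loaded_from_path = jit_module_from_flatbuffer("model.ff")    # accepts str or pathlib.Path
with open("model.ff", "rb") as stream:
    loaded_from_stream = jit_module_from_flatbuffer(stream)  # anything with a .read() method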
def trace_(func, example_inputs, *args, **kwargs):
    # Disable mix precision: torch.jit.trace checks the traced output against
    # what is expected, and since mix precision leads to a loss of accuracy,
    # leaving it on would raise a warning during torch.jit.trace.
    orig_mixed_type = ipex.get_auto_mix_precision()
    ipex.enable_auto_mix_precision(None)
    jit_m = orig_trace(func, example_inputs, *args, **kwargs)
    if core.get_jit_opt() and hasattr(jit_m, '_c'):
        jit_m = wrap_cpp_module(torch._C._jit_pass_fold_convbn(jit_m._c))
    ipex.enable_auto_mix_precision(orig_mixed_type)
    return jit_m
def remove_redundant_aliases(scripted_module: torch.nn.Module):
    """
    Running torch.jit.trace on a model with DBR quantization introduces
    extra alias ops, because we use `torch.Tensor.as_subclass` and tracing
    through this results in an `aten::alias` function call in TorchScript.

    This pass removes these alias calls when it is safe to do so.
    """
    module_c = scripted_module._c
    module_c = \
        torch._C._jit_pass_dbr_quant_remove_redundant_aliases(module_c)  # type: ignore[attr-defined]
    scripted_module = wrap_cpp_module(module_c)
    return scripted_module
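# Minimal usage sketch for the pass above, assuming `dbr_model` was already prepared
# with DBR quantization; the model and input names are illustrative assumptions.
traced = torch.jit.trace(dbr_model, example_input)
traced = remove_redundant_aliases(traced)  # drop the aten::alias nodes introduced by tracing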
def trace_(func, example_inputs, *args, **kwargs):
    # Disable mix precision: torch.jit.trace checks the traced output against
    # what is expected, and since mix precision leads to a loss of accuracy,
    # leaving it on would raise a warning during torch.jit.trace.
    mix_state = core.get_mix_bf16_fp32()
    core.disable_mix_bf16_fp32()
    jit_m = orig_trace(func, example_inputs, *args, **kwargs)
    if core.get_jit_opt() and hasattr(jit_m, '_c'):
        jit_m = wrap_cpp_module(torch._C._jit_pass_fold_convbn(jit_m._c))
    if mix_state:
        core.enable_mix_bf16_fp32()
    return jit_m
def test_module_with_shared_type_instances(self):
    class Child(nn.Module):
        def __init__(self):
            super(Child, self).__init__()
            self.conv1 = nn.Conv2d(1, 1, 1)

        def forward(self, x):
            x = self.conv1(x)
            return x

    class Parent(nn.Module):
        def __init__(self):
            super(Parent, self).__init__()
            self.quant = torch.quantization.QuantStub()
            self.conv1 = nn.Conv2d(1, 1, 1)
            self.child = Child()
            self.child2 = Child()
            self.dequant = torch.quantization.DeQuantStub()

        def forward(self, x):
            x = self.quant(x)
            x = self.conv1(x)
            x = self.child(x)
            x = self.child2(x)
            x = self.dequant(x)
            return x

    def _static_quant(model):
        model.qconfig = torch.quantization.get_default_qconfig('qnnpack')
        torch.quantization.prepare(model, inplace=True)
        model(torch.rand(4, 1, 4, 4))
        model = torch.quantization.convert(model, inplace=False)
        return model

    current_dtype = torch.get_default_dtype()
    torch.set_default_dtype(torch.float32)
    data = torch.randn(4, 1, 4, 4)
    m = Parent()
    m = _static_quant(m)
    m = torch.jit.script(m)
    m.eval()
    torch._C._jit_pass_inline(m.graph)
    m_frozen = wrap_cpp_module(torch._C._freeze_module(m._c))
    # Earlier bug resulted in _packed_params set to false.
    FileCheck().check_not('_packed_params = False').run(
        m_frozen._c.dump_to_str(True, True, False))

    m_res = m(data)
    # It used to segfault while running frozen module.
    m_frozen_res = m_frozen(data)
    self.assertEqual(m_res, m_frozen_res)
    torch.set_default_dtype(current_dtype)
def quantize_script(model, qconfig_dict, run_fn, run_args, inplace=False, debug=False):
    _check_is_script_module(model)
    if not model._c._has_method('forward'):
        raise ValueError('input script module does not have forward method')
    assert not inplace, "We don't support inplace right now"
    if not inplace:
        model = model.copy()
    torch._C._jit_pass_dedup_module_uses(model._c)
    model = wrap_cpp_module(torch._C._jit_pass_fold_convbn(model._c))
    model = prepare_script(model, qconfig_dict, True)
    run_fn(model._c._get_method('forward'), *run_args)
    model = convert_script(model, True, debug)
    return model
def fuse_conv_bn_jit(model, inplace=False):
    r""" Fuse conv - bn module
    Works for eval model only.

    Args:
        model: TorchScript model from scripting or tracing
    """
    model_c = model._c
    model_c = torch._C._jit_pass_fold_convbn(model_c)
    if inplace:
        model._reconstruct(model_c)
    else:
        model = wrap_cpp_module(model_c)
    return model
def _quantize_script(model, qconfig_dict, run_fn, run_args, is_dynamic, debug):
    _check_is_script_module(model)
    _check_forward_method(model)
    torch._C._jit_pass_dedup_module_uses(model._c)
    model = wrap_cpp_module(torch._C._jit_pass_fold_convbn(model._c))
    if is_dynamic:
        model = prepare_dynamic_script(model, qconfig_dict)
        run_fn(model._c._get_method('forward'), *run_args)
        model = convert_dynamic_script(model, debug)
    else:
        model = prepare_script(model, qconfig_dict, True)
        run_fn(model._c._get_method('forward'), *run_args)
        model = convert_script(model, True, debug)
    return model
def _test_output(self, model, x):
    modelName = model.__class__.__name__
    core.disable_jit()

    model = model.to('dpcpp').eval()
    x = x.to('dpcpp')
    with torch.no_grad():
        result = model(x)

    script_model = torch.jit.script(model)
    script_model.eval()
    with torch.no_grad():
        sresult = script_model(x)

    self.assertEqual(result, sresult)

    core.enable_jit()
    fused_model = torch.jit.script(model)
    # bn folding; remove this after the issue is solved
    core.disable_auto_dnnl()
    fused_model = wrap_cpp_module(
        torch._C._jit_pass_fold_convbn(fused_model._c))
    core.enable_auto_dnnl()
    # prepack convolution weight
    fused_model = wrap_cpp_module(
        core._jit_prepack_conv_weight(fused_model._c))
    with torch.no_grad():
        # conv relu fusion, conv sum fusion or conv sum relu fusion
        print(fused_model.graph_for(x))
        fresult = fused_model(x)

    # print(result)
    # print(sresult)
    # print(fresult)
    self.assertEqual(result, fresult)
def fuse_conv_bn_jit(model, inplace=False):
    r""" Fuse conv - bn module
    Works for eval model only.

    Args:
        model: TorchScript model from scripting or tracing
    """
    torch._C._log_api_usage_once("quantization_api.quantize_jit.fuse_conv_bn_jit")
    model_c = model._c
    model_c = torch._C._jit_pass_fold_convbn(model_c)
    if inplace:
        model._reconstruct(model_c)
    else:
        model = wrap_cpp_module(model_c)
    return model
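# Minimal usage sketch for fuse_conv_bn_jit above: fold BatchNorm into the preceding
# Conv of a scripted eval-mode model. The ConvBN module here is an illustrative assumption.
class ConvBN(torch.nn.Module):
    def __init__(self):
        super().__init__()
        self.conv = torch.nn.Conv2d(3, 8, 3)
        self.bn = torch.nn.BatchNorm2d(8)

    def forward(self, x):
        return self.bn(self.conv(x))

scripted = torch.jit.script(ConvBN().eval())
fused = fuse_conv_bn_jit(scripted)             # returns a new wrapped module by default
# fuse_conv_bn_jit(scripted, inplace=True)     # or rebuild the same module in place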
def script_(obj, optimize=None, _frames_up=0, _rcb=None):
    torch.jit.script = orig_script
    jit_m = orig_script(obj, optimize=optimize, _frames_up=_frames_up + 1, _rcb=_rcb)
    torch.jit.script = script_
    if core.get_jit_opt() and hasattr(jit_m, '_c'):
        # Disable mix precision in model fusion, since mixed precision cannot
        # bring any benefits for inference, but will lead to loss of accuracy
        orig_mixed_type = ipex.get_auto_mix_precision()
        ipex.enable_auto_mix_precision(None)
        jit_m = wrap_cpp_module(torch._C._jit_pass_fold_convbn(jit_m._c))
        ipex.enable_auto_mix_precision(orig_mixed_type)
    return jit_m
def test_insert_quant_dequant_conv_dynamic(self):
    class M(torch.nn.Module):
        def __init__(self):
            super(M, self).__init__()
            self.conv = torch.nn.Conv2d(3, 5, 3).float()

        def forward(self, x):
            return self.conv(x)

    m = torch.jit.script(M())
    m = prepare_dynamic_script(m, {'': default_dynamic_qconfig})
    data = torch.randn(1, 3, 10, 10, dtype=torch.float)

    m(data)
    m = wrap_cpp_module(
        torch._C._jit_pass_insert_quant_dequant(m._c, "forward", False, True))
    assert len(m._modules._c.items()) == 1, \
        'Expected to have single submodule of conv'

    m(data)
    quant_func = "aten::quantize_per_tensor"

    # quantizing activations
    FileCheck().check("aten::_choose_qparams_per_tensor") \
               .check(quant_func) \
               .check("prim::CallMethod[name=\"forward\"]") \
               .check_not(quant_func) \
               .check("return") \
               .run(str(get_forward_graph(m._c)))
    # quantizing weight in forward function of conv module, no choose_qparams
    FileCheck().check_not("aten::_choose_qparams_per_tensor") \
               .check(quant_func) \
               .check("prim::CallMethod[name=\"_conv_forward\"]") \
               .check_not(quant_func) \
               .check("return") \
               .run(str(get_forward_graph(m.conv._c)))
    # shouldn't have quant/dequant in the _conv_forward function
    FileCheck().check_not(quant_func) \
               .check("aten::conv2d") \
               .check_not(quant_func) \
               .check("return") \
               .run(str(get_module_method(m, 'conv', '_conv_forward').graph))
def script_(obj, optimize=None, _frames_up=0, _rcb=None):
    torch.jit.script = orig_script
    jit_m = orig_script(obj, optimize=optimize, _frames_up=_frames_up + 1, _rcb=_rcb)
    torch.jit.script = script_
    mix_state = torch.bfloat16 if core.get_mix_bf16_fp32() \
        else torch.int8 if core.get_mix_int8_fp32() else None
    # Disable mix precision in model fusion, since mixed precision cannot
    # bring any benefits for inference, but will lead to loss of accuracy
    core.disable_mix_bf16_fp32()
    core.disable_mix_int8_fp32()
    if core.get_jit_opt() and hasattr(jit_m, '_c'):
        jit_m = wrap_cpp_module(torch._C._jit_pass_fold_convbn(jit_m._c))
    if mix_state == torch.bfloat16:
        core.enable_mix_bf16_fp32()
    elif mix_state == torch.int8:
        core.enable_mix_int8_fp32()
    return jit_m
def _prepare_jit(model, qconfig_dict, inplace=False, quant_type=QuantType.STATIC):
    _check_is_script_module(model)
    _check_forward_method(model)
    if not all(isinstance(x, str) for x in qconfig_dict.keys()):
        raise ValueError('qconfig_dict should only contain names(str) as keys.')
    scripted_qconfig_dict = script_qconfig_dict(qconfig_dict)
    model = fuse_conv_bn_jit(model, inplace)
    model_c = torch._C._jit_pass_insert_observers(model._c,
                                                  'forward',
                                                  scripted_qconfig_dict,
                                                  inplace,
                                                  quant_type)
    if inplace:
        model._reconstruct(model_c)
    else:
        model = wrap_cpp_module(model_c)
    return model
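# Minimal end-to-end sketch of how the prepare/convert passes above compose, mirroring
# the quantize_script flow shown earlier in this section. The qconfig choice and the
# random calibration data are illustrative assumptions, not taken from these snippets.
scripted = torch.jit.script(torch.nn.Conv2d(3, 8, 3).eval())
prepared = _prepare_jit(scripted, {'': torch.quantization.default_qconfig})
for _ in range(4):                              # calibration: run a few batches so the
    prepared(torch.randn(1, 3, 32, 32))         # inserted observers record statistics
quantized = _convert_jit(prepared)              # finalize into a statically quantized module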