def test_two_layers(self):
    r"""TwoLayerLinearModel has two Linear modules but we only quantize
    the second one, `fc2`; `fc1` is not quantized.
    """
    model = TwoLayerLinearModel().eval()
    qconfig_dict = {
        'fc2': default_dynamic_qconfig
    }
    prepare_dynamic(model, qconfig_dict)
    convert_dynamic(model)

    def checkQuantized(model):
        self.assertEqual(type(model.fc1), torch.nn.Linear)
        self.checkDynamicQuantizedLinear(model.fc2)
        self.checkScriptable(model, self.calib_data, check_save_load=True)

    checkQuantized(model)

    # test one line API
    model = quantize_dynamic(TwoLayerLinearModel().eval(), qconfig_dict)
    checkQuantized(model)

    # Test set API
    model = quantize_dynamic(TwoLayerLinearModel().eval(), {'fc2'})
    checkQuantized(model)
def test_nested2(self):
    r"""Another test case for dynamic quantization: we quantize all
    submodules of submodule sub2.
    """
    model = NestedModel().eval()
    qconfig_dict = {
        'fc3': default_dynamic_qconfig,
        'sub2': default_dynamic_qconfig
    }
    prepare_dynamic(model, qconfig_dict)
    convert_dynamic(model)

    def checkQuantized(model):
        self.checkLinear(model.sub1.fc)
        self.assertEqual(type(model.sub1.relu), torch.nn.ReLU)
        self.checkDynamicQuantizedLinear(model.sub2.fc1)
        self.checkDynamicQuantizedLinear(model.sub2.fc2)
        self.checkDynamicQuantizedLinear(model.fc3)
        self.checkScriptable(model, self.calib_data, check_save_load=True)

    checkQuantized(model)

    # test one line API
    model = quantize_dynamic(NestedModel().eval(), qconfig_dict)
    checkQuantized(model)

    # Test set API
    model = quantize_dynamic(NestedModel().eval(), {'fc3', 'sub2'})
    checkQuantized(model)
def test_nested1(self):
    r"""Test quantization for a nested model: top level 'fc3' and
    'fc1' of submodule 'sub2' are quantized; 'sub2.fc2' is not quantized.
    """
    model = NestedModel().eval()
    qconfig_dict = {
        'fc3': default_dynamic_qconfig,
        'sub2.fc1': default_dynamic_qconfig
    }
    prepare_dynamic(model, qconfig_dict)
    convert_dynamic(model)

    def checkQuantized(model):
        self.checkLinear(model.sub1.fc)
        self.checkDynamicQuantizedLinear(model.fc3)
        self.checkDynamicQuantizedLinear(model.sub2.fc1)
        self.checkLinear(model.sub2.fc2)
        self.checkScriptable(model, self.calib_data, check_save_load=True)

    checkQuantized(model)

    # test one line API
    model = quantize_dynamic(NestedModel().eval(), qconfig_dict)
    checkQuantized(model)

    # Test set API
    model = quantize_dynamic(NestedModel().eval(), {'fc3', 'sub2.fc1'})
    checkQuantized(model)
def test_nested3(self):
    r"""More complicated nested test case where a child qconfig
    overrides the parent qconfig.
    """
    model = NestedModel().eval()
    custom_options = {
        'dtype': torch.quint8,
        'qscheme': torch.per_tensor_affine
    }
    custom_dynamic_qconfig = QConfigDynamic(weight=default_weight_observer)
    qconfig_dynamic_dict = {
        'fc3': default_dynamic_qconfig,
        'sub2': default_dynamic_qconfig,
        'sub2.fc1': custom_dynamic_qconfig
    }
    prepare_dynamic(model, qconfig_dynamic_dict)
    convert_dynamic(model)

    def checkQuantized(model):
        self.checkDynamicQuantizedLinear(model.sub2.fc1)
        self.checkDynamicQuantizedLinear(model.sub2.fc2)
        self.checkDynamicQuantizedLinear(model.fc3)
        self.checkScriptable(model, self.calib_data, check_save_load=True)

    checkQuantized(model)

    # test one line API
    model = quantize_dynamic(NestedModel().eval(), qconfig_dynamic_dict)
    checkQuantized(model)

    # Test set API
    model = quantize_dynamic(NestedModel().eval(), {'fc3', 'sub2', 'sub2.fc1'})
    checkQuantized(model)
def quant(net_i, scheme, trainer, quant_params=None):
    """Quantizes the network according to the chosen scheme:
    post-training static ("post"), dynamic ("dynamic"), or both ("both").
    """
    if scheme == "post":
        net_i.to("cpu")
        net_i.eval()
        net_i.qconfig = get_default_qconfig("fbgemm")
        net_i.fuse_model()
        prepare(net_i, inplace=True)
        _, net_i = trainer.evaluate(net_i, quant_mode=True)
        convert(net_i, inplace=True)
    elif scheme == "dynamic":
        net_i.to("cpu")
        net_i = quantize_dynamic(net_i, quant_params, dtype=qint8)
    elif scheme == "both":
        net_i.to("cpu")
        net_i.eval()
        net_i = quantize_dynamic(net_i, quant_params, dtype=qint8)
        net_i.qconfig = get_default_qconfig("fbgemm")
        net_i.fuse_model()
        prepare(net_i, inplace=True)
        _, net_i = trainer.evaluate(net_i, quant_mode=True)
        convert(net_i, inplace=True)
    else:
        pass
    return net_i
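# A minimal usage sketch for quant() above with the "dynamic" scheme, which
# needs neither the trainer nor fuse_model(). The toy model below is purely
# illustrative and not part of the original code.
import torch
from torch import nn

toy_net = nn.Sequential(nn.Linear(16, 32), nn.ReLU(), nn.Linear(32, 4))
quantized_net = quant(toy_net, scheme="dynamic", trainer=None,
                      quant_params={nn.Linear})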
def test_single_layer(self):
    r"""Dynamically quantize SingleLayerLinearDynamicModel, which has one
    Linear module, and make sure it is swapped to nnqd.Linear, the
    dynamically quantized version of the module.
    """
    model = SingleLayerLinearDynamicModel().eval()
    qconfig_dict = {
        '': default_dynamic_qconfig
    }
    prepare_dynamic(model, qconfig_dict)
    convert_dynamic(model)

    def checkQuantized(model):
        self.checkDynamicQuantizedLinear(model.fc1)
        self.checkScriptable(model, self.calib_data, check_save_load=True)

    checkQuantized(model)

    # test one line API - out of place version
    base = SingleLayerLinearDynamicModel()
    keys_before = set(list(base.state_dict().keys()))
    model = quantize_dynamic(base, qconfig_dict)
    checkQuantized(model)
    keys_after = set(list(base.state_dict().keys()))
    self.assertEqual(keys_before, keys_after)  # simple check that nothing changed

    # in-place version
    model = SingleLayerLinearDynamicModel()
    quantize_dynamic(model, qconfig_dict, inplace=True)
    checkQuantized(model)
def quantize_model(
    model: Model, qconfig_spec: Dict = None, dtype: Union[str, Optional[torch.dtype]] = "qint8"
) -> Model:
    """Function to quantize model weights.

    Args:
        model: model to be quantized
        qconfig_spec (Dict, optional): quantization config in PyTorch format. Defaults to None.
        dtype (Union[str, Optional[torch.dtype]], optional): type of weights after quantization.
            Defaults to "qint8".

    Returns:
        Model: quantized model
    """
    nn_model = get_nn_from_ddp_module(model)
    if isinstance(dtype, str):
        # map string aliases to the corresponding torch dtypes
        type_mapping = {"qint8": torch.qint8, "quint8": torch.quint8}
        dtype = type_mapping[dtype]
    try:
        quantized_model = quantization.quantize_dynamic(
            nn_model.cpu(), qconfig_spec=qconfig_spec, dtype=dtype
        )
    except RuntimeError:
        # fall back to the qnnpack engine if the current engine cannot run the quantized ops
        torch.backends.quantized.engine = "qnnpack"
        quantized_model = quantization.quantize_dynamic(
            nn_model.cpu(), qconfig_spec=qconfig_spec, dtype=dtype
        )
    return quantized_model
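# A minimal usage sketch for quantize_model() above. A bare nn.Module is used
# here on the assumption that get_nn_from_ddp_module() returns a module that is
# not wrapped in (Distributed)DataParallel unchanged; the toy model is
# illustrative and not part of the original code.
import torch
from torch import nn

toy_model = nn.Sequential(nn.Linear(128, 64), nn.ReLU(), nn.Linear(64, 10))
q_model = quantize_model(toy_model, qconfig_spec={nn.Linear}, dtype="qint8")
print(type(q_model[0]))  # expect a dynamically quantized Linear module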
def get_infer_model(model, opt):
    new_state_dict = get_state_dict(model.state_dict())
    model = JitModel(opt)
    model.load_state_dict(new_state_dict)
    model.eval()

    if opt.quantized:
        # static quantization : Work in progress
        if opt.static:
            backend = "qnnpack"
            model.qconfig = torch.quantization.get_default_qconfig(backend)
            torch.backends.quantized.engine = backend
            model_quantized = torch.quantization.prepare(model, inplace=False)
            model_quantized = torch.quantization.convert(model_quantized, inplace=False)
        # support for dynamic quantization
        else:
            from torch.quantization import quantize_dynamic
            model_quantized = quantize_dynamic(model=model,
                                               qconfig_spec={torch.nn.Linear},
                                               dtype=torch.qint8,
                                               inplace=False)
        # quantized model save/load https://pytorch.org/docs/stable/quantization.html
        model = torch.jit.script(model_quantized)

    model_scripted = torch.jit.script(model)
    model_scripted.save(opt.infer_model)
    return
def test_nested3(self):
    r"""More complicated nested test case where a child qconfig
    overrides the parent qconfig.
    """
    model = NestedModel().eval()
    custom_options = {
        'dtype': torch.quint8,
        'qscheme': torch.per_tensor_affine
    }
    custom_qconfig = QConfig(weight=default_weight_observer(),
                             activation=default_observer(**custom_options))
    qconfig_dict = {
        'fc3': default_qconfig,
        'sub2': default_qconfig,
        'sub2.fc1': custom_qconfig
    }
    model = prepare_dynamic(model, qconfig_dict)
    convert_dynamic(model)

    def checkQuantized(model):
        self.checkDynamicQuantizedLinear(model.sub2.fc1)
        self.checkDynamicQuantizedLinear(model.sub2.fc2)
        self.checkDynamicQuantizedLinear(model.fc3)

    checkQuantized(model)

    # test one line API
    model = quantize_dynamic(NestedModel().eval(), qconfig_dict)
    checkQuantized(model)
def test_compare_model_outputs_lstm_dynamic(self):
    r"""Compare the output of the LSTM layer in the dynamic quantized model
    with the corresponding output of the LSTM layer in the float model.
    """
    qengine = torch.backends.quantized.engine

    def compare_and_validate_results(float_model, q_model, input, hidden):
        act_compare_dict = compare_model_outputs(float_model, q_model, input, hidden)
        expected_act_compare_dict_keys = {"lstm.stats"}

        self.assertTrue(act_compare_dict.keys() == expected_act_compare_dict_keys)
        for k, v in act_compare_dict.items():
            self.assertTrue(v["float"][0].shape == v["quantized"][0].shape)

    lstm_input = torch.rand((1, 1, 2))
    lstm_hidden = (torch.rand(1, 1, 2), torch.rand(1, 1, 2))

    model_list = [LSTMwithHiddenDynamicModel(qengine)]
    for model in model_list:
        model.eval()
        if hasattr(model, "fuse_model"):
            model.fuse_model()
        q_model = quantize_dynamic(model)
        compare_and_validate_results(model, q_model, lstm_input, lstm_hidden)
def test_nested2(self):
    r"""Another test case for dynamic quantization: we quantize all
    submodules of submodule sub2.
    """
    model = NestedModel().eval()
    qconfig_dict = {
        'fc3': default_qconfig,
        'sub2': default_qconfig
    }
    model = prepare_dynamic(model, qconfig_dict)
    convert_dynamic(model)

    def checkQuantized(model):
        self.checkLinear(model.sub1.fc)
        self.assertEqual(type(model.sub1.relu), torch.nn.ReLU)
        self.checkDynamicQuantizedLinear(model.sub2.fc1)
        self.checkDynamicQuantizedLinear(model.sub2.fc2)
        self.checkDynamicQuantizedLinear(model.fc3)

    checkQuantized(model)

    # test one line API
    model = quantize_dynamic(NestedModel().eval(), qconfig_dict)
    checkQuantized(model)
def test_compare_model_stub_linear_dynamic(self):
    r"""Compare the output of dynamic quantized linear layer and its float shadow module
    """
    qengine = torch.backends.quantized.engine

    def compare_and_validate_results(float_model, q_model, module_swap_list, data):
        ob_dict = compare_model_stub(float_model, q_model, module_swap_list, data)
        self.assertEqual(len(ob_dict), 1)
        for k, v in ob_dict.items():
            self.assertTrue(v["float"].shape == v["quantized"].shape)

    linear_data = self.calib_data[0][0]

    model_list = [SingleLayerLinearDynamicModel(qengine)]
    module_swap_list = [nn.Linear, nn.LSTM]
    for model in model_list:
        model.eval()
        if hasattr(model, "fuse_model"):
            model.fuse_model()
        q_model = quantize_dynamic(model)
        compare_and_validate_results(model, q_model, module_swap_list, linear_data)
def test_type_match_rule(self):
    r"""Test the module type match rule for a nested model: top level 'fc3'
    and 'fc1' of submodule 'sub2' are explicitly mapped to None (left
    unquantized), while all other `torch.nn.Linear` modules are quantized.
    """
    model = NestedModel().eval()
    qconfig_dict = {
        'fc3': None,
        'sub2.fc1': None,
        torch.nn.Linear: default_dynamic_qconfig
    }

    prepare_dynamic(model, qconfig_dict)
    test_only_eval_fn(model, self.calib_data)
    convert_dynamic(model)

    def checkQuantized(model):
        self.checkDynamicQuantizedLinear(model.sub1.fc)
        self.checkLinear(model.fc3)
        self.checkLinear(model.sub2.fc1)
        self.checkDynamicQuantizedLinear(model.sub2.fc2)
        test_only_eval_fn(model, self.calib_data)
        self.checkScriptable(model, self.calib_data, check_save_load=True)

    checkQuantized(model)

    # test one line API
    model = quantize_dynamic(NestedModel().eval(), qconfig_dict)
    checkQuantized(model)
def test_compare_model_stub_linear_dynamic(self):
    r"""Compare the output of dynamic quantized linear layer and its float shadow module
    """
    qengine = torch.backends.quantized.engine

    def compare_and_validate_results(float_model, q_model, module_swap_list, data):
        ob_dict = compare_model_stub(float_model, q_model, module_swap_list, data, ShadowLogger)
        self.assertEqual(len(ob_dict), 1)
        for k, v in ob_dict.items():
            self.assertTrue(v["float"].shape == v["quantized"].shape)

    img_data = [(
        torch.rand(3, 5, dtype=torch.float),
        torch.randint(0, 1, (2,), dtype=torch.long),
    ) for _ in range(2)]
    linear_data = img_data[0][0]

    model_list = [SingleLayerLinearDynamicModel(qengine)]
    module_swap_list = [nn.Linear]
    for model in model_list:
        model.eval()
        if hasattr(model, "fuse_model"):
            model.fuse_model()
        q_model = quantize_dynamic(model)
        compare_and_validate_results(model, q_model, module_swap_list, linear_data)
def test_compare_model_outputs_linear_dynamic(self):
    r"""Compare the output of the linear layer in the dynamic quantized model
    with the corresponding output of the linear layer in the float model.
    """
    qengine = torch.backends.quantized.engine

    def compare_and_validate_results(float_model, q_model, data):
        act_compare_dict = compare_model_outputs(float_model, q_model, data)
        expected_act_compare_dict_keys = {"fc1.stats"}

        self.assertTrue(act_compare_dict.keys() == expected_act_compare_dict_keys)
        for k, v in act_compare_dict.items():
            self.assertTrue(v["float"].shape == v["quantized"].shape)

    linear_data = self.calib_data[0][0]

    model_list = [SingleLayerLinearDynamicModel(qengine)]
    for model in model_list:
        model.eval()
        if hasattr(model, "fuse_model"):
            model.fuse_model()
        q_model = quantize_dynamic(model)
        compare_and_validate_results(model, q_model, linear_data)
def test_compare_model_stub_lstm_dynamic(self):
    r"""Compare the output of dynamic quantized LSTM layer and its float shadow module
    """
    qengine = torch.backends.quantized.engine

    def compare_and_validate_results(float_model, q_model, module_swap_list, input, hidden):
        ob_dict = compare_model_stub(float_model, q_model, module_swap_list, input, hidden)
        self.assertEqual(len(ob_dict), 1)
        for k, v in ob_dict.items():
            self.assertTrue(v["float"].shape == v["quantized"].shape)

    lstm_input = torch.rand((1, 1, 2))
    lstm_hidden = (torch.rand(1, 1, 2), torch.rand(1, 1, 2))

    model_list = [LSTMwithHiddenDynamicModel(qengine)]
    module_swap_list = [nn.Linear, nn.LSTM]
    for model in model_list:
        model.eval()
        if hasattr(model, "fuse_model"):
            model.fuse_model()
        q_model = quantize_dynamic(model)
        compare_and_validate_results(model, q_model, module_swap_list, lstm_input, lstm_hidden)
def quantize_model_from_checkpoint(
    logdir: Path,
    checkpoint_name: str,
    stage: str = None,
    qconfig_spec: Optional[Union[Set, Dict]] = None,
    dtype: Optional[torch.dtype] = torch.qint8,
    backend: str = None,
) -> Model:
    """Quantize a model using the created experiment and runner.

    Args:
        logdir (Union[str, Path]): path to Catalyst logdir with the model
        checkpoint_name (str): name of the model checkpoint to use
        stage (str): experiment's stage name
        qconfig_spec: `torch.quantization.quantize_dynamic` parameter; lets you
            define which layers to quantize
        dtype: type of the model parameters after quantization, defaults to torch.qint8
        backend: defines the backend for quantization

    Returns:
        Quantized model
    """
    if backend is not None:
        torch.backends.quantized.engine = backend

    config_path = logdir / "configs" / "_config.json"
    checkpoint_path = logdir / "checkpoints" / f"{checkpoint_name}.pth"
    logging.info("Load config")
    config: Dict[str, dict] = load_config(config_path)

    # Get expdir name
    config_expdir = Path(config["args"]["expdir"])
    # We will use copy of expdir from logs for reproducibility
    expdir = Path(logdir) / "code" / config_expdir.name

    logger.info("Import experiment and runner from logdir")
    experiment: ConfigExperiment = None
    experiment, _, _ = prepare_config_api_components(expdir=expdir, config=config)

    logger.info(f"Load model state from checkpoints/{checkpoint_name}.pth")
    if stage is None:
        stage = list(experiment.stages)[0]

    model = experiment.get_model(stage)
    checkpoint = load_checkpoint(checkpoint_path)
    unpack_checkpoint(checkpoint, model=model)

    logger.info("Quantization is running...")
    quantized_model = quantization.quantize_dynamic(
        model.cpu(),
        qconfig_spec=qconfig_spec,
        dtype=dtype,
    )

    logger.info("Done")
    return quantized_model
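# A hypothetical call sketch for quantize_model_from_checkpoint() above. The
# logdir layout ("configs/_config.json", "checkpoints/<name>.pth") mirrors what
# the function itself reads; the concrete path and the Linear-only qconfig_spec
# are only illustrative assumptions.
from pathlib import Path
import torch

q_model = quantize_model_from_checkpoint(
    logdir=Path("logs/experiment"),
    checkpoint_name="best",
    qconfig_spec={torch.nn.Linear},
    dtype=torch.qint8,
    backend="fbgemm",
)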
def on_epoch_end(self, runner: IRunner):
    """Performs model quantization at the end of an epoch if the monitored
    metric has improved.

    Args:
        runner: current runner
    """
    if not self.do_once:
        if self.mode == "best":
            score = runner.valid_metrics[self.metric]

            if self.best_score is None:
                self.best_score = score

            if self.is_better(score, self.best_score) or self.first_time:
                self.best_score = score
                quantized_model = quantization.quantize_dynamic(
                    runner.model.cpu(),
                    qconfig_spec=self.qconfig_spec,
                    dtype=self.dtype,
                )
                save_quantized_model(
                    model=quantized_model,
                    logdir=runner.logdir,
                    checkpoint_name=self.mode,
                    out_model=self.out_model,
                    out_dir=self.out_dir,
                )
                self.first_time = False
        else:
            quantized_model = quantization.quantize_dynamic(
                runner.model.cpu(),
                qconfig_spec=self.qconfig_spec,
                dtype=self.dtype,
            )
            save_quantized_model(
                model=quantized_model,
                logdir=runner.logdir,
                checkpoint_name=self.mode,
                out_model=self.out_model,
                out_dir=self.out_dir,
            )
def quantization(self):
    if self.quant_method == 'dynamic':
        torch.backends.quantized.engine = 'fbgemm' if self.config == 'x86' else 'qnnpack'
        quant_model = quant.quantize_dynamic(
            self.model, {nn.Linear, nn.Conv2d, nn.Conv1d}, dtype=torch.qint8)
    else:
        # Post-Training Static Quantization
        quant_model = copy.deepcopy(self.model)
        quant_model.eval()
        quant_model.fuse_model()
        quant_model.qconfig = self.qconfig
        quant.prepare(quant_model, inplace=True)
        self.calibrate_model(quant_model, self.calibration_loader)
        quant.convert(quant_model, inplace=True)

    self.print_model_size(quant_model, 'Quantized Model')
    return quant_model
def test_two_layers(self):
    r"""TwoLayerLinearModel has two Linear modules but we only quantize
    the second one, `fc2`; `fc1` is not quantized.
    """
    model = TwoLayerLinearModel().eval()
    qconfig_dict = {'fc2': default_qconfig}
    model = prepare_dynamic(model, qconfig_dict)
    convert_dynamic(model)

    def checkQuantized(model):
        self.assertEqual(type(model.fc1), torch.nn.Linear)
        self.checkDynamicQuantizedLinear(model.fc2)

    checkQuantized(model)

    # test one line API
    model = quantize_dynamic(TwoLayerLinearModel().eval(), qconfig_dict)
    checkQuantized(model)
def test_single_layer(self):
    r"""Dynamically quantize SingleLayerLinearDynamicModel, which has one
    Linear module, and make sure it is swapped to nnqd.Linear, the
    dynamically quantized version of the module.
    """
    model = SingleLayerLinearDynamicModel().eval()
    qconfig_dict = {'': default_dynamic_qconfig}
    model = prepare_dynamic(model, qconfig_dict)
    convert_dynamic(model)

    def checkQuantized(model):
        self.checkDynamicQuantizedLinear(model.fc1)

    checkQuantized(model)

    # test one line API
    model = quantize_dynamic(SingleLayerLinearDynamicModel().eval(), qconfig_dict)
    checkQuantized(model)
def on_stage_end(self, runner: "IRunner") -> None:
    """On stage end action.

    Args:
        runner: runner of your experiment
    """
    if self.do_once:
        quantized_model = quantization.quantize_dynamic(
            runner.model.cpu(),
            qconfig_spec=self.qconfig_spec,
            dtype=self.dtype,
        )
        save_quantized_model(
            model=quantized_model,
            logdir=runner.logdir,
            checkpoint_name=self.mode,
            out_model=self.out_model,
            out_dir=self.out_dir,
        )
def test_compare_weights_lstm_dynamic(self):
    r"""Compare the weights of float and dynamic quantized LSTM layer
    """
    qengine = torch.backends.quantized.engine

    def compare_and_validate_results(float_model, q_model):
        weight_dict = compare_weights(float_model.state_dict(), q_model.state_dict())
        self.assertEqual(len(weight_dict), 1)
        for k, v in weight_dict.items():
            self.assertTrue(v["float"].shape == v["quantized"].shape)

    model_list = [LSTMwithHiddenDynamicModel(qengine)]
    for model in model_list:
        model.eval()
        if hasattr(model, "fuse_model"):
            model.fuse_model()
        q_model = quantize_dynamic(model)
        compare_and_validate_results(model, q_model)
def test_nested1(self):
    r"""Test quantization for a nested model: top level 'fc3' and
    'fc1' of submodule 'sub2' are quantized; 'sub2.fc2' is not quantized.
    """
    model = NestedModel().eval()
    qconfig_dict = {'fc3': default_qconfig, 'sub2.fc1': default_qconfig}
    model = prepare_dynamic(model, qconfig_dict)
    convert_dynamic(model)

    def checkQuantized(model):
        self.checkLinear(model.sub1.fc)
        self.checkDynamicQuantizedLinear(model.fc3)
        self.checkDynamicQuantizedLinear(model.sub2.fc1)
        self.checkLinear(model.sub2.fc2)

    checkQuantized(model)

    # test one line API
    model = quantize_dynamic(NestedModel().eval(), qconfig_dict)
    checkQuantized(model)
def buildInferenceModel(args, quantize=False):
    assert os.path.exists(args.weight), 'inference model should have pre-trained weight'
    device = torch.device(args.device)

    model = DETR(args).to(device)
    model.load_state_dict(torch.load(args.weight, map_location=device))

    postProcess = PostProcess().to(device)

    wrapper = DETRWrapper(model, postProcess).to(device)
    wrapper.eval()

    if quantize:
        wrapper = quantize_dynamic(wrapper, {nn.Linear})

    print('optimizing model for inference...')
    return torch.jit.trace(
        wrapper,
        (torch.rand(1, 3, args.targetHeight, args.targetWidth).to(device),
         torch.as_tensor([args.targetWidth, args.targetHeight]).unsqueeze(0).to(device)))
def test_quantized_rnn(self):
    d_in, d_hid = 2, 2
    model = LSTMDynamicModel().eval()
    cell = model.lstm

    # Replace parameter values s.t. the range of values is exactly
    # 255, thus we will have 0 quantization error in the quantized
    # GEMM call. This is for testing purposes.
    #
    # Note that the current implementation does not support
    # accumulation values outside of the range representable by a
    # 16 bit integer, instead resulting in a saturated value. We
    # must take care that in our test we do not end up with a dot
    # product that overflows the int16 range, e.g.
    # (255*127+255*127) = 64770. So, we hardcode the test values
    # here and ensure a mix of signedness.
    vals = [[100, -155], [100, -155], [-155, 100], [-155, 100],
            [100, -155], [-155, 100], [-155, 100], [100, -155]]
    if isinstance(cell, torch.nn.LSTM):
        num_chunks = 4
    vals = vals[:d_hid * num_chunks]
    cell.weight_ih_l0 = torch.nn.Parameter(
        torch.tensor(vals, dtype=torch.float),
        requires_grad=False)
    cell.weight_hh_l0 = torch.nn.Parameter(
        torch.tensor(vals, dtype=torch.float),
        requires_grad=False)

    ref = copy.deepcopy(cell)

    model_int8 = quantize_dynamic(model=model, dtype=torch.qint8)
    model_fp16 = quantize_dynamic(model=model, dtype=torch.float16)

    # Smoke test extra reprs
    self.assertTrue('DynamicQuantizedLSTM' in str(model_int8))
    self.assertTrue('DynamicQuantizedLSTM' in str(model_fp16))
    cell_int8 = model_int8.lstm
    cell_fp16 = model_fp16.lstm

    assert type(cell_int8) == torch.nn.quantized.dynamic.LSTM, \
        'torch.nn.LSTM should be converted to torch.nn.quantized.dynamic.LSTM after quantize_dynamic'
    assert type(cell_fp16) == torch.nn.quantized.dynamic.LSTM, \
        'torch.nn.LSTM should be converted to torch.nn.quantized.dynamic.LSTM after quantize_dynamic'

    niter = 10
    x = torch.tensor([[100, -155], [-155, 100], [100, -155]],
                     dtype=torch.float).unsqueeze(0).repeat(niter, 1, 1)
    h0_vals = [[-155, 100], [-155, 155], [100, -155]]
    hx = torch.tensor(h0_vals, dtype=torch.float).unsqueeze(0)
    cx = torch.tensor(h0_vals, dtype=torch.float).unsqueeze(0)

    if isinstance(ref, torch.nn.LSTM):
        hiddens = (hx, cx)

    ref_out, ref_hid = ref(x, hiddens)

    # Compare int8 quantized to unquantized
    output_int8, final_hiddens_int8 = cell_int8(x, hiddens)

    torch.testing.assert_allclose(output_int8, ref_out)
    self.assertEqual(output_int8, ref_out)
    for out_val, ref_val in zip(final_hiddens_int8, ref_hid):
        torch.testing.assert_allclose(out_val, ref_val)

    class ScriptWrapper(torch.nn.Module):
        def __init__(self, cell):
            super(ScriptWrapper, self).__init__()
            self.cell = cell

        def forward(self, x, hiddens):
            # type: (torch.Tensor, Tuple[torch.Tensor, torch.Tensor]) -> Tuple[torch.Tensor, Tuple[torch.Tensor, torch.Tensor]]
            return self.cell(x, hiddens)

    # TODO: TorchScript overloads don't work without this wrapper
    cell_script = torch.jit.script(ScriptWrapper(cell_int8))
    out_script, hid_script = cell_script(x, hiddens)
    self.assertEqual(len(out_script), len(ref_out))
    for out_val, ref_val in zip(out_script, ref_out):
        torch.testing.assert_allclose(out_val, ref_val)

    # Test save/load
    b = io.BytesIO()
    torch.jit.save(cell_script, b)
    b.seek(0)
    loaded = torch.jit.load(b)
    out_loaded, hid_loaded = loaded(x, hiddens)
    for loaded_val, ref_val in zip(out_loaded, ref_out):
        torch.testing.assert_allclose(loaded_val, ref_val)

    # Compare fp16 quantized to unquantized
    output_fp16, final_hiddens_fp16 = cell_fp16(x, hiddens)

    torch.testing.assert_allclose(output_fp16, ref_out)
    self.assertEqual(output_fp16, ref_out)
    for out, ref in zip(final_hiddens_fp16, ref_hid):
        torch.testing.assert_allclose(out, ref)
def export_detection_model(
    cfg: NOD,
    model_checkpoint: Path,
    model_export_path: Path = Path("torch_model"),
    verbose: bool = True,
    onnx_export: bool = False,
    strict_jit: bool = False,
) -> None:
    """

    :param verbose:
    :type verbose:
    :param cfg:
    :type cfg:
    :param model_checkpoint:
    :type model_checkpoint:
    :param model_export_path:
    :type model_export_path:
    :return:
    :rtype:"""
    model = SingleShotDetection(cfg)

    checkpointer = CheckPointer(
        model, save_dir=ensure_existence(PROJECT_APP_PATH.user_data / "results"))
    checkpointer.load(model_checkpoint, use_latest=model_checkpoint is None)
    print(
        f"Loaded weights from {model_checkpoint if model_checkpoint else checkpointer.get_checkpoint_file()}"
    )

    model.post_init()
    model.to(global_torch_device())

    transforms = SSDTransform(cfg.input.image_size,
                              cfg.input.pixel_mean,
                              split=SplitEnum.testing)
    model.eval()  # Important!

    fuse_quantize_model = False
    if fuse_quantize_model:
        modules_to_fuse = [
            ["conv", "bn", "relu"]
        ]  # Names of modules to fuse, maybe supply directly for architecture class/declaration
        model = torch.quantization.fuse_modules(
            model, modules_to_fuse=modules_to_fuse, inplace=False)

    pre_quantize_model = False
    if pre_quantize_model:  # Accuracy may drop!
        if True:
            model = quantization.quantize_dynamic(model, dtype=torch.qint8)
        else:
            pass
            # model = quantization.quantize(model)

    frame_g = frame_generator(cv2.VideoCapture(0))
    for image in tqdm(frame_g):
        example_input = (transforms(image)[0].unsqueeze(0).to(global_torch_device()),)
        try:
            if onnx_export:
                exp_path = model_export_path.with_suffix(".onnx")
                output = onnx.export(
                    model,
                    example_input,
                    str(exp_path),
                    verbose=verbose,
                    # export_params=True,  # store the trained parameter weights inside the model file
                    # opset_version=10,  # the onnx version to export the model to
                    # do_constant_folding=True,  # whether to execute constant folding for optimization
                    # input_names=["input"],  # the model's input names
                    # output_names=["output"],  # the model's output names
                    # dynamic_axes={
                    #     "input": {0: "batch_size"},  # variable length axes
                    #     "output": {0: "batch_size"},
                    # }
                )
                sprint(f"Successfully exported ONNX model at {exp_path}", color="blue")
            else:
                raise Exception("Just trace instead, ignore exception")
        except Exception as e:
            sprint(f"Torch ONNX export does not work, {e}", color="red")
            try:
                traced_script_module = torch.jit.trace(
                    model,
                    example_input,
                    # strict=strict_jit,
                    check_inputs=(
                        transforms(next(frame_g))[0].unsqueeze(0).to(global_torch_device()),
                        transforms(next(frame_g))[0].unsqueeze(0).to(global_torch_device()),
                    ),
                )
                exp_path = model_export_path.with_suffix(".traced")
                traced_script_module.save(str(exp_path))
                print(f"Traced Ops used {torch.jit.export_opnames(traced_script_module)}")
                sprint(
                    f"Successfully exported JIT Traced model at {exp_path}",
                    color="green",
                )
            except Exception as e_i:
                sprint(f"Torch JIT Trace export does not work!, {e_i}", color="red")
        break
def export_detection_model(
    cfg: NOD,
    model_ckpt: Path,
    model_export_path: Path = Path("torch_model"),
    verbose: bool = True,
    onnx_export: bool = False,
    strict_jit: bool = False,
) -> None:
    """

    :param verbose:
    :type verbose:
    :param cfg:
    :type cfg:
    :param model_ckpt:
    :type model_ckpt:
    :param model_export_path:
    :type model_export_path:
    :return:
    :rtype:
    """
    model = SingleShotDectectionNms(cfg)

    checkpointer = CheckPointer(
        model, save_dir=ensure_existence(PROJECT_APP_PATH.user_data / "results")
    )
    checkpointer.load(model_ckpt, use_latest=model_ckpt is None)
    print(
        f"Loaded weights from {model_ckpt if model_ckpt else checkpointer.get_checkpoint_file()}"
    )

    model.post_init()
    model.to(global_torch_device())

    transforms = SSDTransform(
        cfg.input.image_size, cfg.input.pixel_mean, split=Split.Testing
    )
    model.eval()

    pre_quantize_model = False
    if pre_quantize_model:  # Accuracy may drop!
        if True:
            model = quantization.quantize_dynamic(model, dtype=torch.qint8)
        else:
            pass
            # model = quantization.quantize(model)

    frame_g = frame_generator(cv2.VideoCapture(0))
    for image in tqdm(frame_g):
        example_input = (transforms(image)[0].unsqueeze(0).to(global_torch_device()),)
        try:
            traced_script_module = torch.jit.script(
                model,
                # example_input,
            )
            exp_path = model_export_path.with_suffix(".compiled")
            traced_script_module.save(str(exp_path))
            print(f"Traced Ops used {torch.jit.export_opnames(traced_script_module)}")
            sprint(
                f"Successfully exported JIT Traced model at {exp_path}", color="green"
            )
        except Exception as e_i:
            sprint(f"Torch JIT Trace export does not work!, {e_i}", color="red")
        break
import os
import sys

from sentence_transformers import SentenceTransformer
from torch.nn import Embedding, Linear
from torch.quantization import quantize_dynamic

db_path = sys.argv[1]
prefix = os.path.basename(db_path).split('.')[0]
tfidf_path = os.path.join('models', prefix + 'tfidf.p')
meta_path = os.path.join('models', prefix + 'tfidf_meta.p')
model_path = os.path.join('models', prefix + 'model.p')

max_train = 5000
max_features = 5000
max_recommendations = 50

cool_nlp_model = quantize_dynamic(
    SentenceTransformer('paraphrase-xlm-r-multilingual-v1', device='cpu'),
    {Linear, Embedding})


def generate_tfidf_pickles():
    """Gets all the read articles, treats articles flagged as 's' as 1 and the
    rest as 0, and produces the embeddings
    """
    sqldb = connect_db(db_path)
    records = query_db(sqldb, '''select feedurl, author, id, title, content, flags from rss_item where unread=0 order by pubDate DESC;''')
    content_list = []
    outcome_list = []
    id_list = []
    title_list = []
    for record in records:
        # We should not judge the book by its cover
        content_list.append('||' + record['feedurl'] + '|| \n ||' + record['author'] +
                            '|| \n ||' + record['title'] + '|| \n' + record['content'])
def test_quantized_rnn(self):
    d_in, d_hid = 2, 2
    model = LSTMDynamicModel().eval()
    cell = model.lstm

    # Replace parameter values s.t. the range of values is exactly
    # 255, thus we will have 0 quantization error in the quantized
    # GEMM call. This is for testing purposes.
    #
    # Note that the current implementation does not support
    # accumulation values outside of the range representable by a
    # 16 bit integer, instead resulting in a saturated value. We
    # must take care that in our test we do not end up with a dot
    # product that overflows the int16 range, e.g.
    # (255*127+255*127) = 64770. So, we hardcode the test values
    # here and ensure a mix of signedness.
    vals = [[100, -155], [100, -155], [-155, 100], [-155, 100],
            [100, -155], [-155, 100], [-155, 100], [100, -155]]
    if isinstance(cell, torch.nn.LSTM):
        num_chunks = 4
    vals = vals[:d_hid * num_chunks]
    cell.weight_ih_l0 = torch.nn.Parameter(torch.tensor(vals, dtype=torch.float),
                                           requires_grad=False)
    cell.weight_hh_l0 = torch.nn.Parameter(torch.tensor(vals, dtype=torch.float),
                                           requires_grad=False)

    ref = copy.deepcopy(cell)

    qconfig_dynamic_dict = {
        torch.nn.LSTM: default_dynamic_qconfig,
    }
    default_dynamic_module_mapping = {
        torch.nn.LSTM: torch.nn.quantized.dynamic.LSTM,
    }
    model_int8 = quantize_dynamic(model, qconfig_dynamic_dict, default_dynamic_module_mapping)
    cell_int8 = model_int8.lstm

    assert type(cell_int8) == torch.nn.quantized.dynamic.LSTM, \
        'torch.nn.LSTM should be converted to torch.nn.quantized.dynamic.LSTM after quantize_dynamic'

    niter = 10
    x = torch.tensor([[100, -155], [-155, 100], [100, -155]],
                     dtype=torch.float).unsqueeze(0).repeat(niter, 1, 1)
    h0_vals = [[-155, 100], [-155, 155], [100, -155]]
    hx = torch.tensor(h0_vals, dtype=torch.float).unsqueeze(0)
    cx = torch.tensor(h0_vals, dtype=torch.float).unsqueeze(0)

    if isinstance(ref, torch.nn.LSTM):
        hiddens = (hx, cx)

    ref_out, ref_hid = ref(x, hiddens)

    # Compare int8 quantized to unquantized
    output_int8, final_hiddens_int8 = cell_int8(x, hiddens)

    torch.testing.assert_allclose(output_int8, ref_out)
    self.assertEqual(output_int8, ref_out)
    for out, ref in zip(final_hiddens_int8, ref_hid):
        torch.testing.assert_allclose(out, ref)