Example #1
    def test_two_layers(self):
        r"""TwoLayerLinearModel has two Linear modules but we only quantize the second one
        `fc2`, and `fc1` is not quantized
        """
        model = TwoLayerLinearModel().eval()
        qconfig_dict = {
            'fc2': default_dynamic_qconfig
        }
        prepare_dynamic(model, qconfig_dict)

        convert_dynamic(model)

        def checkQuantized(model):
            self.assertEqual(type(model.fc1), torch.nn.Linear)
            self.checkDynamicQuantizedLinear(model.fc2)
            self.checkScriptable(model, self.calib_data, check_save_load=True)

        checkQuantized(model)

        # test one line API
        model = quantize_dynamic(TwoLayerLinearModel().eval(), qconfig_dict)
        checkQuantized(model)

        # Test set API
        model = quantize_dynamic(TwoLayerLinearModel().eval(), {'fc2'})
        checkQuantized(model)
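For reference outside the test harness, here is a minimal sketch of the two qconfig_spec forms exercised above; the TwoLayerNet class below is an illustrative stand-in, not the TwoLayerLinearModel fixture.

import torch
import torch.nn as nn
from torch.quantization import quantize_dynamic, default_dynamic_qconfig


class TwoLayerNet(nn.Module):
    def __init__(self):
        super().__init__()
        self.fc1 = nn.Linear(8, 8)
        self.fc2 = nn.Linear(8, 4)

    def forward(self, x):
        return self.fc2(self.fc1(x))


# Dict form: map a submodule name to an explicit dynamic qconfig.
q1 = quantize_dynamic(TwoLayerNet().eval(), {'fc2': default_dynamic_qconfig})
# Set form: every name in the set implicitly gets default_dynamic_qconfig.
q2 = quantize_dynamic(TwoLayerNet().eval(), {'fc2'})
print(type(q1.fc1), type(q1.fc2))  # fc1 stays torch.nn.Linear, fc2 becomes a dynamically quantized Linear

In both forms only fc2 is swapped; fc1 keeps running in float.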
Example #2
    def test_nested2(self):
        r"""Another test case for quantized, we will quantize all submodules
        of submodule sub2
        """
        model = NestedModel().eval()
        qconfig_dict = {
            'fc3': default_dynamic_qconfig,
            'sub2': default_dynamic_qconfig
        }
        prepare_dynamic(model, qconfig_dict)

        convert_dynamic(model)

        def checkQuantized(model):
            self.checkLinear(model.sub1.fc)
            self.assertEqual(type(model.sub1.relu), torch.nn.ReLU)
            self.checkDynamicQuantizedLinear(model.sub2.fc1)
            self.checkDynamicQuantizedLinear(model.sub2.fc2)
            self.checkDynamicQuantizedLinear(model.fc3)
            self.checkScriptable(model, self.calib_data, check_save_load=True)

        checkQuantized(model)

        # test one line API
        model = quantize_dynamic(NestedModel().eval(), qconfig_dict)
        checkQuantized(model)

        # Test set API
        model = quantize_dynamic(NestedModel().eval(), {'fc3', 'sub2'})
        checkQuantized(model)
Example #3
    def test_nested1(self):
        r"""Test quantization for nested model, top level 'fc3' and
        'fc1' of submodule 'sub2', 'sub2.fc2' is not quantized
        """
        model = NestedModel().eval()
        qconfig_dict = {
            'fc3': default_dynamic_qconfig,
            'sub2.fc1': default_dynamic_qconfig
        }

        prepare_dynamic(model, qconfig_dict)
        convert_dynamic(model)

        def checkQuantized(model):
            self.checkLinear(model.sub1.fc)
            self.checkDynamicQuantizedLinear(model.fc3)
            self.checkDynamicQuantizedLinear(model.sub2.fc1)
            self.checkLinear(model.sub2.fc2)
            self.checkScriptable(model, self.calib_data, check_save_load=True)

        checkQuantized(model)

        # test one line API
        model = quantize_dynamic(NestedModel().eval(), qconfig_dict)
        checkQuantized(model)

        model = quantize_dynamic(NestedModel().eval(), {'fc3', 'sub2.fc1'})
        checkQuantized(model)
Example #4
    def test_nested3(self):
        r"""More complicated nested test case with child qconfig overrides
        parent qconfig
        """
        model = NestedModel().eval()
        custom_options = {
            'dtype': torch.quint8,
            'qscheme': torch.per_tensor_affine
        }
        custom_dynamic_qconfig = QConfigDynamic(weight=default_weight_observer)
        qconfig_dynamic_dict = {
            'fc3': default_dynamic_qconfig,
            'sub2': default_dynamic_qconfig,
            'sub2.fc1': custom_dynamic_qconfig
        }
        prepare_dynamic(model, qconfig_dynamic_dict)

        convert_dynamic(model)

        def checkQuantized(model):
            self.checkDynamicQuantizedLinear(model.sub2.fc1)
            self.checkDynamicQuantizedLinear(model.sub2.fc2)
            self.checkDynamicQuantizedLinear(model.fc3)
            self.checkScriptable(model, self.calib_data, check_save_load=True)

        checkQuantized(model)

        # test one line API
        model = quantize_dynamic(NestedModel().eval(), qconfig_dynamic_dict)
        checkQuantized(model)

        # Test set API
        model = quantize_dynamic(NestedModel().eval(), {'fc3', 'sub2', 'sub2.fc1'})
        checkQuantized(model)
Example #5
def quant(net_i, scheme, trainer, quant_params=None):
    """
    Quantizes the network according to the chosen scheme:
    "post", "dynamic" or "both"
    """
    if scheme == "post":
        net_i.to("cpu")
        net_i.eval()
        net_i.qconfig = get_default_qconfig("fbgemm")
        net_i.fuse_model()
        prepare(net_i, inplace=True)
        _, net_i = trainer.evaluate(net_i, quant_mode=True)
        convert(net_i, inplace=True)
    elif scheme == "dynamic":
        net_i.to("cpu")
        net_i = quantize_dynamic(net_i, quant_params, dtype=qint8)
    elif scheme == "both":
        net_i.to("cpu")
        net_i.eval()
        net_i = quantize_dynamic(net_i, quant_params, dtype=qint8)
        net_i.qconfig = get_default_qconfig("fbgemm")
        net_i.fuse_model()
        prepare(net_i, inplace=True)
        _, net_i = trainer.evaluate(net_i, quant_mode=True)
        convert(net_i, inplace=True)
    else:
        pass
    return net_i
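In isolation, the "dynamic" branch above reduces to a single quantize_dynamic call. A rough sketch, assuming quant_params is a set of module types such as {torch.nn.Linear} and using a toy model in place of net_i:

import torch
from torch.quantization import quantize_dynamic

net_i = torch.nn.Sequential(torch.nn.Linear(16, 16), torch.nn.ReLU(), torch.nn.Linear(16, 2))
net_i.to("cpu")
net_i.eval()
# Only the Linear layers' weights are quantized to int8; activations are quantized on the fly at runtime.
net_i = quantize_dynamic(net_i, {torch.nn.Linear}, dtype=torch.qint8)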
Example #6
    def test_single_layer(self):
        r"""Dynamic Quantize SingleLayerLinearDynamicModel which has one Linear module,
        make sure it is swapped to nnqd.Linear which is the quantized version of
        the module
        """
        model = SingleLayerLinearDynamicModel().eval()
        qconfig_dict = {
            '': default_dynamic_qconfig
        }
        prepare_dynamic(model, qconfig_dict)
        convert_dynamic(model)

        def checkQuantized(model):
            self.checkDynamicQuantizedLinear(model.fc1)
            self.checkScriptable(model, self.calib_data, check_save_load=True)

        checkQuantized(model)

        # test one line API - out of place version
        base = SingleLayerLinearDynamicModel()
        keys_before = set(list(base.state_dict().keys()))
        model = quantize_dynamic(base, qconfig_dict)
        checkQuantized(model)
        keys_after = set(list(base.state_dict().keys()))
        self.assertEqual(keys_before, keys_after)  # simple check that nothing changed

        # in-place version
        model = SingleLayerLinearDynamicModel()
        quantize_dynamic(model, qconfig_dict, inplace=True)
        checkQuantized(model)
Example #7
def quantize_model(
    model: Model, qconfig_spec: Dict = None, dtype: Union[str, Optional[torch.dtype]] = "qint8"
) -> Model:
    """Function to quantize model weights.

    Args:
        model: model to be quantized
        qconfig_spec (Dict, optional): quantization config in PyTorch format. Defaults to None.
        dtype (Union[str, Optional[torch.dtype]], optional): Type of weights after quantization.
            Defaults to "qint8".

    Returns:
        Model: quantized model
    """
    nn_model = get_nn_from_ddp_module(model)
    if isinstance(dtype, str):
        type_mapping = {"qint8": torch.qint8, "quint8": torch.quint8}
        dtype = type_mapping[dtype]
    try:
        quantized_model = quantization.quantize_dynamic(
            nn_model.cpu(), qconfig_spec=qconfig_spec, dtype=dtype
        )
    except RuntimeError:
        # Fall back to the qnnpack engine if the current engine cannot run dynamic quantization.
        torch.backends.quantized.engine = "qnnpack"
        quantized_model = quantization.quantize_dynamic(
            nn_model.cpu(), qconfig_spec=qconfig_spec, dtype=dtype
        )

    return quantized_model
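A hypothetical call could look like the following; the bare nn.Linear stands in for a real model, and if the model were wrapped in DistributedDataParallel, get_nn_from_ddp_module would unwrap it first.

import torch

model = torch.nn.Linear(10, 2)
quantized = quantize_model(model, qconfig_spec={torch.nn.Linear}, dtype="qint8")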
Example #8
def get_infer_model(model, opt):
    new_state_dict = get_state_dict(model.state_dict())
    model = JitModel(opt)
    model.load_state_dict(new_state_dict)
    model.eval()
    if opt.quantized:
        # static quantization : Work in progress
        if opt.static:
            backend = "qnnpack"
            model.qconfig = torch.quantization.get_default_qconfig(backend)
            torch.backends.quantized.engine = backend
            model_quantized = torch.quantization.prepare(model, inplace=False)
            model_quantized = torch.quantization.convert(model_quantized,
                                                         inplace=False)
        # support for dynamic quantization
        else:
            from torch.quantization import quantize_dynamic
            model_quantized = quantize_dynamic(model=model,
                                               qconfig_spec={torch.nn.Linear},
                                               dtype=torch.qint8,
                                               inplace=False)
        # quantized model save/load https://pytorch.org/docs/stable/quantization.html
        model = torch.jit.script(model_quantized)

    model_scripted = torch.jit.script(model)
    model_scripted.save(opt.infer_model)
    return
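Reloading the artifact written by model_scripted.save(opt.infer_model) is a single torch.jit.load call; the file name below is only illustrative.

import torch

loaded = torch.jit.load("infer_model.pt")
loaded.eval()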
Example #9
    def test_nested3(self):
        r"""More complicated nested test case with child qconfig overrides
        parent qconfig
        """
        model = NestedModel().eval()
        custom_options = {
            'dtype': torch.quint8,
            'qscheme': torch.per_tensor_affine
        }
        custom_qconfig = QConfig(weight=default_weight_observer(),
                                 activation=default_observer(**custom_options))
        qconfig_dict = {
            'fc3': default_qconfig,
            'sub2': default_qconfig,
            'sub2.fc1': custom_qconfig
        }
        model = prepare_dynamic(model, qconfig_dict)

        convert_dynamic(model)

        def checkQuantized(model):
            self.checkDynamicQuantizedLinear(model.sub2.fc1)
            self.checkDynamicQuantizedLinear(model.sub2.fc2)
            self.checkDynamicQuantizedLinear(model.fc3)

        checkQuantized(model)

        # test one line API
        model = quantize_dynamic(NestedModel().eval(), qconfig_dict)
        checkQuantized(model)
Example #10
    def test_compare_model_outputs_lstm_dynamic(self):
        r"""Compare the output of LSTM layer in dynamic quantized model and corresponding
        output of conv layer in float model
        """
        qengine = torch.backends.quantized.engine

        def compare_and_validate_results(float_model, q_model, input, hidden):
            act_compare_dict = compare_model_outputs(float_model, q_model,
                                                     input, hidden)
            expected_act_compare_dict_keys = {"lstm.stats"}

            self.assertTrue(
                act_compare_dict.keys() == expected_act_compare_dict_keys)
            for k, v in act_compare_dict.items():
                self.assertTrue(v["float"][0].shape == v["quantized"][0].shape)

        lstm_input = torch.rand((1, 1, 2))
        lstm_hidden = (torch.rand(1, 1, 2), torch.rand(1, 1, 2))

        model_list = [LSTMwithHiddenDynamicModel(qengine)]
        for model in model_list:
            model.eval()
            if hasattr(model, "fuse_model"):
                model.fuse_model()
            q_model = quantize_dynamic(model)
            compare_and_validate_results(model, q_model, lstm_input,
                                         lstm_hidden)
Example #11
    def test_nested2(self):
        r"""Another test case for quantized, we will quantize all submodules
        of submodule sub2
        """
        model = NestedModel().eval()
        qconfig_dict = {
            'fc3': default_qconfig,
            'sub2': default_qconfig
        }
        model = prepare_dynamic(model, qconfig_dict)

        convert_dynamic(model)

        def checkQuantized(model):
            self.checkLinear(model.sub1.fc)
            self.assertEqual(type(model.sub1.relu), torch.nn.ReLU)
            self.checkDynamicQuantizedLinear(model.sub2.fc1)
            self.checkDynamicQuantizedLinear(model.sub2.fc2)
            self.checkDynamicQuantizedLinear(model.fc3)

        checkQuantized(model)

        # test one line API
        model = quantize_dynamic(NestedModel().eval(), qconfig_dict)
        checkQuantized(model)
Example #12
    def test_compare_model_stub_linear_dynamic(self):
        r"""Compare the output of dynamic quantized linear layer and its float shadow module
        """

        qengine = torch.backends.quantized.engine

        def compare_and_validate_results(float_model, q_model,
                                         module_swap_list, data):
            ob_dict = compare_model_stub(float_model, q_model,
                                         module_swap_list, data)
            self.assertEqual(len(ob_dict), 1)
            for k, v in ob_dict.items():
                self.assertTrue(v["float"].shape == v["quantized"].shape)

        linear_data = self.calib_data[0][0]

        model_list = [SingleLayerLinearDynamicModel(qengine)]
        module_swap_list = [nn.Linear, nn.LSTM]
        for model in model_list:
            model.eval()
            if hasattr(model, "fuse_model"):
                model.fuse_model()
            q_model = quantize_dynamic(model)
            compare_and_validate_results(model, q_model, module_swap_list,
                                         linear_data)
Example #13
    def test_type_match_rule(self):
        r"""Test quantization for nested model, top level 'fc3' and
        'fc1' of submodule 'sub2', All 'torch.nn.Linear' modules are quantized
        """
        model = NestedModel().eval()
        qconfig_dict = {
            'fc3': None,
            'sub2.fc1': None,
            torch.nn.Linear: default_dynamic_qconfig
        }

        prepare_dynamic(model, qconfig_dict)
        test_only_eval_fn(model, self.calib_data)
        convert_dynamic(model)

        def checkQuantized(model):
            self.checkDynamicQuantizedLinear(model.sub1.fc)
            self.checkLinear(model.fc3)
            self.checkLinear(model.sub2.fc1)
            self.checkDynamicQuantizedLinear(model.sub2.fc2)
            test_only_eval_fn(model, self.calib_data)
            self.checkScriptable(model, self.calib_data, check_save_load=True)

        checkQuantized(model)

        # test one line API
        model = quantize_dynamic(NestedModel().eval(), qconfig_dict)
        checkQuantized(model)
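A minimal standalone sketch of that precedence, using a small Sequential stand-in rather than the NestedModel fixture: the module-type key applies to every matching module, while a name key (here an explicit None) overrides it for one submodule.

import torch
from torch.quantization import quantize_dynamic, default_dynamic_qconfig

model = torch.nn.Sequential(torch.nn.Linear(4, 4), torch.nn.Linear(4, 2)).eval()
qconfig_dict = {
    '1': None,                                 # keep the second Linear in float
    torch.nn.Linear: default_dynamic_qconfig,  # quantize every other Linear
}
qmodel = quantize_dynamic(model, qconfig_dict)
print(type(qmodel[0]), type(qmodel[1]))  # dynamically quantized Linear vs. float Linear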
Example #14
    def test_compare_model_stub_linear_dynamic(self):
        r"""Compare the output of dynamic quantized linear layer and its float shadow module
        """

        qengine = torch.backends.quantized.engine

        def compare_and_validate_results(float_model, q_model,
                                         module_swap_list, data):
            ob_dict = compare_model_stub(float_model, q_model,
                                         module_swap_list, data, ShadowLogger)
            self.assertEqual(len(ob_dict), 1)
            for k, v in ob_dict.items():
                self.assertTrue(v["float"].shape == v["quantized"].shape)

        img_data = [(
            torch.rand(3, 5, dtype=torch.float),
            torch.randint(0, 1, (2, ), dtype=torch.long),
        ) for _ in range(2)]
        linear_data = img_data[0][0]
        model_list = [SingleLayerLinearDynamicModel(qengine)]
        module_swap_list = [nn.Linear]
        for model in model_list:
            model.eval()
            if hasattr(model, "fuse_model"):
                model.fuse_model()
            q_model = quantize_dynamic(model)
            compare_and_validate_results(model, q_model, module_swap_list,
                                         linear_data)
Example #15
    def test_compare_model_outputs_linear_dynamic(self):
        r"""Compare the output of linear layer in dynamic quantized model and corresponding
        output of conv layer in float model
        """
        qengine = torch.backends.quantized.engine

        def compare_and_validate_results(float_model, q_model, data):
            act_compare_dict = compare_model_outputs(float_model, q_model,
                                                     data)
            expected_act_compare_dict_keys = {"fc1.stats"}

            self.assertTrue(
                act_compare_dict.keys() == expected_act_compare_dict_keys)
            for k, v in act_compare_dict.items():
                self.assertTrue(v["float"].shape == v["quantized"].shape)

        linear_data = self.calib_data[0][0]

        model_list = [SingleLayerLinearDynamicModel(qengine)]
        for model in model_list:
            model.eval()
            if hasattr(model, "fuse_model"):
                model.fuse_model()
            q_model = quantize_dynamic(model)
            compare_and_validate_results(model, q_model, linear_data)
Example #16
    def test_compare_model_stub_lstm_dynamic(self):
        r"""Compare the output of dynamic quantized LSTM layer and its float shadow module
        """

        qengine = torch.backends.quantized.engine

        def compare_and_validate_results(float_model, q_model,
                                         module_swap_list, input, hidden):
            ob_dict = compare_model_stub(float_model, q_model,
                                         module_swap_list, input, hidden)
            self.assertEqual(len(ob_dict), 1)
            for k, v in ob_dict.items():
                self.assertTrue(v["float"].shape == v["quantized"].shape)

        lstm_input = torch.rand((1, 1, 2))
        lstm_hidden = (torch.rand(1, 1, 2), torch.rand(1, 1, 2))

        model_list = [LSTMwithHiddenDynamicModel(qengine)]
        module_swap_list = [nn.Linear, nn.LSTM]
        for model in model_list:
            model.eval()
            if hasattr(model, "fuse_model"):
                model.fuse_model()
            q_model = quantize_dynamic(model)
            compare_and_validate_results(model, q_model, module_swap_list,
                                         lstm_input, lstm_hidden)
Example #17
def quantize_model_from_checkpoint(
    logdir: Path,
    checkpoint_name: str,
    stage: str = None,
    qconfig_spec: Optional[Union[Set, Dict]] = None,
    dtype: Optional[torch.dtype] = torch.qint8,
    backend: str = None,
) -> Model:
    """
    Quantize model using created experiment and runner.

    Args:
        logdir (Union[str, Path]): Path to Catalyst logdir with model
        checkpoint_name (str): Name of model checkpoint to use
        stage (str): experiment's stage name
        qconfig_spec: torch.quantization.quantize_dynamic parameter;
                lets you define which layers to quantize
        dtype: type of the quantized weights, defaults to torch.qint8
        backend: defines backend for quantization

    Returns:
        Quantized model
    """
    if backend is not None:
        torch.backends.quantized.engine = backend

    config_path = logdir / "configs" / "_config.json"
    checkpoint_path = logdir / "checkpoints" / f"{checkpoint_name}.pth"
    logging.info("Load config")
    config: Dict[str, dict] = load_config(config_path)

    # Get expdir name
    config_expdir = Path(config["args"]["expdir"])
    # We will use copy of expdir from logs for reproducibility
    expdir = Path(logdir) / "code" / config_expdir.name

    logger.info("Import experiment and runner from logdir")
    experiment: ConfigExperiment = None
    experiment, _, _ = prepare_config_api_components(expdir=expdir,
                                                     config=config)

    logger.info(f"Load model state from checkpoints/{checkpoint_name}.pth")
    if stage is None:
        stage = list(experiment.stages)[0]

    model = experiment.get_model(stage)
    checkpoint = load_checkpoint(checkpoint_path)
    unpack_checkpoint(checkpoint, model=model)

    logger.info("Quantization is running...")
    quantized_model = quantization.quantize_dynamic(
        model.cpu(),
        qconfig_spec=qconfig_spec,
        dtype=dtype,
    )

    logger.info("Done")
    return quantized_model
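A hypothetical invocation; the logdir, checkpoint name, and qconfig_spec below are placeholders.

from pathlib import Path

import torch

quantized = quantize_model_from_checkpoint(
    logdir=Path("logs/experiment"),
    checkpoint_name="best",
    qconfig_spec={torch.nn.Linear},
    dtype=torch.qint8,
)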
Example #18
    def on_epoch_end(self, runner: IRunner):
        """
        Performs model quantization at epoch end if the monitored metric
        has improved

        Args:
            runner: current runner
        """
        if not self.do_once:
            if self.mode == "best":
                score = runner.valid_metrics[self.metric]

                if self.best_score is None:
                    self.best_score = score

                if self.is_better(score, self.best_score) or self.first_time:
                    self.best_score = score
                    quantized_model = quantization.quantize_dynamic(
                        runner.model.cpu(),
                        qconfig_spec=self.qconfig_spec,
                        dtype=self.dtype,
                    )
                    save_quantized_model(
                        model=quantized_model,
                        logdir=runner.logdir,
                        checkpoint_name=self.mode,
                        out_model=self.out_model,
                        out_dir=self.out_dir,
                    )
                    self.first_time = False
            else:
                quantized_model = quantization.quantize_dynamic(
                    runner.model.cpu(),
                    qconfig_spec=self.qconfig_spec,
                    dtype=self.dtype,
                )
                save_quantized_model(
                    model=quantized_model,
                    logdir=runner.logdir,
                    checkpoint_name=self.mode,
                    out_model=self.out_model,
                    out_dir=self.out_dir,
                )
Example #19
    def quantization(self):
        if self.quant_method == 'dynamic':
            torch.backends.quantized.engine = 'fbgemm' if self.config == 'x86' else 'qnnpack'
            quant_model = quant.quantize_dynamic(
                self.model, {nn.Linear, nn.Conv2d, nn.Conv1d},
                dtype=torch.qint8)
        else:
            # Post-Training Static Quantization
            quant_model = copy.deepcopy(self.model)
            quant_model.eval()
            quant_model.fuse_model()
            quant_model.qconfig = self.qconfig
            quant.prepare(quant_model, inplace=True)
            self.calibrate_model(quant_model, self.calibration_loader)
            quant.convert(quant_model, inplace=True)
        self.print_model_size(quant_model, 'Quantized Model')
        return quant_model
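print_model_size is project-specific; a rough stand-in that reports the size of the serialized state_dict (the helper name is made up here) could look like this.

import io

import torch


def state_dict_size_mb(module: torch.nn.Module) -> float:
    """Serialize the state_dict into an in-memory buffer and return its size in MB."""
    buffer = io.BytesIO()
    torch.save(module.state_dict(), buffer)
    return buffer.getbuffer().nbytes / 1e6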
Example #20
    def test_two_layers(self):
        r"""TwoLayerLinearModel has two Linear modules but we only quantize the second one
        `fc2`, and `fc1` is not quantized
        """
        model = TwoLayerLinearModel().eval()
        qconfig_dict = {'fc2': default_qconfig}
        model = prepare_dynamic(model, qconfig_dict)

        convert_dynamic(model)

        def checkQuantized(model):
            self.assertEqual(type(model.fc1), torch.nn.Linear)
            self.checkDynamicQuantizedLinear(model.fc2)

        checkQuantized(model)

        # test one line API
        model = quantize_dynamic(TwoLayerLinearModel().eval(), qconfig_dict)
        checkQuantized(model)
Example #21
    def test_single_layer(self):
        r"""Dynamic Quantize SingleLayerLinearDynamicModel which has one Linear module,
        make sure it is swapped to nnqd.Linear which is the quantized version of
        the module
        """
        model = SingleLayerLinearDynamicModel().eval()
        qconfig_dict = {'': default_dynamic_qconfig}
        model = prepare_dynamic(model, qconfig_dict)
        convert_dynamic(model)

        def checkQuantized(model):
            self.checkDynamicQuantizedLinear(model.fc1)

        checkQuantized(model)

        # test one line API
        model = quantize_dynamic(SingleLayerLinearDynamicModel().eval(),
                                 qconfig_dict)
        checkQuantized(model)
Example #22
    def on_stage_end(self, runner: "IRunner") -> None:
        """
        On stage end action.

        Args:
            runner: runner of your experiment
        """
        if self.do_once:
            quantized_model = quantization.quantize_dynamic(
                runner.model.cpu(),
                qconfig_spec=self.qconfig_spec,
                dtype=self.dtype,
            )
            save_quantized_model(
                model=quantized_model,
                logdir=runner.logdir,
                checkpoint_name=self.mode,
                out_model=self.out_model,
                out_dir=self.out_dir,
            )
Example #23
    def test_compare_weights_lstm_dynamic(self):
        r"""Compare the weights of float and dynamic quantized LSTM layer
        """

        qengine = torch.backends.quantized.engine

        def compare_and_validate_results(float_model, q_model):
            weight_dict = compare_weights(float_model.state_dict(),
                                          q_model.state_dict())
            self.assertEqual(len(weight_dict), 1)
            for k, v in weight_dict.items():
                self.assertTrue(v["float"].shape == v["quantized"].shape)

        model_list = [LSTMwithHiddenDynamicModel(qengine)]
        for model in model_list:
            model.eval()
            if hasattr(model, "fuse_model"):
                model.fuse_model()
            q_model = quantize_dynamic(model)
            compare_and_validate_results(model, q_model)
Example #24
    def test_nested1(self):
        r"""Test quantization for nested model, top level 'fc3' and
        'fc1' of submodule 'sub2', 'sub2.fc2' is not quantized
        """
        model = NestedModel().eval()
        qconfig_dict = {'fc3': default_qconfig, 'sub2.fc1': default_qconfig}

        model = prepare_dynamic(model, qconfig_dict)
        convert_dynamic(model)

        def checkQuantized(model):
            self.checkLinear(model.sub1.fc)
            self.checkDynamicQuantizedLinear(model.fc3)
            self.checkDynamicQuantizedLinear(model.sub2.fc1)
            self.checkLinear(model.sub2.fc2)

        checkQuantized(model)

        # test one line API
        model = quantize_dynamic(NestedModel().eval(), qconfig_dict)
        checkQuantized(model)
Example #25
def buildInferenceModel(args, quantize=False):
    assert os.path.exists(
        args.weight), 'inference model should have pre-trained weight'
    device = torch.device(args.device)

    model = DETR(args).to(device)
    model.load_state_dict(torch.load(args.weight, map_location=device))

    postProcess = PostProcess().to(device)

    wrapper = DETRWrapper(model, postProcess).to(device)
    wrapper.eval()

    if quantize:
        wrapper = quantize_dynamic(wrapper, {nn.Linear})

    print('optimizing model for inference...')
    return torch.jit.trace(
        wrapper,
        (torch.rand(1, 3, args.targetHeight, args.targetWidth).to(device),
         torch.as_tensor([args.targetWidth, args.targetHeight
                          ]).unsqueeze(0).to(device)))
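Since buildInferenceModel returns a torch.jit traced module, it can be saved and reloaded directly for deployment; args is the namespace used above and the file name is illustrative.

traced = buildInferenceModel(args, quantize=True)
traced.save("detr_wrapper_quantized.pt")
reloaded = torch.jit.load("detr_wrapper_quantized.pt")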
Example #26
    def test_quantized_rnn(self):
        d_in, d_hid = 2, 2
        model = LSTMDynamicModel().eval()
        cell = model.lstm

        # Replace parameter values s.t. the range of values is exactly
        # 255, thus we will have 0 quantization error in the quantized
        # GEMM call. This is for testing purposes.
        #
        # Note that the current implementation does not support
        # accumulation values outside of the range representable by a
        # 16 bit integer, instead resulting in a saturated value. We
        # must take care that in our test we do not end up with a dot
        # product that overflows the int16 range, e.g.
        # (255*127+255*127) = 64770. So, we hardcode the test values
        # here and ensure a mix of signedness.
        vals = [[100, -155],
                [100, -155],
                [-155, 100],
                [-155, 100],
                [100, -155],
                [-155, 100],
                [-155, 100],
                [100, -155]]
        if isinstance(cell, torch.nn.LSTM):
            num_chunks = 4
        vals = vals[:d_hid * num_chunks]
        cell.weight_ih_l0 = torch.nn.Parameter(
            torch.tensor(vals, dtype=torch.float),
            requires_grad=False)
        cell.weight_hh_l0 = torch.nn.Parameter(
            torch.tensor(vals, dtype=torch.float),
            requires_grad=False)

        ref = copy.deepcopy(cell)

        model_int8 = quantize_dynamic(model=model, dtype=torch.qint8)
        model_fp16 = quantize_dynamic(model=model, dtype=torch.float16)

        # Smoke test extra reprs
        self.assertTrue('DynamicQuantizedLSTM' in str(model_int8))
        self.assertTrue('DynamicQuantizedLSTM' in str(model_fp16))
        cell_int8 = model_int8.lstm
        cell_fp16 = model_fp16.lstm

        assert type(cell_int8) == torch.nn.quantized.dynamic.LSTM, \
            'torch.nn.LSTM should be converted to torch.nn.quantized.dynamic.LSTM after quantize_dynamic'
        assert type(cell_fp16) == torch.nn.quantized.dynamic.LSTM, \
            'torch.nn.LSTM should be converted to torch.nn.quantized.dynamic.LSTM after quantize_dynamic'

        niter = 10
        x = torch.tensor([[100, -155],
                          [-155, 100],
                          [100, -155]], dtype=torch.float).unsqueeze(0).repeat(niter, 1, 1)

        h0_vals = [[-155, 100],
                   [-155, 155],
                   [100, -155]]

        hx = torch.tensor(h0_vals, dtype=torch.float).unsqueeze(0)
        cx = torch.tensor(h0_vals, dtype=torch.float).unsqueeze(0)

        if isinstance(ref, torch.nn.LSTM):
            hiddens = (hx, cx)

        ref_out, ref_hid = ref(x, hiddens)

        # Compare int8 quantized to unquantized
        output_int8, final_hiddens_int8 = cell_int8(x, hiddens)

        torch.testing.assert_allclose(output_int8, ref_out)
        self.assertEqual(output_int8, ref_out)
        for out_val, ref_val in zip(final_hiddens_int8, ref_hid):
            torch.testing.assert_allclose(out_val, ref_val)

        class ScriptWrapper(torch.nn.Module):
            def __init__(self, cell):
                super(ScriptWrapper, self).__init__()
                self.cell = cell

            def forward(self, x, hiddens):
                # type: (torch.Tensor, Tuple[torch.Tensor, torch.Tensor]) -> Tuple[torch.Tensor, Tuple[torch.Tensor, torch.Tensor]]
                return self.cell(x, hiddens)

        # TODO: TorchScript overloads don't work without this wrapper
        cell_script = torch.jit.script(ScriptWrapper(cell_int8))
        out_script, hid_script = cell_script(x, hiddens)
        self.assertEqual(len(out_script), len(ref_out))
        for out_val, ref_val in zip(out_script, ref_out):
            torch.testing.assert_allclose(out_val, ref_val)

        # Test save/load
        b = io.BytesIO()
        torch.jit.save(cell_script, b)
        b.seek(0)
        loaded = torch.jit.load(b)
        out_loaded, hid_loaded = loaded(x, hiddens)
        for loaded_val, ref_val in zip(out_loaded, ref_out):
            torch.testing.assert_allclose(loaded_val, ref_val)

        # Compare fp16 quantized to unquantized
        output_fp16, final_hiddens_fp16 = cell_fp16(x, hiddens)

        torch.testing.assert_allclose(output_fp16, ref_out)
        self.assertEqual(output_fp16, ref_out)
        for out, ref in zip(final_hiddens_fp16, ref_hid):
            torch.testing.assert_allclose(out, ref)
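Outside the test harness, the same int8 and fp16 dynamic quantization of an LSTM can be sketched as below. The thin wrapper module is illustrative; it holds the LSTM as a child because eager-mode conversion swaps submodules rather than the module passed in as the root.

import torch
from torch.quantization import quantize_dynamic


class LSTMWrapper(torch.nn.Module):
    def __init__(self):
        super().__init__()
        self.lstm = torch.nn.LSTM(2, 2)

    def forward(self, x, hiddens):
        return self.lstm(x, hiddens)


model = LSTMWrapper().eval()
model_int8 = quantize_dynamic(model, {torch.nn.LSTM}, dtype=torch.qint8)
model_fp16 = quantize_dynamic(model, {torch.nn.LSTM}, dtype=torch.float16)

x = torch.randn(3, 1, 2)
hiddens = (torch.zeros(1, 1, 2), torch.zeros(1, 1, 2))
out_int8, _ = model_int8(x, hiddens)
out_fp16, _ = model_fp16(x, hiddens)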
Example #27
def export_detection_model(
    cfg: NOD,
    model_checkpoint: Path,
    model_export_path: Path = Path("torch_model"),
    verbose: bool = True,
    onnx_export: bool = False,
    strict_jit: bool = False,
) -> None:
    """

    :param verbose:
    :type verbose:
    :param cfg:
    :type cfg:
    :param model_checkpoint:
    :type model_checkpoint:
    :param model_export_path:
    :type model_export_path:
    :return:
    :rtype:"""
    model = SingleShotDetection(cfg)

    checkpointer = CheckPointer(model,
                                save_dir=ensure_existence(
                                    PROJECT_APP_PATH.user_data / "results"))
    checkpointer.load(model_checkpoint, use_latest=model_checkpoint is None)
    print(
        f"Loaded weights from {model_checkpoint if model_checkpoint else checkpointer.get_checkpoint_file()}"
    )

    model.post_init()
    model.to(global_torch_device())

    transforms = SSDTransform(cfg.input.image_size,
                              cfg.input.pixel_mean,
                              split=SplitEnum.testing)
    model.eval()  # Important!

    fuse_quantize_model = False
    if fuse_quantize_model:
        modules_to_fuse = [
            ["conv", "bn", "relu"]
        ]  # Names of modules to fuse, maybe supply directly for architecture class/declaration
        model = torch.quantization.fuse_modules(
            model, modules_to_fuse=modules_to_fuse, inplace=False)

    pre_quantize_model = False
    if pre_quantize_model:  # Accuracy may drop!
        if True:
            model = quantization.quantize_dynamic(model, dtype=torch.qint8)
        else:
            pass
            # model = quantization.quantize(model)

    frame_g = frame_generator(cv2.VideoCapture(0))
    for image in tqdm(frame_g):
        example_input = (transforms(image)[0].unsqueeze(0).to(
            global_torch_device()), )
        try:
            if onnx_export:
                exp_path = model_export_path.with_suffix(".onnx")
                output = onnx.export(
                    model,
                    example_input,
                    str(exp_path),
                    verbose=verbose,
                    # export_params=True,  # store the trained parameter weights inside the model file
                    # opset_version=10,  # the onnx version to export the model to
                    # do_constant_folding=True,  # whether to execute constant folding for optimization
                    # input_names=["input"],  # the model's input names
                    # output_names=["output"],  # the model's output names
                    # dynamic_axes={
                    #  "input": {0: "batch_size"},  # variable lenght axes
                    #  "output": {0: "batch_size"},
                    #  }
                )
                sprint(f"Successfully exported ONNX model at {exp_path}",
                       color="blue")
            else:
                raise Exception("Just trace instead, ignore exception")
        except Exception as e:
            sprint(f"Torch ONNX export does not work, {e}", color="red")
            try:
                traced_script_module = torch.jit.trace(
                    model,
                    example_input,
                    # strict=strict_jit,
                    check_inputs=(
                        transforms(next(frame_g))[0].unsqueeze(0).to(
                            global_torch_device()),
                        transforms(next(frame_g))[0].unsqueeze(0).to(
                            global_torch_device()),
                    ),
                )
                exp_path = model_export_path.with_suffix(".traced")
                traced_script_module.save(str(exp_path))
                print(
                    f"Traced Ops used {torch.jit.export_opnames(traced_script_module)}"
                )
                sprint(
                    f"Successfully exported JIT Traced model at {exp_path}",
                    color="green",
                )
            except Exception as e_i:
                sprint(f"Torch JIT Trace export does not work!, {e_i}",
                       color="red")

        break
    """
def export_detection_model(
    cfg: NOD,
    model_ckpt: Path,
    model_export_path: Path = Path("torch_model"),
    verbose: bool = True,
    onnx_export: bool = False,
    strict_jit: bool = False,
    ) -> None:
  """

:param verbose:
:type verbose:
:param cfg:
:type cfg:
:param model_ckpt:
:type model_ckpt:
:param model_export_path:
:type model_export_path:
:return:
:rtype:
"""
  model = SingleShotDectectionNms(cfg)

  checkpointer = CheckPointer(
      model, save_dir=ensure_existence(PROJECT_APP_PATH.user_data / "results")
      )
  checkpointer.load(model_ckpt, use_latest=model_ckpt is None)
  print(
      f"Loaded weights from {model_ckpt if model_ckpt else checkpointer.get_checkpoint_file()}"
      )

  model.post_init()
  model.to(global_torch_device())

  transforms = SSDTransform(
      cfg.input.image_size, cfg.input.pixel_mean, split=Split.Testing
      )
  model.eval()

  pre_quantize_model = False
  if pre_quantize_model:  # Accuracy may drop!
    if True:
      model = quantization.quantize_dynamic(model, dtype=torch.qint8)
    else:
      pass
      # model = quantization.quantize(model)

  frame_g = frame_generator(cv2.VideoCapture(0))
  for image in tqdm(frame_g):
    example_input = (transforms(image)[0].unsqueeze(0).to(global_torch_device()),)
    try:
      traced_script_module = torch.jit.script(
          model,
          # example_input,
          )
      exp_path = model_export_path.with_suffix(".compiled")
      traced_script_module.save(str(exp_path))
      print(f"Traced Ops used {torch.jit.export_opnames(traced_script_module)}")
      sprint(
          f"Successfully exported JIT Traced model at {exp_path}", color="green"
          )
    except Exception as e_i:
      sprint(f"Torch JIT Trace export does not work!, {e_i}", color="red")

    break
from sentence_transformers import SentenceTransformer
from torch.nn import Embedding, Linear
from torch.quantization import quantize_dynamic


db_path = sys.argv[1]
prefix = os.path.basename(db_path).split('.')[0]
tfidf_path = os.path.join('models', prefix + 'tfidf.p')
meta_path = os.path.join('models', prefix + 'tfidf_meta.p')
model_path = os.path.join('models', prefix + 'model.p')

max_train = 5000
max_features = 5000
max_recommendations = 50
cool_nlp_model = quantize_dynamic(SentenceTransformer('paraphrase-xlm-r-multilingual-v1', device='cpu'), {Linear, Embedding})


def generate_tfidf_pickles():
    """Gets all the read articles and considers those articles flagged as 's' as 1 and rest as 0 
    and produces the embeddings
    """
    sqldb = connect_db(db_path)
    records = query_db(sqldb, '''select feedurl, author, id, title, content, flags from rss_item where unread=0 order by pubDate DESC;''')
    content_list = []
    outcome_list = []
    id_list = []
    title_list = []
    for record in records:
        # We should not judge the book by its cover
        content_list.append('||'+ record['feedurl'] + '|| \n ||' + record['author'] + '|| \n ||' + record['title'] + '|| \n' + record['content'])
Example #30
    def test_quantized_rnn(self):
        d_in, d_hid = 2, 2
        model = LSTMDynamicModel().eval()
        cell = model.lstm

        # Replace parameter values s.t. the range of values is exactly
        # 255, thus we will have 0 quantization error in the quantized
        # GEMM call. This is for testing purposes.
        #
        # Note that the current implementation does not support
        # accumulation values outside of the range representable by a
        # 16 bit integer, instead resulting in a saturated value. We
        # must take care that in our test we do not end up with a dot
        # product that overflows the int16 range, e.g.
        # (255*127+255*127) = 64770. So, we hardcode the test values
        # here and ensure a mix of signedness.
        vals = [[100, -155], [100, -155], [-155, 100], [-155, 100],
                [100, -155], [-155, 100], [-155, 100], [100, -155]]
        if isinstance(cell, torch.nn.LSTM):
            num_chunks = 4
        vals = vals[:d_hid * num_chunks]
        cell.weight_ih_l0 = torch.nn.Parameter(torch.tensor(vals,
                                                            dtype=torch.float),
                                               requires_grad=False)
        cell.weight_hh_l0 = torch.nn.Parameter(torch.tensor(vals,
                                                            dtype=torch.float),
                                               requires_grad=False)

        ref = copy.deepcopy(cell)

        qconfig_dynamic_dict = {
            torch.nn.LSTM: default_dynamic_qconfig,
        }
        default_dynamic_module_mapping = {
            torch.nn.LSTM: torch.nn.quantized.dynamic.LSTM,
        }
        model_int8 = quantize_dynamic(model, qconfig_dynamic_dict,
                                      default_dynamic_module_mapping)
        cell_int8 = model_int8.lstm

        assert type(cell_int8) == torch.nn.quantized.dynamic.LSTM, \
            'torch.nn.LSTM should be converted to torch.nn.quantized.dynamic.LSTM after quantize_dynamic'

        niter = 10
        x = torch.tensor([[100, -155], [-155, 100], [100, -155]],
                         dtype=torch.float).unsqueeze(0).repeat(niter, 1, 1)

        h0_vals = [[-155, 100], [-155, 155], [100, -155]]

        hx = torch.tensor(h0_vals, dtype=torch.float).unsqueeze(0)
        cx = torch.tensor(h0_vals, dtype=torch.float).unsqueeze(0)

        if isinstance(ref, torch.nn.LSTM):
            hiddens = (hx, cx)

        ref_out, ref_hid = ref(x, hiddens)

        # Compare int8 quantized to unquantized
        output_int8, final_hiddens_int8 = cell_int8(x, hiddens)

        torch.testing.assert_allclose(output_int8, ref_out)
        self.assertEqual(output_int8, ref_out)
        for out, ref in zip(final_hiddens_int8, ref_hid):
            torch.testing.assert_allclose(out, ref)