Example no. 1
def quant(net_i, scheme, trainer, quant_params=None):
    """
    Quantizes the network according to the selected
    scheme: post, dynamic, or both.
    """
    if scheme == "post":
        net_i.to("cpu")
        net_i.eval()
        net_i.qconfig = get_default_qconfig("fbgemm")
        net_i.fuse_model()
        prepare(net_i, inplace=True)
        _, net_i = trainer.evaluate(net_i, quant_mode=True)
        convert(net_i, inplace=True)
    elif scheme == "dynamic":
        net_i.to("cpu")
        net_i = quantize_dynamic(net_i, quant_params, dtype=qint8)
    elif scheme == "both":
        net_i.to("cpu")
        net_i.eval()
        net_i = quantize_dynamic(net_i, quant_params, dtype=qint8)
        net_i.qconfig = get_default_qconfig("fbgemm")
        net_i.fuse_model()
        prepare(net_i, inplace=True)
        _, net_i = trainer.evaluate(net_i, quant_mode=True)
        convert(net_i, inplace=True)
    else:
        pass
    return net_i
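The "dynamic" branch above is the easiest to exercise in isolation. Below is a self-contained sketch with a toy model; the layer sizes and the choice of nn.Linear as the dynamically quantized type are illustrative assumptions, not taken from the snippet.

import torch
from torch import nn
from torch.quantization import quantize_dynamic

# Toy float model; only the Linear layers will be replaced.
toy = nn.Sequential(nn.Linear(16, 32), nn.ReLU(), nn.Linear(32, 4)).eval()
toy_int8 = quantize_dynamic(toy, {nn.Linear}, dtype=torch.qint8)
print(toy_int8)  # Linear modules now appear as DynamicQuantizedLinear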
Example no. 2
    def graph_mode_quantize(
        self,
        inputs,
        data_loader,
        calibration_num_batches=64,
        qconfig_dict=None,
        force_quantize=False,
    ):
        """Quantize the model during export with graph mode quantization."""
        if force_quantize:
            trace = self.trace(inputs)
            if not qconfig_dict:
                qconfig_dict = {"": get_default_qconfig("fbgemm")}
            prepare_m = prepare_jit(trace, qconfig_dict, inplace=False)
            prepare_m.eval()
            with torch.no_grad():
                for i, (_, batch) in enumerate(data_loader):
                    print("Running calibration with batch {}".format(i))
                    input_data = self.onnx_trace_input(batch)
                    prepare_m(*input_data)
                    if i == calibration_num_batches - 1:
                        break
            trace = convert_jit(prepare_m, inplace=True)
        else:
            super().quantize()
            trace = self.trace(inputs)

        return trace
Example no. 3
def quantize(model, data_loader, config="fbgemm", name="lanes"):
    # Configuration
    prep_config_dict = {"non_traceable_module_name": ["base", "deconv"]}
    qconfig = get_default_qconfig(config)
    qconfig_dict = {"": qconfig}
    model.load()
    model.eval()
    # Prepare Model
    model_prepared = prepare_fx(model,
                                qconfig_dict,
                                prepare_custom_config_dict=prep_config_dict)

    calibrate(model_prepared, data_loader)
    model_int_8 = convert_fx(model_prepared)
    # Model Description
    params = sum([np.prod(p.size()) for p in model.parameters()])
    print("ORIGINAL")
    print("Number of Parameters: {:.1f}M".format(params / 1e6))
    print(f"Total parameter count: {params}")
    params = sum([np.prod(p.size()) for p in model_int_8.parameters()])
    print("QUANTIZED")
    print("Number of Parameters: {:.6f}M".format(params / 1e6))
    print(f"Total parameter count: {params}")

    print_size_of_model(model_int_8)

    mobile_model = torch.jit.script(model_int_8)
    torchscript_mobile = optimize_for_mobile(mobile_model)
    torch.jit.save(torchscript_mobile, MODEL_MAIN_DIR + name + "_mobile.pt")

    torch.jit.save(torch.jit.script(model_int_8),
                   MODEL_MAIN_DIR + "quantized_" + name + "Net.pt")

    return model_int_8
Example no. 4
    def test_compare_model_stub_conv_static_fx(self):
        r"""Compare the output of static quantized conv layer and its float shadow module"""

        qengine = torch.backends.quantized.engine
        qconfig = get_default_qconfig(qengine)
        qconfig_dict = {"": qconfig}

        model_list = [ConvModel(), ConvBnReLUModel()]

        for float_model in model_list:
            float_model.eval()

            prepared_model = prepare_fx(float_model, qconfig_dict)

            prepared_float_model = copy.deepcopy(prepared_model)

            # Run calibration
            test_only_eval_fn(prepared_model, self.img_data_2d)
            q_model = convert_fx(prepared_model)

            module_swap_list = [nn.Conv2d, nni.modules.fused.ConvReLU2d]

            expected_ob_dict_keys = {"conv.stats"}
            self.compare_and_validate_model_stub_results_fx(
                prepared_float_model,
                q_model,
                module_swap_list,
                expected_ob_dict_keys,
                self.img_data_2d[0][0],
            )
Example no. 5
    def graph_mode_quantize(self,
                            inputs,
                            data_loader,
                            calibration_num_batches=64):
        """Quantize the model during export with graph mode quantization for linformer encoder."""
        if (isinstance(self.right_encoder, RoBERTaEncoder)
                and self.right_encoder.use_linformer_encoder
                and isinstance(self.left_encoder, RoBERTaEncoder)
                and self.left_encoder.use_linformer_encoder):
            trace = self.trace(inputs)
            qconfig = get_default_qconfig("fbgemm")
            qconfig_dict = {"": qconfig}
            prepare_m = prepare_jit(trace, qconfig_dict, inplace=False)
            prepare_m.eval()
            with torch.no_grad():
                for i, (_, batch) in enumerate(data_loader):
                    print("Running calibration with batch {}".format(i))
                    input_data = self.onnx_trace_input(batch)
                    prepare_m(*input_data)
                    if i == calibration_num_batches - 1:
                        break
            trace = convert_jit(prepare_m, inplace=True)
        else:
            super().quantize()
            trace = self.trace(inputs)

        return trace
Example no. 6
    def test_compare_model_stub_linear_static_fx(self):
        r"""Compare the output of static quantized linear layer and its float shadow module"""

        qengine = torch.backends.quantized.engine
        qconfig = get_default_qconfig(qengine)
        qconfig_dict = {"": qconfig}

        float_model = SingleLayerLinearModel()
        float_model.eval()

        prepared_model = prepare_fx(float_model, qconfig_dict)

        prepared_float_model = copy.deepcopy(prepared_model)

        # Run calibration
        test_only_eval_fn(prepared_model, self.calib_data)
        q_model = convert_fx(prepared_model)

        linear_data = self.calib_data[0][0]
        module_swap_list = [nn.Linear]

        expected_ob_dict_keys = {"fc1.stats"}

        self.compare_and_validate_model_stub_results_fx(
            prepared_float_model,
            q_model,
            module_swap_list,
            expected_ob_dict_keys,
            linear_data,
        )
Example no. 7
    def test_compare_model_outputs_conv_static_fx(self):
        r"""Compare the output of conv layer in static quantized model and corresponding
        output of conv layer in float model
        """

        qengine = torch.backends.quantized.engine
        qconfig = get_default_qconfig(qengine)
        qconfig_dict = {"": qconfig}

        model_list = [ConvModel(), ConvBnReLUModel()]

        for float_model in model_list:
            float_model.eval()
            prepared_model = prepare_fx(float_model, qconfig_dict)
            prepared_float_model = copy.deepcopy(prepared_model)

            # Run calibration
            test_only_eval_fn(prepared_model, self.img_data_2d)
            q_model = convert_fx(prepared_model)

            expected_act_compare_dict_keys = {"x.stats", "conv.stats"}
            self.compare_and_validate_model_outputs_results_fx(
                prepared_float_model,
                q_model,
                expected_act_compare_dict_keys,
                self.img_data_2d[0][0],
            )
Example no. 8
    def test_compare_weights_linear_static_fx(self):
        r"""Compare the weights of float and static quantized linear layer"""
        def calibrate(model, calib_data):
            model.eval()
            with torch.no_grad():
                for inp in calib_data:
                    model(*inp)

        def compare_and_validate_results(float_model, q_model):
            weight_dict = compare_weights_fx(float_model.state_dict(),
                                             q_model.state_dict())
            self.assertEqual(len(weight_dict), 1)
            for k, v in weight_dict.items():
                self.assertTrue(v["float"].shape == v["quantized"].shape)

        float_model = SingleLayerLinearModel()
        float_model.eval()

        qengine = torch.backends.quantized.engine
        qconfig = get_default_qconfig(qengine)
        qconfig_dict = {"": qconfig}

        prepared_model = prepare_fx(float_model, qconfig_dict)

        backup_prepared_model = copy.deepcopy(prepared_model)
        backup_prepared_model.eval()

        # Run calibration
        calibrate(prepared_model, self.calib_data)
        q_model = convert_fx(prepared_model)

        compare_and_validate_results(backup_prepared_model, q_model)
Example no. 9
    def test_compare_weights_conv_static_fx(self):
        r"""Compare the weights of float and static quantized conv layer"""
        def calibrate(model, calib_data):
            model.eval()
            with torch.no_grad():
                for inp in calib_data:
                    model(*inp)

        def compare_and_validate_results(float_model, q_model):
            weight_dict = compare_weights_fx(float_model.state_dict(),
                                             q_model.state_dict())
            self.assertEqual(len(weight_dict), 1)
            for k, v in weight_dict.items():
                self.assertTrue(v["float"].shape == v["quantized"].shape)

        qengine = torch.backends.quantized.engine
        qconfig = get_default_qconfig(qengine)
        qconfig_dict = {"": qconfig}

        model_list = [ConvModel(), ConvBnModel(), ConvBNReLU()]
        for float_model in model_list:
            float_model.eval()

            fused = fuse_fx(float_model)
            prepared_model = prepare_fx(float_model, qconfig_dict)

            # Run calibration
            calibrate(prepared_model, self.img_data_2d)
            q_model = convert_fx(prepared_model)

            compare_and_validate_results(fused, q_model)
Example no. 10
    def test_compare_model_outputs_linear_static_fx(self):
        r"""Compare the output of linear layer in static quantized model and corresponding
        output of linear layer in float model
        """

        qengine = torch.backends.quantized.engine
        qconfig = get_default_qconfig(qengine)
        qconfig_dict = {"": qconfig}

        float_model = SingleLayerLinearModel()
        float_model.eval()

        prepared_model = prepare_fx(float_model, qconfig_dict)

        prepared_float_model = copy.deepcopy(prepared_model)

        # Run calibration
        test_only_eval_fn(prepared_model, self.calib_data)
        q_model = convert_fx(prepared_model)

        linear_data = self.calib_data[0][0]

        expected_act_compare_dict_keys = {"x.stats", "fc1.stats"}
        self.compare_and_validate_model_outputs_results_fx(
            prepared_float_model, q_model, expected_act_compare_dict_keys,
            linear_data)
Example no. 11
    def test_post_training_static_quantization(self, root_dir):
        """ Validate post-training static quantization. """
        seed_everything(100)

        model = TestModule()
        num_epochs = 4
        static_quantization = PostTrainingQuantization(
            qconfig_dicts={"": {"": get_default_qconfig()}}
        )
        trainer = Trainer(
            default_root_dir=os.path.join(root_dir, "quantized"),
            checkpoint_callback=False,
            callbacks=[static_quantization],
            max_epochs=num_epochs,
            logger=False,
        )
        # This will both train the model + quantize it.
        trainer.fit(model)

        self.assertIsNotNone(static_quantization.quantized)
        # Default qconfig requires calibration.
        self.assertTrue(static_quantization.should_calibrate)

        test_in = torch.randn(12, 32)
        with mode(model, training=False) as m:
            base_out = m(test_in)
        with mode(static_quantization.quantized, training=False) as q:
            test_out = q(test_in)

        # While quantized/original won't be exact, they should be close.
        self.assertLess(
            ((((test_out - base_out) ** 2).sum(axis=1)) ** (1 / 2)).mean(),
            0.015,
            "RMSE should be less than 0.015 between quantized and original.",
        )
Example no. 12
def get_model(framework, model_variant):
    """
    Load the desired EfficientPose model variant using the requested deep learning framework.
    
    Args:
        framework: string
            Deep learning framework to use (Keras, TensorFlow, TensorFlow Lite or PyTorch)
        model_variant: string
            EfficientPose model to utilize (RT, I, II, III, IV, RT_Lite, I_Lite or II_Lite)
            
    Returns:
        Initialized EfficientPose model and corresponding resolution.
    """
    
    # Keras
    if framework in ['keras', 'k']:
        from tensorflow.keras.backend import set_learning_phase
        from tensorflow.keras.models import load_model
        set_learning_phase(0)
        model = load_model(join('models', 'keras', 'EfficientPose{0}.h5'.format(model_variant.upper())), custom_objects={'BilinearWeights': helpers.keras_BilinearWeights, 'Swish': helpers.Swish(helpers.eswish), 'eswish': helpers.eswish, 'swish1': helpers.swish1})
    
    # TensorFlow
    elif framework in ['tensorflow', 'tf']:
        from tensorflow.python.platform.gfile import FastGFile
        from tensorflow.compat.v1 import GraphDef
        from tensorflow.compat.v1.keras.backend import get_session
        from tensorflow import import_graph_def
        f = FastGFile(join('models', 'tensorflow', 'EfficientPose{0}.pb'.format(model_variant.upper())), 'rb')
        graph_def = GraphDef()
        graph_def.ParseFromString(f.read())
        f.close()
        model = get_session()
        model.graph.as_default()
        import_graph_def(graph_def)
    
    # TensorFlow Lite
    elif framework in ['tensorflowlite', 'tflite']:
        from tensorflow import lite
        model = lite.Interpreter(model_path=join('models', 'tflite', 'EfficientPose{0}.tflite'.format(model_variant.upper())))
        model.allocate_tensors()
    
    # PyTorch
    elif framework in ['pytorch', 'torch']:
        from imp import load_source
        from torch import load, quantization, backends
        try:
            MainModel = load_source('MainModel', join('models', 'pytorch', 'EfficientPose{0}.py'.format(model_variant.upper())))
        except:
            print('\n##########################################################################################################')
            print('Desired model "EfficientPose{0}Lite" not available in PyTorch. Please select among "RT", "I", "II", "III" or "IV".'.format(model_variant.split('lite')[0].upper()))
            print('##########################################################################################################\n')
            return False, False
        model = load(join('models', 'pytorch', 'EfficientPose{0}'.format(model_variant.upper())))
        model.eval()
        qconfig = quantization.get_default_qconfig('qnnpack')
        backends.quantized.engine = 'qnnpack'
            
    return model, {'rt': 224, 'i': 256, 'ii': 368, 'iii': 480, 'iv': 600, 'rt_lite': 224, 'i_lite': 256, 'ii_lite': 368}[model_variant]
Example no. 13
    def checkGraphModeOp(self,
                         module,
                         data,
                         quantized_op,
                         tracing=False,
                         debug=False,
                         check=True,
                         eval_mode=True,
                         dynamic=False):
        if debug:
            print('Testing:', str(module))
        qconfig_dict = {
            '': get_default_qconfig(torch.backends.quantized.engine)
        }

        if eval_mode:
            module = module.eval()
        if dynamic:
            qconfig_dict = {'': default_dynamic_qconfig}
            inputs = data
        else:
            *inputs, target = data[0]
        model = get_script_module(module, tracing, inputs).eval()
        if debug:
            print('input graph:', model.graph)
        models = {}
        outputs = {}
        for d in [True, False]:
            # TODO: _test_only_eval_fn --> default_eval_fn
            if dynamic:
                models[d] = quantize_dynamic_jit(model, qconfig_dict, debug=d)
                # make sure it runs
                outputs[d] = models[d](inputs)
            else:
                # module under test can contain in-place ops, and we depend on
                # input data staying constant for comparisons
                data_copy = copy.deepcopy(data)
                models[d] = quantize_jit(model,
                                         qconfig_dict,
                                         test_only_eval_fn, [data_copy],
                                         inplace=False,
                                         debug=d)
                # make sure it runs
                outputs[d] = models[d](*inputs)

        if debug:
            print('debug graph:', models[True].graph)
            print('non debug graph:', models[False].graph)

        if check:
            # debug and non-debug option should have the same numerics
            self.assertEqual(outputs[True], outputs[False])

            # non debug graph should produce quantized op
            FileCheck().check(quantized_op) \
                       .run(models[False].graph)

        return models[False]
Example no. 14
 def __init__(self, input_net, output_file, backend='fbgemm'):
     self.input_net = copy.deepcopy(input_net)
     self.input_net.cpu().eval()
     self.output_file = output_file
     self.dq_output_file = '{}.dq'.format(output_file)
     self.sq_output_file = '{}.sq'.format(output_file)
     self.d_qconfig_dict = {'': per_channel_dynamic_qconfig}
     self.s_qconfig_dict = {'': get_default_qconfig(backend)}
     self.ts = None
Example no. 15
File: base.py Project: MHGL/deepvac
 def __init__(self, deepvac_core_config, output_file, backend = 'fbgemm'):
     self.deepvac_core_config = deepvac_core_config
     self.input_net = copy.deepcopy(self.deepvac_core_config.ema if self.deepvac_core_config.ema else self.deepvac_core_config.net)
     self.input_net.to(self.deepvac_core_config.sample.device)
     self.input_net.eval()
     self.output_file = output_file
     self.backend = backend
     self.dq_output_file = '{}.dq'.format(output_file)
     self.sq_output_file = '{}.sq'.format(output_file)
     self.d_qconfig_dict = {'': per_channel_dynamic_qconfig}
     self.s_qconfig_dict = {'': get_default_qconfig(self.backend) }
Example no. 16
 def __init__(
     self,
     qconfig_dicts: Optional[QConfigDicts] = None,
     preserved_attrs: Optional[List[str]] = None,
 ) -> None:
     """ Initialize the callback. """
     self.qconfig_dicts = qconfig_dicts or {"": {"": get_default_qconfig()}}
     self.preserved_attrs = set([] if preserved_attrs is None else preserved_attrs)
     self.prepared: Optional[torch.nn.Module] = None
     self.quantized: Optional[torch.nn.Module] = None
     self.should_calibrate = _requires_calibration(self.qconfig_dicts)
Example no. 17
    def __init__(self,
                 model,
                 quant_method='dynamic',
                 config='x86',
                 calibration_loader=None):
        '''
        :param config: platform switch
        :type config: x86, pi, jetson
        '''
        self.model = model
        self.print_model_size(model, 'Original Model')
        self.quant_method = quant_method
        self.config = config
        self.qconfig = quant.get_default_qconfig(
            'fbgemm') if config == 'x86' else quant.get_default_qconfig(
                'qnnpack')

        # For post training static quantization calibration, typically the training data loader
        self.calibration_loader = calibration_loader
        # Only static post-training quantization needs a calibration loader.
        if self.quant_method == 'static':
            assert self.calibration_loader is not None, \
                'Post training static quantization requires calibration loader (training loader)!'
Example no. 18
    def test_compare_weights_conv_static_fx(self):
        r"""Compare the weights of float and static quantized conv layer"""

        qengine = torch.backends.quantized.engine
        qconfig = get_default_qconfig(qengine)
        qconfig_dict = {"": qconfig}

        model_list = [ConvModel(), ConvBnModel(), ConvBnReLUModel()]
        for float_model in model_list:
            float_model.eval()

            fused = fuse_fx(float_model)
            prepared_model = prepare_fx(float_model, qconfig_dict)

            # Run calibration
            test_only_eval_fn(prepared_model, self.img_data_2d)
            q_model = convert_fx(prepared_model)

            expected_weight_dict_keys = {"conv.weight"}
            self.compare_and_validate_model_weights_results_fx(
                fused, q_model, expected_weight_dict_keys)
Example no. 19
    def test_remove_qconfig_observer_fx(self):
        r"""Remove activation_post_process nodes from the FX-prepared model"""
        float_model = SingleLayerLinearModel()
        float_model.eval()

        qengine = torch.backends.quantized.engine
        qconfig = get_default_qconfig(qengine)

        qconfig_dict = {"": qconfig}

        prepared_model = prepare_fx(float_model, qconfig_dict)

        prepared_float_model = copy.deepcopy(prepared_model)
        prepared_float_model.eval()

        model = remove_qconfig_observer_fx(prepared_float_model)

        modules = dict(model.named_modules())
        for node in model.graph.nodes:
            if node.op == "call_module":
                self.assertFalse(is_activation_post_process(modules[node.target]))
Example no. 20
    def test_compare_weights_linear_static_fx(self):
        r"""Compare the weights of float and static quantized linear layer"""

        qengine = torch.backends.quantized.engine
        qconfig = get_default_qconfig(qengine)
        qconfig_dict = {"": qconfig}

        float_model = SingleLayerLinearModel()
        float_model.eval()

        prepared_model = prepare_fx(float_model, qconfig_dict)

        prepared_float_model = copy.deepcopy(prepared_model)
        prepared_float_model.eval()

        # Run calibration
        test_only_eval_fn(prepared_model, self.calib_data)
        q_model = convert_fx(prepared_model)

        expected_weight_dict_keys = {"fc1._packed_params._packed_params"}
        self.compare_and_validate_model_weights_results_fx(
            prepared_float_model, q_model, expected_weight_dict_keys)
Example no. 21
#       # **api subject to change**
#       # optional: specify the path for standalone modules
#       # These modules are symbolically traced and quantized as one unit
#       # so that the call to the submodule appears as one call_module
#       # node in the forward graph of the GraphModule
#       "standalone_module_name": [
#           "submodule.standalone"
#       ],
#       "standalone_module_class": [
#           StandaloneModuleClass
#       ]
#   }
#
# Utility functions related to ``qconfig`` can be found in the `qconfig <https://github.com/pytorch/pytorch/blob/master/torch/quantization/qconfig.py>`_ file.

qconfig = get_default_qconfig("fbgemm")
qconfig_dict = {"": qconfig}

######################################################################
# 5. Prepare the Model for Post Training Static Quantization
# ----------------------------------------------------------
#
# .. code:: python
#
#     prepared_model = prepare_fx(model_to_quantize, qconfig_dict)
#
# prepare_fx folds BatchNorm modules into the preceding Conv2d modules, and inserts observers
# in the appropriate places in the model.

prepared_model = prepare_fx(model_to_quantize, qconfig_dict)
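# A small sketch (not part of the original tutorial): after ``prepare_fx`` the
# inserted observers show up as ``activation_post_process`` submodules of the
# prepared GraphModule, so listing them confirms where activations will be calibrated.

for name, module in prepared_model.named_modules():
    if "activation_post_process" in name:
        print(name, "->", type(module).__name__)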
Example no. 22
def quantize(model):
    qconfig = get_default_qconfig("fbgemm")
    qconfig_dict = {"": qconfig}
    return convert_fx(prepare_fx(model, qconfig_dict))
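Note that this one-liner converts immediately after preparation, so the observers never see any data; for post-training static quantization a calibration pass normally runs in between. A hedged variant is sketched below: the `calibration_data` iterable of input tensors is an assumption, and the two-argument `prepare_fx` call matches the older API used throughout these examples (newer releases also take `example_inputs`).

import torch
from torch.quantization import get_default_qconfig
from torch.quantization.quantize_fx import prepare_fx, convert_fx

def quantize_with_calibration(model, calibration_data):
    qconfig_dict = {"": get_default_qconfig("fbgemm")}
    prepared = prepare_fx(model.eval(), qconfig_dict)
    with torch.no_grad():
        for sample in calibration_data:   # feed representative inputs to the observers
            prepared(sample)
    return convert_fx(prepared)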
Example no. 23
    def checkGraphModeFxOp(self,
                           model,
                           inputs,
                           quant_type,
                           expected_node=None,
                           expected_node_occurrence=None,
                           expected_node_list=None,
                           debug=False,
                           print_debug_info=False):
        """ Quantizes the model with FX graph mode quantization and checks that the
        quantized model contains the expected quantized node

        Args:
            model: floating point torch.nn.Module
            inputs: one positional sample input arguments for model
            expected_node: NodeSpec
                  e.g. NodeSpec.call_function(torch.quantize_per_tensor)
            expected_node_occurrence: a dict from NodeSpec to
                  expected number of occurrences (int)
                  e.g. {NodeSpec.call_function(torch.quantize_per_tensor) : 1,
                        NodeSpec.call_method('dequantize'): 1}
            expected_node_list: a list of NodeSpec, used to check the order
                  of the occurrence of Node
                  e.g. [NodeSpec.call_function(torch.quantize_per_tensor),
                        NodeSpec.call_module(nnq.Conv2d),
                        NodeSpec.call_function(F.hardtanh_),
                        NodeSpec.call_method('dequantize')]
        """
        # TODO: make img_data a single example instead of a list
        if type(inputs) == list:
            inputs = inputs[0]
        if quant_type == QuantType.QAT:
            model.train()
        else:
            model.eval()
        original = symbolic_trace(model)
        fused = fuse_fx(original)

        qconfig_dict = {
            '': get_default_qconfig(torch.backends.quantized.engine)
        }
        if quant_type == QuantType.DYNAMIC:
            prepare = prepare_dynamic_fx
            convert = convert_dynamic_fx
        else:
            prepare = prepare_fx
            convert = convert_fx

        prepared = prepare(fused, qconfig_dict)
        prepared(*inputs)
        qgraph = convert(prepared)
        qgraph_debug = convert(prepared, debug=True)

        result = qgraph(*inputs)
        result_debug = qgraph_debug(*inputs)

        self.assertEqual((result - result_debug).abs().max(), 0,
                         'Expecting debug and non-debug option to produce identical result')

        if print_debug_info:
            print()
            print('quant type:', quant_type)
            print('original graph module:', type(model))
            self.printGraphModule(original)
            print()
            print('quantized graph module:', type(qgraph))
            self.printGraphModule(qgraph)
            print()
        qgraph_to_check = qgraph_debug if debug else qgraph
        self.checkGraphModuleNodes(qgraph_to_check, expected_node,
                                   expected_node_occurrence,
                                   expected_node_list)
Example no. 24
def quantMain():

    # Choose quantization engine
    if 'qnnpack' in backquant.supported_engines:
        # This Engine Works ONLY on Linux
        # We will use it
        print("Using qnnpack backend engine")
        BACKEND_ENGINE = 'qnnpack'
    elif 'fbgemm' in backquant.supported_engines:
        # This Engine works on Windows (and Linux?)
        # We won't be using it
        BACKEND_ENGINE = 'fbgemm'
        print(
            "FBGEMM Backend Engine is not supported - are you trying this on windows?"
        )
        exit(-2)
    else:
        BACKEND_ENGINE = 'none'
        print("No Proper Backend Engine found")
        exit(-3)

    # Choose quantization device (cpu/gpu)
    # Static Quantisation works only on cpu
    quantDevice = par.QUANT_DEVICE

    # Load Data
    #TODO: transforms
    transform_for_quant = trans.TRANSFORM_QUANTIZE

    dataset_loader, valset_loader, _ = bank.loadData(
        arg_load_train=True,
        arg_load_val=True,
        arg_load_test=False,
        arg_trans_train=transform_for_quant,
        quantisation_mode=True)

    #Load Our Model
    quant_model = mod.UsedModel(par.MODEL_USED_MODEL_TYPE,
                                arg_load=True,
                                arg_load_path=par.QUANT_MODEL_PATH,
                                arg_load_device=par.QUANT_DEVICE,
                                arg_load_raw=par.DATA_LOAD_RAW_MODEL_ENABLE)
    quant_model.optimizer = torch.optim.Adam(
        quant_model.model.parameters(),
        lr=par.TRAIN_INITIAl_LEARNING_RATE)  ##only if raw load
    quant_model.model.to(par.QUANT_DEVICE)
    print('Loaded trained model')

    quant_model.model.eval()
    quant_model.addQuantStubs()  # possibly only needed for the old 1.6-style eager-mode workflow

    quant_model.fuzeModel()

    # Evaluate Our Model
    if DO_EVALUATE:
        print("Started Evaluation")
        quant_model.model.eval()
        top1, _, _ = eva.evaluate(quant_model, valset_loader, par.QUANT_DEVICE)
        print('Evaluation accuracy on all val images, %2.2f' % (top1.avg))

    propagation_list = quant.get_default_qconfig_propagation_list()
    propagation_list.remove(torch.nn.modules.linear.Linear)
    q_config_dict = dict()
    for e in propagation_list:
        q_config_dict[e] = quant.get_default_qconfig(BACKEND_ENGINE)
    quant.propagate_qconfig_(quant_model.model, q_config_dict)

    quant.prepare(quant_model.model, inplace=True)

    #Calibrate
    print("\nStarting Quantizing Inputs")
    quant_model.model.eval()
    with torch.no_grad():
        for i, data in enumerate(dataset_loader, 0):
            #if (i+1) % 2 == 0: break
            if i % 1000 == 0: print("Progress = ", i)
            inputs, labels = data['image'], data['class']
            quant_model.model(inputs)
    print("Inputs Quantized")

    #Convert to quantized model
    torch.quantization.convert(quant_model.model, inplace=True)
    print("Model Quantized")

    # Evaluate Our Model

    if DO_EVALUATE:
        print("Started Evaluation")
        quant_model.model.eval()
        top1, _, _ = eva.evaluate(quant_model, valset_loader, par.QUANT_DEVICE)
        print('Evaluation accuracy on all val images, %2.2f' % (top1.avg))

    # save for mobile
    quant_model.saveQuantizedModel(par.QUANT_SAVE_MODEL_PATH, dataset_loader)

    print("Done")
Example no. 25
def main(args):
    # data
    train_transform = tv.transforms.Compose([])
    if args.data_augmentation:
        train_transform.transforms.append(tv.transforms.RandomCrop(32, padding=4))
        train_transform.transforms.append(tv.transforms.RandomHorizontalFlip())
    train_transform.transforms.append(tv.transforms.ToTensor())
    normalize = tv.transforms.Normalize(mean=[x / 255.0 for x in [125.3, 123.0, 113.9]],
                                        std=[x / 255.0 for x in [63.0, 62.1, 66.7]])
    train_transform.transforms.append(normalize)

    test_transform = tv.transforms.Compose([
        tv.transforms.ToTensor(),
        normalize])

    train_dataset = tv.datasets.CIFAR10(root='data/',
                                     train=True,
                                     transform=train_transform,
                                     download=True)

    test_dataset = tv.datasets.CIFAR10(root='data/',
                                    train=False,
                                    transform=test_transform,
                                    download=True)

    train_loader = torch.utils.data.DataLoader(dataset=train_dataset,
                                               batch_size=args.bs,
                                               shuffle=True,
                                               pin_memory=True,
                                               num_workers=4)

    test_loader = torch.utils.data.DataLoader(dataset=test_dataset,
                                              batch_size=args.bs,
                                              shuffle=False,
                                              pin_memory=True,
                                              num_workers=4)

    # net
    net = tv.models.mobilenet_v2(num_classes=10)
    net.load_state_dict(torch.load('mobilenet_v2.pth', map_location='cpu'))
    net.dropout = torch.nn.Sequential()

    # quantization
    model = copy.deepcopy(net).cuda()
    del net
    model.eval()
    graph_module = torch.fx.symbolic_trace(model)
    qconfig = get_default_qconfig("fbgemm")
    qconfig_dict = {"": qconfig}
    model_prepared = prepare_fx(graph_module, qconfig_dict)
    calibrate(model_prepared, test_loader)  # this step runs calibration for post-training quantization
    model_int8 = convert_fx(model_prepared)
    torch.jit.save(torch.jit.script(model_int8), 'int8-ptq.pth')

    # valid
    loaded_quantized_model = torch.jit.load('int8-ptq.pth')
    correct = 0.
    total = 0.
    with torch.no_grad():
        loaded_quantized_model.eval()
        for images, labels in tqdm(test_loader):
            # images/labels stay on CPU for the int8 TorchScript model

            pred = loaded_quantized_model(images)

            pred = torch.max(pred.data, 1)[1]
            total += labels.size(0)
            correct += (pred == labels).sum().item()

        val_acc = correct / total
        print(val_acc)
Example no. 26
#
# Right now ``qconfig_dict`` is the only way to configure how the model is quantized, and it works at
# module granularity: only one type of ``qconfig`` is supported per ``torch.nn.Module``. For example, given:
#
# .. code:: python
#
#   qconfig = {
#       '': qconfig_global,
#       'sub': qconfig_sub,
#       'sub.fc': qconfig_fc,
#       'sub.conv': None
#   }
#
# the module ``sub.fc`` will be configured with ``qconfig_fc``, all other child modules in ``sub`` will be
# configured with ``qconfig_sub``, ``sub.conv`` will not be quantized, and all remaining modules in the model
# will be quantized with ``qconfig_global``.
# Utility functions related to ``qconfig`` can be found in https://github.com/pytorch/pytorch/blob/master/torch/quantization/qconfig.py.

qconfig = get_default_qconfig('fbgemm')
qconfig_dict = {'': qconfig}

######################################################################
# 5. Define Calibration Function
# ------------------------------
#
# .. code:: python
#
#   def calibrate(model, sample_data, ...):
#       model(sample_data, ...)
#
#
# The calibration function is run after the observers are inserted in the model.
# The purpose of calibration is to run through some sample examples that are representative of the workload
# (for example a sample of the training data set) so that the observers in the model can collect the tensor
# statistics that are later used to compute the quantization parameters.
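# A minimal calibration loop in the spirit of the signature sketched above (the
# ``data_loader`` name and the (image, target) batch structure are assumptions,
# not part of the original text):
#
# .. code:: python
#
#   def calibrate(model, data_loader):
#       model.eval()
#       with torch.no_grad():
#           for image, target in data_loader:
#               model(image)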
Example no. 27
 def __init__(self, qconfig_dicts: Optional[QConfigDicts] = None) -> None:
     """ Initialize the callback. """
     self.qconfig_dicts = qconfig_dicts or {"": {"": get_default_qconfig()}}
     self.prepared: Optional[torch.nn.Module] = None
     self.quantized: Optional[torch.nn.Module] = None
     self.should_calibrate = _requires_calibration(self.qconfig_dicts)
Example no. 28
    def test_sparse_qlinear_serdes(self):
        batch_size = 12
        input_channels = 4
        output_channels = 7
        model = self.SparseQuantizedModel(input_channels, output_channels)

        # For sparse kernels both the activation and weight ZP = 0
        X_scale = 0.2
        X_zp = 0
        W_scale = 1e-2
        W_zp = 0

        with override_cpu_allocator_for_qnnpack(qengine_is_qnnpack()):
            X_fp32 = torch.randn(batch_size,
                                 input_channels,
                                 dtype=torch.float32)
            float_bias = torch.randn(output_channels, dtype=torch.float32)

            X_q = torch.quantize_per_tensor(X_fp32,
                                            scale=X_scale,
                                            zero_point=X_zp,
                                            dtype=torch.quint8)
            X_fp32 = X_q.dequantize()

            W_fp32 = torch.randn(output_channels,
                                 input_channels,
                                 dtype=torch.float32)
            mask = torch.randint(0, 2, W_fp32.shape)
            W_fp32 *= mask
            W_q = torch.quantize_per_tensor(W_fp32, W_scale, W_zp, torch.qint8)

            model.weight = nn.Parameter(W_q.dequantize())
            model.eval()

            # Note: At the moment, for sparse kernels
            # fbgemm supports only static quantized sparse linear
            # qnnpack supports only dynamically quantized sparse linear
            # Hence we have two different tests.
            # fbgemm tests static flow, qnnpack tests dynamic.
            # Should be unified later on and tests should be fixed
            # appropriately.
            if qengine_is_fbgemm():
                model.qconfig = tq.get_default_qconfig('fbgemm')
                qmodel = copy.deepcopy(model)
                sqmodel = copy.deepcopy(model)

                tq.prepare(qmodel, inplace=True)
                tq.prepare(sqmodel, inplace=True)

                with torch.no_grad():
                    qmodel(X_fp32)
                    sqmodel(X_fp32)

                # Make sure the quantization parameters are computed the same way
                qparams = qmodel.linear.qconfig.weight().calculate_qparams()
                sqparams = sqmodel.linear.qconfig.weight().calculate_qparams()
                self.assertEqual(qparams, sqparams)

                # Make sure mapping of sparse kernels does not affect the non-sparse
                sparse_mapping = tq.get_default_static_quant_module_mappings()
                sparse_mapping[nn.Linear] = ao_nn_sq.Linear
                tq.convert(sqmodel, inplace=True, mapping=sparse_mapping)
                tq.convert(qmodel, inplace=True)

                assert isinstance(sqmodel.linear,
                                  ao_nn_sq.Linear), "Convert failed"
                assert isinstance(qmodel.linear,
                                  nn.quantized.Linear), "Mapping failed"

                scripted_sqmodel = torch.jit.script(sqmodel)
                scripted_sqmodel.eval()
                buffer = io.BytesIO()
                torch.jit.save(scripted_sqmodel, buffer)
                buffer.seek(0)
                sqmodel = torch.jit.load(buffer)

                # Make sure numerics are right
                Y_ref = qmodel(X_q)
                Y_hat = sqmodel(X_q)
                self.assertEqual(Y_ref.dequantize(), Y_hat.dequantize())

            if qengine_is_qnnpack():
                qconfig = {nn.Linear: tq.qconfig.default_dynamic_qconfig}
                dqmodel = copy.deepcopy(model)
                sdqmodel = copy.deepcopy(model)

                tq.propagate_qconfig_(dqmodel, qconfig)
                tq.propagate_qconfig_(sdqmodel, qconfig)

                # Make sure the quantization parameters are computed the same way
                qparams = dqmodel.linear.qconfig.weight().calculate_qparams()
                sqparams = sdqmodel.linear.qconfig.weight().calculate_qparams()
                self.assertEqual(qparams, sqparams)

                # Make sure mapping of sparse kernels does not affect the non-sparse
                sparse_mapping = copy.deepcopy(
                    tq.get_default_dynamic_quant_module_mappings())
                sparse_mapping[nn.Linear] = ao_nn_sq.dynamic.Linear
                with LinearBlockSparsePattern(1, 4):
                    tq.convert(sdqmodel, inplace=True, mapping=sparse_mapping)
                tq.convert(
                    dqmodel,
                    mapping=tq.get_default_dynamic_quant_module_mappings(),
                    inplace=True)

                assert isinstance(sdqmodel.linear,
                                  ao_nn_sq.dynamic.Linear), "Convert failed"
                assert isinstance(
                    dqmodel.linear,
                    nn.quantized.dynamic.Linear), "Mapping failed"

                scripted_sdqmodel = torch.jit.script(sdqmodel)
                scripted_sdqmodel.eval()
                buffer = io.BytesIO()
                torch.jit.save(scripted_sdqmodel, buffer)
                buffer.seek(0)
                sdqmodel = torch.jit.load(buffer)

                # Make sure numerics are right
                Y_ref = dqmodel(X_fp32)
                Y_hat = sdqmodel(X_fp32)
                self.assertEqual(Y_ref, Y_hat)