Esempio n. 1
    def from_float(cls, mod):
        r"""Create a dynamic quantized module from a float module or qparams_dict

        Args:
            mod (Module): a float module, either produced by torch.quantization
                          utilities or provided by the user

        Returns:
            A ``Linear`` built with the weight observer's dtype and populated
            through ``set_weight_bias`` from ``mod.weight`` and ``mod.bias``.

        Raises:
            AssertionError: if ``mod`` is not exactly ``NNLinear``, lacks a
                ``qconfig`` attribute, or the observer dtype is neither
                ``torch.qint8`` nor ``torch.float16``.
            RuntimeError: for an unsupported dtype when assertions are
                disabled.
        """
        # NOTE(review): this takes `cls` and is presumably decorated with
        # @classmethod in the enclosing class; the decorator is not visible here.
        assert type(
            mod
        ) == NNLinear, 'nn.quantized.dynamic.Linear.from_float only works for nn.Linear'
        assert hasattr(
            mod, 'qconfig'), 'Input float module must have qconfig defined'
        if mod.qconfig is not None and mod.qconfig.weight is not None:
            weight_observer = mod.qconfig.weight()
        else:
            # We have the circular import issues if we import the qconfig in the beginning of this file:
            # The current workaround is to postpone the
            # import until we need it.
            from torch.quantization.qconfig import default_dynamic_qconfig
            weight_observer = default_dynamic_qconfig.weight()
        dtype = weight_observer.dtype
        assert dtype in [
            torch.qint8, torch.float16
        ], 'The only supported dtypes for dynamic quantized linear are qint8 and float16'
        # The observer has to see the weight before it is used to quantize it;
        # otherwise its statistics are still the initial, data-independent ones.
        weight_observer(mod.weight)
        if dtype == torch.qint8:
            qweight = _quantize_weight(mod.weight.float(), weight_observer)
        elif dtype == torch.float16:
            # float16 weights are passed through as float here.
            qweight = mod.weight.float()
        else:
            raise RuntimeError(
                'Unsupported dtype specified for dynamic quantized Linear!')
        qlinear = Linear(mod.in_features, mod.out_features, dtype=dtype)
        qlinear.set_weight_bias(qweight, mod.bias)
        return qlinear
Esempio n. 2
    def from_float(cls, mod):
        r"""Create a dynamic quantized module from a float module or qparams_dict

        Args:
            mod (Module): a float module, either produced by torch.quantization
                          utilities or provided by the user

        Returns:
            A ``Linear`` whose weight is ``mod.weight`` quantized per tensor to
            ``torch.qint8`` and whose bias is ``mod.bias``.

        Raises:
            AssertionError: if ``mod`` is not exactly ``NNLinear``, lacks a
                ``qconfig`` attribute, or the weight observer's dtype is not
                ``torch.qint8``.
        """
        # NOTE(review): this takes `cls` and is presumably decorated with
        # @classmethod in the enclosing class; the decorator is not visible here.
        assert type(
            mod
        ) == NNLinear, 'nn.quantized.dynamic.Linear.from_float only works for nn.Linear'
        assert hasattr(
            mod, 'qconfig'), 'Input float module must have qconfig defined'
        if mod.qconfig is not None and mod.qconfig.weight is not None:
            weight_observer = mod.qconfig.weight()
        else:
            # We have the circular import issues if we import the qconfig in the beginning of this file:
            # The current workaround is to postpone the
            # import until we need it.
            from torch.quantization.qconfig import default_dynamic_qconfig
            weight_observer = default_dynamic_qconfig.weight()
        assert weight_observer.dtype == torch.qint8, 'Weight observer must have dtype torch.qint8'
        # Feed the weight to the observer so that the scale / zero point below
        # are derived from the actual weight values.
        weight_observer(mod.weight)
        wt_scale, wt_zp = weight_observer.calculate_qparams()
        # The target dtype matches the observer dtype asserted above.
        qweight = torch.quantize_per_tensor(mod.weight.float(),
                                            float(wt_scale), int(wt_zp),
                                            torch.qint8)
        qlinear = Linear(mod.in_features, mod.out_features)
        qlinear.set_weight_bias(qweight, mod.bias)
        return qlinear
Esempio n. 3
    def from_float(cls, mod):
        r"""Create a quantized sparse dynamic module from a float module.

        We only care about the convert at this stage, no need for observers just yet.

        Args:
            mod: float module of type ``cls._FLOAT_MODULE`` carrying a
                ``qconfig`` attribute. If ``mod.qconfig`` has a truthy ``mask``
                attribute, the weight is multiplied by it before observation.

        Returns:
            An instance of ``cls`` populated through ``set_weight_bias`` with
            the qint8-quantized (optionally masked) weight and ``mod.bias``.

        Raises:
            AssertionError: if ``mod`` has the wrong type or no ``qconfig``,
                the observer dtype is not ``torch.qint8``, or any weight zero
                point is non-zero.
        """
        # NOTE(review): this takes `cls` and is presumably decorated with
        # @classmethod in the enclosing class; the decorator is not visible here.
        assert type(mod) == cls._FLOAT_MODULE, ' nnq.' + cls.__name__ + '.from_float only works for ' + \
            cls._FLOAT_MODULE.__name__
        # TODO: Need to add options to qconfig to avoid the calibration.
        # TODO: Add calibration for the sparsity
        assert hasattr(
            mod, 'qconfig'), 'Input float module must have qconfig defined'
        if type(mod) == nni.LinearReLU:
            # Unwrap the fused module: index 0 is used as the linear layer.
            mod = mod[0]
        if mod.qconfig is not None and mod.qconfig.weight is not None:
            weight_observer = mod.qconfig.weight()
        else:
            # We have the circular import issues if we import the qconfig in the beginning of this file:
            # The current workaround is to postpone the
            # import until we need it.
            from torch.quantization.qconfig import default_dynamic_qconfig
            weight_observer = default_dynamic_qconfig.weight()

        # It is important to multiply by the mask BEFORE calling the `weight_observer`
        # TODO (zaf): Mask might not be part of the qconfig (T83295194)
        weight = mod.weight
        if getattr(mod.qconfig, 'mask', False):
            weight = mod.qconfig.mask * mod.weight

        # Observe the (masked) weight so the qparams below reflect its values.
        weight_observer(weight)
        dtype = weight_observer.dtype
        assert dtype == torch.qint8, 'Weight observer must have dtype torch.qint8'
        _, w_zp = weight_observer.calculate_qparams()
        if isinstance(w_zp, torch.Tensor):
            assert not torch.any(
                w_zp.bool()), "All weight zero points must map to 0"
        else:
            assert w_zp == 0, 'Weight zero point must map to 0'
        qweight = _quantize_weight(weight.float(), weight_observer)

        # Use these default values until we figure out how to augment
        # `mod` to contain sparse config
        row_block_size, col_block_size = QNNPACKLinearBlockSparsePattern.block_size(
        )
        # NOTE(review): the constructor arguments after `mod.in_features` are
        # reconstructed; confirm them against the signature of `cls.__init__`.
        qlinear = cls(mod.in_features,
                      mod.out_features,
                      row_block_size,
                      col_block_size,
                      dtype=dtype)
        qlinear.set_weight_bias(qweight, mod.bias, row_block_size,
                                col_block_size)
        return qlinear
Esempio n. 4
    def from_float(cls, mod):
        r"""Create a dynamic quantized module from a float module or qparams_dict

        Args:
            mod (Module): a float module, either produced by torch.quantization
                          utilities or provided by the user

        Returns:
            An instance of ``cls`` built with the weight observer's dtype and
            populated through ``set_weight_bias`` from the (unwrapped) module's
            weight and bias.

        Raises:
            AssertionError: if ``type(mod)`` is not one of the accepted float
                modules, ``mod`` lacks a ``qconfig`` attribute, or the observer
                dtype is neither ``torch.qint8`` nor ``torch.float16``.
            RuntimeError: for an unsupported dtype when assertions are
                disabled.
        """
        # NOTE(review): this takes `cls` and is presumably decorated with
        # @classmethod in the enclosing class; the decorator is not visible here.
        # NOTE(review): the entries of this list are reconstructed.
        # `nni.LinearReLU` follows from the unwrapping branch below and
        # `torch.nn.Linear` from the error message; confirm the remaining entry.
        float_modules = [
            torch.nn.Linear,
            torch.nn.modules.linear.NonDynamicallyQuantizableLinear,
            nni.LinearReLU,
        ]

        assert type(mod) in float_modules, \
            'nn.quantized.dynamic.Linear.from_float only works for one of' + \
            str([float_mod.__name__ for float_mod in float_modules])
        assert hasattr(
            mod, 'qconfig'), 'Input float module must have qconfig defined'
        if type(mod) == nni.LinearReLU:
            # Unwrap the fused module: index 0 is used as the linear layer.
            mod = mod[0]
        if mod.qconfig is not None and mod.qconfig.weight is not None:
            weight_observer = mod.qconfig.weight()
        else:
            # We have the circular import issues if we import the qconfig in the beginning of this file:
            # The current workaround is to postpone the
            # import until we need it.
            from torch.quantization.qconfig import default_dynamic_qconfig
            weight_observer = default_dynamic_qconfig.weight()
        dtype = weight_observer.dtype
        assert dtype in [torch.qint8, torch.float16], "The only supported dtypes for " \
            "dynamic quantized linear are qint8 and float16 got: {}".format(dtype)
        # The observer has to see the weight before it is used to quantize it;
        # otherwise its statistics are still the initial, data-independent ones.
        weight_observer(mod.weight)
        if dtype == torch.qint8:
            qweight = _quantize_weight(mod.weight.float(), weight_observer)
        elif dtype == torch.float16:
            # float16 weights are passed through as float here.
            qweight = mod.weight.float()
        else:
            raise RuntimeError(
                'Unsupported dtype specified for dynamic quantized Linear!')
        qlinear = cls(mod.in_features, mod.out_features, dtype=dtype)
        qlinear.set_weight_bias(qweight, mod.bias)
        return qlinear
Esempio n. 5
    def from_float(cls, mod):
        r"""Create a dynamic quantized LSTM from a float ``torch.nn.LSTM``.

        For every layer and direction the ``ih`` and ``hh`` weights are packed
        (quantized to qint8, or packed as fp16) and stored, together with
        their generated names, on the returned module.

        Args:
            mod: a float ``torch.nn.LSTM`` with a ``qconfig`` attribute and
                with biases enabled.

        Returns:
            The new ``LSTM`` with ``_all_weight_names`` (list of str) and
            ``_all_weight_values`` (``torch.nn.ModuleList`` of
            ``PackedParameter``) filled in.

        Raises:
            AssertionError: if ``mod`` is not exactly ``torch.nn.LSTM``, lacks
                a ``qconfig`` attribute, or has ``bias`` disabled.
            RuntimeError: if the observer dtype is neither ``torch.qint8`` nor
                ``torch.float16``.
            NotImplementedError: if ``mod.mode`` is not ``'LSTM'``.
        """
        # NOTE(review): this takes `cls` and is presumably decorated with
        # @classmethod in the enclosing class; the decorator is not visible here.
        assert type(
            mod
        ) == torch.nn.LSTM, 'nn.quantized.dynamic.RNNBase.from_float only works for nn.LSTM'
        assert hasattr(
            mod, 'qconfig'), 'Input float module must have qconfig defined'

        # Keep the observer *factory* rather than one instance: an observer
        # accumulates statistics across calls, so every weight gets its own.
        if mod.qconfig is not None and mod.qconfig.weight is not None:
            make_weight_observer = mod.qconfig.weight
        else:
            # We have the circular import issues if we import the qconfig in the beginning of this file:
            # The current workaround is to postpone the
            # import until we need it.
            from torch.quantization.qconfig import default_dynamic_qconfig
            make_weight_observer = default_dynamic_qconfig.weight

        dtype = make_weight_observer().dtype
        supported_scalar_types = [torch.qint8, torch.float16]
        if dtype not in supported_scalar_types:
            raise RuntimeError(
                'Unsupported dtype for dynamic RNN quantization: {}'.format(
                    dtype))

        if mod.mode == 'LSTM':
            qRNNBase = LSTM(mod.input_size, mod.hidden_size, mod.num_layers,
                            mod.bias, mod.batch_first, mod.dropout,
                            mod.bidirectional, dtype)
        else:
            raise NotImplementedError(
                'Only LSTM is supported for QuantizedRNN for now')

        num_directions = 2 if mod.bidirectional else 1

        # The packing below always reads the bias_* attributes.
        assert mod.bias

        def process_weights(ihhh, layer, suffix, dtype):
            # Pack one weight/bias pair; returns (params, names) in matching order.
            weight_name = 'weight_{}_l{}{}'.format(ihhh, layer, suffix)
            bias_name = 'bias_{}_l{}{}'.format(ihhh, layer, suffix)

            weight = getattr(mod, weight_name)
            bias = getattr(mod, bias_name)

            if dtype == torch.qint8:
                # for each layer, for each direction we need to quantize and pack
                # weights and pack parameters in this order:
                #   w_ih, w_hh
                weight_observer = make_weight_observer()
                weight_observer(weight)
                wt_scale, wt_zp = weight_observer.calculate_qparams()
                qweight = torch.quantize_per_tensor(
                    weight.float(), float(wt_scale), int(wt_zp),
                    torch.qint8)
                packed_weight = \
                    torch.ops.quantized.linear_prepack(qweight, bias)

                params = [packed_weight]
                pos_names = ['w']
            else:
                # for each layer, for each direction we need to quantize and pack
                # weights and pack parameters in this order:
                #   packed_ih, packed_hh, b_ih, b_hh
                packed_weight = torch.fbgemm_pack_gemm_matrix_fp16(
                    weight.float())

                params = [packed_weight, bias]
                pos_names = ['packed', 'b']

            ret_name = [
                '{}_{}_l{}{}'.format(name, ihhh, layer, suffix)
                for name in pos_names
            ]
            return params, ret_name

        qRNNBase._all_weight_names = []
        _all_weight_values = []
        for layer in range(qRNNBase.num_layers):
            for direction in range(num_directions):
                suffix = '_reverse' if direction == 1 else ''
                ih_params, ih_param_names = process_weights(
                    'ih', layer, suffix, dtype)
                hh_params, hh_param_names = process_weights(
                    'hh', layer, suffix, dtype)

                # Interleave ih/hh entries so names and values stay aligned.
                for (ih, ih_name), (hh, hh_name) in zip(
                        zip(ih_params, ih_param_names),
                        zip(hh_params, hh_param_names)):
                    qRNNBase._all_weight_names.extend([ih_name, hh_name])
                    _all_weight_values.extend(
                        [PackedParameter(p) for p in [ih, hh]])
        qRNNBase._all_weight_values = torch.nn.ModuleList(_all_weight_values)

        return qRNNBase
Esempio n. 6
    def from_float(cls, mod):
        r"""Create a dynamic quantized LSTM from a float ``torch.nn.LSTM``.

        For every layer and direction the ``ih`` and ``hh`` weights are packed
        (quantized to qint8, or packed as fp16) and combined into one
        cell-params object, which is stored on the returned module.

        Args:
            mod: a float ``torch.nn.LSTM`` with a ``qconfig`` attribute and
                with biases enabled.

        Returns:
            The new ``LSTM`` with ``_all_weight_values`` (a
            ``torch.nn.ModuleList``) and ``_all_params`` (the ``.param`` of
            each entry) filled in.

        Raises:
            AssertionError: if ``mod`` is not exactly ``torch.nn.LSTM``, lacks
                a ``qconfig`` attribute, or has ``bias`` disabled.
            RuntimeError: if the observer dtype is neither ``torch.qint8`` nor
                ``torch.float16``.
            NotImplementedError: if ``mod.mode`` is not ``'LSTM'``.
        """
        # NOTE(review): this takes `cls` and is presumably decorated with
        # @classmethod in the enclosing class; the decorator is not visible here.
        assert type(
            mod
        ) == torch.nn.LSTM, 'nn.quantized.dynamic.RNNBase.from_float only works for nn.LSTM'
        assert hasattr(
            mod, 'qconfig'), 'Input float module must have qconfig defined'

        # Keep the observer *factory* rather than one instance: an observer
        # accumulates statistics across calls, so every weight gets its own.
        if mod.qconfig is not None and mod.qconfig.weight is not None:
            make_weight_observer = mod.qconfig.weight
        else:
            # We have the circular import issues if we import the qconfig in the beginning of this file:
            # The current workaround is to postpone the
            # import until we need it.
            from torch.quantization.qconfig import default_dynamic_qconfig
            make_weight_observer = default_dynamic_qconfig.weight

        dtype = make_weight_observer().dtype
        supported_scalar_types = [torch.qint8, torch.float16]
        if dtype not in supported_scalar_types:
            raise RuntimeError(
                'Unsupported dtype for dynamic RNN quantization: {}'.format(
                    dtype))

        if mod.mode == 'LSTM':
            qRNNBase = LSTM(mod.input_size, mod.hidden_size, mod.num_layers,
                            mod.bias, mod.batch_first, mod.dropout,
                            mod.bidirectional, dtype)
        else:
            raise NotImplementedError(
                'Only LSTM is supported for QuantizedRNN for now')

        num_directions = 2 if mod.bidirectional else 1

        # The packing below always reads the bias_* attributes.
        assert mod.bias

        def quantize_and_pack(w, b):
            # Quantize `w` per tensor to qint8 and prepack it with bias `b`.
            weight_observer = make_weight_observer()
            weight_observer(w)
            wt_scale, wt_zp = weight_observer.calculate_qparams()
            qweight = torch.quantize_per_tensor(
                w.float(), float(wt_scale), int(wt_zp),
                torch.qint8)
            packed_weight = \
                torch.ops.quantized.linear_prepack(qweight, b)
            return packed_weight

        _all_weight_values = []
        for layer in range(qRNNBase.num_layers):
            for direction in range(num_directions):
                suffix = '_reverse' if direction == 1 else ''

                def retrieve_weight_bias(ihhh):
                    # Called right away below, so reading the loop variables
                    # `layer` and `suffix` from the closure is safe.
                    weight_name = 'weight_{}_l{}{}'.format(ihhh, layer, suffix)
                    bias_name = 'bias_{}_l{}{}'.format(ihhh, layer, suffix)
                    weight = getattr(mod, weight_name)
                    bias = getattr(mod, bias_name)
                    return weight, bias

                weight_ih, bias_ih = retrieve_weight_bias('ih')
                weight_hh, bias_hh = retrieve_weight_bias('hh')

                if dtype == torch.qint8:
                    packed_ih = quantize_and_pack(weight_ih, bias_ih)
                    packed_hh = quantize_and_pack(weight_hh, bias_hh)

                    cell_params = torch.ops.quantized.make_quantized_cell_params_dynamic(
                        packed_ih, packed_hh, bias_ih, bias_hh)
                else:
                    # NOTE(review): the prepack arguments are reconstructed;
                    # confirm against the `linear_prepack_fp16` op schema.
                    packed_ih = torch.ops.quantized.linear_prepack_fp16(
                        weight_ih.float(), bias_ih)
                    packed_hh = torch.ops.quantized.linear_prepack_fp16(
                        weight_hh.float(), bias_hh)

                    cell_params = torch.ops.quantized.make_quantized_cell_params_fp16(
                        packed_ih, packed_hh, bias_ih, bias_hh)

                # NOTE(review): the wrapper type is reconstructed; the entries
                # must be modules exposing `.param`, which is read below.
                _all_weight_values.append(PackedParameter(cell_params))

        qRNNBase._all_weight_values = torch.nn.ModuleList(_all_weight_values)
        qRNNBase._all_params = ([m.param for m in qRNNBase._all_weight_values])

        return qRNNBase