Example #1
def process_weights(ihhh, layer, suffix, qweight, bias, dtype):
    if dtype == torch.qint8:
        # for each layer, for each direction we need to quantize and pack
        # weights and pack parameters in this order:
        #
        #   w_ih, w_hh
        packed_weight = \
            torch.ops.quantized.linear_prepack(qweight, bias)

        params = [packed_weight]
        pos_names = ['w']
        ret_name = [
            '{}_{}_l{}{}'.format(name, ihhh, layer, suffix)
            for name in pos_names
        ]
        return params, ret_name
    else:
        # for each layer, for each direction we need to quantize and pack
        # weights and pack parameters in this order:
        #
        #   packed_ih, packed_hh, b_ih, b_hh
        packed_weight = torch.fbgemm_pack_gemm_matrix_fp16(
            qweight)

        params = [packed_weight, bias]
        pos_names = ['packed', 'b']
        ret_name = [
            '{}_{}_l{}{}'.format(name, ihhh, layer, suffix)
            for name in pos_names
        ]
        return params, ret_name
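
A minimal driver sketch for the helper above, assuming the process_weights from this example is in scope, that the module follows the usual weight_ih_l{layer}{suffix} / bias_ih_l{layer}{suffix} naming of torch.nn.LSTM, and that the local PyTorch build has FBGEMM support (the loop bounds and the no_grad guard are illustrative, not the original caller):

    import torch
    import torch.nn as nn

    mod = nn.LSTM(input_size=8, hidden_size=16, num_layers=2, bidirectional=True)
    dtype = torch.float16   # anything other than torch.qint8 takes the fp16 path
    all_params, all_names = [], []
    with torch.no_grad():
        for layer in range(mod.num_layers):
            for direction in range(2 if mod.bidirectional else 1):
                suffix = '_reverse' if direction == 1 else ''
                for ihhh in ['ih', 'hh']:
                    weight = getattr(mod, 'weight_{}_l{}{}'.format(ihhh, layer, suffix))
                    bias = getattr(mod, 'bias_{}_l{}{}'.format(ihhh, layer, suffix))
                    params, names = process_weights(
                        ihhh, layer, suffix, weight.float(), bias, dtype)
                    all_params.extend(params)
                    all_names.extend(names)
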
Example #2
 def __init__(self, other):
     super(QuantizedLinearFP16, self).__init__()
     self.in_features = other.in_features
     self.out_features = other.out_features
     self.original_weight = other.weight
     self.weight = torch.fbgemm_pack_gemm_matrix_fp16(
         other.weight.clone(memory_format=torch.contiguous_format).float())
     assert other.bias is not None, 'QuantizedLinearFP16 requires a bias'
     self.bias = torch.nn.Parameter(other.bias.clone(memory_format=torch.contiguous_format).float(), requires_grad=False)
     self.register_buffer('packed_weight', self.weight)
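
For context, this constructor copies the shape metadata of an existing float torch.nn.Linear, keeps the float weight around as original_weight, and registers an fp16-packed copy as the packed_weight buffer. A minimal usage sketch, assuming the class is the (since-deprecated) torch.jit.QuantizedLinearFP16 and that FBGEMM is available:

    import torch
    import torch.nn as nn

    float_linear = nn.Linear(64, 32, bias=True)   # a bias is required by the assert above
    qlinear = torch.jit.QuantizedLinearFP16(float_linear)
    print(qlinear.in_features, qlinear.out_features)   # 64 32
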
Example #3
def process_weights(ihhh, layer, suffix, dtype):
    weight_name = 'weight_{}_l{}{}'.format(ihhh, layer, suffix)
    bias_name = 'bias_{}_l{}{}'.format(ihhh, layer, suffix)

    weight = getattr(other, weight_name)
    bias = getattr(other, bias_name)

    if dtype == torch.int8:
        # for each layer, for each direction we need to quantize and pack
        # weights and pack parameters in this order:
        #
        #   w_ih, w_hh, b_ih, b_hh, packed_ih, packed_hh, col_offsets_ih,
        #   col_offsets_hh, scale_ih, scale_hh, zero_point_ih, zero_point_hh
        qweight, col_offsets, scale, zero_point = \
            torch.fbgemm_linear_quantize_weight(weight.clone(memory_format=torch.contiguous_format).float())
        packed_weight = torch.fbgemm_pack_quantized_matrix(
            qweight)

        params = [
            qweight, bias, packed_weight, col_offsets, scale,
            zero_point
        ]
        pos_names = [
            'w', 'b', 'packed', 'col_offsets', 'scale',
            'zero_point'
        ]
        ret_name = [
            '{}_{}_l{}{}'.format(name, ihhh, layer, suffix)
            for name in pos_names
        ]
        self._quantized_weights_names.append(ret_name[0])
        self._packed_weights_names.append(ret_name[2])
        return params, ret_name
    else:
        # for each layer, for each direction we need to quantize and pack
        # weights and pack parameters in this order:
        #
        #   packed_ih, packed_hh, b_ih, b_hh
        packed_weight = torch.fbgemm_pack_gemm_matrix_fp16(
            weight.clone(
                memory_format=torch.contiguous_format).float())

        self._orig_weights_names.append(weight_name)
        self.register_buffer(weight_name, weight)
        params = [packed_weight, bias]
        pos_names = ['packed', 'b']
        ret_name = [
            '{}_{}_l{}{}'.format(name, ihhh, layer, suffix)
            for name in pos_names
        ]
        self._packed_weights_names.append(ret_name[0])
        self._quantized_weights_names.append(ret_name[0])
        return params, ret_name
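
Compared with Example #1, this variant also records the generated names in self._quantized_weights_names, self._packed_weights_names and self._orig_weights_names; an _unpack routine such as the one in Example #5 walks those lists to rebuild the packed buffers, for instance after the module has been deserialized. The int8 branch rests on torch.fbgemm_linear_quantize_weight, which returns the quantized weight together with its column offsets, scale and zero point. A standalone sketch of that call, with arbitrary shapes and assuming an FBGEMM-enabled build:

    import torch

    weight = torch.randn(32, 64)   # (out_features, in_features)
    qweight, col_offsets, scale, zero_point = \
        torch.fbgemm_linear_quantize_weight(weight.clone().float())
    packed_weight = torch.fbgemm_pack_quantized_matrix(qweight)
    print(qweight.dtype, col_offsets.shape, scale, zero_point)
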
Example #4
 def __init__(self, other):
     super(QuantizedLinearFP16, self).__init__()
     warnings.warn(
         "torch.jit.QuantizedLinearFP16 is deprecated and will be removed in an upcoming "
         "PyTorch release. Please use the torch.nn.quantized.dynamic.Linear instead."
     )
     self.in_features = other.in_features
     self.out_features = other.out_features
     self.original_weight = other.weight
     self.weight = torch.fbgemm_pack_gemm_matrix_fp16(
         other.weight.clone(memory_format=torch.contiguous_format).float())
     assert other.bias is not None, 'QuantizedLinearFP16 requires a bias'
     self.bias = torch.nn.Parameter(
         other.bias.clone(memory_format=torch.contiguous_format).float(),
         requires_grad=False)
     self.register_buffer('packed_weight', self.weight)
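
Per the deprecation warning above, the replacement path is dynamic quantization, which swaps each nn.Linear for a torch.nn.quantized.dynamic.Linear holding an fp16-packed weight. A hedged sketch of that route (API names as of the releases that still shipped this class):

    import torch
    import torch.nn as nn

    model = nn.Sequential(nn.Linear(64, 32), nn.ReLU(), nn.Linear(32, 10))
    quantized_model = torch.quantization.quantize_dynamic(
        model, {nn.Linear}, dtype=torch.float16)
    print(quantized_model)
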
Example #5
 def _unpack(self):
     if self.dtype == torch.int8:
         packed_weights = self._packed_weights
         quantized_weights = self._quantized_weights
         assert len(packed_weights) == len(quantized_weights)
         for i in range(len(packed_weights)):
             packed = packed_weights[i]
             quantized = quantized_weights[i]
             packed.set_(torch.fbgemm_pack_quantized_matrix(quantized))
     else:
         packed_weights = self._packed_weights
         orig_weights = self._orig_weights
         assert len(packed_weights) == len(orig_weights)
         for i in range(len(packed_weights)):
             packed = packed_weights[i]
             orig_weight = orig_weights[i]
             packed.set_(torch.fbgemm_pack_gemm_matrix_fp16(orig_weight))
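
This _unpack is the counterpart of the bookkeeping in Example #3: the int8 path repacks from the stored quantized weights, while the fp16 path repacks from the original float weights. Using set_ rewrites the existing packed buffers in place, so every reference to those buffers (including the ones registered on the module) sees the freshly packed data, which is what lets repacking after deserialization work without reassigning any attributes.
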
Example #6
def process_weights(ihhh, layer, suffix, dtype):
    weight_name = 'weight_{}_l{}{}'.format(ihhh, layer, suffix)
    bias_name = 'bias_{}_l{}{}'.format(ihhh, layer, suffix)

    weight = getattr(mod, weight_name)
    bias = getattr(mod, bias_name)

    if dtype == torch.qint8:
        # for each layer, for each direction we need to quantize and pack
        # weights and pack parameters in this order:
        #
        #   w_ih, w_hh
        weight_observer(weight)
        wt_scale, wt_zp = weight_observer.calculate_qparams()
        qweight = torch.quantize_per_tensor(
            weight.float(), float(wt_scale), int(wt_zp),
            torch.qint8)
        packed_weight = \
            torch.ops.quantized.linear_prepack(qweight, bias)

        params = [packed_weight]
        pos_names = ['w']
        ret_name = [
            '{}_{}_l{}{}'.format(name, ihhh, layer, suffix)
            for name in pos_names
        ]
        return params, ret_name
    else:
        # for each layer, for each direction we need to quantize and pack
        # weights and pack parameters in this order:
        #
        #   packed_ih, packed_hh, b_ih, b_hh
        packed_weight = torch.fbgemm_pack_gemm_matrix_fp16(
            weight.float())

        params = [packed_weight, bias]
        pos_names = ['packed', 'b']
        ret_name = [
            '{}_{}_l{}{}'.format(name, ihhh, layer, suffix)
            for name in pos_names
        ]
        return params, ret_name
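
The qint8 branch above relies on a weight_observer captured from the enclosing scope. A self-contained sketch of that sub-pipeline, with an illustrative MinMaxObserver configuration and a dummy bias (the real caller may configure the observer differently, and a quantization backend such as FBGEMM must be available):

    import torch
    from torch.quantization import MinMaxObserver

    weight = torch.randn(16, 8)   # (out_features, in_features)
    bias = torch.zeros(16)
    weight_observer = MinMaxObserver(
        dtype=torch.qint8, qscheme=torch.per_tensor_symmetric)
    weight_observer(weight)                          # record min/max statistics
    wt_scale, wt_zp = weight_observer.calculate_qparams()
    qweight = torch.quantize_per_tensor(
        weight.float(), float(wt_scale), int(wt_zp), torch.qint8)
    packed_weight = torch.ops.quantized.linear_prepack(qweight, bias)
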
Example #7
 def _unpack(self):
     if self.dtype == torch.uint8:
         packed_weights = self._get_packed_weights()
         quantized_weights = self._get_quantized_weights()
         assert len(packed_weights) == len(quantized_weights)
         for i in range(len(packed_weights)):
             packed = packed_weights[i]
             quantized = quantized_weights[i]
             packed.set_(
                 torch.fbgemm_pack_quantized_matrix(quantized,
                                                    quantized.size(1),
                                                    quantized.size(0)))
     else:
         packed_weights = self._get_packed_weights()
         orig_weights = self._get_orig_weights()
         assert len(packed_weights) == len(orig_weights)
         for i in range(len(packed_weights)):
             packed = packed_weights[i]
             orig_weight = orig_weights[i]
             packed.set_(torch.fbgemm_pack_gemm_matrix_fp16(orig_weight))
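
Relative to Example #5, this variant reaches the tensors through accessor methods, keys the int8 branch on torch.uint8, and uses the older three-argument torch.fbgemm_pack_quantized_matrix(quantized, K, N) form; later releases reduce that to the single-argument call seen in Example #5. The repack-in-place pattern via set_ is otherwise the same.
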
Example #8
 def _unpack(self):
     self.packed_weight.set_(
         torch.fbgemm_pack_gemm_matrix_fp16(self.original_weight))
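
This is the single-buffer counterpart of the loops in Examples #5 and #7: QuantizedLinearFP16 (Examples #2 and #4) keeps original_weight precisely so that the fp16-packed weight can be rebuilt in place with set_ whenever the module needs to be unpacked again, for example after loading a saved state.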