Example #1
    def __init__(self, other):
        super(QuantizedRNNCellBase, self).__init__()
        self.input_size = other.input_size
        self.hidden_size = other.hidden_size
        self.bias = other.bias
        if not self.bias:
            raise ValueError("Quantized RNN cells require bias terms")

        weight_ih, col_offsets_ih, self.scale_ih, self.zero_point_ih = \
            torch.fbgemm_linear_quantize_weight(other.weight_ih.clone().float())
        self.register_buffer('weight_ih', weight_ih)
        self.register_buffer('col_offsets_ih', col_offsets_ih)
        weight_hh, col_offsets_hh, self.scale_hh, self.zero_point_hh = \
            torch.fbgemm_linear_quantize_weight(other.weight_hh.clone().float())
        self.register_buffer('weight_hh', weight_hh)
        self.register_buffer('col_offsets_hh', col_offsets_hh)

        packed_ih = torch.fbgemm_pack_quantized_matrix(self.weight_ih)
        self.register_buffer('packed_ih', packed_ih)
        packed_hh = torch.fbgemm_pack_quantized_matrix(self.weight_hh)
        self.register_buffer('packed_hh', packed_hh)

        self.bias_ih = torch.nn.Parameter(other.bias_ih.clone().float(),
                                          requires_grad=False)
        self.bias_hh = torch.nn.Parameter(other.bias_hh.clone().float(),
                                          requires_grad=False)
Example #2
    def __init__(self, other):
        super(QuantizedRNNCellBase, self).__init__()
        warnings.warn(
            "torch.jit.QuantizedRNNCellBase is deprecated and will be removed in an upcoming "
            "PyTorch release. Please use the torch.nn.quantized.dynamic.RNNCell instead."
        )

        self.input_size = other.input_size
        self.hidden_size = other.hidden_size
        self.bias = other.bias
        if not self.bias:
            raise ValueError("Quantized RNN cells require bias terms")

        weight_ih, col_offsets_ih, self.scale_ih, self.zero_point_ih = \
            torch.fbgemm_linear_quantize_weight(other.weight_ih.clone(memory_format=torch.contiguous_format).float())
        self.register_buffer('weight_ih', weight_ih)
        self.register_buffer('col_offsets_ih', col_offsets_ih)
        weight_hh, col_offsets_hh, self.scale_hh, self.zero_point_hh = \
            torch.fbgemm_linear_quantize_weight(other.weight_hh.clone(memory_format=torch.contiguous_format).float())
        self.register_buffer('weight_hh', weight_hh)
        self.register_buffer('col_offsets_hh', col_offsets_hh)

        packed_ih = torch.fbgemm_pack_quantized_matrix(self.weight_ih)
        self.register_buffer('packed_ih', packed_ih)
        packed_hh = torch.fbgemm_pack_quantized_matrix(self.weight_hh)
        self.register_buffer('packed_hh', packed_hh)

        self.bias_ih = torch.nn.Parameter(
            other.bias_ih.clone(memory_format=torch.contiguous_format).float(),
            requires_grad=False)
        self.bias_hh = torch.nn.Parameter(
            other.bias_hh.clone(memory_format=torch.contiguous_format).float(),
            requires_grad=False)
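
For orientation, here is a minimal usage sketch of how such a quantized cell is typically built from an existing float cell. The concrete subclass (torch.jit.QuantizedLSTMCell) and the input shapes are assumptions based on the constructor shown above, not part of the example itself:

    # Hedged sketch: wrap a trained float cell in its quantized counterpart.
    import torch

    float_cell = torch.nn.LSTMCell(input_size=10, hidden_size=20, bias=True)
    quant_cell = torch.jit.QuantizedLSTMCell(float_cell)  # assumed subclass

    x = torch.randn(3, 10)
    hx = (torch.zeros(3, 20), torch.zeros(3, 20))
    h_out, c_out = quant_cell(x, hx)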
Example #3
    def _unpack(self):
        self.packed_ih.set_(
            torch.fbgemm_pack_quantized_matrix(self.weight_ih,
                                               self.weight_ih.size(1),
                                               self.weight_ih.size(0)))
        self.packed_hh.set_(
            torch.fbgemm_pack_quantized_matrix(self.weight_hh,
                                               self.weight_hh.size(1),
                                               self.weight_hh.size(0)))
Example #4
                def process_weights(ihhh, layer, suffix):
                    weight_name = 'weight_{}_l{}{}'.format(ihhh, layer, suffix)
                    bias_name = 'bias_{}_l{}{}'.format(ihhh, layer, suffix)

                    weight = getattr(other, weight_name)
                    bias = getattr(other, bias_name)

                    qweight, col_offsets, scale, zero_point = \
                        torch.fbgemm_linear_quantize_weight(weight.clone().float())
                    packed_weight = torch.fbgemm_pack_quantized_matrix(
                        qweight, weight.size(1), weight.size(0))

                    params = [
                        qweight, bias, packed_weight, col_offsets, scale,
                        zero_point
                    ]
                    pos_names = [
                        'w', 'b', 'packed', 'col_offsets', 'scale',
                        'zero_point'
                    ]
                    ret_name = [
                        '{}_{}_l{}{}'.format(name, ihhh, layer, suffix)
                        for name in pos_names
                    ]
                    quantized_weights.append(ret_name[0])
                    packed_weights.append(ret_name[2])
                    return params, ret_name
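
process_weights is meant to be called once per weight half ('ih'/'hh'), per layer, and per direction, with the '_reverse' suffix selecting the backward direction of a bidirectional RNN. The loop below is a hedged reconstruction of that surrounding code (num_layers, num_directions, and the way the returned tensors are attached are assumptions, not shown in the example):

    # Hedged sketch of the enclosing loop that drives process_weights.
    for layer in range(num_layers):
        for direction in range(num_directions):
            suffix = '_reverse' if direction == 1 else ''
            for ihhh in ['ih', 'hh']:
                params, names = process_weights(ihhh, layer, suffix)
                for param, name in zip(params, names):
                    # the real code may register buffers/parameters instead
                    setattr(self, name, param)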
Example #5
    def _unpack(self):
        packed_weights = self._get_packed_weights()
        quantized_weights = self._get_quantized_weights()
        assert len(packed_weights) == len(quantized_weights)
        for i in range(len(packed_weights)):
            packed = packed_weights[i]
            quantized = quantized_weights[i]
            packed.set_(torch.fbgemm_pack_quantized_matrix(
                quantized, quantized.size(1), quantized.size(0)))
Example #6
                def process_weights(ihhh, layer, suffix, dtype):
                    weight_name = 'weight_{}_l{}{}'.format(ihhh, layer, suffix)
                    bias_name = 'bias_{}_l{}{}'.format(ihhh, layer, suffix)

                    weight = getattr(other, weight_name)
                    bias = getattr(other, bias_name)

                    if dtype == torch.int8:
                        # for each layer, for each direction we need to quantize and pack
                        # weights and pack parameters in this order:
                        #
                        #   w_ih, w_hh, b_ih, b_hh, packed_ih, packed_hh, col_offsets_ih,
                        #   col_offsets_hh, scale_ih, scale_hh, zero_point_ih, zero_point_hh
                        qweight, col_offsets, scale, zero_point = \
                            torch.fbgemm_linear_quantize_weight(weight.clone(memory_format=torch.contiguous_format).float())
                        packed_weight = torch.fbgemm_pack_quantized_matrix(
                            qweight)

                        params = [
                            qweight, bias, packed_weight, col_offsets, scale,
                            zero_point
                        ]
                        pos_names = [
                            'w', 'b', 'packed', 'col_offsets', 'scale',
                            'zero_point'
                        ]
                        ret_name = [
                            '{}_{}_l{}{}'.format(name, ihhh, layer, suffix)
                            for name in pos_names
                        ]
                        self._quantized_weights_names.append(ret_name[0])
                        self._packed_weights_names.append(ret_name[2])
                        return params, ret_name
                    else:
                        # for each layer, for each direction we need to quantize and pack
                        # weights and pack parameters in this order:
                        #
                        #   packed_ih, packed_hh, b_ih, b_hh
                        packed_weight = torch.fbgemm_pack_gemm_matrix_fp16(
                            weight.clone(
                                memory_format=torch.contiguous_format).float())

                        self._orig_weights_names.append(weight_name)
                        self.register_buffer(weight_name, weight)
                        params = [packed_weight, bias]
                        pos_names = ['packed', 'b']
                        ret_name = [
                            '{}_{}_l{}{}'.format(name, ihhh, layer, suffix)
                            for name in pos_names
                        ]
                        self._packed_weights_names.append(ret_name[0])
                        self._quantized_weights_names.append(ret_name[0])
                        return params, ret_name
Example #7
    def __init__(self, other):
        super(QuantizedLinear, self).__init__()
        self.in_features = other.in_features
        self.out_features = other.out_features
        # Quantize weight and discard the original
        self.weight, self.col_offsets, self.scale, self.zero_point = torch.fbgemm_linear_quantize_weight(
            other.weight.clone(memory_format=torch.contiguous_format).float())
        self.weight = torch.nn.Parameter(self.weight, requires_grad=False)
        self.col_offsets = torch.nn.Parameter(self.col_offsets, requires_grad=False)
        assert other.bias is not None, 'QuantizedLinear requires a bias'
        self.bias = torch.nn.Parameter(other.bias.clone(memory_format=torch.contiguous_format).float(), requires_grad=False)

        self.register_buffer(
            'packed_tensor_ptr',
            torch.fbgemm_pack_quantized_matrix(self.weight.clone(memory_format=torch.contiguous_format)))
Example #8
    def _unpack(self):
        if self.dtype == torch.int8:
            packed_weights = self._packed_weights
            quantized_weights = self._quantized_weights
            assert len(packed_weights) == len(quantized_weights)
            for i in range(len(packed_weights)):
                packed = packed_weights[i]
                quantized = quantized_weights[i]
                packed.set_(torch.fbgemm_pack_quantized_matrix(quantized))
        else:
            packed_weights = self._packed_weights
            orig_weights = self._orig_weights
            assert len(packed_weights) == len(orig_weights)
            for i in range(len(packed_weights)):
                packed = packed_weights[i]
                orig_weight = orig_weights[i]
                packed.set_(torch.fbgemm_pack_gemm_matrix_fp16(orig_weight))
Example #9
    def test_quantized_linear(self, shape, out_features):
        input = torch.rand(shape)
        weight = torch.rand(out_features, shape[1])
        bias = torch.rand(out_features)
        q_weight, col_offsets, scale, zero_point = \
            torch.fbgemm_linear_quantize_weight(weight.clone().float())
        packed_weight = torch.fbgemm_pack_quantized_matrix(q_weight.clone())

        def fbgemm_quantized_linear(input, weight, bias):
            return torch.fbgemm_linear_int8_weight_fp32_activation(
                input.float(), q_weight, packed_weight, col_offsets, scale,
                zero_point, bias.float())

        ref_out, tvm_out = self.runBoth(fbgemm_quantized_linear, input, weight,
                                        bias)
        # relax the constraint to avoid flaky test
        assert torch.allclose(ref_out, tvm_out, rtol=0.5, atol=0.5)
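
The quantized op in this test approximates a plain float linear layer, which is why the tolerances are so loose. For context, a direct float reference for the same call, reusing the tensors defined above (the comparison itself is an assumption that mirrors the test's tolerance, not a documented accuracy guarantee):

    import torch.nn.functional as F

    # Float reference for the int8 op above; expect only a coarse match.
    ref = F.linear(input.float(), weight.float(), bias.float())
    quant = torch.fbgemm_linear_int8_weight_fp32_activation(
        input.float(), q_weight, packed_weight, col_offsets, scale,
        zero_point, bias.float())
    assert torch.allclose(ref, quant, rtol=0.5, atol=0.5)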
Example #10
    def _unpack(self):
        if self.dtype == torch.uint8:
            packed_weights = self._get_packed_weights()
            quantized_weights = self._get_quantized_weights()
            assert len(packed_weights) == len(quantized_weights)
            for i in range(len(packed_weights)):
                packed = packed_weights[i]
                quantized = quantized_weights[i]
                packed.set_(
                    torch.fbgemm_pack_quantized_matrix(quantized,
                                                       quantized.size(1),
                                                       quantized.size(0)))
        else:
            packed_weights = self._get_packed_weights()
            orig_weights = self._get_orig_weights()
            assert len(packed_weights) == len(orig_weights)
            for i in range(len(packed_weights)):
                packed = packed_weights[i]
                orig_weight = orig_weights[i]
                packed.set_(torch.fbgemm_pack_gemm_matrix_fp16(orig_weight))
Example #11
    def __init__(self, other):
        super(QuantizedLinear, self).__init__()
        warnings.warn(
            "torch.jit.QuantizedLinear is deprecated and will be removed in an upcoming "
            "PyTorch release. Please use the torch.nn.quantized.dynamic.Linear instead."
        )

        self.in_features = other.in_features
        self.out_features = other.out_features
        # Quantize weight and discard the original
        self.weight, self.col_offsets, self.scale, self.zero_point = torch.fbgemm_linear_quantize_weight(
            other.weight.clone(memory_format=torch.contiguous_format).float())
        self.weight = torch.nn.Parameter(self.weight, requires_grad=False)
        self.col_offsets = torch.nn.Parameter(self.col_offsets,
                                              requires_grad=False)
        assert other.bias is not None, 'QuantizedLinear requires a bias'
        self.bias = torch.nn.Parameter(
            other.bias.clone(memory_format=torch.contiguous_format).float(),
            requires_grad=False)

        self.register_buffer(
            'packed_tensor_ptr',
            torch.fbgemm_pack_quantized_matrix(
                self.weight.clone(memory_format=torch.contiguous_format)))
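
Since the constructor reads in_features, out_features, weight, and bias from other, it expects a float torch.nn.Linear. A minimal conversion sketch (the inference call at the end assumes the module is used as a drop-in replacement for its float counterpart):

    # Hedged usage sketch: convert a trained float layer, then run inference.
    fc = torch.nn.Linear(128, 64, bias=True)
    qfc = torch.jit.QuantizedLinear(fc)

    x = torch.randn(8, 128)
    y = qfc(x)  # internally uses the fbgemm int8 kernel with fp32 activations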
Example #12
    def _unpack(self):
        self.packed_tensor_ptr.set_(
            torch.fbgemm_pack_quantized_matrix(self.weight))