Ejemplo n.º 1
0
    def test_fake_quant_quant_per_channel_other_prec(self):
        """Compare QuantConv3d with manual fake quantization at 4/3 bits.

        The input descriptor uses 4 bits; the weight descriptor uses 3 bits
        with ``axis=0``. The module output must be exactly equal to
        ``F.conv3d`` applied to an input and a weight copy that were quantized
        by standalone ``TensorQuantizer`` instances built from the same
        descriptors.
        """
        input_desc = QuantDescriptor(num_bits=4)
        weight_desc = QuantDescriptor(num_bits=3, axis=0)

        # Bias is disabled so the reference conv3d call needs no bias term.
        conv = quant_conv.QuantConv3d(
            _NUM_IN_CHANNELS,
            _NUM_OUT_CHANNELS,
            3,
            bias=False,
            quant_desc_input=input_desc,
            quant_desc_weight=weight_desc)
        sample = torch.randn(16, _NUM_IN_CHANNELS, 8, 8, 8)

        # Reference path: quantize the input and a clone of the module's
        # weights by hand, then run a plain convolution.
        input_quantizer = TensorQuantizer(input_desc)
        kernel_quantizer = TensorQuantizer(weight_desc)
        ref_input = input_quantizer(sample)
        ref_weight = kernel_quantizer(conv.weight.clone())
        expected = F.conv3d(ref_input, ref_weight)

        # Path under test: the quantized module applied to the raw input.
        actual = conv(sample)

        expected_np = expected.detach().cpu().numpy()
        actual_np = actual.detach().cpu().numpy()
        np.testing.assert_array_equal(expected_np, actual_np)
Ejemplo n.º 2
0
    def test_fake_quant_per_channel_other_precs(self):
        """Test some precisions other than 8bit.

        A bias-free ``QuantLinear`` (255 inputs, 257 outputs) is built with a
        4-bit input descriptor and a 3-bit weight descriptor. Its output must
        be exactly equal to ``F.linear`` applied to an input and a weight copy
        quantized by standalone ``TensorQuantizer`` instances built from the
        same descriptors.
        """
        # NOTE(review): no ``axis`` is passed to the weight descriptor even
        # though the test name says "per_channel" -- confirm this is intended.
        input_desc = tensor_quant.QuantDescriptor(num_bits=4)
        weight_desc = tensor_quant.QuantDescriptor(num_bits=3)

        in_features, out_features = 255, 257
        linear = quant_linear.QuantLinear(
            in_features,
            out_features,
            bias=False,
            quant_desc_input=input_desc,
            quant_desc_weight=weight_desc)
        kernel_quantizer = TensorQuantizer(weight_desc)
        input_quantizer = TensorQuantizer(input_desc)

        batch = torch.randn(32, in_features)

        # Reference path: quantize a clone of the weights and the input by
        # hand, then apply a plain linear transform.
        kernel_clone = linear.weight.clone()
        ref_input = input_quantizer(batch)
        ref_weight = kernel_quantizer(kernel_clone)
        expected = F.linear(ref_input, ref_weight)

        # Path under test: the quantized module applied to the raw input.
        actual = linear(batch)

        expected_np = expected.detach().cpu().numpy()
        actual_np = actual.detach().cpu().numpy()
        np.testing.assert_array_equal(expected_np, actual_np)
Ejemplo n.º 3
0
    def __init__(self, config):
        """Create the quantized Q/K/V projections and matmul input quantizers.

        Args:
            config: Object providing ``hidden_size``, ``num_attention_heads``,
                ``output_attentions`` and ``attention_probs_dropout_prob``.

        Raises:
            ValueError: If ``hidden_size`` is not a multiple of
                ``num_attention_heads`` and ``config`` has no
                ``embedding_size`` attribute.
        """
        super().__init__()
        hidden_size = config.hidden_size
        num_heads = config.num_attention_heads

        # The divisibility requirement is waived when the config carries an
        # ``embedding_size`` attribute.
        not_divisible = hidden_size % num_heads != 0
        if not_divisible and not hasattr(config, "embedding_size"):
            raise ValueError(
                "The hidden size (%d) is not a multiple of the number of attention "
                "heads (%d)" % (hidden_size, num_heads))
        self.output_attentions = config.output_attentions

        self.num_attention_heads = num_heads
        self.attention_head_size = int(hidden_size / num_heads)
        self.all_head_size = num_heads * self.attention_head_size

        # Q/K/V projections use QuantLinear in place of torch.nn.Linear.
        self.query = quant_nn.QuantLinear(hidden_size, self.all_head_size)
        self.key = quant_nn.QuantLinear(hidden_size, self.all_head_size)
        self.value = quant_nn.QuantLinear(hidden_size, self.all_head_size)

        self.dropout = nn.Dropout(config.attention_probs_dropout_prob)

        # Stand-alone quantizers meant for the operands of the torch.matmul()
        # calls in the forward method (not shown in this block): matmul has no
        # quantized counterpart, so its inputs are quantized manually. All four
        # are built from QuantLinear's default input descriptor.
        matmul_desc = quant_nn.QuantLinear.default_quant_desc_input
        self.matmul_q_input_quantizer = TensorQuantizer(matmul_desc)
        self.matmul_k_input_quantizer = TensorQuantizer(matmul_desc)
        self.matmul_v_input_quantizer = TensorQuantizer(matmul_desc)
        self.matmul_a_input_quantizer = TensorQuantizer(matmul_desc)
Ejemplo n.º 4
0
 def test_print_tensor_quantizer(self):
     """Print a default-constructed TensorQuantizer; passes if printing does not raise."""
     print(TensorQuantizer())