Example #1
        def quantized_linear(input, weight, bias=None):
            # Inference-only path: quantize the weight and the linear output
            # immediately, without attaching any autograd hooks.
            if self.inference_only:
                weight_ = self.quantize_instant(weight, "weight")
                res = self.origin_linear(input, weight_, bias)
                return self.quantize_instant(res, "activation_linear")
            # Dual precision: separate forward/backward quantization, handled
            # by dual_prec_linear (see Example #6).
            elif self.dual_precision:
                return self.dual_prec_linear(input, weight, bias)
            # Training path: attach fprop/bprop quantizers to input, weight,
            # bias and the output so quantization runs in both passes.
            else:
                input_ = attacher.pytorch_attach(input,
                                                 self.fprop_quantizer,
                                                 self.bprop_quantizer,
                                                 tag='activation/in')
                weight_ = attacher.pytorch_attach(weight,
                                                  self.fprop_quantizer,
                                                  self.bprop_quantizer,
                                                  tag='weight')
                if bias is not None:
                    bias_ = attacher.pytorch_attach(bias,
                                                    self.fprop_quantizer,
                                                    self.bprop_quantizer,
                                                    tag='bias')
                else:
                    bias_ = bias

                res = self.origin_linear(input_, weight_, bias_)
                return attacher.pytorch_attach(res,
                                               self.fprop_quantizer,
                                               self.bprop_quantizer,
                                               tag='activation_linear')
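The wrappers above close over self.origin_linear, which suggests the unpatched op is saved before the quantized version is swapped in. The following is only a hypothetical wiring sketch of that pattern; the LinearPatcher class and its quantize argument are illustrative, not part of the library:
    import torch.nn.functional as F

    class LinearPatcher:
        def __init__(self, quantize):
            self.origin_linear = F.linear   # keep a handle to the unpatched op
            self.quantize = quantize        # any callable acting as a quantizer

        def patch(self):
            def quantized_linear(input, weight, bias=None):
                # quantize the operands, then call the saved original op
                return self.origin_linear(self.quantize(input),
                                          self.quantize(weight), bias)
            F.linear = quantized_linear     # route all F.linear calls through the wrapper

        def unpatch(self):
            F.linear = self.origin_linear   # restore the original op
For instance, LinearPatcher(quantize=lambda t: t).patch() installs an identity "quantizer" as a smoke test.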
Example #2
    def quantized_matmul(tensor1, tensor2):
        # Forward-only quantization: the activation quantizer is attached to
        # both operands and the result; no backward quantizer is attached.
        tensor1_ = attacher.pytorch_attach(tensor1,
                                           self.activation_quantizer, None)
        tensor2_ = attacher.pytorch_attach(tensor2,
                                           self.activation_quantizer, None)
        res = self.origin_matmul(tensor1_, tensor2_)
        return attacher.pytorch_attach(res, self.activation_quantizer,
                                       None)
Example #3
    def quantized_matmul(tensor1, tensor2):
        # This path is disabled: the assert aborts immediately, so the
        # attach/matmul code below is unreachable until it is removed.
        assert False
        tensor1_ = attacher.pytorch_attach(tensor1, self.fprop_quantizer,
                                           self.bprop_quantizer)
        tensor2_ = attacher.pytorch_attach(tensor2, self.fprop_quantizer,
                                           self.bprop_quantizer)
        res = self.origin_matmul(tensor1_, tensor2_)
        return attacher.pytorch_attach(res, self.fprop_quantizer,
                                       self.bprop_quantizer)
Example #4
        def quantized_conv2d(input, weight, bias=None, stride=1, padding=0, dilation=1, groups=1):
            if self.inference_only:
                weight_ = self.quantize_instant(weight, "weight")
                return self.origin_conv2d(input, weight_, bias, stride, padding, dilation, groups)
            elif self.dual_precision:
                return self.dual_prec_conv2d(input, weight, bias, stride, padding, dilation, groups)
            else:
                input_ = attacher.pytorch_attach(input, self.fprop_quantizer, self.bprop_quantizer, tag='activation/in')
                weight_ = attacher.pytorch_attach(weight, self.fprop_quantizer, self.bprop_quantizer, tag='weight')
                if bias is not None:
                    bias_ = attacher.pytorch_attach(bias, self.fprop_quantizer, self.bprop_quantizer, tag='bias')
                else:
                    bias_ = bias

                res = self.origin_conv2d(input_, weight_, bias_, stride, padding, dilation, groups)
                return attacher.pytorch_attach(res, self.fprop_quantizer, self.bprop_quantizer, tag='activation')
Example #5
    def dual_prec_conv2d(self,
                         input,
                         weight,
                         bias=None,
                         stride=1,
                         padding=0,
                         dilation=1,
                         groups=1):
        # fprop conv2d quantized by fprop_quantizer
        input_fprop = attacher.pytorch_attach(input,
                                              self.fprop_quantizer,
                                              None,
                                              tag='activation/in')
        weight_fprop = attacher.pytorch_attach(weight,
                                               self.fprop_quantizer,
                                               None,
                                               tag='weight')
        if bias is not None:
            bias_fprop = attacher.pytorch_attach(bias,
                                                 self.fprop_quantizer,
                                                 None,
                                                 tag='bias')
        else:
            bias_fprop = bias
        conv_fprop = self.origin_conv2d(input_fprop, weight_fprop, bias_fprop,
                                        stride, padding, dilation, groups)
        conv_fprop = attacher.pytorch_attach(conv_fprop,
                                             self.fprop_quantizer,
                                             None,
                                             tag='activation')

        # bprop conv2d quantized by bprop_quantizer
        input_bprop = attacher.pytorch_attach(input,
                                              None,
                                              self.bprop_quantizer,
                                              tag='activation/in')
        weight_bprop = attacher.pytorch_attach(weight,
                                               None,
                                               self.bprop_quantizer,
                                               tag='weight')
        if bias is not None:
            bias_bprop = attacher.pytorch_attach(bias,
                                                 None,
                                                 self.bprop_quantizer,
                                                 tag='bias')
        else:
            bias_bprop = bias
        conv_bprop = self.origin_conv2d(input_bprop, weight_bprop, bias_bprop,
                                        stride, padding, dilation, groups)
        conv_bprop = attacher.pytorch_attach(conv_bprop,
                                             None,
                                             self.bprop_quantizer,
                                             tag='activation')
        # Forward value comes from the fprop-quantized graph; gradients flow
        # only through the bprop-quantized graph.
        return conv_fprop.detach() + conv_bprop - conv_bprop.detach()
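The return line merges the two graphs so that the forward value is the fprop-quantized result while gradients come from the bprop-quantized one. A minimal, standalone check of that identity, with toy tensors standing in for the two quantized branches:
    import torch

    x = torch.randn(3, requires_grad=True)
    a = 2.0 * x                      # stand-in for the fprop-quantized output
    b = 3.0 * x                      # stand-in for the bprop-quantized output

    y = a.detach() + b - b.detach()  # same combination as the return above

    assert torch.allclose(y, a)      # forward: value of the fprop branch
    y.sum().backward()
    assert torch.allclose(x.grad, torch.full_like(x, 3.0))  # backward: gradient of the bprop branch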
Example #6
    def dual_prec_linear(self, input, weight, bias=None):
        # fprop linear quantized by fprop_quantizer
        input_fprop = attacher.pytorch_attach(input,
                                              self.fprop_quantizer,
                                              None,
                                              tag='activation/in')
        weight_fprop = attacher.pytorch_attach(weight,
                                               self.fprop_quantizer,
                                               None,
                                               tag='weight')
        if bias is not None:
            bias_fprop = attacher.pytorch_attach(bias,
                                                 self.fprop_quantizer,
                                                 None,
                                                 tag='bias')
        else:
            bias_fprop = bias
        linear_fprop = self.origin_linear(input_fprop, weight_fprop,
                                          bias_fprop)
        linear_fprop = attacher.pytorch_attach(linear_fprop,
                                               self.fprop_quantizer,
                                               None,
                                               tag='activation_linear')

        # bprop linear quantized by bprop_quantizer
        input_bprop = attacher.pytorch_attach(input,
                                              None,
                                              self.bprop_quantizer,
                                              tag='activation/in')
        weight_bprop = attacher.pytorch_attach(weight,
                                               None,
                                               self.bprop_quantizer,
                                               tag='weight')
        if bias is not None:
            bias_bprop = attacher.pytorch_attach(bias,
                                                 None,
                                                 self.bprop_quantizer,
                                                 tag='bias')
        else:
            bias_bprop = bias
        linear_bprop = self.origin_linear(input_bprop, weight_bprop,
                                          bias_bprop)
        linear_bprop = attacher.pytorch_attach(linear_bprop,
                                               None,
                                               self.bprop_quantizer,
                                               tag='activation_linear')
        # Same straight-through combination as in dual_prec_conv2d above.
        return linear_fprop.detach() + linear_bprop - linear_bprop.detach()
Example #7
    def quantize_tensor(self, tensor, fprop=True, bprop=True):
        # Forward-only variant: the bprop flag is accepted but ignored, and no
        # backward quantizer is ever attached.
        fprop = self.activation_quantizer if fprop else None
        return attacher.pytorch_attach(tensor, fprop, None)
Example #8
    def quantize_tensor(self, tensor, fprop=True, bprop=True):
        # Attach the forward and backward quantizers independently, each gated
        # by its flag.
        fprop = self.fprop_quantizer if fprop else None
        bprop = self.bprop_quantizer if bprop else None
        return attacher.pytorch_attach(tensor, fprop, bprop)
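The attacher module itself does not appear on this page. As a rough sketch only, assuming the quantizers behave like plain callables (the names _Attach and attach below are mine, not the library's), pytorch_attach can be read as a small torch.autograd.Function that runs one quantizer on the forward value and another on the incoming gradient:
    import torch

    class _Attach(torch.autograd.Function):
        # Illustration only: apply `fprop_fn` to the forward value and
        # `bprop_fn` to the incoming gradient, mirroring how the examples
        # above use pytorch_attach.
        @staticmethod
        def forward(ctx, tensor, fprop_fn, bprop_fn):
            ctx.bprop_fn = bprop_fn
            return fprop_fn(tensor) if fprop_fn is not None else tensor

        @staticmethod
        def backward(ctx, grad_output):
            if ctx.bprop_fn is not None:
                grad_output = ctx.bprop_fn(grad_output)
            return grad_output, None, None   # no gradients for the callables

    def attach(tensor, fprop_fn=None, bprop_fn=None, tag=None):
        # `tag` is accepted only to mirror the call sites above; a real
        # attacher would likely use it for per-tensor statistics or logging.
        return _Attach.apply(tensor, fprop_fn, bprop_fn)

    # e.g. round on the forward pass, clip gradients on the backward pass:
    x = torch.randn(4, requires_grad=True)
    y = attach(x, fprop_fn=torch.round, bprop_fn=lambda g: g.clamp(-1, 1))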