def quantized_linear(input, weight, bias=None):
    # Inference: quantize the weight and the linear output directly.
    if self.inference_only:
        weight_ = self.quantize_instant(weight, "weight")
        res = self.origin_linear(input, weight_, bias)
        return self.quantize_instant(res, "activation_linear")
    # Dual precision: separate quantizers for the forward and backward passes.
    elif self.dual_precision:
        return self.dual_prec_linear(input, weight, bias)
    # Default: attach both fprop and bprop quantizers to input, weight, bias and output.
    else:
        input_ = attacher.pytorch_attach(input, self.fprop_quantizer, self.bprop_quantizer, tag='activation/in')
        weight_ = attacher.pytorch_attach(weight, self.fprop_quantizer, self.bprop_quantizer, tag='weight')
        if bias is not None:
            bias_ = attacher.pytorch_attach(bias, self.fprop_quantizer, self.bprop_quantizer, tag='bias')
        else:
            bias_ = bias
        res = self.origin_linear(input_, weight_, bias_)
        return attacher.pytorch_attach(res, self.fprop_quantizer, self.bprop_quantizer, tag='activation_linear')
def quantized_matmul(tensor1, tensor2):
    # Forward-only quantization of both operands and the result with the activation quantizer.
    tensor1_ = attacher.pytorch_attach(tensor1, self.activation_quantizer, None)
    tensor2_ = attacher.pytorch_attach(tensor2, self.activation_quantizer, None)
    res = self.origin_matmul(tensor1_, tensor2_)
    return attacher.pytorch_attach(res, self.activation_quantizer, None)
def quantized_matmul(tensor1, tensor2):
    # Guard: this fprop/bprop matmul variant is disabled and raises if it is ever called.
    assert False
    tensor1_ = attacher.pytorch_attach(tensor1, self.fprop_quantizer, self.bprop_quantizer)
    tensor2_ = attacher.pytorch_attach(tensor2, self.fprop_quantizer, self.bprop_quantizer)
    res = self.origin_matmul(tensor1_, tensor2_)
    return attacher.pytorch_attach(res, self.fprop_quantizer, self.bprop_quantizer)
def quantized_conv2d(input, weight, bias=None, stride=1, padding=0, dilation=1, groups=1):
    # Inference: quantize the weight only; the bias and output pass through unchanged.
    if self.inference_only:
        weight_ = self.quantize_instant(weight, "weight")
        return self.origin_conv2d(input, weight_, bias, stride, padding, dilation, groups)
    # Dual precision: separate quantizers for the forward and backward passes.
    elif self.dual_precision:
        return self.dual_prec_conv2d(input, weight, bias, stride, padding, dilation, groups)
    # Default: attach both fprop and bprop quantizers to input, weight, bias and output.
    else:
        input_ = attacher.pytorch_attach(input, self.fprop_quantizer, self.bprop_quantizer, tag='activation/in')
        weight_ = attacher.pytorch_attach(weight, self.fprop_quantizer, self.bprop_quantizer, tag='weight')
        if bias is not None:
            bias_ = attacher.pytorch_attach(bias, self.fprop_quantizer, self.bprop_quantizer, tag='bias')
        else:
            bias_ = bias
        res = self.origin_conv2d(input_, weight_, bias_, stride, padding, dilation, groups)
        return attacher.pytorch_attach(res, self.fprop_quantizer, self.bprop_quantizer, tag='activation')
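# Hedged sketch: judging by the saved `origin_*` handles, these wrappers are presumably
# installed by swapping the framework entry points for the quantized versions, roughly as
# follows (hypothetical installation code; names and call sites are assumptions):
#
#     self.origin_linear = torch.nn.functional.linear
#     self.origin_conv2d = torch.nn.functional.conv2d
#     self.origin_matmul = torch.matmul
#     torch.nn.functional.linear = quantized_linear
#     torch.nn.functional.conv2d = quantized_conv2d
#     torch.matmul = quantized_matmul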
def dual_prec_conv2d(self, input, weight, bias=None, stride=1, padding=0, dilation=1, groups=1):
    # fprop conv2d quantized by fprop_quantizer
    input_fprop = attacher.pytorch_attach(input, self.fprop_quantizer, None, tag='activation/in')
    weight_fprop = attacher.pytorch_attach(weight, self.fprop_quantizer, None, tag='weight')
    if bias is not None:
        bias_fprop = attacher.pytorch_attach(bias, self.fprop_quantizer, None, tag='bias')
    else:
        bias_fprop = bias
    conv_fprop = self.origin_conv2d(input_fprop, weight_fprop, bias_fprop, stride, padding, dilation, groups)
    conv_fprop = attacher.pytorch_attach(conv_fprop, self.fprop_quantizer, None, tag='activation')

    # bprop conv2d quantized by bprop_quantizer
    input_bprop = attacher.pytorch_attach(input, None, self.bprop_quantizer, tag='activation/in')
    weight_bprop = attacher.pytorch_attach(weight, None, self.bprop_quantizer, tag='weight')
    if bias is not None:
        bias_bprop = attacher.pytorch_attach(bias, None, self.bprop_quantizer, tag='bias')
    else:
        bias_bprop = bias
    conv_bprop = self.origin_conv2d(input_bprop, weight_bprop, bias_bprop, stride, padding, dilation, groups)
    conv_bprop = attacher.pytorch_attach(conv_bprop, None, self.bprop_quantizer, tag='activation')

    return conv_fprop.detach() + conv_bprop - conv_bprop.detach()
def dual_prec_linear(self, input, weight, bias=None):
    # fprop linear quantized by fprop_quantizer
    input_fprop = attacher.pytorch_attach(input, self.fprop_quantizer, None, tag='activation/in')
    weight_fprop = attacher.pytorch_attach(weight, self.fprop_quantizer, None, tag='weight')
    if bias is not None:
        bias_fprop = attacher.pytorch_attach(bias, self.fprop_quantizer, None, tag='bias')
    else:
        bias_fprop = bias
    linear_fprop = self.origin_linear(input_fprop, weight_fprop, bias_fprop)
    linear_fprop = attacher.pytorch_attach(linear_fprop, self.fprop_quantizer, None, tag='activation_linear')

    # bprop linear quantized by bprop_quantizer
    input_bprop = attacher.pytorch_attach(input, None, self.bprop_quantizer, tag='activation/in')
    weight_bprop = attacher.pytorch_attach(weight, None, self.bprop_quantizer, tag='weight')
    if bias is not None:
        bias_bprop = attacher.pytorch_attach(bias, None, self.bprop_quantizer, tag='bias')
    else:
        bias_bprop = bias
    linear_bprop = self.origin_linear(input_bprop, weight_bprop, bias_bprop)
    linear_bprop = attacher.pytorch_attach(linear_bprop, None, self.bprop_quantizer, tag='activation_linear')

    return linear_fprop.detach() + linear_bprop - linear_bprop.detach()
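# The return expression `fprop.detach() + bprop - bprop.detach()` in the two dual-precision
# wrappers equals the fprop-quantized result in the forward pass, while gradients flow only
# through the bprop-quantized branch. Minimal standalone sketch of the same pattern
# (hypothetical tensors a and b, standing in for the fprop and bprop results):
#
#     import torch
#     a = torch.tensor([3.0], requires_grad=True)     # plays the role of linear_fprop
#     b = torch.tensor([5.0], requires_grad=True)     # plays the role of linear_bprop
#     out = a.detach() + b - b.detach()
#     assert out.item() == 3.0                        # forward value comes from a
#     out.backward()
#     assert a.grad is None and b.grad.item() == 1.0  # gradient reaches b only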
def quantize_tensor(self, tensor, fprop=True, bprop=True):
    # Forward-only variant: uses the activation quantizer; the `bprop` argument is ignored.
    fprop = self.activation_quantizer if fprop else None
    return attacher.pytorch_attach(tensor, fprop, None)
def quantize_tensor(self, tensor, fprop=True, bprop=True):
    fprop = self.fprop_quantizer if fprop else None
    bprop = self.bprop_quantizer if bprop else None
    return attacher.pytorch_attach(tensor, fprop, bprop)
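# Usage sketch (hypothetical: `qm` is the object that owns these methods, `x` a torch.Tensor):
#
#     x_q   = qm.quantize_tensor(x)               # quantize in both forward and backward passes
#     x_fwd = qm.quantize_tensor(x, bprop=False)  # quantize the forward pass only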