def round_weight_each_step(self, weight, bits=16):
    # print('before quantize: ', input)

    # quantization v1
    # if bits == 1:
    #     return torch.sign(weight)
    # S = 2. ** (bits - 1)
    # if bits > 15 or bits == 1:
    #     delta = 0
    # else:
    #     delta = 1. / S
    # max_val = 1 - delta
    # min_val = delta - 1
    # weight_clamp = torch.clamp(weight, min_val, max_val)
    # qweight = torch.round(weight_clamp * S) / S
    # print('after quantize: ', input_round)

    # quantization v2: range-based quantization via the calculate_qparams /
    # quantize helpers
    weight_qparams = calculate_qparams(weight, num_bits=bits,
                                       flatten_dims=(1, -1), reduce_dim=None)
    qweight = quantize(weight, qparams=weight_qparams)
    # count the distinct quantized values in the first filter as a sanity check
    weight_unique = torch.unique(qweight[0])
    print('add weight range:', weight_unique.size()[0] - 1)
    return qweight
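# A minimal standalone sketch of the commented-out "quantization v1" path above,
# assuming weights lie in [-1, 1]: symmetric uniform quantization with scale
# S = 2**(bits - 1). The function name round_weight_v1 is hypothetical.
import torch

def round_weight_v1(weight, bits=16):
    if bits == 1:
        return torch.sign(weight)             # binary weights
    S = 2. ** (bits - 1)
    if bits > 15 or bits == 1:
        delta = 0
    else:
        delta = 1. / S                         # one step of the quantization grid
    # clamp so the rounded value never overflows the representable range
    weight_clamp = torch.clamp(weight, delta - 1, 1 - delta)
    return torch.round(weight_clamp * S) / S   # snap to the uniform grid

# example: 4-bit quantization yields at most 2**4 - 1 = 15 distinct values
w = torch.randn(8, 8).clamp_(-1, 1)
print(torch.unique(round_weight_v1(w, bits=4)).numel())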
def forward(self, input):
    if self.sparsity != 0:
        # apply the pruning mask element-wise before the convolution
        self.weight.data = self.weight.data * self.weight_mask.data
    if self.quantize:
        # quantization v2: quantize the input and the weights, run the
        # convolution on the quantized tensors, then quantize the gradient
        # on the backward pass
        input_q = self.quantize_input_fw(input, self.weight_bits)
        weight_qparams = calculate_qparams(self.weight, num_bits=self.weight_bits,
                                           flatten_dims=(1, -1), reduce_dim=None)
        self.qweight = quantize(self.weight, qparams=weight_qparams)
        bias_fixed_point = None  # unused placeholder in this path
        output = F.conv2d(input_q, self.qweight, None, self.stride, self.padding)
        output = quantize_grad(output, num_bits=self.weight_bits, flatten_dims=(1, -1))
    else:
        output = F.conv2d(input, self.weight, None, self.stride, self.padding)
    if self.bias:
        # reshape the (C,) bias to (1, C, 1, 1) so it broadcasts over N, H, W
        output += self.b.unsqueeze(0).unsqueeze(2).unsqueeze(3)
    return output
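# A minimal sketch of how the binary weight_mask used in forward() could be
# built; magnitude-based pruning is an assumption here, and magnitude_mask is
# a hypothetical helper that is not part of the original module.
import torch

def magnitude_mask(weight, sparsity):
    # zero out the `sparsity` fraction of weights with the smallest magnitude
    flat = weight.abs().flatten()
    k = int(sparsity * flat.numel())
    if k == 0:
        return torch.ones_like(weight)
    threshold = flat.kthvalue(k).values
    return (weight.abs() > threshold).float()

# example: prune 50% of a conv weight tensor, then apply the mask element-wise
# exactly as forward() does with self.weight_mask
w = torch.randn(16, 8, 3, 3)
mask = magnitude_mask(w, sparsity=0.5)
w_pruned = w * mask
print('kept fraction:', mask.mean().item())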