def max_output_bit_width(self, input_bit_width):
    """Return the output bit width for an input scaled by ``self.in_channels``.

    Args:
        input_bit_width: Bit width of the input, forwarded to ``max_int``.

    Returns:
        ``ceil_ste(log2(max_int(...) * self.in_channels))``.
    """
    # max_int is queried for the unsigned (signed=False), full-range
    # (narrow_range=False) case at the input bit width.
    input_ceiling = max_int(
        signed=False, narrow_range=False, bit_width=input_bit_width)
    output_ceiling = input_ceiling * self.in_channels
    return ceil_ste(torch.log2(output_ceiling))
def max_output_bit_width(self, input_bit_width, weight_bit_width):
    """Return the output bit width from input width, weight width and ``in_features``.

    Args:
        input_bit_width: Bit width of the input, forwarded to ``max_uint``.
        weight_bit_width: Bit width of the weights, forwarded to the weight
            quantizer's ``int_quant.max_uint``.

    Returns:
        ``ceil_ste(log2(max_input * max_weight * self.in_features))``.
    """
    input_ceiling = max_uint(narrow_range=False, bit_width=input_bit_width)
    int_quant = self.weight_quant.tensor_quant.int_quant
    weight_ceiling = int_quant.max_uint(weight_bit_width)
    # Same left-to-right product as before: input, then weight, then fan-in.
    output_ceiling = input_ceiling * weight_ceiling * self.in_features
    return ceil_ste(torch.log2(output_ceiling))
def max_acc_bit_width(self, input_bit_width, weight_bit_width):
    """Return the accumulator bit width for the given input and weight widths.

    The bound is the product of the maximum unsigned input value, the maximum
    unsigned weight value, ``self.kernel_size[0]`` and
    ``self.out_channels // self.groups``.

    Args:
        input_bit_width: Bit width of the input, forwarded to ``max_int``.
        weight_bit_width: Bit width of the weights, forwarded to
            ``self.weight_quant.max_uint_value``.

    Returns:
        ``ceil_ste(log2(bound))``.
    """
    input_ceiling = max_int(
        signed=False, narrow_range=False, bit_width=input_bit_width)
    weight_ceiling = self.weight_quant.max_uint_value(weight_bit_width)
    # NOTE(review): the per-group size is derived from out_channels here;
    # confirm whether in_channels // groups was intended for the reduction.
    channels_per_group = self.out_channels // self.groups
    acc_ceiling = (
        input_ceiling * weight_ceiling * self.kernel_size[0] * channels_per_group)
    return ceil_ste(torch.log2(acc_ceiling))
def __add__(self, other):
    """Add ``other`` to this tensor and return a new ``QuantTensor``.

    Three cases are handled:

    * ``other`` is not a ``QuantTensor``: it is added to ``self.value`` and
      the result is wrapped without quantization metadata.
    * ``other`` is a ``QuantTensor`` but ``self.is_not_none`` or
      ``other.is_not_none`` is falsy: only the values are added.
    * Otherwise: scales and zero points are checked for equality, values are
      added, scale and zero point are averaged, and the output bit width is
      ``ceil_ste(log2(max_val - min_val))`` over the summed integer ranges.

    Args:
        other: A ``QuantTensor`` or anything addable to ``self.value``.

    Returns:
        A new ``QuantTensor`` holding the sum.
    """
    if not isinstance(other, QuantTensor):
        return QuantTensor(self.value + other)
    if not (self.is_not_none and other.is_not_none):
        return QuantTensor(self.value + other.value)

    self.check_scaling_factors_same(other)
    self.check_zero_points_same(other)

    summed_value = self.value + other.value
    mean_scale = (self.scale + other.scale) / 2
    mean_zero_point = (self.zero_point + other.zero_point) / 2

    # Upper and lower integer bounds of the sum are the sums of the
    # per-operand bounds (full range, each operand's own signedness).
    upper = max_int(bit_width=self.bit_width, signed=self.signed, narrow_range=False)
    upper += max_int(bit_width=other.bit_width, signed=other.signed, narrow_range=False)
    lower = min_int(bit_width=self.bit_width, signed=self.signed, narrow_range=False)
    lower += min_int(bit_width=other.bit_width, signed=other.signed, narrow_range=False)
    sum_bit_width = ceil_ste(torch.log2(upper - lower))

    either_signed = self.signed or other.signed
    either_training = self.training or other.training
    return QuantTensor(
        value=summed_value,
        scale=mean_scale,
        zero_point=mean_zero_point,
        bit_width=sum_bit_width,
        signed=either_signed,
        training=either_training)
def max_acc_bit_width(self, input_bit_width):
    """Return the accumulator bit width for an input scaled by ``self._avg_scaling``.

    Args:
        input_bit_width: Bit width of the input, forwarded to ``max_int``.

    Returns:
        ``ceil_ste(log2(max_int(...) * self._avg_scaling))``.
    """
    input_ceiling = max_int(
        signed=False, narrow_range=False, bit_width=input_bit_width)
    acc_ceiling = input_ceiling * self._avg_scaling
    return ceil_ste(torch.log2(acc_ceiling))
def max_acc_bit_width(self, input_bit_width, reduce_size):
    """Return the accumulator bit width for a reduction over ``reduce_size`` values.

    Args:
        input_bit_width: Bit width of the input, forwarded to ``max_int``.
        reduce_size: Multiplier applied to the maximum unsigned input value.

    Returns:
        ``ceil_ste(log2(max_int(...) * reduce_size))``.
    """
    input_ceiling = max_int(
        signed=False, narrow_range=False, bit_width=input_bit_width)
    acc_ceiling = input_ceiling * reduce_size
    return ceil_ste(torch.log2(acc_ceiling))
def max_acc_bit_width(self, input_bit_width, weight_bit_width):
    """Return the accumulator bit width, accounting for stride-dependent overlap.

    The bound is the product of the maximum unsigned input value, the maximum
    unsigned weight value, the number of overlapping sums
    (``round(kernel_size[0] / stride[0])``, at least 1) and
    ``self.out_channels // self.groups``.

    Args:
        input_bit_width: Bit width of the input, forwarded to ``max_uint``.
        weight_bit_width: Bit width of the weights, forwarded to
            ``self.weight_quant.max_uint_value``.

    Returns:
        ``ceil_ste(log2(bound))``.
    """
    input_ceiling = max_uint(narrow_range=False, bit_width=input_bit_width)
    weight_ceiling = self.weight_quant.max_uint_value(weight_bit_width)
    # NOTE(review): per-group size is taken from out_channels; confirm this
    # is the intended reduction dimension for this layer.
    channels_per_group = self.out_channels // self.groups
    kernel_to_stride = round(self.kernel_size[0] / self.stride[0])
    overlap_count = max(kernel_to_stride, 1)
    acc_ceiling = (
        input_ceiling * weight_ceiling * overlap_count * channels_per_group)
    return ceil_ste(torch.log2(acc_ceiling))
def max_acc_bit_width(self, input_bit_width, weight_bit_width):
    """Return the bit width of a single input-times-weight product.

    Args:
        input_bit_width: Bit width of the input, forwarded to ``max_int``.
        weight_bit_width: Bit width of the weights, forwarded to
            ``self.weight_quant.max_uint_value``.

    Returns:
        ``ceil_ste(log2(max_input * max_weight))``.
    """
    input_ceiling = max_int(
        signed=False, narrow_range=False, bit_width=input_bit_width)
    weight_ceiling = self.weight_quant.max_uint_value(weight_bit_width)
    product_ceiling = input_ceiling * weight_ceiling
    return ceil_ste(torch.log2(product_ceiling))
def __add__(self, other):
    """Add two quantized tensors and pack the result.

    ``other`` is first passed through ``QuantTensor.check_input_type`` and
    ``self.check_scaling_factors_same``. The output scale is the mean of the
    two scales, and the output bit width is ``ceil_ste(log2(...))`` of the
    sum of both operands' maximum unsigned values.

    Args:
        other: The right-hand operand; must expose ``tensor``, ``scale`` and
            ``bit_width``.

    Returns:
        The result of ``pack_quant_tensor(tensor, scale, bit_width)``.
    """
    QuantTensor.check_input_type(other)
    self.check_scaling_factors_same(other)

    summed = self.tensor + other.tensor
    mean_scale = (self.scale + other.scale) / 2

    uint_ceiling = max_uint(bit_width=self.bit_width, narrow_range=False)
    uint_ceiling += max_uint(bit_width=other.bit_width, narrow_range=False)
    sum_bit_width = ceil_ste(torch.log2(uint_ceiling))

    return pack_quant_tensor(summed, mean_scale, sum_bit_width)
def __add__(self, other):
    """Add two ``QuantTensor`` operands.

    When both operands report ``is_valid``, scales are checked for equality,
    the output scale is their mean, the output bit width is
    ``ceil_ste(log2(...))`` of the summed maximum unsigned values, and the
    result is signed if either operand is. Otherwise only the values are
    added and wrapped without metadata.

    Args:
        other: The right-hand operand, checked by
            ``QuantTensor.check_input_type``.

    Returns:
        A new ``QuantTensor`` holding the sum.
    """
    QuantTensor.check_input_type(other)

    # Without valid metadata on both sides, fall back to a plain value sum.
    if not (self.is_valid and other.is_valid):
        return QuantTensor(self.value + other.value)

    self.check_scaling_factors_same(other)
    summed_value = self.value + other.value
    mean_scale = (self.scale + other.scale) / 2

    uint_ceiling = max_uint(bit_width=self.bit_width, narrow_range=False)
    uint_ceiling += max_uint(bit_width=other.bit_width, narrow_range=False)
    sum_bit_width = ceil_ste(torch.log2(uint_ceiling))

    either_signed = self.signed or other.signed
    return QuantTensor(summed_value, mean_scale, sum_bit_width, either_signed)
def __add__(self, other):
    """Add two ``QuantTensor`` operands, propagating quantization metadata.

    When both operands report ``is_valid``, scales and zero points are
    checked for equality and then averaged, the output bit width is
    ``ceil_ste(log2(...))`` of the summed maximum unsigned values, and the
    ``signed`` / ``training`` flags are OR-ed. Otherwise only the values are
    added and wrapped without metadata.

    Args:
        other: The right-hand operand, checked by
            ``QuantTensor.check_input_type``.

    Returns:
        A new ``QuantTensor`` holding the sum.
    """
    QuantTensor.check_input_type(other)

    # Without valid metadata on both sides, fall back to a plain value sum.
    if not (self.is_valid and other.is_valid):
        return QuantTensor(self.value + other.value)

    self.check_scaling_factors_same(other)
    self.check_zero_points_same(other)

    summed_value = self.value + other.value
    mean_scale = (self.scale + other.scale) / 2
    mean_zero_point = (self.zero_point + other.zero_point) / 2

    uint_ceiling = max_int(bit_width=self.bit_width, signed=False, narrow_range=False)
    uint_ceiling += max_int(bit_width=other.bit_width, signed=False, narrow_range=False)
    sum_bit_width = ceil_ste(torch.log2(uint_ceiling))

    either_signed = self.signed or other.signed
    either_training = self.training or other.training
    return QuantTensor(
        value=summed_value,
        scale=mean_scale,
        zero_point=mean_zero_point,
        bit_width=sum_bit_width,
        signed=either_signed,
        training=either_training)
def max_output_bit_width(self, input_bit_width):
    """Return the output bit width for a ``kernel_size`` x ``kernel_size`` window.

    Args:
        input_bit_width: Bit width of the input, forwarded to ``max_uint``.

    Returns:
        ``ceil_ste(log2(max_uint(...) * self.kernel_size * self.kernel_size))``.
    """
    input_ceiling = max_uint(narrow_range=False, bit_width=input_bit_width)
    # Multiply by kernel_size twice, in the same left-to-right order as before.
    output_ceiling = input_ceiling * self.kernel_size * self.kernel_size
    return ceil_ste(torch.log2(output_ceiling))
def forward(self, x: torch.Tensor):
    """Apply ``ceil_ste`` to ``x`` and return the result.

    Args:
        x: Input tensor.

    Returns:
        Whatever ``ceil_ste(x)`` returns.
    """
    rounded_up = ceil_ste(x)
    return rounded_up