def test_get_quantized_range(num_bits, signed, restrict, expected_q_min, expected_q_max):
    """Check the integer bounds returned by ``qu.get_quantized_range``.

    The arguments are supplied from outside this function (presumably by a
    parametrization decorator that is not visible here -- confirm).

    Args:
        num_bits: Bit-width forwarded to ``get_quantized_range``.
        signed: Forwarded as the ``signed`` keyword.
        restrict: Forwarded as the ``signed_restrict_qrange`` keyword.
        expected_q_min: Lower bound the call is expected to return.
        expected_q_max: Upper bound the call is expected to return.
    """
    actual_range = qu.get_quantized_range(
        num_bits,
        signed=signed,
        signed_restrict_qrange=restrict,
    )
    lower_bound, upper_bound = actual_range

    # Two separate asserts so a failure pinpoints which bound is off.
    assert lower_bound == expected_q_min
    assert upper_bound == expected_q_max
def attach_quant_metadata(t, num_bits, quant_mode, stats=None, clip_mode=ClipMode.NONE,
                          per_channel=False, num_stds=None, scale_approx_mult_bits=None):
    """Compute quantization parameters for ``t`` and attach them as metadata.

    The scale and zero-point come from the tensor itself when ``stats`` is
    ``None``, and from the supplied statistics dictionary otherwise. They are
    stored, together with the quantized value range, on ``t.quant_metadata``
    as a ``TensorQuantMetadata``.

    Args:
        t: Object to annotate; it gains a ``quant_metadata`` attribute.
        num_bits: Number of bits of the quantized representation.
        quant_mode: A ``LinearQuantMode`` member selecting the scheme.
        stats: Optional pre-collected statistics. When given, ``t`` is not
            inspected to derive the scale and zero-point.
        clip_mode: Clipping mode forwarded to the parameter helpers.
        per_channel: Forwarded to the tensor-based helper only; the
            stats-based path does not receive it.
        num_stds: Forwarded to the parameter helpers.
        scale_approx_mult_bits: Forwarded to the parameter helpers.

    Returns:
        The same object ``t``, after ``quant_metadata`` has been set.
    """
    if stats is None:
        scale, zp = _get_quant_params_from_tensor(
            t, num_bits, quant_mode, clip_mode, per_channel, num_stds,
            scale_approx_mult_bits)
    else:
        scale, zp = _get_quant_params_from_stats_dict(
            stats, num_bits, quant_mode, clip_mode, num_stds,
            scale_approx_mult_bits)

    # Every mode except ASYMMETRIC_UNSIGNED uses a signed integer range.
    signed = quant_mode != LinearQuantMode.ASYMMETRIC_UNSIGNED
    restrict = quant_mode == LinearQuantMode.SYMMETRIC_RESTRICTED

    # Forward the restricted-range flag: previously it was computed but never
    # used, so SYMMETRIC_RESTRICTED tensors were tagged with the full range.
    min_q_val, max_q_val = q_utils.get_quantized_range(
        num_bits, signed, signed_restrict_qrange=restrict)

    t.quant_metadata = TensorQuantMetadata(scale, zp, min_q_val, max_q_val)
    return t
def _fake_quant_tensor(tensor, n_bits, mode, per_channel):
    """Quantize and immediately dequantize ``tensor``, passing ``inplace=True``.

    Nothing is returned; both ``q_utils`` calls are asked to operate in place
    on ``tensor``.

    Args:
        tensor: Tensor handed to the quantize/dequantize helpers.
        n_bits: Number of bits of the quantized representation.
        mode: A ``LinearQuantMode`` member selecting the scheme.
        per_channel: Forwarded to ``_get_quant_params_from_tensor``.
    """
    # NOTE(review): the restricted-range flag is not passed here, so
    # SYMMETRIC_RESTRICTED clamps to the full signed range -- confirm intended.
    is_signed = mode != LinearQuantMode.ASYMMETRIC_UNSIGNED
    value_range = q_utils.get_quantized_range(n_bits, is_signed)
    lowest, highest = value_range

    quant_params = _get_quant_params_from_tensor(
        tensor, n_bits, mode, per_channel=per_channel)
    scale, zero_point = quant_params

    q_utils.linear_quantize_clamp(
        tensor, scale, zero_point, lowest, highest, inplace=True)
    q_utils.linear_dequantize(tensor, scale, zero_point, inplace=True)