def test_encoding_analyzer_with_numpy_interface(self):
    """
    Check the 8-bit asymmetric encoding produced by the libpymo encoding analyzer
    (QUANTIZATION_TF mode) against a reference min/max computed with NumPy
    :return:
    """
    np.random.seed(10)
    random_input = 5 * (np.random.normal(size=[1, 3, 224, 224])) + 2

    # Reference range over the full (unscaled) input, widened so that it always contains zero
    num_steps = 255
    expected_min = np.minimum(0., random_input.min())
    expected_max = np.maximum(0., random_input.max())

    # Snap the reference minimum onto a whole number of steps, then rebuild the maximum from it
    step_size = (expected_max - expected_min) / num_steps
    grid_offset = np.round(expected_min / step_size)
    expected_min = grid_offset * step_size
    expected_max = expected_min + num_steps * step_size

    analyzer = libpymo.EncodingAnalyzerForPython(libpymo.QuantizationMode.QUANTIZATION_TF)
    analyzer.updateStats(random_input, False)
    encoding, is_valid = analyzer.computeEncoding(8, False, False, False)

    print("Encoding.min=", encoding.min)
    print("Encoding.max=", encoding.max)

    self.assertTrue(is_valid)
    self.assertAlmostEqual(expected_min, encoding.min, places=5)
    self.assertAlmostEqual(expected_max, encoding.max, places=5)
def compute_encoding_for_given_bitwidth(data: np.ndarray, bitwidth: int, quant_scheme: QuantScheme,
                                        is_symmetric: bool) -> Dict:
    """
    Compute an encoding for the given data and return it as a dictionary

    :param data: Numpy data
    :param bitwidth: bitwidth (4-31) to use for quantizing data
    :param quant_scheme: Quantization scheme
    :param is_symmetric: True if symmetric encodings is used, False otherwise
    :return: Encoding Dictionary with keys 'min', 'max', 'scale', 'offset', 'bitwidth' and
             'is_symmetric'; an empty dictionary when the computed encoding is reported invalid
    """
    analyzer = libpymo.EncodingAnalyzerForPython(quant_scheme)

    # The data is a numpy array in CPU memory, so the useCuda argument is False
    analyzer.updateStats(data, False)
    computed, valid = analyzer.computeEncoding(bitwidth, is_symmetric, False, False)

    if not valid:
        return {}

    return {
        'min': computed.min,
        'max': computed.max,
        'scale': computed.delta,
        'offset': computed.offset,
        'bitwidth': computed.bw,
        # The symmetry flag is stored as a string, not as a bool
        'is_symmetric': str(is_symmetric),
    }
def _get_quantized_weights(weight_tensor, quant_params):
    """
    helper function to get quantized dequantized weights

    :param weight_tensor: weight tensor
    :param quant_params: quantization params; the attributes quant_mode, round_mode and use_cuda are read
    :return: quantized de-quantized weight tensor, or the unmodified weight_tensor when the
             computed encoding is reported invalid
    """
    # TF mode is selected by either the QuantScheme value or the plain string 'tf';
    # anything else falls back to TF-enhanced
    if quant_params.quant_mode == QuantScheme.post_training_tf or quant_params.quant_mode == 'tf':
        pymo_quant_mode = libpymo.QuantizationMode.QUANTIZATION_TF
    else:
        pymo_quant_mode = libpymo.QuantizationMode.QUANTIZATION_TF_ENHANCED

    pymo_round_mode = (libpymo.RoundingMode.ROUND_STOCHASTIC
                       if quant_params.round_mode == 'stochastic'
                       else libpymo.RoundingMode.ROUND_NEAREST)

    # Weights are always quantized to a fixed 8 bits here
    num_bits = 8

    # Gather statistics over the weights with EncodingAnalyzerForPython and derive an encoding
    analyzer = libpymo.EncodingAnalyzerForPython(pymo_quant_mode)
    analyzer.updateStats(weight_tensor, quant_params.use_cuda)
    weight_encoding, encoding_ok = analyzer.computeEncoding(num_bits, False, False, False)

    if not encoding_ok:
        # No usable encoding: hand back the original weights untouched
        return weight_tensor

    # Apply quantize-dequantize with TensorQuantizationSimForPython using the derived encoding
    quant_sim = libpymo.TensorQuantizationSimForPython()
    return quant_sim.quantizeDequantize(weight_tensor, weight_encoding, pymo_round_mode,
                                        quant_params.use_cuda)