def testQCQuantize_CheckInvalidEncodingsGpu(self):
    _log.info('running testQCQuantize_CheckInvalidEncodings')
    for use_gpu in [True]:
        _log.info('GPU mode is selected' if use_gpu else 'CPU mode is selected')
        with self.session(use_gpu=use_gpu):
            bw = 8

            # Instantiate DlQuantization object
            comp_mode = libpymo.ComputationMode.COMP_MODE_GPU if use_gpu \
                else libpymo.ComputationMode.COMP_MODE_CPU
            libpytrext.InitQuantizer(["conv1"], comp_mode, [],
                                     libpymo.QuantizationMode.QUANTIZATION_TF_ENHANCED)

            # Set encodings with mismatched min/max list lengths, which is invalid
            enc_min_list = [-10.0, 0.5, 20]
            enc_max_list = [100.0, 150.0, 200.0, 255.0]
            output = self.qc_quantize_module.qc_quantize_deprecated(
                op_name='conv1',
                training_in_progress=False,
                config=int(libpytrext.config_type.CONFIG_TYPE_SET_ENCODING),
                bitwidth=bw,
                in_tensors=[[]],
                fixed_enc_mins=enc_min_list,
                fixed_enc_maxs=enc_max_list)

            # Evaluating the op should fail because the encodings are invalid
            with self.assertRaises(errors_impl.InvalidArgumentError):
                ops.convert_to_tensor(output[0]).eval()

            libpytrext.ResetQuantizer()
def testQCQuantize_GetEncodingsGpu(self):
    _log.info('running testQCQuantize_GetEncodings')
    for use_gpu in [True]:
        _log.info('GPU mode is selected' if use_gpu else 'CPU mode is selected')
        with self.session(use_gpu=use_gpu):
            bw = 8

            # Prepare activation tensors
            ACT_MIN = -20.0
            ACT_MAX = 25.0
            actvn_1 = constant_op.constant([-10.0, -20.0, 25.0])
            actvn_2 = constant_op.constant([8.0, -19.0, 30.0])
            actvn_3 = constant_op.constant([12.0, -31.0, 35.4])

            # Instantiate DlQuantization object
            comp_mode = libpymo.ComputationMode.COMP_MODE_GPU if use_gpu \
                else libpymo.ComputationMode.COMP_MODE_CPU
            libpytrext.InitQuantizer(["conv1"], comp_mode, [],
                                     libpymo.QuantizationMode.QUANTIZATION_TF)

            # Update stats
            output_0 = self.qc_quantize_module.qc_quantize_deprecated(
                op_name='conv1',
                training_in_progress=False,
                config=int(libpytrext.config_type.CONFIG_TYPE_UPDATE_STATS),
                bitwidth=bw,
                in_tensors=[actvn_1, actvn_2, actvn_3],
                fixed_enc_mins=[],
                fixed_enc_maxs=[])
            ops.convert_to_tensor(output_0[0]).eval()

            # Get encodings
            output_1 = self.qc_quantize_module.qc_quantize_deprecated(
                op_name='conv1',
                training_in_progress=False,
                config=int(libpytrext.config_type.CONFIG_TYPE_GET_ENCODING),
                bitwidth=bw,
                in_tensors=[[]],
                fixed_enc_mins=[],
                fixed_enc_maxs=[],
                num_tensors=3)
            enc_min = ops.convert_to_tensor(output_1[1]).eval()
            enc_max = ops.convert_to_tensor(output_1[2]).eval()

            true_encodings = self._compute_encodings(enc_min[0], enc_max[0], bw)
            expected_encodings = self._compute_encodings(ACT_MIN, ACT_MAX, bw)
            error_margin = 1e-5  # Use better heuristics
            self.assertArrayNear(true_encodings, expected_encodings, error_margin)

            libpytrext.ResetQuantizer()
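# The tests in this file rely on a _compute_encodings() helper that is not shown in
# this section. The sketch below is a hypothetical stand-in, assuming a standard
# uniform (asymmetric) quantization scheme: it derives the step size (delta) and the
# zero-point offset from an encoding [min, max] range and a bitwidth. The name,
# signature, and return format are assumptions and may differ from the real helper.
def _compute_encodings_sketch(enc_min, enc_max, bitwidth):
    """Return (delta, offset) for a uniform quantizer spanning [enc_min, enc_max]."""
    num_steps = 2 ** bitwidth - 1
    delta = (enc_max - enc_min) / num_steps          # quantization step size
    offset = round(enc_min / delta) if delta else 0  # zero-point in units of delta
    return delta, offset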
def testQCQuantize_SetEncodings(self):
    _log.info('running testQCQuantize_SetEncodings')
    for use_gpu in [False]:
        _log.info('GPU mode is selected' if use_gpu else 'CPU mode is selected')
        with self.session(use_gpu=use_gpu):
            bw = 8

            # Instantiate DlQuantization object
            comp_mode = libpymo.ComputationMode.COMP_MODE_GPU if use_gpu \
                else libpymo.ComputationMode.COMP_MODE_CPU
            libpytrext.InitQuantizer(["conv1"], comp_mode, [],
                                     libpymo.QuantizationMode.QUANTIZATION_TF_ENHANCED)

            # Set encodings.
            # To get exact matches with the expected encodings, every range below
            # includes 0: the TF algorithm forces the encoding to include 0, which
            # would otherwise make the results differ from the expected values.
            enc_min_list = [-10.0, -0.5, 0]
            enc_max_list = [100.0, 200.0, 160.0]
            output_0 = self.qc_quantize_module.qc_quantize_deprecated(
                op_name='conv1',
                training_in_progress=False,
                config=int(libpytrext.config_type.CONFIG_TYPE_SET_ENCODING),
                bitwidth=bw,
                in_tensors=[[]],
                fixed_enc_mins=enc_min_list,
                fixed_enc_maxs=enc_max_list)
            ops.convert_to_tensor(output_0[0]).eval()

            # Retrieve encodings from the op and validate
            output_1 = self.qc_quantize_module.qc_quantize_deprecated(
                op_name='conv1',
                training_in_progress=False,
                config=int(libpytrext.config_type.CONFIG_TYPE_GET_ENCODING),
                bitwidth=bw,
                in_tensors=[[]],
                fixed_enc_mins=[],
                fixed_enc_maxs=[],
                num_tensors=3)
            get_enc_min = ops.convert_to_tensor(output_1[1]).eval()
            get_enc_max = ops.convert_to_tensor(output_1[2]).eval()

            for index in np.arange(0, len(enc_min_list)):
                actual_encodings = self._compute_encodings(get_enc_min[index], get_enc_max[index], bw)
                expected_encodings = self._compute_encodings(enc_min_list[index], enc_max_list[index], bw)
                self.assertAllEqual(actual_encodings, expected_encodings)

            libpytrext.ResetQuantizer()
def load_quantized_graph(meta_graph, checkpoint, encodings, graph=None, gpu=True):
    """
    Set up the saved quantization encodings and model.

    When loading a quantized graph from saved files, the quantizer must first be
    initialized with the quantization op names and the saved encodings.

    :param meta_graph: Path to meta file
    :param checkpoint: Path to checkpoint file
    :param encodings: Path to encodings file, or the encodings map itself
    :param graph: Graph to load into
    :param gpu: If True, use GPU ops
    :return: Newly created tf.compat.v1.Session
    """
    comp_mode = libpymo.ComputationMode.COMP_MODE_GPU if gpu \
        else libpymo.ComputationMode.COMP_MODE_CPU

    # Check whether a file path was passed in and needs to be parsed, or whether
    # the actual encodings map was provided directly
    if isinstance(encodings, str):
        with open(encodings, 'r') as f:
            encodings = json.load(f)

    quant_mode = encodings['quant_mode']
    if quant_mode not in _QUANT_MODES:
        raise ValueError('Invalid quantization mode: ' + quant_mode)
    quant_mode = _QUANT_MODES[quant_mode]

    libpytrext.ResetQuantizer()
    libpytrext.InitQuantizer(list(encodings.keys()), comp_mode, [], quant_mode)

    # Apply the saved activation encodings using a scratch graph and session
    g = tf.Graph()
    with g.as_default():
        sess = tf.compat.v1.Session(graph=g)
        _set_activation_encodings(sess, encodings, gpu=gpu)

    # Use the provided graph, if it exists
    if not graph:
        graph = tf.Graph()
    with graph.as_default():
        sess, _ = _load_graph(graph, meta_graph, checkpoint)

    return sess
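# A minimal usage sketch for load_quantized_graph(). The file names below are
# placeholders chosen for illustration; they are not produced by this module.
def _example_load_quantized_graph():  # pragma: no cover - illustrative only
    sess = load_quantized_graph(meta_graph='./quantized_model.meta',
                                checkpoint='./quantized_model',
                                encodings='./quantized_model.encodings.json',
                                gpu=False)
    with sess.graph.as_default():
        pass  # run inference or further processing on the restored, quantized graph
    return sess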
def _prepare_graph_for_quantization(self, collect_stats=True):
    """
    Inserts the appropriate quantization ops and pre-quantizes the params,
    depending on the configuration parameters. Operations are inserted into the
    current default graph.

    Raises:
        RuntimeError: Thrown when there was an error inserting operations

    :param collect_stats: If True, stats are collected
    :return: None
    """
    # Get the op query module
    query = core.OpQuery(self._sess.graph, op_map=self._op_map, ops_to_ignore=self._ops_to_ignore)

    # Query the known op groups and insert quantization nodes after the ops.
    # Should we also be including quantization ops starting with labels? No for now...
    activation_ops = query.get_known_ops(inputs=self._input_tensor_names)

    # Query all ops with weights and quantize the input weights
    weight_ops = query.get_weight_ops(skip_bias_op=self._skip_bias)
    input_indices = query.get_weight_inputs(weight_ops)

    # Instantiate DlQuantization object
    quant_node_names = [self._get_quantized_name(op.name) for op in activation_ops]
    libpytrext.ResetQuantizer()
    libpytrext.InitQuantizer(quant_node_names, self._comp_mode, [], self._quant_mode)

    # Add quantization ops/data
    self._insert_weight_quantization_ops(weight_ops, input_indices)
    if not self._skip_output:
        self._insert_activation_quantization_ops(activation_ops, collect_stats)
def testQCQuantize_Params(self):
    _log.info('running testQCQuantize_Params')
    for use_gpu in [False, True]:
        _log.info('GPU mode is selected' if use_gpu else 'CPU mode is selected')
        with self.session(use_gpu=use_gpu):
            bw = 8
            PARAM_MIN = -50.0
            PARAM_MAX = 80.0
            comp_mode = libpymo.ComputationMode.COMP_MODE_GPU if use_gpu \
                else libpymo.ComputationMode.COMP_MODE_CPU

            # Instantiate DlQuantization object
            libpytrext.InitQuantizer(["conv1"], comp_mode, [],
                                     libpymo.QuantizationMode.QUANTIZATION_TF_ENHANCED)

            weights = constant_op.constant([-40.0, -1.0, 0.0, 1.0, 2.0, -50.0, 80.0])

            # Quantize and de-quantize params
            test_output = self.qc_quantize_module.qc_quantize_deprecated(
                op_name='conv1',
                training_in_progress=False,
                config=int(libpytrext.config_type.CONFIG_TYPE_Q_DQ_PARAMS),
                bitwidth=bw,
                in_tensors=[weights],
                fixed_enc_mins=[],
                fixed_enc_maxs=[],
                num_tensors=1)
            quantized_weights = ops.convert_to_tensor(test_output[0]).eval()
            self.assertAllClose(quantized_weights[0], weights.eval(), 1.0)

            # Examine encodings of quantized params
            out_enc_min = ops.convert_to_tensor(test_output[1]).eval()
            out_enc_max = ops.convert_to_tensor(test_output[2]).eval()
            true_encodings = self._compute_encodings(out_enc_min[0], out_enc_max[0], bw)
            expected_encodings = self._compute_encodings(PARAM_MIN, PARAM_MAX, bw)
            error_margin = 10  # Use better heuristics; ideally there should be 0 error margin
            self.assertArrayNear(true_encodings, expected_encodings, error_margin)

            # Repeat the test with training_in_progress=True
            test_output = self.qc_quantize_module.qc_quantize_deprecated(
                op_name='conv1',
                training_in_progress=True,
                config=int(libpytrext.config_type.CONFIG_TYPE_Q_DQ_PARAMS),
                bitwidth=bw,
                in_tensors=[weights],
                fixed_enc_mins=[],
                fixed_enc_maxs=[],
                num_tensors=1)
            quantized_weights = ops.convert_to_tensor(test_output[0]).eval()
            self.assertAllClose(quantized_weights[0], weights.eval(), 1.0)

            # Examine encodings of quantized params
            out_enc_min = ops.convert_to_tensor(test_output[1]).eval()
            out_enc_max = ops.convert_to_tensor(test_output[2]).eval()
            true_encodings = self._compute_encodings(out_enc_min[0], out_enc_max[0], bw)
            expected_encodings = self._compute_encodings(PARAM_MIN, PARAM_MAX, bw)
            error_margin = 10  # Use better heuristics; ideally there should be 0 error margin
            self.assertArrayNear(true_encodings, expected_encodings, error_margin)

            libpytrext.ResetQuantizer()
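# testQCQuantize_Params quantizes weights and immediately de-quantizes them, allowing
# up to roughly one quantization step of error (assertAllClose with tolerance 1.0).
# The sketch below illustrates that round trip under a plain uniform asymmetric
# scheme; it is an assumption-based illustration, not the qc_quantize implementation.
def _quantize_dequantize_sketch(values, enc_min, enc_max, bitwidth):
    """Quantize values onto an [enc_min, enc_max] grid and map them back to floats."""
    num_steps = 2 ** bitwidth - 1
    delta = (enc_max - enc_min) / num_steps
    if delta == 0:
        return list(values)
    # Round to the nearest grid point and clamp to the representable range
    quantized = [min(max(round((v - enc_min) / delta), 0), num_steps) for v in values]
    return [q * delta + enc_min for q in quantized]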
def testQCQuantize_CheckZeroRepresentation(self):
    _log.info('running testQCQuantize_CheckZeroRepresentation')
    for use_gpu in [False]:
        _log.info('GPU mode is selected' if use_gpu else 'CPU mode is selected')
        with self.session(use_gpu=use_gpu):
            bw = 8

            # Test all-negative ranges
            act_min = -8.0
            act_max = -5.0

            # Instantiate DlQuantization object
            comp_mode = libpymo.ComputationMode.COMP_MODE_GPU if use_gpu \
                else libpymo.ComputationMode.COMP_MODE_CPU
            libpytrext.InitQuantizer(["conv1"], comp_mode, [],
                                     libpymo.QuantizationMode.QUANTIZATION_TF_ENHANCED)

            # Set encodings
            output_0 = self.qc_quantize_module.qc_quantize_deprecated(
                op_name='conv1',
                training_in_progress=False,
                config=int(libpytrext.config_type.CONFIG_TYPE_SET_ENCODING),
                bitwidth=bw,
                in_tensors=[[]],
                fixed_enc_mins=[act_min],
                fixed_enc_maxs=[act_max])
            ops.convert_to_tensor(output_0[0]).eval()

            # Get encodings from the op and validate
            output_1 = self.qc_quantize_module.qc_quantize_deprecated(
                op_name='conv1',
                training_in_progress=False,
                config=int(libpytrext.config_type.CONFIG_TYPE_GET_ENCODING),
                bitwidth=bw,
                in_tensors=[[]],
                fixed_enc_mins=[],
                fixed_enc_maxs=[],
                num_tensors=1)
            enc_max = ops.convert_to_tensor(output_1[2]).eval()
            self.assertEqual(enc_max[0], 0.0)

            # Test all-positive ranges
            act_min = 20.0
            act_max = 100.0

            # Set encodings
            output_0 = self.qc_quantize_module.qc_quantize_deprecated(
                op_name='conv1',
                training_in_progress=False,
                config=int(libpytrext.config_type.CONFIG_TYPE_SET_ENCODING),
                bitwidth=bw,
                in_tensors=[[]],
                fixed_enc_mins=[act_min],
                fixed_enc_maxs=[act_max])
            ops.convert_to_tensor(output_0[0]).eval()

            # Get encodings from the op and validate
            output_1 = self.qc_quantize_module.qc_quantize_deprecated(
                op_name='conv1',
                training_in_progress=False,
                config=int(libpytrext.config_type.CONFIG_TYPE_GET_ENCODING),
                bitwidth=bw,
                in_tensors=[[]],
                fixed_enc_mins=[],
                fixed_enc_maxs=[],
                num_tensors=1)
            enc_min = ops.convert_to_tensor(output_1[1]).eval()
            self.assertEqual(enc_min[0], 0.0)

            libpytrext.ResetQuantizer()
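# testQCQuantize_CheckZeroRepresentation verifies that the stored encodings always
# keep 0.0 representable: an all-negative range has its max pulled up to 0 and an
# all-positive range has its min pulled down to 0. The helper below reproduces only
# that clamping behaviour as a sketch; the real quantizer may additionally nudge
# min/max so that zero falls exactly on the quantization grid.
def _include_zero_in_range_sketch(enc_min, enc_max):
    """Expand [enc_min, enc_max] so that the range always contains 0.0."""
    return min(enc_min, 0.0), max(enc_max, 0.0)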
def testQCQuantize_MultipleActivations(self):
    _log.info('running testQCQuantize_MultipleActivations')
    for use_gpu in [False]:
        _log.info('GPU mode is selected' if use_gpu else 'CPU mode is selected')
        with self.session(use_gpu=use_gpu):
            bw = 8
            actvn_stats_0 = actvn_stats_1 = actvn_stats_2 = actvn_stats_3 = constant_op.constant(
                np.arange(0, 100).astype(np.float32))

            # Instantiate DlQuantization object
            comp_mode = libpymo.ComputationMode.COMP_MODE_GPU if use_gpu \
                else libpymo.ComputationMode.COMP_MODE_CPU
            libpytrext.InitQuantizer(["conv1"], comp_mode, [],
                                     libpymo.QuantizationMode.QUANTIZATION_TF_ENHANCED)

            # Update quantization stats
            output_0 = self.qc_quantize_module.qc_quantize_deprecated(
                op_name='conv1',
                training_in_progress=False,
                config=int(libpytrext.config_type.CONFIG_TYPE_UPDATE_STATS),
                bitwidth=bw,
                in_tensors=[actvn_stats_0, actvn_stats_1, actvn_stats_2, actvn_stats_3],
                fixed_enc_mins=[],
                fixed_enc_maxs=[])
            ops.convert_to_tensor(output_0[0]).eval()

            actvn_0 = constant_op.constant(np.arange(0, 10).astype(np.float32))
            actvn_1 = constant_op.constant(np.arange(10, 20).astype(np.float32))
            actvn_2 = constant_op.constant(np.arange(20, 30).astype(np.float32))
            actvn_3 = constant_op.constant(np.arange(30, 40).astype(np.float32))
            test_actvn = [actvn_0, actvn_1, actvn_2, actvn_3]

            # Quantize and de-quantize activations
            output_1 = self.qc_quantize_module.qc_quantize_deprecated(
                op_name='conv1',
                training_in_progress=False,
                config=int(libpytrext.config_type.CONFIG_TYPE_Q_DQ_ACTIVATIONS),
                bitwidth=bw,
                in_tensors=[actvn_0, actvn_1, actvn_2, actvn_3],
                fixed_enc_mins=[],
                fixed_enc_maxs=[],
                num_tensors=4)
            quantized_acts = ops.convert_to_tensor(output_1[0]).eval()

            quantization_error_margin = 1.0
            for index in np.arange(0, len(quantized_acts)):
                self.assertArrayNear(ops.convert_to_tensor(test_actvn[index]).eval(),
                                     quantized_acts[index], quantization_error_margin)

            # Test output encodings
            enc_min = ops.convert_to_tensor(output_1[1]).eval()
            enc_max = ops.convert_to_tensor(output_1[2]).eval()

            # Compare against encodings obtained from get_encoding()
            get_enc_tensor = self.qc_quantize_module.qc_quantize_deprecated(
                op_name='conv1',
                training_in_progress=False,
                config=int(libpytrext.config_type.CONFIG_TYPE_GET_ENCODING),
                bitwidth=bw,
                in_tensors=[[]],
                fixed_enc_mins=[],
                fixed_enc_maxs=[],
                num_tensors=4)
            exp_enc_min = ops.convert_to_tensor(get_enc_tensor[1]).eval()
            exp_enc_max = ops.convert_to_tensor(get_enc_tensor[2]).eval()

            for index in np.arange(0, len(quantized_acts)):
                true_encodings = self._compute_encodings(enc_min[index], enc_max[index], bw)
                expected_encodings = self._compute_encodings(exp_enc_min[index], exp_enc_max[index], bw)
                error_margin = 1.0  # Not a fair test to compare TF with TF_ENHANCED, but works for now
                self.assertAllEqual(true_encodings, expected_encodings)

            libpytrext.ResetQuantizer()
def testQCQuantize_SingleActivation(self):
    _log.info('running testQCQuantize_SingleActivation')
    for use_gpu in [False, True]:
        _log.info('GPU mode is selected' if use_gpu else 'CPU mode is selected')
        with self.session(use_gpu=use_gpu):
            bw = 8

            # Instantiate DlQuantization object
            comp_mode = libpymo.ComputationMode.COMP_MODE_GPU if use_gpu \
                else libpymo.ComputationMode.COMP_MODE_CPU
            libpytrext.InitQuantizer(["conv1"], comp_mode, [],
                                     libpymo.QuantizationMode.QUANTIZATION_TF_ENHANCED)

            actvn_0 = constant_op.constant(np.arange(0, 20).astype(np.float32))
            actvn_1 = constant_op.constant(np.arange(0, 50).astype(np.float32))
            actvn_2 = constant_op.constant(np.arange(0, 100).astype(np.float32))

            # Update quantization stats
            output_0 = self.qc_quantize_module.qc_quantize_deprecated(
                op_name='conv1',
                training_in_progress=False,
                config=int(libpytrext.config_type.CONFIG_TYPE_UPDATE_STATS),
                bitwidth=bw,
                in_tensors=[actvn_0],
                fixed_enc_mins=[],
                fixed_enc_maxs=[])
            ops.convert_to_tensor(output_0[0]).eval()

            output_1 = self.qc_quantize_module.qc_quantize_deprecated(
                op_name='conv1',
                training_in_progress=False,
                config=int(libpytrext.config_type.CONFIG_TYPE_UPDATE_STATS),
                bitwidth=bw,
                in_tensors=[actvn_1],
                fixed_enc_mins=[],
                fixed_enc_maxs=[])
            ops.convert_to_tensor(output_1[0]).eval()

            output_2 = self.qc_quantize_module.qc_quantize_deprecated(
                op_name='conv1',
                training_in_progress=False,
                config=int(libpytrext.config_type.CONFIG_TYPE_UPDATE_STATS),
                bitwidth=bw,
                in_tensors=[actvn_2],
                fixed_enc_mins=[],
                fixed_enc_maxs=[])
            ops.convert_to_tensor(output_2[0]).eval()

            ACT_MIN = 0.0
            ACT_MAX = 16.0
            test_actvn = constant_op.constant([ACT_MAX])  # Single input activation

            # Quantize and de-quantize activations
            test_output = self.qc_quantize_module.qc_quantize_deprecated(
                op_name='conv1',
                training_in_progress=False,
                config=int(libpytrext.config_type.CONFIG_TYPE_Q_DQ_ACTIVATIONS),
                bitwidth=bw,
                in_tensors=[test_actvn],
                fixed_enc_mins=[],
                fixed_enc_maxs=[],
                num_tensors=1)
            quantized_acts = ops.convert_to_tensor(test_output[0]).eval()

            # Test output activations
            self.assertAllClose(quantized_acts[0], test_actvn.eval(), 1.0)

            # Test output encodings from quantizing activations
            enc_min = ops.convert_to_tensor(test_output[1]).eval()
            enc_max = ops.convert_to_tensor(test_output[2]).eval()
            true_encodings = self._compute_encodings(enc_min[0], enc_max[0], bw)

            # Compare against encodings obtained from get_encoding()
            get_enc_tensor = self.qc_quantize_module.qc_quantize_deprecated(
                op_name='conv1',
                training_in_progress=False,
                config=int(libpytrext.config_type.CONFIG_TYPE_GET_ENCODING),
                bitwidth=bw,
                in_tensors=[[]],
                fixed_enc_mins=[],
                fixed_enc_maxs=[],
                num_tensors=1)
            exp_enc_min = ops.convert_to_tensor(get_enc_tensor[1]).eval()
            exp_enc_max = ops.convert_to_tensor(get_enc_tensor[2]).eval()
            expected_encodings = self._compute_encodings(exp_enc_min[0], exp_enc_max[0], bw)
            self.assertAllEqual(true_encodings, expected_encodings)

            libpytrext.ResetQuantizer()