def test_set_and_freeze_op_mode(self):
    """
    Create QuantSim for a CPU model, test set and freeze op mode.

    Sets the op mode of the conv2d weight quantizer, freezes its encoding, and
    then checks that a subsequent set_op_mode() call does not change the op mode.
    """
    tf.compat.v1.reset_default_graph()
    with tf.device('/cpu:0'):
        _ = keras_model()
        init = tf.compat.v1.global_variables_initializer()

    session = tf.compat.v1.Session()
    sim = None
    # Release the sessions even when an assertion fails, so a failing test
    # does not leak them.
    try:
        session.run(init)
        sim = QuantizationSimModel(session, ['conv2d_input'], ['keras_model/Softmax'],
                                   use_cuda=False)
        quantizer = sim.quantizer_config('conv2d/Conv2D/ReadVariableOp_quantized')

        op_mode = int(libpymo.TensorQuantizerOpMode.oneShotQuantizeDequantize)
        quantizer.set_op_mode(op_mode)
        quantizer.freeze_encoding()
        self.assertEqual(op_mode, quantizer.get_op_mode())

        # After freezing, a new op mode must be ignored.
        new_op_mode = int(libpymo.TensorQuantizerOpMode.passThrough)
        quantizer.set_op_mode(new_op_mode)
        self.assertNotEqual(new_op_mode, quantizer.get_op_mode())
        self.assertEqual(op_mode, quantizer.get_op_mode())
    finally:
        session.close()
        # NOTE(review): sim.session is presumably a separate session from the
        # one passed in (other tests in this file close both) - confirm.
        if sim is not None:
            sim.session.close()
def test_set_and_freeze_param_encodings(self):
    """
    Test set and freeze parameter encodings functionality.

    Writes a parameter encoding for the conv2d weight to a JSON file, loads it
    through set_and_freeze_param_encodings(), and checks that the quantizer's
    encoding min/max, op mode and validity flag reflect the loaded encoding.
    """
    tf.compat.v1.reset_default_graph()
    with tf.device('/cpu:0'):
        _ = keras_model()
        init = tf.compat.v1.global_variables_initializer()

    param_name = 'conv2d/Conv2D/ReadVariableOp:0'
    param_encodings = {
        param_name: [{
            'bitwidth': 4,
            'is_symmetric': False,
            'max': 0.14584073424339294,
            'min': -0.12761062383651733,
            'offset': -7.0,
            'scale': 0.01823008991777897
        }]
    }
    encoding_file_path = os.path.join('./', 'dummy.encodings')

    session = tf.compat.v1.Session()
    sim = None
    # The finally block removes the JSON file and closes the sessions even
    # when an assertion fails part-way through.
    try:
        session.run(init)
        sim = QuantizationSimModel(session, ['conv2d_input'], ['keras_model/Softmax'],
                                   use_cuda=False)

        # export encodings to JSON file
        with open(encoding_file_path, 'w') as encoding_fp:
            json.dump(param_encodings, encoding_fp, sort_keys=True, indent=4)

        sim.set_and_freeze_param_encodings(encoding_path=encoding_file_path)

        quantizer = sim.quantizer_config('conv2d/Conv2D/ReadVariableOp_quantized')
        encoding = param_encodings[param_name][0]

        encoding_max = quantizer.get_variable_from_op(QuantizeOpIndices.encoding_max)
        encoding_min = quantizer.get_variable_from_op(QuantizeOpIndices.encoding_min)

        self.assertEqual(encoding_min, encoding.get('min'))
        self.assertEqual(encoding_max, encoding.get('max'))
        self.assertEqual(int(libpymo.TensorQuantizerOpMode.quantizeDequantize),
                         quantizer.get_op_mode())
        self.assertTrue(quantizer.is_encoding_valid())
    finally:
        # Delete encodings JSON file
        if os.path.exists(encoding_file_path):
            os.remove(encoding_file_path)
        session.close()
        # NOTE(review): sim.session is presumably a separate session from the
        # one passed in (other tests in this file close both) - confirm.
        if sim is not None:
            sim.session.close()
def test_set_and_freeze_encoding(self):
    """
    Create QuantSim for a CPU model, test set and freeze encoding.

    Computes an encoding for the conv2d weight quantizer, sets and freezes it,
    and then checks that a later set_encoding() call with different min/max
    values leaves the stored encoding min/max unchanged.
    """
    tf.compat.v1.reset_default_graph()
    with tf.device('/cpu:0'):
        _ = keras_model()
        init = tf.compat.v1.global_variables_initializer()

    session = tf.compat.v1.Session()
    sim = None
    # Release the sessions even when an assertion fails.
    try:
        session.run(init)
        sim = QuantizationSimModel(session, ['conv2d_input'], ['keras_model/Softmax'],
                                   use_cuda=False)
        quantizer = sim.quantizer_config('conv2d/Conv2D/ReadVariableOp_quantized')

        encoding = quantizer.compute_encoding(8, False)
        print(encoding.max, encoding.min)

        # Set and freeze encoding
        quantizer.set_encoding(encoding)
        quantizer.freeze_encoding()

        old_encoding_min = quantizer.get_variable_from_op(QuantizeOpIndices.encoding_min)
        old_encoding_max = quantizer.get_variable_from_op(QuantizeOpIndices.encoding_max)

        self.assertEqual(encoding.min, old_encoding_min)
        self.assertEqual(encoding.max, old_encoding_max)
        self.assertTrue(quantizer.is_encoding_valid())

        # Try updating encoding min and max with new values, but values can not be changed
        encoding.min = -0.4
        encoding.max = 0.6
        quantizer.set_encoding(encoding)

        self.assertEqual(old_encoding_min,
                         quantizer.get_variable_from_op(QuantizeOpIndices.encoding_min))
        self.assertEqual(old_encoding_max,
                         quantizer.get_variable_from_op(QuantizeOpIndices.encoding_max))
    finally:
        session.close()
        # NOTE(review): sim.session is presumably a separate session from the
        # one passed in (other tests in this file close both) - confirm.
        if sim is not None:
            sim.session.close()
def test_get_encoding(self):
    """
    Create QuantSim for a CPU model, test get encoding.

    Calls get_encoding() on the conv2d weight quantizer without any encoding
    having been set or computed in this test, and expects an AssertionError.
    """
    tf.compat.v1.reset_default_graph()
    with tf.device('/cpu:0'):
        _ = keras_model()
        init = tf.compat.v1.global_variables_initializer()

    session = tf.compat.v1.Session()
    sim = None
    # Release the sessions even when the expectation below is not met.
    try:
        session.run(init)
        sim = QuantizationSimModel(session, ['conv2d_input'], ['keras_model/Softmax'],
                                   use_cuda=False)
        quantizer = sim.quantizer_config('conv2d/Conv2D/ReadVariableOp_quantized')

        with self.assertRaises(AssertionError):
            quantizer.get_encoding()
    finally:
        session.close()
        # NOTE(review): sim.session is presumably a separate session from the
        # one passed in (other tests in this file close both) - confirm.
        if sim is not None:
            sim.session.close()
def test_set_get_quantizer_params_using_properties(self):
    """
    Create QuantSim for a CPU model, test param read and write using properties.

    Reads and writes the bitwidth, use_symmetric_encoding, rounding_mode,
    quant_scheme and enabled properties on parameter and activation quantizer
    configs and checks that each write is visible on the next read.
    """
    tf.compat.v1.reset_default_graph()
    with tf.device('/cpu:0'):
        model = tf.keras.Sequential()
        model.add(tf.keras.layers.Conv2D(32, kernel_size=3, input_shape=(28, 28, 3),
                                         activation='relu'))
        model.add(tf.keras.layers.MaxPooling2D((2, 2)))
        model.add(tf.keras.layers.Conv2D(64, kernel_size=3, activation='relu'))
        model.summary()

    sess = tf.compat.v1.Session()
    sim = None
    # Close both the session created here and the one held by the sim, even
    # when an assertion fails.
    try:
        initialize_uninitialized_vars(sess)
        sim = QuantizationSimModel(sess, [model.input.op.name], [model.output.op.name],
                                   use_cuda=False)

        p_quantizer = sim.quantizer_config('conv2d/Conv2D/ReadVariableOp_quantized')
        o_quantizer = sim.quantizer_config('conv2d/Relu_quantized')
        bias_quantizer = sim.quantizer_config('conv2d/BiasAdd/ReadVariableOp_quantized')

        # check if __str__ can print the object info
        print(p_quantizer)

        # bitwidth on a parameter quantizer
        self.assertEqual(8, p_quantizer.bitwidth)
        p_quantizer.bitwidth = 6
        self.assertEqual(6, p_quantizer.bitwidth)

        # bitwidth on an activation quantizer
        self.assertEqual(8, o_quantizer.bitwidth)
        o_quantizer.bitwidth = 6
        self.assertEqual(6, o_quantizer.bitwidth)

        # symmetric encoding flag
        self.assertFalse(bias_quantizer.use_symmetric_encoding)
        bias_quantizer.use_symmetric_encoding = True
        self.assertTrue(bias_quantizer.use_symmetric_encoding)

        # rounding mode
        self.assertEqual(libpymo.RoundingMode.ROUND_NEAREST, o_quantizer.rounding_mode)
        o_quantizer.rounding_mode = libpymo.RoundingMode.ROUND_STOCHASTIC
        self.assertEqual(libpymo.RoundingMode.ROUND_STOCHASTIC, o_quantizer.rounding_mode)

        # quant scheme
        self.assertEqual(libpymo.QuantizationMode.QUANTIZATION_TF_ENHANCED,
                         o_quantizer.quant_scheme)
        o_quantizer.quant_scheme = QuantScheme.post_training_tf
        self.assertEqual(libpymo.QuantizationMode.QUANTIZATION_TF, o_quantizer.quant_scheme)
        self.assertFalse(o_quantizer.tensor_quantizer.isEncodingValid)

        # enabled flag
        self.assertTrue(p_quantizer.enabled)
        p_quantizer.enabled = False
        self.assertFalse(p_quantizer.enabled)
    finally:
        sess.close()
        if sim is not None:
            sim.session.close()
def test_compute_encodings_quant_scheme_update(self):
    """
    Create QuantSim model and update quantScheme using property interface.

    Computes encodings, switches the conv2d weight quantizer's quant scheme to
    post_training_tf, computes encodings again, and checks that the weight
    quantizer's encoding min and max both changed.
    """
    tf.compat.v1.reset_default_graph()
    np.random.seed(0)
    tf.compat.v1.set_random_seed(0)

    with tf.device('/gpu:0'):
        model = tf.keras.Sequential()
        model.add(tf.keras.layers.Conv2D(32, kernel_size=3, input_shape=(28, 28, 3),
                                         activation='relu'))
        model.add(tf.keras.layers.MaxPooling2D((2, 2)))
        model.add(tf.keras.layers.Conv2D(64, kernel_size=3, activation='relu'))
        model.summary()

    def dummy_forward_pass(sess, args):
        # Re-seed on every call so both compute_encodings() runs feed the same input.
        np.random.seed(0)
        tf.compat.v1.set_random_seed(0)
        model_output = sess.graph.get_tensor_by_name('conv2d_1/Relu_quantized:0')
        model_input = sess.graph.get_tensor_by_name('conv2d_input:0')
        dummy_input = np.random.randn(20, 28, 28, 3)
        sess.run(model_output, feed_dict={model_input: dummy_input})

    sess = tf.compat.v1.Session()
    sim = None
    # Close both sessions even when an assertion fails.
    try:
        initialize_uninitialized_vars(sess)
        sim = QuantizationSimModel(sess, ['conv2d_input'], ['conv2d_1/Relu'], use_cuda=True)

        # Check that op-mode is set correctly
        conv2d_weight_quant_op = sim.session.graph.get_operation_by_name(
            'conv2d/Conv2D/ReadVariableOp_quantized')
        self.assertEqual(int(libpymo.TensorQuantizerOpMode.oneShotQuantizeDequantize),
                         sim.session.run(conv2d_weight_quant_op.inputs[1]))

        sim.compute_encodings(dummy_forward_pass, None)

        p_quantizer = sim.quantizer_config('conv2d/Conv2D/ReadVariableOp_quantized')
        old_p_encoding_min = p_quantizer.get_variable_from_op(QuantizeOpIndices.encoding_min)
        old_p_encoding_max = p_quantizer.get_variable_from_op(QuantizeOpIndices.encoding_max)

        self.assertEqual(libpymo.QuantizationMode.QUANTIZATION_TF_ENHANCED,
                         p_quantizer.quant_scheme)
        p_quantizer.quant_scheme = QuantScheme.post_training_tf
        self.assertEqual(libpymo.QuantizationMode.QUANTIZATION_TF, p_quantizer.quant_scheme)

        # invoke compute encoding after quantScheme update
        sim.compute_encodings(dummy_forward_pass, None)
        new_p_encoding_min = p_quantizer.get_variable_from_op(QuantizeOpIndices.encoding_min)
        new_p_encoding_max = p_quantizer.get_variable_from_op(QuantizeOpIndices.encoding_max)

        # validate
        self.assertNotEqual(old_p_encoding_min, new_p_encoding_min)
        self.assertNotEqual(old_p_encoding_max, new_p_encoding_max)
    finally:
        sess.close()
        if sim is not None:
            sim.session.close()
def validate_simple_rnn_auto_insertion_and_forward_pass(self, sess):
    """
    common api to validate auto quant node insertion and forward pass for simple rnn layer

    Builds a QuantizationSimModel from the given session, checks which quantize
    ops are present in / absent from the simple_rnn while block, computes
    encodings, and checks that the sim output differs from the output of the
    original session. Both sessions are closed before returning.

    :param sess: TensorFlow session
    :return: None
    """
    np.random.seed(0)
    # tf.compat.v1 spelling, matching the other seeding calls in this file.
    tf.compat.v1.set_random_seed(0)

    matmul_param_quant_op_name = "simple_rnn/while/MatMul/ReadVariableOp_quantized"
    matmul_2_param_quant_op_name = "simple_rnn/while/MatMul_1/ReadVariableOp_quantized"

    # quantize op names asserted to be ABSENT from the while block
    absent_quant_op_names = (
        "simple_rnn/while/BiasAdd_quantized",
        "simple_rnn/while/add_quantized",
        "simple_rnn/while/MatMul_quantized",
        "simple_rnn/while/MatMul_1_quantized",
    )

    # input quantize op names asserted to be PRESENT in the while block
    input_quant_op_names = (
        "simple_rnn/while/TensorArrayReadV3_quantized",
        "simple_rnn/while/Identity_2_quantized",
    )

    def dummy_forward_pass(sess, args):
        model_output = sess.graph.get_tensor_by_name('simplernn_model/Softmax:0')
        model_input = sess.graph.get_tensor_by_name('input_1:0')
        dummy_input = np.random.randn(16, 3, 100)
        sess.run(model_output, feed_dict={model_input: dummy_input})

    def run_model(sess, input_tensor):
        # Named run_model rather than eval to avoid shadowing the builtin.
        model_output = sess.graph.get_tensor_by_name('simplernn_model/Softmax:0')
        model_input = sess.graph.get_tensor_by_name('input_1:0')
        return sess.run(model_output, feed_dict={model_input: input_tensor})

    sim = None
    # Close both sessions even when an assertion fails.
    try:
        # the param quantizer must not exist before the sim is constructed
        self.assertNotIn(matmul_param_quant_op_name,
                         [op.name for op in sess.graph.get_operations()])

        # construct a quantization sim model
        sim = QuantizationSimModel(sess, ['input_1'], ['simplernn_model/Softmax'],
                                   use_cuda=False)

        # check biasadd param quantizers are disabled
        for p_quantizer_name in sim._param_quantizers:
            if 'BiasAdd' in p_quantizer_name:
                self.assertFalse(sim.quantizer_config(p_quantizer_name).enabled)

        # get ops and make sure we have a quantized op added to the conditional block
        quantized_graph_op_names = self._get_quant_ops_from_tf_graph(sim.session.graph)

        # while block ops: param quantizers for both matmuls
        self.assertIn(matmul_param_quant_op_name, quantized_graph_op_names)
        self.assertIn(matmul_2_param_quant_op_name, quantized_graph_op_names)

        for op_name in absent_quant_op_names:
            self.assertNotIn(op_name, quantized_graph_op_names)

        # check for input quantizers
        for op_name in input_quant_op_names:
            self.assertIn(op_name, quantized_graph_op_names)

        # validate encodings; statement order is kept because each call below
        # draws from the seeded numpy RNG
        sim.compute_encodings(dummy_forward_pass, None)
        random_tensor = np.random.randn(16, 3, 100)
        orig_out = run_model(sess, random_tensor)

        sim.compute_encodings(dummy_forward_pass, None)

        with sim.session.graph.as_default():
            quantized_out = run_model(sim.session, random_tensor)

        # check quantized output with orig output
        self.assertFalse(np.allclose(orig_out, quantized_out))
    finally:
        # close tf sessions
        sess.close()
        if sim is not None:
            sim.session.close()