def test_set_and_freeze_op_mode(self):
    """
    Build a QuantSim for a CPU model and check that a quantizer's op mode
    stays at its frozen value when set_op_mode() is called after
    freeze_encoding().
    """
    tf.compat.v1.reset_default_graph()
    with tf.device('/cpu:0'):
        _ = keras_model()
        init = tf.compat.v1.global_variables_initializer()

    session = tf.compat.v1.Session()
    session.run(init)

    sim = QuantizationSimModel(session, ['conv2d_input'],
                               ['keras_model/Softmax'], use_cuda=False)
    weight_quantizer = sim.quantizer_config(
        'conv2d/Conv2D/ReadVariableOp_quantized')

    # Choose an op mode, apply it, then freeze the quantizer.
    frozen_mode = int(
        libpymo.TensorQuantizerOpMode.oneShotQuantizeDequantize)
    weight_quantizer.set_op_mode(frozen_mode)
    weight_quantizer.freeze_encoding()
    self.assertEqual(frozen_mode, weight_quantizer.get_op_mode())

    # A later attempt to switch modes must leave the frozen mode in place.
    rejected_mode = int(libpymo.TensorQuantizerOpMode.passThrough)
    weight_quantizer.set_op_mode(rejected_mode)
    self.assertNotEqual(rejected_mode, weight_quantizer.get_op_mode())
    self.assertEqual(frozen_mode, weight_quantizer.get_op_mode())

    session.close()
def test_quantize_simple_rnn_save_and_load_checkpoint(self):
    """
    Test that a QuantSim of a recurrent model survives a checkpoint round trip.

    A two-layer SimpleRNN model is wrapped in a QuantizationSimModel,
    encodings are computed, the sim is written with save_checkpoint() and
    read back with load_checkpoint(). The reloaded sim must produce the same
    output as the original sim for the same input tensor.
    """
    tf.compat.v1.reset_default_graph()
    sess = tf.compat.v1.Session()
    np.random.seed(0)
    tf.compat.v1.set_random_seed(0)

    with sess.graph.as_default():
        inputs = tf.keras.Input(shape=(3, 100))

        # Two stacked SimpleRNN layers with 10 internal units each.
        x = tf.keras.layers.SimpleRNN(10, name='rnn1',
                                      return_sequences=True)(inputs)
        x = tf.keras.layers.SimpleRNN(10, name='rnn2')(x)
        _ = tf.keras.layers.Dense(10, activation=tf.nn.softmax,
                                  name="fc")(x)

        init = tf.compat.v1.global_variables_initializer()
        sess.run(init)

    sim = QuantizationSimModel(sess, ['input_1'], ['fc/Softmax'],
                               use_cuda=False)

    # Named run_model (not eval) so the builtin eval() is not shadowed.
    def run_model(session, input_tensor):
        model_output = session.graph.get_tensor_by_name('fc/Softmax:0')
        model_input = session.graph.get_tensor_by_name('input_1:0')
        return session.run(model_output,
                           feed_dict={model_input: input_tensor})

    def dummy_forward_pass(session, args):
        dummy_input = np.random.randn(1, 3, 100)
        run_model(session, dummy_input)

    sim.compute_encodings(dummy_forward_pass, None)
    random_tensor = np.random.randn(1, 3, 100)
    old_out = run_model(sim.session, random_tensor)

    save_checkpoint(sim, './data/', 'simple_rnn_save')
    new_sim = load_checkpoint('./data', 'simple_rnn_save')

    try:
        # Inference through the new sim must match the old sim. This checks
        # that the quantization parameters were restored and that the
        # quantize-dequantize ops can be invoked in the new session.
        new_out = run_model(new_sim.session, random_tensor)
        self.assertTrue(np.allclose(old_out, new_out))
    finally:
        # Close both sim sessions even when the comparison fails.
        new_sim.session.close()
        sim.session.close()

    del sim
def test_set_and_freeze_param_encodings(self):
    """
    Test the set-and-freeze parameter encodings functionality.

    A single parameter encoding is written to a JSON file and loaded with
    sim.set_and_freeze_param_encodings(). The quantizer for that parameter
    must then report the min/max from the file, be in quantizeDequantize
    mode and have a valid encoding.
    """
    tf.compat.v1.reset_default_graph()
    with tf.device('/cpu:0'):
        _ = keras_model()
        init = tf.compat.v1.global_variables_initializer()

    session = tf.compat.v1.Session()
    session.run(init)

    sim = QuantizationSimModel(session, ['conv2d_input'],
                               ['keras_model/Softmax'], use_cuda=False)

    param_name = 'conv2d/Conv2D/ReadVariableOp:0'
    param_encodings = {
        param_name: [{
            'bitwidth': 4,
            'is_symmetric': False,
            'max': 0.14584073424339294,
            'min': -0.12761062383651733,
            'offset': -7.0,
            'scale': 0.01823008991777897
        }]
    }

    # Single source of truth for the temporary encodings file path.
    encoding_file_path = os.path.join('./', 'dummy.encodings')

    try:
        # Export encodings to a JSON file.
        with open(encoding_file_path, 'w') as encoding_fp:
            json.dump(param_encodings, encoding_fp, sort_keys=True, indent=4)

        sim.set_and_freeze_param_encodings(encoding_path=encoding_file_path)

        quantizer = sim.quantizer_config(
            'conv2d/Conv2D/ReadVariableOp_quantized')
        encoding = param_encodings[param_name][0]

        encoding_max = quantizer.get_variable_from_op(
            QuantizeOpIndices.encoding_max)
        encoding_min = quantizer.get_variable_from_op(
            QuantizeOpIndices.encoding_min)

        self.assertEqual(encoding_min, encoding.get('min'))
        self.assertEqual(encoding_max, encoding.get('max'))
        self.assertEqual(
            int(libpymo.TensorQuantizerOpMode.quantizeDequantize),
            quantizer.get_op_mode())
        self.assertEqual(quantizer.is_encoding_valid(), True)
    finally:
        # Release the session and delete the encodings file even when an
        # assertion above fails, so nothing is left behind for later tests.
        session.close()
        if os.path.exists(encoding_file_path):
            os.remove(encoding_file_path)
def test_insert_quant_op_recurrent(self):
    """
    Check that building a QuantSim adds a quantize op inside the while block
    of a SimpleRNN layer that is absent from the graph beforehand.
    """
    tf.compat.v1.reset_default_graph()
    sess = tf.compat.v1.Session()

    with sess.graph.as_default():
        inputs = tf.keras.Input(shape=(3, 100))

        # SimpleRNN layer with 12 internal units, followed by a softmax head.
        rnn_out = tf.keras.layers.SimpleRNN(12)(inputs)
        _ = tf.keras.layers.Dense(12, activation=tf.nn.softmax,
                                  name="simplernn_model")(rnn_out)

        init = tf.compat.v1.global_variables_initializer()
        sess.run(init)

    quant_op_inside_while_block_name = \
        "simple_rnn/while/MatMul/ReadVariableOp_quantized"

    # Before the sim is built the quantize op must not exist.
    names_before = [op.name for op in sess.graph.get_operations()]
    self.assertFalse(quant_op_inside_while_block_name in names_before)

    # Construct a quantization sim model.
    sim = QuantizationSimModel(sess, ['input_1'],
                               ['simplernn_model/Softmax'], use_cuda=False)

    # Afterwards the quantize op must be present in the sim's graph.
    names_after = [op.name for op in sim.session.graph.get_operations()]
    self.assertTrue(quant_op_inside_while_block_name in names_after)

    sim.session.close()
    del sim
def test_matmul_param_selection_lstm(self):
    """
    Test the API that selects the op and input param tensor to modify for
    MatMul ops inside an LSTM layer.
    """
    tf.compat.v1.reset_default_graph()
    sess = tf.compat.v1.Session()

    with sess.graph.as_default():
        inputs = tf.keras.Input(shape=(3, 100))

        # LSTM layer with 12 internal units, followed by a softmax head.
        lstm_out = tf.keras.layers.LSTM(12, name='lstm0')(inputs)
        _ = tf.keras.layers.Dense(12, activation=tf.nn.softmax,
                                  name="matmul0")(lstm_out)

        init = tf.compat.v1.global_variables_initializer()
        sess.run(init)

    # Each case: (MatMul op name, op expected to be returned for
    # modification, param tensor expected to be returned).
    cases = (
        ("lstm0/while/MatMul",
         "lstm0/while/split",
         "lstm0/while/split/ReadVariableOp:0"),
        ("lstm0/while/MatMul_5",
         "lstm0/while/strided_slice_1",
         "lstm0/while/ReadVariableOp_1:0"),
    )

    graph = sess.graph
    for matmul_name, expected_op_name, expected_param_name in cases:
        expected_op = graph.get_operation_by_name(expected_op_name)
        matmul_op = graph.get_operation_by_name(matmul_name)
        expected_param = graph.get_tensor_by_name(expected_param_name)

        can_modify_op, param_in = \
            QuantizationSimModel._get_op_to_modify_with_param_in(
                matmul_op, 1)

        self.assertEqual(can_modify_op, expected_op)
        self.assertEqual(param_in, expected_param)

    sess.close()
def test_compute_encodings(self):
    """
    Test that ops not evaluated during compute encodings are set to
    passThrough mode.

    The forward function only evaluates the graph up to 'flatten/Reshape:0',
    so the activation quantizers listed explicitly below are expected to end
    up in passThrough mode with an invalid encoding, while all other
    activation quantizers are expected in quantizeDequantize mode with a
    valid encoding.
    """
    # Op-mode values as asserted by this test (see branch comments below).
    quantize_dequantize_mode = 2
    pass_through_mode = 3

    tf.compat.v1.reset_default_graph()
    sess = tf.compat.v1.Session()

    # Use initialized, reproducible data. np.ndarray(shape) would return
    # uninitialized memory, which can hold arbitrary values (incl. NaN/inf).
    np.random.seed(0)
    test_inp = np.random.randn(1, 32, 32, 3)

    def dummy_forward_func(sess, _):
        input_tensor = sess.graph.get_tensor_by_name('input_1:0')
        output_tensor = sess.graph.get_tensor_by_name('flatten/Reshape:0')
        sess.run(output_tensor, feed_dict={input_tensor: test_inp})

    with sess.as_default():
        _ = keras_model_functional()
        init = tf.compat.v1.global_variables_initializer()
        sess.run(init)

    sim = QuantizationSimModel(sess, ['input_1'],
                               ['keras_model_functional/Softmax'])
    sim.compute_encodings(dummy_forward_func, None)

    for name, quant_info in sim._activation_quantizers.items():
        if name in [
                'keras_model_functional/Softmax_quantized',
                'keras_model_functional/BiasAdd_quantized'
        ]:
            # Quantizers after the op evaluated in compute_encodings are in
            # passThrough (3) mode.
            self.assertEqual(quant_info.get_op_mode(), pass_through_mode)
            self.assertFalse(quant_info.tensor_quantizer.isEncodingValid)
        elif name in ['scope_1/conv2d_3/BiasAdd_quantized']:
            # passThrough quantizers remain as passThrough (3).
            self.assertEqual(quant_info.get_op_mode(), pass_through_mode)
            self.assertFalse(quant_info.tensor_quantizer.isEncodingValid)
        else:
            # All other quantizers are in quantizeDequantize (2) mode.
            self.assertEqual(quant_info.get_op_mode(),
                             quantize_dequantize_mode)
            self.assertTrue(quant_info.tensor_quantizer.isEncodingValid)

    # Finally run the whole sim graph once, through to the softmax output.
    input_tensor = sim.session.graph.get_tensor_by_name('input_1:0')
    output_tensor = sim.session.graph.get_tensor_by_name(
        'keras_model_functional/Softmax:0')
    sim.session.run(output_tensor, feed_dict={input_tensor: test_inp})

    sim.session.close()
    del sim
def test_set_and_freeze_encoding(self):
    """
    Build a QuantSim for a CPU model, set and freeze a quantizer's encoding,
    and check that a later set_encoding() call does not change the stored
    min/max.
    """
    tf.compat.v1.reset_default_graph()
    with tf.device('/cpu:0'):
        _ = keras_model()
        init = tf.compat.v1.global_variables_initializer()

    session = tf.compat.v1.Session()
    session.run(init)

    sim = QuantizationSimModel(session, ['conv2d_input'],
                               ['keras_model/Softmax'], use_cuda=False)
    weight_quantizer = sim.quantizer_config(
        'conv2d/Conv2D/ReadVariableOp_quantized')

    encoding = weight_quantizer.compute_encoding(8, False)
    print(encoding.max, encoding.min)

    # Apply the computed encoding and freeze it.
    weight_quantizer.set_encoding(encoding)
    weight_quantizer.freeze_encoding()

    old_encoding_min = weight_quantizer.get_variable_from_op(
        QuantizeOpIndices.encoding_min)
    old_encoding_max = weight_quantizer.get_variable_from_op(
        QuantizeOpIndices.encoding_max)

    self.assertEqual(encoding.min, old_encoding_min)
    self.assertEqual(encoding.max, old_encoding_max)
    self.assertEqual(weight_quantizer.is_encoding_valid(), True)

    # Attempt to overwrite min and max; the stored values must not change.
    encoding.min = -0.4
    encoding.max = 0.6
    weight_quantizer.set_encoding(encoding)

    current_min = weight_quantizer.get_variable_from_op(
        QuantizeOpIndices.encoding_min)
    self.assertEqual(old_encoding_min, current_min)
    current_max = weight_quantizer.get_variable_from_op(
        QuantizeOpIndices.encoding_max)
    self.assertEqual(old_encoding_max, current_max)

    session.close()
def test_get_encoding(self):
    """
    Build a QuantSim for a CPU model and check that get_encoding() raises
    AssertionError on a freshly created param quantizer.
    """
    tf.compat.v1.reset_default_graph()
    with tf.device('/cpu:0'):
        _ = keras_model()
        init = tf.compat.v1.global_variables_initializer()

    session = tf.compat.v1.Session()
    session.run(init)

    sim = QuantizationSimModel(session, ['conv2d_input'],
                               ['keras_model/Softmax'], use_cuda=False)
    weight_quantizer = sim.quantizer_config(
        'conv2d/Conv2D/ReadVariableOp_quantized')

    # No encodings have been computed in this test before the call.
    with self.assertRaises(AssertionError):
        weight_quantizer.get_encoding()

    session.close()
def test_construction_cpu_model(self):
    """
    Build a QuantSim for a CPU model and check that quantize ops were added
    to the graph, wired to the expected consumers, with the expected op modes.
    """
    tf.compat.v1.reset_default_graph()
    with tf.device('/cpu:0'):
        model = tf.keras.Sequential()
        model.add(
            tf.keras.layers.Conv2D(32, kernel_size=3,
                                   input_shape=(28, 28, 3),
                                   activation='relu'))
        model.add(tf.keras.layers.MaxPooling2D((2, 2)))
        model.add(
            tf.keras.layers.Conv2D(64, kernel_size=3, activation='relu'))
        model.summary()

    sess = tf.compat.v1.Session()
    initialize_uninitialized_vars(sess)
    sim = QuantizationSimModel(sess, ['conv2d_input'], ['conv2d_1/Relu'],
                               use_cuda=False)

    # Run the graph once up to a quantized activation to confirm the
    # inserted ops are executable.
    graph = sess.graph
    quantized_output = graph.get_tensor_by_name(
        'conv2d_1/BiasAdd_quantized:0')
    graph_input = graph.get_tensor_by_name('conv2d_input:0')
    dummy_input = np.random.randn(20, 28, 28, 3)
    sess.run(quantized_output, feed_dict={graph_input: dummy_input})

    # Collect every quantize op present in the graph.
    quant_ops = []
    for candidate in sess.graph.get_operations():
        if candidate.type == 'QcQuantize':
            quant_ops.append(candidate)
    for quant_op in quant_ops:
        print(quant_op.name)
    self.assertEqual(10, len(quant_ops))

    # The first two quantize ops must feed the Conv2D and BiasAdd ops.
    first_op, second_op = quant_ops[0], quant_ops[1]
    self.assertEqual('Conv2D', first_op.outputs[0].consumers()[0].type)
    self.assertEqual('BiasAdd', second_op.outputs[0].consumers()[0].type)
    self.assertEqual(int(libpymo.TensorQuantizerOpMode.passThrough),
                     sess.run(second_op.inputs[1]))

    # Op mode of the first quantize op.
    self.assertEqual(
        int(libpymo.TensorQuantizerOpMode.oneShotQuantizeDequantize),
        sess.run(first_op.inputs[1]))

    sess.close()
    sim.session.close()
    del sim
def test_quantize_lstm_deepspeech_time_major_false_quantsim_and_forward_pass(
        self):
    """
    Build a QuantSim for a model with two stacked LSTM layers, validate the
    quantize ops inserted for each LSTM, and validate the forward pass.
    """
    tf.reset_default_graph()
    sess = tf.Session()
    np.random.seed(0)
    tf.set_random_seed(0)

    with sess.graph.as_default():
        inputs = tf.keras.Input(shape=(3, 100))

        # First LSTM (12 units) returns sequences and states; only the
        # sequence output is fed to the second LSTM.
        stacked_lstm = tf.keras.layers.LSTM(12,
                                            return_state=True,
                                            return_sequences=True,
                                            name='lstm_stacked')
        x, state_h, state_c = stacked_lstm(inputs)

        x2 = tf.keras.layers.LSTM(12, name='last_lstm')(x)
        _ = tf.keras.layers.Dense(12, activation=tf.nn.softmax,
                                  name="lstm_model")(x2)

        init = tf.global_variables_initializer()
        sess.run(init)

    sim = QuantizationSimModel(sess, ['input_1'], ['lstm_model/Softmax'],
                               use_cuda=False)

    # Validate the quantize ops that were inserted for each LSTM layer.
    quantized_graph_op_names = self._get_quant_ops_from_tf_graph(
        sim.session.graph)

    self.validate_internal_lstm_quantisim_nodes(quantized_graph_op_names,
                                                'lstm_stacked', True, False)
    self.validate_internal_lstm_quantisim_nodes(quantized_graph_op_names,
                                                'last_lstm')

    # Validate the forward pass; the same validation is run twice.
    for _unused in range(2):
        self.validate_general_lstm_forward_pass_and_encoding(
            sess, sim, 9, 14)

    # Close tf sessions.
    sess.close()
    sim.session.close()
    del sim
def test_quantize_lstm_time_major_true_quantsim_and_forward_pass(self):
    """
    Build a QuantSim for a model with a time-major LSTM layer, validate the
    quantize ops inserted for the LSTM, and validate the forward pass.
    """
    tf.compat.v1.reset_default_graph()
    sess = tf.compat.v1.Session()
    np.random.seed(0)
    tf.compat.v1.set_random_seed(0)

    with sess.graph.as_default():
        inputs = tf.keras.Input(shape=(3, 100))

        # LSTM layer with 12 internal units, constructed with time_major=True.
        x = tf.keras.layers.LSTM(12, time_major=True,
                                 name='lstm_tm')(inputs)
        _ = tf.keras.layers.Dense(12, activation=tf.nn.softmax,
                                  name="lstm_model")(x)

        init = tf.compat.v1.global_variables_initializer()
        sess.run(init)

    sim = QuantizationSimModel(sess, ['input_1'], ['lstm_model/Softmax'],
                               use_cuda=False)

    # Validate quantsim: collect the quantize ops added to the sim graph and
    # check the ones belonging to the LSTM layer.
    quantized_graph_op_names = self._get_quant_ops_from_tf_graph(
        sim.session.graph)
    self.validate_internal_lstm_quantisim_nodes(quantized_graph_op_names,
                                                'lstm_tm')

    # Validate forward pass.
    self.validate_general_lstm_forward_pass_and_encoding(sess, sim)

    # Close tf sessions.
    sess.close()
    sim.session.close()
    del sim
def test_skip_quantizing_dtype_int(self):
    """
    Check that no activation quantizer is created for 'input_1' while one is
    created for 'input_2' (the test targets skipping of int32 ops).
    """
    tf.compat.v1.reset_default_graph()
    with tf.compat.v1.Session() as sess:
        _ = model_with_dtype_int()
        initialize_uninitialized_vars(sess)

        sim = QuantizationSimModel(sess, ['input_1', 'input_2'],
                                   ['model_with_dtype_int/Softmax'],
                                   use_cuda=False)

        activation_quantizers = sim._activation_quantizers
        self.assertEqual(6, len(activation_quantizers))
        self.assertTrue('input_1_quantized' not in activation_quantizers)
        self.assertTrue('input_2_quantized' in activation_quantizers)

        sim.session.close()
        del sim
def test_quantize_simple_rnn_export(self):
    """
    Test model export for recurrent models.

    After computing encodings the sim is exported to './data'. The exported
    meta graph must be loadable and runnable, must contain no quantize ops,
    and the exported encodings file must hold the expected number of
    activation and param encodings.
    """
    tf.compat.v1.reset_default_graph()
    sess = tf.compat.v1.Session()
    np.random.seed(0)
    tf.compat.v1.set_random_seed(0)

    with sess.graph.as_default():
        inputs = tf.keras.Input(shape=(3, 100))

        # Two stacked SimpleRNN layers with 10 internal units each.
        x = tf.keras.layers.SimpleRNN(10, name='rnn1',
                                      return_sequences=True)(inputs)
        x = tf.keras.layers.SimpleRNN(10, name='rnn2')(x)
        _ = tf.keras.layers.Dense(10, activation=tf.nn.softmax,
                                  name="fc")(x)

        init = tf.compat.v1.global_variables_initializer()
        sess.run(init)

    sim = QuantizationSimModel(sess, ['input_1'], ['fc/Softmax'],
                               use_cuda=False)

    def dummy_forward_pass(sess, args):
        model_output = sess.graph.get_tensor_by_name('fc/Softmax:0')
        model_input = sess.graph.get_tensor_by_name('input_1:0')
        dummy_input = np.random.randn(1, 3, 100)
        sess.run(model_output, feed_dict={model_input: dummy_input})

    sim.compute_encodings(dummy_forward_pass, None)
    sim.export('./data', 'rnn_quantsim')

    new_sess = load_model_from_meta('./data/rnn_quantsim.meta')
    try:
        # The exported graph must run and contain no quantize ops.
        dummy_forward_pass(new_sess, None)

        all_op_types = [op.type for op in new_sess.graph.get_operations()]
        self.assertNotIn('QcQuantize', all_op_types)
        self.assertNotIn('QcQuantizeRecurrentParam', all_op_types)
    finally:
        # Close the reloaded session even if a check above fails.
        new_sess.close()

    # Load the encodings file to check the encodings were exported correctly.
    with open("./data/rnn_quantsim.encodings", "r") as encodings_file:
        encodings = json.load(encodings_file)
        self.assertEqual(8, len(encodings['activation_encodings']))
        self.assertEqual(5, len(encodings['param_encodings']))

    # Close tf sessions.
    sess.close()
    sim.session.close()
    del sim
def test_empty_config_file(self):
    """
    Check that with an empty config file, all op modes and use symmetric
    encoding settings are set to passThrough and False respectively.
    """
    tf.compat.v1.reset_default_graph()
    sess = tf.compat.v1.Session()
    with sess.graph.as_default():
        _ = single_residual()
        init = tf.compat.v1.global_variables_initializer()
        sess.run(init)

    quantsim_config = {
        "defaults": {
            "ops": {},
            "params": {}
        },
        "params": {},
        "op_type": {},
        "supergroups": [],
        "model_input": {},
        "model_output": {}
    }
    config_path = './quantsim_config.json'

    sim = None
    try:
        with open(config_path, 'w') as f:
            json.dump(quantsim_config, f)

        sim = QuantizationSimModel(sess, ['input_1'],
                                   ['single_residual/Softmax'],
                                   config_file=config_path)

        all_quantize_ops = [
            op for op in sim.session.graph.get_operations()
            if op.type == 'QcQuantize'
        ]

        # Require at least one quantize op; an "is not None" check on a list
        # always passes and would let the loop below run zero times.
        self.assertTrue(all_quantize_ops)

        for op in all_quantize_ops:
            is_symmetric_tensor = sim.session.graph.get_tensor_by_name(
                op.name + '_use_symmetric_encoding:0')
            op_mode_tensor = sim.session.graph.get_tensor_by_name(
                op.name + '_op_mode:0')
            self.assertEqual(sim.session.run(is_symmetric_tensor), False)
            self.assertEqual(sim.session.run(op_mode_tensor),
                             int(pymo.TensorQuantizerOpMode.passThrough))
    finally:
        # Clean up even when an assertion fails so that the config file and
        # open sessions do not leak into other tests.
        if os.path.exists(config_path):
            os.remove(config_path)
        sess.close()
        if sim is not None:
            sim.session.close()
        tf.compat.v1.reset_default_graph()
def test_parse_config_file_model_outputs(self):
    """
    Test that model output quantization parameters are set correctly when
    using json config file.

    With "is_output_quantized" set to "True" under "model_output", the
    quantizer on the model's softmax output must be in updateStats mode.
    """
    tf.compat.v1.reset_default_graph()
    sess = tf.compat.v1.Session()
    with sess.graph.as_default():
        _ = single_residual()
        init = tf.compat.v1.global_variables_initializer()
        sess.run(init)

    quantsim_config = {
        "defaults": {
            "ops": {},
            "params": {}
        },
        "params": {},
        "op_type": {},
        "supergroups": [],
        "model_input": {},
        "model_output": {
            "is_output_quantized": "True"
        }
    }
    config_path = './quantsim_config.json'

    sim = None
    try:
        with open(config_path, 'w') as f:
            json.dump(quantsim_config, f)

        sim = QuantizationSimModel(sess, ['input_1'],
                                   ['single_residual/Softmax'],
                                   config_file=config_path)

        op_mode_tensor = sim.session.graph.get_tensor_by_name(
            'single_residual/Softmax_quantized_op_mode:0')
        self.assertEqual(sim.session.run(op_mode_tensor),
                         int(pymo.TensorQuantizerOpMode.updateStats))
    finally:
        # Clean up even when an assertion fails so that the config file and
        # open sessions do not leak into other tests.
        if os.path.exists(config_path):
            os.remove(config_path)
        sess.close()
        if sim is not None:
            sim.session.close()
        tf.compat.v1.reset_default_graph()
def test_parse_config_file_supergroups(self):
    """
    Test that supergroup quantization parameters are set correctly when
    using json config file.

    All op outputs are quantized by default; the activation quantizers
    listed in `expected_pass_through` must be in passThrough mode and every
    other listed activation quantizer must be in updateStats mode.
    """
    tf.compat.v1.reset_default_graph()
    sess = tf.compat.v1.Session()
    with sess.graph.as_default():
        _ = single_residual()
        init = tf.compat.v1.global_variables_initializer()
        sess.run(init)

    quantsim_config = {
        "defaults": {
            "ops": {
                "is_output_quantized": "True"
            },
            "params": {}
        },
        "params": {},
        "op_type": {},
        "supergroups": [{
            "op_list": ["Conv", "AveragePool"]
        }, {
            "op_list": ["Add", "Relu"]
        }, {
            "op_list": ["Conv", "BatchNormalization"]
        }],
        "model_input": {},
        "model_output": {}
    }
    config_path = './quantsim_config.json'

    activation_quantizers = [
        'conv2d/BiasAdd_quantized', 'conv2d_1/BiasAdd_quantized',
        'conv2d_2/BiasAdd_quantized', 'conv2d_3/BiasAdd_quantized',
        'conv2d_4/BiasAdd_quantized', 'input_1_quantized',
        'batch_normalization/cond/Merge_quantized', 'Relu_quantized',
        'max_pooling2d/MaxPool_quantized',
        'batch_normalization_1/cond/Merge_quantized', 'Add_quantized',
        'Relu_2_quantized', 'average_pooling2d/AvgPool_quantized',
        'single_residual/Softmax_quantized', 'Relu_1_quantized'
    ]

    # Activation quantizers this test expects to be disabled.
    expected_pass_through = [
        'input_1_quantized', 'conv2d/BiasAdd_quantized',
        'conv2d_3/BiasAdd_quantized', 'Add_quantized',
        'conv2d_4/BiasAdd_quantized'
    ]

    sim = None
    try:
        with open(config_path, 'w') as f:
            json.dump(quantsim_config, f)

        sim = QuantizationSimModel(sess, ['input_1'],
                                   ['single_residual/Softmax'],
                                   config_file=config_path)

        for activation_quantizer in activation_quantizers:
            op_mode_tensor = sim.session.graph.get_tensor_by_name(
                activation_quantizer + '_op_mode:0')
            if activation_quantizer in expected_pass_through:
                expected_mode = int(pymo.TensorQuantizerOpMode.passThrough)
            else:
                expected_mode = int(pymo.TensorQuantizerOpMode.updateStats)
            self.assertEqual(sim.session.run(op_mode_tensor), expected_mode)
    finally:
        # Clean up even when an assertion fails so that the config file and
        # open sessions do not leak into other tests.
        if os.path.exists(config_path):
            os.remove(config_path)
        sess.close()
        if sim is not None:
            sim.session.close()
        tf.compat.v1.reset_default_graph()
def test_compute_encodings_quant_scheme_update(self):
    """
    Build a QuantSim model, change a param quantizer's quant scheme through
    its property interface, and check that recomputing encodings yields a
    different encoding min and max.
    """
    tf.compat.v1.reset_default_graph()
    np.random.seed(0)
    tf.compat.v1.set_random_seed(0)

    with tf.device('/gpu:0'):
        model = tf.keras.Sequential()
        model.add(
            tf.keras.layers.Conv2D(32, kernel_size=3,
                                   input_shape=(28, 28, 3),
                                   activation='relu'))
        model.add(tf.keras.layers.MaxPooling2D((2, 2)))
        model.add(
            tf.keras.layers.Conv2D(64, kernel_size=3, activation='relu'))
        model.summary()

    sess = tf.compat.v1.Session()
    initialize_uninitialized_vars(sess)
    sim = QuantizationSimModel(sess, ['conv2d_input'], ['conv2d_1/Relu'],
                               use_cuda=True)

    weight_quantizer_name = 'conv2d/Conv2D/ReadVariableOp_quantized'

    # The op mode of the conv weight quantizer right after construction.
    weight_quant_op = sim.session.graph.get_operation_by_name(
        weight_quantizer_name)
    self.assertEqual(
        int(libpymo.TensorQuantizerOpMode.oneShotQuantizeDequantize),
        sim.session.run(weight_quant_op.inputs[1]))

    def dummy_forward_pass(sess, args):
        # Re-seed on every call so both compute_encodings() runs below see
        # the same input data.
        np.random.seed(0)
        tf.compat.v1.set_random_seed(0)
        graph = sess.graph
        model_output = graph.get_tensor_by_name('conv2d_1/Relu_quantized:0')
        model_input = graph.get_tensor_by_name('conv2d_input:0')
        dummy_input = np.random.randn(20, 28, 28, 3)
        sess.run(model_output, feed_dict={model_input: dummy_input})

    sim.compute_encodings(dummy_forward_pass, None)

    p_quantizer = sim.quantizer_config(weight_quantizer_name)
    old_p_encoding_min = p_quantizer.get_variable_from_op(
        QuantizeOpIndices.encoding_min)
    old_p_encoding_max = p_quantizer.get_variable_from_op(
        QuantizeOpIndices.encoding_max)

    # Switch the quant scheme from TF-enhanced to TF via the property.
    self.assertEqual(libpymo.QuantizationMode.QUANTIZATION_TF_ENHANCED,
                     p_quantizer.quant_scheme)
    p_quantizer.quant_scheme = QuantScheme.post_training_tf
    self.assertEqual(libpymo.QuantizationMode.QUANTIZATION_TF,
                     p_quantizer.quant_scheme)

    # Recompute encodings after the quant scheme update.
    sim.compute_encodings(dummy_forward_pass, None)
    new_p_encoding_min = p_quantizer.get_variable_from_op(
        QuantizeOpIndices.encoding_min)
    new_p_encoding_max = p_quantizer.get_variable_from_op(
        QuantizeOpIndices.encoding_max)

    # The encoding must differ from the one computed with the old scheme.
    self.assertNotEqual(old_p_encoding_min, new_p_encoding_min)
    self.assertNotEqual(old_p_encoding_max, new_p_encoding_max)

    sess.close()
    sim.session.close()
    del sim
def test_parse_config_file_params(self):
    """
    Test that param specific quantization parameters are set correctly when
    using json config file.

    Params default to not quantized and symmetric, while "weight" params are
    overridden to quantized and asymmetric. Weight quantizers must therefore
    be in oneShotQuantizeDequantize mode with symmetric encoding off, and
    bias quantizers in passThrough mode with symmetric encoding on.
    """
    tf.compat.v1.reset_default_graph()
    sess = tf.compat.v1.Session()
    with sess.graph.as_default():
        _ = single_residual()
        init = tf.compat.v1.global_variables_initializer()
        sess.run(init)

    quantsim_config = {
        "defaults": {
            "ops": {},
            "params": {
                "is_quantized": "False",
                "is_symmetric": "True"
            }
        },
        "params": {
            "weight": {
                "is_quantized": "True",
                "is_symmetric": "False"
            }
        },
        "op_type": {},
        "supergroups": [],
        "model_input": {},
        "model_output": {}
    }
    config_path = './quantsim_config.json'

    weight_quantizers = [
        'conv2d/Conv2D/ReadVariableOp_quantized',
        'conv2d_1/Conv2D/ReadVariableOp_quantized',
        'conv2d_2/Conv2D/ReadVariableOp_quantized',
        'conv2d_3/Conv2D/ReadVariableOp_quantized',
        'conv2d_4/Conv2D/ReadVariableOp_quantized',
        'single_residual/MatMul/ReadVariableOp_quantized'
    ]
    bias_quantizers = [
        'conv2d/BiasAdd/ReadVariableOp_quantized',
        'conv2d_1/BiasAdd/ReadVariableOp_quantized',
        'conv2d_2/BiasAdd/ReadVariableOp_quantized',
        'conv2d_3/BiasAdd/ReadVariableOp_quantized',
        'conv2d_4/BiasAdd/ReadVariableOp_quantized',
        'single_residual/BiasAdd/ReadVariableOp_quantized'
    ]

    sim = None
    try:
        with open(config_path, 'w') as f:
            json.dump(quantsim_config, f)

        sim = QuantizationSimModel(sess, ['input_1'],
                                   ['single_residual/Softmax'],
                                   config_file=config_path)

        for param_quantizer in weight_quantizers:
            is_symmetric_tensor = sim.session.graph.get_tensor_by_name(
                param_quantizer + '_use_symmetric_encoding:0')
            op_mode_tensor = sim.session.graph.get_tensor_by_name(
                param_quantizer + '_op_mode:0')
            self.assertEqual(
                sim.session.run(op_mode_tensor),
                int(pymo.TensorQuantizerOpMode.oneShotQuantizeDequantize))
            self.assertEqual(sim.session.run(is_symmetric_tensor), False)

        for param_quantizer in bias_quantizers:
            is_symmetric_tensor = sim.session.graph.get_tensor_by_name(
                param_quantizer + '_use_symmetric_encoding:0')
            op_mode_tensor = sim.session.graph.get_tensor_by_name(
                param_quantizer + '_op_mode:0')
            self.assertEqual(sim.session.run(op_mode_tensor),
                             int(pymo.TensorQuantizerOpMode.passThrough))
            self.assertEqual(sim.session.run(is_symmetric_tensor), True)
    finally:
        # Remove the config file written above (it was previously left on
        # disk) and release the sessions even when an assertion fails.
        if os.path.exists(config_path):
            os.remove(config_path)
        sess.close()
        if sim is not None:
            sim.session.close()
        tf.compat.v1.reset_default_graph()
def test_parse_config_file_op_type(self):
    """
    Test that op specific quantization parameters are set correctly when
    using json config file.

    The config enables input quantization for Conv, Gemm and
    BatchNormalization op types and enables symmetric bias quantization for
    Conv and Gemm. The expected op mode of every activation quantizer and
    the expected op mode / symmetric flag of every param quantizer are
    checked against the lists below.
    """
    tf.compat.v1.reset_default_graph()
    sess = tf.compat.v1.Session()
    with sess.graph.as_default():
        _ = single_residual()
        init = tf.compat.v1.global_variables_initializer()
        sess.run(init)

    quantsim_config = {
        "defaults": {
            "ops": {},
            "params": {}
        },
        "params": {},
        "op_type": {
            "Conv": {
                "is_input_quantized": "True",
                "params": {
                    "bias": {
                        "is_quantized": "True",
                        "is_symmetric": "True"
                    }
                }
            },
            "Gemm": {
                "is_input_quantized": "True",
                "params": {
                    "bias": {
                        "is_quantized": "True",
                        "is_symmetric": "True"
                    }
                }
            },
            "BatchNormalization": {
                "is_input_quantized": "True"
            }
        },
        "supergroups": [],
        "model_input": {},
        "model_output": {}
    }
    config_path = './quantsim_config.json'

    activation_quantizers = [
        'conv2d/BiasAdd_quantized', 'conv2d_1/BiasAdd_quantized',
        'conv2d_2/BiasAdd_quantized', 'conv2d_3/BiasAdd_quantized',
        'conv2d_4/BiasAdd_quantized', 'input_1_quantized',
        'batch_normalization/cond/Merge_quantized', 'Relu_quantized',
        'max_pooling2d/MaxPool_quantized',
        'batch_normalization_1/cond/Merge_quantized', 'Add_quantized',
        'Relu_2_quantized', 'average_pooling2d/AvgPool_quantized',
        'single_residual/Softmax_quantized', 'Relu_1_quantized'
    ]

    # Activation quantizers this test expects to be enabled (updateStats).
    enabled_activation_quantizers = [
        'input_1_quantized', 'conv2d/BiasAdd_quantized',
        'max_pooling2d/MaxPool_quantized', 'conv2d_2/BiasAdd_quantized',
        'conv2d_3/BiasAdd_quantized', 'Relu_2_quantized',
        'average_pooling2d/AvgPool_quantized'
    ]

    # Kernel/weight param quantizers; expected passThrough and asymmetric.
    weight_quantizers = [
        'conv2d/Conv2D/ReadVariableOp_quantized',
        'conv2d_1/Conv2D/ReadVariableOp_quantized',
        'conv2d_2/Conv2D/ReadVariableOp_quantized',
        'conv2d_3/Conv2D/ReadVariableOp_quantized',
        'conv2d_4/Conv2D/ReadVariableOp_quantized',
        'single_residual/MatMul/ReadVariableOp_quantized'
    ]

    # Bias param quantizers; expected oneShotQuantizeDequantize, symmetric.
    bias_quantizers = [
        'conv2d/BiasAdd/ReadVariableOp_quantized',
        'conv2d_1/BiasAdd/ReadVariableOp_quantized',
        'conv2d_2/BiasAdd/ReadVariableOp_quantized',
        'conv2d_3/BiasAdd/ReadVariableOp_quantized',
        'conv2d_4/BiasAdd/ReadVariableOp_quantized',
        'single_residual/BiasAdd/ReadVariableOp_quantized'
    ]

    sim = None
    try:
        with open(config_path, 'w') as f:
            json.dump(quantsim_config, f)

        sim = QuantizationSimModel(sess, ['input_1'],
                                   ['single_residual/Softmax'],
                                   config_file=config_path)

        for activation_quantizer in activation_quantizers:
            op_mode_tensor = sim.session.graph.get_tensor_by_name(
                activation_quantizer + '_op_mode:0')
            if activation_quantizer in enabled_activation_quantizers:
                self.assertEqual(sim.session.run(op_mode_tensor),
                                 int(pymo.TensorQuantizerOpMode.updateStats))
            else:
                self.assertEqual(sim.session.run(op_mode_tensor),
                                 int(pymo.TensorQuantizerOpMode.passThrough))

        # Same check order as before: weight quantizers first, then biases.
        for param_quantizer in weight_quantizers + bias_quantizers:
            is_symmetric_tensor = sim.session.graph.get_tensor_by_name(
                param_quantizer + '_use_symmetric_encoding:0')
            op_mode_tensor = sim.session.graph.get_tensor_by_name(
                param_quantizer + '_op_mode:0')
            if param_quantizer in bias_quantizers:
                self.assertEqual(
                    sim.session.run(op_mode_tensor),
                    int(pymo.TensorQuantizerOpMode.oneShotQuantizeDequantize))
                self.assertEqual(sim.session.run(is_symmetric_tensor), True)
            else:
                self.assertEqual(sim.session.run(op_mode_tensor),
                                 int(pymo.TensorQuantizerOpMode.passThrough))
                self.assertEqual(sim.session.run(is_symmetric_tensor), False)
    finally:
        # Clean up even when an assertion fails so that the config file and
        # open sessions do not leak into other tests.
        if os.path.exists(config_path):
            os.remove(config_path)
        sess.close()
        if sim is not None:
            sim.session.close()
        tf.compat.v1.reset_default_graph()
def validate_simple_rnn_auto_insertion_and_forward_pass(self, sess):
    """
    Common API to validate automatic quantize-op insertion and the forward
    pass for a simple RNN layer.

    Builds a QuantizationSimModel from `sess`, checks which quantize ops
    were (and were not) inserted inside the RNN while block, computes
    encodings, and checks that the sim output differs from the output of
    `sess`. Both `sess` and the sim's session are closed before returning.

    :param sess: TensorFlow session holding a model with input 'input_1' and
        output 'simplernn_model/Softmax'
    :return: None
    """
    np.random.seed(0)
    tf.compat.v1.set_random_seed(0)

    ops = sess.graph.get_operations()
    matmul_param_quant_op_inside_while_block_name = \
        "simple_rnn/while/MatMul/ReadVariableOp_quantized"
    self.assertNotIn(matmul_param_quant_op_inside_while_block_name,
                     [op.name for op in ops])

    # Construct a quantization sim model.
    sim = QuantizationSimModel(sess, ['input_1'],
                               ['simplernn_model/Softmax'], use_cuda=False)

    # Params that must have quantizers.
    matmul_2_param_quant_op_inside_while_block_name = \
        "simple_rnn/while/MatMul_1/ReadVariableOp_quantized"

    # Check that BiasAdd param quantizers are disabled.
    for p_quantizer in sim._param_quantizers:
        if 'BiasAdd' in p_quantizer:
            p_quant_config = sim.quantizer_config(p_quantizer)
            self.assertFalse(p_quant_config.enabled)

    # Activation (output) quantizer names inside the while block. The
    # assertions below require that none of these four exist in the graph.
    activation_bias_add_op_inside_while_block_name = \
        "simple_rnn/while/BiasAdd_quantized"
    add_op_inside_while_block_name = "simple_rnn/while/add_quantized"
    activation_matmul_op_inside_while_block_name = \
        "simple_rnn/while/MatMul_quantized"
    activation_matmul_2_op_inside_while_block_name = \
        "simple_rnn/while/MatMul_1_quantized"

    # Get the quantize ops that were added to the sim graph.
    quantized_graph_op_names = self._get_quant_ops_from_tf_graph(
        sim.session.graph)

    # While block ops: param quantizers of both MatMuls must be present.
    self.assertIn(matmul_param_quant_op_inside_while_block_name,
                  quantized_graph_op_names)
    self.assertIn(matmul_2_param_quant_op_inside_while_block_name,
                  quantized_graph_op_names)

    # Output quantizers must be absent.
    self.assertNotIn(activation_bias_add_op_inside_while_block_name,
                     quantized_graph_op_names)
    self.assertNotIn(add_op_inside_while_block_name,
                     quantized_graph_op_names)
    self.assertNotIn(activation_matmul_op_inside_while_block_name,
                     quantized_graph_op_names)
    self.assertNotIn(activation_matmul_2_op_inside_while_block_name,
                     quantized_graph_op_names)

    # Input quantizers must be present.
    input_matmul_op_inside_while_block_name = \
        "simple_rnn/while/TensorArrayReadV3_quantized"
    input_matmul_2_op_inside_while_block_name = \
        "simple_rnn/while/Identity_2_quantized"
    self.assertIn(input_matmul_op_inside_while_block_name,
                  quantized_graph_op_names)
    self.assertIn(input_matmul_2_op_inside_while_block_name,
                  quantized_graph_op_names)

    # Validate encodings.
    def dummy_forward_pass(session, args):
        model_output = session.graph.get_tensor_by_name(
            'simplernn_model/Softmax:0')
        model_input = session.graph.get_tensor_by_name('input_1:0')
        dummy_input = np.random.randn(16, 3, 100)
        session.run(model_output, feed_dict={model_input: dummy_input})

    # Named run_model (not eval) so the builtin eval() is not shadowed.
    def run_model(session, input_tensor):
        model_output = session.graph.get_tensor_by_name(
            'simplernn_model/Softmax:0')
        model_input = session.graph.get_tensor_by_name('input_1:0')
        return session.run(model_output,
                           feed_dict={model_input: input_tensor})

    sim.compute_encodings(dummy_forward_pass, None)
    random_tensor = np.random.randn(16, 3, 100)
    orig_out = run_model(sess, random_tensor)

    # Encodings are computed a second time before the sim is evaluated.
    sim.compute_encodings(dummy_forward_pass, None)

    with sim.session.graph.as_default():
        quantized_out = run_model(sim.session, random_tensor)

    # The sim output must differ from the output obtained through `sess`.
    self.assertFalse(np.allclose(orig_out, quantized_out))

    # Close tf sessions.
    sess.close()
    sim.session.close()
    del sim
def run_evaluation(args):
    """
    Build the evaluation graph for a classification network, apply batchnorm folding (and, when
    the checkpoint is not already batchnorm-folded, cross layer equalization and bias
    correction), create a QuantizationSimModel, compute encodings and print the average
    top-1 accuracy.

    :param args: Object carrying the run configuration. Attributes read here: dataset_dir,
        model_name, image_size, labels_offset, batch_size, ckpt_bn_folded, checkpoint_path,
        quant_scheme, round_mode, default_output_bw, default_param_bw, quantsim_config_file,
        encodings_iterations
    :return: None
    :raises ValueError: if args.quant_scheme is not one of the recognized scheme names
    """
    num_classes = 1001 - args.labels_offset

    # Build graph definition
    with tf.Graph().as_default():
        # Create iterator
        tf_records = glob(args.dataset_dir + '/validation*')
        preprocessing_fn = preprocessing_factory.get_preprocessing(args.model_name, is_training=False)
        parse_function = wrap_preprocessing(preprocessing_fn,
                                            height=args.image_size,
                                            width=args.image_size,
                                            num_classes=num_classes,
                                            labels_offset=args.labels_offset)

        dataset = tf.data.TFRecordDataset(tf_records).repeat(1)
        dataset = dataset.map(parse_function, num_parallel_calls=1).apply(
            tf.contrib.data.batch_and_drop_remainder(args.batch_size))
        iterator = dataset.make_initializable_iterator()
        images, labels = iterator.get_next()

        network_fn = nets_factory.get_network_fn(args.model_name,
                                                 num_classes=num_classes,
                                                 is_training=False)
        with tf.device('/cpu:0'):
            images = tf.placeholder_with_default(images,
                                                 shape=(None, args.image_size, args.image_size, 3),
                                                 name='input')
            labels = tf.placeholder_with_default(labels,
                                                 shape=(None, num_classes),
                                                 name='labels')
        logits, _ = network_fn(images)
        confidences = tf.nn.softmax(logits, axis=1, name='confidences')
        categorical_preds = tf.argmax(confidences, axis=1, name='categorical_preds')
        categorical_labels = tf.argmax(labels, axis=1, name='categorical_labels')
        correct_predictions = tf.equal(categorical_labels, categorical_preds)
        # The accuracy tensors are fetched later by name ('top1-acc:0'), so they are created
        # here for their graph side effect only
        tf.reduce_mean(tf.cast(correct_predictions, tf.float32), name='top1-acc')
        tf.reduce_mean(tf.cast(tf.nn.in_top_k(predictions=confidences,
                                              targets=tf.cast(categorical_labels, tf.int32),
                                              k=5), tf.float32), name='top5-acc')

        saver = tf.train.Saver()
        sess = tf.Session()

        # Load model from checkpoint
        if not args.ckpt_bn_folded:
            saver.restore(sess, args.checkpoint_path)
        else:
            sess.run(tf.global_variables_initializer())

    # Fold all BatchNorms before QuantSim
    sess, _ = fold_all_batch_norms(sess, ['IteratorGetNext'], [logits.name[:-2]])

    if args.ckpt_bn_folded:
        # Checkpoint already holds folded weights: restore it on top of the folded graph
        with sess.graph.as_default():
            saver = tf.train.Saver()
            saver.restore(sess, args.checkpoint_path)
    else:
        # Do Cross Layer Equalization and Bias Correction if not loading from a batchnorm folded checkpoint
        sess = equalize_model(sess, ['input'], [logits.op.name])
        conv_bn_dict = BiasCorrection.find_all_convs_bn_with_activation(sess, ['input'], [logits.op.name])
        quant_params = QuantParams(quant_mode=args.quant_scheme)
        bias_correction_dataset = tf.data.TFRecordDataset(tf_records).repeat(1)
        # Bias correction consumes only the first element returned by parse_function
        bias_correction_dataset = bias_correction_dataset.map(
            lambda x: parse_function(x)[0], num_parallel_calls=1).apply(
                tf.contrib.data.batch_and_drop_remainder(args.batch_size))
        bias_correction_params = BiasCorrectionParams(batch_size=args.batch_size,
                                                      num_quant_samples=10,
                                                      num_bias_correct_samples=512,
                                                      input_op_names=['input'],
                                                      output_op_names=[logits.op.name])

        sess = BiasCorrection.correct_bias(reference_model=sess,
                                           bias_correct_params=bias_correction_params,
                                           quant_params=quant_params,
                                           data_set=bias_correction_dataset,
                                           conv_bn_dict=conv_bn_dict,
                                           perform_only_empirical_bias_corr=True)

    # Define eval_func to use for compute encodings in QuantSim
    def eval_func(session, iterations):
        """
        Average 'top1-acc:0' over up to `iterations` batches (-1 means the whole dataset).

        :param session: Session to evaluate
        :param iterations: Number of batches to run, or -1 to run until the dataset is exhausted
        :return: Mean top-1 accuracy over the evaluated batches, 0.0 if no batch was evaluated
        """
        batches_seen = 0
        acc_sum = 0
        session.run('MakeIterator')
        while iterations == -1 or batches_seen < iterations:
            try:
                acc_sum += session.run('top1-acc:0')
            except tf.errors.OutOfRangeError:
                # Dataset exhausted; any other error is a real failure and must propagate
                break
            batches_seen += 1

        if batches_seen == 0:
            return 0.0
        return acc_sum / batches_seen

    # Select the right quant_scheme. The enum member is looked up by name only for the
    # requested scheme.
    quant_scheme_names = {
        'range_learning_tf': 'training_range_learning_with_tf_init',
        'range_learning_tf_enhanced': 'training_range_learning_with_tf_enhanced_init',
        'tf': 'post_training_tf',
        'tf_enhanced': 'post_training_tf_enhanced',
    }
    if args.quant_scheme not in quant_scheme_names:
        raise ValueError("Got unrecognized quant_scheme: " + args.quant_scheme)
    quant_scheme = getattr(aimet_common.defs.QuantScheme, quant_scheme_names[args.quant_scheme])

    # Create QuantizationSimModel
    sim = QuantizationSimModel(
        session=sess,
        starting_op_names=['IteratorGetNext'],
        output_op_names=[logits.name[:-2]],
        quant_scheme=quant_scheme,
        rounding_mode=args.round_mode,
        default_output_bw=args.default_output_bw,
        default_param_bw=args.default_param_bw,
        config_file=args.quantsim_config_file,
    )

    # Run compute_encodings
    sim.compute_encodings(eval_func, forward_pass_callback_args=args.encodings_iterations)

    # Run final evaluation
    sess = sim.session
    top1_acc = eval_func(sess, -1)
    print('Avg accuracy Top 1: {}'.format(top1_acc))
def test_manual_quantize(self):
    """
    Test quantizing a model by manually specifying ops to quantize.

    Three QuantizationSimModel attributes are temporarily replaced on the class so that exactly
    one activation quantizer and one param quantizer are inserted. The original attributes are
    always restored, even if the test fails, so the patches cannot leak into other tests.
    """
    def get_manual_activations(_graph, _starting_ops, _ending_ops):
        """
        Overriding function for getting a list of ops to insert activation quantizers for
        :param _graph: Unused argument
        :param _starting_ops: Unused argument
        :param _ending_ops: Unused argument
        :return: List of ops to insert activation quantizers for, None for placeholder
        """
        return ['conv2d/Relu'], None

    def get_manual_params(_graph, _starting_ops, _ending_ops):
        """
        Overriding function for getting a list of ops to insert param quantizers for
        :param _graph: Unused argument
        :param _starting_ops: Unused argument
        :param _ending_ops: Unused argument
        :return: List of ops to insert param quantizers for, and list of param indices for these ops
        """
        return ['conv2d_1/Conv2D'], [1]

    def configure_quantization_ops(self, _conn_graph, _ops_with_param_names, _indices, _activation_op_names,
                                   _config_file):
        """
        Overriding function for configuring quantization ops inserted by QuantizationSimModel
        :param self: Self refers to QuantizationSimModel object
        :param _conn_graph: Unused argument
        :param _ops_with_param_names: Unused argument
        :param _indices: Unused argument
        :param _activation_op_names: Unused argument
        :param _config_file: Unused argument
        """
        # Toggle each quantizer off and back on; both end up enabled
        conv2d_relu_quant_info = self._activation_quantizers['conv2d/Relu_quantized']
        conv2d_relu_quant_info.enabled = False
        conv2d_relu_quant_info.enabled = True
        conv2d_1_weight_quant_info = self._param_quantizers['conv2d_1/Conv2D/ReadVariableOp_quantized']
        conv2d_1_weight_quant_info.enabled = False
        conv2d_1_weight_quant_info.enabled = True

    tf.compat.v1.reset_default_graph()
    with tf.device('/cpu:0'):
        model = tf.keras.Sequential()
        model.add(tf.keras.layers.Conv2D(32, kernel_size=3, input_shape=(28, 28, 3), activation='relu'))
        model.add(tf.keras.layers.MaxPooling2D((2, 2)))
        model.add(tf.keras.layers.Conv2D(64, kernel_size=3, activation='relu'))
        model.summary()

    sess = tf.compat.v1.Session()
    initialize_uninitialized_vars(sess)

    # Remember the real implementations so they can be put back afterwards
    orig_get_ops_to_quantize_activations_for = QuantizationSimModel._get_ops_to_quantize_activations_for
    orig_get_ops_to_quantize_weights_for = QuantizationSimModel._get_ops_to_quantize_params_for
    orig_configure_quantization_ops = QuantizationSimModel.configure_quantization_ops

    QuantizationSimModel._get_ops_to_quantize_activations_for = get_manual_activations
    QuantizationSimModel._get_ops_to_quantize_params_for = get_manual_params
    QuantizationSimModel.configure_quantization_ops = configure_quantization_ops

    sim = None
    try:
        sim = QuantizationSimModel(sess, ['conv2d_input'], ['conv2d_1/Relu'], use_cuda=False)
        self.assertEqual(1, len(sim._activation_quantizers))
        self.assertEqual(1, len(sim._param_quantizers))
    finally:
        # Undo the class-level patches first so a failure while closing cannot skip it
        QuantizationSimModel._get_ops_to_quantize_activations_for = orig_get_ops_to_quantize_activations_for
        QuantizationSimModel._get_ops_to_quantize_params_for = orig_get_ops_to_quantize_weights_for
        QuantizationSimModel.configure_quantization_ops = orig_configure_quantization_ops

        sess.close()
        if sim is not None:
            sim.session.close()
        del sim
def test_backward_pass_time_taken_lstm(self, is_quantized=True, iterations=1):
    """
    Perform backward pass with (optionally quantized) lstm block and measure the time taken.

    :param is_quantized: If True, a QuantizationSimModel is created and the training step runs
        on its session; otherwise the training step runs on the original session
    :param iterations: Number of training steps to time; must be at least 1
    :return: Average wall-clock time (seconds, from time.perf_counter) of one training step
    :raises ValueError: if iterations is smaller than 1
    """
    # Validate up front: the average below divides by `iterations`
    if iterations < 1:
        raise ValueError("iterations must be at least 1, got {}".format(iterations))

    tf.reset_default_graph()

    sess = tf.Session()
    np.random.seed(0)
    tf.set_random_seed(0)
    timesteps = 5
    batches = 32

    with sess.graph.as_default():
        inputs = tf.keras.Input(shape=(timesteps, 100))

        # Add a lstm layer with 12 internal units.
        x = tf.keras.layers.LSTM(12)(inputs)
        _ = tf.keras.layers.Dense(10, activation=tf.nn.softmax, name="lstm_model")(x)

        init = tf.global_variables_initializer()
        sess.run(init)

    curr_sess = sess
    if is_quantized:
        sim = QuantizationSimModel(sess, ['input_1'], ['lstm_model/Softmax'], use_cuda=False)

        def dummy_forward_pass(session, _args):
            """ Forward pass callback for compute_encodings, fed with random data """
            model_output = session.graph.get_tensor_by_name('lstm_model/Softmax:0')
            model_input = session.graph.get_tensor_by_name('input_1:0')
            dummy_input = np.random.randn(batches, timesteps, 100)
            session.run(model_output, feed_dict={model_input: dummy_input})

        sim.compute_encodings(dummy_forward_pass, None)

        curr_sess = sim.session

    inp_tensor = curr_sess.graph.get_tensor_by_name('input_1:0')
    # Re-seed so the training data does not depend on whether the sim path consumed random numbers
    np.random.seed(0)
    w_shape = inp_tensor.shape
    inp_data = np.random.rand(batches, w_shape[1], w_shape[2])
    logits = curr_sess.graph.get_tensor_by_name('lstm_model/MatMul:0')

    labels = np.random.randint(10, size=batches)
    one_hot_labels = np.eye(10)[labels]

    with curr_sess.graph.as_default():
        var_list = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES)
        labels_placeholder = tf.placeholder(tf.float32, [None, 10], name='labels')
        loss = tf.losses.softmax_cross_entropy(onehot_labels=labels_placeholder, logits=logits)

        update_ops = []
        global_step = tf.train.create_global_step()
        optimizer = tf.train.GradientDescentOptimizer(learning_rate=1e-3)
        gradients = optimizer.compute_gradients(loss, var_list)

        init_global = tf.global_variables_initializer()
        init_local = tf.local_variables_initializer()
        init = tf.group(init_global, init_local)
        curr_sess.run(init)

        grad_updates = optimizer.apply_gradients(gradients, global_step=global_step)
        update_ops.append(grad_updates)
        update_op = tf.group(*update_ops)

        # Fetching train_op forces the gradient update to run as well
        with tf.control_dependencies([update_op]):
            train_op = tf.identity(loss, name='train_op')

        # start training
        time_taken_by_default_grad = 0
        for _ in range(iterations):
            start_time = time.perf_counter()
            _ = curr_sess.run(train_op, feed_dict={inp_tensor: inp_data, labels_placeholder: one_hot_labels})
            exec_time = time.perf_counter() - start_time
            time_taken_by_default_grad = time_taken_by_default_grad + exec_time

        default_grad_avg_time = time_taken_by_default_grad / iterations

    # close session
    sess.close()
    if is_quantized:
        sim.session.close()
        del sim

    return default_grad_avg_time
def test_set_get_quantizer_params_using_properties(self):
    """
    Create QuantSim for a CPU model, test param read and write using properties.

    Exercises the bitwidth, use_symmetric_encoding, rounding_mode, quant_scheme and enabled
    properties of the quantizer config objects returned by sim.quantizer_config().
    """
    tf.compat.v1.reset_default_graph()
    with tf.device('/cpu:0'):
        model = tf.keras.Sequential()
        model.add(tf.keras.layers.Conv2D(32, kernel_size=3, input_shape=(28, 28, 3), activation='relu'))
        model.add(tf.keras.layers.MaxPooling2D((2, 2)))
        model.add(tf.keras.layers.Conv2D(64, kernel_size=3, activation='relu'))
        model.summary()

    sess = tf.compat.v1.Session()
    initialize_uninitialized_vars(sess)

    sim = QuantizationSimModel(sess, [model.input.op.name], [model.output.op.name], use_cuda=False)

    p_quantizer = sim.quantizer_config('conv2d/Conv2D/ReadVariableOp_quantized')
    o_quantizer = sim.quantizer_config('conv2d/Relu_quantized')
    bias_quantizer = sim.quantizer_config('conv2d/BiasAdd/ReadVariableOp_quantized')

    # check if __str__ can print the object info
    print(p_quantizer)

    # bitwidth: starts at 8 and reads back the value written, for param and output quantizers
    self.assertEqual(8, p_quantizer.bitwidth)
    p_quantizer.bitwidth = 6
    self.assertEqual(6, p_quantizer.bitwidth)

    self.assertEqual(8, o_quantizer.bitwidth)
    o_quantizer.bitwidth = 6
    self.assertEqual(6, o_quantizer.bitwidth)

    # symmetric encoding flag on the bias quantizer
    self.assertFalse(bias_quantizer.use_symmetric_encoding)
    bias_quantizer.use_symmetric_encoding = True
    self.assertTrue(bias_quantizer.use_symmetric_encoding)

    # rounding mode on the output quantizer
    self.assertEqual(libpymo.RoundingMode.ROUND_NEAREST, o_quantizer.rounding_mode)
    o_quantizer.rounding_mode = libpymo.RoundingMode.ROUND_STOCHASTIC
    self.assertEqual(libpymo.RoundingMode.ROUND_STOCHASTIC, o_quantizer.rounding_mode)

    # quant scheme on the output quantizer; the encoding is expected to be invalid afterwards
    self.assertEqual(libpymo.QuantizationMode.QUANTIZATION_TF_ENHANCED, o_quantizer.quant_scheme)
    o_quantizer.quant_scheme = QuantScheme.post_training_tf
    self.assertEqual(libpymo.QuantizationMode.QUANTIZATION_TF, o_quantizer.quant_scheme)
    self.assertFalse(o_quantizer.tensor_quantizer.isEncodingValid)

    # enabled flag on the param quantizer
    self.assertTrue(p_quantizer.enabled)
    p_quantizer.enabled = False
    self.assertFalse(p_quantizer.enabled)

    # close both the original session and the quantsim session
    sess.close()
    sim.session.close()
    del sim
def test_save_load_ckpt_after_compute_encoding_on_orig_object(self):
    """
    Create QuantSim for a CPU model, test save and load on a quantsim model
    when encodings have been computed on original quantsim object.

    The loaded quantsim must report the same isEncodingValid flag and the same encoding min/max
    values as the original one for every param and activation quantizer.
    """
    tf.compat.v1.reset_default_graph()
    with tf.device('/cpu:0'):
        model = tf.keras.Sequential()
        model.add(tf.keras.layers.Conv2D(32, kernel_size=3, input_shape=(28, 28, 3), activation='relu'))
        model.add(tf.keras.layers.MaxPooling2D((2, 2)))
        model.add(tf.keras.layers.Conv2D(64, kernel_size=3, activation='relu'))
        model.summary()

    sess = tf.compat.v1.Session()
    initialize_uninitialized_vars(sess)
    sim = QuantizationSimModel(sess, [model.input.op.name], [model.output.op.name], use_cuda=False)

    def dummy_forward_pass(n_sess, args):
        """ Forward pass callback for compute_encodings, fed with random data """
        model_output = n_sess.graph.get_tensor_by_name(model.output.name)
        # Evaluate the first consumer of the model output rather than the output itself
        # (presumably the quantize op inserted after it -- TODO confirm)
        model_output = model_output.consumers()[0].outputs[0]
        model_input = n_sess.graph.get_tensor_by_name(model.input.name)
        dummy_input = np.random.randn(20, 28, 28, 3)
        n_sess.run(model_output, feed_dict={model_input: dummy_input})

    sim.compute_encodings(dummy_forward_pass, None)

    # save quantsim model
    save_checkpoint(sim, './test_3', 'orig_quantsim_model')

    new_quantsim = load_checkpoint('./test_3', 'orig_quantsim_model')

    # validations
    self.assertIsNot(sim, new_quantsim)

    # as we have performed computeEncodings() on saved quantsim object, these must be set to True/False
    # in loaded quantsim object as on orig model
    quantizer_groups = (
        (new_quantsim._param_quantizers, sim._param_quantizers),
        (new_quantsim._activation_quantizers, sim._activation_quantizers),
    )
    for loaded_quantizers, orig_quantizers in quantizer_groups:
        for quantize_op in loaded_quantizers:
            loaded = loaded_quantizers[quantize_op]
            original = orig_quantizers[quantize_op]
            self.assertEqual(loaded.tensor_quantizer.isEncodingValid,
                             original.tensor_quantizer.isEncodingValid)
            self.assertEqual(loaded.get_variable_from_op(QuantizeOpIndices.encoding_min),
                             original.get_variable_from_op(QuantizeOpIndices.encoding_min))
            self.assertEqual(loaded.get_variable_from_op(QuantizeOpIndices.encoding_max),
                             original.get_variable_from_op(QuantizeOpIndices.encoding_max))

    # delete temp folder created and close sessions
    shutil.rmtree('./test_3')
    sess.close()
    sim.session.close()
    new_quantsim.session.close()
    del sim
    del new_quantsim
def _save_to_keras_common_test_code(self, use_cuda):
    """
    Shared body for the save_to_keras tests: build a small conv model, create a quantsim,
    compute encodings, call save_to_keras() and validate the resulting graph.

    :param use_cuda: Forwarded to QuantizationSimModel; also selects how the model is built
    """
    def build_model():
        """ Build and summarize the two-conv keras model used by this test """
        keras_model = tf.keras.Sequential()
        keras_model.add(tf.keras.layers.Conv2D(32, kernel_size=3, input_shape=(28, 28, 3), activation='relu'))
        keras_model.add(tf.keras.layers.MaxPooling2D((2, 2)))
        keras_model.add(tf.keras.layers.Conv2D(64, kernel_size=3, activation='relu'))
        keras_model.summary()
        return keras_model

    tf.compat.v1.reset_default_graph()
    # NOTE(review): the explicit '/cpu:0' placement is applied on the use_cuda=True path and
    # no device scope on the CPU path -- this looks inverted; confirm it is intentional.
    if use_cuda:
        with tf.device('/cpu:0'):
            model = build_model()
    else:
        model = build_model()

    sess = tf.compat.v1.Session()
    initialize_uninitialized_vars(sess)

    sim = QuantizationSimModel(sess, ['conv2d_input'], ['conv2d_1/Relu'], use_cuda=use_cuda)

    # Check that op-mode is set correctly (input index 1 of a quantize op carries its op-mode)
    sim_graph = sim.session.graph
    weight_quant_op = sim_graph.get_operation_by_name('conv2d/Conv2D/ReadVariableOp_quantized')
    output_quant_op = sim_graph.get_operation_by_name('conv2d/Relu_quantized')
    self.assertEqual(int(libpymo.TensorQuantizerOpMode.oneShotQuantizeDequantize),
                     sim.session.run(weight_quant_op.inputs[1]))
    self.assertEqual(int(libpymo.TensorQuantizerOpMode.updateStats),
                     sim.session.run(output_quant_op.inputs[1]))

    def dummy_forward_pass(session, eval_tensor_name):
        """ Evaluate the named tensor of the session's graph on random input data """
        fetch = session.graph.get_tensor_by_name(eval_tensor_name)
        placeholder = session.graph.get_tensor_by_name('conv2d_input:0')
        session.run(fetch, feed_dict={placeholder: np.random.randn(20, 28, 28, 3)})

    sim.compute_encodings(dummy_forward_pass, 'conv2d_1/Relu_quantized:0')
    mod_sess = sim.save_to_keras()

    # Check 1: The new graph is well formed. Try forward pass through the graph.
    dummy_forward_pass(mod_sess, 'conv2d_1/Relu_quantized_static:0')

    # Check 2: All the QcQuantizeOp nodes have no output - meaning are disconnected from the main graph
    exported_ops = mod_sess.graph.get_operations()
    quantize_ops = [op for op in exported_ops if op.type == "QcQuantize"]
    for quantize_op in quantize_ops:
        self.assertFalse(quantize_op.outputs[0].consumers())

    # Check 3: One QcQuantizeStatic for each QcQuantize op
    static_op_count = sum(1 for op in exported_ops if op.type == "QcQuantizeStatic")
    self.assertEqual(len(quantize_ops), static_op_count)

    # Check 4: Make sure the attributes are set correctly
    static_weight_op = mod_sess.graph.get_operation_by_name("conv2d/Conv2D/ReadVariableOp_quantized_static")
    self.assertEqual(8, static_weight_op.get_attr("bitwidth"))
    self.assertEqual(1, static_weight_op.get_attr("quant_scheme"))  # TF-Enhanced
    self.assertEqual(1, static_weight_op.get_attr("op_mode"))  # oneShotQuantizeDequantize

    static_bias_add_op = mod_sess.graph.get_operation_by_name("conv2d/BiasAdd_quantized_static")
    self.assertEqual(3, static_bias_add_op.get_attr("op_mode"))  # passThrough

    static_relu_op = mod_sess.graph.get_operation_by_name("conv2d/Relu_quantized_static")
    self.assertEqual(8, static_relu_op.get_attr("bitwidth"))
    self.assertEqual(1, static_relu_op.get_attr("quant_scheme"))  # TF-Enhanced
    self.assertEqual(2, static_relu_op.get_attr("op_mode"))  # quantizeDequantize

    sess.close()
    sim.session.close()
    del sim
def test_save_load_ckpt_cpu_model(self):
    """
    Create QuantSim for a CPU model, test save and load on a quantsim model.

    No encodings are computed, so every quantizer of both the original and the loaded quantsim
    is expected to report an invalid encoding. The loaded quantsim must be a distinct object
    with its own session while matching the original's quant scheme, rounding mode, cuda flag
    and quantizer counts.
    """
    tf.compat.v1.reset_default_graph()
    with tf.device('/cpu:0'):
        model = tf.keras.Sequential()
        model.add(tf.keras.layers.Conv2D(32, kernel_size=3, input_shape=(28, 28, 3), activation='relu'))
        model.add(tf.keras.layers.MaxPooling2D((2, 2)))
        model.add(tf.keras.layers.Conv2D(64, kernel_size=3, activation='relu'))
        model.summary()

    sess = tf.compat.v1.Session()
    initialize_uninitialized_vars(sess)
    sim = QuantizationSimModel(sess, [model.input.op.name], [model.output.op.name], use_cuda=False)

    # save quantsim model
    save_checkpoint(sim, './test_3', 'orig_quantsim_model')

    new_quantsim = load_checkpoint('./test_3', 'orig_quantsim_model')

    # validations
    self.assertIsNot(sim, new_quantsim)
    self.assertIsNotNone(new_quantsim.session)
    self.assertEqual(new_quantsim._quant_scheme, sim._quant_scheme)
    self.assertEqual(new_quantsim._rounding_mode, sim._rounding_mode)
    self.assertEqual(new_quantsim._use_cuda, sim._use_cuda)
    self.assertEqual(len(new_quantsim._param_quantizers), len(sim._param_quantizers))
    self.assertEqual(len(new_quantsim._activation_quantizers), len(sim._activation_quantizers))

    # the same per-quantizer checks apply to param and activation quantizers
    quantizer_groups = (
        (sim._param_quantizers, new_quantsim._param_quantizers),
        (sim._activation_quantizers, new_quantsim._activation_quantizers),
    )
    for orig_quantizers, loaded_quantizers in quantizer_groups:
        for quantize_op in loaded_quantizers:
            original = orig_quantizers[quantize_op]
            loaded = loaded_quantizers[quantize_op]
            # the loaded quantizer must be bound to the new session, not the original one
            self.assertNotEqual(original.session, loaded.session)
            self.assertEqual(original.tensor_quantizer.getQuantScheme(),
                             loaded.tensor_quantizer.getQuantScheme())
            self.assertEqual(original.tensor_quantizer.roundingMode,
                             loaded.tensor_quantizer.roundingMode)
            self.assertFalse(original.tensor_quantizer.isEncodingValid)
            self.assertFalse(loaded.tensor_quantizer.isEncodingValid)

    # remove the old quant sim reference and session
    # to test that everything is loaded correctly on new quantsim including tensor quantizer references
    sim.session.close()
    del sim

    # delete temp folder created and close sessions
    shutil.rmtree('./test_3')
    sess.close()
    new_quantsim.session.close()
    del new_quantsim
def test_compute_encodings_gpu_model(self):
    """
    Create QuantSim for a GPU model and test that activation encodings are computed.

    Before compute_encodings the output quantizer is expected to be in updateStats mode and
    afterwards in quantizeDequantize mode; the weight quantizer stays in
    oneShotQuantizeDequantize mode. Quantizers listed as deactivated must be in passThrough mode.
    """
    tf.compat.v1.reset_default_graph()
    with tf.device('/gpu:0'):
        model = tf.keras.Sequential()
        model.add(tf.keras.layers.Conv2D(32, kernel_size=3, input_shape=(28, 28, 3), activation='relu'))
        model.add(tf.keras.layers.MaxPooling2D((2, 2)))
        model.add(tf.keras.layers.Conv2D(64, kernel_size=3, activation='relu'))
        model.summary()

    sess = tf.compat.v1.Session()
    initialize_uninitialized_vars(sess)
    sim = QuantizationSimModel(sess, ['conv2d_input'], ['conv2d_1/Relu'], use_cuda=True)

    def assert_op_modes(expected_output_op_mode):
        """ Check the op-mode input (index 1) of the first conv's weight and output quantize ops """
        conv2d_weight_quant_op = sim.session.graph.get_operation_by_name('conv2d/Conv2D/ReadVariableOp_quantized')
        conv2d_output_quant_op = sim.session.graph.get_operation_by_name('conv2d/Relu_quantized')
        self.assertEqual(int(libpymo.TensorQuantizerOpMode.oneShotQuantizeDequantize),
                         sim.session.run(conv2d_weight_quant_op.inputs[1]))
        self.assertEqual(int(expected_output_op_mode),
                         sim.session.run(conv2d_output_quant_op.inputs[1]))

    # Check that op-mode is set correctly
    assert_op_modes(libpymo.TensorQuantizerOpMode.updateStats)

    def dummy_forward_pass(sess, args):
        """ Forward pass callback for compute_encodings, fed with random data """
        model_output = sess.graph.get_tensor_by_name('conv2d_1/Relu_quantized:0')
        model_input = sess.graph.get_tensor_by_name('conv2d_input:0')
        dummy_input = np.random.randn(20, 28, 28, 3)
        sess.run(model_output, feed_dict={model_input: dummy_input})

    sim.compute_encodings(dummy_forward_pass, None)

    # Check if encodings have been calculated
    deactivated_quantizers = [
        'conv2d_input_quantized',
        'conv2d/BiasAdd_quantized',
        'conv2d_1/BiasAdd_quantized'
    ]
    for name, quantizer in sim._activation_quantizers.items():
        if name in deactivated_quantizers:
            # assertEqual, not assertTrue: with assertTrue the second argument is only the
            # failure message, so the op-mode was never actually compared
            self.assertEqual(int(libpymo.TensorQuantizerOpMode.passThrough),
                             sim.session.run(name + '_op_mode/read:0'))
        else:
            self.assertTrue(quantizer.tensor_quantizer.isEncodingValid,
                            "quantizer: {} does not have a valid encoding".format(name))

    # Check that op-mode is set correctly
    # Check that quantized ops got added for all params
    assert_op_modes(libpymo.TensorQuantizerOpMode.quantizeDequantize)

    sess.close()
    sim.session.close()
    del sim
def run_inference(self, ckpt_path, image_files, labels, enable_ema=True, export_ckpt=None):
    """
    Build the model, quantize it with QuantizationSimModel and run inference on the target
    images and labels.

    :param ckpt_path: Checkpoint to restore the model weights from
    :param image_files: Image files to run inference on
    :param labels: Labels passed to build_dataset together with image_files
    :param enable_ema: Not used by this implementation
    :param export_ckpt: Not used by this implementation
    :return: Tuple (prediction_idx, prediction_prob) holding the top 5 class indices (shifted
        by the label offset) and the matching probabilities for every evaluated batch
    :raises ValueError: if self.quant_scheme is not one of the recognized scheme names
    """
    if self.include_background_label:
        label_offset = 1
    else:
        label_offset = 0

    with tf.Graph().as_default():
        sess = tf.Session()
        images, _dataset_labels = self.build_dataset(image_files, labels, False)
        probs = self.build_model(images, is_training=False)
        if isinstance(probs, tuple):
            probs = probs[0]

        if self.ckpt_bn_folded:
            sess.run(tf.global_variables_initializer())
        else:
            tf.train.Saver().restore(sess, ckpt_path)

    # Fold all BatchNorms before QuantSim
    sess, _folded_pairs = fold_all_batch_norms(sess, ['IteratorGetNext'], ['logits'])

    if self.ckpt_bn_folded:
        # Checkpoint already holds folded weights: restore it on top of the folded graph
        with sess.graph.as_default():
            tf.train.Saver().restore(sess, ckpt_path)

    sess.run('MakeIterator')

    # Define an eval function to use during compute encodings
    def eval_func(session, iterations):
        """ Re-initialize the iterator and run `iterations` forward passes """
        session.run('MakeIterator')
        for _ in range(iterations):
            session.run('Squeeze:0')

    # Select the right quant_scheme; the enum member is looked up only for the requested name
    scheme_attr_by_name = {
        'range_learning_tf': 'training_range_learning_with_tf_init',
        'range_learning_tf_enhanced': 'training_range_learning_with_tf_enhanced_init',
        'tf': 'post_training_tf',
        'tf_enhanced': 'post_training_tf_enhanced',
    }
    if self.quant_scheme not in scheme_attr_by_name:
        raise ValueError("Got unrecognized quant_scheme: " + self.quant_scheme)
    quant_scheme = getattr(aimet_common.defs.QuantScheme, scheme_attr_by_name[self.quant_scheme])

    # Create QuantizationSimModel
    sim = QuantizationSimModel(
        session=sess,
        starting_op_names=['IteratorGetNext'],
        output_op_names=['logits'],
        quant_scheme=quant_scheme,
        rounding_mode=self.round_mode,
        default_output_bw=self.default_output_bw,
        default_param_bw=self.default_param_bw,
        config_file=self.quantsim_config_file,
    )

    # Run compute_encodings
    sim.compute_encodings(eval_func, forward_pass_callback_args=500)

    # Run final evaluation
    sess = sim.session
    sess.run('MakeIterator')
    prediction_idx = []
    prediction_prob = []
    num_batches = len(image_files) // self.batch_size
    for _ in range(num_batches):
        out_probs = sess.run('Squeeze:0')
        # indices of the five highest probabilities, best first
        top5 = np.argsort(out_probs)[::-1][:5]
        prediction_idx.append(top5 - label_offset)
        prediction_prob.append([out_probs[pid] for pid in top5])

    # Return the top 5 predictions (idx and prob) for each image.
    return prediction_idx, prediction_prob
def test_export_cpu_model(self):
    """
    Create QuantSim for a CPU model, compute encodings and export out a resulting model.

    Verifies the structure of the exported '/tmp/quant_sim_model.encodings' json file and that
    the exported meta graph carries the modified bias values and no 'QcQuantize' ops.
    """
    tf.compat.v1.reset_default_graph()
    with tf.device('/cpu:0'):
        model = tf.keras.Sequential()
        model.add(tf.keras.layers.Conv2D(32, kernel_size=3, input_shape=(28, 28, 3), activation='relu'))
        model.add(tf.keras.layers.MaxPooling2D((2, 2)))
        model.add(tf.keras.layers.Conv2D(64, kernel_size=3, activation='relu'))
        model.summary()

    sess = tf.compat.v1.Session()
    initialize_uninitialized_vars(sess)
    sim = QuantizationSimModel(sess, [model.input.op.name], [model.output.op.name], use_cuda=False)

    def dummy_forward_pass(sess, args):
        """ Forward pass callback for compute_encodings, fed with random data """
        model_output = sess.graph.get_tensor_by_name(model.output.name)
        # Evaluate the first consumer of the model output rather than the output itself
        # (presumably the quantize op inserted after it -- TODO confirm)
        model_output = model_output.consumers()[0].outputs[0]
        model_input = sess.graph.get_tensor_by_name(model.input.name)
        dummy_input = np.random.randn(20, 28, 28, 3)
        sess.run(model_output, feed_dict={model_input: dummy_input})

    sim.compute_encodings(dummy_forward_pass, None)

    # Make some changes to model parameters to see if they are part of the exported model
    with sim.session.graph.as_default():
        first_bias_tensor = sim.session.graph.get_tensor_by_name('conv2d/BiasAdd/ReadVariableOp:0')
        first_bias_tensor_val = sim.session.run(first_bias_tensor)
        self.assertTrue(np.any(first_bias_tensor_val == 0))
        first_bias_tensor_var = [var for var in tf.compat.v1.global_variables() if var.name == 'conv2d/bias:0'][0]
        first_bias_tensor_var.load(np.ones(32), sim.session)

    all_op_types = [op.type for op in sim.session.graph.get_operations()]
    self.assertIn('QcQuantize', all_op_types)

    sim.export('/tmp', 'quant_sim_model')

    with open('/tmp/quant_sim_model.encodings') as json_file:
        encoding_data = json.load(json_file)

    # every encoding entry must carry these fields
    expected_encoding_fields = ("bitwidth", "is_symmetric", "max", "min", "offset", "scale")

    activation_encodings = encoding_data["activation_encodings"]
    activation_keys = list(activation_encodings.keys())
    self.assertEqual("conv2d/Relu:0", activation_keys[0])
    self.assertIsInstance(activation_encodings["conv2d/Relu:0"], list)
    act_encoding_keys = activation_encodings["conv2d/Relu:0"][0].keys()
    for field in expected_encoding_fields:
        self.assertIn(field, act_encoding_keys)

    param_encodings = encoding_data["param_encodings"]
    param_keys = list(param_encodings.keys())
    self.assertEqual("conv2d/Conv2D/ReadVariableOp:0", param_keys[0])
    self.assertIsInstance(param_encodings["conv2d/Conv2D/ReadVariableOp:0"], list)
    param_encoding_keys = param_encodings["conv2d/Conv2D/ReadVariableOp:0"][0].keys()
    for field in expected_encoding_fields:
        self.assertIn(field, param_encoding_keys)

    # the exported graph must contain the bias values loaded above and no quantize ops
    new_sess = load_model_from_meta('/tmp/quant_sim_model.meta')
    first_bias_tensor = new_sess.graph.get_tensor_by_name('conv2d/BiasAdd/ReadVariableOp:0')
    first_bias_tensor_val = new_sess.run(first_bias_tensor)
    self.assertTrue(np.any(first_bias_tensor_val == 1))

    all_op_types = [op.type for op in new_sess.graph.get_operations()]
    self.assertNotIn('QcQuantize', all_op_types)

    sess.close()
    sim.session.close()
    new_sess.close()
    del sim