def test_quantize_simple_rnn_export(self):
    """ Test model export for recurrent models """
    tf.compat.v1.reset_default_graph()
    sess = tf.compat.v1.Session()
    np.random.seed(0)
    tf.compat.v1.set_random_seed(0)

    with sess.graph.as_default():
        inputs = tf.keras.Input(shape=(3, 100))

        # Add two stacked RNN layers, each with 10 internal units
        x = tf.keras.layers.SimpleRNN(10, name='rnn1', return_sequences=True)(inputs)
        x = tf.keras.layers.SimpleRNN(10, name='rnn2')(x)
        _ = tf.keras.layers.Dense(10, activation=tf.nn.softmax, name="fc")(x)

        init = tf.compat.v1.global_variables_initializer()
        sess.run(init)

    sim = QuantizationSimModel(sess, ['input_1'], ['fc/Softmax'], use_cuda=False)

    def dummy_forward_pass(sess, args):
        model_output = sess.graph.get_tensor_by_name('fc/Softmax:0')
        model_input = sess.graph.get_tensor_by_name('input_1:0')
        dummy_input = np.random.randn(1, 3, 100)
        sess.run(model_output, feed_dict={model_input: dummy_input})

    sim.compute_encodings(dummy_forward_pass, None)
    sim.export('./data', 'rnn_quantsim')

    # The exported model should run without any quantize ops left in the graph
    new_sess = load_model_from_meta('./data/rnn_quantsim.meta')
    dummy_forward_pass(new_sess, None)

    all_op_types = [op.type for op in new_sess.graph.get_operations()]
    self.assertNotIn('QcQuantize', all_op_types)
    self.assertNotIn('QcQuantizeRecurrentParam', all_op_types)

    # Load the encodings file to check if the encodings were exported correctly
    with open("./data/rnn_quantsim.encodings", "r") as encodings_file:
        encodings = json.load(encodings_file)

    self.assertEqual(8, len(encodings['activation_encodings']))
    self.assertEqual(5, len(encodings['param_encodings']))

    # close tf sessions
    sess.close()
    sim.session.close()
    del sim
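# A shared checker is sketched here as a hypothetical addition (it is not part of
# the original suite): test_export_cpu_model below asserts the same six fields on
# every exported encoding entry, so a helper like this could de-duplicate those
# checks. The method name is an assumption for illustration.
def assert_encoding_fields(self, encoding_entry):
    """ Verify a single exported encoding dict carries the expected fields """
    for key in ("bitwidth", "is_symmetric", "max", "min", "offset", "scale"):
        self.assertIn(key, encoding_entry)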
def test_export_cpu_model(self):
    """ Create QuantSim for a CPU model, compute encodings and export out a resulting model """
    tf.compat.v1.reset_default_graph()

    with tf.device('/cpu:0'):
        model = tf.keras.Sequential()
        model.add(tf.keras.layers.Conv2D(32, kernel_size=3, input_shape=(28, 28, 3), activation='relu'))
        model.add(tf.keras.layers.MaxPooling2D((2, 2)))
        model.add(tf.keras.layers.Conv2D(64, kernel_size=3, activation='relu'))
        model.summary()

    sess = tf.compat.v1.Session()
    initialize_uninitialized_vars(sess)
    sim = QuantizationSimModel(sess, [model.input.op.name], [model.output.op.name], use_cuda=False)

    def dummy_forward_pass(sess, args):
        model_output = sess.graph.get_tensor_by_name(model.output.name)
        model_output = model_output.consumers()[0].outputs[0]
        model_input = sess.graph.get_tensor_by_name(model.input.name)
        dummy_input = np.random.randn(20, 28, 28, 3)
        sess.run(model_output, feed_dict={model_input: dummy_input})

    sim.compute_encodings(dummy_forward_pass, None)

    # Make some changes to model parameters to see if they are part of the exported model
    with sim.session.graph.as_default():
        first_bias_tensor = sim.session.graph.get_tensor_by_name('conv2d/BiasAdd/ReadVariableOp:0')
        first_bias_tensor_val = sim.session.run(first_bias_tensor)
        self.assertTrue(np.any(first_bias_tensor_val == 0))
        first_bias_tensor_var = [var for var in tf.compat.v1.global_variables()
                                 if var.name == 'conv2d/bias:0'][0]
        first_bias_tensor_var.load(np.ones(32), sim.session)

    all_op_types = [op.type for op in sim.session.graph.get_operations()]
    self.assertIn('QcQuantize', all_op_types)

    sim.export('/tmp', 'quant_sim_model')

    with open('/tmp/quant_sim_model.encodings') as json_file:
        encoding_data = json.load(json_file)

    activation_keys = list(encoding_data["activation_encodings"].keys())
    self.assertTrue(activation_keys[0] == "conv2d/Relu:0")
    self.assertTrue(isinstance(encoding_data["activation_encodings"]["conv2d/Relu:0"], list))

    act_encoding_keys = encoding_data["activation_encodings"]["conv2d/Relu:0"][0].keys()
    self.assertTrue("bitwidth" in act_encoding_keys)
    self.assertTrue("is_symmetric" in act_encoding_keys)
    self.assertTrue("max" in act_encoding_keys)
    self.assertTrue("min" in act_encoding_keys)
    self.assertTrue("offset" in act_encoding_keys)
    self.assertTrue("scale" in act_encoding_keys)

    param_keys = list(encoding_data["param_encodings"].keys())
    self.assertTrue(param_keys[0] == "conv2d/Conv2D/ReadVariableOp:0")
    self.assertTrue(isinstance(encoding_data["param_encodings"]["conv2d/Conv2D/ReadVariableOp:0"], list))

    param_encoding_keys = encoding_data["param_encodings"]["conv2d/Conv2D/ReadVariableOp:0"][0].keys()
    self.assertTrue("bitwidth" in param_encoding_keys)
    self.assertTrue("is_symmetric" in param_encoding_keys)
    self.assertTrue("max" in param_encoding_keys)
    self.assertTrue("min" in param_encoding_keys)
    self.assertTrue("offset" in param_encoding_keys)
    self.assertTrue("scale" in param_encoding_keys)

    # The exported model should carry the modified bias values and no quantize ops
    new_sess = load_model_from_meta('/tmp/quant_sim_model.meta')
    first_bias_tensor = new_sess.graph.get_tensor_by_name('conv2d/BiasAdd/ReadVariableOp:0')
    first_bias_tensor_val = new_sess.run(first_bias_tensor)
    self.assertTrue(np.any(first_bias_tensor_val == 1))

    all_op_types = [op.type for op in new_sess.graph.get_operations()]
    self.assertNotIn('QcQuantize', all_op_types)

    sess.close()
    sim.session.close()
    del sim
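# Usage sketch (hypothetical, not part of the test suite): both tests above rely
# on the exported .encodings file being JSON with "activation_encodings" and
# "param_encodings" maps from tensor name to a list of encoding dicts. A
# downstream consumer could walk that structure as below; the function name and
# its placement as a module-level helper are assumptions for illustration.
def summarize_exported_encodings(encodings_path):
    """ Print bitwidth and min/max range for every exported encoding entry """
    with open(encodings_path) as encodings_file:
        encodings = json.load(encodings_file)
    for section in ("activation_encodings", "param_encodings"):
        for tensor_name, entries in encodings[section].items():
            for entry in entries:
                print(tensor_name, entry["bitwidth"], entry["min"], entry["max"])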