예제 #1
0
    def test_set_and_freeze_op_mode(self):
        """
        Verify that op mode changes are ignored once a quantizer has been frozen.

        A QuantSim is created for a CPU model, the op mode of the first conv weight
        quantizer is set and then frozen, and a subsequent set_op_mode call is expected
        to leave the op mode untouched.
        """
        tf.compat.v1.reset_default_graph()
        with tf.device('/cpu:0'):
            _ = keras_model()
            init_op = tf.compat.v1.global_variables_initializer()

        tf_session = tf.compat.v1.Session()
        tf_session.run(init_op)

        quant_sim = QuantizationSimModel(tf_session,
                                         ['conv2d_input'],
                                         ['keras_model/Softmax'],
                                         use_cuda=False)
        weight_quantizer = quant_sim.quantizer_config(
            'conv2d/Conv2D/ReadVariableOp_quantized')

        # Op modes are compared as plain ints
        frozen_mode = int(
            libpymo.TensorQuantizerOpMode.oneShotQuantizeDequantize)
        rejected_mode = int(libpymo.TensorQuantizerOpMode.passThrough)

        # Set the op mode, then freeze the quantizer
        weight_quantizer.set_op_mode(frozen_mode)
        weight_quantizer.freeze_encoding()
        self.assertEqual(frozen_mode, weight_quantizer.get_op_mode())

        # A later update must not take effect; the frozen mode is still reported
        weight_quantizer.set_op_mode(rejected_mode)
        self.assertNotEqual(rejected_mode, weight_quantizer.get_op_mode())
        self.assertEqual(frozen_mode, weight_quantizer.get_op_mode())

        tf_session.close()
예제 #2
0
    def test_quantize_simple_rnn_save_and_load_checkpoint(self):
        """
        Test checkpoint save/load round trip of a quantsim built on stacked SimpleRNN layers.

        The sim restored from the checkpoint must produce the same output as the original sim
        for the same input tensor.
        """
        tf.reset_default_graph()

        sess = tf.Session()
        np.random.seed(0)
        tf.set_random_seed(0)

        with sess.graph.as_default():
            inputs = tf.keras.Input(shape=(3, 100))

            # Two stacked RNN layers with 10 internal units each.
            x = tf.keras.layers.SimpleRNN(10,
                                          name='rnn1',
                                          return_sequences=True)(inputs)
            x = tf.keras.layers.SimpleRNN(10, name='rnn2')(x)

            _ = tf.keras.layers.Dense(10, activation=tf.nn.softmax,
                                      name="fc")(x)

            init = tf.global_variables_initializer()
            sess.run(init)

        sim = QuantizationSimModel(sess, ['input_1'], ['fc/Softmax'],
                                   use_cuda=False)

        # Named run_inference (not eval) so the builtin is not shadowed
        def run_inference(session, input_tensor):
            """ Evaluate the fc/Softmax output of the given session for input_tensor """
            model_output = session.graph.get_tensor_by_name('fc/Softmax:0')
            model_input = session.graph.get_tensor_by_name('input_1:0')
            return session.run(model_output,
                               feed_dict={model_input: input_tensor})

        def dummy_forward_pass(sess, args):
            """ Forward pass callback for compute_encodings; args is unused """
            dummy_input = np.random.randn(1, 3, 100)
            run_inference(sess, dummy_input)

        sim.compute_encodings(dummy_forward_pass, None)
        random_tensor = np.random.randn(1, 3, 100)
        old_out = run_inference(sim.session, random_tensor)

        save_checkpoint(sim, './data/', 'simple_rnn_save')
        new_sim = load_checkpoint('./data', 'simple_rnn_save')

        # Check to make sure that inference through the new sim produces exactly the same output as the old sim
        # This checks that quantization parameters have been restored correctly
        # Also checks that we are able to invoke quantize-dequantize ops in the new session (so pymo objects were
        # restored correctly etc.)
        new_out = run_inference(new_sim.session, random_tensor)
        self.assertTrue(np.allclose(old_out, new_out))

        # close every tf session created by this test, including the restored one
        new_sim.session.close()
        sess.close()
        sim.session.close()
        del new_sim
        del sim
예제 #3
0
    def test_set_and_freeze_param_encodings(self):
        """
        Test set and freeze parameter encodings functionality.

        Parameter encodings are written to a temporary JSON file, loaded through
        set_and_freeze_param_encodings, and then compared against the values read back
        from the corresponding quantizer. The session and the temporary file are released
        even when an assertion fails.
        """
        tf.compat.v1.reset_default_graph()
        with tf.device('/cpu:0'):
            _ = keras_model()
            init = tf.compat.v1.global_variables_initializer()

        session = tf.compat.v1.Session()
        session.run(init)

        # Single source of truth for the encodings file location
        encoding_file_path = os.path.join('./', 'dummy.encodings')
        param_name = 'conv2d/Conv2D/ReadVariableOp:0'
        param_encodings = {
            param_name: [{
                'bitwidth': 4,
                'is_symmetric': False,
                'max': 0.14584073424339294,
                'min': -0.12761062383651733,
                'offset': -7.0,
                'scale': 0.01823008991777897
            }]
        }

        try:
            sim = QuantizationSimModel(session, ['conv2d_input'],
                                       ['keras_model/Softmax'],
                                       use_cuda=False)

            # export encodings to JSON file
            with open(encoding_file_path, 'w') as encoding_fp:
                json.dump(param_encodings,
                          encoding_fp,
                          sort_keys=True,
                          indent=4)

            sim.set_and_freeze_param_encodings(
                encoding_path=encoding_file_path)

            quantizer = sim.quantizer_config(
                'conv2d/Conv2D/ReadVariableOp_quantized')
            encoding = param_encodings[param_name][0]

            encoding_max = quantizer.get_variable_from_op(
                QuantizeOpIndices.encoding_max)
            encoding_min = quantizer.get_variable_from_op(
                QuantizeOpIndices.encoding_min)

            self.assertEqual(encoding_min, encoding.get('min'))
            self.assertEqual(encoding_max, encoding.get('max'))
            self.assertEqual(
                int(libpymo.TensorQuantizerOpMode.quantizeDequantize),
                quantizer.get_op_mode())
            self.assertEqual(quantizer.is_encoding_valid(), True)
        finally:
            session.close()

            # Delete encodings JSON file
            if os.path.exists(encoding_file_path):
                os.remove(encoding_file_path)
예제 #4
0
    def test_insert_quant_op_recurrent(self):
        """
        Test insertion of quant ops to recurrent layer with conditional blocks.

        The quantize op inside the RNN while-block must be absent before the quantsim is
        constructed and present in the sim's graph afterwards.
        """

        tf.compat.v1.reset_default_graph()
        sess = tf.compat.v1.Session()

        with sess.graph.as_default():
            inputs = tf.keras.Input(shape=(3, 100))

            # Add an RNN layer with 12 internal units, followed by a softmax dense layer
            x = tf.keras.layers.SimpleRNN(12)(inputs)
            _ = tf.keras.layers.Dense(12,
                                      activation=tf.nn.softmax,
                                      name="simplernn_model")(x)

        init = tf.compat.v1.global_variables_initializer()
        sess.run(init)
        quant_op_inside_while_block_name = "simple_rnn/while/MatMul/ReadVariableOp_quantized"

        # the quantized op must not exist before the sim is constructed
        op_names_before = [op.name for op in sess.graph.get_operations()]
        self.assertNotIn(quant_op_inside_while_block_name, op_names_before)

        # construct a quantization sim model
        sim = QuantizationSimModel(sess, ['input_1'],
                                   ['simplernn_model/Softmax'],
                                   use_cuda=False)

        # get ops and make sure we have a quantized op added to the conditional block
        op_names_after = [
            op.name for op in sim.session.graph.get_operations()
        ]
        self.assertIn(quant_op_inside_while_block_name, op_names_after)

        # close tf sessions
        sess.close()
        sim.session.close()
        del sim
예제 #5
0
    def test_matmul_param_selection_lstm(self):
        """
        Test the api that selects the op and input param to modify for MatMuls within an LSTM.

        Two MatMuls inside the LSTM while-block are checked: one whose param arrives via a split
        op, and one whose param arrives via a strided_slice op.
        """
        tf.compat.v1.reset_default_graph()
        sess = tf.compat.v1.Session()
        with sess.graph.as_default():
            model_input = tf.keras.Input(shape=(3, 100))

            # LSTM layer with 12 internal units feeding a softmax dense layer
            lstm_out = tf.keras.layers.LSTM(12, name='lstm0')(model_input)
            _ = tf.keras.layers.Dense(12,
                                      activation=tf.nn.softmax,
                                      name="matmul0")(lstm_out)

            init_op = tf.compat.v1.global_variables_initializer()
            sess.run(init_op)

            # Case 1: MatMul whose param input comes through a split op
            expected_split_op = sess.graph.get_operation_by_name(
                "lstm0/while/split")
            matmul_after_split = sess.graph.get_operation_by_name(
                "lstm0/while/MatMul")
            expected_split_param = sess.graph.get_tensor_by_name(
                "lstm0/while/split/ReadVariableOp:0")

            selected = QuantizationSimModel._get_op_to_modify_with_param_in(
                matmul_after_split, 1)
            op_to_modify, param_tensor = selected

            self.assertEqual(op_to_modify, expected_split_op)
            self.assertEqual(param_tensor, expected_split_param)

            # Case 2: MatMul whose param input comes through a strided_slice op
            expected_slice_op = sess.graph.get_operation_by_name(
                "lstm0/while/strided_slice_1")
            matmul_after_slice = sess.graph.get_operation_by_name(
                "lstm0/while/MatMul_5")
            expected_slice_param = sess.graph.get_tensor_by_name(
                "lstm0/while/ReadVariableOp_1:0")

            selected = QuantizationSimModel._get_op_to_modify_with_param_in(
                matmul_after_slice, 1)
            op_to_modify, param_tensor = selected

            self.assertEqual(op_to_modify, expected_slice_op)
            self.assertEqual(param_tensor, expected_slice_param)

            sess.close()
예제 #6
0
    def test_compute_encodings(self):
        """
        Test that ops not evaluated during compute encodings are set to passThrough mode.

        The forward pass callback only evaluates the graph up to 'flatten/Reshape', so the
        quantizers that come after it are expected to end up in passThrough mode with
        invalid encodings.
        """
        tf.compat.v1.reset_default_graph()
        sess = tf.compat.v1.Session()
        # Use initialized random data: np.ndarray(shape) returns uninitialized memory which may
        # contain NaN/inf values and would make the computed encodings non-deterministic
        test_inp = np.random.rand(1, 32, 32, 3)

        def dummy_forward_func(sess, _):
            """ Forward pass that stops at the flatten op, leaving later ops unevaluated """
            input_tensor = sess.graph.get_tensor_by_name('input_1:0')
            output_tensor = sess.graph.get_tensor_by_name('flatten/Reshape:0')
            sess.run(output_tensor, feed_dict={input_tensor: test_inp})

        with sess.as_default():
            _ = keras_model_functional()
            init = tf.compat.v1.global_variables_initializer()
            sess.run(init)
            sim = QuantizationSimModel(sess, ['input_1'],
                                       ['keras_model_functional/Softmax'])
            sim.compute_encodings(dummy_forward_func, None)

            for name, quant_info in sim._activation_quantizers.items():
                if name in [
                        'keras_model_functional/Softmax_quantized',
                        'keras_model_functional/BiasAdd_quantized'
                ]:
                    # Check that quantizers after op evaluated in compute_encodings are in passThrough (3) mode
                    self.assertEqual(quant_info.get_op_mode(), 3)
                    self.assertFalse(
                        quant_info.tensor_quantizer.isEncodingValid)
                elif name in ['scope_1/conv2d_3/BiasAdd_quantized']:
                    # Check that passThrough quantizers remain as passThrough (3)
                    self.assertEqual(quant_info.get_op_mode(), 3)
                    self.assertFalse(
                        quant_info.tensor_quantizer.isEncodingValid)
                else:
                    # Check that all other quantizers are in quantizeDequantize (2) mode
                    self.assertEqual(quant_info.get_op_mode(), 2)
                    self.assertTrue(
                        quant_info.tensor_quantizer.isEncodingValid)

            # The full model must still be runnable through the sim session
            input_tensor = sim.session.graph.get_tensor_by_name('input_1:0')
            output_tensor = sim.session.graph.get_tensor_by_name(
                'keras_model_functional/Softmax:0')
            sim.session.run(output_tensor, feed_dict={input_tensor: test_inp})

            # close tf sessions
            sess.close()
            sim.session.close()
            del sim
예제 #7
0
    def test_set_and_freeze_encoding(self):
        """
        Verify that an encoding can be set and frozen on a quantizer of a CPU QuantSim.

        After freezing, a further set_encoding call with different min/max values is expected
        to leave the encoding min/max stored in the op unchanged.
        """
        tf.compat.v1.reset_default_graph()
        with tf.device('/cpu:0'):
            _ = keras_model()
            init_op = tf.compat.v1.global_variables_initializer()

        tf_session = tf.compat.v1.Session()
        tf_session.run(init_op)

        quant_sim = QuantizationSimModel(tf_session,
                                         ['conv2d_input'],
                                         ['keras_model/Softmax'],
                                         use_cuda=False)
        weight_quantizer = quant_sim.quantizer_config(
            'conv2d/Conv2D/ReadVariableOp_quantized')

        computed = weight_quantizer.compute_encoding(8, False)
        print(computed.max, computed.min)

        # Apply the computed encoding and freeze it
        weight_quantizer.set_encoding(computed)
        weight_quantizer.freeze_encoding()

        frozen_min = weight_quantizer.get_variable_from_op(
            QuantizeOpIndices.encoding_min)
        frozen_max = weight_quantizer.get_variable_from_op(
            QuantizeOpIndices.encoding_max)

        self.assertEqual(computed.min, frozen_min)
        self.assertEqual(computed.max, frozen_max)
        self.assertEqual(weight_quantizer.is_encoding_valid(), True)

        # Attempt to overwrite the frozen encoding; the stored values must not change
        computed.min = -0.4
        computed.max = 0.6
        weight_quantizer.set_encoding(computed)

        min_after_update = weight_quantizer.get_variable_from_op(
            QuantizeOpIndices.encoding_min)
        self.assertEqual(frozen_min, min_after_update)
        max_after_update = weight_quantizer.get_variable_from_op(
            QuantizeOpIndices.encoding_max)
        self.assertEqual(frozen_max, max_after_update)

        tf_session.close()
예제 #8
0
    def test_get_encoding(self):
        """
        Verify that get_encoding raises AssertionError on a quantizer of a freshly
        created CPU QuantSim (no encodings have been computed or set in this test).
        """
        tf.compat.v1.reset_default_graph()
        with tf.device('/cpu:0'):
            _ = keras_model()
            init_op = tf.compat.v1.global_variables_initializer()

        tf_session = tf.compat.v1.Session()
        tf_session.run(init_op)

        quant_sim = QuantizationSimModel(tf_session,
                                         ['conv2d_input'],
                                         ['keras_model/Softmax'],
                                         use_cuda=False)
        weight_quantizer = quant_sim.quantizer_config(
            'conv2d/Conv2D/ReadVariableOp_quantized')

        with self.assertRaises(AssertionError):
            weight_quantizer.get_encoding()

        tf_session.close()
예제 #9
0
    def test_construction_cpu_model(self):
        """
        Build a QuantSim on a small conv model placed on CPU and verify that QcQuantize ops
        were added to the graph, connected to the expected consumers, and given the
        expected op modes.
        """

        tf.compat.v1.reset_default_graph()
        with tf.device('/cpu:0'):
            keras_layers = tf.keras.layers
            model = tf.keras.Sequential()
            first_conv = keras_layers.Conv2D(32,
                                             kernel_size=3,
                                             input_shape=(28, 28, 3),
                                             activation='relu')
            model.add(first_conv)
            model.add(keras_layers.MaxPooling2D((2, 2)))
            second_conv = keras_layers.Conv2D(64,
                                              kernel_size=3,
                                              activation='relu')
            model.add(second_conv)
            model.summary()

        sess = tf.compat.v1.Session()
        initialize_uninitialized_vars(sess)
        sim = QuantizationSimModel(sess,
                                   ['conv2d_input'],
                                   ['conv2d_1/Relu'],
                                   use_cuda=False)

        # Run the graph once up to a quantized op to confirm the inserted ops are executable
        graph = sess.graph
        quantized_output = graph.get_tensor_by_name(
            'conv2d_1/BiasAdd_quantized:0')
        input_placeholder = graph.get_tensor_by_name('conv2d_input:0')
        random_batch = np.random.randn(20, 28, 28, 3)
        sess.run(quantized_output, feed_dict={input_placeholder: random_batch})

        # Collect every quantize op present in the graph
        quant_ops = [
            candidate for candidate in sess.graph.get_operations()
            if candidate.type == 'QcQuantize'
        ]
        for quant_op in quant_ops:
            print(quant_op.name)
        self.assertEqual(10, len(quant_ops))

        first_quant_op = quant_ops[0]
        second_quant_op = quant_ops[1]

        # The first two quantize ops must feed a Conv2D and a BiasAdd respectively
        first_consumer = first_quant_op.outputs[0].consumers()[0]
        self.assertEqual('Conv2D', first_consumer.type)
        second_consumer = second_quant_op.outputs[0].consumers()[0]
        self.assertEqual('BiasAdd', second_consumer.type)
        self.assertEqual(int(libpymo.TensorQuantizerOpMode.passThrough),
                         sess.run(second_quant_op.inputs[1]))

        # Input index 1 of the quantize op holds its op mode
        expected_first_mode = int(
            libpymo.TensorQuantizerOpMode.oneShotQuantizeDequantize)
        self.assertEqual(expected_first_mode,
                         sess.run(first_quant_op.inputs[1]))

        sess.close()
        sim.session.close()
        del sim
예제 #10
0
    def test_quantize_lstm_deepspeech_time_major_false_quantsim_and_forward_pass(
            self):
        """
        Build a quantsim on a model with two stacked LSTM layers, validate the quantize ops
        inserted for each LSTM, and validate the forward pass and encodings.
        """
        tf.reset_default_graph()

        sess = tf.Session()
        np.random.seed(0)
        tf.set_random_seed(0)

        with sess.graph.as_default():
            model_input = tf.keras.Input(shape=(3, 100))

            # First LSTM (12 units) returns full sequences and its states; the states are unused here
            stacked_lstm = tf.keras.layers.LSTM(12,
                                                return_state=True,
                                                return_sequences=True,
                                                name='lstm_stacked')
            sequences, _state_h, _state_c = stacked_lstm(model_input)

            # Second LSTM (12 units) consumes the sequences of the first one
            last_out = tf.keras.layers.LSTM(12, name='last_lstm')(sequences)
            _ = tf.keras.layers.Dense(12,
                                      activation=tf.nn.softmax,
                                      name="lstm_model")(last_out)

        init_op = tf.global_variables_initializer()
        sess.run(init_op)

        sim = QuantizationSimModel(sess,
                                   ['input_1'],
                                   ['lstm_model/Softmax'],
                                   use_cuda=False)

        # validate quantsim: collect quantized op names and check the nodes of both LSTM layers
        quant_op_names = self._get_quant_ops_from_tf_graph(sim.session.graph)

        self.validate_internal_lstm_quantisim_nodes(quant_op_names,
                                                    'lstm_stacked',
                                                    True,
                                                    False)
        self.validate_internal_lstm_quantisim_nodes(quant_op_names,
                                                    'last_lstm')

        # validate forward pass
        # NOTE(review): the validation is invoked twice with identical arguments; kept as in
        # the original - confirm whether the repetition is intentional
        for _ in range(2):
            self.validate_general_lstm_forward_pass_and_encoding(
                sess, sim, 9, 14)

        # close tf sessions
        sess.close()
        sim.session.close()
        del sim
예제 #11
0
    def test_quantize_lstm_time_major_true_quantsim_and_forward_pass(self):
        """
        Test quantsim construction and forward pass on a model with a time-major LSTM layer.

        Validates the quantize ops inserted for the LSTM and then the forward pass and
        encodings through the class-level validation helpers.
        """
        tf.reset_default_graph()

        sess = tf.Session()
        np.random.seed(0)
        tf.set_random_seed(0)

        with sess.graph.as_default():
            inputs = tf.keras.Input(shape=(3, 100))

            # Add a LSTM layer with 12 internal units.
            x = tf.keras.layers.LSTM(12, time_major=True,
                                     name='lstm_tm')(inputs)
            _ = tf.keras.layers.Dense(12,
                                      activation=tf.nn.softmax,
                                      name="lstm_model")(x)

        init = tf.global_variables_initializer()
        sess.run(init)

        sim = QuantizationSimModel(sess, ['input_1'], ['lstm_model/Softmax'],
                                   use_cuda=False)

        # validate quantsim
        # get ops and make sure we have a quantized op added to the conditional blocks
        quantized_graph_op_names = self._get_quant_ops_from_tf_graph(
            sim.session.graph)

        self.validate_internal_lstm_quantisim_nodes(quantized_graph_op_names,
                                                    'lstm_tm')

        # validate forward pass
        self.validate_general_lstm_forward_pass_and_encoding(sess, sim)

        # close tf sessions
        sess.close()
        sim.session.close()
        del sim
예제 #12
0
 def test_skip_quantizing_dtype_int(self):
     """
     Test that op with dtype int32 is skipped during quantization.

     Of the two model inputs, 'input_1' is expected to have no activation quantizer
     while 'input_2' is expected to have one.
     """
     tf.compat.v1.reset_default_graph()
     with tf.compat.v1.Session() as sess:
         _ = model_with_dtype_int()
         initialize_uninitialized_vars(sess)
         sim = QuantizationSimModel(sess, ['input_1', 'input_2'],
                                    ['model_with_dtype_int/Softmax'],
                                    use_cuda=False)
         self.assertEqual(6, len(sim._activation_quantizers))
         # assertIn / assertNotIn report the offending container on failure
         self.assertNotIn('input_1_quantized', sim._activation_quantizers)
         self.assertIn('input_2_quantized', sim._activation_quantizers)
         sim.session.close()
         del sim
예제 #13
0
    def test_quantize_simple_rnn_export(self):
        """
        Test model export for recurrent models.

        After export, the model loaded from the exported meta file must be runnable and must
        not contain any quantize ops, and the exported encodings file must hold the expected
        number of activation and param encodings.
        """
        tf.reset_default_graph()

        sess = tf.Session()
        np.random.seed(0)
        tf.set_random_seed(0)

        with sess.graph.as_default():
            inputs = tf.keras.Input(shape=(3, 100))

            # Two stacked RNN layers with 10 internal units each.
            x = tf.keras.layers.SimpleRNN(10,
                                          name='rnn1',
                                          return_sequences=True)(inputs)
            x = tf.keras.layers.SimpleRNN(10, name='rnn2')(x)

            _ = tf.keras.layers.Dense(10, activation=tf.nn.softmax,
                                      name="fc")(x)

            init = tf.global_variables_initializer()
            sess.run(init)

        sim = QuantizationSimModel(sess, ['input_1'], ['fc/Softmax'],
                                   use_cuda=False)

        def dummy_forward_pass(sess, args):
            """ Run one random batch through the fc/Softmax output; args is unused """
            model_output = sess.graph.get_tensor_by_name('fc/Softmax:0')
            model_input = sess.graph.get_tensor_by_name('input_1:0')
            dummy_input = np.random.randn(1, 3, 100)
            sess.run(model_output, feed_dict={model_input: dummy_input})

        sim.compute_encodings(dummy_forward_pass, None)
        sim.export('./data', 'rnn_quantsim')

        new_sess = load_model_from_meta('./data/rnn_quantsim.meta')

        # The exported model must be runnable on its own
        dummy_forward_pass(new_sess, None)

        all_op_types = [op.type for op in new_sess.graph.get_operations()]
        self.assertNotIn('QcQuantize', all_op_types)
        self.assertNotIn('QcQuantizeRecurrentParam', all_op_types)

        # Load the encodings file to check if the encodings were exported correctly
        with open("./data/rnn_quantsim.encodings", "r") as encodings_file:
            encodings = json.load(encodings_file)
            self.assertEqual(8, len(encodings['activation_encodings']))
            self.assertEqual(5, len(encodings['param_encodings']))

        # close tf sessions, including the one created for the exported model
        new_sess.close()
        sess.close()
        sim.session.close()
        del sim
예제 #14
0
    def test_empty_config_file(self):
        """
        Check that with an empty config file, all op modes and use symmetric encoding settings
        are set to passThrough and False respectively.

        The temporary config file and the tf sessions are released even when an assertion fails.
        """
        tf.compat.v1.reset_default_graph()
        sess = tf.compat.v1.Session()
        with sess.graph.as_default():
            _ = single_residual()
            init = tf.compat.v1.global_variables_initializer()
            sess.run(init)

        quantsim_config = {
            "defaults": {
                "ops": {},
                "params": {}
            },
            "params": {},
            "op_type": {},
            "supergroups": [],
            "model_input": {},
            "model_output": {}
        }
        config_path = './quantsim_config.json'
        with open(config_path, 'w') as f:
            json.dump(quantsim_config, f)

        sim = None
        try:
            sim = QuantizationSimModel(sess, ['input_1'],
                                       ['single_residual/Softmax'],
                                       config_file=config_path)
            all_quantize_ops = [
                op for op in sim.session.graph.get_operations()
                if op.type == 'QcQuantize'
            ]
            # A list comprehension is never None; require a non-empty list so that the
            # per-op checks below cannot pass vacuously
            self.assertTrue(all_quantize_ops)
            for op in all_quantize_ops:
                is_symmetric_tensor = sim.session.graph.get_tensor_by_name(
                    op.name + '_use_symmetric_encoding:0')
                op_mode_tensor = sim.session.graph.get_tensor_by_name(
                    op.name + '_op_mode:0')
                self.assertEqual(sim.session.run(is_symmetric_tensor), False)
                self.assertEqual(sim.session.run(op_mode_tensor),
                                 int(pymo.TensorQuantizerOpMode.passThrough))
        finally:
            if os.path.exists(config_path):
                os.remove(config_path)
            sess.close()
            if sim is not None:
                sim.session.close()
            tf.compat.v1.reset_default_graph()
예제 #15
0
    def test_parse_config_file_model_outputs(self):
        """
        Verify that the model output quantizer picks up its setting from a json config file:
        with "is_output_quantized" enabled under "model_output", the op mode of the Softmax
        output quantizer is expected to be updateStats.
        """
        tf.compat.v1.reset_default_graph()
        sess = tf.compat.v1.Session()
        with sess.graph.as_default():
            _ = single_residual()
            sess.run(tf.compat.v1.global_variables_initializer())

        # Everything is left empty except the model output section
        config_path = './quantsim_config.json'
        config_contents = {
            "defaults": {"ops": {}, "params": {}},
            "params": {},
            "op_type": {},
            "supergroups": [],
            "model_input": {},
            "model_output": {"is_output_quantized": "True"},
        }
        with open(config_path, 'w') as config_fp:
            json.dump(config_contents, config_fp)

        sim = QuantizationSimModel(sess,
                                   ['input_1'],
                                   ['single_residual/Softmax'],
                                   config_file=config_path)

        output_op_mode = sim.session.graph.get_tensor_by_name(
            'single_residual/Softmax_quantized_op_mode:0')
        actual_mode = sim.session.run(output_op_mode)
        self.assertEqual(actual_mode,
                         int(pymo.TensorQuantizerOpMode.updateStats))

        # Remove the temporary config file and release tf resources
        if os.path.exists(config_path):
            os.remove(config_path)
        sess.close()
        sim.session.close()
        tf.compat.v1.reset_default_graph()
예제 #16
0
    def test_parse_config_file_supergroups(self):
        """
        Verify that supergroup settings from a json config file are applied: with output
        quantization enabled by default and three supergroups declared, a fixed subset of
        activation quantizers is expected to be in passThrough mode and all the others in
        updateStats mode.
        """
        tf.compat.v1.reset_default_graph()
        sess = tf.compat.v1.Session()
        with sess.graph.as_default():
            _ = single_residual()
            sess.run(tf.compat.v1.global_variables_initializer())

        supergroups = [
            {"op_list": ["Conv", "AveragePool"]},
            {"op_list": ["Add", "Relu"]},
            {"op_list": ["Conv", "BatchNormalization"]},
        ]
        config_contents = {
            "defaults": {
                "ops": {"is_output_quantized": "True"},
                "params": {},
            },
            "params": {},
            "op_type": {},
            "supergroups": supergroups,
            "model_input": {},
            "model_output": {},
        }
        config_path = './quantsim_config.json'
        with open(config_path, 'w') as config_fp:
            json.dump(config_contents, config_fp)
        sim = QuantizationSimModel(sess,
                                   ['input_1'],
                                   ['single_residual/Softmax'],
                                   config_file=config_path)

        # All activation quantizers checked by this test, in the order they are verified
        activation_quantizers = [
            'conv2d/BiasAdd_quantized',
            'conv2d_1/BiasAdd_quantized',
            'conv2d_2/BiasAdd_quantized',
            'conv2d_3/BiasAdd_quantized',
            'conv2d_4/BiasAdd_quantized',
            'input_1_quantized',
            'batch_normalization/cond/Merge_quantized',
            'Relu_quantized',
            'max_pooling2d/MaxPool_quantized',
            'batch_normalization_1/cond/Merge_quantized',
            'Add_quantized',
            'Relu_2_quantized',
            'average_pooling2d/AvgPool_quantized',
            'single_residual/Softmax_quantized',
            'Relu_1_quantized',
        ]

        # Quantizers expected to stay in passThrough mode
        pass_through_quantizers = {
            'input_1_quantized',
            'conv2d/BiasAdd_quantized',
            'conv2d_3/BiasAdd_quantized',
            'Add_quantized',
            'conv2d_4/BiasAdd_quantized',
        }

        for quantizer_name in activation_quantizers:
            op_mode_tensor = sim.session.graph.get_tensor_by_name(
                quantizer_name + '_op_mode:0')
            if quantizer_name in pass_through_quantizers:
                expected_mode = pymo.TensorQuantizerOpMode.passThrough
            else:
                expected_mode = pymo.TensorQuantizerOpMode.updateStats
            self.assertEqual(sim.session.run(op_mode_tensor),
                             int(expected_mode))

        # Remove the temporary config file and release tf resources
        if os.path.exists(config_path):
            os.remove(config_path)
        sess.close()
        sim.session.close()
        tf.compat.v1.reset_default_graph()
예제 #17
0
    def test_compute_encodings_quant_scheme_update(self):
        """
        Create a QuantSim model, switch the quant scheme of a param quantizer through its
        property interface, and verify that recomputing encodings yields different
        encoding min/max values for that quantizer.
        """

        tf.compat.v1.reset_default_graph()
        np.random.seed(0)
        tf.compat.v1.set_random_seed(0)

        with tf.device('/gpu:0'):
            keras_layers = tf.keras.layers
            model = tf.keras.Sequential()
            first_conv = keras_layers.Conv2D(32,
                                             kernel_size=3,
                                             input_shape=(28, 28, 3),
                                             activation='relu')
            model.add(first_conv)
            model.add(keras_layers.MaxPooling2D((2, 2)))
            second_conv = keras_layers.Conv2D(64,
                                              kernel_size=3,
                                              activation='relu')
            model.add(second_conv)
            model.summary()

        sess = tf.compat.v1.Session()
        initialize_uninitialized_vars(sess)
        sim = QuantizationSimModel(sess,
                                   ['conv2d_input'],
                                   ['conv2d_1/Relu'],
                                   use_cuda=True)

        weight_quantizer_name = 'conv2d/Conv2D/ReadVariableOp_quantized'

        # Input index 1 of the quantize op holds its op mode
        weight_quant_op = sim.session.graph.get_operation_by_name(
            weight_quantizer_name)
        expected_mode = int(
            libpymo.TensorQuantizerOpMode.oneShotQuantizeDequantize)
        self.assertEqual(expected_mode,
                         sim.session.run(weight_quant_op.inputs[1]))

        def dummy_forward_pass(sess, args):
            """ Re-seed, then push one random batch through the quantized Relu output """
            np.random.seed(0)
            tf.compat.v1.set_random_seed(0)
            quantized_output = sess.graph.get_tensor_by_name(
                'conv2d_1/Relu_quantized:0')
            input_placeholder = sess.graph.get_tensor_by_name(
                'conv2d_input:0')
            random_batch = np.random.randn(20, 28, 28, 3)
            sess.run(quantized_output,
                     feed_dict={input_placeholder: random_batch})

        sim.compute_encodings(dummy_forward_pass, None)

        p_quantizer = sim.quantizer_config(weight_quantizer_name)
        min_before = p_quantizer.get_variable_from_op(
            QuantizeOpIndices.encoding_min)
        max_before = p_quantizer.get_variable_from_op(
            QuantizeOpIndices.encoding_max)

        # Switch the scheme from tf-enhanced to tf via the property interface
        self.assertEqual(libpymo.QuantizationMode.QUANTIZATION_TF_ENHANCED,
                         p_quantizer.quant_scheme)
        p_quantizer.quant_scheme = QuantScheme.post_training_tf
        self.assertEqual(libpymo.QuantizationMode.QUANTIZATION_TF,
                         p_quantizer.quant_scheme)

        # invoke compute encoding after quantScheme update
        sim.compute_encodings(dummy_forward_pass, None)
        min_after = p_quantizer.get_variable_from_op(
            QuantizeOpIndices.encoding_min)
        max_after = p_quantizer.get_variable_from_op(
            QuantizeOpIndices.encoding_max)

        # The encodings must differ between the two schemes
        self.assertNotEqual(min_before, min_after)
        self.assertNotEqual(max_before, max_after)

        sess.close()
        sim.session.close()
        del sim
예제 #18
0
    def test_parse_config_file_params(self):
        """
        Test that param specific quantization parameters are set correctly when using json config file.

        The config written here disables quantization for params by default (with symmetric
        encodings) and overrides "weight" params to be quantized with asymmetric encodings.
        Weight quantizers are then expected in oneShotQuantizeDequantize mode and bias quantizers
        in passThrough mode.
        """
        # Local import so the config file cleanup below does not depend on a module level import
        import os

        tf.compat.v1.reset_default_graph()
        sess = tf.compat.v1.Session()
        with sess.graph.as_default():
            _ = single_residual()
            init = tf.compat.v1.global_variables_initializer()
            sess.run(init)

        quantsim_config = {
            "defaults": {
                "ops": {},
                "params": {
                    "is_quantized": "False",
                    "is_symmetric": "True"
                }
            },
            "params": {
                "weight": {
                    "is_quantized": "True",
                    "is_symmetric": "False"
                }
            },
            "op_type": {},
            "supergroups": [],
            "model_input": {},
            "model_output": {}
        }
        config_path = './quantsim_config.json'
        with open(config_path, 'w') as f:
            json.dump(quantsim_config, f)

        weight_quantizers = [
            'conv2d/Conv2D/ReadVariableOp_quantized',
            'conv2d_1/Conv2D/ReadVariableOp_quantized',
            'conv2d_2/Conv2D/ReadVariableOp_quantized',
            'conv2d_3/Conv2D/ReadVariableOp_quantized',
            'conv2d_4/Conv2D/ReadVariableOp_quantized',
            'single_residual/MatMul/ReadVariableOp_quantized'
        ]

        bias_quantizers = [
            'conv2d/BiasAdd/ReadVariableOp_quantized',
            'conv2d_1/BiasAdd/ReadVariableOp_quantized',
            'conv2d_2/BiasAdd/ReadVariableOp_quantized',
            'conv2d_3/BiasAdd/ReadVariableOp_quantized',
            'conv2d_4/BiasAdd/ReadVariableOp_quantized',
            'single_residual/BiasAdd/ReadVariableOp_quantized'
        ]

        sim = None
        try:
            sim = QuantizationSimModel(sess, ['input_1'],
                                       ['single_residual/Softmax'],
                                       config_file=config_path)

            # weights: overridden by the "weight" entry -> quantized, asymmetric
            for param_quantizer in weight_quantizers:
                is_symmetric_tensor = sim.session.graph.get_tensor_by_name(
                    param_quantizer + '_use_symmetric_encoding:0')
                op_mode_tensor = sim.session.graph.get_tensor_by_name(
                    param_quantizer + '_op_mode:0')
                self.assertEqual(
                    sim.session.run(op_mode_tensor),
                    int(pymo.TensorQuantizerOpMode.oneShotQuantizeDequantize))
                self.assertEqual(sim.session.run(is_symmetric_tensor), False)

            # biases: fall back to the defaults -> not quantized, symmetric
            for param_quantizer in bias_quantizers:
                is_symmetric_tensor = sim.session.graph.get_tensor_by_name(
                    param_quantizer + '_use_symmetric_encoding:0')
                op_mode_tensor = sim.session.graph.get_tensor_by_name(
                    param_quantizer + '_op_mode:0')
                self.assertEqual(sim.session.run(op_mode_tensor),
                                 int(pymo.TensorQuantizerOpMode.passThrough))
                self.assertEqual(sim.session.run(is_symmetric_tensor), True)
        finally:
            # Always remove the temporary config file and release the sessions,
            # even when an assertion above fails
            if os.path.exists(config_path):
                os.remove(config_path)
            sess.close()
            if sim is not None:
                sim.session.close()
            tf.compat.v1.reset_default_graph()
예제 #19
0
    def test_parse_config_file_op_type(self):
        """ Verify that op type specific settings from a json config file reach the inserted quantizers """
        tf.compat.v1.reset_default_graph()
        sess = tf.compat.v1.Session()
        with sess.graph.as_default():
            _ = single_residual()
            sess.run(tf.compat.v1.global_variables_initializer())

        def op_with_quantized_bias():
            """ Conv and Gemm use the same override: quantized input, quantized symmetric bias """
            return {
                "is_input_quantized": "True",
                "params": {
                    "bias": {
                        "is_quantized": "True",
                        "is_symmetric": "True"
                    }
                }
            }

        quantsim_config = {
            "defaults": {
                "ops": {},
                "params": {}
            },
            "params": {},
            "op_type": {
                "Conv": op_with_quantized_bias(),
                "Gemm": op_with_quantized_bias(),
                "BatchNormalization": {
                    "is_input_quantized": "True"
                }
            },
            "supergroups": [],
            "model_input": {},
            "model_output": {}
        }
        with open('./quantsim_config.json', 'w') as config_fp:
            json.dump(quantsim_config, config_fp)
        sim = QuantizationSimModel(sess, ['input_1'],
                                   ['single_residual/Softmax'],
                                   config_file='./quantsim_config.json')
        graph = sim.session.graph

        update_stats = int(pymo.TensorQuantizerOpMode.updateStats)
        pass_through = int(pymo.TensorQuantizerOpMode.passThrough)
        one_shot = int(pymo.TensorQuantizerOpMode.oneShotQuantizeDequantize)

        activation_quantizers = [
            'conv2d/BiasAdd_quantized', 'conv2d_1/BiasAdd_quantized',
            'conv2d_2/BiasAdd_quantized', 'conv2d_3/BiasAdd_quantized',
            'conv2d_4/BiasAdd_quantized', 'input_1_quantized',
            'batch_normalization/cond/Merge_quantized', 'Relu_quantized',
            'max_pooling2d/MaxPool_quantized',
            'batch_normalization_1/cond/Merge_quantized', 'Add_quantized',
            'Relu_2_quantized', 'average_pooling2d/AvgPool_quantized',
            'single_residual/Softmax_quantized', 'Relu_1_quantized'
        ]
        # activation quantizers expected to be enabled; the remaining ones are pass-through
        enabled_activation_quantizers = {
            'input_1_quantized', 'conv2d/BiasAdd_quantized',
            'max_pooling2d/MaxPool_quantized',
            'conv2d_2/BiasAdd_quantized', 'conv2d_3/BiasAdd_quantized',
            'Relu_2_quantized', 'average_pooling2d/AvgPool_quantized'
        }

        kernel_quantizers = [
            'conv2d/Conv2D/ReadVariableOp_quantized',
            'conv2d_1/Conv2D/ReadVariableOp_quantized',
            'conv2d_2/Conv2D/ReadVariableOp_quantized',
            'conv2d_3/Conv2D/ReadVariableOp_quantized',
            'conv2d_4/Conv2D/ReadVariableOp_quantized',
            'single_residual/MatMul/ReadVariableOp_quantized'
        ]
        bias_quantizers = [
            'conv2d/BiasAdd/ReadVariableOp_quantized',
            'conv2d_1/BiasAdd/ReadVariableOp_quantized',
            'conv2d_2/BiasAdd/ReadVariableOp_quantized',
            'conv2d_3/BiasAdd/ReadVariableOp_quantized',
            'conv2d_4/BiasAdd/ReadVariableOp_quantized',
            'single_residual/BiasAdd/ReadVariableOp_quantized'
        ]
        quantized_biases = set(bias_quantizers)

        for quantizer_name in activation_quantizers:
            op_mode_tensor = graph.get_tensor_by_name(quantizer_name +
                                                      '_op_mode:0')
            if quantizer_name in enabled_activation_quantizers:
                expected_mode = update_stats
            else:
                expected_mode = pass_through
            self.assertEqual(sim.session.run(op_mode_tensor), expected_mode)

        # kernels first, then biases
        for quantizer_name in kernel_quantizers + bias_quantizers:
            is_symmetric_tensor = graph.get_tensor_by_name(
                quantizer_name + '_use_symmetric_encoding:0')
            op_mode_tensor = graph.get_tensor_by_name(quantizer_name +
                                                      '_op_mode:0')
            if quantizer_name in quantized_biases:
                expected_mode, expected_symmetric = one_shot, True
            else:
                expected_mode, expected_symmetric = pass_through, False
            self.assertEqual(sim.session.run(op_mode_tensor), expected_mode)
            self.assertEqual(sim.session.run(is_symmetric_tensor),
                             expected_symmetric)

        # drop the temporary config file and release TF resources
        if os.path.exists('./quantsim_config.json'):
            os.remove('./quantsim_config.json')
        sess.close()
        sim.session.close()
        tf.compat.v1.reset_default_graph()
예제 #20
0
    def validate_simple_rnn_auto_insertion_and_forward_pass(self, sess):
        """
        common api to validate auto quant node insertion and forward pass for simple rnn layer

        Builds a QuantizationSimModel on top of the given session, checks which quantize ops are
        present / absent inside the simple_rnn while block, computes encodings and finally checks
        that the quantsim output differs from the output of the original session.
        Both the given session and the quantsim session are closed before returning.

        :param sess: TensorFlow session
        :return: None
        """

        np.random.seed(0)
        tf.set_random_seed(0)

        ops = sess.graph.get_operations()
        matmul_param_quant_op_inside_while_block_name = "simple_rnn/while/MatMul/ReadVariableOp_quantized"
        # the original graph must not contain quantize ops yet
        self.assertNotIn(matmul_param_quant_op_inside_while_block_name,
                         [op.name for op in ops])
        # _ = tf.summary.FileWriter('./test_simple_rnn_keras', sess.graph)
        # construct a quantization sim model
        sim = QuantizationSimModel(sess, ['input_1'],
                                   ['simplernn_model/Softmax'],
                                   use_cuda=False)

        # params that must have quantizers
        matmul_2_param_quant_op_inside_while_block_name = "simple_rnn/while/MatMul_1/ReadVariableOp_quantized"
        # check biasadd param quantizers are disabled
        param_quantizers = sim._param_quantizers
        for p_quantizer in param_quantizers:
            if 'BiasAdd' in p_quantizer:
                p_quant_config = sim.quantizer_config(p_quantizer)
                self.assertFalse(p_quant_config.enabled)

        # activation quantize op names that are asserted below to be absent from the graph
        activation_bias_add_op_inside_while_block_name = "simple_rnn/while/BiasAdd_quantized"
        add_op_inside_while_block_name = "simple_rnn/while/add_quantized"
        activation_matmul_op_inside_while_block_name = "simple_rnn/while/MatMul_quantized"
        activation_matmul_2_op_inside_while_block_name = "simple_rnn/while/MatMul_1_quantized"

        # get ops and make sure we have a quantized op added to the conditional block
        quantized_graph_op_names = self._get_quant_ops_from_tf_graph(
            sim.session.graph)

        # while block ops
        # kernel quantizers
        self.assertIn(matmul_param_quant_op_inside_while_block_name,
                      quantized_graph_op_names)
        self.assertIn(matmul_2_param_quant_op_inside_while_block_name,
                      quantized_graph_op_names)

        # no output quantizers expected for these ops
        self.assertNotIn(activation_bias_add_op_inside_while_block_name,
                         quantized_graph_op_names)
        self.assertNotIn(add_op_inside_while_block_name,
                         quantized_graph_op_names)

        self.assertNotIn(activation_matmul_op_inside_while_block_name,
                         quantized_graph_op_names)
        self.assertNotIn(activation_matmul_2_op_inside_while_block_name,
                         quantized_graph_op_names)

        # check for input quantizers
        input_matmul_op_inside_while_block_name = "simple_rnn/while/TensorArrayReadV3_quantized"
        input_matmul_2_op_inside_while_block_name = "simple_rnn/while/Identity_2_quantized"
        self.assertIn(input_matmul_op_inside_while_block_name,
                      quantized_graph_op_names)
        self.assertIn(input_matmul_2_op_inside_while_block_name,
                      quantized_graph_op_names)

        # validate encodings
        def dummy_forward_pass(sess, args):
            """ Forward pass callback used by compute_encodings; feeds one random batch """
            model_output = sess.graph.get_tensor_by_name(
                'simplernn_model/Softmax:0')
            model_input = sess.graph.get_tensor_by_name('input_1:0')
            dummy_input = np.random.randn(16, 3, 100)
            sess.run(model_output, feed_dict={model_input: dummy_input})

        # named run_model (not eval) so the builtin is not shadowed
        def run_model(session, input_tensor):
            """ Run the model output of the given session for input_tensor and return it """
            model_output = session.graph.get_tensor_by_name(
                'simplernn_model/Softmax:0')
            model_input = session.graph.get_tensor_by_name('input_1:0')
            return session.run(model_output,
                               feed_dict={model_input: input_tensor})

        sim.compute_encodings(dummy_forward_pass, None)
        random_tensor = np.random.randn(16, 3, 100)
        # reference output from the original (non quantsim) session
        orig_out = run_model(sess, random_tensor)

        # NOTE(review): encodings are computed a second time here; this looks redundant,
        # kept as-is - confirm before removing
        sim.compute_encodings(dummy_forward_pass, None)

        # output of the quantsim session for the same input
        with sim.session.graph.as_default():
            quantized_out = run_model(sim.session, random_tensor)

        # check quantized output with orig output
        self.assertFalse(np.allclose(orig_out, quantized_out))

        # close tf sessions
        sess.close()
        sim.session.close()
        del sim
예제 #21
0
def run_evaluation(args):
    """
    Build an evaluation graph for a model, quantize it with QuantizationSimModel and print
    its top-1 accuracy.

    Steps: build the TFRecord input pipeline and network, load or initialize weights, fold
    batch norms, optionally run cross layer equalization and bias correction, create the
    quantization sim model, compute encodings and run the final evaluation.

    :param args: Parsed arguments. Attributes read here: dataset_dir, model_name, image_size,
        labels_offset, batch_size, ckpt_bn_folded, checkpoint_path, quant_scheme, round_mode,
        default_output_bw, default_param_bw, quantsim_config_file, encodings_iterations
    :return: None (the top-1 accuracy is printed)
    :raises ValueError: if args.quant_scheme is not one of the recognized scheme names
    """
    # Build graph definition
    with tf.Graph().as_default():
        # Create iterator
        tf_records = glob(args.dataset_dir + '/validation*')
        preprocessing_fn = preprocessing_factory.get_preprocessing(
            args.model_name, is_training=False)
        parse_function = wrap_preprocessing(preprocessing_fn,
                                            height=args.image_size,
                                            width=args.image_size,
                                            num_classes=(1001 -
                                                         args.labels_offset),
                                            labels_offset=args.labels_offset)

        dataset = tf.data.TFRecordDataset(tf_records).repeat(1)
        dataset = dataset.map(parse_function, num_parallel_calls=1).apply(
            tf.contrib.data.batch_and_drop_remainder(args.batch_size))
        iterator = dataset.make_initializable_iterator()
        images, labels = iterator.get_next()

        network_fn = nets_factory.get_network_fn(
            args.model_name,
            num_classes=(1001 - args.labels_offset),
            is_training=False)
        with tf.device('/cpu:0'):
            images = tf.placeholder_with_default(images,
                                                 shape=(None, args.image_size,
                                                        args.image_size, 3),
                                                 name='input')
            labels = tf.placeholder_with_default(labels,
                                                 shape=(None, 1001 -
                                                        args.labels_offset),
                                                 name='labels')
        logits, end_points = network_fn(images)
        confidences = tf.nn.softmax(logits, axis=1, name='confidences')
        categorical_preds = tf.argmax(confidences,
                                      axis=1,
                                      name='categorical_preds')
        categorical_labels = tf.argmax(labels,
                                       axis=1,
                                       name='categorical_labels')
        correct_predictions = tf.equal(categorical_labels, categorical_preds)
        # The accuracy ops are fetched by name ('top1-acc:0') in eval_func below
        top1_acc = tf.reduce_mean(tf.cast(correct_predictions, tf.float32),
                                  name='top1-acc')
        top5_acc = tf.reduce_mean(tf.cast(
            tf.nn.in_top_k(predictions=confidences,
                           targets=tf.cast(categorical_labels, tf.int32),
                           k=5), tf.float32),
                                  name='top5-acc')

        saver = tf.train.Saver()
        sess = tf.Session()

        # Load model from checkpoint
        if not args.ckpt_bn_folded:
            saver.restore(sess, args.checkpoint_path)
        else:
            sess.run(tf.global_variables_initializer())

    # Fold all BatchNorms before QuantSim
    sess, folded_pairs = fold_all_batch_norms(sess, ['IteratorGetNext'],
                                              [logits.name[:-2]])

    if args.ckpt_bn_folded:
        # The checkpoint matches the folded graph, so it is restored only after folding
        with sess.graph.as_default():
            saver = tf.train.Saver()
            saver.restore(sess, args.checkpoint_path)
    else:
        # Do Cross Layer Equalization and Bias Correction if not loading from a batchnorm folded checkpoint
        sess = equalize_model(sess, ['input'], [logits.op.name])
        conv_bn_dict = BiasCorrection.find_all_convs_bn_with_activation(
            sess, ['input'], [logits.op.name])
        quant_params = QuantParams(quant_mode=args.quant_scheme)
        bias_correction_dataset = tf.data.TFRecordDataset(tf_records).repeat(1)
        # Bias correction only consumes the first element returned by parse_function
        bias_correction_dataset = bias_correction_dataset.map(
            lambda x: parse_function(x)[0], num_parallel_calls=1).apply(
                tf.contrib.data.batch_and_drop_remainder(args.batch_size))
        bias_correction_params = BiasCorrectionParams(
            batch_size=args.batch_size,
            num_quant_samples=10,
            num_bias_correct_samples=512,
            input_op_names=['input'],
            output_op_names=[logits.op.name])

        sess = BiasCorrection.correct_bias(
            reference_model=sess,
            bias_correct_params=bias_correction_params,
            quant_params=quant_params,
            data_set=bias_correction_dataset,
            conv_bn_dict=conv_bn_dict,
            perform_only_empirical_bias_corr=True)

    # Define eval_func to use for compute encodings in QuantSim
    def eval_func(session, iterations):
        """
        Re-initialize the dataset iterator and average 'top1-acc:0' over batches.

        :param session: TensorFlow session to evaluate
        :param iterations: Number of batches to run, -1 to run until the dataset is exhausted
        :return: Average top-1 accuracy over the batches run, 0.0 when no batch was run
        """
        cnt = 0
        avg_acc_top1 = 0
        session.run('MakeIterator')
        while cnt < iterations or iterations == -1:
            try:
                avg_acc_top1 += session.run('top1-acc:0')
            except tf.errors.OutOfRangeError:
                # Dataset exhausted; any other error is a real failure and must propagate
                break
            cnt += 1

        # Guard against division by zero when no batch could be evaluated
        return avg_acc_top1 / cnt if cnt else 0.0

    # Select the right quant_scheme
    if args.quant_scheme == 'range_learning_tf':
        quant_scheme = aimet_common.defs.QuantScheme.training_range_learning_with_tf_init
    elif args.quant_scheme == 'range_learning_tf_enhanced':
        quant_scheme = aimet_common.defs.QuantScheme.training_range_learning_with_tf_enhanced_init
    elif args.quant_scheme == 'tf':
        quant_scheme = aimet_common.defs.QuantScheme.post_training_tf
    elif args.quant_scheme == 'tf_enhanced':
        quant_scheme = aimet_common.defs.QuantScheme.post_training_tf_enhanced
    else:
        raise ValueError("Got unrecognized quant_scheme: " + args.quant_scheme)

    # Create QuantizationSimModel
    sim = QuantizationSimModel(
        session=sess,
        starting_op_names=['IteratorGetNext'],
        output_op_names=[logits.name[:-2]],
        quant_scheme=quant_scheme,
        rounding_mode=args.round_mode,
        default_output_bw=args.default_output_bw,
        default_param_bw=args.default_param_bw,
        config_file=args.quantsim_config_file,
    )

    # Run compute_encodings
    sim.compute_encodings(eval_func,
                          forward_pass_callback_args=args.encodings_iterations)

    # Run final evaluation
    sess = sim.session

    top1_acc = eval_func(sess, -1)
    print('Avg accuracy  Top 1: {}'.format(top1_acc))
예제 #22
0
    def test_manual_quantize(self):
        """
        Test quantizing a model by manually specifying ops to quantize.

        Three QuantizationSimModel class attributes are temporarily replaced with local
        overrides so that exactly one activation quantizer and one param quantizer are inserted.
        The original attributes are always restored, even if the test fails.
        """
        def get_manual_activations(_graph, _starting_ops, _ending_ops):
            """
            Overriding function for getting a list of ops to insert activation quantizers for
            :param _graph: Unused argument
            :param _starting_ops: Unused argument
            :param _ending_ops: Unused argument
            :return: List of ops to insert activation quantizers for, None for placeholder
            """
            return ['conv2d/Relu'], None

        def get_manual_params(_graph, _starting_ops, _ending_ops):
            """
            Overriding function for getting a list of ops to insert param quantizers for
            :param _graph: Unused argument
            :param _starting_ops: Unused argument
            :param _ending_ops: Unused argument
            :return: List of ops to insert param quantizers for, and list of param indices for these ops
            """
            return ['conv2d_1/Conv2D'], [1]

        def configure_quantization_ops(self, _conn_graph,
                                       _ops_with_param_names, _indices,
                                       _activation_op_names, _config_file):
            """
            Overriding function for configuring quantization ops inserted by QuantizationSimModel
            :param self: Self refers to QuantizationSimModel object
            :param _conn_graph: Unused argument
            :param _ops_with_param_names: Unused argument
            :param _indices: Unused argument
            :param _activation_op_names: Unused argument
            :param _config_file: Unused argument
            """
            # Toggle each quantizer off and back on; both end up enabled
            conv2d_relu_quant_info = self._activation_quantizers[
                'conv2d/Relu_quantized']
            conv2d_relu_quant_info.enabled = False
            conv2d_relu_quant_info.enabled = True
            conv2d_1_weight_quant_info = self._param_quantizers[
                'conv2d_1/Conv2D/ReadVariableOp_quantized']
            conv2d_1_weight_quant_info.enabled = False
            conv2d_1_weight_quant_info.enabled = True

        tf.compat.v1.reset_default_graph()
        with tf.device('/cpu:0'):
            model = tf.keras.Sequential()
            model.add(
                tf.keras.layers.Conv2D(32,
                                       kernel_size=3,
                                       input_shape=(28, 28, 3),
                                       activation='relu'))
            model.add(tf.keras.layers.MaxPooling2D((2, 2)))
            model.add(
                tf.keras.layers.Conv2D(64, kernel_size=3, activation='relu'))
            model.summary()

        sess = tf.compat.v1.Session()
        initialize_uninitialized_vars(sess)

        orig_get_ops_to_quantize_activations_for = QuantizationSimModel._get_ops_to_quantize_activations_for
        orig_get_ops_to_quantize_weights_for = QuantizationSimModel._get_ops_to_quantize_params_for
        orig_configure_quantization_ops = QuantizationSimModel.configure_quantization_ops

        sim = None
        try:
            QuantizationSimModel._get_ops_to_quantize_activations_for = get_manual_activations
            QuantizationSimModel._get_ops_to_quantize_params_for = get_manual_params
            QuantizationSimModel.configure_quantization_ops = configure_quantization_ops
            sim = QuantizationSimModel(sess, ['conv2d_input'],
                                       ['conv2d_1/Relu'],
                                       use_cuda=False)
            self.assertEqual(1, len(sim._activation_quantizers))
            self.assertEqual(1, len(sim._param_quantizers))
        finally:
            # Restore the class attributes first so a failure here cannot leak the
            # overrides into other tests
            QuantizationSimModel._get_ops_to_quantize_activations_for = orig_get_ops_to_quantize_activations_for
            QuantizationSimModel._get_ops_to_quantize_params_for = orig_get_ops_to_quantize_weights_for
            QuantizationSimModel.configure_quantization_ops = orig_configure_quantization_ops

            sess.close()
            if sim is not None:
                sim.session.close()
            del sim
예제 #23
0
    def test_backward_pass_time_taken_lstm(self,
                                           is_quantized=True,
                                           iterations=1):
        """
        Run training steps on an lstm model and measure the average time per step

        :param is_quantized: When True, the model is wrapped in a QuantizationSimModel first
        :param iterations: Number of training steps to time
        :return: Average wall clock time of one training step
        """

        tf.reset_default_graph()

        sess = tf.Session()
        np.random.seed(0)
        tf.set_random_seed(0)
        timesteps = 5
        with sess.graph.as_default():
            inputs = tf.keras.Input(shape=(timesteps, 100))

            # lstm layer with 12 internal units followed by a softmax dense layer
            lstm_out = tf.keras.layers.LSTM(12)(inputs)
            _ = tf.keras.layers.Dense(10,
                                      activation=tf.nn.softmax,
                                      name="lstm_model")(lstm_out)

            sess.run(tf.global_variables_initializer())

        if is_quantized:

            def dummy_forward_pass(sess, args):
                out_tensor = sess.graph.get_tensor_by_name(
                    'lstm_model/Softmax:0')
                in_tensor = sess.graph.get_tensor_by_name('input_1:0')
                # batch of 32 with time_steps = 5
                sess.run(out_tensor,
                         feed_dict={in_tensor: np.random.randn(32, 5, 100)})

            sim = QuantizationSimModel(sess, ['input_1'],
                                       ['lstm_model/Softmax'],
                                       use_cuda=False)
            sim.compute_encodings(dummy_forward_pass, None)
            curr_sess = sim.session
        else:
            curr_sess = sess

        # training data: random inputs and random one hot labels
        inp_tensor = curr_sess.graph.get_tensor_by_name('input_1:0')
        np.random.seed(0)
        batches = 32
        inp_shape = inp_tensor.shape
        inp_data = np.random.rand(batches, inp_shape[1], inp_shape[2])
        logits = curr_sess.graph.get_tensor_by_name('lstm_model/MatMul:0')
        one_hot_labels = np.eye(10)[np.random.randint(10, size=batches)]

        with curr_sess.graph.as_default():
            var_list = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES)
            labels_placeholder = tf.placeholder(tf.float32, [None, 10],
                                                name='labels')
            loss = tf.losses.softmax_cross_entropy(
                onehot_labels=labels_placeholder, logits=logits)

            global_step = tf.train.create_global_step()
            optimizer = tf.train.GradientDescentOptimizer(learning_rate=1e-3)
            gradients = optimizer.compute_gradients(loss, var_list)

            init_global = tf.global_variables_initializer()
            init_local = tf.local_variables_initializer()
            curr_sess.run(tf.group(init_global, init_local))

            grad_updates = optimizer.apply_gradients(gradients,
                                                     global_step=global_step)
            update_op = tf.group(grad_updates)

            # fetching train_op forces the gradient update to run
            with tf.control_dependencies([update_op]):
                train_op = tf.identity(loss, name='train_op')

            # start training, recording the duration of every step
            feed = {inp_tensor: inp_data, labels_placeholder: one_hot_labels}
            step_times = []
            for _ in range(iterations):
                started_at = time.perf_counter()
                curr_sess.run(train_op, feed_dict=feed)
                step_times.append(time.perf_counter() - started_at)

            default_grad_avg_time = sum(step_times) / iterations

        # close session
        sess.close()
        if is_quantized:
            sim.session.close()
            del sim

        return default_grad_avg_time
예제 #24
0
    def test_set_get_quantizer_params_using_properties(self):
        """
        Create QuantSim for a CPU model, test param read and write using properties.

        Covers the bitwidth, use_symmetric_encoding, rounding_mode, quant_scheme and enabled
        properties of the quantizer configs returned by sim.quantizer_config().
        """

        tf.compat.v1.reset_default_graph()
        with tf.device('/cpu:0'):
            model = tf.keras.Sequential()
            model.add(
                tf.keras.layers.Conv2D(32,
                                       kernel_size=3,
                                       input_shape=(28, 28, 3),
                                       activation='relu'))
            model.add(tf.keras.layers.MaxPooling2D((2, 2)))
            model.add(
                tf.keras.layers.Conv2D(64, kernel_size=3, activation='relu'))
            model.summary()

        sess = tf.compat.v1.Session()
        initialize_uninitialized_vars(sess)
        sim = QuantizationSimModel(sess, [model.input.op.name],
                                   [model.output.op.name],
                                   use_cuda=False)

        p_quantizer = sim.quantizer_config(
            'conv2d/Conv2D/ReadVariableOp_quantized')
        o_quantizer = sim.quantizer_config('conv2d/Relu_quantized')
        bias_quantizer = sim.quantizer_config(
            'conv2d/BiasAdd/ReadVariableOp_quantized')

        # check if __str__ can print the object info
        print(p_quantizer)

        # bitwidth: default of 8 can be overwritten, for params and activations alike
        self.assertEqual(8, p_quantizer.bitwidth)
        p_quantizer.bitwidth = 6
        self.assertEqual(6, p_quantizer.bitwidth)

        self.assertEqual(8, o_quantizer.bitwidth)
        o_quantizer.bitwidth = 6
        self.assertEqual(6, o_quantizer.bitwidth)

        # symmetric encoding flag
        self.assertFalse(bias_quantizer.use_symmetric_encoding)
        bias_quantizer.use_symmetric_encoding = True
        self.assertTrue(bias_quantizer.use_symmetric_encoding)

        # rounding mode
        self.assertEqual(libpymo.RoundingMode.ROUND_NEAREST,
                         o_quantizer.rounding_mode)
        o_quantizer.rounding_mode = libpymo.RoundingMode.ROUND_STOCHASTIC
        self.assertEqual(libpymo.RoundingMode.ROUND_STOCHASTIC,
                         o_quantizer.rounding_mode)

        # quant scheme; the encoding is expected to be invalid after the scheme change
        self.assertEqual(libpymo.QuantizationMode.QUANTIZATION_TF_ENHANCED,
                         o_quantizer.quant_scheme)
        o_quantizer.quant_scheme = QuantScheme.post_training_tf
        self.assertEqual(libpymo.QuantizationMode.QUANTIZATION_TF,
                         o_quantizer.quant_scheme)
        self.assertFalse(o_quantizer.tensor_quantizer.isEncodingValid)

        # enabled flag
        self.assertTrue(p_quantizer.enabled)
        p_quantizer.enabled = False
        self.assertFalse(p_quantizer.enabled)

        # close both the original session and the quantsim session
        sess.close()
        sim.session.close()
        del sim
예제 #25
0
    def test_save_load_ckpt_after_compute_encoding_on_orig_object(self):
        """
        Create QuantSim for a CPU model, test save and load on a quantsim model
        when encodings have been computed on original quantsim object.

        The loaded quantsim object must be a distinct object whose param and activation
        quantizers carry the same encoding validity flag, encoding min and encoding max
        as the original object.
        """
        tf.compat.v1.reset_default_graph()
        with tf.device('/cpu:0'):
            model = tf.keras.Sequential()
            model.add(
                tf.keras.layers.Conv2D(32,
                                       kernel_size=3,
                                       input_shape=(28, 28, 3),
                                       activation='relu'))
            model.add(tf.keras.layers.MaxPooling2D((2, 2)))
            model.add(
                tf.keras.layers.Conv2D(64, kernel_size=3, activation='relu'))
            model.summary()

        sess = tf.compat.v1.Session()
        initialize_uninitialized_vars(sess)
        sim = QuantizationSimModel(sess, [model.input.op.name],
                                   [model.output.op.name],
                                   use_cuda=False)

        def dummy_forward_pass(n_sess, args):
            """ Forward pass callback for compute_encodings; feeds one random batch """
            model_output = n_sess.graph.get_tensor_by_name(model.output.name)
            # run the first consumer of the model output rather than the output tensor itself
            model_output = model_output.consumers()[0].outputs[0]
            model_input = n_sess.graph.get_tensor_by_name(model.input.name)
            dummy_input = np.random.randn(20, 28, 28, 3)
            n_sess.run(model_output, feed_dict={model_input: dummy_input})

        new_quantsim = None
        try:
            sim.compute_encodings(dummy_forward_pass, None)

            # save quantsim model
            save_checkpoint(sim, './test_3', 'orig_quantsim_model')

            new_quantsim = load_checkpoint('./test_3', 'orig_quantsim_model')

            # validations
            # unittest assertion instead of a bare assert, which is stripped under -O
            self.assertIsNot(sim, new_quantsim)

            # as we have performed computeEncodings() on saved quantsim object, these must be set to True/False
            # in loaded quantsim object as on orig model
            quantizer_groups = (
                (new_quantsim._param_quantizers, sim._param_quantizers),
                (new_quantsim._activation_quantizers,
                 sim._activation_quantizers),
            )
            encoding_indices = (QuantizeOpIndices.encoding_min,
                                QuantizeOpIndices.encoding_max)
            for loaded_quantizers, orig_quantizers in quantizer_groups:
                for quantize_op in loaded_quantizers:
                    loaded_quantizer = loaded_quantizers[quantize_op]
                    orig_quantizer = orig_quantizers[quantize_op]
                    self.assertEqual(
                        loaded_quantizer.tensor_quantizer.isEncodingValid,
                        orig_quantizer.tensor_quantizer.isEncodingValid)
                    for index in encoding_indices:
                        self.assertEqual(
                            loaded_quantizer.get_variable_from_op(index),
                            orig_quantizer.get_variable_from_op(index))
        finally:
            # delete temp folder created and close sessions, also when a check above fails
            shutil.rmtree('./test_3', ignore_errors=True)
            sess.close()
            sim.session.close()
            if new_quantsim is not None:
                new_quantsim.session.close()
            del sim
            del new_quantsim
예제 #26
0
    def _save_to_keras_common_test_code(self, use_cuda):
        """
        Shared body for the save_to_keras tests.

        Builds a Conv2D -> MaxPooling2D -> Conv2D keras model, wraps it in a QuantSim,
        computes encodings and then validates the session returned by save_to_keras().

        :param use_cuda: Forwarded to QuantizationSimModel; also selects how the model is placed
        """
        tf.compat.v1.reset_default_graph()

        def build_model():
            # Same three layers regardless of device placement
            net = tf.keras.Sequential()
            net.add(
                tf.keras.layers.Conv2D(32,
                                       kernel_size=3,
                                       input_shape=(28, 28, 3),
                                       activation='relu'))
            net.add(tf.keras.layers.MaxPooling2D((2, 2)))
            net.add(
                tf.keras.layers.Conv2D(64, kernel_size=3, activation='relu'))
            net.summary()

        if use_cuda:
            # NOTE(review): the CUDA variant pins the model to '/cpu:0' - confirm this is intended
            with tf.device('/cpu:0'):
                build_model()
        else:
            build_model()

        sess = tf.compat.v1.Session()
        initialize_uninitialized_vars(sess)
        sim = QuantizationSimModel(sess, ['conv2d_input'], ['conv2d_1/Relu'],
                                   use_cuda=use_cuda)

        # Op-mode right after QuantSim creation (input index 1 of a quantize op is its op-mode)
        sim_graph = sim.session.graph
        weight_quant_op = sim_graph.get_operation_by_name(
            'conv2d/Conv2D/ReadVariableOp_quantized')
        output_quant_op = sim_graph.get_operation_by_name(
            'conv2d/Relu_quantized')
        self.assertEqual(
            int(libpymo.TensorQuantizerOpMode.oneShotQuantizeDequantize),
            sim.session.run(weight_quant_op.inputs[1]))
        self.assertEqual(int(libpymo.TensorQuantizerOpMode.updateStats),
                         sim.session.run(output_quant_op.inputs[1]))

        def dummy_forward_pass(session, eval_tensor_name):
            # Evaluate the named tensor once on a random batch of 20 images
            output_tensor = session.graph.get_tensor_by_name(eval_tensor_name)
            input_tensor = session.graph.get_tensor_by_name('conv2d_input:0')
            random_batch = np.random.randn(20, 28, 28, 3)
            session.run(output_tensor, feed_dict={input_tensor: random_batch})

        sim.compute_encodings(dummy_forward_pass, 'conv2d_1/Relu_quantized:0')
        mod_sess = sim.save_to_keras()

        # Check 1: The new graph is well formed. Try forward pass through the graph.
        dummy_forward_pass(mod_sess, 'conv2d_1/Relu_quantized_static:0')

        # Check 2: All the QcQuantizeOp nodes have no output - meaning are disconnected from the main graph
        quantize_ops = [
            graph_op for graph_op in mod_sess.graph.get_operations()
            if graph_op.type == "QcQuantize"
        ]
        for quantize_op in quantize_ops:
            self.assertFalse(quantize_op.outputs[0].consumers())

        # Check 3: One QcQuantizeStatic for each QcQuantize op
        static_ops = [
            graph_op for graph_op in mod_sess.graph.get_operations()
            if graph_op.type == "QcQuantizeStatic"
        ]
        self.assertEqual(len(quantize_ops), len(static_ops))

        # Check 4: Make sure the attributes are set correctly
        # quant_scheme 1 = TF-Enhanced
        # op_mode 1 = oneShotQuantizeDequantize, 2 = quantizeDequantize, 3 = passThrough
        expected_attrs = [
            ("conv2d/Conv2D/ReadVariableOp_quantized_static",
             [("bitwidth", 8), ("quant_scheme", 1), ("op_mode", 1)]),
            ("conv2d/BiasAdd_quantized_static",
             [("op_mode", 3)]),
            ("conv2d/Relu_quantized_static",
             [("bitwidth", 8), ("quant_scheme", 1), ("op_mode", 2)]),
        ]
        for static_op_name, attr_values in expected_attrs:
            static_op = mod_sess.graph.get_operation_by_name(static_op_name)
            for attr_name, expected_value in attr_values:
                self.assertEqual(expected_value, static_op.get_attr(attr_name))

        sess.close()
        sim.session.close()
        del sim
예제 #27
0
    def test_save_load_ckpt_cpu_model(self):
        """
        Create QuantSim for a CPU model, test save and load on a quantsim model.

        No encodings are computed before saving, so every quantizer of both the original
        and the loaded quantsim is expected to report an invalid encoding.
        """
        tf.compat.v1.reset_default_graph()
        with tf.device('/cpu:0'):
            model = tf.keras.Sequential()
            model.add(
                tf.keras.layers.Conv2D(32,
                                       kernel_size=3,
                                       input_shape=(28, 28, 3),
                                       activation='relu'))
            model.add(tf.keras.layers.MaxPooling2D((2, 2)))
            model.add(
                tf.keras.layers.Conv2D(64, kernel_size=3, activation='relu'))
            model.summary()

        sess = tf.compat.v1.Session()
        initialize_uninitialized_vars(sess)
        sim = QuantizationSimModel(sess, [model.input.op.name],
                                   [model.output.op.name],
                                   use_cuda=False)

        new_quantsim = None
        try:
            # save quantsim model
            save_checkpoint(sim, './test_3', 'orig_quantsim_model')

            new_quantsim = load_checkpoint('./test_3', 'orig_quantsim_model')

            # validations
            # unittest assertions are used instead of a bare assert, which is stripped under -O
            self.assertIsNot(sim, new_quantsim)
            self.assertIsNotNone(new_quantsim.session)
            self.assertEqual(new_quantsim._quant_scheme, sim._quant_scheme)
            self.assertEqual(new_quantsim._rounding_mode, sim._rounding_mode)
            self.assertEqual(new_quantsim._use_cuda, sim._use_cuda)
            self.assertEqual(len(new_quantsim._param_quantizers),
                             len(sim._param_quantizers))
            self.assertEqual(len(new_quantsim._activation_quantizers),
                             len(sim._activation_quantizers))

            # The same per-quantizer checks apply to param and activation quantizers
            quantizer_groups = (
                (sim._param_quantizers, new_quantsim._param_quantizers),
                (sim._activation_quantizers,
                 new_quantsim._activation_quantizers),
            )
            for orig_quantizers, loaded_quantizers in quantizer_groups:
                for quantize_op in loaded_quantizers:
                    orig_quantizer = orig_quantizers[quantize_op]
                    loaded_quantizer = loaded_quantizers[quantize_op]

                    # The loaded quantizer must be bound to a different session
                    self.assertNotEqual(orig_quantizer.session,
                                        loaded_quantizer.session)
                    self.assertEqual(
                        orig_quantizer.tensor_quantizer.getQuantScheme(),
                        loaded_quantizer.tensor_quantizer.getQuantScheme())
                    self.assertEqual(
                        orig_quantizer.tensor_quantizer.roundingMode,
                        loaded_quantizer.tensor_quantizer.roundingMode)
                    self.assertFalse(
                        orig_quantizer.tensor_quantizer.isEncodingValid)
                    self.assertFalse(
                        loaded_quantizer.tensor_quantizer.isEncodingValid)

            # remove the old quant sim reference and session
            # to test that everything is loaded correctly on new quantsim including tensor quantizer references
            sim.session.close()
            del sim
        finally:
            # delete temp folder created and close sessions; done in finally so a failing
            # assertion does not leave './test_3' behind for other tests that reuse the path
            shutil.rmtree('./test_3', ignore_errors=True)
            sess.close()
            if new_quantsim is not None:
                new_quantsim.session.close()
예제 #28
0
    def test_compute_encodings_gpu_model(self):
        """
        Create QuantSim for a GPU model and test that activation encodings are computed
        """

        tf.compat.v1.reset_default_graph()
        with tf.device('/gpu:0'):
            model = tf.keras.Sequential()
            model.add(
                tf.keras.layers.Conv2D(32,
                                       kernel_size=3,
                                       input_shape=(28, 28, 3),
                                       activation='relu'))
            model.add(tf.keras.layers.MaxPooling2D((2, 2)))
            model.add(
                tf.keras.layers.Conv2D(64, kernel_size=3, activation='relu'))
            model.summary()

        sess = tf.compat.v1.Session()
        initialize_uninitialized_vars(sess)
        sim = QuantizationSimModel(sess, ['conv2d_input'], ['conv2d_1/Relu'],
                                   use_cuda=True)

        # Check that op-mode is set correctly
        # (input index 1 of a quantize op is read here as its op-mode)
        conv2d_weight_quant_op = sim.session.graph.get_operation_by_name(
            'conv2d/Conv2D/ReadVariableOp_quantized')
        conv2d_output_quant_op = sim.session.graph.get_operation_by_name(
            'conv2d/Relu_quantized')
        self.assertEqual(
            int(libpymo.TensorQuantizerOpMode.oneShotQuantizeDequantize),
            sim.session.run(conv2d_weight_quant_op.inputs[1]))
        self.assertEqual(int(libpymo.TensorQuantizerOpMode.updateStats),
                         sim.session.run(conv2d_output_quant_op.inputs[1]))

        def dummy_forward_pass(session, args):
            # Single forward pass on a random batch of 20 images; args is unused
            model_output = session.graph.get_tensor_by_name(
                'conv2d_1/Relu_quantized:0')
            model_input = session.graph.get_tensor_by_name('conv2d_input:0')
            dummy_input = np.random.randn(20, 28, 28, 3)
            session.run(model_output, feed_dict={model_input: dummy_input})

        sim.compute_encodings(dummy_forward_pass, None)

        # Check if encodings have been calculated
        deactivated_quantizers = [
            'conv2d_input_quantized', 'conv2d/BiasAdd_quantized',
            'conv2d_1/BiasAdd_quantized'
        ]
        for name, quantizer in sim._activation_quantizers.items():
            if name in deactivated_quantizers:
                # assertEqual, not assertTrue: assertTrue(expected, actual) would treat the
                # actual op-mode as the failure message and pass for any truthy expected value
                self.assertEqual(
                    int(libpymo.TensorQuantizerOpMode.passThrough),
                    sim.session.run(name + '_op_mode/read:0'))
            else:
                self.assertTrue(
                    quantizer.tensor_quantizer.isEncodingValid,
                    "quantizer: {} does not have a valid encoding".format(
                        name))

        # Check that op-mode is set correctly
        # Check that quantized ops got added for all params
        conv2d_weight_quant_op = sim.session.graph.get_operation_by_name(
            'conv2d/Conv2D/ReadVariableOp_quantized')
        conv2d_output_quant_op = sim.session.graph.get_operation_by_name(
            'conv2d/Relu_quantized')

        # After compute_encodings the activation quantizer is expected to have moved
        # from updateStats to quantizeDequantize
        self.assertEqual(
            int(libpymo.TensorQuantizerOpMode.oneShotQuantizeDequantize),
            sim.session.run(conv2d_weight_quant_op.inputs[1]))
        self.assertEqual(int(libpymo.TensorQuantizerOpMode.quantizeDequantize),
                         sim.session.run(conv2d_output_quant_op.inputs[1]))

        sess.close()
        sim.session.close()
        del sim
예제 #29
0
    def run_inference(self,
                      ckpt_path,
                      image_files,
                      labels,
                      enable_ema=True,
                      export_ckpt=None):
        """Run quantization-simulated inference and return the top-5 predictions.

        The model is built in a fresh graph, batch norms are folded, a
        QuantizationSimModel is created on the folded session, encodings are
        computed with 500 evaluation iterations, and finally
        ``len(image_files) // self.batch_size`` batches are evaluated.

        Args:
          ckpt_path: checkpoint restored before the batch-norm fold, or after
            it when ``self.ckpt_bn_folded`` is set.
          image_files: image inputs handed to ``self.build_dataset``.
          labels: labels handed to ``self.build_dataset``.
          enable_ema: not referenced in this method.
          export_ckpt: not referenced in this method.

        Returns:
          A ``(prediction_idx, prediction_prob)`` pair of lists with one entry
          per evaluated batch.

        Raises:
          ValueError: if ``self.quant_scheme`` is not one of the known names.
        """
        if self.include_background_label:
            label_offset = 1
        else:
            label_offset = 0

        with tf.Graph().as_default():
            sess = tf.Session()
            images, _ = self.build_dataset(image_files, labels, False)
            probs = self.build_model(images, is_training=False)
            if isinstance(probs, tuple):
                probs = probs[0]

            if self.ckpt_bn_folded:
                # Weights are restored after the fold below; only initialize here
                sess.run(tf.global_variables_initializer())
            else:
                tf.train.Saver().restore(sess, ckpt_path)

        # Fold all BatchNorms before QuantSim
        sess, _ = fold_all_batch_norms(sess, ['IteratorGetNext'], ['logits'])

        if self.ckpt_bn_folded:
            with sess.graph.as_default():
                tf.train.Saver().restore(sess, ckpt_path)

        sess.run('MakeIterator')

        # Define an eval function to use during compute encodings
        def eval_func(sess, iterations):
            sess.run('MakeIterator')
            for _ in range(iterations):
                sess.run('Squeeze:0')

        # Select the right quant_scheme; attribute names are resolved lazily so
        # only the selected QuantScheme member is ever looked up
        scheme_attr_by_name = {
            'range_learning_tf': 'training_range_learning_with_tf_init',
            'range_learning_tf_enhanced':
                'training_range_learning_with_tf_enhanced_init',
            'tf': 'post_training_tf',
            'tf_enhanced': 'post_training_tf_enhanced',
        }
        if self.quant_scheme not in scheme_attr_by_name:
            raise ValueError("Got unrecognized quant_scheme: " +
                             self.quant_scheme)
        quant_scheme = getattr(aimet_common.defs.QuantScheme,
                               scheme_attr_by_name[self.quant_scheme])

        # Create QuantizationSimModel
        sim = QuantizationSimModel(
            session=sess,
            starting_op_names=['IteratorGetNext'],
            output_op_names=['logits'],
            quant_scheme=quant_scheme,
            rounding_mode=self.round_mode,
            default_output_bw=self.default_output_bw,
            default_param_bw=self.default_param_bw,
            config_file=self.quantsim_config_file,
        )

        # Run compute_encodings
        sim.compute_encodings(eval_func, forward_pass_callback_args=500)

        # Run final evaluation on the quantsim session
        sess = sim.session
        sess.run('MakeIterator')
        prediction_idx = []
        prediction_prob = []
        num_batches = len(image_files) // self.batch_size
        for _ in range(num_batches):
            out_probs = sess.run('Squeeze:0')
            # Indices of the five largest probabilities, highest first
            top5 = np.argsort(out_probs)[::-1][:5]
            prediction_idx.append(top5 - label_offset)
            prediction_prob.append([out_probs[pid] for pid in top5])

        # Return the top 5 predictions (idx and prob) for each image.
        return prediction_idx, prediction_prob
예제 #30
0
    def test_export_cpu_model(self):
        """
        Create QuantSim for a CPU model, compute encodings and export out a resulting model

        Verifies that the exported encodings file contains the expected keys, that a bias
        value changed after compute_encodings shows up in the exported model, and that the
        exported graph no longer contains QcQuantize ops.
        """
        tf.compat.v1.reset_default_graph()
        with tf.device('/cpu:0'):
            model = tf.keras.Sequential()
            model.add(
                tf.keras.layers.Conv2D(32,
                                       kernel_size=3,
                                       input_shape=(28, 28, 3),
                                       activation='relu'))
            model.add(tf.keras.layers.MaxPooling2D((2, 2)))
            model.add(
                tf.keras.layers.Conv2D(64, kernel_size=3, activation='relu'))
            model.summary()

        sess = tf.compat.v1.Session()
        initialize_uninitialized_vars(sess)
        sim = QuantizationSimModel(sess, [model.input.op.name],
                                   [model.output.op.name],
                                   use_cuda=False)

        def dummy_forward_pass(sess, args):
            # Evaluate the first consumer of the model output (rather than the output
            # itself) on a random batch of 20 images; args is unused
            model_output = sess.graph.get_tensor_by_name(model.output.name)
            model_output = model_output.consumers()[0].outputs[0]
            model_input = sess.graph.get_tensor_by_name(model.input.name)
            dummy_input = np.random.randn(20, 28, 28, 3)
            sess.run(model_output, feed_dict={model_input: dummy_input})

        sim.compute_encodings(dummy_forward_pass, None)

        # Make some changes to model parameters to see if they are part of the exported model
        with sim.session.graph.as_default():
            first_bias_tensor = sim.session.graph.get_tensor_by_name(
                'conv2d/BiasAdd/ReadVariableOp:0')
            first_bias_tensor_val = sim.session.run(first_bias_tensor)
            self.assertTrue(np.any(first_bias_tensor_val == 0))
            first_bias_tensor_var = [
                var for var in tf.compat.v1.global_variables()
                if var.name == 'conv2d/bias:0'
            ][0]
            first_bias_tensor_var.load(np.ones(32), sim.session)

        all_op_types = [op.type for op in sim.session.graph.get_operations()]
        self.assertIn('QcQuantize', all_op_types)

        sim.export('/tmp', 'quant_sim_model')

        with open('/tmp/quant_sim_model.encodings') as json_file:
            encoding_data = json.load(json_file)

        # Every exported encoding entry must carry these fields
        expected_encoding_keys = ("bitwidth", "is_symmetric", "max", "min",
                                  "offset", "scale")

        activation_encodings = encoding_data["activation_encodings"]
        activation_keys = list(activation_encodings.keys())
        self.assertEqual("conv2d/Relu:0", activation_keys[0])
        self.assertIsInstance(activation_encodings["conv2d/Relu:0"], list)
        act_encoding_keys = activation_encodings["conv2d/Relu:0"][0].keys()
        for key in expected_encoding_keys:
            self.assertIn(key, act_encoding_keys)

        param_encodings = encoding_data["param_encodings"]
        param_keys = list(param_encodings.keys())
        self.assertEqual("conv2d/Conv2D/ReadVariableOp:0", param_keys[0])
        self.assertIsInstance(
            param_encodings["conv2d/Conv2D/ReadVariableOp:0"], list)
        param_encoding_keys = param_encodings[
            "conv2d/Conv2D/ReadVariableOp:0"][0].keys()
        for key in expected_encoding_keys:
            self.assertIn(key, param_encoding_keys)

        new_sess = load_model_from_meta('/tmp/quant_sim_model.meta')
        try:
            # The bias overwritten with ones above must be present in the exported model
            first_bias_tensor = new_sess.graph.get_tensor_by_name(
                'conv2d/BiasAdd/ReadVariableOp:0')
            first_bias_tensor_val = new_sess.run(first_bias_tensor)
            self.assertTrue(np.any(first_bias_tensor_val == 1))

            all_op_types = [op.type for op in new_sess.graph.get_operations()]
            self.assertNotIn('QcQuantize', all_op_types)
        finally:
            # The session loaded from the exported meta file was previously never closed
            new_sess.close()

        sess.close()
        sim.session.close()
        del sim