Ejemplo n.º 1
0
    def test_set_and_freeze_op_mode(self):
        """
        Create QuantSim for a CPU model, test set and freeze op mode.

        The op mode is set, the encoding is frozen, and a later ``set_op_mode``
        call is then expected to leave the op mode unchanged.
        """
        tf.compat.v1.reset_default_graph()
        with tf.device('/cpu:0'):
            _ = keras_model()
            init = tf.compat.v1.global_variables_initializer()

        session = tf.compat.v1.Session()
        sim = None
        try:
            session.run(init)

            sim = QuantizationSimModel(session, ['conv2d_input'],
                                       ['keras_model/Softmax'],
                                       use_cuda=False)
            quantizer = sim.quantizer_config(
                'conv2d/Conv2D/ReadVariableOp_quantized')

            op_mode = int(
                libpymo.TensorQuantizerOpMode.oneShotQuantizeDequantize)
            quantizer.set_op_mode(op_mode)
            quantizer.freeze_encoding()
            self.assertEqual(op_mode, quantizer.get_op_mode())

            # Once frozen, a new op mode must be ignored and the old one kept.
            new_op_mode = int(libpymo.TensorQuantizerOpMode.passThrough)
            quantizer.set_op_mode(new_op_mode)
            self.assertNotEqual(new_op_mode, quantizer.get_op_mode())
            self.assertEqual(op_mode, quantizer.get_op_mode())
        finally:
            # Release both sessions even when an assertion above fails; the
            # sim session is closed explicitly, as other tests in this file do.
            if sim is not None:
                sim.session.close()
            session.close()
Ejemplo n.º 2
0
    def test_set_and_freeze_param_encodings(self):
        """
        Test set and freeze parameter encodings functionality.

        A single parameter encoding is written to a JSON file, loaded through
        ``set_and_freeze_param_encodings`` and then compared against the values
        read back from the corresponding quantizer.
        """
        tf.compat.v1.reset_default_graph()
        with tf.device('/cpu:0'):
            _ = keras_model()
            init = tf.compat.v1.global_variables_initializer()

        param_name = 'conv2d/Conv2D/ReadVariableOp:0'
        param_encodings = {
            param_name: [{
                'bitwidth': 4,
                'is_symmetric': False,
                'max': 0.14584073424339294,
                'min': -0.12761062383651733,
                'offset': -7.0,
                'scale': 0.01823008991777897
            }]
        }
        # Single source of truth for the temporary encodings file location.
        encoding_file_path = os.path.join('./', 'dummy.encodings')

        session = tf.compat.v1.Session()
        sim = None
        try:
            session.run(init)

            sim = QuantizationSimModel(session, ['conv2d_input'],
                                       ['keras_model/Softmax'],
                                       use_cuda=False)

            # export encodings to JSON file
            with open(encoding_file_path, 'w') as encoding_fp:
                json.dump(param_encodings,
                          encoding_fp,
                          sort_keys=True,
                          indent=4)

            sim.set_and_freeze_param_encodings(
                encoding_path=encoding_file_path)

            quantizer = sim.quantizer_config(
                'conv2d/Conv2D/ReadVariableOp_quantized')
            encoding = param_encodings[param_name][0]

            encoding_max = quantizer.get_variable_from_op(
                QuantizeOpIndices.encoding_max)
            encoding_min = quantizer.get_variable_from_op(
                QuantizeOpIndices.encoding_min)

            self.assertEqual(encoding_min, encoding.get('min'))
            self.assertEqual(encoding_max, encoding.get('max'))
            self.assertEqual(
                int(libpymo.TensorQuantizerOpMode.quantizeDequantize),
                quantizer.get_op_mode())
            self.assertEqual(quantizer.is_encoding_valid(), True)
        finally:
            # Always release the sessions and delete the encodings JSON file,
            # so a failing assertion does not leave state behind for other tests.
            if sim is not None:
                sim.session.close()
            session.close()
            try:
                os.remove(encoding_file_path)
            except FileNotFoundError:
                # The file was never written (failure before the export step).
                pass
Ejemplo n.º 3
0
    def test_set_and_freeze_encoding(self):
        """
        Create QuantSim for a CPU model, test set and freeze encoding.

        An encoding is computed, set and frozen; a later ``set_encoding`` call
        with different min/max is then expected to leave the stored values
        unchanged.
        """
        tf.compat.v1.reset_default_graph()
        with tf.device('/cpu:0'):
            _ = keras_model()
            init = tf.compat.v1.global_variables_initializer()

        session = tf.compat.v1.Session()
        sim = None
        try:
            session.run(init)

            sim = QuantizationSimModel(session, ['conv2d_input'],
                                       ['keras_model/Softmax'],
                                       use_cuda=False)
            quantizer = sim.quantizer_config(
                'conv2d/Conv2D/ReadVariableOp_quantized')

            encoding = quantizer.compute_encoding(8, False)
            # Set and freeze encoding
            quantizer.set_encoding(encoding)
            quantizer.freeze_encoding()

            old_encoding_min = quantizer.get_variable_from_op(
                QuantizeOpIndices.encoding_min)
            old_encoding_max = quantizer.get_variable_from_op(
                QuantizeOpIndices.encoding_max)

            self.assertEqual(encoding.min, old_encoding_min)
            self.assertEqual(encoding.max, old_encoding_max)
            self.assertEqual(quantizer.is_encoding_valid(), True)

            # Try updating encoding min and max with new values; because the
            # encoding is frozen the stored values must not change.
            encoding.min = -0.4
            encoding.max = 0.6
            quantizer.set_encoding(encoding)

            self.assertEqual(
                old_encoding_min,
                quantizer.get_variable_from_op(QuantizeOpIndices.encoding_min))
            self.assertEqual(
                old_encoding_max,
                quantizer.get_variable_from_op(QuantizeOpIndices.encoding_max))
        finally:
            # Release both sessions even when an assertion above fails; the
            # sim session is closed explicitly, as other tests in this file do.
            if sim is not None:
                sim.session.close()
            session.close()
Ejemplo n.º 4
0
    def test_get_encoding(self):
        """
        Create QuantSim for a CPU model, test get encoding.

        ``get_encoding`` is expected to raise ``AssertionError`` when called on
        a quantizer for which no encoding has been computed or set in this test.
        """
        tf.compat.v1.reset_default_graph()
        with tf.device('/cpu:0'):
            _ = keras_model()
            init = tf.compat.v1.global_variables_initializer()

        session = tf.compat.v1.Session()
        sim = None
        try:
            session.run(init)

            sim = QuantizationSimModel(session, ['conv2d_input'],
                                       ['keras_model/Softmax'],
                                       use_cuda=False)
            quantizer = sim.quantizer_config(
                'conv2d/Conv2D/ReadVariableOp_quantized')

            with self.assertRaises(AssertionError):
                quantizer.get_encoding()
        finally:
            # Release both sessions even when the assertion above fails; the
            # sim session is closed explicitly, as other tests in this file do.
            if sim is not None:
                sim.session.close()
            session.close()
Ejemplo n.º 5
0
    def test_set_get_quantizer_params_using_properties(self):
        """
        Create QuantSim for a CPU model, test param read and write using properties.

        Exercises the ``bitwidth``, ``use_symmetric_encoding``, ``rounding_mode``,
        ``quant_scheme`` and ``enabled`` properties: each is read, checked
        against its expected initial value, written, and read back.
        """

        tf.compat.v1.reset_default_graph()
        with tf.device('/cpu:0'):
            model = tf.keras.Sequential()
            model.add(
                tf.keras.layers.Conv2D(32,
                                       kernel_size=3,
                                       input_shape=(28, 28, 3),
                                       activation='relu'))
            model.add(tf.keras.layers.MaxPooling2D((2, 2)))
            model.add(
                tf.keras.layers.Conv2D(64, kernel_size=3, activation='relu'))
            model.summary()

        sess = tf.compat.v1.Session()
        sim = None
        try:
            initialize_uninitialized_vars(sess)
            sim = QuantizationSimModel(sess, [model.input.op.name],
                                       [model.output.op.name],
                                       use_cuda=False)

            p_quantizer = sim.quantizer_config(
                'conv2d/Conv2D/ReadVariableOp_quantized')
            o_quantizer = sim.quantizer_config('conv2d/Relu_quantized')
            bias_quantizer = sim.quantizer_config(
                'conv2d/BiasAdd/ReadVariableOp_quantized')

            # check if __str__ can print the object info
            print(p_quantizer)

            # bitwidth on a parameter quantizer
            self.assertEqual(8, p_quantizer.bitwidth)
            p_quantizer.bitwidth = 6
            self.assertEqual(6, p_quantizer.bitwidth)

            # bitwidth on an output quantizer
            self.assertEqual(8, o_quantizer.bitwidth)
            o_quantizer.bitwidth = 6
            self.assertEqual(6, o_quantizer.bitwidth)

            # symmetric-encoding flag
            self.assertFalse(bias_quantizer.use_symmetric_encoding)
            bias_quantizer.use_symmetric_encoding = True
            self.assertTrue(bias_quantizer.use_symmetric_encoding)

            # rounding mode
            self.assertEqual(libpymo.RoundingMode.ROUND_NEAREST,
                             o_quantizer.rounding_mode)
            o_quantizer.rounding_mode = libpymo.RoundingMode.ROUND_STOCHASTIC
            self.assertEqual(libpymo.RoundingMode.ROUND_STOCHASTIC,
                             o_quantizer.rounding_mode)

            # quant scheme; the encoding is expected to be invalid afterwards
            self.assertEqual(
                libpymo.QuantizationMode.QUANTIZATION_TF_ENHANCED,
                o_quantizer.quant_scheme)
            o_quantizer.quant_scheme = QuantScheme.post_training_tf
            self.assertEqual(libpymo.QuantizationMode.QUANTIZATION_TF,
                             o_quantizer.quant_scheme)
            self.assertFalse(o_quantizer.tensor_quantizer.isEncodingValid)

            # enabled flag
            self.assertTrue(p_quantizer.enabled)
            p_quantizer.enabled = False
            self.assertFalse(p_quantizer.enabled)
        finally:
            # Close the sim session and the original session (the latter was
            # previously never closed), even when an assertion above fails.
            if sim is not None:
                sim.session.close()
            sess.close()
            del sim
Ejemplo n.º 6
0
    def test_compute_encodings_quant_scheme_update(self):
        """
        Build a QuantSim model, switch a parameter quantizer's quant scheme
        through the property interface, recompute encodings and check that the
        encoding min/max read from the quantizer have changed.
        """
        weight_quant_op_name = 'conv2d/Conv2D/ReadVariableOp_quantized'

        def dummy_forward_pass(sess, args):
            # Re-seed on every call so each invocation feeds the same input.
            np.random.seed(0)
            tf.compat.v1.set_random_seed(0)
            graph = sess.graph
            model_output = graph.get_tensor_by_name('conv2d_1/Relu_quantized:0')
            model_input = graph.get_tensor_by_name('conv2d_input:0')
            dummy_input = np.random.randn(20, 28, 28, 3)
            sess.run(model_output, feed_dict={model_input: dummy_input})

        def read_min_max(quantizer_config):
            # Read min first, then max, from the quantize op variables.
            enc_min = quantizer_config.get_variable_from_op(
                QuantizeOpIndices.encoding_min)
            enc_max = quantizer_config.get_variable_from_op(
                QuantizeOpIndices.encoding_max)
            return enc_min, enc_max

        tf.compat.v1.reset_default_graph()
        np.random.seed(0)
        tf.compat.v1.set_random_seed(0)

        with tf.device('/gpu:0'):
            model = tf.keras.Sequential()
            first_conv = tf.keras.layers.Conv2D(32,
                                                kernel_size=3,
                                                input_shape=(28, 28, 3),
                                                activation='relu')
            model.add(first_conv)
            pooling = tf.keras.layers.MaxPooling2D((2, 2))
            model.add(pooling)
            second_conv = tf.keras.layers.Conv2D(64,
                                                 kernel_size=3,
                                                 activation='relu')
            model.add(second_conv)
            model.summary()

        sess = tf.compat.v1.Session()
        initialize_uninitialized_vars(sess)
        sim = QuantizationSimModel(sess, ['conv2d_input'], ['conv2d_1/Relu'],
                                   use_cuda=True)

        # The weight quantize op's second input holds the op mode.
        weight_quant_op = sim.session.graph.get_operation_by_name(
            weight_quant_op_name)
        expected_op_mode = int(
            libpymo.TensorQuantizerOpMode.oneShotQuantizeDequantize)
        self.assertEqual(expected_op_mode,
                         sim.session.run(weight_quant_op.inputs[1]))

        sim.compute_encodings(dummy_forward_pass, None)

        weight_quantizer = sim.quantizer_config(weight_quant_op_name)
        min_before, max_before = read_min_max(weight_quantizer)

        # Switch the scheme through the property and confirm it took effect.
        self.assertEqual(libpymo.QuantizationMode.QUANTIZATION_TF_ENHANCED,
                         weight_quantizer.quant_scheme)
        weight_quantizer.quant_scheme = QuantScheme.post_training_tf
        self.assertEqual(libpymo.QuantizationMode.QUANTIZATION_TF,
                         weight_quantizer.quant_scheme)

        # Recompute encodings under the new scheme and compare with the old ones.
        sim.compute_encodings(dummy_forward_pass, None)
        min_after, max_after = read_min_max(weight_quantizer)

        self.assertNotEqual(min_before, min_after)
        self.assertNotEqual(max_before, max_after)

        sess.close()
        sim.session.close()
        del sim
Ejemplo n.º 7
0
    def validate_simple_rnn_auto_insertion_and_forward_pass(self, sess):
        """
        Common API to validate auto quant node insertion and forward pass for
        a simple rnn layer.

        Builds a QuantizationSimModel from ``sess``, asserts which quantize op
        names are present or absent among the names returned by
        ``_get_quant_ops_from_tf_graph`` for the sim graph, computes encodings
        and asserts that the sim session output differs from the output of
        ``sess`` for the same input. Both ``sess`` and the sim session are
        closed before returning.

        :param sess: TensorFlow session holding the simple rnn model
        :return: None
        """

        np.random.seed(0)
        # Use the compat.v1 API, consistent with the rest of this file;
        # tf.set_random_seed is not available in the TF2 namespace.
        tf.compat.v1.set_random_seed(0)

        original_op_names = [op.name for op in sess.graph.get_operations()]
        matmul_param_quant_op_inside_while_block_name = "simple_rnn/while/MatMul/ReadVariableOp_quantized"
        # The quantize op must not exist before the sim model is constructed.
        self.assertNotIn(matmul_param_quant_op_inside_while_block_name,
                         original_op_names)

        # construct a quantization sim model
        sim = QuantizationSimModel(sess, ['input_1'],
                                   ['simplernn_model/Softmax'],
                                   use_cuda=False)

        # params that must have quantizers
        matmul_2_param_quant_op_inside_while_block_name = "simple_rnn/while/MatMul_1/ReadVariableOp_quantized"
        # check biasadd param quantizers are disabled
        for p_quantizer_name in sim._param_quantizers:
            if 'BiasAdd' in p_quantizer_name:
                p_quant_config = sim.quantizer_config(p_quantizer_name)
                self.assertFalse(p_quant_config.enabled)

        # activation quantize ops that are asserted to be absent below
        activation_bias_add_op_inside_while_block_name = "simple_rnn/while/BiasAdd_quantized"
        add_op_inside_while_block_name = "simple_rnn/while/add_quantized"
        activation_matmul_op_inside_while_block_name = "simple_rnn/while/MatMul_quantized"
        activation_matmul_2_op_inside_while_block_name = "simple_rnn/while/MatMul_1_quantized"

        # get ops and make sure we have a quantized op added to the conditional block
        quantized_graph_op_names = self._get_quant_ops_from_tf_graph(
            sim.session.graph)

        # while block ops
        # kernel quantizers
        self.assertIn(matmul_param_quant_op_inside_while_block_name,
                      quantized_graph_op_names)
        self.assertIn(matmul_2_param_quant_op_inside_while_block_name,
                      quantized_graph_op_names)

        # output quantizers must not be present
        self.assertNotIn(activation_bias_add_op_inside_while_block_name,
                         quantized_graph_op_names)
        self.assertNotIn(add_op_inside_while_block_name,
                         quantized_graph_op_names)
        self.assertNotIn(activation_matmul_op_inside_while_block_name,
                         quantized_graph_op_names)
        self.assertNotIn(activation_matmul_2_op_inside_while_block_name,
                         quantized_graph_op_names)

        # check for input quantizers
        input_matmul_op_inside_while_block_name = "simple_rnn/while/TensorArrayReadV3_quantized"
        input_matmul_2_op_inside_while_block_name = "simple_rnn/while/Identity_2_quantized"
        self.assertIn(input_matmul_op_inside_while_block_name,
                      quantized_graph_op_names)
        self.assertIn(input_matmul_2_op_inside_while_block_name,
                      quantized_graph_op_names)

        # validate encodings
        def dummy_forward_pass(sess, args):
            model_output = sess.graph.get_tensor_by_name(
                'simplernn_model/Softmax:0')
            model_input = sess.graph.get_tensor_by_name('input_1:0')
            dummy_input = np.random.randn(16, 3, 100)
            sess.run(model_output, feed_dict={model_input: dummy_input})

        def run_model(session, input_tensor):
            # Named run_model (not eval) to avoid shadowing the builtin.
            model_output = session.graph.get_tensor_by_name(
                'simplernn_model/Softmax:0')
            model_input = session.graph.get_tensor_by_name('input_1:0')
            return session.run(model_output,
                               feed_dict={model_input: input_tensor})

        sim.compute_encodings(dummy_forward_pass, None)
        random_tensor = np.random.randn(16, 3, 100)
        # Reference output comes from the original (caller-provided) session.
        orig_out = run_model(sess, random_tensor)

        sim.compute_encodings(dummy_forward_pass, None)

        # run the same input through the sim session
        with sim.session.graph.as_default():
            quantized_out = run_model(sim.session, random_tensor)

        # check quantized output with orig output
        self.assertFalse(np.allclose(orig_out, quantized_out))

        # close tf sessions
        sess.close()
        sim.session.close()
        del sim