Example #1
    def test_compare_qat_qc_quantize_quarter_range_scaled_input(self):
        """
        Compare QAT asymmetric quantization with the QC quantize implementation.
        """

        quantizer = libpymo.TensorQuantizer(
            libpymo.QuantizationMode.QUANTIZATION_TF,
            libpymo.RoundingMode.ROUND_NEAREST)

        np.random.seed(1)
        random_input = 10 * np.random.normal(size=[1, 3, 224, 224]) - 20

        # 1/4 range min, max (the input itself is not scaled)
        x_min = min(0., 0.5 * float(random_input.min()))
        x_max = max(0., 0.5 * float(random_input.max()))

        x_min = min(x_min, 0)
        x_max = max(x_max, 0)

        # QC quantize
        self.set_quantizer_values(quantizer, x_min, x_max)
        # print(quantizer.encoding.min, quantizer.encoding.max, quantizer.encoding.delta, quantizer.encoding.offset)

        # aimet quantizer
        output_tensor = np.zeros((1, 3, 224, 224)).astype(np.float32)
        quantizer.quantizeDequantize(random_input, output_tensor, x_min, x_max,
                                     8, False)

        # QAT asymmetric quantizer output as float32
        x_quant = self.qat_python_asymmetric_quantizer(
            random_input, 8, x_max, x_min).astype(np.float32)

        # compare qc quantize output and qat asymmetric quantizer output
        self.assertTrue(np.allclose(x_quant, output_tensor))
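
The helper qat_python_asymmetric_quantizer used for the comparison is not shown in this snippet. A minimal NumPy sketch of the TF-style asymmetric quantize-dequantize it is presumably checked against (the function name, signature and rounding details here are assumptions, not the library's code):

import numpy as np

def asymmetric_quantize_dequantize(x, bitwidth, x_max, x_min):
    # Assumed reference behaviour: uniform asymmetric grid with round-to-nearest
    steps = 2 ** bitwidth - 1
    delta = (x_max - x_min) / steps                       # grid step size
    offset = np.round(x_min / delta)                      # index of the encoding min on the grid
    q = np.clip(np.round(x / delta) - offset, 0, steps)   # quantize and clamp to the grid
    return (q + offset) * delta                           # dequantize back to float
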
Example #2
    def test_sanity(self):
        quantizer = libpymo.TensorQuantizer(
            libpymo.QuantizationMode.QUANTIZATION_TF_ENHANCED,
            libpymo.RoundingMode.ROUND_NEAREST)

        np.random.seed(10)
        random_input = np.random.randn(1, 3, 224, 224).astype(np.float32)

        self.assertFalse(quantizer.isEncodingValid)
        quantizer.updateStats(random_input, False)
        self.assertFalse(quantizer.isEncodingValid)

        encoding = quantizer.computeEncoding(8, False, False, False)
        print(quantizer.encoding.min, quantizer.encoding.max,
              quantizer.encoding.delta, quantizer.encoding.offset)
        self.assertTrue(quantizer.isEncodingValid)

        self.assertEqual(quantizer.quantScheme,
                         libpymo.QuantizationMode.QUANTIZATION_TF_ENHANCED)
        self.assertEqual(quantizer.roundingMode,
                         libpymo.RoundingMode.ROUND_NEAREST)

        input_tensor = np.random.randn(1, 3, 224, 224).astype(np.float32)
        output_tensor = np.zeros((1, 3, 224, 224)).astype(np.float32)

        quantizer.quantizeDequantize(input_tensor, output_tensor, encoding.min,
                                     encoding.max, 8, False)

        # Check that the output tensor did get updated
        self.assertFalse(np.all(output_tensor == 0))

        # Check that the quantized tensor is close to the input tensor but not the same
        self.assertTrue(np.allclose(output_tensor, input_tensor, atol=0.2))
        self.assertFalse(np.allclose(output_tensor, input_tensor, atol=0.1))
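
The two tolerance checks bracket the quantization error: for values inside the encoding range, round-to-nearest on an 8-bit grid is off by at most half a step, while values outside the range chosen by TF_ENHANCED are clamped and can incur a larger error. A minimal sketch of the half-step bound (standard uniform-quantization reasoning with illustrative numbers, not values taken from this test):

import numpy as np

enc_min, enc_max, bitwidth = -3.0, 3.0, 8          # illustrative encoding, not the test's
delta = (enc_max - enc_min) / (2 ** bitwidth - 1)  # grid step size
print('max in-range round-trip error ~', delta / 2)
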
Example #3
    def __setstate__(self, state):
        self.session = None
        # Recreate the C++ tensor quantizer reference from the pickled state
        self.quant_op_name = state.quant_op_name
        self.quantizer_type = state.quantizer_type
        self.tensor_quantizer = libpymo.TensorQuantizer(
            state.quant_scheme, state.rounding_mode)
        self.tensor_quantizer.isEncodingValid = state.is_encoding_valid
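
__setstate__ rebuilds the C++ TensorQuantizer because the pybind object itself is not picklable, so the matching __getstate__ on the same wrapper class would export only plain fields. A minimal sketch of such a counterpart (the state container and the export logic are assumptions; quantScheme, roundingMode and isEncodingValid are attributes confirmed elsewhere in these examples):

from dataclasses import dataclass
from typing import Any

@dataclass
class PickleableTensorQuantizerState:
    # Hypothetical plain-Python snapshot of the fields __setstate__ reads back
    quant_op_name: str
    quantizer_type: str
    quant_scheme: Any       # libpymo.QuantizationMode
    rounding_mode: Any      # libpymo.RoundingMode
    is_encoding_valid: bool

def __getstate__(self):
    # Drop the session and the C++ quantizer; both are rebuilt in __setstate__ on load
    return PickleableTensorQuantizerState(
        quant_op_name=self.quant_op_name,
        quantizer_type=self.quantizer_type,
        quant_scheme=self.tensor_quantizer.quantScheme,
        rounding_mode=self.tensor_quantizer.roundingMode,
        is_encoding_valid=self.tensor_quantizer.isEncodingValid)
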
Example #4
    def test_qc_quantize_recurrent_param_op(self):
        """
        Test the custom recurrent-param quantize op on CPU.
        """
        zero_out_module = tf.load_op_library('libaimet_tf_ops.so')
        graph = tf.Graph()
        config = tf.compat.v1.ConfigProto(log_device_placement=False)
        sess = tf.compat.v1.Session(graph=graph, config=config)
        bitwidth = 8
        use_symm_encoding = True

        with graph.as_default():
            # placeholder for the input
            with tf.device("/device:CPU:0"):
                inp = tf.compat.v1.placeholder(tf.float32,
                                               shape=[10],
                                               name='input')
                tensor_quantizer = libpymo.TensorQuantizer(
                    libpymo.QuantizationMode.QUANTIZATION_TF,
                    libpymo.RoundingMode.ROUND_NEAREST)
                tensor_quantizer_val = libpymo.PtrToInt64(tensor_quantizer)
                tensor_quant_ref = tf.Variable(
                    initial_value=tensor_quantizer_val,
                    trainable=False,
                    dtype=tf.int64)

                time_step_tensor = tf.constant(1, dtype=tf.int32)

                encoding_min = tf.Variable(initial_value=-0.5,
                                           trainable=True,
                                           dtype=tf.double)
                encoding_max = tf.Variable(initial_value=0.5,
                                           trainable=True,
                                           dtype=tf.double)
                bit_width = tf.Variable(initial_value=bitwidth,
                                        trainable=False,
                                        dtype=tf.int8)
                use_symmetric_encoding = tf.Variable(
                    initial_value=use_symm_encoding,
                    trainable=False,
                    dtype=tf.bool)

                mode_var = tf.Variable(initial_value=int(
                    libpymo.TensorQuantizerOpMode.oneShotQuantizeDequantize),
                                       trainable=False,
                                       dtype=tf.int32)

                sess.run([
                    mode_var.initializer, tensor_quant_ref.initializer,
                    encoding_min.initializer, encoding_max.initializer,
                    bit_width.initializer, use_symmetric_encoding.initializer
                ])

                pass_through_op_output = zero_out_module.qc_quantize_recurrent_param(
                    name='quant_op',
                    in_tensor=inp,
                    op_mode=mode_var,
                    tensor_quantizer_reference=tensor_quant_ref,
                    encoding_min=encoding_min,
                    encoding_max=encoding_max,
                    bit_width=bit_width,
                    use_symmetric_encoding=use_symmetric_encoding,
                    time_steps=time_step_tensor)

        inp_tensor = sess.graph.get_tensor_by_name('input:0')
        # inp_data = np.random.rand(10).astype(np.float32)
        np.random.seed(18)
        inp_data = np.random.randint(low=-1, high=2,
                                     size=10).astype(np.float32)

        # get the output
        print(inp_data)
        out_data = sess.run(pass_through_op_output,
                            feed_dict={inp_tensor: inp_data})
        print(out_data)

        # Compare the qc_quantize op's output with its input. The encoding min/max
        # variables set to -0.5 and 0.5 should have no bearing on this output: in
        # oneShotQuantizeDequantize mode the op computes encodings from the input itself,
        # so no truncation of the {-1, 0, 1} values should be observed.
        self.assertTrue(np.allclose(out_data, inp_data, atol=1e-6))
        sess.close()
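
The integer-valued input in {-1, 0, 1} comes back unchanged because, in oneShotQuantizeDequantize mode, the range is derived from the input itself, and an 8-bit symmetric grid over [-1, 1] places -1, 0 and 1 exactly on grid points. A minimal NumPy sketch of that round trip (delta = max|x| / 127 is the assumed symmetric convention, not code from the op):

import numpy as np

x = np.array([-1., 0., 1., 1., 0., -1., 0., 1., -1., 0.], dtype=np.float32)
delta = np.abs(x).max() / 127.0                        # assumed symmetric 8-bit step
round_trip = np.clip(np.round(x / delta), -128, 127) * delta
print(np.allclose(round_trip, x, atol=1e-6))           # True: no truncation
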
Example #5
    def test_qc_quantize_op_straight_through_gradient_computation(self):
        """
        Validate the straight-through estimator gradient computation of the TensorFlow quantize op.
        """

        from aimet_tensorflow import quantsim_straight_through_grad

        zero_out_module = tf.load_op_library('libaimet_tf_ops.so')
        graph = tf.Graph()
        config = tf.compat.v1.ConfigProto(log_device_placement=False)
        sess = tf.compat.v1.Session(graph=graph, config=config)
        with graph.as_default():
            inp = tf.compat.v1.placeholder(tf.float32,
                                           shape=[2, 2],
                                           name='input')
            tensor_quantizer = libpymo.TensorQuantizer(
                libpymo.QuantizationMode.QUANTIZATION_TF_ENHANCED,
                libpymo.RoundingMode.ROUND_NEAREST)
            tensor_quantizer_val = libpymo.PtrToInt64(tensor_quantizer)
            tensor_quant_ref = tf.Variable(initial_value=tensor_quantizer_val,
                                           trainable=False,
                                           dtype=tf.int64)

            mode_var = tf.Variable(initial_value=int(
                libpymo.TensorQuantizerOpMode.oneShotQuantizeDequantize),
                                   trainable=False,
                                   dtype=tf.int32)

            # fix the encoding min/max and bitwidth to be used
            encoding_min = tf.Variable(initial_value=0.0,
                                       trainable=True,
                                       dtype=tf.double)
            encoding_max = tf.Variable(initial_value=5.0,
                                       trainable=True,
                                       dtype=tf.double)
            bit_width = tf.Variable(initial_value=8,
                                    trainable=False,
                                    dtype=tf.int8)
            use_symmetric_encoding = tf.Variable(initial_value=False,
                                                 trainable=False,
                                                 dtype=tf.bool)

            sess.run([
                mode_var.initializer, tensor_quant_ref.initializer,
                encoding_min.initializer, encoding_max.initializer,
                bit_width.initializer, use_symmetric_encoding.initializer
            ])

            # use default gradient
            pass_through_op_output = zero_out_module.qc_quantize(
                name='quant_op',
                in_tensor=inp,
                op_mode=mode_var,
                tensor_quantizer_reference=tensor_quant_ref,
                encoding_min=encoding_min,
                encoding_max=encoding_max,
                bit_width=bit_width,
                use_symmetric_encoding=use_symmetric_encoding)

            # pass_through_op = graph.get_operation_by_name('quant_op')

        inp_tensor = sess.graph.get_tensor_by_name('input:0')

        # set the encodings
        tensor_quantizer.isEncodingValid = True
        mode_var.load(int(libpymo.TensorQuantizerOpMode.quantizeDequantize),
                      sess)

        # compute default gradient
        grads = tf.gradients(pass_through_op_output, [inp_tensor])
        dlossbydx = grads

        # Send an input whose last value is > 5.0; the encodings were set earlier to
        # min = 0.0, max = 5.0, so this input contains data above p (the encoding max).
        inp_data = [[1.4581, 0.4829], [0.3125, 5.6150]]
        # The returned gradient should be gated: in this case [[1.0, 1.0], [1.0, 0.0]]
        with graph.as_default():
            input_gradient = sess.run([dlossbydx],
                                      feed_dict={inp_tensor: inp_data})[0]

        # validate the clamped case in the gradient computation
        self.assertTrue(input_gradient[0][0][0] == 1.0)
        self.assertTrue(input_gradient[0][0][1] == 1.0)
        self.assertTrue(input_gradient[0][1][0] == 1.0)
        self.assertTrue(input_gradient[0][1][1] == 0.0)

        # pass an input entirely within the encoding range
        inp_data = [[1.4581, 0.4829], [0.3125, 1.6150]]
        # The returned gradient should not be gated: in this case [[1.0, 1.0], [1.0, 1.0]]
        with graph.as_default():
            input_gradient = sess.run([dlossbydx],
                                      feed_dict={inp_tensor: inp_data})[0]

        # validate no clamping case in gradient computation
        self.assertTrue(input_gradient[0][0][0] == 1.0)
        self.assertTrue(input_gradient[0][0][1] == 1.0)
        self.assertTrue(input_gradient[0][1][0] == 1.0)
        self.assertTrue(input_gradient[0][1][1] == 1.0)

        # pass an input with data below n (the encoding min); the first value here is -0.5
        inp_data = [[-0.5, 0.4829], [0.3125, 1.6150]]
        # The returned gradient should be gated: in this case [[0.0, 1.0], [1.0, 1.0]]
        with graph.as_default():
            input_gradient = sess.run([dlossbydx],
                                      feed_dict={inp_tensor: inp_data})[0]

        # validate the clamped case in the gradient computation
        self.assertTrue(input_gradient[0][0][0] == 0.0)
        self.assertTrue(input_gradient[0][0][1] == 1.0)
        self.assertTrue(input_gradient[0][1][0] == 1.0)
        self.assertTrue(input_gradient[0][1][1] == 1.0)
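
The assertions follow the straight-through estimator rule: the incoming gradient is passed where the input lies inside the encoding range [n, p] = [encoding_min, encoding_max] and gated to zero where the input would be clamped. A minimal NumPy sketch of that gate (the rule the test exercises, not the code in quantsim_straight_through_grad):

import numpy as np

def ste_gate(x, n, p):
    # 1.0 where n <= x <= p (gradient passes), 0.0 where the input is clamped
    return np.logical_and(x >= n, x <= p).astype(np.float32)

# the three cases above, with n = 0.0 and p = 5.0
print(ste_gate(np.array([[1.4581, 0.4829], [0.3125, 5.6150]]), 0.0, 5.0))  # [[1. 1.] [1. 0.]]
print(ste_gate(np.array([[1.4581, 0.4829], [0.3125, 1.6150]]), 0.0, 5.0))  # [[1. 1.] [1. 1.]]
print(ste_gate(np.array([[-0.5,   0.4829], [0.3125, 1.6150]]), 0.0, 5.0))  # [[0. 1.] [1. 1.]]
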
Example #6
    def test_qc_quantize_op_cpu(self):
        """
        Test the custom qc_quantize op on CPU.
        """
        zero_out_module = tf.load_op_library('libaimet_tf_ops.so')
        graph = tf.Graph()
        config = tf.compat.v1.ConfigProto(log_device_placement=False)
        sess = tf.compat.v1.Session(graph=graph, config=config)
        bitwidth = 8
        use_symm_encoding = True

        with graph.as_default():
            # placeholder for the input
            with tf.device("/device:CPU:0"):
                inp = tf.compat.v1.placeholder(tf.float32,
                                               shape=[10],
                                               name='input')
                tensor_quantizer = libpymo.TensorQuantizer(
                    libpymo.QuantizationMode.QUANTIZATION_TF_ENHANCED,
                    libpymo.RoundingMode.ROUND_NEAREST)
                tensor_quantizer_val = libpymo.PtrToInt64(tensor_quantizer)
                tensor_quant_ref = tf.Variable(
                    initial_value=tensor_quantizer_val,
                    trainable=False,
                    dtype=tf.int64)

                encoding_min = tf.Variable(initial_value=0.0,
                                           trainable=True,
                                           dtype=tf.double)
                encoding_max = tf.Variable(initial_value=0.0,
                                           trainable=True,
                                           dtype=tf.double)
                bit_width = tf.Variable(initial_value=bitwidth,
                                        trainable=False,
                                        dtype=tf.int8)
                use_symmetric_encoding = tf.Variable(
                    initial_value=use_symm_encoding,
                    trainable=False,
                    dtype=tf.bool)

                mode_var = tf.Variable(initial_value=int(
                    libpymo.TensorQuantizerOpMode.updateStats),
                                       trainable=False,
                                       dtype=tf.int32)

                sess.run([
                    mode_var.initializer, tensor_quant_ref.initializer,
                    encoding_min.initializer, encoding_max.initializer,
                    bit_width.initializer, use_symmetric_encoding.initializer
                ])

                pass_through_op_output = zero_out_module.qc_quantize(
                    name='quant_op',
                    in_tensor=inp,
                    op_mode=mode_var,
                    tensor_quantizer_reference=tensor_quant_ref,
                    encoding_min=encoding_min,
                    encoding_max=encoding_max,
                    bit_width=bit_width,
                    use_symmetric_encoding=use_symmetric_encoding)

        inp_tensor = sess.graph.get_tensor_by_name('input:0')
        inp_data = np.random.rand(10)

        # get the output
        print(inp_data)
        out_data = sess.run(pass_through_op_output,
                            feed_dict={inp_tensor: inp_data})
        print(out_data)

        # In updateStats mode the op acts as a pass-through, so output should equal input
        self.assertTrue(np.allclose(out_data, inp_data))

        # compute encodings
        self.assertFalse(tensor_quantizer.isEncodingValid)
        encoding = tensor_quantizer.computeEncoding(bitwidth,
                                                    use_symm_encoding, False,
                                                    False)
        self.assertTrue(tensor_quantizer.isEncodingValid)
        print('min=', encoding.min, ', max=', encoding.max)

        # get the output
        inp_data = np.random.rand(10) * 2
        print(inp_data)
        mode_var.load(int(libpymo.TensorQuantizerOpMode.quantizeDequantize),
                      sess)
        out_data = sess.run(pass_through_op_output,
                            feed_dict={inp_tensor: inp_data})
        print(out_data)

        # After switching to quantizeDequantize mode the output should no longer match the input exactly
        self.assertFalse(np.allclose(out_data, inp_data))

        sess.close()
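
The final assertFalse holds because the second input is drawn from [0, 2) while the encoding was calibrated on data in [0, 1): anything above encoding.max is clamped during quantizeDequantize, so the round trip no longer tracks the input. A minimal sketch of that clamping effect (illustrative encoding values, not the ones the test computes):

import numpy as np

np.random.seed(0)
enc_min, enc_max, bitwidth = 0.0, 1.0, 8               # illustrative calibrated range
delta = (enc_max - enc_min) / (2 ** bitwidth - 1)

x = np.random.rand(10) * 2                             # new data that exceeds the range
q = np.clip(np.round((x - enc_min) / delta), 0, 2 ** bitwidth - 1)
x_hat = q * delta + enc_min                            # values above 1.0 collapse to 1.0

print(np.allclose(x_hat, x))                           # False: out-of-range values were clamped
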