def testQCQuantize_CheckInvalidEncodingsGpu(self):
            """
            Check that SET_ENCODING with inconsistent encodings fails (GPU mode).

            Three encoding minimums are passed together with four encoding
            maximums; evaluating the resulting op is expected to raise
            ``InvalidArgumentError``.
            """
            _log.info('running testQCQuantize_CheckInvalidEncodings')
            for use_gpu in [True]:
                if use_gpu:
                    _log.info('GPU mode is selected')
                else:
                    _log.info('CPU mode is selected')
                with self.session(use_gpu=use_gpu):
                    bw = 8
                    # Initialize the quantizer for the single op name 'conv1'
                    if use_gpu:
                        comp_mode = libpymo.ComputationMode.COMP_MODE_GPU
                    else:
                        comp_mode = libpymo.ComputationMode.COMP_MODE_CPU
                    libpytrext.InitQuantizer(
                        ["conv1"], comp_mode, [],
                        libpymo.QuantizationMode.QUANTIZATION_TF_ENHANCED)

                    try:
                        # Deliberately inconsistent encodings: 3 minimums
                        # against 4 maximums.
                        # NOTE(review): presumably the length mismatch is what
                        # the op rejects -- confirm against the op kernel.
                        enc_min_list = [-10.0, 0.5, 20]
                        enc_max_list = [100.0, 150.0, 200.0, 255.0]

                        output = self.qc_quantize_module.qc_quantize_deprecated(
                            op_name='conv1',
                            training_in_progress=False,
                            config=int(
                                libpytrext.config_type.CONFIG_TYPE_SET_ENCODING),
                            bitwidth=bw,
                            in_tensors=[[]],
                            fixed_enc_mins=enc_min_list,
                            fixed_enc_maxs=enc_max_list)
                        with self.assertRaises(errors_impl.InvalidArgumentError):
                            ops.convert_to_tensor(output[0]).eval()
                    finally:
                        # Reset even when the assertion above fails, so the
                        # quantizer initialized here never carries over into
                        # a later test.
                        libpytrext.ResetQuantizer()
    def testQCQuantize_GetEncodingsGpu(self):
        """
        Check the encodings reported after collecting activation stats (GPU).

        Stats for three activation tensors are collected with UPDATE_STATS in
        QUANTIZATION_TF mode. GET_ENCODING is then queried and the first
        returned min/max pair is compared against the range of the first
        activation tensor, [-20.0, 25.0].
        """
        _log.info('running testQCQuantize_GetEncodings')
        for use_gpu in [True]:
            if use_gpu:
                _log.info('GPU mode is selected')
            else:
                _log.info('CPU mode is selected')
            with self.session(use_gpu=use_gpu):
                bw = 8
                # Prepare activation tensors; ACT_MIN/ACT_MAX are the extremes
                # of actvn_1, the only tensor whose encoding is checked below.
                ACT_MIN = -20.0
                ACT_MAX = 25.0
                actvn_1 = constant_op.constant([-10.0, -20.0, 25.0])
                actvn_2 = constant_op.constant([8.0, -19.0, 30.0])
                actvn_3 = constant_op.constant([12.0, -31.0, 35.4])

                # Initialize the quantizer for the single op name 'conv1'
                if use_gpu:
                    comp_mode = libpymo.ComputationMode.COMP_MODE_GPU
                else:
                    comp_mode = libpymo.ComputationMode.COMP_MODE_CPU
                libpytrext.InitQuantizer(
                    ["conv1"], comp_mode, [],
                    libpymo.QuantizationMode.QUANTIZATION_TF)

                try:
                    # Update stats
                    output_0 = self.qc_quantize_module.qc_quantize_deprecated(
                        op_name='conv1',
                        training_in_progress=False,
                        config=int(
                            libpytrext.config_type.CONFIG_TYPE_UPDATE_STATS),
                        bitwidth=bw,
                        in_tensors=[actvn_1, actvn_2, actvn_3],
                        fixed_enc_mins=[],
                        fixed_enc_maxs=[])
                    ops.convert_to_tensor(output_0[0]).eval()

                    # Get encodings
                    output_1 = self.qc_quantize_module.qc_quantize_deprecated(
                        op_name='conv1',
                        training_in_progress=False,
                        config=int(
                            libpytrext.config_type.CONFIG_TYPE_GET_ENCODING),
                        bitwidth=bw,
                        in_tensors=[[]],
                        fixed_enc_mins=[],
                        fixed_enc_maxs=[],
                        num_tensors=3)

                    enc_min = ops.convert_to_tensor(output_1[1]).eval()
                    enc_max = ops.convert_to_tensor(output_1[2]).eval()

                    true_encodings = self._compute_encodings(
                        enc_min[0], enc_max[0], bw)
                    expected_encodings = self._compute_encodings(
                        ACT_MIN, ACT_MAX, bw)
                    error_margin = 1e-5  # Use better heuristics
                    self.assertArrayNear(true_encodings, expected_encodings,
                                         error_margin)
                finally:
                    # Reset even when an assertion fails, so the quantizer
                    # initialized here never carries over into a later test.
                    libpytrext.ResetQuantizer()
    def testQCQuantize_SetEncodings(self):
        """
        Check that encodings written with SET_ENCODING are read back unchanged.

        Three min/max pairs are set on the op 'conv1' (CPU mode), then read
        back with GET_ENCODING. Each pair is passed through
        ``self._compute_encodings`` and compared for exact equality with the
        values that were set.
        """
        _log.info('running testQCQuantize_SetEncodings')
        for use_gpu in [False]:
            if use_gpu:
                _log.info('GPU mode is selected')
            else:
                _log.info('CPU mode is selected')
            with self.session(use_gpu=use_gpu):
                bw = 8
                # Initialize the quantizer for the single op name 'conv1'.
                # The computation mode must follow use_gpu (GPU when True,
                # CPU otherwise), matching the session created above.
                if use_gpu:
                    comp_mode = libpymo.ComputationMode.COMP_MODE_GPU
                else:
                    comp_mode = libpymo.ComputationMode.COMP_MODE_CPU
                libpytrext.InitQuantizer(
                    ["conv1"], comp_mode, [],
                    libpymo.QuantizationMode.QUANTIZATION_TF_ENHANCED)

                try:
                    # Set encodings
                    # For the purpose of exact matches with expected encodings
                    # we choose to avoid ranges excluding 0 as the TF algorithm
                    # forces the encoding to include 0, thus differing from
                    # expected results.
                    enc_min_list = [-10.0, -0.5, 0]
                    enc_max_list = [100.0, 200.0, 160.0]

                    output_0 = self.qc_quantize_module.qc_quantize_deprecated(
                        op_name='conv1',
                        training_in_progress=False,
                        config=int(
                            libpytrext.config_type.CONFIG_TYPE_SET_ENCODING),
                        bitwidth=bw,
                        in_tensors=[[]],
                        fixed_enc_mins=enc_min_list,
                        fixed_enc_maxs=enc_max_list)
                    ops.convert_to_tensor(output_0[0]).eval()

                    # Retrieve encodings from op and validate
                    output_1 = self.qc_quantize_module.qc_quantize_deprecated(
                        op_name='conv1',
                        training_in_progress=False,
                        config=int(
                            libpytrext.config_type.CONFIG_TYPE_GET_ENCODING),
                        bitwidth=bw,
                        in_tensors=[[]],
                        fixed_enc_mins=[],
                        fixed_enc_maxs=[],
                        num_tensors=3)

                    get_enc_min = ops.convert_to_tensor(output_1[1]).eval()
                    get_enc_max = ops.convert_to_tensor(output_1[2]).eval()
                    for index, (set_min, set_max) in enumerate(
                            zip(enc_min_list, enc_max_list)):
                        actual_encodings = self._compute_encodings(
                            get_enc_min[index], get_enc_max[index], bw)
                        expected_encodings = self._compute_encodings(
                            set_min, set_max, bw)
                        self.assertAllEqual(actual_encodings,
                                            expected_encodings)
                finally:
                    # Reset even when an assertion fails, so the quantizer
                    # initialized here never carries over into a later test.
                    libpytrext.ResetQuantizer()
예제 #4
0
def load_quantized_graph(meta_graph,
                         checkpoint,
                         encodings,
                         graph=None,
                         gpu=True):
    """
    Function to call to setup the saved quantization encodings and model. When loading a quantized graph
    from saved files the quantizer must first be initialized with the quantization op names
    and the saved encodings.

    :param meta_graph: Path to meta file
    :param checkpoint: Path to checkpoint file
    :param encodings: Either a path (str) to a JSON encodings file, or the already-loaded
        encodings mapping. The mapping must contain a 'quant_mode' entry whose value is a
        key of ``_QUANT_MODES``.
    :param graph: Graph to load the model into; a new ``tf.Graph`` is created when omitted
    :param gpu: If True, use GPU ops
    :return: The session returned by ``_load_graph`` for the loaded model
    :raises ValueError: If the 'quant_mode' entry is not a known quantization mode
    """
    if gpu:
        comp_mode = libpymo.ComputationMode.COMP_MODE_GPU
    else:
        comp_mode = libpymo.ComputationMode.COMP_MODE_CPU

    # Check to see if it's a file passed in and we need to process it, or if it's the
    # actual map data
    if isinstance(encodings, str):
        with open(encodings, 'r') as f:
            encodings = json.load(f)

    quant_mode = encodings['quant_mode']
    if quant_mode not in _QUANT_MODES:
        # str() keeps the intended ValueError even when the stored mode is not a string
        raise ValueError('Invalid quantization mode: ' + str(quant_mode))
    quant_mode = _QUANT_MODES[quant_mode]

    libpytrext.ResetQuantizer()
    # NOTE(review): every top-level key of the encodings map is registered as an op name,
    # which includes 'quant_mode' itself -- confirm this is intended.
    libpytrext.InitQuantizer(list(encodings.keys()), comp_mode, [], quant_mode)

    # The encodings are applied through a throwaway graph/session pair; the model itself
    # is loaded into a different graph below, so this session is closed as soon as the
    # encodings have been set instead of being left for garbage collection.
    encodings_graph = tf.Graph()
    with encodings_graph.as_default():
        encodings_sess = tf.compat.v1.Session(graph=encodings_graph)
        try:
            _set_activation_encodings(encodings_sess, encodings, gpu=gpu)
        finally:
            encodings_sess.close()

    # Use the provided graph, if it exists
    if not graph:
        graph = tf.Graph()
    with graph.as_default():
        sess, _ = _load_graph(graph, meta_graph, checkpoint)

    return sess
예제 #5
0
    def _prepare_graph_for_quantization(self, collect_stats=True):
        """
        Set up the session graph for quantization.

        The graph is queried for the known (activation) ops and the ops with
        weights. The quantizer is then re-initialized with the quantized names
        of the activation ops, weight quantization ops are inserted and, unless
        output quantization is skipped, activation quantization ops are
        inserted as well.

        :param collect_stats: Forwarded to the activation-op insertion; if
            True, stats are collected
        :return: None
        :raises RuntimeError: When inserting the operations fails (raised by
            the insertion helpers, which are not visible here)
        """
        # Build the op query over the session's graph
        op_query = core.OpQuery(self._sess.graph,
                                op_map=self._op_map,
                                ops_to_ignore=self._ops_to_ignore)

        # Known op groups get a quantization node inserted after them.
        # Ops starting with labels are not included for now.
        known_ops = op_query.get_known_ops(inputs=self._input_tensor_names)

        # Ops with weights, together with the indices of their weight inputs
        ops_with_weights = op_query.get_weight_ops(
            skip_bias_op=self._skip_bias)
        weight_input_indices = op_query.get_weight_inputs(ops_with_weights)

        # Re-initialize the quantizer with one name per activation op
        quantized_names = []
        for known_op in known_ops:
            quantized_names.append(self._get_quantized_name(known_op.name))
        libpytrext.ResetQuantizer()
        libpytrext.InitQuantizer(quantized_names, self._comp_mode, [],
                                 self._quant_mode)

        # Add quantization ops/data
        self._insert_weight_quantization_ops(ops_with_weights,
                                             weight_input_indices)
        if self._skip_output:
            return
        self._insert_activation_quantization_ops(known_ops, collect_stats)
    def testQCQuantize_Params(self):
        """
        Check quantize/de-quantize of parameters in both CPU and GPU mode.

        A weight tensor spanning [-50.0, 80.0] is passed through Q_DQ_PARAMS,
        once with ``training_in_progress=False`` and once with ``True``. Each
        time the de-quantized weights are compared with the originals, and the
        returned encodings are compared with those computed from the weight
        range.
        """
        _log.info('running testQCQuantize_Params')
        for use_gpu in [False, True]:
            if use_gpu:
                _log.info('GPU mode is selected')
            else:
                _log.info('CPU mode is selected')
            with self.session(use_gpu=use_gpu):
                bw = 8
                PARAM_MIN = -50.0
                PARAM_MAX = 80.0
                if use_gpu:
                    comp_mode = libpymo.ComputationMode.COMP_MODE_GPU
                else:
                    comp_mode = libpymo.ComputationMode.COMP_MODE_CPU
                # Initialize the quantizer for the single op name 'conv1'
                libpytrext.InitQuantizer(
                    ["conv1"], comp_mode, [],
                    libpymo.QuantizationMode.QUANTIZATION_TF_ENHANCED)

                try:
                    weights = constant_op.constant(
                        [-40.0, -1.0, 0.0, 1.0, 2.0, -50.0, 80.0])

                    # The same checks run first with training_in_progress
                    # False, then repeated with it set to True.
                    for training_in_progress in (False, True):
                        # Quantize and de-quantize params
                        test_output = self.qc_quantize_module.qc_quantize_deprecated(
                            op_name='conv1',
                            training_in_progress=training_in_progress,
                            config=int(
                                libpytrext.config_type.CONFIG_TYPE_Q_DQ_PARAMS),
                            bitwidth=bw,
                            in_tensors=[weights],
                            fixed_enc_mins=[],
                            fixed_enc_maxs=[],
                            num_tensors=1)
                        quantized_weights = ops.convert_to_tensor(
                            test_output[0]).eval()
                        # NOTE(review): the third positional argument of
                        # assertAllClose is rtol, so 1.0 is a relative
                        # tolerance -- confirm an absolute one was not meant.
                        self.assertAllClose(quantized_weights[0],
                                            weights.eval(), 1.0)

                        # Examine encodings of quantized params
                        out_enc_min = ops.convert_to_tensor(
                            test_output[1]).eval()
                        out_enc_max = ops.convert_to_tensor(
                            test_output[2]).eval()
                        true_encodings = self._compute_encodings(
                            out_enc_min[0], out_enc_max[0], bw)
                        expected_encodings = self._compute_encodings(
                            PARAM_MIN, PARAM_MAX, bw)
                        # Use better heuristics; ideally there should be 0
                        # error margin
                        error_margin = 10
                        self.assertArrayNear(true_encodings,
                                             expected_encodings, error_margin)
                finally:
                    # Reset even when an assertion fails, so the quantizer
                    # initialized here never carries over into the next loop
                    # iteration or a later test.
                    libpytrext.ResetQuantizer()
    def testQCQuantize_CheckZeroRepresentation(self):
        """
        Check that encodings set on a range excluding 0 are extended to 0.

        An all-negative range is set with SET_ENCODING and the maximum read
        back with GET_ENCODING must be exactly 0.0. An all-positive range is
        then set and the minimum read back must be exactly 0.0.
        """
        _log.info('running testQCQuantize_CheckZeroRepresentation')
        for use_gpu in [False]:
            if use_gpu:
                _log.info('GPU mode is selected')
            else:
                _log.info('CPU mode is selected')
            with self.session(use_gpu=use_gpu):
                bw = 8

                # Initialize the quantizer for the single op name 'conv1'
                if use_gpu:
                    comp_mode = libpymo.ComputationMode.COMP_MODE_GPU
                else:
                    comp_mode = libpymo.ComputationMode.COMP_MODE_CPU
                libpytrext.InitQuantizer(
                    ["conv1"], comp_mode, [],
                    libpymo.QuantizationMode.QUANTIZATION_TF_ENHANCED)

                try:
                    # Each case: (range min, range max, index of the
                    # GET_ENCODING output that must come back as 0.0).
                    # Output 1 holds the encoding minimums, output 2 the
                    # encoding maximums.
                    cases = [
                        (-8.0, -5.0, 2),    # all negative: max becomes 0
                        (20.0, 100.0, 1),   # all positive: min becomes 0
                    ]
                    for act_min, act_max, zero_output_index in cases:
                        # Set encodings
                        output_0 = self.qc_quantize_module.qc_quantize_deprecated(
                            op_name='conv1',
                            training_in_progress=False,
                            config=int(
                                libpytrext.config_type.CONFIG_TYPE_SET_ENCODING),
                            bitwidth=bw,
                            in_tensors=[[]],
                            fixed_enc_mins=[act_min],
                            fixed_enc_maxs=[act_max])
                        ops.convert_to_tensor(output_0[0]).eval()

                        # Get encodings from op and validate
                        output_1 = self.qc_quantize_module.qc_quantize_deprecated(
                            op_name='conv1',
                            training_in_progress=False,
                            config=int(
                                libpytrext.config_type.CONFIG_TYPE_GET_ENCODING),
                            bitwidth=bw,
                            in_tensors=[[]],
                            fixed_enc_mins=[],
                            fixed_enc_maxs=[],
                            num_tensors=1)

                        zeroed_bound = ops.convert_to_tensor(
                            output_1[zero_output_index]).eval()
                        self.assertEqual(zeroed_bound[0], 0.0)
                finally:
                    # Reset even when an assertion fails, so the quantizer
                    # initialized here never carries over into a later test.
                    libpytrext.ResetQuantizer()
    def testQCQuantize_MultipleActivations(self):
        """
        Check quantize/de-quantize of four activation tensors in one op call.

        Stats are collected from four copies of the same tensor (values
        0..99). Four test tensors are then passed through Q_DQ_ACTIVATIONS;
        each output must be within 1.0 of its input, and the encodings
        returned by that call must equal those reported by GET_ENCODING.
        """
        _log.info('running testQCQuantize_MultipleActivations')
        for use_gpu in [False]:
            if use_gpu:
                _log.info('GPU mode is selected')
            else:
                _log.info('CPU mode is selected')
            with self.session(use_gpu=use_gpu):
                bw = 8
                # The same stats tensor is fed for all four activations
                actvn_stats = constant_op.constant(
                    np.arange(0, 100).astype(np.float32))

                # Initialize the quantizer for the single op name 'conv1'
                if use_gpu:
                    comp_mode = libpymo.ComputationMode.COMP_MODE_GPU
                else:
                    comp_mode = libpymo.ComputationMode.COMP_MODE_CPU
                libpytrext.InitQuantizer(
                    ["conv1"], comp_mode, [],
                    libpymo.QuantizationMode.QUANTIZATION_TF_ENHANCED)

                try:
                    # Update quantization stats
                    output_0 = self.qc_quantize_module.qc_quantize_deprecated(
                        op_name='conv1',
                        training_in_progress=False,
                        config=int(
                            libpytrext.config_type.CONFIG_TYPE_UPDATE_STATS),
                        bitwidth=bw,
                        in_tensors=[
                            actvn_stats, actvn_stats, actvn_stats,
                            actvn_stats
                        ],
                        fixed_enc_mins=[],
                        fixed_enc_maxs=[])

                    ops.convert_to_tensor(output_0[0]).eval()

                    actvn_0 = constant_op.constant(
                        np.arange(0, 10).astype(np.float32))
                    actvn_1 = constant_op.constant(
                        np.arange(10, 20).astype(np.float32))
                    actvn_2 = constant_op.constant(
                        np.arange(20, 30).astype(np.float32))
                    actvn_3 = constant_op.constant(
                        np.arange(30, 40).astype(np.float32))
                    test_actvn = [actvn_0, actvn_1, actvn_2, actvn_3]

                    # Quantize and de-quantize activations
                    output_1 = self.qc_quantize_module.qc_quantize_deprecated(
                        op_name='conv1',
                        training_in_progress=False,
                        config=int(
                            libpytrext.config_type.CONFIG_TYPE_Q_DQ_ACTIVATIONS),
                        bitwidth=bw,
                        in_tensors=test_actvn,
                        fixed_enc_mins=[],
                        fixed_enc_maxs=[],
                        num_tensors=4)
                    quantized_acts = ops.convert_to_tensor(output_1[0]).eval()

                    quantization_error_margin = 1.0
                    for index in range(len(quantized_acts)):
                        self.assertArrayNear(
                            ops.convert_to_tensor(test_actvn[index]).eval(),
                            quantized_acts[index], quantization_error_margin)

                    # Test output encodings
                    enc_min = ops.convert_to_tensor(output_1[1]).eval()
                    enc_max = ops.convert_to_tensor(output_1[2]).eval()

                    # Compare against encodings obtained from get_encoding()
                    get_enc_tensor = self.qc_quantize_module.qc_quantize_deprecated(
                        op_name='conv1',
                        training_in_progress=False,
                        config=int(
                            libpytrext.config_type.CONFIG_TYPE_GET_ENCODING),
                        bitwidth=bw,
                        in_tensors=[[]],
                        fixed_enc_mins=[],
                        fixed_enc_maxs=[],
                        num_tensors=4)

                    exp_enc_min = ops.convert_to_tensor(
                        get_enc_tensor[1]).eval()
                    exp_enc_max = ops.convert_to_tensor(
                        get_enc_tensor[2]).eval()

                    # Both encoding sets come from the same quantizer, so
                    # they are required to match exactly.
                    for index in range(len(quantized_acts)):
                        true_encodings = self._compute_encodings(
                            enc_min[index], enc_max[index], bw)
                        expected_encodings = self._compute_encodings(
                            exp_enc_min[index], exp_enc_max[index], bw)
                        self.assertAllEqual(true_encodings,
                                            expected_encodings)
                finally:
                    # Reset even when an assertion fails, so the quantizer
                    # initialized here never carries over into a later test.
                    libpytrext.ResetQuantizer()
    def testQCQuantize_SingleActivation(self):
        """
        Check quantize/de-quantize of a single activation in CPU and GPU mode.

        Stats are collected in three separate UPDATE_STATS calls (values
        0..19, 0..49 and 0..99). A one-element activation is then passed
        through Q_DQ_ACTIVATIONS; the output is compared with the input, and
        the encodings returned by that call must equal those reported by
        GET_ENCODING.
        """
        _log.info('running testQCQuantize_SingleActivation')
        for use_gpu in [False, True]:
            if use_gpu:
                _log.info('GPU mode is selected')
            else:
                _log.info('CPU mode is selected')
            with self.session(use_gpu=use_gpu):
                bw = 8
                # Initialize the quantizer for the single op name 'conv1'
                if use_gpu:
                    comp_mode = libpymo.ComputationMode.COMP_MODE_GPU
                else:
                    comp_mode = libpymo.ComputationMode.COMP_MODE_CPU
                libpytrext.InitQuantizer(
                    ["conv1"], comp_mode, [],
                    libpymo.QuantizationMode.QUANTIZATION_TF_ENHANCED)

                try:
                    stats_tensors = [
                        constant_op.constant(
                            np.arange(0, upper).astype(np.float32))
                        for upper in (20, 50, 100)
                    ]
                    # Update quantization stats, one tensor per op call
                    for stats_tensor in stats_tensors:
                        stats_output = self.qc_quantize_module.qc_quantize_deprecated(
                            op_name='conv1',
                            training_in_progress=False,
                            config=int(
                                libpytrext.config_type.CONFIG_TYPE_UPDATE_STATS),
                            bitwidth=bw,
                            in_tensors=[stats_tensor],
                            fixed_enc_mins=[],
                            fixed_enc_maxs=[])
                        ops.convert_to_tensor(stats_output[0]).eval()

                    ACT_MAX = 16.0
                    test_actvn = constant_op.constant(
                        [ACT_MAX])  # Single input activation
                    # Quantize and de-quantize activations
                    test_output = self.qc_quantize_module.qc_quantize_deprecated(
                        op_name='conv1',
                        training_in_progress=False,
                        config=int(
                            libpytrext.config_type.CONFIG_TYPE_Q_DQ_ACTIVATIONS),
                        bitwidth=bw,
                        in_tensors=[test_actvn],
                        fixed_enc_mins=[],
                        fixed_enc_maxs=[],
                        num_tensors=1)
                    quantized_acts = ops.convert_to_tensor(
                        test_output[0]).eval()
                    # Test output activations
                    # NOTE(review): the third positional argument of
                    # assertAllClose is rtol, so 1.0 is a relative
                    # tolerance -- confirm an absolute one was not meant.
                    self.assertAllClose(quantized_acts[0], test_actvn.eval(),
                                        1.0)

                    # Test output encodings from quantizing activations.
                    enc_min = ops.convert_to_tensor(test_output[1]).eval()
                    enc_max = ops.convert_to_tensor(test_output[2]).eval()

                    true_encodings = self._compute_encodings(
                        enc_min[0], enc_max[0], bw)
                    # Compare against encodings obtained from get_encoding()
                    get_enc_tensor = self.qc_quantize_module.qc_quantize_deprecated(
                        op_name='conv1',
                        training_in_progress=False,
                        config=int(
                            libpytrext.config_type.CONFIG_TYPE_GET_ENCODING),
                        bitwidth=bw,
                        in_tensors=[[]],
                        fixed_enc_mins=[],
                        fixed_enc_maxs=[],
                        num_tensors=1)

                    exp_enc_min = ops.convert_to_tensor(
                        get_enc_tensor[1]).eval()
                    exp_enc_max = ops.convert_to_tensor(
                        get_enc_tensor[2]).eval()
                    expected_encodings = self._compute_encodings(
                        exp_enc_min[0], exp_enc_max[0], bw)
                    self.assertAllEqual(true_encodings, expected_encodings)
                finally:
                    # Reset even when an assertion fails, so the quantizer
                    # initialized here never carries over into the next loop
                    # iteration or a later test.
                    libpytrext.ResetQuantizer()