Code example #1 (score: 0)
File: quantization.py — Project: Rohan-Chaudhury/aimet
def training_helper(sim, generator):
    """A Helper function to fine-tune MNIST model"""
    sess = sim.session
    graph = sess.graph
    with graph.as_default():
        # Look up the tensors needed for training from the quantized graph
        input_tensor = graph.get_tensor_by_name("reshape_input:0")
        label_tensor = graph.get_tensor_by_name("labels:0")
        fc1_weights = graph.get_tensor_by_name("dense_1/MatMul/ReadVariableOp:0")
        loss_tensor = graph.get_tensor_by_name("xent:0")

        # Using Adam optimizer
        train_step = tf.compat.v1.train.AdamOptimizer(
            1e-3, name="TempAdam").minimize(loss_tensor)
        # The new optimizer introduces variables that must be initialized
        graph_eval.initialize_uninitialized_vars(sess)

        # Input data for MNIST
        mnist = input_data.read_data_sets('./data', one_hot=True)

        # Using 100 iterations and batch of size 50
        for step in range(100):
            batch = mnist.train.next_batch(50)
            sess.run([train_step, fc1_weights],
                     feed_dict={input_tensor: batch[0], label_tensor: batch[1]})
            if step % 10 == 0:
                # Find accuracy of model every 10 iterations
                perf = graph_eval.evaluate_graph(sess, generator, ['accuracy'],
                                                 graph_eval.default_eval_func,
                                                 1)
                print('Quantized performance: ' + str(perf * 100))

    # close session
    sess.close()
Code example #2 (score: 0)
    def test_construction_cpu_model(self):
        """
        Create QuantSim for a CPU model and check that quantizers have been added to the graph
        """
        tf.compat.v1.reset_default_graph()

        # Build a small two-conv Keras model pinned to the CPU
        with tf.device('/cpu:0'):
            model = tf.keras.Sequential()
            model.add(tf.keras.layers.Conv2D(32,
                                             kernel_size=3,
                                             input_shape=(28, 28, 3),
                                             activation='relu'))
            model.add(tf.keras.layers.MaxPooling2D((2, 2)))
            model.add(tf.keras.layers.Conv2D(64,
                                             kernel_size=3,
                                             activation='relu'))
            model.summary()

        session = tf.compat.v1.Session()
        initialize_uninitialized_vars(session)
        sim = QuantizationSimModel(session, ['conv2d_input'], ['conv2d_1/Relu'],
                                   use_cuda=False)

        # One run through the model to check if the ops got added correctly
        output_tensor = session.graph.get_tensor_by_name(
            'conv2d_1/BiasAdd_quantized:0')
        input_tensor = session.graph.get_tensor_by_name('conv2d_input:0')
        random_batch = np.random.randn(20, 28, 28, 3)
        session.run(output_tensor, feed_dict={input_tensor: random_batch})

        # Check that quantized ops got added for all params
        quant_ops = [op for op in session.graph.get_operations()
                     if op.type == 'QcQuantize']
        for quant_op in quant_ops:
            print(quant_op.name)
        self.assertEqual(10, len(quant_ops))

        # Check that the quant ops are correctly connected in the graph
        self.assertEqual('Conv2D', quant_ops[0].outputs[0].consumers()[0].type)
        self.assertEqual('BiasAdd',
                         quant_ops[1].outputs[0].consumers()[0].type)
        self.assertEqual(int(libpymo.TensorQuantizerOpMode.passThrough),
                         session.run(quant_ops[1].inputs[1]))

        # Check that op-mode is set correctly
        self.assertEqual(
            int(libpymo.TensorQuantizerOpMode.oneShotQuantizeDequantize),
            session.run(quant_ops[0].inputs[1]))

        session.close()
        sim.session.close()
        del sim
Code example #3 (score: 0)
 def test_skip_quantizing_dtype_int(self):
     """ Test that op with dtype int32 is skipped during quantization """
     tf.compat.v1.reset_default_graph()
     with tf.compat.v1.Session() as sess:
         # Build the test model; the returned handle is not needed
         _ = model_with_dtype_int()
         initialize_uninitialized_vars(sess)
         sim = QuantizationSimModel(sess, ['input_1', 'input_2'],
                                    ['model_with_dtype_int/Softmax'],
                                    use_cuda=False)
         # Six activation quantizers expected; input_1 (presumably the int32
         # tensor -- TODO confirm against model_with_dtype_int) must be skipped
         self.assertEqual(6, len(sim._activation_quantizers))
         self.assertTrue(
             'input_1_quantized' not in sim._activation_quantizers)
         self.assertTrue('input_2_quantized' in sim._activation_quantizers)
         # Clean up the quantsim session before the outer session exits
         sim.session.close()
         del sim
Code example #4 (score: 0)
File: quantizer.py — Project: Rohan-Chaudhury/aimet
def _load_graph(graph, meta_graph, checkpoint):
    """
    Load a TF graph given the meta and checkpoint files
    :param graph: Graph to load into
    :param meta_graph: Meta file
    :param checkpoint: Checkpoint file
    :return: Tuple of (newly created tf.compat.v1.Session, the Saver used to restore it)
    """
    _log.info('Loading graph: %s', meta_graph)
    session = tf.compat.v1.Session(graph=graph)

    # Open the graph and restore the parameters from the checkpoint
    saver = tf.compat.v1.train.import_meta_graph(meta_graph)
    saver.restore(session, checkpoint)

    # Any variables the checkpoint did not cover still need initialization
    graph_eval.initialize_uninitialized_vars(session)

    return session, saver
Code example #5 (score: 0)
    def prune_model(self, layer_db: LayerDatabase,
                    layer_comp_ratio_list: List[LayerCompRatioPair],
                    cost_metric: CostMetric, trainer):
        """
        Prune the model by winnowing input channels of every layer in
        layer_comp_ratio_list whose compression ratio is below 1, then
        reconstruct the pruned layers.

        :param layer_db: Original layer database; deep-copied, not modified
        :param layer_comp_ratio_list: Layer / compression-ratio pairs to prune
        :param cost_metric: Cost metric (unused in this body -- TODO confirm)
        :param trainer: Trainer (unused in this body -- TODO confirm)
        :return: New LayerDatabase holding the compressed model
        """
        # sort all the layers in layer_comp_ratio_list based on occurrence
        layer_comp_ratio_list = self._sort_on_occurrence(
            layer_db.model, layer_comp_ratio_list)

        # Copy the db
        comp_layer_db = copy.deepcopy(layer_db)
        current_sess = comp_layer_db.model

        # Dictionary to map original layer name to list of most recent pruned layer name and output mask.
        # Masks remain at the original length and specify channels winnowed after each round of winnower.
        orig_layer_name_to_pruned_name_and_mask_dict = {}
        # Dictionary to map most recent pruned layer name to the original layer name
        pruned_name_to_orig_name_dict = {}
        # List to hold original layers to reconstruct
        layers_to_reconstruct = []
        detached_op_names = set()

        # Prune layers which have comp ratios less than 1
        for layer_comp_ratio in layer_comp_ratio_list:
            orig_layer = layer_db.find_layer_by_name(
                layer_comp_ratio.layer.name)
            if layer_comp_ratio.comp_ratio is not None and layer_comp_ratio.comp_ratio < 1.0:
                # 1) channel selection
                prune_indices = self._select_inp_channels(
                    orig_layer, layer_comp_ratio.comp_ratio)
                # Nothing to winnow for this layer
                if not prune_indices:
                    continue

                # 2) Winnowing the model
                current_sess, ordered_modules_list = winnow.winnow_tf_model(
                    current_sess,
                    self._input_op_names,
                    self._output_op_names,
                    [(orig_layer.module, prune_indices)],
                    reshape=self._allow_custom_downsample_ops,
                    in_place=True,
                    verbose=False)
                # Winnower made no changes for this layer
                if not ordered_modules_list:
                    continue

                layers_to_reconstruct.append(orig_layer)
                # Update dictionaries with new info about pruned ops and new masks
                self._update_pruned_ops_and_masks_info(
                    ordered_modules_list,
                    orig_layer_name_to_pruned_name_and_mask_dict,
                    pruned_name_to_orig_name_dict, detached_op_names)

        # Save and reload modified graph to allow changes to take effect
        # Need to initialize uninitialized variables first since only newly winnowed conv ops are initialized during
        # winnow_tf_model, and all other newly winnowed ops are not.
        with current_sess.graph.as_default():
            initialize_uninitialized_vars(current_sess)
        current_sess = save_and_load_graph('./saver', current_sess)
        comp_layer_db.update_database(current_sess,
                                      detached_op_names,
                                      update_model=True)

        # Perform reconstruction
        self._reconstruct_layers(layers_to_reconstruct,
                                 orig_layer_name_to_pruned_name_and_mask_dict,
                                 layer_db, comp_layer_db)

        return comp_layer_db
Code example #6 (score: 0)
    def test_spatial_svd_compress_auto_with_finetuning(self):
        """
        End to end test with MNIST model following fine tuning:
        compress the model with spatial SVD in auto mode, then fine-tune the
        compressed model for one step and verify the weights changed.
        :return:
        """
        tf.compat.v1.set_random_seed(10)
        AimetLogger.set_level_for_all_areas(logging.DEBUG)

        # load the meta file
        meta_path = os.path.join('models', 'mnist_save.meta')
        sess = aimet_tensorflow.utils.graph_saver.load_model_from_meta(
            meta_path)

        # ignore first Conv2D op
        conv2d = sess.graph.get_operation_by_name('conv1/Conv2D')
        modules_to_ignore = [conv2d]

        greedy_params = aimet_common.defs.GreedySelectionParameters(
            target_comp_ratio=Decimal(0.5),
            num_comp_ratio_candidates=10,
            use_monotonic_fit=True,
            saved_eval_scores_dict=None)

        auto_params = aimet_tensorflow.defs.SpatialSvdParameters.AutoModeParams(
            greedy_select_params=greedy_params,
            modules_to_ignore=modules_to_ignore)

        params = aimet_tensorflow.defs.SpatialSvdParameters(
            input_op_names=['reshape_input'],
            output_op_names=['dense_1/BiasAdd'],
            mode=aimet_tensorflow.defs.SpatialSvdParameters.Mode.auto,
            params=auto_params,
            multiplicity=8)
        input_shape = (1, 1, 28, 28)

        compr_model_sess, stats = ModelCompressor.compress_model(
            sess=sess,
            working_dir=None,
            eval_callback=evaluate,
            eval_iterations=5,
            input_shape=input_shape,
            compress_scheme=aimet_common.defs.CompressionScheme.spatial_svd,
            cost_metric=aimet_common.defs.CostMetric.mac,
            parameters=params)

        print(stats)

        # Reported accuracy must match a fresh evaluation of the compressed model
        self.assertEqual(evaluate(compr_model_sess, 1, True),
                         float(stats.compressed_model_accuracy))

        all_ops = compr_model_sess.graph.get_operations()

        conv_ops = [op for op in all_ops if op.type == 'Conv2D']

        # Spatial SVD splits each compressed conv into two, and one conv was ignored
        self.assertEqual(len(conv_ops), 4)
        self.assertTrue(
            math.isclose(float(stats.mac_compression_ratio), 0.5, abs_tol=0.1))

        # get the weights before fine tuning

        conv2d_1_a_op = compr_model_sess.graph.get_operation_by_name(
            'conv2_a/Conv2D')
        conv2d_1_a_op_weights_before = conv2d_1_a_op.inputs[1].eval(
            session=compr_model_sess)

        # fine tune the model

        # get the input and validation place holders
        x = compr_model_sess.graph.get_tensor_by_name('reshape_input:0')
        y = compr_model_sess.graph.get_tensor_by_name('labels:0')
        cross_entropy = compr_model_sess.graph.get_tensor_by_name('xent:0')

        with compr_model_sess.graph.as_default():

            # new optimizer and back propagation Op
            optimizer = tf.compat.v1.train.AdamOptimizer(learning_rate=1e-3,
                                                         name='Adam_new')
            train_step = optimizer.minimize(loss=cross_entropy,
                                            name='train_step_new')

            # initialize only uninitialized variables
            # only needed when fine tuning, because we are adding new optimizer
            graph_eval.initialize_uninitialized_vars(compr_model_sess)

        mnist = input_data.read_data_sets(os.path.join(str('./'), 'data'),
                                          one_hot=True)

        # Single training step is enough to perturb the weights
        for i in range(1):

            batch = mnist.train.next_batch(batch_size=32, shuffle=True)
            _, loss_val = compr_model_sess.run([train_step, cross_entropy],
                                               feed_dict={
                                                   x: batch[0],
                                                   y: batch[1]
                                               })

        # get the weights after fine tuning

        conv2d_1_a_op = compr_model_sess.graph.get_operation_by_name(
            'conv2_a/Conv2D')
        conv2d_1_a_op_weights_after = conv2d_1_a_op.inputs[1].eval(
            session=compr_model_sess)

        # weight should be different after one iteration
        self.assertFalse(
            np.allclose(conv2d_1_a_op_weights_before,
                        conv2d_1_a_op_weights_after))

        # close original session
        sess.close()
        # close compressed model session
        compr_model_sess.close()

        # delete temp directory
        shutil.rmtree(str('./temp_meta/'))
Code example #7 (score: 0)
    def test_manual_quantize(self):
        """ Test quantizing a model by manually specifying ops to quantize """
        def get_manual_activations(_graph, _starting_ops, _ending_ops):
            """
            Overriding function for getting a list of ops to insert activation quantizers for
            :param _graph: Unused argument
            :param _starting_ops: Unused argument
            :param _ending_ops: Unused argument
            :return: List of ops to insert activation quantizers for, None for placeholder
            """
            return ['conv2d/Relu'], None

        def get_manual_params(_graph, _starting_ops, _ending_ops):
            """
            Overriding function for getting a list of ops to insert param quantizers for
            :param _graph: Unused argument
            :param _starting_ops: Unused argument
            :param _ending_ops: Unused argument
            :return: List of ops to insert param quantizers for, and list of param indices for these ops
            """
            return ['conv2d_1/Conv2D'], [1]

        def configure_quantization_ops(self, _conn_graph,
                                       _ops_with_param_names, _indices,
                                       _activation_op_names, _config_file):
            """
            Overriding function for configuring quantization ops inserted by QuantizationSimModel
            :param self: Self refers to QuantizationSimModel object
            :param _conn_graph: Unused argument
            :param _ops_with_param_names: Unused argument
            :param _indices: Unused argument
            :param _activation_op_names: Unused argument
            :param _config_file: Unused argument
            """
            # Toggle 'enabled' on the manually requested quantizers to exercise
            # the property interface during construction
            conv2d_relu_quant_info = self._activation_quantizers[
                'conv2d/Relu_quantized']
            conv2d_relu_quant_info.enabled = False
            conv2d_relu_quant_info.enabled = True
            conv2d_1_weight_quant_info = self._param_quantizers[
                'conv2d_1/Conv2D/ReadVariableOp_quantized']
            conv2d_1_weight_quant_info.enabled = False
            conv2d_1_weight_quant_info.enabled = True

        tf.compat.v1.reset_default_graph()
        with tf.device('/cpu:0'):
            model = tf.keras.Sequential()
            model.add(
                tf.keras.layers.Conv2D(32,
                                       kernel_size=3,
                                       input_shape=(28, 28, 3),
                                       activation='relu'))
            model.add(tf.keras.layers.MaxPooling2D((2, 2)))
            model.add(
                tf.keras.layers.Conv2D(64, kernel_size=3, activation='relu'))
            model.summary()

        sess = tf.compat.v1.Session()
        initialize_uninitialized_vars(sess)

        # Save the original QuantizationSimModel hooks before monkey-patching so
        # they can always be restored; otherwise a failing assertion below would
        # leave the class patched for every subsequent test.
        orig_get_ops_to_quantize_activations_for = QuantizationSimModel._get_ops_to_quantize_activations_for
        orig_get_ops_to_quantize_weights_for = QuantizationSimModel._get_ops_to_quantize_params_for
        orig_configure_quantization_ops = QuantizationSimModel.configure_quantization_ops
        QuantizationSimModel._get_ops_to_quantize_activations_for = get_manual_activations
        QuantizationSimModel._get_ops_to_quantize_params_for = get_manual_params
        QuantizationSimModel.configure_quantization_ops = configure_quantization_ops

        sim = None
        try:
            sim = QuantizationSimModel(sess, ['conv2d_input'], ['conv2d_1/Relu'],
                                       use_cuda=False)
            # Only the manually specified activation and param should be quantized
            self.assertEqual(1, len(sim._activation_quantizers))
            self.assertEqual(1, len(sim._param_quantizers))
        finally:
            # Restore the original class methods unconditionally
            QuantizationSimModel._get_ops_to_quantize_activations_for = orig_get_ops_to_quantize_activations_for
            QuantizationSimModel._get_ops_to_quantize_params_for = orig_get_ops_to_quantize_weights_for
            QuantizationSimModel.configure_quantization_ops = orig_configure_quantization_ops

            # Close each session exactly once (sim.session was previously closed twice)
            sess.close()
            if sim is not None:
                sim.session.close()
            del sim
Code example #8 (score: 0)
    def test_set_get_quantizer_params_using_properties(self):
        """
        Create QuantSim for a CPU model, test param read and write using properties
        """

        tf.compat.v1.reset_default_graph()
        with tf.device('/cpu:0'):
            model = tf.keras.Sequential()
            model.add(
                tf.keras.layers.Conv2D(32,
                                       kernel_size=3,
                                       input_shape=(28, 28, 3),
                                       activation='relu'))
            model.add(tf.keras.layers.MaxPooling2D((2, 2)))
            model.add(
                tf.keras.layers.Conv2D(64, kernel_size=3, activation='relu'))
            model.summary()

        sess = tf.compat.v1.Session()
        initialize_uninitialized_vars(sess)
        sim = QuantizationSimModel(sess, [model.input.op.name],
                                   [model.output.op.name],
                                   use_cuda=False)

        # Grab one weight, one activation, and one bias quantizer to exercise
        # the property interface on each kind
        p_quantizer = sim.quantizer_config(
            'conv2d/Conv2D/ReadVariableOp_quantized')
        o_quantizer = sim.quantizer_config('conv2d/Relu_quantized')
        bias_quantizer = sim.quantizer_config(
            'conv2d/BiasAdd/ReadVariableOp_quantized')

        # check if __str__ can print the object info
        print(p_quantizer)

        # bitwidth: default 8, settable to 6
        bitwidth = p_quantizer.bitwidth
        self.assertEqual(8, bitwidth)
        p_quantizer.bitwidth = 6
        bitwidth = p_quantizer.bitwidth
        self.assertEqual(6, bitwidth)

        bitwidth = o_quantizer.bitwidth
        self.assertEqual(8, bitwidth)
        o_quantizer.bitwidth = 6
        bitwidth = o_quantizer.bitwidth
        self.assertEqual(6, bitwidth)

        # use_symmetric_encoding: default False, settable to True
        sym_encoding = bias_quantizer.use_symmetric_encoding
        self.assertFalse(sym_encoding)
        bias_quantizer.use_symmetric_encoding = True
        sym_encoding = bias_quantizer.use_symmetric_encoding
        self.assertTrue(sym_encoding)

        # rounding_mode: default ROUND_NEAREST, settable to ROUND_STOCHASTIC
        rounding_mode = o_quantizer.rounding_mode
        self.assertEqual(libpymo.RoundingMode.ROUND_NEAREST, rounding_mode)
        o_quantizer.rounding_mode = libpymo.RoundingMode.ROUND_STOCHASTIC
        rounding_mode = o_quantizer.rounding_mode
        self.assertEqual(libpymo.RoundingMode.ROUND_STOCHASTIC, rounding_mode)

        # quant_scheme: default TF-enhanced; switching the scheme also leaves
        # the encoding invalid (checked below)
        quant_scheme = o_quantizer.quant_scheme
        self.assertEqual(libpymo.QuantizationMode.QUANTIZATION_TF_ENHANCED,
                         quant_scheme)
        o_quantizer.quant_scheme = QuantScheme.post_training_tf
        quant_scheme = o_quantizer.quant_scheme
        self.assertEqual(libpymo.QuantizationMode.QUANTIZATION_TF,
                         quant_scheme)
        self.assertFalse(o_quantizer.tensor_quantizer.isEncodingValid)

        # enabled: default True, settable to False
        is_enabled = p_quantizer.enabled
        self.assertTrue(is_enabled)
        p_quantizer.enabled = False
        is_enabled = p_quantizer.enabled
        self.assertFalse(is_enabled)

        sim.session.close()
        del sim
Code example #9 (score: 0)
    def test_save_load_ckpt_after_compute_encoding_on_orig_object(self):
        """
        Create QuantSim for a CPU model, test save and load on a quantsim model
        when encodings have been computed on original quantsim object
        """
        tf.compat.v1.reset_default_graph()
        with tf.device('/cpu:0'):
            model = tf.keras.Sequential()
            model.add(
                tf.keras.layers.Conv2D(32,
                                       kernel_size=3,
                                       input_shape=(28, 28, 3),
                                       activation='relu'))
            model.add(tf.keras.layers.MaxPooling2D((2, 2)))
            model.add(
                tf.keras.layers.Conv2D(64, kernel_size=3, activation='relu'))
            model.summary()

        sess = tf.compat.v1.Session()
        initialize_uninitialized_vars(sess)
        sim = QuantizationSimModel(sess, [model.input.op.name],
                                   [model.output.op.name],
                                   use_cuda=False)

        def dummy_forward_pass(n_sess, args):
            # Run one random batch through the quantized graph so the
            # quantizers can observe activation statistics
            model_output = n_sess.graph.get_tensor_by_name(model.output.name)
            model_output = model_output.consumers()[0].outputs[0]
            model_input = n_sess.graph.get_tensor_by_name(model.input.name)
            dummy_input = np.random.randn(20, 28, 28, 3)
            n_sess.run(model_output, feed_dict={model_input: dummy_input})

        sim.compute_encodings(dummy_forward_pass, None)

        # save quantsim model
        save_checkpoint(sim, './test_3', 'orig_quantsim_model')

        new_quantsim = load_checkpoint('./test_3', 'orig_quantsim_model')

        # validations
        assert (sim is not new_quantsim)

        # as we have performed computeEncodings() on saved quantsim object, these must be set to True/False
        # in loaded quantsim object as on orig model
        for quantize_op in new_quantsim._param_quantizers:
            # encoding validity and min/max variables must round-trip through save/load
            self.assertTrue(
                new_quantsim._param_quantizers[quantize_op].tensor_quantizer.
                isEncodingValid == sim._param_quantizers[quantize_op].
                tensor_quantizer.isEncodingValid)
            self.assertTrue(
                new_quantsim._param_quantizers[quantize_op].
                get_variable_from_op(QuantizeOpIndices.encoding_min) ==
                sim._param_quantizers[quantize_op].get_variable_from_op(
                    QuantizeOpIndices.encoding_min))
            self.assertTrue(
                new_quantsim._param_quantizers[quantize_op].
                get_variable_from_op(QuantizeOpIndices.encoding_max) ==
                sim._param_quantizers[quantize_op].get_variable_from_op(
                    QuantizeOpIndices.encoding_max))

        # same checks for the activation quantizers
        for quantize_op in new_quantsim._activation_quantizers:
            self.assertTrue(new_quantsim._activation_quantizers[quantize_op].
                            tensor_quantizer.isEncodingValid ==
                            sim._activation_quantizers[quantize_op].
                            tensor_quantizer.isEncodingValid)
            self.assertTrue(
                new_quantsim._activation_quantizers[quantize_op].
                get_variable_from_op(QuantizeOpIndices.encoding_min) ==
                sim._activation_quantizers[quantize_op].get_variable_from_op(
                    QuantizeOpIndices.encoding_min))
            self.assertTrue(
                new_quantsim._activation_quantizers[quantize_op].
                get_variable_from_op(QuantizeOpIndices.encoding_max) ==
                sim._activation_quantizers[quantize_op].get_variable_from_op(
                    QuantizeOpIndices.encoding_max))

        # delete temp folder created and close sessions
        shutil.rmtree('./test_3')
        sess.close()
        sim.session.close()
        new_quantsim.session.close()
        del sim
        del new_quantsim
Code example #10 (score: 0)
    def test_save_load_ckpt_cpu_model(self):
        """
        Create QuantSim for a CPU model, test save and load on a quantsim model.
        """
        tf.compat.v1.reset_default_graph()
        with tf.device('/cpu:0'):
            model = tf.keras.Sequential()
            model.add(
                tf.keras.layers.Conv2D(32,
                                       kernel_size=3,
                                       input_shape=(28, 28, 3),
                                       activation='relu'))
            model.add(tf.keras.layers.MaxPooling2D((2, 2)))
            model.add(
                tf.keras.layers.Conv2D(64, kernel_size=3, activation='relu'))
            model.summary()

        sess = tf.compat.v1.Session()
        initialize_uninitialized_vars(sess)
        sim = QuantizationSimModel(sess, [model.input.op.name],
                                   [model.output.op.name],
                                   use_cuda=False)

        # save quantsim model
        save_checkpoint(sim, './test_3', 'orig_quantsim_model')

        new_quantsim = load_checkpoint('./test_3', 'orig_quantsim_model')

        # validations
        # loading must produce a distinct object whose configuration matches the original
        assert (sim is not new_quantsim)
        self.assertTrue(new_quantsim.session is not None)
        self.assertTrue(new_quantsim._quant_scheme == sim._quant_scheme)
        self.assertTrue(new_quantsim._rounding_mode == sim._rounding_mode)
        self.assertTrue(new_quantsim._use_cuda == sim._use_cuda)
        self.assertTrue(
            len(new_quantsim._param_quantizers) == len(sim._param_quantizers))
        self.assertTrue(
            len(new_quantsim._activation_quantizers) == len(
                sim._activation_quantizers))

        # param quantizers: distinct sessions, same scheme/rounding, and since no
        # encodings were computed, encodings must be invalid on both objects
        for quantize_op in new_quantsim._param_quantizers:
            self.assertFalse(
                sim._param_quantizers[quantize_op].session ==
                new_quantsim._param_quantizers[quantize_op].session)
            self.assertTrue(
                sim._param_quantizers[quantize_op].tensor_quantizer.
                getQuantScheme() == new_quantsim._param_quantizers[quantize_op]
                .tensor_quantizer.getQuantScheme())
            self.assertTrue(
                sim._param_quantizers[quantize_op].tensor_quantizer.
                roundingMode == new_quantsim._param_quantizers[quantize_op].
                tensor_quantizer.roundingMode)
            self.assertFalse(sim._param_quantizers[quantize_op].
                             tensor_quantizer.isEncodingValid)
            self.assertFalse(new_quantsim._param_quantizers[quantize_op].
                             tensor_quantizer.isEncodingValid)

        # same checks for the activation quantizers
        for quantize_op in new_quantsim._activation_quantizers:
            self.assertFalse(
                sim._activation_quantizers[quantize_op].session ==
                new_quantsim._activation_quantizers[quantize_op].session)
            self.assertTrue(sim._activation_quantizers[quantize_op].
                            tensor_quantizer.getQuantScheme() ==
                            new_quantsim._activation_quantizers[quantize_op].
                            tensor_quantizer.getQuantScheme())
            self.assertTrue(sim._activation_quantizers[quantize_op].
                            tensor_quantizer.roundingMode ==
                            new_quantsim._activation_quantizers[quantize_op].
                            tensor_quantizer.roundingMode)
            self.assertFalse(sim._activation_quantizers[quantize_op].
                             tensor_quantizer.isEncodingValid)
            self.assertFalse(new_quantsim._activation_quantizers[quantize_op].
                             tensor_quantizer.isEncodingValid)

        # remove the old quant sim reference and session
        # to test that everything is loaded correctly on new quantsim including tensor quantizer references
        sim.session.close()
        del sim

        # delete temp folder created and close sessions
        shutil.rmtree('./test_3')
        sess.close()
        new_quantsim.session.close()
        del new_quantsim
Code example #11 (score: 0)
    def test_export_cpu_model(self):
        """
        Create QuantSim for a CPU model, compute encodings and export out a resulting model
        """
        tf.compat.v1.reset_default_graph()
        with tf.device('/cpu:0'):
            model = tf.keras.Sequential()
            model.add(
                tf.keras.layers.Conv2D(32,
                                       kernel_size=3,
                                       input_shape=(28, 28, 3),
                                       activation='relu'))
            model.add(tf.keras.layers.MaxPooling2D((2, 2)))
            model.add(
                tf.keras.layers.Conv2D(64, kernel_size=3, activation='relu'))
            model.summary()

        sess = tf.compat.v1.Session()
        initialize_uninitialized_vars(sess)
        sim = QuantizationSimModel(sess, [model.input.op.name],
                                   [model.output.op.name],
                                   use_cuda=False)

        def dummy_forward_pass(sess, args):
            # Run one random batch through the quantized graph so the
            # quantizers can observe activation statistics
            model_output = sess.graph.get_tensor_by_name(model.output.name)
            model_output = model_output.consumers()[0].outputs[0]
            model_input = sess.graph.get_tensor_by_name(model.input.name)
            dummy_input = np.random.randn(20, 28, 28, 3)
            sess.run(model_output, feed_dict={model_input: dummy_input})

        sim.compute_encodings(dummy_forward_pass, None)

        # Make some changes to model parameters to see if they are part of the exported model
        with sim.session.graph.as_default():
            first_bias_tensor = sim.session.graph.get_tensor_by_name(
                'conv2d/BiasAdd/ReadVariableOp:0')
            first_bias_tensor_val = sim.session.run(first_bias_tensor)
            self.assertTrue(np.any(first_bias_tensor_val == 0))
            first_bias_tensor_var = [
                var for var in tf.compat.v1.global_variables()
                if var.name == 'conv2d/bias:0'
            ][0]
            # Overwrite the bias with ones so the exported model is distinguishable
            first_bias_tensor_var.load(np.ones(32), sim.session)

        # The simulation graph must contain QcQuantize ops before export
        all_op_types = [op.type for op in sim.session.graph.get_operations()]
        self.assertIn('QcQuantize', all_op_types)

        sim.export('/tmp', 'quant_sim_model')

        # Validate the structure of the exported encodings file
        with open('/tmp/quant_sim_model.encodings') as json_file:
            encoding_data = json.load(json_file)
        activation_keys = list(encoding_data["activation_encodings"].keys())
        self.assertTrue(activation_keys[0] == "conv2d/Relu:0")
        self.assertTrue(
            isinstance(encoding_data["activation_encodings"]["conv2d/Relu:0"],
                       list))
        act_encoding_keys = encoding_data["activation_encodings"][
            "conv2d/Relu:0"][0].keys()
        self.assertTrue("bitwidth" in act_encoding_keys)
        self.assertTrue("is_symmetric" in act_encoding_keys)
        self.assertTrue("max" in act_encoding_keys)
        self.assertTrue("min" in act_encoding_keys)
        self.assertTrue("offset" in act_encoding_keys)
        self.assertTrue("scale" in act_encoding_keys)

        param_keys = list(encoding_data["param_encodings"].keys())
        self.assertTrue(param_keys[0] == "conv2d/Conv2D/ReadVariableOp:0")
        self.assertTrue(
            isinstance(
                encoding_data["param_encodings"]
                ["conv2d/Conv2D/ReadVariableOp:0"], list))
        param_encoding_keys = encoding_data["param_encodings"][
            "conv2d/Conv2D/ReadVariableOp:0"][0].keys()
        self.assertTrue("bitwidth" in param_encoding_keys)
        self.assertTrue("is_symmetric" in param_encoding_keys)
        self.assertTrue("max" in param_encoding_keys)
        self.assertTrue("min" in param_encoding_keys)
        self.assertTrue("offset" in param_encoding_keys)
        self.assertTrue("scale" in param_encoding_keys)

        # Reload the exported model: modified bias must persist, and the
        # QcQuantize ops must have been stripped from the exported graph
        new_sess = load_model_from_meta('/tmp/quant_sim_model.meta')
        first_bias_tensor = new_sess.graph.get_tensor_by_name(
            'conv2d/BiasAdd/ReadVariableOp:0')
        first_bias_tensor_val = new_sess.run(first_bias_tensor)
        self.assertTrue(np.any(first_bias_tensor_val == 1))

        all_op_types = [op.type for op in new_sess.graph.get_operations()]
        self.assertNotIn('QcQuantize', all_op_types)
        sess.close()
        sim.session.close()
        del sim
コード例 #12
0
    def test_compute_encodings_quant_scheme_update(self):
        """
        Create QuantSim model and update quantScheme using property interface
        """

        tf.compat.v1.reset_default_graph()
        np.random.seed(0)
        tf.compat.v1.set_random_seed(0)

        # Build a small two-conv model pinned to the GPU.
        with tf.device('/gpu:0'):
            model = tf.keras.Sequential([
                tf.keras.layers.Conv2D(32,
                                       kernel_size=3,
                                       input_shape=(28, 28, 3),
                                       activation='relu'),
                tf.keras.layers.MaxPooling2D((2, 2)),
                tf.keras.layers.Conv2D(64, kernel_size=3, activation='relu'),
            ])
            model.summary()

        sess = tf.compat.v1.Session()
        initialize_uninitialized_vars(sess)
        sim = QuantizationSimModel(sess, ['conv2d_input'], ['conv2d_1/Relu'],
                                   use_cuda=True)

        # Weight quantizer must start in one-shot quantize-dequantize mode.
        weight_quant_op = sim.session.graph.get_operation_by_name(
            'conv2d/Conv2D/ReadVariableOp_quantized')
        self.assertEqual(
            int(libpymo.TensorQuantizerOpMode.oneShotQuantizeDequantize),
            sim.session.run(weight_quant_op.inputs[1]))

        def dummy_forward_pass(session, _):
            # Deterministic forward pass so both compute_encodings calls see
            # identical input data.
            np.random.seed(0)
            tf.compat.v1.set_random_seed(0)
            out_tensor = session.graph.get_tensor_by_name(
                'conv2d_1/Relu_quantized:0')
            in_tensor = session.graph.get_tensor_by_name('conv2d_input:0')
            session.run(out_tensor,
                        feed_dict={in_tensor: np.random.randn(20, 28, 28, 3)})

        sim.compute_encodings(dummy_forward_pass, None)

        quantizer = sim.quantizer_config(
            'conv2d/Conv2D/ReadVariableOp_quantized')
        enc_min_before = quantizer.get_variable_from_op(
            QuantizeOpIndices.encoding_min)
        enc_max_before = quantizer.get_variable_from_op(
            QuantizeOpIndices.encoding_max)

        # Default scheme is TF-enhanced; flip it to plain TF via the property.
        self.assertEqual(libpymo.QuantizationMode.QUANTIZATION_TF_ENHANCED,
                         quantizer.quant_scheme)
        quantizer.quant_scheme = QuantScheme.post_training_tf
        self.assertEqual(libpymo.QuantizationMode.QUANTIZATION_TF,
                         quantizer.quant_scheme)

        # Recomputing encodings under the new scheme must move the range.
        sim.compute_encodings(dummy_forward_pass, None)
        enc_min_after = quantizer.get_variable_from_op(
            QuantizeOpIndices.encoding_min)
        enc_max_after = quantizer.get_variable_from_op(
            QuantizeOpIndices.encoding_max)

        self.assertNotEqual(enc_min_before, enc_min_after)
        self.assertNotEqual(enc_max_before, enc_max_after)

        sess.close()
        sim.session.close()
        del sim
コード例 #13
0
    def test_compute_encodings_gpu_model(self):
        """
        Create QuantSim for a GPU model and test that activation encodings are computed
        """

        tf.compat.v1.reset_default_graph()
        with tf.device('/gpu:0'):
            model = tf.keras.Sequential()
            model.add(
                tf.keras.layers.Conv2D(32,
                                       kernel_size=3,
                                       input_shape=(28, 28, 3),
                                       activation='relu'))
            model.add(tf.keras.layers.MaxPooling2D((2, 2)))
            model.add(
                tf.keras.layers.Conv2D(64, kernel_size=3, activation='relu'))
            model.summary()

        sess = tf.compat.v1.Session()
        initialize_uninitialized_vars(sess)
        sim = QuantizationSimModel(sess, ['conv2d_input'], ['conv2d_1/Relu'],
                                   use_cuda=True)

        # Before compute_encodings: weights are in one-shot
        # quantize-dequantize mode, activations are collecting statistics.
        conv2d_weight_quant_op = sim.session.graph.get_operation_by_name(
            'conv2d/Conv2D/ReadVariableOp_quantized')
        conv2d_output_quant_op = sim.session.graph.get_operation_by_name(
            'conv2d/Relu_quantized')
        self.assertEqual(
            int(libpymo.TensorQuantizerOpMode.oneShotQuantizeDequantize),
            sim.session.run(conv2d_weight_quant_op.inputs[1]))
        self.assertEqual(int(libpymo.TensorQuantizerOpMode.updateStats),
                         sim.session.run(conv2d_output_quant_op.inputs[1]))

        def dummy_forward_pass(sess, args):
            """Run one batch of random data through the quantized graph."""
            model_output = sess.graph.get_tensor_by_name(
                'conv2d_1/Relu_quantized:0')
            model_input = sess.graph.get_tensor_by_name('conv2d_input:0')
            dummy_input = np.random.randn(20, 28, 28, 3)
            sess.run(model_output, feed_dict={model_input: dummy_input})

        sim.compute_encodings(dummy_forward_pass, None)

        # Check if encodings have been calculated
        deactivated_quantizers = [
            'conv2d_input_quantized', 'conv2d/BiasAdd_quantized',
            'conv2d_1/BiasAdd_quantized'
        ]
        for name, quantizer in sim._activation_quantizers.items():
            if name in deactivated_quantizers:
                # BUGFIX: was assertTrue(int(mode), sim.session.run(...)),
                # which treated the run result as the failure *message* and
                # always passed; assertEqual performs the intended comparison.
                self.assertEqual(
                    int(libpymo.TensorQuantizerOpMode.passThrough),
                    sim.session.run(name + '_op_mode/read:0'))
            else:
                self.assertTrue(
                    quantizer.tensor_quantizer.isEncodingValid,
                    "quantizer: {} does not have a valid encoding".format(
                        name))

        # After compute_encodings: weights stay in one-shot mode while
        # activations switch to quantize-dequantize.
        conv2d_weight_quant_op = sim.session.graph.get_operation_by_name(
            'conv2d/Conv2D/ReadVariableOp_quantized')
        conv2d_output_quant_op = sim.session.graph.get_operation_by_name(
            'conv2d/Relu_quantized')

        self.assertEqual(
            int(libpymo.TensorQuantizerOpMode.oneShotQuantizeDequantize),
            sim.session.run(conv2d_weight_quant_op.inputs[1]))
        self.assertEqual(int(libpymo.TensorQuantizerOpMode.quantizeDequantize),
                         sim.session.run(conv2d_output_quant_op.inputs[1]))

        sess.close()
        sim.session.close()
        del sim
コード例 #14
0
    def _save_to_keras_common_test_code(self, use_cuda):
        """Common body for the save_to_keras tests: build a two-conv model,
        quantize it, export with save_to_keras and verify the exported graph."""
        tf.compat.v1.reset_default_graph()

        def _build_two_conv_model():
            # Conv(32) -> MaxPool -> Conv(64), all relu-activated.
            m = tf.keras.Sequential()
            m.add(
                tf.keras.layers.Conv2D(32,
                                       kernel_size=3,
                                       input_shape=(28, 28, 3),
                                       activation='relu'))
            m.add(tf.keras.layers.MaxPooling2D((2, 2)))
            m.add(tf.keras.layers.Conv2D(64, kernel_size=3, activation='relu'))
            m.summary()
            return m

        # NOTE(review): the cuda path pins graph construction to '/cpu:0'
        # while the non-cuda path uses the default device -- looks inverted;
        # confirm whether '/gpu:0' was intended here.
        if use_cuda:
            with tf.device('/cpu:0'):
                model = _build_two_conv_model()
        else:
            model = _build_two_conv_model()

        sess = tf.compat.v1.Session()
        initialize_uninitialized_vars(sess)
        sim = QuantizationSimModel(sess, ['conv2d_input'], ['conv2d_1/Relu'],
                                   use_cuda=use_cuda)

        # Before encodings: weights in one-shot quantize-dequantize,
        # activations collecting statistics.
        weight_qc_op = sim.session.graph.get_operation_by_name(
            'conv2d/Conv2D/ReadVariableOp_quantized')
        output_qc_op = sim.session.graph.get_operation_by_name(
            'conv2d/Relu_quantized')
        self.assertEqual(
            int(libpymo.TensorQuantizerOpMode.oneShotQuantizeDequantize),
            sim.session.run(weight_qc_op.inputs[1]))
        self.assertEqual(int(libpymo.TensorQuantizerOpMode.updateStats),
                         sim.session.run(output_qc_op.inputs[1]))

        def dummy_forward_pass(sess, eval_tensor_name):
            out = sess.graph.get_tensor_by_name(eval_tensor_name)
            inp = sess.graph.get_tensor_by_name('conv2d_input:0')
            sess.run(out, feed_dict={inp: np.random.randn(20, 28, 28, 3)})

        sim.compute_encodings(dummy_forward_pass, 'conv2d_1/Relu_quantized:0')
        mod_sess = sim.save_to_keras()

        # Check 1: The new graph is well formed. Try forward pass through the graph.
        dummy_forward_pass(mod_sess, 'conv2d_1/Relu_quantized_static:0')

        # Checks 2 & 3: every QcQuantize op is disconnected from the main
        # graph, and each has exactly one QcQuantizeStatic counterpart.
        all_ops = mod_sess.graph.get_operations()
        dynamic_ops = [op for op in all_ops if op.type == "QcQuantize"]
        static_ops = [op for op in all_ops if op.type == "QcQuantizeStatic"]
        for op in dynamic_ops:
            self.assertFalse(op.outputs[0].consumers())
        self.assertEqual(len(dynamic_ops), len(static_ops))

        # Check 4: static ops carry the expected quantizer attributes.
        op = mod_sess.graph.get_operation_by_name(
            "conv2d/Conv2D/ReadVariableOp_quantized_static")
        self.assertEqual(8, op.get_attr("bitwidth"))
        self.assertEqual(1, op.get_attr("quant_scheme"))  # TF-Enhanced
        self.assertEqual(1,
                         op.get_attr("op_mode"))  # oneShotQuantizeDequantize

        op = mod_sess.graph.get_operation_by_name(
            "conv2d/BiasAdd_quantized_static")
        self.assertEqual(3, op.get_attr("op_mode"))  # passThrough

        op = mod_sess.graph.get_operation_by_name(
            "conv2d/Relu_quantized_static")
        self.assertEqual(8, op.get_attr("bitwidth"))
        self.assertEqual(1, op.get_attr("quant_scheme"))  # TF-Enhanced
        self.assertEqual(2, op.get_attr("op_mode"))  # quantizeDequantize

        sess.close()
        sim.session.close()
        del sim
コード例 #15
0
ファイル: test_winnow.py プロジェクト: Rohan-Chaudhury/aimet
    def test_reducing_inceptionV3(self):
        """ Test module reduction in inceptionV3 """
        tf.compat.v1.reset_default_graph()
        sess = tf.compat.v1.Session()

        _ = InceptionV3(weights=None)
        sess.run(tf.compat.v1.global_variables_initializer())

        # (conv op name, input channels to winnow) pairs driving the test.
        graph = tf.compat.v1.get_default_graph()
        winnow_spec = [
            ("conv2d_12/Conv2D", [0, 1, 64, 128, 224]),
            ("conv2d_13/Conv2D", [0, 64, 65, 66, 128, 224]),
            ("conv2d_15/Conv2D", [0, 64, 128, 129, 130, 131, 224]),
            ("conv2d_18/Conv2D", [0, 64, 128, 224, 225, 226, 227, 228]),
        ]
        module_zero_channels_list = [
            (graph.get_operation_by_name(op_name), channels)
            for op_name, channels in winnow_spec
        ]

        input_op_names = ["input_1"]
        output_op_names = ['predictions/Softmax']
        new_sess, ordered_modules_list = winnow.winnow_tf_model(
            sess,
            input_op_names,
            output_op_names,
            module_zero_channels_list,
            reshape=True,
            in_place=True,
            verbose=True)

        # Save and reload modified graph to allow changes to take effect.
        # Only the newly winnowed conv ops get initialized inside
        # winnow_tf_model, so initialize the remaining variables first.
        with new_sess.graph.as_default():
            initialize_uninitialized_vars(new_sess)
        new_sess = save_and_load_graph('./saver', new_sess)

        with new_sess.graph.as_default():
            inp_array = tf.random.uniform(shape=(1, 299, 299, 3)).eval(
                session=new_sess)
            model_input = new_sess.graph.get_tensor_by_name("input_1:0")
            model_output = new_sess.graph.get_tensor_by_name(
                "predictions/Softmax:0")

            def _input_depth(op_name):
                # Channel count of a conv op's input tensor.
                return new_sess.graph.get_operation_by_name(
                    op_name).inputs[0].shape.as_list()[-1]

            def _output_depth(tensor_name):
                # Channel count of a conv op's output tensor.
                return new_sess.graph.get_tensor_by_name(
                    tensor_name).shape.as_list()[-1]

            # Reduced tensor shapes must reflect the winnowed channels.
            self.assertEqual(251, _input_depth("reduced_conv2d_12/Conv2D"))
            self.assertEqual(250, _input_depth("reduced_conv2d_13/Conv2D"))
            self.assertEqual(249, _input_depth("reduced_conv2d_15/Conv2D"))
            self.assertEqual(248, _input_depth("reduced_conv2d_18/Conv2D"))
            self.assertEqual(63, _output_depth("reduced_conv2d_5/Conv2D:0"))
            self.assertEqual(63, _output_depth("reduced_conv2d_7/Conv2D:0"))
            self.assertEqual(95, _output_depth("reduced_conv2d_10/Conv2D:0"))
            self.assertEqual(31, _output_depth("reduced_conv2d_11/Conv2D:0"))
            self.assertEqual(17, len(ordered_modules_list))

            # run through entire model to check no error is produced
            _ = new_sess.run(model_output, feed_dict={model_input: inp_array})
        new_sess.close()
        sess.close()
コード例 #16
0
ファイル: test_winnow.py プロジェクト: Rohan-Chaudhury/aimet
    def test_reducing_resnet_50(self):
        """ Test module reduction in resnet_50 """
        tf.compat.v1.reset_default_graph()
        sess = tf.compat.v1.Session()

        _ = ResNet50(weights=None)
        sess.run(tf.compat.v1.global_variables_initializer())

        # (conv op name, input channels to winnow) pairs driving the test.
        graph = tf.compat.v1.get_default_graph()
        winnow_spec = [
            ("conv2_block1_1_conv/Conv2D", [3, 5, 7]),
            ("conv2_block1_0_conv/Conv2D", [3, 5, 7, 8]),
            ("conv3_block1_1_conv/Conv2D", [3, 5, 7]),
            ("conv3_block1_0_conv/Conv2D", [3, 5, 7, 8]),
        ]
        module_zero_channels_list = [
            (graph.get_operation_by_name(op_name), channels)
            for op_name, channels in winnow_spec
        ]

        input_op_names = ["input_1"]
        output_op_names = ['probs/Softmax']
        new_sess, ordered_modules_list = winnow.winnow_tf_model(
            sess,
            input_op_names,
            output_op_names,
            module_zero_channels_list,
            reshape=True,
            in_place=True,
            verbose=True)

        # Save and reload modified graph to allow changes to take effect.
        # Only the newly winnowed conv ops get initialized inside
        # winnow_tf_model, so initialize the remaining variables first.
        with new_sess.graph.as_default():
            initialize_uninitialized_vars(new_sess)
        new_sess = save_and_load_graph('./saver', new_sess)

        with new_sess.graph.as_default():
            inp_array = tf.random.uniform(shape=(1, 224, 224, 3)).eval(
                session=new_sess)
        model_input = new_sess.graph.get_tensor_by_name("input_1:0")
        model_output = new_sess.graph.get_tensor_by_name("probs/Softmax:0")

        def _input_depth(op_name):
            # Channel count of a conv op's input tensor.
            return new_sess.graph.get_operation_by_name(
                op_name).inputs[0].shape.as_list()[-1]

        def _output_depth(tensor_name):
            # Channel count of a conv op's output tensor.
            return new_sess.graph.get_tensor_by_name(
                tensor_name).shape.as_list()[-1]

        # check that reduced tensor shapes are as expected
        self.assertEqual(253,
                         _input_depth("reduced_conv3_block1_1_conv/Conv2D"))
        self.assertEqual(252,
                         _input_depth("reduced_conv3_block1_0_conv/Conv2D"))
        self.assertEqual(
            253, _output_depth("reduced_conv2_block3_3_conv/Conv2D:0"))
        self.assertEqual(61,
                         _input_depth("reduced_conv2_block1_1_conv/Conv2D"))
        self.assertEqual(60,
                         _input_depth("reduced_conv2_block1_0_conv/Conv2D"))
        self.assertEqual(61, _output_depth("reduced_conv1_conv/Conv2D:0"))

        # run through entire model to check no error is produced
        _ = new_sess.run(model_output, feed_dict={model_input: inp_array})
        self.assertEqual(11, len(ordered_modules_list))
        new_sess.close()
        sess.close()