예제 #1
0
    def _update_statistics(self, baseline_accuracy, best_model,
                           best_compressed_layers, rank_stats_list,
                           best_index):
        """ Assemble the SVD statistics once the svd compression is completed.

        The compressed model is evaluated with the stored run-model callback, the cost of
        the whole network is computed from the layer database, and both the memory and the
        MAC compression ratios are derived from the compressed layers.

        :param baseline_accuracy: Accuracy of the model before it was compressed.
        :param best_model: Compressed model that is evaluated to obtain the compressed accuracy.
        :param best_compressed_layers: Compressed layers used to compute the compression ratios.
        :param rank_stats_list: A list of Rank index specific SVD statistics
        :param best_index: The best rank index that was used to compress the model.
        :return: SvdStatistics object that contains all of the SVD Statistics
        """

        # Accuracy of the compressed model, measured with the user supplied evaluation callback
        comp_accuracy = self._run_model(best_model, self._run_model_iterations, self._use_cuda)

        # Cost of the complete network, used as the reference for both ratios
        total_cost = cc.CostCalculator().compute_model_cost(self._layer_database)

        compute_ratio = ModelStats.compute_compression_ratio
        mem_ratio = compute_ratio(best_compressed_layers, CostMetric.memory, total_cost)
        mac_ratio = compute_ratio(best_compressed_layers, CostMetric.mac, total_cost)

        return stats_u.SvdStatistics(base_accuracy=baseline_accuracy,
                                     comp_accuracy=comp_accuracy,
                                     cost_metric=self._metric,
                                     best_index=best_index,
                                     mem_comp_ratio=mem_ratio,
                                     mac_comp_ratio=mac_ratio,
                                     rank_stats_list=rank_stats_list)
예제 #2
0
    def test_total_model_cost(self):
        """
        Check the total memory and MAC cost computed for the MNIST TensorFlow model.

        The session, the default graph and the temporary directory are cleaned up in a
        ``finally`` block so that a failing assertion does not leak them into other tests.
        """

        # let the GPU allocation grow on demand instead of being reserved up front
        config = tf.compat.v1.ConfigProto()
        config.gpu_options.allow_growth = True

        # create session with its own graph
        sess = tf.compat.v1.Session(graph=tf.Graph(), config=config)

        try:
            with sess.graph.as_default():
                # by default, model will be constructed in default graph
                _ = mnist_tf_model.create_model(data_format='channels_last')
                sess.run(tf.compat.v1.global_variables_initializer())

            layer_database = LayerDatabase(model=sess,
                                           input_shape=(1, 28, 28, 1),
                                           working_dir=None)

            cost_calc = cc.CostCalculator()
            network_cost = cost_calc.compute_model_cost(layer_database)

            self.assertEqual(800 + 51200 + 3211264 + 10240, network_cost.memory)
            self.assertEqual(627200 + 10035200 + 3211264 + 10240, network_cost.mac)
        finally:
            # close the session before resetting the default graph
            sess.close()
            tf.compat.v1.reset_default_graph()
            # delete temp directory; best effort so that a missing directory
            # cannot hide the real failure of the test
            shutil.rmtree('./temp_meta/', ignore_errors=True)
    def compute_per_layer_compression_ratio(orig_layer, split_layers, metric):
        """
        Computes the compression ratio achieved by splitting a single layer

        The ratio is the cost saved by the split layers divided by the cost of the
        original layer, using either the memory or the MAC component of the cost.

        :param orig_layer: The layer before it was split
        :param split_layers: List of split layers
        :param metric: Cost metric; CostMetric.memory selects memory, anything else selects MAC
        :return: The compression ratio of split layers
        """
        calculator = cc.CostCalculator()
        original_cost = calculator.compute_layer_cost(orig_layer)

        # Accumulate the cost of all the layers that replace the original one
        combined_split_cost = cc.Cost(0, 0)
        for split_layer in split_layers:
            combined_split_cost += calculator.compute_layer_cost(split_layer)

        saved = original_cost - combined_split_cost

        if metric is CostMetric.memory:
            memory_ratio = saved.memory / original_cost.memory
            logger.debug('Original Layer Cost: %i   Memory Compression Ratio: %f', original_cost.memory, memory_ratio)
            return memory_ratio

        mac_ratio = saved.mac / original_cost.mac
        logger.debug('Original Layer Cost: %i   MAC Compression Ratio: %f', original_cost.mac, mac_ratio)
        return mac_ratio
예제 #4
0
    def test_total_model_cost(self):
        """
        Check the total memory and MAC cost computed for the sequential MNIST model on CPU.
        """

        logger.debug(self.id())

        mnist_model = MnistSequentialModel().to("cpu")
        database = lad.LayerDatabase(model=mnist_model,
                                     input_shape=(1, 1, 28, 28))

        total_cost = cc.CostCalculator().compute_model_cost(database)

        # Expected totals are written as sums of four terms (presumably one per layer)
        expected_memory = 800 + 51200 + 3211264 + 10240
        expected_mac = 627200 + 10035200 + 3211264 + 10240

        self.assertEqual(expected_memory, total_cost.memory)
        self.assertEqual(expected_mac, total_cost.mac)
예제 #5
0
    def split_manual_rank(self, model, run_model, run_model_iterations,
                          use_cuda, metric, database, **kw_args):
        """
        Compress the model with manually supplied per-layer ranks and collect the statistics.

        :param model: The original model
        :param run_model: Method to run evaluation on model
        :param run_model_iterations: Number of iterations for run_model
        :param use_cuda: Model is on GPU or not
        :param metric: cost metric
        :param database: Layer attribute database reference
        :param kw_args: must contain 'layer_rank_list', an iterable of (layer, rank) pairs
        :return: Tuple of (list holding the single per rank index statistics entry,
                 dictionary mapping module name to a (rank, 0) pair)
        """
        # pylint: disable=too-many-locals

        total_cost = cc.CostCalculator().compute_model_cost(database)
        requested_ranks = kw_args['layer_rank_list']

        # Translate the requested layer objects into their names inside the model
        rank_pairs_by_name = {}
        for module_name, candidate in model.named_modules():
            for target_layer, target_rank in requested_ranks:
                if target_layer is candidate:
                    rank_pairs_by_name[module_name] = (target_rank, 0)

        pruner = svd_pruner_deprecated.ModelPruner()
        compressed_model, compressed_layers, per_layer_stats = pruner.create_compressed_model(
            svd_rank_pair_dict=rank_pairs_by_name,
            model=model,
            compressible_layers=database.get_compressible_layers(),
            svd_lib_ref=self._svd_lib_ref,
            metric=metric)

        accuracy = run_model(compressed_model, run_model_iterations, use_cuda)
        ratio = MS.ModelStats.compute_compression_ratio(compressed_layers, metric, total_cost)

        # Manual selection produces exactly one entry, reported as rank index 0
        per_rank_stats = stats_u.SvdStatistics.PerRankIndex(
            rank_index=0,
            model_accuracy=accuracy,
            model_compression_ratio=ratio,
            layer_stats_list=per_layer_stats)

        return [per_rank_stats], rank_pairs_by_name
    def compute_compression_ratio(compressed_layers, cost_metric, network_cost):
        """
        Computes the compression ratio of a model

        The ratio is (network cost - compressed cost) / network cost for the selected metric.

        :param compressed_layers: layers which are compressed
        :param cost_metric: CostMetric.memory selects memory; any other value selects mac
        :param network_cost: mac and memory cost calculated for the entire network
        :return: It returns the compression ratio for a network
        """
        compressed_cost = cc.CostCalculator().compute_network_cost(compressed_layers)

        # Pick the component of both costs that matches the requested metric
        if cost_metric is CostMetric.memory:
            reference, compressed = network_cost.memory, compressed_cost.memory
        else:
            reference, compressed = network_cost.mac, compressed_cost.mac

        return (reference - compressed) / reference
예제 #7
0
    def choose_best_rank(self, model, run_model, run_model_iterations,
                         use_cuda, metric, error_margin, baseline_perf,
                         num_rank_indices, database):
        """
        Compress the model once per candidate rank index and pick the rank index with the
        lowest objective score whose accuracy stays within the error margin.

        The search stops at the first rank index whose accuracy drops more than
        error_margin percent below the baseline; that rank index is not recorded in the
        returned statistics.

        :param model: Original model
        :param run_model: Method to run evaluation on model
        :param run_model_iterations: Number of iterations for run_model
        :param use_cuda: Model is on GPU or not
        :param metric: cost metric
        :param error_margin: permissible error (in percent) allowed for rank selection
        :param baseline_perf: original model's accuracy
        :param num_rank_indices: number of rank indices
        :param database: reference to Layer Attribute Database
        :return: Tuple of (best rank index, dictionary of layer name to (rank, 0) pair for
                 the best rank index, list of per rank index statistics)
        :raises RuntimeError: if no rank index keeps the accuracy within the error margin
        """
        # pylint: disable=too-many-arguments, too-many-locals

        num_rank_indices = self._select_candidate_ranks(num_rank_indices)
        cost_calc = cc.CostCalculator()
        network_cost = cost_calc.compute_model_cost(database)
        # Ranks are in order from least compression to highest
        best_index = None
        svd_rank_pair_dict_best_index = None
        min_objective_score = None

        # List to hold the SVD Statistics for all the Rank indices
        rank_stats_list = []
        ms = MS.ModelStats

        for rank_index in range(num_rank_indices):
            svd_rank_pair_dict = {}
            for layer in database.get_selected_layers():

                # Get the candidate ranks for given rank index
                svd_ranks = self._svd_lib_ref.GetCandidateRanks(
                    str(layer.name), rank_index)
                svd_rank_pair_dict[layer.name] = (svd_ranks[0], 0)

            # Compress the model given a rank index
            compressed_model, compressed_layers, layer_stats_list = svd_pruner_deprecated.ModelPruner(
            ).create_compressed_model(
                svd_rank_pair_dict=svd_rank_pair_dict,
                model=model,
                compressible_layers=database.get_compressible_layers(),
                svd_lib_ref=self._svd_lib_ref,
                metric=metric)

            # Estimate relative compression score for this rank_index
            compression_score = ms.compute_compression_ratio(
                compressed_layers, metric, network_cost)
            logger.debug('Rank Index: %i, Compression Score: %f', rank_index,
                         compression_score)

            # Get accuracy for the compressed model
            model_perf = run_model(compressed_model, run_model_iterations,
                                   use_cuda)

            objective_score = ms.compute_objective_score(
                model_perf, compression_score, error_margin, baseline_perf)

            logger.info(
                'Compressed network with rank_index %i/%i: accuracy = %f percent '
                'with %f percent compression (%r option) and an objective score of %f',
                rank_index, num_rank_indices, model_perf * 100,
                compression_score * 100, metric, objective_score)

            # Compare against None explicitly: an objective score of 0.0 is a valid minimum
            # and must not trigger a re-initialization on the next iteration
            if min_objective_score is None:
                min_objective_score = objective_score
                logger.info(
                    'Initializing objective score to %f at rank index %i',
                    min_objective_score, rank_index)

            if model_perf + error_margin / 100 < baseline_perf:
                logger.info(
                    'Model performance %f falls below %f percent of baseline performance %f'
                    ' Ending rank selection', model_perf * 100, error_margin,
                    baseline_perf * 100)
                break

            # '<=' lets a later rank index (more compression) win a tie
            if objective_score <= min_objective_score:
                min_objective_score = objective_score
                logger.info(
                    'Found a better value for the objective score %f at rank_index %i',
                    min_objective_score, rank_index)
                best_index = rank_index
                svd_rank_pair_dict_best_index = svd_rank_pair_dict

            # Create the Per Rank Index Statistics object.
            rank_data = stats_u.SvdStatistics.PerRankIndex(
                rank_index=rank_index,
                model_accuracy=model_perf,
                model_compression_ratio=compression_score,
                layer_stats_list=layer_stats_list)
            rank_stats_list.append(rank_data)

        # Rank index 0 is a valid result, so only None means that nothing was found
        if best_index is None:
            raise RuntimeError(
                'No suitable ranks found to compress model within defined error bounds.'
            )

        return best_index, svd_rank_pair_dict_best_index, rank_stats_list
    def _pick_compression_layers(self, cost_metric, layer_select_scheme,
                                 **kwargs):
        """
        Function to pick the layers to compress based on the selection scheme provided by user.

        Layers rejected by the dimension check are dropped first. The remaining layers are
        sorted by their cost for the given metric (highest first) and then filtered
        according to the selection scheme.

        :param cost_metric: CostMetric member; memory ranks by memory cost, otherwise MAC cost is used
        :param layer_select_scheme: LayerSelectionScheme member (top_n_layers, top_x_percent or manual)
        :param kwargs: scheme specific options: 'num_layers' for top_n_layers,
                       'percent_thresh' for top_x_percent, 'layers_to_compress' for manual
        :return: List of the selected layers
        :raises TypeError: if cost_metric or layer_select_scheme has the wrong type
        :raises RuntimeError: if no layer ends up being selected
        """
        # pylint: disable=too-many-locals, too-many-branches

        # Sanity check
        if not isinstance(cost_metric, CostMetric):
            raise TypeError("cost_metric is not of type CostMetric")
        if not isinstance(layer_select_scheme, LayerSelectionScheme):
            raise TypeError(
                "layer_selection_scheme is not of type Svd.LayerSelectionScheme"
            )

        def metric_cost(cost):
            """ Return the component of a cost object that matches cost_metric """
            return cost.memory if cost_metric == CostMetric.memory else cost.mac

        # Heuristic1: Reject any ops whose param shape does not meet a base criterion
        candidates = []
        for entry in self._layer_database:
            if not self._check_layer_with_smaller_dimensions(entry.module):
                logger.debug("Pruning out %r: shape is %r", entry.module,
                             entry.module.weight.size())
                continue
            candidates.append(entry)

        # Pair every remaining layer with its cost, most expensive layer first
        ranked = [(entry, cc.CostCalculator.compute_layer_cost(entry))
                  for entry in candidates]
        ranked.sort(key=lambda pair: metric_cost(pair[1]), reverse=True)

        selected = []

        if layer_select_scheme == LayerSelectionScheme.top_n_layers:
            num_layers = kwargs['num_layers']
            selected = [entry for entry, _ in ranked[:num_layers]]

        elif layer_select_scheme == LayerSelectionScheme.top_x_percent:
            percent_thresh = kwargs['percent_thresh']
            # get the network cost for the requested metric
            model_cost = cc.CostCalculator().compute_model_cost(self._layer_database)
            total_cost = metric_cost(model_cost)
            running_cost = 0.
            logger.debug("Total network cost: %f", total_cost)
            logger.debug(
                "Picking layers contributing to : %f (percent) of total network cost.",
                percent_thresh)

            # A layer that would exceed the threshold is skipped; cheaper layers that
            # follow are still considered
            for entry, entry_cost in ranked:
                value = metric_cost(entry_cost)
                if (100 * (value + running_cost) / total_cost) < percent_thresh:
                    running_cost += value
                    selected.append(entry)
                    logger.debug("Layer Picked : %s with cost : %f",
                                 entry.module, value)
                    logger.debug("Accumulated cost so far : %f", running_cost)

        elif layer_select_scheme == LayerSelectionScheme.manual:
            layers_to_compress = kwargs['layers_to_compress']
            selected = [entry for entry, _ in ranked
                        if entry.module in layers_to_compress]

        if not selected:
            raise RuntimeError('No suitable layers found in the model.')

        return selected