def _update_statistics(self, baseline_accuracy, best_model, best_compressed_layers, rank_stats_list, best_index):
    """
    Updates the SVD statistics after the SVD compression is completed.

    :param baseline_accuracy: Accuracy of the model before it was compressed
    :param best_model: The compressed model obtained with the best rank index
    :param best_compressed_layers: The compressed layers of the best model
    :param rank_stats_list: A list of rank-index-specific SVD statistics
    :param best_index: The best rank index that was used to compress the model
    :return: SvdStatistics object that contains all of the SVD statistics
    """
    compressed_model_accuracy = self._run_model(best_model, self._run_model_iterations, self._use_cuda)

    cost_calc = cc.CostCalculator()
    network_cost = cost_calc.compute_model_cost(self._layer_database)

    memory_compression_ratio = ModelStats.compute_compression_ratio(best_compressed_layers,
                                                                    CostMetric.memory, network_cost)
    mac_compression_ratio = ModelStats.compute_compression_ratio(best_compressed_layers,
                                                                 CostMetric.mac, network_cost)

    stats = stats_u.SvdStatistics(base_accuracy=baseline_accuracy,
                                  comp_accuracy=compressed_model_accuracy,
                                  cost_metric=self._metric,
                                  best_index=best_index,
                                  mem_comp_ratio=memory_compression_ratio,
                                  mac_comp_ratio=mac_compression_ratio,
                                  rank_stats_list=rank_stats_list)
    return stats
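# Illustrative note on why both ratios above are recorded: for the same compressed layers the
# memory and MAC compression ratios usually differ, since memory scales with parameter counts
# while MACs also depend on activation sizes. A minimal sketch with made-up network costs
# (none of these numbers come from a real run):
network_memory, network_mac = 4000000, 16000000
compressed_memory, compressed_mac = 500000, 13000000
mem_comp_ratio = (network_memory - compressed_memory) / network_memory   # 0.875
mac_comp_ratio = (network_mac - compressed_mac) / network_mac            # 0.1875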
def test_total_model_cost(self):
    # create a tf.compat.v1.Session and initialize the model variables
    config = tf.compat.v1.ConfigProto()
    config.gpu_options.allow_growth = True

    # create session with graph
    sess = tf.compat.v1.Session(graph=tf.Graph(), config=config)

    with sess.graph.as_default():
        # by default, model will be constructed in default graph
        _ = mnist_tf_model.create_model(data_format='channels_last')
        sess.run(tf.compat.v1.global_variables_initializer())

    layer_database = LayerDatabase(model=sess, input_shape=(1, 28, 28, 1), working_dir=None)

    cost_calc = cc.CostCalculator()
    network_cost = cost_calc.compute_model_cost(layer_database)

    self.assertEqual(800 + 51200 + 3211264 + 10240, network_cost.memory)
    self.assertEqual(627200 + 10035200 + 3211264 + 10240, network_cost.mac)

    tf.compat.v1.reset_default_graph()
    sess.close()
    # delete temp directory
    shutil.rmtree('./temp_meta/')
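# The expected values in the assertions above are the per-layer weight costs of the MNIST model.
# A sketch of the arithmetic, assuming the usual two-conv / two-FC MNIST topology with 5x5
# kernels, 'same' padding and 2x2 max pooling (layer names here are illustrative only; the
# cost calculator evidently counts weight parameters and ignores biases):
conv1_params = 5 * 5 * 1 * 32       # 800
conv2_params = 5 * 5 * 32 * 64      # 51200
fc1_params = 7 * 7 * 64 * 1024      # 3211264
fc2_params = 1024 * 10              # 10240

memory_cost = conv1_params + conv2_params + fc1_params + fc2_params     # 3273504

# MAC cost: conv layers multiply their parameter count by the output spatial size,
# fully-connected layers contribute their parameter count directly.
mac_cost = (conv1_params * 28 * 28      # 627200
            + conv2_params * 14 * 14    # 10035200
            + fc1_params                # 3211264
            + fc2_params)               # 10240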
def compute_per_layer_compression_ratio(orig_layer, split_layers, metric):
    """
    Computes the compression ratio achieved by splitting a single layer.

    :param orig_layer: The layer before it was split
    :param split_layers: List of split layers
    :param metric: Cost metric
    :return: The compression ratio of the split layers
    """
    cost_calc = cc.CostCalculator()
    orig_layer_cost = cost_calc.compute_layer_cost(orig_layer)

    split_layers_cost = cc.Cost(0, 0)
    for layer in split_layers:
        split_cost = cost_calc.compute_layer_cost(layer)
        split_layers_cost += split_cost

    savings = orig_layer_cost - split_layers_cost

    if metric is CostMetric.memory:
        ratio = savings.memory / orig_layer_cost.memory
        logger.debug('Original Layer Cost: %i Memory Compression Ratio: %f', orig_layer_cost.memory, ratio)
    else:
        ratio = savings.mac / orig_layer_cost.mac
        logger.debug('Original Layer Cost: %i MAC Compression Ratio: %f', orig_layer_cost.mac, ratio)

    return ratio
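# Worked example with illustrative numbers (not taken from a real layer): an SVD split of a
# fully-connected layer with a 1024 x 512 weight matrix at rank r = 64 yields two layers with
# 1024 x 64 and 64 x 512 weights. With the memory metric, the ratio computed above would be:
orig_cost = 1024 * 512                         # 524288 weights
split_cost = 1024 * 64 + 64 * 512              # 98304 weights
ratio = (orig_cost - split_cost) / orig_cost   # 0.8125, i.e. ~81 percent of memory saved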
def test_total_model_cost(self):
    logger.debug(self.id())
    model = MnistSequentialModel().to("cpu")

    layer_database = lad.LayerDatabase(model=model, input_shape=(1, 1, 28, 28))

    cost_calc = cc.CostCalculator()
    network_cost = cost_calc.compute_model_cost(layer_database)

    self.assertEqual(800 + 51200 + 3211264 + 10240, network_cost.memory)
    self.assertEqual(627200 + 10035200 + 3211264 + 10240, network_cost.mac)
def split_manual_rank(self, model, run_model, run_model_iterations, use_cuda, metric, database, **kw_args):
    """
    Splits the user-specified layers with the user-specified ranks and evaluates the resulting model.

    :param model: The original model
    :param run_model: Method to run evaluation on model
    :param run_model_iterations: Number of iterations for run_model
    :param use_cuda: Whether the model is run on GPU
    :param metric: Cost metric
    :param database: Layer attribute database reference
    :param kw_args: Keyword arguments; must contain 'layer_rank_list'
    :return: List with a single per-rank-index statistics entry, and the layer-name to (rank, 0) dictionary
    """
    # pylint: disable=too-many-locals
    cost_calc = cc.CostCalculator()
    network_cost = cost_calc.compute_model_cost(database)
    layer_rank_list = kw_args['layer_rank_list']

    svd_rank_pair_dict = {}
    for name, module in model.named_modules():
        for layer, rank in layer_rank_list:
            if layer is module:
                svd_rank_pair_dict[name] = (rank, 0)

    compressed_model, compressed_model_layers, layer_stats_list = \
        svd_pruner_deprecated.ModelPruner().create_compressed_model(svd_rank_pair_dict=svd_rank_pair_dict,
                                                                    model=model,
                                                                    compressible_layers=database.get_compressible_layers(),
                                                                    svd_lib_ref=self._svd_lib_ref,
                                                                    metric=metric)
    model_perf = run_model(compressed_model, run_model_iterations, use_cuda)

    ms = MS.ModelStats
    compression_score = ms.compute_compression_ratio(compressed_model_layers, metric, network_cost)
    rank_data = stats_u.SvdStatistics.PerRankIndex(rank_index=0, model_accuracy=model_perf,
                                                   model_compression_ratio=compression_score,
                                                   layer_stats_list=layer_stats_list)
    rank_data_list = [rank_data]

    return rank_data_list, svd_rank_pair_dict
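# Hypothetical usage sketch for split_manual_rank; every name below (svd_impl, evaluate,
# layer_db, and the chosen layers/ranks) is an assumption for illustration, not part of this
# module. Each 'layer_rank_list' entry pairs a module object with the rank to use for its split.
def run_manual_split(svd_impl, model, evaluate, layer_db):
    layer_rank_list = [(model.conv2, 31), (model.fc1, 28)]
    return svd_impl.split_manual_rank(model,
                                      run_model=evaluate,
                                      run_model_iterations=1,
                                      use_cuda=False,
                                      metric=CostMetric.memory,
                                      database=layer_db,
                                      layer_rank_list=layer_rank_list)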
def compute_compression_ratio(compressed_layers, cost_metric, network_cost):
    """
    Computes the compression ratio of a model.

    :param compressed_layers: Layers which are compressed
    :param cost_metric: Cost metric (memory or mac)
    :param network_cost: Memory and MAC cost calculated for the entire network
    :return: The compression ratio for the network
    """
    cost_calc = cc.CostCalculator()
    compressed_model_cost = cost_calc.compute_network_cost(compressed_layers)

    if cost_metric is CostMetric.memory:
        savings = network_cost.memory - compressed_model_cost.memory
        ratio = savings / network_cost.memory
    else:
        savings = network_cost.mac - compressed_model_cost.mac
        ratio = savings / network_cost.mac

    return ratio
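# Worked example with illustrative numbers: the ratio is the fraction of the original network
# cost that was eliminated, so 0 means no compression and values near 1 mean aggressive
# compression.
network_memory = 1000000
compressed_memory = 600000
ratio = (network_memory - compressed_memory) / network_memory   # 0.4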
def choose_best_rank(self, model, run_model, run_model_iterations, use_cuda, metric, error_margin,
                     baseline_perf, num_rank_indices, database):
    """
    Chooses the rank index that gives the best trade-off between accuracy and compression.

    :param model: Original model
    :param run_model: Method to run evaluation on model
    :param run_model_iterations: Number of iterations for run_model
    :param use_cuda: Whether the model is run on GPU
    :param metric: Cost metric
    :param error_margin: Permissible accuracy drop, in percent, allowed during rank selection
    :param baseline_perf: Original model's accuracy
    :param num_rank_indices: Number of rank indices
    :param database: Reference to the layer attribute database
    :return: Best rank index, the corresponding layer-name to rank dictionary, and the per-rank statistics list
    """
    # pylint: disable=too-many-arguments, too-many-locals
    num_rank_indices = self._select_candidate_ranks(num_rank_indices)
    cost_calc = cc.CostCalculator()
    network_cost = cost_calc.compute_model_cost(database)

    # Ranks are in order from least compression to highest
    best_index = None
    min_objective_score = None

    # List to hold the SVD statistics for all the rank indices
    rank_stats_list = list()
    for rank_index in range(num_rank_indices):
        svd_rank_pair_dict = {}
        for layer in database.get_selected_layers():
            # Get the candidate ranks for the given rank index
            svd_ranks = self._svd_lib_ref.GetCandidateRanks(str(layer.name), rank_index)
            svd_rank_pair_dict[layer.name] = (svd_ranks[0], 0)

        # Compress the model given a rank index
        compressed_model, compressed_layers, layer_stats_list = \
            svd_pruner_deprecated.ModelPruner().create_compressed_model(svd_rank_pair_dict=svd_rank_pair_dict,
                                                                        model=model,
                                                                        compressible_layers=database.get_compressible_layers(),
                                                                        svd_lib_ref=self._svd_lib_ref,
                                                                        metric=metric)
        ms = MS.ModelStats

        # Estimate relative compression score for this rank_index
        compression_score = ms.compute_compression_ratio(compressed_layers, metric, network_cost)
        logger.debug('Rank Index: %i, Compression Score: %f', rank_index, compression_score)

        # Get accuracy for the compressed model
        model_perf = run_model(compressed_model, run_model_iterations, use_cuda)

        model_accuracy = model_perf
        model_compression_ratio = compression_score

        objective_score = ms.compute_objective_score(model_perf, compression_score, error_margin, baseline_perf)

        logger.info('Compressed network with rank_index %i/%i: accuracy = %f percent '
                    'with %f percent compression (%r option) and an objective score of %f',
                    rank_index, num_rank_indices, model_perf * 100, compression_score * 100,
                    metric, objective_score)

        if min_objective_score is None:
            min_objective_score = objective_score
            logger.info('Initializing objective score to %f at rank index %i', min_objective_score, rank_index)

        if model_perf + error_margin / 100 < baseline_perf:
            logger.info('Model performance %f percent is more than %f percent below the baseline '
                        'performance of %f percent. Ending rank selection',
                        model_perf * 100, error_margin, baseline_perf * 100)
            break

        if objective_score <= min_objective_score:
            min_objective_score = objective_score
            logger.info('Found a better value for the objective score %f at rank_index %i',
                        min_objective_score, rank_index)
            best_index = rank_index
            svd_rank_pair_dict_best_index = svd_rank_pair_dict

        # Create the per-rank-index statistics object
        rank_data = stats_u.SvdStatistics.PerRankIndex(rank_index=rank_index, model_accuracy=model_accuracy,
                                                       model_compression_ratio=model_compression_ratio,
                                                       layer_stats_list=layer_stats_list)
        rank_stats_list.append(rank_data)

    if best_index is None:
        raise RuntimeError('No suitable ranks found to compress model within defined error bounds.')

    return best_index, svd_rank_pair_dict_best_index, rank_stats_list
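# Illustrative check of the early-exit rule above (numbers are made up): with a baseline
# accuracy of 0.98 and an error_margin of 1 (percent), rank selection stops as soon as the
# compressed accuracy drops below 0.97.
baseline_perf, error_margin = 0.98, 1.0
model_perf = 0.965
stop_search = model_perf + error_margin / 100 < baseline_perf   # True -> break out of the rank loop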
def _pick_compression_layers(self, cost_metric, layer_select_scheme, **kwargs):
    """
    Picks the top N layers based on the selection scheme and threshold provided by the user, and stores
    layer attributes needed for model optimization (MO), such as the shape of the weight matrix,
    activation dimensions, weights and biases.

    :param cost_metric: Cost metric (memory or mac) used to rank the layers
    :param layer_select_scheme: Layer selection scheme (top_n_layers, top_x_percent or manual)
    :param kwargs: Scheme-specific arguments: 'num_layers', 'percent_thresh' or 'layers_to_compress'
    :return: List of selected layers
    """
    # pylint: disable=too-many-locals, too-many-branches
    # Sanity check
    if not isinstance(cost_metric, CostMetric):
        raise TypeError("cost_metric is not of type CostMetric")

    if not isinstance(layer_select_scheme, LayerSelectionScheme):
        raise TypeError("layer_selection_scheme is not of type Svd.LayerSelectionScheme")

    # register custom hook for the model with run_graph provided by user
    # if the user wants to experiment with custom hook, we can support that option by
    # exposing the hook parameter to compress_net method
    pruned_list = []
    # cache the layer attributes list for further processing
    for layer in self._layer_database:
        # Heuristic 1: Reject any ops whose param shape does not meet a base criterion
        if self._check_layer_with_smaller_dimensions(layer.module):
            pruned_list.append(layer)
        else:
            logger.debug("Pruning out %r: shape is %r", layer.module, layer.module.weight.size())

    # Reset list of layers for the next phase
    layers = pruned_list
    pruned_list = []

    # Create a list of (layer, cost) tuples
    layer_cost_pairs = []
    for layer in layers:
        cost = cc.CostCalculator.compute_layer_cost(layer)
        layer_cost_pairs.append((layer, cost))

    # Sort list of layer-cost pairs
    if cost_metric == CostMetric.memory:
        layer_cost_pairs.sort(key=lambda x: x[1].memory, reverse=True)
    else:
        layer_cost_pairs.sort(key=lambda x: x[1].mac, reverse=True)

    if layer_select_scheme == LayerSelectionScheme.top_n_layers:
        num_layers = kwargs['num_layers']
        pruned_list_of_pairs = layer_cost_pairs[:num_layers]
        pruned_list = [pair[0] for pair in pruned_list_of_pairs]

    elif layer_select_scheme == LayerSelectionScheme.top_x_percent:
        percent_thresh = kwargs['percent_thresh']

        # get the network cost for memory and MAC
        cost_calc = cc.CostCalculator()
        network_cost = cost_calc.compute_model_cost(self._layer_database)
        network_cost = network_cost.memory if cost_metric == CostMetric.memory else network_cost.mac

        accum_cost = 0.
        logger.debug("Total network cost: %f", network_cost)
        logger.debug("Picking layers contributing to: %f (percent) of total network cost.", percent_thresh)

        for layer, cost in layer_cost_pairs:
            cost = cost.memory if (cost_metric == CostMetric.memory) else cost.mac
            if (100 * (cost + accum_cost) / network_cost) < percent_thresh:
                accum_cost += cost
                pruned_list.append(layer)
                logger.debug("Layer picked: %s with cost: %f", layer.module, cost)
                logger.debug("Accumulated cost so far: %f", accum_cost)

    elif layer_select_scheme == LayerSelectionScheme.manual:
        layers_to_compress = kwargs['layers_to_compress']
        for layer, _ in layer_cost_pairs:
            if layer.module in layers_to_compress:
                pruned_list.append(layer)

    if not pruned_list:
        raise RuntimeError('No suitable layers found in the model.')

    return pruned_list
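# Minimal sketch of the top_x_percent rule above, with made-up layer costs: layers are visited
# in descending cost order and kept only while the running total stays under the percentage
# threshold of the whole network cost.
layer_costs = [600, 250, 100, 50]     # hypothetical per-layer costs, already sorted
network_cost = sum(layer_costs)       # 1000
percent_thresh = 90
picked, accum_cost = [], 0
for cost in layer_costs:
    if 100 * (cost + accum_cost) / network_cost < percent_thresh:
        accum_cost += cost
        picked.append(cost)
# picked == [600, 250]; either remaining layer would push the total to or past the 90 percent threshold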