def test_calculate_weight_svd_cost_all_layers(self):

    model = mnist_model.Net().to("cpu")
    print(model)

    layer_database = lad.LayerDatabase(model=model, input_shape=(1, 1, 28, 28))

    # Compress all layers by 50%
    # Create a list of tuples of (layer, comp_ratio); Conv2d and Linear layers
    # both get the same 0.5 comp-ratio
    layer_ratio_list = []
    for layer in layer_database:
        layer_ratio_list.append(LayerCompRatioPair(layer, Decimal('0.5')))

    compressed_cost = cc.WeightSvdCostCalculator.calculate_compressed_cost(layer_database,
                                                                           layer_ratio_list,
                                                                           CostMetric.mac)

    self.assertEqual(7031800, compressed_cost.mac)
def test_sort_on_occurrence(self):

    class Net(nn.Module):
        def __init__(self):
            super(Net, self).__init__()
            self.conv1 = nn.Conv2d(1, 10, kernel_size=3)
            self.conv2 = nn.Conv2d(10, 10, kernel_size=3)
            self.conv3 = nn.Conv2d(10, 10, kernel_size=3)
            self.conv4 = nn.Conv2d(10, 10, kernel_size=3)
            self.fc1 = nn.Linear(490, 300)
            self.fc2 = nn.Linear(300, 10)

        def forward(self, x):
            x = functional.relu(functional.max_pool2d(self.conv1(x), 2))
            x = functional.relu(self.conv2(x))
            x = functional.relu(self.conv3(x))
            x = functional.relu(self.conv4(x))
            x = x.view(x.size(0), -1)
            x = functional.relu(self.fc1(x))
            x = self.fc2(x)
            return functional.log_softmax(x, dim=1)

    orig_model = Net()

    data_loader = unittest.mock.MagicMock()
    number_of_batches = unittest.mock.MagicMock()

    input_channel_pruner = InputChannelPruner(data_loader=data_loader, input_shape=(1, 1, 28, 28),
                                              num_reconstruction_samples=number_of_batches,
                                              allow_custom_downsample_ops=True)

    # pairs deliberately supplied out of model order
    layer_comp_ratio_list = [LayerCompRatioPair(Layer(orig_model.conv4, None, None), None),
                             LayerCompRatioPair(Layer(orig_model.conv1, None, None), None),
                             LayerCompRatioPair(Layer(orig_model.conv3, None, None), None),
                             LayerCompRatioPair(Layer(orig_model.conv2, None, None), None)]

    sorted_layer_comp_ratio_list = input_channel_pruner._sort_on_occurrence(orig_model,
                                                                            layer_comp_ratio_list)

    self.assertEqual(sorted_layer_comp_ratio_list[0].layer.module, orig_model.conv1)
    self.assertEqual(sorted_layer_comp_ratio_list[1].layer.module, orig_model.conv2)
    self.assertEqual(sorted_layer_comp_ratio_list[2].layer.module, orig_model.conv3)
    self.assertEqual(sorted_layer_comp_ratio_list[3].layer.module, orig_model.conv4)
    for pair in sorted_layer_comp_ratio_list:
        self.assertTrue(isinstance(pair.layer, Layer))
def test_prune_model_2_layers(self):
    """ Pruning two layers with 0.5 comp-ratio in MNIST """

    # create tf.compat.v1.Session and initialize the weights and biases with zeros
    config = tf.compat.v1.ConfigProto()
    config.gpu_options.allow_growth = True

    # create session with graph
    sess = tf.compat.v1.Session(graph=tf.Graph(), config=config)

    with sess.graph.as_default():
        # by default, model will be constructed in default graph
        _ = mnist_tf_model.create_model(data_format='channels_last')
        sess.run(tf.compat.v1.global_variables_initializer())

    # Create a layer database
    orig_layer_db = LayerDatabase(model=sess, input_shape=(1, 28, 28, 1), working_dir=None)
    conv1 = orig_layer_db.find_layer_by_name('conv2d/Conv2D')
    conv2 = orig_layer_db.find_layer_by_name('conv2d_1/Conv2D')

    layer_comp_ratio_list = [LayerCompRatioPair(conv1, Decimal(0.5)),
                             LayerCompRatioPair(conv2, Decimal(0.5))]

    spatial_svd_pruner = SpatialSvdPruner()
    comp_layer_db = spatial_svd_pruner.prune_model(orig_layer_db, layer_comp_ratio_list,
                                                   CostMetric.mac, trainer=None)

    conv1_a = comp_layer_db.find_layer_by_name('conv2d_a/Conv2D')
    conv1_b = comp_layer_db.find_layer_by_name('conv2d_b/Conv2D')

    # Weights shape [kh, kw, Nic, Noc]
    self.assertEqual([5, 1, 1, 2], conv1_a.module.inputs[1].get_shape().as_list())
    self.assertEqual([1, 5, 2, 32], conv1_b.module.inputs[1].get_shape().as_list())

    conv2_a = comp_layer_db.find_layer_by_name('conv2d_1_a/Conv2D')
    conv2_b = comp_layer_db.find_layer_by_name('conv2d_1_b/Conv2D')

    self.assertEqual([5, 1, 32, 53], conv2_a.module.inputs[1].get_shape().as_list())
    self.assertEqual([1, 5, 53, 64], conv2_b.module.inputs[1].get_shape().as_list())

    for layer in comp_layer_db:
        print("Layer: " + layer.name)
        print("   Module: " + str(layer.module.name))

    tf.compat.v1.reset_default_graph()
    sess.close()
    # delete temp directory
    shutil.rmtree(str('./temp_meta/'))
def test_prune_model_2_layers(self):
    model = mnist_torch_model.Net()

    # Create a layer database
    orig_layer_db = LayerDatabase(model, input_shape=(1, 1, 28, 28))
    # Copy the db
    comp_layer_db = copy.deepcopy(orig_layer_db)

    conv1 = comp_layer_db.find_layer_by_name('conv1')
    conv2 = comp_layer_db.find_layer_by_name('conv2')

    pruner = SpatialSvdPruner()
    layer_db = pruner.prune_model(orig_layer_db,
                                  [LayerCompRatioPair(conv1, Decimal(0.5)),
                                   LayerCompRatioPair(conv2, Decimal(0.5))],
                                  CostMetric.mac, trainer=None)

    conv1_a = layer_db.find_layer_by_name('conv1.0')
    conv1_b = layer_db.find_layer_by_name('conv1.1')

    self.assertEqual((5, 1), conv1_a.module.kernel_size)
    self.assertEqual(1, conv1_a.module.in_channels)
    self.assertEqual(2, conv1_a.module.out_channels)

    self.assertEqual((1, 5), conv1_b.module.kernel_size)
    self.assertEqual(2, conv1_b.module.in_channels)
    self.assertEqual(32, conv1_b.module.out_channels)

    conv2_a = layer_db.find_layer_by_name('conv2.0')
    conv2_b = layer_db.find_layer_by_name('conv2.1')

    self.assertEqual((5, 1), conv2_a.module.kernel_size)
    self.assertEqual(32, conv2_a.module.in_channels)
    self.assertEqual(53, conv2_a.module.out_channels)

    self.assertEqual((1, 5), conv2_b.module.kernel_size)
    self.assertEqual(53, conv2_b.module.in_channels)
    self.assertEqual(64, conv2_b.module.out_channels)

    self.assertTrue(isinstance(layer_db.model.conv1, torch.nn.Sequential))
    self.assertTrue(isinstance(layer_db.model.conv2, torch.nn.Sequential))

    for layer in layer_db:
        print("Layer: " + layer.name)
        print("   Module: " + str(layer.module))

    print(layer_db.model)
@classmethod
def get_compressed_model_cost(cls, layer_db, layer_ratio_list, original_model_cost, cost_metric):
    """
    Computes the compression ratio of the compressed model, with all layers included

    :param layer_db: layer database
    :param layer_ratio_list: list of layer, comp-ratio pairs
    :param original_model_cost: cost of the original model
    :param cost_metric: cost metric (mac or memory)
    :return: comp ratio of the compressed model
    """
    # Add the layers that were not selected to this list to get the accurate cost of the compressed model
    for layer in layer_db:
        if layer not in layer_db.get_selected_layers():
            layer_ratio_list.append(LayerCompRatioPair(layer, None))

    # Calculate compressed model cost
    compressed_model_cost = cls.calculate_compressed_cost(layer_db, layer_ratio_list, cost_metric)

    if cost_metric == CostMetric.memory:
        current_comp_ratio = Decimal(compressed_model_cost.memory / original_model_cost.memory)
    else:
        current_comp_ratio = Decimal(compressed_model_cost.mac / original_model_cost.mac)

    return current_comp_ratio
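# Illustration only (not part of the original source): a minimal, self-contained
# sketch of the ratio arithmetic above, using a stand-in Cost container; the real
# Cost type comes from the cost-calculator module.
from collections import namedtuple
from decimal import Decimal

DemoCost = namedtuple('DemoCost', ['mac', 'memory'])


def demo_comp_ratio(compressed, original, use_memory=False):
    """ e.g. demo_comp_ratio(DemoCost(2500, 2000), DemoCost(10000, 4000)) -> Decimal(0.25) """
    if use_memory:
        return Decimal(compressed.memory / original.memory)
    return Decimal(compressed.mac / original.mac)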
def _sort_on_occurrence(self, sess: tf.compat.v1.Session, layer_comp_ratio_list: List[LayerCompRatioPair]) -> \
        List[LayerCompRatioPair]:
    """
    Sorts a list of conv layer, comp-ratio pairs by the layers' order of occurrence in the model.

    :param sess: tf.compat.v1.Session
    :param layer_comp_ratio_list: layer compression ratio list
    :return: sorted_layer_comp_ratio_list
    """
    sorted_layer_comp_ratio_list = []

    ordered_ops = get_ordered_ops(graph=sess.graph, starting_op_names=self._input_op_names,
                                  output_op_names=self._output_op_names)

    for op in ordered_ops:
        if is_op_compressible(op):
            for pair in layer_comp_ratio_list:
                if op.name == pair.layer.name:
                    sorted_layer_comp_ratio_list.append(LayerCompRatioPair(pair.layer, pair.comp_ratio))

    return sorted_layer_comp_ratio_list
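# Illustration only (not from the original source): the occurrence-based sort above
# is a stable re-ordering; walking ops in topological order and emitting every
# matching pair sorts the pairs by model position. A toy version with plain names:
def demo_sort_on_occurrence(ordered_op_names, name_ratio_pairs):
    """ e.g. demo_sort_on_occurrence(['c1', 'c2'], [('c2', 0.5), ('c1', 0.5)])
        returns [('c1', 0.5), ('c2', 0.5)] """
    return [pair for name in ordered_op_names
            for pair in name_ratio_pairs if pair[0] == name]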
def test_calculate_spatial_svd_cost_all_layers(self):

    model = mnist_model.Net().to("cpu")
    print(model)

    layer_database = lad.LayerDatabase(model=model, input_shape=(1, 1, 28, 28))

    model_cost = cc.SpatialSvdCostCalculator.compute_model_cost(layer_database)
    self.assertEqual(627200 + 10035200 + 3211264 + 10240, model_cost.mac)

    # Compress all layers by 50%
    # Create a list of tuples of (layer, comp_ratio)
    layer_ratio_list = []
    for layer in layer_database:
        layer_ratio_list.append(LayerCompRatioPair(layer, Decimal(0.5)))

    compressed_cost = cc.SpatialSvdCostCalculator.calculate_compressed_cost(layer_database,
                                                                            layer_ratio_list,
                                                                            CostMetric.mac)

    self.assertEqual(5244960 + (3136 * 385 + 385 * 1024) + (1024 * 4 + 4 * 10),
                     compressed_cost.mac)
def _compute_compressed_model_cost(self, layer_ratio_list, original_model_cost):
    """
    Computes the compression ratio of the compressed model, with all layers included.
    Layers that were not selected are appended (with comp-ratio None) so the
    compressed model cost is accurate.

    :param layer_ratio_list: list of layer, comp-ratio pairs
    :param original_model_cost: cost of the original model
    :return: compression ratio of the compressed model
    """
    for layer in self._layer_db:
        if layer not in self._layer_db.get_selected_layers():
            layer_ratio_list.append(LayerCompRatioPair(layer, None))

    # Calculate compressed model cost
    compressed_model_cost = self._cost_calculator.calculate_compressed_cost(self._layer_db,
                                                                            layer_ratio_list,
                                                                            self._cost_metric)

    if self._cost_metric == CostMetric.memory:
        model_compression_ratio = Decimal(compressed_model_cost.memory / original_model_cost.memory)
    else:
        model_compression_ratio = Decimal(compressed_model_cost.mac / original_model_cost.mac)

    return model_compression_ratio
def sorting_hook(module, _inp, _out):
    """
    Forward hook used to sort modules based on their order of occurrence in a forward pass.
    Appends to sorted_layer_comp_ratio_list and reads layer_comp_ratio_list from the
    enclosing scope.
    """
    for pair in layer_comp_ratio_list:
        if pair.layer.module == module:
            sorted_layer_comp_ratio_list.append(LayerCompRatioPair(pair.layer, pair.comp_ratio))
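# Illustration only (assumed usage, not from the original source): a hook like
# sorting_hook only fires during a forward pass. The caller registers it on every
# candidate module, runs one batch, then removes the handles. Assumes torch is
# already imported in this file:
def demo_record_call_order(model, example_input):
    """ Returns the model's leaf modules in the order they execute on example_input """
    call_order = []

    def record(module, _inp, _out):
        call_order.append(module)

    # register on leaf modules only (modules with no children)
    handles = [m.register_forward_hook(record)
               for m in model.modules() if not list(m.children())]
    model(example_input)
    for handle in handles:
        handle.remove()
    return call_order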
def _compute_layerwise_eval_score_per_comp_ratio_candidate(self, tabular_progress_object, progress_bar,
                                                           layer: Layer) -> Dict[Decimal, float]:
    """
    Computes eval scores for each compression-ratio candidate for a given layer

    :param tabular_progress_object: data table to update with each computed eval score
    :param progress_bar: progress bar to update after each candidate
    :param layer: Layer for which to calculate eval scores
    :return: Dictionary of {compression_ratio: eval_score} for each compression-ratio candidate
    """
    layer_wise_eval_scores_dict = {}

    # Only publish plots to a document if a bokeh server session exists
    if self.bokeh_session:
        # plot to visualize the evaluation scores as they update for each layer
        layer_wise_eval_scores_plot = LinePlot(x_axis_label="Compression Ratios",
                                               y_axis_label="Eval Scores",
                                               title=layer.name,
                                               bokeh_session=self.bokeh_session)

    # Loop over each candidate
    for comp_ratio in self._comp_ratio_candidates:
        logger.info("Analyzing compression ratio: %s =====================>", comp_ratio)

        # Prune layer given this comp ratio
        pruned_layer_db = self._pruner.prune_model(self._layer_db,
                                                   [LayerCompRatioPair(layer, comp_ratio)],
                                                   self._cost_metric, trainer=None)

        eval_score = self._eval_func(pruned_layer_db.model, self._eval_iter, use_cuda=self._is_cuda)
        layer_wise_eval_scores_dict[comp_ratio] = eval_score

        # destroy the layer database
        pruned_layer_db.destroy()
        pruned_layer_db = None

        logger.info("Layer %s, comp_ratio %f ==> eval_score=%f", layer.name, comp_ratio, eval_score)

        if self.bokeh_session:
            layer_wise_eval_scores_plot.update(new_x_coordinate=comp_ratio, new_y_coordinate=eval_score)
            # Update the data table by adding the computed eval score
            tabular_progress_object.update_table(str(comp_ratio), layer.name, eval_score)
            # Update the progress bar
            progress_bar.update()

    # remove the plot so we have a fresh figure to visualize the next layer
    if self.bokeh_session:
        layer_wise_eval_scores_plot.remove_plot()

    return layer_wise_eval_scores_dict
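# Illustration only (stand-in callables, not from the original source): stripped of
# logging and plotting, the candidate loop above reduces to "prune at each ratio,
# score the pruned model, key the score by the ratio":
def demo_evaluate_candidates(candidates, prune_fn, eval_fn):
    """ e.g. demo_evaluate_candidates([Decimal('0.5')], lambda r: None, lambda m: 0.9)
        returns {Decimal('0.5'): 0.9} """
    scores = {}
    for ratio in candidates:
        pruned_model = prune_fn(ratio)         # stands in for self._pruner.prune_model(...)
        scores[ratio] = eval_fn(pruned_model)  # stands in for self._eval_func(...)
    return scores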
def _get_layer_pairs(layer_db: LayerDatabase, module_comp_ratio_pairs: List[ModuleCompRatioPair]):
    layer_comp_ratio_pairs = []

    for pair in module_comp_ratio_pairs:
        layer_comp_ratio_pair = LayerCompRatioPair(layer_db.find_layer_by_module(pair.module),
                                                   pair.comp_ratio)
        layer_comp_ratio_pairs.append(layer_comp_ratio_pair)

    return layer_comp_ratio_pairs
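# Illustration only (assumed usage, not from the original source): callers supply
# user-facing (module, comp_ratio) pairs, and the layer database resolves each
# module to its Layer entry before the pruner runs, e.g.:
#
#   module_pairs = [ModuleCompRatioPair(model.conv2, Decimal(0.5))]
#   layer_pairs = _get_layer_pairs(layer_db, module_pairs)
#   assert layer_pairs[0].layer == layer_db.find_layer_by_module(model.conv2)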
def _find_all_comp_ratios_given_eval_score(self, given_eval_score, eval_scores_dict):
    layer_ratio_list = []
    for layer in self._layer_db.get_selected_layers():
        comp_ratio = self._find_layer_comp_ratio_given_eval_score(eval_scores_dict,
                                                                  given_eval_score, layer)
        layer_ratio_list.append(LayerCompRatioPair(layer, comp_ratio))

    return layer_ratio_list
def test_prune_model_tf_slim(self):
    """ Pruning a model created with the tf slim API """

    # create tf.compat.v1.Session and initialize the weights and biases with zeros
    config = tf.compat.v1.ConfigProto()
    config.gpu_options.allow_growth = True

    # create session with graph
    sess = tf.compat.v1.Session(graph=tf.Graph(), config=config)

    with sess.graph.as_default():
        # by default, model will be constructed in default graph
        x = tf.compat.v1.placeholder(tf.float32, [1, 32, 32, 3])
        _ = tf_slim_basic_model(x)
        sess.run(tf.compat.v1.global_variables_initializer())

    conn_graph_orig = ConnectedGraph(sess.graph, ['Placeholder'], ['tf_slim_model/Softmax'])
    num_ops_orig = len(conn_graph_orig.get_all_ops())

    # Create a layer database
    orig_layer_db = LayerDatabase(model=sess, input_shape=(1, 32, 32, 3), working_dir=None)
    conv1 = orig_layer_db.find_layer_by_name('Conv_1/Conv2D')
    conv1_bias = BiasUtils.get_bias_as_numpy_data(orig_layer_db.model, conv1.module)

    layer_comp_ratio_list = [LayerCompRatioPair(conv1, Decimal(0.5))]

    spatial_svd_pruner = SpatialSvdPruner()
    comp_layer_db = spatial_svd_pruner.prune_model(orig_layer_db, layer_comp_ratio_list,
                                                   CostMetric.mac, trainer=None)

    # Check that svd added these ops
    _ = comp_layer_db.model.graph.get_operation_by_name('Conv_1_a/Conv2D')
    _ = comp_layer_db.model.graph.get_operation_by_name('Conv_1_b/Conv2D')

    conn_graph_new = ConnectedGraph(comp_layer_db.model.graph, ['Placeholder'],
                                    ['tf_slim_model/Softmax'])
    num_ops_new = len(conn_graph_new.get_all_ops())
    self.assertEqual(num_ops_orig + 1, num_ops_new)

    bias_add_op = comp_layer_db.model.graph.get_operation_by_name('Conv_1_b/BiasAdd')
    conv_1_b_op = comp_layer_db.model.graph.get_operation_by_name('Conv_1_b/Conv2D')
    self.assertEqual(conn_graph_new._module_identifier.get_op_info(bias_add_op),
                     conn_graph_new._module_identifier.get_op_info(conv_1_b_op))
    self.assertTrue(np.array_equal(conv1_bias,
                                   BiasUtils.get_bias_as_numpy_data(comp_layer_db.model,
                                                                    conv_1_b_op)))
def test_select_per_layer_comp_ratios_with_spatial_svd_pruner(self):

    pruner = SpatialSvdPruner()
    eval_func = unittest.mock.MagicMock()
    rounding_algo = unittest.mock.MagicMock()
    eval_func.side_effect = [10, 20, 30, 40, 50, 60, 70, 80, 90,
                             11, 21, 31, 35, 40, 45, 50, 55, 60]
    rounding_algo.round.side_effect = [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9,
                                       0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9]

    model = mnist_torch_model.Net()
    layer_db = LayerDatabase(model, input_shape=(1, 1, 28, 28))

    selected_layers = [layer for layer in layer_db if isinstance(layer.module, nn.Conv2d)]
    layer_db.mark_picked_layers(selected_layers)

    # Instantiate child
    greedy_algo = comp_ratio_select.GreedyCompRatioSelectAlgo(layer_db, pruner,
                                                              SpatialSvdCostCalculator(),
                                                              eval_func, 20, CostMetric.mac,
                                                              Decimal(0.4), 10, True, None,
                                                              rounding_algo, False,
                                                              bokeh_session=None)

    layer_comp_ratio_list, stats = greedy_algo.select_per_layer_comp_ratios()

    original_cost = SpatialSvdCostCalculator.compute_model_cost(layer_db)

    for layer in layer_db:
        if layer not in selected_layers:
            layer_comp_ratio_list.append(LayerCompRatioPair(layer, None))
    compressed_cost = SpatialSvdCostCalculator.calculate_compressed_cost(layer_db,
                                                                         layer_comp_ratio_list,
                                                                         CostMetric.mac)

    actual_compression_ratio = compressed_cost.mac / original_cost.mac
    self.assertTrue(math.isclose(Decimal(0.3), actual_compression_ratio, abs_tol=0.8))
    print('\n')
    for pair in layer_comp_ratio_list:
        print(pair)
def test_prune_model_2_layers(self):
    model = mnist_model.Net()

    # Create a layer database
    layer_db = LayerDatabase(model, input_shape=(1, 1, 28, 28))

    fc1 = layer_db.find_layer_by_name('fc1')
    conv2 = layer_db.find_layer_by_name('conv2')

    pruner = WeightSvdPruner()
    layer_db = pruner.prune_model(layer_db,
                                  [LayerCompRatioPair(fc1, Decimal(0.5)),
                                   LayerCompRatioPair(conv2, Decimal(0.5))],
                                  aimet_common.defs.CostMetric.mac, trainer=None)

    fc1_a = layer_db.find_layer_by_name('fc1.0')
    fc1_b = layer_db.find_layer_by_name('fc1.1')

    self.assertEqual(3136, fc1_a.module.in_features)
    self.assertEqual(1024, fc1_b.module.out_features)

    conv2_a = layer_db.find_layer_by_name('conv2.0')
    conv2_b = layer_db.find_layer_by_name('conv2.1')

    self.assertEqual((1, 1), conv2_a.module.kernel_size)
    self.assertEqual(32, conv2_a.module.in_channels)
    self.assertEqual(15, conv2_a.module.out_channels)

    self.assertEqual((5, 5), conv2_b.module.kernel_size)
    self.assertEqual(15, conv2_b.module.in_channels)
    self.assertEqual(64, conv2_b.module.out_channels)

    self.assertTrue(isinstance(layer_db.model.fc1, nn.Sequential))
    self.assertTrue(isinstance(layer_db.model.conv2, nn.Sequential))

    for layer in layer_db:
        print("Layer: " + layer.name)
        print("   Module: " + str(layer.module))

    print(layer_db.model)
def test_calculate_channel_pruning_cost_all_layers(self):

    model = mnist_model.Net().to("cpu")
    print(model)

    layer_database = lad.LayerDatabase(model=model, input_shape=(1, 1, 28, 28))

    # Compress all layers by 50%
    # Create a list of tuples of (layer, comp_ratio)
    layer_ratio_list = []

    # Unfortunately in mnist we can only input-channel-prune conv2
    for layer in layer_database:
        if layer.module is model.conv2:
            layer_ratio_list.append(LayerCompRatioPair(layer, Decimal('0.5')))
        else:
            layer_ratio_list.append(LayerCompRatioPair(layer, None))

    # Create the Input channel pruner
    dataset_size = 1000
    batch_size = 10
    # create fake data loader with image size (1, 28, 28)
    data_loader = self.create_fake_data_loader(dataset_size=dataset_size, batch_size=batch_size)

    pruner = InputChannelPruner(data_loader=data_loader, input_shape=(1, 1, 28, 28),
                                num_reconstruction_samples=10,
                                allow_custom_downsample_ops=True)

    cost_calculator = ChannelPruningCostCalculator(pruner)

    compressed_cost = cost_calculator.calculate_compressed_cost(layer_database,
                                                                layer_ratio_list, CostMetric.mac)

    self.assertEqual(8552704, compressed_cost.mac)
def test_calculate_spatial_svd_cost_all_layers(self):

    # create tf.compat.v1.Session and initialize the weights and biases with zeros
    config = tf.compat.v1.ConfigProto()
    config.gpu_options.allow_growth = True

    # create session with graph
    sess = tf.compat.v1.Session(graph=tf.Graph(), config=config)

    with sess.graph.as_default():
        # by default, model will be constructed in default graph
        _ = mnist_tf_model.create_model(data_format='channels_last')
        sess.run(tf.compat.v1.global_variables_initializer())

    layer_database = LayerDatabase(model=sess, input_shape=(1, 28, 28, 1), working_dir=None)

    # Compress all layers by 50%
    # Create a list of tuples of (layer, comp_ratio)
    layer_ratio_list = []

    for layer in layer_database:
        if layer.module.type == 'Conv2D':
            layer_ratio_list.append(LayerCompRatioPair(layer, Decimal(0.5)))
        else:
            layer_ratio_list.append(LayerCompRatioPair(layer, None))

    compressed_cost = cc.SpatialSvdCostCalculator.calculate_compressed_cost(layer_database,
                                                                            layer_ratio_list,
                                                                            CostMetric.mac)

    self.assertEqual(8466464, compressed_cost.mac)

    tf.compat.v1.reset_default_graph()
    sess.close()
    # delete temp directory
    shutil.rmtree(str('./temp_meta/'))
def _compute_comp_ratios_and_eval_scores(self, rank_index):
    """
    :param rank_index: rank index for which the comp ratio and eval score need to be
                       computed across layers
    :return: layer <-> comp_ratio <-> eval_score associations for the given rank index
    """
    comp_ratio_eval_score_across_layers = []
    layer_ratio_list = []

    for layer in self._layer_db.get_selected_layers():

        # Get the candidate rank for the given rank index and layer
        rank = self._svd_lib_ref.GetCandidateRanks(str(layer.name), rank_index)

        # Get the compression ratio for this layer at the rank index
        comp_ratio = self._cost_calculator.calculate_comp_ratio_given_rank(layer, rank[0],
                                                                           self._cost_metric)

        # Eval score for this comp ratio
        pruned_layer_db = self._pruner.prune_model(self._layer_db,
                                                   [LayerCompRatioPair(layer=layer,
                                                                       comp_ratio=comp_ratio)],
                                                   self._cost_metric, None)
        eval_score = self._eval_func(pruned_layer_db.model, self._eval_iter,
                                     use_cuda=self._is_cuda)

        # destroy the layer database
        pruned_layer_db.destroy()
        pruned_layer_db = None

        comp_ratio_eval_score_across_layers.append(LayerCompRatioEvalScore(layer, comp_ratio,
                                                                           eval_score))
        layer_ratio_list.append(LayerCompRatioPair(layer=layer, comp_ratio=comp_ratio))

    return layer_ratio_list, comp_ratio_eval_score_across_layers
def test_prune_conv_no_bias(self):
    """ Test spatial svd on a conv layer with no bias """

    # create tf.compat.v1.Session and initialize the weights and biases with zeros
    config = tf.compat.v1.ConfigProto()
    config.gpu_options.allow_growth = True

    # create session with graph
    sess = tf.compat.v1.Session(graph=tf.Graph(), config=config)

    with sess.graph.as_default():
        # by default, model will be constructed in default graph
        inputs = tf.keras.Input(shape=(32, 32, 3,))
        x = tf.keras.layers.Conv2D(32, (3, 3), use_bias=False)(inputs)
        _ = tf.keras.layers.Flatten()(x)
        sess.run(tf.compat.v1.global_variables_initializer())

    # Create a layer database
    orig_layer_db = LayerDatabase(model=sess, input_shape=(1, 32, 32, 3), working_dir=None)
    conv_op = orig_layer_db.find_layer_by_name('conv2d/Conv2D')

    layer_comp_ratio_list = [LayerCompRatioPair(conv_op, Decimal(0.5))]

    spatial_svd_pruner = SpatialSvdPruner()
    comp_layer_db = spatial_svd_pruner.prune_model(orig_layer_db, layer_comp_ratio_list,
                                                   CostMetric.mac, trainer=None)

    # Check that svd added these ops
    _ = comp_layer_db.model.graph.get_operation_by_name('conv2d_a/Conv2D')
    conv2d_b_op = comp_layer_db.model.graph.get_operation_by_name('conv2d_b/Conv2D')

    reshape_op = comp_layer_db.model.graph.get_operation_by_name('flatten/Reshape')
    self.assertEqual(conv2d_b_op, reshape_op.inputs[0].op)
def _calculate_model_comp_ratio_for_given_eval_score(self, eval_score, eval_scores_dict,
                                                     original_model_cost):

    # Calculate the compression ratios for each layer based on this score
    layer_ratio_list = self._find_all_comp_ratios_given_eval_score(eval_score, eval_scores_dict)
    for layer in self._layer_db:
        if layer not in self._layer_db.get_selected_layers():
            layer_ratio_list.append(LayerCompRatioPair(layer, None))

    # Calculate compressed model cost
    compressed_model_cost = self._cost_calculator.calculate_compressed_cost(self._layer_db,
                                                                            layer_ratio_list,
                                                                            self._cost_metric)

    if self._cost_metric == CostMetric.memory:
        current_comp_ratio = Decimal(compressed_model_cost.memory / original_model_cost.memory)
    else:
        current_comp_ratio = Decimal(compressed_model_cost.mac / original_model_cost.mac)

    return current_comp_ratio
def test_prune_model(self):
    """ Test end-to-end prune_model with MNIST """

    class Net(nn.Module):
        def __init__(self):
            super(Net, self).__init__()
            self.conv1 = nn.Conv2d(1, 10, kernel_size=3)
            self.max_pool2d = nn.MaxPool2d(2)
            self.relu1 = nn.ReLU()
            self.conv2 = nn.Conv2d(10, 20, kernel_size=3)
            self.relu2 = nn.ReLU()
            self.conv3 = nn.Conv2d(20, 30, kernel_size=3)
            self.relu3 = nn.ReLU()
            self.conv4 = nn.Conv2d(30, 40, kernel_size=3)
            self.relu4 = nn.ReLU()
            self.fc1 = nn.Linear(7 * 7 * 40, 300)
            self.relu5 = nn.ReLU()
            self.fc2 = nn.Linear(300, 10)
            self.log_softmax = nn.LogSoftmax(dim=1)

        def forward(self, x):
            x = self.relu1(self.max_pool2d(self.conv1(x)))
            x = self.relu2(self.conv2(x))
            x = self.relu3(self.conv3(x))
            x = self.relu4(self.conv4(x))
            x = x.view(x.size(0), -1)
            x = self.relu5(self.fc1(x))
            x = self.fc2(x)
            return self.log_softmax(x)

    orig_model = Net()
    orig_model.eval()

    # Create a layer database
    orig_layer_db = LayerDatabase(orig_model, input_shape=(1, 1, 28, 28))

    dataset_size = 1000
    batch_size = 10
    # max out number of batches
    number_of_batches = 100

    # create fake data loader with image size (1, 28, 28)
    data_loader = create_fake_data_loader(dataset_size=dataset_size, batch_size=batch_size)

    input_channel_pruner = InputChannelPruner(data_loader=data_loader, input_shape=(1, 1, 28, 28),
                                              num_reconstruction_samples=number_of_batches,
                                              allow_custom_downsample_ops=True)

    # keeping compression ratio = 0.5 for all layers
    layer_comp_ratio_list = [LayerCompRatioPair(Layer(orig_model.conv4, 'conv4', None), 0.5),
                             LayerCompRatioPair(Layer(orig_model.conv3, 'conv3', None), 0.5),
                             LayerCompRatioPair(Layer(orig_model.conv2, 'conv2', None), 0.5)]

    comp_layer_db = input_channel_pruner.prune_model(orig_layer_db, layer_comp_ratio_list,
                                                     CostMetric.mac, trainer=None)

    self.assertEqual(comp_layer_db.model.conv2.in_channels, 5)
    self.assertEqual(comp_layer_db.model.conv2.out_channels, 10)

    self.assertEqual(comp_layer_db.model.conv3.in_channels, 10)
    self.assertEqual(comp_layer_db.model.conv3.out_channels, 15)

    self.assertEqual(comp_layer_db.model.conv4.in_channels, 15)
    self.assertEqual(comp_layer_db.model.conv4.out_channels, 40)
def test_prune_model_with_seq(self):
    """ Test end-to-end prune_model with resnet18 """

    batch_size = 2
    dataset_size = 1000
    number_of_batches = 1
    samples_per_image = 10
    num_reconstruction_samples = number_of_batches * batch_size * samples_per_image

    resnet18_model = models.resnet18(pretrained=True)
    resnet18_model.eval()

    # Create a layer database
    orig_layer_db = LayerDatabase(resnet18_model, input_shape=(1, 3, 224, 224))

    data_loader = create_fake_data_loader(dataset_size=dataset_size, batch_size=batch_size,
                                          image_size=(3, 224, 224))

    input_channel_pruner = InputChannelPruner(data_loader=data_loader,
                                              input_shape=(1, 3, 224, 224),
                                              num_reconstruction_samples=num_reconstruction_samples,
                                              allow_custom_downsample_ops=True)

    # keeping compression ratio = 0.5 for all layers
    layer_comp_ratio_list = [
        LayerCompRatioPair(Layer(resnet18_model.layer4[1].conv1, 'layer4.1.conv1', None), 0.5),
        LayerCompRatioPair(Layer(resnet18_model.layer3[1].conv1, 'layer3.1.conv1', None), 0.5),
        LayerCompRatioPair(Layer(resnet18_model.layer2[1].conv1, 'layer2.1.conv1', None), 0.5),
        LayerCompRatioPair(Layer(resnet18_model.layer1[1].conv1, 'layer1.1.conv1', None), 0.5),
        LayerCompRatioPair(Layer(resnet18_model.layer1[0].conv2, 'layer1.0.conv2', None), 0.5)
    ]

    comp_layer_db = input_channel_pruner.prune_model(orig_layer_db, layer_comp_ratio_list,
                                                     CostMetric.mac, trainer=None)

    # 1) not below split
    self.assertEqual(comp_layer_db.model.layer1[0].conv2.in_channels, 32)
    self.assertEqual(comp_layer_db.model.layer1[0].conv2.out_channels, 64)
    self.assertEqual(list(comp_layer_db.model.layer1[0].conv2.weight.shape), [64, 32, 3, 3])

    # impacted
    self.assertEqual(comp_layer_db.model.layer1[0].conv1.in_channels, 64)
    self.assertEqual(comp_layer_db.model.layer1[0].conv1.out_channels, 32)
    self.assertEqual(list(comp_layer_db.model.layer1[0].conv1.weight.shape), [32, 64, 3, 3])

    # 2) below split
    # 64 * .5
    self.assertEqual(comp_layer_db.model.layer1[1].conv1[1].in_channels, 32)
    self.assertEqual(comp_layer_db.model.layer1[1].conv1[1].out_channels, 64)
    self.assertEqual(list(comp_layer_db.model.layer1[1].conv1[1].weight.shape), [64, 32, 3, 3])

    # 128 * .5
    self.assertEqual(comp_layer_db.model.layer2[1].conv1[1].in_channels, 64)
    self.assertEqual(comp_layer_db.model.layer2[1].conv1[1].out_channels, 128)
    self.assertEqual(list(comp_layer_db.model.layer2[1].conv1[1].weight.shape), [128, 64, 3, 3])

    # 256 * .5
    self.assertEqual(comp_layer_db.model.layer3[1].conv1[1].in_channels, 128)
    self.assertEqual(comp_layer_db.model.layer3[1].conv1[1].out_channels, 256)
    self.assertEqual(list(comp_layer_db.model.layer3[1].conv1[1].weight.shape), [256, 128, 3, 3])

    # 512 * .5
    self.assertEqual(comp_layer_db.model.layer4[1].conv1[1].in_channels, 256)
    self.assertEqual(comp_layer_db.model.layer4[1].conv1[1].out_channels, 512)
    self.assertEqual(list(comp_layer_db.model.layer4[1].conv1[1].weight.shape), [512, 256, 3, 3])
def test_calculate_channel_pruning_cost_all_layers(self):

    config = tf.compat.v1.ConfigProto()
    config.gpu_options.allow_growth = True

    # create session with graph
    sess = tf.compat.v1.Session(graph=tf.Graph(), config=config)

    with sess.graph.as_default():
        # model will be constructed in default graph
        _ = mnist(data_format='channels_last')
        # initialize the weights and biases with appropriate initializer
        sess.run(tf.compat.v1.global_variables_initializer())

    meta_path = str('./temp_working_dir/')
    if not os.path.exists(meta_path):
        os.mkdir(meta_path)

    layer_db = LayerDatabase(model=sess, input_shape=(1, 28, 28, 1), working_dir=meta_path)

    # Compress all layers by 50%
    # Create a list of tuples of (layer, comp_ratio)
    layer_ratio_list = []

    # Unfortunately in mnist we can only input-channel-prune conv2d_1/Conv2D
    for layer in layer_db:
        if layer.module.name == 'conv2d_1/Conv2D':
            layer_ratio_list.append(LayerCompRatioPair(layer, Decimal('0.5')))
        else:
            layer_ratio_list.append(LayerCompRatioPair(layer, None))

    inp_op_names = ['reshape_input']
    output_op_names = ['dense_1/BiasAdd']
    data_set = unittest.mock.MagicMock()
    batch_size = unittest.mock.MagicMock()
    num_reconstruction_samples = unittest.mock.MagicMock()

    pruner = InputChannelPruner(input_op_names=inp_op_names, output_op_names=output_op_names,
                                data_set=data_set, batch_size=batch_size,
                                num_reconstruction_samples=num_reconstruction_samples,
                                allow_custom_downsample_ops=True)

    cost_calculator = ChannelPruningCostCalculator(pruner)

    compressed_cost = cost_calculator.calculate_compressed_cost(layer_db, layer_ratio_list,
                                                                CostMetric.mac)

    self.assertEqual(8552704, compressed_cost.mac)
    self.assertEqual(3247504, compressed_cost.memory)

    # delete the meta and the checkpoint files
    shutil.rmtree(meta_path)
    layer_db.model.close()
def test_calculate_channel_pruning_cost_two_layers(self):
    """ Test compressed model cost using two layers """

    config = tf.compat.v1.ConfigProto()
    config.gpu_options.allow_growth = True

    # create session with graph
    sess = tf.compat.v1.Session(graph=tf.Graph(), config=config)

    with sess.graph.as_default():
        # model will be constructed in default graph
        test_models.single_residual()
        init = tf.compat.v1.global_variables_initializer()
        # initialize the weights and biases with appropriate initializer
        sess.run(init)

    meta_path = str('./temp_working_dir/')
    if not os.path.exists(meta_path):
        os.mkdir(meta_path)

    layer_db = LayerDatabase(model=sess, input_shape=None, working_dir=meta_path)

    # Create a list of tuples of (layer, comp_ratio)
    layer_ratio_list = []
    layer_names = ['conv2d_2/Conv2D', 'conv2d_3/Conv2D']
    for layer in layer_db:
        if layer.module.name in layer_names:
            layer_ratio_list.append(LayerCompRatioPair(layer, 0.5))
        else:
            layer_ratio_list.append(LayerCompRatioPair(layer, None))

    input_op_names = ['input_1']
    output_op_names = ['single_residual/Softmax']
    data_set = unittest.mock.MagicMock()
    batch_size = unittest.mock.MagicMock()
    num_reconstruction_samples = unittest.mock.MagicMock()

    pruner = InputChannelPruner(input_op_names=input_op_names, output_op_names=output_op_names,
                                data_set=data_set, batch_size=batch_size,
                                num_reconstruction_samples=num_reconstruction_samples,
                                allow_custom_downsample_ops=True)

    cost_calculator = ChannelPruningCostCalculator(pruner)

    compressed_cost = cost_calculator.calculate_compressed_cost(layer_db, layer_ratio_list,
                                                                CostMetric.mac)

    self.assertEqual(108544, compressed_cost.mac)
    self.assertEqual(1264, compressed_cost.memory)

    # delete the meta and the checkpoint files
    shutil.rmtree(meta_path)
    layer_db.model.close()
def test_prune_model(self):
    """ Test end-to-end prune_model with VGG16-imagenet """

    AimetLogger.set_area_logger_level(AimetLogger.LogAreas.Winnow, logging.INFO)
    tf.compat.v1.reset_default_graph()

    batch_size = 1
    input_data = np.random.rand(100, 224, 224, 3)
    dataset = tf.data.Dataset.from_tensor_slices(input_data)
    dataset = dataset.batch(batch_size=batch_size)

    orig_g = tf.Graph()

    with orig_g.as_default():
        _ = VGG16(weights=None, input_shape=(224, 224, 3), include_top=False)
        orig_init = tf.compat.v1.global_variables_initializer()

    input_op_names = ['input_1']
    output_op_names = ['block5_pool/MaxPool']

    # create sess with graph
    orig_sess = tf.compat.v1.Session(graph=orig_g)

    # initialize all the variables in VGG16
    orig_sess.run(orig_init)

    # create layer database
    layer_db = LayerDatabase(model=orig_sess, input_shape=(1, 224, 224, 3), working_dir=None)

    block1_conv2 = layer_db.model.graph.get_operation_by_name('block1_conv2/Conv2D')
    block2_conv1 = layer_db.model.graph.get_operation_by_name('block2_conv1/Conv2D')
    block2_conv2 = layer_db.model.graph.get_operation_by_name('block2_conv2/Conv2D')

    # output shape in NCHW format
    block1_conv2_output_shape = block1_conv2.outputs[0].shape
    block2_conv1_output_shape = block2_conv1.outputs[0].shape
    block2_conv2_output_shape = block2_conv2.outputs[0].shape

    # keeping compression ratio = 0.5 for all layers
    layer_comp_ratio_list = [
        LayerCompRatioPair(Layer(model=layer_db.model, op=block1_conv2,
                                 output_shape=block1_conv2_output_shape), 0.5),
        LayerCompRatioPair(Layer(model=layer_db.model, op=block2_conv1,
                                 output_shape=block2_conv1_output_shape), 0.5),
        LayerCompRatioPair(Layer(model=layer_db.model, op=block2_conv2,
                                 output_shape=block2_conv2_output_shape), 0.5)
    ]

    cp = InputChannelPruner(input_op_names=input_op_names, output_op_names=output_op_names,
                            data_set=dataset, batch_size=batch_size,
                            num_reconstruction_samples=20,
                            allow_custom_downsample_ops=True)

    comp_layer_db = cp.prune_model(layer_db=layer_db,
                                   layer_comp_ratio_list=layer_comp_ratio_list,
                                   cost_metric=CostMetric.mac, trainer=None)

    pruned_block1_conv2 = comp_layer_db.find_layer_by_name('reduced_reduced_block1_conv2/Conv2D')
    pruned_block2_conv1 = comp_layer_db.find_layer_by_name('reduced_reduced_block2_conv1/Conv2D')
    pruned_block2_conv2 = comp_layer_db.find_layer_by_name('reduced_block2_conv2/Conv2D')

    # input channels = 64 * 0.5 = 32
    # output channels = 64 * 0.5 = 32
    self.assertEqual(pruned_block1_conv2.weight_shape[1], 32)
    self.assertEqual(pruned_block1_conv2.weight_shape[0], 32)

    # input channels = 64 * 0.5 = 32
    # output channels = 128 * 0.5 = 64
    self.assertEqual(pruned_block2_conv1.weight_shape[1], 32)
    self.assertEqual(pruned_block2_conv1.weight_shape[0], 64)

    # input channels = 128 * 0.5 = 64
    # output channels = 128
    self.assertEqual(pruned_block2_conv2.weight_shape[1], 64)
    self.assertEqual(pruned_block2_conv2.weight_shape[0], 128)

    layer_db.model.close()
    comp_layer_db.model.close()

    # delete temp directory
    shutil.rmtree(str('./temp_meta/'))
def test_sort_on_occurrence(self):
    """ Test sorting of ops based on occurrence """

    AimetLogger.set_area_logger_level(AimetLogger.LogAreas.Winnow, logging.INFO)
    tf.compat.v1.reset_default_graph()

    orig_g = tf.Graph()

    with orig_g.as_default():
        _ = VGG16(weights=None, input_shape=(224, 224, 3), include_top=False)
        orig_init = tf.compat.v1.global_variables_initializer()

    # create sess with graph
    orig_sess = tf.compat.v1.Session(graph=orig_g)
    orig_sess.run(orig_init)

    # create layer database
    layer_db = LayerDatabase(model=orig_sess, input_shape=(1, 224, 224, 3), working_dir=None)

    block1_conv2 = layer_db.model.graph.get_operation_by_name('block1_conv2/Conv2D')
    block2_conv1 = layer_db.model.graph.get_operation_by_name('block2_conv1/Conv2D')
    block2_conv2 = layer_db.model.graph.get_operation_by_name('block2_conv2/Conv2D')
    block5_conv3 = layer_db.model.graph.get_operation_by_name('block5_conv3/Conv2D')

    # output shape in NCHW format
    block1_conv2_output_shape = block1_conv2.outputs[0].shape
    block2_conv1_output_shape = block2_conv1.outputs[0].shape
    block2_conv2_output_shape = block2_conv2.outputs[0].shape
    block5_conv3_output_shape = block5_conv3.outputs[0].shape

    # keeping compression ratio = None for all layers
    layer_comp_ratio_list = [
        LayerCompRatioPair(Layer(model=layer_db.model, op=block5_conv3,
                                 output_shape=block5_conv3_output_shape), None),
        LayerCompRatioPair(Layer(model=layer_db.model, op=block2_conv2,
                                 output_shape=block2_conv2_output_shape), None),
        LayerCompRatioPair(Layer(model=layer_db.model, op=block1_conv2,
                                 output_shape=block1_conv2_output_shape), None),
        LayerCompRatioPair(Layer(model=layer_db.model, op=block2_conv1,
                                 output_shape=block2_conv1_output_shape), None)
    ]

    input_op_names = ['input_1']
    output_op_names = ['block5_pool/MaxPool']
    dataset = unittest.mock.MagicMock()
    batch_size = unittest.mock.MagicMock()
    num_reconstruction_samples = unittest.mock.MagicMock()

    cp = InputChannelPruner(input_op_names=input_op_names, output_op_names=output_op_names,
                            data_set=dataset, batch_size=batch_size,
                            num_reconstruction_samples=num_reconstruction_samples,
                            allow_custom_downsample_ops=True)

    sorted_layer_comp_ratio_list = cp._sort_on_occurrence(layer_db.model, layer_comp_ratio_list)

    self.assertEqual(sorted_layer_comp_ratio_list[0].layer.module, block1_conv2)
    self.assertEqual(sorted_layer_comp_ratio_list[1].layer.module, block2_conv1)
    self.assertEqual(sorted_layer_comp_ratio_list[2].layer.module, block2_conv2)
    self.assertEqual(sorted_layer_comp_ratio_list[3].layer.module, block5_conv3)
    self.assertEqual(len(sorted_layer_comp_ratio_list), 4)

    layer_db.model.close()
    # delete temp directory
    shutil.rmtree(str('./temp_meta/'))
def test_select_per_layer_comp_ratios_with_spatial_svd_pruner(self):

    pruner = SpatialSvdPruner()
    eval_func = unittest.mock.MagicMock()
    rounding_algo = unittest.mock.MagicMock()
    eval_func.side_effect = [10, 20, 30, 40, 50, 60, 70, 80, 90,
                             11, 21, 31, 35, 40, 45, 50, 55, 60]
    rounding_algo.round.side_effect = [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9,
                                       0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9]

    # create tf.compat.v1.Session and initialize the weights and biases with zeros
    config = tf.compat.v1.ConfigProto()
    config.gpu_options.allow_growth = True

    # create session with graph
    sess = tf.compat.v1.Session(graph=tf.Graph(), config=config)

    with sess.graph.as_default():
        # by default, model will be constructed in default graph
        _ = mnist_tf_model.create_model(data_format='channels_last')
        sess.run(tf.compat.v1.global_variables_initializer())

    # Create a layer database
    layer_db = LayerDatabase(model=sess, input_shape=(1, 28, 28, 1), working_dir=None)

    selected_layers = [layer for layer in layer_db if layer.module.type == 'Conv2D']
    layer_db.mark_picked_layers(selected_layers)

    url, process = start_bokeh_server_session(8006)
    bokeh_session = BokehServerSession(url=url, session_id="compression")

    # Instantiate child
    greedy_algo = comp_ratio_select.GreedyCompRatioSelectAlgo(layer_db=layer_db, pruner=pruner,
                                                              cost_calculator=SpatialSvdCostCalculator(),
                                                              eval_func=eval_func,
                                                              eval_iterations=20,
                                                              cost_metric=CostMetric.mac,
                                                              target_comp_ratio=Decimal(0.4),
                                                              num_candidates=10,
                                                              use_monotonic_fit=True,
                                                              saved_eval_scores_dict=None,
                                                              comp_ratio_rounding_algo=rounding_algo,
                                                              use_cuda=False,
                                                              bokeh_session=bokeh_session)

    layer_comp_ratio_list, stats = greedy_algo.select_per_layer_comp_ratios()

    original_cost = SpatialSvdCostCalculator.compute_model_cost(layer_db)

    for layer in layer_db:
        if layer not in selected_layers:
            layer_comp_ratio_list.append(LayerCompRatioPair(layer, None))
    compressed_cost = SpatialSvdCostCalculator.calculate_compressed_cost(layer_db,
                                                                         layer_comp_ratio_list,
                                                                         CostMetric.mac)

    actual_compression_ratio = compressed_cost.mac / original_cost.mac
    self.assertTrue(math.isclose(Decimal(0.3), actual_compression_ratio, abs_tol=0.8))
    print('\n')
    for pair in layer_comp_ratio_list:
        print(pair)

    tf.compat.v1.reset_default_graph()
    sess.close()
    bokeh_session.server_session.close("test complete")
    os.killpg(os.getpgid(process.pid), signal.SIGTERM)
def test_select_per_layer_comp_ratios(self):

    pruner = unittest.mock.MagicMock()
    eval_func = unittest.mock.MagicMock()
    rounding_algo = unittest.mock.MagicMock()
    rounding_algo.round.side_effect = [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9,
                                       0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9]
    eval_func.side_effect = [10, 20, 30, 40, 50, 60, 70, 80, 90,
                             11, 21, 31, 35, 40, 45, 50, 55, 60]

    model = mnist_torch_model.Net()
    layer_db = LayerDatabase(model, input_shape=(1, 1, 28, 28))

    layer1 = layer_db.find_layer_by_name('conv1')
    layer2 = layer_db.find_layer_by_name('conv2')
    selected_layers = [layer1, layer2]
    layer_db.mark_picked_layers([layer1, layer2])

    try:
        os.remove('./data/greedy_selection_eval_scores_dict.pkl')
    except OSError:
        pass

    # Instantiate child
    greedy_algo = comp_ratio_select.GreedyCompRatioSelectAlgo(layer_db, pruner,
                                                              SpatialSvdCostCalculator(),
                                                              eval_func, 20, CostMetric.mac,
                                                              Decimal(0.6), 10, True, None,
                                                              rounding_algo, False,
                                                              bokeh_session=None)

    layer_comp_ratio_list, stats = greedy_algo.select_per_layer_comp_ratios()

    original_cost = SpatialSvdCostCalculator.compute_model_cost(layer_db)

    for layer in layer_db:
        if layer not in selected_layers:
            layer_comp_ratio_list.append(LayerCompRatioPair(layer, None))
    compressed_cost = SpatialSvdCostCalculator.calculate_compressed_cost(layer_db,
                                                                         layer_comp_ratio_list,
                                                                         CostMetric.mac)

    # re-arm the rounding mock: its side-effect list is consumed by the first run,
    # and the second selection run below needs fresh values
    rounding_algo.round.side_effect = [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9,
                                       0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9]

    actual_compression_ratio = compressed_cost.mac / original_cost.mac
    self.assertTrue(math.isclose(Decimal(0.6), actual_compression_ratio, abs_tol=0.05))
    self.assertTrue(os.path.isfile('./data/greedy_selection_eval_scores_dict.pkl'))

    print('\n')
    for pair in layer_comp_ratio_list:
        print(pair)

    # let's repeat with a saved eval_dict
    greedy_algo = comp_ratio_select.GreedyCompRatioSelectAlgo(layer_db, pruner,
                                                              SpatialSvdCostCalculator(),
                                                              eval_func, 20, CostMetric.mac,
                                                              Decimal(0.6), 10, True,
                                                              './data/greedy_selection_eval_scores_dict.pkl',
                                                              rounding_algo, False,
                                                              bokeh_session=None)

    layer_comp_ratio_list, stats = greedy_algo.select_per_layer_comp_ratios()

    original_cost = SpatialSvdCostCalculator.compute_model_cost(layer_db)

    for layer in layer_db:
        if layer not in selected_layers:
            layer_comp_ratio_list.append(LayerCompRatioPair(layer, None))
    compressed_cost = SpatialSvdCostCalculator.calculate_compressed_cost(layer_db,
                                                                         layer_comp_ratio_list,
                                                                         CostMetric.mac)

    actual_compression_ratio = compressed_cost.mac / original_cost.mac
    self.assertTrue(math.isclose(Decimal(0.6), actual_compression_ratio, abs_tol=0.05))

    print('\n')
    for pair in layer_comp_ratio_list:
        print(pair)