def test_choose_best_ranks(self):
    """Drive RankSelector.choose_best_rank end-to-end with every expensive
    collaborator (eval function, pruner, rank-candidate selection) mocked out."""
    model = MnistModel().to("cpu")
    layer_database = LayerDatabase(model=model, input_shape=(1, 1, 28, 28))
    # Monotonically increasing fake eval scores, one per forward-pass call.
    eval_scores = list(range(1, 21))
    run_model = unittest.mock.Mock(side_effect=eval_scores)
    with unittest.mock.patch('aimet_torch.layer_database.LayerDatabase'), \
            unittest.mock.patch('aimet_torch.svd.layer_selector_deprecated.LayerSelectorDeprecated'):
        svd = s.SvdImpl(model=model, run_model=run_model, run_model_iterations=1,
                        input_shape=(1, 1, 28, 28),
                        compression_type=aimet_torch.svd.svd_intf_defs_deprecated.CompressionTechnique.svd,
                        cost_metric=aimet_torch.svd.svd_intf_defs_deprecated.CostMetric.memory,
                        layer_selection_scheme=aimet_torch.svd.svd_intf_defs_deprecated.LayerSelectionScheme.top_n_layers,
                        num_layers=2)
        svd._network_cost = (500, 500)
        svd._svd_lib_ref = create_autospec(pymo.Svd, instance=True)
        with unittest.mock.patch('aimet_torch.svd.model_stats_calculator.ModelStats.compute_compression_ratio') as compute_compression_ratio, \
                unittest.mock.patch('aimet_torch.svd.svd_pruner_deprecated.ModelPruner.create_compressed_model') as create_compressed_model, \
                unittest.mock.patch('aimet_torch.svd.rank_selector.RankSelector._select_candidate_ranks') as select_candidate_ranks:
            select_candidate_ranks.return_value = 20
            # One fake compression ratio per rank index under consideration.
            compute_compression_ratio.side_effect = list(range(1, 21))
            create_compressed_model.return_value = None, None, None
            rank_selector = rank_select.RankSelector(svd_lib_ref=svd._svd_lib_ref)
            rank_selector.choose_best_rank(model=model, run_model=run_model,
                                           run_model_iterations=1, use_cuda=False,
                                           metric=aimet_torch.svd.svd_intf_defs_deprecated.CostMetric.memory,
                                           error_margin=1, baseline_perf=0.5,
                                           num_rank_indices=20, database=layer_database)
def test_split_manual_rank(self):
    """Split a single manually-ranked layer (fc1 at rank 9) and verify exactly
    one (layer, rank-pair) entry comes back from split_manual_rank."""
    model = MnistModel().to("cpu")
    run_model = mnist_model.evaluate
    logger.debug(self.id())
    intf_defs = aimet_torch.svd.svd_intf_defs_deprecated
    layer_database = LayerDatabase(model=model, input_shape=(1, 1, 28, 28))
    with unittest.mock.patch('aimet_torch.layer_database.LayerDatabase'), \
            unittest.mock.patch('aimet_torch.svd.layer_selector_deprecated.LayerSelectorDeprecated'):
        svd = s.SvdImpl(model=model, run_model=None, run_model_iterations=1,
                        input_shape=(1, 1, 28, 28),
                        compression_type=intf_defs.CompressionTechnique.svd,
                        cost_metric=intf_defs.CostMetric.memory,
                        layer_selection_scheme=intf_defs.LayerSelectionScheme.manual,
                        layers_to_compress=[model.fc1])
        layer_rank_list = [[model.fc1, 9]]
        with unittest.mock.patch('aimet_common.cost_calculator.CostCalculator.compute_network_cost') as compute_network_cost:
            compute_network_cost.return_value = cc.Cost(100, 200)
            svd._svd_lib_ref = create_autospec(pymo.Svd, instance=True)
            # Canned pymo split results: fc1 -> (400 x in_features) and (out_features x 400).
            svd._svd_lib_ref.SplitLayerWeights.return_value = [
                np.zeros((400, model.fc1.in_features)).flatten().tolist(),
                np.zeros((model.fc1.out_features, 400)).flatten().tolist()]
            svd._svd_lib_ref.SplitLayerBiases.return_value = [
                np.zeros(400).flatten().tolist(),
                np.zeros(model.fc1.out_features).flatten().tolist()]
            rank_selector = rank_select.RankSelector(svd_lib_ref=svd._svd_lib_ref)
            rank_data_list, svd_rank_pair_dict = rank_selector.split_manual_rank(
                model=model, run_model=run_model, run_model_iterations=1, use_cuda=False,
                metric=aimet_torch.svd.svd_intf_defs_deprecated.CostMetric.memory,
                database=layer_database, layer_rank_list=layer_rank_list)
            self.assertEqual(len(svd_rank_pair_dict), 1)
def test_split_fc_layer_without_mo(self):
    """Prune fc1 at rank 400 via DeprecatedSvdPruner and check that the two
    resulting linear layers have the expected shapes and attribute wiring."""
    AimetLogger.set_level_for_all_areas(logging.DEBUG)
    logger.debug(self.id())
    model = MnistModel().to("cpu")
    with unittest.mock.patch('aimet_torch.layer_database.LayerDatabase'), \
            unittest.mock.patch('aimet_torch.svd.layer_selector_deprecated.LayerSelectorDeprecated'):
        svd = s.SvdImpl(model=model, run_model=None, run_model_iterations=1,
                        input_shape=(1, 1, 28, 28),
                        compression_type=aimet_torch.svd.svd_intf_defs_deprecated.CompressionTechnique.svd,
                        cost_metric=aimet_torch.svd.svd_intf_defs_deprecated.CostMetric.memory,
                        layer_selection_scheme=aimet_torch.svd.svd_intf_defs_deprecated.LayerSelectionScheme.top_n_layers,
                        num_layers=2)
        fc1 = model.fc1
        layer_attr = Layer(fc1, id(fc1), [3136, 1024, 1, 1])
        svd._svd_lib_ref = create_autospec(pymo.Svd, instance=True)
        # Canned pymo outputs: fc1 splits into (400 x in) and (out x 400) matrices.
        svd._svd_lib_ref.SplitLayerWeights.return_value = [
            np.zeros((400, fc1.in_features)).flatten().tolist(),
            np.zeros((fc1.out_features, 400)).flatten().tolist()]
        svd._svd_lib_ref.SplitLayerBiases.return_value = [
            np.zeros(400).flatten().tolist(),
            np.zeros(fc1.out_features).flatten().tolist()]
        seq, layer_a_attr, layer_b_attr = svd_pruner_deprecated.DeprecatedSvdPruner.prune_layer(
            layer_attr, 400, svd_lib_ref=svd._svd_lib_ref)
        # First half: Linear(in -> 400); second half: Linear(400 -> out).
        self.assertEqual((400, fc1.in_features), seq[0].weight.shape)
        self.assertEqual([400], list(seq[0].bias.shape))
        self.assertEqual((fc1.out_features, 400), seq[1].weight.shape)
        self.assertEqual([fc1.out_features], list(seq[1].bias.shape))
        # Returned layer attributes point back at the new modules.
        self.assertEqual(layer_a_attr.module, seq[0])
        self.assertEqual(layer_b_attr.module, seq[1])
def test_model_allocation_gpu(self):
    """Verify Svd._is_model_on_gpu tracks full vs. partial device placement."""

    def build_svd(net):
        # Identical SvdImpl configuration for both the GPU and CPU models.
        return s.SvdImpl(model=net, run_model=mnist_model.evaluate, run_model_iterations=1,
                         input_shape=(1, 1, 28, 28),
                         compression_type=aimet_torch.svd.svd_intf_defs_deprecated.CompressionTechnique.svd,
                         cost_metric=aimet_torch.svd.svd_intf_defs_deprecated.CostMetric.memory,
                         layer_selection_scheme=aimet_torch.svd.svd_intf_defs_deprecated.LayerSelectionScheme.top_n_layers,
                         num_layers=2)

    model = MnistModel().to("cuda")
    svd = build_svd(model)
    self.assertTrue(svd._is_model_on_gpu())
    # Moving even one layer back to the CPU must make the check fail.
    model.conv1.to("cpu")
    self.assertFalse(svd._is_model_on_gpu())

    model = MnistModel().to("cpu")
    svd = build_svd(model)
    self.assertFalse(svd._is_model_on_gpu())
    # Moving the entire model onto the GPU must make the check pass.
    model.cuda()
    self.assertTrue(svd._is_model_on_gpu())
def test_create_compressed_model(self):
    # Verifies SvdImpl._create_compressed_model: conv2 and fc2 are each replaced by a
    # two-layer nn.Sequential split at ranks 31 and 9 respectively, while untouched
    # layers (conv1, fc1) keep their original shapes and the source model is not mutated.
    AimetLogger.set_level_for_all_areas(logging.DEBUG)
    logger.debug(self.id())
    model = MnistModel().to("cpu")
    with unittest.mock.patch('aimet_torch.svd.layer_database.LayerDatabase'):
        with unittest.mock.patch('aimet_torch.svd.layer_selector_deprecated.LayerSelectorDeprecated'):
            svd = s.SvdImpl(model=model, run_model=None, run_model_iterations=1,
                            input_shape=(1, 1, 28, 28),
                            compression_type=aimet_torch.svd.svd_intf_defs_deprecated.CompressionTechnique.svd,
                            cost_metric=aimet_torch.svd.svd_intf_defs_deprecated.CostMetric.memory,
                            layer_selection_scheme=aimet_torch.svd.svd_intf_defs_deprecated.LayerSelectionScheme.top_n_layers,
                            num_layers=2)
            # Stub out layer picking so the test fully controls which layers get compressed.
            ls.LayerSelectorDeprecated._pick_compression_layers = create_autospec(
                ls.LayerSelectorDeprecated._pick_compression_layers)
            # Hand-built layer descriptors for fc2 and conv2.
            layer_attr1 = Layer(model.fc2, id(model.fc2), model.fc2.weight.shape)
            layer_attr1.parent_module = model
            layer_attr1.var_name_of_module_in_parent = "fc2"
            layer_attr1.output_shape = [0, 0, 1, 1]
            layer_attr1.name = 'fc2'
            layer_attr2 = Layer(model.conv2, id(model.conv2), model.conv2.weight.shape)
            layer_attr2.parent_module = model
            layer_attr2.var_name_of_module_in_parent = "conv2"
            layer_attr2.name = 'conv2'
            # NOTE(review): this overwrites fc2's output_shape assigned above; 14x14 is
            # conv2's output size, so this looks like it was meant to be layer_attr2 —
            # confirm against _create_compressed_model before changing.
            layer_attr1.output_shape = [0, 0, 14, 14]
            ls.LayerSelectorDeprecated._pick_compression_layers.return_value = [layer_attr1,
                                                                                layer_attr2]
            svd._compressible_layers = {id(model.conv2): layer_attr2,
                                        id(model.fc2): layer_attr1}
            ls.LayerSelectorDeprecated._perform_layer_selection(model)
            svd._select_candidate_ranks(20)
            # Requested split ranks: conv2 at rank 31, fc2 at rank 9.
            svd_rank_pair_dict = {'conv2': (31,0), 'fc2': (9,0)}
            c_model, c_layer_attr, _ = svd._create_compressed_model(svd_rank_pair_dict)
            # Compression must deep-copy: the compressed model shares no modules with the
            # original, and svd's own model reference is not replaced by a Sequential.
            self.assertTrue(c_model is not model)
            self.assertTrue(c_model.conv1 is not model.conv1)
            self.assertTrue(c_model.conv2 is not model.conv2)
            self.assertFalse(isinstance(svd._model, nn.Sequential))
            # fc2 -> Linear(1024 -> 9) followed by Linear(9 -> 10).
            self.assertEqual((9, 1024), c_model.fc2[0].weight.shape)
            self.assertEqual([9], list(c_model.fc2[0].bias.shape))
            self.assertEqual((10, 9), c_model.fc2[1].weight.shape)
            self.assertEqual([10], list(c_model.fc2[1].bias.shape))
            # conv2 -> 1x1 rank-31 projection followed by the 5x5 conv restoring 64 channels.
            self.assertEqual((31, 32, 1, 1), c_model.conv2[0].weight.shape)
            self.assertEqual([31], list(c_model.conv2[0].bias.shape))
            self.assertEqual((64, 31, 5, 5), c_model.conv2[1].weight.shape)
            self.assertEqual([64], list(c_model.conv2[1].bias.shape))
            # Uncompressed layers keep their original weight shapes.
            self.assertEqual(svd._model.conv1.weight.shape, c_model.conv1.weight.shape)
            self.assertEqual(svd._model.fc1.weight.shape, c_model.fc1.weight.shape)
            # Expect double the number of layers in layer_attr_list
            self.assertEqual(4, len(c_layer_attr))
def test_svd_sequential_with_mo(self):
    """End-to-end auto-rank SVD compression of a sequential MNIST model."""
    logger.debug(self.id())
    model = MnistSequentialModel().to("cpu")
    intf_defs = aimet_torch.svd.svd_intf_defs_deprecated
    svd = s.SvdImpl(model=model, run_model=mnist_model.evaluate, run_model_iterations=1,
                    input_shape=(1, 1, 28, 28),
                    compression_type=intf_defs.CompressionTechnique.svd,
                    cost_metric=intf_defs.CostMetric.memory,
                    layer_selection_scheme=intf_defs.LayerSelectionScheme.top_x_percent,
                    percent_thresh=60)
    c_model, svd_stats = svd.compress_net(
        rank_selection_scheme=intf_defs.RankSelectionScheme.auto,
        num_rank_indices=20, error_margin=10)
    # Log the SVD statistics; passing logger=None falls back to the root logger.
    svd_stats.pretty_print(logger=None)
def test_split_conv_layer_with_mo(self):
    """Prune conv2 at rank 28 through the real pymo path and verify the shapes,
    stride, padding and kernel size of the resulting conv pair."""
    logger.debug(self.id())
    model = mnist_model.Net().to("cpu")
    layer_database = LayerDatabase(model=model, input_shape=(1, 1, 28, 28))
    with unittest.mock.patch('aimet_torch.svd.layer_selector_deprecated.LayerSelectorDeprecated'):
        svd = s.SvdImpl(model=model, run_model=mnist_model.evaluate, run_model_iterations=1,
                        input_shape=(1, 1, 28, 28),
                        compression_type=aimet_torch.svd.svd_intf_defs_deprecated.CompressionTechnique.svd,
                        cost_metric=aimet_torch.svd.svd_intf_defs_deprecated.CostMetric.memory,
                        layer_selection_scheme=aimet_torch.svd.svd_intf_defs_deprecated.LayerSelectionScheme.top_n_layers,
                        num_layers=2)
        conv2 = layer_database.find_layer_by_module(model.conv2)
        pymo_utils.PymoSvdUtils.configure_layers_in_pymo_svd(
            [conv2], aimet_common.defs.CostMetric.mac, svd._svd_lib_ref)
        seq, conv_a, conv_b = svd_pruner_deprecated.DeprecatedSvdPruner.prune_layer(
            conv2, 28, svd._svd_lib_ref)
        print('\n')
        # Show a sample of the first split conv's weights for manual inspection.
        print(conv_a.module.weight.detach().numpy().flatten()[0:10])
        # First conv is the rank-28 1x1 projection; second restores the original output.
        self.assertEqual((28, model.conv2.in_channels, 1, 1), conv_a.module.weight.shape)
        self.assertEqual([28], list(conv_a.module.bias.shape))
        self.assertEqual((model.conv2.out_channels, 28, 5, 5), conv_b.module.weight.shape)
        self.assertEqual([model.conv2.out_channels], list(conv_b.module.bias.shape))
        # Stride is inherited by both halves; padding and kernel size stay on the second.
        self.assertEqual(model.conv2.stride, conv_a.module.stride)
        self.assertEqual(model.conv2.stride, conv_b.module.stride)
        self.assertEqual((0, 0), conv_a.module.padding)
        self.assertEqual(model.conv2.padding, conv_b.module.padding)
        self.assertEqual((1, 1), conv_a.module.kernel_size)
        self.assertEqual(model.conv2.kernel_size, conv_b.module.kernel_size)
def compress_model(model, run_model, run_model_iterations, input_shape,
                   compression_type, cost_metric, layer_selection_scheme,
                   rank_selection_scheme, **kw_layer_rank_params):
    """ Runs rank selection on the model, and compresses it using the method and parameters provided

    :param model: The model which needs to be compressed
    :param run_model: The evaluation function that needs to be passed for one forward pass
    :param run_model_iterations: The number of iterations of forward pass for the run_model
    :param input_shape: Shape of the input to the model
    :param compression_type: Enum argument. Options available: svd , ssvd.
    :param cost_metric: Enum argument. Options available: mac, memory
    :param layer_selection_scheme: Enum argument. Options available: manual, top_n_layers, top_x_percent
    :param rank_selection_scheme: Enum argument. Options available: manual, auto
    :param kw_layer_rank_params: Params for layer and rank selection. Params depend on modes selected
    :return: compressed model and Model statistics
    :raises ValueError: if run_model_iterations is not at least 1, or if
        rank_selection_scheme is not one of the supported enum values

    **Note regarding kw_layer_rank_params**:

    - If the layer_selection_scheme is manual then user has to specify the list of layers by using-
      layers_to_compress= [list of layers],
    - If the layer_selection_scheme is top_n_layers then the user has to specify the number of layers
      as num_layers= <number>
    - If the layer_selection_scheme is top_x_percent then the user has to specify percentage threshold
      by using percent_thresh= <number>
    - If the mode is manual then user has to specify the layers and the respective ranks by specifying a
      list as layer_rank = [[layer, rank]]
    - If the mode is auto then user has to specify maximum rank till the optimum rank search has to happen
      as max_ranks_error_margin= [maximum rank, error margin]
    """
    Svd._validate_layer_rank_params(model, layer_selection_scheme, rank_selection_scheme,
                                    **kw_layer_rank_params)

    # Sanity check for run_model_iterations
    if run_model_iterations <= 0:
        raise ValueError("run_model_iterations: {} unexpected value. "
                         "Expect at least 1 iteration".format(run_model_iterations))

    # Instantiate the SVD impl class for the chosen rank-selection mode
    if rank_selection_scheme == rank_selection_scheme.auto:
        svd_obj = svd_impl.SvdImpl(model, run_model, run_model_iterations, input_shape,
                                   compression_type, cost_metric, layer_selection_scheme,
                                   **kw_layer_rank_params)
    elif rank_selection_scheme == rank_selection_scheme.manual:
        # In manual mode the layers to compress are implied by the layer/rank pairs,
        # so layer selection is forced to manual with that derived layer list.
        layers_to_compress = [layer for layer, _ in kw_layer_rank_params['layer_rank_list']]
        svd_obj = svd_impl.SvdImpl(model, run_model, run_model_iterations, input_shape,
                                   compression_type, cost_metric, LayerSelectionScheme.manual,
                                   layers_to_compress=layers_to_compress)
    else:
        # Fail fast with a clear message instead of falling through to an
        # UnboundLocalError on compressed_model/stats below.
        raise ValueError("Unsupported rank_selection_scheme: {}".format(rank_selection_scheme))

    compressed_model, stats = svd_obj.compress_net(rank_selection_scheme=rank_selection_scheme,
                                                   **kw_layer_rank_params)
    return compressed_model, stats