def test_choose_best_ranks(self):
        """
        Smoke test for RankSelector.choose_best_rank() with its collaborators mocked.

        The evaluation callback, the compression-ratio computation, the compressed-model
        creation and the candidate-rank selection are all replaced by mocks. The test makes
        no assertions of its own: it passes when choose_best_rank() runs to completion.
        """
        defs = aimet_torch.svd.svd_intf_defs_deprecated
        input_shape = (1, 1, 28, 28)

        model = MnistModel().to("cpu")
        layer_database = LayerDatabase(model=model, input_shape=input_shape)

        # Every call of the mocked evaluation function yields the next value of 1..20
        run_model = unittest.mock.Mock(side_effect=list(range(1, 21)))

        # Build the SvdImpl while the layer database and the layer selector are patched out
        with unittest.mock.patch('aimet_torch.layer_database.LayerDatabase'), \
                unittest.mock.patch('aimet_torch.svd.layer_selector_deprecated.LayerSelectorDeprecated'):
            svd = s.SvdImpl(model=model, run_model=run_model, run_model_iterations=1,
                            input_shape=input_shape,
                            compression_type=defs.CompressionTechnique.svd,
                            cost_metric=defs.CostMetric.memory,
                            layer_selection_scheme=defs.LayerSelectionScheme.top_n_layers,
                            num_layers=2)

        svd._network_cost = (500, 500)
        svd._svd_lib_ref = create_autospec(pymo.Svd, instance=True)

        with unittest.mock.patch(
                'aimet_torch.svd.model_stats_calculator.ModelStats.compute_compression_ratio') as ratio_mock, \
                unittest.mock.patch(
                    'aimet_torch.svd.svd_pruner_deprecated.ModelPruner.create_compressed_model') as pruner_mock, \
                unittest.mock.patch(
                    'aimet_torch.svd.rank_selector.RankSelector._select_candidate_ranks') as candidates_mock:
            candidates_mock.return_value = 20
            # Successive compression-ratio computations also return 1..20
            ratio_mock.side_effect = list(range(1, 21))
            pruner_mock.return_value = (None, None, None)

            selector = rank_select.RankSelector(svd_lib_ref=svd._svd_lib_ref)
            selector.choose_best_rank(model=model, run_model=run_model, run_model_iterations=1,
                                      use_cuda=False, metric=defs.CostMetric.memory, error_margin=1,
                                      baseline_perf=0.5, num_rank_indices=20, database=layer_database)
    def test_split_manual_rank(self):
        """
        Check that RankSelector.split_manual_rank() yields one rank pair for one manually ranked layer.

        SvdImpl is built with LayerDatabase and LayerSelectorDeprecated patched out, the network
        cost computation is mocked to return Cost(100, 200), and the SVD library reference is an
        autospec'd pymo.Svd whose split weights and biases are all-zero lists.
        """
        model = MnistModel().to("cpu")
        run_model = mnist_model.evaluate
        logger.debug(self.id())

        defs = aimet_torch.svd.svd_intf_defs_deprecated
        input_shape = (1, 1, 28, 28)
        split_size = 400

        layer_database = LayerDatabase(model=model, input_shape=input_shape)
        with unittest.mock.patch('aimet_torch.layer_database.LayerDatabase'), \
                unittest.mock.patch('aimet_torch.svd.layer_selector_deprecated.LayerSelectorDeprecated'):
            svd = s.SvdImpl(model=model, run_model=None, run_model_iterations=1, input_shape=input_shape,
                            compression_type=defs.CompressionTechnique.svd,
                            cost_metric=defs.CostMetric.memory,
                            layer_selection_scheme=defs.LayerSelectionScheme.manual,
                            layers_to_compress=[model.fc1])

        # A single layer (fc1) with a manually chosen rank of 9
        layer_rank_list = [[model.fc1, 9]]

        cost_target = 'aimet_common.cost_calculator.CostCalculator.compute_network_cost'
        with unittest.mock.patch(cost_target) as network_cost_mock:
            network_cost_mock.return_value = cc.Cost(100, 200)

            lib_ref = create_autospec(pymo.Svd, instance=True)
            svd._svd_lib_ref = lib_ref

            # Flattened zero weights for the two layers produced by the split
            lib_ref.SplitLayerWeights.return_value = [
                np.zeros((split_size, model.fc1.in_features)).ravel().tolist(),
                np.zeros((model.fc1.out_features, split_size)).ravel().tolist(),
            ]
            # Matching zero biases
            lib_ref.SplitLayerBiases.return_value = [
                np.zeros(split_size).ravel().tolist(),
                np.zeros(model.fc1.out_features).ravel().tolist(),
            ]

            selector = rank_select.RankSelector(svd_lib_ref=svd._svd_lib_ref)
            _, rank_pairs = selector.split_manual_rank(model=model, run_model=run_model,
                                                       run_model_iterations=1, use_cuda=False,
                                                       metric=defs.CostMetric.memory,
                                                       database=layer_database,
                                                       layer_rank_list=layer_rank_list)
            self.assertEqual(len(rank_pairs), 1)
    def test_split_fc_layer_without_mo(self):
        """
        Split the fc1 layer with DeprecatedSvdPruner.prune_layer() against a mocked SVD library.

        The autospec'd pymo.Svd returns all-zero split weights and biases; the test then checks
        the weight/bias shapes of the two resulting layers and that the returned layer
        attributes point at those layers.
        """
        AimetLogger.set_level_for_all_areas(logging.DEBUG)
        logger.debug(self.id())
        model = MnistModel().to("cpu")

        defs = aimet_torch.svd.svd_intf_defs_deprecated
        rank = 400

        with unittest.mock.patch('aimet_torch.layer_database.LayerDatabase'), \
                unittest.mock.patch('aimet_torch.svd.layer_selector_deprecated.LayerSelectorDeprecated'):
            svd = s.SvdImpl(model=model, run_model=None, run_model_iterations=1, input_shape=(1, 1, 28, 28),
                            compression_type=defs.CompressionTechnique.svd,
                            cost_metric=defs.CostMetric.memory,
                            layer_selection_scheme=defs.LayerSelectionScheme.top_n_layers,
                            num_layers=2)

        layer_attr = Layer(model.fc1, id(model.fc1), [3136, 1024, 1, 1])

        lib_ref = create_autospec(pymo.Svd, instance=True)
        svd._svd_lib_ref = lib_ref

        # Flattened zero weights / biases returned by the mocked library for the two split layers
        lib_ref.SplitLayerWeights.return_value = [
            np.zeros((rank, model.fc1.in_features)).ravel().tolist(),
            np.zeros((model.fc1.out_features, rank)).ravel().tolist(),
        ]
        lib_ref.SplitLayerBiases.return_value = [
            np.zeros(rank).ravel().tolist(),
            np.zeros(model.fc1.out_features).ravel().tolist(),
        ]

        seq, layer_a_attr, layer_b_attr = svd_pruner_deprecated.DeprecatedSvdPruner.prune_layer(
            layer_attr, rank, svd_lib_ref=svd._svd_lib_ref)

        first, second = seq[0], seq[1]

        # First layer maps in_features -> rank, second maps rank -> out_features
        self.assertEqual((rank, model.fc1.in_features), first.weight.shape)
        self.assertEqual([rank], list(first.bias.shape))
        self.assertEqual((model.fc1.out_features, rank), second.weight.shape)
        self.assertEqual([model.fc1.out_features], list(second.bias.shape))

        self.assertEqual(layer_a_attr.module, first)
        self.assertEqual(layer_b_attr.module, second)
    def test_model_allocation_gpu(self):
        """
        Verify that SvdImpl._is_model_on_gpu() follows the placement of the model's layers.

        Two scenarios are exercised:
          1. a model created on the GPU reports True, and False once one layer is moved to the CPU;
          2. a model created on the CPU reports False, and True once the whole model is moved to the GPU.

        The test is skipped when no CUDA device is available, because it has to place a model
        on the GPU.
        """
        import torch  # function-scope import: only needed for the CUDA availability guard

        if not torch.cuda.is_available():
            self.skipTest("CUDA is not available; cannot place the model on a GPU")

        defs = aimet_torch.svd.svd_intf_defs_deprecated

        def build_svd(net):
            # Both scenarios use the same SvdImpl configuration; only the model differs
            return s.SvdImpl(model=net, run_model=mnist_model.evaluate, run_model_iterations=1,
                             input_shape=(1, 1, 28, 28),
                             compression_type=defs.CompressionTechnique.svd,
                             cost_metric=defs.CostMetric.memory,
                             layer_selection_scheme=defs.LayerSelectionScheme.top_n_layers,
                             num_layers=2)

        # Scenario 1: model starts on the GPU
        model = MnistModel().to("cuda")
        svd = build_svd(model)
        self.assertTrue(svd._is_model_on_gpu())
        # copy one layer to CPU
        model.conv1.to("cpu")
        self.assertFalse(svd._is_model_on_gpu())

        # Scenario 2: model starts on the CPU
        model = MnistModel().to("cpu")
        svd = build_svd(model)
        self.assertFalse(svd._is_model_on_gpu())
        # copy entire model on GPU
        model.cuda()
        self.assertTrue(svd._is_model_on_gpu())
    def test_create_compressed_model(self):
        """
        Check that SvdImpl._create_compressed_model() splits conv2 and fc2 at the requested ranks.

        The layer selector's _pick_compression_layers is replaced by an autospec'd mock that
        returns hand-built Layer attributes for fc2 and conv2. The compressed model must be a
        new object, conv2 and fc2 must each have become a pair of layers with the expected
        weight/bias shapes, conv1 and fc1 must keep their shapes, and four layer attributes
        (two per split layer) must be returned.
        """
        AimetLogger.set_level_for_all_areas(logging.DEBUG)
        logger.debug(self.id())
        model = MnistModel().to("cpu")

        # NOTE(review): the other tests in this file patch 'aimet_torch.layer_database.LayerDatabase';
        # confirm that the 'aimet_torch.svd.layer_database' path used here is the intended target.
        with unittest.mock.patch('aimet_torch.svd.layer_database.LayerDatabase'):
            with unittest.mock.patch('aimet_torch.svd.layer_selector_deprecated.LayerSelectorDeprecated'):
                svd = s.SvdImpl(model=model, run_model=None, run_model_iterations=1, input_shape=(1, 1, 28, 28),
                                compression_type=aimet_torch.svd.svd_intf_defs_deprecated.CompressionTechnique.svd, cost_metric=aimet_torch.svd.svd_intf_defs_deprecated.CostMetric.memory,
                                layer_selection_scheme=aimet_torch.svd.svd_intf_defs_deprecated.LayerSelectionScheme.top_n_layers, num_layers=2)

        # Swap in a mock for the layer picker for the duration of this test only. Registering
        # the patcher's stop() as a cleanup restores the real method afterwards, so the mock
        # does not leak into tests that run later.
        pick_layers_mock = create_autospec(ls.LayerSelectorDeprecated._pick_compression_layers)
        patcher = unittest.mock.patch.object(ls.LayerSelectorDeprecated, '_pick_compression_layers',
                                             new=pick_layers_mock)
        patcher.start()
        self.addCleanup(patcher.stop)

        layer_attr1 = Layer(model.fc2, id(model.fc2), model.fc2.weight.shape)
        layer_attr1.parent_module = model
        layer_attr1.var_name_of_module_in_parent = "fc2"
        layer_attr1.output_shape = [0, 0, 1, 1]
        layer_attr1.name = 'fc2'

        layer_attr2 = Layer(model.conv2, id(model.conv2), model.conv2.weight.shape)
        layer_attr2.parent_module = model
        layer_attr2.var_name_of_module_in_parent = "conv2"
        layer_attr2.name = 'conv2'
        # The conv2 output shape belongs on layer_attr2; assigning it to layer_attr1 would
        # overwrite the fc2 output shape set above and leave conv2 without one.
        layer_attr2.output_shape = [0, 0, 14, 14]

        pick_layers_mock.return_value = [layer_attr1, layer_attr2]

        svd._compressible_layers = {id(model.conv2): layer_attr2,
                                    id(model.fc2):   layer_attr1}

        # NOTE(review): invoked on the class with the model as the first positional argument,
        # exactly as before — confirm this matches the signature of _perform_layer_selection.
        ls.LayerSelectorDeprecated._perform_layer_selection(model)

        svd._select_candidate_ranks(20)
        svd_rank_pair_dict = {'conv2': (31, 0), 'fc2': (9, 0)}
        c_model, c_layer_attr, _ = svd._create_compressed_model(svd_rank_pair_dict)

        # The compressed model and its layers must be new objects
        self.assertIsNot(c_model, model)
        self.assertIsNot(c_model.conv1, model.conv1)
        self.assertIsNot(c_model.conv2, model.conv2)

        self.assertFalse(isinstance(svd._model, nn.Sequential))

        # fc2 split at rank 9
        self.assertEqual((9, 1024), c_model.fc2[0].weight.shape)
        self.assertEqual([9], list(c_model.fc2[0].bias.shape))
        self.assertEqual((10, 9), c_model.fc2[1].weight.shape)
        self.assertEqual([10], list(c_model.fc2[1].bias.shape))

        # conv2 split at rank 31
        self.assertEqual((31, 32, 1, 1), c_model.conv2[0].weight.shape)
        self.assertEqual([31], list(c_model.conv2[0].bias.shape))
        self.assertEqual((64, 31, 5, 5), c_model.conv2[1].weight.shape)
        self.assertEqual([64], list(c_model.conv2[1].bias.shape))

        # Layers that were not compressed keep their shapes
        self.assertEqual(svd._model.conv1.weight.shape, c_model.conv1.weight.shape)
        self.assertEqual(svd._model.fc1.weight.shape, c_model.fc1.weight.shape)

        # Expect double the number of layers in layer_attr_list
        self.assertEqual(4, len(c_layer_attr))
    def test_svd_sequential_with_mo(self):
        """
        Run SvdImpl.compress_net() on the sequential MNIST model and print the statistics.

        Layers are selected with the top_x_percent scheme (threshold 60) and ranks with the
        auto scheme. The test makes no assertions: it passes when compression and the
        statistics printout complete.
        """
        logger.debug(self.id())

        defs = aimet_torch.svd.svd_intf_defs_deprecated
        model = MnistSequentialModel().to("cpu")

        svd = s.SvdImpl(model=model, run_model=mnist_model.evaluate, run_model_iterations=1,
                        input_shape=(1, 1, 28, 28),
                        compression_type=defs.CompressionTechnique.svd,
                        cost_metric=defs.CostMetric.memory,
                        layer_selection_scheme=defs.LayerSelectionScheme.top_x_percent,
                        percent_thresh=60)

        _, stats = svd.compress_net(rank_selection_scheme=defs.RankSelectionScheme.auto,
                                    num_rank_indices=20, error_margin=10)

        # Log the SVD statistics. No logger is passed in on purpose:
        # in this case the default root logger will be used.
        stats.pretty_print(logger=None)
    def test_split_conv_layer_with_mo(self):
        """
        Split conv2 at rank 28 with DeprecatedSvdPruner.prune_layer() using the SvdImpl's own library reference.

        Verifies the weight/bias shapes, strides, paddings and kernel sizes of the two
        convolutions produced by the split, and prints the first ten weights of the first one.
        """
        logger.debug(self.id())
        model = mnist_model.Net().to("cpu")

        defs = aimet_torch.svd.svd_intf_defs_deprecated
        input_shape = (1, 1, 28, 28)
        rank = 28

        layer_database = LayerDatabase(model=model, input_shape=input_shape)

        with unittest.mock.patch('aimet_torch.svd.layer_selector_deprecated.LayerSelectorDeprecated'):
            svd = s.SvdImpl(model=model, run_model=mnist_model.evaluate, run_model_iterations=1,
                            input_shape=input_shape,
                            compression_type=defs.CompressionTechnique.svd,
                            cost_metric=defs.CostMetric.memory,
                            layer_selection_scheme=defs.LayerSelectionScheme.top_n_layers,
                            num_layers=2)

        # Register conv2 with the SVD library before asking it to split the layer
        conv2_layer = layer_database.find_layer_by_module(model.conv2)
        pymo_utils.PymoSvdUtils.configure_layers_in_pymo_svd([conv2_layer], aimet_common.defs.CostMetric.mac,
                                                             svd._svd_lib_ref)

        _, conv_a, conv_b = svd_pruner_deprecated.DeprecatedSvdPruner.prune_layer(conv2_layer, rank,
                                                                                 svd._svd_lib_ref)
        first_conv, second_conv = conv_a.module, conv_b.module

        # Dump the first ten weights of the first split layer
        print('\n')
        print(first_conv.weight.detach().numpy().flatten()[:10])

        # Shapes: in_channels -> rank with a 1x1 kernel, then rank -> out_channels with the original kernel
        self.assertEqual((rank, model.conv2.in_channels, 1, 1), first_conv.weight.shape)
        self.assertEqual([rank], list(first_conv.bias.shape))
        self.assertEqual((model.conv2.out_channels, rank, 5, 5), second_conv.weight.shape)
        self.assertEqual([model.conv2.out_channels], list(second_conv.bias.shape))

        self.assertEqual(model.conv2.stride, first_conv.stride)
        self.assertEqual(model.conv2.stride, second_conv.stride)

        self.assertEqual((0, 0), first_conv.padding)
        self.assertEqual(model.conv2.padding, second_conv.padding)

        self.assertEqual((1, 1), first_conv.kernel_size)
        self.assertEqual(model.conv2.kernel_size, second_conv.kernel_size)
    # Example #8
    # 0
    def compress_model(model, run_model, run_model_iterations, input_shape,
                       compression_type, cost_metric, layer_selection_scheme,
                       rank_selection_scheme, **kw_layer_rank_params):
        """
        Runs rank selection on the model, and compresses it using the method and parameters provided

        :param model: The model which needs to be compressed
        :param run_model: The evaluation function that needs to be passed for one forward pass
        :param run_model_iterations: The number of iterations of forward pass for the run_model
        :param input_shape: Shape of the input to the model
        :param compression_type: Enum argument. Options available: svd , ssvd.
        :param cost_metric: Enum argument. Options available: mac, memory
        :param layer_selection_scheme: Enum argument. Options available: manual, top_n_layers, top_x_percent
        :param rank_selection_scheme: Enum argument. Options available: manual, auto
        :param kw_layer_rank_params: Params for layer and rank selection. Params depend on modes selected
        :return: compressed model and Model statistics
        :raises ValueError: if run_model_iterations is not positive, or if rank_selection_scheme is
                            neither auto nor manual

        **Note regarding kw_layer_rank_params**:
         - If the layer_selection_scheme is manual then user has to specify the list of layers by using- layers_to_compress= [list of layers],
         - If the layer_selection_scheme is top_n_layers then the user has to specify the number of layers as num_layers= <number>
         - If the layer_selection_scheme is top_x_percent then the user has to specify percentage threshold by using percent_thresh= <number>
         - If the rank_selection_scheme is manual then user has to specify the layers and the respective ranks by specifying a list as layer_rank_list= [[layer, rank]].
           In this mode the layers to compress are taken from layer_rank_list and the layer selection scheme is forced to manual.
         - If the rank_selection_scheme is auto then the keyword params are forwarded unchanged to both the SvdImpl constructor and compress_net
           (the previous documentation named max_ranks_error_margin= [maximum rank, error margin] here - TODO confirm against SvdImpl.compress_net)

        """
        Svd._validate_layer_rank_params(model, layer_selection_scheme,
                                        rank_selection_scheme,
                                        **kw_layer_rank_params)

        # Sanity check for run_model_iterations
        if run_model_iterations <= 0:
            raise ValueError(
                "run_model_iterations: {} unexpected value. "
                "Expect at least 1 iteration".format(run_model_iterations))

        # Instantiate the SVD impl class
        if rank_selection_scheme == rank_selection_scheme.auto:
            svd_obj = svd_impl.SvdImpl(model, run_model, run_model_iterations,
                                       input_shape, compression_type,
                                       cost_metric, layer_selection_scheme,
                                       **kw_layer_rank_params)

        elif rank_selection_scheme == rank_selection_scheme.manual:
            # With manual ranks, the layers to compress are exactly those listed with a rank
            layers_to_compress = [
                layer for layer, _ in kw_layer_rank_params['layer_rank_list']
            ]
            svd_obj = svd_impl.SvdImpl(model,
                                       run_model,
                                       run_model_iterations,
                                       input_shape,
                                       compression_type,
                                       cost_metric,
                                       LayerSelectionScheme.manual,
                                       layers_to_compress=layers_to_compress)

        else:
            # Fail with a clear message instead of an UnboundLocalError at the return statement
            raise ValueError(
                "rank_selection_scheme: {} unexpected value. "
                "Expect auto or manual".format(rank_selection_scheme))

        # Both schemes run the compression the same way; only the SvdImpl setup differs
        compressed_model, stats = svd_obj.compress_net(
            rank_selection_scheme=rank_selection_scheme,
            **kw_layer_rank_params)
        return compressed_model, stats