import pandas as pd
import torch

import distiller


def weights_sparsity_summary(model,
                             return_total_sparsity=False,
                             param_dims=[2, 4]):

    df = pd.DataFrame(columns=[
        'Name', 'Shape', 'NNZ (dense)', 'NNZ (sparse)', 'Cols (%)', 'Rows (%)',
        'Ch (%)', '2D (%)', '3D (%)', 'Fine (%)', 'Std', 'Mean', 'Abs-Mean'
    ])
    pd.set_option('display.precision', 2)
    params_size = 0
    sparse_params_size = 0
    summary_param_types = ['weight', 'bias']
    for name, param in model.state_dict().items():
        # Extract just the actual parameter's name, which in this context we treat as its "type"
        curr_param_type = name.split('.')[-1]
        if param.dim() in param_dims and curr_param_type in summary_param_types:
            _density = distiller.density(param)
            params_size += torch.numel(param)
            sparse_params_size += param.numel() * _density
            df.loc[len(df.index)] = ([
                name,
                distiller.size_to_str(param.size()),
                torch.numel(param),
                int(_density * param.numel()),
                distiller.sparsity_cols(param) * 100,
                distiller.sparsity_rows(param) * 100,
                distiller.sparsity_ch(param) * 100,
                distiller.sparsity_2D(param) * 100,
                distiller.sparsity_3D(param) * 100,
                (1 - _density) * 100,
                param.std().item(),
                param.mean().item(),
                param.abs().mean().item()
            ])

    total_sparsity = (1 - sparse_params_size / params_size) * 100

    df.loc[len(df.index)] = ([
        'Total sparsity:', '-', params_size,
        int(sparse_params_size), 0, 0, 0, 0, 0, total_sparsity, 0, 0, 0
    ])

    if return_total_sparsity:
        return df, total_sparsity
    return df
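
For illustration, a quick usage sketch; the tiny two-layer model below is hypothetical and exists only to exercise the summary (any module with a state_dict() works the same way):

import torch.nn as nn

model = nn.Sequential(nn.Conv2d(3, 8, 3), nn.Linear(8, 2))
with torch.no_grad():
    model[0].weight[0].zero_()  # zero one filter so the summary shows some sparsity
df, total = weights_sparsity_summary(model, return_total_sparsity=True)
print(df.to_string(index=False))
print('total sparsity: %.2f%%' % total)
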
def test_sparsity():
    zeros = torch.zeros(2, 3, 5, 6)
    print(distiller.sparsity(zeros))
    assert distiller.sparsity(zeros) == 1.0
    assert distiller.sparsity_3D(zeros) == 1.0
    assert distiller.density_3D(zeros) == 0.0
    ones = torch.ones(12, 43, 4, 6)
    assert distiller.sparsity(ones) == 0.0
    x = torch.tensor([[1., 2., 0, 4., 0], [1., 2., 0, 4., 0]])
    assert distiller.density(x) == 0.6
    assert distiller.density_cols(x, transposed=False) == 0.6
    assert distiller.sparsity_rows(x, transposed=False) == 0
    x = torch.tensor([[0., 0., 0], [1., 4., 0], [1., 2., 0], [0., 0., 0]])
    assert distiller.density(x) == 4 / 12
    assert distiller.sparsity_rows(x, transposed=False) == 0.5
    assert common.almost_equal(distiller.sparsity_cols(x, transposed=False),
                               1 / 3)
    assert common.almost_equal(distiller.sparsity_rows(x), 1 / 3)
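
For reference, a minimal sketch of what the distiller density/sparsity helpers exercised above compute (simplified; the real implementations also handle 4D tensors and structured granularities):

def density(tensor):
    # Fraction of non-zero elements.
    return tensor.ne(0).sum().item() / tensor.numel()

def sparsity(tensor):
    return 1.0 - density(tensor)

def sparsity_rows(tensor, transposed=True):
    # Fraction of rows that are entirely zero; with transposed=True the
    # tensor is transposed first, so rows and columns swap roles.
    if transposed:
        tensor = tensor.t()
    nonzero_rows = tensor.ne(0).any(dim=1).sum().item()
    return 1.0 - nonzero_rows / tensor.size(0)
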
def weights_sparsity_summary(model, return_total_sparsity=False, param_dims=[2, 4]):

    df = pd.DataFrame(columns=['Name', 'Shape', 'NNZ (dense)', 'NNZ (sparse)',
                               'Cols (%)','Rows (%)', 'Ch (%)', '2D (%)', '3D (%)',
                               'Fine (%)', 'Std', 'Mean', 'Abs-Mean'])
    pd.set_option('display.precision', 2)
    params_size = 0
    sparse_params_size = 0
    for name, param in model.state_dict().items():
        if (param.dim() in param_dims) and any(param_type in name
                                               for param_type in ['weight', 'bias']):
            _density = distiller.density(param)
            params_size += torch.numel(param)
            sparse_params_size += param.numel() * _density
            df.loc[len(df.index)] = ([
                name,
                distiller.size_to_str(param.size()),
                torch.numel(param),
                int(_density * param.numel()),
                distiller.sparsity_cols(param) * 100,
                distiller.sparsity_rows(param) * 100,
                distiller.sparsity_ch(param) * 100,
                distiller.sparsity_2D(param) * 100,
                distiller.sparsity_3D(param) * 100,
                (1 - _density) * 100,
                param.std().item(),
                param.mean().item(),
                param.abs().mean().item()
            ])

    total_sparsity = (1 - sparse_params_size / params_size) * 100

    df.loc[len(df.index)] = ([
        'Total sparsity:',
        '-',
        params_size,
        int(sparse_params_size),
        0, 0, 0, 0, 0,
        total_sparsity,
        0, 0, 0])

    if return_total_sparsity:
        return df, total_sparsity
    return df
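
Note that this variant filters parameters differently from the first one: any(param_type in name ...) is a substring match over the full parameter path, so it also picks up names such as 'weight_orig' produced by masking utilities, whereas name.split('.')[-1] in the first variant matches the parameter type exactly. A small illustration (the names are made up):

names = ['features.0.weight', 'features.0.weight_orig', 'bn.running_mean']
exact = [n for n in names if n.split('.')[-1] in ('weight', 'bias')]
substring = [n for n in names if any(t in n for t in ('weight', 'bias'))]
print(exact)      # ['features.0.weight']
print(substring)  # ['features.0.weight', 'features.0.weight_orig']
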
Example #4
    def rank_and_prune_channels(fraction_to_prune, param, param_name=None,
                                zeros_mask_dict=None, model=None, binary_map=None, 
                                magnitude_fn=distiller.norms.l1_norm, group_size=1, rounding_fn=math.floor,
                                noise=0):
        # This variant always computes its own binary map; a precomputed one is not supported.
        assert binary_map is None
        if binary_map is None:
            # 4D parameters belong to convolutions, 2D parameters to fully-connected layers.
            op_type = 'conv' if param.dim() == 4 else 'fc'
            bottomk_channels, channel_mags = distiller.norms.rank_channels(param, group_size, magnitude_fn,
                                                                           fraction_to_prune, rounding_fn, noise)

            # TODO: this little piece of code can be refactored
            if bottomk_channels is None:
                # None means that fraction_to_prune is too low to prune anything
                return

            threshold = bottomk_channels[-1]
            binary_map = channel_mags.gt(threshold)

            # These are the indices of channels we want to keep
            indices = binary_map.nonzero().squeeze()
            if len(indices.shape) == 0:
                indices = indices.expand(1)

            # Find the module representing this layer
            distiller.assign_layer_fq_names(model)
            layer_name = _param_name_2_layer_name(param_name)
            conv = distiller.find_module_by_fq_name(model, layer_name)
            try:
                Y = model.intermediate_fms['output_fms'][layer_name]
                X = model.intermediate_fms['input_fms'][layer_name]
            except AttributeError:
                raise ValueError("To use FMReconstructionChannelPruner you must first collect input statistics")

            # We need to remove the weight channels that were chosen for pruning.  Because we use
            # min(MSE) to compute the new weights, we start by removing the corresponding feature-map
            # channels from the input, and then perform an MSE regression to generate
            # a smaller weights tensor.
            if op_type == 'fc':
                X = X[:, binary_map]
            elif conv.kernel_size == (1, 1):
                X = X[:, binary_map, :]
                X = X.transpose(1, 2)
                X = X.contiguous().view(-1, X.size(2))
            else:
                # X is (batch, ck^2, num_pts)
                # we want:   (batch, c, k^2, num_pts)
                X = X.view(X.size(0), -1, np.prod(conv.kernel_size), X.size(2))
                X = X[:, binary_map, :, :]
                X = X.view(X.size(0), -1, X.size(3))
                X = X.transpose(1, 2)
                X = X.contiguous().view(-1, X.size(2))

            # Approximate the weights given input-FMs and output-FMs
            new_w = _least_square_sklearn(X, Y)
            new_w = torch.from_numpy(new_w) # shape: (num_filters, num_non_masked_channels * k^2)
            cnt_retained_channels = binary_map.sum()

            if op_type == 'conv':
                # Reshape the regression result back to a 4D weights tensor.
                new_w = new_w.contiguous().view(param.size(0), cnt_retained_channels, param.size(2), param.size(3))

                # Copy the weights that we learned from minimizing the feature-maps least squares error,
                # to our actual weights tensor.
                param.detach()[:, indices, :, :] = new_w.type(param.type())
            else:
                param.detach()[:, indices] = new_w.type(param.type())

        if zeros_mask_dict is not None:
            binary_map = binary_map.type(param.type())
            if op_type == 'conv':
                zeros_mask_dict[param_name].mask, _ = distiller.thresholding.expand_binary_map(param,
                                                                                               'Channels', binary_map)
                msglogger.info("FMReconstructionChannelPruner - param: %s pruned=%.3f goal=%.3f (%d/%d)",
                               param_name,
                               distiller.sparsity_ch(zeros_mask_dict[param_name].mask),
                               fraction_to_prune, binary_map.sum().item(), param.size(1))
            else:
                msglogger.error("fc sparsity = %.2f" % (1 - binary_map.sum().item() / binary_map.size(0)))
                zeros_mask_dict[param_name].mask = binary_map.expand(param.size(0), param.size(1))
                msglogger.info("FMReconstructionChannelPruner - param: %s pruned=%.3f goal=%.3f (%d/%d)",
                               param_name,
                               distiller.sparsity_cols(zeros_mask_dict[param_name].mask),
                               fraction_to_prune, binary_map.sum().item(), param.size(1))
        return binary_map
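
The snippet above relies on a module-level helper _least_square_sklearn(X, Y) that is not shown. A minimal sketch of what such a helper could look like, assuming it wraps scikit-learn's ordinary least squares (an assumption, not the verified implementation):

from sklearn.linear_model import LinearRegression

def _least_square_sklearn(X, Y):
    # Solve min_W ||X @ W.T - Y||^2 with no intercept; the learned coefficients
    # are the reconstructed weights, one row per output filter.
    reg = LinearRegression(fit_intercept=False)
    reg.fit(X, Y)
    return reg.coef_  # shape: (num_filters, num_retained_channels * k^2)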