Example #1
0
def test_get_node_params(prunable_onnx_model):
    """
    Check ``get_node_params`` against the ``prunable_onnx_model`` fixture.

    The final graph node is expected to be rejected with a ``ValueError``;
    every earlier node must return the expected (weight, bias) pair when
    values are not included.
    """
    graph_nodes = prunable_onnx_model.graph.node

    # the last node in the fixture graph must be rejected
    with pytest.raises(ValueError):
        get_node_params(prunable_onnx_model, graph_nodes[-1])

    leading_nodes = graph_nodes[:-1]
    # one (weight, bias) pair per leading node; bias is None when absent
    expected_by_node = [
        (NodeParam("node1.weight", None), NodeParam("node1.bias", None)),
        (NodeParam("node2.weight", None), None),
        (NodeParam("node3.weight", None), None),
    ]

    for node, expected_params in zip(leading_nodes, expected_by_node):
        actual_params = get_node_params(
            prunable_onnx_model, node, include_values=False
        )
        assert actual_params == expected_params
Example #2
0
def pruning_loss_sens_magnitude_iter(
    model: Union[str, ModelProto],
    sparsity_levels: Union[List[float],
                           Tuple[float,
                                 ...]] = default_pruning_sparsities_loss(True),
) -> Generator[Tuple[PruningLossSensitivityAnalysis, KSSensitivityProgress],
               None, None]:
    """
    Approximate the pruning loss sensitivity of a model from weight magnitudes.

    Walks the prunable nodes one at a time. For every node the absolute
    weight values are sorted and, for each sparsity level, the mean magnitude
    of the weights newly covered by that level is recorded as the loss
    estimate. A single PruningLossSensitivityAnalysis object is built up
    incrementally and yielded before each node is processed; the last yield
    carries the completed analysis.

    :param model: the loaded model or a file path to the onnx model
        to calculate the sparse sensitivity analysis for
    :param sparsity_levels: the sparsity levels to calculate the loss for,
        applied to each param
    :return: generator of (analysis, progress) tuples, where analysis is the
        results gathered so far and progress is a KSSensitivityProgress
        describing how far through the nodes the iteration is
    """
    model = check_load_model(model)
    prunable_nodes = get_prunable_nodes(model)
    analysis = PruningLossSensitivityAnalysis()
    total_layers = len(prunable_nodes)

    for layer_index, node in enumerate(prunable_nodes):
        node_id = extract_node_id(node)
        fraction_done = float(layer_index) / float(total_layers)

        # report progress before the work for this node starts
        yield analysis, KSSensitivityProgress(layer_index, node_id,
                                              total_layers, fraction_done)

        weight, _ = get_node_params(model, node)
        magnitudes = numpy.sort(numpy.abs(weight.val.flatten()))
        last_valid = len(magnitudes) - 1
        # start of the magnitude window not yet consumed by a previous level
        window_start = 0

        for sparsity in sparsity_levels:
            # index of the cut-off magnitude, clamped to the last element
            cut_index = min(round(sparsity * magnitudes.size), last_valid)
            is_baseline = bool(sparsity <= 1e-9)

            if is_baseline:
                # (near) zero sparsity is recorded as the exact baseline
                sparsity = 0.0
                sparse_avg = 0.0
            elif cut_index > window_start:
                # average only the magnitudes added since the previous level
                sparse_avg = magnitudes[window_start:cut_index].mean().item()
                window_start = cut_index
            else:
                # empty window: fall back to the single cut-off magnitude
                sparse_avg = magnitudes[cut_index].item()
                window_start = cut_index + 1

            analysis.add_result(node_id, weight.name, layer_index, sparsity,
                                sparse_avg, is_baseline)

    yield analysis, KSSensitivityProgress(total_layers, None, total_layers,
                                          1.0)
Example #3
0
def test_prune_model_one_shot(
    onnx_repo_models: OnnxRepoModelFixture, sparsity: List[float]  # noqa: F811
):
    """
    Prune every Conv and Gemm node of the fixture model in one shot with a
    single ``sparsity`` argument and verify each node's resulting weight.
    """
    model = load_model(onnx_repo_models.model_path)
    target_ops = ("Conv", "Gemm")
    nodes = [node for node in model.graph.node if node.op_type in target_ops]

    prune_model_one_shot(model, nodes, sparsity)

    for pruned_node in nodes:
        weight, _ = get_node_params(model, pruned_node)
        # third argument is presumably the allowed deviation -- confirm
        # against _test_correct_sparsity
        _test_correct_sparsity(weight.val, sparsity, 5.5e-3)
Example #4
0
def test_prune_model_one_shot_sparsity_list(
    onnx_repo_models: OnnxRepoModelFixture,  # noqa: F811
):
    """
    Prune every Conv and Gemm node of the fixture model in one shot, giving
    each node its own randomly drawn sparsity, and verify each node's
    resulting weight against the sparsity requested for it.
    """
    model = load_model(onnx_repo_models.model_path)
    target_ops = ("Conv", "Gemm")
    nodes = [node for node in model.graph.node if node.op_type in target_ops]

    # one random sparsity in [0, 1) per selected node
    sparsities = numpy.random.random_sample([len(nodes)])
    prune_model_one_shot(model, nodes, sparsities)

    for pruned_node, requested_sparsity in zip(nodes, sparsities):
        weight, _ = get_node_params(model, pruned_node)
        # third argument is presumably the allowed deviation -- confirm
        # against _test_correct_sparsity
        _test_correct_sparsity(weight.val, requested_sparsity, 5.5e-3)
Example #5
0
def pruning_loss_sens_one_shot_iter(
    model: Union[str, ModelProto],
    data: DataLoader,
    batch_size: int,
    steps_per_measurement: int,
    sparsity_levels: List[float] = default_pruning_sparsities_loss(False),
    use_deepsparse_inference: bool = False,
) -> Generator[Tuple[PruningLossSensitivityAnalysis, KSSensitivityProgress],
               None, None]:
    """
    Run a one shot pruning loss sensitivity analysis; no retraining is done.

    Baseline outputs are recorded once for the unpruned model. Then, node by
    node, the node is pruned to each sparsity level, the model is re-run, and
    the loss is taken as the kl_divergence of the pruned outputs from the
    baseline outputs. After all levels have been measured for a node, its
    original weight is written back before moving on to the next node.
    The PruningLossSensitivityAnalysis is updated in place and yielded before
    every (node, sparsity) measurement.

    :param model: the loaded model or a file path to the onnx model
        to calculate the sparse sensitivity analysis for
    :param data: the data to run through the model
    :param batch_size: the batch size the data is created for
    :param steps_per_measurement: number of steps (batches) to run through
        the model for each sparsity level on each node
    :param sparsity_levels: the sparsity levels to calculate the loss for,
        applied to each param
    :param use_deepsparse_inference: True to use the DeepSparse inference engine
        to run the analysis, False to use onnxruntime
    :return: generator of (analysis, progress) tuples; analysis holds the
        sensitivity results gathered so far for the prunable nodes and
        progress is a KSSensitivityProgress for the iteration
    """

    def _build_runner():
        # choose the inference backend for the current state of the model
        if use_deepsparse_inference:
            return DeepSparseModelRunner(model, batch_size)
        return ORTModelRunner(model)

    model = check_load_model(model)
    prunable_nodes = get_prunable_nodes(model)
    analysis = PruningLossSensitivityAnalysis()
    # one update per (node, sparsity) pair plus the final completion update
    num_updates = len(prunable_nodes) * len(sparsity_levels) + 1
    update_num = 0

    yield analysis, KSSensitivityProgress(update_num, None, num_updates, 0.0)

    runner = _build_runner()
    _LOGGER.debug("created runner for one shot analysis {}".format(runner))
    base_outputs, _ = runner.run(
        data,
        desc="",
        show_progress=False,
        max_steps=steps_per_measurement,
    )
    _LOGGER.debug("recorded base outputs")
    del runner

    for index, node in enumerate(prunable_nodes):
        node_id = extract_node_id(node)
        weight, _ = get_node_params(model, node)
        _LOGGER.debug("running one shot for node {}".format(node_id))

        for sparsity in sparsity_levels:
            update_num += 1
            step_info = {"node_id": node_id, "sparsity": sparsity}
            yield analysis, KSSensitivityProgress(
                update_num,
                step_info,
                num_updates,
                float(update_num) / float(num_updates),
            )

            prune_model_one_shot(model, [node], sparsity)
            _LOGGER.debug(
                "created one shot pruned model for sparsity {}".format(
                    sparsity))
            runner = _build_runner()
            _LOGGER.debug(
                "created runner for one shot analysis {}".format(runner))
            pruned_outputs, _ = runner.run(
                data,
                desc="",
                show_progress=False,
                max_steps=steps_per_measurement,
            )
            del runner
            _LOGGER.debug("recorded outputs")

            # one result is recorded per batch: the summed loss over all
            # outputs of that batch
            for base, pruned in zip(base_outputs, pruned_outputs):
                batch_losses = [
                    kl_divergence(
                        pruned[key],
                        base_array,
                        min(base_array.min(), pruned[key].min()),
                    )
                    for key, base_array in base.items()
                ]

                analysis.add_result(
                    node_id,
                    weight.name,
                    index,
                    sparsity,
                    sum(batch_losses),
                    baseline=sparsity < 1e-9,
                )

        # reset node to its baseline density
        update_model_param(model, weight.name, weight.val)

    yield analysis, KSSensitivityProgress(num_updates, None, num_updates, 1.0)
Example #6
0
    def __init__(
        self,
        model: Union[ModelProto, None],
        node: Union[Any, None],
        node_shape: Union[NodeShape, None] = None,
        **kwargs,
    ):
        """
        Build the analyzer either from a model/node pair or from stored values.

        When both ``model`` and ``node`` are None, every attribute is restored
        from ``kwargs`` and nothing is computed. Otherwise both must be given
        and the attributes are derived from the node.

        :param model: the loaded onnx model the node belongs to, or None when
            restoring from kwargs
        :param node: the node to analyze, or None when restoring from kwargs
        :param node_shape: optional input/output shapes for the node; when
            None the input and output shapes are stored as None
        :param kwargs: stored values used only when model and node are None.
            Required keys: id, op_type, input_names, output_names,
            input_shapes, output_shapes, params, prunable,
            prunable_params_zeroed, weight_name, weight_shape, bias_name,
            bias_shape, attributes, flops. Optional keys:
            prunable_equation_sensitivity (defaults to None) and
            prunable_params (derived from weight_shape when missing)
        :raises KeyError: if a required key is missing from kwargs when
            restoring
        :raises ValueError: if exactly one of model and node is None
        """
        if model is None and node is None:
            self._id = kwargs["id"]
            self._op_type = kwargs["op_type"]
            self._input_names = kwargs["input_names"]
            self._output_names = kwargs["output_names"]
            self._input_shapes = kwargs["input_shapes"]
            self._output_shapes = kwargs["output_shapes"]
            self._params = kwargs["params"]
            self._prunable = kwargs["prunable"]
            self._prunable_params_zeroed = kwargs["prunable_params_zeroed"]
            self._weight_name = kwargs["weight_name"]
            self._weight_shape = kwargs["weight_shape"]
            self._bias_name = kwargs["bias_name"]
            self._bias_shape = kwargs["bias_shape"]
            self._attributes = kwargs["attributes"]
            self._flops = kwargs["flops"]
            self._prunable_equation_sensitivity = kwargs.get(
                "prunable_equation_sensitivity"
            )

            # The model/node branch below always sets _prunable_params, so a
            # restored instance must set it too or it is left partially
            # initialized. Use the stored value when given; otherwise rebuild
            # it from the weight shape, whose product equals the weight.val.size
            # that the other branch records for a prunable node.
            if "prunable_params" in kwargs:
                self._prunable_params = kwargs["prunable_params"]
            elif self._prunable and self._weight_shape is not None:
                try:
                    self._prunable_params = int(
                        numpy.prod(self._weight_shape, dtype=numpy.int64)
                    )
                except (TypeError, ValueError):
                    # shape holds non-numeric entries; keep the restore
                    # working rather than failing on a derived value
                    self._prunable_params = 0
            else:
                self._prunable_params = 0

            return

        if model is None or node is None:
            raise ValueError("both model and node must not be None")

        self._id = extract_node_id(node)
        self._op_type = node.op_type
        self._input_names = get_node_inputs(model, node)
        self._output_names = get_node_outputs(model, node)

        if node_shape is None:
            self._input_shapes = None
            self._output_shapes = None
        else:
            self._input_shapes = node_shape.input_shapes
            self._output_shapes = node_shape.output_shapes

        # defaults for a node without prunable params; overwritten below
        self._params = 0
        self._prunable = is_prunable_node(model, node)
        self._prunable_params = 0
        self._prunable_params_zeroed = 0
        self._weight_name = None
        self._weight_shape = None
        self._bias_name = None
        self._bias_shape = None
        self._attributes = get_node_attributes(node)

        if self._prunable:
            weight, bias = get_node_params(model, node)
            self._params += weight.val.size
            self._prunable_params += weight.val.size
            # zeroed params = total weight elements minus the non-zero ones
            self._prunable_params_zeroed += weight.val.size - numpy.count_nonzero(
                weight.val
            )
            self._weight_name = weight.name
            self._weight_shape = list(weight.val.shape)

            if bias is not None:
                # bias counts toward total params but not prunable params
                self._bias_name = bias.name
                self._params += bias.val.size
                self._bias_shape = list(bias.val.shape)

        kernel_shape = get_kernel_shape(self._attributes)
        self._flops = calculate_flops(
            self._op_type,
            input_shape=self._input_shapes,
            output_shape=self._output_shapes,
            weight_shape=self._weight_shape,
            kernel_shape=kernel_shape,
            bias_shape=self._bias_shape,
            attributes=self._attributes,
        )

        # the equation-based sensitivity is only computed for prunable nodes
        self._prunable_equation_sensitivity = (
            pruning_loss_sens_approx(
                self._input_shapes,
                self._output_shapes,
                self._params,
                apply_shape_change_mult=True,
            )
            if self._prunable
            else None
        )