def test_get_node_params(prunable_onnx_model):
    """
    Check get_node_params against the prunable_onnx_model fixture.

    The last node of the fixture graph must make get_node_params raise a
    ValueError. Every other node must return the expected (weight, bias)
    pair when values are not included.
    """
    graph_nodes = prunable_onnx_model.graph.node

    # the final node of the graph is expected to be rejected
    with pytest.raises(ValueError):
        get_node_params(prunable_onnx_model, graph_nodes[-1])

    # expected (weight, bias) pairs, in graph order, for all but the last node
    expected_pairs = [
        (NodeParam("node1.weight", None), NodeParam("node1.bias", None)),
        (NodeParam("node2.weight", None), None),
        (NodeParam("node3.weight", None), None),
    ]

    for graph_node, expected_params in zip(graph_nodes[:-1], expected_pairs):
        found_params = get_node_params(
            prunable_onnx_model, graph_node, include_values=False
        )
        assert found_params == expected_params
def pruning_loss_sens_magnitude_iter(
    model: Union[str, ModelProto],
    sparsity_levels: Union[
        List[float], Tuple[float, ...]
    ] = default_pruning_sparsities_loss(True),
) -> Generator[
    Tuple[PruningLossSensitivityAnalysis, KSSensitivityProgress], None, None
]:
    """
    Approximate the pruning loss sensitivity of a model from weight magnitudes.

    A single PruningLossSensitivityAnalysis object is filled in layer by layer.
    It is yielded, together with a progress object, before each prunable node
    is processed and once more after the last node, at which point it holds
    the complete analysis.

    :param model: the loaded model or a file path to the onnx model
        to calculate the sparse sensitivity analysis for
    :param sparsity_levels: the sparsity levels to calculate the loss for
        for each param
    :return: generator of (analysis, progress) pairs; the progress carries
        the current layer index, node id, layer count and completed fraction
    """
    model = check_load_model(model)
    prunable_nodes = get_prunable_nodes(model)
    analysis = PruningLossSensitivityAnalysis()
    layer_count = len(prunable_nodes)

    for layer_index, layer_node in enumerate(prunable_nodes):
        layer_id = extract_node_id(layer_node)
        progress = KSSensitivityProgress(
            layer_index,
            layer_id,
            layer_count,
            float(layer_index) / float(layer_count),
        )
        yield analysis, progress

        weight, _ = get_node_params(model, layer_node)
        # absolute weight values in ascending order
        magnitudes = numpy.sort(numpy.abs(weight.val.flatten()))
        last_position = len(magnitudes) - 1
        window_start = 0

        for level in sparsity_levels:
            # index of the magnitude at this sparsity, clamped to the last one
            cutoff = min(round(level * magnitudes.size), last_position)
            is_baseline = False

            if level <= 1e-9:
                # effectively zero sparsity is recorded as the baseline
                is_baseline = True
                level = 0.0
                mean_magnitude = 0.0
            elif cutoff > window_start:
                # average the magnitudes newly covered since the previous level
                mean_magnitude = magnitudes[window_start:cutoff].mean().item()
                window_start = cutoff
            else:
                # no new range to average, use the single value at the cutoff
                mean_magnitude = magnitudes[cutoff].item()
                window_start = cutoff + 1

            analysis.add_result(
                layer_id,
                weight.name,
                layer_index,
                level,
                mean_magnitude,
                is_baseline,
            )

    yield analysis, KSSensitivityProgress(layer_count, None, layer_count, 1.0)
def test_prune_model_one_shot(
    onnx_repo_models: OnnxRepoModelFixture, sparsity: List[float]  # noqa: F811
):
    """
    One-shot prune every Conv and Gemm node of the fixture model with a single
    sparsity value, then check each node's weight against that sparsity
    using a tolerance of 5.5e-3.
    """
    onnx_model = load_model(onnx_repo_models.model_path)
    target_nodes = [
        graph_node
        for graph_node in onnx_model.graph.node
        if graph_node.op_type in ("Conv", "Gemm")
    ]

    prune_model_one_shot(onnx_model, target_nodes, sparsity)

    for graph_node in target_nodes:
        pruned_weight, _ = get_node_params(onnx_model, graph_node)
        _test_correct_sparsity(pruned_weight.val, sparsity, 5.5e-3)
def test_prune_model_one_shot_sparsity_list(
    onnx_repo_models: OnnxRepoModelFixture,  # noqa: F811
):
    """
    One-shot prune every Conv and Gemm node of the fixture model with its own
    randomly drawn sparsity, then check each node's weight against the
    sparsity assigned to it using a tolerance of 5.5e-3.
    """
    onnx_model = load_model(onnx_repo_models.model_path)
    target_nodes = [
        graph_node
        for graph_node in onnx_model.graph.node
        if graph_node.op_type in ("Conv", "Gemm")
    ]

    # one random sparsity per node
    per_node_sparsity = numpy.random.random_sample([len(target_nodes)])
    prune_model_one_shot(onnx_model, target_nodes, per_node_sparsity)

    for graph_node, node_sparsity in zip(target_nodes, per_node_sparsity):
        pruned_weight, _ = get_node_params(onnx_model, graph_node)
        _test_correct_sparsity(pruned_weight.val, node_sparsity, 5.5e-3)
def pruning_loss_sens_one_shot_iter(
    model: Union[str, ModelProto],
    data: DataLoader,
    batch_size: int,
    steps_per_measurement: int,
    sparsity_levels: List[float] = default_pruning_sparsities_loss(False),
    use_deepsparse_inference: bool = False,
) -> Generator[
    Tuple[PruningLossSensitivityAnalysis, KSSensitivityProgress], None, None
]:
    """
    Run a one shot pruning sensitivity analysis, node by node, without
    retraining.

    Baseline outputs are recorded once from the unpruned model. Each prunable
    node is then pruned to every sparsity level in turn and the model is run
    again. For every batch, the kl_divergence between the pruned and baseline
    outputs is summed over the outputs and added to the analysis. After all
    levels have been measured for a node, its weight is written back to the
    value it had before pruning. The analysis is yielded with a progress
    object at the start, before every measurement, and at the end.

    :param model: the loaded model or a file path to the onnx model
        to calculate the sparse sensitivity analysis for
    :param data: the data to run through the model
    :param batch_size: the batch size the data is created for
    :param steps_per_measurement: number of steps (batches) to run through
        the model for each sparsity level on each node
    :param sparsity_levels: the sparsity levels to calculate the loss for
        for each param
    :param use_deepsparse_inference: True to use the DeepSparse inference
        engine to run the analysis, False to use onnxruntime
    :return: generator of (analysis, progress) pairs covering every prunable
        node and sparsity level
    """
    model = check_load_model(model)
    prunable_nodes = get_prunable_nodes(model)
    analysis = PruningLossSensitivityAnalysis()
    total_updates = len(prunable_nodes) * len(sparsity_levels) + 1
    finished_updates = 0

    def _new_runner():
        # build the runner for the selected engine and log its creation
        if use_deepsparse_inference:
            created = DeepSparseModelRunner(model, batch_size)
        else:
            created = ORTModelRunner(model)
        _LOGGER.debug("created runner for one shot analysis {}".format(created))
        return created

    yield analysis, KSSensitivityProgress(
        finished_updates, None, total_updates, 0.0
    )

    # outputs of the unpruned model, used as the reference for every loss
    runner = _new_runner()
    base_outputs, _ = runner.run(
        data,
        desc="",
        show_progress=False,
        max_steps=steps_per_measurement,
    )
    _LOGGER.debug("recorded base outputs")
    del runner

    for node_index, node in enumerate(prunable_nodes):
        node_id = extract_node_id(node)
        # weight holds the node's values from before any pruning of this node
        weight, _ = get_node_params(model, node)
        _LOGGER.debug("running one shot for node {}".format(node_id))

        for sparsity in sparsity_levels:
            finished_updates += 1
            current_step = {"node_id": node_id, "sparsity": sparsity}
            yield analysis, KSSensitivityProgress(
                finished_updates,
                current_step,
                total_updates,
                float(finished_updates) / float(total_updates),
            )

            prune_model_one_shot(model, [node], sparsity)
            _LOGGER.debug(
                "created one shot pruned model for sparsity {}".format(sparsity)
            )
            runner = _new_runner()
            pruned_outputs, _ = runner.run(
                data,
                desc="",
                show_progress=False,
                max_steps=steps_per_measurement,
            )
            del runner
            _LOGGER.debug("recorded outputs")

            for base_batch, pruned_batch in zip(base_outputs, pruned_outputs):
                # one loss per output, pruned output compared to the baseline
                output_losses = [
                    kl_divergence(
                        pruned_batch[output_key],
                        base_array,
                        min(base_array.min(), pruned_batch[output_key].min()),
                    )
                    for output_key, base_array in base_batch.items()
                ]
                analysis.add_result(
                    node_id,
                    weight.name,
                    node_index,
                    sparsity,
                    sum(output_losses),
                    baseline=sparsity < 1e-9,
                )

        # reset node to its baseline density
        update_model_param(model, weight.name, weight.val)

    yield analysis, KSSensitivityProgress(
        total_updates, None, total_updates, 1.0
    )
def __init__(
    self,
    model: Union[ModelProto, None],
    node: Union[Any, None],
    node_shape: Union[NodeShape, None] = None,
    **kwargs,
):
    """
    Initialize either by analyzing a node of a model or, when both model
    and node are None, by restoring previously computed values from kwargs.

    :param model: the model the node belongs to, or None to restore from
        kwargs
    :param node: the node to analyze, or None to restore from kwargs
    :param node_shape: optional shapes for the node; when None the input
        and output shapes are stored as None
    :param kwargs: only read when model and node are both None. Required
        keys: id, op_type, input_names, output_names, input_shapes,
        output_shapes, params, prunable, prunable_params_zeroed,
        weight_name, weight_shape, bias_name, bias_shape, attributes, flops.
        Optional keys: prunable_equation_sensitivity (defaults to None) and
        prunable_params (defaults to the product of weight_shape for a
        prunable node with a weight shape, otherwise 0)
    :raises ValueError: if exactly one of model and node is None
    :raises KeyError: if a required key is missing from kwargs when restoring
    """
    if model is None and node is None:
        # restore path: copy the stored values without touching any model
        self._id = kwargs["id"]
        self._op_type = kwargs["op_type"]
        self._input_names = kwargs["input_names"]
        self._output_names = kwargs["output_names"]
        self._input_shapes = kwargs["input_shapes"]
        self._output_shapes = kwargs["output_shapes"]
        self._params = kwargs["params"]
        self._prunable = kwargs["prunable"]
        self._prunable_params_zeroed = kwargs["prunable_params_zeroed"]
        self._weight_name = kwargs["weight_name"]
        self._weight_shape = kwargs["weight_shape"]
        self._bias_name = kwargs["bias_name"]
        self._bias_shape = kwargs["bias_shape"]
        self._attributes = kwargs["attributes"]
        self._flops = kwargs["flops"]
        self._prunable_equation_sensitivity = kwargs.get(
            "prunable_equation_sensitivity"
        )

        # The analysis path below always sets _prunable_params, so the
        # restore path must set it too. Without an explicit value, rebuild it
        # from the weight shape: the analysis path stores weight.val.size,
        # which equals the product of the weight's dimensions.
        if "prunable_params" in kwargs:
            self._prunable_params = kwargs["prunable_params"]
        elif self._prunable and self._weight_shape is not None:
            self._prunable_params = int(numpy.prod(self._weight_shape))
        else:
            self._prunable_params = 0

        return

    if model is None or node is None:
        raise ValueError("both model and node must not be None")

    self._id = extract_node_id(node)
    self._op_type = node.op_type
    self._input_names = get_node_inputs(model, node)
    self._output_names = get_node_outputs(model, node)

    if node_shape is None:
        self._input_shapes = None
        self._output_shapes = None
    else:
        self._input_shapes = node_shape.input_shapes
        self._output_shapes = node_shape.output_shapes

    # defaults for a node without prunable params, overwritten below if prunable
    self._params = 0
    self._prunable = is_prunable_node(model, node)
    self._prunable_params = 0
    self._prunable_params_zeroed = 0
    self._weight_name = None
    self._weight_shape = None
    self._bias_name = None
    self._bias_shape = None
    self._attributes = get_node_attributes(node)

    if self._prunable:
        weight, bias = get_node_params(model, node)
        weight_size = weight.val.size
        self._params += weight_size
        self._prunable_params += weight_size
        # zeroed count = total weight elements minus the nonzero ones
        self._prunable_params_zeroed += weight_size - numpy.count_nonzero(
            weight.val
        )
        self._weight_name = weight.name
        self._weight_shape = list(weight.val.shape)

        if bias is not None:
            # the bias adds to the param count but not to the prunable count
            self._bias_name = bias.name
            self._params += bias.val.size
            self._bias_shape = list(bias.val.shape)

    kernel_shape = get_kernel_shape(self._attributes)
    self._flops = calculate_flops(
        self._op_type,
        input_shape=self._input_shapes,
        output_shape=self._output_shapes,
        weight_shape=self._weight_shape,
        kernel_shape=kernel_shape,
        bias_shape=self._bias_shape,
        attributes=self._attributes,
    )
    # the approximated sensitivity is only computed for prunable nodes
    self._prunable_equation_sensitivity = (
        pruning_loss_sens_approx(
            self._input_shapes,
            self._output_shapes,
            self._params,
            apply_shape_change_mult=True,
        )
        if self._prunable
        else None
    )