Ejemplo n.º 1
0
 def set_statistics(self, statistics: dict):
     """Attach per-layer min/max statistics to the underlying IE network."""
     stats_by_layer = {
         name: ie.LayerStats(min=tuple(stat.min_outputs),
                             max=tuple(stat.max_outputs))
         for name, stat in statistics.items()
     }
     self.ie_network.stats.update(stats_by_layer)
Ejemplo n.º 2
0
    def save(self, model_file_path: str, weights_file_path: str, quantization_level: dict, statistics):
        '''
        Save calibration results: serialize the network with per-layer
        statistics and quantization levels applied.

        :param model_file_path: destination path for the model (.xml) file
        :param weights_file_path: destination path for the weights (.bin) file
        :param quantization_level: map of layer name -> quantization level string
        :param statistics: map of layer name -> object with min_outputs/max_outputs
        :raises ValueError: if statistics is empty or None
        '''
        if not statistics:
            # fixed typo in the original message ("empy" -> "empty")
            raise ValueError("statistics is empty")

        network = self.create_network()

        # Attach collected min/max statistics for every layer we have data for.
        network_stats = {
            layer_name: ie.LayerStats(min=tuple(node_statistic.min_outputs),
                                      max=tuple(node_statistic.max_outputs))
            for layer_name, node_statistic in statistics.items()
        }
        network.stats.update(network_stats)

        # Mark each quantizable layer with its requested quantization level.
        for layer in network.layers.values():
            if self.is_quantization_supported(layer.type) and layer.name in quantization_level:
                params = layer.params
                params["quantization_level"] = quantization_level[layer.name]
                # reassign to trigger the property setter on the IE layer
                layer.params = params

        network.serialize(model_file_path, weights_file_path)
Ejemplo n.º 3
0
    def set_single_layer_networks(self):
        """Build and load a single-layer sub-network for every quantizable layer.

        For each layer of the full network that supports quantization (and is
        not in the ignore list), a tiny network containing just that layer —
        optionally fused with its single activation consumer — is created, the
        collected statistics and a quantization level are applied, and the
        result is loaded on the plugin.  Created networks are appended to
        ``self._single_layer_networks`` and the corresponding layers to
        ``self._layers_to_return_to_fp32``.

        :raises ValueError: if a created sub-network has more than one input
        """
        assert self._configuration is not None, "Configuration should be set"
        assert self._per_layer_statistics is not None, "Statistics should be set"

        network_info = NetworkInfo(self._configuration.model)

        index = 1
        for layer in self._network.layers.values():
            if layer.name not in self._ignore_layer_names and \
                    self._normalizer.is_quantization_supported(layer.type):
                layer_info = network_info.get_layer(layer.name)
                # Only handle layers with exactly one consumer that itself has
                # exactly one input (a simple linear chain).
                if (len(layer_info.outputs) == 1) and (len(
                        layer_info.outputs[0].layer.inputs) == 1):
                    # Fuse the following activation layer when supported;
                    # otherwise the sub-network holds only `layer`.
                    activation_layer = self._network.layers[layer_info.outputs[0].layer.name] if \
                        (len(layer_info.outputs) == 1 and
                         self._normalizer.is_quantization_fusing_supported(layer_info,
                                                                           layer_info.outputs[
                                                                               0].layer)) else None
                    if activation_layer:
                        debug(
                            "create network #{} for layer {} ({}) -> {} ({})".
                            format(index, layer.name, layer.type,
                                   activation_layer.name,
                                   activation_layer.type))
                    else:
                        debug("create network #{} for layer {} ({})".format(
                            index, layer.name, layer.type))

                    layer_network, reference_output_layer_name = self._normalizer.create_network_for_layer(
                        None, layer, layer_info, activation_layer)

                    Network.reshape(layer_network,
                                    self._configuration.batch_size)

                    # Attach collected per-layer min/max statistics.
                    network_stats = {}
                    # TODO: initialize only necessary statistics
                    for layer_name, node_statistic in self._per_layer_statistics.items(
                    ):
                        network_stats[layer_name] = ie.LayerStats(
                            min=tuple(node_statistic.min_outputs),
                            max=tuple(node_statistic.max_outputs))
                    layer_network.stats.update(network_stats)

                    # 'INT8' is expressed as 'I8' at the layer-params level.
                    params = layer_network.layers[layer.name].params
                    params[
                        "quantization_level"] = 'I8' if self._configuration.precision == 'INT8' else self._configuration.precision
                    # reassign to trigger the IE layer's params setter
                    layer_network.layers[layer.name].params = params

                    exec_network = self._normalizer.plugin.load(
                        network=layer_network,
                        config={"EXCLUSIVE_ASYNC_REQUESTS": "YES"})

                    if len(layer_network.inputs) != 1:
                        raise ValueError("created network has several inputs")

                    network_input_layer_name = next(
                        iter(layer_network.inputs.keys()))

                    single_layer_network = SingleLayerNetwork(
                        network=layer_network,
                        exec_network=exec_network,
                        input_layer_name=network_input_layer_name,
                        layer_name=layer.name,
                        output_layer_name=layer.name + "_",
                        reference_output_layer_name=reference_output_layer_name
                    )

                    self._single_layer_networks = np.append(
                        self._single_layer_networks, single_layer_network)
                    self._layers_to_return_to_fp32 = np.append(
                        self._layers_to_return_to_fp32, layer)
                    index += 1
Ejemplo n.º 4
0
    def run(self,
            network: Network = None,
            statistics=None,
            quantization_levels=None,
            iterations_count: int = 1000) -> BenchmarkResult:
        """Benchmark inference on the first dataset image and return latency.

        Optionally substitutes the given network into the launcher and applies
        statistics and per-layer quantization levels before measuring.
        """
        model = self._configuration.config['models'][0]
        launcher_config = model['launchers'][0]
        dataset_config = model['datasets'][0]

        model_evaluator = ModelEvaluator.from_configs(launcher_config,
                                                      dataset_config)
        try:
            if network:
                # Replace the launcher's network with the supplied one and
                # reload the executable network on the plugin.
                del model_evaluator.launcher.network
                del model_evaluator.launcher.exec_network
                model_evaluator.launcher.network = network.ie_network
                model_evaluator.launcher.exec_network = \
                    model_evaluator.launcher.plugin.load(network.ie_network)

            ie_network = model_evaluator.launcher.network

            if statistics:
                # Attach collected per-layer min/max statistics.
                ie_network.stats.update({
                    name: ie.LayerStats(min=tuple(stat.min_outputs),
                                        max=tuple(stat.max_outputs))
                    for name, stat in statistics.items()
                })

            if quantization_levels:
                for name, level in quantization_levels.items():
                    layer_params = ie_network.layers[name].params
                    layer_params["quantization_level"] = level
                    # reassign to trigger the IE layer's params setter
                    ie_network.layers[name].params = layer_params

            if model_evaluator.dataset.size != 1:
                info(
                    "only one first image is used from dataset annotation to perform benchmark"
                )
                model_evaluator.dataset.size = 1

            process_dataset_callback = BenchmarkCallback(
                configuration=self._configuration,
                network=network,
                iterations_count=iterations_count)

            model_evaluator.process_dataset(
                None,
                progress_reporter=None,
                output_callback=process_dataset_callback.output_callback,
                benchmark=process_dataset_callback.benchmark_callback)

            if len(model_evaluator.launcher.exec_network.requests) != 1:
                raise ValueError("unexpected network requests count")

            latency = process_dataset_callback.latency
        finally:
            model_evaluator.release()

        return BenchmarkResult(latency)
Ejemplo n.º 5
0
    def collect_in_thread(
            self, statistics: dict, full_network_result: InferenceResult,
            network: ie.IENetwork, network_info: NetworkInfo,
            quantization_layer: QuantizationLayer) -> LayerAccuracyDropInfo:
        """Measure the accuracy drop introduced by quantizing a single layer.

        Builds a sub-network containing only the quantization layer (fused
        with its activation consumer when supported), applies statistics and
        the configured quantization level, infers it, and compares against the
        full-network reference result.

        Note: the original annotation was ``statistics: dict()`` which
        evaluates to an empty dict *instance*; fixed to the type ``dict``.

        :param statistics: map of layer name -> object with min_outputs/max_outputs
        :param full_network_result: reference inference result of the full network
        :param network: the full IE network the layer belongs to
        :param network_info: parsed topology info for the full network
        :param quantization_layer: the layer being evaluated
        :return: accuracy-drop info for the layer
        :raises ValueError: if the created sub-network has more than one input
        """
        index = quantization_layer.index
        layer_to_clone = quantization_layer.layer
        layer_to_clone_info = network_info.get_layer(layer_to_clone.name)

        # Fuse the following activation layer when the layer has a single
        # consumer and fusing is supported; otherwise clone the layer alone.
        activation_layer = network.layers[
            layer_to_clone_info.outputs[0].layer.name] if (
                len(layer_to_clone_info.outputs) == 1
                and self._normalizer.is_quantization_fusing_supported(
                    layer_to_clone_info,
                    layer_to_clone_info.outputs[0].layer)) else None
        if activation_layer:
            debug("create network #{} for layer {} ({}) -> {} ({})".format(
                index, layer_to_clone.name, layer_to_clone.type,
                activation_layer.name, activation_layer.type))
        else:
            debug("create network #{} for layer {} ({})".format(
                index, layer_to_clone.name, layer_to_clone.type))

        layer_network, reference_output_layer_name = self._normalizer.create_network_for_layer(
            self._configuration.weights, layer_to_clone, layer_to_clone_info,
            activation_layer)

        Network.reshape(layer_network, self._configuration.batch_size)

        # Attach collected per-layer min/max statistics.
        # TODO: initialize only necessary statistics
        network_stats = {}
        for layer_name, node_statistic in statistics.items():
            network_stats[layer_name] = ie.LayerStats(
                min=tuple(node_statistic.min_outputs),
                max=tuple(node_statistic.max_outputs))
        layer_network.stats.update(network_stats)

        # 'INT8' is expressed as 'I8' at the layer-params level.
        params = layer_network.layers[layer_to_clone.name].params
        params[
            "quantization_level"] = 'I8' if self._configuration.precision == 'INT8' else self._configuration.precision
        # reassign to trigger the IE layer's params setter
        layer_network.layers[layer_to_clone.name].params = params

        exec_network = self._plugin.load(
            network=layer_network, config={"EXCLUSIVE_ASYNC_REQUESTS": "YES"})

        if len(layer_network.inputs) != 1:
            raise ValueError("created network has several inputs")

        network_input_layer_name = next(iter(layer_network.inputs.keys()))

        with SingleLayerNetwork(
                network=layer_network,
                exec_network=exec_network,
                input_layer_name=network_input_layer_name,
                layer_name=layer_to_clone.name,
                output_layer_name=layer_to_clone.name + "_",
                reference_output_layer_name=reference_output_layer_name
        ) as single_layer_network:

            debug("single layer #{} {} network infer".format(
                index, single_layer_network.layer_name))
            accuracy_drop_list = self.infer_single_layer_network(
                single_layer_network, full_network_result)

            return LayerAccuracyDropInfo(
                layer_name=single_layer_network.layer_name,
                value=LayerAccuracyDropInfo.calculate(accuracy_drop_list))
Ejemplo n.º 6
0
    def _create_single_layer_networks(self, stat):
        '''
        Create a separate single-layer network for each layer that can be
        quantized and affects final accuracy.

        :param stat: map of layer name -> object with min_outputs/max_outputs
        :return: dict mapping layer name -> SingleLayerNetwork
        :raises ValueError: if a created sub-network has more than one input
        '''
        network = ie.IENetwork(self._configuration.model,
                               self._configuration.weights)
        # if self._configuration.batch_size:
        #     # need to use reshape API
        #     network.batch_size = self._configuration.batch_size

        try:
            network_info = NetworkInfo(self._configuration.model)

            # CVS-14302: IE Network INT8 Normalizer: scale factor calculation is incorrect
            # for layer_name, layer_statistics in stat.items():
            #     layer_info = network_info.get_layer(layer_name)
            #     if layer_info.type == 'Convolution' and \
            #         layer_info.outputs and \
            #         layer_info.outputs[0].layer.type == 'ReLU' and \
            #         layer_info.outputs[0].layer.outputs[0] and \
            #         len(layer_statistics.max_outputs) > len(stat[layer_info.outputs[0].layer.name].max_outputs):

            #         relu_max_outputs = stat[layer_info.outputs[0].layer.name].max_outputs
            #         relu_min_outputs = stat[layer_info.outputs[0].layer.name].min_outputs

            #         while len(layer_statistics.max_outputs) > len(relu_max_outputs):
            #             relu_max_outputs.append(relu_max_outputs[-1])
            #             relu_min_outputs.append(relu_min_outputs[-1])

            single_layer_networks = dict()

            layer_index = 1
            for layer_to_clone in network.layers.values():
                layer_to_clone_info = network_info.get_layer(
                    layer_to_clone.name)
                # Skip layers that are ignored, unsupported for quantization,
                # or not in a simple single-output/single-input chain.
                # BUGFIX: the original read
                #   len(layer_to_clone_info.outputs[0].layer.inputs != 1)
                # which applies len() to the result of a comparison (a bool)
                # and raises TypeError at runtime; the length comparison was
                # intended instead.
                if layer_to_clone.name in self._ignore_layer_names or \
                        not self._normalizer.is_quantization_supported(layer_to_clone.type) or \
                        len(layer_to_clone_info.outputs) != 1 or \
                        len(layer_to_clone_info.outputs[0].layer.inputs) != 1:
                    continue

                # Fuse the following activation layer when supported.
                activation_layer = network.layers[
                    layer_to_clone_info.outputs[0].layer.name] if (
                        len(layer_to_clone_info.outputs) == 1
                        and self._normalizer.is_quantization_fusing_supported(
                            layer_to_clone_info,
                            layer_to_clone_info.outputs[0].layer)) else None
                if activation_layer:
                    debug("create network #{} for layer {} ({}) -> {} ({})".
                          format(layer_index, layer_to_clone.name,
                                 layer_to_clone.type, activation_layer.name,
                                 activation_layer.type))
                else:
                    debug("create network #{} for layer {} ({})".format(
                        layer_index, layer_to_clone.name, layer_to_clone.type))

                layer_network, reference_output_layer_name = self._normalizer.create_network_for_layer(
                    self._configuration.weights, layer_to_clone,
                    layer_to_clone_info, activation_layer)

                Network.reshape(layer_network, self._configuration.batch_size)

                # Attach collected per-layer min/max statistics.
                # TODO: initialize only necessary statistics
                network_stats = {}
                for layer_name, node_statistic in stat.items():
                    network_stats[layer_name] = ie.LayerStats(
                        min=tuple(node_statistic.min_outputs),
                        max=tuple(node_statistic.max_outputs))
                layer_network.stats.update(network_stats)

                # 'INT8' is expressed as 'I8' at the layer-params level.
                params = layer_network.layers[layer_to_clone.name].params
                params[
                    "quantization_level"] = 'I8' if self._configuration.precision == 'INT8' else self._configuration.precision
                # reassign to trigger the IE layer's params setter
                layer_network.layers[layer_to_clone.name].params = params

                exec_network = self._plugin.load(
                    network=layer_network,
                    config={"EXCLUSIVE_ASYNC_REQUESTS": "YES"})

                if len(layer_network.inputs) != 1:
                    raise ValueError("created network has several inputs")

                network_input_layer_name = next(
                    iter(layer_network.inputs.keys()))

                single_layer_networks[
                    layer_to_clone.name] = SingleLayerNetwork(
                        network=layer_network,
                        exec_network=exec_network,
                        input_layer_name=network_input_layer_name,
                        layer_name=layer_to_clone.name,
                        output_layer_name=layer_to_clone.name + "_",
                        reference_output_layer_name=reference_output_layer_name
                    )

                layer_index += 1

            return single_layer_networks
        finally:
            # release the full network explicitly; IENetwork holds native
            # resources that should not wait for garbage collection
            del network