def set_statistics(self, statistics: dict):
    network_stats = {}
    for layer_name, node_statistic in statistics.items():
        network_stats[layer_name] = ie.LayerStats(
            min=tuple(node_statistic.min_outputs),
            max=tuple(node_statistic.max_outputs))
    self.ie_network.stats.update(network_stats)
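# The min/max-to-LayerStats conversion above reappears in save(),
# set_single_layer_networks(), run(), collect_in_thread() and
# _create_single_layer_networks() below. A minimal sketch of a shared helper
# (hypothetical, not part of the original tool; it assumes the module's
# existing `ie` import and that statistics values expose the
# min_outputs/max_outputs sequences used above):
def build_layer_stats(statistics: dict) -> dict:
    """Convert per-layer min/max statistics into ie.LayerStats objects."""
    return {
        layer_name: ie.LayerStats(min=tuple(node_statistic.min_outputs),
                                  max=tuple(node_statistic.max_outputs))
        for layer_name, node_statistic in statistics.items()
    }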
def save(self, model_file_path: str, weights_file_path: str,
         quantization_level: dict, statistics):
    '''
    Save calibration results.
    '''
    if not statistics:
        raise ValueError("statistics is empty")

    network = self.create_network()

    network_stats = {}
    for layer_name, node_statistic in statistics.items():
        network_stats[layer_name] = ie.LayerStats(
            min=tuple(node_statistic.min_outputs),
            max=tuple(node_statistic.max_outputs))
    network.stats.update(network_stats)

    for layer in network.layers.values():
        if self.is_quantization_supported(layer.type) and \
                layer.name in quantization_level:
            params = layer.params
            params["quantization_level"] = quantization_level[layer.name]
            layer.params = params

    network.serialize(model_file_path, weights_file_path)
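# Hypothetical usage sketch (paths, layer names, and the `network` /
# `collected_statistics` objects are illustrative, not from the source):
#
#   network.set_statistics(collected_statistics)
#   network.save("model_i8.xml", "model_i8.bin",
#                quantization_level={"conv1": "I8"},
#                statistics=collected_statistics)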
def set_single_layer_networks(self):
    assert self._configuration is not None, "Configuration should be set"
    assert self._per_layer_statistics is not None, "Statistics should be set"

    network_info = NetworkInfo(self._configuration.model)

    index = 1
    for layer in self._network.layers.values():
        if layer.name not in self._ignore_layer_names and \
                self._normalizer.is_quantization_supported(layer.type):
            layer_info = network_info.get_layer(layer.name)
            if (len(layer_info.outputs) == 1) and \
                    (len(layer_info.outputs[0].layer.inputs) == 1):
                activation_layer = self._network.layers[
                    layer_info.outputs[0].layer.name] if (
                        len(layer_info.outputs) == 1 and
                        self._normalizer.is_quantization_fusing_supported(
                            layer_info,
                            layer_info.outputs[0].layer)) else None
                if activation_layer:
                    debug("create network #{} for layer {} ({}) -> {} ({})".format(
                        index, layer.name, layer.type,
                        activation_layer.name, activation_layer.type))
                else:
                    debug("create network #{} for layer {} ({})".format(
                        index, layer.name, layer.type))

                layer_network, reference_output_layer_name = \
                    self._normalizer.create_network_for_layer(
                        None, layer, layer_info, activation_layer)

                Network.reshape(layer_network, self._configuration.batch_size)

                network_stats = {}
                # TODO: initialize only the necessary statistics
                for layer_name, node_statistic in self._per_layer_statistics.items():
                    network_stats[layer_name] = ie.LayerStats(
                        min=tuple(node_statistic.min_outputs),
                        max=tuple(node_statistic.max_outputs))
                layer_network.stats.update(network_stats)

                params = layer_network.layers[layer.name].params
                params["quantization_level"] = \
                    'I8' if self._configuration.precision == 'INT8' \
                    else self._configuration.precision
                layer_network.layers[layer.name].params = params

                exec_network = self._normalizer.plugin.load(
                    network=layer_network,
                    config={"EXCLUSIVE_ASYNC_REQUESTS": "YES"})

                if len(layer_network.inputs) != 1:
                    raise ValueError("created network has several inputs")

                network_input_layer_name = next(iter(layer_network.inputs.keys()))

                single_layer_network = SingleLayerNetwork(
                    network=layer_network,
                    exec_network=exec_network,
                    input_layer_name=network_input_layer_name,
                    layer_name=layer.name,
                    output_layer_name=layer.name + "_",
                    reference_output_layer_name=reference_output_layer_name)

                self._single_layer_networks = np.append(
                    self._single_layer_networks, single_layer_network)
                self._layers_to_return_to_fp32 = np.append(
                    self._layers_to_return_to_fp32, layer)
                index += 1
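# The precision-to-quantization_level mapping above ('INT8' maps to 'I8',
# any other configured precision is passed through unchanged) also appears in
# collect_in_thread() and _create_single_layer_networks() below. A minimal
# helper sketch (hypothetical name, not part of the original tool):
def quantization_level_for(precision: str) -> str:
    """Map the configured precision to the IE 'quantization_level' value."""
    return 'I8' if precision == 'INT8' else precision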
def run(self,
        network: Network = None,
        statistics=None,
        quantization_levels=None,
        iterations_count: int = 1000) -> BenchmarkResult:
    model = self._configuration.config['models'][0]
    launcher_config = model['launchers'][0]
    dataset_config = model['datasets'][0]

    model_evaluator = ModelEvaluator.from_configs(launcher_config,
                                                  dataset_config)
    try:
        if network:
            del model_evaluator.launcher.network
            del model_evaluator.launcher.exec_network
            model_evaluator.launcher.network = network.ie_network
            model_evaluator.launcher.exec_network = \
                model_evaluator.launcher.plugin.load(network.ie_network)

        ie_network = model_evaluator.launcher.network

        if statistics:
            network_stats = {}
            for layer_name, node_statistic in statistics.items():
                network_stats[layer_name] = ie.LayerStats(
                    min=tuple(node_statistic.min_outputs),
                    max=tuple(node_statistic.max_outputs))
            ie_network.stats.update(network_stats)

        if quantization_levels:
            for layer_name, value in quantization_levels.items():
                params = ie_network.layers[layer_name].params
                params["quantization_level"] = value
                ie_network.layers[layer_name].params = params

        if model_evaluator.dataset.size != 1:
            info("only the first image from the dataset annotation is used "
                 "to perform the benchmark")
            model_evaluator.dataset.size = 1

        process_dataset_callback = BenchmarkCallback(
            configuration=self._configuration,
            network=network,
            iterations_count=iterations_count)
        model_evaluator.process_dataset(
            None,
            progress_reporter=None,
            output_callback=process_dataset_callback.output_callback,
            benchmark=process_dataset_callback.benchmark_callback)

        if len(model_evaluator.launcher.exec_network.requests) != 1:
            raise ValueError("unexpected network requests count")

        latency = process_dataset_callback.latency
    finally:
        model_evaluator.release()

    return BenchmarkResult(latency)
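# Hypothetical usage sketch (the `benchmark` object and argument values are
# illustrative, and it assumes BenchmarkResult exposes the latency it was
# constructed with):
#
#   result = benchmark.run(network=calibrated_network,
#                          statistics=collected_statistics,
#                          quantization_levels={"conv1": "I8"},
#                          iterations_count=100)
#   debug("benchmark latency: {}".format(result.latency))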
def collect_in_thread(
        self, statistics: dict, full_network_result: InferenceResult,
        network: ie.IENetwork, network_info: NetworkInfo,
        quantization_layer: QuantizationLayer) -> LayerAccuracyDropInfo:
    index = quantization_layer.index
    layer_to_clone = quantization_layer.layer
    layer_to_clone_info = network_info.get_layer(layer_to_clone.name)

    activation_layer = network.layers[
        layer_to_clone_info.outputs[0].layer.name] if (
            len(layer_to_clone_info.outputs) == 1 and
            self._normalizer.is_quantization_fusing_supported(
                layer_to_clone_info,
                layer_to_clone_info.outputs[0].layer)) else None

    if activation_layer:
        debug("create network #{} for layer {} ({}) -> {} ({})".format(
            index, layer_to_clone.name, layer_to_clone.type,
            activation_layer.name, activation_layer.type))
    else:
        debug("create network #{} for layer {} ({})".format(
            index, layer_to_clone.name, layer_to_clone.type))

    layer_network, reference_output_layer_name = \
        self._normalizer.create_network_for_layer(
            self._configuration.weights, layer_to_clone,
            layer_to_clone_info, activation_layer)

    Network.reshape(layer_network, self._configuration.batch_size)

    network_stats = {}
    # TODO: initialize only the necessary statistics
    for layer_name, node_statistic in statistics.items():
        network_stats[layer_name] = ie.LayerStats(
            min=tuple(node_statistic.min_outputs),
            max=tuple(node_statistic.max_outputs))
    layer_network.stats.update(network_stats)

    params = layer_network.layers[layer_to_clone.name].params
    params["quantization_level"] = \
        'I8' if self._configuration.precision == 'INT8' \
        else self._configuration.precision
    layer_network.layers[layer_to_clone.name].params = params

    exec_network = self._plugin.load(
        network=layer_network,
        config={"EXCLUSIVE_ASYNC_REQUESTS": "YES"})

    if len(layer_network.inputs) != 1:
        raise ValueError("created network has several inputs")

    network_input_layer_name = next(iter(layer_network.inputs.keys()))

    with SingleLayerNetwork(
            network=layer_network,
            exec_network=exec_network,
            input_layer_name=network_input_layer_name,
            layer_name=layer_to_clone.name,
            output_layer_name=layer_to_clone.name + "_",
            reference_output_layer_name=reference_output_layer_name
    ) as single_layer_network:
        debug("single layer #{} {} network infer".format(
            index, single_layer_network.layer_name))
        accuracy_drop_list = self.infer_single_layer_network(
            single_layer_network, full_network_result)

        return LayerAccuracyDropInfo(
            layer_name=single_layer_network.layer_name,
            value=LayerAccuracyDropInfo.calculate(accuracy_drop_list))
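# SingleLayerNetwork is used as a context manager above so that the per-layer
# exec_network is released as soon as the inference is done. A minimal sketch
# of that protocol (the release logic is an assumption, not from the source):
#
#   class SingleLayerNetwork:
#       def __enter__(self):
#           return self
#       def __exit__(self, exc_type, exc_value, traceback):
#           self.release()  # assumed: frees network / exec_network handles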
def _create_single_layer_networks(self, stat):
    '''
    Get the layers that can be quantized and that affect the final accuracy.
    A separate network is created for each such layer.
    '''
    network = ie.IENetwork(self._configuration.model,
                           self._configuration.weights)
    # if self._configuration.batch_size:
    #     # need to use reshape API
    #     network.batch_size = self._configuration.batch_size

    try:
        network_info = NetworkInfo(self._configuration.model)

        # CVS-14302: IE Network INT8 Normalizer: scale factor calculation is
        # incorrect
        # for layer_name, layer_statistics in stat.items():
        #     layer_info = network_info.get_layer(layer_name)
        #     if layer_info.type == 'Convolution' and \
        #             layer_info.outputs and \
        #             layer_info.outputs[0].layer.type == 'ReLU' and \
        #             layer_info.outputs[0].layer.outputs[0] and \
        #             len(layer_statistics.max_outputs) > len(stat[layer_info.outputs[0].layer.name].max_outputs):
        #         relu_max_outputs = stat[layer_info.outputs[0].layer.name].max_outputs
        #         relu_min_outputs = stat[layer_info.outputs[0].layer.name].min_outputs
        #         while len(layer_statistics.max_outputs) > len(relu_max_outputs):
        #             relu_max_outputs.append(relu_max_outputs[-1])
        #             relu_min_outputs.append(relu_min_outputs[-1])

        single_layer_networks = dict()

        layer_index = 1
        for layer_to_clone in network.layers.values():
            layer_to_clone_info = network_info.get_layer(layer_to_clone.name)
            if layer_to_clone.name in self._ignore_layer_names or \
                    not self._normalizer.is_quantization_supported(layer_to_clone.type) or \
                    len(layer_to_clone_info.outputs) != 1 or \
                    len(layer_to_clone_info.outputs[0].layer.inputs) != 1:
                continue

            activation_layer = network.layers[
                layer_to_clone_info.outputs[0].layer.name] if (
                    len(layer_to_clone_info.outputs) == 1 and
                    self._normalizer.is_quantization_fusing_supported(
                        layer_to_clone_info,
                        layer_to_clone_info.outputs[0].layer)) else None

            if activation_layer:
                debug("create network #{} for layer {} ({}) -> {} ({})".format(
                    layer_index, layer_to_clone.name, layer_to_clone.type,
                    activation_layer.name, activation_layer.type))
            else:
                debug("create network #{} for layer {} ({})".format(
                    layer_index, layer_to_clone.name, layer_to_clone.type))

            layer_network, reference_output_layer_name = \
                self._normalizer.create_network_for_layer(
                    self._configuration.weights, layer_to_clone,
                    layer_to_clone_info, activation_layer)

            Network.reshape(layer_network, self._configuration.batch_size)

            network_stats = {}
            # TODO: initialize only the necessary statistics
            for layer_name, node_statistic in stat.items():
                network_stats[layer_name] = ie.LayerStats(
                    min=tuple(node_statistic.min_outputs),
                    max=tuple(node_statistic.max_outputs))
            layer_network.stats.update(network_stats)

            params = layer_network.layers[layer_to_clone.name].params
            params["quantization_level"] = \
                'I8' if self._configuration.precision == 'INT8' \
                else self._configuration.precision
            layer_network.layers[layer_to_clone.name].params = params

            exec_network = self._plugin.load(
                network=layer_network,
                config={"EXCLUSIVE_ASYNC_REQUESTS": "YES"})

            if len(layer_network.inputs) != 1:
                raise ValueError("created network has several inputs")

            network_input_layer_name = next(iter(layer_network.inputs.keys()))

            single_layer_networks[layer_to_clone.name] = SingleLayerNetwork(
                network=layer_network,
                exec_network=exec_network,
                input_layer_name=network_input_layer_name,
                layer_name=layer_to_clone.name,
                output_layer_name=layer_to_clone.name + "_",
                reference_output_layer_name=reference_output_layer_name)

            layer_index += 1

        return single_layer_networks
    finally:
        del network
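# The eligibility test at the top of the loop above (layer not ignored,
# quantization-supported type, exactly one output edge whose consumer has
# exactly one input) could be factored into a predicate for readability.
# A minimal sketch (hypothetical helper; it only assumes the NetworkInfo
# layer structure already used above):
def has_single_consumer(layer_info) -> bool:
    """True if the layer feeds exactly one consumer that has a single input."""
    return (len(layer_info.outputs) == 1
            and len(layer_info.outputs[0].layer.inputs) == 1)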