def optimize_model(args):
    model_config, engine_config, dataset_config, algorithms = get_configs(args)

    # Step 1: Load the model.
    model = load_model(model_config)

    # Step 2: Initialize the data loader.
    data_loader = ImageNetDataLoader(dataset_config)

    # Step 3 (Optional. Required for AccuracyAwareQuantization): Initialize the metric.
    metric = Accuracy(top_k=1)

    # Step 4: Initialize the engine for metric calculation and statistics collection.
    engine = IEEngine(engine_config, data_loader, metric)

    # Step 5: Create a pipeline of compression algorithms.
    pipeline = create_pipeline(algorithms, engine)

    # Step 6: Execute the pipeline.
    compressed_model = pipeline.run(model)

    # Step 7 (Optional): Compress model weights to quantized precision
    # in order to reduce the size of the final .bin file.
    compress_model_weights(compressed_model)

    return compressed_model, pipeline
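# The pipeline above relies on user-supplied DataLoader/Metric classes (ImageNetDataLoader,
# Accuracy). A minimal sketch of such a loader follows; the class name FolderImageLoader,
# the "data_source" config key and the 224x224 preprocessing are illustrative assumptions,
# not part of the original sample. Depending on the OpenVINO release, the base class comes
# from compression.api (pre-2022) or openvino.tools.pot, and newer releases flip
# __getitem__ to return (data, annotation) instead of (annotation, data).

import os

import cv2
import numpy as np
from compression.api import DataLoader  # newer releases: from openvino.tools.pot import DataLoader


class FolderImageLoader(DataLoader):
    """Feeds preprocessed images from a flat directory to the POT engine (sketch)."""

    def __init__(self, config):
        super().__init__(config)
        self._image_dir = config["data_source"]
        self._files = sorted(os.listdir(self._image_dir))

    def __len__(self):
        return len(self._files)

    def __getitem__(self, index):
        if index >= len(self):
            raise IndexError
        image = cv2.imread(os.path.join(self._image_dir, self._files[index]))
        image = cv2.resize(image, (224, 224)).transpose(2, 0, 1).astype(np.float32)
        # (annotation, data) convention of the pre-2022 API; DefaultQuantization does not
        # need labels, so the label part of the annotation is left as None.
        return (index, None), image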
def optimize(self,
             optimization_type: OptimizationType,
             dataset: DatasetEntity,
             output_model: ModelEntity,
             optimization_parameters: Optional[OptimizationParameters]):
    if optimization_type is not OptimizationType.POT:
        raise ValueError(
            "POT is the only supported optimization type for OpenVino models"
        )

    data_loader = OTEOpenVinoDataLoader(dataset, self.inferencer)

    with tempfile.TemporaryDirectory() as tempdir:
        xml_path = os.path.join(tempdir, "model.xml")
        bin_path = os.path.join(tempdir, "model.bin")
        with open(xml_path, "wb") as f:
            f.write(self.model.get_data("openvino.xml"))
        with open(bin_path, "wb") as f:
            f.write(self.model.get_data("openvino.bin"))

        model_config = ADDict({
            'model_name': 'openvino_model',
            'model': xml_path,
            'weights': bin_path
        })

        model = load_model(model_config)

        if get_nodes_by_type(model, ["FakeQuantize"]):
            raise RuntimeError("Model is already optimized by POT")

    engine_config = ADDict({'device': 'CPU'})

    stat_subset_size = self.hparams.pot_parameters.stat_subset_size
    preset = self.hparams.pot_parameters.preset.name.lower()

    algorithms = [{
        'name': 'DefaultQuantization',
        'params': {
            'target_device': 'ANY',
            'preset': preset,
            'stat_subset_size': min(stat_subset_size, len(data_loader)),
            'shuffle_data': True
        }
    }]

    engine = IEEngine(config=engine_config, data_loader=data_loader, metric=None)

    pipeline = create_pipeline(algorithms, engine)

    compressed_model = pipeline.run(model)

    compress_model_weights(compressed_model)

    with tempfile.TemporaryDirectory() as tempdir:
        save_model(compressed_model, tempdir, model_name="model")
        with open(os.path.join(tempdir, "model.xml"), "rb") as f:
            output_model.set_data("openvino.xml", f.read())
        with open(os.path.join(tempdir, "model.bin"), "rb") as f:
            output_model.set_data("openvino.bin", f.read())

    output_model.set_data(
        "label_schema.json",
        label_schema_to_bytes(self.task_environment.label_schema))

    # set model attributes for quantized model
    output_model.model_format = ModelFormat.OPENVINO
    output_model.optimization_type = ModelOptimizationType.POT
    output_model.optimization_methods = [OptimizationMethod.QUANTIZATION]
    output_model.precision = [ModelPrecision.INT8]

    self.model = output_model
    self.inferencer = self.load_inferencer()
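# OTEOpenVinoDataLoader above adapts an OTE DatasetEntity so POT can draw calibration
# samples through the same pre-processing the inferencer uses. A rough sketch of such a
# wrapper is given below; the exact OTE implementation may differ, and the
# inferencer.pre_process call is an assumption about the inferencer's interface.

from compression.api import DataLoader


class OTEOpenVinoDataLoaderSketch(DataLoader):
    """Adapts an OTE DatasetEntity to the POT DataLoader interface (illustrative)."""

    def __init__(self, dataset, inferencer):
        super().__init__(config=None)
        self.dataset = dataset
        self.inferencer = inferencer

    def __len__(self):
        return len(self.dataset)

    def __getitem__(self, index):
        image = self.dataset[index].numpy  # raw frame of the dataset item
        inputs, _metadata = self.inferencer.pre_process(image)  # assumed interface
        return (index, None), inputs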
"device": "CPU", "stat_requests_number": 4, "eval_requests_number": 4 }) dataset_config = Dict({ "preprocessed_data_dir": args.preprocessed_data_dir, }) algorithms = [{ 'name': 'DefaultQuantization', 'params': { 'target_device': 'CPU', 'preset': 'performance', 'stat_subset_size': 300 } }] model = load_model(model_config) data_loader = MyDataLoader(dataset_config) metric = MyMetric() loss = None engine = IEEngine(engine_config, data_loader, metric) pipeline = create_pipeline(algorithms, engine) compressed_model = pipeline.run(model) save_model(compressed_model, args.int8_directory) print('Calibrated model successfully saved to: {}'.format(args.int8_directory))
def optimize(
    self,
    optimization_type: OptimizationType,
    dataset: DatasetEntity,
    output_model: ModelEntity,
    optimization_parameters: Optional[OptimizationParameters],
):
    """Optimize the model.

    Args:
        optimization_type (OptimizationType): Type of optimization [POT or NNCF]
        dataset (DatasetEntity): Input Dataset.
        output_model (ModelEntity): Output model.
        optimization_parameters (Optional[OptimizationParameters]): Optimization parameters.

    Raises:
        ValueError: When the optimization type is not POT, which is the only supported type at the moment.
    """
    if optimization_type is not OptimizationType.POT:
        raise ValueError(
            "POT is the only supported optimization type for OpenVINO models"
        )

    data_loader = OTEOpenVINOAnomalyDataloader(
        config=self.config, dataset=dataset, inferencer=self.inferencer)

    with tempfile.TemporaryDirectory() as tempdir:
        xml_path = os.path.join(tempdir, "model.xml")
        bin_path = os.path.join(tempdir, "model.bin")
        self.__save_weights(
            xml_path, self.task_environment.model.get_data("openvino.xml"))
        self.__save_weights(
            bin_path, self.task_environment.model.get_data("openvino.bin"))

        model_config = {
            "model_name": "openvino_model",
            "model": xml_path,
            "weights": bin_path,
        }
        model = load_model(model_config)

        if get_nodes_by_type(model, ["FakeQuantize"]):
            logger.warning("Model is already optimized by POT")
            return

    engine = IEEngine(config=ADDict({"device": "CPU"}), data_loader=data_loader, metric=None)
    pipeline = create_pipeline(
        algo_config=self._get_optimization_algorithms_configs(), engine=engine)
    compressed_model = pipeline.run(model)
    compress_model_weights(compressed_model)

    with tempfile.TemporaryDirectory() as tempdir:
        save_model(compressed_model, tempdir, model_name="model")
        self.__load_weights(path=os.path.join(tempdir, "model.xml"),
                            output_model=output_model, key="openvino.xml")
        self.__load_weights(path=os.path.join(tempdir, "model.bin"),
                            output_model=output_model, key="openvino.bin")

    output_model.set_data(
        "label_schema.json",
        label_schema_to_bytes(self.task_environment.label_schema))
    output_model.set_data(
        "threshold", self.task_environment.model.get_data("threshold"))
    output_model.model_status = ModelStatus.SUCCESS
    output_model.model_format = ModelFormat.OPENVINO
    output_model.optimization_type = ModelOptimizationType.POT
    output_model.optimization_methods = [OptimizationMethod.QUANTIZATION]
    output_model.precision = [ModelPrecision.INT8]

    self.task_environment.model = output_model
    self.inferencer = self.load_inferencer()
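# create_pipeline above receives its algorithm configuration from
# _get_optimization_algorithms_configs(). A hedged sketch of what such a helper might
# return is shown below; the real method reads the preset and subset size from the
# task's POT hyper-parameters, so the hard-coded values here are placeholders only.

from addict import Dict as ADDict


def _get_optimization_algorithms_configs_sketch(stat_subset_size=300, preset="performance"):
    """Illustrative stand-in for the task's algorithm-config helper."""
    return [
        ADDict({
            "name": "DefaultQuantization",
            "params": {
                "target_device": "ANY",
                "preset": preset,                    # e.g. 'performance' or 'mixed'
                "stat_subset_size": stat_subset_size,
                "shuffle_data": True,
            },
        })
    ]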
self.generate_experimental_IR_V10 = True
self.blobs_as_inputs = True
self.generate_deprecated_IR_V7 = False

model = load_model(model_config)
data_loader = MyDataLoader(dataset_config)
metric = MyMetric()
engine = IEEngine(engine_config, data_loader, metric)

if args.accuracy_aware_quantization:
    # https://docs.openvinotoolkit.org/latest/_compression_algorithms_quantization_accuracy_aware_README.html
    print(bcolors.BOLD + "Accuracy-aware quantization method" + bcolors.ENDC)
    pipeline = create_pipeline(accuracy_aware_quantization_algorithm, engine)
else:
    print(bcolors.BOLD + "Default quantization method" + bcolors.ENDC)
    pipeline = create_pipeline(default_quantization_algorithm, engine)

metric_results_FP32 = pipeline.evaluate(model)

compressed_model = pipeline.run(model)
save_model(compressed_model, args.int8_directory)

metric_results_INT8 = pipeline.evaluate(compressed_model)

print(bcolors.BOLD + "\nFINAL RESULTS" + bcolors.ENDC)

# print metric value
if metric_results_FP32: