Example #1
def optimize_model(args):
    model_config, engine_config, dataset_config, algorithms = get_configs(args)

    # Step 1: Load the model.
    model = load_model(model_config)

    # Step 2: Initialize the data loader.
    data_loader = ImageNetDataLoader(dataset_config)

    # Step 3 (Optional; required for AccuracyAwareQuantization): Initialize the metric.
    metric = Accuracy(top_k=1)

    # Step 4: Initialize the engine for metric calculation and statistics collection.
    engine = IEEngine(engine_config, data_loader, metric)

    # Step 5: Create a pipeline of compression algorithms.
    pipeline = create_pipeline(algorithms, engine)

    # Step 6: Execute the pipeline.
    compressed_model = pipeline.run(model)

    # Step 7 (Optional): Compress model weights to quantized precision
    #                    in order to reduce the size of the final .bin file.
    compress_model_weights(compressed_model)

    return compressed_model, pipeline
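
Example #1 assumes a dataset loader (`ImageNetDataLoader`) and a metric (`Accuracy`) that implement POT's abstract interfaces. The sketch below is illustrative rather than the sample's actual code: it assumes the `openvino.tools.pot` package (older releases shipped the same classes under `compression.api`) and hypothetical preprocessed `images`/`labels` arrays; note that the `__getitem__` return convention changed between POT releases, so check the docs for your version.

import numpy as np
from openvino.tools.pot import DataLoader, Metric


class ImageNetDataLoader(DataLoader):
    """Feeds preprocessed samples to the engine one at a time."""

    def __init__(self, config):
        super().__init__(config)
        # Assumption: the dataset was preprocessed into two arrays beforehand.
        self._images = np.load(config["images_path"])   # NCHW float32
        self._labels = np.load(config["labels_path"])   # integer class ids

    def __len__(self):
        return len(self._images)

    def __getitem__(self, index):
        # Pre-2022 POT expects (annotation, data); newer releases
        # expect (data, annotation). Adjust to your version.
        return (index, self._labels[index]), self._images[index]


class Accuracy(Metric):
    """Top-k classification accuracy, accumulated over the dataset."""

    def __init__(self, top_k=1):
        super().__init__()
        self._top_k = top_k
        self._name = "accuracy@top{}".format(top_k)
        self._matches = []

    @property
    def value(self):
        # Metric value for the most recent batch only.
        return {self._name: self._matches[-1]}

    @property
    def avg_value(self):
        # Metric value averaged over everything seen since reset().
        return {self._name: np.ravel(self._matches).mean()}

    def update(self, output, target):
        predictions = np.argsort(output[0], axis=1)[:, -self._top_k:]
        self._matches.append(
            [float(t in predictions[i]) for i, t in enumerate(target)])

    def reset(self):
        self._matches = []

    def get_attributes(self):
        return {self._name: {"direction": "higher-better", "type": "accuracy"}}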
Example #2
    def optimize(self, optimization_type: OptimizationType,
                 dataset: DatasetEntity, output_model: ModelEntity,
                 optimization_parameters: Optional[OptimizationParameters]):

        if optimization_type is not OptimizationType.POT:
            raise ValueError(
                "POT is the only supported optimization type for OpenVino models"
            )

        data_loader = OTEOpenVinoDataLoader(dataset, self.inferencer)

        with tempfile.TemporaryDirectory() as tempdir:
            xml_path = os.path.join(tempdir, "model.xml")
            bin_path = os.path.join(tempdir, "model.bin")
            with open(xml_path, "wb") as f:
                f.write(self.model.get_data("openvino.xml"))
            with open(bin_path, "wb") as f:
                f.write(self.model.get_data("openvino.bin"))

            model_config = ADDict({
                'model_name': 'openvino_model',
                'model': xml_path,
                'weights': bin_path
            })

            model = load_model(model_config)

            if get_nodes_by_type(model, ["FakeQuantize"]):
                raise RuntimeError("Model is already optimized by POT")

        engine_config = ADDict({'device': 'CPU'})

        stat_subset_size = self.hparams.pot_parameters.stat_subset_size
        preset = self.hparams.pot_parameters.preset.name.lower()

        algorithms = [{
            'name': 'DefaultQuantization',
            'params': {
                'target_device': 'ANY',
                'preset': preset,
                'stat_subset_size': min(stat_subset_size, len(data_loader)),
                'shuffle_data': True
            }
        }]

        engine = IEEngine(config=engine_config,
                          data_loader=data_loader,
                          metric=None)

        pipeline = create_pipeline(algorithms, engine)

        compressed_model = pipeline.run(model)

        compress_model_weights(compressed_model)

        with tempfile.TemporaryDirectory() as tempdir:
            save_model(compressed_model, tempdir, model_name="model")
            with open(os.path.join(tempdir, "model.xml"), "rb") as f:
                output_model.set_data("openvino.xml", f.read())
            with open(os.path.join(tempdir, "model.bin"), "rb") as f:
                output_model.set_data("openvino.bin", f.read())

        output_model.set_data(
            "label_schema.json",
            label_schema_to_bytes(self.task_environment.label_schema))

        # set model attributes for quantized model
        output_model.model_format = ModelFormat.OPENVINO
        output_model.optimization_type = ModelOptimizationType.POT
        output_model.optimization_methods = [OptimizationMethod.QUANTIZATION]
        output_model.precision = [ModelPrecision.INT8]

        self.model = output_model
        self.inferencer = self.load_inferencer()
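
The snippets on this page omit their imports. For orientation, the POT entry points used throughout are typically pulled in as shown below; module paths changed between releases (earlier versions exposed everything under the `compression` package), so treat this as one plausible layout rather than the definitive one.

import os
import tempfile

from addict import Dict as ADDict  # appears as `Dict` or `ADDict` in the snippets

# OpenVINO Post-training Optimization Tool (POT) entry points.
# In 2022.x releases these live under openvino.tools.pot; older
# releases shipped them under the `compression` package instead.
from openvino.tools.pot import (
    IEEngine,
    load_model,
    save_model,
    compress_model_weights,
    create_pipeline,
)
from openvino.tools.pot.graph.model_utils import get_nodes_by_type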
    "device": "CPU",
    "stat_requests_number": 4,
    "eval_requests_number": 4
})

dataset_config = Dict({
    "preprocessed_data_dir": args.preprocessed_data_dir,
})

algorithms = [{
    'name': 'DefaultQuantization',
    'params': {
        'target_device': 'CPU',
        'preset': 'performance',
        'stat_subset_size': 300
    }
}]

model = load_model(model_config)

data_loader = MyDataLoader(dataset_config)
metric = MyMetric()

loss = None
engine = IEEngine(engine_config, data_loader, metric)
pipeline = create_pipeline(algorithms, engine)

compressed_model = pipeline.run(model)
save_model(compressed_model, args.int8_directory)

print('Calibrated model successfully saved to: {}'.format(args.int8_directory))
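
After `save_model` writes the INT8 IR, it can be sanity-checked by loading it with the OpenVINO runtime. A brief sketch, assuming the 2022.x `openvino.runtime` API and the output directory from the snippet above; the `.xml` file name follows the saved model's name, so "model.xml" here is illustrative.

import os
from openvino.runtime import Core

core = Core()
# Read the IR pair that save_model produced and compile it for CPU.
ir_xml = os.path.join(args.int8_directory, "model.xml")  # adjust to the actual name
compiled = core.compile_model(core.read_model(ir_xml), device_name="CPU")
print("INT8 IR compiles for CPU:", compiled is not None)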
Example #4

    def optimize(
        self,
        optimization_type: OptimizationType,
        dataset: DatasetEntity,
        output_model: ModelEntity,
        optimization_parameters: Optional[OptimizationParameters],
    ):
        """Optimize the model.

        Args:
            optimization_type (OptimizationType): Type of optimization [POT or NNCF]
            dataset (DatasetEntity): Input Dataset.
            output_model (ModelEntity): Output model.
            optimization_parameters (Optional[OptimizationParameters]): Optimization parameters.

        Raises:
            ValueError: When the optimization type is not POT, which is the only supported type at the moment.
        """
        if optimization_type is not OptimizationType.POT:
            raise ValueError(
                "POT is the only supported optimization type for OpenVINO models"
            )

        data_loader = OTEOpenVINOAnomalyDataloader(config=self.config,
                                                   dataset=dataset,
                                                   inferencer=self.inferencer)

        with tempfile.TemporaryDirectory() as tempdir:
            xml_path = os.path.join(tempdir, "model.xml")
            bin_path = os.path.join(tempdir, "model.bin")

            self.__save_weights(
                xml_path, self.task_environment.model.get_data("openvino.xml"))
            self.__save_weights(
                bin_path, self.task_environment.model.get_data("openvino.bin"))

            model_config = {
                "model_name": "openvino_model",
                "model": xml_path,
                "weights": bin_path,
            }
            model = load_model(model_config)

            if get_nodes_by_type(model, ["FakeQuantize"]):
                logger.warning("Model is already optimized by POT")
                return

        engine = IEEngine(config=ADDict({"device": "CPU"}),
                          data_loader=data_loader,
                          metric=None)
        pipeline = create_pipeline(
            algo_config=self._get_optimization_algorithms_configs(),
            engine=engine)
        compressed_model = pipeline.run(model)
        compress_model_weights(compressed_model)

        with tempfile.TemporaryDirectory() as tempdir:
            save_model(compressed_model, tempdir, model_name="model")
            self.__load_weights(path=os.path.join(tempdir, "model.xml"),
                                output_model=output_model,
                                key="openvino.xml")
            self.__load_weights(path=os.path.join(tempdir, "model.bin"),
                                output_model=output_model,
                                key="openvino.bin")

        output_model.set_data(
            "label_schema.json",
            label_schema_to_bytes(self.task_environment.label_schema))
        output_model.set_data(
            "threshold", self.task_environment.model.get_data("threshold"))
        output_model.model_status = ModelStatus.SUCCESS
        output_model.model_format = ModelFormat.OPENVINO
        output_model.optimization_type = ModelOptimizationType.POT
        output_model.optimization_methods = [OptimizationMethod.QUANTIZATION]
        output_model.precision = [ModelPrecision.INT8]

        self.task_environment.model = output_model
        self.inferencer = self.load_inferencer()
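
Example #4 delegates file round-tripping to two private helpers that the snippet does not include. Judging from the inline version in Example #2, they plausibly reduce to the following methods on the task class; the names and signatures come from the call sites, but the bodies are an assumption.

    @staticmethod
    def __save_weights(path: str, data: bytes) -> None:
        """Write serialized IR bytes to `path` (cf. the inline version in Example #2)."""
        with open(path, "wb") as file:
            file.write(data)

    @staticmethod
    def __load_weights(path: str, output_model: ModelEntity, key: str) -> None:
        """Read `path` back and attach its bytes to `output_model` under `key`."""
        with open(path, "rb") as file:
            output_model.set_data(key, file.read())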
Example #5
        self.generate_experimental_IR_V10 = True
        self.blobs_as_inputs = True
        self.generate_deprecated_IR_V7 = False


model = load_model(model_config)

data_loader = MyDataLoader(dataset_config)
metric = MyMetric()

engine = IEEngine(engine_config, data_loader, metric)

if args.accuracy_aware_quantization:
    # https://docs.openvinotoolkit.org/latest/_compression_algorithms_quantization_accuracy_aware_README.html
    print(bcolors.BOLD + "Accuracy-aware quantization method" + bcolors.ENDC)
    pipeline = create_pipeline(accuracy_aware_quantization_algorithm, engine)
else:
    print(bcolors.BOLD + "Default quantization method" + bcolors.ENDC)
    pipeline = create_pipeline(default_quantization_algorithm, engine)

metric_results_FP32 = pipeline.evaluate(model)

compressed_model = pipeline.run(model)
save_model(compressed_model, args.int8_directory)

metric_results_INT8 = pipeline.evaluate(compressed_model)

print(bcolors.BOLD + "\nFINAL RESULTS" + bcolors.ENDC)

# print metric value
if metric_results_FP32:
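
The listing breaks off at this final check. A minimal, assumed continuation that reports both runs; POT's `pipeline.evaluate` returns a dict of metric name/value pairs, so each result set can be iterated directly.

if metric_results_FP32:
    for name, value in metric_results_FP32.items():
        print("FP32 {}: {:.4f}".format(name, value))
if metric_results_INT8:
    for name, value in metric_results_INT8.items():
        print("INT8 {}: {:.4f}".format(name, value))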