Code example #1
    def generate_quantized_model(self,
                                 model_path,
                                 data_path,
                                 algo="KL",
                                 round_type="round",
                                 quantizable_op_type=["conv2d"],
                                 is_full_quantize=False,
                                 is_use_cache_file=False,
                                 is_optimize_model=False,
                                 batch_size=10,
                                 batch_nums=10,
                                 onnx_format=False):

        place = fluid.CPUPlace()
        exe = fluid.Executor(place)
        scope = fluid.global_scope()
        batch_generator = self.get_batch_reader(data_path, place)

        ptq = PostTrainingQuantization(executor=exe,
                                       model_dir=model_path,
                                       batch_generator=batch_generator,
                                       batch_nums=batch_nums,
                                       algo=algo,
                                       quantizable_op_type=quantizable_op_type,
                                       round_type=round_type,
                                       is_full_quantize=is_full_quantize,
                                       optimize_model=is_optimize_model,
                                       onnx_format=onnx_format,
                                       is_use_cache_file=is_use_cache_file)
        ptq.quantize()
        ptq.save_quantized_model(self.int8_model_path)
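
For context, here is a minimal sketch of how a test might call this helper. The `tester` instance and all paths are hypothetical placeholders, not part of the original test.

    # Hypothetical usage sketch: `tester` is assumed to be an instance of the
    # test class defining generate_quantized_model; all paths are placeholders.
    tester.int8_model_path = "./int8_model"
    tester.generate_quantized_model(
        model_path="./fp32_model",       # directory holding the FP32 inference model
        data_path="./calib_data.bin",    # calibration data read by get_batch_reader
        algo="KL",
        quantizable_op_type=["conv2d", "depthwise_conv2d", "mul"],
        batch_nums=10,
        onnx_format=False)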
Code example #2
    def generate_quantized_model(self,
                                 model_path,
                                 algo="KL",
                                 quantizable_op_type=["conv2d"],
                                 is_full_quantize=False,
                                 is_use_cache_file=False,
                                 is_optimize_model=False,
                                 batch_size=10,
                                 batch_nums=10):

        place = fluid.CPUPlace()
        exe = fluid.Executor(place)
        scope = fluid.global_scope()
        val_reader = paddle.dataset.mnist.train()

        ptq = PostTrainingQuantization(executor=exe,
                                       model_dir=model_path,
                                       sample_generator=val_reader,
                                       batch_size=batch_size,
                                       batch_nums=batch_nums,
                                       algo=algo,
                                       quantizable_op_type=quantizable_op_type,
                                       is_full_quantize=is_full_quantize,
                                       optimize_model=is_optimize_model,
                                       is_use_cache_file=is_use_cache_file)
        ptq.quantize()
        ptq.save_quantized_model(self.int8_model_path)
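
Unlike example #1, this variant feeds PostTrainingQuantization one sample at a time through sample_generator, using paddle.dataset.mnist.train() as the reader. A custom reader for other data would follow the same contract; the sketch below is purely illustrative and not part of the original test.

    import numpy as np

    def make_random_reader(num_samples=100):
        # Returns a no-argument reader mirroring paddle.dataset.mnist.train():
        # each yielded item is one calibration sample (flattened image, label).
        def reader():
            for _ in range(num_samples):
                yield np.random.random(784).astype("float32"), 0
        return reader

    # The result of make_random_reader() could then be passed as
    # sample_generator in place of paddle.dataset.mnist.train().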
Code example #3
    def generate_quantized_model(self,
                                 model_path,
                                 algo="KL",
                                 is_full_quantize=False):
        try:
            os.system("mkdir " + self.int8_model)
        except Exception as e:
            print("Failed to create {} due to {}".format(self.int8_model,
                                                         str(e)))
            sys.exit(-1)

        place = fluid.CPUPlace()
        exe = fluid.Executor(place)
        scope = fluid.global_scope()
        val_reader = val()
        quantizable_op_type = [
            "conv2d", "depthwise_conv2d", "mul", "pool2d", "elementwise_add"
        ]

        ptq = PostTrainingQuantization(
            executor=exe,
            sample_generator=val_reader,
            model_dir=model_path,
            algo=algo,
            quantizable_op_type=quantizable_op_type,
            is_full_quantize=is_full_quantize)
        ptq.quantize()
        ptq.save_quantized_model(self.int8_model)
Code example #4
    def generate_quantized_model(self,
                                 model_path,
                                 quantizable_op_type,
                                 algo="KL",
                                 is_full_quantize=False,
                                 is_use_cache_file=False,
                                 is_optimize_model=False):
        try:
            os.system("mkdir " + self.int8_model)
        except Exception as e:
            print("Failed to create {} due to {}".format(
                self.int8_model, str(e)))
            sys.exit(-1)

        place = fluid.CPUPlace()
        exe = fluid.Executor(place)
        scope = fluid.global_scope()
        val_reader = val()

        ptq = PostTrainingQuantization(executor=exe,
                                       sample_generator=val_reader,
                                       model_dir=model_path,
                                       algo=algo,
                                       quantizable_op_type=quantizable_op_type,
                                       is_full_quantize=is_full_quantize,
                                       optimize_model=is_optimize_model,
                                       is_use_cache_file=is_use_cache_file)
        ptq.quantize()
        ptq.save_quantized_model(self.int8_model)
Code example #5
    def generate_quantized_model(self,
                                 model_path,
                                 algo="KL",
                                 quantizable_op_type=["conv2d"],
                                 is_full_quantize=False,
                                 is_use_cache_file=False,
                                 is_optimize_model=False,
                                 batch_size=10,
                                 batch_nums=10,
                                 is_data_loader=False):

        place = fluid.CPUPlace()
        exe = fluid.Executor(place)
        scope = fluid.global_scope()
        val_reader = paddle.dataset.mnist.train()

        def val_data_generator():
            batches = []
            for data in val_reader():
                batches.append(data[0].reshape(1, 28, 28))
                if len(batches) == batch_size:
                    batches = np.asarray(batches)
                    yield {"x": batches}
                    batches = []

        ptq = PostTrainingQuantization(
            executor=exe,
            model_dir=model_path,
            model_filename='model.pdmodel',
            params_filename='model.pdiparams',
            sample_generator=val_reader if not is_data_loader else None,
            data_loader=val_data_generator if is_data_loader else None,
            batch_size=batch_size,
            batch_nums=batch_nums,
            algo=algo,
            quantizable_op_type=quantizable_op_type,
            is_full_quantize=is_full_quantize,
            optimize_model=is_optimize_model,
            is_use_cache_file=is_use_cache_file)
        ptq.quantize()
        ptq.save_quantized_model(
            self.int8_model_path,
            model_filename='model.pdmodel',
            params_filename='model.pdiparams')
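
The is_data_loader flag switches calibration input from the per-sample MNIST reader to the feed-dict generator defined above. A hedged invocation sketch, with the `tester` instance and paths as hypothetical placeholders:

    # Calibrate through the feed-dict generator instead of the sample reader.
    tester.int8_model_path = "./mnist_int8"
    tester.generate_quantized_model(
        model_path="./mnist_fp32",   # must contain model.pdmodel / model.pdiparams
        algo="avg",
        batch_size=10,
        batch_nums=10,
        is_data_loader=True)         # routes calibration through data_loader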
Code example #6
    def _post_training_quantization(algo, batch_size):
        def _batch_generator_func():
            batch_data = [[], []]
            for data in eval_dataset:
                batch_data[0].append(data['input_ids'])
                batch_data[1].append(data['token_type_ids'])
                if len(batch_data[0]) == batch_size:
                    input_ids = Pad(axis=0, pad_val=0)(batch_data[0])
                    segment_ids = Pad(axis=0, pad_val=0)(batch_data[1])
                    yield [input_ids, segment_ids]
                    batch_data = [[], []]

        post_training_quantization = PostTrainingQuantization(
            executor=exe,
            batch_generator=_batch_generator_func,
            model_dir=input_dir,
            model_filename=quantization_config.input_filename_prefix +
            ".pdmodel",
            params_filename=quantization_config.input_filename_prefix +
            ".pdiparams",
            batch_size=batch_size,
            batch_nums=1,
            scope=None,
            algo=algo,
            hist_percent=0.9999,
            bias_correction=False,
            quantizable_op_type=['matmul', 'matmul_v2'],
            is_full_quantize=False,
            weight_bits=8,
            activation_bits=8,
            activation_quantize_type='range_abs_max',
            weight_quantize_type='channel_wise_abs_max',
            onnx_format=True,
            optimize_model=False)
        post_training_quantization.quantize()
        post_training_quantization.save_quantized_model(
            save_model_path=os.path.join(output_dir, algo + str(batch_size)),
            model_filename=quantization_config.output_filename_prefix +
            ".pdmodel",
            params_filename=quantization_config.output_filename_prefix +
            ".pdiparams")
Code example #7
File: quanter.py  Project: will-jl944/PaddleSlim
def quant_post_static(
        executor,
        model_dir,
        quantize_model_path,
        batch_generator=None,
        sample_generator=None,
        model_filename=None,
        params_filename=None,
        save_model_filename='__model__',
        save_params_filename='__params__',
        batch_size=16,
        batch_nums=None,
        scope=None,
        algo='KL',
        quantizable_op_type=["conv2d", "depthwise_conv2d", "mul"],
        is_full_quantize=False,
        weight_bits=8,
        activation_bits=8,
        activation_quantize_type='range_abs_max',
        weight_quantize_type='channel_wise_abs_max',
        optimize_model=False,
        is_use_cache_file=False,
        cache_dir="./temp_post_training"):
    """
    The function utilizes the static post training quantization method to
    quantize the fp32 model. It uses calibration data to calculate the
    scale factors of quantized variables, and inserts fake quantization
    and dequantization operators to obtain the quantized model.

    Args:
        executor(paddle.static.Executor): The executor to load, run and save the 
            quantized model.
        model_dir(str): The path of fp32 model that will be quantized, and 
            the model and params that saved by ``paddle.static.io.save_inference_model`` 
            are under the path.
        quantize_model_path(str): The path to save quantized model using api
            ``paddle.static.io.save_inference_model``.
        batch_generator(Python Generator): The batch generator provides
                calibration data for DataLoader, and it returns a batch every
                time. Only one of sample_generator and batch_generator can
                be set. Besides, batch_generator supports lod tensor.
        sample_generator(Python Generator): The sample generator provides
            calibration data for DataLoader, and it returns only one sample at a time.
        model_filename(str, optional): The name of model file. If parameters 
            are saved in separate files, set it as 'None'. Default: 'None'.
        params_filename(str, optional): The name of params file.
                When all parameters are saved in a single file, set it 
                as filename. If parameters are saved in separate files, 
                set it as 'None'. Default : 'None'.
        save_model_filename(str): The name of model file to save the quantized inference program.  Default: '__model__'.
        save_params_filename(str): The name of file to save all related parameters. 
                If it is set None, parameters will be saved in separate files. Default: '__params__'.
        batch_size(int, optional): The batch size of DataLoader, default is 16.
        batch_nums(int, optional): If batch_nums is not None, the number of calibration
                        samples is batch_size*batch_nums. If batch_nums is None, all data
                        generated by sample_generator is used as calibration data.
        scope(paddle.static.Scope, optional): The scope to run program, use it to load 
                        and save variables. If scope is None, will use paddle.static.global_scope().
        algo(str, optional): If algo='KL', use the KL-divergence method to
                        get a more precise scale factor. If algo='direct', use the
                        abs_max method to get the scale factor. Default: 'KL'.
        quantizable_op_type(list[str], optional): The list of op types
                        that will be quantized. Default: ["conv2d", "depthwise_conv2d", 
                        "mul"].
        weight_bits(int, optional): quantization bit number for weights.
        activation_bits(int): quantization bit number for activation.
        activation_quantize_type(str): quantization type for activation,
                now support 'range_abs_max', 'moving_average_abs_max' and 'abs_max'.
                This parameter only specifies the fake ops in quantized model.
                If it is 'range_abs_max' or 'moving_average_abs_max', we save the scale
                obtained by post training quantization in fake ops. If it
                is 'abs_max', the scale will not be saved in fake ops.
        weight_quantize_type(str): quantization type for weights,
                support 'abs_max' and 'channel_wise_abs_max'. Compared to 'abs_max',
                the model accuracy is usually higher when using 'channel_wise_abs_max'.
        is_full_quantize(bool): if True, apply quantization to all supported quantizable op type.
                        If False, only apply quantization to the input quantizable_op_type. Default is False.
        optimize_model(bool, optional): If optimize_model is set to True, some
                passes are applied to optimize the model before quantization. So far,
                the place of the executor must be CPU, and it supports fusing
                batch_norm into convs.
        is_use_cache_file(bool): This param is deprecated.
        cache_dir(str): This param is deprecated.
    
    Returns:
        None
    """
    post_training_quantization = PostTrainingQuantization(
        executor=executor,
        sample_generator=sample_generator,
        batch_generator=batch_generator,
        model_dir=model_dir,
        model_filename=model_filename,
        params_filename=params_filename,
        batch_size=batch_size,
        batch_nums=batch_nums,
        scope=scope,
        algo=algo,
        quantizable_op_type=quantizable_op_type,
        is_full_quantize=is_full_quantize,
        weight_bits=weight_bits,
        activation_bits=activation_bits,
        activation_quantize_type=activation_quantize_type,
        weight_quantize_type=weight_quantize_type,
        optimize_model=optimize_model)
    post_training_quantization.quantize()
    post_training_quantization.save_quantized_model(
        quantize_model_path,
        model_filename=save_model_filename,
        params_filename=save_params_filename)
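
For reference, a call to this wrapper might look like the sketch below. The model directory, output path and calibration reader are placeholders, and the paddleslim.quant import path is an assumption based on the project the function comes from, so treat this as a hedged sketch rather than verified usage.

import paddle
from paddleslim.quant import quant_post_static  # the wrapper defined above

paddle.enable_static()
exe = paddle.static.Executor(paddle.CPUPlace())

# `calib_reader` is a placeholder for a generator that yields one
# calibration sample per iteration; all paths are placeholders too.
quant_post_static(
    executor=exe,
    model_dir="./fp32_inference_model",
    quantize_model_path="./int8_inference_model",
    sample_generator=calib_reader,
    batch_size=16,
    batch_nums=10,
    algo="KL")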
Code example #8
)
parser.add_argument("--is_full_quantize", type=str, default="False", help="")
parser.add_argument("--batch_size", type=int, default=10, help="")
parser.add_argument("--batch_nums", type=int, default=10, help="")
parser.add_argument("--use_gpu", type=str, default="False", help="")
args = parser.parse_args()

print("-------------------args----------------------")
for arg, value in sorted(six.iteritems(vars(args))):
    print("%s: %s" % (arg, value))
print("---------------------------------------------")

place = fluid.CUDAPlace(0) if args.use_gpu == "True" else fluid.CPUPlace()
exe = fluid.Executor(place)
sample_generator = reader.val(data_dir=args.data_path)

ptq = PostTrainingQuantization(
    executor=exe,
    sample_generator=sample_generator,
    model_dir=args.model_dir,
    model_filename=args.model_filename,
    params_filename=args.params_filename,
    batch_size=args.batch_size,
    batch_nums=args.batch_nums,
    algo=args.algo,
    is_full_quantize=args.is_full_quantize == "True")
quantized_program = ptq.quantize()
ptq.save_quantized_model(args.save_model_path)

print("post training quantization finish.\n")
Code example #9
File: quanter.py  Project: vincentXiyu/PaddleSlim
def quant_post(executor,
               model_dir,
               quantize_model_path,
               sample_generator,
               model_filename=None,
               params_filename=None,
               batch_size=16,
               batch_nums=None,
               scope=None,
               algo='KL',
               quantizable_op_type=["conv2d", "depthwise_conv2d", "mul"],
               is_full_quantize=False,
               is_use_cache_file=False,
               cache_dir="./temp_post_training"):
    """
    The function utilizes the post training quantization method to quantize the
    fp32 model. It uses calibration data to calculate the scale factors of
    quantized variables, and inserts fake quantization and dequantization
    operators to obtain the quantized model.

    Args:
        executor(fluid.Executor): The executor to load, run and save the 
            quantized model.
        model_dir(str): The path of fp32 model that will be quantized, and 
            the model and params that saved by ``fluid.io.save_inference_model`` 
            are under the path.
        quantize_model_path(str): The path to save quantized model using api
            ``fluid.io.save_inference_model``.
        sample_generator(Python Generator): The sample generator provides
            calibration data for DataLoader, and it returns only one sample at a time.
        model_filename(str, optional): The name of model file. If parameters 
            are saved in separate files, set it as 'None'. Default: 'None'.
        params_filename(str, optional): The name of params file.
                When all parameters are saved in a single file, set it 
                as filename. If parameters are saved in separate files, 
                set it as 'None'. Default : 'None'.
        batch_size(int, optional): The batch size of DataLoader, default is 16.
        batch_nums(int, optional): If batch_nums is not None, the number of calibration
                        samples is batch_size*batch_nums. If batch_nums is None, all data
                        generated by sample_generator is used as calibration data.
        scope(fluid.Scope, optional): The scope to run program, use it to load 
                        and save variables. If scope is None, will use fluid.global_scope().
        algo(str, optional): If algo='KL', use the KL-divergence method to
                        get a more precise scale factor. If algo='direct', use the
                        abs_max method to get the scale factor. Default: 'KL'.
        quantizable_op_type(list[str], optional): The list of op types
                        that will be quantized. Default: ["conv2d", "depthwise_conv2d", 
                        "mul"].
        is_full_quantize(bool): if True, apply quantization to all supported quantizable op type.
                        If False, only apply quantization to the input quantizable_op_type. Default is False.
        is_use_cache_file(bool): If False, all temp data will be saved in memory. If True,
                                all temp data will be saved to disk. Default: False.
        cache_dir(str): When 'is_use_cache_file' is True, temp data will be saved in 'cache_dir'. Default is './temp_post_training'.
    
    Returns:
        None
    """
    post_training_quantization = PostTrainingQuantization(
        executor=executor,
        sample_generator=sample_generator,
        model_dir=model_dir,
        model_filename=model_filename,
        params_filename=params_filename,
        batch_size=batch_size,
        batch_nums=batch_nums,
        scope=scope,
        algo=algo,
        quantizable_op_type=quantizable_op_type,
        is_full_quantize=is_full_quantize,
        is_use_cache_file=is_use_cache_file,
        cache_dir=cache_dir)
    post_training_quantization.quantize()
    post_training_quantization.save_quantized_model(quantize_model_path)
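
For this older fluid-era signature, an invocation could look like the following; the reader and paths are placeholders, and the paddleslim.quant import of quant_post is assumed from the project this example comes from.

import paddle.fluid as fluid
from paddleslim.quant import quant_post  # the wrapper defined above

exe = fluid.Executor(fluid.CPUPlace())

# `mnist_sample_reader` stands in for any generator yielding one
# calibration sample per iteration; directories are placeholders.
quant_post(
    executor=exe,
    model_dir="./fp32_model",
    quantize_model_path="./int8_model",
    sample_generator=mnist_sample_reader,
    batch_size=16,
    batch_nums=32,
    algo="KL")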
Code example #10
File: quanter.py  Project: RachelXu7/PaddleSlim
def quant_post_static(
        executor,
        model_dir,
        quantize_model_path,
        batch_generator=None,
        sample_generator=None,
        data_loader=None,
        model_filename=None,
        params_filename=None,
        save_model_filename='__model__',
        save_params_filename='__params__',
        batch_size=1,
        batch_nums=None,
        scope=None,
        algo='hist',
        round_type='round',
        hist_percent=0.9999,
        bias_correction=False,
        quantizable_op_type=["conv2d", "depthwise_conv2d", "mul"],
        is_full_quantize=False,
        weight_bits=8,
        activation_bits=8,
        activation_quantize_type='range_abs_max',
        weight_quantize_type='channel_wise_abs_max',
        optimize_model=False,
        onnx_format=False,
        is_use_cache_file=False,
        cache_dir="./temp_post_training"):
    """
    The function utilizes the static post training quantization method to
    quantize the fp32 model. It uses calibration data to calculate the
    scale factors of quantized variables, and inserts fake quantization
    and dequantization operators to obtain the quantized model.

    Args:
        executor(paddle.static.Executor): The executor to load, run and save the 
            quantized model.
        model_dir(str): The path of fp32 model that will be quantized, and 
            the model and params that saved by ``paddle.static.io.save_inference_model`` 
            are under the path.
        quantize_model_path(str): The path to save quantized model using api
            ``paddle.static.io.save_inference_model``.
        batch_generator(Python Generator): The batch generator provides
                calibration data for DataLoader, and it returns a batch every
                time. Only one of sample_generator and batch_generator can
                be set. Besides, batch_generator supports lod tensor.
        sample_generator(Python Generator): The sample generator provides
            calibration data for DataLoader, and it returns only one sample at a time.
        data_loader(Python Generator, Paddle.io.DataLoader, optional): The
            Generator or Dataloader provides calibrate data, and it could
            return a batch every time.
        model_filename(str, optional): The name of model file. If parameters 
            are saved in separate files, set it as 'None'. Default: 'None'.
        params_filename(str, optional): The name of params file.
                When all parameters are saved in a single file, set it 
                as filename. If parameters are saved in separate files, 
                set it as 'None'. Default : 'None'.
        save_model_filename(str): The name of model file to save the quantized inference program.  Default: '__model__'.
        save_params_filename(str): The name of file to save all related parameters. 
                If it is set None, parameters will be saved in separate files. Default: '__params__'.
        batch_size(int, optional): The batch size of DataLoader, default is 1.
        batch_nums(int, optional): If batch_nums is not None, the number of calibration
                        samples is batch_size*batch_nums. If batch_nums is None, all data
                        generated by sample_generator is used as calibration data.
        scope(paddle.static.Scope, optional): The scope to run program, use it to load 
                        and save variables. If scope is None, will use paddle.static.global_scope().
        algo(str, optional): If algo='KL', use the KL-divergence method to
                        get the scale factor. If algo='hist', use the hist_percent of the histogram
                        to get the scale factor. If algo='mse', search for the scale factor that
                        minimizes the mse loss; one batch of data is enough for mse. If
                        algo='avg', use the average of abs_max values to get the scale factor. If
                        algo='abs_max', use the abs_max method to get the scale factor. Default: 'hist'.
        round_type(str, optional): The method of converting the quantized weight values
                        from float to int. Currently supports the 'round' and 'adaround' methods.
                        Default is 'round', which rounds to the nearest whole number.
        hist_percent(float, optional): The percentile of the histogram for the 'hist' algo. Default: 0.9999.
        bias_correction(bool, optional): Bias correction method of https://arxiv.org/abs/1810.05723.
                        Default: False.
        quantizable_op_type(list[str], optional): The list of op types
                        that will be quantized. Default: ["conv2d", "depthwise_conv2d", 
                        "mul"].
        weight_bits(int, optional): quantization bit number for weights.
        activation_bits(int): quantization bit number for activation.
        activation_quantize_type(str): quantization type for activation,
                now support 'range_abs_max', 'moving_average_abs_max' and 'abs_max'.
                This parameter only specifies the fake ops in quantized model.
                If it is 'range_abs_max' or 'moving_average_abs_max', we save the scale
                obtained by post training quantization in fake ops. If it
                is 'abs_max', the scale will not be saved in fake ops.
        weight_quantize_type(str): quantization type for weights,
                support 'abs_max' and 'channel_wise_abs_max'. Compared to 'abs_max',
                the model accuracy is usually higher when using 'channel_wise_abs_max'.
        is_full_quantize(bool): if True, apply quantization to all supported quantizable op type.
                        If False, only apply quantization to the input quantizable_op_type. Default is False.
        optimize_model(bool, optional): If optimize_model is set to True, some
                passes are applied to optimize the model before quantization. So far,
                the place of the executor must be CPU, and it supports fusing
                batch_norm into convs.
        is_use_cache_file(bool): This param is deprecated.
        cache_dir(str): This param is deprecated.
    
    Returns:
        None
    """
    post_training_quantization = PostTrainingQuantization(
        executor=executor,
        sample_generator=sample_generator,
        batch_generator=batch_generator,
        data_loader=data_loader,
        model_dir=model_dir,
        model_filename=model_filename,
        params_filename=params_filename,
        batch_size=batch_size,
        batch_nums=batch_nums,
        scope=scope,
        algo=algo,
        round_type=round_type,
        hist_percent=hist_percent,
        bias_correction=bias_correction,
        quantizable_op_type=quantizable_op_type,
        is_full_quantize=is_full_quantize,
        weight_bits=weight_bits,
        activation_bits=activation_bits,
        activation_quantize_type=activation_quantize_type,
        weight_quantize_type=weight_quantize_type,
        onnx_format=onnx_format,
        optimize_model=optimize_model)
    post_training_quantization.quantize()
    post_training_quantization.save_quantized_model(
        quantize_model_path,
        model_filename=save_model_filename,
        params_filename=save_params_filename)
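
The newer signature also accepts a data_loader directly (a paddle.io.DataLoader or any generator of feed batches). A minimal sketch under assumed placeholder paths, model file names, and loader:

import paddle
from paddleslim.quant import quant_post_static

paddle.enable_static()
exe = paddle.static.Executor(paddle.CPUPlace())

# `calib_loader` is a placeholder paddle.io.DataLoader (or generator) that
# yields feed batches keyed by the model's input names.
quant_post_static(
    executor=exe,
    model_dir="./fp32_inference_model",
    quantize_model_path="./int8_inference_model",
    data_loader=calib_loader,
    model_filename="model.pdmodel",
    params_filename="model.pdiparams",
    batch_nums=16,
    algo="hist",
    hist_percent=0.9999,
    weight_quantize_type="channel_wise_abs_max")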