Example #1
    def quantize_to_int(self, model_name, model_data_url, model_data_md5,
                        weight_bits, quantizable_op_type, weight_quantize_type,
                        generate_test_model, threshold_rate):

        model_dir = self.download_model(model_name, model_data_url,
                                        model_data_md5)
        load_model_dir = os.path.join(model_dir, model_name)

        timestamp = time.strftime('%Y-%m-%d-%H-%M-%S', time.localtime())
        save_model_dir = os.path.join(
            os.getcwd(),
            model_name + "_wq_" + str(weight_bits) + "_" + timestamp)

        weight_quant = WeightQuantization(model_dir=load_model_dir)
        weight_quant.quantize_weight_to_int(
            save_model_dir=save_model_dir,
            weight_bits=weight_bits,
            quantizable_op_type=quantizable_op_type,
            weight_quantize_type=weight_quantize_type,
            generate_test_model=generate_test_model,
            threshold_rate=threshold_rate)
        print("finish weight quantization for " + model_name + "\n")

        try:
            os.system("rm -rf {}".format(save_model_dir))
        except Exception as e:
            print("Failed to delete {} due to {}".format(
                save_model_dir, str(e)))
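
For reference, a minimal standalone sketch of the same weight-quantization step outside the test harness is shown below. The import path, the model directories, and the weight_quantize_type value are assumptions (the class has been exposed under paddle.fluid.contrib.slim.quantization in some Paddle releases, but the path may differ across versions), not values taken from the test above.

# A minimal sketch, outside the test harness. Assumptions: the import path
# below (it may differ across Paddle versions), the hypothetical model
# directories, and the weight_quantize_type value.
from paddle.fluid.contrib.slim.quantization import WeightQuantization

weight_quant = WeightQuantization(model_dir="./mobilenetv1_fp32")
weight_quant.quantize_weight_to_int(
    save_model_dir="./mobilenetv1_w8",
    weight_bits=8,
    quantizable_op_type=["conv2d", "depthwise_conv2d", "mul"],
    weight_quantize_type="channel_wise_abs_max",
    generate_test_model=False,
    threshold_rate=0.0)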
Example #2
def quant_post_only_weight(model_dir,
                           save_model_dir,
                           model_filename=None,
                           params_filename=None,
                           save_model_filename=None,
                           save_params_filename=None,
                           quantizable_op_type=["conv2d", "mul"],
                           weight_bits=8,
                           generate_test_model=False):
    '''
    In order to reduce the size of the model, this API quantizes the weights
    of some ops from float32 to int8/16. In the inference stage, the
    quantized weights are dequantized back to float32.
        
    Args:
        model_dir(str): The path of the fp32 model that will be quantized;
                the model and params files are under this path.
        save_model_dir(str): The path used to save the quantized model.
        model_filename(str, optional): The name of the file used to load the
                inference program. If it is None, the default filename
                '__model__' will be used. Default is None.
        params_filename(str, optional): The name of the file used to load all
                parameters. When all parameters were saved in a single binary
                file, set it to the real filename. If parameters were saved
                in separate files, set it to None. Default is None.
        save_model_filename(str, optional): The name of the file used to
                save the inference program. If it is None, the default
                filename '__model__' will be used. Default is None.
        save_params_filename(str, optional): The name of the file used to
                save all parameters. If it is None, parameters are saved in
                separate files; otherwise, all parameters are saved in a
                single binary file. Default is None.
        quantizable_op_type(list[str], optional): The list of op types that
                will be quantized; every entry should be one of ["conv2d",
                "depthwise_conv2d", "mul"]. Default is ["conv2d", "mul"].
        weight_bits(int, optional): The number of bits used for the quantized
                weights; it should be 8 or 16. Default is 8.
        generate_test_model(bool, optional): If generate_test_model is set to
                True, a fake quantized model is also saved, in which the
                weights are quantized and then dequantized. PaddlePaddle can
                load the fake quantized model to test accuracy on GPU or CPU.
                Default is False.
    '''

    weight_quant = WeightQuantization(model_dir=model_dir,
                                      model_filename=model_filename,
                                      params_filename=params_filename)
    weight_quant.quantize_weight_to_int(
        save_model_dir=save_model_dir,
        save_model_filename=save_model_filename,
        save_params_filename=save_params_filename,
        quantizable_op_type=quantizable_op_type,
        weight_bits=weight_bits,
        generate_test_model=generate_test_model)
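
A hedged usage sketch for quant_post_only_weight follows. The import path (the function has been exposed under paddleslim.quant in some PaddleSlim releases, later renamed quant_post_dynamic) and the model/output paths are assumptions, not taken from the definition above.

# Minimal usage sketch for the API defined above. Assumptions: the import
# path and the placeholder directories/filenames.
from paddleslim.quant import quant_post_only_weight

quant_post_only_weight(
    model_dir="./resnet50_fp32",       # fp32 inference model to quantize
    save_model_dir="./resnet50_w8",    # output directory for the int8-weight model
    model_filename="model",            # use None for the default '__model__'
    params_filename="params",          # use None if params are in separate files
    quantizable_op_type=["conv2d", "depthwise_conv2d", "mul"],
    weight_bits=8,
    generate_test_model=False)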
Example #3
    def convert_to_fp16(self, model_name, model_data_url, model_data_md5,
                        model_filename, params_filename):
        model_dir = self.download_model(model_name, model_data_url,
                                        model_data_md5)
        load_model_dir = os.path.join(model_dir, model_name)

        timestamp = time.strftime('%Y-%m-%d-%H-%M-%S', time.localtime())
        save_model_dir = os.path.join(os.getcwd(),
                                      model_name + "_wq_fp16_" + timestamp)

        weight_quant = WeightQuantization(load_model_dir, model_filename,
                                          params_filename)

        weight_quant.convert_weight_to_fp16(save_model_dir)

        print("finish converting the data type of weights to fp16 for " +
              model_name)
        print("fp16 model saved in " + save_model_dir + "\n")

        input_data = np.ones([1, 3, 224, 224], dtype=np.float32)
        res_fp32 = self.run_models(load_model_dir, model_filename,
                                   params_filename, input_data, False)
        res_fp16 = self.run_models(save_model_dir, model_filename,
                                   params_filename, input_data, True)

        self.assertTrue(
            np.allclose(res_fp32,
                        res_fp16,
                        rtol=1e-5,
                        atol=1e-08,
                        equal_nan=True),
            msg='Failed to test the accuracy of the fp32 and fp16 model.')

        try:
            os.system("rm -rf {}".format(save_model_dir))
        except Exception as e:
            print("Failed to delete {} due to {}".format(
                save_model_dir, str(e)))
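
Outside the test, the fp16 conversion itself reduces to constructing WeightQuantization and calling convert_weight_to_fp16, as in the sketch below. The import path and the directory/file names are assumptions.

# Minimal sketch of the fp16 conversion exercised by the test above.
# Assumptions: the import path (it may differ across Paddle versions) and
# the hypothetical model paths/filenames.
from paddle.fluid.contrib.slim.quantization import WeightQuantization

weight_quant = WeightQuantization(
    model_dir="./mobilenetv1_fp32",   # directory holding the fp32 model
    model_filename="model",
    params_filename="params")
weight_quant.convert_weight_to_fp16("./mobilenetv1_fp16")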
Example #4
def quant_post_dynamic(model_dir,
                       save_model_dir,
                       model_filename=None,
                       params_filename=None,
                       save_model_filename=None,
                       save_params_filename=None,
                       quantizable_op_type=["conv2d", "mul"],
                       weight_bits=8,
                       generate_test_model=False):
    '''
    This function uses the dynamic post-training quantization method to
    quantize the fp32 model. In detail, it quantizes the weights of some
    ops from float32 to int8/16. For the quantized model, there are two
    possible calculation methods in the inference stage: either the
    quantized weights are dequantized back to float32 and the float32
    computation is applied, or the quantization scales of the inputs are
    collected and the int8 computation is applied.
        
    Args:
        model_dir(str): The path of the fp32 model that will be quantized;
                the model and params files are under this path.
        save_model_dir(str): The path used to save the quantized model.
        model_filename(str, optional): The name of the file used to load the
                inference program. If it is None, the default filename
                '__model__' will be used. Default is None.
        params_filename(str, optional): The name of the file used to load all
                parameters. When all parameters were saved in a single binary
                file, set it to the real filename. If parameters were saved
                in separate files, set it to None. Default is None.
        save_model_filename(str, optional): The name of the file used to
                save the inference program. If it is None, the default
                filename '__model__' will be used. Default is None.
        save_params_filename(str, optional): The name of the file used to
                save all parameters. If it is None, parameters are saved in
                separate files; otherwise, all parameters are saved in a
                single binary file. Default is None.
        quantizable_op_type(list[str], optional): The list of op types that
                will be quantized; every entry should be one of ["conv2d",
                "depthwise_conv2d", "mul"]. Default is ["conv2d", "mul"].
        weight_bits(int, optional): The number of bits used for the quantized
                weights; it should be 8 or 16. Default is 8.
        generate_test_model(bool, optional): If generate_test_model is set to
                True, a fake quantized model is also saved, in which the
                weights are quantized and then dequantized. PaddlePaddle can
                load the fake quantized model to test accuracy on GPU or CPU.
                Default is False.
    '''

    weight_quant = WeightQuantization(model_dir=model_dir,
                                      model_filename=model_filename,
                                      params_filename=params_filename)

    weight_quant.quantize_weight_to_int(
        save_model_dir=save_model_dir,
        save_model_filename=save_model_filename,
        save_params_filename=save_params_filename,
        quantizable_op_type=quantizable_op_type,
        weight_bits=weight_bits,
        generate_test_model=generate_test_model)
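
A usage sketch for quant_post_dynamic follows, with the caveat that the import path (paddleslim.quant) and the directories are assumptions rather than values from the definition above.

# Minimal usage sketch. Assumptions: the import path and the placeholder
# directories; all other argument values follow the docstring above.
from paddleslim.quant import quant_post_dynamic

quant_post_dynamic(
    model_dir="./mobilenetv3_fp32",
    save_model_dir="./mobilenetv3_w16",
    model_filename=None,             # None -> load the default '__model__' file
    params_filename=None,            # None -> parameters stored in separate files
    quantizable_op_type=["conv2d", "mul"],
    weight_bits=16,                  # store weights as int16 instead of int8
    generate_test_model=True)        # also save a fake-quantized model for testing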