Example #1
    def quantize_to_int(self, model_name, model_data_url, model_data_md5,
                        weight_bits, quantizable_op_type, weight_quantize_type,
                        generate_test_model, threshold_rate):

        model_dir = self.download_model(model_name, model_data_url,
                                        model_data_md5)
        load_model_dir = os.path.join(model_dir, model_name)

        timestamp = time.strftime('%Y-%m-%d-%H-%M-%S', time.localtime())
        save_model_dir = os.path.join(
            os.getcwd(),
            model_name + "_wq_" + str(weight_bits) + "_" + timestamp)

        weight_quant = WeightQuantization(model_dir=load_model_dir)
        weight_quant.quantize_weight_to_int(
            save_model_dir=save_model_dir,
            weight_bits=weight_bits,
            quantizable_op_type=quantizable_op_type,
            weight_quantize_type=weight_quantize_type,
            generate_test_model=generate_test_model,
            threshold_rate=threshold_rate)
        print("finish weight quantization for " + model_name + "\n")

        try:
            os.system("rm -rf {}".format(save_model_dir))
        except Exception as e:
            print("Failed to delete {} due to {}".format(
                save_model_dir, str(e)))
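
For reference, a minimal standalone sketch of the same weight-quantization step outside the test harness is shown below. The import path, the model directories, and the weight_quantize_type value are assumptions (the class has been exposed under paddle.fluid.contrib.slim.quantization in some Paddle releases, but the path may differ across versions), not values taken from the test above.

# A minimal sketch, outside the test harness. Assumptions: the import path
# below (it may differ across Paddle versions), the hypothetical model
# directories, and the weight_quantize_type value.
from paddle.fluid.contrib.slim.quantization import WeightQuantization

weight_quant = WeightQuantization(model_dir="./mobilenetv1_fp32")
weight_quant.quantize_weight_to_int(
    save_model_dir="./mobilenetv1_w8",
    weight_bits=8,
    quantizable_op_type=["conv2d", "depthwise_conv2d", "mul"],
    weight_quantize_type="channel_wise_abs_max",
    generate_test_model=False,
    threshold_rate=0.0)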
Example #2
def quant_post_only_weight(model_dir,
                           save_model_dir,
                           model_filename=None,
                           params_filename=None,
                           save_model_filename=None,
                           save_params_filename=None,
                           quantizable_op_type=["conv2d", "mul"],
                           weight_bits=8,
                           generate_test_model=False):
    '''
    In order to reduce the size of the model, this API quantizes the weights
    of some ops from float32 to int8/16. In the inference stage, the
    quantized weights are dequantized back to float32.
        
    Args:
        model_dir(str): The path of the fp32 model that will be quantized;
                the model and params files are under this path.
        save_model_dir(str): The path used to save the quantized model.
        model_filename(str, optional): The name of the file used to load the
                inference program. If it is None, the default filename
                '__model__' will be used. Default is None.
        params_filename(str, optional): The name of the file used to load all
                parameters. When all parameters were saved in a single binary
                file, set it to the real filename. If parameters were saved
                in separate files, set it to None. Default is None.
        save_model_filename(str, optional): The name of the file used to
                save the inference program. If it is None, the default
                filename '__model__' will be used. Default is None.
        save_params_filename(str, optional): The name of the file used to
                save all parameters. If it is None, parameters are saved in
                separate files; otherwise, all parameters are saved in a
                single binary file. Default is None.
        quantizable_op_type(list[str], optional): The list of op types that
                will be quantized; every entry should be one of ["conv2d",
                "depthwise_conv2d", "mul"]. Default is ["conv2d", "mul"].
        weight_bits(int, optional): The number of bits used for the quantized
                weights; it should be 8 or 16. Default is 8.
        generate_test_model(bool, optional): If generate_test_model is set to
                True, a fake quantized model is also saved, in which the
                weights are quantized and then dequantized. PaddlePaddle can
                load the fake quantized model to test accuracy on GPU or CPU.
                Default is False.
    '''

    weight_quant = WeightQuantization(model_dir=model_dir,
                                      model_filename=model_filename,
                                      params_filename=params_filename)
    weight_quant.quantize_weight_to_int(
        save_model_dir=save_model_dir,
        save_model_filename=save_model_filename,
        save_params_filename=save_params_filename,
        quantizable_op_type=quantizable_op_type,
        weight_bits=weight_bits,
        generate_test_model=generate_test_model)
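
A hedged usage sketch for quant_post_only_weight follows. The import path (the function has been exposed under paddleslim.quant in some PaddleSlim releases, later renamed quant_post_dynamic) and the model/output paths are assumptions, not taken from the definition above.

# Minimal usage sketch for the API defined above. Assumptions: the import
# path and the placeholder directories/filenames.
from paddleslim.quant import quant_post_only_weight

quant_post_only_weight(
    model_dir="./resnet50_fp32",       # fp32 inference model to quantize
    save_model_dir="./resnet50_w8",    # output directory for the int8-weight model
    model_filename="model",            # use None for the default '__model__'
    params_filename="params",          # use None if params are in separate files
    quantizable_op_type=["conv2d", "depthwise_conv2d", "mul"],
    weight_bits=8,
    generate_test_model=False)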
Example #3
    def convert_to_fp16(self, model_name, model_data_url, model_data_md5,
                        model_filename, params_filename):
        model_dir = self.download_model(model_name, model_data_url,
                                        model_data_md5)
        load_model_dir = os.path.join(model_dir, model_name)

        timestamp = time.strftime('%Y-%m-%d-%H-%M-%S', time.localtime())
        save_model_dir = os.path.join(os.getcwd(),
                                      model_name + "_wq_fp16_" + timestamp)

        weight_quant = WeightQuantization(load_model_dir, model_filename,
                                          params_filename)

        weight_quant.convert_weight_to_fp16(save_model_dir)

        print("finish converting the data type of weights to fp16 for " +
              model_name)
        print("fp16 model saved in " + save_model_dir + "\n")

        input_data = np.ones([1, 3, 224, 224], dtype=np.float32)
        res_fp32 = self.run_models(load_model_dir, model_filename,
                                   params_filename, input_data, False)
        res_fp16 = self.run_models(save_model_dir, model_filename,
                                   params_filename, input_data, True)

        self.assertTrue(
            np.allclose(res_fp32,
                        res_fp16,
                        rtol=1e-5,
                        atol=1e-08,
                        equal_nan=True),
            msg='Failed to test the accuracy of the fp32 and fp16 model.')

        try:
            os.system("rm -rf {}".format(save_model_dir))
        except Exception as e:
            print("Failed to delete {} due to {}".format(
                save_model_dir, str(e)))
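
Outside the test, the fp16 conversion itself reduces to constructing WeightQuantization and calling convert_weight_to_fp16, as in the sketch below. The import path and the directory/file names are assumptions.

# Minimal sketch of the fp16 conversion exercised by the test above.
# Assumptions: the import path (it may differ across Paddle versions) and
# the hypothetical model paths/filenames.
from paddle.fluid.contrib.slim.quantization import WeightQuantization

weight_quant = WeightQuantization(
    model_dir="./mobilenetv1_fp32",   # directory holding the fp32 model
    model_filename="model",
    params_filename="params")
weight_quant.convert_weight_to_fp16("./mobilenetv1_fp16")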
Example #4
def quant_post_dynamic(model_dir,
                       save_model_dir,
                       model_filename=None,
                       params_filename=None,
                       save_model_filename=None,
                       save_params_filename=None,
                       quantizable_op_type=["conv2d", "mul"],
                       weight_bits=8,
                       generate_test_model=False):
    '''
    This function uses the dynamic post-training quantization method to
    quantize the fp32 model. In detail, it quantizes the weights of some
    ops from float32 to int8/16. For the quantized model, there are two
    possible calculation methods in the inference stage: either the
    quantized weights are dequantized back to float32 and the float32
    computation is applied, or the quantization scales of the inputs are
    collected and the int8 computation is applied.
        
    Args:
        model_dir(str): The path of the fp32 model that will be quantized;
                the model and params files are under this path.
        save_model_dir(str): The path used to save the quantized model.
        model_filename(str, optional): The name of the file used to load the
                inference program. If it is None, the default filename
                '__model__' will be used. Default is None.
        params_filename(str, optional): The name of the file used to load all
                parameters. When all parameters were saved in a single binary
                file, set it to the real filename. If parameters were saved
                in separate files, set it to None. Default is None.
        save_model_filename(str, optional): The name of the file used to
                save the inference program. If it is None, the default
                filename '__model__' will be used. Default is None.
        save_params_filename(str, optional): The name of the file used to
                save all parameters. If it is None, parameters are saved in
                separate files; otherwise, all parameters are saved in a
                single binary file. Default is None.
        quantizable_op_type(list[str], optional): The list of op types that
                will be quantized; every entry should be one of ["conv2d",
                "depthwise_conv2d", "mul"]. Default is ["conv2d", "mul"].
        weight_bits(int, optional): The number of bits used for the quantized
                weights; it should be 8 or 16. Default is 8.
        generate_test_model(bool, optional): If generate_test_model is set to
                True, a fake quantized model is also saved, in which the
                weights are quantized and then dequantized. PaddlePaddle can
                load the fake quantized model to test accuracy on GPU or CPU.
                Default is False.
    '''

    weight_quant = WeightQuantization(model_dir=model_dir,
                                      model_filename=model_filename,
                                      params_filename=params_filename)

    weight_quant.quantize_weight_to_int(
        save_model_dir=save_model_dir,
        save_model_filename=save_model_filename,
        save_params_filename=save_params_filename,
        quantizable_op_type=quantizable_op_type,
        weight_bits=weight_bits,
        generate_test_model=generate_test_model)
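
A usage sketch for quant_post_dynamic follows, with the caveat that the import path (paddleslim.quant) and the directories are assumptions rather than values from the definition above.

# Minimal usage sketch. Assumptions: the import path and the placeholder
# directories; all other argument values follow the docstring above.
from paddleslim.quant import quant_post_dynamic

quant_post_dynamic(
    model_dir="./mobilenetv3_fp32",
    save_model_dir="./mobilenetv3_w16",
    model_filename=None,             # None -> load the default '__model__' file
    params_filename=None,            # None -> parameters stored in separate files
    quantizable_op_type=["conv2d", "mul"],
    weight_bits=16,                  # store weights as int16 instead of int8
    generate_test_model=True)        # also save a fake-quantized model for testing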