    def __init__(self, cfg, name='ACTION'):
        name = name.upper()
        self.name = name
        model_file = cfg[name]['model_file']
        params_file = cfg[name]['params_file']
        gpu_mem = cfg[name]['gpu_mem']
        device_id = cfg[name]['device_id']

        self.topk = cfg[name]['topk']
        self.frame_offset = cfg[name]['nms_offset']
        self.nms_thread = cfg[name]['nms_thread']
        self.cls_thread = cfg[name]['classify_score_thread']
        self.iou_thread = cfg[name]['iou_score_thread']

        self.label_map_file = cfg['COMMON']['label_dic']
        self.fps = cfg['COMMON']['fps']
        self.nms_id = 5

        # model init
        config = Config(model_file, params_file)
        config.enable_use_gpu(gpu_mem, device_id)
        config.switch_ir_optim(True)  # default true
        config.enable_memory_optim()
        # use zero copy
        config.switch_use_feed_fetch_ops(False)
        self.predictor = create_predictor(config)

        input_names = self.predictor.get_input_names()
        self.input1_tensor = self.predictor.get_input_handle(input_names[0])
        #self.input2_tensor = self.predictor.get_input_handle(input_names[1])

        output_names = self.predictor.get_output_names()
        self.output1_tensor = self.predictor.get_output_handle(output_names[0])
        self.output2_tensor = self.predictor.get_output_handle(output_names[1])
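A minimal sketch of how such a zero-copy predictor is usually driven; the method name infer_once and the output variable names are illustrative and not part of the original class:

    def infer_once(self, feature):
        # feature: numpy array matching the model's expected input shape and dtype
        self.input1_tensor.reshape(feature.shape)
        self.input1_tensor.copy_from_cpu(feature)
        self.predictor.run()
        # copy both outputs back to host memory
        out1 = self.output1_tensor.copy_to_cpu()
        out2 = self.output2_tensor.copy_to_cpu()
        return out1, out2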
Example #2
    def __init__(self, cfg, name='PPTSM'): 
        name = name.upper()
        self.name           = name
        model_file          = cfg[name]['model_file']
        params_file         = cfg[name]['params_file']
        gpu_mem             = cfg[name]['gpu_mem']
        device_id           = cfg[name]['device_id']

        # model init
        config = Config(model_file, params_file)
        config.enable_use_gpu(gpu_mem, device_id)
        config.switch_ir_optim(True)  # default true
        config.enable_memory_optim()

        # use zero copy
        config.switch_use_feed_fetch_ops(False)
        self.predictor = create_predictor(config)

        input_names = self.predictor.get_input_names()
        self.input_tensor = self.predictor.get_input_handle(input_names[0])

        output_names = self.predictor.get_output_names()
        print("output_names = ", output_names)
        #self.output_tensor = self.predictor.get_output_handle(output_names[1])
        self.output_tensor = self.predictor.get_output_handle(output_names[0])
Example #3
    def __init__(self, cfg, name='BMN'):
        name = name.upper()
        self.name = name
        model_file = cfg[name]['model_file']
        params_file = cfg[name]['params_file']
        gpu_mem = cfg[name]['gpu_mem']
        device_id = cfg[name]['device_id']

        self.nms_thread = cfg[name]['nms_thread']
        self.min_pred_score = cfg[name]['score_thread']
        self.min_frame_thread = cfg['COMMON']['fps']

        # model init
        config = Config(model_file, params_file)
        config.enable_use_gpu(gpu_mem, device_id)
        config.switch_ir_optim(True)  # default true
        config.enable_memory_optim()
        # use zero copy
        config.switch_use_feed_fetch_ops(False)
        self.predictor = create_predictor(config)

        input_names = self.predictor.get_input_names()
        self.input_tensor = self.predictor.get_input_handle(input_names[0])

        output_names = self.predictor.get_output_names()
        self.output1_tensor = self.predictor.get_output_handle(output_names[0])
        self.output2_tensor = self.predictor.get_output_handle(output_names[1])
        self.output3_tensor = self.predictor.get_output_handle(output_names[2])
Example #4
 def inference_config_str(self, config: paddle_infer.Config) -> str:
     dic = {}
     enable_trt = config.tensorrt_engine_enabled()
     trt_precision = config.tensorrt_precision_mode()
     trt_dynamic_shape = config.tensorrt_dynamic_shape_enabled()
     if enable_trt:
         dic['use_trt'] = True
         dic['trt_precision'] = trt_precision
         dic['use_dynamic_shape'] = trt_dynamic_shape
     else:
         dic['use_trt'] = False
     return str(dic)
def create_predictors(args):
    config_ul_0 = Config(args.model_file_0, args.params_file_0)
    config_ul_1 = Config(args.model_file_1, args.params_file_1)
    config_ul_2 = Config(args.model_file_2, args.params_file_2)
    config_ul_3 = Config(args.model_file_3, args.params_file_3)
    config_wash = Config(args.wash_model_file, args.wash_params_file)
    ultra_model_0 = create_paddle_predictor(args, config_ul_0)
    ultra_model_1 = create_paddle_predictor(args, config_ul_1)
    ultra_model_2 = create_paddle_predictor(args, config_ul_2)
    ultra_model_3 = create_paddle_predictor(args, config_ul_3)
    wash_model = create_paddle_predictor(args, config_wash)
    return ultra_model_0, ultra_model_1, ultra_model_2, ultra_model_3, wash_model
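The helper create_paddle_predictor(args, config) is referenced above but not shown; a hedged sketch of what it might do, assuming args carries use_gpu and gpu_mem fields:

def create_paddle_predictor(args, config):
    # configure the device before building the predictor
    if args.use_gpu:
        config.enable_use_gpu(args.gpu_mem, 0)
    else:
        config.disable_gpu()
    config.enable_memory_optim()
    # use zero copy for inputs and outputs
    config.switch_use_feed_fetch_ops(False)
    return create_predictor(config)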
Example #6
 def test_static_save_and_run_inference_predictor(self):
     paddle.enable_static()
     np_data = np.random.random((1, 1, 28, 28)).astype("float32")
     np_label = np.random.random((1, 1)).astype("int64")
     path_prefix = "custom_op_inference/custom_relu"
     from paddle.inference import Config
     from paddle.inference import create_predictor
     for device in self.devices:
         predict = custom_relu_static_inference(
             self.custom_ops[0], device, np_data, np_label, path_prefix)
         # load inference model
         config = Config(path_prefix + ".pdmodel",
                         path_prefix + ".pdiparams")
         predictor = create_predictor(config)
         input_tensor = predictor.get_input_handle(predictor.get_input_names(
         )[0])
         input_tensor.reshape(np_data.shape)
         input_tensor.copy_from_cpu(np_data.copy())
         predictor.run()
         output_tensor = predictor.get_output_handle(
             predictor.get_output_names()[0])
         predict_infer = output_tensor.copy_to_cpu()
         self.assertTrue(
             np.isclose(
                 predict, predict_infer, rtol=5e-5).any(),
             "custom op predict: {},\n custom op infer predict: {}".format(
                 predict, predict_infer))
     paddle.disable_static()
def init_predictor(args):
    if args.model_dir != "":
        config = Config(args.model_dir)
    else:
        config = Config(args.model_file, args.params_file)

    config.enable_memory_optim()
    if args.use_gpu:
        config.enable_use_gpu(1000, 0)
    else:
        # If MKL-DNN is not enabled, you can set the number of BLAS threads.
        # The thread count should not exceed the number of CPU cores.
        config.set_cpu_math_library_num_threads(4)

    predictor = create_predictor(config)
    return predictor
Example #8
def set_config(args):
    config = Config(args.model_file, args.params_file)
    config.enable_lite_engine(PrecisionType.Float32, True)
    # use lite xpu subgraph
    config.enable_xpu(10 * 1024 * 1024)
    # use lite cuda subgraph
    # config.enable_use_gpu(100, 0)
    config.set_cpu_math_library_num_threads(args.math_thread_num)
    return config
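A short usage sketch for set_config; args is assumed to come from an argparse parser like those in the other examples:

config = set_config(args)
predictor = create_predictor(config)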
Example #9
def init_predictor(args):
    config = Config()
    if args.model_dir == "":
        config.set_model(args.model_file, args.params_file)
    else:
        config.set_model(args.model_dir)
    #config.disable_glog_info()
    config.enable_use_gpu(1000, 3)
    predictor = create_predictor(config)
    return predictor
def collect_dynamic_shape(args):

    if not is_support_collecting():
        logger.error("The Paddle does not support collecting dynamic shape, " \
            "please reinstall the PaddlePaddle (latest gpu version).")

    # prepare config
    cfg = DeployConfig(args.config)
    pred_cfg = PredictConfig(cfg.model, cfg.params)
    pred_cfg.enable_use_gpu(1000, 0)
    pred_cfg.collect_shape_range_info(args.dynamic_shape_path)

    # create predictor
    predictor = create_predictor(pred_cfg)
    input_names = predictor.get_input_names()
    input_handle = predictor.get_input_handle(input_names[0])

    # get images
    img_path_list, _ = get_image_list(args.image_path)
    if not isinstance(img_path_list, (list, tuple)):
        img_path_list = [img_path_list]
    logger.info(f"The num of images is {len(img_path_list)} \n")

    # collect
    progbar_val = progbar.Progbar(target=len(img_path_list))
    for idx, img_path in enumerate(img_path_list):
        data = np.array([cfg.transforms(img_path)[0]])
        input_handle.reshape(data.shape)
        input_handle.copy_from_cpu(data)

        try:
            predictor.run()
        except Exception:
            logger.info(
                "Failed to collect dynamic shape. Usually the error is out of "
                "GPU memory because the model and images are too large.\n")
            del predictor
            if os.path.exists(args.dynamic_shape_path):
                os.remove(args.dynamic_shape_path)

        progbar_val.update(idx + 1)

    logger.info(f"The dynamic shape is save in {args.dynamic_shape_path}")
Example #11
    def init_predictor(self, cfg):
        model_dir = cfg['model_dir']
        params_file = cfg['params_file']
        use_gpu = cfg['use_gpu']

        config = Config(model_dir, params_file)

        # config.enable_memory_optim()

        if use_gpu:
            config.enable_use_gpu(1000, 0)
        else:
            # If MKL-DNN is not enabled, you can set the number of BLAS threads.
            # The thread count should not exceed the number of CPU cores.
            config.set_cpu_math_library_num_threads(4)
            config.enable_mkldnn()
        config.disable_glog_info()

        predictor = create_predictor(config)
        return predictor
Example #12
    def __init__(self, args):
        self.cfg = DeployConfig(args.cfg)
        self.args = args
        self.compose = T.Compose(self.cfg.transforms)
        resize_h, resize_w = args.input_shape

        self.disflow = cv2.DISOpticalFlow_create(
            cv2.DISOPTICAL_FLOW_PRESET_ULTRAFAST)
        self.prev_gray = np.zeros((resize_h, resize_w), np.uint8)
        self.prev_cfd = np.zeros((resize_h, resize_w), np.float32)
        self.is_init = True

        pred_cfg = PredictConfig(self.cfg.model, self.cfg.params)
        pred_cfg.disable_glog_info()
        if self.args.use_gpu:
            pred_cfg.enable_use_gpu(100, 0)

        self.predictor = create_predictor(pred_cfg)
        if self.args.test_speed:
            self.cost_averager = TimeAverager()
def init_predictor(args):
    config = Config('./model')
    config.enable_memory_optim()
    config.enable_use_gpu(100, 0)

    # when using dynamic shape mode, max_batch_size will be ignored.
    config.enable_tensorrt_engine(workspace_size=1 << 30,
                                  max_batch_size=1,
                                  min_subgraph_size=5,
                                  precision_mode=PrecisionType.Float32,
                                  use_static=False,
                                  use_calib_mode=False)

    head_number = 12

    names = [
        "placeholder_0", "placeholder_1", "placeholder_2", "stack_0.tmp_0"
    ]
    min_input_shape = [1, 1, 1]
    max_input_shape = [100, 128, 1]
    opt_input_shape = [10, 60, 1]

    config.set_trt_dynamic_shape_info(
        {
            names[0]: min_input_shape,
            names[1]: min_input_shape,
            names[2]: min_input_shape,
            names[3]: [1, head_number, 1, 1]
        }, {
            names[0]: max_input_shape,
            names[1]: max_input_shape,
            names[2]: max_input_shape,
            names[3]: [100, head_number, 128, 128]
        }, {
            names[0]: opt_input_shape,
            names[1]: opt_input_shape,
            names[2]: opt_input_shape,
            names[3]: [10, head_number, 60, 60]
        })
    predictor = create_predictor(config)
    return predictor
Example #14
    def load_config(self, modelpath, use_gpu, use_mkldnn, combined):
        '''
        load the model config

        modelpath: inference model path

        use_gpu: use gpu or not

        use_mkldnn: use mkldnn or not

        combined: inference model format is combined or not
        '''
        # configure the execution device
        if use_gpu:
            try:
                int(os.environ.get('CUDA_VISIBLE_DEVICES'))
            except Exception:
                print(
                    'Error! Unable to use GPU. Please set the environment variables "CUDA_VISIBLE_DEVICES=GPU_id" to use GPU.'
                )
                use_gpu = False

        # load the model and parameters
        if combined:
            model = os.path.join(modelpath, "__model__")
            params = os.path.join(modelpath, "__params__")
            config = Config(model, params)
        else:
            config = Config(modelpath)

        # set runtime options
        if use_gpu:
            config.enable_use_gpu(100, 0)
        else:
            config.disable_gpu()
            if use_mkldnn:
                config.enable_mkldnn()

        # return the config
        return config
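A hedged usage sketch for load_config; the call site and flag values are illustrative, and create_predictor is assumed to be imported from paddle.inference as in the other examples:

    config = self.load_config(modelpath, use_gpu=False, use_mkldnn=True, combined=True)
    predictor = create_predictor(config)
    input_handle = predictor.get_input_handle(predictor.get_input_names()[0])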
Example #15
def init_predictor(args):
    if args.model_dir:
        config = Config(args.model_dir)
    else:
        config = Config(args.model_file, args.params_file)

    if args.use_gpu:
        config.enable_use_gpu(1000, 0)
    else:
        config.disable_gpu()
        print(config)
        # config.delete('repeated_fc_relu_fuse_pass')
    predictor = create_predictor(config)
    return predictor
Example #16
def init_predictor(args):

    config = Config(os.path.join(args.model_dir, "inference.pdmodel"),
                    os.path.join(args.model_dir, "inference.pdiparams"))

    config.enable_memory_optim()
    if args.use_gpu:
        config.enable_use_gpu(1000, 0)
    else:
        # If MKL-DNN is not enabled, you can set the number of BLAS threads.
        # The thread count should not exceed the number of CPU cores.
        config.set_cpu_math_library_num_threads(4)

    predictor = create_predictor(config)
    return predictor
    def __init__(self,
                 model_file,
                 params_file,
                 use_mkldnn=True,
                 use_gpu=False,
                 device_id=0):
        config = Config(model_file, params_file)
        config.enable_memory_optim()

        if use_gpu:
            print("ENABLE_GPU")
            config.enable_use_gpu(100, device_id)

        if use_mkldnn:
            config.enable_mkldnn()
        self.predictor = create_predictor(config)
def auto_tune(args, dataset, img_nums):
    """
    Use images to auto-tune the dynamic shapes for the TRT subgraph.
    The tuned shapes are saved in args.auto_tuned_shape_file.

    Args:
        args(dict): input args.
        dataset(Dataset): a dataset.
        img_nums(int): the number of images used for auto tuning.
    Returns:
        None
    """
    logger.info("Auto tune the dynamic shape for GPU TRT.")

    assert use_auto_tune(args)

    num = min(len(dataset), img_nums)

    cfg = DeployConfig(args.cfg)
    pred_cfg = PredictConfig(cfg.model, cfg.params)
    pred_cfg.enable_use_gpu(100, 0)
    if not args.print_detail:
        pred_cfg.disable_glog_info()
    pred_cfg.collect_shape_range_info(args.auto_tuned_shape_file)

    predictor = create_predictor(pred_cfg)
    input_names = predictor.get_input_names()
    input_handle = predictor.get_input_handle(input_names[0])

    for idx, (img, _) in enumerate(dataset):
        data = np.array([img])
        input_handle.reshape(data.shape)
        input_handle.copy_from_cpu(data)
        try:
            predictor.run()
        except Exception:
            logger.info(
                "Auto tune failed. Usually the error is out of GPU memory "
                "because the model and images are too large.\n")
            del predictor
            if os.path.exists(args.auto_tuned_shape_file):
                os.remove(args.auto_tuned_shape_file)
            return

        if idx + 1 >= num:
            break

    logger.info("Auto tune success.\n")
Example #19
def auto_tune(args, imgs, img_nums):
    """
    Use images to auto-tune the dynamic shapes for the TRT subgraph.
    The tuned shapes are saved in args.auto_tuned_shape_file.

    Args:
        args(dict): input args.
        imgs(str, list[str]): the path(s) of the images.
        img_nums(int): the number of images used for auto tuning.
    Returns:
        None
    """
    logger.info("Auto tune the dynamic shape for GPU TRT.")

    assert use_auto_tune(args)

    if not isinstance(imgs, (list, tuple)):
        imgs = [imgs]
    num = min(len(imgs), img_nums)

    cfg = DeployConfig(args.cfg)
    pred_cfg = PredictConfig(cfg.model, cfg.params)
    pred_cfg.enable_use_gpu(100, 0)
    if not args.print_detail:
        pred_cfg.disable_glog_info()
    pred_cfg.collect_shape_range_info(args.auto_tuned_shape_file)

    predictor = create_predictor(pred_cfg)
    input_names = predictor.get_input_names()
    input_handle = predictor.get_input_handle(input_names[0])

    for i in range(0, num):
        data = np.array([cfg.transforms(imgs[i])[0]])
        input_handle.reshape(data.shape)
        input_handle.copy_from_cpu(data)
        try:
            predictor.run()
        except Exception:
            logger.info(
                "Auto tune failed. Usually the error is out of GPU memory "
                "because the model and images are too large.\n")
            del predictor
            if os.path.exists(args.auto_tuned_shape_file):
                os.remove(args.auto_tuned_shape_file)
            return

    logger.info("Auto tune success.\n")
def preprocess():
    #for i in range(1000):
    #    _preprocess()
    #return 1
    #feed = _preprocess()

    import numpy as np
    from paddle.inference import Config
    from paddle.inference import create_predictor
    config = Config("serving_server/__model__", "serving_server/__params__")
    config.disable_gpu()
    config.switch_use_feed_fetch_ops(False)
    config.switch_specify_input_names(True)
    predictor = create_predictor(config)

    for i in range(10):
        feed = _preprocess()
        # input 0 im shape
        input_names = predictor.get_input_names()
        input_handle = predictor.get_input_handle(input_names[0])
        input_t = feed["im_shape"]
        input_handle.reshape(input_t.shape)
        input_handle.copy_from_cpu(input_t)
        # input 1 image
        input_names = predictor.get_input_names()
        input_handle = predictor.get_input_handle(input_names[1])
        input_t = feed["image"]
        input_handle.reshape(input_t.shape)
        input_handle.copy_from_cpu(input_t)

        # input 2 scale factor
        input_names = predictor.get_input_names()
        input_handle = predictor.get_input_handle(input_names[2])
        input_t = feed["scale_factor"]
        input_handle.reshape(input_t.shape)
        input_handle.copy_from_cpu(input_t)

        predictor.run()
        output_names = predictor.get_output_names()
        output_handle = predictor.get_output_handle(output_names[0])
        output_data = output_handle.copy_to_cpu() 
Example #21
    def __init__(self, args):
        self.cfg = DeployConfig(args.cfg)
        self.args = args

        pred_cfg = PredictConfig(self.cfg.model, self.cfg.params)
        pred_cfg.disable_glog_info()
        if self.args.use_gpu:
            pred_cfg.enable_use_gpu(100, 0)

            if self.args.use_trt:
                ptype = PrecisionType.Int8 if args.use_int8 else PrecisionType.Float32
                pred_cfg.enable_tensorrt_engine(workspace_size=1 << 30,
                                                max_batch_size=1,
                                                min_subgraph_size=3,
                                                precision_mode=ptype,
                                                use_static=False,
                                                use_calib_mode=False)

        self.predictor = create_predictor(pred_cfg)
Example #22
def main():
    args = parse_args()

    # build the config
    config = Config(args.model_file, args.params_file)
    config.disable_gpu()
    config.switch_use_feed_fetch_ops(False)
    config.switch_specify_input_names(True)

    # create the Paddle predictor
    predictor = create_predictor(config)

    # get the input
    val_dataset = paddle.vision.datasets.MNIST(mode='test',
                                               transform=transforms.ToTensor())
    (image, label) = val_dataset[np.random.randint(10000)]
    # fake_input = np.random.randn(1, 1, 28, 28).astype("float32")
    # image = np.asndarray(image).astype("float32")
    # print(image.shape)
    image = image.numpy().reshape([1, 1, 28, 28])
    # print(image.shape)
    # print(fake_input.shape)
    input_names = predictor.get_input_names()
    input_handle = predictor.get_input_handle(input_names[0])
    input_handle.reshape([1, 1, 28, 28])
    input_handle.copy_from_cpu(image)

    # run the predictor
    predictor.run()

    # get the output
    output_names = predictor.get_output_names()
    output_handle = predictor.get_output_handle(output_names[0])
    output = output_handle.copy_to_cpu()

    print("True label: ", label.item())
    print("Prediction: ", np.argmax(output))
def load_predictor(model_dir,
                   run_mode='fluid',
                   batch_size=1,
                   use_gpu=False,
                   min_subgraph_size=3):
    """set AnalysisConfig, generate AnalysisPredictor
    Args:
        model_dir (str): root path of __model__ and __params__
        use_gpu (bool): whether use gpu
    Returns:
        predictor (PaddlePredictor): AnalysisPredictor
    Raises:
        ValueError: predicting with TensorRT requires use_gpu == True.
    """
    if not use_gpu and not run_mode == 'fluid':
        raise ValueError(
            "Predict by TensorRT mode: {}, expect use_gpu==True, but use_gpu == {}"
            .format(run_mode, use_gpu))
    if run_mode == 'trt_int8':
        raise ValueError("TensorRT int8 mode is not supported now, "
                         "please use trt_fp32 or trt_fp16 instead.")
    config = Config(os.path.join(model_dir, 'model.pdmodel'),
                    os.path.join(model_dir, 'model.pdiparams'))
    precision_map = {
        'trt_int8': Config.Precision.Int8,
        'trt_fp32': Config.Precision.Float32,
        'trt_fp16': Config.Precision.Half
    }
    if use_gpu:
        # initial GPU memory(M), device ID
        config.enable_use_gpu(200, 0)
        # optimize graph and fuse op
        config.switch_ir_optim(True)
    else:
        config.disable_gpu()

    if run_mode in precision_map.keys():
        config.enable_tensorrt_engine(workspace_size=1 << 10,
                                      max_batch_size=batch_size,
                                      min_subgraph_size=min_subgraph_size,
                                      precision_mode=precision_map[run_mode],
                                      use_static=False,
                                      use_calib_mode=False)

    # disable print log when predict
    config.disable_glog_info()
    # enable shared memory
    config.enable_memory_optim()
    # disable feed, fetch OP, needed by zero_copy_run
    config.switch_use_feed_fetch_ops(False)
    predictor = create_predictor(config)
    return predictor
def load_predictor(model_dir,
                   run_mode='fluid',
                   batch_size=1,
                   device='CPU',
                   min_subgraph_size=3,
                   use_dynamic_shape=False,
                   trt_min_shape=1,
                   trt_max_shape=1280,
                   trt_opt_shape=640,
                   trt_calib_mode=False,
                   cpu_threads=1,
                   enable_mkldnn=False):
    """set AnalysisConfig, generate AnalysisPredictor
    Args:
        model_dir (str): root path of __model__ and __params__
        device (str): Choose the device you want to run, it can be: CPU/GPU/XPU, default is CPU
        run_mode (str): mode of running(fluid/trt_fp32/trt_fp16/trt_int8)
        use_dynamic_shape (bool): use dynamic shape or not
        trt_min_shape (int): min shape for dynamic shape in trt
        trt_max_shape (int): max shape for dynamic shape in trt
        trt_opt_shape (int): opt shape for dynamic shape in trt
        trt_calib_mode (bool): If the model is produced by TRT offline quantitative
            calibration, trt_calib_mode need to set True
    Returns:
        predictor (PaddlePredictor): AnalysisPredictor
    Raises:
        ValueError: predicting with TensorRT requires device == 'GPU'.
    """
    if device != 'GPU' and run_mode != 'fluid':
        raise ValueError(
            "Predict by TensorRT mode: {}, expect device=='GPU', but device == {}"
            .format(run_mode, device))
    config = Config(os.path.join(model_dir, 'model.pdmodel'),
                    os.path.join(model_dir, 'model.pdiparams'))
    if device == 'GPU':
        # initial GPU memory(M), device ID
        config.enable_use_gpu(200, 0)
        # optimize graph and fuse op
        config.switch_ir_optim(True)
    elif device == 'XPU':
        config.enable_lite_engine()
        config.enable_xpu(10 * 1024 * 1024)
    else:
        config.disable_gpu()
        config.set_cpu_math_library_num_threads(cpu_threads)
        if enable_mkldnn:
            try:
                # cache 10 different shapes for mkldnn to avoid memory leak
                config.set_mkldnn_cache_capacity(10)
                config.enable_mkldnn()
            except Exception as e:
                print(
                    "The current environment does not support `mkldnn`, so disable mkldnn."
                )
                pass

    precision_map = {
        'trt_int8': Config.Precision.Int8,
        'trt_fp32': Config.Precision.Float32,
        'trt_fp16': Config.Precision.Half
    }
    if run_mode in precision_map.keys():
        config.enable_tensorrt_engine(workspace_size=1 << 10,
                                      max_batch_size=batch_size,
                                      min_subgraph_size=min_subgraph_size,
                                      precision_mode=precision_map[run_mode],
                                      use_static=False,
                                      use_calib_mode=trt_calib_mode)

        if use_dynamic_shape:
            min_input_shape = {
                'image': [batch_size, 3, trt_min_shape, trt_min_shape]
            }
            max_input_shape = {
                'image': [batch_size, 3, trt_max_shape, trt_max_shape]
            }
            opt_input_shape = {
                'image': [batch_size, 3, trt_opt_shape, trt_opt_shape]
            }
            config.set_trt_dynamic_shape_info(min_input_shape, max_input_shape,
                                              opt_input_shape)
            print('trt set dynamic shape done!')

    # disable print log when predict
    config.disable_glog_info()
    # enable shared memory
    config.enable_memory_optim()
    # disable feed, fetch OP, needed by zero_copy_run
    config.switch_use_feed_fetch_ops(False)
    predictor = create_predictor(config)
    return predictor, config
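A usage sketch for this load_predictor; the model directory, run mode, and input shape are illustrative and not taken from the source:

import numpy as np

predictor, config = load_predictor('output_inference/model',
                                   run_mode='trt_fp16',
                                   batch_size=1,
                                   device='GPU',
                                   use_dynamic_shape=True)
input_handle = predictor.get_input_handle(predictor.get_input_names()[0])
image = np.random.rand(1, 3, 640, 640).astype('float32')  # dummy input batch
input_handle.reshape(image.shape)
input_handle.copy_from_cpu(image)
predictor.run()
output = predictor.get_output_handle(predictor.get_output_names()[0]).copy_to_cpu()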
Example #25
    def load_model(self,
                   model_dir,
                   use_gpu=False,
                   enable_mkldnn=False,
                   cpu_threads=1):
        model = os.path.join(model_dir, '__model__')
        params = os.path.join(model_dir, '__params__')
        config = Config(model, params)

        # set runtime options
        if use_gpu:
            config.enable_use_gpu(100, 0)
        else:
            config.disable_gpu()
            config.set_cpu_math_library_num_threads(cpu_threads)
            if enable_mkldnn:
                config.enable_mkldnn()
                config.set_mkldnn_cache_capacity(10)

        config.disable_glog_info()
        config.switch_ir_optim(True)
        config.enable_memory_optim()
        config.switch_use_feed_fetch_ops(False)
        config.switch_specify_input_names(True)

        # create the predictor from the config
        predictor = create_predictor(config)

        # get the model's input and output handles
        input_names = predictor.get_input_names()
        output_names = predictor.get_output_names()
        input_handle = predictor.get_input_handle(input_names[0])
        output_handle = predictor.get_output_handle(output_names[0])

        return predictor, input_handle, output_handle
Example #26
    def load_predictor(self, model_file, params_file):
        config = Config(model_file, params_file)
        if self.predictor_config["use_gpu"]:
            config.enable_use_gpu(200, 0)
            config.switch_ir_optim(True)
        else:
            config.disable_gpu()
            config.set_cpu_math_library_num_threads(
                self.predictor_config["cpu_threads"])

            if self.predictor_config["enable_mkldnn"]:
                try:
                    # cache 10 different shapes for mkldnn to avoid memory leak
                    config.set_mkldnn_cache_capacity(10)
                    config.enable_mkldnn()
                except Exception as e:
                    logging.error(
                        "The current environment does not support `mkldnn`, so disable mkldnn."
                    )
        config.disable_glog_info()
        config.enable_memory_optim()
        # use zero copy
        config.switch_use_feed_fetch_ops(False)
        predictor = create_predictor(config)
        input_names = predictor.get_input_names()
        output_names = predictor.get_output_names()
        return predictor, input_names, output_names
    parser.add_argument("--threads",
                        type=int,
                        default=1,
                        help="Whether use gpu.")
    return parser.parse_args()

if __name__ == '__main__':
    args = parse_args()
    assert (args.model_dir != "") or \
            (args.model_file != "" and args.params_file != ""), \
            "Set model path error."
    assert args.img_path != "", "Set img_path error."
    
    # Init config
    if args.model_dir == "":
        config = Config(args.model_file, args.params_file)
    else:
        config = Config(args.model_dir)
    config.enable_use_gpu(500, 0)
    config.switch_ir_optim()
    config.enable_memory_optim()
    config.enable_tensorrt_engine(workspace_size=1 << 30,
                                  precision_mode=PrecisionType.Float32,
                                  max_batch_size=1,
                                  min_subgraph_size=5,
                                  use_static=False,
                                  use_calib_mode=False)
        
    # Create predictor
    predictor = create_predictor(config)

    # Set input
    img = cv2.imread(args.img_path)
    img = preprocess(img)
    input_names = predictor.get_input_names()
    input_tensor = predictor.get_input_handle(input_names[0])
Example #28
def create_paddle_predictor(args):
    config = Config(args.model_file, args.params_file)

    if args.use_gpu:
        config.enable_use_gpu(args.gpu_mem, 0)
    else:
        config.disable_gpu()
        if args.enable_mkldnn:
            # cache 10 different shapes for mkldnn to avoid memory leak
            config.set_mkldnn_cache_capacity(10)
            config.enable_mkldnn()

    #config.disable_glog_info()
    config.switch_ir_optim(args.ir_optim)  # default true
    if args.use_tensorrt:
        config.enable_tensorrt_engine(
            precision_mode=Config.Precision.Half
            if args.use_fp16 else Config.Precision.Float32,
            max_batch_size=args.batch_size)

    config.enable_memory_optim()
    # use zero copy
    config.switch_use_feed_fetch_ops(False)
    predictor = create_predictor(config)

    return predictor
Example #29
    def load_config(self, modelpath, use_gpu, gpu_id, use_mkldnn, cpu_threads):
        '''
        load the model config

        modelpath: inference model path

        use_gpu: use gpu or not

        use_mkldnn: use mkldnn or not
        '''
        # configure the execution device
        if use_gpu:
            try:
                int(os.environ.get('CUDA_VISIBLE_DEVICES'))
            except Exception:
                print(
                    '''Error! Unable to use GPU. Please set the environment variables "CUDA_VISIBLE_DEVICES=GPU_id" to use GPU. Now switch to CPU to continue...''')
                use_gpu = False

        if os.path.isdir(modelpath):
            if os.path.exists(os.path.join(modelpath, "__params__")):
                # __model__ + __params__
                model = os.path.join(modelpath, "__model__")
                params = os.path.join(modelpath, "__params__")
                config = Config(model, params)
            elif os.path.exists(os.path.join(modelpath, "params")):
                # model + params
                model = os.path.join(modelpath, "model")
                params = os.path.join(modelpath, "params")
                config = Config(model, params)
            elif os.path.exists(os.path.join(modelpath, "__model__")):
                # __model__ + others
                config = Config(modelpath)
            else:
                raise Exception(
                    "Error! Can\'t find the model in: %s. Please check your model path." % os.path.abspath(modelpath))
        elif os.path.exists(modelpath+".pdmodel"):
            # *.pdmodel + *.pdiparams
            model = modelpath+".pdmodel"
            params = modelpath+".pdiparams"
            config = Config(model, params)
        else:
            raise Exception(
                "Error! Can\'t find the model in: %s. Please check your model path." % os.path.abspath(modelpath))

        # set runtime options
        if use_gpu:
            config.enable_use_gpu(100, gpu_id)
        else:
            config.disable_gpu()
            config.set_cpu_math_library_num_threads(cpu_threads)
            if use_mkldnn:
                config.enable_mkldnn()

        config.disable_glog_info()

        # return the config
        return config
Example #30
    def create_paddle_predictor(self, args, inference_model_dir=None):
        if inference_model_dir is None:
            inference_model_dir = args.inference_model_dir
        params_file = os.path.join(inference_model_dir, "inference.pdiparams")
        model_file = os.path.join(inference_model_dir, "inference.pdmodel")
        config = Config(model_file, params_file)

        if args.use_gpu:
            config.enable_use_gpu(args.gpu_mem, 0)
        else:
            config.disable_gpu()
            if args.enable_mkldnn:
                # cache 10 different shapes for mkldnn to avoid memory leak
                config.set_mkldnn_cache_capacity(10)
                config.enable_mkldnn()
        config.set_cpu_math_library_num_threads(args.cpu_num_threads)

        if args.enable_profile:
            config.enable_profile()
        config.disable_glog_info()
        config.switch_ir_optim(args.ir_optim)  # default true
        if args.use_tensorrt:
            config.enable_tensorrt_engine(
                precision_mode=Config.Precision.Half
                if args.use_fp16 else Config.Precision.Float32,
                max_batch_size=args.batch_size,
                min_subgraph_size=30)

        config.enable_memory_optim()
        # use zero copy
        config.switch_use_feed_fetch_ops(False)
        predictor = create_predictor(config)

        return predictor, config
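A short usage sketch for the last example; since the method also returns the Config, it can be inspected to confirm how the engine was set up (the query methods are the same ones used in inference_config_str above):

    predictor, config = self.create_paddle_predictor(args)
    print("use_trt:", config.tensorrt_engine_enabled())
    print("use_dynamic_shape:", config.tensorrt_dynamic_shape_enabled())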