def __init__(self, cfg, name='ACTION'):
    """Load settings for the named model section and build its predictor.

    Args:
        cfg: Nested mapping. The upper-cased ``name`` section supplies the
            model/params paths, GPU settings and score thresholds; the
            'COMMON' section supplies ``label_dic`` and ``fps``.
        name: Section name; it is upper-cased before use.
    """
    section_key = name.upper()
    self.name = section_key
    section = cfg[section_key]

    model_path = section['model_file']
    params_path = section['params_file']
    gpu_memory = section['gpu_mem']
    gpu_id = section['device_id']

    # Post-processing settings, stored as read from the configuration.
    self.topk = section['topk']
    self.frame_offset = section['nms_offset']
    self.nms_thread = section['nms_thread']
    self.cls_thread = section['classify_score_thread']
    self.iou_thread = section['iou_score_thread']

    common = cfg['COMMON']
    self.label_map_file = common['label_dic']
    self.fps = common['fps']
    self.nms_id = 5

    # model init
    infer_config = Config(model_path, params_path)
    infer_config.enable_use_gpu(gpu_memory, gpu_id)
    infer_config.switch_ir_optim(True)  # default true
    infer_config.enable_memory_optim()
    # use zero copy
    infer_config.switch_use_feed_fetch_ops(False)
    self.predictor = create_predictor(infer_config)

    # Only the first input is bound; a second input handle is not used.
    in_names = self.predictor.get_input_names()
    self.input1_tensor = self.predictor.get_input_handle(in_names[0])

    out_names = self.predictor.get_output_names()
    self.output1_tensor = self.predictor.get_output_handle(out_names[0])
    self.output2_tensor = self.predictor.get_output_handle(out_names[1])
def __init__(self, cfg, name='PPTSM'):
    """Build the predictor described by the named configuration section.

    Args:
        cfg: Nested mapping whose upper-cased ``name`` section holds
            ``model_file``, ``params_file``, ``gpu_mem`` and ``device_id``.
        name: Section name; it is upper-cased before use.
    """
    section_key = name.upper()
    self.name = section_key
    section = cfg[section_key]

    model_path = section['model_file']
    params_path = section['params_file']
    gpu_memory = section['gpu_mem']
    gpu_id = section['device_id']

    # model init
    infer_config = Config(model_path, params_path)
    infer_config.enable_use_gpu(gpu_memory, gpu_id)
    infer_config.switch_ir_optim(True)  # default true
    infer_config.enable_memory_optim()
    # use zero copy
    infer_config.switch_use_feed_fetch_ops(False)
    self.predictor = create_predictor(infer_config)

    first_input = self.predictor.get_input_names()[0]
    self.input_tensor = self.predictor.get_input_handle(first_input)

    output_names = self.predictor.get_output_names()
    print("output_names = ", output_names)
    # The first output is the one bound here (index 1 is not used).
    self.output_tensor = self.predictor.get_output_handle(output_names[0])
def __init__(self, cfg, name='BMN'):
    """Build the predictor for the named section and bind its tensors.

    Args:
        cfg: Nested mapping. The upper-cased ``name`` section supplies the
            model/params paths, GPU settings, ``nms_thread`` and
            ``score_thread``; 'COMMON' supplies ``fps``.
        name: Section name; it is upper-cased before use.
    """
    section_key = name.upper()
    self.name = section_key
    section = cfg[section_key]

    model_path = section['model_file']
    params_path = section['params_file']
    gpu_memory = section['gpu_mem']
    gpu_id = section['device_id']

    self.nms_thread = section['nms_thread']
    self.min_pred_score = section['score_thread']
    # The minimum-frame threshold is taken directly from the common fps.
    self.min_frame_thread = cfg['COMMON']['fps']

    # model init
    infer_config = Config(model_path, params_path)
    infer_config.enable_use_gpu(gpu_memory, gpu_id)
    infer_config.switch_ir_optim(True)  # default true
    infer_config.enable_memory_optim()
    # use zero copy
    infer_config.switch_use_feed_fetch_ops(False)
    self.predictor = create_predictor(infer_config)

    first_input = self.predictor.get_input_names()[0]
    self.input_tensor = self.predictor.get_input_handle(first_input)

    # The first three outputs are bound, in the order the predictor
    # reports them.
    out_names = self.predictor.get_output_names()
    self.output1_tensor = self.predictor.get_output_handle(out_names[0])
    self.output2_tensor = self.predictor.get_output_handle(out_names[1])
    self.output3_tensor = self.predictor.get_output_handle(out_names[2])
def inference_config_str(self, config: paddle_infer.Config) -> str:
    """Describe the TensorRT settings of ``config`` as a dict string.

    Args:
        config: Inference configuration to inspect.

    Returns:
        ``str`` of a dict. When TensorRT is enabled it holds ``use_trt``
        (True), ``trt_precision`` and ``use_dynamic_shape``; otherwise it
        holds only ``use_trt`` (False).
    """
    # All three properties are queried up front, whether or not TensorRT
    # turns out to be enabled.
    trt_on = config.tensorrt_engine_enabled()
    precision = config.tensorrt_precision_mode()
    dynamic_shape = config.tensorrt_dynamic_shape_enabled()

    if not trt_on:
        return str({'use_trt': False})

    return str({
        'use_trt': True,
        'trt_precision': precision,
        'use_dynamic_shape': dynamic_shape,
    })
def create_predictors(args):
    """Create the four "ultra" predictors and the "wash" predictor.

    Args:
        args: Namespace providing ``model_file_0..3``, ``params_file_0..3``,
            ``wash_model_file`` and ``wash_params_file``; it is also passed
            through to ``create_paddle_predictor``.

    Returns:
        A 5-tuple: the four ultra predictors in index order, followed by
        the wash predictor.
    """
    # Build every configuration first, then every predictor, in the same
    # order: ultra 0..3, then wash.
    all_configs = [
        Config(getattr(args, 'model_file_%d' % index),
               getattr(args, 'params_file_%d' % index))
        for index in range(4)
    ]
    all_configs.append(Config(args.wash_model_file, args.wash_params_file))

    return tuple(
        create_paddle_predictor(args, one_config)
        for one_config in all_configs)
def test_static_save_and_run_inference_predictor(self):
    """Check that the saved custom-relu model reproduces the static result.

    For every device in ``self.devices`` the static-graph helper produces a
    reference prediction and saves an inference model under
    ``path_prefix``. That model is then loaded through the inference
    predictor and its first output must match the reference element-wise
    (``rtol=5e-5``).
    """
    paddle.enable_static()
    try:
        np_data = np.random.random((1, 1, 28, 28)).astype("float32")
        np_label = np.random.random((1, 1)).astype("int64")
        path_prefix = "custom_op_inference/custom_relu"

        from paddle.inference import Config
        from paddle.inference import create_predictor

        for device in self.devices:
            predict = custom_relu_static_inference(
                self.custom_ops[0], device, np_data, np_label, path_prefix)

            # load inference model
            config = Config(path_prefix + ".pdmodel",
                            path_prefix + ".pdiparams")
            predictor = create_predictor(config)

            input_tensor = predictor.get_input_handle(
                predictor.get_input_names()[0])
            input_tensor.reshape(np_data.shape)
            input_tensor.copy_from_cpu(np_data.copy())
            predictor.run()

            output_tensor = predictor.get_output_handle(
                predictor.get_output_names()[0])
            predict_infer = output_tensor.copy_to_cpu()

            # Every element must agree; `.any()` would let the test pass
            # when only a single element happens to match.
            self.assertTrue(
                np.isclose(predict, predict_infer, rtol=5e-5).all(),
                "custom op predict: {},\n custom op infer predict: {}".format(
                    predict, predict_infer))
    finally:
        # Restore dynamic mode even when an assertion or a run fails, so
        # later tests are not left in static mode.
        paddle.disable_static()
def init_predictor(args):
    """Create a predictor from either a model directory or a file pair.

    Args:
        args: Namespace with ``model_dir``, ``model_file``, ``params_file``
            and ``use_gpu``. When ``model_dir`` is not the empty string it
            is used; otherwise ``model_file``/``params_file`` are used.

    Returns:
        The predictor created from the assembled configuration.
    """
    # Compare by value: `is not ""` tests object identity, which is not a
    # reliable check for an empty string and triggers a SyntaxWarning on
    # Python 3.8+.
    if args.model_dir != "":
        config = Config(args.model_dir)
    else:
        config = Config(args.model_file, args.params_file)

    config.enable_memory_optim()
    if args.use_gpu:
        config.enable_use_gpu(1000, 0)
    else:
        # If not specific mkldnn, you can set the blas thread.
        # The thread num should not be greater than the number of cores in
        # the CPU.
        config.set_cpu_math_library_num_threads(4)

    predictor = create_predictor(config)
    return predictor
def set_config(args):
    """Assemble a configuration that runs through the lite engine on XPU.

    Args:
        args: Namespace with ``model_file``, ``params_file`` and
            ``math_thread_num``.

    Returns:
        The configured ``Config`` object (no predictor is created here).
    """
    lite_config = Config(args.model_file, args.params_file)
    lite_config.enable_lite_engine(PrecisionType.Float32, True)

    # use lite xpu subgraph
    lite_config.enable_xpu(10 * 1024 * 1024)
    # The lite cuda subgraph alternative, enable_use_gpu(100, 0), is
    # deliberately not enabled.

    lite_config.set_cpu_math_library_num_threads(args.math_thread_num)
    return lite_config
def init_predictor(args):
    """Create a GPU predictor from a model directory or a file pair.

    Args:
        args: Namespace with ``model_dir``, ``model_file`` and
            ``params_file``. An empty ``model_dir`` selects the file pair.

    Returns:
        The predictor created from the assembled configuration.
    """
    config = Config()
    use_file_pair = args.model_dir == ""
    if not use_file_pair:
        config.set_model(args.model_dir)
    else:
        config.set_model(args.model_file, args.params_file)

    # glog output is left enabled. Note the hard-coded second argument (3)
    # passed to enable_use_gpu.
    config.enable_use_gpu(1000, 3)
    return create_predictor(config)
def collect_dynamic_shape(args):
    """Run the model over a set of images to record dynamic-shape ranges.

    The predictor is configured with ``collect_shape_range_info`` so that
    running it on each image records shape information into
    ``args.dynamic_shape_path``.

    Args:
        args: Namespace with ``config`` (deploy config path),
            ``image_path`` and ``dynamic_shape_path``.

    Returns:
        None. If collection is unsupported, or a run fails, the function
        logs the problem and returns early; on a failed run any partially
        written shape file is removed.
    """
    if not is_support_collecting():
        logger.error("The Paddle does not support collecting dynamic shape, "
                     "please reinstall the PaddlePaddle (latest gpu version).")
        # Nothing useful can be collected; do not fall through.
        return

    # prepare config
    cfg = DeployConfig(args.config)
    pred_cfg = PredictConfig(cfg.model, cfg.params)
    pred_cfg.enable_use_gpu(1000, 0)
    pred_cfg.collect_shape_range_info(args.dynamic_shape_path)

    # create predictor
    predictor = create_predictor(pred_cfg)
    input_names = predictor.get_input_names()
    input_handle = predictor.get_input_handle(input_names[0])

    # get images
    img_path_list, _ = get_image_list(args.image_path)
    if not isinstance(img_path_list, (list, tuple)):
        img_path_list = [img_path_list]
    logger.info(f"The num of images is {len(img_path_list)} \n")

    # collect
    progbar_val = progbar.Progbar(target=len(img_path_list))
    for idx, img_path in enumerate(img_path_list):
        data = np.array([cfg.transforms(img_path)[0]])
        input_handle.reshape(data.shape)
        input_handle.copy_from_cpu(data)

        try:
            predictor.run()
        except Exception:
            logger.info(
                "Fail to collect dynamic shape. Usually, the error is out of "
                "GPU memory, for the model and image are too large.\n")
            # Drop the predictor before removing the file, then stop:
            # continuing the loop would use the deleted predictor and the
            # final message would wrongly report success.
            del predictor
            if os.path.exists(args.dynamic_shape_path):
                os.remove(args.dynamic_shape_path)
            return

        progbar_val.update(idx + 1)

    logger.info(f"The dynamic shape is save in {args.dynamic_shape_path}")
def init_predictor(self, cfg):
    """Create a predictor from the paths and device flag in ``cfg``.

    Args:
        cfg: Mapping with ``model_dir``, ``params_file`` and ``use_gpu``.

    Returns:
        The predictor created from the assembled configuration.
    """
    model_path = cfg['model_dir']
    params_path = cfg['params_file']
    on_gpu = cfg['use_gpu']

    infer_config = Config(model_path, params_path)
    # Memory optimisation is intentionally not enabled here.
    if not on_gpu:
        # If not specific mkldnn, you can set the blas thread.
        # The thread num should not be greater than the number of cores in
        # the CPU.
        infer_config.set_cpu_math_library_num_threads(4)
        infer_config.enable_mkldnn()
    else:
        infer_config.enable_use_gpu(1000, 0)

    infer_config.disable_glog_info()
    return create_predictor(infer_config)
def __init__(self, args):
    """Set up transforms, optical-flow state and the predictor.

    Args:
        args: Namespace with ``cfg`` (deploy config path), ``input_shape``
            (unpacked as height, width), ``use_gpu`` and ``test_speed``.
    """
    self.cfg = DeployConfig(args.cfg)
    self.args = args
    self.compose = T.Compose(self.cfg.transforms)

    # State carried between frames: previous gray image and previous
    # confidence map, both zero-initialised at the input size.
    height, width = args.input_shape
    frame_shape = (height, width)
    self.disflow = cv2.DISOpticalFlow_create(
        cv2.DISOPTICAL_FLOW_PRESET_ULTRAFAST)
    self.prev_gray = np.zeros(frame_shape, np.uint8)
    self.prev_cfd = np.zeros(frame_shape, np.float32)
    self.is_init = True

    predictor_config = PredictConfig(self.cfg.model, self.cfg.params)
    predictor_config.disable_glog_info()
    if self.args.use_gpu:
        predictor_config.enable_use_gpu(100, 0)
    self.predictor = create_predictor(predictor_config)

    # The timing helper only exists when speed testing was requested.
    if self.args.test_speed:
        self.cost_averager = TimeAverager()
def init_predictor(args):
    """Create a GPU + TensorRT predictor for ``./model`` with dynamic shapes.

    Args:
        args: Accepted for interface compatibility; not read.

    Returns:
        The predictor created from the assembled configuration.
    """
    config = Config('./model')
    config.enable_memory_optim()
    config.enable_use_gpu(100, 0)

    # using dynamic shape mode, the max_batch_size will be ignored.
    config.enable_tensorrt_engine(
        workspace_size=1 << 30,
        max_batch_size=1,
        min_subgraph_size=5,
        precision_mode=PrecisionType.Float32,
        use_static=False,
        use_calib_mode=False)

    head_number = 12
    placeholder_names = ("placeholder_0", "placeholder_1", "placeholder_2")
    stack_name = "stack_0.tmp_0"

    def shape_table(placeholder_shape, stack_shape):
        # The three placeholders share one shape; the stack tensor has
        # its own.
        table = {tensor: placeholder_shape for tensor in placeholder_names}
        table[stack_name] = stack_shape
        return table

    min_shapes = shape_table([1, 1, 1], [1, head_number, 1, 1])
    max_shapes = shape_table([100, 128, 1], [100, head_number, 128, 128])
    opt_shapes = shape_table([10, 60, 1], [10, head_number, 60, 60])
    config.set_trt_dynamic_shape_info(min_shapes, max_shapes, opt_shapes)

    return create_predictor(config)
def load_config(self, modelpath, use_gpu, use_mkldnn, combined):
    '''
    Build the inference configuration for a model directory.

    modelpath: inference model path
    use_gpu: use gpu or not
    use_mkldnn: use mkldnn or not
    combined: inference model format is combined or not
    '''
    # Decide where to run: GPU is only kept when CUDA_VISIBLE_DEVICES
    # parses as a single integer.
    if use_gpu:
        try:
            int(os.environ.get('CUDA_VISIBLE_DEVICES'))
        except Exception:
            print(
                'Error! Unable to use GPU. Please set the environment variables "CUDA_VISIBLE_DEVICES=GPU_id" to use GPU.'
            )
            use_gpu = False

    # Load the model: a combined model is a __model__/__params__ file pair,
    # otherwise the directory itself is handed to Config.
    if not combined:
        config = Config(modelpath)
    else:
        model_path = os.path.join(modelpath, "__model__")
        params_path = os.path.join(modelpath, "__params__")
        config = Config(model_path, params_path)

    # Apply the device settings.
    if use_gpu:
        config.enable_use_gpu(100, 0)
    else:
        config.disable_gpu()
        if use_mkldnn:
            config.enable_mkldnn()

    # Return the configuration (no predictor is created here).
    return config
def init_predictor(args):
    """Create a predictor from a model directory or a file pair.

    Args:
        args: Namespace with ``model_dir``, ``model_file``, ``params_file``
            and ``use_gpu``. A truthy ``model_dir`` takes precedence.

    Returns:
        The predictor created from the assembled configuration.
    """
    config = (Config(args.model_dir) if args.model_dir else Config(
        args.model_file, args.params_file))

    if not args.use_gpu:
        config.disable_gpu()
    else:
        config.enable_use_gpu(1000, 0)

    print(config)
    # NOTE: deleting 'repeated_fc_relu_fuse_pass' from the config is
    # intentionally not done here.
    return create_predictor(config)
def init_predictor(args):
    """Create a predictor from ``inference.pdmodel``/``.pdiparams``.

    Args:
        args: Namespace with ``model_dir`` (directory holding the two
            inference files) and ``use_gpu``.

    Returns:
        The predictor created from the assembled configuration.
    """
    model_path = os.path.join(args.model_dir, "inference.pdmodel")
    params_path = os.path.join(args.model_dir, "inference.pdiparams")

    infer_config = Config(model_path, params_path)
    infer_config.enable_memory_optim()

    if not args.use_gpu:
        # If not specific mkldnn, you can set the blas thread.
        # The thread num should not be greater than the number of cores in
        # the CPU.
        infer_config.set_cpu_math_library_num_threads(4)
    else:
        infer_config.enable_use_gpu(1000, 0)

    return create_predictor(infer_config)
def __init__(self,
             model_file,
             params_file,
             use_mkldnn=True,
             use_gpu=False,
             device_id=0):
    """Create the predictor for a model/params file pair.

    Args:
        model_file: Path of the model file.
        params_file: Path of the parameters file.
        use_mkldnn: Enable MKL-DNN on the configuration when truthy.
        use_gpu: Enable GPU execution when truthy.
        device_id: Passed to ``enable_use_gpu`` when GPU is enabled.
    """
    infer_config = Config(model_file, params_file)
    infer_config.enable_memory_optim()

    # The two switches are independent of each other.
    if use_gpu:
        print("ENABLE_GPU")
        infer_config.enable_use_gpu(100, device_id)
    if use_mkldnn:
        infer_config.enable_mkldnn()

    self.predictor = create_predictor(infer_config)
def auto_tune(args, dataset, img_nums):
    """
    Use images to auto tune the dynamic shape for trt sub graph.
    The tuned shape saved in args.auto_tuned_shape_file.

    Args:
        args(dict): input args.
        dataset(dataset): an dataset.
        img_nums(int): the nums of images used for auto tune.

    Returns:
        None. If a run fails, the failure is logged, the tuned-shape file
        is removed when it exists, and the function returns early.
    """
    logger.info("Auto tune the dynamic shape for GPU TRT.")

    assert use_auto_tune(args)

    num = min(len(dataset), img_nums)

    cfg = DeployConfig(args.cfg)
    pred_cfg = PredictConfig(cfg.model, cfg.params)
    pred_cfg.enable_use_gpu(100, 0)
    if not args.print_detail:
        pred_cfg.disable_glog_info()
    pred_cfg.collect_shape_range_info(args.auto_tuned_shape_file)

    predictor = create_predictor(pred_cfg)
    input_names = predictor.get_input_names()
    input_handle = predictor.get_input_handle(input_names[0])

    for idx, (img, _) in enumerate(dataset):
        data = np.array([img])
        input_handle.reshape(data.shape)
        input_handle.copy_from_cpu(data)
        try:
            predictor.run()
        except Exception:
            # Only real errors are treated as a tuning failure; a bare
            # `except:` would also swallow KeyboardInterrupt/SystemExit
            # and misreport them as a GPU memory problem.
            logger.info(
                "Auto tune fail. Usually, the error is out of GPU memory, "
                "because the model and image is too large. \n")
            del predictor
            if os.path.exists(args.auto_tuned_shape_file):
                os.remove(args.auto_tuned_shape_file)
            return

        # Stop once the requested number of images has been processed.
        if idx + 1 >= num:
            break

    logger.info("Auto tune success.\n")
def auto_tune(args, imgs, img_nums):
    """
    Use images to auto tune the dynamic shape for trt sub graph.
    The tuned shape saved in args.auto_tuned_shape_file.

    Args:
        args(dict): input args.
        imgs(str, list[str]): the path for images.
        img_nums(int): the nums of images used for auto tune.

    Returns:
        None. If a run fails, the failure is logged, the tuned-shape file
        is removed when it exists, and the function returns early.
    """
    logger.info("Auto tune the dynamic shape for GPU TRT.")

    assert use_auto_tune(args)

    # A single path is accepted as well as a list/tuple of paths.
    if not isinstance(imgs, (list, tuple)):
        imgs = [imgs]
    num = min(len(imgs), img_nums)

    cfg = DeployConfig(args.cfg)
    pred_cfg = PredictConfig(cfg.model, cfg.params)
    pred_cfg.enable_use_gpu(100, 0)
    if not args.print_detail:
        pred_cfg.disable_glog_info()
    pred_cfg.collect_shape_range_info(args.auto_tuned_shape_file)

    predictor = create_predictor(pred_cfg)
    input_names = predictor.get_input_names()
    input_handle = predictor.get_input_handle(input_names[0])

    for i in range(0, num):
        data = np.array([cfg.transforms(imgs[i])[0]])
        input_handle.reshape(data.shape)
        input_handle.copy_from_cpu(data)
        try:
            predictor.run()
        except Exception:
            # Only real errors are treated as a tuning failure; a bare
            # `except:` would also swallow KeyboardInterrupt/SystemExit
            # and misreport them as a GPU memory problem.
            logger.info(
                "Auto tune fail. Usually, the error is out of GPU memory, "
                "because the model and image is too large. \n")
            del predictor
            if os.path.exists(args.auto_tuned_shape_file):
                os.remove(args.auto_tuned_shape_file)
            return

    logger.info("Auto tune success.\n")
def preprocess():
    """Push ten preprocessed samples through a CPU predictor.

    A predictor is created from ``serving_server/__model__`` and
    ``serving_server/__params__``. For each of ten rounds, ``_preprocess()``
    supplies a feed whose ``im_shape``, ``image`` and ``scale_factor``
    entries are copied into the predictor's inputs 0, 1 and 2, the
    predictor is run, and the first output is copied back to the host.
    Nothing is returned.
    """
    import numpy as np
    from paddle.inference import Config
    from paddle.inference import create_predictor

    config = Config("serving_server/__model__", "serving_server/__params__")
    config.disable_gpu()
    config.switch_use_feed_fetch_ops(False)
    config.switch_specify_input_names(True)
    predictor = create_predictor(config)

    # Feed key for each predictor input, by input position.
    feed_keys = ("im_shape", "image", "scale_factor")

    for _ in range(10):
        feed = _preprocess()

        for position, feed_key in enumerate(feed_keys):
            handle = predictor.get_input_handle(
                predictor.get_input_names()[position])
            tensor = feed[feed_key]
            handle.reshape(tensor.shape)
            handle.copy_from_cpu(tensor)

        predictor.run()

        first_output = predictor.get_output_names()[0]
        # The fetched data is not used further.
        output_data = predictor.get_output_handle(first_output).copy_to_cpu()
def __init__(self, args):
    """Load the deploy config and create the predictor.

    Args:
        args: Namespace with ``cfg`` (deploy config path), ``use_gpu``,
            ``use_trt`` and ``use_int8``.
    """
    self.cfg = DeployConfig(args.cfg)
    self.args = args

    predictor_config = PredictConfig(self.cfg.model, self.cfg.params)
    predictor_config.disable_glog_info()

    if self.args.use_gpu:
        predictor_config.enable_use_gpu(100, 0)

        # TensorRT is only considered once GPU execution is enabled.
        if self.args.use_trt:
            if args.use_int8:
                precision = PrecisionType.Int8
            else:
                precision = PrecisionType.Float32
            predictor_config.enable_tensorrt_engine(
                workspace_size=1 << 30,
                max_batch_size=1,
                min_subgraph_size=3,
                precision_mode=precision,
                use_static=False,
                use_calib_mode=False)

    self.predictor = create_predictor(predictor_config)
def main():
    """Classify one random MNIST test image with a CPU predictor.

    Prints the sample's true label and the index of the largest value in
    the predictor's first output.
    """
    args = parse_args()

    # Configuration
    infer_config = Config(args.model_file, args.params_file)
    infer_config.disable_gpu()
    infer_config.switch_use_feed_fetch_ops(False)
    infer_config.switch_specify_input_names(True)

    # Create the predictor
    predictor = create_predictor(infer_config)

    # Fetch the input: one randomly chosen test sample, reshaped to the
    # shape that is also set on the input handle.
    val_dataset = paddle.vision.datasets.MNIST(
        mode='test', transform=transforms.ToTensor())
    sample_index = np.random.randint(10000)
    image, label = val_dataset[sample_index]
    image = image.numpy().reshape([1, 1, 28, 28])

    first_input = predictor.get_input_names()[0]
    input_handle = predictor.get_input_handle(first_input)
    input_handle.reshape([1, 1, 28, 28])
    input_handle.copy_from_cpu(image)

    # Run the predictor
    predictor.run()

    # Fetch the output
    first_output = predictor.get_output_names()[0]
    output = predictor.get_output_handle(first_output).copy_to_cpu()

    print("True label: ", label.item())
    print("Prediction: ", np.argmax(output))
def load_predictor(model_dir,
                   run_mode='fluid',
                   batch_size=1,
                   use_gpu=False,
                   min_subgraph_size=3):
    """Configure and create a predictor for the model in ``model_dir``.

    Args:
        model_dir (str): directory containing model.pdmodel and
            model.pdiparams
        run_mode (str): 'fluid', or one of the TensorRT modes
            'trt_fp32' / 'trt_fp16'
        batch_size (int): passed to TensorRT as max_batch_size
        use_gpu (bool): whether use gpu
        min_subgraph_size (int): passed to TensorRT as min_subgraph_size
    Returns:
        predictor: the predictor created from the configuration
    Raises:
        ValueError: a non-'fluid' run_mode without use_gpu, or run_mode
            'trt_int8' (not supported).
    """
    if not (use_gpu or run_mode == 'fluid'):
        raise ValueError(
            "Predict by TensorRT mode: {}, expect use_gpu==True, but use_gpu == {}"
            .format(run_mode, use_gpu))
    if run_mode == 'trt_int8':
        raise ValueError("TensorRT int8 mode is not supported now, "
                         "please use trt_fp32 or trt_fp16 instead.")

    model_path = os.path.join(model_dir, 'model.pdmodel')
    params_path = os.path.join(model_dir, 'model.pdiparams')
    config = Config(model_path, params_path)

    precision_map = {
        'trt_int8': Config.Precision.Int8,
        'trt_fp32': Config.Precision.Float32,
        'trt_fp16': Config.Precision.Half
    }

    if not use_gpu:
        config.disable_gpu()
    else:
        # initial GPU memory(M), device ID
        config.enable_use_gpu(200, 0)
        # optimize graph and fuse op
        config.switch_ir_optim(True)

    if run_mode in precision_map:
        config.enable_tensorrt_engine(
            workspace_size=1 << 10,
            max_batch_size=batch_size,
            min_subgraph_size=min_subgraph_size,
            precision_mode=precision_map[run_mode],
            use_static=False,
            use_calib_mode=False)

    # disable print log when predict
    config.disable_glog_info()
    # enable shared memory
    config.enable_memory_optim()
    # disable feed, fetch OP, needed by zero_copy_run
    config.switch_use_feed_fetch_ops(False)

    return create_predictor(config)
def load_predictor(model_dir,
                   run_mode='fluid',
                   batch_size=1,
                   device='CPU',
                   min_subgraph_size=3,
                   use_dynamic_shape=False,
                   trt_min_shape=1,
                   trt_max_shape=1280,
                   trt_opt_shape=640,
                   trt_calib_mode=False,
                   cpu_threads=1,
                   enable_mkldnn=False):
    """Configure and create a predictor for the model in ``model_dir``.

    Args:
        model_dir (str): directory containing model.pdmodel and
            model.pdiparams
        run_mode (str): mode of running(fluid/trt_fp32/trt_fp16/trt_int8)
        batch_size (int): TensorRT max_batch_size; also the leading
            dimension of the dynamic-shape entries
        device (str): Choose the device you want to run, it can be:
            CPU/GPU/XPU, default is CPU
        min_subgraph_size (int): passed to TensorRT as min_subgraph_size
        use_dynamic_shape (bool): use dynamic shape or not
        trt_min_shape (int): min shape for dynamic shape in trt
        trt_max_shape (int): max shape for dynamic shape in trt
        trt_opt_shape (int): opt shape for dynamic shape in trt
        trt_calib_mode (bool): If the model is produced by TRT offline
            quantitative calibration, trt_calib_mode need to set True
        cpu_threads (int): math-library thread count used on CPU
        enable_mkldnn (bool): try to enable mkldnn when running on CPU
    Returns:
        (predictor, config): the created predictor and the configuration
            it was built from
    Raises:
        ValueError: predict by TensorRT need device == 'GPU'.
    """
    if device != 'GPU' and run_mode != 'fluid':
        raise ValueError(
            "Predict by TensorRT mode: {}, expect device=='GPU', but device == {}"
            .format(run_mode, device))

    model_path = os.path.join(model_dir, 'model.pdmodel')
    params_path = os.path.join(model_dir, 'model.pdiparams')
    config = Config(model_path, params_path)

    if device == 'GPU':
        # initial GPU memory(M), device ID
        config.enable_use_gpu(200, 0)
        # optimize graph and fuse op
        config.switch_ir_optim(True)
    elif device == 'XPU':
        config.enable_lite_engine()
        config.enable_xpu(10 * 1024 * 1024)
    else:
        config.disable_gpu()
        config.set_cpu_math_library_num_threads(cpu_threads)
        if enable_mkldnn:
            try:
                # cache 10 different shapes for mkldnn to avoid memory leak
                config.set_mkldnn_cache_capacity(10)
                config.enable_mkldnn()
            except Exception:
                # Best effort: report and carry on without mkldnn.
                print(
                    "The current environment does not support `mkldnn`, so disable mkldnn."
                )

    precision_map = {
        'trt_int8': Config.Precision.Int8,
        'trt_fp32': Config.Precision.Float32,
        'trt_fp16': Config.Precision.Half
    }
    if run_mode in precision_map:
        config.enable_tensorrt_engine(
            workspace_size=1 << 10,
            max_batch_size=batch_size,
            min_subgraph_size=min_subgraph_size,
            precision_mode=precision_map[run_mode],
            use_static=False,
            use_calib_mode=trt_calib_mode)

        if use_dynamic_shape:
            # One square [batch, 3, side, side] entry for the 'image'
            # input per bound (min / max / opt).
            min_shape, max_shape, opt_shape = (
                {'image': [batch_size, 3, side, side]}
                for side in (trt_min_shape, trt_max_shape, trt_opt_shape))
            config.set_trt_dynamic_shape_info(min_shape, max_shape,
                                              opt_shape)
            print('trt set dynamic shape done!')

    # disable print log when predict
    config.disable_glog_info()
    # enable shared memory
    config.enable_memory_optim()
    # disable feed, fetch OP, needed by zero_copy_run
    config.switch_use_feed_fetch_ops(False)

    predictor = create_predictor(config)
    return predictor, config
def load_model(self,
               model_dir,
               use_gpu=False,
               enable_mkldnn=False,
               cpu_threads=1):
    """Create a predictor for a ``__model__``/``__params__`` directory.

    Args:
        model_dir: Directory holding ``__model__`` and ``__params__``.
        use_gpu: Run on GPU when truthy, otherwise on CPU.
        enable_mkldnn: Enable mkldnn when running on CPU.
        cpu_threads: Math-library thread count used on CPU.

    Returns:
        ``(predictor, input_handle, output_handle)`` where the handles are
        bound to the predictor's first input and first output.
    """
    model_path = os.path.join(model_dir, '__model__')
    params_path = os.path.join(model_dir, '__params__')
    config = Config(model_path, params_path)

    # Device settings
    if not use_gpu:
        config.disable_gpu()
        config.set_cpu_math_library_num_threads(cpu_threads)
        if enable_mkldnn:
            config.enable_mkldnn()
            config.set_mkldnn_cache_capacity(10)
    else:
        config.enable_use_gpu(100, 0)

    config.disable_glog_info()
    config.switch_ir_optim(True)
    config.enable_memory_optim()
    config.switch_use_feed_fetch_ops(False)
    config.switch_specify_input_names(True)

    # Create the predictor from the configuration
    predictor = create_predictor(config)

    # Bind the model's first input and first output
    first_input = predictor.get_input_names()[0]
    first_output = predictor.get_output_names()[0]
    input_handle = predictor.get_input_handle(first_input)
    output_handle = predictor.get_output_handle(first_output)

    return predictor, input_handle, output_handle
def load_predictor(self, model_file, params_file):
    """Create a predictor according to ``self.predictor_config``.

    Args:
        model_file: Path of the model file.
        params_file: Path of the parameters file.

    Returns:
        ``(predictor, input_names, output_names)``.

    Reads ``use_gpu`` from ``self.predictor_config``, and on the CPU path
    also ``cpu_threads`` and ``enable_mkldnn``.
    """
    settings = self.predictor_config
    config = Config(model_file, params_file)

    if not settings["use_gpu"]:
        config.disable_gpu()
        config.set_cpu_math_library_num_threads(settings["cpu_threads"])
        if settings["enable_mkldnn"]:
            try:
                # cache 10 different shapes for mkldnn to avoid memory leak
                config.set_mkldnn_cache_capacity(10)
                config.enable_mkldnn()
            except Exception:
                # Best effort: log and carry on without mkldnn.
                logging.error(
                    "The current environment does not support `mkldnn`, so disable mkldnn."
                )
    else:
        config.enable_use_gpu(200, 0)
        config.switch_ir_optim(True)

    config.disable_glog_info()
    config.enable_memory_optim()
    # use zero copy
    config.switch_use_feed_fetch_ops(False)

    predictor = create_predictor(config)
    return (predictor, predictor.get_input_names(),
            predictor.get_output_names())
parser.add_argument("--threads", type=int, default=1, help="Whether use gpu.") return parser.parse_args() if __name__ == '__main__': args = parse_args() assert (args.model_dir != "") or \ (args.model_file != "" and args.params_file != ""), \ "Set model path error." assert args.img_path != "", "Set img_path error." # Init config if args.model_dir == "": config = Config(args.model_file, args.params_file) else: config = Config(args.model_dir) config.enable_use_gpu(500, 0) config.switch_ir_optim() config.enable_memory_optim() config.enable_tensorrt_engine(workspace_size=1 << 30, precision_mode=PrecisionType.Float32,max_batch_size=1, min_subgraph_size=5, use_static=False, use_calib_mode=False) # Create predictor predictor = create_predictor(config) # Set input img = cv2.imread(args.img_path) img = preprocess(img) input_names = predictor.get_input_names() input_tensor = predictor.get_input_handle(input_names[0])
def create_paddle_predictor(args):
    """Create a predictor from the command-line style ``args``.

    Args:
        args: Namespace with ``model_file``, ``params_file``, ``use_gpu``,
            ``gpu_mem``, ``enable_mkldnn``, ``ir_optim``, ``use_tensorrt``,
            ``use_fp16`` and ``batch_size``.

    Returns:
        The predictor created from the assembled configuration.
    """
    infer_config = Config(args.model_file, args.params_file)

    if not args.use_gpu:
        infer_config.disable_gpu()
        if args.enable_mkldnn:
            # cache 10 different shapes for mkldnn to avoid memory leak
            infer_config.set_mkldnn_cache_capacity(10)
            infer_config.enable_mkldnn()
    else:
        infer_config.enable_use_gpu(args.gpu_mem, 0)

    # glog output is left enabled here.
    infer_config.switch_ir_optim(args.ir_optim)  # default true

    if args.use_tensorrt:
        if args.use_fp16:
            precision = Config.Precision.Half
        else:
            precision = Config.Precision.Float32
        infer_config.enable_tensorrt_engine(
            precision_mode=precision, max_batch_size=args.batch_size)

    infer_config.enable_memory_optim()
    # use zero copy
    infer_config.switch_use_feed_fetch_ops(False)

    return create_predictor(infer_config)
def load_config(self, modelpath, use_gpu, gpu_id, use_mkldnn, cpu_threads):
    '''
    Locate the model files under ``modelpath`` and build the configuration.

    modelpath: inference model path (a directory, or a prefix of
        *.pdmodel / *.pdiparams files)
    use_gpu: use gpu or not
    gpu_id: device id passed to enable_use_gpu
    use_mkldnn: use mkldnn or not
    cpu_threads: math-library thread count used on CPU

    Raises an Exception when no known model layout is found.
    '''
    # Decide where to run: GPU is only kept when CUDA_VISIBLE_DEVICES
    # parses as a single integer.
    if use_gpu:
        try:
            int(os.environ.get('CUDA_VISIBLE_DEVICES'))
        except Exception:
            print(
                '''Error! Unable to use GPU. Please set the environment variables "CUDA_VISIBLE_DEVICES=GPU_id" to use GPU. Now switch to CPU to continue...''')
            use_gpu = False

    not_found = "Error! Can\'t find the model in: %s. Please check your model path."

    if os.path.isdir(modelpath):
        # Directory layouts, probed in priority order by the presence of
        # the params file: (__model__, __params__), then (model, params).
        for model_name, params_name in (("__model__", "__params__"),
                                        ("model", "params")):
            if os.path.exists(os.path.join(modelpath, params_name)):
                config = Config(
                    os.path.join(modelpath, model_name),
                    os.path.join(modelpath, params_name))
                break
        else:
            if not os.path.exists(os.path.join(modelpath, "__model__")):
                raise Exception(not_found % os.path.abspath(modelpath))
            # __model__ + others: hand the directory itself to Config.
            config = Config(modelpath)
    elif os.path.exists(modelpath + ".pdmodel"):
        # *.pdmodel + *.pdiparams
        config = Config(modelpath + ".pdmodel", modelpath + ".pdiparams")
    else:
        raise Exception(not_found % os.path.abspath(modelpath))

    # Apply the device settings.
    if not use_gpu:
        config.disable_gpu()
        config.set_cpu_math_library_num_threads(cpu_threads)
        if use_mkldnn:
            config.enable_mkldnn()
    else:
        config.enable_use_gpu(100, gpu_id)

    config.disable_glog_info()

    # Return the configuration (no predictor is created here).
    return config
def create_paddle_predictor(self, args, inference_model_dir=None):
    """Create a predictor for ``inference.pdmodel``/``.pdiparams``.

    Args:
        args: Namespace with ``inference_model_dir``, ``use_gpu``,
            ``gpu_mem``, ``enable_mkldnn``, ``cpu_num_threads``,
            ``enable_profile``, ``ir_optim``, ``use_tensorrt``,
            ``use_fp16`` and ``batch_size``.
        inference_model_dir: Directory holding the inference files; when
            None, ``args.inference_model_dir`` is used.

    Returns:
        ``(predictor, config)``: the created predictor and the
        configuration it was built from.
    """
    model_dir = (args.inference_model_dir
                 if inference_model_dir is None else inference_model_dir)
    params_path = os.path.join(model_dir, "inference.pdiparams")
    model_path = os.path.join(model_dir, "inference.pdmodel")
    config = Config(model_path, params_path)

    if not args.use_gpu:
        config.disable_gpu()
        if args.enable_mkldnn:
            # cache 10 different shapes for mkldnn to avoid memory leak
            config.set_mkldnn_cache_capacity(10)
            config.enable_mkldnn()
    else:
        config.enable_use_gpu(args.gpu_mem, 0)
    # The thread count is set on both the GPU and the CPU path.
    config.set_cpu_math_library_num_threads(args.cpu_num_threads)

    if args.enable_profile:
        config.enable_profile()
    config.disable_glog_info()
    config.switch_ir_optim(args.ir_optim)  # default true

    if args.use_tensorrt:
        if args.use_fp16:
            precision = Config.Precision.Half
        else:
            precision = Config.Precision.Float32
        config.enable_tensorrt_engine(
            precision_mode=precision,
            max_batch_size=args.batch_size,
            min_subgraph_size=30)

    config.enable_memory_optim()
    # use zero copy
    config.switch_use_feed_fetch_ops(False)

    predictor = create_predictor(config)
    return predictor, config