def __init__(self, cfg, name='BMN'):
    name = name.upper()
    self.name = name
    model_file = cfg[name]['model_file']
    params_file = cfg[name]['params_file']
    gpu_mem = cfg[name]['gpu_mem']
    device_id = cfg[name]['device_id']
    self.nms_thread = cfg[name]['nms_thread']
    self.min_pred_score = cfg[name]['score_thread']
    self.min_frame_thread = cfg['COMMON']['fps']

    # model init
    config = Config(model_file, params_file)
    config.enable_use_gpu(gpu_mem, device_id)
    config.switch_ir_optim(True)  # default true
    config.enable_memory_optim()
    # use zero copy
    config.switch_use_feed_fetch_ops(False)
    self.predictor = create_predictor(config)

    input_names = self.predictor.get_input_names()
    self.input_tensor = self.predictor.get_input_handle(input_names[0])
    output_names = self.predictor.get_output_names()
    self.output1_tensor = self.predictor.get_output_handle(output_names[0])
    self.output2_tensor = self.predictor.get_output_handle(output_names[1])
    self.output3_tensor = self.predictor.get_output_handle(output_names[2])
def load_predictor(self, model_file, params_file):
    config = Config(model_file, params_file)
    if self.predictor_config["use_gpu"]:
        config.enable_use_gpu(200, 0)
        config.switch_ir_optim(True)
    else:
        config.disable_gpu()
        config.set_cpu_math_library_num_threads(
            self.predictor_config["cpu_threads"])
        if self.predictor_config["enable_mkldnn"]:
            try:
                # cache 10 different shapes for mkldnn to avoid memory leak
                config.set_mkldnn_cache_capacity(10)
                config.enable_mkldnn()
            except Exception as e:
                logging.error(
                    "The current environment does not support `mkldnn`, "
                    "so disable mkldnn.")
    config.disable_glog_info()
    config.enable_memory_optim()
    # use zero copy
    config.switch_use_feed_fetch_ops(False)
    predictor = create_predictor(config)
    input_names = predictor.get_input_names()
    output_names = predictor.get_output_names()
    return predictor, input_names, output_names
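# A minimal usage sketch for the loader above (not from the original source):
# `wrapper` stands for an instance of the surrounding class, and the file
# names and the 1x3x224x224 input shape are assumptions for illustration.
import numpy as np

predictor, input_names, output_names = wrapper.load_predictor(
    "inference.pdmodel", "inference.pdiparams")  # hypothetical paths
input_handle = predictor.get_input_handle(input_names[0])
dummy = np.random.rand(1, 3, 224, 224).astype("float32")  # assumed shape
input_handle.reshape(dummy.shape)
input_handle.copy_from_cpu(dummy)  # zero-copy feed
predictor.run()
output = predictor.get_output_handle(output_names[0]).copy_to_cpu()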
def __init__(self, cfg, name='ACTION'):
    name = name.upper()
    self.name = name
    model_file = cfg[name]['model_file']
    params_file = cfg[name]['params_file']
    gpu_mem = cfg[name]['gpu_mem']
    device_id = cfg[name]['device_id']
    self.topk = cfg[name]['topk']
    self.frame_offset = cfg[name]['nms_offset']
    self.nms_thread = cfg[name]['nms_thread']
    self.cls_thread = cfg[name]['classify_score_thread']
    self.iou_thread = cfg[name]['iou_score_thread']
    self.label_map_file = cfg['COMMON']['label_dic']
    self.fps = cfg['COMMON']['fps']
    self.nms_id = 5

    # model init
    config = Config(model_file, params_file)
    config.enable_use_gpu(gpu_mem, device_id)
    config.switch_ir_optim(True)  # default true
    config.enable_memory_optim()
    # use zero copy
    config.switch_use_feed_fetch_ops(False)
    self.predictor = create_predictor(config)

    input_names = self.predictor.get_input_names()
    self.input1_tensor = self.predictor.get_input_handle(input_names[0])
    # self.input2_tensor = self.predictor.get_input_handle(input_names[1])
    output_names = self.predictor.get_output_names()
    self.output1_tensor = self.predictor.get_output_handle(output_names[0])
    self.output2_tensor = self.predictor.get_output_handle(output_names[1])
def create_paddle_predictor(args):
    config = Config(args.model_file, args.params_file)

    if args.use_gpu:
        config.enable_use_gpu(args.gpu_mem, 0)
    else:
        config.disable_gpu()
        if args.enable_mkldnn:
            # cache 10 different shapes for mkldnn to avoid memory leak
            config.set_mkldnn_cache_capacity(10)
            config.enable_mkldnn()

    # config.disable_glog_info()
    config.switch_ir_optim(args.ir_optim)  # default true
    if args.use_tensorrt:
        config.enable_tensorrt_engine(
            precision_mode=Config.Precision.Half
            if args.use_fp16 else Config.Precision.Float32,
            max_batch_size=args.batch_size)

    config.enable_memory_optim()
    # use zero copy
    config.switch_use_feed_fetch_ops(False)

    predictor = create_predictor(config)
    return predictor
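# A sketch of driving create_paddle_predictor without argparse; the attribute
# names below mirror those referenced above, and the file names are
# placeholders, not values from the source.
from types import SimpleNamespace

fake_args = SimpleNamespace(model_file="inference.pdmodel",      # hypothetical
                            params_file="inference.pdiparams",   # hypothetical
                            use_gpu=False,
                            gpu_mem=2000,
                            enable_mkldnn=True,
                            ir_optim=True,
                            use_tensorrt=False,
                            use_fp16=False,
                            batch_size=1)
predictor = create_paddle_predictor(fake_args)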
def __init__(self, cfg, name='PPTSM'):
    name = name.upper()
    self.name = name
    model_file = cfg[name]['model_file']
    params_file = cfg[name]['params_file']
    gpu_mem = cfg[name]['gpu_mem']
    device_id = cfg[name]['device_id']

    # model init
    config = Config(model_file, params_file)
    config.enable_use_gpu(gpu_mem, device_id)
    config.switch_ir_optim(True)  # default true
    config.enable_memory_optim()
    # use zero copy
    config.switch_use_feed_fetch_ops(False)
    self.predictor = create_predictor(config)

    input_names = self.predictor.get_input_names()
    self.input_tensor = self.predictor.get_input_handle(input_names[0])
    output_names = self.predictor.get_output_names()
    print("output_names = ", output_names)
    # self.output_tensor = self.predictor.get_output_handle(output_names[1])
    self.output_tensor = self.predictor.get_output_handle(output_names[0])
def create_paddle_predictor(self, args, inference_model_dir=None):
    if inference_model_dir is None:
        inference_model_dir = args.inference_model_dir
    params_file = os.path.join(inference_model_dir, "inference.pdiparams")
    model_file = os.path.join(inference_model_dir, "inference.pdmodel")
    config = Config(model_file, params_file)

    if args.use_gpu:
        config.enable_use_gpu(args.gpu_mem, 0)
    else:
        config.disable_gpu()
        if args.enable_mkldnn:
            # cache 10 different shapes for mkldnn to avoid memory leak
            config.set_mkldnn_cache_capacity(10)
            config.enable_mkldnn()
    config.set_cpu_math_library_num_threads(args.cpu_num_threads)

    if args.enable_profile:
        config.enable_profile()
    config.disable_glog_info()
    config.switch_ir_optim(args.ir_optim)  # default true
    if args.use_tensorrt:
        config.enable_tensorrt_engine(
            precision_mode=Config.Precision.Half
            if args.use_fp16 else Config.Precision.Float32,
            max_batch_size=args.batch_size,
            min_subgraph_size=30)

    config.enable_memory_optim()
    # use zero copy
    config.switch_use_feed_fetch_ops(False)

    predictor = create_predictor(config)
    return predictor, config
def load_model(self,
               model_dir,
               use_gpu=False,
               enable_mkldnn=False,
               cpu_threads=1):
    model = os.path.join(model_dir, '__model__')
    params = os.path.join(model_dir, '__params__')
    config = Config(model, params)

    # set config options
    if use_gpu:
        config.enable_use_gpu(100, 0)
    else:
        config.disable_gpu()
        config.set_cpu_math_library_num_threads(cpu_threads)
        if enable_mkldnn:
            config.enable_mkldnn()
            config.set_mkldnn_cache_capacity(10)
    config.disable_glog_info()
    config.switch_ir_optim(True)
    config.enable_memory_optim()
    config.switch_use_feed_fetch_ops(False)
    config.switch_specify_input_names(True)

    # create the predictor from the config
    predictor = create_predictor(config)

    # get the model's input and output handles
    input_names = predictor.get_input_names()
    output_names = predictor.get_output_names()
    input_handle = predictor.get_input_handle(input_names[0])
    output_handle = predictor.get_output_handle(output_names[0])
    return predictor, input_handle, output_handle
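# A hedged usage sketch for load_model above; `wrapper` stands for an instance
# of the surrounding class, and the model_dir and 1x3x224x224 input shape are
# placeholders, not values from the source.
import numpy as np

predictor, input_handle, output_handle = wrapper.load_model(
    "output/inference_model", use_gpu=False, enable_mkldnn=True)
dummy = np.zeros((1, 3, 224, 224), dtype="float32")  # assumed input shape
input_handle.reshape(dummy.shape)
input_handle.copy_from_cpu(dummy)
predictor.run()
result = output_handle.copy_to_cpu()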
def load_predictor(model_dir,
                   run_mode='fluid',
                   batch_size=1,
                   use_gpu=False,
                   min_subgraph_size=3):
    """Set AnalysisConfig and generate an AnalysisPredictor.

    Args:
        model_dir (str): root path of model.pdmodel and model.pdiparams
        use_gpu (bool): whether to use GPU

    Returns:
        predictor (PaddlePredictor): AnalysisPredictor

    Raises:
        ValueError: predicting by TensorRT requires use_gpu == True.
    """
    if not use_gpu and not run_mode == 'fluid':
        raise ValueError(
            "Predict by TensorRT mode: {}, expect use_gpu==True, but use_gpu == {}"
            .format(run_mode, use_gpu))
    if run_mode == 'trt_int8':
        raise ValueError("TensorRT int8 mode is not supported now, "
                         "please use trt_fp32 or trt_fp16 instead.")
    config = Config(os.path.join(model_dir, 'model.pdmodel'),
                    os.path.join(model_dir, 'model.pdiparams'))
    precision_map = {
        'trt_int8': Config.Precision.Int8,
        'trt_fp32': Config.Precision.Float32,
        'trt_fp16': Config.Precision.Half
    }
    if use_gpu:
        # initial GPU memory (MB), device ID
        config.enable_use_gpu(200, 0)
        # optimize graph and fuse ops
        # FIXME(dkp): IR optimization may prune variables inside the graph
        #             and cause errors in Paddle 2.0, e.g. in SSDLite and
        #             FCOS models; set to False for now and switch back to
        #             True once switch_ir_optim is fixed.
        config.switch_ir_optim(False)
    else:
        config.disable_gpu()

    if run_mode in precision_map.keys():
        config.enable_tensorrt_engine(workspace_size=1 << 10,
                                      max_batch_size=batch_size,
                                      min_subgraph_size=min_subgraph_size,
                                      precision_mode=precision_map[run_mode],
                                      use_static=False,
                                      use_calib_mode=False)

    # disable printing logs during prediction
    config.disable_glog_info()
    # enable shared memory
    config.enable_memory_optim()
    # disable feed/fetch ops, needed by zero_copy_run
    config.switch_use_feed_fetch_ops(False)
    predictor = create_predictor(config)
    return predictor
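# A sketch of calling this loader in TensorRT FP16 mode; it assumes a GPU
# build of Paddle with TensorRT available, and the model_dir is a placeholder.
predictor = load_predictor("output/inference_model",
                           run_mode="trt_fp16",
                           batch_size=1,
                           use_gpu=True)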
def create_paddle_predictor(args):
    config = Config(args.model_file, args.params_file)

    if args.use_gpu:
        config.enable_use_gpu(args.gpu_mem, 0)
    else:
        config.disable_gpu()
        if args.use_mkldnn:
            config.enable_mkldnn()
            config.set_cpu_math_library_num_threads(args.cpu_num_threads)
            config.set_mkldnn_cache_capacity(100)

    config.disable_glog_info()
    config.switch_ir_optim(args.ir_optim)  # default true
    config.enable_memory_optim()
    # use zero copy
    config.switch_use_feed_fetch_ops(False)
    predictor = create_predictor(config)
    return predictor
def init_model(self):
    from paddle.inference import Config
    from paddle.inference import PrecisionType
    from paddle.inference import create_predictor

    precision_mode = PrecisionType.Float32
    use_calib_mode = False
    if self.param_type == "fp16":
        precision_mode = PrecisionType.Half
    elif self.param_type == "int8":
        precision_mode = PrecisionType.Int8
        use_calib_mode = True

    mode_path = os.path.join(self.model_dir, "yolov3/__model__")
    param_path = os.path.join(self.model_dir, "yolov3/__params__")
    config = Config(mode_path, param_path)
    config.enable_use_gpu(100, 0)
    config.switch_ir_optim(True)
    size = (self.batch_size * 101) << 20
    config.enable_tensorrt_engine(workspace_size=size,
                                  max_batch_size=self.batch_size,
                                  min_subgraph_size=3,
                                  precision_mode=precision_mode,
                                  use_static=False,
                                  use_calib_mode=use_calib_mode)
    if not self.debug:
        config.disable_glog_info()
    else:
        config.enable_profile()
    config.enable_memory_optim()
    config.switch_use_feed_fetch_ops(False)
    config.enable_mkldnn()
    self.predictor = create_predictor(config)
def load_model(self, modelpath, use_gpu, use_mkldnn, combined):
    # configure where to run (CPU/GPU)
    if use_gpu:
        try:
            int(os.environ.get('CUDA_VISIBLE_DEVICES'))
        except Exception:
            print('Error! Unable to use GPU. Please set the environment '
                  'variables "CUDA_VISIBLE_DEVICES=GPU_id" to use GPU.')
            use_gpu = False

    # load the model parameters
    if combined:
        model = os.path.join(modelpath, "__model__")
        params = os.path.join(modelpath, "__params__")
        config = Config(model, params)
    else:
        config = Config(modelpath)

    # set config options
    if use_gpu:
        config.enable_use_gpu(100, 0)
    else:
        config.disable_gpu()
        if use_mkldnn:
            config.enable_mkldnn()
    config.disable_glog_info()
    config.switch_ir_optim(True)
    config.enable_memory_optim()
    config.switch_use_feed_fetch_ops(False)
    config.switch_specify_input_names(True)

    # create the predictor from the config
    predictor = create_predictor(config)

    # return the predictor
    return predictor
    return parser.parse_args()


if __name__ == '__main__':
    args = parse_args()
    assert (args.model_dir != "") or \
           (args.model_file != "" and args.params_file != ""), \
        "Set model path error."
    assert args.img_path != "", "Set img_path error."

    # Init config
    if args.model_dir == "":
        config = Config(args.model_file, args.params_file)
    else:
        config = Config(args.model_dir)
    config.enable_use_gpu(500, 0)
    config.switch_ir_optim()
    config.enable_memory_optim()
    config.enable_tensorrt_engine(workspace_size=1 << 30,
                                  precision_mode=PrecisionType.Float32,
                                  max_batch_size=1,
                                  min_subgraph_size=5,
                                  use_static=False,
                                  use_calib_mode=False)

    # Create predictor
    predictor = create_predictor(config)

    # Set input
    img = cv2.imread(args.img_path)
    img = preprocess(img)
    input_names = predictor.get_input_names()
    input_tensor = predictor.get_input_handle(input_names[0])
    input_tensor.reshape(img.shape)
    input_tensor.copy_from_cpu(img.copy())

    # Run
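    # The snippet ends at the "# Run" step above; what follows is a minimal
    # sketch of the remaining run-and-fetch code, assuming a single output
    # tensor (not part of the original source).
    predictor.run()
    output_names = predictor.get_output_names()
    output_tensor = predictor.get_output_handle(output_names[0])
    output_data = output_tensor.copy_to_cpu()
    print("output shape = ", output_data.shape)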
class Predictor:
    def __init__(self, args):
        """
        Prepare for prediction.
        For the usage and docs of Paddle Inference, please refer to
        https://paddleinference.paddlepaddle.org.cn/product_introduction/summary.html
        """
        self.args = args
        self.cfg = DeployConfig(args.cfg)

        self._init_base_config()
        self._init_cpu_config()

        self.predictor = create_predictor(self.pred_cfg)

    def _init_base_config(self):
        self.pred_cfg = PredictConfig(self.cfg.model, self.cfg.params)
        if not self.args.print_detail:
            self.pred_cfg.disable_glog_info()
        self.pred_cfg.enable_memory_optim()
        self.pred_cfg.switch_ir_optim(True)

    def _init_cpu_config(self):
        """
        Init the config for x86 cpu.
        """
        logger.info("Use CPU")
        self.pred_cfg.disable_gpu()
        if self.args.enable_mkldnn:
            logger.info("Use MKLDNN")
            # cache 10 different shapes for mkldnn
            self.pred_cfg.set_mkldnn_cache_capacity(10)
            self.pred_cfg.enable_mkldnn()
        self.pred_cfg.set_cpu_math_library_num_threads(self.args.cpu_threads)

    def run(self, imgs):
        if not isinstance(imgs, (list, tuple)):
            imgs = [imgs]

        input_names = self.predictor.get_input_names()
        input_handle = self.predictor.get_input_handle(input_names[0])
        output_names = self.predictor.get_output_names()
        output_seg_handle = self.predictor.get_output_handle(output_names[0])

        args = self.args
        if not os.path.exists(args.save_dir):
            os.makedirs(args.save_dir)

        cut_height = 160
        num_classes = 7
        postprocessor = tusimple_processor.TusimpleProcessor(
            num_classes=num_classes,
            cut_height=cut_height,
            save_dir=args.save_dir)

        for i, im_path in enumerate(imgs):
            im = cv2.imread(im_path)
            im = im[cut_height:, :, :]
            im = im.astype('float32')
            im, _ = self.cfg.transforms(im)
            im = im[np.newaxis, ...]
            input_handle.reshape(im.shape)
            input_handle.copy_from_cpu(im)
            self.predictor.run()
            seg_results = output_seg_handle.copy_to_cpu()

            # get lane points
            seg_results = paddle.to_tensor([seg_results])
            postprocessor.predict(seg_results, im_path)
        logger.info("Finish")
root_path = "/usr/local/quake/datas/benchmark"
images_dir = os.path.join(root_path, './dataset/images')

# mean and std for normalization
mean = np.array([0.485, 0.456, 0.406])
std = np.array([0.229, 0.224, 0.225])

batch_size = 8
input_h = 608
input_w = 608

mode_path = os.path.join(root_path, "./model/yolov3/__model__")
param_path = os.path.join(root_path, "./model/yolov3/__params__")
config = Config(mode_path, param_path)
config.enable_use_gpu(100, 0)
config.switch_ir_optim(True)
config.enable_tensorrt_engine(workspace_size=1 << 10,
                              max_batch_size=batch_size,
                              min_subgraph_size=3,
                              precision_mode=PrecisionType.Int8,
                              use_static=False,
                              use_calib_mode=True)
config.enable_memory_optim()
config.switch_use_feed_fetch_ops(False)
config.enable_mkldnn()
# config.enable_profile()
predictor = create_predictor(config)

input_names = predictor.get_input_names()
input_img = predictor.get_input_handle(input_names[0])
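# A hedged sketch of feeding the predictor above (not part of the original
# snippet): the image file name is a placeholder, and the preprocessing is an
# assumption reconstructed from the mean/std and input_h/input_w constants
# defined earlier. If the exported model takes extra inputs (e.g. im_size for
# YOLOv3), they would need to be fed through their own handles the same way.
import cv2

img = cv2.imread(os.path.join(images_dir, "demo.jpg"))  # hypothetical file
img = cv2.resize(img, (input_w, input_h))
img = (img[:, :, ::-1].astype('float32') / 255.0 - mean) / std  # BGR->RGB
img = img.transpose((2, 0, 1))[np.newaxis, ...].astype('float32')  # NCHW
input_img.reshape(img.shape)
input_img.copy_from_cpu(img)
predictor.run()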
class Predictor:
    def __init__(self, args):
        """
        Prepare for prediction.
        For the usage and docs of Paddle Inference, please refer to
        https://paddleinference.paddlepaddle.org.cn/product_introduction/summary.html
        """
        self.args = args
        self.cfg = DeployConfig(args.cfg)

        self._init_base_config()

        if args.device == 'cpu':
            self._init_cpu_config()
        else:
            self._init_gpu_config()

        self.predictor = create_predictor(self.pred_cfg)

        if hasattr(args, 'benchmark') and args.benchmark:
            import auto_log
            pid = os.getpid()
            self.autolog = auto_log.AutoLogger(
                model_name=args.model_name,
                model_precision=args.precision,
                batch_size=args.batch_size,
                data_shape="dynamic",
                save_path=None,
                inference_config=self.pred_cfg,
                pids=pid,
                process_name=None,
                gpu_ids=0,
                time_keys=[
                    'preprocess_time', 'inference_time', 'postprocess_time'
                ],
                warmup=0,
                logger=logger)

    def _init_base_config(self):
        self.pred_cfg = PredictConfig(self.cfg.model, self.cfg.params)
        if not self.args.print_detail:
            self.pred_cfg.disable_glog_info()
        self.pred_cfg.enable_memory_optim()
        self.pred_cfg.switch_ir_optim(True)

    def _init_cpu_config(self):
        """
        Init the config for x86 cpu.
        """
        logger.info("Using CPU")
        self.pred_cfg.disable_gpu()
        if self.args.enable_mkldnn:
            logger.info("Using MKLDNN")
            # cache 10 different shapes for mkldnn
            self.pred_cfg.set_mkldnn_cache_capacity(10)
            self.pred_cfg.enable_mkldnn()
        self.pred_cfg.set_cpu_math_library_num_threads(self.args.cpu_threads)

    def _init_gpu_config(self):
        """
        Init the config for nvidia gpu.
        """
        logger.info("Using GPU")
        self.pred_cfg.enable_use_gpu(100, 0)
        precision_map = {
            "fp16": PrecisionType.Half,
            "fp32": PrecisionType.Float32,
            "int8": PrecisionType.Int8
        }
        precision_mode = precision_map[self.args.precision]
        if self.args.use_trt:
            logger.info("Use TRT")
            self.pred_cfg.enable_tensorrt_engine(
                workspace_size=1 << 30,
                max_batch_size=1,
                min_subgraph_size=300,
                precision_mode=precision_mode,
                use_static=False,
                use_calib_mode=False)
            if use_auto_tune(self.args) and \
                    os.path.exists(self.args.auto_tuned_shape_file):
                logger.info("Use auto tuned dynamic shape")
                allow_build_at_runtime = True
                self.pred_cfg.enable_tuned_tensorrt_dynamic_shape(
                    self.args.auto_tuned_shape_file, allow_build_at_runtime)
            else:
                logger.info("Use manually set dynamic shape")
                min_input_shape = {"x": [1, 3, 100, 100]}
                max_input_shape = {"x": [1, 3, 2000, 3000]}
                opt_input_shape = {"x": [1, 3, 512, 1024]}
                self.pred_cfg.set_trt_dynamic_shape_info(
                    min_input_shape, max_input_shape, opt_input_shape)

    def run(self, imgs, trimaps=None, imgs_dir=None):
        self.imgs_dir = imgs_dir
        num = len(imgs)
        input_names = self.predictor.get_input_names()
        input_handle = {}
        for i in range(len(input_names)):
            input_handle[input_names[i]] = self.predictor.get_input_handle(
                input_names[i])
        output_names = self.predictor.get_output_names()
        output_handle = self.predictor.get_output_handle(output_names[0])
        args = self.args

        for i in tqdm.tqdm(range(0, num, args.batch_size)):
            # warm up
            if i == 0 and args.benchmark:
                for _ in range(5):
                    img_inputs = []
                    if trimaps is not None:
                        trimap_inputs = []
                    trans_info = []
                    for j in range(i, i + args.batch_size):
                        img = imgs[j]
                        trimap = trimaps[j] if trimaps is not None else None
                        data = self._preprocess(img=img, trimap=trimap)
                        img_inputs.append(data['img'])
                        if trimaps is not None:
                            trimap_inputs.append(
                                data['trimap'][np.newaxis, :, :])
                        trans_info.append(data['trans_info'])
                    img_inputs = np.array(img_inputs)
                    if trimaps is not None:
                        trimap_inputs = (
                            np.array(trimap_inputs)).astype('float32')
                    input_handle['img'].copy_from_cpu(img_inputs)
                    if trimaps is not None:
                        input_handle['trimap'].copy_from_cpu(trimap_inputs)
                    self.predictor.run()
                    results = output_handle.copy_to_cpu()
                    results = results.squeeze(1)
                    for j in range(args.batch_size):
                        trimap = trimap_inputs[
                            j] if trimaps is not None else None
                        result = self._postprocess(results[j],
                                                   trans_info[j],
                                                   trimap=trimap)

            # inference
            if args.benchmark:
                self.autolog.times.start()
            img_inputs = []
            if trimaps is not None:
                trimap_inputs = []
            trans_info = []
            for j in range(i, i + args.batch_size):
                img = imgs[j]
                trimap = trimaps[j] if trimaps is not None else None
                data = self._preprocess(img=img, trimap=trimap)
                img_inputs.append(data['img'])
                if trimaps is not None:
                    trimap_inputs.append(data['trimap'][np.newaxis, :, :])
                trans_info.append(data['trans_info'])
            img_inputs = np.array(img_inputs)
            if trimaps is not None:
                trimap_inputs = (np.array(trimap_inputs)).astype('float32')

            input_handle['img'].copy_from_cpu(img_inputs)
            if trimaps is not None:
                input_handle['trimap'].copy_from_cpu(trimap_inputs)
            if args.benchmark:
                self.autolog.times.stamp()

            self.predictor.run()

            if args.benchmark:
                self.autolog.times.stamp()

            results = output_handle.copy_to_cpu()
            results = results.squeeze(1)
            for j in range(args.batch_size):
                trimap = trimap_inputs[j] if trimaps is not None else None
                result = self._postprocess(results[j],
                                           trans_info[j],
                                           trimap=trimap)
                self._save_imgs(result, imgs[i + j])

            if args.benchmark:
                self.autolog.times.end(stamp=True)
        logger.info("Finish")

    def _preprocess(self, img, trimap=None):
        data = {}
        data['img'] = img
        if trimap is not None:
            data['trimap'] = trimap
            data['gt_fields'] = ['trimap']
        data = self.cfg.transforms(data)
        return data

    def _postprocess(self, alpha, trans_info, trimap=None):
        """recover pred to origin shape"""
        if trimap is not None:
            trimap = trimap.squeeze(0)
            alpha[trimap == 0] = 0
            alpha[trimap == 255] = 1
        for item in trans_info[::-1]:
            if item[0] == 'resize':
                h, w = item[1][0], item[1][1]
                alpha = cv2.resize(alpha, (w, h),
                                   interpolation=cv2.INTER_LINEAR)
            elif item[0] == 'padding':
                h, w = item[1][0], item[1][1]
                alpha = alpha[:, :, 0:h, 0:w]
            else:
                raise Exception("Unexpected info '{}' in im_info".format(
                    item[0]))
        return alpha

    def _save_imgs(self, alpha, img_path):
        ori_img = cv2.imread(img_path)
        alpha = (alpha * 255).astype('uint8')
        if self.imgs_dir is not None:
            img_path = img_path.replace(self.imgs_dir, '')
        name, ext = os.path.splitext(img_path)
        if name[0] == '/':
            name = name[1:]
        alpha_save_path = os.path.join(self.args.save_dir, 'alpha/',
                                       name + '.png')
        clip_save_path = os.path.join(self.args.save_dir, 'clip/',
                                      name + '.png')

        # save alpha
        mkdir(alpha_save_path)
        cv2.imwrite(alpha_save_path, alpha)

        # save clip image
        mkdir(clip_save_path)
        alpha = alpha[:, :, np.newaxis]
        clip = np.concatenate([ori_img, alpha], axis=-1)
        cv2.imwrite(clip_save_path, clip)
def load_predictor(model_dir,
                   run_mode='fluid',
                   batch_size=1,
                   use_gpu=False,
                   min_subgraph_size=3,
                   use_dynamic_shape=False,
                   trt_min_shape=1,
                   trt_max_shape=1280,
                   trt_opt_shape=640):
    """Set AnalysisConfig and generate an AnalysisPredictor.

    Args:
        model_dir (str): root path of model.pdmodel and model.pdiparams
        use_gpu (bool): whether to use GPU
        run_mode (str): mode of running (fluid/trt_fp32/trt_fp16/trt_int8)
        use_dynamic_shape (bool): whether to use dynamic shape
        trt_min_shape (int): min shape for dynamic shape in trt
        trt_max_shape (int): max shape for dynamic shape in trt
        trt_opt_shape (int): opt shape for dynamic shape in trt

    Returns:
        predictor (PaddlePredictor): AnalysisPredictor

    Raises:
        ValueError: predicting by TensorRT requires use_gpu == True.
    """
    if not use_gpu and not run_mode == 'fluid':
        raise ValueError(
            "Predict by TensorRT mode: {}, expect use_gpu==True, but use_gpu == {}"
            .format(run_mode, use_gpu))
    use_calib_mode = True if run_mode == 'trt_int8' else False
    config = Config(os.path.join(model_dir, 'model.pdmodel'),
                    os.path.join(model_dir, 'model.pdiparams'))
    precision_map = {
        'trt_int8': Config.Precision.Int8,
        'trt_fp32': Config.Precision.Float32,
        'trt_fp16': Config.Precision.Half
    }
    if use_gpu:
        # initial GPU memory (MB), device ID
        config.enable_use_gpu(200, 0)
        # optimize graph and fuse ops
        config.switch_ir_optim(True)
    else:
        config.disable_gpu()

    if run_mode in precision_map.keys():
        config.enable_tensorrt_engine(workspace_size=1 << 10,
                                      max_batch_size=batch_size,
                                      min_subgraph_size=min_subgraph_size,
                                      precision_mode=precision_map[run_mode],
                                      use_static=False,
                                      use_calib_mode=use_calib_mode)
        if use_dynamic_shape:
            print('use_dynamic_shape')
            min_input_shape = {'image': [1, 3, trt_min_shape, trt_min_shape]}
            max_input_shape = {'image': [1, 3, trt_max_shape, trt_max_shape]}
            opt_input_shape = {'image': [1, 3, trt_opt_shape, trt_opt_shape]}
            config.set_trt_dynamic_shape_info(min_input_shape,
                                              max_input_shape,
                                              opt_input_shape)
            print('trt set dynamic shape done!')

    # disable printing logs during prediction
    config.disable_glog_info()
    # enable shared memory
    config.enable_memory_optim()
    # disable feed/fetch ops, needed by zero_copy_run
    config.switch_use_feed_fetch_ops(False)
    predictor = create_predictor(config)
    return predictor
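# A sketch of enabling TRT dynamic shape through this loader; it assumes a
# GPU + TensorRT build of Paddle, and the model_dir and shape values are
# placeholders for illustration.
predictor = load_predictor("output/inference_model",
                           run_mode="trt_fp16",
                           batch_size=1,
                           use_gpu=True,
                           use_dynamic_shape=True,
                           trt_min_shape=320,
                           trt_max_shape=1280,
                           trt_opt_shape=640)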
class Predictor:
    def __init__(self, args):
        """
        Prepare for prediction.
        For the usage and docs of Paddle Inference, please refer to
        https://paddleinference.paddlepaddle.org.cn/product_introduction/summary.html
        """
        self.args = args
        self.cfg = DeployConfig(args.cfg)

        self._init_base_config()

        if args.device == 'cpu':
            self._init_cpu_config()
        else:
            self._init_gpu_config()

        self.predictor = create_predictor(self.pred_cfg)

    def _init_base_config(self):
        self.pred_cfg = PredictConfig(self.cfg.model, self.cfg.params)
        if not self.args.print_detail:
            self.pred_cfg.disable_glog_info()
        self.pred_cfg.enable_memory_optim()
        self.pred_cfg.switch_ir_optim(True)

    def _init_cpu_config(self):
        """
        Init the config for x86 cpu.
        """
        logger.info("Using CPU")
        self.pred_cfg.disable_gpu()
        if self.args.enable_mkldnn:
            logger.info("Using MKLDNN")
            # cache 10 different shapes for mkldnn
            self.pred_cfg.set_mkldnn_cache_capacity(10)
            self.pred_cfg.enable_mkldnn()
        self.pred_cfg.set_cpu_math_library_num_threads(self.args.cpu_threads)

    def _init_gpu_config(self):
        """
        Init the config for nvidia gpu.
        """
        logger.info("Using GPU")
        self.pred_cfg.enable_use_gpu(100, 0)

    def run(self, imgs, trimaps=None, imgs_dir=None):
        self.imgs_dir = imgs_dir
        num = len(imgs)
        input_names = self.predictor.get_input_names()
        input_handle = {}
        for i in range(len(input_names)):
            input_handle[input_names[i]] = self.predictor.get_input_handle(
                input_names[i])
        output_names = self.predictor.get_output_names()
        output_handle = self.predictor.get_output_handle(output_names[0])
        args = self.args

        for i in tqdm.tqdm(range(0, num, args.batch_size)):
            img_inputs = []
            if trimaps is not None:
                trimap_inputs = []
            trans_info = []
            for j in range(i, i + args.batch_size):
                img = imgs[j]
                trimap = trimaps[j] if trimaps is not None else None
                data = self._preprocess(img=img, trimap=trimap)
                img_inputs.append(data['img'])
                if trimaps is not None:
                    trimap_inputs.append(data['trimap'][np.newaxis, :, :])
                trans_info.append(data['trans_info'])
            img_inputs = np.array(img_inputs)
            if trimaps is not None:
                trimap_inputs = (np.array(trimap_inputs)).astype('float32')

            input_handle['img'].copy_from_cpu(img_inputs)
            if trimaps is not None:
                input_handle['trimap'].copy_from_cpu(trimap_inputs)
            self.predictor.run()
            results = output_handle.copy_to_cpu()
            results = results.squeeze(1)
            for j in range(args.batch_size):
                trimap = trimap_inputs[j] if trimaps is not None else None
                result = self._postprocess(results[j],
                                           trans_info[j],
                                           trimap=trimap)
                self._save_imgs(result, imgs[i + j])
        logger.info("Finish")

    def _preprocess(self, img, trimap=None):
        data = {}
        data['img'] = img
        if trimap is not None:
            data['trimap'] = trimap
            data['gt_fields'] = ['trimap']
        data = self.cfg.transforms(data)
        return data

    def _postprocess(self, alpha, trans_info, trimap=None):
        """recover pred to origin shape"""
        if trimap is not None:
            trimap = trimap.squeeze(0)
            alpha[trimap == 0] = 0
            alpha[trimap == 255] = 1
        for item in trans_info[::-1]:
            if item[0] == 'resize':
                h, w = item[1][0], item[1][1]
                alpha = cv2.resize(alpha, (w, h),
                                   interpolation=cv2.INTER_LINEAR)
            elif item[0] == 'padding':
                h, w = item[1][0], item[1][1]
                alpha = alpha[:, :, 0:h, 0:w]
            else:
                raise Exception("Unexpected info '{}' in im_info".format(
                    item[0]))
        return alpha

    def _save_imgs(self, alpha, img_path):
        ori_img = cv2.imread(img_path)
        alpha = (alpha * 255).astype('uint8')
        if self.imgs_dir is not None:
            img_path = img_path.replace(self.imgs_dir, '')
        name, ext = os.path.splitext(img_path)
        if name[0] == '/':
            name = name[1:]
        alpha_save_path = os.path.join(self.args.save_dir, 'alpha/',
                                       name + '.png')
        clip_save_path = os.path.join(self.args.save_dir, 'clip/',
                                      name + '.png')

        # save alpha
        mkdir(alpha_save_path)
        cv2.imwrite(alpha_save_path, alpha)

        # save clip image
        mkdir(clip_save_path)
        alpha = alpha[:, :, np.newaxis]
        clip = np.concatenate([ori_img, alpha], axis=-1)
        cv2.imwrite(clip_save_path, clip)
def load_predictor(model_dir,
                   run_mode='fluid',
                   batch_size=1,
                   device='CPU',
                   min_subgraph_size=3,
                   use_dynamic_shape=False,
                   trt_min_shape=1,
                   trt_max_shape=1280,
                   trt_opt_shape=640,
                   trt_calib_mode=False,
                   cpu_threads=1,
                   enable_mkldnn=False):
    """Set AnalysisConfig and generate an AnalysisPredictor.

    Args:
        model_dir (str): root path of model.pdmodel and model.pdiparams
        device (str): device to run on, one of CPU/GPU/XPU; default is CPU
        run_mode (str): mode of running (fluid/trt_fp32/trt_fp16/trt_int8)
        use_dynamic_shape (bool): whether to use dynamic shape
        trt_min_shape (int): min shape for dynamic shape in trt
        trt_max_shape (int): max shape for dynamic shape in trt
        trt_opt_shape (int): opt shape for dynamic shape in trt
        trt_calib_mode (bool): if the model is produced by TRT offline
            quantitative calibration, trt_calib_mode needs to be True

    Returns:
        predictor (PaddlePredictor): AnalysisPredictor

    Raises:
        ValueError: predicting by TensorRT requires device == 'GPU'.
    """
    if device != 'GPU' and run_mode != 'fluid':
        raise ValueError(
            "Predict by TensorRT mode: {}, expect device=='GPU', but device == {}"
            .format(run_mode, device))
    config = Config(os.path.join(model_dir, 'model.pdmodel'),
                    os.path.join(model_dir, 'model.pdiparams'))
    if device == 'GPU':
        # initial GPU memory (MB), device ID
        config.enable_use_gpu(200, 0)
        # optimize graph and fuse ops
        config.switch_ir_optim(True)
    elif device == 'XPU':
        config.enable_lite_engine()
        config.enable_xpu(10 * 1024 * 1024)
    else:
        config.disable_gpu()
        config.set_cpu_math_library_num_threads(cpu_threads)
        if enable_mkldnn:
            try:
                # cache 10 different shapes for mkldnn to avoid memory leak
                config.set_mkldnn_cache_capacity(10)
                config.enable_mkldnn()
            except Exception as e:
                print("The current environment does not support `mkldnn`, "
                      "so disable mkldnn.")

    precision_map = {
        'trt_int8': Config.Precision.Int8,
        'trt_fp32': Config.Precision.Float32,
        'trt_fp16': Config.Precision.Half
    }
    if run_mode in precision_map.keys():
        config.enable_tensorrt_engine(workspace_size=1 << 10,
                                      max_batch_size=batch_size,
                                      min_subgraph_size=min_subgraph_size,
                                      precision_mode=precision_map[run_mode],
                                      use_static=False,
                                      use_calib_mode=trt_calib_mode)

        if use_dynamic_shape:
            min_input_shape = {
                'image': [batch_size, 3, trt_min_shape, trt_min_shape]
            }
            max_input_shape = {
                'image': [batch_size, 3, trt_max_shape, trt_max_shape]
            }
            opt_input_shape = {
                'image': [batch_size, 3, trt_opt_shape, trt_opt_shape]
            }
            config.set_trt_dynamic_shape_info(min_input_shape,
                                              max_input_shape,
                                              opt_input_shape)
            print('trt set dynamic shape done!')

    # disable printing logs during prediction
    config.disable_glog_info()
    # enable shared memory
    config.enable_memory_optim()
    # disable feed/fetch ops, needed by zero_copy_run
    config.switch_use_feed_fetch_ops(False)
    predictor = create_predictor(config)
    return predictor, config
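# A sketch of the device switch this loader exposes; the model_dir values are
# placeholders, not paths from the source.
# CPU with mkldnn and 4 math-library threads:
cpu_predictor, cpu_config = load_predictor("output/inference_model",
                                           device="CPU",
                                           cpu_threads=4,
                                           enable_mkldnn=True)
# GPU with TensorRT FP32 and dynamic shapes:
gpu_predictor, gpu_config = load_predictor("output/inference_model",
                                           device="GPU",
                                           run_mode="trt_fp32",
                                           use_dynamic_shape=True)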
class Predictor:
    def __init__(self, args):
        """
        Prepare for prediction.
        For the usage and docs of Paddle Inference, please refer to
        https://paddleinference.paddlepaddle.org.cn/product_introduction/summary.html
        """
        self.args = args
        self.cfg = DeployConfig(args.cfg)

        self._init_base_config()

        if args.device == 'cpu':
            self._init_cpu_config()
        else:
            self._init_gpu_config()

        self.predictor = create_predictor(self.pred_cfg)

        if hasattr(args, 'benchmark') and args.benchmark:
            import auto_log
            pid = os.getpid()
            self.autolog = auto_log.AutoLogger(
                model_name=args.model_name,
                model_precision=args.precision,
                batch_size=args.batch_size,
                data_shape="dynamic",
                save_path=None,
                inference_config=self.pred_cfg,
                pids=pid,
                process_name=None,
                gpu_ids=0,
                time_keys=[
                    'preprocess_time', 'inference_time', 'postprocess_time'
                ],
                warmup=0,
                logger=logger)

    def _init_base_config(self):
        self.pred_cfg = PredictConfig(self.cfg.model, self.cfg.params)
        if not self.args.print_detail:
            self.pred_cfg.disable_glog_info()
        self.pred_cfg.enable_memory_optim()
        self.pred_cfg.switch_ir_optim(True)

    def _init_cpu_config(self):
        """
        Init the config for x86 cpu.
        """
        logger.info("Use CPU")
        self.pred_cfg.disable_gpu()
        if self.args.enable_mkldnn:
            logger.info("Use MKLDNN")
            # cache 10 different shapes for mkldnn
            self.pred_cfg.set_mkldnn_cache_capacity(10)
            self.pred_cfg.enable_mkldnn()
        self.pred_cfg.set_cpu_math_library_num_threads(self.args.cpu_threads)

    def _init_gpu_config(self):
        """
        Init the config for nvidia gpu.
        """
        logger.info("Use GPU")
        self.pred_cfg.enable_use_gpu(100, 0)
        precision_map = {
            "fp16": PrecisionType.Half,
            "fp32": PrecisionType.Float32,
            "int8": PrecisionType.Int8
        }
        precision_mode = precision_map[self.args.precision]
        if self.args.use_trt:
            logger.info("Use TRT")
            self.pred_cfg.enable_tensorrt_engine(
                workspace_size=1 << 30,
                max_batch_size=1,
                min_subgraph_size=50,
                precision_mode=precision_mode,
                use_static=False,
                use_calib_mode=False)
            if use_auto_tune(self.args) and \
                    os.path.exists(self.args.auto_tuned_shape_file):
                logger.info("Use auto tuned dynamic shape")
                allow_build_at_runtime = True
                self.pred_cfg.enable_tuned_tensorrt_dynamic_shape(
                    self.args.auto_tuned_shape_file, allow_build_at_runtime)
            else:
                logger.info("Use manually set dynamic shape")
                min_input_shape = {"x": [1, 3, 100, 100]}
                max_input_shape = {"x": [1, 3, 2000, 3000]}
                opt_input_shape = {"x": [1, 3, 512, 1024]}
                self.pred_cfg.set_trt_dynamic_shape_info(
                    min_input_shape, max_input_shape, opt_input_shape)

    def run(self, imgs):
        if not isinstance(imgs, (list, tuple)):
            imgs = [imgs]

        num = len(imgs)
        input_names = self.predictor.get_input_names()
        input_handle = self.predictor.get_input_handle(input_names[0])
        output_names = self.predictor.get_output_names()
        output_handle = self.predictor.get_output_handle(output_names[0])
        results = []
        args = self.args
        if not os.path.exists(args.save_dir):
            os.makedirs(args.save_dir)

        for i in range(0, num, args.batch_size):
            if args.benchmark:
                self.autolog.times.start()

            data = np.array(
                [self._preprocess(img) for img in imgs[i:i + args.batch_size]])
            input_handle.reshape(data.shape)
            input_handle.copy_from_cpu(data)

            if args.benchmark:
                self.autolog.times.stamp()

            self.predictor.run()

            results = output_handle.copy_to_cpu()
            if args.benchmark:
                self.autolog.times.stamp()

            results = self._postprocess(results)

            if args.benchmark:
                self.autolog.times.end(stamp=True)
            self._save_imgs(results, imgs)
        logger.info("Finish")

    def _preprocess(self, img):
        return self.cfg.transforms(img)[0]

    def _postprocess(self, results):
        if self.args.with_argmax:
            results = np.argmax(results, axis=1)
        return results
    def _save_imgs(self, results, imgs):
        for i in range(results.shape[0]):
            result = get_pseudo_color_map(results[i])
            basename = os.path.basename(imgs[i])
            basename, _ = os.path.splitext(basename)
            basename = f'{basename}.png'
            result.save(os.path.join(self.args.save_dir, basename))
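# A hedged sketch of driving the segmentation Predictor above; the attribute
# names mirror those referenced in the class, and the file paths are
# placeholders, not values from the source.
from types import SimpleNamespace

fake_args = SimpleNamespace(cfg="output/deploy.yaml",        # hypothetical
                            device="cpu",
                            enable_mkldnn=False,
                            cpu_threads=1,
                            batch_size=1,
                            benchmark=False,
                            print_detail=True,
                            with_argmax=True,
                            save_dir="output/results")
predictor = Predictor(fake_args)
predictor.run(["demo.jpg"])  # hypothetical image path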