def __init__(self, config): super().__init__(config["Global"], config["Global"]["rec_inference_model_dir"]) self.preprocess_ops = create_operators(config["RecPreProcess"][ "transform_ops"]) self.postprocess = build_postprocess(config["RecPostProcess"]) self.benchmark = config["Global"].get("benchmark", False) if self.benchmark: import auto_log pid = os.getpid() self.auto_logger = auto_log.AutoLogger( model_name=config["Global"].get("model_name", "rec"), model_precision='fp16' if config["Global"]["use_fp16"] else 'fp32', batch_size=config["Global"].get("batch_size", 1), data_shape=[3, 224, 224], save_path=config["Global"].get("save_log_path", "./auto_log.log"), inference_config=self.config, pids=pid, process_name=None, gpu_ids=None, time_keys=[ 'preprocess_time', 'inference_time', 'postprocess_time' ], warmup=2)
def __init__(self, config): super().__init__(config["Global"]) self.preprocess_ops = [] self.postprocess = None if "PreProcess" in config: if "transform_ops" in config["PreProcess"]: self.preprocess_ops = create_operators( config["PreProcess"]["transform_ops"]) if "PostProcess" in config: self.postprocess = build_postprocess(config["PostProcess"]) # for whole_chain project to test each repo of paddle self.benchmark = config["Global"].get("benchmark", False) if self.benchmark: import auto_log import os pid = os.getpid() self.auto_logger = auto_log.AutoLogger( model_name=config["Global"].get("model_name", "cls"), model_precision='fp16' if config["Global"]["use_fp16"] else 'fp32', batch_size=config["Global"].get("batch_size", 1), data_shape=[3, 224, 224], save_path=config["Global"].get("save_log_path", "./auto_log.log"), inference_config=self.config, pids=pid, process_name=None, gpu_ids=None, time_keys=[ 'preprocess_time', 'inference_time', 'postprocess_time' ], warmup=2)
def __init__(self, args):
    self.rec_image_shape = [int(v) for v in args.rec_image_shape.split(",")]
    self.rec_batch_num = args.rec_batch_num
    self.rec_algorithm = args.rec_algorithm
    postprocess_params = {
        'name': 'CTCLabelDecode',
        "character_dict_path": args.rec_char_dict_path,
        "use_space_char": args.use_space_char
    }
    if self.rec_algorithm == "SRN":
        postprocess_params = {
            'name': 'SRNLabelDecode',
            "character_dict_path": args.rec_char_dict_path,
            "use_space_char": args.use_space_char
        }
    elif self.rec_algorithm == "RARE":
        postprocess_params = {
            'name': 'AttnLabelDecode',
            "character_dict_path": args.rec_char_dict_path,
            "use_space_char": args.use_space_char
        }
    elif self.rec_algorithm == 'NRTR':
        postprocess_params = {
            'name': 'NRTRLabelDecode',
            "character_dict_path": args.rec_char_dict_path,
            "use_space_char": args.use_space_char
        }
    elif self.rec_algorithm == "SAR":
        postprocess_params = {
            'name': 'SARLabelDecode',
            "character_dict_path": args.rec_char_dict_path,
            "use_space_char": args.use_space_char
        }
    self.postprocess_op = build_post_process(postprocess_params)
    self.predictor, self.input_tensor, self.output_tensors, self.config = \
        utility.create_predictor(args, 'rec', logger)
    self.benchmark = args.benchmark
    self.use_onnx = args.use_onnx
    if args.benchmark:
        import auto_log
        import os
        pid = os.getpid()
        gpu_id = utility.get_infer_gpuid()
        self.autolog = auto_log.AutoLogger(
            model_name="rec",
            model_precision=args.precision,
            batch_size=args.rec_batch_num,
            data_shape="dynamic",
            save_path=None,  # args.save_log_path
            inference_config=self.config,
            pids=pid,
            process_name=None,
            gpu_ids=gpu_id if args.use_gpu else None,
            time_keys=[
                'preprocess_time', 'inference_time', 'postprocess_time'
            ],
            warmup=0,
            logger=logger)
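# The if/elif chain above repeats everything except the decoder name. A
# behavior-preserving sketch of the same selection as a lookup table;
# the decoder class names are the ones used above, and the chain's CTC
# default is kept for unlisted algorithms (REC_DECODERS and
# build_rec_postprocess_params are hypothetical names):
REC_DECODERS = {
    "SRN": "SRNLabelDecode",
    "RARE": "AttnLabelDecode",
    "NRTR": "NRTRLabelDecode",
    "SAR": "SARLabelDecode",
}

def build_rec_postprocess_params(args):
    return {
        "name": REC_DECODERS.get(args.rec_algorithm, "CTCLabelDecode"),
        "character_dict_path": args.rec_char_dict_path,
        "use_space_char": args.use_space_char,
    }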
def main(args):
    predictor, pred_config = init_predictor(args)
    place = paddle.set_device('gpu' if args.use_gpu else 'cpu')
    args.place = place
    input_names = predictor.get_input_names()
    output_names = predictor.get_output_names()
    test_dataloader = create_data_loader(args)
    if args.benchmark:
        import auto_log
        import os
        pid = os.getpid()
        autolog = auto_log.AutoLogger(
            model_name=args.model_name,
            model_precision=args.precision,
            batch_size=args.batchsize,
            data_shape="dynamic",
            save_path=args.save_log_path,
            inference_config=pred_config,
            pids=pid,
            process_name=None,
            gpu_ids=0,
            time_keys=[
                'preprocess_time', 'inference_time', 'postprocess_time'
            ])
    for batch_id, batch_data in enumerate(test_dataloader):
        batch_data[1] = batch_data[1][0]
        name_data_pair = dict(zip(input_names, batch_data))
        if args.benchmark:
            autolog.times.start()
        for name in input_names:
            input_tensor = predictor.get_input_handle(name)
            input_tensor.copy_from_cpu(name_data_pair[name].numpy())
        if args.benchmark:
            autolog.times.stamp()
        predictor.run()
        # fetch every output handle once; the original fetched them in one
        # loop and then read only the last handle in the collection loop
        outputs = []
        for name in output_names:
            output_tensor = predictor.get_output_handle(name)
            outputs.append((output_tensor, output_tensor.copy_to_cpu()))
        results = []
        results_type = []
        if args.benchmark:
            autolog.times.stamp()
        for output_tensor, output_data in outputs:
            results_type.append(output_tensor.type())
            results.append(output_data[0])
        if args.benchmark:
            autolog.times.end(stamp=True)
        print(results)
    if args.benchmark:
        autolog.report()
def infer_main(args): """infer_main Main inference function. Args: args: Parameters generated using argparser. Returns: class_id: Class index of the input. prob: : Probability of the input. """ inference_engine = InferenceEngine(args) # init benchmark if args.benchmark: import auto_log autolog = auto_log.AutoLogger(model_name="classification", batch_size=args.batch_size, inference_config=inference_engine.config, gpu_ids="auto" if args.use_gpu else None) assert args.batch_size == 1, "batch size just supports 1 now." # enable benchmark if args.benchmark: autolog.times.start() # preprocess img = inference_engine.preprocess(args.img_path) if args.benchmark: autolog.times.stamp() output = inference_engine.run(img) if args.benchmark: autolog.times.stamp() # postprocess class_id, prob = inference_engine.postprocess(output) if args.benchmark: autolog.times.stamp() autolog.times.end(stamp=True) autolog.report() print(f"image_name: {args.img_path}, class_id: {class_id}, prob: {prob}") return class_id, prob
def infer_main(args): """infer_main Main inference function. Args: args: Parameters generated using argparser. Returns: class_id: Class index of the input. prob: : Probability of the input. """ # init inference engine inference_engine = InferenceEngine(args) # init benchmark log if args.benchmark: import auto_log autolog = auto_log.AutoLogger(model_name="example", batch_size=args.batch_size, inference_config=inference_engine.config, gpu_ids="auto" if args.use_gpu else None) # enable benchmark if args.benchmark: autolog.times.start() # preprocess img = inference_engine.preprocess(args.img_path) if args.benchmark: autolog.times.stamp() output = inference_engine.run(img) if args.benchmark: autolog.times.stamp() # postprocess class_id, prob = inference_engine.postprocess(output) if args.benchmark: autolog.times.stamp() autolog.times.end(stamp=True) autolog.report() return class_id, prob
def __init__(self, args): """ Prepare for prediction. The usage and docs of paddle inference, please refer to https://paddleinference.paddlepaddle.org.cn/product_introduction/summary.html """ self.args = args self.cfg = DeployConfig(args.cfg) self._init_base_config() if args.device == 'cpu': self._init_cpu_config() else: self._init_gpu_config() self.predictor = create_predictor(self.pred_cfg) if hasattr(args, 'benchmark') and args.benchmark: import auto_log pid = os.getpid() self.autolog = auto_log.AutoLogger(model_name=args.model_name, model_precision=args.precision, batch_size=args.batch_size, data_shape="dynamic", save_path=None, inference_config=self.pred_cfg, pids=pid, process_name=None, gpu_ids=0, time_keys=[ 'preprocess_time', 'inference_time', 'postprocess_time' ], warmup=0, logger=logger)
def __init__(self, model_dir, device="gpu", max_seq_length=128, batch_size=32, use_tensorrt=False, precision="fp32", cpu_threads=10, enable_mkldnn=False): self.max_seq_length = max_seq_length self.batch_size = batch_size model_file = model_dir + "/inference.pdmodel" params_file = model_dir + "/inference.pdiparams" if not os.path.exists(model_file): raise ValueError("not find model file path {}".format(model_file)) if not os.path.exists(params_file): raise ValueError( "not find params file path {}".format(params_file)) config = paddle.inference.Config(model_file, params_file) if device == "gpu": # set GPU configs accordingly # such as intialize the gpu memory, enable tensorrt config.enable_use_gpu(100, 0) precision_map = { "fp16": inference.PrecisionType.Half, "fp32": inference.PrecisionType.Float32, "int8": inference.PrecisionType.Int8 } precision_mode = precision_map[precision] if args.use_tensorrt: config.enable_tensorrt_engine(max_batch_size=batch_size, min_subgraph_size=30, precision_mode=precision_mode) elif device == "cpu": # set CPU configs accordingly, # such as enable_mkldnn, set_cpu_math_library_num_threads config.disable_gpu() if args.enable_mkldnn: # cache 10 different shapes for mkldnn to avoid memory leak config.set_mkldnn_cache_capacity(10) config.enable_mkldnn() config.set_cpu_math_library_num_threads(args.cpu_threads) elif device == "xpu": # set XPU configs accordingly config.enable_xpu(100) config.switch_use_feed_fetch_ops(False) self.predictor = paddle.inference.create_predictor(config) self.input_handles = [ self.predictor.get_input_handle(name) for name in self.predictor.get_input_names() ] self.output_handle = self.predictor.get_output_handle( self.predictor.get_output_names()[0]) if args.benchmark: import auto_log pid = os.getpid() self.autolog = auto_log.AutoLogger(model_name="ernie-1.0", model_precision=precision, batch_size=self.batch_size, data_shape="dynamic", save_path=args.save_log_path, inference_config=config, pids=pid, process_name=None, gpu_ids=0, time_keys=[ 'preprocess_time', 'inference_time', 'postprocess_time' ], warmup=0, logger=logger)
def __init__(self, model_dir, device="gpu", max_seq_length=128, batch_size=200, use_tensorrt=False, precision="fp32", enable_mkldnn=False, benchmark=False, save_log_path=""): self.max_seq_length = max_seq_length self.batch_size = batch_size model_file = os.path.join(model_dir, "inference.pdmodel") params_file = os.path.join(model_dir, "inference.pdiparams") if not os.path.exists(model_file): raise ValueError("not find model file path {}".format(model_file)) if not os.path.exists(params_file): raise ValueError( "not find params file path {}".format(params_file)) config = paddle.inference.Config(model_file, params_file) if device == "gpu": # set GPU configs accordingly config.enable_use_gpu(100, 0) precision_map = { "fp16": (inference.PrecisionType.Half, False), "fp32": (inference.PrecisionType.Float32, False), "int8": (inference.PrecisionType.Int8, True) } precision_mode, use_calib_mode = precision_map[precision] if use_tensorrt: config.enable_tensorrt_engine(max_batch_size=batch_size, min_subgraph_size=1, precision_mode=precision_mode, use_calib_mode=use_calib_mode) min_input_shape = { # shape: [B, T, H] "embedding_1.tmp_0": [batch_size, 1, 128], # shape: [T, B, H] "gru_0.tmp_0": [1, batch_size, 256], } max_input_shape = { "embedding_1.tmp_0": [batch_size, 256, 128], "gru_0.tmp_0": [256, batch_size, 256], } opt_input_shape = { "embedding_1.tmp_0": [batch_size, 128, 128], "gru_0.tmp_0": [128, batch_size, 256], } config.set_trt_dynamic_shape_info(min_input_shape, max_input_shape, opt_input_shape) elif device == "cpu": # set CPU configs accordingly, # such as enable_mkldnn, set_cpu_math_library_num_threads config.disable_gpu() if enable_mkldnn: # cache 10 different shapes for mkldnn to avoid memory leak config.set_mkldnn_cache_capacity(10) config.enable_mkldnn() config.set_cpu_math_library_num_threads(args.cpu_threads) elif device == "xpu": # set XPU configs accordingly config.enable_xpu(100) config.switch_use_feed_fetch_ops(False) self.predictor = paddle.inference.create_predictor(config) self.input_handles = [ self.predictor.get_input_handle(name) for name in self.predictor.get_input_names() ] self.output_handle = self.predictor.get_output_handle( self.predictor.get_output_names()[0]) if args.benchmark: import auto_log pid = os.getpid() kwargs = { "model_name": "bigru_crf", "model_precision": precision, "batch_size": self.batch_size, "data_shape": "dynamic", "save_path": save_log_path, "inference_config": config, "pids": pid, "process_name": None, "time_keys": ['preprocess_time', 'inference_time', 'postprocess_time'], "warmup": 0, "logger": logger } if device == "gpu": kwargs["gpu_ids"] = 0 self.autolog = auto_log.AutoLogger(**kwargs)
def __init__(self, args):
    self.args = args
    self.det_algorithm = args.det_algorithm
    pre_process_list = [{
        'DetResizeForTest': {
            'limit_side_len': args.det_limit_side_len,
            'limit_type': args.det_limit_type,
        }
    }, {
        'NormalizeImage': {
            'std': [0.229, 0.224, 0.225],
            'mean': [0.485, 0.456, 0.406],
            'scale': '1./255.',
            'order': 'hwc'
        }
    }, {
        'ToCHWImage': None
    }, {
        'KeepKeys': {
            'keep_keys': ['image', 'shape']
        }
    }]
    postprocess_params = {}
    if self.det_algorithm == "DB":
        postprocess_params['name'] = 'DBPostProcess'
        postprocess_params["thresh"] = args.det_db_thresh
        postprocess_params["box_thresh"] = args.det_db_box_thresh
        postprocess_params["max_candidates"] = 1000
        postprocess_params["unclip_ratio"] = args.det_db_unclip_ratio
        postprocess_params["use_dilation"] = args.use_dilation
        postprocess_params["score_mode"] = args.det_db_score_mode
    elif self.det_algorithm == "EAST":
        postprocess_params['name'] = 'EASTPostProcess'
        postprocess_params["score_thresh"] = args.det_east_score_thresh
        postprocess_params["cover_thresh"] = args.det_east_cover_thresh
        postprocess_params["nms_thresh"] = args.det_east_nms_thresh
    elif self.det_algorithm == "SAST":
        pre_process_list[0] = {
            'DetResizeForTest': {
                'resize_long': args.det_limit_side_len
            }
        }
        postprocess_params['name'] = 'SASTPostProcess'
        postprocess_params["score_thresh"] = args.det_sast_score_thresh
        postprocess_params["nms_thresh"] = args.det_sast_nms_thresh
        self.det_sast_polygon = args.det_sast_polygon
        if self.det_sast_polygon:
            postprocess_params["sample_pts_num"] = 6
            postprocess_params["expand_scale"] = 1.2
            postprocess_params["shrink_ratio_of_width"] = 0.2
        else:
            postprocess_params["sample_pts_num"] = 2
            postprocess_params["expand_scale"] = 1.0
            postprocess_params["shrink_ratio_of_width"] = 0.3
    else:
        logger.info("unknown det_algorithm: {}".format(self.det_algorithm))
        sys.exit(0)

    self.preprocess_op = create_operators(pre_process_list)
    self.postprocess_op = build_post_process(postprocess_params)
    self.predictor, self.input_tensor, self.output_tensors, self.config = \
        utility.create_predictor(args, 'det', logger)

    if args.benchmark:
        import auto_log
        import os
        pid = os.getpid()
        self.autolog = auto_log.AutoLogger(
            model_name="det",
            model_precision=args.precision,
            batch_size=1,
            data_shape="dynamic",
            save_path=args.save_log_path,
            inference_config=self.config,
            pids=pid,
            process_name=None,
            gpu_ids=0,
            time_keys=[
                'preprocess_time', 'inference_time', 'postprocess_time'
            ],
            warmup=10)
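# The detector above builds self.preprocess_op with create_operators but
# this excerpt never applies it. Assuming each operator is a callable
# that takes and returns the data dict (as the ToCHWImage/KeepKeys ops
# suggest), running the pipeline is a simple fold; a sketch of the usual
# transform helper under that assumption:
def transform(data, ops):
    for op in ops:
        data = op(data)
        if data is None:  # an operator may filter out the sample
            return None
    return data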
def paddle_inference(args):
    import paddle.inference as paddle_infer

    config = init_paddle_inference_config(args)
    predictor = paddle_infer.create_predictor(config)

    input_names = predictor.get_input_names()
    input_handle = predictor.get_input_handle(input_names[0])

    if args.benchmark:
        import auto_log
        import os
        pid = os.getpid()
        autolog = auto_log.AutoLogger(
            model_name="det",
            model_precision='fp32',
            batch_size=1,
            data_shape="dynamic",
            save_path="./output/auto_log.log",
            inference_config=config,
            pids=pid,
            process_name=None,
            gpu_ids=0,
            time_keys=[
                'preprocess_time', 'inference_time', 'postprocess_time'
            ],
            warmup=0)

    # warm up the predictor with a random input
    img = np.random.uniform(0, 255, [1, 3, 112, 112]).astype(np.float32)
    input_handle.copy_from_cpu(img)
    for i in range(10):
        predictor.run()

    img_list = get_image_file_list(args.image_path)
    for img_path in img_list:
        img = cv2.imread(img_path)
        if args.benchmark:
            autolog.times.start()
        # normalize to mean 0.5, std 0.5 (0.00784313725 == 1 / 127.5)
        img = (img - 127.5) * 0.00784313725
        # BGR2RGB
        img = img[:, :, ::-1]
        img = img.transpose((2, 0, 1))
        img = np.expand_dims(img, 0)
        img = img.astype('float32')
        if args.benchmark:
            autolog.times.stamp()
        input_handle.copy_from_cpu(img)
        predictor.run()
        output_names = predictor.get_output_names()
        output_handle = predictor.get_output_handle(output_names[0])
        output_data = output_handle.copy_to_cpu()
        if args.benchmark:
            autolog.times.stamp()
            autolog.times.end(stamp=True)
        print('{}\t{}'.format(img_path, json.dumps(output_data.tolist())))
        print('paddle inference result: ', output_data.shape)

    if args.benchmark:
        autolog.report()