def auto_tune(args, dataset, img_nums):
    """Use images to auto tune the dynamic shape for the TRT sub graph.

    The tuned shape is saved in ``args.auto_tuned_shape_file``.

    Args:
        args (dict): Input args; must provide ``cfg``, ``print_detail`` and
            ``auto_tuned_shape_file``.
        dataset (Dataset): A dataset yielding ``(img, label)`` pairs.
        img_nums (int): The number of images used for auto tune.

    Returns:
        None
    """
    logger.info("Auto tune the dynamic shape for GPU TRT.")
    assert use_auto_tune(args)

    num = min(len(dataset), img_nums)

    cfg = DeployConfig(args.cfg)
    pred_cfg = PredictConfig(cfg.model, cfg.params)
    pred_cfg.enable_use_gpu(100, 0)
    if not args.print_detail:
        pred_cfg.disable_glog_info()
    # Paddle records the min/max/opt shape of every op while the model runs.
    pred_cfg.collect_shape_range_info(args.auto_tuned_shape_file)

    predictor = create_predictor(pred_cfg)
    input_names = predictor.get_input_names()
    input_handle = predictor.get_input_handle(input_names[0])

    for idx, (img, _) in enumerate(dataset):
        data = np.array([img])
        input_handle.reshape(data.shape)
        input_handle.copy_from_cpu(data)
        try:
            predictor.run()
        except Exception:
            # Narrowed from a bare `except:` so KeyboardInterrupt/SystemExit
            # are no longer swallowed. A failure here is usually GPU OOM.
            logger.info(
                "Auto tune fail. Usually, the error is out of GPU memory, "
                "because the model and image is too large. \n")
            del predictor
            # Remove the partially-written shape file: an incomplete file
            # would poison later TRT runs.
            if os.path.exists(args.auto_tuned_shape_file):
                os.remove(args.auto_tuned_shape_file)
            return
        if idx + 1 >= num:
            break
    logger.info("Auto tune success.\n")
def auto_tune(args, imgs, img_nums):
    """Use images to auto tune the dynamic shape for the TRT sub graph.

    The tuned shape is saved in ``args.auto_tuned_shape_file``.

    Args:
        args (dict): Input args; must provide ``cfg``, ``print_detail`` and
            ``auto_tuned_shape_file``.
        imgs (str | list[str]): The path(s) of the images.
        img_nums (int): The number of images used for auto tune.

    Returns:
        None
    """
    logger.info("Auto tune the dynamic shape for GPU TRT.")
    assert use_auto_tune(args)

    # Accept a single path as well as a list of paths.
    if not isinstance(imgs, (list, tuple)):
        imgs = [imgs]
    num = min(len(imgs), img_nums)

    cfg = DeployConfig(args.cfg)
    pred_cfg = PredictConfig(cfg.model, cfg.params)
    pred_cfg.enable_use_gpu(100, 0)
    if not args.print_detail:
        pred_cfg.disable_glog_info()
    # Paddle records the min/max/opt shape of every op while the model runs.
    pred_cfg.collect_shape_range_info(args.auto_tuned_shape_file)

    predictor = create_predictor(pred_cfg)
    input_names = predictor.get_input_names()
    input_handle = predictor.get_input_handle(input_names[0])

    for i in range(0, num):
        data = np.array([cfg.transforms(imgs[i])[0]])
        input_handle.reshape(data.shape)
        input_handle.copy_from_cpu(data)
        try:
            predictor.run()
        except Exception:
            # Narrowed from a bare `except:` so KeyboardInterrupt/SystemExit
            # are no longer swallowed. A failure here is usually GPU OOM.
            logger.info(
                "Auto tune fail. Usually, the error is out of GPU memory, "
                "because the model and image is too large. \n")
            del predictor
            # Remove the partially-written shape file: an incomplete file
            # would poison later TRT runs.
            if os.path.exists(args.auto_tuned_shape_file):
                os.remove(args.auto_tuned_shape_file)
            return
    logger.info("Auto tune success.\n")
def collect_dynamic_shape(args):
    """Run each image through the model once and record the dynamic shape
    range to ``args.dynamic_shape_path`` for later TRT deployment.

    Args:
        args: Parsed CLI args; must provide ``config``, ``image_path`` and
            ``dynamic_shape_path``.

    Returns:
        None
    """
    if not is_support_collecting():
        logger.error("The Paddle does not support collecting dynamic shape, " \
            "please reinstall the PaddlePaddle (latest gpu version).")

    # prepare config
    cfg = DeployConfig(args.config)
    pred_cfg = PredictConfig(cfg.model, cfg.params)
    pred_cfg.enable_use_gpu(1000, 0)
    pred_cfg.collect_shape_range_info(args.dynamic_shape_path)

    # create predictor
    predictor = create_predictor(pred_cfg)
    input_names = predictor.get_input_names()
    input_handle = predictor.get_input_handle(input_names[0])

    # get images
    img_path_list, _ = get_image_list(args.image_path)
    if not isinstance(img_path_list, (list, tuple)):
        img_path_list = [img_path_list]
    logger.info(f"The num of images is {len(img_path_list)} \n")

    # collect
    progbar_val = progbar.Progbar(target=len(img_path_list))
    for idx, img_path in enumerate(img_path_list):
        data = np.array([cfg.transforms(img_path)[0]])
        input_handle.reshape(data.shape)
        input_handle.copy_from_cpu(data)
        try:
            predictor.run()
        except Exception:
            # Narrowed from a bare `except:` so KeyboardInterrupt/SystemExit
            # are no longer swallowed. A failure here is usually GPU OOM.
            logger.info(
                "Fail to collect dynamic shape. Usually, the error is out of "
                "GPU memory, for the model and image are too large.\n")
            del predictor
            # Remove the partially-written shape file: an incomplete file
            # would poison later TRT runs.
            if os.path.exists(args.dynamic_shape_path):
                os.remove(args.dynamic_shape_path)
            # BUG FIX: previously execution fell through and the next loop
            # iteration called `predictor.run()` on the deleted predictor,
            # raising NameError. Bail out like the auto_tune helpers do.
            return

        progbar_val.update(idx + 1)

    logger.info(f"The dynamic shape is save in {args.dynamic_shape_path}")
def __init__(self, args):
    """Build a Paddle inference predictor from the deploy config in *args*.

    Optionally enables GPU execution and, on top of that, a TensorRT
    engine in INT8 or FP32 precision depending on ``args.use_int8``.
    """
    self.args = args
    self.cfg = DeployConfig(args.cfg)

    pred_cfg = PredictConfig(self.cfg.model, self.cfg.params)
    pred_cfg.disable_glog_info()

    if self.args.use_gpu:
        pred_cfg.enable_use_gpu(100, 0)
        if self.args.use_trt:
            # Pick the TRT precision from the CLI flag.
            precision = (PrecisionType.Int8
                         if args.use_int8 else PrecisionType.Float32)
            pred_cfg.enable_tensorrt_engine(
                workspace_size=1 << 30,
                max_batch_size=1,
                min_subgraph_size=3,
                precision_mode=precision,
                use_static=False,
                use_calib_mode=False)

    self.predictor = create_predictor(pred_cfg)
def __init__(self, args):
    """Set up the segmentation predictor plus optical-flow smoothing state.

    Keeps the previous frame (gray image and confidence map) so DIS
    optical flow can temporally smooth per-frame predictions.
    """
    self.args = args
    self.cfg = DeployConfig(args.cfg)
    self.compose = T.Compose(self.cfg.transforms)

    resize_h, resize_w = args.input_shape
    # DIS optical flow estimates motion between consecutive frames.
    self.disflow = cv2.DISOpticalFlow_create(
        cv2.DISOPTICAL_FLOW_PRESET_ULTRAFAST)
    self.prev_gray = np.zeros((resize_h, resize_w), np.uint8)
    self.prev_cfd = np.zeros((resize_h, resize_w), np.float32)
    self.is_init = True

    pred_cfg = PredictConfig(self.cfg.model, self.cfg.params)
    pred_cfg.disable_glog_info()
    if self.args.use_gpu:
        pred_cfg.enable_use_gpu(100, 0)
    self.predictor = create_predictor(pred_cfg)

    if self.args.test_speed:
        self.cost_averager = TimeAverager()
def _init_base_config(self):
    """Create the base Paddle ``PredictConfig`` shared by every backend.

    Configures glog verbosity, memory optimization and IR graph passes;
    stores the result on ``self.pred_cfg``.
    """
    pred_cfg = PredictConfig(self.cfg.model, self.cfg.params)
    # Silence Paddle's glog output unless detailed logs were requested.
    if not self.args.print_detail:
        pred_cfg.disable_glog_info()
    pred_cfg.enable_memory_optim()
    pred_cfg.switch_ir_optim(True)
    self.pred_cfg = pred_cfg