Example #1
def auto_tune(args, dataset, img_nums):
    """
    Use images to auto tune the dynamic shapes for the TRT subgraphs.
    The tuned shapes are saved in args.auto_tuned_shape_file.

    Args:
        args (dict): Input args.
        dataset (Dataset): A dataset that yields (img, label) pairs.
        img_nums (int): The number of images used for auto tuning.
    Returns:
        None
    """
    logger.info("Auto tune the dynamic shape for GPU TRT.")

    assert use_auto_tune(args)

    num = min(len(dataset), img_nums)

    cfg = DeployConfig(args.cfg)
    pred_cfg = PredictConfig(cfg.model, cfg.params)
    pred_cfg.enable_use_gpu(100, 0)
    if not args.print_detail:
        pred_cfg.disable_glog_info()
    pred_cfg.collect_shape_range_info(args.auto_tuned_shape_file)

    predictor = create_predictor(pred_cfg)
    input_names = predictor.get_input_names()
    input_handle = predictor.get_input_handle(input_names[0])

    for idx, (img, _) in enumerate(dataset):
        data = np.array([img])
        input_handle.reshape(data.shape)
        input_handle.copy_from_cpu(data)
        try:
            predictor.run()
        except Exception:
            logger.info(
                "Auto tune failed. Usually the error is caused by running out "
                "of GPU memory because the model and images are too large.\n")
            del predictor
            if os.path.exists(args.auto_tuned_shape_file):
                os.remove(args.auto_tuned_shape_file)
            return

        if idx + 1 >= num:
            break

    logger.info("Auto tune success.\n")
Example #2
def auto_tune(args, imgs, img_nums):
    """
    Use images to auto tune the dynamic shapes for the TRT subgraphs.
    The tuned shapes are saved in args.auto_tuned_shape_file.

    Args:
        args (dict): Input args.
        imgs (str|list[str]): The path(s) of the images.
        img_nums (int): The number of images used for auto tuning.
    Returns:
        None
    """
    logger.info("Auto tune the dynamic shape for GPU TRT.")

    assert use_auto_tune(args)

    if not isinstance(imgs, (list, tuple)):
        imgs = [imgs]
    num = min(len(imgs), img_nums)

    cfg = DeployConfig(args.cfg)
    pred_cfg = PredictConfig(cfg.model, cfg.params)
    pred_cfg.enable_use_gpu(100, 0)
    if not args.print_detail:
        pred_cfg.disable_glog_info()
    pred_cfg.collect_shape_range_info(args.auto_tuned_shape_file)

    predictor = create_predictor(pred_cfg)
    input_names = predictor.get_input_names()
    input_handle = predictor.get_input_handle(input_names[0])

    for i in range(0, num):
        data = np.array([cfg.transforms(imgs[i])[0]])
        input_handle.reshape(data.shape)
        input_handle.copy_from_cpu(data)
        try:
            predictor.run()
        except Exception:
            logger.info(
                "Auto tune failed. Usually the error is caused by running out "
                "of GPU memory because the model and images are too large.\n")
            del predictor
            if os.path.exists(args.auto_tuned_shape_file):
                os.remove(args.auto_tuned_shape_file)
            return

    logger.info("Auto tune success.\n")
Example #3
def collect_dynamic_shape(args):
    """Run the model over sample images to collect the dynamic shape
    info for TRT, and save it to args.dynamic_shape_path."""

    if not is_support_collecting():
        logger.error("This version of PaddlePaddle does not support collecting "
                     "dynamic shapes; please reinstall the latest GPU version "
                     "of PaddlePaddle.")
        return

    # prepare config
    cfg = DeployConfig(args.config)
    pred_cfg = PredictConfig(cfg.model, cfg.params)
    pred_cfg.enable_use_gpu(1000, 0)
    pred_cfg.collect_shape_range_info(args.dynamic_shape_path)

    # create predictor
    predictor = create_predictor(pred_cfg)
    input_names = predictor.get_input_names()
    input_handle = predictor.get_input_handle(input_names[0])

    # get images
    img_path_list, _ = get_image_list(args.image_path)
    if not isinstance(img_path_list, (list, tuple)):
        img_path_list = [img_path_list]
    logger.info(f"The num of images is {len(img_path_list)} \n")

    # collect
    progbar_val = progbar.Progbar(target=len(img_path_list))
    for idx, img_path in enumerate(img_path_list):
        data = np.array([cfg.transforms(img_path)[0]])
        input_handle.reshape(data.shape)
        input_handle.copy_from_cpu(data)

        try:
            predictor.run()
        except Exception:
            logger.info(
                "Failed to collect dynamic shapes. Usually the error is caused "
                "by running out of GPU memory because the model and images are "
                "too large.\n")
            del predictor
            if os.path.exists(args.dynamic_shape_path):
                os.remove(args.dynamic_shape_path)
            # Without this return, the next iteration would reference the
            # deleted predictor and raise a NameError.
            return

        progbar_val.update(idx + 1)

    logger.info(f"The dynamic shape is save in {args.dynamic_shape_path}")
Example #4
    def __init__(self, args):
        self.cfg = DeployConfig(args.cfg)
        self.args = args

        pred_cfg = PredictConfig(self.cfg.model, self.cfg.params)
        pred_cfg.disable_glog_info()
        if self.args.use_gpu:
            pred_cfg.enable_use_gpu(100, 0)

            if self.args.use_trt:
                ptype = PrecisionType.Int8 if args.use_int8 else PrecisionType.Float32
                pred_cfg.enable_tensorrt_engine(workspace_size=1 << 30,
                                                max_batch_size=1,
                                                min_subgraph_size=3,
                                                precision_mode=ptype,
                                                use_static=False,
                                                use_calib_mode=False)

        self.predictor = create_predictor(pred_cfg)
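
After construction, inference follows the standard Paddle Inference pattern; a sketch of a run method such a class might expose (the method itself is hypothetical, the predictor calls are standard):

    def run(self, data):
        # `data` is a float32 NCHW numpy array matching the model input.
        input_names = self.predictor.get_input_names()
        input_handle = self.predictor.get_input_handle(input_names[0])
        input_handle.reshape(data.shape)
        input_handle.copy_from_cpu(data)
        self.predictor.run()
        output_names = self.predictor.get_output_names()
        return self.predictor.get_output_handle(output_names[0]).copy_to_cpu()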
Example #5
    def __init__(self, args):
        self.cfg = DeployConfig(args.cfg)
        self.args = args
        self.compose = T.Compose(self.cfg.transforms)
        resize_h, resize_w = args.input_shape

        # DIS optical flow plus the cached previous gray frame and
        # confidence map are used to temporally smooth per-frame results.
        self.disflow = cv2.DISOpticalFlow_create(
            cv2.DISOPTICAL_FLOW_PRESET_ULTRAFAST)
        self.prev_gray = np.zeros((resize_h, resize_w), np.uint8)
        self.prev_cfd = np.zeros((resize_h, resize_w), np.float32)
        self.is_init = True

        pred_cfg = PredictConfig(self.cfg.model, self.cfg.params)
        pred_cfg.disable_glog_info()
        if self.args.use_gpu:
            pred_cfg.enable_use_gpu(100, 0)

        self.predictor = create_predictor(pred_cfg)
        if self.args.test_speed:
            self.cost_averager = TimeAverager()
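
The DIS flow and cached frames set up above are used per frame to smooth the segmentation over time. A rough sketch of the flow step only (the helper name is hypothetical, and the confidence-map fusion that consumes the flow is omitted):

    def _estimate_flow(self, frame_bgr):
        # Estimate dense optical flow from the previous gray frame to the
        # current one, then update the cache for the next call.
        h, w = self.prev_gray.shape
        cur_gray = cv2.cvtColor(frame_bgr, cv2.COLOR_BGR2GRAY)
        cur_gray = cv2.resize(cur_gray, (w, h))
        flow = self.disflow.calc(self.prev_gray, cur_gray, None)
        self.prev_gray = cur_gray
        return flow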
Example #6
    def _init_base_config(self):
        self.pred_cfg = PredictConfig(self.cfg.model, self.cfg.params)
        if not self.args.print_detail:
            self.pred_cfg.disable_glog_info()
        self.pred_cfg.enable_memory_optim()
        self.pred_cfg.switch_ir_optim(True)
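
For context, a sketch of how such a base-config helper is typically wired into a predictor class (the composition mirrors Examples #4 and #5; the class shown here is an assumption):

class Predictor:
    def __init__(self, args):
        self.args = args
        self.cfg = DeployConfig(args.cfg)
        self._init_base_config()  # builds self.pred_cfg as shown above
        self.predictor = create_predictor(self.pred_cfg)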