import logging
import os

import numpy as np
import paddle.inference
from paddle.inference import Config, PrecisionType, create_predictor
# PredictConfig is assumed to be an alias of the inference Config, as in
# PaddleSeg-style deploy scripts.
from paddle.inference import Config as PredictConfig

# DeployConfig, get_image_list and progbar are assumed to be helpers provided
# by the surrounding project.
logger = logging.getLogger(__name__)

# Output path for the collected shape ranges; assumed to be a module-level
# constant in the original source.
shape_file = "shape_range_info.pbtxt"


def init_predictor(args):
    if args.model_dir != "":
        config = Config(args.model_dir)
    else:
        config = Config(args.model_file, args.params_file)

    config.enable_memory_optim()
    if args.tune:
        # Record the input shape ranges observed at run time into shape_file.
        config.collect_shape_range_info(shape_file)
    if args.use_gpu:
        config.enable_use_gpu(1000, 0)
        if args.use_trt:
            # In dynamic shape mode, max_batch_size is ignored.
            config.enable_tensorrt_engine(workspace_size=1 << 30,
                                          max_batch_size=1,
                                          min_subgraph_size=5,
                                          precision_mode=PrecisionType.Float32,
                                          use_static=False,
                                          use_calib_mode=False)
            if args.tuned_dynamic_shape:
                config.enable_tuned_tensorrt_dynamic_shape(shape_file, True)
    else:
        # Even if MKL-DNN is not enabled, the number of BLAS threads can be
        # set; it should not exceed the number of physical cores in the CPU.
        config.set_cpu_math_library_num_threads(4)
        config.enable_mkldnn()

    predictor = create_predictor(config)
    return predictor
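

# A minimal usage sketch for init_predictor (not part of the source): feed one
# numpy array through the predictor and fetch the first output. The helper
# name and the single-input assumption are illustrative only.
def run_once(args, input_data):
    predictor = init_predictor(args)
    input_names = predictor.get_input_names()
    input_handle = predictor.get_input_handle(input_names[0])
    input_handle.reshape(input_data.shape)
    input_handle.copy_from_cpu(input_data)  # input_data: float32 ndarray
    predictor.run()
    output_names = predictor.get_output_names()
    output_handle = predictor.get_output_handle(output_names[0])
    return output_handle.copy_to_cpu()
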
def auto_tune(args, dataset, img_nums):
    """
    Use images to auto tune the dynamic shapes for the TRT subgraph.
    The tuned shapes are saved to args.auto_tuned_shape_file.

    Args:
        args (Namespace): input args.
        dataset (Dataset): the dataset that provides the tuning images.
        img_nums (int): the number of images used for auto tuning.
    Returns:
        None
    """
    logger.info("Auto tune the dynamic shape for GPU TRT.")

    assert use_auto_tune(args)

    num = min(len(dataset), img_nums)

    cfg = DeployConfig(args.cfg)
    pred_cfg = PredictConfig(cfg.model, cfg.params)
    pred_cfg.enable_use_gpu(100, 0)
    if not args.print_detail:
        pred_cfg.disable_glog_info()
    pred_cfg.collect_shape_range_info(args.auto_tuned_shape_file)

    predictor = create_predictor(pred_cfg)
    input_names = predictor.get_input_names()
    input_handle = predictor.get_input_handle(input_names[0])

    for idx, (img, _) in enumerate(dataset):
        data = np.array([img])
        input_handle.reshape(data.shape)
        input_handle.copy_from_cpu(data)
        try:
            predictor.run()
        except Exception:
            logger.info(
                "Auto tune failed. Usually the error is out of GPU memory "
                "because the model and images are too large.\n")
            del predictor
            if os.path.exists(args.auto_tuned_shape_file):
                os.remove(args.auto_tuned_shape_file)
            return

        if idx + 1 >= num:
            break

    logger.info("Auto tune success.\n")
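

# A minimal sketch of the two-pass workflow around auto_tune (not part of the
# source): collect shape ranges first, then build a TRT predictor that
# consumes them. The helper name, `val_dataset` and img_nums=20 are
# assumptions for illustration.
def build_predictor_with_auto_tune(args, val_dataset):
    if use_auto_tune(args):
        auto_tune(args, val_dataset, img_nums=20)

    cfg = DeployConfig(args.cfg)
    pred_cfg = PredictConfig(cfg.model, cfg.params)
    pred_cfg.enable_use_gpu(1000, 0)
    pred_cfg.enable_tensorrt_engine(workspace_size=1 << 30,
                                    max_batch_size=1,
                                    min_subgraph_size=5,
                                    precision_mode=PrecisionType.Float32,
                                    use_static=False,
                                    use_calib_mode=False)
    if os.path.exists(args.auto_tuned_shape_file):
        # Feed the recorded ranges back so TRT can build engines that cover
        # variable input shapes.
        pred_cfg.enable_tuned_tensorrt_dynamic_shape(
            args.auto_tuned_shape_file, True)
    return create_predictor(pred_cfg)
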
def auto_tune(args, imgs, img_nums):
    """
    Use images to auto tune the dynamic shapes for the TRT subgraph.
    The tuned shapes are saved to args.auto_tuned_shape_file.

    Args:
        args (Namespace): input args.
        imgs (str|list[str]): the path(s) of the images.
        img_nums (int): the number of images used for auto tuning.
    Returns:
        None
    """
    logger.info("Auto tune the dynamic shape for GPU TRT.")

    assert use_auto_tune(args)

    if not isinstance(imgs, (list, tuple)):
        imgs = [imgs]
    num = min(len(imgs), img_nums)

    cfg = DeployConfig(args.cfg)
    pred_cfg = PredictConfig(cfg.model, cfg.params)
    pred_cfg.enable_use_gpu(100, 0)
    if not args.print_detail:
        pred_cfg.disable_glog_info()
    pred_cfg.collect_shape_range_info(args.auto_tuned_shape_file)

    predictor = create_predictor(pred_cfg)
    input_names = predictor.get_input_names()
    input_handle = predictor.get_input_handle(input_names[0])

    for i in range(num):
        data = np.array([cfg.transforms(imgs[i])[0]])
        input_handle.reshape(data.shape)
        input_handle.copy_from_cpu(data)
        try:
            predictor.run()
        except Exception:
            logger.info(
                "Auto tune failed. Usually the error is out of GPU memory "
                "because the model and images are too large.\n")
            del predictor
            if os.path.exists(args.auto_tuned_shape_file):
                os.remove(args.auto_tuned_shape_file)
            return

    logger.info("Auto tune success.\n")
def collect_dynamic_shape(args):
    if not is_support_collecting():
        logger.error(
            "This PaddlePaddle build does not support collecting dynamic "
            "shapes; please reinstall the latest GPU version of PaddlePaddle.")
        return

    # prepare config
    cfg = DeployConfig(args.config)
    pred_cfg = PredictConfig(cfg.model, cfg.params)
    pred_cfg.enable_use_gpu(1000, 0)
    pred_cfg.collect_shape_range_info(args.dynamic_shape_path)

    # create predictor
    predictor = create_predictor(pred_cfg)
    input_names = predictor.get_input_names()
    input_handle = predictor.get_input_handle(input_names[0])

    # get images
    img_path_list, _ = get_image_list(args.image_path)
    if not isinstance(img_path_list, (list, tuple)):
        img_path_list = [img_path_list]
    logger.info(f"The num of images is {len(img_path_list)} \n")

    # collect
    progbar_val = progbar.Progbar(target=len(img_path_list))
    for idx, img_path in enumerate(img_path_list):
        data = np.array([cfg.transforms(img_path)[0]])
        input_handle.reshape(data.shape)
        input_handle.copy_from_cpu(data)

        try:
            predictor.run()
        except Exception:
            logger.info(
                "Failed to collect dynamic shapes. Usually the error is out "
                "of GPU memory because the model and images are too large.\n")
            del predictor
            if os.path.exists(args.dynamic_shape_path):
                os.remove(args.dynamic_shape_path)
            return

        progbar_val.update(idx + 1)

    logger.info(f"The dynamic shape is save in {args.dynamic_shape_path}")
# The method below belongs to a class in the original source; a hypothetical
# host class is added here so the snippet parses on its own.
class TunedShapeConfigBuilder:
    def get_config(self, model, params, tuned=False):
        config = Config()
        config.set_model_buffer(model, len(model), params, len(params))
        config.enable_use_gpu(100, 0)
        config.set_optim_cache_dir('tuned_test')
        if tuned:
            config.collect_shape_range_info('shape_range.pbtxt')
        else:
            config.enable_tensorrt_engine(
                workspace_size=1024,
                max_batch_size=1,
                min_subgraph_size=0,
                precision_mode=paddle.inference.PrecisionType.Float32,
                use_static=True,
                use_calib_mode=False)
            config.enable_tuned_tensorrt_dynamic_shape('shape_range.pbtxt',
                                                       True)

        return config
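
    # A minimal usage sketch for get_config (not part of the source): pass 1
    # records shape ranges into 'shape_range.pbtxt', pass 2 rebuilds the
    # config so TRT consumes the tuned shapes. Input feeding is elided.
    def build_tuned_predictor(self, model, params):
        predictor = create_predictor(self.get_config(model, params, tuned=True))
        # ... feed representative inputs and call predictor.run() here so the
        # shape ranges get written out ...
        return create_predictor(self.get_config(model, params, tuned=False))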