def create_paddle_predictor(args):
    """Build a Paddle inference predictor from command-line arguments."""
    cfg = Config(args.model_file, args.params_file)

    if args.use_gpu:
        cfg.enable_use_gpu(args.gpu_mem, 0)
    else:
        cfg.disable_gpu()
        if args.enable_mkldnn:
            # Cache 10 different input shapes for MKL-DNN to avoid a memory leak.
            cfg.set_mkldnn_cache_capacity(10)
            cfg.enable_mkldnn()

    cfg.switch_ir_optim(args.ir_optim)  # default true

    if args.use_tensorrt:
        trt_precision = (Config.Precision.Half
                         if args.use_fp16 else Config.Precision.Float32)
        cfg.enable_tensorrt_engine(precision_mode=trt_precision,
                                   max_batch_size=args.batch_size)

    cfg.enable_memory_optim()
    # Zero-copy tensors: bypass the feed/fetch ops.
    cfg.switch_use_feed_fetch_ops(False)
    return create_predictor(cfg)
def init_predictor(args):
    """Create a Paddle predictor; GPU path may enable TensorRT with tuned shapes.

    Bug fix: the original tested ``args.model_dir is not ""`` — an identity
    comparison against a string literal, which is not a reliable equality
    test and raises SyntaxWarning on Python 3.8+. Replaced with ``!=``.
    """
    if args.model_dir != "":
        config = Config(args.model_dir)
    else:
        config = Config(args.model_file, args.params_file)
    config.enable_memory_optim()
    if args.tune:
        # NOTE(review): `shape_file` is assumed to be a module-level name
        # defined elsewhere in this file — confirm.
        config.collect_shape_range_info(shape_file)
    if args.use_gpu:
        config.enable_use_gpu(1000, 0)
        if args.use_trt:
            # using dynamic shape mode, the max_batch_size will be ignored.
            config.enable_tensorrt_engine(workspace_size=1 << 30,
                                          max_batch_size=1,
                                          min_subgraph_size=5,
                                          precision_mode=PrecisionType.Float32,
                                          use_static=False,
                                          use_calib_mode=False)
            if args.tuned_dynamic_shape:
                config.enable_tuned_tensorrt_dynamic_shape(shape_file, True)
    else:
        # If not specific mkldnn, you can set the blas thread.
        # The thread num should not be greater than the number of cores in the CPU.
        config.set_cpu_math_library_num_threads(4)
        config.enable_mkldnn()
    predictor = create_predictor(config)
    return predictor
def create_paddle_predictor(self, args, inference_model_dir=None):
    """Create a predictor (and its Config) for an exported inference model dir."""
    if inference_model_dir is None:
        inference_model_dir = args.inference_model_dir
    params_file = os.path.join(inference_model_dir, "inference.pdiparams")
    model_file = os.path.join(inference_model_dir, "inference.pdmodel")

    infer_cfg = Config(model_file, params_file)
    if args.use_gpu:
        infer_cfg.enable_use_gpu(args.gpu_mem, 0)
    else:
        infer_cfg.disable_gpu()
        if args.enable_mkldnn:
            # Cache 10 different shapes for MKL-DNN to avoid a memory leak.
            infer_cfg.set_mkldnn_cache_capacity(10)
            infer_cfg.enable_mkldnn()
    infer_cfg.set_cpu_math_library_num_threads(args.cpu_num_threads)

    if args.enable_profile:
        infer_cfg.enable_profile()
    infer_cfg.disable_glog_info()
    infer_cfg.switch_ir_optim(args.ir_optim)  # default true
    if args.use_tensorrt:
        trt_precision = (Config.Precision.Half
                         if args.use_fp16 else Config.Precision.Float32)
        infer_cfg.enable_tensorrt_engine(precision_mode=trt_precision,
                                         max_batch_size=args.batch_size,
                                         min_subgraph_size=30)
    infer_cfg.enable_memory_optim()
    # Zero-copy input/output: skip feed/fetch ops.
    infer_cfg.switch_use_feed_fetch_ops(False)

    predictor = create_predictor(infer_cfg)
    return predictor, infer_cfg
def load_predictor(model_dir, run_mode='fluid', batch_size=1, use_gpu=False,
                   min_subgraph_size=3):
    """set AnalysisConfig, generate AnalysisPredictor

    Args:
        model_dir (str): root path of __model__ and __params__
        use_gpu (bool): whether use gpu
    Returns:
        predictor (PaddlePredictor): AnalysisPredictor
    Raises:
        ValueError: predict by TensorRT need use_gpu == True.
    """
    if run_mode != 'fluid' and not use_gpu:
        raise ValueError(
            "Predict by TensorRT mode: {}, expect use_gpu==True, but use_gpu == {}"
            .format(run_mode, use_gpu))
    if run_mode == 'trt_int8':
        raise ValueError("TensorRT int8 mode is not supported now, "
                         "please use trt_fp32 or trt_fp16 instead.")

    cfg = Config(os.path.join(model_dir, 'model.pdmodel'),
                 os.path.join(model_dir, 'model.pdiparams'))
    trt_precisions = {
        'trt_int8': Config.Precision.Int8,
        'trt_fp32': Config.Precision.Float32,
        'trt_fp16': Config.Precision.Half,
    }

    if use_gpu:
        cfg.enable_use_gpu(200, 0)  # initial GPU memory (MB), device id 0
        # FIXME(dkp): ir optimize may prune variables inside the graph and
        #             incur errors in Paddle 2.0 (e.g. SSDLite, FCOS); keep
        #             False until switch_ir_optim is fixed.
        cfg.switch_ir_optim(False)
    else:
        cfg.disable_gpu()

    if run_mode in trt_precisions:
        cfg.enable_tensorrt_engine(workspace_size=1 << 10,
                                   max_batch_size=batch_size,
                                   min_subgraph_size=min_subgraph_size,
                                   precision_mode=trt_precisions[run_mode],
                                   use_static=False,
                                   use_calib_mode=False)

    cfg.disable_glog_info()  # silence logs while predicting
    cfg.enable_memory_optim()  # enable shared memory
    cfg.switch_use_feed_fetch_ops(False)  # needed by zero_copy_run
    return create_predictor(cfg)
def init_predictor(args):
    """Create a TensorRT GPU predictor with manually-set dynamic input shapes.

    Bug fix: the original tested ``args.model_dir is not ""`` — an identity
    comparison against a string literal, which is not a reliable equality
    test and raises SyntaxWarning on Python 3.8+. Replaced with ``!=``.
    """
    if args.model_dir != "":
        config = Config(args.model_dir)
    else:
        config = Config(args.model_file, args.params_file)
    config.enable_memory_optim()
    config.enable_use_gpu(100, 0)
    # using dynamic shape mode, the max_batch_size will be ignored.
    config.enable_tensorrt_engine(workspace_size=1 << 30,
                                  max_batch_size=1,
                                  min_subgraph_size=5,
                                  precision_mode=PrecisionType.Float32,
                                  use_static=False,
                                  use_calib_mode=False)
    # Dynamic-shape ranges for an ERNIE-style model: three token-id inputs
    # plus an attention-mask tensor with `head_number` heads.
    head_number = 12
    names = [
        "placeholder_0", "placeholder_1", "placeholder_2", "stack_0.tmp_0"
    ]
    min_input_shape = [1, 1, 1]
    max_input_shape = [100, 128, 1]
    opt_input_shape = [10, 60, 1]
    config.set_trt_dynamic_shape_info(
        {
            names[0]: min_input_shape,
            names[1]: min_input_shape,
            names[2]: min_input_shape,
            names[3]: [1, head_number, 1, 1]
        }, {
            names[0]: max_input_shape,
            names[1]: max_input_shape,
            names[2]: max_input_shape,
            names[3]: [100, head_number, 128, 128]
        }, {
            names[0]: opt_input_shape,
            names[1]: opt_input_shape,
            names[2]: opt_input_shape,
            names[3]: [10, head_number, 60, 60]
        })
    predictor = create_predictor(config)
    return predictor
def get_config(self, model, params, tuned=False):
    """Build a GPU inference Config from in-memory model/param buffers.

    When ``tuned`` is True the config only collects shape-range info into
    'shape_range.pbtxt'; otherwise it enables TensorRT and loads the
    previously tuned dynamic shapes from that file.
    """
    cfg = Config()
    cfg.set_model_buffer(model, len(model), params, len(params))
    cfg.enable_use_gpu(100, 0)
    cfg.set_optim_cache_dir('tuned_test')
    if tuned:
        cfg.collect_shape_range_info('shape_range.pbtxt')
    else:
        cfg.enable_tensorrt_engine(
            workspace_size=1024,
            max_batch_size=1,
            min_subgraph_size=0,
            precision_mode=paddle.inference.PrecisionType.Float32,
            use_static=True,
            use_calib_mode=False)
        cfg.enable_tuned_tensorrt_dynamic_shape('shape_range.pbtxt', True)
    return cfg
def __init__(self, args):
    """Build the predictor from a deploy config; optionally GPU + TensorRT."""
    self.cfg = DeployConfig(args.cfg)
    self.args = args

    pred_cfg = PredictConfig(self.cfg.model, self.cfg.params)
    pred_cfg.disable_glog_info()
    if self.args.use_gpu:
        pred_cfg.enable_use_gpu(100, 0)
        if self.args.use_trt:
            ptype = (PrecisionType.Int8
                     if args.use_int8 else PrecisionType.Float32)
            pred_cfg.enable_tensorrt_engine(workspace_size=1 << 30,
                                            max_batch_size=1,
                                            min_subgraph_size=3,
                                            precision_mode=ptype,
                                            use_static=False,
                                            use_calib_mode=False)
    self.predictor = create_predictor(pred_cfg)
def init_predictor(args):
    """Create a predictor: GPU (optionally TensorRT) or CPU (optionally MKL-DNN)."""
    if args.model_dir:
        model_cfg = Config(args.model_dir)
    else:
        model_cfg = Config(args.model_file, args.params_file)

    if args.use_gpu:
        model_cfg.enable_use_gpu(1000, 0)
        if args.enable_tensorRT:
            model_cfg.enable_tensorrt_engine(
                max_batch_size=args.batchsize,
                min_subgraph_size=1,
                precision_mode=paddle.inference.PrecisionType.Float32)
    else:
        model_cfg.disable_gpu()
        model_cfg.delete_pass("repeated_fc_relu_fuse_pass")
        model_cfg.set_cpu_math_library_num_threads(args.cpu_threads)
        if args.enable_mkldnn:
            model_cfg.enable_mkldnn()

    return create_predictor(model_cfg), model_cfg
def init_predictor(args):
    """Create a predictor, auto-detecting the model layout inside a directory.

    A directory may hold either a legacy combined ``__model__`` file or a
    ``*.pdmodel``/``*.pdiparams`` pair; otherwise explicit file paths are used.

    Improvements over the original: the scan flags were integers used as
    booleans (``has_model = 0``) and as path sentinels (``pdmodel_name = 0``);
    they are now a bool and ``None``. The fixed-substring regex
    ``re.search("__model__", ...)`` is replaced by an ``in`` test.

    Raises:
        ValueError: if the directory contains neither layout.
    """
    if args.model_dir:
        has_legacy_model = False
        pdmodel_path = None
        pdiparams_path = None
        for file_name in os.listdir(args.model_dir):
            if "__model__" in file_name:
                has_legacy_model = True
            if file_name.endswith(".pdmodel"):
                pdmodel_path = os.path.join(args.model_dir, file_name)
            if file_name.endswith(".pdiparams"):
                pdiparams_path = os.path.join(args.model_dir, file_name)
        if has_legacy_model:
            config = Config(args.model_dir)
        elif pdmodel_path and pdiparams_path:
            config = Config(pdmodel_path, pdiparams_path)
        else:
            raise ValueError(
                "config setting error, please check your model path")
    else:
        config = Config(args.model_file, args.params_file)

    if args.use_gpu:
        config.enable_use_gpu(1000, 0)
        if args.enable_tensorRT:
            config.enable_tensorrt_engine(
                max_batch_size=args.batchsize,
                min_subgraph_size=9,
                precision_mode=paddle.inference.PrecisionType.Float32)
    else:
        config.disable_gpu()
        config.set_cpu_math_library_num_threads(args.cpu_threads)
        if args.enable_mkldnn:
            config.enable_mkldnn()
            config.delete_pass("scale_matmul_fuse_pass")
    predictor = create_predictor(config)
    return predictor, config
def init_model(self):
    """Create the YOLOv3 GPU predictor with a TensorRT engine attached."""
    from paddle.inference import Config
    from paddle.inference import PrecisionType
    from paddle.inference import create_predictor

    # Map the requested parameter type onto a TRT precision; int8 also
    # requires offline calibration mode.
    precision_by_type = {
        "fp16": PrecisionType.Half,
        "int8": PrecisionType.Int8,
    }
    precision_mode = precision_by_type.get(self.param_type,
                                           PrecisionType.Float32)
    use_calib_mode = self.param_type == "int8"

    mode_path = os.path.join(self.model_dir, "yolov3/__model__")
    param_path = os.path.join(self.model_dir, "yolov3/__params__")
    config = Config(mode_path, param_path)
    config.enable_use_gpu(100, 0)
    config.switch_ir_optim(True)

    # ~101 MB of TRT workspace per image in the batch.
    workspace_bytes = (self.batch_size * 101) << 20
    config.enable_tensorrt_engine(workspace_size=workspace_bytes,
                                  max_batch_size=self.batch_size,
                                  min_subgraph_size=3,
                                  precision_mode=precision_mode,
                                  use_static=False,
                                  use_calib_mode=use_calib_mode)

    if self.debug:
        config.enable_profile()
    else:
        config.disable_glog_info()
    config.enable_memory_optim()
    config.switch_use_feed_fetch_ops(False)
    config.enable_mkldnn()
    self.predictor = create_predictor(config)
def load_predictor(model_dir,
                   run_mode='fluid',
                   batch_size=1,
                   device='CPU',
                   min_subgraph_size=3,
                   use_dynamic_shape=False,
                   trt_min_shape=1,
                   trt_max_shape=1280,
                   trt_opt_shape=640,
                   trt_calib_mode=False,
                   cpu_threads=1,
                   enable_mkldnn=False):
    """set AnalysisConfig, generate AnalysisPredictor

    Args:
        model_dir (str): root path of __model__ and __params__
        device (str): Choose the device you want to run, it can be:
            CPU/GPU/XPU, default is CPU
        run_mode (str): mode of running(fluid/trt_fp32/trt_fp16/trt_int8)
        use_dynamic_shape (bool): use dynamic shape or not
        trt_min_shape (int): min shape for dynamic shape in trt
        trt_max_shape (int): max shape for dynamic shape in trt
        trt_opt_shape (int): opt shape for dynamic shape in trt
        trt_calib_mode (bool): If the model is produced by TRT offline
            quantitative calibration, trt_calib_mode need to set True
    Returns:
        predictor (PaddlePredictor): AnalysisPredictor
        config (Config): the config the predictor was built from
    Raises:
        ValueError: predict by TensorRT need device == 'GPU'.
    """
    if device != 'GPU' and run_mode != 'fluid':
        raise ValueError(
            "Predict by TensorRT mode: {}, expect device=='GPU', but device == {}"
            .format(run_mode, device))
    config = Config(os.path.join(model_dir, 'model.pdmodel'),
                    os.path.join(model_dir, 'model.pdiparams'))
    if device == 'GPU':
        # initial GPU memory(M), device ID
        config.enable_use_gpu(200, 0)
        # optimize graph and fuse op
        config.switch_ir_optim(True)
    elif device == 'XPU':
        config.enable_lite_engine()
        config.enable_xpu(10 * 1024 * 1024)
    else:
        config.disable_gpu()
        config.set_cpu_math_library_num_threads(cpu_threads)
        if enable_mkldnn:
            try:
                # cache 10 different shapes for mkldnn to avoid memory leak
                config.set_mkldnn_cache_capacity(10)
                config.enable_mkldnn()
            except Exception:
                # Fix: the original bound the exception to an unused name `e`
                # and followed the print with a redundant `pass`. Best-effort
                # behavior (warn and continue) is kept.
                print(
                    "The current environment does not support `mkldnn`, so disable mkldnn."
                )
    precision_map = {
        'trt_int8': Config.Precision.Int8,
        'trt_fp32': Config.Precision.Float32,
        'trt_fp16': Config.Precision.Half
    }
    if run_mode in precision_map:
        config.enable_tensorrt_engine(workspace_size=1 << 10,
                                      max_batch_size=batch_size,
                                      min_subgraph_size=min_subgraph_size,
                                      precision_mode=precision_map[run_mode],
                                      use_static=False,
                                      use_calib_mode=trt_calib_mode)
        if use_dynamic_shape:
            min_input_shape = {
                'image': [batch_size, 3, trt_min_shape, trt_min_shape]
            }
            max_input_shape = {
                'image': [batch_size, 3, trt_max_shape, trt_max_shape]
            }
            opt_input_shape = {
                'image': [batch_size, 3, trt_opt_shape, trt_opt_shape]
            }
            config.set_trt_dynamic_shape_info(min_input_shape,
                                              max_input_shape,
                                              opt_input_shape)
            print('trt set dynamic shape done!')
    # disable print log when predict
    config.disable_glog_info()
    # enable shared memory
    config.enable_memory_optim()
    # disable feed, fetch OP, needed by zero_copy_run
    config.switch_use_feed_fetch_ops(False)
    predictor = create_predictor(config)
    return predictor, config
if __name__ == '__main__':
    args = parse_args()
    # Either a combined model directory, or a separate model + params file
    # pair, must be supplied.
    assert (args.model_dir != "") or \
        (args.model_file != "" and args.params_file != ""), \
        "Set model path error."
    assert args.img_path != "", "Set img_path error."

    # Init config: a non-empty model_dir implies the combined-model layout.
    if args.model_dir == "":
        config = Config(args.model_file, args.params_file)
    else:
        config = Config(args.model_dir)
    # 500 MB initial GPU memory on device 0.
    config.enable_use_gpu(500, 0)
    config.switch_ir_optim()
    config.enable_memory_optim()
    # FP32 TensorRT engine with a 1 GB workspace.
    config.enable_tensorrt_engine(workspace_size=1 << 30,
                                  precision_mode=PrecisionType.Float32,
                                  max_batch_size=1,
                                  min_subgraph_size=5,
                                  use_static=False,
                                  use_calib_mode=False)

    # Create predictor
    predictor = create_predictor(config)

    # Set input: read the image and run `preprocess` (defined elsewhere in
    # this file), then copy it into the first input tensor.
    img = cv2.imread(args.img_path)
    img = preprocess(img)
    input_names = predictor.get_input_names()
    input_tensor = predictor.get_input_handle(input_names[0])
    input_tensor.reshape(img.shape)
    input_tensor.copy_from_cpu(img.copy())

    # Run
    predictor.run()
class Predictor:
    """Paddle-inference wrapper for a segmentation model.

    Builds a PredictConfig from a DeployConfig, selects CPU/GPU (optionally
    TensorRT) settings, and exposes `run` to predict and save color maps.
    """

    def __init__(self, args):
        """
        Prepare for prediction.
        The usage and docs of paddle inference, please refer to
        https://paddleinference.paddlepaddle.org.cn/product_introduction/summary.html
        """
        self.args = args
        self.cfg = DeployConfig(args.cfg)
        self._init_base_config()
        if args.device == 'cpu':
            self._init_cpu_config()
        else:
            self._init_gpu_config()
        self.predictor = create_predictor(self.pred_cfg)
        # Optional auto_log benchmark timer (only if the CLI requested it).
        if hasattr(args, 'benchmark') and args.benchmark:
            import auto_log
            pid = os.getpid()
            self.autolog = auto_log.AutoLogger(model_name=args.model_name,
                                               model_precision=args.precision,
                                               batch_size=args.batch_size,
                                               data_shape="dynamic",
                                               save_path=None,
                                               inference_config=self.pred_cfg,
                                               pids=pid,
                                               process_name=None,
                                               gpu_ids=0,
                                               time_keys=[
                                                   'preprocess_time',
                                                   'inference_time',
                                                   'postprocess_time'
                                               ],
                                               warmup=0,
                                               logger=logger)

    def _init_base_config(self):
        # Shared config: glog off (unless detail requested), memory optim,
        # and IR graph optimization on.
        self.pred_cfg = PredictConfig(self.cfg.model, self.cfg.params)
        if not self.args.print_detail:
            self.pred_cfg.disable_glog_info()
        self.pred_cfg.enable_memory_optim()
        self.pred_cfg.switch_ir_optim(True)

    def _init_cpu_config(self):
        """
        Init the config for x86 cpu.
        """
        logger.info("Use CPU")
        self.pred_cfg.disable_gpu()
        if self.args.enable_mkldnn:
            logger.info("Use MKLDNN")
            # cache 10 different shapes for mkldnn
            self.pred_cfg.set_mkldnn_cache_capacity(10)
            self.pred_cfg.enable_mkldnn()
        self.pred_cfg.set_cpu_math_library_num_threads(self.args.cpu_threads)

    def _init_gpu_config(self):
        """
        Init the config for nvidia gpu.
        """
        logger.info("Use GPU")
        self.pred_cfg.enable_use_gpu(100, 0)
        precision_map = {
            "fp16": PrecisionType.Half,
            "fp32": PrecisionType.Float32,
            "int8": PrecisionType.Int8
        }
        precision_mode = precision_map[self.args.precision]
        if self.args.use_trt:
            logger.info("Use TRT")
            self.pred_cfg.enable_tensorrt_engine(workspace_size=1 << 30,
                                                 max_batch_size=1,
                                                 min_subgraph_size=50,
                                                 precision_mode=precision_mode,
                                                 use_static=False,
                                                 use_calib_mode=False)
            # Prefer auto-tuned dynamic shapes when a tuned shape file exists;
            # otherwise fall back to a fixed manual range for input "x".
            if use_auto_tune(self.args) and \
                os.path.exists(self.args.auto_tuned_shape_file):
                logger.info("Use auto tuned dynamic shape")
                allow_build_at_runtime = True
                self.pred_cfg.enable_tuned_tensorrt_dynamic_shape(
                    self.args.auto_tuned_shape_file, allow_build_at_runtime)
            else:
                logger.info("Use manual set dynamic shape")
                min_input_shape = {"x": [1, 3, 100, 100]}
                max_input_shape = {"x": [1, 3, 2000, 3000]}
                opt_input_shape = {"x": [1, 3, 512, 1024]}
                self.pred_cfg.set_trt_dynamic_shape_info(
                    min_input_shape, max_input_shape, opt_input_shape)

    def run(self, imgs):
        """Predict each batch of `imgs` (paths) and save the results."""
        if not isinstance(imgs, (list, tuple)):
            imgs = [imgs]
        num = len(imgs)
        input_names = self.predictor.get_input_names()
        input_handle = self.predictor.get_input_handle(input_names[0])
        output_names = self.predictor.get_output_names()
        output_handle = self.predictor.get_output_handle(output_names[0])
        results = []
        args = self.args
        if not os.path.exists(args.save_dir):
            os.makedirs(args.save_dir)
        for i in range(0, num, args.batch_size):
            if args.benchmark:
                self.autolog.times.start()
            data = np.array(
                [self._preprocess(img) for img in imgs[i:i + args.batch_size]])
            input_handle.reshape(data.shape)
            input_handle.copy_from_cpu(data)
            if args.benchmark:
                self.autolog.times.stamp()
            self.predictor.run()
            results = output_handle.copy_to_cpu()
            if args.benchmark:
                self.autolog.times.stamp()
            results = self._postprocess(results)
            if args.benchmark:
                self.autolog.times.end(stamp=True)
            # NOTE(review): `results` holds only the current batch, but the
            # full `imgs` list is passed; _save_imgs indexes imgs[0..batch)
            # each time, so batches after the first appear to reuse the first
            # batch's filenames — verify with batch_size > 1.
            self._save_imgs(results, imgs)
        logger.info("Finish")

    def _preprocess(self, img):
        # Apply the deploy-config transforms; index [0] keeps the image only.
        return self.cfg.transforms(img)[0]

    def _postprocess(self, results):
        # Optionally collapse class logits to label ids along the channel axis.
        if self.args.with_argmax:
            results = np.argmax(results, axis=1)
        return results

    def _save_imgs(self, results, imgs):
        # Save one pseudo-color PNG per result, named after the source image.
        for i in range(results.shape[0]):
            result = get_pseudo_color_map(results[i])
            basename = os.path.basename(imgs[i])
            basename, _ = os.path.splitext(basename)
            basename = f'{basename}.png'
            result.save(os.path.join(self.args.save_dir, basename))
class Predictor:
    """Paddle-inference wrapper for a matting model (image + optional trimap).

    Bug fixes versus the original:
    * In both the warm-up and inference loops, ``img = imgs[i]`` and
      ``trimap = trimaps[i]`` used the batch-start index inside
      ``for j in range(i, i + batch_size)``, so every element of a batch was
      the first image; now indexed with ``j``.
    * ``_save_imgs`` read a module-global ``args`` instead of ``self.args``.
    """

    def __init__(self, args):
        """
        Prepare for prediction.
        The usage and docs of paddle inference, please refer to
        https://paddleinference.paddlepaddle.org.cn/product_introduction/summary.html
        """
        self.args = args
        self.cfg = DeployConfig(args.cfg)
        self._init_base_config()
        if args.device == 'cpu':
            self._init_cpu_config()
        else:
            self._init_gpu_config()
        self.predictor = create_predictor(self.pred_cfg)
        # Optional auto_log benchmark timer (only if the CLI requested it).
        if hasattr(args, 'benchmark') and args.benchmark:
            import auto_log
            pid = os.getpid()
            self.autolog = auto_log.AutoLogger(model_name=args.model_name,
                                               model_precision=args.precision,
                                               batch_size=args.batch_size,
                                               data_shape="dynamic",
                                               save_path=None,
                                               inference_config=self.pred_cfg,
                                               pids=pid,
                                               process_name=None,
                                               gpu_ids=0,
                                               time_keys=[
                                                   'preprocess_time',
                                                   'inference_time',
                                                   'postprocess_time'
                                               ],
                                               warmup=0,
                                               logger=logger)

    def _init_base_config(self):
        # Shared config: glog off (unless detail requested), memory optim,
        # and IR graph optimization on.
        self.pred_cfg = PredictConfig(self.cfg.model, self.cfg.params)
        if not self.args.print_detail:
            self.pred_cfg.disable_glog_info()
        self.pred_cfg.enable_memory_optim()
        self.pred_cfg.switch_ir_optim(True)

    def _init_cpu_config(self):
        """
        Init the config for x86 cpu.
        """
        logger.info("Using CPU")
        self.pred_cfg.disable_gpu()
        if self.args.enable_mkldnn:
            logger.info("Using MKLDNN")
            # cache 10 different shapes for mkldnn
            self.pred_cfg.set_mkldnn_cache_capacity(10)
            self.pred_cfg.enable_mkldnn()
        self.pred_cfg.set_cpu_math_library_num_threads(self.args.cpu_threads)

    def _init_gpu_config(self):
        """
        Init the config for nvidia gpu.
        """
        logger.info("using GPU")
        self.pred_cfg.enable_use_gpu(100, 0)
        precision_map = {
            "fp16": PrecisionType.Half,
            "fp32": PrecisionType.Float32,
            "int8": PrecisionType.Int8
        }
        precision_mode = precision_map[self.args.precision]
        if self.args.use_trt:
            logger.info("Use TRT")
            self.pred_cfg.enable_tensorrt_engine(workspace_size=1 << 30,
                                                 max_batch_size=1,
                                                 min_subgraph_size=300,
                                                 precision_mode=precision_mode,
                                                 use_static=False,
                                                 use_calib_mode=False)
            # Prefer auto-tuned dynamic shapes when a tuned shape file exists;
            # otherwise fall back to a fixed manual range for input "x".
            if use_auto_tune(self.args) and \
                os.path.exists(self.args.auto_tuned_shape_file):
                logger.info("Use auto tuned dynamic shape")
                allow_build_at_runtime = True
                self.pred_cfg.enable_tuned_tensorrt_dynamic_shape(
                    self.args.auto_tuned_shape_file, allow_build_at_runtime)
            else:
                logger.info("Use manual set dynamic shape")
                min_input_shape = {"x": [1, 3, 100, 100]}
                max_input_shape = {"x": [1, 3, 2000, 3000]}
                opt_input_shape = {"x": [1, 3, 512, 1024]}
                self.pred_cfg.set_trt_dynamic_shape_info(
                    min_input_shape, max_input_shape, opt_input_shape)

    def run(self, imgs, trimaps=None, imgs_dir=None):
        """Predict alpha mattes for `imgs` (paths) and save alpha/clip images.

        NOTE(review): the per-batch loops index ``range(i, i + batch_size)``
        and ``range(args.batch_size)`` without clamping to ``num``, so a
        final partial batch would raise IndexError — preserved from the
        original; confirm callers always supply a multiple of batch_size.
        """
        self.imgs_dir = imgs_dir
        num = len(imgs)
        input_names = self.predictor.get_input_names()
        input_handle = {}
        for name in input_names:
            input_handle[name] = self.predictor.get_input_handle(name)
        output_names = self.predictor.get_output_names()
        output_handle = self.predictor.get_output_handle(output_names[0])
        args = self.args

        for i in tqdm.tqdm(range(0, num, args.batch_size)):
            # warm up
            if i == 0 and args.benchmark:
                for _ in range(5):
                    img_inputs = []
                    if trimaps is not None:
                        trimap_inputs = []
                    trans_info = []
                    for j in range(i, i + args.batch_size):
                        img = imgs[j]  # fix: was imgs[i]
                        trimap = trimaps[j] if trimaps is not None else None
                        data = self._preprocess(img=img, trimap=trimap)
                        img_inputs.append(data['img'])
                        if trimaps is not None:
                            trimap_inputs.append(
                                data['trimap'][np.newaxis, :, :])
                        trans_info.append(data['trans_info'])
                    img_inputs = np.array(img_inputs)
                    if trimaps is not None:
                        trimap_inputs = (
                            np.array(trimap_inputs)).astype('float32')
                    input_handle['img'].copy_from_cpu(img_inputs)
                    if trimaps is not None:
                        input_handle['trimap'].copy_from_cpu(trimap_inputs)
                    self.predictor.run()
                    results = output_handle.copy_to_cpu()
                    results = results.squeeze(1)
                    for j in range(args.batch_size):
                        trimap = trimap_inputs[
                            j] if trimaps is not None else None
                        result = self._postprocess(results[j],
                                                   trans_info[j],
                                                   trimap=trimap)

            # inference
            if args.benchmark:
                self.autolog.times.start()
            img_inputs = []
            if trimaps is not None:
                trimap_inputs = []
            trans_info = []
            for j in range(i, i + args.batch_size):
                img = imgs[j]  # fix: was imgs[i]
                trimap = trimaps[j] if trimaps is not None else None
                data = self._preprocess(img=img, trimap=trimap)
                img_inputs.append(data['img'])
                if trimaps is not None:
                    trimap_inputs.append(data['trimap'][np.newaxis, :, :])
                trans_info.append(data['trans_info'])
            img_inputs = np.array(img_inputs)
            if trimaps is not None:
                trimap_inputs = (np.array(trimap_inputs)).astype('float32')

            input_handle['img'].copy_from_cpu(img_inputs)
            if trimaps is not None:
                input_handle['trimap'].copy_from_cpu(trimap_inputs)
            if args.benchmark:
                self.autolog.times.stamp()
            self.predictor.run()
            if args.benchmark:
                self.autolog.times.stamp()
            results = output_handle.copy_to_cpu()
            results = results.squeeze(1)
            for j in range(args.batch_size):
                trimap = trimap_inputs[j] if trimaps is not None else None
                result = self._postprocess(results[j],
                                           trans_info[j],
                                           trimap=trimap)
                self._save_imgs(result, imgs[i + j])
            if args.benchmark:
                self.autolog.times.end(stamp=True)
        logger.info("Finish")

    def _preprocess(self, img, trimap=None):
        # Bundle the image (and optional trimap) and run the deploy-config
        # transforms, which also record 'trans_info' for shape recovery.
        data = {}
        data['img'] = img
        if trimap is not None:
            data['trimap'] = trimap
            data['gt_fields'] = ['trimap']
        data = self.cfg.transforms(data)
        return data

    def _postprocess(self, alpha, trans_info, trimap=None):
        """recover pred to origin shape"""
        if trimap is not None:
            trimap = trimap.squeeze(0)
            # Known background/foreground regions from the trimap are forced
            # to exact 0/1 alpha.
            alpha[trimap == 0] = 0
            alpha[trimap == 255] = 1
        # Undo the recorded transforms in reverse order.
        for item in trans_info[::-1]:
            if item[0] == 'resize':
                h, w = item[1][0], item[1][1]
                alpha = cv2.resize(alpha, (w, h),
                                   interpolation=cv2.INTER_LINEAR)
            elif item[0] == 'padding':
                h, w = item[1][0], item[1][1]
                alpha = alpha[:, :, 0:h, 0:w]
            else:
                raise Exception("Unexpected info '{}' in im_info".format(
                    item[0]))
        return alpha

    def _save_imgs(self, alpha, img_path):
        # Save the alpha matte and an RGBA "clip" composite, mirroring the
        # input's relative path under save_dir/alpha and save_dir/clip.
        ori_img = cv2.imread(img_path)
        alpha = (alpha * 255).astype('uint8')

        if self.imgs_dir is not None:
            img_path = img_path.replace(self.imgs_dir, '')
        name, ext = os.path.splitext(img_path)
        if name[0] == '/':
            name = name[1:]
        # fix: was the module-global `args`; use the instance's args.
        alpha_save_path = os.path.join(self.args.save_dir, 'alpha/',
                                       name + '.png')
        clip_save_path = os.path.join(self.args.save_dir, 'clip/',
                                      name + '.png')

        # save alpha
        mkdir(alpha_save_path)
        cv2.imwrite(alpha_save_path, alpha)

        # save clip image
        mkdir(clip_save_path)
        alpha = alpha[:, :, np.newaxis]
        clip = np.concatenate([ori_img, alpha], axis=-1)
        cv2.imwrite(clip_save_path, clip)
def load_predictor(model_dir,
                   run_mode='fluid',
                   batch_size=1,
                   use_gpu=False,
                   min_subgraph_size=3,
                   use_dynamic_shape=False,
                   trt_min_shape=1,
                   trt_max_shape=1280,
                   trt_opt_shape=640):
    """set AnalysisConfig, generate AnalysisPredictor

    Args:
        model_dir (str): root path of __model__ and __params__
        use_gpu (bool): whether use gpu
        run_mode (str): mode of running(fluid/trt_fp32/trt_fp16/trt_int8)
        use_dynamic_shape (bool): use dynamic shape or not
        trt_min_shape (int): min shape for dynamic shape in trt
        trt_max_shape (int): max shape for dynamic shape in trt
        trt_opt_shape (int): opt shape for dynamic shape in trt
    Returns:
        predictor (PaddlePredictor): AnalysisPredictor
    Raises:
        ValueError: predict by TensorRT need use_gpu == True.
    """
    if run_mode != 'fluid' and not use_gpu:
        raise ValueError(
            "Predict by TensorRT mode: {}, expect use_gpu==True, but use_gpu == {}"
            .format(run_mode, use_gpu))
    use_calib_mode = run_mode == 'trt_int8'

    cfg = Config(os.path.join(model_dir, 'model.pdmodel'),
                 os.path.join(model_dir, 'model.pdiparams'))
    trt_precisions = {
        'trt_int8': Config.Precision.Int8,
        'trt_fp32': Config.Precision.Float32,
        'trt_fp16': Config.Precision.Half,
    }

    if use_gpu:
        cfg.enable_use_gpu(200, 0)  # initial GPU memory (MB), device id 0
        cfg.switch_ir_optim(True)  # optimize graph and fuse ops
    else:
        cfg.disable_gpu()

    if run_mode in trt_precisions:
        cfg.enable_tensorrt_engine(workspace_size=1 << 10,
                                   max_batch_size=batch_size,
                                   min_subgraph_size=min_subgraph_size,
                                   precision_mode=trt_precisions[run_mode],
                                   use_static=False,
                                   use_calib_mode=use_calib_mode)
        if use_dynamic_shape:
            print('use_dynamic_shape')
            # Dynamic range for the single 'image' input (N=1, C=3).
            cfg.set_trt_dynamic_shape_info(
                {'image': [1, 3, trt_min_shape, trt_min_shape]},
                {'image': [1, 3, trt_max_shape, trt_max_shape]},
                {'image': [1, 3, trt_opt_shape, trt_opt_shape]})
            print('trt set dynamic shape done!')

    cfg.disable_glog_info()  # disable print log when predict
    cfg.enable_memory_optim()  # enable shared memory
    cfg.switch_use_feed_fetch_ops(False)  # needed by zero_copy_run
    return create_predictor(cfg)
images_dir = os.path.join(root_path, './dataset/images') # 均值 方差 mean = np.array([0.485, 0.456, 0.406]) std = np.array([0.229, 0.224, 0.225]) batch_size = 8 input_h = 608 input_w = 608 mode_path = os.path.join(root_path, "./model/yolov3/__model__") param_path = os.path.join(root_path, "./model/yolov3/__params__") config = Config(mode_path, param_path) config.enable_use_gpu(100, 0) config.switch_ir_optim(True) config.enable_tensorrt_engine(workspace_size=1 << 10, max_batch_size=batch_size, min_subgraph_size=3, precision_mode=PrecisionType.Int8, use_static=False, use_calib_mode=True) config.enable_memory_optim() config.switch_use_feed_fetch_ops(False) config.enable_mkldnn() #config.enable_profile() predictor = create_predictor(config) input_names = predictor.get_input_names() input_img = predictor.get_input_handle(input_names[0]) input_info = predictor.get_input_handle(input_names[1]) for name in os.listdir(images_dir): pic_list = list()