def set_config(args): config = Config(args.model_file, args.params_file) config.enable_lite_engine(PrecisionType.Float32, True) # use lite xpu subgraph config.enable_xpu(10 * 1024 * 1024) # use lite cuda subgraph # config.enable_use_gpu(100, 0) config.set_cpu_math_library_num_threads(args.math_thread_num) return config
def _set_config(self): """ predictor config setting. """ # create default cpu predictor cpu_config = Config(self.default_pretrained_model_path) cpu_config.disable_glog_info() cpu_config.disable_gpu() self.cpu_predictor = create_predictor(cpu_config) # create predictors using various types of devices # npu npu_id = self._get_device_id("FLAGS_selected_npus") if npu_id != -1: # use npu npu_config = Config(self.default_pretrained_model_path) npu_config.disable_glog_info() npu_config.enable_npu(device_id=npu_id) self.npu_predictor = create_predictor(npu_config) # gpu gpu_id = self._get_device_id("CUDA_VISIBLE_DEVICES") if gpu_id != -1: # use gpu gpu_config = Config(self.default_pretrained_model_path) gpu_config.disable_glog_info() gpu_config.enable_use_gpu(memory_pool_init_size_mb=1000, device_id=gpu_id) self.gpu_predictor = create_predictor(gpu_config) # xpu xpu_id = self._get_device_id("XPU_VISIBLE_DEVICES") if xpu_id != -1: # use xpu xpu_config = Config(self.default_pretrained_model_path) xpu_config.disable_glog_info() xpu_config.enable_xpu(100) self.xpu_predictor = create_predictor(xpu_config)
def load_predictor(model_dir, run_mode='fluid', batch_size=1, device='CPU', min_subgraph_size=3, use_dynamic_shape=False, trt_min_shape=1, trt_max_shape=1280, trt_opt_shape=640, trt_calib_mode=False, cpu_threads=1, enable_mkldnn=False): """set AnalysisConfig, generate AnalysisPredictor Args: model_dir (str): root path of __model__ and __params__ device (str): Choose the device you want to run, it can be: CPU/GPU/XPU, default is CPU run_mode (str): mode of running(fluid/trt_fp32/trt_fp16/trt_int8) use_dynamic_shape (bool): use dynamic shape or not trt_min_shape (int): min shape for dynamic shape in trt trt_max_shape (int): max shape for dynamic shape in trt trt_opt_shape (int): opt shape for dynamic shape in trt trt_calib_mode (bool): If the model is produced by TRT offline quantitative calibration, trt_calib_mode need to set True Returns: predictor (PaddlePredictor): AnalysisPredictor Raises: ValueError: predict by TensorRT need device == 'GPU'. """ if device != 'GPU' and run_mode != 'fluid': raise ValueError( "Predict by TensorRT mode: {}, expect device=='GPU', but device == {}" .format(run_mode, device)) config = Config(os.path.join(model_dir, 'model.pdmodel'), os.path.join(model_dir, 'model.pdiparams')) if device == 'GPU': # initial GPU memory(M), device ID config.enable_use_gpu(200, 0) # optimize graph and fuse op config.switch_ir_optim(True) elif device == 'XPU': config.enable_lite_engine() config.enable_xpu(10 * 1024 * 1024) else: config.disable_gpu() config.set_cpu_math_library_num_threads(cpu_threads) if enable_mkldnn: try: # cache 10 different shapes for mkldnn to avoid memory leak config.set_mkldnn_cache_capacity(10) config.enable_mkldnn() except Exception as e: print( "The current environment does not support `mkldnn`, so disable mkldnn." ) pass precision_map = { 'trt_int8': Config.Precision.Int8, 'trt_fp32': Config.Precision.Float32, 'trt_fp16': Config.Precision.Half } if run_mode in precision_map.keys(): config.enable_tensorrt_engine(workspace_size=1 << 10, max_batch_size=batch_size, min_subgraph_size=min_subgraph_size, precision_mode=precision_map[run_mode], use_static=False, use_calib_mode=trt_calib_mode) if use_dynamic_shape: min_input_shape = { 'image': [batch_size, 3, trt_min_shape, trt_min_shape] } max_input_shape = { 'image': [batch_size, 3, trt_max_shape, trt_max_shape] } opt_input_shape = { 'image': [batch_size, 3, trt_opt_shape, trt_opt_shape] } config.set_trt_dynamic_shape_info(min_input_shape, max_input_shape, opt_input_shape) print('trt set dynamic shape done!') # disable print log when predict config.disable_glog_info() # enable shared memory config.enable_memory_optim() # disable feed, fetch OP, needed by zero_copy_run config.switch_use_feed_fetch_ops(False) predictor = create_predictor(config) return predictor, config