def create_predictor(args, mode, model_path):
    model_dir = model_path
    model_file_path = model_dir + "/model"
    params_file_path = model_dir + "/params"
    assert os.path.exists(model_file_path)
    assert os.path.exists(params_file_path)

    config = AnalysisConfig(model_file_path, params_file_path)

    # use CPU
    config.disable_gpu()
    config.set_cpu_math_library_num_threads(6)
    if args['enable_mkldnn']:
        config.enable_mkldnn()
    # config.enable_memory_optim()
    config.disable_glog_info()

    if args['use_zero_copy_run']:
        config.delete_pass("conv_transpose_eltwiseadd_bn_fuse_pass")
        config.switch_use_feed_fetch_ops(False)
    else:
        config.switch_use_feed_fetch_ops(True)

    predictor = create_paddle_predictor(config)
    input_names = predictor.get_input_names()
    for name in input_names:
        input_tensor = predictor.get_input_tensor(name)
    output_names = predictor.get_output_names()
    output_tensors = []
    for output_name in output_names:
        output_tensor = predictor.get_output_tensor(output_name)
        output_tensors.append(output_tensor)
    return predictor, input_tensor, output_tensors
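# A hedged usage sketch for create_predictor above, via the Paddle 1.x
# zero-copy API. The imports, the args dict keys, the model path, and the
# dummy input shape are assumptions for illustration, not part of the
# original snippet. Note that the loop over input_names keeps only the
# tensor of the last input name, so this assumes a single-input model.
import numpy as np
from paddle.fluid.core import AnalysisConfig, create_paddle_predictor

args = {'enable_mkldnn': False, 'use_zero_copy_run': True}
predictor, input_tensor, output_tensors = create_predictor(
    args, 'det', './inference/det')

img = np.random.rand(1, 3, 640, 640).astype('float32')  # dummy NCHW batch
input_tensor.copy_from_cpu(img)  # zero-copy feed into the input tensor
predictor.zero_copy_run()        # run without feed/fetch ops
results = [t.copy_to_cpu() for t in output_tensors]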
def _set_config(self):
    """
    predictor config setting
    """
    model_file_path = os.path.join(self.pretrained_model_path, 'model')
    params_file_path = os.path.join(self.pretrained_model_path, 'params')
    config = AnalysisConfig(model_file_path, params_file_path)
    try:
        _places = os.environ["CUDA_VISIBLE_DEVICES"]
        int(_places[0])
        use_gpu = True
    except:
        use_gpu = False

    if use_gpu:
        config.enable_use_gpu(8000, 0)
    else:
        config.disable_gpu()
    config.disable_glog_info()

    # use zero copy
    config.delete_pass("conv_transpose_eltwiseadd_bn_fuse_pass")
    config.switch_use_feed_fetch_ops(False)

    self.predictor = create_paddle_predictor(config)
    input_names = self.predictor.get_input_names()
    self.input_tensor = self.predictor.get_input_tensor(input_names[0])
    output_names = self.predictor.get_output_names()
    self.output_tensors = []
    for output_name in output_names:
        output_tensor = self.predictor.get_output_tensor(output_name)
        self.output_tensors.append(output_tensor)
def _set_config(self):
    """
    predictor config setting
    """
    self.model_file_path = self.default_pretrained_model_path
    cpu_config = AnalysisConfig(self.model_file_path)
    cpu_config.disable_glog_info()
    cpu_config.switch_ir_optim(True)
    cpu_config.enable_memory_optim()
    cpu_config.switch_use_feed_fetch_ops(False)
    cpu_config.switch_specify_input_names(True)
    cpu_config.disable_gpu()
    self.cpu_predictor = create_paddle_predictor(cpu_config)

    try:
        _places = os.environ["CUDA_VISIBLE_DEVICES"]
        int(_places[0])
        use_gpu = True
    except:
        use_gpu = False
    if use_gpu:
        gpu_config = AnalysisConfig(self.model_file_path)
        gpu_config.disable_glog_info()
        gpu_config.switch_ir_optim(True)
        gpu_config.enable_memory_optim()
        gpu_config.switch_use_feed_fetch_ops(False)
        gpu_config.switch_specify_input_names(True)
        gpu_config.enable_use_gpu(100, 0)
        self.gpu_predictor = create_paddle_predictor(gpu_config)
def load_model(self, modelpath, use_gpu):
    # Configure the execution device
    if use_gpu:
        try:
            places = os.environ["CUDA_VISIBLE_DEVICES"]
            places = int(places[0])
        except Exception as e:
            print(
                'Error: %s. Please set the environment variable "CUDA_VISIBLE_DEVICES".'
                % e)
            use_gpu = False

    # Load the model
    config = AnalysisConfig(modelpath)

    # Set config options
    if use_gpu:
        config.enable_use_gpu(100, places)
    else:
        config.disable_gpu()
        config.enable_mkldnn()
    config.disable_glog_info()
    config.switch_ir_optim(True)
    config.enable_memory_optim()
    config.switch_use_feed_fetch_ops(False)
    config.switch_specify_input_names(True)

    # Create the predictor from the config
    predictor = create_paddle_predictor(config)

    # Return the predictor
    return predictor
def _set_config(self):
    """
    predictor config setting
    """
    # encoder
    cpu_config_enc = AnalysisConfig(self.pretrained_encoder_net)
    cpu_config_enc.disable_glog_info()
    cpu_config_enc.disable_gpu()
    self.cpu_predictor_enc = create_paddle_predictor(cpu_config_enc)
    # decoder
    cpu_config_dec = AnalysisConfig(self.pretrained_decoder_net)
    cpu_config_dec.disable_glog_info()
    cpu_config_dec.disable_gpu()
    self.cpu_predictor_dec = create_paddle_predictor(cpu_config_dec)

    try:
        _places = os.environ["CUDA_VISIBLE_DEVICES"]
        int(_places[0])
        use_gpu = True
    except:
        use_gpu = False
    if use_gpu:
        # encoder
        gpu_config_enc = AnalysisConfig(self.pretrained_encoder_net)
        gpu_config_enc.disable_glog_info()
        gpu_config_enc.enable_use_gpu(memory_pool_init_size_mb=1000, device_id=0)
        self.gpu_predictor_enc = create_paddle_predictor(gpu_config_enc)
        # decoder
        gpu_config_dec = AnalysisConfig(self.pretrained_decoder_net)
        gpu_config_dec.disable_glog_info()
        gpu_config_dec.enable_use_gpu(memory_pool_init_size_mb=1000, device_id=0)
        self.gpu_predictor_dec = create_paddle_predictor(gpu_config_dec)
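# A hedged sketch of how the paired predictors above are typically used in
# PaddleHub-style modules: pick the encoder/decoder pair for the requested
# device at call time. This helper is an illustration meant to live on the
# same class as _set_config; the actual forward logic is omitted.
def _choose_predictors(self, use_gpu):
    if use_gpu:
        return self.gpu_predictor_enc, self.gpu_predictor_dec
    return self.cpu_predictor_enc, self.cpu_predictor_dec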
def _set_config(self):
    """
    predictor config setting
    """
    self.model_file_path = os.path.join(self.default_pretrained_model_path,
                                        '__model__')
    self.params_file_path = os.path.join(self.default_pretrained_model_path,
                                         '__params__')
    cpu_config = AnalysisConfig(self.model_file_path, self.params_file_path)
    cpu_config.disable_glog_info()
    cpu_config.disable_gpu()
    self.cpu_predictor = create_paddle_predictor(cpu_config)

    try:
        _places = os.environ["CUDA_VISIBLE_DEVICES"]
        int(_places[0])
        use_gpu = True
    except:
        use_gpu = False
    if use_gpu:
        gpu_config = AnalysisConfig(self.model_file_path, self.params_file_path)
        gpu_config.disable_glog_info()
        gpu_config.enable_use_gpu(memory_pool_init_size_mb=1000, device_id=0)
        self.gpu_predictor = create_paddle_predictor(gpu_config)
def __init__(self, model_dir, label_id_path, vocab_path,
             gpu_id=None, gpu_mem=8000, zero_copy=True):
    self.tokenizer = ErnieTokenizer.from_pretrained(vocab_path)
    self.id_2_token = {v: k for k, v in self.tokenizer.vocab.items()}
    label_encoder = LabelEncoder(label_id_info=label_id_path, isFile=True)
    self.id_label_dict = label_encoder.id_label_dict

    # Set up the AnalysisConfig
    config = AnalysisConfig(model_dir)
    if gpu_id is None:
        config.disable_gpu()
    else:
        config.enable_use_gpu(gpu_mem, gpu_id)
        logging.info("gpu id: {}".format(config.gpu_device_id()))

    self.zero_copy = zero_copy
    if self.zero_copy:
        config.switch_use_feed_fetch_ops(False)

    # Create the PaddlePredictor
    self.predictor = create_paddle_predictor(config)
    if self.zero_copy:
        input_names = self.predictor.get_input_names()
        #logging.info(input_names)
        self.input_tensor = self.predictor.get_input_tensor(input_names[0])
        output_names = self.predictor.get_output_names()
        #logging.info(output_names)
        self.output_tensor = self.predictor.get_output_tensor(output_names[0])
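# Hypothetical calling code for the class above (its name is not shown, so
# `Predictor` and all paths here are placeholders). Assumes the exported
# model takes a single int64 token-id tensor; real ERNIE exports often
# require several input tensors (sentence ids, position ids, mask).
import numpy as np

p = Predictor(model_dir='./ernie_infer', label_id_path='./label_id.json',
              vocab_path='./vocab.txt')
ids = np.array([[1, 746, 2394, 2]], dtype='int64')  # [batch, seq_len]
p.input_tensor.copy_from_cpu(ids)
p.predictor.zero_copy_run()
logits = p.output_tensor.copy_to_cpu()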
def predict(args):
    # config AnalysisConfig
    config = AnalysisConfig(args.model_file, args.params_file)
    if args.gpu_id < 0:
        config.disable_gpu()
    else:
        config.enable_use_gpu(args.gpu_mem, args.gpu_id)

    # you can enable the tensorrt engine if paddle is installed with tensorrt
    # config.enable_tensorrt_engine()

    predictor = create_paddle_predictor(config)

    # input
    inputs = preprocess_image(args.image_path)
    inputs = PaddleTensor(inputs)

    # predict
    outputs = predictor.run([inputs])

    # get output
    output = outputs[0]
    output = output.as_ndarray().flatten()
    cls = np.argmax(output)
    score = output[cls]
    logger.info("class: {0}".format(cls))
    logger.info("score: {0}".format(score))
    return
def _set_config(self):
    # predictor config setting.
    cpu_config = AnalysisConfig(self.default_pretrained_model_path)
    cpu_config.disable_glog_info()
    cpu_config.disable_gpu()
    cpu_config.switch_ir_optim(False)
    self.cpu_predictor = create_paddle_predictor(cpu_config)

    try:
        _places = os.environ["CUDA_VISIBLE_DEVICES"]
        int(_places[0])
        use_gpu = True
    except:
        use_gpu = False
    if use_gpu:
        gpu_config = AnalysisConfig(self.default_pretrained_model_path)
        gpu_config.disable_glog_info()
        gpu_config.enable_use_gpu(memory_pool_init_size_mb=500, device_id=0)
        self.gpu_predictor = create_paddle_predictor(gpu_config)

    # model config setting.
    if not self.model_config:
        with open(os.path.join(self.directory, 'config.yml')) as fp:
            self.model_config = yaml.load(fp.read(), Loader=yaml.FullLoader)

    self.multi_box_head_config = self.model_config['MultiBoxHead']
    self.output_decoder_config = self.model_config['SSDOutputDecoder']
    self.mobilenet_config = self.model_config['MobileNet']
def _get_analysis_config(self, use_gpu=False, use_trt=False, use_mkldnn=False):
    '''
    Return a new object of AnalysisConfig.
    '''
    config = AnalysisConfig(
        os.path.join(self.path, "model"), os.path.join(self.path, "params"))
    config.disable_gpu()
    config.switch_specify_input_names(True)
    config.switch_ir_optim(True)
    config.switch_use_feed_fetch_ops(False)
    if use_gpu:
        config.enable_use_gpu(100, 0)
        if use_trt:
            config.enable_tensorrt_engine(
                self.trt_parameters.workspace_size,
                self.trt_parameters.max_batch_size,
                self.trt_parameters.min_subgraph_size,
                self.trt_parameters.precision,
                self.trt_parameters.use_static,
                self.trt_parameters.use_calib_mode)
    elif use_mkldnn:
        config.enable_mkldnn()
    return config
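# The trt_parameters object above is not defined in this snippet; in
# Paddle's inference pass tests it is a small settings container along
# these lines (a hedged reconstruction, not the verbatim class):
class TensorRTParam:
    def __init__(self, workspace_size, max_batch_size, min_subgraph_size,
                 precision, use_static, use_calib_mode):
        self.workspace_size = workspace_size        # TRT workspace size in bytes
        self.max_batch_size = max_batch_size
        self.min_subgraph_size = min_subgraph_size  # min ops to form a TRT subgraph
        self.precision = precision                  # e.g. AnalysisConfig.Precision.Float32
        self.use_static = use_static                # serialize the engine to disk
        self.use_calib_mode = use_calib_mode        # INT8 calibration mode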
def create_predictor(mode):
    if mode == "detect":
        model_file_path = config.det_model_dir
        params_file_path = config.det_param_dir
    else:
        model_file_path = config.rec_model_dir
        params_file_path = config.rec_param_dir

    an_config = AnalysisConfig(model_file_path, params_file_path)
    if config.use_gpu:
        an_config.enable_use_gpu(config.gpu_mem, 0)
    else:
        an_config.disable_gpu()
    an_config.disable_glog_info()

    # use zero copy
    an_config.delete_pass("conv_transpose_eltwiseadd_bn_fuse_pass")
    an_config.switch_use_feed_fetch_ops(False)

    predictor = create_paddle_predictor(an_config)
    input_names = predictor.get_input_names()
    input_tensor = predictor.get_input_tensor(input_names[0])
    output_names = predictor.get_output_names()
    output_tensors = []
    for output_name in output_names:
        output_tensor = predictor.get_output_tensor(output_name)
        output_tensors.append(output_tensor)
    return predictor, input_tensor, output_tensors
def create_predictor(mode):
    """
    create predictor for inference
    :param mode: one of 'det', 'cls', 'rec'
    :return: predictor
    """
    if mode == "det":
        model_dir = "./src/ai/ocr_paddle/inference/ch_ppocr_mobile_v1.1_det_infer"
    elif mode == 'cls':
        model_dir = "./src/ai/ocr_paddle/inference/ch_ppocr_mobile_v1.1_cls_infer"
    elif mode == 'rec':
        model_dir = "./src/ai/ocr_paddle/inference/ch_ppocr_mobile_v1.1_rec_infer"
    else:
        raise ValueError(
            "'mode' of create_predictor() can only be one of ['det', 'cls', 'rec']"
        )

    if model_dir is None:
        logger.info("not find {} model file path {}".format(mode, model_dir))
        sys.exit(0)
    model_file_path = model_dir + "/model"
    params_file_path = model_dir + "/params"
    if not os.path.exists(model_file_path):
        logger.info("not find model file path {}".format(model_file_path))
        sys.exit(0)
    if not os.path.exists(params_file_path):
        logger.info("not find params file path {}".format(params_file_path))
        sys.exit(0)

    config = AnalysisConfig(model_file_path, params_file_path)
    config.disable_gpu()
    config.set_cpu_math_library_num_threads(6)

    enable_mkldnn = False
    if enable_mkldnn:
        # cache 10 different shapes for mkldnn to avoid memory leak
        config.set_mkldnn_cache_capacity(10)
        config.enable_mkldnn()

    # config.enable_memory_optim()
    config.disable_glog_info()

    use_zero_copy_run = False
    if use_zero_copy_run:
        config.delete_pass("conv_transpose_eltwiseadd_bn_fuse_pass")
        config.switch_use_feed_fetch_ops(False)
    else:
        config.switch_use_feed_fetch_ops(True)

    predictor = create_paddle_predictor(config)
    input_names = predictor.get_input_names()
    for name in input_names:
        input_tensor = predictor.get_input_tensor(name)
    output_names = predictor.get_output_names()
    output_tensors = []
    for output_name in output_names:
        output_tensor = predictor.get_output_tensor(output_name)
        output_tensors.append(output_tensor)
    return predictor, input_tensor, output_tensors
def main():
    args = parse_args()
    model_file = args.model_dir + "/__model__"
    params_file = args.model_dir + "/params"
    config = AnalysisConfig(model_file, params_file)
    config.disable_gpu()
    predictor = create_paddle_predictor(config)
    test_image(predictor, args.image_path)
def create_predictor(args, mode, logger):
    if mode == "det":
        model_dir = args.det_model_dir
    elif mode == 'cls':
        model_dir = args.cls_model_dir
    else:
        model_dir = args.rec_model_dir

    if model_dir is None:
        logger.info("not find {} model file path {}".format(mode, model_dir))
        sys.exit(0)
    model_file_path = model_dir + "/inference.pdmodel"
    params_file_path = model_dir + "/inference.pdiparams"
    if not os.path.exists(model_file_path):
        logger.info("not find model file path {}".format(model_file_path))
        sys.exit(0)
    if not os.path.exists(params_file_path):
        logger.info("not find params file path {}".format(params_file_path))
        sys.exit(0)

    config = AnalysisConfig(model_file_path, params_file_path)

    if args.use_gpu:
        config.enable_use_gpu(args.gpu_mem, 0)
        if args.use_tensorrt:
            config.enable_tensorrt_engine(
                precision_mode=AnalysisConfig.Precision.Half
                if args.use_fp16 else AnalysisConfig.Precision.Float32,
                max_batch_size=args.max_batch_size)
    else:
        config.disable_gpu()
        config.set_cpu_math_library_num_threads(6)
        if args.enable_mkldnn:
            # cache 10 different shapes for mkldnn to avoid memory leak
            config.set_mkldnn_cache_capacity(10)
            config.enable_mkldnn()

    # config.enable_memory_optim()
    config.disable_glog_info()

    if args.use_zero_copy_run:
        config.delete_pass("conv_transpose_eltwiseadd_bn_fuse_pass")
        config.switch_use_feed_fetch_ops(False)
    else:
        config.switch_use_feed_fetch_ops(True)

    predictor = create_paddle_predictor(config)
    input_names = predictor.get_input_names()
    for name in input_names:
        input_tensor = predictor.get_input_tensor(name)
    output_names = predictor.get_output_names()
    output_tensors = []
    for output_name in output_names:
        output_tensor = predictor.get_output_tensor(output_name)
        output_tensors.append(output_tensor)
    return predictor, input_tensor, output_tensors
def __init__(self):
    """
    create predictor manager
    """
    self.get_predictor_timeout = float(
        config.get('get.predictor.timeout', default_value=0.5))
    predictor_count = 0
    enable_mkl = False
    gpu_memory = 200
    gpu_device_ids = []
    model_dir = config.get('model.dir')
    device_type = config.get('device.type')
    if device_type == PredictorManager.CPU_DEVICE:
        cpu_predictor_count = int(
            config.getint('cpu.predictor.count', default_value=0))
        predictor_count = cpu_predictor_count
        enable_mkl = config.getboolean('cpu.enable_mkl', default_value=False)
    elif device_type == PredictorManager.GPU_DEVICE:
        gpu_predictor_count = int(
            config.getint('gpu.predictor.count', default_value=0))
        predictor_count = gpu_predictor_count
        gpu_memory = config.getint('gpu.predictor.memory', default_value=200)
        gpu_device_ids = config.get('gpu.predictor.device.id').split(',')
        gpu_device_ids = map(int, gpu_device_ids)
        if PYTHON_VERSION == 3:
            gpu_device_ids = list(gpu_device_ids)
        assert len(gpu_device_ids) == gpu_predictor_count, \
            "gpu predictor count doesn't match device count"
    else:
        raise Exception("no device to run predictor!")
    assert predictor_count > 0, "no device to predict"
    logger.info(
        "device type:{} predictor count:{} model dir:{} get predictor timeout:{}s"
        .format(device_type, predictor_count, model_dir,
                self.get_predictor_timeout))

    self.predictor_queue = Queue(maxsize=predictor_count)
    for i in range(predictor_count):
        # Set config
        predictor_config = AnalysisConfig(model_dir)
        # predictor_config.specify_input_name()
        if device_type == PredictorManager.CPU_DEVICE:
            predictor_config.disable_gpu()
            if enable_mkl:
                predictor_config.enable_mkldnn()
        else:
            device_id = gpu_device_ids[i]
            predictor_config.enable_use_gpu(gpu_memory, device_id)
        # Create PaddlePredictor
        predictor = create_paddle_predictor(predictor_config)
        self.predictor_queue.put(predictor)
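# A hedged sketch of the borrow/return pattern a caller might use with the
# manager's queue; PredictorManager and the timeout come from __init__ above,
# while the `Empty` import is an assumption about the queue module in use.
from queue import Empty

manager = PredictorManager()
try:
    predictor = manager.predictor_queue.get(timeout=manager.get_predictor_timeout)
except Empty:
    raise RuntimeError("no idle predictor within timeout")
try:
    pass  # run inference with `predictor` here
finally:
    manager.predictor_queue.put(predictor)  # always return it for reuse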
def set_config(self, model_file, params_file, use_feed_fetch_ops,
               specify_input_names):
    config = AnalysisConfig(model_file, params_file)
    config.disable_gpu()
    config.enable_mkldnn()
    config.disable_glog_info()
    config.switch_ir_optim(True)
    config.switch_use_feed_fetch_ops(use_feed_fetch_ops)
    config.switch_specify_input_names(specify_input_names)
    return config
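# Minimal usage sketch for the helper above; `helper` and the model paths
# are placeholders, since the owning class is not shown in this snippet.
from paddle.fluid.core import create_paddle_predictor

config = helper.set_config('./inference/model', './inference/params',
                           use_feed_fetch_ops=False, specify_input_names=True)
predictor = create_paddle_predictor(config)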
def load_model(self, model_dir, roll_back=False):
    print("load_model==>", model_dir)
    config = AnalysisConfig(model_dir)
    # run on CPU only
    config.disable_gpu()
    # keep the previous predictor so it can be rolled back to
    if self.predictor and roll_back:
        self.histroy.push(self.predictor)
    # create the predictor
    self.predictor = create_paddle_predictor(config)
    return self.predictor
def __load_inference_model(self, model_path, use_gpu):
    """
    :param model_path: directory containing the `model` and `params` files
    :param use_gpu: whether to run inference on GPU
    :return: a paddle predictor
    """
    check_cuda(use_gpu)
    config = AnalysisConfig(model_path + "/" + "model",
                            model_path + "/" + "params")
    if use_gpu:
        config.enable_use_gpu(1024)
    else:
        config.disable_gpu()
        config.enable_mkldnn()
    inference = create_paddle_predictor(config.to_native_config())
    return inference
def create_predictor(args, mode):
    if mode == "det":
        model_dir = args.det_model_dir
    else:
        model_dir = args.rec_model_dir

    if model_dir is None:
        logger.info("not find {} model file path {}".format(mode, model_dir))
        sys.exit(0)
    model_file_path = model_dir + "/model"
    params_file_path = model_dir + "/params"
    if not os.path.exists(model_file_path):
        logger.info("not find model file path {}".format(model_file_path))
        sys.exit(0)
    if not os.path.exists(params_file_path):
        logger.info("not find params file path {}".format(params_file_path))
        sys.exit(0)

    config = AnalysisConfig(model_file_path, params_file_path)

    if args.use_gpu:
        config.enable_use_gpu(args.gpu_mem, 0)
    else:
        config.disable_gpu()
        config.set_cpu_math_library_num_threads(6)
        if args.enable_mkldnn:
            config.enable_mkldnn()

    # config.enable_memory_optim()
    config.disable_glog_info()

    if args.use_zero_copy_run:
        config.delete_pass("conv_transpose_eltwiseadd_bn_fuse_pass")
        config.switch_use_feed_fetch_ops(False)
    else:
        config.switch_use_feed_fetch_ops(True)

    predictor = create_paddle_predictor(config)
    input_names = predictor.get_input_names()
    for name in input_names:
        input_tensor = predictor.get_input_tensor(name)
    output_names = predictor.get_output_names()
    output_tensors = []
    for output_name in output_names:
        output_tensor = predictor.get_output_tensor(output_name)
        output_tensors.append(output_tensor)
    return predictor, input_tensor, output_tensors
def create_predictor(args, mode):
    if mode == "det":
        model_dir = args.det_model_dir
    else:
        model_dir = args.rec_model_dir

    if model_dir is None:
        logger.info("not find {} model file path {}".format(mode, model_dir))
        sys.exit(0)
    model_file_path = model_dir + "/model"
    params_file_path = model_dir + "/params"
    if not os.path.exists(model_file_path):
        logger.info("not find model file path {}".format(model_file_path))
        sys.exit(0)
    if not os.path.exists(params_file_path):
        logger.info("not find params file path {}".format(params_file_path))
        sys.exit(0)

    config = AnalysisConfig(model_file_path, params_file_path)

    if args.use_gpu:
        config.enable_use_gpu(args.gpu_mem, 0)
    else:
        config.disable_gpu()

    config.disable_glog_info()
    config.switch_ir_optim(args.ir_optim)
    # if args.use_tensorrt:
    #     config.enable_tensorrt_engine(
    #         precision_mode=AnalysisConfig.Precision.Half
    #         if args.use_fp16 else AnalysisConfig.Precision.Float32,
    #         max_batch_size=args.batch_size)

    config.enable_memory_optim()
    # use zero copy
    config.switch_use_feed_fetch_ops(False)

    predictor = create_paddle_predictor(config)
    input_names = predictor.get_input_names()
    input_tensor = predictor.get_input_tensor(input_names[0])
    output_names = predictor.get_output_names()
    output_tensors = []
    for output_name in output_names:
        output_tensor = predictor.get_output_tensor(output_name)
        output_tensors.append(output_tensor)
    return predictor, input_tensor, output_tensors
def _get_analysis_config(self, use_gpu=False, use_trt=False, use_mkldnn=False):
    '''
    Return a new object of AnalysisConfig.
    '''
    config = AnalysisConfig(self.path)
    config.disable_gpu()
    config.switch_specify_input_names(True)
    config.switch_ir_optim(True)
    config.switch_use_feed_fetch_ops(False)
    if use_gpu:
        config.enable_use_gpu(100, 0)
        if use_trt:
            config.enable_tensorrt_engine(
                self.trt_parameters.workspace_size,
                self.trt_parameters.max_batch_size,
                self.trt_parameters.min_subgraph_size,
                self.trt_parameters.precision,
                self.trt_parameters.use_static,
                self.trt_parameters.use_calib_mode)
            if self.trt_parameters.use_inspector:
                config.enable_tensorrt_inspector()
                self.assertTrue(
                    config.tensorrt_inspector_enabled(),
                    "The inspector option is not set correctly.")
            if self.dynamic_shape_params:
                config.set_trt_dynamic_shape_info(
                    self.dynamic_shape_params.min_input_shape,
                    self.dynamic_shape_params.max_input_shape,
                    self.dynamic_shape_params.optim_input_shape,
                    self.dynamic_shape_params.disable_trt_plugin_fp16)
            if self.enable_tensorrt_varseqlen:
                config.enable_tensorrt_varseqlen()
    elif use_mkldnn:
        config.enable_mkldnn()
        if self.enable_mkldnn_bfloat16:
            config.enable_mkldnn_bfloat16()
    print('config summary:', config.summary())
    return config
def _set_config(self):
    """
    predictor config setting.
    """
    cpu_config = AnalysisConfig(self.default_pretrained_model_path)
    cpu_config.disable_glog_info()
    cpu_config.disable_gpu()
    cpu_config.switch_ir_optim(False)
    self.cpu_predictor = create_paddle_predictor(cpu_config)

    try:
        _places = os.environ["CUDA_VISIBLE_DEVICES"]
        int(_places[0])
        use_gpu = True
    except:
        use_gpu = False
    if use_gpu:
        gpu_config = AnalysisConfig(self.default_pretrained_model_path)
        gpu_config.disable_glog_info()
        gpu_config.enable_use_gpu(memory_pool_init_size_mb=500, device_id=0)
        self.gpu_predictor = create_paddle_predictor(gpu_config)
def set_config(self, model_path, num_threads, mkldnn_cache_capacity,
               warmup_data=None, use_analysis=False, enable_ptq=False):
    config = AnalysisConfig(model_path)
    config.set_cpu_math_library_num_threads(num_threads)
    if use_analysis:
        config.disable_gpu()
        config.switch_use_feed_fetch_ops(True)
        config.switch_ir_optim(True)
        config.enable_mkldnn()
        config.set_mkldnn_cache_capacity(mkldnn_cache_capacity)
        if enable_ptq:
            # For this pass to work properly, it must be added before fc_fuse_pass
            config.pass_builder().insert_pass(5, "fc_lstm_fuse_pass")
            config.enable_quantizer()
            config.quantizer_config().set_quant_data(warmup_data)
            config.quantizer_config().set_quant_batch_size(1)
    return config
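# Hedged usage sketch for the INT8 path above: in Paddle 1.x post-training
# quantization examples, the warmup data is a list holding one batch of
# PaddleTensor inputs. `tester`, the model directory, and the input shape
# are placeholders, not part of the original snippet.
import numpy as np
from paddle.fluid.core import PaddleTensor, create_paddle_predictor

image = np.random.rand(1, 3, 224, 224).astype('float32')
config = tester.set_config('./mobilenet_v1', num_threads=4,
                           mkldnn_cache_capacity=10,
                           warmup_data=[PaddleTensor(image)],
                           use_analysis=True, enable_ptq=True)
predictor = create_paddle_predictor(config)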
def create_predictor(args):
    config = AnalysisConfig(args.model_file, args.params_file)

    if args.use_gpu:
        config.enable_use_gpu(args.gpu_mem, 0)
    else:
        config.disable_gpu()

    config.disable_glog_info()
    config.switch_ir_optim(args.ir_optim)  # default true
    if args.use_tensorrt:
        config.enable_tensorrt_engine(
            precision_mode=AnalysisConfig.Precision.Half
            if args.use_fp16 else AnalysisConfig.Precision.Float32,
            max_batch_size=args.batch_size)

    config.enable_memory_optim()
    # use zero copy
    config.switch_use_feed_fetch_ops(False)

    predictor = create_paddle_predictor(config)
    return predictor
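# Because the config above disables feed/fetch ops, callers must feed and
# fetch through zero-copy tensors; a minimal sketch, assuming `args` carries
# the fields used by create_predictor and the model takes one 224x224 input.
import numpy as np

predictor = create_predictor(args)
input_names = predictor.get_input_names()
input_tensor = predictor.get_input_tensor(input_names[0])
input_tensor.copy_from_cpu(np.random.rand(1, 3, 224, 224).astype('float32'))
predictor.zero_copy_run()
output_names = predictor.get_output_names()
scores = predictor.get_output_tensor(output_names[0]).copy_to_cpu()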
def _load_model_and_set_config(self):
    '''
    load model from file and set analysis config
    '''
    if os.path.exists(os.path.join(self.model_path, self.params_file)):
        config = AnalysisConfig(
            os.path.join(self.model_path, "__model__"),
            os.path.join(self.model_path, self.params_file))
    else:
        config = AnalysisConfig(os.path.join(self.model_path))

    if fluid.is_compiled_with_cuda():
        config.enable_use_gpu(100, 0)
    else:
        config.disable_gpu()
    config.switch_specify_input_names(True)
    config.switch_use_feed_fetch_ops(False)
    config.enable_memory_optim()
    config.disable_glog_info()
    config.switch_ir_optim(True)
    return config
def _set_config(self, pretrained_model_path):
    """
    predictor config setting
    """
    model_file_path = os.path.join(pretrained_model_path, 'model')
    params_file_path = os.path.join(pretrained_model_path, 'params')
    config = AnalysisConfig(model_file_path, params_file_path)
    try:
        _places = os.environ["CUDA_VISIBLE_DEVICES"]
        int(_places[0])
        use_gpu = True
    except:
        use_gpu = False

    if use_gpu:
        config.enable_use_gpu(8000, 0)
    else:
        config.disable_gpu()
        if self.enable_mkldnn:
            # cache 10 different shapes for mkldnn to avoid memory leak
            config.set_mkldnn_cache_capacity(10)
            config.enable_mkldnn()
    config.disable_glog_info()

    config.delete_pass("conv_transpose_eltwiseadd_bn_fuse_pass")
    config.switch_use_feed_fetch_ops(False)
    predictor = create_paddle_predictor(config)

    input_names = predictor.get_input_names()
    input_tensor = predictor.get_input_tensor(input_names[0])
    output_names = predictor.get_output_names()
    output_tensors = []
    for output_name in output_names:
        output_tensor = predictor.get_output_tensor(output_name)
        output_tensors.append(output_tensor)
    return predictor, input_tensor, output_tensors
def _set_config(self):
    """
    predictor config setting
    """
    model_file_path = os.path.join(self.infer_model_path, 'model')
    params_file_path = os.path.join(self.infer_model_path, 'params')
    config = AnalysisConfig(model_file_path, params_file_path)
    try:
        _places = os.environ["CUDA_VISIBLE_DEVICES"]
        int(_places[0])
        use_gpu = True
    except:
        use_gpu = False

    if use_gpu:
        config.enable_use_gpu(8000, 0)
    else:
        config.disable_gpu()
    config.disable_glog_info()
    self.predictor = create_paddle_predictor(config)
def main():
    """Predictor main"""
    args = parse_args()

    config = AnalysisConfig(args.model_files_path)
    config.disable_gpu()
    config.enable_profile()
    # config.enable_mkldnn()
    config.set_cpu_math_library_num_threads(args.cpu_num)

    predictor = create_paddle_predictor(config)

    tdm_model = TdmInferNet(args)
    first_layer_node = tdm_model.first_layer_node
    first_layer_nums = len(first_layer_node)
    first_layer_node = np.array(first_layer_node)
    first_layer_node = first_layer_node.reshape((1, -1)).astype('int64')
    first_layer_node = first_layer_node.repeat(args.batch_size, axis=0)
    first_layer_mask = (np.zeros(
        (args.batch_size, first_layer_nums))).astype('int64')

    file_list = [
        str(args.test_files_path) + "/%s" % x
        for x in os.listdir(args.test_files_path)
    ]
    test_reader = TDMDataset().infer_reader(file_list, args.batch_size)

    for batch_id, data in enumerate(test_reader()):
        input_emb = data2tensor(data)
        inputs = tdm_input(input_emb, first_layer_node, first_layer_mask)
        outputs = predictor.run(inputs)
        output = outputs[0]
        output_data = output.as_ndarray()
        logger.info("TEST --> batch: {} infer_item {}".format(
            batch_id, output_data))
def _load_model_and_set_config(self):
    '''
    load model from file and set analysis config
    '''
    if os.path.exists(os.path.join(self.model_path, self.params_file)):
        config = AnalysisConfig(
            os.path.join(self.model_path, self.model_file),
            os.path.join(self.model_path, self.params_file))
    else:
        config = AnalysisConfig(os.path.join(self.model_path))

    if fluid.is_compiled_with_cuda():
        config.enable_use_gpu(100, 0)
    else:
        config.disable_gpu()
    config.switch_specify_input_names(True)
    config.switch_use_feed_fetch_ops(False)
    config.enable_memory_optim()
    config.disable_glog_info()
    # TODO: set it to True after PaddleInference fixes the precision error
    # in CUDA11
    config.switch_ir_optim(False)
    return config
def main(args):
    ernie_config = ErnieConfig(args.ernie_config_path)
    ernie_config.print_config()

    reader = ClassifyReader(
        vocab_path=args.vocab_path,
        label_map_config=args.label_map_config,
        max_seq_len=args.max_seq_len,
        do_lower_case=args.do_lower_case,
        in_tokens=False,
        is_inference=True)

    predict_prog = fluid.Program()
    predict_startup = fluid.Program()
    with fluid.program_guard(predict_prog, predict_startup):
        with fluid.unique_name.guard():
            predict_pyreader, probs, feed_target_names = create_model(
                args,
                pyreader_name='predict_reader',
                ernie_config=ernie_config,
                is_classify=True,
                is_prediction=True)

    predict_prog = predict_prog.clone(for_test=True)

    if args.use_cuda:
        place = fluid.CUDAPlace(0)
        dev_count = fluid.core.get_cuda_device_count()
    else:
        place = fluid.CPUPlace()
        dev_count = int(os.environ.get('CPU_NUM', multiprocessing.cpu_count()))

    exe = fluid.Executor(place)
    exe.run(predict_startup)

    if args.init_checkpoint:
        init_pretraining_params(exe, args.init_checkpoint, predict_prog)
    else:
        raise ValueError("args 'init_checkpoint' should be set for prediction!")

    assert args.save_inference_model_path, \
        "args save_inference_model_path should be set for prediction"
    _, ckpt_dir = os.path.split(args.init_checkpoint.rstrip('/'))
    dir_name = ckpt_dir + '_inference_model'
    model_path = os.path.join(args.save_inference_model_path, dir_name)
    log.info("save inference model to %s" % model_path)
    fluid.io.save_inference_model(
        model_path, feed_target_names, [probs], exe, main_program=predict_prog)

    # Set config
    #config = AnalysisConfig(args.model_dir)
    #config = AnalysisConfig(os.path.join(model_path, "__model__"), os.path.join(model_path, ""))
    config = AnalysisConfig(model_path)
    if not args.use_cuda:
        log.info("disable gpu")
        config.disable_gpu()
        config.switch_ir_optim(True)
    else:
        log.info("using gpu")
        config.enable_use_gpu(1024)

    # Create PaddlePredictor
    predictor = create_paddle_predictor(config)

    predict_data_generator = reader.data_generator(
        input_file=args.predict_set,
        batch_size=args.batch_size,
        epoch=1,
        shuffle=False)

    log.info("-------------- prediction results --------------")
    np.set_printoptions(precision=4, suppress=True)
    index = 0
    total_time = 0
    for sample in predict_data_generator():
        src_ids = sample[0]
        sent_ids = sample[1]
        pos_ids = sample[2]
        task_ids = sample[3]
        input_mask = sample[4]
        inputs = [
            array2tensor(ndarray)
            for ndarray in [src_ids, sent_ids, pos_ids, input_mask]
        ]
        begin_time = time.time()
        outputs = predictor.run(inputs)
        end_time = time.time()
        total_time += end_time - begin_time

        # parse outputs
        output = outputs[0]
        batch_result = output.as_ndarray()
        for single_example_probs in batch_result:
            print('\t'.join(map(str, single_example_probs.tolist())))
            index += 1
    log.info("qps:{}\ttotal_time:{}\ttotal_example:{}\tbatch_size:{}".format(
        index / total_time, total_time, index, args.batch_size))