# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from paddle_serving_app.reader import Sequential, File2Image, Resize, CenterCrop
from paddle_serving_app.reader import RGB2BGR, Transpose, Div, Normalize
from paddle_serving_app.local_predict import LocalPredictor
import sys

debugger = LocalPredictor()
debugger.load_model_config(sys.argv[1], use_gpu=True)

seq = Sequential([
    File2Image(), Resize(256), CenterCrop(224), RGB2BGR(), Transpose((2, 0, 1)),
    Div(255), Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225], True)
])

image_file = "daisy.jpg"
img = seq(image_file)
fetch_map = debugger.predict(feed={"image": img}, fetch=["feature_map"])
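# A small follow-up sketch (not part of the original script): predict() returns a dict
# keyed by fetch name, each value a numpy array, so the result can be inspected directly.
# "feature_map" is simply the fetch name used above; its shape depends on the model.
print(type(fetch_map["feature_map"]), fetch_map["feature_map"].shape)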
class OCRService(WebService):
    def init_det_debugger(self, det_model_config):
        self.det_preprocess = Sequential([
            ResizeByFactor(32, 960), Div(255), Normalize(
                [0.485, 0.456, 0.406], [0.229, 0.224, 0.225]), Transpose(
                    (2, 0, 1))
        ])
        self.det_client = LocalPredictor()
        if sys.argv[1] == 'gpu':
            self.det_client.load_model_config(
                det_model_config, use_gpu=True, gpu_id=0)
        elif sys.argv[1] == 'cpu':
            self.det_client.load_model_config(det_model_config)
        self.ocr_reader = OCRReader(
            char_dict_path="../../../ppocr/utils/ppocr_keys_v1.txt")

    def preprocess(self, feed=[], fetch=[]):
        data = base64.b64decode(feed[0]["image"].encode('utf8'))
        # np.fromstring is deprecated; frombuffer reads the raw bytes instead.
        data = np.frombuffer(data, np.uint8)
        im = cv2.imdecode(data, cv2.IMREAD_COLOR)
        ori_h, ori_w, _ = im.shape
        det_img = self.det_preprocess(im)
        _, new_h, new_w = det_img.shape
        det_img = det_img[np.newaxis, :]
        det_img = det_img.copy()
        det_out = self.det_client.predict(
            feed={"x": det_img},
            fetch=["save_infer_model/scale_0.tmp_1"],
            batch=True)
        filter_func = FilterBoxes(10, 10)
        post_func = DBPostProcess({
            "thresh": 0.3,
            "box_thresh": 0.5,
            "max_candidates": 1000,
            "unclip_ratio": 1.5,
            "min_size": 3
        })
        sorted_boxes = SortedBoxes()
        ratio_list = [float(new_h) / ori_h, float(new_w) / ori_w]
        dt_boxes_list = post_func(det_out["save_infer_model/scale_0.tmp_1"],
                                  [ratio_list])
        dt_boxes = filter_func(dt_boxes_list[0], [ori_h, ori_w])
        dt_boxes = sorted_boxes(dt_boxes)
        get_rotate_crop_image = GetRotateCropImage()
        img_list = []
        max_wh_ratio = 0
        for i, dtbox in enumerate(dt_boxes):
            boximg = get_rotate_crop_image(im, dt_boxes[i])
            img_list.append(boximg)
            h, w = boximg.shape[0:2]
            wh_ratio = w * 1.0 / h
            max_wh_ratio = max(max_wh_ratio, wh_ratio)
        if len(img_list) == 0:
            return [], []
        _, w, h = self.ocr_reader.resize_norm_img(img_list[0],
                                                  max_wh_ratio).shape
        imgs = np.zeros((len(img_list), 3, w, h)).astype('float32')
        for id, img in enumerate(img_list):
            norm_img = self.ocr_reader.resize_norm_img(img, max_wh_ratio)
            imgs[id] = norm_img
        feed = {"x": imgs.copy()}
        fetch = ["save_infer_model/scale_0.tmp_1"]
        return feed, fetch, True

    def postprocess(self, feed={}, fetch=[], fetch_map=None):
        rec_res = self.ocr_reader.postprocess(fetch_map, with_score=True)
        res_lst = []
        for res in rec_res:
            res_lst.append(res[0])
        res = {"res": res_lst}
        return res
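# Hypothetical launch sketch for OCRService (model directories and the port are
# illustrative, not taken from the source). It follows the WebService flow shown
# later in this section: the recognition model goes through load_model_config, the
# detection model through init_det_debugger, and both run on LocalPredictor.
ocr_service = OCRService(name="ocr")
ocr_service.load_model_config("ocr_rec_model")                    # assumed path
ocr_service.prepare_server(workdir="workdir", port=9292)
ocr_service.init_det_debugger(det_model_config="ocr_det_model")   # assumed path
ocr_service.run_debugger_service(gpu=(sys.argv[1] == 'gpu'))
ocr_service.run_web_service()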
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from paddle_serving_app.reader import Sequential, File2Image, Resize, CenterCrop
from paddle_serving_app.reader import RGB2BGR, Transpose, Div, Normalize
from paddle_serving_app.local_predict import LocalPredictor
import sys

predictor = LocalPredictor()
predictor.load_model_config(
    sys.argv[1], use_lite=True, use_xpu=True, ir_optim=True)

seq = Sequential([
    File2Image(), Resize(256), CenterCrop(224), RGB2BGR(), Transpose((2, 0, 1)),
    Div(255), Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225], True)
])
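# The Lite/XPU snippet stops after building the preprocessing pipeline; a hedged
# continuation, mirroring the GPU example earlier in this section (the image file
# and fetch name are illustrative assumptions), would run prediction the same way:
img = seq("daisy.jpg")                                                    # assumed input image
fetch_map = predictor.predict(feed={"image": img}, fetch=["feature_map"])  # assumed fetch name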
class LocalServiceHandler(object):
    """
    LocalServiceHandler is the processor of the local service. It supports
    three client types: brpc, grpc and local_predictor. For brpc and grpc,
    server startup is handled here; for local_predictor, local prediction is
    provided by paddle_serving_app.
    """

    def __init__(self,
                 model_config,
                 client_type='local_predictor',
                 workdir="",
                 thread_num=2,
                 device_type=-1,
                 devices="",
                 fetch_names=None,
                 mem_optim=True,
                 ir_optim=False,
                 available_port_generator=None,
                 use_profile=False,
                 precision="fp32",
                 use_mkldnn=False,
                 mkldnn_cache_capacity=0,
                 mkldnn_op_list=None,
                 mkldnn_bf16_op_list=None,
                 min_subgraph_size=3,
                 dynamic_shape_info={},
                 use_calib=False):
        """
        Initialization of LocalServiceHandler.

        Args:
           model_config: model config path
           client_type: brpc, grpc or local_predictor[default]
           workdir: work directory
           thread_num: number of threads, concurrent quantity
           device_type: support multiple devices. -1=Not set, determined by
               `devices`. 0=cpu, 1=gpu, 2=tensorRT, 3=arm cpu, 4=kunlun xpu,
               5=ascend 310 arm cpu, 6=ascend 910 arm cpu
           devices: gpu id list[gpu], "" default[cpu]
           fetch_names: get fetch names out of LocalServiceHandler in
               local_predictor mode. fetch_names_ is compatible for Client().
           mem_optim: use memory/graphics memory optimization, True default.
           ir_optim: use calculation chart optimization, False default.
           available_port_generator: generate available ports
           use_profile: use profiling, False default.
           precision: inference precision, e.g. "fp32", "fp16", "int8"
           use_mkldnn: use mkldnn, default False.
           mkldnn_cache_capacity: cache capacity of mkldnn, 0 means no limit.
           mkldnn_op_list: OP list optimized by mkldnn, None default.
           mkldnn_bf16_op_list: OP list optimized by mkldnn bf16, None default.
           min_subgraph_size: min subgraph size for TensorRT, 3 default.
           dynamic_shape_info: dynamic shape info for TensorRT, {} default.
           use_calib: set inference use_calib_mode param, False default.

        Returns:
           None
        """
        if available_port_generator is None:
            available_port_generator = util.GetAvailablePortGenerator()

        self._model_config = model_config
        self._port_list = []
        self._device_name = "cpu"
        self._use_gpu = False
        self._use_trt = False
        self._use_lite = False
        self._use_xpu = False
        self._use_ascend_cl = False
        self._use_mkldnn = False
        self._mkldnn_cache_capacity = 0
        self._mkldnn_op_list = None
        self._mkldnn_bf16_op_list = None
        self.min_subgraph_size = 3
        self.dynamic_shape_info = {}
        self._use_calib = False

        if device_type == -1:
            # device_type is not set, determined by `devices`
            if devices == "":
                # CPU
                self._device_name = "cpu"
                devices = [-1]
            else:
                # GPU
                self._device_name = "gpu"
                self._use_gpu = True
                devices = [int(x) for x in devices.split(",")]
        elif device_type == 0:
            # CPU
            self._device_name = "cpu"
            devices = [-1]
        elif device_type == 1:
            # GPU
            self._device_name = "gpu"
            self._use_gpu = True
            devices = [int(x) for x in devices.split(",")]
        elif device_type == 2:
            # Nvidia TensorRT
            self._device_name = "gpu"
            self._use_gpu = True
            devices = [int(x) for x in devices.split(",")]
            self._use_trt = True
            self.min_subgraph_size = min_subgraph_size
            self.dynamic_shape_info = dynamic_shape_info
        elif device_type == 3:
            # ARM CPU
            self._device_name = "arm"
            devices = [-1]
            self._use_lite = True
        elif device_type == 4:
            # Kunlun XPU
            self._device_name = "arm"
            devices = [int(x) for x in devices.split(",")]
            self._use_lite = True
            self._use_xpu = True
        elif device_type == 5:
            # Ascend 310 ARM CPU
            self._device_name = "arm"
            devices = [int(x) for x in devices.split(",")]
            self._use_lite = True
            self._use_ascend_cl = True
        elif device_type == 6:
            # Ascend 910 ARM CPU
            self._device_name = "arm"
            devices = [int(x) for x in devices.split(",")]
            self._use_ascend_cl = True
        else:
            _LOGGER.error(
                "LocalServiceHandler initialization fail. device_type={}".
                format(device_type))

        if client_type == "brpc" or client_type == "grpc":
            for _ in devices:
                self._port_list.append(available_port_generator.next())
            _LOGGER.info("Create ports for devices:{}. Port:{}".format(
                devices, self._port_list))

        self._client_type = client_type
        self._workdir = workdir
        self._devices = devices
        self._thread_num = thread_num
        self._mem_optim = mem_optim
        self._ir_optim = ir_optim
        self._local_predictor_client = None
        self._rpc_service_list = []
        self._server_pros = []
        self._use_profile = use_profile
        self._fetch_names = fetch_names
        self._precision = precision
        self._use_mkldnn = use_mkldnn
        self._mkldnn_cache_capacity = mkldnn_cache_capacity
        self._mkldnn_op_list = mkldnn_op_list
        self._mkldnn_bf16_op_list = mkldnn_bf16_op_list
        self._use_calib = use_calib

        _LOGGER.info(
            "Models({}) will be launched by device {}. use_gpu:{}, "
            "use_trt:{}, use_lite:{}, use_xpu:{}, device_type:{}, devices:{}, "
            "mem_optim:{}, ir_optim:{}, use_profile:{}, thread_num:{}, "
            "client_type:{}, fetch_names:{}, precision:{}, use_calib:{}, "
            "use_mkldnn:{}, mkldnn_cache_capacity:{}, mkldnn_op_list:{}, "
            "mkldnn_bf16_op_list:{}, use_ascend_cl:{}, min_subgraph_size:{}, "
            "is_set_dynamic_shape_info:{}".format(
                model_config, self._device_name, self._use_gpu, self._use_trt,
                self._use_lite, self._use_xpu, device_type, self._devices,
                self._mem_optim, self._ir_optim, self._use_profile,
                self._thread_num, self._client_type, self._fetch_names,
                self._precision, self._use_calib, self._use_mkldnn,
                self._mkldnn_cache_capacity, self._mkldnn_op_list,
                self._mkldnn_bf16_op_list, self._use_ascend_cl,
                self.min_subgraph_size, bool(len(self.dynamic_shape_info))))

    def get_fetch_list(self):
        return self._fetch_names

    def get_port_list(self):
        return self._port_list

    def get_client(self, concurrency_idx):
        """
        Used only in the local_predictor case: creates one LocalPredictor
        object and initializes the paddle predictor via load_model_config.
        The concurrency_idx is used to select the running device.

        Args:
            concurrency_idx: process/thread index

        Returns:
            _local_predictor_client
        """
        # check the validity of concurrency_idx
        device_num = len(self._devices)
        if device_num <= 0:
            _LOGGER.error("device_num must be greater than 0. devices({})".
                          format(self._devices))
            raise ValueError("The number of self._devices error")

        if concurrency_idx < 0:
            _LOGGER.error("concurrency_idx({}) must be a non-negative number".
                          format(concurrency_idx))
            concurrency_idx = 0
        elif concurrency_idx >= device_num:
            concurrency_idx = concurrency_idx % device_num

        _LOGGER.info("GET_CLIENT : concurrency_idx={}, device_num={}".format(
            concurrency_idx, device_num))
        from paddle_serving_app.local_predict import LocalPredictor
        if self._local_predictor_client is None:
            self._local_predictor_client = LocalPredictor()
            # load model config and init predictor
            self._local_predictor_client.load_model_config(
                model_path=self._model_config,
                use_gpu=self._use_gpu,
                gpu_id=self._devices[concurrency_idx],
                use_profile=self._use_profile,
                thread_num=self._thread_num,
                mem_optim=self._mem_optim,
                ir_optim=self._ir_optim,
                use_trt=self._use_trt,
                use_lite=self._use_lite,
                use_xpu=self._use_xpu,
                precision=self._precision,
                use_mkldnn=self._use_mkldnn,
                mkldnn_cache_capacity=self._mkldnn_cache_capacity,
                mkldnn_op_list=self._mkldnn_op_list,
                mkldnn_bf16_op_list=self._mkldnn_bf16_op_list,
                use_ascend_cl=self._use_ascend_cl,
                min_subgraph_size=self.min_subgraph_size,
                dynamic_shape_info=self.dynamic_shape_info,
                use_calib=self._use_calib)
        return self._local_predictor_client

    def get_client_config(self):
        return os.path.join(self._model_config, "serving_server_conf.prototxt")

    def _prepare_one_server(self, workdir, port, gpuid, thread_num, mem_optim,
                            ir_optim, precision):
        """
        According to self._device_name, generate one CPU/GPU/Arm Server, and
        set the model config and startup params.

        Args:
            workdir: work directory
            port: network port
            gpuid: gpu id
            thread_num: thread num
            mem_optim: use memory/graphics memory optimization
            ir_optim: use calculation chart optimization
            precision: inference precision, e.g. "fp32", "fp16", "int8"

        Returns:
            server: CpuServer/GpuServer
        """
        if self._device_name == "cpu":
            from paddle_serving_server import OpMaker, OpSeqMaker, Server
            op_maker = OpMaker()
            read_op = op_maker.create('general_reader')
            general_infer_op = op_maker.create('general_infer')
            general_response_op = op_maker.create('general_response')

            op_seq_maker = OpSeqMaker()
            op_seq_maker.add_op(read_op)
            op_seq_maker.add_op(general_infer_op)
            op_seq_maker.add_op(general_response_op)

            server = Server()
        else:
            # gpu or arm
            from paddle_serving_server import OpMaker, OpSeqMaker, Server
            op_maker = OpMaker()
            read_op = op_maker.create('general_reader')
            general_infer_op = op_maker.create('general_infer')
            general_response_op = op_maker.create('general_response')

            op_seq_maker = OpSeqMaker()
            op_seq_maker.add_op(read_op)
            op_seq_maker.add_op(general_infer_op)
            op_seq_maker.add_op(general_response_op)

            server = Server()
            if gpuid >= 0:
                server.set_gpuid(gpuid)

        # TODO: support arm or arm + xpu later
        server.set_device(self._device_name)
        if self._use_xpu:
            server.set_xpu()
        if self._use_lite:
            server.set_lite()
        if self._use_ascend_cl:
            server.set_ascend_cl()

        server.set_op_sequence(op_seq_maker.get_op_sequence())
        server.set_num_threads(thread_num)
        server.set_memory_optimize(mem_optim)
        server.set_ir_optimize(ir_optim)
        server.set_precision(precision)

        server.load_model_config(self._model_config)
        server.prepare_server(
            workdir=workdir, port=port, device=self._device_name)
        if self._fetch_names is None:
            self._fetch_names = server.get_fetch_list()
        return server

    def _start_one_server(self, service_idx):
        """
        Start one server

        Args:
            service_idx: server index

        Returns:
            None
        """
        self._rpc_service_list[service_idx].run_server()

    def prepare_server(self):
        """
        Prepare all servers to be started, and append them into a list.
        """
        for i, device_id in enumerate(self._devices):
            if self._workdir != "":
                workdir = "{}_{}".format(self._workdir, i)
            else:
                workdir = _workdir_name_gen.next()
            self._rpc_service_list.append(
                self._prepare_one_server(
                    workdir,
                    self._port_list[i],
                    device_id,
                    thread_num=self._thread_num,
                    mem_optim=self._mem_optim,
                    ir_optim=self._ir_optim,
                    precision=self._precision))

    def start_server(self):
        """
        Start multiple processes and start one server in each process
        """
        for i, _ in enumerate(self._rpc_service_list):
            p = multiprocessing.Process(
                target=self._start_one_server, args=(i, ))
            p.daemon = True
            self._server_pros.append(p)
        for p in self._server_pros:
            p.start()
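# Hypothetical usage sketch of LocalServiceHandler in local_predictor mode; the model
# directory is illustrative and the feed/fetch names depend on the model. devices=""
# selects CPU, and get_client(0) builds and caches one LocalPredictor configured from
# the handler's flags.
handler = LocalServiceHandler(
    model_config="./uci_housing_model",    # assumed serving model dir
    client_type="local_predictor",
    devices="")                            # "" -> CPU
predictor = handler.get_client(concurrency_idx=0)
# fetch_map = predictor.predict(feed={...}, fetch=[...], batch=True)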
class RecallServerServicer(object):
    def __init__(self):
        self.uv_client = LocalPredictor()
        self.uv_client.load_model_config(
            "user_vector_model/serving_server_dir")
        # milvus_host = '127.0.0.1'
        # milvus_port = '19530'
        self.milvus_client = RecallByMilvus()
        self.collection_name = 'demo_films'

    def get_user_vector(self, user_info):
        dic = {"userid": [], "gender": [], "age": [], "occupation": []}
        lod = [0]
        dic["userid"].append(hash2(user_info.user_id))
        dic["gender"].append(hash2(user_info.gender))
        dic["age"].append(hash2(user_info.age))
        dic["occupation"].append(hash2(user_info.job))
        lod.append(1)

        dic["userid.lod"] = lod
        dic["gender.lod"] = lod
        dic["age.lod"] = lod
        dic["occupation.lod"] = lod

        for key in dic:
            dic[key] = np.array(dic[key]).astype(np.int64).reshape(
                len(dic[key]), 1)
        fetch_map = self.uv_client.predict(
            feed=dic, fetch=["save_infer_model/scale_0.tmp_0"], batch=True)
        return fetch_map["save_infer_model/scale_0.tmp_0"].tolist()[0]

    def recall(self, request, context):
        '''
        message RecallRequest{
            string log_id = 1;
            user_info.UserInfo user_info = 2;
            string recall_type = 3;
            uint32 request_num = 4;
        }

        message RecallResponse{
            message Error {
                uint32 code = 1;
                string text = 2;
            }
            message ScorePair {
                string nid = 1;
                float score = 2;
            };
            Error error = 1;
            repeated ScorePair score_pairs = 2;
        }
        '''
        recall_res = recall_pb2.RecallResponse()
        user_vector = self.get_user_vector(request.user_info)
        status, results = self.milvus_client.search(
            collection_name=self.collection_name,
            vectors=[user_vector],
            partition_tag="Movie")
        for entities in results:
            if len(entities) == 0:
                recall_res.error.code = 500
                recall_res.error.text = "Recall server get milvus fail. ({})".format(
                    str(request))
                return recall_res
            for topk_film in entities:
                # current_entity = topk_film.entity
                score_pair = recall_res.score_pairs.add()
                score_pair.nid = str(topk_film.id)
                score_pair.score = float(topk_film.distance)
        recall_res.error.code = 200
        return recall_res
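# Illustrative only: the feed dict built by get_user_vector pairs every sparse slot
# with a ".lod" key. For a single user, each slot is a (1, 1) int64 array and the
# shared lod is [0, 1]; the values below are made up.
import numpy as np
lod = [0, 1]                                         # one sequence of length 1
feed = {
    "userid": np.array([[123]], dtype=np.int64),     # shape (1, 1)
    "userid.lod": lod,
    # gender / age / occupation follow the same pattern
}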
import redis
import numpy as np
import codecs
from paddle_serving_app.local_predict import LocalPredictor


class Movie(object):
    def __init__(self):
        self.movie_id, self.title, self.genres = "", "", ""
        pass


def hash2(a):
    return hash(a) % 60000000


ctr_client = LocalPredictor()
ctr_client.load_model_config("serving_server_dir")
with codecs.open("movies.dat", "r", encoding='utf-8', errors='ignore') as f:
    lines = f.readlines()

ff = open("movie_vectors.txt", 'w')

for line in lines:
    if len(line.strip()) == 0:
        continue
    tmp = line.strip().split("::")
    movie_id = tmp[0]
    title = tmp[1]
    genre_group = tmp[2]
    tmp = genre_group.strip().split("|")
class RankServerServicer(object):
    def __init__(self):
        self.ctr_client = LocalPredictor()
        self.ctr_client.load_model_config("rank_model")

    def process_feed_dict(self, user_info, item_infos):
        # "userid gender age occupation | movieid title genres"
        dic = {
            "userid": [],
            "gender": [],
            "age": [],
            "occupation": [],
            "movieid": [],
            "title": [],
            "genres": []
        }
        batch_size = len(item_infos)
        lod = [0]
        for i, item_info in enumerate(item_infos):
            dic["movieid"].append(hash2(item_info.movie_id))
            dic["title"].append(hash2(item_info.title))
            dic["genres"].append(hash2(item_info.genre))
            dic["userid"].append(hash2(user_info.user_id))
            dic["gender"].append(hash2(user_info.gender))
            dic["age"].append(hash2(user_info.age))
            dic["occupation"].append(hash2(user_info.job))
            lod.append(i + 1)

        dic["movieid.lod"] = lod
        dic["title.lod"] = lod
        dic["genres.lod"] = lod
        dic["userid.lod"] = lod
        dic["gender.lod"] = lod
        dic["age.lod"] = lod
        dic["occupation.lod"] = lod

        for key in dic:
            dic[key] = np.array(dic[key]).astype(np.int64).reshape(
                len(dic[key]), 1)
        return dic

    def rank_predict(self, request, context):
        '''
        message RankRequest {
            string log_id = 1;
            user_info.UserInfo user_info = 2;
            repeated item_info.ItemInfo item_infos = 3;
        }

        message RankResponse {
            message Error {
                uint32 code = 1;
                string text = 2;
            }
            message ScorePair {
                string nid = 1;
                float score = 2;
            };
            Error error = 1;
            repeated ScorePair score_pairs = 2;
        };
        '''
        batch_size = len(request.item_infos)
        dic = self.process_feed_dict(request.user_info, request.item_infos)
        fetch_map = self.ctr_client.predict(
            feed=dic, fetch=["save_infer_model/scale_0.tmp_0"], batch=True)
        response = rank_pb2.RankResponse()
        #raise ValueError("UM server get user_info from redis fail. ({})".format(str(request)))
        response.error.code = 200
        for i in range(batch_size):
            score_pair = response.score_pairs.add()
            score_pair.nid = request.item_infos[i].movie_id
            score_pair.score = fetch_map["save_infer_model/scale_0.tmp_0"][i][
                0]
        response.score_pairs.sort(reverse=True, key=lambda item: item.score)
        return response
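# Illustrative shape check only (not from the source): for a batch of N candidate
# movies, process_feed_dict repeats the user features N times, so every slot becomes
# an (N, 1) int64 array and the shared lod is [0, 1, ..., N]. Values are made up.
import numpy as np
N = 3
lod = list(range(N + 1))                             # [0, 1, 2, 3]
movieid = np.array([[101], [102], [103]], dtype=np.int64)
assert movieid.shape == (N, 1) and lod[-1] == N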
class WebService(object):
    def __init__(self, name="default_service"):
        self.name = name
        # pipeline
        self._server = pipeline.PipelineServer(self.name)

        self.gpus = ["-1"]  # deprecated
        self.rpc_service_list = []  # deprecated

    def get_pipeline_response(self, read_op):
        return None

    def prepare_pipeline_config(self, yml_file=None, yml_dict=None):
        # build dag
        read_op = pipeline.RequestOp()
        last_op = self.get_pipeline_response(read_op)
        if not isinstance(last_op, Op):
            raise ValueError("The return value type of `get_pipeline_response` "
                             "function is not Op type, please check function "
                             "`get_pipeline_response`.")
        response_op = pipeline.ResponseOp(input_ops=[last_op])
        self._server.set_response_op(response_op)
        self._server.prepare_server(yml_file=yml_file, yml_dict=yml_dict)

    def run_service(self):
        self._server.run_server()

    def load_model_config(self,
                          server_config_dir_paths,
                          client_config_path=None):
        if isinstance(server_config_dir_paths, str):
            server_config_dir_paths = [server_config_dir_paths]
        elif isinstance(server_config_dir_paths, list):
            pass

        for single_model_config in server_config_dir_paths:
            if os.path.isdir(single_model_config):
                pass
            elif os.path.isfile(single_model_config):
                raise ValueError(
                    "The input of --model should be a dir not file.")
        self.server_config_dir_paths = server_config_dir_paths
        from .proto import general_model_config_pb2 as m_config
        import google.protobuf.text_format
        file_path_list = []
        for single_model_config in self.server_config_dir_paths:
            file_path_list.append("{}/serving_server_conf.prototxt".format(
                single_model_config))

        model_conf = m_config.GeneralModelConfig()
        f = open(file_path_list[0], 'r')
        model_conf = google.protobuf.text_format.Merge(
            str(f.read()), model_conf)
        self.feed_vars = {var.alias_name: var for var in model_conf.feed_var}

        if len(file_path_list) > 1:
            model_conf = m_config.GeneralModelConfig()
            f = open(file_path_list[-1], 'r')
            model_conf = google.protobuf.text_format.Merge(
                str(f.read()), model_conf)
        self.fetch_vars = {var.alias_name: var for var in model_conf.fetch_var}

        if client_config_path is None:
            self.client_config_path = file_path_list

    # after this function, self.gpus should be a list of str or [].
    def set_gpus(self, gpus):
        print("This API will be deprecated later. Please do not use it")
        self.gpus = format_gpu_to_strlist(gpus)

    # This function can be called by the user or by create_rpc_config.
    # If called by the user, the user can either call set_gpus or pass `gpus`.
    # If `gpus` is None, it was not set at all, so self.gpus is used instead;
    # otherwise `gpus` takes priority. That is, if both set_gpus and `gpus`
    # are set, `gpus` wins.
    def default_rpc_service(self,
                            workdir,
                            port=9292,
                            gpus=None,
                            thread_num=4,
                            mem_optim=True,
                            use_lite=False,
                            use_xpu=False,
                            ir_optim=False,
                            precision="fp32",
                            use_calib=False,
                            use_trt=False,
                            gpu_multi_stream=False,
                            runtime_thread_num=None,
                            batch_infer_size=None):

        device = "cpu"
        server = Server()

        # only when `gpus` is None, meaning it was not set at all,
        # do we fall back to self.gpus.
        if gpus is None:
            gpus = self.gpus

        gpus = format_gpu_to_strlist(gpus)
        server.set_gpuid(gpus)

        if len(gpus) == 0 or gpus == ["-1"]:
            if use_lite:
                device = "arm"
            else:
                device = "cpu"
        else:
            device = "gpu"

        op_maker = OpMaker()
        op_seq_maker = OpSeqMaker()

        read_op = op_maker.create('GeneralReaderOp')
        op_seq_maker.add_op(read_op)

        for idx, single_model in enumerate(self.server_config_dir_paths):
            infer_op_name = "GeneralInferOp"
            if len(self.server_config_dir_paths) == 2 and idx == 0:
                infer_op_name = "GeneralDetectionOp"
            else:
                infer_op_name = "GeneralInferOp"
            general_infer_op = op_maker.create(infer_op_name)
            op_seq_maker.add_op(general_infer_op)

        general_response_op = op_maker.create('GeneralResponseOp')
        op_seq_maker.add_op(general_response_op)

        server.set_op_sequence(op_seq_maker.get_op_sequence())
        server.set_num_threads(thread_num)
        server.set_memory_optimize(mem_optim)
        server.set_ir_optimize(ir_optim)
        server.set_device(device)
        server.set_precision(precision)
        server.set_use_calib(use_calib)

        if use_trt and device == "gpu":
            server.set_trt()
            server.set_ir_optimize(True)

        if gpu_multi_stream and device == "gpu":
            server.set_gpu_multi_stream()

        if runtime_thread_num:
            server.set_runtime_thread_num(runtime_thread_num)

        if batch_infer_size:
            server.set_batch_infer_size(batch_infer_size)

        if use_lite:
            server.set_lite()
        if use_xpu:
            server.set_xpu()

        server.load_model_config(
            self.server_config_dir_paths
        )  # brpc Server supports server_config_dir_paths
        server.prepare_server(workdir=workdir, port=port, device=device)
        return server

    def _launch_rpc_service(self, service_idx):
        self.rpc_service_list[service_idx].run_server()

    # If this function is used, self.gpus must be set beforehand.
    # If not, the default value self.gpus = ["-1"] is used,
    # so we always pass `gpus` = self.gpus.
    def create_rpc_config(self):
        self.rpc_service_list.append(
            self.default_rpc_service(
                self.workdir,
                self.port_list[0],
                self.gpus,
                thread_num=self.thread_num,
                mem_optim=self.mem_optim,
                use_lite=self.use_lite,
                use_xpu=self.use_xpu,
                ir_optim=self.ir_optim,
                precision=self.precision,
                use_calib=self.use_calib,
                use_trt=self.use_trt,
                gpu_multi_stream=self.gpu_multi_stream,
                runtime_thread_num=self.runtime_thread_num,
                batch_infer_size=self.batch_infer_size))

    def prepare_server(self,
                       workdir,
                       port=9393,
                       device="cpu",
                       precision="fp32",
                       use_calib=False,
                       use_lite=False,
                       use_xpu=False,
                       ir_optim=False,
                       thread_num=4,
                       mem_optim=True,
                       use_trt=False,
                       gpu_multi_stream=False,
                       runtime_thread_num=None,
                       batch_infer_size=None,
                       gpuid=None):
        print("This API will be deprecated later. Please do not use it")
        self.workdir = workdir
        self.port = port
        self.thread_num = thread_num
        # self.device is not used at all.
        # device is set by gpuid.
        self.precision = precision
        self.use_calib = use_calib
        self.use_lite = use_lite
        self.use_xpu = use_xpu
        self.ir_optim = ir_optim
        self.mem_optim = mem_optim
        self.port_list = []
        self.use_trt = use_trt
        self.gpu_multi_stream = gpu_multi_stream
        self.runtime_thread_num = runtime_thread_num
        self.batch_infer_size = batch_infer_size

        # record port and pid info for stopping process
        dump_pid_file([self.port], "web_service")
        # if gpuid is not None, gpuid takes priority;
        # otherwise keep self.gpus unchanged
        # (it may have been set by set_gpus).
        if gpuid is not None:
            self.gpus = format_gpu_to_strlist(gpuid)
        else:
            pass

        default_port = 12000
        for i in range(1000):
            if port_is_available(default_port + i):
                self.port_list.append(default_port + i)
                break

    def _launch_web_service(self):
        self.client = Client()
        self.client.load_client_config(self.client_config_path)
        endpoints = ""
        endpoints = "127.0.0.1:{}".format(self.port_list[0])
        self.client.connect([endpoints])

    def get_prediction(self, request):
        if not request.json:
            abort(400)
        if "fetch" not in request.json:
            abort(400)
        try:
            feed, fetch, is_batch = self.preprocess(request.json["feed"],
                                                    request.json["fetch"])
            if isinstance(feed, dict) and "fetch" in feed:
                del feed["fetch"]
            if len(feed) == 0:
                raise ValueError("empty input")
            fetch_map = self.client.predict(
                feed=feed, fetch=fetch, batch=is_batch)
            result = self.postprocess(
                feed=request.json["feed"], fetch=fetch, fetch_map=fetch_map)
            result = {"result": result}
        except ValueError as err:
            result = {"result": str(err)}
        return result

    def run_rpc_service(self):
        print("This API will be deprecated later. Please do not use it")
        import socket
        localIP = socket.gethostbyname(socket.gethostname())
        print("web service address:")
        print("http://{}:{}/{}/prediction".format(localIP, self.port,
                                                  self.name))
        server_pros = []
        self.create_rpc_config()
        for i, service in enumerate(self.rpc_service_list):
            p = Process(target=self._launch_rpc_service, args=(i, ))
            server_pros.append(p)
        for p in server_pros:
            p.start()

        app_instance = Flask(__name__)

        @app_instance.before_first_request
        def init():
            self._launch_web_service()

        service_name = "/" + self.name + "/prediction"

        @app_instance.route(service_name, methods=["POST"])
        def run():
            return self.get_prediction(request)

        self.app_instance = app_instance

    # TODO: maybe change to another API name: maybe run_local_predictor?
    def run_debugger_service(self, gpu=False):
        print("This API will be deprecated later. Please do not use it")
        import socket
        localIP = socket.gethostbyname(socket.gethostname())
        print("web service address:")
        print("http://{}:{}/{}/prediction".format(localIP, self.port,
                                                  self.name))
        app_instance = Flask(__name__)

        @app_instance.before_first_request
        def init():
            self._launch_local_predictor(gpu)

        service_name = "/" + self.name + "/prediction"

        @app_instance.route(service_name, methods=["POST"])
        def run():
            return self.get_prediction(request)

        self.app_instance = app_instance

    def _launch_local_predictor(self, gpu):
        # LocalPredictor acts like a server, but from the WebService's point
        # of view it is the request initiator, i.e. a client.
        # local_predictor only supports a single model dir path (type: str),
        # so the input must be self.server_config_dir_paths[0].
        from paddle_serving_app.local_predict import LocalPredictor
        self.client = LocalPredictor()
        if gpu:
            # if the user forgot to call `set_gpus` to set self.gpus,
            # default to self.gpus = ["0"].
            if len(self.gpus) == 0 or self.gpus == ["-1"]:
                self.gpus = ["0"]
            # right now, LocalPredictor only supports 1 card.
            # no matter how many gpu ids are in gpus, only the first is used.
            gpu_id = (self.gpus[0].split(","))[0]
            self.client.load_model_config(
                self.server_config_dir_paths[0], use_gpu=True, gpu_id=gpu_id)
        else:
            self.client.load_model_config(
                self.server_config_dir_paths[0], use_gpu=False)

    def run_web_service(self):
        print("This API will be deprecated later. Please do not use it")
        self.app_instance.run(host="0.0.0.0", port=self.port, threaded=True)

    def get_app_instance(self):
        return self.app_instance

    def preprocess(self, feed=[], fetch=[]):
        print("This API will be deprecated later. Please do not use it")
        is_batch = True
        feed_dict = {}
        for var_name in self.feed_vars.keys():
            feed_dict[var_name] = []
        for feed_ins in feed:
            for key in feed_ins:
                feed_dict[key].append(
                    np.array(feed_ins[key]).reshape(
                        list(self.feed_vars[key].shape))[np.newaxis, :])
        feed = {}
        for key in feed_dict:
            feed[key] = np.concatenate(feed_dict[key], axis=0)
        return feed, fetch, is_batch

    def postprocess(self, feed=[], fetch=[], fetch_map=None):
        print("This API will be deprecated later. Please do not use it")
        for key in fetch_map:
            fetch_map[key] = fetch_map[key].tolist()
        return fetch_map
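# Hypothetical end-to-end sketch of the (deprecated) WebService flow defined above;
# the model directory and port are illustrative. run_debugger_service() serves the
# model through _launch_local_predictor, i.e. with LocalPredictor instead of a brpc
# server process.
service = WebService(name="uci")
service.load_model_config("uci_housing_model")       # assumed serving model dir
service.prepare_server(workdir="workdir", port=9393)
service.run_debugger_service(gpu=False)
service.run_web_service()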