Example 1
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from paddle_serving_app.reader import Sequential, File2Image, Resize, CenterCrop
from paddle_serving_app.reader import RGB2BGR, Transpose, Div, Normalize
from paddle_serving_app.local_predict import LocalPredictor
import sys

debugger = LocalPredictor()
debugger.load_model_config(sys.argv[1], gpu=True)

seq = Sequential([
    File2Image(),
    Resize(256),
    CenterCrop(224),
    RGB2BGR(),
    Transpose((2, 0, 1)),
    Div(255),
    Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225], True)
])

image_file = "daisy.jpg"
img = seq(image_file)
fetch_map = debugger.predict(feed={"image": img}, fetch=["feature_map"])
print(fetch_map["feature_map"].reshape(-1))
Example 2
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from paddle_serving_app.reader import Sequential, File2Image, Resize, CenterCrop
from paddle_serving_app.reader import RGB2BGR, Transpose, Div, Normalize
from paddle_serving_app.local_predict import LocalPredictor
import sys

predictor = LocalPredictor()
predictor.load_model_config(sys.argv[1],
                            use_lite=True,
                            use_xpu=True,
                            ir_optim=True)

seq = Sequential([
    File2Image(),
    Resize(256),
    CenterCrop(224),
    RGB2BGR(),
    Transpose((2, 0, 1)),
    Div(255),
    Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225], True)
])

image_file = "daisy.jpg"
img = seq(image_file)
fetch_map = predictor.predict(feed={"image": img}, fetch=["score"])
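For completeness, a hedged post-processing step (not part of the original snippet) that turns the fetched "score" tensor into a top-1 class index might look like this, assuming "score" holds softmax probabilities of shape (1, num_classes):

import numpy as np

score = fetch_map["score"]  # "score" is the fetch name used above
print("top-1 class index:", int(np.argmax(score)), "prob:", float(np.max(score)))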
Example 3
class OCRService(WebService):
    def init_det_debugger(self, det_model_config):
        self.det_preprocess = Sequential([
            ResizeByFactor(32, 960), Div(255),
            Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]), Transpose(
                (2, 0, 1))
        ])
        self.det_client = LocalPredictor()
        if sys.argv[1] == 'gpu':
            self.det_client.load_model_config(
                det_model_config, use_gpu=True, gpu_id=0)
        elif sys.argv[1] == 'cpu':
            self.det_client.load_model_config(det_model_config)
        self.ocr_reader = OCRReader(
            char_dict_path="../../../ppocr/utils/ppocr_keys_v1.txt")

    def preprocess(self, feed=[], fetch=[]):
        data = base64.b64decode(feed[0]["image"].encode('utf8'))
        data = np.frombuffer(data, np.uint8)
        im = cv2.imdecode(data, cv2.IMREAD_COLOR)
        ori_h, ori_w, _ = im.shape
        det_img = self.det_preprocess(im)
        _, new_h, new_w = det_img.shape
        det_img = det_img[np.newaxis, :]
        det_img = det_img.copy()
        det_out = self.det_client.predict(
            feed={"x": det_img}, fetch=["save_infer_model/scale_0.tmp_1"], batch=True)
        filter_func = FilterBoxes(10, 10)
        post_func = DBPostProcess({
            "thresh": 0.3,
            "box_thresh": 0.5,
            "max_candidates": 1000,
            "unclip_ratio": 1.5,
            "min_size": 3
        })
        sorted_boxes = SortedBoxes()
        ratio_list = [float(new_h) / ori_h, float(new_w) / ori_w]
        dt_boxes_list = post_func(det_out["save_infer_model/scale_0.tmp_1"], [ratio_list])
        dt_boxes = filter_func(dt_boxes_list[0], [ori_h, ori_w])
        dt_boxes = sorted_boxes(dt_boxes)
        get_rotate_crop_image = GetRotateCropImage()
        img_list = []
        max_wh_ratio = 0
        for i, dtbox in enumerate(dt_boxes):
            boximg = get_rotate_crop_image(im, dt_boxes[i])
            img_list.append(boximg)
            h, w = boximg.shape[0:2]
            wh_ratio = w * 1.0 / h
            max_wh_ratio = max(max_wh_ratio, wh_ratio)
        if len(img_list) == 0:
            # return an empty feed; callers expect (feed, fetch, is_batch)
            return {}, [], True
        _, w, h = self.ocr_reader.resize_norm_img(img_list[0],
                                                  max_wh_ratio).shape
        imgs = np.zeros((len(img_list), 3, w, h)).astype('float32')
        for id, img in enumerate(img_list):
            norm_img = self.ocr_reader.resize_norm_img(img, max_wh_ratio)
            imgs[id] = norm_img
        feed = {"x": imgs.copy()}
        fetch = ["save_infer_model/scale_0.tmp_1"]
        return feed, fetch, True

    def postprocess(self, feed={}, fetch=[], fetch_map=None):
        rec_res = self.ocr_reader.postprocess(fetch_map, with_score=True)
        res_lst = []
        for res in rec_res:
            res_lst.append(res[0])
        res = {"res": res_lst}
        return res
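The class above only implements preprocessing and postprocessing; a hedged launch sketch, following the WebService API shown in Example 8 below (the model directory names, workdir and port are assumptions), might look like:

ocr_service = OCRService(name="ocr")
ocr_service.load_model_config("ocr_rec_model")                    # recognition model
ocr_service.init_det_debugger(det_model_config="ocr_det_model")   # detection model
if sys.argv[1] == 'gpu':
    ocr_service.set_gpus("0")
    ocr_service.prepare_server(workdir="workdir", port=9292, device="gpu", gpuid=0)
else:
    ocr_service.prepare_server(workdir="workdir", port=9292, device="cpu")
ocr_service.run_debugger_service(gpu=(sys.argv[1] == 'gpu'))
ocr_service.run_web_service()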
Example 4
class LocalServiceHandler(object):
    """
    LocalServiceHandler is the processor of the local service, contains
    three client types, brpc, grpc and local_predictor.If you use the 
    brpc or grpc, serveing startup ability is provided.If you use
    local_predictor, local predict ability is provided by paddle_serving_app.
    """
    def __init__(self,
                 model_config,
                 client_type='local_predictor',
                 workdir="",
                 thread_num=2,
                 device_type=-1,
                 devices="",
                 fetch_names=None,
                 mem_optim=True,
                 ir_optim=False,
                 available_port_generator=None,
                 use_profile=False,
                 precision="fp32",
                 use_mkldnn=False,
                 mkldnn_cache_capacity=0,
                 mkldnn_op_list=None,
                 mkldnn_bf16_op_list=None,
                 min_subgraph_size=3,
                 dynamic_shape_info={},
                 use_calib=False):
        """
        Initialization of localservicehandler

        Args:
           model_config: model config path
           client_type: brpc, grpc and local_predictor[default]
           workdir: work directory
           thread_num: number of threads, concurrent quantity.
           device_type: support multiple devices. -1=Not set, determined by
               `devices`. 0=cpu, 1=gpu, 2=tensorRT, 3=arm cpu, 4=kunlun xpu
           devices: gpu id list[gpu], "" default[cpu]
           fetch_names: get fetch names out of LocalServiceHandler in 
               local_predictor mode. fetch_names_ is compatible for Client().
           mem_optim: use memory/graphics memory optimization, True default.
           ir_optim: use calculation chart optimization, False default.
           available_port_generator: generate available ports
           use_profile: use profiling, False default.
           precision: inference precesion, e.g. "fp32", "fp16", "int8"
           use_mkldnn: use mkldnn, default False.
           mkldnn_cache_capacity: cache capacity of mkldnn, 0 means no limit.
           mkldnn_op_list: OP list optimized by mkldnn, None default.
           mkldnn_bf16_op_list: OP list optimized by mkldnn bf16, None default.
           use_calib: set inference use_calib_mode param, False default.

        Returns:
           None
        """
        if available_port_generator is None:
            available_port_generator = util.GetAvailablePortGenerator()

        self._model_config = model_config
        self._port_list = []
        self._device_name = "cpu"
        self._use_gpu = False
        self._use_trt = False
        self._use_lite = False
        self._use_xpu = False
        self._use_ascend_cl = False
        self._use_mkldnn = False
        self._mkldnn_cache_capacity = 0
        self._mkldnn_op_list = None
        self._mkldnn_bf16_op_list = None
        self.min_subgraph_size = 3
        self.dynamic_shape_info = {}
        self._use_calib = False

        if device_type == -1:
            # device_type is not set, determined by `devices`,
            if devices == "":
                # CPU
                self._device_name = "cpu"
                devices = [-1]
            else:
                # GPU
                self._device_name = "gpu"
                self._use_gpu = True
                devices = [int(x) for x in devices.split(",")]

        elif device_type == 0:
            # CPU
            self._device_name = "cpu"
            devices = [-1]
        elif device_type == 1:
            # GPU
            self._device_name = "gpu"
            self._use_gpu = True
            devices = [int(x) for x in devices.split(",")]
        elif device_type == 2:
            # Nvidia Tensor RT
            self._device_name = "gpu"
            self._use_gpu = True
            devices = [int(x) for x in devices.split(",")]
            self._use_trt = True
            self.min_subgraph_size = min_subgraph_size
            self.dynamic_shape_info = dynamic_shape_info
        elif device_type == 3:
            # ARM CPU
            self._device_name = "arm"
            devices = [-1]
            self._use_lite = True
        elif device_type == 4:
            # Kunlun XPU
            self._device_name = "arm"
            devices = [int(x) for x in devices.split(",")]
            self._use_lite = True
            self._use_xpu = True
        elif device_type == 5:
            # Ascend 310 ARM CPU
            self._device_name = "arm"
            devices = [int(x) for x in devices.split(",")]
            self._use_lite = True
            self._use_ascend_cl = True
        elif device_type == 6:
            # Ascend 910 ARM CPU
            self._device_name = "arm"
            devices = [int(x) for x in devices.split(",")]
            self._use_ascend_cl = True
        else:
            _LOGGER.error(
                "LocalServiceHandler initialization fail. device_type={}".
                format(device_type))

        if client_type == "brpc" or client_type == "grpc":
            for _ in devices:
                self._port_list.append(available_port_generator.next())
            _LOGGER.info("Create ports for devices:{}. Port:{}".format(
                devices, self._port_list))

        self._client_type = client_type
        self._workdir = workdir
        self._devices = devices
        self._thread_num = thread_num
        self._mem_optim = mem_optim
        self._ir_optim = ir_optim
        self._local_predictor_client = None
        self._rpc_service_list = []
        self._server_pros = []
        self._use_profile = use_profile
        self._fetch_names = fetch_names
        self._precision = precision
        self._use_mkldnn = use_mkldnn
        self._mkldnn_cache_capacity = mkldnn_cache_capacity
        self._mkldnn_op_list = mkldnn_op_list
        self._mkldnn_bf16_op_list = mkldnn_bf16_op_list
        self._use_calib = use_calib

        _LOGGER.info(
            "Models({}) will be launched by device {}. use_gpu:{}, "
            "use_trt:{}, use_lite:{}, use_xpu:{}, device_type:{}, devices:{}, "
            "mem_optim:{}, ir_optim:{}, use_profile:{}, thread_num:{}, "
            "client_type:{}, fetch_names:{}, precision:{}, use_calib:{}, "
            "use_mkldnn:{}, mkldnn_cache_capacity:{}, mkldnn_op_list:{}, "
            "mkldnn_bf16_op_list:{}, use_ascend_cl:{}, min_subgraph_size:{},"
            "is_set_dynamic_shape_info:{}".format(
                model_config, self._device_name, self._use_gpu, self._use_trt,
                self._use_lite, self._use_xpu, device_type, self._devices,
                self._mem_optim, self._ir_optim, self._use_profile,
                self._thread_num, self._client_type, self._fetch_names,
                self._precision, self._use_calib, self._use_mkldnn,
                self._mkldnn_cache_capacity, self._mkldnn_op_list,
                self._mkldnn_bf16_op_list, self._use_ascend_cl,
                self.min_subgraph_size, bool(len(self.dynamic_shape_info))))

    def get_fetch_list(self):
        return self._fetch_names

    def get_port_list(self):
        return self._port_list

    def get_client(self, concurrency_idx):
        """
        Function get_client is only used for local predictor case, creates one
        LocalPredictor object, and initializes the paddle predictor by function
        load_model_config.The concurrency_idx is used to select running devices.  

        Args:
            concurrency_idx: process/thread index

        Returns:
            _local_predictor_client
        """

        # check that concurrency_idx is valid.
        device_num = len(self._devices)
        if device_num <= 0:
            _LOGGER.error(
                "device_num must be greater than 0. devices({})".format(
                    self._devices))
            raise ValueError("Invalid number of devices in self._devices")

        if concurrency_idx < 0:
            _LOGGER.error(
                "concurrency_idx({}) must be a non-negative number, using 0 "
                "instead".format(concurrency_idx))
            concurrency_idx = 0
        elif concurrency_idx >= device_num:
            concurrency_idx = concurrency_idx % device_num

        _LOGGER.info("GET_CLIENT : concurrency_idx={}, device_num={}".format(
            concurrency_idx, device_num))
        from paddle_serving_app.local_predict import LocalPredictor
        if self._local_predictor_client is None:
            self._local_predictor_client = LocalPredictor()
            # load model config and init predictor
            self._local_predictor_client.load_model_config(
                model_path=self._model_config,
                use_gpu=self._use_gpu,
                gpu_id=self._devices[concurrency_idx],
                use_profile=self._use_profile,
                thread_num=self._thread_num,
                mem_optim=self._mem_optim,
                ir_optim=self._ir_optim,
                use_trt=self._use_trt,
                use_lite=self._use_lite,
                use_xpu=self._use_xpu,
                precision=self._precision,
                use_mkldnn=self._use_mkldnn,
                mkldnn_cache_capacity=self._mkldnn_cache_capacity,
                mkldnn_op_list=self._mkldnn_op_list,
                mkldnn_bf16_op_list=self._mkldnn_bf16_op_list,
                use_ascend_cl=self._use_ascend_cl,
                min_subgraph_size=self.min_subgraph_size,
                dynamic_shape_info=self.dynamic_shape_info,
                use_calib=self._use_calib)
        return self._local_predictor_client

    def get_client_config(self):
        return os.path.join(self._model_config, "serving_server_conf.prototxt")

    def _prepare_one_server(self, workdir, port, gpuid, thread_num, mem_optim,
                            ir_optim, precision):
        """
        According to self._device_name, generating one Cpu/Gpu/Arm Server, and
        setting the model config amd startup params.

        Args:
            workdir: work directory
            port: network port
            gpuid: gpu id
            thread_num: thread num
            mem_optim: use memory/graphics memory optimization
            ir_optim: use calculation chart optimization
            precision: inference precison, e.g."fp32", "fp16", "int8"

        Returns:
            server: CpuServer/GpuServer
        """
        if self._device_name == "cpu":
            from paddle_serving_server import OpMaker, OpSeqMaker, Server
            op_maker = OpMaker()
            read_op = op_maker.create('general_reader')
            general_infer_op = op_maker.create('general_infer')
            general_response_op = op_maker.create('general_response')

            op_seq_maker = OpSeqMaker()
            op_seq_maker.add_op(read_op)
            op_seq_maker.add_op(general_infer_op)
            op_seq_maker.add_op(general_response_op)

            server = Server()
        else:
            #gpu or arm
            from paddle_serving_server import OpMaker, OpSeqMaker, Server
            op_maker = OpMaker()
            read_op = op_maker.create('general_reader')
            general_infer_op = op_maker.create('general_infer')
            general_response_op = op_maker.create('general_response')

            op_seq_maker = OpSeqMaker()
            op_seq_maker.add_op(read_op)
            op_seq_maker.add_op(general_infer_op)
            op_seq_maker.add_op(general_response_op)

            server = Server()
            if gpuid >= 0:
                server.set_gpuid(gpuid)
            # TODO: support arm or arm + xpu later
            server.set_device(self._device_name)
            if self._use_xpu:
                server.set_xpu()
            if self._use_lite:
                server.set_lite()
            if self._use_ascend_cl:
                server.set_ascend_cl()

        server.set_op_sequence(op_seq_maker.get_op_sequence())
        server.set_num_threads(thread_num)
        server.set_memory_optimize(mem_optim)
        server.set_ir_optimize(ir_optim)
        server.set_precision(precision)

        server.load_model_config(self._model_config)
        server.prepare_server(workdir=workdir,
                              port=port,
                              device=self._device_name)
        if self._fetch_names is None:
            self._fetch_names = server.get_fetch_list()
        return server

    def _start_one_server(self, service_idx):
        """
        Start one server
     
        Args:
            service_idx: server index
 
        Returns:
            None
        """
        self._rpc_service_list[service_idx].run_server()

    def prepare_server(self):
        """
        Prepare all servers to be started, and append them to the list.
        """
        for i, device_id in enumerate(self._devices):
            if self._workdir != "":
                workdir = "{}_{}".format(self._workdir, i)
            else:
                workdir = _workdir_name_gen.next()
            self._rpc_service_list.append(
                self._prepare_one_server(workdir,
                                         self._port_list[i],
                                         device_id,
                                         thread_num=self._thread_num,
                                         mem_optim=self._mem_optim,
                                         ir_optim=self._ir_optim,
                                         precision=self._precision))

    def start_server(self):
        """
        Start multiple processes and start one server in each process
        """
        for i, _ in enumerate(self._rpc_service_list):
            p = multiprocessing.Process(target=self._start_one_server,
                                        args=(i, ))
            p.daemon = True
            self._server_pros.append(p)
        for p in self._server_pros:
            p.start()
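For reference, a minimal usage sketch of the local_predictor path of LocalServiceHandler (the model path, feed name, input data and fetch name are hypothetical):

import numpy as np

handler = LocalServiceHandler(model_config="uci_housing_model",
                              client_type="local_predictor",
                              devices="0",                  # one GPU card
                              fetch_names=["price"])
predictor = handler.get_client(concurrency_idx=0)           # returns a LocalPredictor
x = np.random.rand(1, 13).astype("float32")
fetch_map = predictor.predict(feed={"x": x},
                              fetch=handler.get_fetch_list(),
                              batch=True)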
Example 5
class RecallServerServicer(object):
    def __init__(self):
        self.uv_client = LocalPredictor()
        self.uv_client.load_model_config(
            "user_vector_model/serving_server_dir")
        # milvus_host = '127.0.0.1'
        # milvus_port = '19530'
        self.milvus_client = RecallByMilvus()
        self.collection_name = 'demo_films'

    def get_user_vector(self, user_info):
        dic = {"userid": [], "gender": [], "age": [], "occupation": []}
        lod = [0]
        dic["userid"].append(hash2(user_info.user_id))
        dic["gender"].append(hash2(user_info.gender))
        dic["age"].append(hash2(user_info.age))
        dic["occupation"].append(hash2(user_info.job))
        lod.append(1)

        dic["userid.lod"] = lod
        dic["gender.lod"] = lod
        dic["age.lod"] = lod
        dic["occupation.lod"] = lod
        for key in dic:
            dic[key] = np.array(dic[key]).astype(np.int64).reshape(
                len(dic[key]), 1)

        fetch_map = self.uv_client.predict(
            feed=dic, fetch=["save_infer_model/scale_0.tmp_0"], batch=True)
        return fetch_map["save_infer_model/scale_0.tmp_0"].tolist()[0]

    def recall(self, request, context):
        '''
    message RecallRequest{
        string log_id = 1;
        user_info.UserInfo user_info = 2;
        string recall_type= 3;
        uint32 request_num= 4;
    }

    message RecallResponse{
        message Error {
            uint32 code = 1;
            string text = 2;
        }
        message ScorePair {
            string nid = 1;
            float score = 2;
        };
        Error error = 1;
        repeated ScorePair score_pairs = 2;
    }
        '''
        recall_res = recall_pb2.RecallResponse()
        user_vector = self.get_user_vector(request.user_info)

        status, results = self.milvus_client.search(
            collection_name=self.collection_name,
            vectors=[user_vector],
            partition_tag="Movie")
        for entities in results:
            if len(entities) == 0:
                recall_res.error.code = 500
                recall_res.error.text = "Recall server get milvus fail. ({})".format(
                    str(request))
                return recall_res
            for topk_film in entities:
                # current_entity = topk_film.entity
                score_pair = recall_res.score_pairs.add()
                score_pair.nid = str(topk_film.id)
                score_pair.score = float(topk_film.distance)
        recall_res.error.code = 200
        return recall_res
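A hedged sketch of wiring this servicer into a gRPC server; the generated module name recall_pb2_grpc and the add_RecallServerServicer_to_server registration function are assumptions based on standard protoc naming, and the port is hypothetical:

import grpc
from concurrent import futures

# recall_pb2_grpc is the protoc-generated module assumed to accompany recall_pb2
server = grpc.server(futures.ThreadPoolExecutor(max_workers=4))
recall_pb2_grpc.add_RecallServerServicer_to_server(RecallServerServicer(), server)
server.add_insecure_port("[::]:8950")
server.start()
server.wait_for_termination()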
Example 6
import numpy as np
import codecs

from paddle_serving_app.local_predict import LocalPredictor


class Movie(object):
    def __init__(self):
        self.movie_id, self.title, self.genres = "", "", ""
        pass


def hash2(a):
    return hash(a) % 60000000


ctr_client = LocalPredictor()
ctr_client.load_model_config("serving_server_dir")
with codecs.open("movies.dat", "r", encoding='utf-8', errors='ignore') as f:
    lines = f.readlines()

ff = open("movie_vectors.txt", 'w')

for line in lines:
    if len(line.strip()) == 0:
        continue
    tmp = line.strip().split("::")
    movie_id = tmp[0]
    title = tmp[1]
    genre_group = tmp[2]

    tmp = genre_group.strip().split("|")
    genre = tmp
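The snippet is cut off after splitting the genres; a plausible continuation (not part of the original), following the hashed-feature, LoD-annotated feed pattern of Examples 5 and 7, with the fetch tensor name taken as an assumption, would be:

    # still inside the per-line loop: build the hashed, LoD-annotated feed
    dic = {"movieid": [hash2(movie_id)],
           "title": [hash2(title)],
           "genres": [hash2(g) for g in genre]}
    dic["movieid.lod"] = [0, 1]
    dic["title.lod"] = [0, 1]
    dic["genres.lod"] = [0, len(genre)]
    for key in dic:
        dic[key] = np.array(dic[key]).astype(np.int64).reshape(len(dic[key]), 1)
    fetch_map = ctr_client.predict(
        feed=dic, fetch=["save_infer_model/scale_0.tmp_0"], batch=True)  # fetch name assumed
    ff.write("{}:{}\n".format(
        movie_id, fetch_map["save_infer_model/scale_0.tmp_0"].tolist()[0]))

ff.close()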
Example 7
class RankServerServicer(object):
    def __init__(self):
        self.ctr_client = LocalPredictor()
        self.ctr_client.load_model_config("rank_model")

    def process_feed_dict(self, user_info, item_infos):
        #" userid gender age occupation | movieid title genres"
        dic = {
            "userid": [],
            "gender": [],
            "age": [],
            "occupation": [],
            "movieid": [],
            "title": [],
            "genres": []
        }
        batch_size = len(item_infos)
        lod = [0]
        for i, item_info in enumerate(item_infos):
            dic["movieid"].append(hash2(item_info.movie_id))
            dic["title"].append(hash2(item_info.title))
            dic["genres"].append(hash2(item_info.genre))
            dic["userid"].append(hash2(user_info.user_id))
            dic["gender"].append(hash2(user_info.gender))
            dic["age"].append(hash2(user_info.age))
            dic["occupation"].append(hash2(user_info.job))
            lod.append(i + 1)

        dic["movieid.lod"] = lod
        dic["title.lod"] = lod
        dic["genres.lod"] = lod
        dic["userid.lod"] = lod
        dic["gender.lod"] = lod
        dic["age.lod"] = lod
        dic["occupation.lod"] = lod
        for key in dic:
            dic[key] = np.array(dic[key]).astype(np.int64).reshape(
                len(dic[key]), 1)

        return dic

    def rank_predict(self, request, context):
        '''
        message RankRequest {
            string log_id = 1;
            user_info.UserInfo user_info = 2;
            repeated item_info.ItemInfo item_infos = 3;
        }

        message RankResponse {
            message Error {
                uint32 code = 1;
                string text = 2;
            }
            message ScorePair {
                string nid = 1;
                float score = 2;
            };
            Error error = 1;
            repeated ScorePair score_pairs = 2;
        };
        '''
        batch_size = len(request.item_infos)
        dic = self.process_feed_dict(request.user_info, request.item_infos)
        fetch_map = self.ctr_client.predict(
            feed=dic, fetch=["save_infer_model/scale_0.tmp_0"], batch=True)
        response = rank_pb2.RankResponse()

        #raise ValueError("UM server get user_info from redis fail. ({})".format(str(request)))
        response.error.code = 200

        for i in range(batch_size):
            score_pair = response.score_pairs.add()
            score_pair.nid = request.item_infos[i].movie_id
            score_pair.score = fetch_map["save_infer_model/scale_0.tmp_0"][i][
                0]
        response.score_pairs.sort(reverse=True, key=lambda item: item.score)
        return response
Example 8
class WebService(object):
    def __init__(self, name="default_service"):
        self.name = name
        # pipeline
        self._server = pipeline.PipelineServer(self.name)

        self.gpus = ["-1"]  # deprecated
        self.rpc_service_list = []  # deprecated

    def get_pipeline_response(self, read_op):
        return None

    def prepare_pipeline_config(self, yml_file=None, yml_dict=None):
        # build dag
        read_op = pipeline.RequestOp()
        last_op = self.get_pipeline_response(read_op)
        if not isinstance(last_op, Op):
            raise ValueError(
                "The return value type of `get_pipeline_response` "
                "function is not Op type, please check function "
                "`get_pipeline_response`.")
        response_op = pipeline.ResponseOp(input_ops=[last_op])
        self._server.set_response_op(response_op)
        self._server.prepare_server(yml_file=yml_file, yml_dict=yml_dict)

    def run_service(self):
        self._server.run_server()

    def load_model_config(self,
                          server_config_dir_paths,
                          client_config_path=None):
        if isinstance(server_config_dir_paths, str):
            server_config_dir_paths = [server_config_dir_paths]
        elif isinstance(server_config_dir_paths, list):
            pass

        for single_model_config in server_config_dir_paths:
            if os.path.isdir(single_model_config):
                pass
            elif os.path.isfile(single_model_config):
                raise ValueError(
                    "The input of --model should be a dir not file.")
        self.server_config_dir_paths = server_config_dir_paths
        from .proto import general_model_config_pb2 as m_config
        import google.protobuf.text_format
        file_path_list = []
        for single_model_config in self.server_config_dir_paths:
            file_path_list.append(
                "{}/serving_server_conf.prototxt".format(single_model_config))

        model_conf = m_config.GeneralModelConfig()
        with open(file_path_list[0], 'r') as f:
            model_conf = google.protobuf.text_format.Merge(str(f.read()),
                                                           model_conf)
        self.feed_vars = {var.alias_name: var for var in model_conf.feed_var}

        if len(file_path_list) > 1:
            model_conf = m_config.GeneralModelConfig()
            with open(file_path_list[-1], 'r') as f:
                model_conf = google.protobuf.text_format.Merge(
                    str(f.read()), model_conf)

        self.fetch_vars = {var.alias_name: var for var in model_conf.fetch_var}
        if client_config_path is None:
            self.client_config_path = file_path_list

    # after this function, self.gpus should be a list of str or [].
    def set_gpus(self, gpus):
        print("This API will be deprecated later. Please do not use it")
        self.gpus = format_gpu_to_strlist(gpus)

# This function can be called by the user or by create_rpc_config.
# If called by the user, the user may either call set_gpus beforehand or pass
# `gpus` directly. If `gpus` is None it has not been set at all, and self.gpus
# is used instead; otherwise `gpus` takes precedence (so when both set_gpus
# and `gpus` are given, `gpus` wins).

    def default_rpc_service(self,
                            workdir,
                            port=9292,
                            gpus=None,
                            thread_num=4,
                            mem_optim=True,
                            use_lite=False,
                            use_xpu=False,
                            ir_optim=False,
                            precision="fp32",
                            use_calib=False,
                            use_trt=False,
                            gpu_multi_stream=False,
                            runtime_thread_num=None,
                            batch_infer_size=None):

        device = "cpu"
        server = Server()
        # only when `gpus` is None (i.e. it was not set at all)
        # do we fall back to self.gpus.
        if gpus is None:
            gpus = self.gpus

        gpus = format_gpu_to_strlist(gpus)
        server.set_gpuid(gpus)

        if len(gpus) == 0 or gpus == ["-1"]:
            if use_lite:
                device = "arm"
            else:
                device = "cpu"
        else:
            device = "gpu"

        op_maker = OpMaker()
        op_seq_maker = OpSeqMaker()

        read_op = op_maker.create('GeneralReaderOp')
        op_seq_maker.add_op(read_op)

        for idx, single_model in enumerate(self.server_config_dir_paths):
            if len(self.server_config_dir_paths) == 2 and idx == 0:
                infer_op_name = "GeneralDetectionOp"
            else:
                infer_op_name = "GeneralInferOp"
            general_infer_op = op_maker.create(infer_op_name)
            op_seq_maker.add_op(general_infer_op)

        general_response_op = op_maker.create('GeneralResponseOp')
        op_seq_maker.add_op(general_response_op)

        server.set_op_sequence(op_seq_maker.get_op_sequence())
        server.set_num_threads(thread_num)
        server.set_memory_optimize(mem_optim)
        server.set_ir_optimize(ir_optim)
        server.set_device(device)
        server.set_precision(precision)
        server.set_use_calib(use_calib)

        if use_trt and device == "gpu":
            server.set_trt()
            server.set_ir_optimize(True)

        if gpu_multi_stream and device == "gpu":
            server.set_gpu_multi_stream()

        if runtime_thread_num:
            server.set_runtime_thread_num(runtime_thread_num)

        if batch_infer_size:
            server.set_batch_infer_size(batch_infer_size)

        if use_lite:
            server.set_lite()
        if use_xpu:
            server.set_xpu()

        server.load_model_config(
            self.server_config_dir_paths
        )  #brpc Server support server_config_dir_paths

        server.prepare_server(workdir=workdir, port=port, device=device)
        return server

    def _launch_rpc_service(self, service_idx):
        self.rpc_service_list[service_idx].run_server()

    # If this function is used, self.gpus must be set beforehand; otherwise
    # the default value self.gpus = ["-1"] is used, so we always pass
    # `gpus` = self.gpus.
    def create_rpc_config(self):
        self.rpc_service_list.append(
            self.default_rpc_service(
                self.workdir,
                self.port_list[0],
                self.gpus,
                thread_num=self.thread_num,
                mem_optim=self.mem_optim,
                use_lite=self.use_lite,
                use_xpu=self.use_xpu,
                ir_optim=self.ir_optim,
                precision=self.precision,
                use_calib=self.use_calib,
                use_trt=self.use_trt,
                gpu_multi_stream=self.gpu_multi_stream,
                runtime_thread_num=self.runtime_thread_num,
                batch_infer_size=self.batch_infer_size))

    def prepare_server(self,
                       workdir,
                       port=9393,
                       device="cpu",
                       precision="fp32",
                       use_calib=False,
                       use_lite=False,
                       use_xpu=False,
                       ir_optim=False,
                       thread_num=4,
                       mem_optim=True,
                       use_trt=False,
                       gpu_multi_stream=False,
                       runtime_thread_num=None,
                       batch_infer_size=None,
                       gpuid=None):
        print("This API will be deprecated later. Please do not use it")
        self.workdir = workdir
        self.port = port
        self.thread_num = thread_num
        # self.device is not used at all.
        # device is set by gpuid.
        self.precision = precision
        self.use_calib = use_calib
        self.use_lite = use_lite
        self.use_xpu = use_xpu
        self.ir_optim = ir_optim
        self.mem_optim = mem_optim
        self.port_list = []
        self.use_trt = use_trt
        self.gpu_multi_stream = gpu_multi_stream
        self.runtime_thread_num = runtime_thread_num
        self.batch_infer_size = batch_infer_size

        # record port and pid info for stopping the process
        dump_pid_file([self.port], "web_service")
        # if gpuid is not None, it takes precedence; otherwise keep self.gpus
        # unchanged (it may have been set earlier via set_gpus).
        if gpuid is not None:
            self.gpus = format_gpu_to_strlist(gpuid)

        default_port = 12000
        for i in range(1000):
            if port_is_available(default_port + i):
                self.port_list.append(default_port + i)
                break

    def _launch_web_service(self):
        self.client = Client()
        self.client.load_client_config(self.client_config_path)
        endpoints = ""
        endpoints = "127.0.0.1:{}".format(self.port_list[0])
        self.client.connect([endpoints])

    def get_prediction(self, request):
        if not request.json:
            abort(400)
        if "fetch" not in request.json:
            abort(400)
        try:
            feed, fetch, is_batch = self.preprocess(request.json["feed"],
                                                    request.json["fetch"])
            if isinstance(feed, dict) and "fetch" in feed:
                del feed["fetch"]
            if len(feed) == 0:
                raise ValueError("empty input")
            fetch_map = self.client.predict(feed=feed,
                                            fetch=fetch,
                                            batch=is_batch)
            result = self.postprocess(feed=request.json["feed"],
                                      fetch=fetch,
                                      fetch_map=fetch_map)
            result = {"result": result}
        except ValueError as err:
            result = {"result": str(err)}
        return result

    def run_rpc_service(self):
        print("This API will be deprecated later. Please do not use it")
        import socket
        localIP = socket.gethostbyname(socket.gethostname())
        print("web service address:")
        print("http://{}:{}/{}/prediction".format(localIP, self.port,
                                                  self.name))
        server_pros = []
        self.create_rpc_config()
        for i, service in enumerate(self.rpc_service_list):
            p = Process(target=self._launch_rpc_service, args=(i, ))
            server_pros.append(p)
        for p in server_pros:
            p.start()

        app_instance = Flask(__name__)

        @app_instance.before_first_request
        def init():
            self._launch_web_service()

        service_name = "/" + self.name + "/prediction"

        @app_instance.route(service_name, methods=["POST"])
        def run():
            return self.get_prediction(request)

        self.app_instance = app_instance

    # TODO: maybe rename this API, e.g. to run_local_predictor?
    def run_debugger_service(self, gpu=False):
        print("This API will be deprecated later. Please do not use it")
        import socket
        localIP = socket.gethostbyname(socket.gethostname())
        print("web service address:")
        print("http://{}:{}/{}/prediction".format(localIP, self.port,
                                                  self.name))
        app_instance = Flask(__name__)

        @app_instance.before_first_request
        def init():
            self._launch_local_predictor(gpu)

        service_name = "/" + self.name + "/prediction"

        @app_instance.route(service_name, methods=["POST"])
        def run():
            return self.get_prediction(request)

        self.app_instance = app_instance

    def _launch_local_predictor(self, gpu):
        # LocalPredictor behaves like a server, but from WebService's point of
        # view it is the request initiator, i.e. a client.
        # local_predictor only supports a single model dir path (type: str),
        # so the input must be self.server_config_dir_paths[0].
        from paddle_serving_app.local_predict import LocalPredictor
        self.client = LocalPredictor()
        if gpu:
            # if the user forgot to call set_gpus, default to self.gpus = ["0"].
            if len(self.gpus) == 0 or self.gpus == ["-1"]:
                self.gpus = ["0"]
            # right now, the local predictor only supports one card; no matter
            # how many gpu ids are in gpus, only the first one is used.
            gpu_id = (self.gpus[0].split(","))[0]
            self.client.load_model_config(self.server_config_dir_paths[0],
                                          use_gpu=True,
                                          gpu_id=gpu_id)
        else:
            self.client.load_model_config(self.server_config_dir_paths[0],
                                          use_gpu=False)

    def run_web_service(self):
        print("This API will be deprecated later. Please do not use it")
        self.app_instance.run(host="0.0.0.0", port=self.port, threaded=True)

    def get_app_instance(self):
        return self.app_instance

    def preprocess(self, feed=[], fetch=[]):
        print("This API will be deprecated later. Please do not use it")
        is_batch = True
        feed_dict = {}
        for var_name in self.feed_vars.keys():
            feed_dict[var_name] = []
        for feed_ins in feed:
            for key in feed_ins:
                feed_dict[key].append(
                    np.array(feed_ins[key]).reshape(
                        list(self.feed_vars[key].shape))[np.newaxis, :])
        feed = {}
        for key in feed_dict:
            feed[key] = np.concatenate(feed_dict[key], axis=0)
        return feed, fetch, is_batch

    def postprocess(self, feed=[], fetch=[], fetch_map=None):
        print("This API will be deprecated later. Please do not use it")
        for key in fetch_map:
            fetch_map[key] = fetch_map[key].tolist()
        return fetch_map
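Finally, a hedged end-to-end sketch of the deprecated (non-pipeline) path of this WebService class, using only the methods shown above (the model directory, service name and port are hypothetical):

service = WebService(name="uci")
service.load_model_config("uci_housing_model")
service.set_gpus("0")                                   # optional; "-1" means CPU
service.prepare_server(workdir="workdir", port=9393, gpuid="0")
service.run_rpc_service()                               # starts brpc workers and builds the Flask app
service.run_web_service()                               # serves POST /uci/prediction on port 9393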