Code Example #1
    def __init__(self, args):
        logger.debug('Create backend {}'.format(args['name']))

        self.name = args['name']

        self.input_tensor_queue = queue.Queue(maxsize=MAX_SESSION_SLOT_NUM)
        self.batched_tensor_queue = queue.Queue(maxsize=MAX_BATCHED_TENSOR_NUM)
        self.output_tensor_queue = queue.Queue(maxsize=MAX_SESSION_SLOT_NUM)

        self.dynamic_batch = args.get('dynamic_batch')
        self.duplicate_num = args.get('duplicate_num')
        self.model_path = args['path']
        self.model_type = args.get('model_type')
        self.max_batch_size = args.get('max_batch_size')
        self.timeout = args.get('timeout')
        self.use_mps = args.get('use_mps')

        self.metric_q = args.get('metric_queue')
        self.tags = {'model': self.name}

        self.threads = {}  # all threads
        self.io_queues = []  # io queue of request handler
        self.io_queue_lock = threading.Lock()  # lock for request handler creation

        # resolve defaults before they are used below
        self.use_mps = False if self.use_mps is None else self.use_mps
        self.dynamic_batch = True if self.dynamic_batch is None else self.dynamic_batch
        self.max_batch_size = 32 if self.max_batch_size is None else self.max_batch_size
        self.duplicate_num = 1 if self.duplicate_num is None else self.duplicate_num

        # input shared memory
        self.input_shm_name_set = []  # shared memory names for concat and inference
        self.input_shm_set = []  # shared memory for concat and inference
        self.input_shm_queue = mp.Queue(maxsize=3 * self.duplicate_num)

        self.input_info = args.get('input_info')
        # create a set of input shared memory
        for idx in range(3 * self.duplicate_num):
            input_shm_name = []
            input_shm = []
            for info in self.input_info:
                shm_name = gen_name(info['name'], suffix=idx)
                sh = ShmHandler(shm_name, info['max_shape'], info['dtype'])
                sh.create_shm()
                input_shm_name.append(shm_name)
                input_shm.append(sh)

            self.input_shm_name_set.append(input_shm_name)
            self.input_shm_set.append(input_shm)
            self.input_shm_queue.put(idx)

        # output shared memory info
        self.output_info = args.get('output_info')

        self.adapt = False
        if self.timeout is None:
            # print('TIMEOUT IS NONE')
            self.timeout = 0.01
            self.adapt = True
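Every example in this file goes through ShmHandler, whose create_shm / load_shm / ndarray / close calls appear throughout. The project's own implementation is not shown here; a minimal sketch of a wrapper with that interface, assuming it sits on top of Python's multiprocessing.shared_memory, might look like:

import numpy as np
from multiprocessing import shared_memory


class ShmHandler:
    # Hypothetical sketch, not the SimpleDBI implementation: a named
    # shared-memory block sized for max_shape elements of dtype.
    def __init__(self, name, max_shape, dtype):
        self.name = name
        self.dtype = np.dtype(dtype)
        self.size = int(np.prod(max_shape)) * self.dtype.itemsize
        self.shm = None

    def create_shm(self):
        # producer side: allocate the named block
        self.shm = shared_memory.SharedMemory(name=self.name, create=True, size=self.size)

    def load_shm(self):
        # consumer side: attach to a block created by another process
        self.shm = shared_memory.SharedMemory(name=self.name)

    def ndarray(self, shape):
        # view a prefix of the block as an ndarray of the requested shape
        return np.ndarray(shape, dtype=self.dtype, buffer=self.shm.buf)

    def close(self):
        self.shm.close()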
Code Example #2
        def get_tensor_info_from_session(create):
            tensor_info = conn.recv()
            shm_list = []
            for info in tensor_info:
                sh = ShmHandler(info['shm_name'], info['max_shape'],
                                info['dtype'])
                if create:
                    sh.create_shm()
                else:
                    sh.load_shm()
                shm_list.append(sh)

            conn.send(True)
            return shm_list
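This helper is the backend-side half of the handshake in Example #3: the Session sends a list of tensor dicts (each carrying shm_name, max_shape and dtype) over the pipe, the backend creates or attaches to the matching shared-memory blocks, and replies True so the Session can proceed. A hypothetical call site (the surrounding backend code is not shown in these snippets) would be:

# backend side, after receiving the connection from the Session
input_shm = get_tensor_info_from_session(create=False)   # attach to the input blocks the Session created
output_shm = get_tensor_info_from_session(create=True)   # allocate the output blocks the Session will attach to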
Code Example #3
File: session.py  Project: YellowOldOdd/SDBI
    def __init__(
        self,
        name: str,
        path: str,
        input_info: list,
        output_info: list,
        dynamic_batch: bool = True,
        duplicate_num: int = 1,
        model_type: str = 'torch',
        max_batch_size: int = 32,
        # timeout : float = 0.003,
        metric: bool = False,
        timeout=None,
    ):
        global inference_context_queue
        assert inference_context_queue
        logger.debug('Session started.')

        # 1. backend params
        self.name = name
        backend_args = {
            'name': name,
            'path': path,
            'dynamic_batch': dynamic_batch,
            'duplicate_num': duplicate_num,
            'model_type': model_type,
            'max_batch_size': max_batch_size,
            'timeout': timeout,
            'input_info': input_info,
            'output_info': output_info,
        }

        # 2. build connect with backend
        self.conn_s, self.conn_c = mp.Pipe()
        inference_context_queue.put((self.conn_c, backend_args))

        stat = self.conn_s.recv()
        assert stat is True

        # 3. share memory with backend
        self.input_shm = []
        self.output_shm = []

        def _shm_info(tensor_infos):
            for info in tensor_infos:
                assert info.get('name')
                assert info.get('max_shape')
                assert info.get('dtype')
                info['shm_name'] = gen_name(info['name'])
                info['shm_size'] = \
                    functools.reduce(operator.mul, info.get('max_shape')) * \
                    np.dtype(info.get('dtype')).itemsize

        _shm_info(input_info)
        for info in input_info:
            sh = ShmHandler(info['shm_name'], info['max_shape'], info['dtype'])
            sh.create_shm()
            self.input_shm.append(sh)
        self.conn_s.send(input_info)
        assert self.conn_s.recv()

        # load output shm
        _shm_info(output_info)
        self.conn_s.send(output_info)
        assert self.conn_s.recv()
        for info in output_info:
            sh = ShmHandler(info['shm_name'], info['max_shape'], info['dtype'])
            sh.load_shm()
            self.output_shm.append(sh)
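As a rough usage illustration (the shapes, dtypes and paths below are invented, not taken from the project), a single-input, single-output torch model could be wrapped like this; note that __init__ asserts the module-level inference_context_queue is already set up, so the backend process must be running first:

# hypothetical tensor descriptions and model path, for illustration only
input_info = [{'name': 'input', 'max_shape': [32, 3, 224, 224], 'dtype': 'float32'}]
output_info = [{'name': 'logits', 'max_shape': [32, 1000], 'dtype': 'float32'}]

session = Session(
    name='resnet50',
    path='/models/resnet50.pt',
    input_info=input_info,
    output_info=output_info,
    model_type='torch',
    max_batch_size=32,
)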
Code Example #4
    def mps_model_handler(self, idx):
        try:
            # 1. create backend model process
            conn_backend, conn_model = mp.Pipe()
            proc = mp.Process(target=model_process,
                              args=(self.name, self.model_type,
                                    self.model_path, self.input_shm_queue,
                                    conn_model, self.input_info,
                                    self.output_info, idx, self.metric_q))
            proc.start()

            # 2. create shared memory
            conn_backend.send(self.input_shm_name_set)

            output_shm_name = conn_backend.recv()
            output_shm = []
            for shm_name, info in zip(output_shm_name, self.output_info):
                sh = ShmHandler(shm_name, info['max_shape'], info['dtype'])
                sh.load_shm()
                output_shm.append(sh)
        except:
            logger.error('mps_model_handler initialize error')
            logger.error(traceback.format_exc())
            return

        def health_check():
            while True:
                sleep(5)
                tag = {'model_handler_name': '{}_{}'.format(self.name, idx)}
                if proc.is_alive():
                    self.emit_metric({'model_handler_health_value': 1},
                                     tag=tag)
                else:
                    self.emit_metric({'model_handler_health_value': 0},
                                     tag=tag)

        health_thread = threading.Thread(target=health_check, daemon=True)
        health_thread.start()

        # 3. inference
        while self.alive:
            start_ts = time()
            try:
                shm_idx, shapes, batch_index, batch_q_ts = \
                    self.batched_tensor_queue.get(timeout=1)
            except queue.Empty:
                continue
            except:
                logger.error('mps_model_handler error')
                logger.error(traceback.format_exc())
                continue  # shm_idx etc. are undefined here, skip this iteration

            batch_output = []
            try:
                model_start_ts = time()
                conn_backend.send((shm_idx, shapes))
                shapes = conn_backend.recv()
                self.emit_metric(
                    {'backend_forward_model_cost': time() - model_start_ts})

                for shape, sh in zip(shapes, output_shm):
                    shm_arr = sh.ndarray(shape)
                    output_arr = np.empty(shape, shm_arr.dtype)
                    output_arr[:] = shm_arr[:]
                    batch_output.append(output_arr)

                fwd_cost = time() - start_ts
                self.emit_metric({'backend_forward_cost': fwd_cost})

                if self.adapt:
                    delta = fwd_cost / (0.5 + self.duplicate_num) - self.timeout
                    if abs(delta) / self.timeout > 0.2:
                        with self.io_queue_lock:
                            self.timeout = self.timeout * 0.8 + (self.timeout + delta) * 0.2
                        # print('forward cost : {}, timeout : {}'.format(
                        #     fwd_cost, self.timeout
                        # ))

            except:
                logger.error('mps_model_handler error')
                logger.error(traceback.format_exc())
                self.emit_metric({'mps_model_handler_error_counter': 1})

            finally:
                self.output_tensor_queue.put((batch_output, batch_index))

        # 4. clean
        conn_backend.send(EXIT_SIG)
        stat = conn_backend.recv()
        for sh in output_shm:
            sh.close()
        conn_backend.send(True)

        proc.join()
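The adaptive timeout near the end of the inference loop is easier to follow once the smoothing is expanded: timeout = timeout * 0.8 + (timeout + delta) * 0.2 is just timeout += 0.2 * delta, so the batching timeout drifts toward fwd_cost / (0.5 + duplicate_num) in 20% steps whenever it is more than 20% off. One step with illustrative numbers:

# one adaptive step, values are made up for illustration
fwd_cost = 0.030        # measured end-to-end forward cost (s)
duplicate_num = 1
timeout = 0.010         # current batching timeout (s)

delta = fwd_cost / (0.5 + duplicate_num) - timeout   # 0.020 - 0.010 = 0.010
if abs(delta) / timeout > 0.2:                       # 1.0 > 0.2 -> adapt
    timeout = timeout * 0.8 + (timeout + delta) * 0.2
    # equivalent to timeout += 0.2 * delta, giving 0.012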
Code Example #5
def model_process(model_name, model_type, model_path, shm_queue, conn,
                  input_info, output_info, pid, metric_q):
    try:
        # 1. init model
        if model_type == 'mock':
            from SimpleDBI.mock_model import MockModel
            model = MockModel(model_name, model_path)
        elif model_type == 'torch':
            from SimpleDBI.torch_model import TorchModel
            model = TorchModel(model_name, model_path)
        elif model_type == 'tf':
            from SimpleDBI.tf_model import TFModel
            model = TFModel(model_name, model_path, input_info, output_info)
        elif model_type == 'tensorrt':
            from SimpleDBI.tensorrt_model import TensorRTModel
            model = TensorRTModel(model_name, model_path)
        elif model_type == 'onnx2trt':
            from SimpleDBI.onnx2trt_model import TensorRTModel
            model = TensorRTModel(model_name, model_path)
        else:
            logger.error('ERROR MODEL TYPE : {}'.format(model_type))
            raise RuntimeError('ERROR MODEL TYPE : {}'.format(model_type))

        # 2. create shared memory
        # 2.1 create output shared memory
        output_shm_name = []
        output_shm = []
        for info in output_info:
            shm_name = gen_name(info['name'])
            sh = ShmHandler(shm_name, info['max_shape'], info['dtype'])
            sh.create_shm()
            output_shm_name.append(shm_name)
            output_shm.append(sh)

        # 2.2 load input shared memory
        input_shm_name_list = conn.recv()
        input_shm_list = []
        for input_shm_name in input_shm_name_list:
            input_shm = []
            for shm_name, info in zip(input_shm_name, input_info):
                sh = ShmHandler(shm_name, info['max_shape'], info['dtype'])
                sh.load_shm()
                input_shm.append(sh)
            input_shm_list.append(input_shm)

        conn.send(output_shm_name)
    except:
        logger.error('model_process initialize error')
        logger.error(traceback.format_exc())
        return
    logger.info('model_process <{}> initialize done'.format(model_name))

    tags = {'model': '{}_{}'.format(model_name, pid)}
    # 3. inference
    while True:
        value = conn.recv()
        if value == EXIT_SIG:
            break

        shm_idx, input_shapes = value
        inputs = []
        output_shapes = []
        try:
            ts = time()
            # 3.1 load input
            input_shm = input_shm_list[shm_idx]
            for shape, sh in zip(input_shapes, input_shm):
                shm_arr = sh.ndarray(shape)
                inputs.append(shm_arr)

            # 3.2 forward
            outputs = model.forward(*inputs)

            # 3.3 write output
            for output, sh in zip(outputs, output_shm):
                shape = output.shape
                shm_arr = sh.ndarray(shape)
                shm_arr[:] = output[:]
                output_shapes.append(shape)

            if metric_q is not None:
                metric_q.put({
                    "tags": tags,
                    "fields": {
                        'model_proc_cost': time() - ts
                    },
                })

        except:
            logger.error('model_process runtime error')
            logger.error(traceback.format_exc())

        finally:
            conn.send(output_shapes)
            shm_queue.put(shm_idx)  # return shared memory slot to the available queue

    # 4. clean
    try:
        for input_shm in input_shm_list:
            for sh in input_shm:
                sh.close()

        conn.send(True)
        stat = conn.recv()
        assert stat
        for sh in output_shm:
            sh.close()

        conn.close()
    except:
        logger.error('model_process destructor error')
        logger.error(traceback.format_exc())

    logger.info('Model process exit.')
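model_process only assumes that a model class takes (name, path) in its constructor (TFModel additionally receives input_info and output_info) and exposes forward(*inputs) returning one numpy array per entry in output_info, in order. A minimal stand-in satisfying that contract, sketched here rather than copied from SimpleDBI.mock_model, could be:

import numpy as np


class EchoModel:
    # Hypothetical stand-in that satisfies the interface model_process expects.
    def __init__(self, name, path):
        self.name = name
        self.path = path  # a real backend would load weights from this path

    def forward(self, *inputs):
        # one array per configured output; here we simply echo the inputs back
        return [np.asarray(x) for x in inputs]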