Code example #1
    def add_batch_handler(self, api_route, max_latency, max_batch_size):
        '''
        Params:
        * max_latency: upper bound on the end-to-end latency of request handling
        * max_batch_size: upper bound on the batch size for this handler

        ** the marshal server prioritizes meeting these limits over efficiency
        '''
        from aiohttp.web import HTTPTooManyRequests

        if api_route not in self.batch_handlers:
            dispatcher = CorkDispatcher(
                max_latency,
                max_batch_size,
                shared_sema=self.fetch_sema(),
                # served as the response when the dispatcher is overloaded
                fallback=HTTPTooManyRequests,
            )
            _func = dispatcher(
                functools.partial(
                    self._batch_handler_template,
                    api_route=api_route,
                    max_latency=max_latency,
                )
            )
            # shut the dispatcher down cleanly when the server stops
            self.cleanup_tasks.append(dispatcher.shutdown)
            self.batch_handlers[api_route] = _func
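
For orientation, here is a standalone sketch of the same decorator pattern. The import path and the echo_batch handler are assumptions for illustration, not something shown in the snippets on this page:

# Sketch only: import path as in BentoML 0.x (assumed), handler is hypothetical.
from bentoml.marshal.dispatcher import CorkDispatcher

dispatcher = CorkDispatcher(max_latency_in_ms=1000, max_batch_size=64)

@dispatcher
async def echo_batch(inputs):
    # A batch handler receives a list of queued inputs and must return
    # a list of outputs of the same length, in the same order.
    return inputs

# Callers await the wrapped function one item at a time; the dispatcher
# coalesces concurrent calls into a single batch behind the scenes:
#     result = await echo_batch(item)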
Code example #2
async def test_dispatcher_basic(model):
    N = 100
    # a generous latency budget, so batching is driven purely by throughput
    dispatcher = CorkDispatcher(max_latency_in_ms=5 * 60 * 1000, max_batch_size=1000)

    A, B = model.A, model.B
    wrapped_model = dispatcher(model)

    inputs = list(range(N))

    # coalescing should finish well within the time N sequential calls would take
    with assert_in_time((A + B) * N):
        outputs = await asyncio.gather(*(wrapped_model(i) for i in inputs))
    assert model.n_called < N  # requests were batched, not dispatched one by one
    assert all(o == p for p, o in zip(await model(inputs), outputs))
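
The tests on this page rely on an assert_in_time helper that is not shown. A minimal sketch of what such a context manager might look like, inferred only from the call sites:

import time
from contextlib import contextmanager

@contextmanager
def assert_in_time(budget_seconds):
    # fail the enclosing test if the wrapped block overruns the budget
    start = time.time()
    yield
    elapsed = time.time() - start
    assert elapsed < budget_seconds, f'took {elapsed:.3f}s, budget was {budget_seconds:.3f}s'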
Code example #3
async def test_dispatcher_overload(model):
    dispatcher = CorkDispatcher(max_latency_in_ms=2000, max_batch_size=1000)

    wrapped_model = dispatcher(model)

    # preheating: let the dispatcher's optimizer learn the model's timing
    while model.n_called < BENCHMARK.N_PREHEAT_01:
        await asyncio.gather(*(wrapped_model(i) for i in range(5)))

    # check latency: 2s budget with a 50% margin
    inputs = tuple(range(3000))
    with assert_in_time(2 * 1.5):
        outputs = await asyncio.gather(*(wrapped_model(i) for i in inputs))
    # under overload some requests hit the fallback, but at least some must succeed
    assert any(p == o for p, o in zip(await model(inputs), outputs))
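
The model fixture is not shown either. Judging from the attributes the tests read (A, B, n_called) and the batch call signature, it could be a fake model along these lines; the class name and default timings are hypothetical:

import asyncio

class FakeBatchModel:
    def __init__(self, A=0.002, B=0.001):
        self.A = A          # fixed overhead per batch call, in seconds (assumed)
        self.B = B          # additional cost per item, in seconds (assumed)
        self.n_called = 0   # how many batch calls were made

    async def __call__(self, inputs):
        self.n_called += 1
        await asyncio.sleep(self.A + self.B * len(inputs))
        return inputs  # identity model: each output equals its input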
Code example #4
File: marshal.py Project: sysu/BentoML
    def add_batch_handler(self, api_name, max_latency, max_batch_size):
        '''
        Params:
        * max_latency: upper bound on the end-to-end latency of request handling
        * max_batch_size: upper bound on the batch size for this handler

        ** the marshal server prioritizes meeting these limits over efficiency
        '''

        if api_name not in self.batch_handlers:
            _func = CorkDispatcher(
                max_latency,
                max_batch_size,
                shared_sema=self.fetch_sema(),
                fallback=aiohttp.web.HTTPTooManyRequests,
            )(partial(self._batch_handler_template, api_name=api_name))
            self.batch_handlers[api_name] = _func
Code example #5
async def test_dispatcher_preheating(model):
    dispatcher = CorkDispatcher(max_latency_in_ms=2000, max_batch_size=1000)

    A, B = model.A, model.B
    wrapped_model = dispatcher(model)

    # phase 1: the optimizer's o_b should roughly match B within N_PREHEAT_01 calls
    while model.n_called < BENCHMARK.N_PREHEAT_01 * 2:
        if like(B, dispatcher.optimizer.o_b, 0.99, 0.001):
            break
        # feed it small batches of varying size (1 to 5)
        await asyncio.gather(*(wrapped_model(i) for i in range(model.n_called % 5 + 1)))
        await asyncio.sleep(0.01)
    assert model.n_called <= BENCHMARK.N_PREHEAT_01

    # phase 2: both coefficients should converge tightly within N_PREHEAT_90 calls
    while model.n_called < BENCHMARK.N_PREHEAT_90 * 2:
        if like(B, dispatcher.optimizer.o_b, 0.1, 0.001) and like(
            A, dispatcher.optimizer.o_a, 0.1, 0.001
        ):
            break
        await asyncio.gather(*(wrapped_model(i) for i in range(model.n_called % 5 + 1)))
        await asyncio.sleep(0.01)
    assert model.n_called <= BENCHMARK.N_PREHEAT_90
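
Finally, like is an unshown comparison helper. From its call sites it reads as a tolerance check that the optimizer's learned coefficients o_a and o_b have converged toward the true A and B; one plausible sketch (argument order and tolerance semantics are inferred):

def like(expected, actual, rtol, atol):
    # True when actual is within rtol (relative) plus atol (absolute) of expected.
    return abs(actual - expected) <= abs(expected) * rtol + atol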