Example 1
    def serve_gunicorn(
        port,
        workers,
        timeout,
        bento=None,
        with_conda=False,
        enable_microbatch=False,
        microbatch_workers=1,
    ):
        track_cli('serve_gunicorn')
        bento_service_bundle_path = resolve_bundle_path(
            bento, pip_installed_bundle_path)

        if with_conda:
            run_with_conda_env(
                pip_installed_bundle_path,
                'bentoml serve_gunicorn {bento} -p {port} -w {workers} '
                '--timeout {timeout} {flags}'.format(
                    bento=bento_service_bundle_path,
                    port=port,
                    workers=workers,
                    timeout=timeout,
                    flags="--enable-microbatch" if enable_microbatch else "",
                ),
            )
            return

        if workers is None:
            workers = get_gunicorn_num_of_workers()

        from bentoml.server.gunicorn_server import GunicornBentoServer
        from bentoml.server.marshal_server import GunicornMarshalServer

        if enable_microbatch:
            prometheus_lock = multiprocessing.Lock()
            # avoid loading the model before the gunicorn fork
            with reserve_free_port() as api_server_port:
                marshal_server = GunicornMarshalServer(
                    bundle_path=bento_service_bundle_path,
                    port=port,
                    workers=microbatch_workers,
                    prometheus_lock=prometheus_lock,
                    outbound_host="localhost",
                    outbound_port=api_server_port,
                    outbound_workers=workers,
                )

                gunicorn_app = GunicornBentoServer(
                    bento_service_bundle_path,
                    api_server_port,
                    workers,
                    timeout,
                    prometheus_lock,
                )
            marshal_server.async_run()
            gunicorn_app.run()
        else:
            gunicorn_app = GunicornBentoServer(bento_service_bundle_path, port,
                                               workers, timeout)
            gunicorn_app.run()
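
Several of these examples fall back to get_gunicorn_num_of_workers() when no worker count is given, and the SageMaker comments in Examples 3 and 4 below describe the default as "number of cpu cores / 2 + 1". A minimal sketch of such a helper under that assumption (not necessarily BentoML's actual implementation):

import multiprocessing


def get_gunicorn_num_of_workers():
    # Default described in the SageMaker example comments below:
    # number of cpu cores / 2 + 1.
    return multiprocessing.cpu_count() // 2 + 1
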
Example 2
def start_prod_server(
    saved_bundle_path: str,
    port: int,
    timeout: int,
    workers: int,
    enable_microbatch: bool,
    microbatch_workers: int,
    enable_swagger: bool,
):
    logger.info("Starting BentoML API server in production mode...")

    import psutil
    import multiprocessing

    assert (
        psutil.POSIX
    ), "BentoML API Server production mode only supports POSIX platforms"

    from bentoml.server.gunicorn_server import GunicornBentoServer
    from bentoml.server.marshal_server import GunicornMarshalServer
    from bentoml.server.utils import get_gunicorn_num_of_workers
    from bentoml.utils import reserve_free_port

    if workers is None:
        workers = get_gunicorn_num_of_workers()

    if enable_microbatch:
        prometheus_lock = multiprocessing.Lock()
        # avoid loading the model before the gunicorn fork
        with reserve_free_port() as api_server_port:
            marshal_server = GunicornMarshalServer(
                bundle_path=saved_bundle_path,
                port=port,
                workers=microbatch_workers,
                prometheus_lock=prometheus_lock,
                outbound_host="localhost",
                outbound_port=api_server_port,
                outbound_workers=workers,
            )

            gunicorn_app = GunicornBentoServer(
                saved_bundle_path,
                api_server_port,
                workers,
                timeout,
                prometheus_lock,
                enable_swagger,
            )
        marshal_server.async_run()
        gunicorn_app.run()
    else:
        gunicorn_app = GunicornBentoServer(saved_bundle_path,
                                           port,
                                           workers,
                                           timeout,
                                           enable_swagger=enable_swagger)
        gunicorn_app.run()
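
Both examples above reserve a free local port for the internal API server before gunicorn forks, construct the marshal server and the gunicorn app inside the with block, and only bind once the reserved socket has been released. A context manager with that behavior could look roughly like this (a sketch of the idea, not BentoML's reserve_free_port):

import contextlib
import socket


@contextlib.contextmanager
def reserve_free_port(host="localhost"):
    # Bind to port 0 so the OS picks an unused port, hold it while the caller
    # sets up, and release it on exit so the real server can bind to it.
    sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    sock.bind((host, 0))
    try:
        yield sock.getsockname()[1]
    finally:
        sock.close()
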
Example 3
# This implements the SageMaker serving service shell. It starts nginx and gunicorn.
# Parameter               Env Var                              Default Value
# timeout                 BENTOML_GUNICORN_TIMEOUT             60s
# number of workers       BENTOML_GUNICORN_NUM_OF_WORKERS      number of cpu cores / 2 + 1
# api name                API_NAME                             None

import subprocess
import os
import signal
import sys

from bentoml.server.utils import get_gunicorn_num_of_workers

bento_server_timeout = os.environ.get('BENTOML_GUNICORN_TIMEOUT', 60)
bento_server_workers = int(
    os.environ.get('BENTOML_GUNICORN_NUM_OF_WORKERS', get_gunicorn_num_of_workers())
)


def sigterm_handler(nginx_pid, gunicorn_pid):
    try:
        os.kill(nginx_pid, signal.SIGQUIT)
    except OSError:
        pass
    try:
        os.kill(gunicorn_pid, signal.SIGTERM)
    except OSError:
        pass

    sys.exit(0)
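
The full SageMaker serve script this handler belongs to typically starts nginx and gunicorn as child processes and forwards SIGTERM to both. The sketch below, building on the snippet above, shows that wiring with assumed process arguments and paths (illustrative, not BentoML's exact values):

def start_server():
    # Launch nginx and gunicorn, forward SIGTERM to both children, and shut
    # everything down as soon as either child exits.
    nginx = subprocess.Popen(['nginx', '-c', '/opt/program/nginx.conf'])
    gunicorn = subprocess.Popen(
        [
            'gunicorn',
            '--timeout', str(bento_server_timeout),
            '--workers', str(bento_server_workers),
            '--bind', 'unix:/tmp/gunicorn.sock',
            'wsgi:app',
        ]
    )
    signal.signal(
        signal.SIGTERM,
        lambda signum, frame: sigterm_handler(nginx.pid, gunicorn.pid),
    )

    pids = {nginx.pid, gunicorn.pid}
    while True:
        pid, _ = os.wait()
        if pid in pids:
            break
    sigterm_handler(nginx.pid, gunicorn.pid)
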
Example 4
# This implements the SageMaker serving service shell. It starts nginx and gunicorn.
# Parameter               Env Var                      Default Value
# timeout                 BENTO_SERVER_TIMEOUT         60s
# number of workers       GUNICORN_WORKER_COUNT        number of cpu cores / 2 + 1
# api name                API_NAME                     None

import subprocess
import os
import signal
import sys

from bentoml.server.utils import get_gunicorn_num_of_workers

bento_server_timeout = os.environ.get('BENTO_SERVER_TIMEOUT', 60)
bento_server_workers = int(
    os.environ.get('GUNICORN_WORKER_COUNT', get_gunicorn_num_of_workers())
)


def sigterm_handler(nginx_pid, gunicorn_pid):
    try:
        os.kill(nginx_pid, signal.SIGQUIT)
    except OSError:
        pass
    try:
        os.kill(gunicorn_pid, signal.SIGTERM)
    except OSError:
        pass

    sys.exit(0)
Example 5
import atexit

from bentoml.server.utils import get_gunicorn_num_of_workers
from bentoml.utils.usage_stats import track_server_stop

workers = get_gunicorn_num_of_workers()


def worker_exit(server, worker):  # pylint: disable=unused-argument
    from prometheus_client import multiprocess

    multiprocess.mark_process_dead(worker.pid)


def post_fork(server, worker):
    server.log.debug("Worker spawned (pid: %s)", worker.pid)


def pre_fork(server, worker):  # pylint: disable=unused-argument
    pass


def pre_exec(server):
    server.log.debug("Forked child, re-executing.")


def when_ready(server):
    server.log.debug("Server is ready. Spawning workers")


def worker_int(worker):
    worker.log.info("worker received INT or QUIT signal")
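
The worker_exit hook above calls multiprocess.mark_process_dead so that prometheus_client's multiprocess mode can discard the metric files of dead workers. Aggregating the metrics written by all gunicorn workers then uses the standard prometheus_client API, for example:

from prometheus_client import CollectorRegistry, generate_latest, multiprocess

# Collect metrics written by every gunicorn worker process; this requires the
# prometheus multiprocess directory environment variable to be set for the workers.
registry = CollectorRegistry()
multiprocess.MultiProcessCollector(registry)
print(generate_latest(registry).decode())
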
Example 6
    def serve_gunicorn(
        port,
        workers,
        timeout,
        bento=None,
        with_conda=False,
        enable_microbatch=False,
        microbatch_workers=1,
    ):
        if not psutil.POSIX:
            _echo(
                "The `bentoml server-gunicon` command is only supported on POSIX. "
                "On windows platform, use `bentoml serve` for local API testing and "
                "docker for running production API endpoint: "
                "https://docs.docker.com/docker-for-windows/ "
            )
            return
        bento_service_bundle_path = resolve_bundle_path(
            bento, pip_installed_bundle_path
        )

        if with_conda:
            return run_with_conda_env(
                pip_installed_bundle_path,
                'bentoml serve_gunicorn {bento} -p {port} -w {workers} '
                '--timeout {timeout} {flags}'.format(
                    bento=bento_service_bundle_path,
                    port=port,
                    workers=workers,
                    timeout=timeout,
                    flags="--enable-microbatch" if enable_microbatch else "",
                ),
            )

        if workers is None:
            workers = get_gunicorn_num_of_workers()

        # Gunicorn only supports POSIX platforms
        from bentoml.server.gunicorn_server import GunicornBentoServer
        from bentoml.server.marshal_server import GunicornMarshalServer

        if enable_microbatch:
            prometheus_lock = multiprocessing.Lock()
            # avoid loading the model before the gunicorn fork
            with reserve_free_port() as api_server_port:
                marshal_server = GunicornMarshalServer(
                    bundle_path=bento_service_bundle_path,
                    port=port,
                    workers=microbatch_workers,
                    prometheus_lock=prometheus_lock,
                    outbound_host="localhost",
                    outbound_port=api_server_port,
                    outbound_workers=workers,
                )

                gunicorn_app = GunicornBentoServer(
                    bento_service_bundle_path,
                    api_server_port,
                    workers,
                    timeout,
                    prometheus_lock,
                )
            marshal_server.async_run()
            gunicorn_app.run()
        else:
            gunicorn_app = GunicornBentoServer(
                bento_service_bundle_path, port, workers, timeout
            )
            gunicorn_app.run()
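
GunicornBentoServer appears to embed gunicorn in-process (it is constructed and .run() is called directly) rather than shelling out to the gunicorn binary. Gunicorn's documented way to do that is to subclass gunicorn.app.base.BaseApplication; the sketch below shows that pattern with illustrative names, not BentoML's actual class:

from gunicorn.app.base import BaseApplication


class StandaloneGunicornServer(BaseApplication):
    # Minimal wrapper: serve a WSGI app on the given port with N workers.
    def __init__(self, wsgi_app, port, workers, timeout):
        self.wsgi_app = wsgi_app
        self.options = {
            "bind": "0.0.0.0:%d" % port,
            "workers": workers,
            "timeout": timeout,
        }
        super().__init__()

    def load_config(self):
        for key, value in self.options.items():
            self.cfg.set(key, value)

    def load(self):
        return self.wsgi_app


def app(environ, start_response):
    # Trivial WSGI app that keeps the sketch self-contained.
    start_response("200 OK", [("Content-Type", "text/plain")])
    return [b"OK"]


if __name__ == "__main__":
    StandaloneGunicornServer(app, port=5000, workers=2, timeout=60).run()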