Ejemplo n.º 1
0
    def serve_gunicorn(
        port,
        workers,
        timeout,
        bento=None,
        with_conda=False,
        enable_microbatch=False,
        microbatch_workers=1,
    ):
        """Run a GunicornBentoServer for a bundle, optionally with micro-batching.

        The bundle path is resolved from ``bento`` and the enclosing scope's
        ``pip_installed_bundle_path``.

        * ``with_conda``: the equivalent ``bentoml serve_gunicorn`` command line
          is handed to ``run_with_conda_env`` and nothing else happens in this
          process. Only ``--enable-microbatch`` is forwarded as an extra flag.
        * ``enable_microbatch``: a ``GunicornMarshalServer`` is created on
          ``port`` and the ``GunicornBentoServer`` is created on a port obtained
          from ``reserve_free_port``; the marshal server is started with
          ``async_run()`` before the API server's ``run()``.
        * otherwise: a ``GunicornBentoServer`` is run directly on ``port``.

        ``workers=None`` is replaced by ``get_gunicorn_num_of_workers()`` (not
        applied on the ``with_conda`` path, which returns earlier).
        """
        track_cli('serve_gunicorn')
        bundle_dir = resolve_bundle_path(bento, pip_installed_bundle_path)

        if with_conda:
            if enable_microbatch:
                extra_flags = "--enable-microbatch"
            else:
                extra_flags = ""
            command_template = (
                'bentoml serve_gunicorn {bento} -p {port} -w {workers} '
                '--timeout {timeout} {flags}'
            )
            run_with_conda_env(
                pip_installed_bundle_path,
                command_template.format(
                    bento=bundle_dir,
                    port=port,
                    workers=workers,
                    timeout=timeout,
                    flags=extra_flags,
                ),
            )
            return

        if workers is None:
            workers = get_gunicorn_num_of_workers()

        from bentoml.server.gunicorn_server import GunicornBentoServer

        if not enable_microbatch:
            # Plain mode: the API server listens on the requested port itself.
            GunicornBentoServer(bundle_dir, port, workers, timeout).run()
            return

        shared_lock = multiprocessing.Lock()
        # Construct both servers only (no run yet): avoid loading the model
        # before gunicorn forks.
        with reserve_free_port() as internal_port:
            batching_front = GunicornMarshalServer(
                bundle_path=bundle_dir,
                port=port,
                workers=microbatch_workers,
                prometheus_lock=shared_lock,
                outbound_host="localhost",
                outbound_port=internal_port,
                outbound_workers=workers,
            )
            api_server = GunicornBentoServer(
                bundle_dir, internal_port, workers, timeout, shared_lock
            )
        batching_front.async_run()
        api_server.run()
Ejemplo n.º 2
0
def start_prod_server(
    saved_bundle_path: str,
    port: int,
    timeout: int,
    workers: int,
    enable_microbatch: bool,
    microbatch_workers: int,
    enable_swagger: bool,
):
    """Run the gunicorn-based API server for a saved bundle.

    Args:
        saved_bundle_path: Bundle path passed to the server constructors.
        port: Port of the public endpoint. With micro-batching enabled this is
            the marshal server's port; otherwise the API server's port.
        timeout: Passed through to ``GunicornBentoServer``.
        workers: API server worker count; ``None`` is replaced by
            ``get_gunicorn_num_of_workers()``.
        enable_microbatch: When true, a ``GunicornMarshalServer`` is placed in
            front of the API server, which then listens on a port obtained
            from ``reserve_free_port``.
        microbatch_workers: Worker count for the marshal server.
        enable_swagger: Passed through to ``GunicornBentoServer``.

    Raises:
        AssertionError: If ``psutil.POSIX`` is false.
    """
    logger.info("Starting BentoML API server in production mode..")

    import psutil
    import multiprocessing

    # Raised explicitly rather than via `assert` so the platform guard is not
    # stripped when Python runs with -O. AssertionError is kept so callers
    # that already catch it keep working.
    if not psutil.POSIX:
        raise AssertionError(
            "BentoML API Server production mode only supports POSIX platforms"
        )

    from bentoml.server.gunicorn_server import GunicornBentoServer
    from bentoml.server.marshal_server import GunicornMarshalServer
    from bentoml.server.utils import get_gunicorn_num_of_workers
    from bentoml.utils import reserve_free_port

    if workers is None:
        workers = get_gunicorn_num_of_workers()

    if enable_microbatch:
        prometheus_lock = multiprocessing.Lock()
        # avoid load model before gunicorn fork: both servers are only
        # constructed inside the `with`; they are started after it exits.
        with reserve_free_port() as api_server_port:
            marshal_server = GunicornMarshalServer(
                bundle_path=saved_bundle_path,
                port=port,
                workers=microbatch_workers,
                prometheus_lock=prometheus_lock,
                outbound_host="localhost",
                outbound_port=api_server_port,
                outbound_workers=workers,
            )

            gunicorn_app = GunicornBentoServer(
                saved_bundle_path,
                api_server_port,
                workers,
                timeout,
                prometheus_lock,
                enable_swagger,
            )
        marshal_server.async_run()
        gunicorn_app.run()
    else:
        gunicorn_app = GunicornBentoServer(
            saved_bundle_path,
            port,
            workers,
            timeout,
            enable_swagger=enable_swagger,
        )
        gunicorn_app.run()
Ejemplo n.º 3
0
    def serve_gunicorn(port,
                       workers,
                       timeout,
                       archive_path=archive_path,
                       with_conda=False):
        """Run GunicornBentoServer for ``archive_path``, optionally inside conda.

        With ``with_conda`` set, one shell command line is executed through
        ``subprocess.call(..., shell=True)``. It checks that ``conda`` is
        available, updates and activates an environment named
        ``<service_name>_<service_version>`` (read from the archive's config
        metadata), installs ``requirements.txt`` when that file exists, and
        then re-invokes ``bentoml serve_gunicorn`` without ``--with-conda``.
        The exit status of that command line is not inspected.

        Without ``with_conda``, ``track_cli`` is called and the server runs in
        this process.
        """
        if with_conda:
            # shlex.quote exists on Python 3.3+; pipes.quote is the same helper
            # on older interpreters.
            try:
                from shlex import quote
            except ImportError:
                from pipes import quote

            config = load_bentoml_config(archive_path)
            metadata = config['metadata']
            env_name = metadata['service_name'] + '_' + metadata[
                'service_version']
            pip_req = os.path.join(archive_path, 'requirements.txt')

            # Every interpolated value is shell-quoted: the command runs with
            # shell=True, so an unquoted path or env name containing spaces or
            # shell metacharacters would split into extra words or commands.
            subprocess.call(
                'command -v conda >/dev/null 2>&1 || {{ echo >&2 "--with-conda '
                'parameter requires conda but it\'s not installed."; exit 1; }} && '
                'conda env update -n {env_name} -f {env_file} && '
                'conda init bash && '
                'eval "$(conda shell.bash hook)" && '
                'conda activate {env_name} && '
                '{{ [ -f {pip_req} ] && pip install -r {pip_req} || echo "no pip '
                'dependencies."; }} &&'
                'bentoml serve_gunicorn {archive_path} -p {port} -w {workers} '
                '--timeout {timeout}'.format(
                    env_name=quote(env_name),
                    env_file=quote(
                        os.path.join(archive_path, 'environment.yml')),
                    archive_path=quote(archive_path),
                    port=quote(str(port)),
                    workers=quote(str(workers)),
                    timeout=quote(str(timeout)),
                    pip_req=quote(pip_req),
                ),
                shell=True,
            )
            return

        track_cli('serve_gunicorn')

        from bentoml.server.gunicorn_server import GunicornBentoServer

        gunicorn_app = GunicornBentoServer(archive_path, port, workers,
                                           timeout)
        gunicorn_app.run()
Ejemplo n.º 4
0
def _start_prod_server(
    saved_bundle_path: str,
    config: BentoMLConfiguration,
    port: Optional[int] = None,
    prometheus_lock: Optional[multiprocessing.Lock] = None,
):
    """Load ``config`` into a BentoMLContainer and run GunicornBentoServer.

    A fresh ``BentoMLContainer`` is populated from ``config.as_dict()`` and
    wired against this module before the server class is imported. ``port``
    is forwarded to ``GunicornBentoServer`` only when it is not ``None``;
    ``prometheus_lock`` is always forwarded.
    """
    logger.info("Starting BentoML API server in production mode..")

    di_container = BentoMLContainer()
    di_container.config.from_dict(config.as_dict())
    this_module = sys.modules[__name__]
    di_container.wire(packages=[this_module])

    from bentoml.server.gunicorn_server import GunicornBentoServer

    # Leave `port` out entirely when unset so the constructor's own default
    # applies.
    server_kwargs = {}
    if port is not None:
        server_kwargs["port"] = port
    server_kwargs["prometheus_lock"] = prometheus_lock

    GunicornBentoServer(saved_bundle_path, **server_kwargs).run()
Ejemplo n.º 5
0
def _start_prod_server(
    saved_bundle_path: str,
    port: int,
    timeout: int,
    workers: int,
    enable_swagger: bool,
    prometheus_lock: Optional[multiprocessing.Lock] = None,
):
    """Build a GunicornBentoServer for ``saved_bundle_path`` and run it.

    ``port``, ``timeout``, ``workers``, ``prometheus_lock`` and
    ``enable_swagger`` are forwarded to the server constructor as keyword
    arguments under the same names.
    """
    logger.info("Starting BentoML API server in production mode..")

    from bentoml.server.gunicorn_server import GunicornBentoServer

    server_options = dict(
        port=port,
        timeout=timeout,
        workers=workers,
        prometheus_lock=prometheus_lock,
        enable_swagger=enable_swagger,
    )
    api_server = GunicornBentoServer(saved_bundle_path, **server_options)
    api_server.run()
Ejemplo n.º 6
0
    def serve_gunicorn(port, workers, timeout, bento=None, with_conda=False):
        """Run a GunicornBentoServer for a bundle, in-process or via conda.

        The bundle path is resolved from ``bento`` and the enclosing scope's
        ``pip_installed_bundle_path``. With ``with_conda`` set, the equivalent
        ``bentoml serve_gunicorn`` command line is handed to
        ``run_with_conda_env`` and the function returns; otherwise the server
        is constructed and run in this process.
        """
        track_cli('serve_gunicorn')
        bundle_dir = resolve_bundle_path(bento, pip_installed_bundle_path)

        if not with_conda:
            from bentoml.server.gunicorn_server import GunicornBentoServer

            GunicornBentoServer(bundle_dir, port, workers, timeout).run()
            return

        command_template = (
            'bentoml serve_gunicorn {bento} -p {port} -w {workers} '
            '--timeout {timeout}'
        )
        conda_command = command_template.format(
            bento=bundle_dir,
            port=port,
            workers=workers,
            timeout=timeout,
        )
        run_with_conda_env(pip_installed_bundle_path, conda_command)
Ejemplo n.º 7
0
    def serve_gunicorn(
        port,
        workers,
        timeout,
        bento=None,
        with_conda=False,
        enable_microbatch=False,
        microbatch_workers=1,
    ):
        """Run a GunicornBentoServer for a bundle (POSIX only).

        On non-POSIX platforms (``psutil.POSIX`` false) a hint is echoed and the
        function returns without starting anything.

        The bundle path is resolved from ``bento`` and the enclosing scope's
        ``pip_installed_bundle_path``.

        * ``with_conda``: the equivalent ``bentoml serve_gunicorn`` command line
          is handed to ``run_with_conda_env`` and its result is returned. Only
          ``--enable-microbatch`` is forwarded as an extra flag.
        * ``enable_microbatch``: a ``GunicornMarshalServer`` is created on
          ``port`` and the ``GunicornBentoServer`` on a port obtained from
          ``reserve_free_port``; the marshal server is started with
          ``async_run()`` before the API server's ``run()``.
        * otherwise: a ``GunicornBentoServer`` is run directly on ``port``.

        ``workers=None`` is replaced by ``get_gunicorn_num_of_workers()`` (not
        applied on the ``with_conda`` path, which returns earlier).
        """
        if not psutil.POSIX:
            # The command name in this message matches the one this function
            # itself invokes on the conda path below.
            _echo(
                "The `bentoml serve_gunicorn` command is only supported on POSIX. "
                "On windows platform, use `bentoml serve` for local API testing and "
                "docker for running production API endpoint: "
                "https://docs.docker.com/docker-for-windows/ "
            )
            return
        bento_service_bundle_path = resolve_bundle_path(
            bento, pip_installed_bundle_path
        )

        if with_conda:
            return run_with_conda_env(
                pip_installed_bundle_path,
                'bentoml serve_gunicorn {bento} -p {port} -w {workers} '
                '--timeout {timeout} {flags}'.format(
                    bento=bento_service_bundle_path,
                    port=port,
                    workers=workers,
                    timeout=timeout,
                    flags="--enable-microbatch" if enable_microbatch else "",
                ),
            )

        if workers is None:
            workers = get_gunicorn_num_of_workers()

        # Gunicorn only supports POSIX platforms, so these imports are kept
        # below the platform check.
        from bentoml.server.gunicorn_server import GunicornBentoServer
        from bentoml.server.marshal_server import GunicornMarshalServer

        if enable_microbatch:
            prometheus_lock = multiprocessing.Lock()
            # avoid load model before gunicorn fork: both servers are only
            # constructed inside the `with`; they are started after it exits.
            with reserve_free_port() as api_server_port:
                marshal_server = GunicornMarshalServer(
                    bundle_path=bento_service_bundle_path,
                    port=port,
                    workers=microbatch_workers,
                    prometheus_lock=prometheus_lock,
                    outbound_host="localhost",
                    outbound_port=api_server_port,
                    outbound_workers=workers,
                )

                gunicorn_app = GunicornBentoServer(
                    bento_service_bundle_path,
                    api_server_port,
                    workers,
                    timeout,
                    prometheus_lock,
                )
            marshal_server.async_run()
            gunicorn_app.run()
        else:
            gunicorn_app = GunicornBentoServer(
                bento_service_bundle_path, port, workers, timeout
            )
            gunicorn_app.run()
Ejemplo n.º 8
0
def start_prod_server(
    saved_bundle_path: str,
    port: int = Provide[BentoMLContainer.config.api_server.port],
    timeout: int = Provide[BentoMLContainer.config.api_server.timeout],
    workers: int = Provide[BentoMLContainer.api_server_workers],
    enable_microbatch: bool = Provide[
        BentoMLContainer.config.api_server.enable_microbatch],
    mb_max_batch_size: int = Provide[
        BentoMLContainer.config.marshal_server.max_batch_size],
    mb_max_latency: int = Provide[
        BentoMLContainer.config.marshal_server.max_latency],
    microbatch_workers: int = Provide[
        BentoMLContainer.config.marshal_server.workers],
    enable_swagger: bool = Provide[
        BentoMLContainer.config.api_server.enable_swagger],
):
    """Run the gunicorn-based API server for a saved bundle.

    All parameters except ``saved_bundle_path`` default to ``Provide[...]``
    markers that reference ``BentoMLContainer`` entries (presumably resolved by
    the dependency-injection wiring at call time -- confirm against the
    container setup).

    Args:
        saved_bundle_path: Bundle path passed to the server constructors.
        port: Port of the public endpoint. With micro-batching enabled this is
            the marshal server's port; otherwise the API server's port.
        timeout: Passed through to ``GunicornBentoServer``.
        workers: API server worker count; also passed to the marshal server
            as ``outbound_workers``.
        enable_microbatch: When true, a ``GunicornMarshalServer`` is placed in
            front of the API server, which then listens on a port obtained
            from ``reserve_free_port``.
        mb_max_batch_size: Passed through to ``GunicornMarshalServer``.
        mb_max_latency: Passed through to ``GunicornMarshalServer``.
        microbatch_workers: Worker count for the marshal server.
        enable_swagger: Passed through to ``GunicornBentoServer``.

    Raises:
        AssertionError: If ``psutil.POSIX`` is false.
    """
    logger.info("Starting BentoML API server in production mode..")

    import multiprocessing

    import psutil

    # Raised explicitly rather than via `assert` so the platform guard is not
    # stripped when Python runs with -O. AssertionError is kept so callers
    # that already catch it keep working.
    if not psutil.POSIX:
        raise AssertionError(
            "BentoML API Server production mode only supports POSIX platforms"
        )

    from bentoml.server.gunicorn_server import GunicornBentoServer
    from bentoml.server.marshal_server import GunicornMarshalServer
    from bentoml.utils import reserve_free_port

    if enable_microbatch:
        prometheus_lock = multiprocessing.Lock()
        # avoid load model before gunicorn fork: both servers are only
        # constructed inside the `with`; they are started after it exits.
        with reserve_free_port() as api_server_port:
            marshal_server = GunicornMarshalServer(
                bundle_path=saved_bundle_path,
                port=port,
                workers=microbatch_workers,
                prometheus_lock=prometheus_lock,
                outbound_host="localhost",
                outbound_port=api_server_port,
                outbound_workers=workers,
                mb_max_batch_size=mb_max_batch_size,
                mb_max_latency=mb_max_latency,
            )

            gunicorn_app = GunicornBentoServer(
                saved_bundle_path,
                api_server_port,
                workers,
                timeout,
                prometheus_lock,
                enable_swagger,
            )
        marshal_server.async_run()
        gunicorn_app.run()
    else:
        gunicorn_app = GunicornBentoServer(
            saved_bundle_path,
            port,
            workers,
            timeout,
            enable_swagger=enable_swagger,
        )
        gunicorn_app.run()
Ejemplo n.º 9
0
    def serve_gunicorn(port, workers, timeout, archive_path=archive_path):
        """Call ``track_cli`` and run a GunicornBentoServer for ``archive_path``.

        ``archive_path`` defaults to the value bound in the enclosing scope
        when this function is defined.
        """
        track_cli('serve_gunicorn')

        server_args = (archive_path, port, workers, timeout)
        GunicornBentoServer(*server_args).run()