Example 1
def _start_prod_batching_server(
    saved_bundle_path: str,
    api_server_port: int,
    config: BentoMLConfiguration,
    prometheus_lock: Optional[multiprocessing.Lock] = None,
):

    logger.info("Starting BentoML Batching server in production mode..")

    container = BentoMLContainer()
    container.config.from_dict(config.as_dict())

    from bentoml import marshal
    from bentoml.server.marshal_server import GunicornMarshalServer

    container.wire(packages=[sys.modules[__name__], marshal])

    # avoid loading the model before the gunicorn fork
    marshal_server = GunicornMarshalServer(
        bundle_path=saved_bundle_path,
        prometheus_lock=prometheus_lock,
        outbound_host="localhost",
        outbound_port=api_server_port,
    )
    marshal_server.run()
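
Every example in this section follows the same pattern: build a BentoMLContainer, load its configuration, and then wire the container into the modules that consume it. Below is a minimal, self-contained sketch of that mechanism, assuming only the dependency_injector package that BentoMLContainer is built on; the Container and print_port names here are illustrative, not BentoML's:

import sys

from dependency_injector import containers, providers
from dependency_injector.wiring import Provide, inject


class Container(containers.DeclarativeContainer):
    config = providers.Configuration()


@inject
def print_port(port: int = Provide[Container.config.api_server.port]) -> None:
    # after wiring, the default value is resolved from the container's config
    print(f"API server port: {port}")


container = Container()
container.config.from_dict({"api_server": {"port": 5000}})
container.wire(modules=[sys.modules[__name__]])  # patch @inject functions in this module
print_port()  # -> "API server port: 5000"

Note that wire() accepts both modules= and packages=: a module is patched individually, while a package is traversed recursively, which is why the examples can cover whole subsystems such as marshal and server in a single call.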
Example 2
    def serve(
        port,
        bento,
        enable_microbatch,
        mb_max_batch_size,
        mb_max_latency,
        run_with_ngrok,
        yatai_url,
        enable_swagger,
        config,
    ):
        saved_bundle_path = resolve_bundle_path(
            bento, pip_installed_bundle_path, yatai_url
        )

        container = BentoMLContainer()
        config = BentoMLConfiguration(override_config_file=config)
        config.override(["api_server", "port"], port)
        config.override(["api_server", "enable_microbatch"], enable_microbatch)
        config.override(["api_server", "run_with_ngrok"], run_with_ngrok)
        config.override(["api_server", "enable_swagger"], enable_swagger)
        config.override(["marshal_server", "max_batch_size"], mb_max_batch_size)
        config.override(["marshal_server", "max_latency"], mb_max_latency)
        container.config.from_dict(config.as_dict())

        from bentoml import marshal, server

        container.wire(packages=[marshal, server])

        start_dev_server(saved_bundle_path)
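
The chain of config.override calls layers CLI flags on top of the loaded configuration file. Below is a hypothetical sketch of what such a key-path override amounts to; this is an illustration of the idea, not BentoMLConfiguration's actual implementation, and the None-skipping behavior is an assumption:

from typing import Any, List


def override(config: dict, keys: List[str], value: Any) -> None:
    """Set a value at a nested key path, skipping unset (None) CLI flags."""
    if value is None:
        return  # assumed behavior: leave the file/default value in place
    node = config
    for key in keys[:-1]:
        node = node.setdefault(key, {})
    node[keys[-1]] = value


cfg = {"api_server": {"port": 5000, "enable_swagger": True}}
override(cfg, ["api_server", "port"], 8080)            # CLI flag wins
override(cfg, ["api_server", "enable_swagger"], None)  # unset flag: no-op
assert cfg == {"api_server": {"port": 8080, "enable_swagger": True}}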
Example 3
def _start_prod_server(
    saved_bundle_path: str,
    config: BentoMLConfiguration,
    port: Optional[int] = None,
    prometheus_lock: Optional[multiprocessing.Lock] = None,
):

    logger.info("Starting BentoML API server in production mode..")

    container = BentoMLContainer()
    container.config.from_dict(config.as_dict())

    container.wire(packages=[sys.modules[__name__]])

    from bentoml.server.gunicorn_server import GunicornBentoServer

    if port is None:
        gunicorn_app = GunicornBentoServer(
            saved_bundle_path, prometheus_lock=prometheus_lock,
        )
    else:
        gunicorn_app = GunicornBentoServer(
            saved_bundle_path, port=port, prometheus_lock=prometheus_lock,
        )
    gunicorn_app.run()
Example 4
def inject_dependencies():
    """Inject dependencies and configuration to BentoML packages"""

    from timeit import default_timer as timer

    start = timer()

    logger.debug("Start dependency injection")

    from bentoml.configuration.containers import BentoMLContainer, BentoMLConfiguration

    config_file = get_local_config_file()
    if config_file and config_file.endswith(".yml"):
        configuration = BentoMLConfiguration(override_config_file=config_file)
    else:
        configuration = BentoMLConfiguration()

    container = BentoMLContainer()
    container.config.from_dict(configuration.as_dict())

    from bentoml import marshal, server, tracing, cli

    container.wire(packages=[marshal, server, tracing, cli])

    end = timer()

    logger.debug("Dependency injection completed in %.3f seconds", end - start)
Example 5
def inject_dependencies():
    """Inject dependencies and configuration to BentoML packages"""

    from timeit import default_timer as timer

    start = timer()

    logger.debug("Start dependency injection")

    from bentoml.configuration.containers import BentoMLContainer, BentoMLConfiguration

    config_file = get_local_config_file()
    if config_file and config_file.endswith(".yml"):
        configuration = BentoMLConfiguration(override_config_file=config_file)
    else:
        configuration = BentoMLConfiguration()

    container = BentoMLContainer()
    container.config.from_dict(configuration.as_dict())

    from bentoml import (
        marshal,
        server,
        tracing,
        cli,
        adapters,
        saved_bundle,
        service,
    )
    from bentoml.yatai import yatai_service
    from bentoml.yatai import yatai_service_impl
    from bentoml.yatai.repository import s3_repository, gcs_repository

    container.wire(
        modules=[
            yatai_service, s3_repository, gcs_repository, yatai_service_impl
        ],
        packages=[
            marshal, server, tracing, cli, adapters, saved_bundle, service
        ],
    )

    end = timer()

    logger.debug("Dependency injection completed in %.3f seconds", end - start)
Example 6
    def serve_gunicorn(
        port,
        workers,
        timeout,
        bento,
        enable_microbatch,
        mb_max_batch_size,
        mb_max_latency,
        microbatch_workers,
        yatai_url,
        enable_swagger,
        config,
    ):
        if not psutil.POSIX:
            _echo(
                "The `bentoml serve-gunicorn` command is only supported on POSIX. "
                "On Windows, use `bentoml serve` for local API testing and Docker "
                "for running a production API endpoint: "
                "https://docs.docker.com/docker-for-windows/"
            )
            return
        saved_bundle_path = resolve_bundle_path(bento,
                                                pip_installed_bundle_path,
                                                yatai_url)

        container = BentoMLContainer()
        config = BentoMLConfiguration(override_config_file=config)
        config.override(["api_server", "port"], port)
        config.override(["api_server", "workers"], workers)
        config.override(["api_server", "timeout"], timeout)
        config.override(["api_server", "enable_microbatch"], enable_microbatch)
        config.override(["api_server", "enable_swagger"], enable_swagger)
        config.override(["marshal_server", "max_batch_size"],
                        mb_max_batch_size)
        config.override(["marshal_server", "max_latency"], mb_max_latency)
        config.override(["marshal_server", "workers"], microbatch_workers)
        container.config.from_dict(config.as_dict())

        from bentoml import marshal, server

        container.wire(packages=[marshal, server])

        start_prod_server(saved_bundle_path)
Example 7
    def run(api_name, config, run_args, bento=None):
        container = BentoMLContainer()
        config = BentoMLConfiguration(override_config_file=config)
        container.config.from_dict(config.as_dict())

        from bentoml import tracing

        container.wire(modules=[tracing])

        parser = argparse.ArgumentParser()
        parser.add_argument('--yatai-url', type=str, default=None)
        parsed_args, _ = parser.parse_known_args(run_args)
        yatai_url = parsed_args.yatai_url
        saved_bundle_path = resolve_bundle_path(bento,
                                                pip_installed_bundle_path,
                                                yatai_url)

        api = load_bento_service_api(saved_bundle_path, api_name)
        exit_code = api.handle_cli(run_args)
        sys.exit(exit_code)
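
The parse_known_args call is what lets run consume its own --yatai-url option while forwarding every unrecognized argument to api.handle_cli. A standalone illustration of that pattern:

import argparse

parser = argparse.ArgumentParser()
parser.add_argument('--yatai-url', type=str, default=None)

# known options are consumed; unrecognized ones are returned untouched
known, rest = parser.parse_known_args(
    ['--yatai-url', 'http://127.0.0.1:50051', '--input', '[1, 2, 3]']
)
assert known.yatai_url == 'http://127.0.0.1:50051'
assert rest == ['--input', '[1, 2, 3]']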
Example 8
def _start_dev_server(
    saved_bundle_path: str,
    api_server_port: int,
    config: BentoMLConfiguration,
):

    logger.info("Starting BentoML API server in development mode..")

    from bentoml.saved_bundle import load_from_dir

    bento_service = load_from_dir(saved_bundle_path)

    from bentoml.server.api_server import BentoAPIServer

    container = BentoMLContainer()
    container.config.from_dict(config.as_dict())
    container.wire(packages=[sys.modules[__name__]])

    api_server = BentoAPIServer(bento_service)
    api_server.start(port=api_server_port)
Example 9
def test_api_server_workers():
    container = BentoMLContainer()

    config_auto_workers = tempfile.NamedTemporaryFile(delete=False)
    config_auto_workers.write(b"""
api_server:
  workers: Null
""")
    config_auto_workers.close()

    container.config.from_dict(
        BentoMLConfiguration(
            default_config_file=config_auto_workers.name,
            validate_schema=False,
            legacy_compatibility=False,
        ).as_dict()
    )
    os.remove(config_auto_workers.name)
    workers = container.api_server_workers()
    assert workers is not None
    assert workers > 0

    config_manual_workers = tempfile.NamedTemporaryFile(delete=False)
    config_manual_workers.write(b"""
api_server:
  workers: 42
""")
    config_manual_workers.close()

    container.config.from_dict(
        BentoMLConfiguration(
            default_config_file=config_manual_workers.name,
            validate_schema=False,
            legacy_compatibility=False,
        ).as_dict()
    )
    os.remove(config_manual_workers.name)
    workers = container.api_server_workers()
    assert workers is not None
    assert workers == 42
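
The first half of the test expects the container to compute a worker count when the configured value is Null. Below is a minimal sketch of a provider with that fallback (an assumption for illustration, not BentoML's actual BentoMLContainer code), using gunicorn's conventional 2 * cpu_count + 1 default:

import multiprocessing

from dependency_injector import containers, providers


class Container(containers.DeclarativeContainer):
    config = providers.Configuration()

    # fall back to a CPU-derived worker count when the config value is None
    api_server_workers = providers.Factory(
        lambda workers: workers or multiprocessing.cpu_count() * 2 + 1,
        config.api_server.workers,
    )


container = Container()
container.config.from_dict({"api_server": {"workers": None}})
assert container.api_server_workers() > 0

container.config.from_dict({"api_server": {"workers": 42}})
assert container.api_server_workers() == 42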
Example 10
def _start_dev_proxy(
    saved_bundle_path: str,
    api_server_port: int,
    config: BentoMLConfiguration,
):

    logger.info("Starting BentoML API proxy in development mode..")

    from bentoml import marshal

    container = BentoMLContainer()
    container.config.from_dict(config.as_dict())
    container.wire(packages=[marshal])

    from bentoml.marshal.marshal import MarshalService

    marshal_server = MarshalService(
        saved_bundle_path,
        outbound_host="localhost",
        outbound_port=api_server_port,
    )

    marshal_server.fork_start_app()