Example #1
    def serve(
        port,
        bento,
        enable_microbatch,
        mb_max_batch_size,
        mb_max_latency,
        run_with_ngrok,
        yatai_url,
        enable_swagger,
        config,
    ):
        saved_bundle_path = resolve_bundle_path(
            bento, pip_installed_bundle_path, yatai_url
        )

        container = BentoMLContainer()
        config = BentoMLConfiguration(override_config_file=config)
        config.override(["api_server", "port"], port)
        config.override(["api_server", "enable_microbatch"], enable_microbatch)
        config.override(["api_server", "run_with_ngrok"], run_with_ngrok)
        config.override(["api_server", "enable_swagger"], enable_swagger)
        config.override(["marshal_server", "max_batch_size"], mb_max_batch_size)
        config.override(["marshal_server", "max_latency"], mb_max_latency)
        container.config.from_dict(config.as_dict())

        from bentoml import marshal, server

        container.wire(packages=[marshal, server])

        start_dev_server(saved_bundle_path)
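
For context, the container.wire(...) call above comes from the dependency_injector package that BentoMLContainer is built on: configuration values are loaded into the container as a dict, and wiring makes them injectable in the listed packages. Below is a minimal standalone sketch of the same load-then-wire pattern; the Container class and report_port function are illustrative names, not BentoML's:

import sys

from dependency_injector import containers, providers
from dependency_injector.wiring import Provide, inject


class Container(containers.DeclarativeContainer):
    config = providers.Configuration()


@inject
def report_port(port: int = Provide[Container.config.api_server.port]) -> int:
    # Receives the value loaded into the container's config provider.
    return port


container = Container()
container.config.from_dict({"api_server": {"port": 5000}})
container.wire(modules=[sys.modules[__name__]])
assert report_port() == 5000
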
Example #2
def start_dev_server(
    bundle_path: str,
    port: Optional[int] = None,
    enable_microbatch: Optional[bool] = None,
    mb_max_batch_size: Optional[int] = None,
    mb_max_latency: Optional[int] = None,
    run_with_ngrok: Optional[bool] = None,
    enable_swagger: Optional[bool] = None,
    config_file: Optional[str] = None,
):
    config = BentoMLConfiguration(override_config_file=config_file)
    config.override(["api_server", "port"], port)
    config.override(["api_server", "enable_microbatch"], enable_microbatch)
    config.override(["api_server", "enable_swagger"], enable_swagger)
    config.override(["marshal_server", "max_batch_size"], mb_max_batch_size)
    config.override(["marshal_server", "max_latency"], mb_max_latency)

    if run_with_ngrok:
        from threading import Timer

        from bentoml.utils.flask_ngrok import start_ngrok

        thread = Timer(1, start_ngrok, args=(port,))
        thread.daemon = True  # setDaemon() is deprecated since Python 3.10
        thread.start()

    with reserve_free_port() as api_server_port:
        # Create the server process while the port is reserved, but start it
        # only after the port has been released, to reduce the potential race
        # between releasing the port here and rebinding it in the child.

        model_server_proc = multiprocessing.Process(
            target=_start_dev_server,
            kwargs=dict(
                api_server_port=api_server_port,
                saved_bundle_path=bundle_path,
                config=config,
            ),
            daemon=True,
        )
    model_server_proc.start()

    try:
        _start_dev_proxy(
            api_server_port=api_server_port,
            saved_bundle_path=bundle_path,
            config=config,
        )
    finally:
        model_server_proc.terminate()
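
A hypothetical caller for start_dev_server, serving a saved bundle in development mode with micro-batching enabled. The bundle path and the numeric values are illustrative, not defaults:

start_dev_server(
    "/tmp/IrisClassifier",   # illustrative bundle path
    port=5000,
    enable_microbatch=True,
    mb_max_batch_size=32,    # cap each marshalled batch at 32 requests
    mb_max_latency=10000,    # latency budget per batch, in milliseconds
)
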
Example #3
def start_prod_server(
    saved_bundle_path: str,
    port: Optional[int] = None,
    workers: Optional[int] = None,
    timeout: Optional[int] = None,
    enable_microbatch: Optional[bool] = None,
    enable_swagger: Optional[bool] = None,
    mb_max_batch_size: Optional[int] = None,
    mb_max_latency: Optional[int] = None,
    microbatch_workers: Optional[int] = None,
    config_file: Optional[str] = None,
):
    import psutil

    assert (
        psutil.POSIX
    ), "BentoML API Server production mode only supports POSIX platforms"

    config = BentoMLConfiguration(override_config_file=config_file)
    config.override(["api_server", "port"], port)
    config.override(["api_server", "workers"], workers)
    config.override(["api_server", "timeout"], timeout)
    config.override(["api_server", "enable_microbatch"], enable_microbatch)
    config.override(["api_server", "enable_swagger"], enable_swagger)
    config.override(["marshal_server", "max_batch_size"], mb_max_batch_size)
    config.override(["marshal_server", "max_latency"], mb_max_latency)
    config.override(["marshal_server", "workers"], microbatch_workers)

    if config.config['api_server'].get('enable_microbatch'):
        prometheus_lock = multiprocessing.Lock()
        # Reserve a free port number and release it right away; the model
        # server process started below will bind to it.
        with reserve_free_port() as api_server_port:
            pass

        model_server_job = multiprocessing.Process(
            target=_start_prod_server,
            kwargs=dict(
                saved_bundle_path=saved_bundle_path,
                port=api_server_port,
                config=config,
                prometheus_lock=prometheus_lock,
            ),
            daemon=True,
        )
        model_server_job.start()

        try:
            _start_prod_batching_server(
                saved_bundle_path=saved_bundle_path,
                config=config,
                api_server_port=api_server_port,
                prometheus_lock=prometheus_lock,
            )
        finally:
            model_server_job.terminate()
    else:
        _start_prod_server(saved_bundle_path=saved_bundle_path, config=config)
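
Similarly, a hypothetical caller for the production variant; again, the path and values are illustrative:

start_prod_server(
    "/tmp/IrisClassifier",   # illustrative bundle path
    port=5000,
    workers=2,               # gunicorn workers for the API server
    timeout=60,
    enable_microbatch=True,
    microbatch_workers=1,    # workers for the marshal/batching server
)
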
Example #4
    def serve_gunicorn(
        port,
        workers,
        timeout,
        bento,
        enable_microbatch,
        mb_max_batch_size,
        mb_max_latency,
        microbatch_workers,
        yatai_url,
        enable_swagger,
        config,
    ):
        if not psutil.POSIX:
            _echo(
                "The `bentoml serve-gunicorn` command is only supported on POSIX "
                "platforms. On Windows, use `bentoml serve` for local API testing "
                "and Docker for running a production API endpoint: "
                "https://docs.docker.com/docker-for-windows/"
            )
            return
        saved_bundle_path = resolve_bundle_path(bento,
                                                pip_installed_bundle_path,
                                                yatai_url)

        container = BentoMLContainer()
        config = BentoMLConfiguration(override_config_file=config)
        config.override(["api_server", "port"], port)
        config.override(["api_server", "workers"], workers)
        config.override(["api_server", "timeout"], timeout)
        config.override(["api_server", "enable_microbatch"], enable_microbatch)
        config.override(["api_server", "enable_swagger"], enable_swagger)
        config.override(["marshal_server", "max_batch_size"],
                        mb_max_batch_size)
        config.override(["marshal_server", "max_latency"], mb_max_latency)
        config.override(["marshal_server", "workers"], microbatch_workers)
        container.config.from_dict(config.as_dict())

        from bentoml import marshal, server

        container.wire(packages=[marshal, server])

        start_prod_server(saved_bundle_path)
Example #5
def test_override():
    config = BentoMLConfiguration(legacy_compatibility=False)
    config.override(["api_server", "port"], 6000)
    config_dict = config.as_dict()
    assert config_dict is not None
    assert config_dict["api_server"]["port"] == 6000
Example #6
def test_override_empty_key():
    config = BentoMLConfiguration(legacy_compatibility=False)
    with pytest.raises(BentoMLConfigException) as e:
        config.override([], 6000)
    assert e is not None
Example #7
def test_override_nonexist_key():
    config = BentoMLConfiguration()
    with pytest.raises(BentoMLConfigException) as e:
        config.override(["non-existent", "non-existent"], 6000)
    assert e is not None
Example #8
def test_override_schema_violation():
    config = BentoMLConfiguration()
    with pytest.raises(BentoMLConfigException) as e:
        config.override(["api_server", "port"], "non-integer")
    assert e is not None
Example #9
def test_override():
    config = BentoMLConfiguration()
    config.override(["bento_server", "port"], 6000)
    config_dict = config.as_dict()
    assert config_dict is not None
    assert config_dict["bento_server"]["port"] == 6000
Example #10
def test_override_empty_key():
    config = BentoMLConfiguration()
    with pytest.raises(BentoMLConfigException) as e:
        config.override([], 6000)
    assert e is not None
Example #11
def test_override_none_value():
    config = BentoMLConfiguration(legacy_compatibility=False)
    config.override(["bento_server", "port"], None)
    config_dict = config.as_dict()
    assert config_dict is not None
    assert config_dict["bento_server"]["port"] == 5000