def serve(port, bento=None, with_conda=False, enable_microbatch=False,
          pip_installed_bundle_path=None):
    # pip_installed_bundle_path was an undeclared name in the original; it is
    # assumed to be provided by the enclosing CLI module and is exposed here as
    # an explicit parameter so the function is self-contained.
    # resolve_bundle_path and run_with_conda_env are likewise assumed to be
    # available from the surrounding CLI module.
    from bentoml import load
    from bentoml.server.api_server import BentoAPIServer
    from bentoml.utils import reserve_free_port

    bento_service_bundle_path = resolve_bundle_path(
        bento, pip_installed_bundle_path
    )
    bento_service = load(bento_service_bundle_path)

    if with_conda:
        # Re-run the serve command inside the bundle's conda environment
        return run_with_conda_env(
            bento_service_bundle_path,
            'bentoml serve {bento} --port {port} {flags}'.format(
                bento=bento_service_bundle_path,
                port=port,
                flags="--enable-microbatch" if enable_microbatch else "",
            ),
        )

    if enable_microbatch:
        from bentoml.marshal.marshal import MarshalService

        with reserve_free_port() as api_server_port:
            # start server right after port released
            # to reduce potential race
            marshal_server = MarshalService(
                bento_service_bundle_path,
                outbound_host="localhost",
                outbound_port=api_server_port,
                outbound_workers=1,
            )
            api_server = BentoAPIServer(bento_service, port=api_server_port)
        marshal_server.async_start(port=port)
        api_server.start()
    else:
        api_server = BentoAPIServer(bento_service, port=port)
        api_server.start()
def start_dev_server(
    saved_bundle_path: str, port: int, enable_microbatch: bool, run_with_ngrok: bool
):
    logger.info("Starting BentoML API server in development mode..")

    from bentoml import load
    from bentoml.server.api_server import BentoAPIServer
    from bentoml.marshal.marshal import MarshalService
    from bentoml.utils import reserve_free_port

    bento_service = load(saved_bundle_path)

    if run_with_ngrok:
        from bentoml.utils.flask_ngrok import start_ngrok
        from threading import Timer

        # Open the ngrok tunnel shortly after the API server starts
        thread = Timer(1, start_ngrok, args=(port,))
        thread.setDaemon(True)
        thread.start()

    if enable_microbatch:
        with reserve_free_port() as api_server_port:
            # start server right after port released
            # to reduce potential race
            marshal_server = MarshalService(
                saved_bundle_path,
                outbound_host="localhost",
                outbound_port=api_server_port,
                outbound_workers=1,
            )
            api_server = BentoAPIServer(bento_service, port=api_server_port)
        marshal_server.async_start(port=port)
        api_server.start()
    else:
        api_server = BentoAPIServer(bento_service, port=port)
        api_server.start()
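
# A minimal usage sketch (not part of the original module): launching the
# development server against a saved bundle. The bundle path and port below
# are placeholder values; enable_microbatch puts the MarshalService in front
# of the API server, and run_with_ngrok exposes the server via an ngrok tunnel.
if __name__ == "__main__":
    start_dev_server(
        saved_bundle_path="./my_bento_bundle",  # placeholder path
        port=5000,
        enable_microbatch=False,
        run_with_ngrok=False,
    )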