def start_dev_server(
    saved_bundle_path: str,
    port: int = Provide[BentoMLContainer.config.api_server.port],
    enable_microbatch: bool = Provide[
        BentoMLContainer.config.api_server.enable_microbatch],
    mb_max_batch_size: int = Provide[
        BentoMLContainer.config.marshal_server.max_batch_size],
    mb_max_latency: int = Provide[
        BentoMLContainer.config.marshal_server.max_latency],
    run_with_ngrok: bool = Provide[
        BentoMLContainer.config.api_server.run_with_ngrok],
    enable_swagger: bool = Provide[
        BentoMLContainer.config.api_server.enable_swagger],
):
    """Serve a saved BentoService bundle in development mode.

    Blocks the calling process running the Flask-based dev API server.

    Args:
        saved_bundle_path: path to the saved bundle directory to load.
        port: public port to listen on (DI-injected default).
        enable_microbatch: when True, run a marshal/batching server in a
            child process in front of the API server.
        mb_max_batch_size: marshal server max batch size (DI-injected).
        mb_max_latency: marshal server max batching latency (DI-injected).
        run_with_ngrok: when True, expose the server through an ngrok
            tunnel started on a background timer thread.
        enable_swagger: when True, serve the Swagger UI.
    """
    logger.info("Starting BentoML API server in development mode..")

    import multiprocessing

    from bentoml.saved_bundle import load_from_dir
    from bentoml.server.api_server import BentoAPIServer
    from bentoml.utils import reserve_free_port

    if run_with_ngrok:
        from threading import Timer

        from bentoml.utils.flask_ngrok import start_ngrok

        # Delay ngrok startup by 1s so the API server has a chance to bind
        # the port before the tunnel points at it.
        thread = Timer(1, start_ngrok, args=(port,))
        # Thread.setDaemon() is deprecated since Python 3.10; assign the
        # `daemon` attribute instead (identical behavior).
        thread.daemon = True
        thread.start()

    if enable_microbatch:
        with reserve_free_port() as api_server_port:
            # start server right after port released
            # to reduce potential race
            marshal_proc = multiprocessing.Process(
                target=start_dev_batching_server,
                kwargs=dict(
                    api_server_port=api_server_port,
                    saved_bundle_path=saved_bundle_path,
                    port=port,
                    mb_max_latency=mb_max_latency,
                    mb_max_batch_size=mb_max_batch_size,
                ),
                daemon=True,
            )
        marshal_proc.start()

        # The marshal process listens on the public `port`; the API server
        # binds the just-reserved internal port behind it.
        bento_service = load_from_dir(saved_bundle_path)
        api_server = BentoAPIServer(
            bento_service, port=api_server_port, enable_swagger=enable_swagger
        )
        api_server.start()
    else:
        bento_service = load_from_dir(saved_bundle_path)
        api_server = BentoAPIServer(
            bento_service, port=port, enable_swagger=enable_swagger
        )
        api_server.start()
def start_dev_server(saved_bundle_path: str, port: int, enable_microbatch: bool,
                     run_with_ngrok: bool):
    """Serve a saved BentoService bundle in development mode.

    Blocks the calling process running the Flask-based dev API server,
    optionally fronted by an in-process marshal (micro-batching) server
    and/or exposed through an ngrok tunnel.

    Args:
        saved_bundle_path: path to the saved bundle directory to load.
        port: public port to listen on.
        enable_microbatch: when True, the MarshalService listens on `port`
            and proxies to the API server on an internal free port.
        run_with_ngrok: when True, start an ngrok tunnel on a timer thread.
    """
    logger.info("Starting BentoML API server in development mode..")

    from bentoml import load
    from bentoml.server.api_server import BentoAPIServer
    from bentoml.marshal.marshal import MarshalService
    from bentoml.utils import reserve_free_port

    bento_service = load(saved_bundle_path)

    if run_with_ngrok:
        from bentoml.utils.flask_ngrok import start_ngrok
        from threading import Timer

        # Delay ngrok startup by 1s so the server can bind the port first.
        thread = Timer(1, start_ngrok, args=(port,))
        # Thread.setDaemon() is deprecated since Python 3.10; assign the
        # `daemon` attribute instead (identical behavior).
        thread.daemon = True
        thread.start()

    if enable_microbatch:
        with reserve_free_port() as api_server_port:
            # start server right after port released
            # to reduce potential race
            marshal_server = MarshalService(
                saved_bundle_path,
                outbound_host="localhost",
                outbound_port=api_server_port,
                outbound_workers=1,
            )
            api_server = BentoAPIServer(bento_service, port=api_server_port)
        # Marshal server owns the public port; API server sits behind it.
        marshal_server.async_start(port=port)
        api_server.start()
    else:
        api_server = BentoAPIServer(bento_service, port=port)
        api_server.start()
def _start_dev_server(
    saved_bundle_path: str,
    api_server_port: int,
    enable_swagger: bool,
):
    """Load the saved bundle and block serving it on `api_server_port`.

    Args:
        saved_bundle_path: path to the saved bundle directory to load.
        api_server_port: port the dev API server binds to.
        enable_swagger: when True, serve the Swagger UI.
    """
    logger.info("Starting BentoML API server in development mode..")

    from bentoml.saved_bundle import load_from_dir
    from bentoml.server.api_server import BentoAPIServer

    service = load_from_dir(saved_bundle_path)
    server = BentoAPIServer(service, enable_swagger=enable_swagger)
    server.start(port=api_server_port)
def _start_dev_server(
    saved_bundle_path: str,
    api_server_port: int,
    config: BentoMLConfiguration,
):
    """Load the saved bundle, wire the DI container, and block serving.

    Args:
        saved_bundle_path: path to the saved bundle directory to load.
        api_server_port: port the dev API server binds to.
        config: configuration object whose dict form populates the
            freshly-constructed BentoMLContainer before wiring.
    """
    logger.info("Starting BentoML API server in development mode..")

    from bentoml.saved_bundle import load_from_dir

    service = load_from_dir(saved_bundle_path)

    from bentoml.server.api_server import BentoAPIServer

    # Populate and wire the DI container against this module before the
    # server is constructed, so Provide[] defaults resolve from `config`.
    di_container = BentoMLContainer()
    di_container.config.from_dict(config.as_dict())
    di_container.wire(packages=[sys.modules[__name__]])

    server = BentoAPIServer(service)
    server.start(port=api_server_port)