def serve(port, bento=None, with_conda=False, enable_microbatch=False):
    """Serve the given bento bundle over HTTP on ``port``.

    When ``with_conda`` is set, re-invoke ``bentoml serve`` inside the
    bundle's conda environment instead of serving in-process. When
    ``enable_microbatch`` is set, a MarshalService micro-batching proxy
    listens on ``port`` and forwards to the API server on a free local port.
    """
    # NOTE(review): `pip_installed_bundle_path` is a free name taken from the
    # enclosing scope — confirm it is bound where this function is defined.
    bundle_path = resolve_bundle_path(bento, pip_installed_bundle_path)
    svc = load(bundle_path)

    if with_conda:
        microbatch_flag = "--enable-microbatch" if enable_microbatch else ""
        command = 'bentoml serve {bento} --port {port} {flags}'.format(
            bento=bundle_path,
            port=port,
            flags=microbatch_flag,
        )
        return run_with_conda_env(bundle_path, command)

    if not enable_microbatch:
        BentoAPIServer(svc, port=port).start()
        return

    from bentoml.marshal.marshal import MarshalService

    with reserve_free_port() as api_server_port:
        # start server right after port released
        # to reduce potential race
        marshal_server = MarshalService(
            bundle_path,
            outbound_host="localhost",
            outbound_port=api_server_port,
            outbound_workers=1,
        )
        api_server = BentoAPIServer(svc, port=api_server_port)
    marshal_server.async_start(port=port)
    api_server.start()
def start_dev_server(saved_bundle_path: str, port: int, enable_microbatch: bool, run_with_ngrok: bool):
    """Start a BentoML API server in development mode.

    Args:
        saved_bundle_path: Path to the saved bento service bundle to serve.
        port: Port the public-facing server listens on.
        enable_microbatch: If True, run a MarshalService micro-batching proxy
            on ``port`` and the API server on a free internal port.
        run_with_ngrok: If True, expose the server through an ngrok tunnel
            started on a background timer thread.
    """
    logger.info("Starting BentoML API server in development mode..")

    from bentoml import load
    from bentoml.server.api_server import BentoAPIServer
    from bentoml.marshal.marshal import MarshalService
    from bentoml.utils import reserve_free_port

    bento_service = load(saved_bundle_path)

    if run_with_ngrok:
        from bentoml.utils.flask_ngrok import start_ngrok
        from threading import Timer

        # Delay ngrok startup by 1s so the server is up before the tunnel opens.
        thread = Timer(1, start_ngrok, args=(port,))
        # FIX: Thread.setDaemon() is deprecated — assign the `daemon`
        # attribute directly instead.
        thread.daemon = True
        thread.start()

    if enable_microbatch:
        with reserve_free_port() as api_server_port:
            # start server right after port released
            # to reduce potential race
            marshal_server = MarshalService(
                saved_bundle_path,
                outbound_host="localhost",
                outbound_port=api_server_port,
                outbound_workers=1,
            )
            api_server = BentoAPIServer(bento_service, port=api_server_port)
        marshal_server.async_start(port=port)
        api_server.start()
    else:
        api_server = BentoAPIServer(bento_service, port=port)
        api_server.start()
def load(self):
    """Construct the MarshalService from this worker's settings and return
    the app it serves."""
    return MarshalService(
        self.bento_service_bundle_path,
        self.outbound_host,
        self.outbound_port,
        outbound_workers=self.outbound_workers,
    ).make_app()
def load(self):
    """Construct the MarshalService (with micro-batching limits) from this
    worker's settings and return the app it serves."""
    batching_options = dict(
        outbound_workers=self.outbound_workers,
        mb_max_batch_size=self.mb_max_batch_size,
        mb_max_latency=self.mb_max_latency,
    )
    service = MarshalService(
        self.bento_service_bundle_path,
        self.outbound_host,
        self.outbound_port,
        **batching_options,
    )
    return service.make_app()
def start_dev_batching_server(
    saved_bundle_path: str,
    port: int,
    api_server_port: int,
    mb_max_batch_size: int,
    mb_max_latency: int,
):
    """Run a micro-batching MarshalService on ``port``, forwarding batched
    requests to the API server at localhost:``api_server_port``.

    Args:
        saved_bundle_path: Path to the saved bento service bundle.
        port: Port the batching proxy listens on.
        api_server_port: Outbound port of the downstream API server.
        mb_max_batch_size: Maximum number of requests merged into one batch.
        mb_max_latency: Maximum latency budget for batching, passed through
            to MarshalService.
    """
    from bentoml.marshal.marshal import MarshalService

    batching_server = MarshalService(
        saved_bundle_path,
        outbound_host="localhost",
        outbound_port=api_server_port,
        outbound_workers=1,
        mb_max_batch_size=mb_max_batch_size,
        mb_max_latency=mb_max_latency,
    )
    logger.info("Running micro batch service on :%d", port)
    batching_server.fork_start_app(port=port)
def _start_dev_proxy(
    port: int,
    saved_bundle_path: str,
    api_server_port: int,
    enable_microbatch: bool,
    mb_max_batch_size: int,
    mb_max_latency: int,
):
    """Fork a MarshalService proxy on ``port`` that forwards requests to the
    dev API server at localhost:``api_server_port``."""
    logger.info("Starting BentoML API proxy in development mode..")

    from bentoml.marshal.marshal import MarshalService

    proxy = MarshalService(
        saved_bundle_path,
        outbound_host="localhost",
        outbound_port=api_server_port,
        enable_microbatch=enable_microbatch,
        mb_max_batch_size=mb_max_batch_size,
        mb_max_latency=mb_max_latency,
    )
    proxy.fork_start_app(port=port)
def _start_dev_proxy(
    saved_bundle_path: str,
    api_server_port: int,
    config: BentoMLConfiguration,
):
    """Fork a MarshalService proxy for the dev API server, wiring the
    dependency-injection container from ``config`` first so the marshal
    package picks up its configured settings."""
    logger.info("Starting BentoML API proxy in development mode..")

    from bentoml import marshal

    # Wire the DI container before MarshalService is imported/constructed so
    # its injected configuration values are available.
    container = BentoMLContainer()
    container.config.from_dict(config.as_dict())
    container.wire(packages=[marshal])

    from bentoml.marshal.marshal import MarshalService

    proxy_server = MarshalService(
        saved_bundle_path,
        outbound_host="localhost",
        outbound_port=api_server_port,
    )
    proxy_server.fork_start_app()