Example 1
def serve(port, bento=None, with_conda=False, enable_microbatch=False):
    # `pip_installed_bundle_path` comes from the enclosing scope of this
    # nested CLI command function.
    track_cli('serve')
    bento_service_bundle_path = resolve_bundle_path(
        bento, pip_installed_bundle_path
    )
    bento_service = load(bento_service_bundle_path)

    if with_conda:
        # Re-run the same serve command inside the bundle's conda environment.
        run_with_conda_env(
            bento_service_bundle_path,
            'bentoml serve {bento} --port {port} {flags}'.format(
                bento=bento_service_bundle_path,
                port=port,
                flags="--enable-microbatch" if enable_microbatch else "",
            ),
        )
        return

    if enable_microbatch:
        with reserve_free_port() as api_server_port:
            # start the API server right after the reserved port is
            # released, to reduce the potential for a port race
            marshal_server = MarshalService(
                bento_service_bundle_path,
                outbound_host="localhost",
                outbound_port=api_server_port,
                outbound_workers=1,
            )
            api_server = BentoAPIServer(bento_service, port=api_server_port)
        # The marshal proxy takes the public port and forwards batched
        # requests to the API server on the reserved local port.
        marshal_server.async_start(port=port)
        api_server.start()
    else:
        api_server = BentoAPIServer(bento_service, port=port)
        api_server.start()
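The `reserve_free_port()` helper used above is a BentoML utility that picks an OS-assigned free port for the inner API server while the marshal proxy takes the public port. A minimal sketch of how such a context manager can be written with the standard `socket` module follows; this is illustrative, not BentoML's actual implementation:

import contextlib
import socket

@contextlib.contextmanager
def reserve_free_port(host="localhost"):
    # Bind to port 0 so the OS picks an unused port; yield that port
    # number, then release the socket on exit so the caller can bind it.
    sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    sock.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
    sock.bind((host, 0))
    try:
        yield sock.getsockname()[1]
    finally:
        sock.close()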
Example 2
def load(self):
    server = MarshalService(
        self.bento_service_bundle_path,
        self.outbound_host,
        self.outbound_port,
        outbound_workers=self.outbound_workers,
    )
    return server.make_app()
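This `load()` hook matches gunicorn's custom-application pattern, in which each worker calls `load()` to obtain the app it should serve. A sketch of what the surrounding class could look like under that assumption; the class name, attributes, and options here are illustrative, and `MarshalService` is assumed to be importable from BentoML:

from gunicorn.app.base import BaseApplication


class MarshalGunicornApp(BaseApplication):
    def __init__(self, bundle_path, outbound_host, outbound_port,
                 bind="0.0.0.0:5000", outbound_workers=1):
        self.bento_service_bundle_path = bundle_path
        self.outbound_host = outbound_host
        self.outbound_port = outbound_port
        self.outbound_workers = outbound_workers
        self.options = {"bind": bind}
        super().__init__()  # triggers load_config()

    def load_config(self):
        # Forward options into gunicorn's config object.
        for key, value in self.options.items():
            self.cfg.set(key, value)

    def load(self):
        # Called in each worker process to build the app to serve.
        server = MarshalService(
            self.bento_service_bundle_path,
            self.outbound_host,
            self.outbound_port,
            outbound_workers=self.outbound_workers,
        )
        return server.make_app()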
Example 3
class MarshalServer:
    """
    MarshalServer creates a reverse proxy server in front of actual API server,
    implementing the micro batching feature.
    Requests in a short period(mb_max_latency) are collected and sent to API server,
    merged into a single request.
    """

    _DEFAULT_PORT = config("apiserver").getint("default_port")
    _DEFAULT_MAX_LATENCY = config("marshal_server").getint(
        "default_max_latency")
    _DEFAULT_MAX_BATCH_SIZE = config("marshal_server").getint(
        "default_max_batch_size")

    def __init__(self, target_host, target_port, port=_DEFAULT_PORT):
        self.port = port
        self.marshal_app = MarshalService(target_host, target_port)

    def setup_routes_from_pb(self, bento_service_metadata_pb):
        # Register a micro-batch handler for every API whose handler type
        # supports batch mode.
        for api_config in bento_service_metadata_pb.apis:
            if api_config.handler_type in HANDLER_TYPES_BATCH_MODE_SUPPORTED:
                handler_config = getattr(api_config, "handler_config", {})
                max_latency = (handler_config["mb_max_latency"]
                               if "mb_max_latency" in handler_config else
                               self._DEFAULT_MAX_LATENCY)
                self.marshal_app.add_batch_handler(
                    api_config.name, max_latency, self._DEFAULT_MAX_BATCH_SIZE)
                marshal_logger.info("Micro batch enabled for API `%s`",
                                    api_config.name)

    def async_start(self):
        """
        Start an micro batch server at the specific port on the instance or parameter.
        """
        track_server('marshal')
        marshal_proc = multiprocessing.Process(
            target=self.marshal_app.fork_start_app,
            kwargs=dict(port=self.port),
            daemon=True,
        )
        # TODO: make sure child process dies when parent process is killed.
        marshal_proc.start()
        marshal_logger.info("Running micro batch service on :%d", self.port)
Example 4
def load(self):
    server = MarshalService(
        self.bento_service_bundle_path, self.target_host, self.target_port,
    )
    return server.make_app()
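This variant plays the same worker-load role as the hook in Example 2, but passes the outbound host and port positionally through differently named attributes.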