def init(kv_store_connector=None,
         kv_store_path="/tmp/ray_serve.db",
         blocking=False,
         http_host=DEFAULT_HTTP_HOST,
         http_port=DEFAULT_HTTP_PORT,
         ray_init_kwargs={"object_store_memory": int(1e8)},
         gc_window_seconds=3600):
    """Initialize a serve cluster.

    If the serve cluster has already been initialized, this function is a
    no-op.

    Calling `ray.init` before `serve.init` is optional. If no Ray cluster has
    been started, serve will call `ray.init` with the `object_store_memory`
    requirement.

    Args:
        kv_store_connector (callable): Function of (namespace) => TableObject.
            We will use a SQLite connector that stores to /tmp by default.
        kv_store_path (str, path): Path to the SQLite table.
        blocking (bool): If true, the function will wait for the HTTP server
            to be healthy and other components to be ready before returning.
        http_host (str): Host for HTTP server. Defaults to "0.0.0.0".
        http_port (int): Port for HTTP server. Defaults to 8000.
        ray_init_kwargs (dict): Arguments passed to ray.init if there is no
            Ray connection. Defaults to {"object_store_memory": int(1e8)} for
            performance stability reasons.
        gc_window_seconds (int): How long to keep the metric data in memory.
            Data older than the gc_window will be deleted. The default is
            3600 seconds, which is 1 hour.
    """
    global global_state
    # No-op if global_state has already been set.
    if global_state is not None:
        return

    # Initialize ray if needed.
    if not ray.is_initialized():
        ray.init(**ray_init_kwargs)

    # Try to get the serve nursery if it already exists.
    try:
        ray.experimental.get_actor(SERVE_NURSERY_NAME)
        global_state = GlobalState()
        return
    except ValueError:
        pass

    # Serve has not been initialized, perform init sequence.
    # TODO: move the db to session_dir
    #   ray.worker._global_node.address_info["session_dir"]
    def kv_store_connector(namespace):
        return SQLiteKVStore(namespace, db_path=kv_store_path)

    nursery = start_initial_state(kv_store_connector)

    global_state = GlobalState(nursery)
    global_state.init_or_get_http_server(host=http_host, port=http_port)
    global_state.init_or_get_router()
    global_state.init_or_get_metric_monitor(
        gc_window_seconds=gc_window_seconds)

    if blocking:
        block_until_http_ready("http://{}:{}".format(http_host, http_port))
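# Minimal usage sketch for the init() above, not part of the original source.
# Assumes the module is exposed as `ray.experimental.serve`; the SQLite path
# shown is illustrative only.
from ray.experimental import serve

# Persist the serve tables to a custom SQLite file and block until the HTTP
# server at 0.0.0.0:8000 responds.
serve.init(kv_store_path="/tmp/my_serve.db", blocking=True)

# A second call is a no-op because global_state is already set.
serve.init()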
import inspect

import numpy as np

import ray
from ray.experimental.serve.task_runner import RayServeMixin, TaskRunnerActor
from ray.experimental.serve.utils import pformat_color_json, logger
from ray.experimental.serve.global_state import GlobalState

global_state = GlobalState()


def init(blocking=False, object_store_memory=int(1e8)):
    """Initialize a serve cluster.

    Calling `ray.init` before `serve.init` is optional. If no Ray cluster has
    been started, serve will call `ray.init` with the `object_store_memory`
    requirement.

    Args:
        blocking (bool): If true, the function will wait for the HTTP server
            to be healthy before returning.
        object_store_memory (int): Allocated shared memory size in bytes. The
            default is 100MB (1e8 bytes) and is kept low for latency
            stability reasons.
    """
    if not ray.is_initialized():
        ray.init(object_store_memory=object_store_memory)

    # NOTE(simon): Currently the initialization order is fixed.
    # HTTP server depends on the API server.
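# Usage sketch for the early init() signature above, not part of the original
# source. Assumes the module is exposed as `ray.experimental.serve`.
import ray
from ray.experimental import serve

# Option 1: let serve.init start Ray itself with the low default object
# store size, and wait for the HTTP server to come up.
serve.init(blocking=True)

# Option 2: call ray.init() yourself first (e.g. with a larger object store);
# serve.init then skips the internal ray.init call entirely.
# ray.init(object_store_memory=int(1e9))
# serve.init()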
class HTTPProxy:
    """
    This class should be instantiated and run by an ASGI server.

    >>> import uvicorn
    >>> uvicorn.run(HTTPProxy())  # blocks forever
    """

    def __init__(self):
        assert ray.is_initialized()
        # Delayed import because GlobalState depends on the HTTP actor.
        from ray.experimental.serve.global_state import GlobalState
        self.serve_global_state = GlobalState()
        self.route_table_cache = dict()

        self.route_checker_should_shutdown = False

    async def route_checker(self, interval):
        # Periodically refresh the cached routing table until shutdown.
        while True:
            if self.route_checker_should_shutdown:
                return
            self.route_table_cache = (
                self.serve_global_state.route_table.list_service())
            await asyncio.sleep(interval)

    async def handle_lifespan_message(self, scope, receive, send):
        assert scope["type"] == "lifespan"

        message = await receive()
        if message["type"] == "lifespan.startup":
            await _async_init()
            asyncio.ensure_future(
                self.route_checker(interval=HTTP_ROUTER_CHECKER_INTERVAL_S))
            await send({"type": "lifespan.startup.complete"})
        elif message["type"] == "lifespan.shutdown":
            self.route_checker_should_shutdown = True
            await send({"type": "lifespan.shutdown.complete"})

    async def receive_http_body(self, scope, receive, send):
        body_buffer = []
        more_body = True
        while more_body:
            message = await receive()
            assert message["type"] == "http.request"
            more_body = message["more_body"]
            body_buffer.append(message["body"])
        return b"".join(body_buffer)

    async def __call__(self, scope, receive, send):
        # NOTE: This implements the ASGI protocol specified in
        #       https://asgi.readthedocs.io/en/latest/specs/index.html
        if scope["type"] == "lifespan":
            await self.handle_lifespan_message(scope, receive, send)
            return

        assert scope["type"] == "http"
        current_path = scope["path"]
        if current_path == "/":
            await JSONResponse(self.route_table_cache)(scope, receive, send)
            return

        # TODO(simon): Use werkzeug route mapper to support variable paths.
        if current_path not in self.route_table_cache:
            error_message = ("Path {} not found. "
                             "Please ping http://.../ for routing table"
                             ).format(current_path)
            await JSONResponse(
                {"error": error_message},
                status_code=404)(scope, receive, send)
            return

        endpoint_name = self.route_table_cache[current_path]
        http_body_bytes = await self.receive_http_body(scope, receive, send)

        # Get slo_ms before enqueuing the query.
        query_string = scope["query_string"].decode("ascii")
        query_kwargs = parse_qs(query_string)
        request_slo_ms = query_kwargs.pop("slo_ms", None)
        if request_slo_ms is not None:
            try:
                if len(request_slo_ms) != 1:
                    raise ValueError(
                        "Multiple SLOs specified, please specify only one.")
                request_slo_ms = request_slo_ms[0]
                request_slo_ms = float(request_slo_ms)
                if request_slo_ms < 0:
                    raise ValueError(
                        "Request SLO must be positive, it is {}".format(
                            request_slo_ms))
            except ValueError as e:
                await JSONResponse({"error": str(e)})(scope, receive, send)
                return

        result_object_id_bytes = await (
            self.serve_global_state.init_or_get_router()
            .enqueue_request.remote(
                service=endpoint_name,
                request_args=(scope, http_body_bytes),
                request_kwargs=dict(),
                request_context=TaskContext.Web,
                request_slo_ms=request_slo_ms))

        result = await ray.ObjectID(result_object_id_bytes)

        if isinstance(result, ray.exceptions.RayTaskError):
            await JSONResponse({
                "error": "internal error, please use python API to debug"
            })(scope, receive, send)
        else:
            await JSONResponse({"result": result})(scope, receive, send)
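# Client-side sketch of how requests flow through HTTPProxy above, not part
# of the original source. Assumes a serve cluster is running with the HTTP
# server on 127.0.0.1:8000 and that some endpoint is routed at "/echo"; the
# endpoint name and port are illustrative only.
import requests

# "/" returns the cached routing table as JSON.
routes = requests.get("http://127.0.0.1:8000/").json()

# Any other path is looked up in the route table; unknown paths receive a
# JSON 404 error. A per-request latency objective can be supplied via the
# `slo_ms` query parameter, which must be a single non-negative number.
resp = requests.get("http://127.0.0.1:8000/echo", params={"slo_ms": "100"})
print(resp.json())  # {"result": ...} on success, {"error": ...} otherwise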
def init(kv_store_connector=None,
         kv_store_path=None,
         blocking=False,
         start_server=True,
         http_host=DEFAULT_HTTP_HOST,
         http_port=DEFAULT_HTTP_PORT,
         ray_init_kwargs={
             "object_store_memory": int(1e8),
             "num_cpus": max(cpu_count(), 8)
         },
         gc_window_seconds=3600,
         queueing_policy=RoutePolicy.Random,
         policy_kwargs={}):
    """Initialize a serve cluster.

    If the serve cluster has already been initialized, this function is a
    no-op.

    Calling `ray.init` before `serve.init` is optional. If no Ray cluster has
    been started, serve will call `ray.init` with the `object_store_memory`
    requirement.

    Args:
        kv_store_connector (callable): Function of (namespace) => TableObject.
            We will use a SQLite connector that stores to /tmp by default.
        kv_store_path (str, path): Path to the SQLite table.
        blocking (bool): If true, the function will wait for the HTTP server
            to be healthy and other components to be ready before returning.
        start_server (bool): If true, `serve.init` starts the HTTP server.
            (Default: True)
        http_host (str): Host for HTTP server. Defaults to "0.0.0.0".
        http_port (int): Port for HTTP server. Defaults to 8000.
        ray_init_kwargs (dict): Arguments passed to ray.init if there is no
            Ray connection. Defaults to {"object_store_memory": int(1e8)} for
            performance stability reasons.
        gc_window_seconds (int): How long to keep the metric data in memory.
            Data older than the gc_window will be deleted. The default is
            3600 seconds, which is 1 hour.
        queueing_policy (RoutePolicy): The queueing policy used to select a
            backend for a service. (Default: RoutePolicy.Random)
        policy_kwargs: Arguments required to instantiate the queueing policy.
    """
    global global_state
    # No-op if global_state has already been set.
    if global_state is not None:
        return

    # Initialize ray if needed.
    if not ray.is_initialized():
        ray.init(**ray_init_kwargs)

    # Try to get the serve nursery if it already exists.
    try:
        ray.experimental.get_actor(SERVE_NURSERY_NAME)
        global_state = GlobalState()
        return
    except ValueError:
        pass

    # Register the serialization context once.
    ray.register_custom_serializer(Query, Query.ray_serialize,
                                   Query.ray_deserialize)

    if kv_store_path is None:
        _, kv_store_path = mkstemp()

    # Serve has not been initialized, perform init sequence.
    # TODO: move the db to session_dir
    #   ray.worker._global_node.address_info["session_dir"]
    def kv_store_connector(namespace):
        return SQLiteKVStore(namespace, db_path=kv_store_path)

    nursery = start_initial_state(kv_store_connector)

    global_state = GlobalState(nursery)
    if start_server:
        global_state.init_or_get_http_server(host=http_host, port=http_port)
    global_state.init_or_get_router(
        queueing_policy=queueing_policy, policy_kwargs=policy_kwargs)
    global_state.init_or_get_metric_monitor(
        gc_window_seconds=gc_window_seconds)

    if start_server and blocking:
        block_until_http_ready("http://{}:{}".format(http_host, http_port))
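# Usage sketch for the extended init() signature above, not part of the
# original source. The import path for RoutePolicy is an assumption; only
# RoutePolicy.Random is referenced in the code above.
from ray.experimental import serve
from ray.experimental.serve import RoutePolicy  # assumed import location

# Start serve without the HTTP server (Python-only access) and explicitly
# pass the default queueing policy for backend selection.
serve.init(
    start_server=False,
    queueing_policy=RoutePolicy.Random,
    policy_kwargs={})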