async def test_power_of_two_choices(serve_instance):
    """Enqueue 3 requests for one backend, then 2 after adding a second.

    Once both backends have registered dequeue requests, the first mock
    runner is expected to have recorded 3 calls and the second one 2.
    """
    router = PowerOfTwoPolicyQueueActor.remote()

    # Phase 1: all traffic goes to backend-1; queue three requests for it.
    await router.set_traffic.remote("svc", {"backend-1": 1.0})
    pending = [
        router.enqueue_request.remote(RequestMetadata("svc", None), "1")
        for _ in range(3)
    ]

    # Phase 2: split traffic with a new backend, which should be filled next.
    await router.set_traffic.remote("svc", {
        "backend-1": 0.5,
        "backend-2": 0.5
    })
    pending += [
        router.enqueue_request.remote(RequestMetadata("svc", None), "2")
        for _ in range(2)
    ]

    # Register three dequeue requests per backend, alternating between them.
    runners = {
        "backend-1": make_task_runner_mock(),
        "backend-2": make_task_runner_mock(),
    }
    for _ in range(3):
        for backend_tag, mock_runner in runners.items():
            await router.dequeue_request.remote(backend_tag, mock_runner)

    # Every queued request must complete before the call counts are read.
    await asyncio.gather(*pending)

    calls_on_first = await runners["backend-1"].get_all_calls.remote()
    assert len(calls_on_first) == 3
    calls_on_second = await runners["backend-2"].get_all_calls.remote()
    assert len(calls_on_second) == 2
async def test_alter_backend(serve_instance, task_runner_mock_actor):
    """Point "svc" at backend-1, then at backend-2, sending one request each.

    After each switch the mock runner's most recent call must carry the
    argument that was enqueued for that phase.
    """
    router = RandomPolicyQueueActor.remote()

    # (backend receiving all traffic, payload sent during that phase)
    phases = (("backend-1", 1), ("backend-2", 2))
    for backend_tag, payload in phases:
        await router.set_traffic.remote("svc", {backend_tag: 1})
        await router.dequeue_request.remote(backend_tag,
                                            task_runner_mock_actor)
        await router.enqueue_request.remote(
            RequestMetadata("svc", None), payload)

        latest_call = await task_runner_mock_actor.get_recent_call.remote()
        assert latest_call.request_args[0] == payload
async def test_ray_serve_mixin(serve_instance):
    """Serve a callable class through RayServeMixin and check its results.

    The actor adds a fixed increment (3) to the ``i`` keyword argument, so
    each enqueued query must come back as ``query + 3``.
    """
    CONSUMER_NAME = "runner-cls"
    PRODUCER_NAME = "prod-cls"
    increment = 3

    class MyAdder:
        def __init__(self, inc):
            self.increment = inc

        def __call__(self, flask_request, i=None):
            return i + self.increment

    @ray.remote
    class CustomActor(MyAdder, RayServeMixin):
        pass

    router = RoundRobinPolicyQueueActor.remote()
    worker = CustomActor.remote(increment)

    # Set the worker up as the consumer and start its fetch call.
    worker._ray_serve_setup.remote(CONSUMER_NAME, router, worker)
    worker._ray_serve_fetch.remote()

    router.link.remote(PRODUCER_NAME, CONSUMER_NAME)

    for query in (333, 444, 555):
        metadata = RequestMetadata(PRODUCER_NAME, context.TaskContext.Python)
        outcome = await router.enqueue_request.remote(metadata, i=query)
        assert outcome == query + 3
def remote(self, *args, **kwargs):
    """Enqueue a Python-context request for this handle's endpoint.

    Only keyword arguments are accepted; they are forwarded unchanged to
    the router's ``enqueue_request``.

    Raises:
        RayServeException: if any positional argument is supplied.

    Returns:
        Whatever ``router_handle.enqueue_request.remote`` returns.
    """
    if args:
        raise RayServeException(
            "handle.remote must be invoked with keyword arguments.")

    # Bundle the endpoint name, task context and SLO settings for the router.
    metadata = RequestMetadata(
        self.endpoint_name,
        TaskContext.Python,
        self.relative_slo_ms,
        self.absolute_slo_ms,
    )
    enqueue = self.router_handle.enqueue_request
    return enqueue.remote(metadata, **kwargs)
async def test_single_prod_cons_queue(serve_instance, task_runner_mock_actor):
    """Send one request through a single service/backend link.

    Checks both the value returned to the caller and the arguments the mock
    runner recorded for the call.
    """
    router = RandomPolicyQueueActor.remote()
    router.link.remote("svc", "backend")
    router.dequeue_request.remote("backend", task_runner_mock_actor)

    # The caller should receive the runner's result.
    metadata = RequestMetadata("svc", None)
    outcome = await router.enqueue_request.remote(metadata, 1)
    assert outcome == "DONE"

    # The runner should have seen exactly the request that was sent:
    # one positional argument and no keyword arguments.
    latest_call = await task_runner_mock_actor.get_recent_call.remote()
    first_positional = latest_call.request_args[0]
    assert first_positional == 1
    assert latest_call.request_kwargs == {}
async def test_split_traffic_random(serve_instance, task_runner_mock_actor):
    """Split "svc" traffic 50/50 and check both backends receive work.

    The ``task_runner_mock_actor`` fixture is requested but not used in the
    body; two fresh mock runners are created instead.
    """
    router = RandomPolicyQueueActor.remote()
    await router.set_traffic.remote("svc", {
        "backend-1": 0.5,
        "backend-2": 0.5
    })

    runners = {
        "backend-1": make_task_runner_mock(),
        "backend-2": make_task_runner_mock(),
    }
    # Register 20 dequeue requests per backend, alternating between them.
    for _ in range(20):
        for backend_tag, mock_runner in runners.items():
            await router.dequeue_request.remote(backend_tag, mock_runner)

    # Assuming a 50% split, the probability that all 20 requests go to a
    # single queue is on the order of 0.5^20 ~ 1e-6.
    for _ in range(20):
        await router.enqueue_request.remote(RequestMetadata("svc", None), 1)

    latest_args = []
    for mock_runner in runners.values():
        latest_call = await mock_runner.get_recent_call.remote()
        latest_args.append(latest_call.request_args[0])
    assert latest_args == [1, 1]
async def test_runner_actor(serve_instance):
    """Run an echo function in a TaskRunnerActor and check the results.

    Each query is passed as the ``i`` keyword argument and must be returned
    unchanged.
    """
    CONSUMER_NAME = "runner"
    PRODUCER_NAME = "prod"

    def echo(flask_request, i=None):
        return i

    router = RoundRobinPolicyQueueActor.remote()
    worker = TaskRunnerActor.remote(echo)

    # Set the worker up as the consumer and start its fetch call.
    worker._ray_serve_setup.remote(CONSUMER_NAME, router, worker)
    worker._ray_serve_fetch.remote()

    router.link.remote(PRODUCER_NAME, CONSUMER_NAME)

    for query in (333, 444, 555):
        metadata = RequestMetadata(PRODUCER_NAME, context.TaskContext.Python)
        outcome = await router.enqueue_request.remote(metadata, i=query)
        assert outcome == query
async def test_fixed_packing(serve_instance):
    """Send ``packing_num`` requests per backend under fixed packing.

    After every enqueue, each call recorded so far by the backend's runner
    must carry the payload addressed to that backend.
    """
    packing_num = 4
    router = FixedPackingPolicyQueueActor.remote(packing_num=packing_num)
    await router.set_traffic.remote("svc", {
        "backend-1": 0.5,
        "backend-2": 0.5
    })

    first_runner = make_task_runner_mock()
    second_runner = make_task_runner_mock()

    # both the backends will get equal number of queries
    # as it is packed round robin
    for _ in range(packing_num):
        await router.dequeue_request.remote("backend-1", first_runner)
        await router.dequeue_request.remote("backend-2", second_runner)

    for suffix, mock_runner in (("1", first_runner), ("2", second_runner)):
        payload = "should-go-to-backend-{}".format(suffix)
        for _ in range(packing_num):
            await router.enqueue_request.remote(
                RequestMetadata("svc", None), payload)
            recorded = await mock_runner.get_all_calls.remote()
            for recorded_call in recorded:
                assert recorded_call.request_args[0] == payload
async def test_round_robin(serve_instance, task_runner_mock_actor):
    """Split "svc" traffic 50/50 round-robin and check both backends get work.

    The ``task_runner_mock_actor`` fixture is requested but not used in the
    body; two fresh mock runners are created instead.
    """
    router = RoundRobinPolicyQueueActor.remote()
    await router.set_traffic.remote("svc", {
        "backend-1": 0.5,
        "backend-2": 0.5
    })

    runners = {
        "backend-1": make_task_runner_mock(),
        "backend-2": make_task_runner_mock(),
    }
    # NOTE: compared with test_split_traffic_random, each backend registers
    # only 10 dequeue requests here (and a round-robin policy actor is used),
    # so the 20 requests below need both backends to be served.
    for _ in range(10):
        for backend_tag, mock_runner in runners.items():
            await router.dequeue_request.remote(backend_tag, mock_runner)

    for _ in range(20):
        await router.enqueue_request.remote(RequestMetadata("svc", None), 1)

    latest_args = []
    for mock_runner in runners.values():
        latest_call = await mock_runner.get_recent_call.remote()
        latest_args.append(latest_call.request_args[0])
    assert latest_args == [1, 1]
async def test_task_runner_check_context(serve_instance):
    """Expect a RayTaskError when a Python-context call reads the request.

    The served function touches ``flask_request.args`` although the request
    is enqueued with ``TaskContext.Python``; awaiting the result must raise.
    """
    CONSUMER_NAME = "runner"
    PRODUCER_NAME = "producer"

    def echo(flask_request, i=None):
        # Accessing the flask_request without web context should throw.
        return flask_request.args["i"]

    router = RoundRobinPolicyQueueActor.remote()
    worker = TaskRunnerActor.remote(echo)

    # Set the worker up as the consumer and start its fetch call.
    worker._ray_serve_setup.remote(CONSUMER_NAME, router, worker)
    worker._ray_serve_fetch.remote()

    router.link.remote(PRODUCER_NAME, CONSUMER_NAME)
    metadata = RequestMetadata(PRODUCER_NAME, context.TaskContext.Python)
    pending_result = router.enqueue_request.remote(metadata, i=42)

    with pytest.raises(ray.exceptions.RayTaskError):
        await pending_result
async def test_slo(serve_instance, task_runner_mock_actor):
    """Enqueue 10 requests with decreasing relative SLOs and check the order.

    Request ``i`` gets ``relative_slo_ms = 1000 - 100 * i``, so later
    requests have tighter SLOs. The last ten calls recorded by the runner
    are expected to carry 9, 8, ..., 0 in that order.
    """
    router = RandomPolicyQueueActor.remote()
    await router.link.remote("svc", "backend")

    # Queue everything before any backend is available to pick work up.
    pending = [
        router.enqueue_request.remote(
            RequestMetadata(
                "svc", None, relative_slo_ms=1000 - 100 * index), index)
        for index in range(10)
    ]

    for _ in range(10):
        await router.dequeue_request.remote("backend",
                                            task_runner_mock_actor)

    await asyncio.gather(*pending)

    # Only the ten most recent calls are checked.
    recorded = await task_runner_mock_actor.get_all_calls.remote()
    latest_ten = recorded[-10:]
    for expected, recorded_call in zip(range(9, -1, -1), latest_ten):
        assert recorded_call.request_args[0] == expected
async def __call__(self, scope, receive, send):
    """Handle one ASGI connection: route an HTTP request to its endpoint.

    Behaviour by case:
      * ``lifespan`` scopes are delegated to ``handle_lifespan_message``.
      * ``GET /`` style requests to the root path return the cached
        routing table as JSON.
      * Unknown paths return a JSON error with status 404.
      * Invalid SLO query parameters (``relative_slo_ms`` /
        ``absolute_slo_ms``) return a JSON error with status 400.
      * Otherwise the request is enqueued on the router and the result is
        returned as ``{"result": ...}``; if the result is a
        ``RayTaskError`` a JSON error with status 500 is returned instead.

    Args:
        scope: ASGI connection scope; ``type``, ``path`` and
            ``query_string`` are read from it.
        receive: ASGI receive callable.
        send: ASGI send callable.
    """
    # NOTE: This implements ASGI protocol specified in
    # https://asgi.readthedocs.io/en/latest/specs/index.html

    if scope["type"] == "lifespan":
        await self.handle_lifespan_message(scope, receive, send)
        return

    assert scope["type"] == "http"
    current_path = scope["path"]
    if current_path == "/":
        await JSONResponse(self.route_table_cache)(scope, receive, send)
        return

    # TODO(simon): Use werkzeug route mapper to support variable path
    if current_path not in self.route_table_cache:
        error_message = ("Path {} not found. "
                         "Please ping http://.../ for routing table"
                         ).format(current_path)
        await JSONResponse(
            {
                "error": error_message
            }, status_code=404)(scope, receive, send)
        return

    endpoint_name = self.route_table_cache[current_path]
    http_body_bytes = await self.receive_http_body(scope, receive, send)

    # get slo_ms before enqueuing the query
    query_string = scope["query_string"].decode("ascii")
    query_kwargs = parse_qs(query_string)
    relative_slo_ms = query_kwargs.pop("relative_slo_ms", None)
    absolute_slo_ms = query_kwargs.pop("absolute_slo_ms", None)
    try:
        # A ValueError from _check_slo_ms is reported to the client below.
        relative_slo_ms = self._check_slo_ms(relative_slo_ms)
        absolute_slo_ms = self._check_slo_ms(absolute_slo_ms)
        if relative_slo_ms is not None and absolute_slo_ms is not None:
            raise ValueError("Both relative and absolute slo's "
                             "cannot be specified.")
    except ValueError as e:
        # Bad query parameters are a client error, not a success.
        await JSONResponse(
            {
                "error": str(e)
            }, status_code=400)(scope, receive, send)
        return

    # create objects necessary for enqueue
    # enclosing http_body_bytes to list due to
    # https://github.com/ray-project/ray/issues/6944
    # TODO(alind): remove list enclosing after issue is fixed
    args = (scope, [http_body_bytes])
    request_in_object = RequestMetadata(
        endpoint_name,
        TaskContext.Web,
        relative_slo_ms=relative_slo_ms,
        absolute_slo_ms=absolute_slo_ms)

    router = self.serve_global_state.init_or_get_router()
    result = await router.enqueue_request.remote(request_in_object, *args)

    if isinstance(result, ray.exceptions.RayTaskError):
        # The backend failed; report a server error rather than HTTP 200.
        await JSONResponse(
            {
                "error": "internal error, please use python API to debug"
            },
            status_code=500)(scope, receive, send)
    else:
        await JSONResponse({"result": result})(scope, receive, send)