# NOTE(review): this test has been collapsed onto single physical lines and a
# credential-redaction pass replaced a span with "*****" between the HTTP URL
# and the @serve.deployment decorator. The redaction appears to have destroyed
# real code: the tail of the requests.get() call, the call() helper's return
# statement, and the definitions of `signal_name` and `signal` (both are
# referenced later but never defined in the visible text). Recover the
# original from version control before editing; do not hand-reconstruct.
# NOTE(review): inside make_nonblocking_calls,
# `for ref in not_ready: blocking.extend(not_ready)` extends with the whole
# not_ready list once per element (quadratic duplication) -- likely meant
# `blocking.append(ref)` or a single extend outside the loop. Also the poll
# loop has no sleep between iterations (compare the variant that sleeps 0.1s),
# so it busy-spins for up to 30s. Intended behavior: V1 deploys 2 replicas,
# one replica is blocked on the signal, then redeploying V2 should roll only
# the unblocked replica until the signal releases the blocked one.
def test_redeploy_multiple_replicas(serve_instance, use_handle): # Tests that redeploying a deployment with multiple replicas performs # a rolling update. client = serve_instance name = "test" @ray.remote(num_cpus=0) def call(block=False): if use_handle: handle = serve.get_deployment(name).get_handle() ret = ray.get(handle.handler.remote(block)) else: ret = requests.get( f"http://*****:*****@serve.deployment(name=name, version="1", num_replicas=2) class V1: async def handler(self, block: bool): if block: signal = ray.get_actor(signal_name) await signal.wait.remote() return f"1|{os.getpid()}" async def __call__(self, request): return await self.handler(request.query_params["block"] == "True") class V2: async def handler(self, *args): return f"2|{os.getpid()}" async def __call__(self, request): return await self.handler() def make_nonblocking_calls(expected, expect_blocking=False): # Returns dict[val, set(pid)]. blocking = [] responses = defaultdict(set) start = time.time() while time.time() - start < 30: refs = [call.remote(block=False) for _ in range(10)] ready, not_ready = ray.wait(refs, timeout=0.5) for ref in ready: val, pid = ray.get(ref) responses[val].add(pid) for ref in not_ready: blocking.extend(not_ready) if (all( len(responses[val]) == num for val, num in expected.items()) and (expect_blocking is False or len(blocking) > 0)): break else: assert False, f"Timed out, responses: {responses}." return responses, blocking V1.deploy() responses1, _ = make_nonblocking_calls({"1": 2}) pids1 = responses1["1"] # ref2 will block a single replica until the signal is sent. Check that # some requests are now blocking. ref2 = call.remote(block=True) responses2, blocking2 = make_nonblocking_calls( { "1": 1 }, expect_blocking=True) assert list(responses2["1"])[0] in pids1 # Redeploy new version. Since there is one replica blocking, only one new # replica should be started up.
V2 = V1.options(func_or_class=V2, version="2") goal_ref = V2.deploy(_blocking=False) assert not client._wait_for_goal(goal_ref, timeout=0.1) responses3, blocking3 = make_nonblocking_calls( { "1": 1 }, expect_blocking=True) # Signal the original call to exit. ray.get(signal.send.remote()) val, pid = ray.get(ref2) assert val == "1" assert pid in responses1["1"] # Now the goal and requests to the new version should complete. # We should have two running replicas of the new version. assert client._wait_for_goal(goal_ref) make_nonblocking_calls({"2": 2})
def test_dependency_refcounts(ray_start_regular):
    """Verify task-dependency reference counting across six scenarios.

    A dependency's refcount must rise while a task consuming it is pending
    and drop as soon as the task finishes (or fails) -- for direct plasma
    dependencies, inlined (small, in-process) dependencies, and dependencies
    spilled to plasma by a large return value.

    NOTE(review): ``check_refcounts`` and ``SignalActor`` are defined
    elsewhere in this file. The ``(a, b)`` tuples presumably mean
    (local refcount, submitted-task refcount) per ObjectRef -- confirm
    against check_refcounts' implementation.
    """

    @ray.remote
    def one_dep(dep, signal=None, fail=False):
        # Optionally block until the test sends the signal, then optionally
        # fail so the test can observe refcounts on task failure.
        if signal is not None:
            ray.get(signal.wait.remote())
        if fail:
            raise Exception("failed on purpose")

    @ray.remote
    def one_dep_large(dep, signal=None):
        if signal is not None:
            ray.get(signal.wait.remote())
        # This will be spilled to plasma (too large to inline: 10 MiB).
        return np.zeros(10 * 1024 * 1024, dtype=np.uint8)

    # Test that regular plasma dependency refcounts are decremented once the
    # task finishes.
    signal = SignalActor.remote()
    large_dep = ray.put(np.zeros(10 * 1024 * 1024, dtype=np.uint8))
    result = one_dep.remote(large_dep, signal=signal)
    # While the task is blocked, large_dep is pinned by one submitted task.
    check_refcounts({large_dep: (1, 1), result: (1, 0)})
    ray.get(signal.send.remote())
    # Reference count should be removed once the task finishes.
    check_refcounts({large_dep: (1, 0), result: (1, 0)})
    del large_dep, result
    check_refcounts({})

    # Test that inlined dependency refcounts are decremented once they are
    # inlined.
    signal = SignalActor.remote()
    dep = one_dep.remote(None, signal=signal)
    check_refcounts({dep: (1, 0)})
    result = one_dep.remote(dep)
    check_refcounts({dep: (1, 1), result: (1, 0)})
    ray.get(signal.send.remote())
    # Reference count should be removed as soon as the dependency is inlined
    # (small values are copied into the dependent task's spec).
    check_refcounts({dep: (1, 0), result: (1, 0)})
    del dep, result
    check_refcounts({})

    # Test that spilled plasma dependency refcounts are decremented once
    # the task finishes.
    signal1, signal2 = SignalActor.remote(), SignalActor.remote()
    dep = one_dep_large.remote(None, signal=signal1)
    check_refcounts({dep: (1, 0)})
    result = one_dep.remote(dep, signal=signal2)
    check_refcounts({dep: (1, 1), result: (1, 0)})
    ray.get(signal1.send.remote())
    ray.get(dep, timeout=10)
    # Reference count should remain because the dependency is in plasma
    # (a plasma value is never inlined, so the consumer keeps it pinned).
    check_refcounts({dep: (1, 1), result: (1, 0)})
    ray.get(signal2.send.remote())
    # Reference count should be removed because the task finished.
    check_refcounts({dep: (1, 0), result: (1, 0)})
    del dep, result
    check_refcounts({})

    # Test that regular plasma dependency refcounts are decremented if a task
    # fails.
    signal = SignalActor.remote()
    large_dep = ray.put(np.zeros(10 * 1024 * 1024, dtype=np.uint8))
    result = one_dep.remote(large_dep, signal=signal, fail=True)
    check_refcounts({large_dep: (1, 1), result: (1, 0)})
    ray.get(signal.send.remote())
    # Reference count should be removed once the task finishes, even though
    # it finished by raising.
    check_refcounts({large_dep: (1, 0), result: (1, 0)})
    del large_dep, result
    check_refcounts({})

    # Test that spilled plasma dependency refcounts are decremented if a task
    # fails.
    signal1, signal2 = SignalActor.remote(), SignalActor.remote()
    dep = one_dep_large.remote(None, signal=signal1)
    check_refcounts({dep: (1, 0)})
    result = one_dep.remote(dep, signal=signal2, fail=True)
    check_refcounts({dep: (1, 1), result: (1, 0)})
    ray.get(signal1.send.remote())
    ray.get(dep, timeout=10)
    # Reference count should remain because the dependency is in plasma.
    check_refcounts({dep: (1, 1), result: (1, 0)})
    ray.get(signal2.send.remote())
    # Reference count should be removed because the task finished (failed).
    check_refcounts({dep: (1, 0), result: (1, 0)})
    del dep, result
    check_refcounts({})
# NOTE(review): this test has been collapsed onto single physical lines and a
# credential-redaction pass replaced a span with "*****" between the HTTP URL
# and the @serve.deployment decorator. The redaction appears to have destroyed
# real code: the tail of the requests.get() call, the call() helper's return
# statement, and the definitions of `signal_name` and `signal` (both are
# referenced later but never defined in the visible text). Recover the
# original from version control before editing; do not hand-reconstruct.
# Intended behavior: with a single V1 replica blocked on the signal,
# redeploying V2 must not complete (goal still pending, new requests block)
# until the signal releases the old replica; afterwards the new-version
# request returns "2" from a different PID.
def test_redeploy_single_replica(serve_instance, use_handle): # Tests that redeploying a deployment with a single replica waits for the # replica to completely shut down before starting a new one. client = serve_instance name = "test" @ray.remote def call(block=False): if use_handle: handle = serve.get_deployment(name).get_handle() ret = ray.get(handle.handler.remote(block)) else: ret = requests.get( f"http://*****:*****@serve.deployment(name=name, version="1") class V1: async def handler(self, block: bool): if block: signal = ray.get_actor(signal_name) await signal.wait.remote() return f"1|{os.getpid()}" async def __call__(self, request): return await self.handler(request.query_params["block"] == "True") class V2: async def handler(self, *args): return f"2|{os.getpid()}" async def __call__(self, request): return await self.handler() V1.deploy() ref1 = call.remote(block=False) val1, pid1 = ray.get(ref1) assert val1 == "1" # ref2 will block until the signal is sent. ref2 = call.remote(block=True) assert len(ray.wait([ref2], timeout=0.1)[0]) == 0 # Redeploy new version. This should not go through until the old version # replica completely stops. V2 = V1.options(func_or_class=V2, version="2") goal_ref = V2.deploy(_blocking=False) assert not client._wait_for_goal(goal_ref, timeout=0.1) # It may take some time for the handle change to propagate and requests # to get sent to the new version. Repeatedly send requests until they # start blocking start = time.time() new_version_ref = None while time.time() - start < 30: ready, not_ready = ray.wait([call.remote(block=False)], timeout=0.5) if len(ready) == 1: # If the request doesn't block, it must have been the old version. val, pid = ray.get(ready[0]) assert val == "1" assert pid == pid1 elif len(not_ready) == 1: # If the request blocks, it must have been the new version. new_version_ref = not_ready[0] break else: assert False, "Timed out waiting for new version to be called." # Signal the original call to exit.
ray.get(signal.send.remote()) val2, pid2 = ray.get(ref2) assert val2 == "1" assert pid2 == pid1 # Now the goal and request to the new version should complete. assert client._wait_for_goal(goal_ref) new_version_val, new_version_pid = ray.get(new_version_ref) assert new_version_val == "2" assert new_version_pid != pid2
def test_async_actor_task_retries(ray_start_regular):
    """Async actor tasks must be retried out of order after an actor crash.

    Regression test for https://github.com/ray-project/ray/issues/11683:
    only the tasks that were still pending when the actor died (seqno 1 and
    3 below) may be retried; already-completed tasks (seqno 0 and 2) must
    not re-run, otherwise set_should_exit would re-fire and the actor would
    crash forever.
    """
    signal = SignalActor.remote()

    @ray.remote
    class DyingActor:
        def __init__(self):
            print("DyingActor init called")
            # When True, the next get() call hard-kills the process.
            self.should_exit = False

        def set_should_exit(self):
            print("DyingActor.set_should_exit called")
            self.should_exit = True

        async def get(self, x, wait=False):
            print(f"DyingActor.get called with x={x}, wait={wait}")
            if self.should_exit:
                # os._exit skips cleanup, simulating a hard actor crash so
                # Ray's max_restarts machinery kicks in.
                os._exit(0)
            if wait:
                # Park this task until the test fires the signal.
                await signal.wait.remote()
            return x

    # Normal in order actor task retries should work: the actor dies on the
    # request for 42, restarts, and the retried task succeeds.
    dying = DyingActor.options(
        max_restarts=-1,
        max_task_retries=-1,
    ).remote()
    assert ray.get(dying.get.remote(1)) == 1
    ray.get(dying.set_should_exit.remote())
    assert ray.get(dying.get.remote(42)) == 42

    # Now let's try out of order retries:
    # Task seqno 0 will return
    # Task seqno 1 will be pending and retried later
    # Task seqno 2 will return
    # Task seqno 3 will crash the actor and be retried later
    dying = DyingActor.options(
        max_restarts=-1,
        max_task_retries=-1,
    ).remote()

    # seqno 0
    ref_0 = dying.get.remote(0)
    assert ray.get(ref_0) == 0
    # seqno 1: parks on the signal, so it is in flight when the crash hits.
    ref_1 = dying.get.remote(1, wait=True)
    # seqno 2
    ref_2 = dying.set_should_exit.remote()
    assert ray.get(ref_2) is None
    # seqno 3, this will crash the actor because the previous task set
    # should_exit to true.
    ref_3 = dying.get.remote(3)

    # At this point the actor should be restarted. The two pending tasks
    # [ref_1, ref_3] should be retried, but not the completed tasks [ref_0,
    # ref_2]. Critically, if ref_2 was retried, ref_3 could never return.
    ray.get(signal.send.remote())
    assert ray.get(ref_1) == 1
    assert ray.get(ref_3) == 3
async def test_replica_set(ray_instance, mock_controller_with_name):
    """ReplicaSet must honor max_concurrent_queries=1 per replica.

    With two replicas each blocked on one in-flight query, a third
    assign_replica() call must stay pending until a replica frees up.
    """
    signal = SignalActor.remote()

    @ray.remote(num_cpus=0)
    class MockWorker:
        # Count of requests this replica has received (class attribute used
        # as the per-instance initial value).
        _num_queries = 0

        @ray.method(num_returns=2)
        async def handle_request(self, request):
            self._num_queries += 1
            # Block until the test fires the signal actor.
            await signal.wait.remote()
            return b"", "DONE"

        async def num_queries(self):
            return self._num_queries

    # We will test a scenario with two replicas in the replica set.
    rs = ReplicaSet(
        mock_controller_with_name[1],
        "my_backend",
        asyncio.get_event_loop(),
    )
    workers = [MockWorker.remote() for _ in range(2)]
    rs.set_max_concurrent_queries(BackendConfig(max_concurrent_queries=1))
    rs.update_worker_replicas(workers)

    # Send two queries. They should go through the router but be blocked by
    # the signal actor inside each replica.
    query = Query([], {}, RequestMetadata("request-id", "endpoint"))
    first_ref = await rs.assign_replica(query)
    second_ref = await rs.assign_replica(query)

    # These should be blocked by the signal actor.
    with pytest.raises(ray.exceptions.GetTimeoutError):
        ray.get([first_ref, second_ref], timeout=1)

    # Each replica should have exactly one inflight query. Let's make sure
    # the queries actually arrived there before proceeding.
    for worker in workers:
        while await worker.num_queries.remote() != 1:
            await asyncio.sleep(1)

    # Let's try to send another query.
    third_ref_pending_task = asyncio.get_event_loop().create_task(
        rs.assign_replica(query))
    # We should fail to assign a replica, so this coroutine should still be
    # pending after some time.
    await asyncio.sleep(0.2)
    assert not third_ref_pending_task.done()

    # Let's unblock the two workers.
    await signal.send.remote()
    assert await first_ref == "DONE"
    assert await second_ref == "DONE"

    # The third request should now be assigned to one of the freed replicas.
    # This means we should be able to get the object ref.
    third_ref = await third_ref_pending_task

    # Now that we have the object ref, unblock it and get its result.
    await signal.send.remote()
    assert await third_ref == "DONE"

    # Finally, make sure that exactly one of the replicas processed the
    # third query (2 on one replica, 1 on the other).
    num_queries_set = {(await worker.num_queries.remote())
                      for worker in workers}
    assert num_queries_set == {2, 1}
# NOTE(review): this test has been collapsed onto single physical lines and a
# credential-redaction pass replaced a span with "*****" between the HTTP URL
# and the @serve.deployment decorator. The redaction appears to have destroyed
# real code: the tail of the requests.get() call, the call() helper's return
# statement, and the definitions of `signal_name` and `signal` (both are
# referenced later but never defined in the visible text). Recover the
# original from version control before editing; do not hand-reconstruct.
# NOTE(review): inside make_nonblocking_calls,
# `for ref in not_ready: blocking.extend(not_ready)` extends with the whole
# not_ready list once per element -- likely meant `blocking.append(ref)` or a
# single extend outside the loop. Intended behavior: updating user_config
# rolls the two replicas one at a time (reconfigure blocks on the signal),
# and both replicas report the new config once the signal is sent.
def test_reconfigure_multiple_replicas(serve_instance, use_handle): # Tests that updating the user_config with multiple replicas performs a # rolling update. client = serve_instance name = "test" @ray.remote(num_cpus=0) def call(): if use_handle: handle = serve.get_deployment(name).get_handle() ret = ray.get(handle.handler.remote()) else: ret = requests.get(f"http://*****:*****@serve.deployment(name=name, version="1", num_replicas=2) class V1: def __init__(self): self.config = None async def reconfigure(self, config): # Don't block when the replica is first created. if self.config is not None: signal = ray.get_actor(signal_name) ray.get(signal.wait.remote()) self.config = config async def handler(self): return f"{self.config}|{os.getpid()}" async def __call__(self, request): return await self.handler() def make_nonblocking_calls(expected, expect_blocking=False): # Returns dict[val, set(pid)]. blocking = [] responses = defaultdict(set) start = time.time() while time.time() - start < 30: refs = [call.remote() for _ in range(10)] ready, not_ready = ray.wait(refs, timeout=0.5) for ref in ready: val, pid = ray.get(ref) responses[val].add(pid) for ref in not_ready: blocking.extend(not_ready) if (all( len(responses[val]) == num for val, num in expected.items()) and (expect_blocking is False or len(blocking) > 0)): break time.sleep(0.1) else: assert False, f"Timed out, responses: {responses}." return responses, blocking V1.options(user_config="1").deploy() responses1, _ = make_nonblocking_calls({"1": 2}) pids1 = responses1["1"] # Reconfigure should block one replica until the signal is sent. Check that # some requests are now blocking. goal_ref = V1.options(user_config="2").deploy(_blocking=False) responses2, blocking2 = make_nonblocking_calls( { "1": 1 }, expect_blocking=True) assert list(responses2["1"])[0] in pids1 # Signal reconfigure to finish. Now the goal should complete and both # replicas should have the updated config.
ray.get(signal.send.remote()) assert client._wait_for_goal(goal_ref) make_nonblocking_calls({"2": 2})
# NOTE(review): SOURCE contains two definitions named
# test_router_use_max_concurrency (this one uses the older Router APIs:
# ray.register_custom_serializer, BackendConfig(dict), RayTimeoutError); the
# later one uses the newer APIs. They look like two versions of the same file
# concatenated -- when reloaded as one module, the second definition shadows
# this one. Confirm which version belongs in this file.
async def test_router_use_max_concurrency(serve_instance):
    """Router must cap a replica at max_concurrent_queries=1, buffering the
    overflow query in the backend queue until a slot frees up."""
    # The VisibleRouter::get_queues method needs to pickle queries
    # so we register the serializer here. In the regular code path, query
    # serialization is done by Serve manually for performance.
    ray.register_custom_serializer(Query, Query.ray_serialize,
                                   Query.ray_deserialize)
    signal = SignalActor.remote()

    @ray.remote
    class MockWorker:
        async def handle_request(self, request):
            # Block until the test releases us via the signal actor.
            await signal.wait.remote()
            return "DONE"

        def ready(self):
            pass

    class VisibleRouter(Router):
        # Test-only hook exposing the router's internal bookkeeping.
        def get_queues(self):
            return self.queries_counter, self.backend_queues

    worker = MockWorker.remote()
    q = ray.remote(VisibleRouter).remote()
    await q.setup.remote("")
    backend_name = "max-concurrent-test"
    config = BackendConfig({"max_concurrent_queries": 1})
    await q.set_traffic.remote("svc", TrafficPolicy({backend_name: 1.0}))
    await q.add_new_worker.remote(backend_name, "replica-tag", worker)
    await q.set_backend_config.remote(backend_name, config)

    # We send over two queries.
    first_query = q.enqueue_request.remote(RequestMetadata("svc", None), 1)
    second_query = q.enqueue_request.remote(RequestMetadata("svc", None), 1)

    # Neither query should complete: the single replica is blocked.
    with pytest.raises(ray.exceptions.RayTimeoutError):
        ray.get([first_query, second_query], timeout=0.2)

    # Let's retrieve the router's internal state.
    queries_counter, backend_queues = await q.get_queues.remote()
    # There should be just one inflight request.
    assert queries_counter[backend_name][
        "max-concurrent-test:replica-tag"] == 1
    # The second query is buffered.
    assert len(backend_queues["max-concurrent-test"]) == 1

    # Let's unblock the first query.
    await signal.send.remote(clear=True)
    assert await first_query == "DONE"

    # The internal state of the router should have changed.
    queries_counter, backend_queues = await q.get_queues.remote()
    # There should still be one inflight request (the buffered query was
    # dispatched as soon as the first one completed).
    assert queries_counter[backend_name][
        "max-concurrent-test:replica-tag"] == 1
    # But there shouldn't be any queries in the queue.
    assert len(backend_queues["max-concurrent-test"]) == 0

    # Unblock the second query.
    await signal.send.remote(clear=True)
    assert await second_query == "DONE"

    # Check the internal state of the router one more time: idle.
    queries_counter, backend_queues = await q.get_queues.remote()
    assert queries_counter[backend_name][
        "max-concurrent-test:replica-tag"] == 0
    assert len(backend_queues["max-concurrent-test"]) == 0
async def test_router_use_max_concurrency(serve_instance):
    """Router must cap a replica at max_concurrent_queries=1, buffering the
    overflow query in the backend queue until a slot frees up.

    NOTE(review): this duplicates the name of an earlier test in SOURCE that
    uses older Router APIs; this version (keyword BackendConfig,
    GetTimeoutError, request-id in RequestMetadata) shadows it at import
    time. Confirm which version belongs in this file.
    """
    signal = SignalActor.remote()

    @ray.remote
    class MockWorker:
        async def handle_request(self, request):
            # Block until the test releases us via the signal actor.
            await signal.wait.remote()
            return "DONE"

        def ready(self):
            pass

    class VisibleRouter(Router):
        # Test-only hook exposing the router's internal bookkeeping.
        def get_queues(self):
            return self.queries_counter, self.backend_queues

    worker = MockWorker.remote()
    q = ray.remote(VisibleRouter).remote()
    await q.setup.remote("", serve_instance._controller_name)
    backend_name = "max-concurrent-test"
    config = BackendConfig(max_concurrent_queries=1)
    await q.set_traffic.remote("svc", TrafficPolicy({backend_name: 1.0}))
    await q.add_new_worker.remote(backend_name, "replica-tag", worker)
    await q.set_backend_config.remote(backend_name, config)

    # We send over two queries, each with a random request id.
    first_query = q.enqueue_request.remote(
        RequestMetadata(get_random_letters(10), "svc", None), 1)
    second_query = q.enqueue_request.remote(
        RequestMetadata(get_random_letters(10), "svc", None), 1)

    # Neither query should complete: the single replica is blocked.
    with pytest.raises(ray.exceptions.GetTimeoutError):
        ray.get([first_query, second_query], timeout=0.2)

    # Let's retrieve the router's internal state.
    queries_counter, backend_queues = await q.get_queues.remote()
    # There should be just one inflight request.
    assert queries_counter[backend_name][
        "max-concurrent-test:replica-tag"] == 1
    # The second query is buffered.
    assert len(backend_queues["max-concurrent-test"]) == 1

    # Let's unblock the first query.
    await signal.send.remote(clear=True)
    assert await first_query == "DONE"

    # The internal state of the router should have changed.
    queries_counter, backend_queues = await q.get_queues.remote()
    # There should still be one inflight request (the buffered query was
    # dispatched as soon as the first one completed).
    assert queries_counter[backend_name][
        "max-concurrent-test:replica-tag"] == 1
    # But there shouldn't be any queries in the queue.
    assert len(backend_queues["max-concurrent-test"]) == 0

    # Unblock the second query.
    await signal.send.remote(clear=True)
    assert await second_query == "DONE"

    # Check the internal state of the router one more time: idle.
    queries_counter, backend_queues = await q.get_queues.remote()
    assert queries_counter[backend_name][
        "max-concurrent-test:replica-tag"] == 0
    assert len(backend_queues["max-concurrent-test"]) == 0