def test_redeploy_single_replica(serve_instance, use_handle):
    """Check that a single-replica redeploy waits for the old replica.

    Redeploying a deployment with a single replica must wait for the
    replica to completely shut down before starting a new one.

    Args:
        serve_instance: Object used as the Serve client; only
            ``_wait_for_goal`` is called on it here.
        use_handle: When truthy, requests go through a deployment handle;
            otherwise they are sent over HTTP with ``requests``.
    """
    client = serve_instance
    name = "test"

    @ray.remote
    def call(block=False):
        # Returns the (version, pid) pair parsed from the "<version>|<pid>"
        # reply produced by the deployment handlers below.
        if use_handle:
            handle = serve.get_deployment(name).get_handle()
            ret = ray.get(handle.handler.remote(block))
        else:
            # NOTE(review): everything from the URL below down to the
            # ``@serve.deployment`` decorator was destroyed by a credential
            # scrubber ("http://*****:*****@") and has been reconstructed
            # from how the rest of the test uses it. The host/port in
            # particular is an assumption -- confirm against history.
            ret = requests.get(
                f"http://localhost:8000/{name}", params={"block": block}
            ).text
        val, pid = ret.split("|")
        return val, pid

    # NOTE(review): reconstructed. The test needs a named actor exposing
    # ``wait``/``send`` that ``ray.get_actor(signal_name)`` can find;
    # ``SignalActor`` and ``get_random_letters`` are presumed to be imported
    # at the top of this file -- confirm.
    signal_name = f"signal-{get_random_letters()}"
    signal = SignalActor.options(name=signal_name).remote()

    @serve.deployment(name=name, version="1")
    class V1:
        async def handler(self, block: bool):
            if block:
                # Park this request until the test sends the signal.
                signal = ray.get_actor(signal_name)
                await signal.wait.remote()

            return f"1|{os.getpid()}"

        async def __call__(self, request):
            # Query parameters arrive as strings, hence the "True" compare.
            return await self.handler(request.query_params["block"] == "True")

    class V2:
        async def handler(self, *args):
            return f"2|{os.getpid()}"

        async def __call__(self, request):
            return await self.handler()

    V1.deploy()
    ref1 = call.remote(block=False)
    val1, pid1 = ray.get(ref1)
    assert val1 == "1"

    # ref2 will block until the signal is sent.
    ref2 = call.remote(block=True)
    assert len(ray.wait([ref2], timeout=0.1)[0]) == 0

    # Redeploy new version. This should not go through until the old version
    # replica completely stops.
    v2_deployment = V1.options(backend_def=V2, version="2")
    goal_ref = v2_deployment.deploy(_blocking=False)
    assert not client._wait_for_goal(goal_ref, timeout=0.1)

    # It may take some time for the handle change to propagate and requests
    # to get sent to the new version. Repeatedly send requests until they
    # start blocking.
    start = time.time()
    new_version_ref = None
    while time.time() - start < 30:
        ready, not_ready = ray.wait([call.remote(block=False)], timeout=0.5)
        if len(ready) == 1:
            # If the request doesn't block, it must have been the old version.
            val, pid = ray.get(ready[0])
            assert val == "1"
            assert pid == pid1
        elif len(not_ready) == 1:
            # If the request blocks, it must have been the new version.
            new_version_ref = not_ready[0]
            break
    else:
        # Raised explicitly (rather than ``assert False``) so the timeout is
        # still reported when assertions are stripped with ``-O``.
        raise AssertionError("Timed out waiting for new version to be called.")

    # Signal the original call to exit.
    ray.get(signal.send.remote())
    val2, pid2 = ray.get(ref2)
    assert val2 == "1"
    assert pid2 == pid1

    # Now the goal and request to the new version should complete.
    assert client._wait_for_goal(goal_ref)
    new_version_val, new_version_pid = ray.get(new_version_ref)
    assert new_version_val == "2"
    assert new_version_pid != pid2
def test_redeploy_multiple_replicas(serve_instance, use_handle):
    """Check that a multi-replica redeploy performs a rolling update.

    Args:
        serve_instance: Object used as the Serve client; only
            ``_wait_for_goal`` is called on it here.
        use_handle: When truthy, requests go through a deployment handle;
            otherwise they are sent over HTTP with ``requests``.
    """
    client = serve_instance
    name = "test"

    @ray.remote(num_cpus=0)
    def call(block=False):
        # Returns the (version, pid) pair parsed from the "<version>|<pid>"
        # reply produced by the deployment handlers below.
        if use_handle:
            handle = serve.get_deployment(name).get_handle()
            ret = ray.get(handle.handler.remote(block))
        else:
            # NOTE(review): everything from the URL below down to the
            # ``@serve.deployment`` decorator was destroyed by a credential
            # scrubber ("http://*****:*****@") and has been reconstructed
            # from how the rest of the test uses it. The host/port in
            # particular is an assumption -- confirm against history.
            ret = requests.get(
                f"http://localhost:8000/{name}", params={"block": block}
            ).text
        val, pid = ret.split("|")
        return val, pid

    # NOTE(review): reconstructed. The test needs a named actor exposing
    # ``wait``/``send`` that ``ray.get_actor(signal_name)`` can find;
    # ``SignalActor`` and ``get_random_letters`` are presumed to be imported
    # at the top of this file -- confirm.
    signal_name = f"signal-{get_random_letters()}"
    signal = SignalActor.options(name=signal_name).remote()

    @serve.deployment(name=name, version="1", num_replicas=2)
    class V1:
        async def handler(self, block: bool):
            if block:
                # Park this request until the test sends the signal.
                signal = ray.get_actor(signal_name)
                await signal.wait.remote()

            return f"1|{os.getpid()}"

        async def __call__(self, request):
            # Query parameters arrive as strings, hence the "True" compare.
            return await self.handler(request.query_params["block"] == "True")

    class V2:
        async def handler(self, *args):
            return f"2|{os.getpid()}"

        async def __call__(self, request):
            return await self.handler()

    def make_nonblocking_calls(expected, expect_blocking=False):
        # Fires batches of 10 non-blocking calls for up to 30 seconds until,
        # for every version in ``expected``, exactly the expected number of
        # distinct pids has answered (and, if ``expect_blocking``, at least
        # one ref was still pending when ``ray.wait`` returned).
        # Returns (dict[val, set(pid)], list of pending refs).
        blocking = []
        responses = defaultdict(set)
        start = time.time()
        while time.time() - start < 30:
            refs = [call.remote(block=False) for _ in range(10)]
            ready, not_ready = ray.wait(refs, timeout=0.5)
            for ref in ready:
                val, pid = ray.get(ref)
                responses[val].add(pid)
            # Record each pending ref once; extending inside a loop over
            # ``not_ready`` would append the whole list once per element.
            blocking.extend(not_ready)

            counts_match = all(
                len(responses[val]) == num for val, num in expected.items()
            )
            if counts_match and (not expect_blocking or blocking):
                break
        else:
            # Raised explicitly (rather than ``assert False``) so the timeout
            # is still reported when assertions are stripped with ``-O``.
            raise AssertionError(f"Timed out, responses: {responses}.")

        return responses, blocking

    V1.deploy()
    responses1, _ = make_nonblocking_calls({"1": 2})
    pids1 = responses1["1"]

    # ref2 will block a single replica until the signal is sent. Check that
    # some requests are now blocking.
    ref2 = call.remote(block=True)
    responses2, _ = make_nonblocking_calls({"1": 1}, expect_blocking=True)
    assert next(iter(responses2["1"])) in pids1

    # Redeploy new version. Since there is one replica blocking, only one new
    # replica should be started up.
    v2_deployment = V1.options(backend_def=V2, version="2")
    goal_ref = v2_deployment.deploy(_blocking=False)
    assert not client._wait_for_goal(goal_ref, timeout=0.1)
    make_nonblocking_calls({"1": 1}, expect_blocking=True)

    # Signal the original call to exit.
    ray.get(signal.send.remote())
    val, pid = ray.get(ref2)
    assert val == "1"
    assert pid in pids1

    # Now the goal and requests to the new version should complete.
    # We should have two running replicas of the new version.
    assert client._wait_for_goal(goal_ref)
    make_nonblocking_calls({"2": 2})
def test_reconfigure_multiple_replicas(serve_instance, use_handle):
    """Check that a user_config update rolls across multiple replicas.

    Args:
        serve_instance: Object used as the Serve client; only
            ``_wait_for_goal`` is called on it here.
        use_handle: When truthy, requests go through a deployment handle;
            otherwise they are sent over HTTP with ``requests``.
    """
    client = serve_instance
    name = "test"

    @ray.remote(num_cpus=0)
    def call():
        # Returns the (config, pid) pair parsed from the "<config>|<pid>"
        # reply produced by ``V1.handler`` below.
        if use_handle:
            handle = serve.get_deployment(name).get_handle()
            ret = ray.get(handle.handler.remote())
        else:
            # NOTE(review): everything from the URL below down to the
            # ``@serve.deployment`` decorator was destroyed by a credential
            # scrubber ("http://*****:*****@") and has been reconstructed
            # from how the rest of the test uses it. The host/port in
            # particular is an assumption -- confirm against history.
            ret = requests.get(f"http://localhost:8000/{name}").text
        val, pid = ret.split("|")
        return val, pid

    # NOTE(review): reconstructed. The test needs a named actor exposing
    # ``wait``/``send`` that ``ray.get_actor(signal_name)`` can find;
    # ``SignalActor`` and ``get_random_letters`` are presumed to be imported
    # at the top of this file -- confirm.
    signal_name = f"signal-{get_random_letters()}"
    signal = SignalActor.options(name=signal_name).remote()

    @serve.deployment(name=name, version="1", num_replicas=2)
    class V1:
        def __init__(self):
            self.config = None

        async def reconfigure(self, config):
            # Don't block when the replica is first created.
            if self.config is not None:
                signal = ray.get_actor(signal_name)
                # Synchronous ``ray.get`` rather than ``await``; presumably
                # deliberate so the replica cannot serve requests while it
                # waits, which the "requests are now blocking" check below
                # depends on. Do not convert to ``await`` without confirming.
                ray.get(signal.wait.remote())
            self.config = config

        async def handler(self):
            return f"{self.config}|{os.getpid()}"

        async def __call__(self, request):
            return await self.handler()

    def make_nonblocking_calls(expected, expect_blocking=False):
        # Fires batches of 10 calls for up to 30 seconds until, for every
        # config value in ``expected``, exactly the expected number of
        # distinct pids has answered (and, if ``expect_blocking``, at least
        # one ref was still pending when ``ray.wait`` returned).
        # Returns (dict[val, set(pid)], list of pending refs).
        blocking = []
        responses = defaultdict(set)
        start = time.time()
        while time.time() - start < 30:
            refs = [call.remote() for _ in range(10)]
            ready, not_ready = ray.wait(refs, timeout=0.5)
            for ref in ready:
                val, pid = ray.get(ref)
                responses[val].add(pid)
            # Record each pending ref once; extending inside a loop over
            # ``not_ready`` would append the whole list once per element.
            blocking.extend(not_ready)

            counts_match = all(
                len(responses[val]) == num for val, num in expected.items()
            )
            if counts_match and (not expect_blocking or blocking):
                break
            time.sleep(0.1)
        else:
            # Raised explicitly (rather than ``assert False``) so the timeout
            # is still reported when assertions are stripped with ``-O``.
            raise AssertionError(f"Timed out, responses: {responses}.")

        return responses, blocking

    V1.options(user_config="1").deploy()
    responses1, _ = make_nonblocking_calls({"1": 2})
    pids1 = responses1["1"]

    # Reconfigure should block one replica until the signal is sent. Check that
    # some requests are now blocking.
    goal_ref = V1.options(user_config="2").deploy(_blocking=False)
    responses2, _ = make_nonblocking_calls({"1": 1}, expect_blocking=True)
    assert next(iter(responses2["1"])) in pids1

    # Signal reconfigure to finish. Now the goal should complete and both
    # replicas should have the updated config.
    ray.get(signal.send.remote())
    assert client._wait_for_goal(goal_ref)
    make_nonblocking_calls({"2": 2})