def test_updating_config(serve_instance):
    class BatchSimple:
        def __init__(self):
            self.count = 0

        @serve.accept_batch
        def __call__(self, flask_request, temp=None):
            batch_size = serve.context.batch_size
            return [1] * batch_size

    serve.create_backend(
        "bsimple:v1",
        BatchSimple,
        config={
            "max_batch_size": 2,
            "num_replicas": 3
        })
    serve.create_endpoint("bsimple", backend="bsimple:v1", route="/bsimple")

    master_actor = serve.api._get_master_actor()
    old_replica_tag_list = ray.get(
        master_actor._list_replicas.remote("bsimple:v1"))

    serve.update_backend_config("bsimple:v1", {"max_batch_size": 5})
    new_replica_tag_list = ray.get(
        master_actor._list_replicas.remote("bsimple:v1"))
    new_all_tag_list = []
    for worker_dict in ray.get(
            master_actor.get_all_worker_handles.remote()).values():
        new_all_tag_list.extend(list(worker_dict.keys()))

    # The old and new replica tag lists should be identical and should be a
    # subset of new_all_tag_list.
    assert set(old_replica_tag_list) <= set(new_all_tag_list)
    assert set(old_replica_tag_list) == set(new_replica_tag_list)
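
# These tests rely on a `serve_instance` pytest fixture whose definition is
# not shown here. A minimal sketch for the old Serve API, assuming the
# fixture only needs to start a fresh cluster per test and tear it down
# afterwards (the num_cpus value is an arbitrary assumption):
import pytest
import ray
from ray import serve


@pytest.fixture
def serve_instance():
    # Start Ray and a fresh Serve instance for each test.
    ray.init(num_cpus=8)
    serve.init()
    yield
    # Shut the cluster down so tests stay isolated from each other.
    ray.shutdown()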

def test_imported_backend(serve_instance):
    config = BackendConfig(user_config="config")
    serve.create_backend(
        "imported",
        "ray.serve.utils.MockImportedBackend",
        "input_arg",
        config=config)
    serve.create_endpoint("imported", backend="imported")

    # Basic sanity check.
    handle = serve.get_handle("imported")
    assert ray.get(handle.remote()) == {"arg": "input_arg", "config": "config"}

    # Check that updating backend config works.
    serve.update_backend_config("imported",
                                BackendConfig(user_config="new_config"))
    assert ray.get(handle.remote()) == {
        "arg": "input_arg",
        "config": "new_config"
    }

    # Check that other call methods work.
    handle = handle.options(method_name="other_method")
    assert ray.get(handle.remote("hello")) == "hello"

    # Check that functions work as well.
    serve.create_backend("imported_func",
                         "ray.serve.utils.mock_imported_function")
    serve.create_endpoint("imported_func", backend="imported_func")
    handle = serve.get_handle("imported_func")
    assert ray.get(handle.remote("hello")) == "hello"
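
# `ray.serve.utils.MockImportedBackend` is referenced only by its import
# string, so the class never has to be imported by the test itself. Judging
# from the assertions above, a hypothetical equivalent would look roughly
# like this (the class body below is a sketch, not the actual Ray source):
class MockImportedBackendSketch:
    def __init__(self, arg):
        self.arg = arg
        self.config = None

    def reconfigure(self, config):
        # Called with the BackendConfig's user_config at startup and again
        # whenever update_backend_config changes it.
        self.config = config

    def __call__(self, request):
        return {"arg": self.arg, "config": self.config}

    def other_method(self, request):
        # Assumed: the argument passed through the handle is surfaced on
        # the request object and simply echoed back.
        return request.data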

def test_scaling_replicas(serve_instance):
    class Counter:
        def __init__(self):
            self.count = 0

        def __call__(self, _):
            self.count += 1
            return self.count

    serve.create_backend("counter:v1", Counter, config={"num_replicas": 2})
    serve.create_endpoint("counter", backend="counter:v1", route="/increment")

    # Keep checking the routing table until /increment is populated.
    while "/increment" not in requests.get(
            "http://127.0.0.1:8000/-/routes").json():
        time.sleep(0.2)

    counter_result = []
    for _ in range(10):
        resp = requests.get("http://127.0.0.1:8000/increment").json()
        counter_result.append(resp)

    # If the load is shared between the two replicas, the max result cannot
    # be 10.
    assert max(counter_result) < 10

    serve.update_backend_config("counter:v1", {"num_replicas": 1})
    counter_result = []
    for _ in range(10):
        resp = requests.get("http://127.0.0.1:8000/increment").json()
        counter_result.append(resp)

    # Give some time for a replica to spin down, but the majority of the
    # requests should be served by the only remaining replica.
    assert max(counter_result) - min(counter_result) > 6

def backend_setup(tag: str, worker_args: Tuple, replicas: int,
                  max_batch_size: int) -> None:
    """
    Sets up the backend for the distributed explanation task.

    Parameters
    ----------
    tag
        A tag for the backend component. The same tag must be passed to
        `endpoint_setup`.
    worker_args
        A tuple containing the arguments for initialising the explainer and
        fitting it.
    replicas
        The number of backend replicas that serve explanations.
    max_batch_size
        Maximum number of requests to batch and send to a worker process.
    """

    if max_batch_size == 1:
        config = {'num_replicas': max(replicas, 1)}
        serve.create_backend(tag, wrappers.KernelShapModel, *worker_args)
    else:
        config = {
            'num_replicas': max(replicas, 1),
            'max_batch_size': max_batch_size
        }
        serve.create_backend(tag, wrappers.BatchKernelShapModel, *worker_args)
    serve.update_backend_config(tag, config)
    logging.info(f"Backends: {serve.list_backends()}")
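
# The docstring above refers to a matching `endpoint_setup`, which is not
# included in this snippet. A minimal sketch, assuming it only needs to
# expose the backend over HTTP (the route and methods are assumptions):
def endpoint_setup(tag: str, route: str = "/explain") -> None:
    """Exposes the backend registered under `tag` at the given route."""
    serve.create_endpoint(tag, backend=tag, route=route, methods=["POST"])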

async def main():
    ray.init(log_to_driver=False)
    serve.init()
    serve.create_backend("backend", backend)
    serve.create_endpoint("endpoint", backend="backend", route="/api")

    actors = [Client.remote() for _ in range(NUM_CLIENTS)]

    for num_replicas in [1, 8]:
        for backend_config in [
            {
                "max_batch_size": 1,
                "max_concurrent_queries": 1
            },
            {
                "max_batch_size": 1,
                "max_concurrent_queries": 10000
            },
            {
                "max_batch_size": 10000,
                "max_concurrent_queries": 10000
            },
        ]:
            backend_config["num_replicas"] = num_replicas
            serve.update_backend_config("backend", backend_config)
            print(repr(backend_config) + ":")
            async with aiohttp.ClientSession() as session:
                # TODO(edoakes): large data causes broken pipe errors.
                for data_size in ["small"]:
                    await trial(actors, session, data_size)

def test_updating_config(serve_instance):
    class BatchSimple:
        def __init__(self):
            self.count = 0

        def __call__(self, request):
            return 1

    config = BackendConfig(max_concurrent_queries=2, num_replicas=3)
    serve.create_backend("bsimple:v1", BatchSimple, config=config)
    serve.create_endpoint("bsimple", backend="bsimple:v1", route="/bsimple")

    controller = serve.api._global_client._controller
    old_replica_tag_list = list(
        ray.get(controller._all_replica_handles.remote())["bsimple:v1"].keys())

    update_config = BackendConfig(max_concurrent_queries=5)
    serve.update_backend_config("bsimple:v1", update_config)
    new_replica_tag_list = list(
        ray.get(controller._all_replica_handles.remote())["bsimple:v1"].keys())
    new_all_tag_list = []
    for worker_dict in ray.get(
            controller._all_replica_handles.remote()).values():
        new_all_tag_list.extend(list(worker_dict.keys()))

    # The old and new replica tag lists should be identical and should be a
    # subset of new_all_tag_list.
    assert set(old_replica_tag_list) <= set(new_all_tag_list)
    assert set(old_replica_tag_list) == set(new_replica_tag_list)

def test_scaling_replicas(serve_instance):
    class Counter:
        def __init__(self):
            self.count = 0

        def __call__(self, _):
            self.count += 1
            return self.count

    config = BackendConfig(num_replicas=2)
    serve.create_backend("counter:v1", Counter, config=config)
    serve.create_endpoint("counter", backend="counter:v1", route="/increment")

    counter_result = []
    for _ in range(10):
        resp = requests.get("http://127.0.0.1:8000/increment").json()
        counter_result.append(resp)

    # If the load is shared between the two replicas, the max result cannot
    # be 10.
    assert max(counter_result) < 10

    update_config = BackendConfig(num_replicas=1)
    serve.update_backend_config("counter:v1", update_config)

    counter_result = []
    for _ in range(10):
        resp = requests.get("http://127.0.0.1:8000/increment").json()
        counter_result.append(resp)

    # Give some time for a replica to spin down, but the majority of the
    # requests should be served by the only remaining replica.
    assert max(counter_result) - min(counter_result) > 6

def test_backend_user_config(serve_instance):
    class Counter:
        def __init__(self):
            self.count = 10

        def __call__(self, starlette_request):
            return self.count, os.getpid()

        def reconfigure(self, config):
            self.count = config["count"]

    config = BackendConfig(num_replicas=2, user_config={"count": 123, "b": 2})
    serve.create_backend("counter", Counter, config=config)
    serve.create_endpoint("counter", backend="counter")
    handle = serve.get_handle("counter")

    def check(val, num_replicas):
        pids_seen = set()
        for _ in range(100):
            result = ray.get(handle.remote())
            if str(result[0]) != val:
                return False
            pids_seen.add(result[1])
        return len(pids_seen) == num_replicas

    wait_for_condition(lambda: check("123", 2))

    serve.update_backend_config("counter", BackendConfig(num_replicas=3))
    wait_for_condition(lambda: check("123", 3))

    config = BackendConfig(user_config={"count": 456})
    serve.update_backend_config("counter", config)
    wait_for_condition(lambda: check("456", 3))
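
# `wait_for_condition` comes from Ray's test utilities. Roughly, it polls a
# zero-argument predicate until it returns True or a timeout expires. A
# simplified stand-in (the default timeout and interval are assumptions):
import time


def wait_for_condition(condition, timeout=30, retry_interval=0.1):
    start = time.time()
    while time.time() - start < timeout:
        if condition():
            return
        time.sleep(retry_interval)
    raise RuntimeError(f"Condition not met within {timeout}s.")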

def test_worker_replica_failure(serve_instance):
    serve.http_proxy.MAX_ACTOR_DEAD_RETRIES = 0
    serve.init()

    class Worker:
        # Assumes that two replicas are started. Will hang forever in the
        # constructor for any workers that are restarted.
        def __init__(self, path):
            self.should_hang = False
            if not os.path.exists(path):
                self.index = 1
                with open(path, "w") as f:
                    f.write("1")
            else:
                with open(path, "r") as f:
                    num = int(f.read())
                self.index = num + 1
                with open(path, "w") as f:
                    if num == 2:
                        self.should_hang = True
                    else:
                        f.write(str(num + 1))

            if self.should_hang:
                while True:
                    pass

        def __call__(self):
            # Return the replica's index so the two replicas can be told
            # apart when waiting for both of them to come up.
            return self.index

    temp_path = os.path.join(tempfile.gettempdir(),
                             serve.utils.get_random_letters())
    serve.create_backend("replica_failure", Worker, temp_path)
    serve.update_backend_config("replica_failure",
                                BackendConfig(num_replicas=2))
    serve.create_endpoint(
        "replica_failure", backend="replica_failure", route="/replica_failure")

    # Wait until both replicas have been started.
    responses = set()
    while len(responses) < 2:
        responses.add(request_with_retries("/replica_failure", timeout=1).text)
        time.sleep(0.1)

    # Kill one of the replicas.
    handles = _get_worker_handles("replica_failure")
    assert len(handles) == 2
    ray.kill(handles[0], no_restart=False)

    # Check that the other replica still serves requests.
    for _ in range(10):
        while True:
            try:
                # The timeout needs to be small here because the request to
                # the restarting worker will hang.
                request_with_retries("/replica_failure", timeout=0.1)
                break
            except TimeoutError:
                time.sleep(0.1)

def test_worker_replica_failure(serve_instance):
    @ray.remote
    class Counter:
        def __init__(self):
            self.count = 0

        def inc_and_get(self):
            self.count += 1
            return self.count

    class Worker:
        # Assumes that two replicas are started. Will hang forever in the
        # constructor for any workers that are restarted.
        def __init__(self, counter):
            self.should_hang = False
            self.index = ray.get(counter.inc_and_get.remote())
            if self.index > 2:
                while True:
                    pass

        def __call__(self, *args):
            return self.index

    counter = Counter.remote()
    serve.create_backend("replica_failure", Worker, counter)
    serve.update_backend_config("replica_failure",
                                BackendConfig(num_replicas=2))
    serve.create_endpoint(
        "replica_failure", backend="replica_failure", route="/replica_failure")

    # Wait until both replicas have been started.
    responses = set()
    start = time.time()
    while time.time() - start < 30:
        time.sleep(0.1)
        response = request_with_retries("/replica_failure", timeout=1).text
        assert response in ["1", "2"]
        responses.add(response)
        if len(responses) > 1:
            break
    else:
        raise TimeoutError("Timed out waiting for replicas after 30s.")

    # Kill one of the replicas.
    handles = _get_worker_handles("replica_failure")
    assert len(handles) == 2
    ray.kill(handles[0], no_restart=False)

    # Check that the other replica still serves requests.
    for _ in range(10):
        while True:
            try:
                # The timeout needs to be small here because the request to
                # the restarting worker will hang.
                request_with_retries("/replica_failure", timeout=0.1)
                break
            except TimeoutError:
                time.sleep(0.1)
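
# Both replica-failure tests lean on two helpers defined elsewhere in the
# test module. Sketches of what they plausibly do (signatures and internals
# are assumptions): `request_with_retries` re-issues the HTTP request until
# one succeeds or `timeout` seconds elapse, and `_get_worker_handles` asks
# the Serve controller for the actor handles of a backend's replicas.
import time

import requests

import ray
from ray import serve


def request_with_retries(route, timeout=30):
    start = time.time()
    while True:
        try:
            return requests.get(
                "http://127.0.0.1:8000" + route, timeout=timeout)
        except requests.RequestException:
            if time.time() - start > timeout:
                raise TimeoutError(f"No response from {route} in {timeout}s.")
            time.sleep(0.1)


def _get_worker_handles(backend):
    controller = serve.api._global_client._controller
    return list(
        ray.get(controller._all_replica_handles.remote())[backend].values())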

# Requests to the "/echo" route will be serviced by the echo:v1 backend.
serve.create_endpoint("my_endpoint", backend="echo:v1", route="/echo")

# The service is reachable over HTTP...
print(requests.get("http://127.0.0.1:8000/echo", timeout=0.5).text)
# ...as well as from within the Ray system via a handle.
print(ray.get(serve.get_handle("my_endpoint").remote(response="hello")))


# We can also add a new backend and split the traffic.
def echo_v2(flask_request):
    # magic, only from web.
    return "something new"


serve.create_backend("echo:v2", echo_v2)

# The two backends will now split the traffic 50%-50%.
serve.set_traffic("my_endpoint", {"echo:v1": 0.5, "echo:v2": 0.5})

# Observe that requests are now split between the two backends.
for _ in range(10):
    print(requests.get("http://127.0.0.1:8000/echo").text)
    time.sleep(0.5)

# You can also change the number of replicas for each backend independently.
serve.update_backend_config("echo:v1", {"num_replicas": 2})
serve.update_backend_config("echo:v2", {"num_replicas": 2})

import random

import requests

import ray
from ray import serve
from ray.serve import BackendConfig

ray.init()
serve.start()


class Threshold:
    def __init__(self):
        # self.model won't be changed by reconfigure.
        self.model = random.Random()  # Imagine this is some heavyweight model.

    def reconfigure(self, config):
        # This will be called when the class is created and when
        # the user_config field of BackendConfig is updated.
        self.threshold = config["threshold"]

    def __call__(self, request):
        return self.model.random() > self.threshold


backend_config = BackendConfig(user_config={"threshold": 0.01})
serve.create_backend("threshold", Threshold, config=backend_config)
serve.create_endpoint("threshold", backend="threshold", route="/threshold")
print(requests.get("http://127.0.0.1:8000/threshold").text)  # true, probably

backend_config = BackendConfig(user_config={"threshold": 0.99})
serve.update_backend_config("threshold", backend_config)
print(requests.get("http://127.0.0.1:8000/threshold").text)  # false, probably
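
# Because reconfigure() runs on each live replica, updating user_config
# swaps the threshold in place: no replica restarts, and self.model is never
# reloaded. To inspect what is currently applied, the config can be read
# back (assuming this version of Serve exposes serve.get_backend_config):
print(serve.get_backend_config("threshold"))  # user_config={'threshold': 0.99}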