def serve_new_model(model_dir, checkpoint, config, metrics, day, gpu=False):
    """Deploy `checkpoint` as the current "mnist" model.

    Creates a per-day backend, points the "mnist" endpoint at it (creating
    the endpoint on first use), and retires backends from previous days.
    """
    print("Serving checkpoint: {}".format(checkpoint))

    checkpoint_path = _move_checkpoint_to_model_dir(model_dir, checkpoint,
                                                    config, metrics)

    serve.init()
    backend_name = "mnist:day_{}".format(day)
    serve.create_backend(backend_name, MNISTBackend, checkpoint_path, config,
                         metrics, gpu)

    if "mnist" not in serve.list_endpoints():
        # First model ever served: the endpoint must be created first.
        serve.create_endpoint(
            "mnist", backend=backend_name, route="/mnist", methods=["POST"])
    else:
        # Endpoint already exists: shift all traffic to the new backend.
        # An incremental rollout (splitting traffic between the old and new
        # backends) could be implemented here instead of an all-at-once cut.
        serve.set_traffic("mnist", {backend_name: 1.0})

        # Retire every backend left over from a previous day.
        for stale_backend in serve.list_backends():
            if (stale_backend.startswith("mnist:day")
                    and stale_backend != backend_name):
                serve.delete_backend(stale_backend)

    return True
def test_list_backends(serve_instance):
    """Backends should appear in and disappear from list_backends()."""

    @serve.accept_batch
    def f():
        pass

    serve.create_backend("backend", f, config=BackendConfig(max_batch_size=10))
    listed = serve.list_backends()
    assert len(listed) == 1
    assert "backend" in listed
    assert listed["backend"].max_batch_size == 10

    serve.create_backend("backend2", f, config=BackendConfig(num_replicas=10))
    listed = serve.list_backends()
    assert len(listed) == 2
    assert listed["backend2"].num_replicas == 10

    serve.delete_backend("backend")
    listed = serve.list_backends()
    assert len(listed) == 1
    assert "backend2" in listed

    serve.delete_backend("backend2")
    assert len(serve.list_backends()) == 0
def test_list_backends(serve_instance):
    """Dict-config variant: backends are tracked through create/delete."""
    serve.init()

    @serve.accept_batch
    def f():
        pass

    serve.create_backend("backend", f, config={"max_batch_size": 10})
    listed = serve.list_backends()
    assert len(listed) == 1
    assert "backend" in listed
    assert listed["backend"]["max_batch_size"] == 10

    serve.create_backend("backend2", f, config={"num_replicas": 10})
    listed = serve.list_backends()
    assert len(listed) == 2
    assert listed["backend2"]["num_replicas"] == 10

    serve.delete_backend("backend")
    listed = serve.list_backends()
    assert len(listed) == 1
    assert "backend2" in listed

    serve.delete_backend("backend2")
    assert len(serve.list_backends()) == 0
def serve_instance(_shared_serve_instance):
    """Yield the shared serve instance, then wipe all endpoints/backends."""
    yield _shared_serve_instance
    # Clear all state between tests to avoid naming collisions.
    controller = serve.api._global_client._controller
    for endpoint_name in ray.get(controller.get_all_endpoints.remote()):
        serve.delete_endpoint(endpoint_name)
    for backend_name in ray.get(controller.get_all_backends.remote()):
        serve.delete_backend(backend_name, force=True)
def serve_instance(_shared_serve_instance):
    """Yield to the test, then wipe all endpoints/backends afterwards."""
    yield
    # Clear all state between tests to avoid naming collisions.
    master = serve.api._get_master_actor()
    for endpoint_name in retry_actor_failures(master.get_all_endpoints):
        serve.delete_endpoint(endpoint_name)
    for backend_name in retry_actor_failures(master.get_all_backends):
        serve.delete_backend(backend_name)
def serve_instance(_shared_serve_instance):
    """Initialize serve, yield to the test, then wipe all state."""
    serve.init()
    yield
    # Clear all state between tests to avoid naming collisions.
    master = serve.api._get_master_actor()
    for name in ray.get(master.get_all_endpoints.remote()):
        serve.delete_endpoint(name)
    for name in ray.get(master.get_all_backends.remote()):
        serve.delete_backend(name)
def serve_instance(_shared_serve_instance):
    """Initialize serve, yield to the test, then re-init and wipe state."""
    serve.init()
    yield
    # Re-init if necessary.
    serve.init()
    controller = serve.api._get_controller()
    # Clear all state between tests to avoid naming collisions.
    for name in ray.get(controller.get_all_endpoints.remote()):
        serve.delete_endpoint(name)
    for name in ray.get(controller.get_all_backends.remote()):
        serve.delete_backend(name)
def create_endpoint(self):
    """Create one randomly named endpoint+backend pair.

    When the cap is reached, the endpoint most recently appended to
    ``self.endpoints`` is deleted first (``list.pop`` takes the tail).
    """
    if len(self.endpoints) == self.max_endpoints:
        victim = self.endpoints.pop()
        serve.delete_endpoint(victim)
        serve.delete_backend(victim)

    # 10-char random ASCII name shared by the endpoint, backend, and route.
    name = "".join(random.choice(string.ascii_letters) for _ in range(10))

    def handler(self, *args):
        # Echo the endpoint's own name back to callers.
        return name

    serve.create_backend(name, handler)
    serve.create_endpoint(name, "/" + name)
    serve.set_traffic(name, {name: 1.0})
    self.endpoints.append(name)
def test_serve_forceful_shutdown(serve_instance):
    """Deleting a backend must force-kill replicas stuck in a request."""

    def sleeper(_):
        # Never returns; only a forceful shutdown can stop it.
        while True:
            time.sleep(1000)

    serve.create_backend(
        "sleeper",
        sleeper,
        config=BackendConfig(experimental_graceful_shutdown_timeout_s=1))
    serve.create_endpoint("sleeper", backend="sleeper")
    pending = serve.get_handle("sleeper").remote()

    serve.delete_endpoint("sleeper")
    serve.delete_backend("sleeper")

    # The replica was killed mid-request, so the pending call errors out.
    with pytest.raises(ray.exceptions.RayActorError):
        ray.get(pending)
def test_cluster_name():
    """Two named clusters keep same-named endpoints/backends isolated."""
    with pytest.raises(TypeError):
        serve.init(cluster_name=1)

    route = "/api"
    backend = "backend"
    endpoint = "endpoint"

    serve.init(cluster_name="cluster1", blocking=True, http_port=8001)
    serve.create_endpoint(endpoint, route=route)

    def function():
        return "hello1"

    serve.create_backend(backend, function)
    serve.set_traffic(endpoint, {backend: 1.0})

    assert requests.get("http://127.0.0.1:8001" + route).text == "hello1"

    # Create a second cluster on port 8002 with identically named endpoint
    # and backend, and check that the two clusters don't collide.
    serve.init(cluster_name="cluster2", blocking=True, http_port=8002)
    serve.create_endpoint(endpoint, route=route)

    def function():
        return "hello2"

    serve.create_backend(backend, function)
    serve.set_traffic(endpoint, {backend: 1.0})

    assert requests.get("http://127.0.0.1:8001" + route).text == "hello1"
    assert requests.get("http://127.0.0.1:8002" + route).text == "hello2"

    # Deleting in the current cluster doesn't affect the first cluster.
    serve.delete_endpoint(endpoint)
    serve.delete_backend(backend)
    assert requests.get("http://127.0.0.1:8001" + route).text == "hello1"

    # Check that we can re-connect to the first cluster and clean it up.
    serve.init(cluster_name="cluster1")
    serve.delete_endpoint(endpoint)
    serve.delete_backend(backend)
def test_delete_backend(serve_instance):
    """A backend can only be deleted once no endpoint routes to it."""

    def function(_):
        return "hello"

    serve.create_backend("delete:v1", function)
    serve.create_endpoint(
        "delete_backend", backend="delete:v1", route="/delete-backend")

    assert requests.get("http://127.0.0.1:8000/delete-backend").text == "hello"

    # Deletion is rejected while the backend still serves traffic.
    with pytest.raises(ValueError):
        serve.delete_backend("delete:v1")

    serve.create_backend("delete:v2", function)
    serve.set_traffic("delete_backend", {"delete:v1": 0.5, "delete:v2": 0.5})

    with pytest.raises(ValueError):
        serve.delete_backend("delete:v1")

    # Once all traffic is shifted away, deletion succeeds.
    serve.set_traffic("delete_backend", {"delete:v2": 1.0})
    serve.delete_backend("delete:v1")

    # The deleted backend can no longer receive traffic.
    with pytest.raises(ValueError):
        serve.set_traffic("delete_backend", {"delete:v1": 1.0})

    def function2(_):
        return "olleh"

    # The old tag can be reused for a brand-new backend.
    serve.create_backend("delete:v1", function2)
    serve.set_traffic("delete_backend", {"delete:v1": 1.0})

    for _ in range(10):
        try:
            assert requests.get(
                "http://127.0.0.1:8000/delete-backend").text == "olleh"
            break
        except AssertionError:
            time.sleep(0.5)  # wait for the traffic policy to propagate
    else:
        assert requests.get(
            "http://127.0.0.1:8000/delete-backend").text == "olleh"
def test_delete_backend(serve_instance):
    """Older-API variant of the delete-backend lifecycle test."""
    serve.create_endpoint("delete_backend", "/delete-backend")

    def function():
        return "hello"

    serve.create_backend("delete:v1", function)
    serve.set_traffic("delete_backend", {"delete:v1": 1.0})
    assert requests.get("http://127.0.0.1:8000/delete-backend").text == "hello"

    # Deletion is rejected while the backend still serves traffic.
    with pytest.raises(ValueError):
        serve.delete_backend("delete:v1")

    serve.create_backend("delete:v2", function)
    serve.set_traffic("delete_backend", {"delete:v1": 0.5, "delete:v2": 0.5})
    with pytest.raises(ValueError):
        serve.delete_backend("delete:v1")

    # Once all traffic is shifted away, deletion succeeds.
    serve.set_traffic("delete_backend", {"delete:v2": 1.0})
    serve.delete_backend("delete:v1")

    # The deleted backend can no longer receive traffic.
    with pytest.raises(ValueError):
        serve.set_traffic("delete_backend", {"delete:v1": 1.0})

    def function2():
        return "olleh"

    # The old tag can be reused for a brand-new backend.
    serve.create_backend("delete:v1", function2)
    serve.set_traffic("delete_backend", {"delete:v1": 1.0})
    assert requests.get("http://127.0.0.1:8000/delete-backend").text == "olleh"
import sys

import ray
from ray import serve

ray.init(address="auto", ignore_reinit_error=True)
serve.init()

# Best-effort cleanup of any previous deployment: the endpoint/backend may
# not exist yet, so "not found" errors are ignored. Bug fix: the original
# bare `except:` also swallowed SystemExit/KeyboardInterrupt; narrowed to
# `except Exception` so Ctrl-C and interpreter shutdown still work.
try:
    serve.delete_endpoint("sentiment_endpoint")
except Exception:
    pass

try:
    serve.delete_backend("pytorch_backend")
except Exception:
    pass
def do_blocking_delete():
    """Tear down the "wait" deployment: endpoint first, then its backend."""
    name = "wait"
    serve.delete_endpoint(name)
    serve.delete_backend(name)