def get_policies_api(request):
    """Return all policies, enriched with live Serve backend data, as JSON.

    Every row of the ``policy`` / ``trainer_cluster`` join is extended with
    three values looked up on the connected Serve instance:

    * ``num_replicas`` and ``max_concurrent_queries`` of the policy's backend
      (both ``None`` when the policy has no backend name or the backend is
      not currently listed);
    * the endpoint whose traffic table mentions that backend (``None`` when
      the policy has no backend name or no endpoint routes to it).

    Args:
        request: Unused by this handler.

    Returns:
        str: JSON array holding one 11-element array per policy.
    """

    def _process(row, backend_list, backend_traffic):
        # Columns 5 and 6 (agent_config, sim_config) are stored as JSON text.
        row[5] = json.loads(row[5])
        row[6] = json.loads(row[6])
        backend_name = row[7]
        if backend_name is None or backend_name not in backend_list:
            row += (None, None)
        else:
            backend_cfg = backend_list[backend_name]
            row += (backend_cfg.num_replicas,
                    backend_cfg.max_concurrent_queries)
        if backend_name is None:
            row += (None, )
        else:
            row += (backend_traffic.get(backend_name), )
        return tuple(row)

    sql = '''SELECT policy.cluster_id as trainer_id,
                    trainer_cluster.name as trainer_name,
                    policy.policy_id as policy_id,
                    policy.model_name as model_name,
                    policy.checkpoint as checkpoint,
                    policy.agent_config,
                    policy.sim_config,
                    policy.backend_name
             FROM policy
             INNER JOIN trainer_cluster
                 ON policy.cluster_id = trainer_cluster.id'''

    # NOTE(review): the connection is never closed here; confirm whether
    # db_connect() hands out a shared/managed connection.
    db = db_connect(BACKOFFICE_DB_NAME, check_same_thread=False)
    rows = select_all(db, sql)

    client = serve.connect()

    # Map each backend to an endpoint that routes traffic to it, ignoring
    # the backoffice's own endpoints.
    backend_traffic = {
        backend_name: endpoint
        for endpoint, props in client.list_endpoints().items()
        if endpoint not in BACKOFFICE_ENDPOINTS
        for backend_name in props['traffic']
    }

    # Query the backend table once instead of once per row: the result does
    # not depend on the row being processed.
    backend_list = client.list_backends()

    data = [_process(list(row), backend_list, backend_traffic) for row in rows]
    return json.dumps(data)
def test_detached_deployment():
    """A Serve instance started detached by one job is usable from another.

    See https://github.com/ray-project/ray/issues/11437
    """
    cluster = Cluster()
    head = cluster.add_node(node_ip_address="127.0.0.1", num_cpus=6)

    # First job: start Serve detached and verify a trivial endpoint answers.
    ray.init(head.address)
    initial_job_id = ray.get_runtime_context().job_id
    serve_client = serve.start(detached=True)
    serve_client.create_backend("f", lambda _: "hello")
    serve_client.create_endpoint("f", backend="f")
    first_reply = ray.get(serve_client.get_handle("f").remote())
    assert first_reply == "hello"

    ray.shutdown()

    # Second job: connect to the same instance and deploy something new.
    ray.init(head.address)
    second_job_id = ray.get_runtime_context().job_id
    assert second_job_id != initial_job_id

    serve_client = serve.connect()
    serve_client.create_backend("g", lambda _: "world")
    serve_client.create_endpoint("g", backend="g")
    second_reply = ray.get(serve_client.get_handle("g").remote())
    assert second_reply == "world"

    # Everything passed; tear down Serve, the driver and the cluster.
    serve_client.shutdown()
    ray.shutdown()
    cluster.shutdown()
def test_connect(serve_instance):
    """Exercise ``serve.connect()`` from the driver and from inside a backend."""
    client = serve_instance

    # Several clients may be attached to the same detached instance.
    client2 = serve.connect()
    assert client._controller_name == client2._controller_name

    # A second instance started here gets its own controller.
    client3 = serve.start(http_port=8004)
    assert client3._controller_name != client._controller_name

    # serve.connect() must also work when called from within a backend,
    # for both the detached and the non-detached instance.
    def connect_in_backend(_):
        client = serve.connect()
        client.create_backend("backend-ception", connect_in_backend)
        return client._controller_name

    for owner in (client, client3):
        owner.create_backend("connect_in_backend", connect_in_backend)
        owner.create_endpoint("endpoint", backend="connect_in_backend")
        handle = owner.get_handle("endpoint")
        reported_controller = ray.get(handle.remote())
        assert reported_controller == owner._controller_name
        assert "backend-ception" in owner.list_backends().keys()
def __init__(self, port=None, tag=None):
    """Attach to the Ray cluster and start, or fall back to, a Serve instance.

    Args:
        port: Forwarded as the ``"port"`` entry of the Serve HTTP options.
        tag: Optional logger-name prefix. When given, the logger is named
            ``"<tag>.<RAY.TAG>"``; otherwise the module logger is used.
    """
    # log
    logger_name = __name__ if tag is None else "%s.%s" % (tag, RAY.TAG)
    self.log = logging.getLogger(logger_name)

    ray.init(address="auto")
    nodes_info = ray.nodes()
    node_count = len(nodes_info)

    try:
        cors = Middleware(
            CORSMiddleware, allow_origins=["*"], allow_methods=["*"])
        http_options = {
            "location": "EveryNode",
            "host": "0.0.0.0",
            "port": port,
            "middlewares": [cors],
        }
        self.client = serve.start(http_options=http_options, detached=True)
        started_msg = "Ray serve initialized, node number: {} \n Nodes Info: {}"
        self.log.info(started_msg.format(node_count, nodes_info))
    except RayServeException:
        # Starting failed with a Serve error: connect to the instance instead.
        self.client = serve.connect()
        connected_msg = (
            "Connected existing Ray serve, node number: {} \n Nodes Info: {}")
        self.log.info(connected_msg.format(node_count, nodes_info))
def test_ray_client(ray_client_instance):
    """Drive Serve through separate Ray-client driver scripts.

    Each script is run with ``run_string_as_driver``; this process then
    checks the listed backends/endpoints and the HTTP route.
    """
    ray.util.connect(ray_client_instance)

    # Start a detached Serve instance from a separate driver.
    start_script = """
import ray
ray.util.connect("{}")

from ray import serve

serve.start(detached=True)
""".format(ray_client_instance)
    run_string_as_driver(start_script)

    serve.connect()

    # Deploy "test1" from another driver and check it is visible here.
    deploy_script = """
import ray
ray.util.connect("{}")

from ray import serve

@serve.deployment(name="test1", route_prefix="/hello")
def f(*args):
    return "hello"

f.deploy()
""".format(ray_client_instance)
    run_string_as_driver(deploy_script)

    assert "test1" in serve.list_backends()
    assert "test1" in serve.list_endpoints()
    response = requests.get("http://localhost:8000/hello")
    assert response.text == "hello"

    # Delete it again from a third driver and check it is gone.
    delete_script = """
import ray
ray.util.connect("{}")

from ray import serve

serve.get_deployment("test1").delete()
""".format(ray_client_instance)
    run_string_as_driver(delete_script)

    assert "test1" not in serve.list_backends()
    assert "test1" not in serve.list_endpoints()
def client_init(endpoints, update):
    """Return a Serve client, either freshly started or connected for an update.

    Args:
        endpoints: Endpoints passed one by one to ``delete_endpoint`` when
            ``update`` is truthy; unused otherwise.
        update: Truthy to connect to the existing instance and delete
            ``endpoints``; falsy to start a new instance.

    Returns:
        The client from ``serve.connect()`` or ``serve.start(...)``.
    """
    if not update:
        # `detached=True` starts a long-running Ray Serve instance service, see also `ray_init()`
        return serve.start(http_host="0.0.0.0", http_port=8000, detached=True)

    existing = serve.connect()
    for name in endpoints:
        delete_endpoint(existing, name)
    return existing
def get_endpoints_api(request):
    """Return the Serve endpoints that are not backoffice endpoints, as JSON.

    Args:
        request: Unused by this handler.

    Returns:
        str: JSON object mapping endpoint name to whatever
        ``list_endpoints()`` reports for it.
    """
    client = serve.connect()
    visible = {}
    for name, props in client.list_endpoints().items():
        if name in BACKOFFICE_ENDPOINTS.keys():
            # The backoffice's own endpoints are hidden from this listing.
            continue
        visible[name] = props
    return json.dumps(visible)
def __init__(self):
    """Fetch the "color" and "plot" handles and the impressions actor."""
    serve_client = serve.connect()

    # Handles to the two underlying models.
    self.color_handle = serve_client.get_handle("color")
    self.plot_handle = serve_client.get_handle("plot")

    # User click data is stored in an actor created with
    # lifetime="detached" under the name "impressions".
    impression_actor = ImpressionStore.options(
        lifetime="detached", name="impressions")
    self.impressions = impression_actor.remote()
def __init__(self, max_deployments=1):
    """Connect to Serve and create the initial set of deployments.

    Args:
        max_deployments: Number of ``create_deployment()`` calls made here;
            also stored as ``self.max_deployments``.
    """
    self.client = serve.connect()
    self.max_deployments = max_deployments

    # (action, weight) pairs.
    create_action = (self.create_deployment, 1)
    verify_action = (self.verify_deployment, 4)
    self.weighted_actions = [create_action, verify_action]

    self.deployments = []
    remaining = max_deployments
    while remaining > 0:
        self.create_deployment()
        remaining -= 1
def __init__(self, uri):
    """Connect to a running Ray cluster and its Ray Serve instance.

    Args:
        uri: Passed unchanged to the parent class constructor.

    Raises:
        MlflowException: If ``ray.init(address="auto")`` raises
            ``ConnectionError``, or ``serve.connect()`` raises
            ``RayServeException``. The original error is attached as
            ``__cause__``.
    """
    super().__init__(uri)
    try:
        # TODO: support URI and redis password (ray-serve:/192.168....)?
        ray.init(address="auto")
    except ConnectionError as err:
        # Chain explicitly so the traceback shows this is a translation of
        # the underlying error, not a second failure while handling it.
        raise MlflowException(
            "Could not find a running Ray instance.") from err
    try:
        self.client = serve.connect()
    except RayServeException as err:
        raise MlflowException(
            "Could not find a running Ray Serve instance on this Ray "
            "cluster.") from err
def run_loop(self):
    """Loop forever, retraining and redeploying the "plot" model on demand.

    Once per second ``should_retrain()`` is polled. When it is truthy, a
    model is retrained from the recorded "plot" clicks and deployed as a new
    backend that receives 10% of the "plot" traffic.
    """
    while True:
        if self.should_retrain():
            # Retrain the model.
            print("Retraining model...")
            clicks_ref = self.impressions.get_model_clicks.remote("plot")
            retrained = retrain_sklearn_lr_model(ray.get(clicks_ref))

            # Deploy the new model (using incremental rollout).
            serve_client = serve.connect()
            backend_name = f"plot:{int(time.time())}"
            serve_client.create_backend(
                backend_name, PlotRecommender, retrained)
            traffic_split = {"plot:v0": 0.9, backend_name: 0.1}
            serve_client.set_traffic("plot", traffic_split)
            print(f"Deployed new backend {backend_name}.")

        time.sleep(1)
def test_scale_up(ray_cluster):
    """Replicas of deployment "D" come up only as CPUs are added."""
    ray_cluster.add_node(num_cpus=1)
    ray_cluster.connect()

    # By default, Serve controller and proxy actors use 0 CPUs,
    # so initially there should only be room for 1 replica.

    @serve.deployment("D", version="1", num_replicas=1)
    def D(*args):
        return os.getpid()

    def get_pids(expected, timeout=30):
        # Poll the route until `expected` distinct pids have answered.
        seen = set()
        began = time.time()
        while len(seen) < expected:
            seen.add(requests.get("http://localhost:8000/D").text)
            if time.time() - began >= timeout:
                raise TimeoutError("Timed out waiting for pids.")
        return seen

    serve.start(detached=True)
    serve_client = serve.connect()

    D.deploy()
    one_replica = get_pids(1)

    scale_goal = D.options(num_replicas=3).deploy(_blocking=False)

    # Check that a new replica has not started in 1.0 seconds. This
    # doesn't guarantee that a new replica won't ever be started, but
    # 1.0 seconds is a reasonable upper bound on replica startup time.
    assert not serve_client._wait_for_goal(scale_goal, timeout=1.0)
    assert get_pids(1) == one_replica

    # Add a node with another CPU, another replica should get placed.
    ray_cluster.add_node(num_cpus=1)
    assert not serve_client._wait_for_goal(scale_goal, timeout=1.0)
    two_replicas = get_pids(2)
    assert one_replica.issubset(two_replicas)

    # Add a node with another CPU, the final replica should get placed
    # and the deploy goal should be done.
    ray_cluster.add_node(num_cpus=1)
    assert serve_client._wait_for_goal(scale_goal)
    three_replicas = get_pids(3)
    assert two_replicas.issubset(three_replicas)
def start_backend_server(config=None):
    """Return a Serve client, starting Ray and Serve if necessary.

    Ray is initialised against ``address='auto'`` only when it is not
    already initialised. An existing Serve instance is reused when
    ``serve.connect()`` succeeds; otherwise one is started with
    ``detached=True``.

    Args:
        config: Optional value stored in the module-level
            ``_POLICY_ACTOR_CONFIG``. ``None`` leaves the global untouched.

    Returns:
        The client from ``serve.connect()`` or ``serve.start(detached=True)``.
    """
    if not ray.is_initialized():
        ray.init(include_dashboard=False, log_to_driver=False,
                 logging_level=0, address='auto')

    try:
        backend_server = serve.connect()
    except RayServeException:
        backend_server = serve.start(detached=True)

    # Identity test: `!= None` would defer to a custom __ne__ on `config`.
    if config is not None:
        global _POLICY_ACTOR_CONFIG
        _POLICY_ACTOR_CONFIG = config

    return backend_server
def test_scale_up(ray_cluster):
    """Scaling "D" to 3 replicas only completes after two more nodes join."""
    head = ray_cluster.add_node(num_cpus=3)

    @serve.deployment("D", version="1", num_replicas=1)
    def D(*args):
        return os.getpid()

    def get_pids(expected, timeout=30):
        # Poll the route until `expected` distinct pids have answered.
        seen = set()
        began = time.time()
        while len(seen) < expected:
            seen.add(requests.get("http://localhost:8000/D").text)
            if time.time() - began >= timeout:
                raise TimeoutError("Timed out waiting for pids.")
        return seen

    ray.init(head.address)
    serve.start(detached=True)
    serve_client = serve.connect()

    D.deploy()
    one_replica = get_pids(1)

    scale_goal = D.options(num_replicas=3).deploy(_blocking=False)
    assert not serve_client._wait_for_goal(scale_goal, timeout=0.1)
    assert get_pids(1) == one_replica

    # Add a node with another CPU, another replica should get placed.
    ray_cluster.add_node(num_cpus=1)
    assert not serve_client._wait_for_goal(scale_goal, timeout=0.1)
    two_replicas = get_pids(2)
    assert one_replica.issubset(two_replicas)

    # Add a node with another CPU, the final replica should get placed
    # and the deploy goal should be done.
    ray_cluster.add_node(num_cpus=1)
    assert serve_client._wait_for_goal(scale_goal)
    three_replicas = get_pids(3)
    assert two_replicas.issubset(three_replicas)
def serve_new_model(model_dir, checkpoint, config, metrics, day, gpu=False):
    """Deploy ``checkpoint`` as the backend serving the "mnist" endpoint.

    The checkpoint is first moved into ``model_dir``. A backend named
    ``"mnist:day_<day>"`` is created from it and receives all "mnist"
    traffic; any other backend whose name starts with ``"mnist:day"`` is
    then deleted.

    Returns:
        bool: Always ``True``.
    """
    print("Serving checkpoint: {}".format(checkpoint))

    checkpoint_path = _move_checkpoint_to_model_dir(
        model_dir, checkpoint, config, metrics)

    try:
        # Try to connect to an existing cluster.
        client = serve.connect()
    except RayServeException:
        # If this is the first run, need to start the cluster.
        client = serve.start(detached=True)

    backend_name = "mnist:day_{}".format(day)
    client.create_backend(
        backend_name, MNISTBackend, checkpoint_path, config, metrics, gpu)

    if "mnist" in client.list_endpoints():
        # The endpoint already exists, route all traffic to the new model
        # Here you could also implement an incremental rollout, where only
        # a part of the traffic is sent to the new backend and the
        # rest is sent to the existing backends.
        client.set_traffic("mnist", {backend_name: 1.0})
    else:
        # First time we serve a model - create endpoint
        client.create_endpoint(
            "mnist", backend=backend_name, route="/mnist", methods=["POST"])

    # Delete previous existing backends
    for candidate in client.list_backends():
        if candidate == backend_name:
            continue
        if candidate.startswith("mnist:day"):
            client.delete_backend(candidate)

    return True
def check_handle_router_id():
    """Return the node id of the router actor behind the "hi" handle."""
    router = serve.connect().get_handle("hi").router_handle
    return get_node_id_for_actor(router)
def shutdown():
    """Connect to the running Serve instance and shut it down."""
    serve_client = serve.connect()
    serve_client.shutdown()
model_distribution = await self.impressions.model_distribution.remote( session_key, request.args["liked_id"]) # Select which results to send to the user based on their clicks. distribution, impressions, chosen = choose_ensemble_results( model_distribution, results) # Record this click and these recommendations. await self.impressions.record_impressions.remote( session_key, impressions) return { "sessionKey": session_key, "dist": distribution, "ids": chosen, "sources": { i["id"]: source for source, impression in impressions.items() for i in impression } } if __name__ == "__main__": ray.init(address="auto") client = serve.connect() client.create_backend("ensemble:v0", ComposedModel) client.create_endpoint("ensemble", backend="ensemble:v0", route="/rec/ensemble")
def wrapper_model(web_request):
    """Forward ``web_request`` to the "echo" handle and return its result."""
    serve_client = serve.connect()
    echo_handle = serve_client.get_handle("echo")
    pending = echo_handle.remote(web_request)
    return ray.get(pending)
def __init__(self):
    """Store a handle to "hello_world" obtained from the Serve instance."""
    self.handle = serve.connect().get_handle("hello_world")
def __init__(self):
    """Store a handle to "endpoint1" obtained from the Serve instance."""
    self.handle = serve.connect().get_handle("endpoint1")
def do_blocking_delete():
    """Delete the "wait" endpoint, then the "wait" backend."""
    serve_client = serve.connect()
    # Same order as before: the endpoint goes first, then the backend.
    for remove in (serve_client.delete_endpoint, serve_client.delete_backend):
        remove("wait")
def __init__(self):
    """Store a handle to "sum_model" obtained from the Serve instance."""
    self.model = serve.connect().get_handle("sum_model")
def test_ray_client(ray_client_instance): ray.util.connect(ray_client_instance, namespace="") start = """ import ray ray.util.connect("{}", namespace="") from ray import serve serve.start(detached=True) """.format(ray_client_instance) run_string_as_driver(start) serve.connect() deploy = """ import ray ray.util.connect("{}", namespace="") from ray import serve @serve.deployment(name="test1", route_prefix="/hello") def f(*args): return "hello" f.deploy() """.format(ray_client_instance) run_string_as_driver(deploy) assert "test1" in serve.list_backends() assert "test1" in serve.list_endpoints() assert requests.get("http://*****:*****@app.get("/") def hello(): return "hello" @serve.deployment @serve.ingress(app) class A: pass A.deploy() """.format(ray_client_instance) run_string_as_driver(fastapi) assert requests.get("http://localhost:8000/A").json() == "hello"
def __init__(self, models):
    """Collect one ``sync=False`` handle per entry of ``models``.

    Args:
        models: Iterable of names passed to ``get_handle``; the handles are
            kept in the same order in ``self.model_handles``.
    """
    serve_client = serve.connect()
    handles = []
    for model_name in models:
        handles.append(serve_client.get_handle(model_name, sync=False))
    self.model_handles = handles
def __init__(self):
    """Store handles to "model_one" and "model_two" from the Serve instance."""
    serve_client = serve.connect()
    first = serve_client.get_handle("model_one")
    second = serve_client.get_handle("model_two")
    self.model_one, self.model_two = first, second
def connect_in_backend(_):
    """Create "backend-ception" from this function; return the controller name.

    The single positional argument is ignored.
    """
    serve_client = serve.connect()
    # The new backend is defined by this very function.
    serve_client.create_backend("backend-ception", connect_in_backend)
    controller_name = serve_client._controller_name
    return controller_name
'get_trainers': (get_trainers_api, '/trainers'), 'get_endpoints': (get_endpoints_api, '/endpoints'), 'get_policies': (get_policies_api, '/policies'), } # Policies CPU Multiplexing factor. 0.5 = 2 policies / CPU POLICY_ACTOR_CONFIG = {'num_cpus': 0.5} # Start Backend if __name__ == "__main__": if not ray.is_initialized(): ray.init(address='auto') try: backend_server = serve.connect() except RayServeException: backend_server = serve.start(detached=True) endpoint_list = list(backend_server.list_endpoints().keys()) backend_list = list(backend_server.list_backends().keys()) backoffice_actor_config = {'num_cpus': 1 / len(BACKOFFICE_ENDPOINTS)} # Policy replicas policy_config = {'num_replicas': 1} for name, (service_function, route) in BACKOFFICE_ENDPOINTS.items(): if name in endpoint_list: backend_server.delete_endpoint(name) if name in backend_list:
def __init__(self, kill_period_s=1):
    """Connect to Serve and remember the kill period.

    Args:
        kill_period_s: Stored unchanged as ``self.kill_period_s``.
    """
    serve_client = serve.connect()
    self.client = serve_client
    self.kill_period_s = kill_period_s
def __init__(self):
    """Store a handle to "backend" obtained from the Serve instance."""
    self.handle = serve.connect().get_handle("backend")