"""Example service that prints out http context."""

import time

import requests

from ray import serve
from ray.serve.utils import pformat_color_json

# Upper bound, in seconds, on each HTTP call. Without it a stalled server
# would block the polling loop below forever; with it the stall surfaces as a
# requests.exceptions.Timeout.
REQUEST_TIMEOUT_S = 0.5


def echo(flask_request):
    """Return a greeting built from the ``name`` query argument.

    Args:
        flask_request: Incoming request object; only ``args.get`` is used.

    Returns:
        ``"hello "`` followed by the ``name`` argument, or ``"hello serve!"``
        when the request carries no ``name`` argument.
    """
    return "hello " + flask_request.args.get("name", "serve!")


serve.init()

# Register the endpoint with the "/echo" route and give the echo backend the
# full traffic weight (1.0).
serve.create_endpoint("my_endpoint", "/echo")
serve.create_backend("echo:v1", echo)
serve.set_traffic("my_endpoint", {"echo:v1": 1.0})

# Poll the endpoint every two seconds until the process is interrupted.
while True:
    resp = requests.get(
        "http://127.0.0.1:8000/echo", timeout=REQUEST_TIMEOUT_S
    ).json()
    print(pformat_color_json(resp))

    print("...Sleeping for 2 seconds...")
    time.sleep(2)
# NOTE(review): the file's visible import block only has `from ray import
# serve`, which does not bind the name `ray`; import it here so the
# `ray.get` call below does not raise NameError.
import ray

# The service is reachable over HTTP...
print(requests.get("http://127.0.0.1:8000/echo", timeout=0.5).text)

# ...as well as from within the ray system.
print(ray.get(serve.get_handle("my_endpoint").remote(response="hello")))


# We can also add a new backend and split the traffic.
def echo_v2(flask_request):
    """Return a fixed string; ``flask_request`` is accepted but not read."""
    # magic, only from web.
    return "something new"


serve.create_backend("echo:v2", echo_v2)

# The two backends will now split the traffic 50%-50%.
serve.set_traffic("my_endpoint", {"echo:v1": 0.5, "echo:v2": 0.5})

# Observe that requests are now split between the two backends. Each request
# carries the same 0.5 s timeout as the first one above so a stalled server
# cannot hang the loop.
for _ in range(10):
    print(requests.get("http://127.0.0.1:8000/echo", timeout=0.5).text)
    time.sleep(0.5)

# You can also change the number of replicas for each backend independently.
serve.update_backend_config("echo:v1", {"num_replicas": 2})
serve.update_backend_config("echo:v2", {"num_replicas": 2})

# As well as retrieve relevant system metrics.
print(pformat_color_json(serve.stat()))