Python scale Examples

Programming Language: Python

Namespace/Package Name: ray.experimental.serve

Method/Function: scale

Examples at hotexamples.com: 3

Python scale - 3 examples found. These are the top-rated real-world Python examples of ray.experimental.serve.scale, extracted from open source projects. You can rate the examples to help us improve their quality.

Example #1

Show file

def scale(backend_tag, num_replicas):
    """Set the number of replicas for the backend tagged ``backend_tag``.

    Validates the requested count, calls ``ray.init(address="auto")`` and
    ``serve.init()``, then forwards the request to ``serve.scale``.

    Args:
        backend_tag: Tag of the backend to scale.
        num_replicas: Desired number of replicas; must be greater than 0.

    Raises:
        click.ClickException: If ``num_replicas`` is zero or negative.
    """
    if num_replicas <= 0:
        # The exception has to be raised, not just constructed; otherwise
        # the check is a no-op and the invalid count reaches serve.scale.
        # ClickException is used because Click prints its message to the
        # user, whereas click.Abort only prints "Aborted!".
        raise click.ClickException(
            "Cannot set number of replicas to be smaller or equal to 0.")
    ray.init(address="auto")
    serve.init()

    serve.scale(backend_tag, num_replicas)

Example #2

Show file

File: test_api.py Project: ujvl/ray

def test_scaling_replicas(serve_instance):
    """Check that scaling a backend changes how requests are distributed.

    A stateful counter backend is scaled to two replicas and then back to
    one; the counts returned over HTTP are used to tell whether the load
    was shared between replicas or served mostly by a single one.
    """
    increment_url = "http://127.0.0.1:8000/increment"

    class Counter:
        """Callable backend that returns how many times it has been called."""

        def __init__(self):
            self.count = 0

        def __call__(self, _):
            self.count += 1
            return self.count

    def collect_counts(num_requests):
        """Issue ``num_requests`` GETs and return each reported count."""
        return [
            requests.get(increment_url).json()["result"]
            for _ in range(num_requests)
        ]

    serve.create_endpoint("counter", "/increment")

    # Poll the routing table until the /increment route shows up.
    while True:
        routing_table = requests.get("http://127.0.0.1:8000/").json()
        if "/increment" in routing_table:
            break
        time.sleep(0.2)

    serve.create_backend(Counter, "counter:v1")
    serve.link("counter", "counter:v1")

    serve.scale("counter:v1", 2)
    two_replica_counts = collect_counts(10)

    # With the load shared among two replicas, no single counter can
    # have reached 10.
    assert max(two_replica_counts) < 10

    serve.scale("counter:v1", 1)
    one_replica_counts = collect_counts(10)

    # A replica may take some time to spin down, but the majority of the
    # requests should be served by the only remaining replica.
    spread = max(one_replica_counts) - min(one_replica_counts)
    assert spread > 6

Example #3

Show file

# The service is reachable over HTTP ...
print(requests.get("http://127.0.0.1:8000/echo").json())

# ... as well as from within the Ray system, through an endpoint handle.
print(ray.get(serve.get_handle("my_endpoint").remote(response="hello")))


# We can also add a new backend and split the traffic.
def echo_v2(flask_request):
    """Return a fixed reply; ``flask_request`` is accepted but not used."""
    # magic, only from web.
    reply = "something new"
    return reply


# Register echo_v2 as a backend tagged "echo:v2".
serve.create_backend(echo_v2, "echo:v2")

# The two backends will now split the traffic 50%-50%.
serve.split("my_endpoint", {"echo:v1": 0.5, "echo:v2": 0.5})

# Observe that requests are now split between the two backends.
for _ in range(10):
    print(requests.get("http://127.0.0.1:8000/echo").json())
    time.sleep(0.5)

# You can also scale each backend independently.
serve.scale("echo:v1", 2)
serve.scale("echo:v2", 2)

# System metrics can be retrieved as well.
print(pformat_color_json(serve.stat()))