Example #1
def test_batching(serve_instance):
    class BatchingExample:
        def __init__(self):
            self.count = 0

        @serve.accept_batch
        def __call__(self, flask_request, temp=None):
            self.count += 1
            batch_size = serve.context.batch_size
            return [self.count] * batch_size

    serve.create_endpoint("counter1", "/increment")

    # Keep checking the routing table until /increment is populated
    while "/increment" not in requests.get("http://127.0.0.1:8000/").json():
        time.sleep(0.2)

    # set the max batch size
    b_config = BackendConfig(max_batch_size=5)
    serve.create_backend(BatchingExample,
                         "counter:v11",
                         backend_config=b_config)
    serve.link("counter1", "counter:v11")

    future_list = []
    handle = serve.get_handle("counter1")
    for _ in range(20):
        f = handle.remote(temp=1)
        future_list.append(f)

    counter_result = ray.get(future_list)
    # Since count is only updated once per batch of queries, if there is
    # at least one __call__ invocation with a batch size greater than 1,
    # the maximum counter value will be less than 20.
    assert max(counter_result) < 20
Example #2
def test_new_driver(serve_instance):
    script = """
import ray
ray.init(address="auto")

from ray.experimental import serve
serve.init()

@serve.route("/driver")
def driver(flask_request):
    return "OK!"
"""

    with tempfile.NamedTemporaryFile(mode="w", delete=False) as f:
        path = f.name
        f.write(script)

    proc = subprocess.Popen(["python", path])
    return_code = proc.wait(timeout=10)
    assert return_code == 0

    handle = serve.get_handle("driver")
    assert ray.get(handle.remote()) == "OK!"

    os.remove(path)
Example #3
def test_no_route(serve_instance):
    serve.create_endpoint("noroute-endpoint", blocking=True)
    global_state = serve.api._get_global_state()

    result = global_state.route_table.list_service(include_headless=True)
    assert result[NO_ROUTE_KEY] == ["noroute-endpoint"]

    without_headless_result = global_state.route_table.list_service()
    assert NO_ROUTE_KEY not in without_headless_result

    def func(_, i=1):
        return 1

    serve.create_backend(func, "backend:1")
    serve.link("noroute-endpoint", "backend:1")
    service_handle = serve.get_handle("noroute-endpoint")
    result = ray.get(service_handle.remote(i=1))
    assert result == 1
Example #4
def test_batching_exception(serve_instance):
    class NoListReturned:
        def __init__(self):
            self.count = 0

        @serve.accept_batch
        def __call__(self, flask_request, temp=None):
            batch_size = serve.context.batch_size
            return batch_size

    serve.create_endpoint("exception-test", "/noListReturned")
    # set the max batch size
    b_config = BackendConfig(max_batch_size=5)
    serve.create_backend(NoListReturned,
                         "exception:v1",
                         backend_config=b_config)
    serve.link("exception-test", "exception:v1")

    handle = serve.get_handle("exception-test")
    with pytest.raises(ray.exceptions.RayTaskError):
        assert ray.get(handle.remote(temp=1))
Example #5
import time

import requests

import ray
from ray.experimental import serve
from ray.experimental.serve.utils import pformat_color_json


def echo(_):
    raise Exception("Something went wrong...")


serve.init(blocking=True)

serve.create_endpoint("my_endpoint", "/echo", blocking=True)
serve.create_backend(echo, "echo:v1")
serve.link("my_endpoint", "echo:v1")

for _ in range(2):
    resp = requests.get("http://127.0.0.1:8000/echo").json()
    print(pformat_color_json(resp))

    print("...Sleeping for 2 seconds...")
    time.sleep(2)

handle = serve.get_handle("my_endpoint")
print("Invoke from python will raise exception with traceback:")
ray.get(handle.remote())
Example #6
serve.create_endpoint("ECG")
# create data point service for hospital
serve.create_endpoint("hospital",
                      route="/hospital",
                      kwargs_creator=kwargs_creator)

# create backend for ECG
b_config = BackendConfig(num_replicas=1)
serve.create_backend(PytorchPredictorECG,
                     "PredictECG",
                     model,
                     cuda,
                     backend_config=b_config)
# link service and backend
serve.link("ECG", "PredictECG")
handle = serve.get_handle("ECG")

# prepare args for StorePatientData backend.
service_handles_dict = {"ECG": handle}
# do prediction after every 3750 queries.
num_queries_dict = {"ECG": 3750}
# Always keep num_replicas at 1 because this is a stateful backend.
# This backend stores all of the patient's data and forwards each full
# window of queries to the respective backend (the ECG handle in this case).
b_config_hospital = BackendConfig(num_replicas=1)
serve.create_backend(StorePatientData,
                     "StoreData",
                     service_handles_dict,
                     num_queries_dict,
                     backend_config=b_config_hospital)
serve.link("hospital", "StoreData")
Example #7
        else:
            result = []
            for b in base_number:
                ans = b + self.increment
                result.append(ans)
            return result


serve.init(blocking=True)
serve.create_endpoint("magic_counter", "/counter", blocking=True)
b_config = BackendConfig(max_batch_size=5)
serve.create_backend(MagicCounter, "counter:v1", 42,
                     backend_config=b_config)  # increment=42
serve.link("magic_counter", "counter:v1")

print("Sending ten queries via HTTP")
for i in range(10):
    url = "http://127.0.0.1:8000/counter?base_number={}".format(i)
    print("> Pinging {}".format(url))
    resp = requests.get(url).json()
    print(pformat_color_json(resp))

    time.sleep(0.2)

print("Sending ten queries via Python")
handle = serve.get_handle("magic_counter")
for i in range(10):
    print("> Pinging handle.remote(base_number={})".format(i))
    result = ray.get(handle.remote(base_number=i))
    print("< Result {}".format(result))
Example #8
                /    \
               /      \
              /        \
             /          \
  "my_endpoint2"     "my_endpoint3"
            \            /
             \          /
              \        /
               \      /
                \    /
                 \  /
                  \/
            "my_endpoint4"
"""

# get the handle of the endpoints
handle1 = serve.get_handle("echo_v1")
handle2 = serve.get_handle("echo_v2")
handle3 = serve.get_handle("echo_v3")
handle4 = serve.get_handle("echo_v4")

start = time.time()
print("Start firing to the pipeline: {} s".format(time.time()))
handle1_oid = handle1.remote(response="hello")
handle4_oid = handle4.remote(relay1=handle2.remote(relay=handle1_oid),
                             relay2=handle3.remote(relay=handle1_oid))
print("Firing ended now waiting for the result,"
      "time taken: {} s".format(time.time() - start))
result = ray.get(handle4_oid)
print("Result: {}, time taken: {} s".format(result, time.time() - start))
Example #9
def echo_v1(flask_request, response="hello from python!"):
    if serve.context.web:
        response = flask_request.url
    return response


serve.create_backend(echo_v1, "echo:v1")

# We can link an endpoint to a backend; this means all traffic sent to
# my_endpoint will now go to the echo:v1 backend.
serve.link("my_endpoint", "echo:v1")

# The service is reachable over HTTP...
print(requests.get("http://127.0.0.1:8000/echo").json())

# ...as well as from within Ray via a service handle.
print(ray.get(serve.get_handle("my_endpoint").remote(response="hello")))


# We can also add a new backend and split the traffic.
def echo_v2(flask_request):
    # A new implementation; it simply returns a fixed response.
    return "something new"


serve.create_backend(echo_v2, "echo:v2")

# The two backends will now split the traffic 50/50.
serve.split("my_endpoint", {"echo:v1": 0.5, "echo:v2": 0.5})
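
With the 50/50 split in place, repeated requests should return a mix of both
backends' responses. A quick way to eyeball the split (a sketch, assuming
requests is imported as above and the endpoint is still live):

from collections import Counter

# Send 100 requests and tally the distinct responses; with a 50/50 split,
# each backend should serve roughly half (exact counts will vary).
tally = Counter(
    str(requests.get("http://127.0.0.1:8000/echo").json())
    for _ in range(100))
print(tally)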
Example #10
def benchmark(func, name):
    for _ in range(NUM_WARMUPS):
        func()

    for _ in range(NUM_REPEATS):
        with profile(name):
            func()


def work(_):
    time.sleep(0.05)


@ray.remote
def work_ray():
    time.sleep(0.05)


serve.init()
serve.create_endpoint('sleep', '/')
serve.create_backend(work, 'sleep:v1')
serve.link('sleep', 'sleep:v1')

handle = serve.get_handle('sleep')

benchmark(lambda: ray.get(handle.remote()), "serve_sleep")
benchmark(lambda: ray.get(work_ray.remote()), "ray_sleep")

summarize_profile()
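
profile and summarize_profile come from a helper module that is not shown
here, along with NUM_WARMUPS and NUM_REPEATS. A minimal stand-in with the
same shape (a timing context manager plus a report; these names and the
output format are assumptions) might be:

import time
from collections import defaultdict
from contextlib import contextmanager

NUM_WARMUPS = 10
NUM_REPEATS = 50
_timings = defaultdict(list)


@contextmanager
def profile(name):
    # Record the wall-clock duration of the enclosed block under `name`.
    start = time.perf_counter()
    try:
        yield
    finally:
        _timings[name].append(time.perf_counter() - start)


def summarize_profile():
    # Print the mean latency per profiled name.
    for name, durations in _timings.items():
        print("{}: mean {:.4f} s over {} runs".format(
            name, sum(durations) / len(durations), len(durations)))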