Example #1
def test_updating_config(serve_instance):
    class BatchSimple:
        def __init__(self):
            self.count = 0

        @serve.accept_batch
        def __call__(self, flask_request, temp=None):
            batch_size = serve.context.batch_size
            return [1] * batch_size

    serve.create_backend(
        "bsimple:v1",
        BatchSimple,
        config={
            "max_batch_size": 2,
            "num_replicas": 3
        })
    serve.create_endpoint("bsimple", backend="bsimple:v1", route="/bsimple")

    master_actor = serve.api._get_master_actor()
    old_replica_tag_list = ray.get(
        master_actor._list_replicas.remote("bsimple:v1"))

    serve.update_backend_config("bsimple:v1", {"max_batch_size": 5})
    new_replica_tag_list = ray.get(
        master_actor._list_replicas.remote("bsimple:v1"))
    new_all_tag_list = []
    for worker_dict in ray.get(
            master_actor.get_all_worker_handles.remote()).values():
        new_all_tag_list.extend(list(worker_dict.keys()))

    # The old and new replica tag lists should be identical
    # and should be a subset of the list of all tags.
    assert set(old_replica_tag_list) <= set(new_all_tag_list)
    assert set(old_replica_tag_list) == set(new_replica_tag_list)
Example #2
def test_imported_backend(serve_instance):
    config = BackendConfig(user_config="config")
    serve.create_backend("imported",
                         "ray.serve.utils.MockImportedBackend",
                         "input_arg",
                         config=config)
    serve.create_endpoint("imported", backend="imported")

    # Basic sanity check.
    handle = serve.get_handle("imported")
    assert ray.get(handle.remote()) == {"arg": "input_arg", "config": "config"}

    # Check that updating backend config works.
    serve.update_backend_config("imported",
                                BackendConfig(user_config="new_config"))
    assert ray.get(handle.remote()) == {
        "arg": "input_arg",
        "config": "new_config"
    }

    # Check that other call methods work.
    handle = handle.options(method_name="other_method")
    assert ray.get(handle.remote("hello")) == "hello"

    # Check that functions work as well.
    serve.create_backend("imported_func",
                         "ray.serve.utils.mock_imported_function")
    serve.create_endpoint("imported_func", backend="imported_func")
    handle = serve.get_handle("imported_func")
    assert ray.get(handle.remote("hello")) == "hello"
Example #3
def test_scaling_replicas(serve_instance):
    class Counter:
        def __init__(self):
            self.count = 0

        def __call__(self, _):
            self.count += 1
            return self.count

    serve.create_backend("counter:v1", Counter, config={"num_replicas": 2})
    serve.create_endpoint("counter", backend="counter:v1", route="/increment")

    # Keep checking the routing table until /increment is populated
    while "/increment" not in requests.get(
            "http://127.0.0.1:8000/-/routes").json():
        time.sleep(0.2)

    counter_result = []
    for _ in range(10):
        resp = requests.get("http://127.0.0.1:8000/increment").json()
        counter_result.append(resp)

    # If the load is shared between the two replicas, the max result cannot be 10.
    assert max(counter_result) < 10

    serve.update_backend_config("counter:v1", {"num_replicas": 1})

    counter_result = []
    for _ in range(10):
        resp = requests.get("http://127.0.0.1:8000/increment").json()
        counter_result.append(resp)
    # It may take some time for a replica to spin down, but the majority of the
    # requests should be served by the sole remaining replica.
    assert max(counter_result) - min(counter_result) > 6
Example #4
def backend_setup(tag: str, worker_args: Tuple, replicas: int,
                  max_batch_size: int) -> None:
    """
    Sets up the backend for the distributed explanation task.

    Parameters
    ----------
    tag
        A tag for the backend component. The same tag must be passed to `endpoint_setup`.
    worker_args
        A tuple containing the arguments for initialising the explainer and fitting it.
    replicas
        The number of backend replicas that serve explanations.
    max_batch_size
        Maximum number of requests to batch and send to a worker process.
    """

    if max_batch_size == 1:
        config = {'num_replicas': max(replicas, 1)}
        serve.create_backend(tag, wrappers.KernelShapModel, *worker_args)
    else:
        config = {
            'num_replicas': max(replicas, 1),
            'max_batch_size': max_batch_size
        }
        serve.create_backend(tag, wrappers.BatchKernelShapModel, *worker_args)
    serve.update_backend_config(tag, config)

    logging.info(f"Backends: {serve.list_backends()}")
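
A minimal usage sketch for backend_setup; the tag, the worker arguments, and the create_endpoint call standing in for endpoint_setup (referenced in the docstring but not shown here) are all hypothetical:

# Hypothetical wiring: four batching replicas behind an /explain route.
worker_args = ("<explainer>", "<fit_data>")  # placeholder explainer arguments
backend_setup("shap", worker_args, replicas=4, max_batch_size=8)
# endpoint_setup presumably wraps something like:
serve.create_endpoint("shap", backend="shap", route="/explain")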
Example #5
async def main():
    ray.init(log_to_driver=False)
    serve.init()

    serve.create_backend("backend", backend)
    serve.create_endpoint("endpoint", backend="backend", route="/api")

    actors = [Client.remote() for _ in range(NUM_CLIENTS)]
    for num_replicas in [1, 8]:
        for backend_config in [
            {
                "max_batch_size": 1,
                "max_concurrent_queries": 1
            },
            {
                "max_batch_size": 1,
                "max_concurrent_queries": 10000
            },
            {
                "max_batch_size": 10000,
                "max_concurrent_queries": 10000
            },
        ]:
            backend_config["num_replicas"] = num_replicas
            serve.update_backend_config("backend", backend_config)
            print(repr(backend_config) + ":")
            async with aiohttp.ClientSession() as session:
                # TODO(edoakes): large data causes broken pipe errors.
                for data_size in ["small"]:
                    await trial(actors, session, data_size)
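
The snippet references NUM_CLIENTS, backend, Client, and trial, which are defined elsewhere in the benchmark file; hedged stand-ins (assumptions, not the original definitions) to show their expected shape:

import aiohttp
import ray
from ray import serve

NUM_CLIENTS = 4  # assumed client count


def backend(flask_request):
    # Assumed trivial backend; the real one likely echoes the payload.
    return "ok"


@ray.remote
class Client:
    # Assumed load-generating actor; the real one presumably issues
    # concurrent requests against the endpoint during a trial.
    def ready(self):
        return "ok"


async def trial(actors, session, data_size):
    # Assumed placeholder for the real benchmark loop, which presumably
    # fans requests out across the client actors and times them.
    async with session.get("http://127.0.0.1:8000/api") as resp:
        await resp.text()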
Example #6
def test_updating_config(serve_instance):
    class BatchSimple:
        def __init__(self):
            self.count = 0

        def __call__(self, request):
            return 1

    config = BackendConfig(max_concurrent_queries=2, num_replicas=3)
    serve.create_backend("bsimple:v1", BatchSimple, config=config)
    serve.create_endpoint("bsimple", backend="bsimple:v1", route="/bsimple")

    controller = serve.api._global_client._controller
    old_replica_tag_list = list(
        ray.get(controller._all_replica_handles.remote())["bsimple:v1"].keys())

    update_config = BackendConfig(max_concurrent_queries=5)
    serve.update_backend_config("bsimple:v1", update_config)
    new_replica_tag_list = list(
        ray.get(controller._all_replica_handles.remote())["bsimple:v1"].keys())
    new_all_tag_list = []
    for worker_dict in ray.get(
            controller._all_replica_handles.remote()).values():
        new_all_tag_list.extend(list(worker_dict.keys()))

    # The old and new replica tag lists should be identical
    # and should be a subset of the list of all tags.
    assert set(old_replica_tag_list) <= set(new_all_tag_list)
    assert set(old_replica_tag_list) == set(new_replica_tag_list)
Example #7
def test_scaling_replicas(serve_instance):
    class Counter:
        def __init__(self):
            self.count = 0

        def __call__(self, _):
            self.count += 1
            return self.count

    config = BackendConfig(num_replicas=2)
    serve.create_backend("counter:v1", Counter, config=config)

    serve.create_endpoint("counter", backend="counter:v1", route="/increment")

    counter_result = []
    for _ in range(10):
        resp = requests.get("http://127.0.0.1:8000/increment").json()
        counter_result.append(resp)

    # If the load is shared between the two replicas, the max result cannot be 10.
    assert max(counter_result) < 10

    update_config = BackendConfig(num_replicas=1)
    serve.update_backend_config("counter:v1", update_config)

    counter_result = []
    for _ in range(10):
        resp = requests.get("http://127.0.0.1:8000/increment").json()
        counter_result.append(resp)
    # It may take some time for a replica to spin down, but the majority of the
    # requests should be served by the sole remaining replica.
    assert max(counter_result) - min(counter_result) > 6
Example #8
def test_backend_user_config(serve_instance):
    class Counter:
        def __init__(self):
            self.count = 10

        def __call__(self, starlette_request):
            return self.count, os.getpid()

        def reconfigure(self, config):
            self.count = config["count"]

    config = BackendConfig(num_replicas=2, user_config={"count": 123, "b": 2})
    serve.create_backend("counter", Counter, config=config)
    serve.create_endpoint("counter", backend="counter")
    handle = serve.get_handle("counter")

    def check(val, num_replicas):
        pids_seen = set()
        for i in range(100):
            result = ray.get(handle.remote())
            if str(result[0]) != val:
                return False
            pids_seen.add(result[1])
        return len(pids_seen) == num_replicas

    wait_for_condition(lambda: check("123", 2))

    serve.update_backend_config("counter", BackendConfig(num_replicas=3))
    wait_for_condition(lambda: check("123", 3))

    config = BackendConfig(user_config={"count": 456})
    serve.update_backend_config("counter", config)
    wait_for_condition(lambda: check("456", 3))
Example #9
def test_worker_replica_failure(serve_instance):
    serve.http_proxy.MAX_ACTOR_DEAD_RETRIES = 0
    serve.init()

    class Worker:
        # Assumes that two replicas are started. Will hang forever in the
        # constructor for any workers that are restarted.
        def __init__(self, path):
            self.should_hang = False
            if not os.path.exists(path):
                with open(path, "w") as f:
                    f.write("1")
            else:
                with open(path, "r") as f:
                    num = int(f.read())

                with open(path, "w") as f:
                    if num == 2:
                        self.should_hang = True
                    else:
                        f.write(str(num + 1))

            if self.should_hang:
                while True:
                    pass

        def __call__(self):
            # Return the PID so responses from the two replicas differ.
            return os.getpid()

    temp_path = os.path.join(tempfile.gettempdir(),
                             serve.utils.get_random_letters())
    serve.create_backend("replica_failure", Worker, temp_path)
    serve.update_backend_config("replica_failure",
                                BackendConfig(num_replicas=2))
    serve.create_endpoint("replica_failure",
                          backend="replica_failure",
                          route="/replica_failure")

    # Wait until both replicas have been started.
    responses = set()
    while len(responses) < 2:
        responses.add(request_with_retries("/replica_failure", timeout=1).text)
        time.sleep(0.1)

    # Kill one of the replicas.
    handles = _get_worker_handles("replica_failure")
    assert len(handles) == 2
    ray.kill(handles[0], no_restart=False)

    # Check that the other replica still serves requests.
    for _ in range(10):
        while True:
            try:
                # The timeout needs to be small here because the request to
                # the restarting worker will hang.
                request_with_retries("/replica_failure", timeout=0.1)
                break
            except TimeoutError:
                time.sleep(0.1)
Example #10
def test_worker_replica_failure(serve_instance):
    @ray.remote
    class Counter:
        def __init__(self):
            self.count = 0

        def inc_and_get(self):
            self.count += 1
            return self.count

    class Worker:
        # Assumes that two replicas are started. Will hang forever in the
        # constructor for any workers that are restarted.
        def __init__(self, counter):
            self.should_hang = False
            self.index = ray.get(counter.inc_and_get.remote())
            if self.index > 2:
                while True:
                    pass

        def __call__(self, *args):
            return self.index

    counter = Counter.remote()
    serve.create_backend("replica_failure", Worker, counter)
    serve.update_backend_config("replica_failure",
                                BackendConfig(num_replicas=2))
    serve.create_endpoint("replica_failure",
                          backend="replica_failure",
                          route="/replica_failure")

    # Wait until both replicas have been started.
    responses = set()
    start = time.time()
    while time.time() - start < 30:
        time.sleep(0.1)
        response = request_with_retries("/replica_failure", timeout=1).text
        assert response in ["1", "2"]
        responses.add(response)
        if len(responses) > 1:
            break
    else:
        raise TimeoutError("Timed out waiting for replicas after 30s.")

    # Kill one of the replicas.
    handles = _get_worker_handles("replica_failure")
    assert len(handles) == 2
    ray.kill(handles[0], no_restart=False)

    # Check that the other replica still serves requests.
    for _ in range(10):
        while True:
            try:
                # The timeout needs to be small here because the request to
                # the restarting worker will hang.
                request_with_retries("/replica_failure", timeout=0.1)
                break
            except TimeoutError:
                time.sleep(0.1)
Example #11
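This snippet begins mid-script; a hedged reconstruction of the setup it presumes, with the echo_v1 definition inferred from the echo_v2 pattern below rather than taken from the source:

import time

import requests

import ray
from ray import serve

ray.init()
serve.init()


# Assumed first backend; the real echo_v1 may differ.
def echo_v1(flask_request):
    return flask_request.args.get("response", "hello")


serve.create_backend("echo:v1", echo_v1)
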
# Requests to the /echo route will be serviced by the echo:v1 backend.
serve.create_endpoint("my_endpoint", backend="echo:v1", route="/echo")

print(requests.get("http://127.0.0.1:8000/echo", timeout=0.5).text)
# The service will be reachable from http

print(ray.get(serve.get_handle("my_endpoint").remote(response="hello")))

# as well as within the ray system.


# We can also add a new backend and split the traffic.
def echo_v2(flask_request):
    # A second implementation that returns a different response.
    return "something new"


serve.create_backend("echo:v2", echo_v2)

# The two backends will now split the traffic 50%-50%.
serve.set_traffic("my_endpoint", {"echo:v1": 0.5, "echo:v2": 0.5})

# Observe requests are now split between two backends.
for _ in range(10):
    print(requests.get("http://127.0.0.1:8000/echo").text)
    time.sleep(0.5)

# You can also change the number of replicas of each backend independently.
serve.update_backend_config("echo:v1", {"num_replicas": 2})
serve.update_backend_config("echo:v2", {"num_replicas": 2})
Example #12
import random

import requests

import ray
from ray import serve
from ray.serve import BackendConfig

ray.init()
serve.start()


class Threshold:
    def __init__(self):
        # self.model won't be changed by reconfigure.
        self.model = random.Random()  # Imagine this is some heavyweight model.

    def reconfigure(self, config):
        # This will be called when the class is created and when
        # the user_config field of BackendConfig is updated.
        self.threshold = config["threshold"]

    def __call__(self, request):
        return self.model.random() > self.threshold


backend_config = BackendConfig(user_config={"threshold": 0.01})
serve.create_backend("threshold", Threshold, config=backend_config)
serve.create_endpoint("threshold", backend="threshold", route="/threshold")
print(requests.get("http://127.0.0.1:8000/threshold").text)  # true, probably

backend_config = BackendConfig(user_config={"threshold": 0.99})
serve.update_backend_config("threshold", backend_config)
print(requests.get("http://127.0.0.1:8000/threshold").text)  # false, probably