Example #1
0
def test_redeploy_multiple_replicas(serve_instance, use_handle):
    # Tests that redeploying a deployment with multiple replicas performs
    # a rolling update.
    client = serve_instance

    name = "test"

    @ray.remote(num_cpus=0)
    def call(block=False):
        if use_handle:
            handle = serve.get_deployment(name).get_handle()
            ret = ray.get(handle.handler.remote(block))
        else:
            ret = requests.get(
                f"http://*****:*****@serve.deployment(name=name, version="1", num_replicas=2)
    class V1:
        async def handler(self, block: bool):
            if block:
                signal = ray.get_actor(signal_name)
                await signal.wait.remote()

            return f"1|{os.getpid()}"

        async def __call__(self, request):
            return await self.handler(request.query_params["block"] == "True")

    class V2:
        async def handler(self, *args):
            return f"2|{os.getpid()}"

        async def __call__(self, request):
            return await self.handler()

    def make_nonblocking_calls(expected, expect_blocking=False):
        # Returns dict[val, set(pid)].
        blocking = []
        responses = defaultdict(set)
        start = time.time()
        while time.time() - start < 30:
            refs = [call.remote(block=False) for _ in range(10)]
            ready, not_ready = ray.wait(refs, timeout=0.5)
            for ref in ready:
                val, pid = ray.get(ref)
                responses[val].add(pid)
            for ref in not_ready:
                blocking.extend(not_ready)

            if (all(
                    len(responses[val]) == num
                    for val, num in expected.items())
                    and (expect_blocking is False or len(blocking) > 0)):
                break
        else:
            assert False, f"Timed out, responses: {responses}."

        return responses, blocking

    V1.deploy()
    responses1, _ = make_nonblocking_calls({"1": 2})
    pids1 = responses1["1"]

    # ref2 will block a single replica until the signal is sent. Check that
    # some requests are now blocking.
    ref2 = call.remote(block=True)
    responses2, blocking2 = make_nonblocking_calls(
        {
            "1": 1
        }, expect_blocking=True)
    assert list(responses2["1"])[0] in pids1

    # Redeploy new version. Since there is one replica blocking, only one new
    # replica should be started up.
    V2 = V1.options(func_or_class=V2, version="2")
    goal_ref = V2.deploy(_blocking=False)
    assert not client._wait_for_goal(goal_ref, timeout=0.1)
    responses3, blocking3 = make_nonblocking_calls(
        {
            "1": 1
        }, expect_blocking=True)

    # Signal the original call to exit.
    ray.get(signal.send.remote())
    val, pid = ray.get(ref2)
    assert val == "1"
    assert pid in responses1["1"]

    # Now the goal and requests to the new version should complete.
    # We should have two running replicas of the new version.
    assert client._wait_for_goal(goal_ref)
    make_nonblocking_calls({"2": 2})
def test_dependency_refcounts(ray_start_regular):
    # Checks how the reference counts verified by check_refcounts change for
    # objects passed as task arguments: while the consuming task is pending,
    # after it finishes, and after it fails.
    #
    # NOTE(review): every expected value is a 2-tuple. Judging from the
    # transitions asserted below, the first entry looks like the number of
    # local references and the second the number held by submitted tasks --
    # confirm against check_refcounts, which is defined outside this view.
    #
    # The statements are order- and lifetime-sensitive: every extra variable
    # holding an ObjectRef would change the counts being asserted.
    @ray.remote
    def one_dep(dep, signal=None, fail=False):
        # Task with a single dependency. Optionally waits on `signal` before
        # finishing and optionally raises to simulate a failed task.
        if signal is not None:
            ray.get(signal.wait.remote())
        if fail:
            raise Exception("failed on purpose")

    @ray.remote
    def one_dep_large(dep, signal=None):
        # Like one_dep, but returns a 10 MiB array.
        if signal is not None:
            ray.get(signal.wait.remote())
        # This will be spilled to plasma.
        return np.zeros(10 * 1024 * 1024, dtype=np.uint8)

    # Test that regular plasma dependency refcounts are decremented once the
    # task finishes.
    signal = SignalActor.remote()
    large_dep = ray.put(np.zeros(10 * 1024 * 1024, dtype=np.uint8))
    result = one_dep.remote(large_dep, signal=signal)
    # The task is still waiting on the signal, so it holds the dependency.
    check_refcounts({large_dep: (1, 1), result: (1, 0)})
    ray.get(signal.send.remote())
    # Reference count should be removed once the task finishes.
    check_refcounts({large_dep: (1, 0), result: (1, 0)})
    del large_dep, result
    # With the local names gone, no references should remain.
    check_refcounts({})

    # Test that inlined dependency refcounts are decremented once they are
    # inlined.
    signal = SignalActor.remote()
    dep = one_dep.remote(None, signal=signal)
    check_refcounts({dep: (1, 0)})
    result = one_dep.remote(dep)
    check_refcounts({dep: (1, 1), result: (1, 0)})
    ray.get(signal.send.remote())
    # Reference count should be removed as soon as the dependency is inlined.
    check_refcounts({dep: (1, 0), result: (1, 0)})
    del dep, result
    check_refcounts({})

    # Test that spilled plasma dependency refcounts are decremented once
    # the task finishes.
    signal1, signal2 = SignalActor.remote(), SignalActor.remote()
    dep = one_dep_large.remote(None, signal=signal1)
    check_refcounts({dep: (1, 0)})
    result = one_dep.remote(dep, signal=signal2)
    check_refcounts({dep: (1, 1), result: (1, 0)})
    # Let the producer finish and wait for its output, while the consumer is
    # still held back by signal2.
    ray.get(signal1.send.remote())
    ray.get(dep, timeout=10)
    # Reference count should remain because the dependency is in plasma.
    check_refcounts({dep: (1, 1), result: (1, 0)})
    ray.get(signal2.send.remote())
    # Reference count should be removed because the task finished.
    check_refcounts({dep: (1, 0), result: (1, 0)})
    del dep, result
    check_refcounts({})

    # Test that regular plasma dependency refcounts are decremented if a task
    # fails.
    signal = SignalActor.remote()
    large_dep = ray.put(np.zeros(10 * 1024 * 1024, dtype=np.uint8))
    result = one_dep.remote(large_dep, signal=signal, fail=True)
    check_refcounts({large_dep: (1, 1), result: (1, 0)})
    ray.get(signal.send.remote())
    # Reference count should be removed once the task finishes (here: by
    # raising).
    check_refcounts({large_dep: (1, 0), result: (1, 0)})
    del large_dep, result
    check_refcounts({})

    # Test that spilled plasma dependency refcounts are decremented if a task
    # fails.
    signal1, signal2 = SignalActor.remote(), SignalActor.remote()
    dep = one_dep_large.remote(None, signal=signal1)
    check_refcounts({dep: (1, 0)})
    result = one_dep.remote(dep, signal=signal2, fail=True)
    check_refcounts({dep: (1, 1), result: (1, 0)})
    ray.get(signal1.send.remote())
    ray.get(dep, timeout=10)
    # Reference count should remain because the dependency is in plasma.
    check_refcounts({dep: (1, 1), result: (1, 0)})
    ray.get(signal2.send.remote())
    # Reference count should be removed because the task finished (here: by
    # raising).
    check_refcounts({dep: (1, 0), result: (1, 0)})
    del dep, result
    check_refcounts({})
Example #3
0
def test_redeploy_single_replica(serve_instance, use_handle):
    # Tests that redeploying a deployment with a single replica waits for the
    # replica to completely shut down before starting a new one.
    client = serve_instance

    name = "test"

    # NOTE(review): the line starting with `f"http://` below is truncated in
    # this copy of the file: the end of `call`, the definitions of
    # `signal_name` / `signal` used further down, and the start of the
    # decorator on V1 are fused into it. Restore them from the original
    # source before running this test.
    @ray.remote
    def call(block=False):
        if use_handle:
            handle = serve.get_deployment(name).get_handle()
            ret = ray.get(handle.handler.remote(block))
        else:
            ret = requests.get(
                f"http://*****:*****@serve.deployment(name=name, version="1")
    class V1:
        async def handler(self, block: bool):
            if block:
                # Park this request until the test sends the signal.
                signal = ray.get_actor(signal_name)
                await signal.wait.remote()

            return f"1|{os.getpid()}"

        async def __call__(self, request):
            return await self.handler(request.query_params["block"] == "True")

    class V2:
        async def handler(self, *args):
            return f"2|{os.getpid()}"

        async def __call__(self, request):
            return await self.handler()

    V1.deploy()
    # Remember the pid of the only version "1" replica.
    ref1 = call.remote(block=False)
    val1, pid1 = ray.get(ref1)
    assert val1 == "1"

    # ref2 will block until the signal is sent.
    ref2 = call.remote(block=True)
    assert len(ray.wait([ref2], timeout=0.1)[0]) == 0

    # Redeploy new version. This should not go through until the old version
    # replica completely stops.
    V2 = V1.options(func_or_class=V2, version="2")
    goal_ref = V2.deploy(_blocking=False)
    assert not client._wait_for_goal(goal_ref, timeout=0.1)

    # It may take some time for the handle change to propagate and requests
    # to get sent to the new version. Repeatedly send requests until they
    # start blocking. Gives up after 30 seconds.
    start = time.time()
    new_version_ref = None
    while time.time() - start < 30:
        ready, not_ready = ray.wait([call.remote(block=False)], timeout=0.5)
        if len(ready) == 1:
            # If the request doesn't block, it must have been the old version.
            val, pid = ray.get(ready[0])
            assert val == "1"
            assert pid == pid1
        elif len(not_ready) == 1:
            # If the request blocks, it must have been the new version.
            new_version_ref = not_ready[0]
            break
    else:
        # Only reached when the loop ran out of time without a `break`.
        assert False, "Timed out waiting for new version to be called."

    # Signal the original call to exit.
    ray.get(signal.send.remote())
    val2, pid2 = ray.get(ref2)
    assert val2 == "1"
    assert pid2 == pid1

    # Now the goal and request to the new version should complete.
    assert client._wait_for_goal(goal_ref)
    new_version_val, new_version_pid = ray.get(new_version_ref)
    assert new_version_val == "2"
    # The new version must be served by a different process than the old one.
    assert new_version_pid != pid2
Example #4
0
def test_async_actor_task_retries(ray_start_regular):
    # See https://github.com/ray-project/ray/issues/11683
    #
    # Exercises task retries on an async actor that kills its own process,
    # first with tasks completing in order and then out of order.

    signal = SignalActor.remote()

    @ray.remote
    class DyingActor:
        def __init__(self):
            print("DyingActor init called")
            self.should_exit = False

        def set_should_exit(self):
            print("DyingActor.set_should_exit called")
            self.should_exit = True

        async def get(self, x, wait=False):
            print(f"DyingActor.get called with x={x}, wait={wait}")
            if self.should_exit:
                # Kill the actor process on the spot.
                os._exit(0)
            if not wait:
                return x
            await signal.wait.remote()
            return x

    # Both actors below are created with unlimited restarts and retries.
    retry_forever = {"max_restarts": -1, "max_task_retries": -1}

    # Part 1: plain in-order retries. The call made after set_should_exit
    # kills the actor and must still produce its value.
    actor = DyingActor.options(**retry_forever).remote()

    before_crash = ray.get(actor.get.remote(1))
    assert before_crash == 1
    ray.get(actor.set_should_exit.remote())
    after_crash = ray.get(actor.get.remote(42))
    assert after_crash == 42

    # Part 2: out-of-order retries.
    #   seqno 0 returns
    #   seqno 1 stays pending and is retried later
    #   seqno 2 returns
    #   seqno 3 crashes the actor and is retried later
    actor = DyingActor.options(**retry_forever).remote()

    # seqno 0
    done_ref = actor.get.remote(0)
    assert ray.get(done_ref) == 0
    # seqno 1
    pending_ref = actor.get.remote(1, wait=True)
    # seqno 2
    flag_ref = actor.set_should_exit.remote()
    assert ray.get(flag_ref) is None
    # seqno 3: crashes the actor, because seqno 2 set should_exit to true.
    crash_ref = actor.get.remote(3)

    # The actor should have restarted by now. Only the two unfinished tasks
    # (seqno 1 and 3) should be retried, not the completed ones (seqno 0 and
    # 2). Critically, if seqno 2 were retried, seqno 3 could never return.
    ray.get(signal.send.remote())
    assert ray.get(pending_ref) == 1
    assert ray.get(crash_ref) == 3
Example #5
0
async def test_replica_set(ray_instance, mock_controller_with_name):
    # Checks that a ReplicaSet limited to one concurrent query per replica
    # hands out two queries to two replicas, holds back a third one until a
    # replica is free again, and then lets it through.
    signal = SignalActor.remote()

    @ray.remote(num_cpus=0)
    class MockWorker:
        _num_queries = 0

        @ray.method(num_returns=2)
        async def handle_request(self, request):
            self._num_queries += 1
            # Stay in flight until the test releases the signal.
            await signal.wait.remote()
            return b"", "DONE"

        async def num_queries(self):
            return self._num_queries

    # Scenario: a replica set backed by two replicas.
    loop = asyncio.get_event_loop()
    replica_set = ReplicaSet(mock_controller_with_name[1], "my_backend", loop)
    workers = [MockWorker.remote() for _ in range(2)]
    one_at_a_time = BackendConfig(max_concurrent_queries=1)
    replica_set.set_max_concurrent_queries(one_at_a_time)
    replica_set.update_worker_replicas(workers)

    # Two queries go through the router but then hang on the signal actor.
    query = Query([], {}, RequestMetadata("request-id", "endpoint"))
    first_ref = await replica_set.assign_replica(query)
    second_ref = await replica_set.assign_replica(query)

    # Neither result can be available yet.
    with pytest.raises(ray.exceptions.GetTimeoutError):
        ray.get([first_ref, second_ref], timeout=1)

    # Wait until each replica reports exactly one query, i.e. both queries
    # have actually arrived.
    for worker in workers:
        while True:
            if await worker.num_queries.remote() == 1:
                break
            await asyncio.sleep(1)

    # A third query cannot be assigned while both replicas are busy, so the
    # task wrapping the assignment must still be pending a moment later.
    third_assignment = loop.create_task(replica_set.assign_replica(query))
    await asyncio.sleep(0.2)
    assert not third_assignment.done()

    # Release the two in-flight queries.
    await signal.send.remote()
    first_result = await first_ref
    assert first_result == "DONE"
    second_result = await second_ref
    assert second_result == "DONE"

    # The third query is no longer held back, which means its object ref
    # can now be obtained.
    third_ref = await third_assignment

    # With the ref in hand, release the signal again and fetch the result.
    await signal.send.remote()
    third_result = await third_ref
    assert third_result == "DONE"

    # Finally, exactly one of the replicas must have handled the third query.
    seen_counts = set()
    for worker in workers:
        seen_counts.add(await worker.num_queries.remote())
    assert seen_counts == {2, 1}
Example #6
0
def test_reconfigure_multiple_replicas(serve_instance, use_handle):
    # Tests that updating the user_config with multiple replicas performs a
    # rolling update.
    client = serve_instance

    name = "test"

    @ray.remote(num_cpus=0)
    def call():
        if use_handle:
            handle = serve.get_deployment(name).get_handle()
            ret = ray.get(handle.handler.remote())
        else:
            ret = requests.get(f"http://*****:*****@serve.deployment(name=name, version="1", num_replicas=2)
    class V1:
        def __init__(self):
            self.config = None

        async def reconfigure(self, config):
            # Don't block when the replica is first created.
            if self.config is not None:
                signal = ray.get_actor(signal_name)
                ray.get(signal.wait.remote())
            self.config = config

        async def handler(self):
            return f"{self.config}|{os.getpid()}"

        async def __call__(self, request):
            return await self.handler()

    def make_nonblocking_calls(expected, expect_blocking=False):
        # Returns dict[val, set(pid)].
        blocking = []
        responses = defaultdict(set)
        start = time.time()
        while time.time() - start < 30:
            refs = [call.remote() for _ in range(10)]
            ready, not_ready = ray.wait(refs, timeout=0.5)
            for ref in ready:
                val, pid = ray.get(ref)
                responses[val].add(pid)
            for ref in not_ready:
                blocking.extend(not_ready)

            if (all(
                    len(responses[val]) == num
                    for val, num in expected.items())
                    and (expect_blocking is False or len(blocking) > 0)):
                break
            time.sleep(0.1)
        else:
            assert False, f"Timed out, responses: {responses}."

        return responses, blocking

    V1.options(user_config="1").deploy()
    responses1, _ = make_nonblocking_calls({"1": 2})
    pids1 = responses1["1"]

    # Reconfigure should block one replica until the signal is sent. Check that
    # some requests are now blocking.
    goal_ref = V1.options(user_config="2").deploy(_blocking=False)
    responses2, blocking2 = make_nonblocking_calls(
        {
            "1": 1
        }, expect_blocking=True)
    assert list(responses2["1"])[0] in pids1

    # Signal reconfigure to finish. Now the goal should complete and both
    # replicas should have the updated config.
    ray.get(signal.send.remote())
    assert client._wait_for_goal(goal_ref)
    make_nonblocking_calls({"2": 2})
Example #7
0
async def test_router_use_max_concurrency(serve_instance):
    # Checks that a router whose backend allows one concurrent query sends
    # only one of two queries to the replica and buffers the other.
    #
    # VisibleRouter::get_queues has to pickle queries, so a serializer for
    # Query is registered up front. In the regular code path Serve serializes
    # queries manually for performance.
    ray.register_custom_serializer(Query, Query.ray_serialize,
                                   Query.ray_deserialize)

    signal = SignalActor.remote()

    @ray.remote
    class MockWorker:
        async def handle_request(self, request):
            # Stay in flight until the test releases the signal.
            await signal.wait.remote()
            return "DONE"

        def ready(self):
            pass

    class VisibleRouter(Router):
        # Router subclass that exposes its bookkeeping to the test.
        def get_queues(self):
            return self.queries_counter, self.backend_queues

    backend_name = "max-concurrent-test"
    replica_key = "max-concurrent-test:replica-tag"

    worker = MockWorker.remote()
    q = ray.remote(VisibleRouter).remote()
    await q.setup.remote("")
    config = BackendConfig({"max_concurrent_queries": 1})
    traffic = TrafficPolicy({backend_name: 1.0})
    await q.set_traffic.remote("svc", traffic)
    await q.add_new_worker.remote(backend_name, "replica-tag", worker)
    await q.set_backend_config.remote(backend_name, config)

    # Submit two queries.
    first_query = q.enqueue_request.remote(RequestMetadata("svc", None), 1)
    second_query = q.enqueue_request.remote(RequestMetadata("svc", None), 1)

    # Neither of them can have a result yet.
    pending = [first_query, second_query]
    with pytest.raises(ray.exceptions.RayTimeoutError):
        ray.get(pending, timeout=0.2)

    # Router state while both are outstanding: one query in flight on the
    # replica, the other one buffered.
    in_flight, buffered = await q.get_queues.remote()
    assert in_flight[backend_name][replica_key] == 1
    assert len(buffered[backend_name]) == 1

    # Release the first query.
    await signal.send.remote(clear=True)
    first_result = await first_query
    assert first_result == "DONE"

    # The buffered query has moved to the replica: still one in flight, but
    # the queue is empty.
    in_flight, buffered = await q.get_queues.remote()
    assert in_flight[backend_name][replica_key] == 1
    assert len(buffered[backend_name]) == 0

    # Release the second query.
    await signal.send.remote(clear=True)
    second_result = await second_query
    assert second_result == "DONE"

    # Nothing in flight and nothing queued any more.
    in_flight, buffered = await q.get_queues.remote()
    assert in_flight[backend_name][replica_key] == 0
    assert len(buffered[backend_name]) == 0
Example #8
0
async def test_router_use_max_concurrency(serve_instance):
    # Checks that a router whose backend allows one concurrent query sends
    # only one of two queries to the replica and buffers the other.
    signal = SignalActor.remote()

    @ray.remote
    class MockWorker:
        async def handle_request(self, request):
            # Stay in flight until the test releases the signal.
            await signal.wait.remote()
            return "DONE"

        def ready(self):
            pass

    class VisibleRouter(Router):
        # Router subclass that exposes its bookkeeping to the test.
        def get_queues(self):
            return self.queries_counter, self.backend_queues

    backend_name = "max-concurrent-test"
    replica_key = "max-concurrent-test:replica-tag"

    worker = MockWorker.remote()
    q = ray.remote(VisibleRouter).remote()
    await q.setup.remote("", serve_instance._controller_name)
    config = BackendConfig(max_concurrent_queries=1)
    traffic = TrafficPolicy({backend_name: 1.0})
    await q.set_traffic.remote("svc", traffic)
    await q.add_new_worker.remote(backend_name, "replica-tag", worker)
    await q.set_backend_config.remote(backend_name, config)

    # Submit two queries, each with its own random request id.
    first_meta = RequestMetadata(get_random_letters(10), "svc", None)
    first_query = q.enqueue_request.remote(first_meta, 1)
    second_meta = RequestMetadata(get_random_letters(10), "svc", None)
    second_query = q.enqueue_request.remote(second_meta, 1)

    # Neither of them can have a result yet.
    pending = [first_query, second_query]
    with pytest.raises(ray.exceptions.GetTimeoutError):
        ray.get(pending, timeout=0.2)

    # Router state while both are outstanding: one query in flight on the
    # replica, the other one buffered.
    in_flight, buffered = await q.get_queues.remote()
    assert in_flight[backend_name][replica_key] == 1
    assert len(buffered[backend_name]) == 1

    # Release the first query.
    await signal.send.remote(clear=True)
    first_result = await first_query
    assert first_result == "DONE"

    # The buffered query has moved to the replica: still one in flight, but
    # the queue is empty.
    in_flight, buffered = await q.get_queues.remote()
    assert in_flight[backend_name][replica_key] == 1
    assert len(buffered[backend_name]) == 0

    # Release the second query.
    await signal.send.remote(clear=True)
    second_result = await second_query
    assert second_result == "DONE"

    # Nothing in flight and nothing queued any more.
    in_flight, buffered = await q.get_queues.remote()
    assert in_flight[backend_name][replica_key] == 0
    assert len(buffered[backend_name]) == 0