Exemple #1
0
def test_back_pressure(shutdown_only_with_initialization_check):
    ray.init()

    signal_actor = Semaphore.options(max_pending_calls=10).remote(value=0)

    try:
        for i in range(10):
            signal_actor.acquire.remote()
    except ray.exceptions.PendingCallsLimitExceeded:
        assert False

    with pytest.raises(ray.exceptions.PendingCallsLimitExceeded):
        signal_actor.acquire.remote()

    @ray.remote
    def release(signal_actor):
        ray.get(signal_actor.release.remote())
        return 1

    # Release signal actor through common task,
    # because actor tasks will be back pressured
    for i in range(10):
        ray.get(release.remote(signal_actor))

    # Check whether we can call remote actor normally after
    # back presssure released.
    try:
        signal_actor.acquire.remote()
    except ray.exceptions.PendingCallsLimitExceeded:
        assert False

    ray.shutdown()
Exemple #2
0
def test_multi_driver_logging(ray_start_regular):
    address_info = ray_start_regular
    address = address_info["address"]

    # ray.init(address=address)
    driver1_wait = Semaphore.options(name="driver1_wait").remote(value=0)
    driver2_wait = Semaphore.options(name="driver2_wait").remote(value=0)
    main_wait = Semaphore.options(name="main_wait").remote(value=0)

    # The creation of an actor is asynchronous.
    # We need to wait for the completion of the actor creation,
    # otherwise we can't get the actor by name.
    ray.get(driver1_wait.locked.remote())
    ray.get(driver2_wait.locked.remote())
    ray.get(main_wait.locked.remote())

    # Params are address, semaphore name, output1, output2
    driver_script_template = """
import ray
import sys
from ray._private.test_utils import Semaphore

@ray.remote(num_cpus=0)
def remote_print(s, file=None):
    print(s, file=file)

ray.init(address="{}", namespace="default_test_namespace")

driver_wait = ray.get_actor("{}")
main_wait = ray.get_actor("main_wait")

ray.get(main_wait.release.remote())
ray.get(driver_wait.acquire.remote())

s1 = "{}"
ray.get(remote_print.remote(s1))

ray.get(main_wait.release.remote())
ray.get(driver_wait.acquire.remote())

s2 = "{}"
ray.get(remote_print.remote(s2))

ray.get(main_wait.release.remote())
    """

    p1 = run_string_as_driver_nonblocking(
        driver_script_template.format(address, "driver1_wait", "1", "2"))
    p2 = run_string_as_driver_nonblocking(
        driver_script_template.format(address, "driver2_wait", "3", "4"))

    ray.get(main_wait.acquire.remote())
    ray.get(main_wait.acquire.remote())
    # At this point both of the other drivers are fully initialized.

    ray.get(driver1_wait.release.remote())
    ray.get(driver2_wait.release.remote())

    # At this point driver1 should receive '1' and driver2 '3'
    ray.get(main_wait.acquire.remote())
    ray.get(main_wait.acquire.remote())

    ray.get(driver1_wait.release.remote())
    ray.get(driver2_wait.release.remote())

    # At this point driver1 should receive '2' and driver2 '4'
    ray.get(main_wait.acquire.remote())
    ray.get(main_wait.acquire.remote())

    driver1_out = p1.stdout.read().decode("ascii")
    driver2_out = p2.stdout.read().decode("ascii")
    if sys.platform == "win32":
        driver1_out = driver1_out.replace("\r", "")
        driver2_out = driver2_out.replace("\r", "")
    driver1_out_split = driver1_out.split("\n")
    driver2_out_split = driver2_out.split("\n")

    assert driver1_out_split[0][-1] == "1", driver1_out_split
    assert driver1_out_split[1][-1] == "2", driver1_out_split
    assert driver2_out_split[0][-1] == "3", driver2_out_split
    assert driver2_out_split[1][-1] == "4", driver2_out_split
Exemple #3
0
def test_memory_util(ray_start_cluster):
    config = {
        "num_heartbeats_timeout": 10,
        "raylet_heartbeat_period_milliseconds": 100,
        "object_timeout_milliseconds": 200,
    }

    cluster = ray_start_cluster
    # Head node with no resources.
    cluster.add_node(
        num_cpus=0,
        resources={"head": 1},
        _system_config=config,
        enable_object_reconstruction=True,
    )
    ray.init(address=cluster.address)
    # Node to place the initial object.
    node_to_kill = cluster.add_node(num_cpus=1,
                                    resources={"node1": 1},
                                    object_store_memory=10**8)
    cluster.wait_for_nodes()

    @ray.remote
    def large_object(sema=None):
        if sema is not None:
            ray.get(sema.acquire.remote())
        return np.zeros(10**7, dtype=np.uint8)

    @ray.remote
    def dependent_task(x, sema):
        ray.get(sema.acquire.remote())
        return x

    def stats():
        info = memory_summary(cluster.address, line_wrap=False)
        print(info)
        info = info.split("\n")
        reconstructing_waiting = [
            line for line in info
            if "Attempt #2" in line and WAITING_FOR_DEPENDENCIES in line
        ]
        reconstructing_scheduled = [
            line for line in info
            if "Attempt #2" in line and WAITING_FOR_EXECUTION in line
        ]
        reconstructing_finished = [
            line for line in info if "Attempt #2" in line and FINISHED in line
        ]
        return (
            len(reconstructing_waiting),
            len(reconstructing_scheduled),
            len(reconstructing_finished),
        )

    sema = Semaphore.options(resources={"head": 1}).remote(value=0)
    obj = large_object.options(resources={"node1": 1}).remote(sema)
    x = dependent_task.options(resources={"node1": 1}).remote(obj, sema)
    ref = dependent_task.options(resources={"node1": 1}).remote(x, sema)
    ray.get(sema.release.remote())
    ray.get(sema.release.remote())
    ray.get(sema.release.remote())
    ray.get(ref)
    wait_for_condition(lambda: stats() == (0, 0, 0))
    del ref

    cluster.remove_node(node_to_kill, allow_graceful=False)
    node_to_kill = cluster.add_node(num_cpus=1,
                                    resources={"node1": 1},
                                    object_store_memory=10**8)

    ref = dependent_task.remote(x, sema)
    wait_for_condition(lambda: stats() == (1, 1, 0))
    ray.get(sema.release.remote())
    wait_for_condition(lambda: stats() == (0, 1, 1))
    ray.get(sema.release.remote())
    ray.get(sema.release.remote())
    ray.get(ref)
    wait_for_condition(lambda: stats() == (0, 0, 2))