Example #1
0
def test_wait_makes_object_local(ray_start_cluster):
    cluster = ray_start_cluster
    cluster.add_node(num_cpus=0)
    cluster.add_node(num_cpus=2)
    ray.init(address=cluster.address)

    @ray.remote
    class Foo:
        def method(self):
            return np.zeros(1024 * 1024)

    a = Foo.remote()

    # Test get makes the object local.
    x_id = a.method.remote()
    assert not ray.worker.global_worker.core_worker.object_exists(x_id)
    ray.get(x_id)
    assert ray.worker.global_worker.core_worker.object_exists(x_id)

    # Test wait makes the object local.
    x_id = a.method.remote()
    assert not ray.worker.global_worker.core_worker.object_exists(x_id)
    ok, _ = ray.wait([x_id])
    assert len(ok) == 1
    assert ray.worker.global_worker.core_worker.object_exists(x_id)
Example #2
0
def test_profiling_api(ray_start_2_cpus):
    @ray.remote
    def f():
        with ray.profiling.profile("custom_event",
                                   extra_data={"name": "custom name"}):
            pass

    ray.put(1)
    object_ref = f.remote()
    ray.wait([object_ref])
    ray.get(object_ref)

    # Wait until all of the profiling information appears in the profile
    # table.
    timeout_seconds = 20
    start_time = time.time()
    while True:
        profile_data = ray.timeline()
        event_types = {event["cat"] for event in profile_data}
        expected_types = [
            "task",
            "task:deserialize_arguments",
            "task:execute",
            "task:store_outputs",
            "wait_for_function",
            "ray.get",
            "ray.put",
            "ray.wait",
            "submit_task",
            "fetch_and_run_function",
            # TODO (Alex) :https://github.com/ray-project/ray/pull/9346
            # "register_remote_function",
            "custom_event",  # This is the custom one from ray.profile.
        ]

        if all(expected_type in event_types
               for expected_type in expected_types):
            break

        if time.time() - start_time > timeout_seconds:
            raise RayTestTimeoutException(
                "Timed out while waiting for information in "
                "profile table. Missing events: {}.".format(
                    set(expected_types) - set(event_types)))

        # The profiling information only flushes once every second.
        time.sleep(1.1)
Example #3
0
 def test_wait():
     ready, _ = ray.wait(
         objects,
         num_returns=len(objects),
         timeout=1000.0,
     )
     assert len(ready) == num_wait_objects
     assert ray.get(ready) == list(range(num_wait_objects))
Example #4
0
def test_actor_large_objects(ray_start_regular_shared):
    @ray.remote
    class Actor:
        def __init__(self):
            pass

        def f(self):
            time.sleep(1)
            return np.zeros(10000000)

    a = Actor.remote()
    obj_ref = a.f.remote()
    assert not ray.worker.global_worker.core_worker.object_exists(obj_ref)
    done, _ = ray.wait([obj_ref])
    assert len(done) == 1
    assert ray.worker.global_worker.core_worker.object_exists(obj_ref)
    assert isinstance(ray.get(obj_ref), np.ndarray)
Example #5
0
def test_schedule_actor_and_normal_task(ray_start_cluster):
    cluster = ray_start_cluster
    cluster.add_node(
        memory=1024 ** 3, _system_config={"gcs_actor_scheduling_enabled": True}
    )
    ray.init(address=cluster.address)
    cluster.wait_for_nodes()

    @ray.remote(memory=600 * 1024 ** 2, num_cpus=0.01)
    class Foo:
        def method(self):
            return 2

    @ray.remote(memory=600 * 1024 ** 2, num_cpus=0.01)
    def fun(singal1, signal_actor2):
        signal_actor2.send.remote()
        ray.get(singal1.wait.remote())
        return 1

    singal1 = SignalActor.remote()
    signal2 = SignalActor.remote()

    o1 = fun.remote(singal1, signal2)
    # Make sure the normal task is executing.
    ray.get(signal2.wait.remote())

    # The normal task is blocked now.
    # Try to create actor and make sure this actor is not created for the time
    # being.
    foo = Foo.remote()
    o2 = foo.method.remote()
    ready_list, remaining_list = ray.wait([o2], timeout=2)
    assert len(ready_list) == 0 and len(remaining_list) == 1

    # Send a signal to unblock the normal task execution.
    ray.get(singal1.send.remote())

    # Check the result of normal task.
    assert ray.get(o1) == 1

    # Make sure the actor is created.
    assert ray.get(o2) == 2
Example #6
0
def test_actor_recursive(ray_start_regular_shared):
    @ray.remote
    class Actor:
        def __init__(self, delegate=None):
            self.delegate = delegate

        def f(self, x):
            if self.delegate:
                return ray.get(self.delegate.f.remote(x))
            return x * 2

    a = Actor.remote()
    b = Actor.remote(a)
    c = Actor.remote(b)

    result = ray.get([c.f.remote(i) for i in range(100)])
    assert result == [x * 2 for x in range(100)]

    result, _ = ray.wait([c.f.remote(i) for i in range(100)], num_returns=100)
    result = ray.get(result)
    assert result == [x * 2 for x in range(100)]
Example #7
0
def test_worker_lease_reply_with_resources(ray_start_cluster_enabled):
    cluster = ray_start_cluster_enabled
    cluster.add_node(
        memory=2000 * 1024**2,
        num_cpus=1,
        _system_config={
            "gcs_resource_report_poll_period_ms": 1000000,
            "gcs_actor_scheduling_enabled": True,
        },
    )
    node2 = cluster.add_node(memory=1000 * 1024**2, num_cpus=1)
    ray.init(address=cluster.address)
    cluster.wait_for_nodes()

    @ray.remote(memory=1500 * 1024**2, num_cpus=0.01)
    def fun(signal):
        signal.send.remote()
        time.sleep(30)
        return 0

    signal = SignalActor.remote()
    fun.remote(signal)
    # Make sure that the `fun` is running.
    ray.get(signal.wait.remote())

    @ray.remote(memory=800 * 1024**2, num_cpus=0.01)
    class Foo:
        def method(self):
            return ray.worker.global_worker.node.unique_id

    foo1 = Foo.remote()
    o1 = foo1.method.remote()
    ready_list, remaining_list = ray.wait([o1], timeout=10)
    # If RequestWorkerLeaseReply carries normal task resources,
    # GCS will then schedule foo1 to node2. Otherwise,
    # GCS would keep trying to schedule foo1 to
    # node1 and getting rejected.
    assert len(ready_list) == 1 and len(remaining_list) == 0
    assert ray.get(o1) == node2.unique_id
Example #8
0
 def background_thread(self, wait_objects):
     try:
         # Test wait
         ready, _ = ray.wait(
             wait_objects,
             num_returns=len(wait_objects),
             timeout=1000.0,
         )
         assert len(ready) == len(wait_objects)
         for _ in range(20):
             num = 10
             # Test remote call
             results = [echo.remote(i) for i in range(num)]
             assert ray.get(results) == list(range(num))
             # Test put and get
             objects = [ray.put(i) for i in range(num)]
             assert ray.get(objects) == list(range(num))
             time.sleep(random.randint(0, 10) / 1000.0)
     except Exception as e:
         with self.lock:
             self.thread_results.append(e)
     else:
         with self.lock:
             self.thread_results.append("ok")
Example #9
0
def test_wait_cluster(ray_start_cluster):
    cluster = ray_start_cluster
    cluster.add_node(num_cpus=1, resources={"RemoteResource": 1})
    cluster.add_node(num_cpus=1, resources={"RemoteResource": 1})
    ray.init(address=cluster.address)

    @ray.remote(resources={"RemoteResource": 1})
    def f():
        return

    # Make sure we have enough workers on the remote nodes to execute some
    # tasks.
    tasks = [f.remote() for _ in range(10)]
    start = time.time()
    ray.get(tasks)
    end = time.time()

    # Submit some more tasks that can only be executed on the remote nodes.
    tasks = [f.remote() for _ in range(10)]
    # Sleep for a bit to let the tasks finish.
    time.sleep((end - start) * 2)
    _, unready = ray.wait(tasks, num_returns=len(tasks), timeout=0)
    # All remote tasks should have finished.
    assert len(unready) == 0
Example #10
0
def test_wait(ray_start_regular_shared):
    @ray.remote
    def f(delay):
        time.sleep(delay)
        return

    object_refs = [f.remote(0), f.remote(0), f.remote(0), f.remote(0)]
    ready_ids, remaining_ids = ray.wait(object_refs)
    assert len(ready_ids) == 1
    assert len(remaining_ids) == 3
    ready_ids, remaining_ids = ray.wait(object_refs, num_returns=4)
    assert set(ready_ids) == set(object_refs)
    assert remaining_ids == []

    object_refs = [f.remote(0), f.remote(5)]
    ready_ids, remaining_ids = ray.wait(object_refs,
                                        timeout=0.5,
                                        num_returns=2)
    assert len(ready_ids) == 1
    assert len(remaining_ids) == 1

    # Verify that calling wait with duplicate object refs throws an
    # exception.
    x = ray.put(1)
    with pytest.raises(Exception):
        ray.wait([x, x])

    # Make sure it is possible to call wait with an empty list.
    ready_ids, remaining_ids = ray.wait([])
    assert ready_ids == []
    assert remaining_ids == []

    # Test semantics of num_returns with no timeout.
    obj_refs = [ray.put(i) for i in range(10)]
    (found, rest) = ray.wait(obj_refs, num_returns=2)
    assert len(found) == 2
    assert len(rest) == 8

    # Verify that incorrect usage raises a TypeError.
    x = ray.put(1)
    with pytest.raises(TypeError):
        ray.wait(x)
    with pytest.raises(TypeError):
        ray.wait(1)
    with pytest.raises(TypeError):
        ray.wait([1])
Example #11
0
 def g(input_list):
     # The argument input_list should be a list containing one object ref.
     ray.wait([input_list[0]])
Example #12
0
# `ClientObjectRef(...)`
print(ref2)
# `236`
print(ray.get(ref2))

ref3 = fact.remote(20)
# `ClientObjectRef(...)`
print(ref3)
# `2432902008176640000`
print(ray.get(ref3))

# Reuse the cached ClientRemoteFunc object
ref4 = fact.remote(5)
# `120`
print(ray.get(ref4))

ref5 = fact.remote(10)

print([ref2, ref3, ref4, ref5])
# should return ref2, ref3, ref4
res = ray.wait([ref5, ref2, ref3, ref4], num_returns=3)
print(res)
assert [ref2, ref3, ref4] == res[0]
assert [ref5] == res[1]

# should return ref2, ref3, ref4, ref5
res = ray.wait([ref2, ref3, ref4, ref5], num_returns=4)
print(res)
assert [ref2, ref3, ref4, ref5] == res[0]
assert [] == res[1]