def test_wait_makes_object_local(ray_start_cluster):
    """Check that ``ray.get`` and ``ray.wait`` each make an actor result local.

    The cluster gets one node with 0 CPUs and one node with 2 CPUs before the
    driver connects. For both APIs the test asserts that the object is not
    reported by the driver's core worker right after submission, and that it
    is reported once the call has returned.
    """
    cluster = ray_start_cluster
    cluster.add_node(num_cpus=0)
    cluster.add_node(num_cpus=2)
    ray.init(address=cluster.address)

    @ray.remote
    class Foo:
        def method(self):
            return np.zeros(1024 * 1024)

    def is_local(object_ref):
        # Ask the driver's core worker whether it currently holds the object.
        return ray.worker.global_worker.core_worker.object_exists(object_ref)

    actor = Foo.remote()

    # ray.get should make the object local.
    fetched_ref = actor.method.remote()
    assert not is_local(fetched_ref)
    ray.get(fetched_ref)
    assert is_local(fetched_ref)

    # ray.wait should make the object local as well.
    waited_ref = actor.method.remote()
    assert not is_local(waited_ref)
    ready, _ = ray.wait([waited_ref])
    assert len(ready) == 1
    assert is_local(waited_ref)
def test_profiling_api(ray_start_2_cpus):
    """Check that the expected profiling event categories reach ``ray.timeline()``.

    The test performs a put, a remote task containing a custom profiling
    span, a wait and a get, then polls the timeline until every expected
    event category has shown up.

    Raises:
        RayTestTimeoutException: if some expected categories are still
            missing after ``timeout_seconds``.
    """

    @ray.remote
    def f():
        with ray.profiling.profile(
            "custom_event", extra_data={"name": "custom name"}
        ):
            pass

    ray.put(1)
    object_ref = f.remote()
    ray.wait([object_ref])
    ray.get(object_ref)

    # Built once: the expected categories do not change between polls.
    expected_types = {
        "task",
        "task:deserialize_arguments",
        "task:execute",
        "task:store_outputs",
        "wait_for_function",
        "ray.get",
        "ray.put",
        "ray.wait",
        "submit_task",
        "fetch_and_run_function",
        # TODO (Alex) :https://github.com/ray-project/ray/pull/9346
        # "register_remote_function",
        "custom_event",  # This is the custom one from ray.profile.
    }

    # Wait until all of the profiling information appears in the profile
    # table. A monotonic clock keeps the deadline immune to wall-clock
    # adjustments while the test is polling.
    timeout_seconds = 20
    deadline = time.monotonic() + timeout_seconds
    while True:
        event_types = {event["cat"] for event in ray.timeline()}
        missing_types = expected_types - event_types
        if not missing_types:
            break
        if time.monotonic() > deadline:
            raise RayTestTimeoutException(
                "Timed out while waiting for information in "
                "profile table. Missing events: {}.".format(missing_types))
        # The profiling information only flushes once every second.
        time.sleep(1.1)
def test_wait():
    """Wait on every ref in ``objects`` and verify the values that come back.

    ``objects`` and ``num_wait_objects`` are read from the enclosing scope,
    which is not part of this block; presumably ``objects`` holds refs to
    ``0 .. num_wait_objects - 1`` in order -- confirm against the caller.
    """
    total = len(objects)
    completed, _ = ray.wait(objects, num_returns=total, timeout=1000.0)

    assert len(completed) == num_wait_objects
    expected_values = list(range(num_wait_objects))
    assert ray.get(completed) == expected_values
def test_actor_large_objects(ray_start_regular_shared):
    """Check that a large actor result becomes available locally after ``ray.wait``.

    The actor method sleeps for one second before returning a 10,000,000
    element array, so the driver's core worker should not report the object
    immediately after submission, but should report it once ``ray.wait`` has
    returned the ref as ready.
    """

    @ray.remote
    class Actor:
        def __init__(self):
            pass

        def f(self):
            time.sleep(1)
            return np.zeros(10000000)

    def held_locally(ref):
        return ray.worker.global_worker.core_worker.object_exists(ref)

    actor = Actor.remote()
    result_ref = actor.f.remote()

    # The method is still sleeping, so nothing should be available yet.
    assert not held_locally(result_ref)

    finished, _ = ray.wait([result_ref])
    assert len(finished) == 1
    assert held_locally(result_ref)

    value = ray.get(result_ref)
    assert isinstance(value, np.ndarray)
def test_schedule_actor_and_normal_task(ray_start_cluster):
    """Check that an actor waits for memory held by a running normal task.

    The single node offers 1 GiB of memory while both the task and the actor
    request 600 MiB, so they cannot hold their resources at the same time.
    The actor's method result must therefore not be ready while the task is
    blocked, and must resolve once the task has been released.
    """
    cluster = ray_start_cluster
    cluster.add_node(
        memory=1024 ** 3, _system_config={"gcs_actor_scheduling_enabled": True}
    )
    ray.init(address=cluster.address)
    cluster.wait_for_nodes()

    # The same resource request is used for the actor and the normal task.
    requirements = {"memory": 600 * 1024 ** 2, "num_cpus": 0.01}

    @ray.remote(**requirements)
    class Foo:
        def method(self):
            return 2

    @ray.remote(**requirements)
    def fun(release, started):
        # Announce that the task is running, then block until released.
        started.send.remote()
        ray.get(release.wait.remote())
        return 1

    release_signal = SignalActor.remote()
    started_signal = SignalActor.remote()

    task_ref = fun.remote(release_signal, started_signal)
    # Make sure the normal task is executing; from here on it is blocked.
    ray.get(started_signal.wait.remote())

    # Try to create the actor and make sure it is not created for the time
    # being.
    foo = Foo.remote()
    actor_ref = foo.method.remote()
    ready, pending = ray.wait([actor_ref], timeout=2)
    assert len(ready) == 0 and len(pending) == 1

    # Send a signal to unblock the normal task execution.
    ray.get(release_signal.send.remote())

    # Check the result of the normal task.
    assert ray.get(task_ref) == 1

    # Make sure the actor is created.
    assert ray.get(actor_ref) == 2
def test_actor_recursive(ray_start_regular_shared):
    """Check a three-deep chain of actors that forward calls to one another.

    ``c`` delegates to ``b``, which delegates to ``a``; ``a`` has no delegate
    and doubles its argument. The results are verified both through a direct
    ``ray.get`` and through ``ray.wait`` followed by ``ray.get``.
    """

    @ray.remote
    class Actor:
        def __init__(self, delegate=None):
            self.delegate = delegate

        def f(self, x):
            if not self.delegate:
                return x * 2
            # Forward the call down the chain and block on its result.
            return ray.get(self.delegate.f.remote(x))

    a = Actor.remote()
    b = Actor.remote(a)
    c = Actor.remote(b)

    inputs = range(100)
    doubled = [x * 2 for x in inputs]

    direct = ray.get([c.f.remote(i) for i in inputs])
    assert direct == doubled

    ready, _ = ray.wait([c.f.remote(i) for i in inputs], num_returns=100)
    waited = ray.get(ready)
    assert waited == doubled
def test_worker_lease_reply_with_resources(ray_start_cluster_enabled):
    """Check that an actor lands on the second node while a task fills the first.

    A long-running normal task requesting 1500 MiB is started first; only the
    2000 MiB node is large enough for it. An actor requesting 800 MiB is then
    created, and its method must report the second node's id within the
    ``ray.wait`` timeout.
    """
    mib = 1024**2
    system_config = {
        "gcs_resource_report_poll_period_ms": 1000000,
        "gcs_actor_scheduling_enabled": True,
    }

    cluster = ray_start_cluster_enabled
    cluster.add_node(
        memory=2000 * mib,
        num_cpus=1,
        _system_config=system_config,
    )
    node2 = cluster.add_node(memory=1000 * mib, num_cpus=1)
    ray.init(address=cluster.address)
    cluster.wait_for_nodes()

    @ray.remote(memory=1500 * mib, num_cpus=0.01)
    def fun(started):
        started.send.remote()
        time.sleep(30)
        return 0

    signal = SignalActor.remote()
    fun.remote(signal)
    # Make sure that `fun` is running before the actor is created.
    ray.get(signal.wait.remote())

    @ray.remote(memory=800 * mib, num_cpus=0.01)
    class Foo:
        def method(self):
            return ray.worker.global_worker.node.unique_id

    foo1 = Foo.remote()
    node_id_ref = foo1.method.remote()
    ready, pending = ray.wait([node_id_ref], timeout=10)

    # If RequestWorkerLeaseReply carries normal task resources, GCS will
    # schedule foo1 to node2. Otherwise GCS would keep trying to schedule
    # foo1 to node1 and keep getting rejected.
    assert len(ready) == 1 and len(pending) == 0
    assert ray.get(node_id_ref) == node2.unique_id
def background_thread(self, wait_objects):
    """Exercise wait, remote calls and put/get, then record the outcome.

    Appends ``"ok"`` to ``self.thread_results`` when every check passes, or
    the raised exception otherwise; the append happens under ``self.lock``.
    ``echo`` is resolved from an enclosing scope that is not part of this
    block.

    Args:
        wait_objects: object refs that are all expected to become ready.
    """
    try:
        # Test wait.
        expected_ready = len(wait_objects)
        ready, _ = ray.wait(
            wait_objects, num_returns=expected_ready, timeout=1000.0
        )
        assert len(ready) == expected_ready

        batch_size = 10
        expected_values = list(range(batch_size))
        for _ in range(20):
            # Test remote call.
            call_refs = [echo.remote(i) for i in range(batch_size)]
            assert ray.get(call_refs) == expected_values
            # Test put and get.
            put_refs = [ray.put(i) for i in range(batch_size)]
            assert ray.get(put_refs) == expected_values
            time.sleep(random.randint(0, 10) / 1000.0)
    except Exception as err:
        # Failures are reported through thread_results, not raised.
        outcome = err
    else:
        outcome = "ok"

    with self.lock:
        self.thread_results.append(outcome)
def test_wait_cluster(ray_start_cluster):
    """Check that ``ray.wait`` with ``timeout=0`` reports finished remote tasks.

    A first batch of tasks is timed to estimate how long a batch takes. A
    second batch is then submitted, the test sleeps for twice that duration,
    and a non-blocking ``ray.wait`` must report nothing as unready.
    """
    cluster = ray_start_cluster
    cluster.add_node(num_cpus=1, resources={"RemoteResource": 1})
    cluster.add_node(num_cpus=1, resources={"RemoteResource": 1})
    ray.init(address=cluster.address)

    @ray.remote(resources={"RemoteResource": 1})
    def f():
        return

    # Make sure we have enough workers on the remote nodes to execute some
    # tasks. The duration is measured with a monotonic clock so that a
    # wall-clock adjustment cannot produce a negative or inflated sleep below.
    tasks = [f.remote() for _ in range(10)]
    start = time.monotonic()
    ray.get(tasks)
    elapsed = time.monotonic() - start

    # Submit some more tasks that can only be executed on the remote nodes.
    tasks = [f.remote() for _ in range(10)]
    # Sleep for a bit to let the tasks finish.
    time.sleep(elapsed * 2)
    _, unready = ray.wait(tasks, num_returns=len(tasks), timeout=0)
    # All remote tasks should have finished.
    assert len(unready) == 0
def test_wait(ray_start_regular_shared):
    """Cover the basic contract of ``ray.wait``.

    Exercised cases: default ``num_returns``, waiting for everything, a
    timeout that expires with one task still running, duplicate refs, an
    empty list, ``num_returns`` without a timeout, and non-list or non-ref
    arguments.
    """

    @ray.remote
    def f(delay):
        time.sleep(delay)

    # By default a single ref is returned as ready.
    refs = [f.remote(0) for _ in range(4)]
    ready, remaining = ray.wait(refs)
    assert len(ready) == 1
    assert len(remaining) == 3

    # Asking for all four leaves nothing behind.
    ready, remaining = ray.wait(refs, num_returns=4)
    assert set(ready) == set(refs)
    assert remaining == []

    # The 5 second task cannot finish within the 0.5 second timeout.
    refs = [f.remote(0), f.remote(5)]
    ready, remaining = ray.wait(refs, timeout=0.5, num_returns=2)
    assert len(ready) == 1
    assert len(remaining) == 1

    # Verify that calling wait with duplicate object refs throws an
    # exception.
    duplicate = ray.put(1)
    with pytest.raises(Exception):
        ray.wait([duplicate, duplicate])

    # Make sure it is possible to call wait with an empty list.
    ready, remaining = ray.wait([])
    assert ready == []
    assert remaining == []

    # Test semantics of num_returns with no timeout.
    put_refs = [ray.put(i) for i in range(10)]
    found, rest = ray.wait(put_refs, num_returns=2)
    assert len(found) == 2
    assert len(rest) == 8

    # Verify that incorrect usage raises a TypeError: a bare ref, a bare
    # int, and a list whose element is not an object ref.
    bare_ref = ray.put(1)
    for bad_argument in (bare_ref, 1, [1]):
        with pytest.raises(TypeError):
            ray.wait(bad_argument)
def g(input_list):
    """Call ``ray.wait`` on the first object ref in ``input_list``.

    Args:
        input_list: expected to be a list containing one object ref.
    """
    first_ref = input_list[0]
    ray.wait([first_ref])
# `ClientObjectRef(...)` print(ref2) # `236` print(ray.get(ref2)) ref3 = fact.remote(20) # `ClientObjectRef(...)` print(ref3) # `2432902008176640000` print(ray.get(ref3)) # Reuse the cached ClientRemoteFunc object ref4 = fact.remote(5) # `120` print(ray.get(ref4)) ref5 = fact.remote(10) print([ref2, ref3, ref4, ref5]) # should return ref2, ref3, ref4 res = ray.wait([ref5, ref2, ref3, ref4], num_returns=3) print(res) assert [ref2, ref3, ref4] == res[0] assert [ref5] == res[1] # should return ref2, ref3, ref4, ref5 res = ray.wait([ref2, ref3, ref4, ref5], num_returns=4) print(res) assert [ref2, ref3, ref4, ref5] == res[0] assert [] == res[1]