def test_wait(ray_start_regular_shared):
    """Exercise ``ray.wait`` through the Ray client against a local server.

    Starts a client server on ``localhost:50051`` in test mode, connects to
    it, and checks that ``ray.wait``:

    * reports a single put object as ready with nothing remaining,
    * splits two put objects into one ready and one remaining ref (in
      either order), both of which resolve to the values that were put,
    * raises for a ``ClientObjectRef`` that was never stored, and raises
      ``AssertionError`` for arguments that are not a list of refs.

    The client connection and the server are released in ``finally``
    clauses so that a failing assertion does not leave the client connected
    or the server running for whatever test executes next.
    """
    server = ray_client_server.serve("localhost:50051", test_mode=True)
    try:
        ray.connect("localhost:50051")
        try:
            objectref = ray.put("hello world")
            ready, remaining = ray.wait([objectref])
            assert remaining == []
            retval = ray.get(ready[0])
            assert retval == "hello world"

            objectref2 = ray.put(5)
            ready, remaining = ray.wait([objectref, objectref2])
            # Either ref may be reported first; only the split matters.
            assert (ready, remaining) == ([objectref], [objectref2]) or \
                (ready, remaining) == ([objectref2], [objectref])
            ready_retval = ray.get(ready[0])
            remaining_retval = ray.get(remaining[0])
            assert (ready_retval, remaining_retval) == ("hello world", 5) \
                or (ready_retval, remaining_retval) == (5, "hello world")

            with pytest.raises(Exception):
                # Reference not in the object store.
                ray.wait([ClientObjectRef("blabla")])
            with pytest.raises(AssertionError):
                ray.wait("blabla")
            with pytest.raises(AssertionError):
                ray.wait(ClientObjectRef("blabla"))
            with pytest.raises(AssertionError):
                ray.wait(["blabla"])
        finally:
            # Only reached once connect() succeeded, so there is always a
            # connection to drop here.
            ray.disconnect()
    finally:
        server.stop(0)
def test_wait(ray_start_regular_shared):
    """Check ``ray.wait`` on the object yielded by ``ray_start_client_server``.

    Covers a single ready object, a ready/remaining split of two objects
    (accepted in either order), and the errors raised for a ref that was
    never stored and for arguments that are not a list of refs.
    """
    with ray_start_client_server() as ray:
        hello_ref = ray.put("hello world")
        done, pending = ray.wait([hello_ref])
        assert pending == []
        assert ray.get(done[0]) == "hello world"

        five_ref = ray.put(5)
        done, pending = ray.wait([hello_ref, five_ref])
        # Either ref may come back first; accept both splits.
        assert (done, pending) in (
            ([hello_ref], [five_ref]),
            ([five_ref], [hello_ref]),
        )

        done_value = ray.get(done[0])
        pending_value = ray.get(pending[0])
        assert (done_value, pending_value) in (
            ("hello world", 5),
            (5, "hello world"),
        )

        with pytest.raises(Exception):
            # Reference not in the object store.
            ray.wait([ClientObjectRef("blabla")])

        # Anything other than a list of refs is rejected by an assertion.
        with pytest.raises(AssertionError):
            ray.wait("blabla")
        with pytest.raises(AssertionError):
            ray.wait(ClientObjectRef("blabla"))
        with pytest.raises(AssertionError):
            ray.wait(["blabla"])
def test_wait_makes_object_local(ray_start_cluster):
    """Verify that ``ray.get`` and ``ray.wait`` each make an object local.

    The cluster gets a 0-CPU node and a 2-CPU node before the driver
    connects.  For each of the two calls, the actor's result must be absent
    from the driver's core worker beforehand and present afterwards.
    """
    cluster = ray_start_cluster
    cluster.add_node(num_cpus=0)
    cluster.add_node(num_cpus=2)
    ray.init(address=cluster.address)

    @ray.remote
    class Foo:
        def method(self):
            return np.zeros(1024 * 1024)

    def is_local(ref):
        # Ask the driver's core worker whether it holds the object.
        return ray.worker.global_worker.core_worker.object_exists(ref)

    actor = Foo.remote()

    # Test get makes the object local.
    result_ref = actor.method.remote()
    assert not is_local(result_ref)
    ray.get(result_ref)
    assert is_local(result_ref)

    # Test wait makes the object local.
    result_ref = actor.method.remote()
    assert not is_local(result_ref)
    ready, _ = ray.wait([result_ref])
    assert len(ready) == 1
    assert is_local(result_ref)
def test_submit_api(shutdown_only):
    """Exercise the explicit ``_remote`` submission API for tasks and actors.

    Checks ``num_returns`` from 0 to 4, per-call resource requests, an
    infeasible resource request that must not become ready, and actor
    creation / method calls with ``args`` and ``kwargs``.
    """
    ray.init(num_cpus=2, num_gpus=1, resources={"Custom": 1})

    @ray.remote
    def f(n):
        return list(range(n))

    @ray.remote
    def g():
        return ray.get_gpu_ids()

    # num_returns=0 yields no ref at all.
    assert f._remote([0], num_returns=0) is None

    ref1 = f._remote(args=[1], num_returns=1)
    assert ray.get(ref1) == [0]

    ref1, ref2 = f._remote(args=[2], num_returns=2)
    assert ray.get([ref1, ref2]) == [0, 1]

    ref1, ref2, ref3 = f._remote(args=[3], num_returns=3)
    assert ray.get([ref1, ref2, ref3]) == [0, 1, 2]

    gpu_task = g._remote(
        args=[], num_cpus=1, num_gpus=1, resources={"Custom": 1})
    assert ray.get(gpu_task) == [0]

    # This request names a resource the cluster was not started with.
    infeasible_ref = g._remote(args=[], resources={"NonexistentCustom": 1})
    assert ray.get(g._remote()) == []
    ready, pending = ray.wait([infeasible_ref], timeout=0.05)
    assert len(ready) == 0
    assert len(pending) == 1

    @ray.remote
    class Actor:
        def __init__(self, x, y=0):
            self.x = x
            self.y = y

        def method(self, a, b=0):
            return self.x, self.y, a, b

        def gpu_ids(self):
            return ray.get_gpu_ids()

    @ray.remote
    class Actor2:
        def __init__(self):
            pass

        def method(self):
            pass

    actor = Actor._remote(
        args=[0],
        kwargs={"y": 1},
        num_gpus=1,
        resources={"Custom": 1},
    )

    plain_actor = Actor2._remote()
    ray.get(plain_actor.method._remote())

    ref1, ref2, ref3, ref4 = actor.method._remote(
        args=["test"],
        kwargs={"b": 2},
        num_returns=4,
    )
    assert ray.get([ref1, ref2, ref3, ref4]) == [0, 1, "test", 2]
def test_atexit_handler(ray_start_regular_shared, exit_condition):
    """Check which actor shutdown paths run ``atexit`` handlers.

    The actor registers an ``atexit`` callback that writes a known string
    to a temporary file.  The actor is then ended according to
    ``exit_condition`` and the file is inspected: ``ray.kill`` must leave
    it unwritten, every other recognised condition must eventually
    produce the expected content.
    """

    @ray.remote
    class A():
        def __init__(self, tmpfile, data):
            import atexit

            def write_marker(*args, **kwargs):
                with open(tmpfile, "w") as out:
                    out.write(data)
                    out.flush()

            atexit.register(write_marker)

        def ready(self):
            pass

        def exit(self):
            ray.actor.exit_actor()

    payload = "hello"
    marker_file = tempfile.NamedTemporaryFile()
    actor = A.remote(marker_file.name, payload)
    # Block until the actor is constructed (and the handler registered).
    ray.get(actor.ready.remote())

    if exit_condition == "out_of_scope":
        del actor
    elif exit_condition == "__ray_terminate__":
        ray.wait([actor.__ray_terminate__.remote()])
    elif exit_condition == "ray.actor.exit_actor":
        ray.wait([actor.exit.remote()])
    elif exit_condition == "ray.kill":
        ray.kill(actor)
    else:
        assert False, "Unrecognized condition"

    def check_file_written():
        with open(marker_file.name) as handle:
            return handle.read() == payload

    # ray.kill() should not trigger atexit handlers, all other methods should.
    if exit_condition != "ray.kill":
        wait_for_condition(check_file_written)
    else:
        assert not check_file_written()
def test_wait():
    """Wait for every ref in ``objects`` and check the resolved values.

    ``objects`` and ``num_wait_objects`` come from the enclosing scope.
    """
    expected_count = len(objects)
    done, _ = ray.wait(objects, num_returns=expected_count, timeout=1000.0)
    assert len(done) == num_wait_objects
    values = ray.get(done)
    assert values == list(range(num_wait_objects))
def test_profiling_api(ray_start_2_cpus):
    """Check that ``ray.timeline()`` eventually reports the expected events.

    Runs a task containing a custom ``ray.profile`` span plus a put, wait
    and get on the driver, then polls the timeline until every expected
    event category is present or 20 seconds have elapsed.
    """

    @ray.remote
    def f():
        with ray.profile("custom_event", extra_data={"name": "custom name"}):
            pass

    ray.put(1)
    task_ref = f.remote()
    ray.wait([task_ref])
    ray.get(task_ref)

    expected_types = [
        "task",
        "task:deserialize_arguments",
        "task:execute",
        "task:store_outputs",
        "wait_for_function",
        "ray.get",
        "ray.put",
        "ray.wait",
        "submit_task",
        "fetch_and_run_function",
        # TODO (Alex) :https://github.com/ray-project/ray/pull/9346
        # "register_remote_function",
        "custom_event",  # This is the custom one from ray.profile.
    ]

    # Wait until all of the profiling information appears in the profile
    # table.
    timeout_seconds = 20
    start_time = time.time()
    while True:
        profile_data = ray.timeline()
        event_types = {event["cat"] for event in profile_data}

        still_missing = [
            name for name in expected_types if name not in event_types
        ]
        if not still_missing:
            break

        if time.time() - start_time > timeout_seconds:
            raise RayTestTimeoutException(
                "Timed out while waiting for information in "
                "profile table. Missing events: {}.".format(
                    set(expected_types) - set(event_types)))

        # The profiling information only flushes once every second.
        time.sleep(1.1)
def test_remote_functions(ray_start_regular_shared):
    """Run remote functions and ``ray.wait`` through the Ray client.

    Starts a client server on ``localhost:50051``, connects to it, and
    checks a simple task (``plus2``), a recursive task that submits itself
    (``fact``), reuse of an already-declared remote function, and the
    ready/remaining split returned by ``ray.wait``.

    The client connection and the server are released in ``finally``
    clauses so that a failing assertion does not leave the client connected
    or the server running for whatever test executes next.
    """
    server = ray_client_server.serve("localhost:50051")
    try:
        ray.connect("localhost:50051")
        try:

            @ray.remote
            def plus2(x):
                return x + 2

            @ray.remote
            def fact(x):
                print(x, type(fact))
                if x <= 0:
                    return 1
                # This hits the "nested tasks" issue
                # https://github.com/ray-project/ray/issues/3644
                # So we're on the right track!
                return ray.get(fact.remote(x - 1)) * x

            ref2 = plus2.remote(234)
            # `236`
            assert ray.get(ref2) == 236

            ref3 = fact.remote(20)
            # `2432902008176640000`
            assert ray.get(ref3) == 2_432_902_008_176_640_000

            # Reuse the cached ClientRemoteFunc object
            ref4 = fact.remote(5)
            assert ray.get(ref4) == 120

            # Test ray.wait()
            ref5 = fact.remote(10)
            # should return ref2, ref3, ref4
            res = ray.wait([ref5, ref2, ref3, ref4], num_returns=3)
            assert [ref2, ref3, ref4] == res[0]
            assert [ref5] == res[1]
            assert ray.get(res[0]) == [236, 2_432_902_008_176_640_000, 120]

            # should return ref2, ref3, ref4, ref5
            res = ray.wait([ref2, ref3, ref4, ref5], num_returns=4)
            assert [ref2, ref3, ref4, ref5] == res[0]
            assert [] == res[1]
            assert ray.get(res[0]) == [
                236, 2_432_902_008_176_640_000, 120, 3628800
            ]
        finally:
            # Only reached once connect() succeeded, so there is always a
            # connection to drop here.
            ray.disconnect()
    finally:
        server.stop(0)
def test_fetch_local(ray_start_cluster_head):
    """Check how ``fetch_local`` changes what ``ray.wait`` reports as ready.

    A second node with a 75 MiB object store is added.  A remote task
    blocks on a signal actor and then returns a large array, while the
    driver holds another large array of the same size (``local_ref``).
    """
    cluster = ray_start_cluster_head
    cluster.add_node(num_cpus=2, object_store_memory=75 * 1024 * 1024)

    signal_actor = ray.test_utils.SignalActor.remote()

    @ray.remote
    def put():
        ray.wait([signal_actor.wait.remote()])
        return np.random.rand(5 * 1024 * 1024)  # 40 MB data

    local_ref = ray.put(np.random.rand(5 * 1024 * 1024))
    remote_ref = put.remote()

    # Data is not ready in any node
    ready, pending = ray.wait([remote_ref], timeout=2, fetch_local=False)
    assert (len(ready), len(pending)) == (0, 1)

    # Release the task blocked on the signal actor.
    ray.wait([signal_actor.send.remote()])

    # Data is ready in some node, but not local node.
    ready, pending = ray.wait([remote_ref], fetch_local=False)
    assert (len(ready), len(pending)) == (1, 0)

    # With fetch_local=True the wait is expected to time out while
    # local_ref is still alive.
    # NOTE(review): presumably because the local store cannot hold both
    # arrays at once - confirm against the head node's store size.
    ready, pending = ray.wait([remote_ref], timeout=2, fetch_local=True)
    assert (len(ready), len(pending)) == (0, 1)

    del local_ref

    ready, pending = ray.wait([remote_ref], fetch_local=True)
    assert (len(ready), len(pending)) == (1, 0)
def test_wait_timing(shutdown_only):
    """Check that ``ray.wait`` honours a 0.2 s timeout on an unfinished task.

    The task sleeps for one second, so the wait must give up after the
    timeout (between 0.2 s and 0.3 s of wall-clock time) with nothing ready.
    """
    ray.init(num_cpus=2)

    @ray.remote
    def f():
        time.sleep(1)

    pending_task = f.remote()

    began = time.time()
    done, not_done = ray.wait([pending_task], timeout=0.2)
    elapsed = time.time() - began

    assert 0.2 < elapsed < 0.3
    assert len(done) == 0
    assert len(not_done) == 1
def test_actor_large_objects(ray_start_regular_shared):
    """Check that ``ray.wait`` makes a large actor result available locally.

    The actor method sleeps one second and returns a 10,000,000-element
    array.  The result must be absent from the driver's core worker before
    the wait, present afterwards, and resolve to a NumPy array.
    """

    @ray.remote
    class Actor:
        def __init__(self):
            pass

        def f(self):
            time.sleep(1)
            return np.zeros(10000000)

    def is_local(ref):
        # Ask the driver's core worker whether it holds the object.
        return ray.worker.global_worker.core_worker.object_exists(ref)

    actor = Actor.remote()
    result_ref = actor.f.remote()
    assert not is_local(result_ref)

    finished, _ = ray.wait([result_ref])
    assert len(finished) == 1
    assert is_local(result_ref)

    value = ray.get(result_ref)
    assert isinstance(value, np.ndarray)
def test_actor_recursive(ray_start_regular_shared):
    """Check a chain of actors that delegate calls to one another.

    ``c`` forwards to ``b``, which forwards to ``a``, which doubles its
    argument.  The chain is exercised once via ``ray.get`` directly and
    once via ``ray.wait`` followed by ``ray.get``.
    """

    @ray.remote
    class Actor:
        def __init__(self, delegate=None):
            self.delegate = delegate

        def f(self, x):
            if self.delegate:
                return ray.get(self.delegate.f.remote(x))
            return x * 2

    a = Actor.remote()
    b = Actor.remote(a)
    c = Actor.remote(b)

    num_calls = 100
    doubled = [i * 2 for i in range(num_calls)]

    values = ray.get([c.f.remote(i) for i in range(num_calls)])
    assert values == doubled

    ready_refs, _ = ray.wait(
        [c.f.remote(i) for i in range(num_calls)],
        num_returns=100,
    )
    values = ray.get(ready_refs)
    assert values == doubled
def test_fair_queueing(shutdown_only):
    """Check that 1000 nested task chains finish on one CPU.

    Ray is started with a single CPU and ``fair_queueing_enabled`` set.
    Each ``f`` blocks on a ``g``, which blocks on an ``h``.
    """
    ray.init(num_cpus=1, _system_config={"fair_queueing_enabled": 1})

    @ray.remote
    def h():
        return 0

    @ray.remote
    def g():
        return ray.get(h.remote())

    @ray.remote
    def f():
        return ray.get(g.remote())

    # This will never finish without fair queueing of {f, g, h}:
    # https://github.com/ray-project/ray/issues/3644
    submitted = [f.remote() for _ in range(1000)]
    ready, _ = ray.wait(submitted, timeout=60.0, num_returns=1000)
    num_ready = len(ready)
    assert num_ready == 1000, len(ready)
def background_thread(self, wait_objects):
    """Run wait / remote-call / put-get checks and record the outcome.

    Appends ``"ok"`` to ``self.thread_results`` when every check passes,
    or the raised exception otherwise.  The append happens while holding
    ``self.lock``.

    Args:
        wait_objects: Refs that must all become ready in the initial
            ``ray.wait`` call.
    """
    try:
        # Test wait
        expected_ready = len(wait_objects)
        ready, _ = ray.wait(
            wait_objects,
            num_returns=expected_ready,
            timeout=1000.0,
        )
        assert len(ready) == len(wait_objects)

        num = 10
        for _ in range(20):
            # Test remote call
            echoed = ray.get([echo.remote(i) for i in range(num)])
            assert echoed == list(range(num))
            # Test put and get
            fetched = ray.get([ray.put(i) for i in range(num)])
            assert fetched == list(range(num))
            # Random pause of 0-10 ms between rounds.
            time.sleep(random.randint(0, 10) / 1000.0)
    except Exception as e:
        outcome = e
    else:
        outcome = "ok"

    with self.lock:
        self.thread_results.append(outcome)
def test_wait_cluster(ray_start_cluster):
    """Check that ``ray.wait`` with ``timeout=0`` sees finished remote tasks.

    Two nodes that each provide one ``RemoteResource`` are added, and every
    task requires that resource.  A first batch is timed; a second batch is
    then given twice that long to finish before a zero-timeout wait.
    """
    cluster = ray_start_cluster
    for _ in range(2):
        cluster.add_node(num_cpus=1, resources={"RemoteResource": 1})
    ray.init(address=cluster.address)

    @ray.remote(resources={"RemoteResource": 1})
    def f():
        return

    # Make sure we have enough workers on the remote nodes to execute some
    # tasks.
    warmup = [f.remote() for _ in range(10)]
    start = time.time()
    ray.get(warmup)
    warmup_duration = time.time() - start

    # Submit some more tasks that can only be executed on the remote nodes.
    batch = [f.remote() for _ in range(10)]

    # Sleep for a bit to let the tasks finish.
    time.sleep(warmup_duration * 2)

    _, not_ready = ray.wait(batch, num_returns=len(batch), timeout=0)
    # All remote tasks should have finished.
    assert len(not_ready) == 0
def test_wait(ray_start_regular_shared):
    """Cover the core ``ray.wait`` contract.

    Checks the default ``num_returns``, waiting for all refs, a timeout
    that returns fewer refs than requested, rejection of duplicate refs,
    the empty-list case, ``num_returns`` without a timeout, and the
    ``TypeError`` raised for arguments that are not a list of refs.
    """

    @ray.remote
    def f(delay):
        time.sleep(delay)
        return

    task_refs = [f.remote(0) for _ in range(4)]
    done, pending = ray.wait(task_refs)
    assert (len(done), len(pending)) == (1, 3)

    done, pending = ray.wait(task_refs, num_returns=4)
    assert set(done) == set(task_refs)
    assert pending == []

    # One task finishes immediately, the other outlives the timeout.
    task_refs = [f.remote(0), f.remote(5)]
    done, pending = ray.wait(task_refs, timeout=0.5, num_returns=2)
    assert (len(done), len(pending)) == (1, 1)

    # Verify that calling wait with duplicate object refs throws an
    # exception.
    dup_ref = ray.put(1)
    with pytest.raises(Exception):
        ray.wait([dup_ref, dup_ref])

    # Make sure it is possible to call wait with an empty list.
    done, pending = ray.wait([])
    assert done == []
    assert pending == []

    # Test semantics of num_returns with no timeout.
    put_refs = [ray.put(i) for i in range(10)]
    found, rest = ray.wait(put_refs, num_returns=2)
    assert (len(found), len(rest)) == (2, 8)

    # Verify that incorrect usage raises a TypeError.
    bare_ref = ray.put(1)
    with pytest.raises(TypeError):
        ray.wait(bare_ref)
    with pytest.raises(TypeError):
        ray.wait(1)
    with pytest.raises(TypeError):
        ray.wait([1])
# `ClientObjectRef(...)` print(ref2) # `236` print(ray.get(ref2)) ref3 = fact.remote(20) # `ClientObjectRef(...)` print(ref3) # `2432902008176640000` print(ray.get(ref3)) # Reuse the cached ClientRemoteFunc object ref4 = fact.remote(5) # `120` print(ray.get(ref4)) ref5 = fact.remote(10) print([ref2, ref3, ref4, ref5]) # should return ref2, ref3, ref4 res = ray.wait([ref5, ref2, ref3, ref4], num_returns=3) print(res) assert [ref2, ref3, ref4] == res[0] assert [ref5] == res[1] # should return ref2, ref3, ref4, ref5 res = ray.wait([ref2, ref3, ref4, ref5], num_returns=4) print(res) assert [ref2, ref3, ref4, ref5] == res[0] assert [] == res[1]
def put():
    """Wait on ``signal_actor.wait`` and then return a large random array."""
    ray.wait([signal_actor.wait.remote()])
    num_values = 5 * 1024 * 1024
    # 40 MB data
    return np.random.rand(num_values)
def g(input_list):
    """Wait on the first entry of ``input_list``.

    Args:
        input_list: The argument input_list should be a list containing
            one object ref.
    """
    first_ref = input_list[0]
    ray.wait([first_ref])