def test_kill_cancel_metadata(ray_start_regular):
    """Check that ray.kill and ray.cancel hand the worker metadata to Terminate.

    The data client's ``Terminate`` attribute is swapped for a bound mock
    that raises an exception whose argument is ``self._metadata[1][0]``.
    The client is started with ``metadata=[("key", "value")]``, so both
    ``ray.kill`` and ``ray.cancel`` are expected to surface that exception
    with a message matching ``"key"``.
    """

    class MetadataIsCorrectlyPassedException(Exception):
        pass

    def mock_terminate(self, term):
        # Surface the metadata seen by the stub through the exception text.
        raise MetadataIsCorrectlyPassedException(self._metadata[1][0])

    with ray_start_client_server(metadata=[("key", "value")]) as ray:

        @ray.remote
        class A:
            pass

        @ray.remote
        def f():
            time.sleep(1000)

        # Bind the mock to the data client so it receives it as ``self``.
        data_client = ray.get_context().api.worker.data_client
        data_client.Terminate = mock_terminate.__get__(data_client)

        # ray.kill must go through the mocked Terminate.
        actor_handle = A.remote()
        with pytest.raises(MetadataIsCorrectlyPassedException, match="key"):
            ray.kill(actor_handle)

        # ray.cancel must go through the mocked Terminate as well.
        pending_task = f.remote()
        with pytest.raises(MetadataIsCorrectlyPassedException, match="key"):
            ray.cancel(pending_task)
def test_get_ray_metadata(ray_start_regular_shared):
    """Exercise the cluster-information calls exposed through the client.

    Checks ``is_initialized``, ``nodes``, ``cluster_resources`` and
    ``available_resources`` against the address reported by the fixture.
    """
    with ray_start_client_server() as ray:
        node_ip = ray_start_regular_shared["node_ip_address"]
        assert ray.is_initialized()

        node_list = ray.nodes()
        assert len(node_list) == 1, node_list
        assert nodes_address(node_list) == node_ip if False else \
            node_list[0]["NodeManagerAddress"] == node_ip

        # Every node is expected to expose a "node:<ip>" resource entry.
        node_resource_key = "node:" + node_ip
        total = ray.cluster_resources()
        available = ray.available_resources()

        assert total["CPU"] == 1.0
        assert node_resource_key in total
        assert node_resource_key in available
def main(results=None):
    """Run every ``benchmark_*`` callable of this module against a client server.

    Args:
        results: Optional list that each benchmark receives as its second
            argument. When omitted, a fresh list is created. A list supplied
            by the caller is always used as-is, even when it is empty, so the
            caller can observe what the benchmarks stored in it.

    Returns:
        The list that was passed to the benchmarks.
    """
    # ``results or []`` would silently swap a caller-supplied empty list for
    # a new one; only substitute a default when nothing was passed at all.
    if results is None:
        results = []

    ray_config = {"logging_level": logging.WARNING}

    def ray_connect_handler(job_config=None, **ray_init_kwargs):
        # Initialise the real (non-client) ray with the benchmark config the
        # first time a client connects; the passed arguments are not used.
        from ray._private.client_mode_hook import disable_client_hook

        with disable_client_hook():
            import ray as real_ray

            if not real_ray.is_initialized():
                real_ray.init(**ray_config)

    for name, obj in inspect.getmembers(sys.modules[__name__]):
        if not name.startswith("benchmark_"):
            continue
        # Each benchmark gets its own client/server pair.
        with ray_start_client_server(
                ray_connect_handler=ray_connect_handler) as ray:
            obj(ray, results)

    return results
def test_client_mode_hook_thread_safe(ray_start_regular_shared):
    """Check that disable_client_hook() in one thread does not leak to another.

    A worker thread disables the client hook and then blocks on a lock held
    by the main thread. While it is blocked, the main thread asserts that
    its own view of the hook is still enabled. The worker reports what it
    observed inside and after the disable block through a queue.
    """
    with ray_start_client_server():
        with enable_client_mode():
            assert client_mode_should_convert(auto_init=True)
            # Held by the main thread so the worker blocks inside the
            # disable_client_hook() block until released below.
            lock = threading.Lock()
            lock.acquire()
            q = queue.Queue()

            def disable():
                with disable_client_hook():
                    # First report: value seen while the hook is disabled.
                    q.put(client_mode_should_convert(auto_init=True))
                    lock.acquire()
                # Second report: value seen after leaving the disable block.
                q.put(client_mode_should_convert(auto_init=True))

            t = threading.Thread(target=disable)
            t.start()
            # The main thread must still see the hook as enabled.
            assert client_mode_should_convert(auto_init=True)
            lock.release()
            t.join()
            assert q.get() is False, "Threaded disable_client_hook failed to disable"
            assert q.get() is True, "Threaded disable_client_hook failed to re-enable"
def test_delete_actor(ray_start_regular):
    """Dropping a client actor handle lowers the server-side actor ref count.

    Two actors are created (count 2); after deleting the first handle the
    count is expected to reach 1 within five seconds.
    """
    with ray_start_client_server() as ray:

        @ray.remote
        class Accumulator:
            def __init__(self):
                self.acc = 0

            def inc(self):
                self.acc += 1

        first_actor = Accumulator.remote()
        first_actor.inc.remote()
        second_actor = Accumulator.remote()
        second_actor.inc.remote()

        both_alive = server_actor_ref_count(2)
        assert both_alive()

        # Release the only client-side reference to the first actor.
        del first_actor

        one_left = server_actor_ref_count(1)
        wait_for_condition(one_left, timeout=5)
def main():
    """Run every ``benchmark_*`` callable of this module against a client server.

    Each benchmark is called with the client ``ray`` object yielded by its
    own ``ray_start_client_server`` context. Returns ``None``.
    """
    ray_config = {
        "_system_config": {
            "put_small_object_in_memory_store": True
        },
        "logging_level": logging.WARNING
    }

    def ray_connect_handler(job_config=None, **ray_init_kwargs):
        # Accept (and ignore) extra init keyword arguments so the handler
        # keeps working when the server forwards them; this matches the
        # signature of the sibling benchmark entry point. The real ray is
        # always initialised with the fixed benchmark config above.
        from ray._private.client_mode_hook import disable_client_hook

        with disable_client_hook():
            import ray as real_ray

            if not real_ray.is_initialized():
                real_ray.init(**ray_config)

    for name, obj in inspect.getmembers(sys.modules[__name__]):
        if not name.startswith("benchmark_"):
            continue
        # Each benchmark gets its own client/server pair.
        with ray_start_client_server(
                ray_connect_handler=ray_connect_handler) as ray:
            obj(ray)
def test_large_remote_call(ray_start_regular_shared):
    """Exercise remote calls whose arguments need several transfer chunks.

    The same large array is sent positionally, via ``*args``, via
    ``**kwargs``, and to an actor constructor and method.
    """
    expected_shape = (1024, 1024, 16)

    with ray_start_client_server() as ray:

        @ray.remote
        def f(large_obj):
            return large_obj.shape

        @ray.remote
        def f2(*args):
            assert args[0] == 123
            return args[1].shape

        @ray.remote
        def f3(*args, **kwargs):
            assert args[0] == "a"
            assert args[1] == "b"
            return kwargs["large_obj"].shape

        # 1024 * 1024 * 16 float64 values are about 128 MiB. The guard below
        # ensures the chunk size is smaller, so the argument cannot fit in a
        # single chunk.
        assert OBJECT_TRANSFER_CHUNK_SIZE < 2**20 * 128
        big_array = np.random.random(expected_shape)

        positional_shape = ray.get(f.remote(big_array))
        assert positional_shape == expected_shape

        varargs_shape = ray.get(f2.remote(123, big_array))
        assert varargs_shape == expected_shape

        kwargs_shape = ray.get(f3.remote("a", "b", large_obj=big_array))
        assert kwargs_shape == expected_shape

        @ray.remote
        class SomeActor:
            def __init__(self, large_obj):
                self.inner = large_obj

            def some_method(self, large_obj):
                return large_obj.shape == self.inner.shape

        actor = SomeActor.remote(big_array)
        assert ray.get(actor.some_method.remote(big_array))
def test_cancel_chain(ray_start_regular, use_force):
    """Check how cancelling one task affects a chain of dependent tasks.

    Four tasks are chained so that each waits on the previous one, with the
    first waiting on a signal actor. The chain is cancelled once at its head
    and once in the middle, with ``force=use_force``.
    """
    with ray_start_client_server() as ray:
        SignalActor = create_remote_signal_actor(ray)
        signaler = SignalActor.remote()

        @ray.remote
        def wait_for(t):
            # The reference is wrapped in a list and fetched explicitly
            # inside the task.
            return ray.get(t[0])

        obj1 = wait_for.remote([signaler.wait.remote()])
        obj2 = wait_for.remote([obj1])
        obj3 = wait_for.remote([obj2])
        obj4 = wait_for.remote([obj3])

        # Nothing is ready yet: the head is still waiting on the signal.
        assert len(ray.wait([obj1], timeout=.1)[0]) == 0
        ray.cancel(obj1, force=use_force)
        # Cancelling the head must make every link of the chain fail.
        for ob in [obj1, obj2, obj3, obj4]:
            with pytest.raises(valid_exceptions(use_force)):
                ray.get(ob)

        # Second round: build a fresh chain and cancel it in the middle.
        signaler2 = SignalActor.remote()
        obj1 = wait_for.remote([signaler2.wait.remote()])
        obj2 = wait_for.remote([obj1])
        obj3 = wait_for.remote([obj2])
        obj4 = wait_for.remote([obj3])

        assert len(ray.wait([obj3], timeout=.1)[0]) == 0
        ray.cancel(obj3, force=use_force)
        # Only the cancelled task and its downstream dependent fail.
        for ob in [obj3, obj4]:
            with pytest.raises(valid_exceptions(use_force)):
                ray.get(ob)

        # The upstream tasks are still pending, not cancelled.
        with pytest.raises(GetTimeoutError):
            ray.get(obj1, timeout=.1)

        with pytest.raises(GetTimeoutError):
            ray.get(obj2, timeout=.1)

        # Once the signal is sent the head of the chain can be fetched.
        signaler2.send.remote()
        ray.get(obj1)
def test_basic_named_actor(ray_start_regular_shared):
    """Look up named actors with ray.get_actor(), including a detached one."""
    with ray_start_client_server() as ray:

        @ray.remote
        class Accumulator:
            def __init__(self):
                self.x = 0

            def inc(self):
                self.x += 1

            def get(self):
                return self.x

        # A named actor, incremented twice through its original handle.
        named_handle = Accumulator.options(name="test_acc").remote()
        named_handle.inc.remote()
        named_handle.inc.remote()

        # A handle obtained by name must refer to the same actor state.
        looked_up = ray.get_actor("test_acc")
        looked_up.inc.remote()
        counter_value = ray.get(looked_up.get.remote())
        assert counter_value == 3

        del named_handle

        # A detached actor, incremented once before its handle is dropped.
        detached_options = Accumulator.options(
            name="test_acc2", lifetime="detached")
        creator_handle = detached_options.remote()
        creator_handle.inc.remote()
        del creator_handle

        # It must still be reachable by name and keep its earlier state.
        detached_actor = ray.get_actor("test_acc2")
        for _ in range(5):
            detached_actor.inc.remote()

        detached_value = ray.get(detached_actor.get.remote())
        assert detached_value == 6
def test_stdout_log_stream(ray_start_regular_shared):
    """Remote stdout and stderr output is delivered to the client log stream.

    The log client's ``stdstream`` callback is replaced by a collector; a
    task printing the same text to both streams must yield two messages.
    """
    with ray_start_client_server() as ray:
        captured = []

        def collect(level, msg):
            captured.append(msg)

        ray.worker.log_client.stdstream = collect

        @ray.remote
        def print_on_stderr_and_stdout(s):
            print(s)
            print(s, file=sys.stderr)

        time.sleep(1)
        print_on_stderr_and_stdout.remote("Hello world")
        time.sleep(1)

        # One hit is expected per stream.
        num_hello = sum(1 for line in captured if "Hello world" in line)
        assert num_hello == 2, f"Invalid logs: {captured}"
def test_create_remote_before_start(ray_start_regular_shared):
    """Declare remote objects before the client is started, then use them.

    This mirrors a library that decorates functions and classes at import
    time, before any client connection exists.
    """
    from ray.util.client import ray

    @ray.remote
    class Returner:
        def doit(self):
            return "foo"

    @ray.remote
    def f(x):
        return x + 20

    # Only visible when tests run in verbose mode.
    print("Created remote functions")

    with ray_start_client_server() as ray:
        task_result = ray.get(f.remote(3))
        assert task_result == 23

        returner = Returner.remote()
        actor_result = ray.get(returner.doit.remote())
        assert actor_result == "foo"
def test_rllib_integration(ray_start_regular_shared):
    """Run a small SimpleQ trainer with the client mode hook enabled.

    Samples from the local worker and trains for two iterations, checking
    that each sampled batch has ``rollout_fragment_length`` entries.
    """
    with ray_start_client_server():
        import ray.rllib.agents.dqn as dqn

        # Confirming the behavior of this context manager.
        # (Client mode hook not yet enabled.)
        assert not client_mode_should_convert()
        # Need to enable this for client APIs to be used.
        with enable_client_mode():
            # Confirming mode hook is enabled.
            assert client_mode_should_convert()

            # Work on a copy so the module-level default config is untouched.
            config = dqn.SIMPLE_Q_DEFAULT_CONFIG.copy()
            # Run locally.
            config["num_workers"] = 0
            # Test with compression.
            config["compress_observations"] = True
            num_iterations = 2
            trainer = dqn.SimpleQTrainer(config=config, env="CartPole-v1")
            rw = trainer.workers.local_worker()
            for i in range(num_iterations):
                sb = rw.sample()
                # Each sampled batch must match the configured fragment length.
                assert sb.count == config["rollout_fragment_length"]
                trainer.train()
def test_serializing_exceptions(ray_start_regular_shared):
    """Looking up an unknown actor raises ValueError with the expected text."""
    expected_message = "Failed to look up actor with name 'abc'"
    with ray_start_client_server() as ray, \
            pytest.raises(ValueError, match=expected_message):
        ray.get_actor("abc")
def test_pass_handles(ray_start_regular_shared):
    """Client handles can be passed into remote functions and actors.

    Function and actor handles are passed directly, through another actor,
    and hidden inside a dict; each remote computation is compared with a
    locally computed factorial.
    """

    def local_fact(x):
        # Plain recursive reference implementation.
        return 1 if x <= 0 else x * local_fact(x - 1)

    with ray_start_client_server() as ray:

        @ray.remote
        class ExecActor:
            def exec(self, f, x):
                return ray.get(f.remote(x))

            def exec_exec(self, actor, f, x):
                return ray.get(actor.exec.remote(f, x))

        @ray.remote
        def fact(x):
            out = 1
            while x > 0:
                out = out * x
                x -= 1
            return out

        @ray.remote
        def func_exec(f, x):
            return ray.get(f.remote(x))

        @ray.remote
        def func_actor_exec(actor, f, x):
            return ray.get(actor.exec.remote(f, x))

        @ray.remote
        def sneaky_func_exec(obj, x):
            return ray.get(obj["f"].remote(x))

        @ray.remote
        def sneaky_actor_exec(obj, x):
            return ray.get(obj["actor"].exec.remote(obj["f"], x))

        # Direct call, then the function handle passed to another function.
        assert ray.get(fact.remote(7)) == local_fact(7)
        assert ray.get(func_exec.remote(fact, 8)) == local_fact(8)

        # Function handle hidden inside a container.
        wrapped_function = {"f": fact}
        assert ray.get(
            sneaky_func_exec.remote(wrapped_function, 5)) == local_fact(5)

        # Function handle passed to an actor, and actor handle to a function.
        first_actor = ExecActor.remote()
        assert ray.get(first_actor.exec.remote(fact, 7)) == local_fact(7)
        assert ray.get(
            func_actor_exec.remote(first_actor, fact, 10)) == local_fact(10)

        # Actor handle passed to another actor.
        second_actor = ExecActor.remote()
        assert ray.get(
            first_actor.exec_exec.remote(second_actor, fact,
                                         9)) == local_fact(9)

        # Actor and function handles both hidden inside a container.
        wrapped_pair = {"actor": second_actor, "f": fact}
        assert ray.get(
            sneaky_actor_exec.remote(wrapped_pair, 4)) == local_fact(4)
def start_client_server_4_cpus():
    """Initialise ray with four CPUs, then yield a connected client.

    The client/server pair stays up for as long as the generator is
    suspended at the ``yield``.
    """
    ray.init(num_cpus=4)
    with ray_start_client_server() as connected_client:
        yield connected_client
def start_client_server():
    """Yield a connected client from ``ray_start_client_server``.

    The client/server pair stays up for as long as the generator is
    suspended at the ``yield``.
    """
    with ray_start_client_server() as connected_client:
        yield connected_client
def test_connection(self):
    """The client reports connected only inside the client/server context."""
    with ray_start_client_server():
        connected_inside = ray.util.client.ray.is_connected()
        assert connected_inside

    # After the context exits the client must report exactly ``False``.
    connected_after = ray.util.client.ray.is_connected()
    assert connected_after is False
def test_ray_init_valid_keyword_with_client(shutdown_only):
    """ray.init accepts a valid keyword argument when given a ray:// address."""
    server_address = "ray://localhost:50051"
    with ray_start_client_server() as initial_client:
        # Drop the connection made by the helper so ray.init connects anew.
        initial_client.disconnect()

        # logging_level should be passed to the server
        init_context = ray.init(server_address, logging_level=logging.INFO)
        with init_context:
            pass
def test_serializing_exceptions(ray_start_regular_shared):
    """Looking up an unknown actor through the client raises ValueError."""
    with ray_start_client_server() as ray:
        expect_value_error = pytest.raises(ValueError)
        with expect_value_error:
            ray.get_actor("abc")
def test_client_gpu_ids(call_ray_stop_only):
    """With ray initialised on CPUs only, the client reports no GPU ids."""
    import ray

    ray.init(num_cpus=2)

    with ray_start_client_server() as client:
        gpu_ids = client.get_gpu_ids()
        assert gpu_ids == []