def test_back_pressure(shutdown_only_with_initialization_check):
    """Exercise the ``max_pending_calls`` limit on an actor handle.

    A ``Semaphore`` actor is created with ``max_pending_calls=10`` and an
    initial value of 0. The test then checks three things in order:

    1. ten ``acquire`` submissions are accepted,
    2. the eleventh submission raises ``PendingCallsLimitExceeded``,
    3. after the semaphore has been released ten times (through a plain
       remote task), a further ``acquire`` submission is accepted again.
    """
    ray.init()

    signal_actor = Semaphore.options(max_pending_calls=10).remote(value=0)

    # Submissions up to the configured limit must not be rejected.
    try:
        for _ in range(10):
            signal_actor.acquire.remote()
    except ray.exceptions.PendingCallsLimitExceeded:
        # pytest.fail rather than `assert False`: a bare assert is removed
        # when Python runs with -O, and it carries no explanation.
        pytest.fail(
            "PendingCallsLimitExceeded raised before reaching "
            "max_pending_calls=10"
        )

    # One submission past the limit must be rejected.
    with pytest.raises(ray.exceptions.PendingCallsLimitExceeded):
        signal_actor.acquire.remote()

    @ray.remote
    def release(signal_actor):
        ray.get(signal_actor.release.remote())
        return 1

    # Release signal actor through common task,
    # because actor tasks will be back pressured
    for _ in range(10):
        ray.get(release.remote(signal_actor))

    # Check whether we can call remote actor normally after
    # back pressure released.
    try:
        signal_actor.acquire.remote()
    except ray.exceptions.PendingCallsLimitExceeded:
        pytest.fail(
            "PendingCallsLimitExceeded raised after the pending calls "
            "were released"
        )

    ray.shutdown()
def test_multi_driver_logging(ray_start_regular):
    """Check that each of two extra drivers reads back its own printed output.

    Two driver scripts are started as subprocesses against the cluster
    address supplied by the ``ray_start_regular`` fixture. Named ``Semaphore``
    actors step the drivers in lockstep with this test. Each driver prints
    two strings through a remote task, and the test asserts that the first
    two lines of driver 1's stdout end in "1" and "2", and those of driver 2
    end in "3" and "4".
    """
    address = ray_start_regular["address"]

    # One gate per driver plus one the drivers use to signal this test.
    driver1_gate = Semaphore.options(name="driver1_wait").remote(value=0)
    driver2_gate = Semaphore.options(name="driver2_wait").remote(value=0)
    main_gate = Semaphore.options(name="main_wait").remote(value=0)

    # The creation of an actor is asynchronous, so complete one call on each
    # actor before the drivers try to look them up by name.
    for gate in (driver1_gate, driver2_gate, main_gate):
        ray.get(gate.locked.remote())

    # Positional fields: address, semaphore name, first output, second output.
    script_template = """
import ray
import sys
from ray._private.test_utils import Semaphore

@ray.remote(num_cpus=0)
def remote_print(s, file=None):
    print(s, file=file)

ray.init(address="{}", namespace="default_test_namespace")

driver_wait = ray.get_actor("{}")
main_wait = ray.get_actor("main_wait")

ray.get(main_wait.release.remote())
ray.get(driver_wait.acquire.remote())

s1 = "{}"
ray.get(remote_print.remote(s1))

ray.get(main_wait.release.remote())
ray.get(driver_wait.acquire.remote())

s2 = "{}"
ray.get(remote_print.remote(s2))

ray.get(main_wait.release.remote())
    """

    proc1 = run_string_as_driver_nonblocking(
        script_template.format(address, "driver1_wait", "1", "2")
    )
    proc2 = run_string_as_driver_nonblocking(
        script_template.format(address, "driver2_wait", "3", "4")
    )

    def await_both_drivers():
        # Each driver releases main_wait once per stage.
        for _ in range(2):
            ray.get(main_gate.acquire.remote())

    def unblock_both_drivers():
        ray.get(driver1_gate.release.remote())
        ray.get(driver2_gate.release.remote())

    # Stage 0: both of the other drivers are fully initialized.
    await_both_drivers()
    # Stage 1: driver1 should receive '1' and driver2 '3'.
    # Stage 2: driver1 should receive '2' and driver2 '4'.
    for _ in range(2):
        unblock_both_drivers()
        await_both_drivers()

    outputs = [proc.stdout.read().decode("ascii") for proc in (proc1, proc2)]
    if sys.platform == "win32":
        outputs = [text.replace("\r", "") for text in outputs]
    driver1_lines, driver2_lines = (text.split("\n") for text in outputs)

    # Compare the last character of each of the first two lines per driver.
    for lines, expected_tails in ((driver1_lines, "12"), (driver2_lines, "34")):
        for index, tail in enumerate(expected_tails):
            assert lines[index][-1] == tail, lines
def test_memory_util(ray_start_cluster):
    """Check the "Attempt #2" entries of the memory summary around a node kill.

    The test builds a two-node cluster (a resource-less head plus one worker
    node), runs a chain of three semaphore-gated tasks on the worker node,
    then removes that node non-gracefully and adds a replacement. At each
    step it polls ``memory_summary`` and compares the number of lines that
    mention "Attempt #2" in the waiting / scheduled / finished states with
    an expected triple.

    NOTE(review): "Attempt #2" presumably marks tasks that are re-executed
    for object reconstruction after the node loss -- confirm against the
    memory summary format.
    """
    # System config passed to the head node only.
    config = {
        "num_heartbeats_timeout": 10,
        "raylet_heartbeat_period_milliseconds": 100,
        "object_timeout_milliseconds": 200,
    }
    cluster = ray_start_cluster
    # Head node with no resources.
    cluster.add_node(
        num_cpus=0,
        resources={"head": 1},
        _system_config=config,
        enable_object_reconstruction=True,
    )
    ray.init(address=cluster.address)
    # Node to place the initial object.
    node_to_kill = cluster.add_node(num_cpus=1, resources={"node1": 1}, object_store_memory=10**8)
    cluster.wait_for_nodes()

    # Returns a 10**7-byte uint8 array; when a semaphore is given, the task
    # first blocks on one acquire.
    @ray.remote
    def large_object(sema=None):
        if sema is not None:
            ray.get(sema.acquire.remote())
        return np.zeros(10**7, dtype=np.uint8)

    # Passes its argument through after blocking on one acquire.
    @ray.remote
    def dependent_task(x, sema):
        ray.get(sema.acquire.remote())
        return x

    def stats():
        """Return (waiting, scheduled, finished) counts of "Attempt #2" lines.

        Fetches and prints the memory summary, then counts the lines that
        contain "Attempt #2" together with WAITING_FOR_DEPENDENCIES,
        WAITING_FOR_EXECUTION and FINISHED respectively.
        """
        info = memory_summary(cluster.address, line_wrap=False)
        print(info)
        info = info.split("\n")
        reconstructing_waiting = [
            line for line in info if "Attempt #2" in line and WAITING_FOR_DEPENDENCIES in line
        ]
        reconstructing_scheduled = [
            line for line in info if "Attempt #2" in line and WAITING_FOR_EXECUTION in line
        ]
        reconstructing_finished = [
            line for line in info if "Attempt #2" in line and FINISHED in line
        ]
        return (
            len(reconstructing_waiting),
            len(reconstructing_scheduled),
            len(reconstructing_finished),
        )

    # The semaphore actor requests the "head" resource; the three chained
    # tasks request "node1".
    sema = Semaphore.options(resources={"head": 1}).remote(value=0)
    obj = large_object.options(resources={"node1": 1}).remote(sema)
    x = dependent_task.options(resources={"node1": 1}).remote(obj, sema)
    ref = dependent_task.options(resources={"node1": 1}).remote(x, sema)
    # One release per task in the chain, since each task acquires once.
    ray.get(sema.release.remote())
    ray.get(sema.release.remote())
    ray.get(sema.release.remote())
    ray.get(ref)
    # Nothing has been retried yet, so no "Attempt #2" lines are expected.
    wait_for_condition(lambda: stats() == (0, 0, 0))
    del ref

    # Remove the worker node without a graceful shutdown, then add a
    # replacement with the same resources.
    cluster.remove_node(node_to_kill, allow_graceful=False)
    node_to_kill = cluster.add_node(num_cpus=1, resources={"node1": 1}, object_store_memory=10**8)

    # A new task depending on `x`, submitted without resource options.
    # NOTE(review): presumably `x` and `obj` were lost with the node, so
    # their producing tasks show up as second attempts -- confirm.
    ref = dependent_task.remote(x, sema)
    wait_for_condition(lambda: stats() == (1, 1, 0))
    # Each release unblocks one semaphore-gated task; the expected counts
    # advance accordingly.
    ray.get(sema.release.remote())
    wait_for_condition(lambda: stats() == (0, 1, 1))
    ray.get(sema.release.remote())
    ray.get(sema.release.remote())
    ray.get(ref)
    wait_for_condition(lambda: stats() == (0, 0, 2))