def test_max_running_tasks():
    counter = Semaphore.remote(0)
    blocker = Semaphore.remote(0)

    @ray.remote(num_cpus=0.25)
    def task(counter, blocker):
        sleep(300)

    refs = [
        task.remote(counter, blocker)
        for _ in trange(MAX_RUNNING_TASKS_IN_CLUSTER, desc="Launching tasks")
    ]

    max_cpus = ray.cluster_resources()["CPU"]
    min_cpus_available = max_cpus
    for _ in trange(int(300 / 0.1), desc="Waiting"):
        try:
            cur_cpus = ray.available_resources().get("CPU", 0)
            min_cpus_available = min(min_cpus_available, cur_cpus)
        except Exception:
            # There are race conditions where `.get` can fail if a new
            # heartbeat arrives at the same time.
            pass
        sleep(0.1)

    # There are some relevant magic numbers in this check. 10k tasks each
    # require 1/4 CPU, so ideally 2.5k CPUs will be in use.
    err_str = f"Only {max_cpus - min_cpus_available}/{max_cpus} cpus used."
    assert max_cpus - min_cpus_available > 2000, err_str

    for _ in trange(
            MAX_RUNNING_TASKS_IN_CLUSTER,
            desc="Ensuring all tasks have finished"):
        done, refs = ray.wait(refs)
        assert ray.get(done[0]) is None
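# These tests coordinate their tasks through a `Semaphore` actor imported from
# ray.test_utils. For readers without that module at hand, the sketch below is
# a minimal stand-in (an assumption, not the exact ray.test_utils code): an
# async Ray actor wrapping asyncio.Semaphore and exposing the acquire/release/
# locked methods the tests call.
import asyncio

import ray


@ray.remote(num_cpus=0)
class Semaphore:
    def __init__(self, value=1):
        self._sema = asyncio.Semaphore(value=value)

    async def acquire(self):
        # Blocks the calling `ray.get` until a matching release arrives.
        await self._sema.acquire()

    async def release(self):
        self._sema.release()

    async def locked(self):
        # True while the internal counter is zero, i.e. no permits available.
        return self._sema.locked()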
def test_for_each_concur_sync(ray_start_regular_shared):
    main_wait = Semaphore.remote(value=0)
    test_wait = Semaphore.remote(value=0)

    def task(x):
        i, main_wait, test_wait = x
        ray.get(main_wait.release.remote())
        ray.get(test_wait.acquire.remote())
        return i + 10

    @ray.remote(num_cpus=0.01)
    def to_list(it):
        return list(it)

    it = from_items(
        [(i, main_wait, test_wait) for i in range(8)], num_shards=2)
    it = it.for_each(task, max_concurrency=2, resources={"num_cpus": 0.01})

    list_promise = to_list.remote(it.gather_sync())

    for i in range(4):
        assert i in [0, 1, 2, 3]
        ray.get(main_wait.acquire.remote())

    # There should be exactly 4 tasks executing at this point.
    assert ray.get(main_wait.locked.remote()) is True, "Too much parallelism"

    # Release all the tasks so the iterator can finish.
    for i in range(8):
        ray.get(test_wait.release.remote())

    assert repr(
        it) == "ParallelIterator[from_items[tuple, 8, shards=2].for_each()]"

    result_list = ray.get(list_promise)
    assert set(result_list) == set(range(10, 18))
def test_hybrid_policy(ray_start_cluster):
    cluster = ray_start_cluster
    num_nodes = 2
    num_cpus = 10
    for _ in range(num_nodes):
        cluster.add_node(num_cpus=num_cpus, memory=num_cpus)
    cluster.wait_for_nodes()
    ray.init(address=cluster.address)

    # `block_task` ensures that scheduled tasks do not return until all are
    # running.
    block_task = Semaphore.remote(0)
    # `block_driver` ensures that the driver does not allow tasks to continue
    # until all are running.
    block_driver = Semaphore.remote(0)

    # Add the memory resource because the CPU will be released in the ray.get.
    @ray.remote(num_cpus=1, memory=1)
    def get_node():
        ray.get(block_driver.release.remote())
        ray.get(block_task.acquire.remote())
        return ray.worker.global_worker.current_node_id

    # Below the hybrid threshold we pack on the local node first
    # (5 tasks fill half of the node's 10 CPUs).
    refs = [get_node.remote() for _ in range(5)]
    ray.get([block_driver.acquire.remote() for _ in refs])
    ray.get([block_task.release.remote() for _ in refs])
    nodes = ray.get(refs)
    assert len(set(nodes)) == 1

    # We pack the second node up to the hybrid threshold.
    refs = [get_node.remote() for _ in range(10)]
    ray.get([block_driver.acquire.remote() for _ in refs])
    ray.get([block_task.release.remote() for _ in refs])
    nodes = ray.get(refs)
    counter = collections.Counter(nodes)
    for node_id in counter:
        print(f"{node_id}: {counter[node_id]}")
        assert counter[node_id] == 5

    # Once all nodes are past the hybrid threshold we round robin.
    # TODO (Alex): Ideally we could schedule fewer than 20 tasks here, but the
    # policy is imperfect if a resource report interrupts the process.
    refs = [get_node.remote() for _ in range(20)]
    ray.get([block_driver.acquire.remote() for _ in refs])
    ray.get([block_task.release.remote() for _ in refs])
    nodes = ray.get(refs)
    counter = collections.Counter(nodes)
    for node_id in counter:
        print(f"{node_id}: {counter[node_id]}")
        assert counter[node_id] == 10, counter
def test_warning_for_too_many_nested_tasks(shutdown_only):
    # Check that if we run a workload which requires too many workers to be
    # started, we will receive a warning.
    num_cpus = 2
    ray.init(num_cpus=num_cpus)
    p = init_error_pubsub()

    remote_wait = Semaphore.remote(value=0)
    nested_wait = Semaphore.remote(value=0)
    ray.get([
        remote_wait.locked.remote(),
        nested_wait.locked.remote(),
    ])

    @ray.remote
    def f():
        time.sleep(1000)
        return 1

    @ray.remote
    def h(nested_waits):
        nested_wait.release.remote()
        ray.get(nested_waits)
        ray.get(f.remote())

    @ray.remote
    def g(remote_waits, nested_waits):
        # Block until every g task is running so that the f tasks all get
        # submitted to the scheduler after the g tasks.
        remote_wait.release.remote()
        # Wait until every lock is released.
        ray.get(remote_waits)
        ray.get(h.remote(nested_waits))

    num_root_tasks = num_cpus * 4
    # Lock the remote tasks until everything is scheduled.
    remote_waits = []
    nested_waits = []
    for _ in range(num_root_tasks):
        remote_waits.append(remote_wait.acquire.remote())
        nested_waits.append(nested_wait.acquire.remote())

    [g.remote(remote_waits, nested_waits) for _ in range(num_root_tasks)]

    errors = get_error_message(p, 1, ray_constants.WORKER_POOL_LARGE_ERROR)
    assert len(errors) == 1
    assert errors[0].type == ray_constants.WORKER_POOL_LARGE_ERROR
    p.close()
def test_many_queued_tasks():
    sema = Semaphore.remote(0)

    @ray.remote(num_cpus=1)
    def block():
        ray.get(sema.acquire.remote())

    @ray.remote(num_cpus=1)
    def f():
        pass

    num_cpus = int(ray.cluster_resources()["CPU"])
    blocked_tasks = []
    for _ in range(num_cpus):
        blocked_tasks.append(block.remote())

    print("Submitting many tasks")
    pending_tasks = []
    for _ in trange(MAX_QUEUED_TASKS):
        pending_tasks.append(f.remote())

    # Make sure all the tasks can actually run.
    for _ in range(num_cpus):
        sema.release.remote()

    print("Unblocking tasks")
    for ref in tqdm(pending_tasks):
        assert ray.get(ref) is None
def test_for_each_concur(ray_start_regular_shared):
    main_wait = Semaphore.remote(value=0)
    test_wait = Semaphore.remote(value=0)

    def task(x):
        i, main_wait, test_wait = x
        ray.get(main_wait.release.remote())
        ray.get(test_wait.acquire.remote())
        return i + 10

    @ray.remote(num_cpus=0.1)
    def to_list(it):
        return list(it)

    it = from_items(
        [(i, main_wait, test_wait) for i in range(8)], num_shards=2)
    it = it.for_each(task, max_concurrency=2, resources={"num_cpus": 0.1})

    for i in range(4):
        ray.get(main_wait.acquire.remote())

    # There should be exactly 4 tasks executing at this point.
    assert ray.get(main_wait.locked.remote()) is True, "Too much parallelism"

    # When we finish one task, exactly one more should start.
    ray.get(test_wait.release.remote())
    ray.get(main_wait.acquire.remote())
    assert ray.get(main_wait.locked.remote()) is True, "Too much parallelism"

    # Finish everything and make sure the output matches a regular iterator.
    for i in range(3):
        ray.get(test_wait.release.remote())

    assert repr(
        it) == "ParallelIterator[from_items[tuple, 8, shards=2].for_each()]"

    assert ray.get(to_list.remote(it.gather_sync())) == list(range(10, 18))
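# For context, the two for_each tests above exercise Ray's ParallelIterator
# API. The snippet below is a small, self-contained usage sketch of that API
# without the semaphore choreography; it assumes `from_items` comes from
# ray.util.iter, as in the tests.
import ray
from ray.util.iter import from_items

if not ray.is_initialized():
    ray.init(num_cpus=2)

# Build a sharded iterator, apply a transformation, and gather the results
# back into the driver process.
it = from_items(list(range(8)), num_shards=2)
it = it.for_each(lambda x: x + 10)
assert set(it.gather_sync()) == set(range(10, 18))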
def test_multi_driver_logging(ray_start_regular):
    address_info = ray_start_regular
    address = address_info["redis_address"]

    # ray.init(address=address)
    driver1_wait = Semaphore.options(name="driver1_wait").remote(value=0)
    driver2_wait = Semaphore.options(name="driver2_wait").remote(value=0)
    main_wait = Semaphore.options(name="main_wait").remote(value=0)

    # Actor creation is asynchronous. We need to wait for it to complete,
    # otherwise the drivers can't look the actors up by name.
    ray.get(driver1_wait.locked.remote())
    ray.get(driver2_wait.locked.remote())
    ray.get(main_wait.locked.remote())

    # Params are address, semaphore name, output1, output2.
    driver_script_template = """
import ray
import sys
from ray.test_utils import Semaphore

@ray.remote(num_cpus=0)
def remote_print(s, file=None):
    print(s, file=file)

ray.init(address="{}")

driver_wait = ray.get_actor("{}")
main_wait = ray.get_actor("main_wait")

ray.get(main_wait.release.remote())
ray.get(driver_wait.acquire.remote())

s1 = "{}"
ray.get(remote_print.remote(s1))

ray.get(main_wait.release.remote())
ray.get(driver_wait.acquire.remote())

s2 = "{}"
ray.get(remote_print.remote(s2))

ray.get(main_wait.release.remote())
"""

    p1 = run_string_as_driver_nonblocking(
        driver_script_template.format(address, "driver1_wait", "1", "2"))
    p2 = run_string_as_driver_nonblocking(
        driver_script_template.format(address, "driver2_wait", "3", "4"))

    ray.get(main_wait.acquire.remote())
    ray.get(main_wait.acquire.remote())
    # At this point both of the other drivers are fully initialized.

    ray.get(driver1_wait.release.remote())
    ray.get(driver2_wait.release.remote())

    # At this point driver1 should print '1' and driver2 should print '3'.
    ray.get(main_wait.acquire.remote())
    ray.get(main_wait.acquire.remote())

    ray.get(driver1_wait.release.remote())
    ray.get(driver2_wait.release.remote())

    # At this point driver1 should print '2' and driver2 should print '4'.
    ray.get(main_wait.acquire.remote())
    ray.get(main_wait.acquire.remote())

    driver1_out = p1.stdout.read().decode("ascii")
    driver2_out = p2.stdout.read().decode("ascii")
    if sys.platform == "win32":
        driver1_out = driver1_out.replace("\r", "")
        driver2_out = driver2_out.replace("\r", "")
    driver1_out_split = driver1_out.split("\n")
    driver2_out_split = driver2_out.split("\n")

    assert driver1_out_split[0][-1] == "1"
    assert driver1_out_split[1][-1] == "2"
    assert driver2_out_split[0][-1] == "3"
    assert driver2_out_split[1][-1] == "4"
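# test_multi_driver_logging launches two extra drivers with
# run_string_as_driver_nonblocking from ray.test_utils and later reads their
# stdout. As a rough sketch (an assumption, not the actual helper), such a
# function could start the script in a separate Python process and return the
# Popen handle:
import subprocess
import sys


def run_string_as_driver_nonblocking(driver_script):
    # Run the script with the same interpreter; the caller reads proc.stdout
    # after the driver exits.
    return subprocess.Popen(
        [sys.executable, "-c", driver_script],
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE)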