def ray_start_head_local():
    """Bring up a local Ray head node, hand control to the test, then clean up.

    This is a generator: everything before the ``yield`` is setup and
    everything after it is teardown. It yields ``None``.
    """
    # Launch the Ray processes on this machine, bound to localhost.
    start_command = ["ray", "start", "--head"]
    start_command += ["--node-ip-address=localhost", "--redis-port=6379"]
    run_and_get_output(start_command)

    yield None

    # Detach this process from the cluster before the cluster goes away.
    ray.shutdown()

    # Stop the Ray processes and block until the stop command has finished.
    stop_process = subprocess.Popen(["ray", "stop"])
    stop_process.wait()
def setUp(self):
    """Start a Ray head node and remember the Redis address it reports.

    The address is taken from the first ``redis_address="..."`` occurrence
    in the text printed by ``ray start --head`` and stored on
    ``self.redis_address``.

    Raises:
        ValueError: If the output contains no ``redis_address="`` marker.
    """
    out = run_and_get_output(["ray", "start", "--head"])

    # Get the redis address from the output.
    redis_substring_prefix = "redis_address=\""
    _, marker, remainder = out.partition(redis_substring_prefix)
    if not marker:
        # Previously str.find() returned -1 here and the slice arithmetic
        # silently produced a meaningless address. Stop whatever was
        # started and fail loudly instead.
        subprocess.Popen(["ray", "stop"]).wait()
        raise ValueError(
            "Could not find a redis address in the output of "
            "'ray start --head': {!r}".format(out))

    # The address runs up to the closing double quote.
    self.redis_address = remainder.split("\"")[0]
def testUsingHostnames(self):
    """Check that a driver can connect using ``localhost`` as a hostname.

    Starts a head node bound to ``localhost``, connects to it, runs one
    trivial remote function and checks its result.
    """
    # Start the Ray processes on this machine.
    run_and_get_output([
        "ray", "start", "--head", "--node-ip-address=localhost",
        "--redis-port=6379"
    ])

    try:
        ray.init(node_ip_address="localhost", redis_address="localhost:6379")

        @ray.remote
        def f():
            return 1

        self.assertEqual(ray.get(f.remote()), 1)
    finally:
        # Kill the Ray cluster. This runs even when connecting or the
        # assertion fails, so a failing test does not leave Ray processes
        # behind for the tests that follow.
        subprocess.Popen(["ray", "stop"]).wait()
def setUp(self):
    """Launch a Ray head node and store its Redis address on the test case.

    The output of ``ray start --head`` is searched for the text
    ``redis_address="<value>"``; ``<value>`` is saved as
    ``self.redis_address``.

    Raises:
        ValueError: If that text does not appear in the output.
    """
    out = run_and_get_output(["ray", "start", "--head"])

    # Locate the redis address in the output.
    redis_substring_prefix = "redis_address=\""
    prefix_location = out.find(redis_substring_prefix)
    if prefix_location == -1:
        # Without this check, the -1 from find() was added to the prefix
        # length and an unrelated slice of the output was used as the
        # address. Stop the processes that were started, then report it.
        subprocess.Popen(["ray", "stop"]).wait()
        raise ValueError(
            "Could not find a redis address in the output of "
            "'ray start --head': {!r}".format(out))

    redis_address_location = prefix_location + len(redis_substring_prefix)
    redis_address = out[redis_address_location:]
    # Keep everything up to the closing double quote.
    self.redis_address = redis_address.split("\"")[0]
def testUsingHostnames(self):
    """Verify that Ray works when the node is addressed as ``localhost``.

    A head node is started with ``--node-ip-address=localhost``, a driver
    connects to ``localhost:6379``, and a remote function returning ``1``
    is executed and checked.
    """
    # Start the Ray processes on this machine.
    start_command = [
        "ray", "start", "--head", "--node-ip-address=localhost",
        "--redis-port=6379"
    ]
    run_and_get_output(start_command)

    try:
        ray.init(node_ip_address="localhost", redis_address="localhost:6379")

        @ray.remote
        def f():
            return 1

        self.assertEqual(ray.get(f.remote()), 1)
    finally:
        # Kill the Ray cluster whether or not the checks above passed;
        # otherwise a failure would leak the started processes.
        subprocess.Popen(["ray", "stop"]).wait()
def ray_start_head_with_resources():
    """Start a head node with one CPU and one GPU and yield its Redis address.

    This is a generator: the code before the ``yield`` starts the cluster
    and parses the address from the output of ``ray start``; the code
    after it stops the cluster.

    Yields:
        The Redis address string reported by ``ray start``.

    Raises:
        ValueError: If the output contains no ``redis_address="`` marker.
    """
    out = run_and_get_output(
        ["ray", "start", "--head", "--num-cpus=1", "--num-gpus=1"])

    # Get the redis address from the output.
    redis_substring_prefix = "redis_address=\""
    _, marker, remainder = out.partition(redis_substring_prefix)
    if not marker:
        # Previously a missing marker made str.find() return -1 and an
        # arbitrary slice of the output was yielded as the address. The
        # teardown below never runs in that case, so stop the cluster here
        # before failing.
        subprocess.Popen(["ray", "stop"]).wait()
        raise ValueError(
            "Could not find a redis address in the output of "
            "'ray start --head': {!r}".format(out))
    redis_address = remainder.split("\"")[0]

    yield redis_address

    # Kill the Ray cluster.
    subprocess.Popen(["ray", "stop"]).wait()
def ray_start():
    """Start a gateway-enabled Ray head node, connect to it, then clean up.

    This is a generator: the code before the ``yield`` starts the cluster
    and initializes Ray in this process; the code after it runs as
    teardown. It yields ``None``.
    """
    # Start the Ray processes.
    command = [
        "ray", "start", "--head", "--with-gateway", "--redis-port=21216",
        "--use-raylet"
    ]
    out = run_and_get_output(command)
    print(out)
    # Pause briefly between starting the processes and connecting.
    time.sleep(2)

    # Initialize Ray.
    ray.init(
        redis_address="127.0.0.1:21216",
        gateway_socat_port=5001,
        gateway_data_port=5002,
        use_raylet=True)

    yield None

    # The code after the yield will run as teardown code.
    ray.shutdown()
    # Wait for "ray stop" to finish. Without the wait, the stop command
    # could still be running when the next test starts its own cluster.
    subprocess.Popen(["ray", "stop"]).wait()
def testCallingStartRayHead(self):
    """Exercise ``ray start --head`` with a range of command line options.

    Each valid option set is started and then stopped in turn. Finally, an
    invalid combination is checked to raise.

    TODO(rkn): This test only tests the --head code path. We should also
    test the non-head node code path.
    """
    head_command = ["ray", "start", "--head"]
    stop_command = ["ray", "stop"]

    # One entry per scenario, in the order they are run.
    valid_option_sets = [
        # No arguments.
        [],
        # A number of workers.
        ["--num-workers", "20"],
        # A redis port.
        ["--redis-port", "6379"],
        # Redis shard ports.
        ["--redis-shard-ports", "6380,6381,6382"],
        # A node IP address.
        ["--node-ip-address", "127.0.0.1"],
        # An object manager port.
        ["--object-manager-port", "12345"],
        # The number of CPUs.
        ["--num-cpus", "100"],
        # The number of GPUs.
        ["--num-gpus", "100"],
        # The max redis clients.
        ["--redis-max-clients", "100"],
        # All arguments together.
        [
            "--num-workers", "20", "--redis-port", "6379",
            "--redis-shard-ports", "6380,6381,6382",
            "--object-manager-port", "12345", "--num-cpus", "100",
            "--num-gpus", "0", "--redis-max-clients", "100", "--resources",
            "{\"Custom\": 1}"
        ],
    ]
    for options in valid_option_sets:
        run_and_get_output(head_command + options)
        subprocess.Popen(stop_command).wait()

    # Invalid arguments: a head node given a redis address must fail.
    with self.assertRaises(Exception):
        run_and_get_output(
            head_command + ["--redis-address", "127.0.0.1:6379"])
    subprocess.Popen(stop_command).wait()
def _testCleanupOnDriverExit(self, num_redis_shards):
    """Check that global state is cleaned up after a driver exits.

    A head node is started with ``num_redis_shards`` Redis shards. A driver
    is run in a separate process, where it creates objects, a remote
    function and a task while checking the table sizes. After it exits,
    this process connects and checks that the object and task tables are
    back to their initial sizes.

    Args:
        num_redis_shards: Value passed to ``--num-redis-shards``.
    """
    stdout = run_and_get_output([
        "ray",
        "start",
        "--head",
        "--num-redis-shards",
        str(num_redis_shards),
    ])
    try:
        lines = [m.strip() for m in stdout.split("\n")]
        init_cmd = [m for m in lines if m.startswith("ray.init")]
        self.assertEqual(1, len(init_cmd))
        # Take the text after redis_address=" and drop the last two
        # characters of the line.
        redis_address = init_cmd[0].split("redis_address=\"")[-1][:-2]

        max_attempts_before_failing = 100

        def StateSummary():
            obj_tbl_len = len(ray.global_state.object_table())
            task_tbl_len = len(ray.global_state.task_table())
            func_tbl_len = len(ray.global_state.function_table())
            return obj_tbl_len, task_tbl_len, func_tbl_len

        def WaitUntil(condition):
            """Poll ``condition`` every 0.1s; return whether it became true."""
            for _ in range(max_attempts_before_failing):
                if condition():
                    return True
                time.sleep(0.1)
            return False

        def Driver(success):
            success.value = True
            # Start driver.
            ray.init(redis_address=redis_address)
            summary_start = StateSummary()
            if (0, 1) != summary_start[:2]:
                success.value = False

            # Two new objects.
            ray.get(ray.put(1111))
            ray.get(ray.put(1111))
            # The summary values can be updated slowly, so poll rather
            # than comparing a single, possibly outdated, snapshot.
            if not WaitUntil(
                    lambda: (2, 1, summary_start[2]) == StateSummary()):
                success.value = False

            @ray.remote
            def f():
                ray.put(1111)  # Yet another object.
                return 1111  # A returned object as well.

            # 1 new function.
            if not WaitUntil(
                    lambda: (2, 1, summary_start[2] + 1) == StateSummary()):
                success.value = False

            ray.get(f.remote())
            if not WaitUntil(
                    lambda: (4, 2, summary_start[2] + 1) == StateSummary()):
                success.value = False

            ray.shutdown()

        success = multiprocessing.Value('b', False)
        driver = multiprocessing.Process(target=Driver, args=(success, ))
        driver.start()
        # Wait for client to exit.
        driver.join()
        time.sleep(5)

        # Just make sure Driver() is run and succeeded.
        self.assertTrue(success.value)

        # Check that objects, tasks, and functions are cleaned up.
        ray.init(redis_address=redis_address)
        # The monitor may be slow to clean up the global state, so give it
        # a bounded amount of time before asserting.
        WaitUntil(lambda: (0, 1) == StateSummary()[:2])
        self.assertEqual((0, 1), StateSummary()[:2])

        ray.shutdown()
    finally:
        # Stop the cluster even if one of the checks above failed.
        subprocess.Popen(["ray", "stop"]).wait()
def test_calling_start_ray_head():
    """Exercise ``ray start --head`` with a range of command line options.

    Each valid option set is started and then stopped in turn. Finally, an
    invalid combination is checked to raise.

    TODO(rkn): This test only tests the --head code path. We should also
    test the non-head node code path.
    """
    head_command = ["ray", "start", "--head"]
    stop_command = ["ray", "stop"]

    def start_and_stop(options):
        # Start a head node with the extra options, then stop it again.
        run_and_get_output(head_command + options)
        subprocess.Popen(stop_command).wait()

    # Scenarios that run unconditionally, in order.
    always_run = [
        # No arguments.
        [],
        # A redis port.
        ["--redis-port", "6379"],
        # A node IP address.
        ["--node-ip-address", "127.0.0.1"],
        # The object manager and node manager ports.
        ["--object-manager-port", "12345", "--node-manager-port", "54321"],
        # The number of CPUs.
        ["--num-cpus", "2"],
        # The number of GPUs.
        ["--num-gpus", "100"],
        # The max redis clients.
        ["--redis-max-clients", "100"],
    ]
    for options in always_run:
        start_and_stop(options)

    # NOTE(review): the all-arguments scenario also passes
    # --redis-shard-ports, so it is kept under the same guard as the
    # shard-ports scenario -- confirm against the original layout.
    if "RAY_USE_NEW_GCS" not in os.environ:
        # Redis shard ports.
        start_and_stop(["--redis-shard-ports", "6380,6381,6382"])

        # All arguments together.
        start_and_stop([
            "--redis-port", "6379", "--redis-shard-ports", "6380,6381,6382",
            "--object-manager-port", "12345", "--num-cpus", "2",
            "--num-gpus", "0", "--redis-max-clients", "100", "--resources",
            "{\"Custom\": 1}"
        ])

    # Invalid arguments: a head node given a redis address must fail.
    with pytest.raises(Exception):
        run_and_get_output(
            head_command + ["--redis-address", "127.0.0.1:6379"])
    subprocess.Popen(stop_command).wait()
def _test_cleanup_on_driver_exit(num_redis_shards):
    """Check that global state is cleaned up after a driver exits.

    A head node is started with ``num_redis_shards`` Redis shards. A driver
    is run in a separate process, where it creates objects, a remote
    function and a task while polling the table sizes. After it exits,
    this process connects and polls until the object and task tables are
    back to their initial sizes.

    Args:
        num_redis_shards: Value passed to ``--num-redis-shards``.
    """
    start_command = [
        "ray", "start", "--head", "--num-redis-shards",
        str(num_redis_shards)
    ]
    stdout = run_and_get_output(start_command)

    # Exactly one line of the output is expected to be a ray.init(...) call.
    stripped_lines = (raw_line.strip() for raw_line in stdout.split("\n"))
    init_cmd = [
        line for line in stripped_lines if line.startswith("ray.init")
    ]
    assert 1 == len(init_cmd)
    # Take the text after redis_address=" and drop the last two characters
    # of the line.
    redis_address = init_cmd[0].split("redis_address=\"")[-1][:-2]

    max_attempts_before_failing = 100

    # Wait for monitor.py to start working.
    time.sleep(2)

    def state_summary():
        # Sizes of the object, task and function tables, in that order.
        state = ray.global_state
        return (len(state.object_table()), len(state.task_table()),
                len(state.function_table()))

    def reached(condition):
        # Check, then sleep 0.1s, at most max_attempts_before_failing
        # times. Returns True as soon as a check succeeds, else False.
        for _ in range(max_attempts_before_failing):
            if condition():
                return True
            time.sleep(0.1)
        return False

    def run_driver(success):
        success.value = True
        # Start driver.
        ray.init(redis_address=redis_address)
        initial = state_summary()
        if initial[:2] != (0, 1):
            success.value = False
        functions_at_start = initial[2]

        # Two new objects.
        ray.get(ray.put(1111))
        ray.get(ray.put(1111))
        if not reached(
                lambda: state_summary() == (2, 1, functions_at_start)):
            success.value = False

        @ray.remote
        def f():
            ray.put(1111)  # Yet another object.
            return 1111  # A returned object as well.

        # 1 new function.
        if not reached(
                lambda: state_summary() == (2, 1, functions_at_start + 1)):
            success.value = False

        ray.get(f.remote())
        if not reached(
                lambda: state_summary() == (4, 2, functions_at_start + 1)):
            success.value = False

        ray.shutdown()

    success = multiprocessing.Value('b', False)
    driver = multiprocessing.Process(target=run_driver, args=(success, ))
    driver.start()
    # Wait for client to exit.
    driver.join()

    # Just make sure the driver ran and succeeded.
    assert success.value

    # Check that objects, tasks, and functions are cleaned up.
    ray.init(redis_address=redis_address)
    reached(lambda: state_summary()[:2] == (0, 1))
    assert (0, 1) == state_summary()[:2]

    ray.shutdown()
    subprocess.Popen(["ray", "stop"]).wait()
def _test_cleanup_on_driver_exit(num_redis_shards):
    """Check that global state is cleaned up after a driver exits.

    A head node is started with ``num_redis_shards`` Redis shards. A driver
    is run in a separate process, where it creates objects, a remote
    function and a task while polling the table sizes. After it exits,
    this process connects and checks that the object and task tables are
    back to their initial sizes.

    Args:
        num_redis_shards: Value passed to ``--num-redis-shards``.
    """
    stdout = run_and_get_output([
        "ray",
        "start",
        "--head",
        "--num-redis-shards",
        str(num_redis_shards),
    ])
    lines = [m.strip() for m in stdout.split("\n")]
    init_cmd = [m for m in lines if m.startswith("ray.init")]
    assert 1 == len(init_cmd)
    # Take the text after redis_address=" and drop the last two characters
    # of the line.
    redis_address = init_cmd[0].split("redis_address=\"")[-1][:-2]

    max_attempts_before_failing = 100

    def StateSummary():
        obj_tbl_len = len(ray.global_state.object_table())
        task_tbl_len = len(ray.global_state.task_table())
        func_tbl_len = len(ray.global_state.function_table())
        return obj_tbl_len, task_tbl_len, func_tbl_len

    def WaitUntil(condition):
        """Poll ``condition`` every 0.1s; return whether it became true."""
        for _ in range(max_attempts_before_failing):
            if condition():
                return True
            time.sleep(0.1)
        return False

    def Driver(success):
        success.value = True
        # Start driver.
        ray.init(redis_address=redis_address)
        summary_start = StateSummary()
        if (0, 1) != summary_start[:2]:
            success.value = False

        # Two new objects.
        ray.get(ray.put(1111))
        ray.get(ray.put(1111))
        if not WaitUntil(lambda: (2, 1, summary_start[2]) == StateSummary()):
            success.value = False

        @ray.remote
        def f():
            ray.put(1111)  # Yet another object.
            return 1111  # A returned object as well.

        # 1 new function.
        if not WaitUntil(
                lambda: (2, 1, summary_start[2] + 1) == StateSummary()):
            success.value = False

        ray.get(f.remote())
        if not WaitUntil(
                lambda: (4, 2, summary_start[2] + 1) == StateSummary()):
            success.value = False

        ray.shutdown()

    success = multiprocessing.Value('b', False)
    driver = multiprocessing.Process(target=Driver, args=(success, ))
    driver.start()
    # Wait for client to exit.
    driver.join()
    time.sleep(3)

    # Just make sure Driver() is run and succeeded.
    assert success.value

    # Check that objects, tasks, and functions are cleaned up.
    ray.init(redis_address=redis_address)
    # The monitor can be slow to clean up the global state, so a single
    # check right after connecting was prone to spurious failures. Poll for
    # a bounded time and then assert on a fresh reading.
    WaitUntil(lambda: (0, 1) == StateSummary()[:2])
    assert (0, 1) == StateSummary()[:2]

    ray.shutdown()
    subprocess.Popen(["ray", "stop"]).wait()
def testCallingStartRayHead(self):
    """Run ``ray start --head`` with many option combinations.

    Every valid combination is started and then stopped before the next
    one runs. The last step checks that an invalid combination raises.

    TODO(rkn): This test only tests the --head code path. We should also
    test the non-head node code path.
    """

    def stop_ray():
        # Stop the running Ray processes and wait for the command to end.
        subprocess.Popen(["ray", "stop"]).wait()

    def start_then_stop(*extra_arguments):
        # Start a head node with the given extra arguments, then stop it.
        run_and_get_output(["ray", "start", "--head"] + list(extra_arguments))
        stop_ray()

    # No arguments.
    start_then_stop()
    # A number of workers.
    start_then_stop("--num-workers", "20")
    # A redis port.
    start_then_stop("--redis-port", "6379")
    # Redis shard ports.
    start_then_stop("--redis-shard-ports", "6380,6381,6382")
    # A node IP address.
    start_then_stop("--node-ip-address", "127.0.0.1")
    # An object manager port.
    start_then_stop("--object-manager-port", "12345")
    # The number of CPUs.
    start_then_stop("--num-cpus", "100")
    # The number of GPUs.
    start_then_stop("--num-gpus", "100")
    # The max redis clients.
    start_then_stop("--redis-max-clients", "100")
    # All arguments together.
    start_then_stop(
        "--num-workers", "20", "--redis-port", "6379",
        "--redis-shard-ports", "6380,6381,6382", "--object-manager-port",
        "12345", "--num-cpus", "100", "--num-gpus", "0",
        "--redis-max-clients", "100", "--resources", "{\"Custom\": 1}")

    # Invalid arguments: a head node given a redis address must fail.
    with self.assertRaises(Exception):
        run_and_get_output(
            ["ray", "start", "--head", "--redis-address", "127.0.0.1:6379"])
    stop_ray()