Example #1
0
def call_ray_start_with_external_redis(request):
    """Start external Redis instance(s) and a Ray head node that uses them.

    Generator written in pytest yield-fixture style (the decorator, if any,
    is outside this excerpt).

    Args:
        request: Object whose optional ``param`` attribute is a
            comma-separated string of Redis ports. Defaults to ``"6379"``
            when the attribute is absent.

    Yields:
        str: ``"localhost:<port>"`` for the first port in the list.
    """
    # Redis and `ray start` have to agree on the password, so define it once
    # and use it for both.
    redis_password = "123"

    ports = getattr(request, "param", "6379")
    # Strip whitespace so "6379, 6380" still yields well-formed addresses.
    port_list = [port.strip() for port in ports.split(",")]

    # Same call for every instance, so look the directory up once.
    temp_dir = ray._private.utils.get_ray_temp_dir()
    for port in port_list:
        _start_redis_instance(REDIS_EXECUTABLE,
                              temp_dir,
                              int(port),
                              password=redis_password)

    addresses = [f"localhost:{port}" for port in port_list]
    address_str = ",".join(addresses)
    # Pass argv as a list rather than splitting a formatted string on spaces.
    subprocess.call([
        "ray",
        "start",
        "--head",
        f"--address={address_str}",
        f"--redis-password={redis_password}",
    ])

    yield addresses[0]

    # Disconnect from the Ray cluster.
    ray.shutdown()
    # Kill the Ray cluster.
    subprocess.check_call(["ray", "stop"])
Example #2
0
def _setup_redis(request):
    """Start external Redis instance(s) and export ``RAY_REDIS_ADDRESS``.

    Generator written in pytest yield-fixture style. While the consumer runs,
    ``RAY_REDIS_ADDRESS`` holds a comma-separated list of ``127.0.0.1:<port>``
    entries, one per started instance. Afterwards the variable is put back to
    its previous state and the Redis processes are terminated.

    Args:
        request: Object whose optional ``param`` attribute is a dict. If it
            contains ``"external_redis_ports"`` (an iterable of ports), those
            ports are used and the key is removed from the dict; otherwise a
            single free port is picked by binding a throwaway socket.
    """
    import os

    # Setup external Redis and env var for initialization.
    param = getattr(request, "param", {})

    external_redis_ports = param.get("external_redis_ports")
    if external_redis_ports is None:
        # Bind to port 0 so the OS hands out a currently free port.
        with socket.socket() as s:
            s.bind(("", 0))
            port = s.getsockname()[1]
        external_redis_ports = [port]
    else:
        del param["external_redis_ports"]

    temp_dir = ray._private.utils.get_ray_temp_dir()
    processes = []
    started_ports = []
    try:
        for requested_port in external_redis_ports:
            port, proc = _start_redis_instance(
                REDIS_EXECUTABLE,
                temp_dir,
                requested_port,
                password=ray_constants.REDIS_DEFAULT_PASSWORD,
            )
            # Record the process before waiting so it is still cleaned up if
            # the readiness check below fails.
            processes.append(proc)
            started_ports.append(port)
            wait_for_redis_to_start("127.0.0.1", port,
                                    ray_constants.REDIS_DEFAULT_PASSWORD)

        # Advertise the ports returned by _start_redis_instance, i.e. the
        # same ones the readiness check above used.
        address_str = ",".join(f"127.0.0.1:{port}" for port in started_ports)

        old_addr = os.environ.get("RAY_REDIS_ADDRESS")
        os.environ["RAY_REDIS_ADDRESS"] = address_str
        try:
            yield
        finally:
            if old_addr is not None:
                os.environ["RAY_REDIS_ADDRESS"] = old_addr
            else:
                # pop() tolerates the consumer having removed the variable.
                os.environ.pop("RAY_REDIS_ADDRESS", None)
    finally:
        # Runs on setup failure as well, so no started instance is leaked.
        for proc in processes:
            proc.process.terminate()
Example #3
0
def test_calling_start_ray_head(call_ray_stop_only):
    """Exercise ``ray start --head`` with a range of command line options.

    Covers plain option combinations, rejected ``--worker-port-list`` values,
    startup with ``RAY_REDIS_ADDRESS`` pointing at an external Redis, and the
    two ``--block`` exit paths (a child process is killed / the command
    itself is terminated).

    Args:
        call_ray_stop_only: Fixture requested by name only; its value is not
            used in this test.
    """

    def start_head_then_stop(*extra_args):
        # Start a head node with the given extra CLI arguments, then stop it
        # so the next start begins from a clean state.
        check_call_ray(["start", "--head", *extra_args])
        check_call_ray(["stop"])

    # Test that we can call ray start with various command line
    # parameters.

    # Test starting Ray with a redis port specified.
    start_head_then_stop("--port", "0")

    # Test starting Ray with a node IP address specified.
    start_head_then_stop("--node-ip-address", "127.0.0.1", "--port", "0")

    # Test starting Ray with a system config parameter set.
    start_head_then_stop(
        "--system-config",
        '{"metrics_report_interval_ms":100}',
        "--port",
        "0",
    )

    # Test starting Ray with the object manager and node manager ports
    # specified.
    start_head_then_stop(
        "--object-manager-port",
        "22345",
        "--node-manager-port",
        "54321",
        "--port",
        "0",
    )

    # Test starting Ray with the worker port range specified.
    start_head_then_stop(
        "--min-worker-port",
        "51000",
        "--max-worker-port",
        "51050",
        "--port",
        "0",
    )

    # Test starting Ray with a worker port list.
    start_head_then_stop("--worker-port-list", "10002,10003")

    # Test starting Ray with a non-int in the worker port list.
    with pytest.raises(subprocess.CalledProcessError):
        check_call_ray(["start", "--head", "--worker-port-list", "10002,a"])
    check_call_ray(["stop"])

    # Test starting Ray with an invalid port in the worker port list.
    with pytest.raises(subprocess.CalledProcessError):
        check_call_ray(["start", "--head", "--worker-port-list", "100"])
    check_call_ray(["stop"])

    # Test starting Ray with the number of CPUs specified.
    start_head_then_stop("--num-cpus", "2", "--port", "0")

    # Test starting Ray with the number of GPUs specified.
    start_head_then_stop("--num-gpus", "100", "--port", "0")

    # Test starting Ray with redis shard ports specified.
    start_head_then_stop("--redis-shard-ports", "6380,6381,6382", "--port",
                         "0")

    # Test starting Ray with all arguments specified.
    start_head_then_stop(
        "--redis-shard-ports",
        "6380,6381,6382",
        "--object-manager-port",
        "22345",
        "--num-cpus",
        "2",
        "--num-gpus",
        "0",
        "--resources",
        '{"Custom": 1}',
        "--port",
        "0",
    )

    temp_dir = ray._private.utils.get_ray_temp_dir()

    # Test starting Ray with RAY_REDIS_ADDRESS env.
    _, proc = _start_redis_instance(
        REDIS_EXECUTABLE,
        temp_dir,
        8888,
        password=ray_constants.REDIS_DEFAULT_PASSWORD)
    previous_redis_address = os.environ.get("RAY_REDIS_ADDRESS")
    os.environ["RAY_REDIS_ADDRESS"] = "127.0.0.1:8888"
    try:
        start_head_then_stop()
    finally:
        # Clean up even when start/stop fails, so neither the env var nor
        # the Redis process leaks into whatever runs next.
        if previous_redis_address is None:
            os.environ.pop("RAY_REDIS_ADDRESS", None)
        else:
            os.environ["RAY_REDIS_ADDRESS"] = previous_redis_address
        proc.process.terminate()

    # Test --block. Killing a child process should cause the command to exit.
    blocked = subprocess.Popen(
        ["ray", "start", "--head", "--block", "--port", "0"])

    blocked.poll()
    assert blocked.returncode is None
    # Make sure ray cluster is up
    run_string_as_driver("""
import ray
from time import sleep
for i in range(0, 5):
    try:
        ray.init(address='auto')
        break
    except:
        sleep(1)
""")

    kill_process_by_name("raylet", SIGKILL=True)
    wait_for_children_of_pid_to_exit(blocked.pid, timeout=30)
    blocked.wait()
    assert blocked.returncode != 0, "ray start shouldn't return 0 on bad exit"

    # Test --block. Killing the command should clean up all child processes.
    blocked = subprocess.Popen(
        ["ray", "start", "--head", "--block", "--port", "0"])
    blocked.poll()
    assert blocked.returncode is None

    # Include GCS, autoscaler monitor, client server, dashboard, raylet and
    # log_monitor.py
    num_children = 6
    if not detect_fate_sharing_support():
        # Account for ray_process_reaper.py
        num_children += 1
    # Check a set of child process commands & scripts instead?
    wait_for_children_of_pid(blocked.pid,
                             num_children=num_children,
                             timeout=30)

    blocked.terminate()
    wait_for_children_of_pid_to_exit(blocked.pid, timeout=30)
    blocked.wait()
    assert blocked.returncode != 0, "ray start shouldn't return 0 on bad exit"