Exemple #1
0
    def __init__(
        self,
        address: Optional[str],
        *,
        session_dir: Optional[str] = None,
        redis_password: Optional[str] = None,
        runtime_env_agent_port: int = 0,
    ):
        """Set up bookkeeping, the runtime-env agent stub and the watcher thread.

        Args:
            address: Stored as ``self._address``.
            session_dir: Accepted but not referenced in this constructor.
            redis_password: Stored as ``self._redis_password``.
            runtime_env_agent_port: Port on 127.0.0.1 used to open the gRPC
                channel backing ``self._runtime_env_stub``.
        """
        # Connection settings handed in by the caller.
        self._address = address
        self._redis_password = redis_password

        # Per-client server registry, its lock, and the pool of unused ports.
        self.server_lock = RLock()
        self.servers: Dict[str, SpecificServer] = {}
        port_pool = range(MIN_SPECIFIC_SERVER_PORT, MAX_SPECIFIC_SERVER_PORT)
        self._free_ports: List[int] = [*port_pool]

        # gRPC channel and stub for the runtime-env agent on localhost.
        agent_target = f"127.0.0.1:{runtime_env_agent_port}"
        channel = ray._private.utils.init_grpc_channel(agent_target)
        self._runtime_env_channel = channel
        stub_factory = runtime_env_agent_pb2_grpc.RuntimeEnvServiceStub
        self._runtime_env_stub = stub_factory(channel)

        # Daemon thread running self._check_processes; the attribute is bound
        # before the thread is started.
        watcher = Thread(target=self._check_processes, daemon=True)
        self._check_thread = watcher
        watcher.start()

        fate_sharing = detect_fate_sharing_support()
        self.fate_share = bool(fate_sharing)
        self._node: Optional[ray.node.Node] = None

        # Run self._cleanup at interpreter exit.
        atexit.register(self._cleanup)
Exemple #2
0
    def __init__(self, redis_address):
        """Record the Redis address and start the process-watcher thread.

        Args:
            redis_address: Stored unchanged as ``self.redis_address``.
        """
        self.redis_address = redis_address

        # Per-client server registry, guarded by a non-reentrant lock.
        self.server_lock = Lock()
        self.servers: Dict[str, SpecificServer] = {}

        # Every port in [MIN_SPECIFIC_SERVER_PORT, MAX_SPECIFIC_SERVER_PORT)
        # starts out free.
        port_pool = range(MIN_SPECIFIC_SERVER_PORT, MAX_SPECIFIC_SERVER_PORT)
        self._free_ports: List[int] = [*port_pool]

        # Daemon thread running self._check_processes; the attribute is bound
        # before the thread is started.
        watcher = Thread(target=self._check_processes, daemon=True)
        self._check_thread = watcher
        watcher.start()

        fate_sharing = detect_fate_sharing_support()
        self.fate_share = bool(fate_sharing)
Exemple #3
0
    def __init__(self, redis_address: str, session_dir: Optional[str] = None):
        """Record connection settings and start the process-watcher thread.

        Args:
            redis_address: Stored unchanged as ``self.redis_address``.
            session_dir: Stored as ``self._session_dir``; any falsy value
                (including ``None``) is normalised to the empty string.
        """
        self.redis_address = redis_address

        # Per-client server registry, guarded by a reentrant lock.
        self.server_lock = RLock()
        self.servers: Dict[str, SpecificServer] = {}

        # Every port in [MIN_SPECIFIC_SERVER_PORT, MAX_SPECIFIC_SERVER_PORT)
        # starts out free.
        port_pool = range(MIN_SPECIFIC_SERVER_PORT, MAX_SPECIFIC_SERVER_PORT)
        self._free_ports: List[int] = [*port_pool]

        # Daemon thread running self._check_processes; the attribute is bound
        # before the thread is started.
        watcher = Thread(target=self._check_processes, daemon=True)
        self._check_thread = watcher
        watcher.start()

        fate_sharing = detect_fate_sharing_support()
        self.fate_share = bool(fate_sharing)
        self._session_dir: str = session_dir if session_dir else ""

        # Run self._cleanup at interpreter exit.
        atexit.register(self._cleanup)
Exemple #4
0
def test_calling_start_ray_head(call_ray_stop_only):
    """Exercise ``ray start --head`` with many CLI option combinations.

    The test starts and stops a head node repeatedly with different flags,
    checks that malformed worker port lists make the command fail, starts a
    head node against an externally launched Redis via ``RAY_REDIS_ADDRESS``,
    and finally verifies the two ``--block`` behaviours: the command exits
    non-zero when a child is killed, and its children exit when the command
    itself is terminated.

    Args:
        call_ray_stop_only: Fixture requested for its side effects only; it is
            not referenced in the body (presumably it stops Ray on teardown --
            verify against the fixture definition).
    """

    # Test that we can call ray start with various command line
    # parameters.

    # Test starting Ray with a redis port specified.
    check_call_ray(["start", "--head", "--port", "0"])
    check_call_ray(["stop"])

    # Test starting Ray with a node IP address specified.
    check_call_ray(
        ["start", "--head", "--node-ip-address", "127.0.0.1", "--port", "0"])
    check_call_ray(["stop"])

    # Test starting Ray with a system config parameter set.
    check_call_ray([
        "start",
        "--head",
        "--system-config",
        '{"metrics_report_interval_ms":100}',
        "--port",
        "0",
    ])
    check_call_ray(["stop"])

    # Test starting Ray with the object manager and node manager ports
    # specified.
    check_call_ray([
        "start",
        "--head",
        "--object-manager-port",
        "22345",
        "--node-manager-port",
        "54321",
        "--port",
        "0",
    ])
    check_call_ray(["stop"])

    # Test starting Ray with the worker port range specified.
    check_call_ray([
        "start",
        "--head",
        "--min-worker-port",
        "51000",
        "--max-worker-port",
        "51050",
        "--port",
        "0",
    ])
    check_call_ray(["stop"])

    # Test starting Ray with a worker port list.
    check_call_ray(["start", "--head", "--worker-port-list", "10002,10003"])
    check_call_ray(["stop"])

    # Test starting Ray with a non-int in the worker port list.
    with pytest.raises(subprocess.CalledProcessError):
        check_call_ray(["start", "--head", "--worker-port-list", "10002,a"])
    check_call_ray(["stop"])

    # Test starting Ray with an invalid port in the worker port list.
    with pytest.raises(subprocess.CalledProcessError):
        check_call_ray(["start", "--head", "--worker-port-list", "100"])
    check_call_ray(["stop"])

    # Test starting Ray with the number of CPUs specified.
    check_call_ray(["start", "--head", "--num-cpus", "2", "--port", "0"])
    check_call_ray(["stop"])

    # Test starting Ray with the number of GPUs specified.
    check_call_ray(["start", "--head", "--num-gpus", "100", "--port", "0"])
    check_call_ray(["stop"])

    # Test starting Ray with redis shard ports specified.
    check_call_ray([
        "start", "--head", "--redis-shard-ports", "6380,6381,6382", "--port",
        "0"
    ])
    check_call_ray(["stop"])

    # Test starting Ray with all arguments specified.
    check_call_ray([
        "start",
        "--head",
        "--redis-shard-ports",
        "6380,6381,6382",
        "--object-manager-port",
        "22345",
        "--num-cpus",
        "2",
        "--num-gpus",
        "0",
        "--resources",
        '{"Custom": 1}',
        "--port",
        "0",
    ])
    check_call_ray(["stop"])

    temp_dir = ray._private.utils.get_ray_temp_dir()

    # Test starting Ray with RAY_REDIS_ADDRESS env.
    _, proc = _start_redis_instance(
        REDIS_EXECUTABLE,
        temp_dir,
        8888,
        password=ray_constants.REDIS_DEFAULT_PASSWORD)
    try:
        os.environ["RAY_REDIS_ADDRESS"] = "127.0.0.1:8888"
        check_call_ray(["start", "--head"])
        check_call_ray(["stop"])
    finally:
        # Always stop the helper Redis process and drop the env var, even if
        # `ray start` / `ray stop` failed; otherwise the variable would stay
        # set for everything that runs later in this process.
        proc.process.terminate()
        os.environ.pop("RAY_REDIS_ADDRESS", None)

    # Driver script that tries to connect to the running cluster, retrying up
    # to five times with a one second pause between attempts.
    wait_for_cluster_script = """
import ray
from time import sleep
for i in range(0, 5):
    try:
        ray.init(address='auto')
        break
    except:
        sleep(1)
"""

    # Test --block. Killing a child process should cause the command to exit.
    blocked = subprocess.Popen(
        ["ray", "start", "--head", "--block", "--port", "0"])

    blocked.poll()
    assert blocked.returncode is None
    # Make sure ray cluster is up
    run_string_as_driver(wait_for_cluster_script)

    # Run the readiness check a second time before killing the raylet.
    run_string_as_driver(wait_for_cluster_script)

    kill_process_by_name("raylet", SIGKILL=True)
    wait_for_children_of_pid_to_exit(blocked.pid, timeout=30)
    blocked.wait()
    assert blocked.returncode != 0, "ray start shouldn't return 0 on bad exit"

    # Test --block. Killing the command should clean up all child processes.
    blocked = subprocess.Popen(
        ["ray", "start", "--head", "--block", "--port", "0"])
    blocked.poll()
    assert blocked.returncode is None

    # Include GCS, autoscaler monitor, client server, dashboard, raylet and
    # log_monitor.py
    num_children = 6
    if not detect_fate_sharing_support():
        # Account for ray_process_reaper.py
        num_children += 1
    # Check a set of child process commands & scripts instead?
    wait_for_children_of_pid(blocked.pid,
                             num_children=num_children,
                             timeout=30)

    blocked.terminate()
    wait_for_children_of_pid_to_exit(blocked.pid, timeout=30)
    blocked.wait()
    assert blocked.returncode != 0, "ray start shouldn't return 0 on bad exit"