Beispiel #1
0
def test_ray_client(call_ray_start):
    """Connect to the Ray Client server and run one trivial remote task.

    The task is submitted through ``ray.remote`` and its result is fetched
    through the client API object ``ray.util.client.ray``.
    """
    from ray.util.client import ray as ray_client
    ray.client("localhost:20000").connect()
    try:

        @ray.remote
        def f():
            return "hello client"

        assert ray_client.get(f.remote()) == "hello client"
    finally:
        # Always drop the client connection, even when the assertion fails,
        # so the connection does not leak into whatever runs next.
        ray.util.disconnect()
Beispiel #2
0
def test_non_existent_modules():
    """An address with an unknown module prefix must raise RuntimeError.

    The error message is expected to mention that the module does not exist.
    """
    caught = None
    try:
        ray.client("badmodule://address")
    except RuntimeError as err:
        caught = err

    # Fail with an explicit message if nothing was raised at all.
    assert caught is not None, "Bad Module did not raise RuntimeException"
    message = str(caught)
    assert "does not exist" in message
Beispiel #3
0
def test_runtime_install_error_message(call_ray_start):
    """
    Check that an error while preparing the runtime environment for the client
    server yields an actionable, clear error on the *client side*.
    """
    # A pip requirement that cannot be resolved.
    broken_env = {"pip": ["ray-this-doesnt-exist"]}
    builder = ray.client("localhost:25031").env(broken_env)

    with pytest.raises(ConnectionAbortedError) as excinfo:
        builder.connect()

    # The pip failure text must be surfaced in the client-side exception.
    message = str(excinfo.value)
    expected = "No matching distribution found for ray-this-doesnt-exist"
    assert expected in message, message

    ray.util.disconnect()
Beispiel #4
0
def test_multiple_clients_use_different_drivers(call_ray_start):
    """
    Test that each client uses a separate JobID and namespace.

    Two client connections are opened one after the other against the same
    server address; the job id and namespace reported by the runtime context
    inside each connection are recorded and compared.
    """
    # First connection: record the job id and namespace it reports.
    with ray.client("localhost:25001").connect():
        job_id_one = ray.get_runtime_context().job_id
        namespace_one = ray.get_runtime_context().namespace
    # Second connection to the same address, opened after the first `with`
    # block has exited.
    with ray.client("localhost:25001").connect():
        job_id_two = ray.get_runtime_context().job_id
        namespace_two = ray.get_runtime_context().namespace

    assert job_id_one != job_id_two
    assert namespace_one != namespace_two
def test_conda_pip_filepaths_remote(call_ray_start, tmp_path):
    """Test that pip and conda filepaths work, simulating a remote cluster.

    A pip requirements file and a conda environment file are written under
    ``tmp_path``, attached to a remote function via ``f.options``, and then
    deleted before the function is executed through Ray Client.
    """

    working_dir = tmp_path / "requirements"
    working_dir.mkdir()

    # runtime_env that points at a requirements.txt by absolute path.
    pip_file = working_dir / "requirements.txt"
    requirements_txt = """
    pip-install-test==0.5
    """
    pip_file.write_text(requirements_txt)
    runtime_env_pip = {"pip": str(pip_file)}

    # runtime_env that points at a conda environment.yml by absolute path.
    conda_file = working_dir / "environment.yml"
    conda_dict = {"dependencies": ["pip", {"pip": ["pip-install-test==0.5"]}]}
    conda_str = yaml.dump(conda_dict)
    conda_file.write_text(conda_str)
    runtime_env_conda = {"conda": str(conda_file)}

    @ray.remote
    def f():
        import pip_install_test  # noqa

        return True

    with ray.client("localhost:24001").connect():
        with pytest.raises(ModuleNotFoundError):
            # Ensure pip-install-test is not installed in a client that doesn't
            # use the runtime_env
            ray.get(f.remote())

    # pip and conda files should be parsed when the function is declared.
    f_pip = f.options(runtime_env=runtime_env_pip)
    f_conda = f.options(runtime_env=runtime_env_conda)

    # Remove the pip and conda files from the local filesystem. This is
    # necessary to simulate the files not being present on the remote cluster,
    # because in this single-machine test, the cluster has the same filesystem.
    os.remove(pip_file)
    os.remove(conda_file)

    # Test with and without a working_dir.
    client_envs = [{}, {"working_dir": str(working_dir)}]
    for runtime_env in client_envs:
        with ray.client("localhost:24001").env(runtime_env).connect():
            with pytest.raises(ModuleNotFoundError):
                # Ensure pip-install-test is not installed on the test machine
                import pip_install_test  # noqa
            # The assertion message records which client env was in use.
            assert ray.get(f_pip.remote()), str(runtime_env)
            assert ray.get(f_conda.remote()), str(runtime_env)
Beispiel #6
0
def client_connect_to_k8s(port="10001"):
    """Port-forward the example cluster's head service and connect to it.

    Generator: starts ``kubectl port-forward`` for
    ``service/example-cluster-ray-head`` in ``NAMESPACE``, connects a Ray
    client to ``127.0.0.1:<port>``, yields the port-forwarding process, and
    on exit shuts Ray down and kills that process.

    Args:
        port: Port number used on both the local and the service side of the
            port-forward.

    Yields:
        The ``subprocess.Popen`` handle of the port-forwarding process.
    """
    command = f"kubectl -n {NAMESPACE}"\
        f" port-forward service/example-cluster-ray-head {port}:{port}"
    command = command.split()
    print(">>>Port-forwarding head service.")
    proc = subprocess.Popen(command)
    # Everything after the process is spawned runs under try/finally so the
    # port-forward is killed even if connecting fails.
    try:
        # Wait a bit for the port-forwarding connection to be
        # established.
        time.sleep(10)
        ray.client(f"127.0.0.1:{port}").connect()
        yield proc
    finally:
        try:
            ray.shutdown()
        finally:
            # Kill the port-forward even if ray.shutdown() raises.
            proc.kill()
def test_client_tasks_and_actors_inherit_from_driver(conda_envs,
                                                     call_ray_start):
    """Tasks and actors must run in the conda env chosen by the client.

    For each tensorflow version, a client connects with the matching
    ``tf-<version>`` conda env and checks the version seen by a task and by
    an actor; a second driver is then run against the other version.
    """

    @ray.remote
    def get_tf_version():
        import tensorflow as tf
        return tf.__version__

    @ray.remote
    class TfVersionActor:
        def get_tf_version(self):
            import tensorflow as tf
            return tf.__version__

    tf_versions = ["2.2.0", "2.3.0"]
    for idx, expected in enumerate(tf_versions):
        client = ray.client("localhost:24001").env(
            {"conda": f"tf-{expected}"})
        with client.connect():
            task_version = ray.get(get_tf_version.remote())
            assert task_version == expected
            actor_handle = TfVersionActor.remote()
            actor_version = ray.get(actor_handle.get_tf_version.remote())
            assert actor_version == expected

            # Ensure that we can have a second client connect using the other
            # conda environment.
            other = tf_versions[(idx + 1) % 2]
            driver_script = check_remote_client_conda.format(tf_version=other)
            run_string_as_driver(driver_script)
Beispiel #8
0
def test_correct_num_clients(call_ray_start):
    """
    Checks that the returned value of `num_clients` correctly tracks clients
    connecting and disconnecting.
    """
    # This process connects first, so it expects a client count of 1.
    info = ray.client("localhost:25005").connect()
    assert info._num_clients == 1
    # Run a second driver while this client is still connected.
    # NOTE(review): check_we_are_second is defined outside this block;
    # presumably it asserts the client count it is formatted with — confirm.
    run_string_as_driver(check_we_are_second.format(num_clients=2))
    # After this client disconnects, the next driver is formatted with 1.
    ray.util.disconnect()
    run_string_as_driver(check_we_are_second.format(num_clients=1))
def test_client_working_dir_filepath(call_ray_start, tmp_path):
    """Test that pip and conda relative filepaths work with working_dir.

    Both runtime envs set ``working_dir`` to a directory under ``tmp_path``
    and name the requirements / environment file by its bare filename.
    """

    working_dir = tmp_path / "requirements"
    working_dir.mkdir()

    # pip: "requirements.txt" is given as a bare filename next to working_dir.
    pip_file = working_dir / "requirements.txt"
    requirements_txt = """
    pip-install-test==0.5
    """
    pip_file.write_text(requirements_txt)
    runtime_env_pip = {
        "working_dir": str(working_dir),
        "pip": "requirements.txt"
    }

    # conda: "environment.yml" is likewise given as a bare filename.
    conda_file = working_dir / "environment.yml"
    conda_dict = {"dependencies": ["pip", {"pip": ["pip-install-test==0.5"]}]}
    conda_str = yaml.dump(conda_dict)
    conda_file.write_text(conda_str)
    runtime_env_conda = {
        "working_dir": str(working_dir),
        "conda": "environment.yml"
    }

    @ray.remote
    def f():
        import pip_install_test  # noqa
        return True

    with ray.client("localhost:24001").connect():
        with pytest.raises(ModuleNotFoundError):
            # Ensure pip-install-test is not installed in a client that doesn't
            # use the runtime_env
            ray.get(f.remote())

    # Connect once per runtime env and check the import succeeds remotely.
    for runtime_env in [runtime_env_pip, runtime_env_conda]:
        with ray.client("localhost:24001").env(runtime_env).connect():
            with pytest.raises(ModuleNotFoundError):
                # Ensure pip-install-test is not installed on the test machine
                import pip_install_test  # noqa
            assert ray.get(f.remote())
Beispiel #10
0
def test_address_resolution(ray_start_regular_shared):
    """Check how ``ray.client`` resolves addresses relative to RAY_ADDRESS.

    * An explicit address connects to the client server.
    * An explicit address still wins when ``RAY_ADDRESS=local`` is set.
    * ``ray.client(None)`` with ``RAY_ADDRESS=local`` ends up on a head node.
    """
    server = ray_client_server.serve("localhost:50055")
    # The whole body runs under try/finally so the server is stopped and
    # RAY_ADDRESS is removed even if an early connection or assertion fails.
    try:
        with ray.client("localhost:50055").connect():
            assert ray.util.client.ray.is_connected()

        os.environ["RAY_ADDRESS"] = "local"
        with ray.client("localhost:50055").connect():
            # client(...) takes precedence of RAY_ADDRESS=local
            assert ray.util.client.ray.is_connected()

        ray.client(None).connect()
        assert ray.worker.global_worker.node.is_head()

    finally:
        # pop() with a default cannot raise KeyError, so cleanup never masks
        # the original failure when the variable was not set.
        os.environ.pop("RAY_ADDRESS", None)

        server.stop(0)
        subprocess.check_output("ray stop --force", shell=True)
Beispiel #11
0
def test_address_resolution(call_ray_stop_only):
    """Check how ``ray.client`` resolves addresses relative to RAY_ADDRESS.

    A head node with a client server on port 50055 is started via the CLI.
    An explicit address must connect both with and without
    ``RAY_ADDRESS=local``; ``ray.client(None)`` with ``RAY_ADDRESS=local`` is
    expected to raise.
    """
    subprocess.check_output("ray start --head --ray-client-server-port=50055",
                            shell=True)

    with ray.client("localhost:50055").connect():
        assert ray.util.client.ray.is_connected()

    try:
        os.environ["RAY_ADDRESS"] = "local"
        with ray.client("localhost:50055").connect():
            # client(...) takes precedence of RAY_ADDRESS=local
            assert ray.util.client.ray.is_connected()

        with pytest.raises(Exception):
            # This tries to call `ray.init(address="local")`, which
            # breaks.
            ray.client(None).connect()

    finally:
        # Remove the variable set above so it does not affect later code.
        if os.environ.get("RAY_ADDRESS"):
            del os.environ["RAY_ADDRESS"]
Beispiel #12
0
def test_module_lacks_client_builder():
    """A module prefix without a ``ClientBuilder`` must fail with an assert.

    ``importlib`` inside ``ray.client_builder`` is patched so that only
    ``"ray"`` resolves to the real package; any other module name resolves
    to a bare ``Mock``.
    """

    def fake_import_module(module_string):
        if module_string != "ray":
            # Mock() does not have a `ClientBuilder` in its scope
            return Mock()
        return ray

    fake_importlib = Mock()
    fake_importlib.import_module = fake_import_module

    with patch("ray.client_builder.importlib", fake_importlib):
        # Empty and "ray://" addresses both resolve to ray's own builder.
        for address in ("", "ray://"):
            assert isinstance(ray.client(address), ray.ClientBuilder)

        raised = None
        try:
            ray.client("othermodule://")
        except AssertionError as err:
            raised = err
        assert raised is not None, ("Module without ClientBuilder did not "
                                    "raise AssertionError")
        assert "does not have ClientBuilder" in str(raised)
Beispiel #13
0
def test_connect_to_cluster(ray_start_regular_shared):
    """Check the metadata returned by ``ray.client(...).connect()``.

    The dashboard URL, Python version, Ray version, Ray commit and protocol
    version on the returned object are compared against local values.
    """
    server = ray_client_server.serve("localhost:50055")
    # Stop the server in `finally` so a failing assertion does not leave it
    # running.
    try:
        client_info = ray.client("localhost:50055").connect()

        assert client_info.dashboard_url == ray.worker.get_dashboard_url()
        # "major.minor.micro" of the interpreter running this test.
        python_version = ".".join(str(x) for x in sys.version_info[:3])
        assert client_info.python_version == python_version
        assert client_info.ray_version == ray.__version__
        assert client_info.ray_commit == ray.__commit__
        protocol_version = ray.util.client.CURRENT_PROTOCOL_VERSION
        assert client_info.protocol_version == protocol_version
    finally:
        server.stop(0)
Beispiel #14
0
    def _check_connected(self):
        """Ensure that the object has been initialized before it is used.

        This lazily initializes clients needed for state accessors.

        If the accessor is still missing afterwards and the environment
        variable ``RAY_ENABLE_AUTO_CONNECT`` is ``"1"``, a connection is
        opened with ``ray.client().connect()`` and the check is retried.

        Raises:
            ray.exceptions.RaySystemError: If the global state accessor is
                not available and auto-connect is not enabled, i.e. Ray has
                not been started yet.
        """
        if (self.redis_address is not None
                and self.global_state_accessor is None):
            self._really_init_global_state()

        # _really_init_global_state should have set self.global_state_accessor
        if self.global_state_accessor is None:
            if os.environ.get("RAY_ENABLE_AUTO_CONNECT", "") == "1":
                ray.client().connect()
                # Retry connect!
                # NOTE(review): this recurses until global_state_accessor is
                # set; presumably connect() makes that happen — confirm.
                return self._check_connected()
            raise ray.exceptions.RaySystemError(
                "Ray has not been started yet. You can start Ray with "
                "'ray.init()'.")
Beispiel #15
0
def test_connect_to_cluster(ray_start_regular_shared):
    """Check the metadata exposed by the client context after connecting.

    The dashboard URL, Python version, Ray version, Ray commit and protocol
    version on the context are compared against local values.
    """
    server = ray_client_server.serve("localhost:50055")
    # Cleanup runs in `finally` so a failing assertion does not leave the
    # server or the Ray processes running.
    try:
        with ray.client("localhost:50055").connect() as client_context:
            assert (client_context.dashboard_url ==
                    ray.worker.get_dashboard_url())
            # "major.minor.micro" of the interpreter running this test.
            python_version = ".".join(str(x) for x in sys.version_info[:3])
            assert client_context.python_version == python_version
            assert client_context.ray_version == ray.__version__
            assert client_context.ray_commit == ray.__commit__
            protocol_version = ray.util.client.CURRENT_PROTOCOL_VERSION
            assert client_context.protocol_version == protocol_version
    finally:
        server.stop(0)
        subprocess.check_output("ray stop --force", shell=True)
Beispiel #16
0
def get_remote_model(remote_model_checkpoint_path):
    """Load the model stored at a checkpoint path on the cluster.

    When running under Anyscale Connect, the remote results directory is
    downloaded and the model is loaded from the local copy; otherwise
    ``get_model`` is executed as a Ray task and its result is returned.

    Args:
        remote_model_checkpoint_path: Checkpoint path as seen on the cluster.

    Returns:
        Whatever ``get_model`` returns for the checkpoint.
    """
    if not is_anyscale_connect():
        remote_get_model = ray.remote(get_model)
        pending = remote_get_model.remote(remote_model_checkpoint_path)
        return ray.get(pending)

    remote_dir = "/home/ray/ray_results/"
    # Download training results to local client.
    # TODO(matt): remove the expanduser call when Anyscale Connect
    # supports tilde expansion.
    local_dir = os.path.expanduser("~/ray_results")
    ray.client().download_results(local_dir=local_dir, remote_dir=remote_dir)

    # Map the remote checkpoint path onto the downloaded copy.
    relative_checkpoint = os.path.relpath(remote_model_checkpoint_path,
                                          remote_dir)
    local_checkpoint = os.path.join(local_dir, relative_checkpoint)

    # Load model reference.
    return get_model(local_checkpoint)
def test_conda_create_ray_client(call_ray_start):
    """Tests dynamic conda env creation in RayClient.

    Connects once with a conda runtime_env that installs pip packages and
    once without it, checking that ``pip_install_test`` is importable in a
    task only in the first case.
    """

    # Conda spec given inline as a dict, with pip-installed dependencies.
    runtime_env = {
        "conda": {
            "dependencies": [
                "pip", {
                    "pip": [
                        "pip-install-test==0.5", "opentelemetry-api==1.0.0rc1",
                        "opentelemetry-sdk==1.0.0rc1"
                    ]
                }
            ]
        }
    }
    try:
        ray.client("localhost:24001").env(runtime_env).connect()

        @ray.remote
        def f():
            import pip_install_test  # noqa
            return True

        with pytest.raises(ModuleNotFoundError):
            # Ensure pip-install-test is not installed on the test machine
            import pip_install_test  # noqa
        assert ray.get(f.remote())

        # Reconnect without the runtime_env.
        ray.util.disconnect()
        ray.client("localhost:24001").connect()
        with pytest.raises(ModuleNotFoundError):
            # Ensure pip-install-test is not installed in a client that doesn't
            # use the runtime_env
            ray.get(f.remote())
    finally:
        # Always disconnect and leave client mode, even on failure.
        ray.util.disconnect()
        ray._private.client_mode_hook._explicitly_disable_client_mode()
def test_conda_create_ray_client(call_ray_start):
    """Tests dynamic conda env creation in RayClient."""

    pip_packages = ["pip-install-test==0.5"]
    conda_spec = {"dependencies": ["pip", {"pip": pip_packages}]}
    runtime_env = {"conda": conda_spec}

    @ray.remote
    def f():
        import pip_install_test  # noqa

        return True

    # With the conda runtime_env the package is importable inside the task.
    client_with_env = ray.client("localhost:24001").env(runtime_env)
    with client_with_env.connect():
        with pytest.raises(ModuleNotFoundError):
            # Ensure pip-install-test is not installed on the test machine
            import pip_install_test  # noqa
        task_result = ray.get(f.remote())
        assert task_result

    plain_client = ray.client("localhost:24001")
    with plain_client.connect():
        with pytest.raises(ModuleNotFoundError):
            # Ensure pip-install-test is not installed in a client that doesn't
            # use the runtime_env
            ray.get(f.remote())
Beispiel #19
0
def test_client_tasks_and_actors_inherit_from_driver(conda_envs,
                                                     call_ray_start):
    """Tasks and actors must run in the conda env chosen by the client.

    For each entry of ``REQUEST_VERSIONS``, a client connects with the
    matching ``package-<version>`` conda env and checks the version seen by
    a task and by an actor; a second driver is then run for the other entry.
    """
    for idx, expected_version in enumerate(REQUEST_VERSIONS):
        client = ray.client("localhost:24001").env(
            {"conda": f"package-{expected_version}"})
        with client.connect():
            task_version = ray.get(get_requests_version.remote())
            assert task_version == expected_version
            actor_handle = VersionActor.remote()
            actor_version = ray.get(
                actor_handle.get_requests_version.remote())
            assert actor_version == expected_version

            # Ensure that we can have a second client connect using the other
            # conda environment.
            other_version = REQUEST_VERSIONS[(idx + 1) % 2]
            driver_script = check_remote_client_conda.format(
                package_version=other_version)
            run_string_as_driver(driver_script)
Beispiel #20
0
def test_namespace():
    """
    Most of the "checks" in this test case rely on the fact that
    `run_string_as_driver` will throw an exception if the driver string exits
    with a non-zero exit code (e.g. when the driver scripts throws an
    exception). Since all of these drivers start named, detached actors, the
    most likely failure case would be a collision of named actors if they're
    put in the same namespace.

    This test checks that:
    * When two drivers don't specify a namespace, they are placed in different
      anonymous namespaces.
    * When two drivers specify a namespace, they collide.
    * The namespace name (as provided by the runtime context) is correct.
    """
    # Start a one-node cluster whose client server listens on port 50055.
    cluster = Cluster()
    cluster.add_node(num_cpus=4, ray_client_server_port=50055)
    cluster.wait_for_nodes(1)

    # Driver script: connects with the given namespace expression, creates a
    # detached actor named "abc", pings it and prints the runtime namespace.
    template = """
import ray
ray.client("localhost:50055").namespace({namespace}).connect()

@ray.remote
class Foo:
    def ping(self):
        return "pong"

a = Foo.options(lifetime="detached", name="abc").remote()
ray.get(a.ping.remote())
print(ray.get_runtime_context().namespace)
    """

    # "None" is substituted as a Python expression, i.e. namespace(None).
    anon_driver = template.format(namespace="None")
    run_string_as_driver(anon_driver)
    # This second run will fail if the actors don't run in separate anonymous
    # namespaces.
    run_string_as_driver(anon_driver)

    # The inner quotes make the substituted value a string literal.
    run_in_namespace = template.format(namespace="'namespace'")
    script_namespace = run_string_as_driver(run_in_namespace)
    # The second run fails because the actors are run in the same namespace.
    with pytest.raises(subprocess.CalledProcessError):
        run_string_as_driver(run_in_namespace)

    # The driver's printed output is the namespace it ran in.
    assert script_namespace.strip() == "namespace"
    subprocess.check_output("ray stop --force", shell=True)
Beispiel #21
0
    parser.add_argument("--smoke-test",
                        action="store_true",
                        help="Finish quickly for testing")
    parser.add_argument("--ray-address",
                        type=str,
                        default=None,
                        help="Address to use for Ray. "
                        "Use \"auto\" for cluster. "
                        "Defaults to None for local.")
    parser.add_argument("--server-address",
                        type=str,
                        default=None,
                        required=False,
                        help="The address of server to connect to if using "
                        "Ray Client.")

    args, _ = parser.parse_known_args()

    if args.smoke_test:
        ray.init()
    elif args.server_address:
        ray.client(args.server_address).connect()
    else:
        ray.init(args.ray_address)

    if args.smoke_test:
        tune_transformer(num_samples=1, gpus_per_trial=0, smoke_test=True)
    else:
        # You can change the number of GPUs here:
        tune_transformer(num_samples=8, gpus_per_trial=1)
Beispiel #22
0
        node_keys = [key for key in resources if "node" in key]
        num_nodes = sum(resources[node_key] for node_key in node_keys)
        if num_nodes < expected:
            print("{} nodes have joined so far, waiting for {} more.".format(
                num_nodes, expected - num_nodes))
            sys.stdout.flush()
            time.sleep(1)
        else:
            break


def main():
    """Wait for the cluster, then repeatedly run nested hostname tasks.

    Each of the 10 iterations submits 100 nested ``gethostname`` tasks and
    prints a ``Counter`` of the results.
    """
    wait_for_nodes(3)

    # Check that objects can be transferred from each node to each other node.
    for iteration in range(10):
        print(f"Iteration {iteration}")
        pending = []
        for _ in range(100):
            # The inner task's result is passed as argument to the outer one.
            inner = gethostname.remote(())
            pending.append(gethostname.remote(inner))
        hostname_counts = Counter(ray.get(pending))
        print(hostname_counts)
        sys.stdout.flush()

    print("Success!")
    sys.stdout.flush()


if __name__ == "__main__":
    # Connect through Ray Client to example-cluster-ray-head:10001 before
    # running main().
    ray.client("example-cluster-ray-head:10001").connect()
    main()
Beispiel #23
0
        if label == prediction:
            correct += 1

    print("Labels = {}. Predictions = {}. {} out of {} are correct.".format(
        list(labels), predictions, correct, num_to_test))


# Script entry point: connect, train, pick the best checkpoint, set up Serve
# and test predictions, in that order.
if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument("--smoke-test",
                        action="store_true",
                        default=False,
                        help="Finish quickly for testing.")
    args = parser.parse_args()

    # Connect using the "anyscale://" address scheme.
    ray.client("anyscale://").connect()
    num_workers = 2
    use_gpu = True

    print("Training model.")
    analysis = train_mnist(args.smoke_test, num_workers, use_gpu)

    print("Retrieving best model.")
    best_checkpoint = analysis.best_checkpoint
    model_id = get_best_model(best_checkpoint)

    print("Setting up Serve.")
    setup_serve(model_id)

    print("Testing Prediction Service.")
    test_predictions(args.smoke_test)
Beispiel #24
0
def test_local_clusters():
    """
    This tests the various behaviors of connecting to local clusters:

    * Using `ray.client("local").connect()` should always create a new
      cluster.
    * Using `ray.client().connect()` should create a new cluster if it doesn't
      connect to an existing one.
    * Using `ray.client().connect()` should only connect to a cluster if it
      was created with `ray start --head`, not from a python program.

    It tests whether two calls are in the same cluster by trying to create an
    actor with the same name in the same namespace, which will error and cause
    the script to have a non-zero exit, which throws an exception.
    """
    # Driver script: connects, creates a detached actor named "abc", then
    # sleeps forever so the cluster it started stays up.
    driver_template = """
import ray
info = ray.client({address}).namespace("").connect()

@ray.remote
class Foo:
    def ping(self):
        return "pong"

a = Foo.options(name="abc", lifetime="detached").remote()
ray.get(a.ping.remote())

import time
while True:
    time.sleep(30)

"""
    # NOTE: the template has no {blocking} field, so str.format ignores the
    # extra `blocking` keyword argument.
    blocking_local_script = driver_template.format(
        address="'local'", blocking=True)
    blocking_noaddr_script = driver_template.format(address="", blocking=True)

    # This should start a cluster.
    p1 = run_string_as_driver_nonblocking(blocking_local_script)
    # ray.client("local").connect() should start a second cluster.
    p2 = run_string_as_driver_nonblocking(blocking_local_script)
    # ray.client().connect() shouldn't connect to a cluster started by
    # ray.client("local").connect() so it should create a third one.
    p3 = run_string_as_driver_nonblocking(blocking_noaddr_script)
    # ray.client().connect() shouldn't connect to a cluster started by
    # ray.client().connect() so it should create a fourth one.
    p4 = run_string_as_driver_nonblocking(blocking_noaddr_script)

    # Poll until four redis addresses are found, one per expected cluster.
    wait_for_condition(
        lambda: len(ray._private.services.find_redis_address()) == 4,
        retry_interval_ms=1000)

    p1.kill()
    p2.kill()
    p3.kill()
    p4.kill()
    # Prevent flakiness since fatesharing takes some time.
    subprocess.check_output("ray stop --force", shell=True)

    # Since there's a cluster started with `ray start --head`
    # we should connect to it instead.
    subprocess.check_output("ray start --head", shell=True)
    # The assertion in the driver should cause the script to fail if we start
    # a new cluster instead of connecting.
    run_string_as_driver("""
import ray
ray.client().connect()
assert len(ray._private.services.find_redis_address()) == 1
    """)
    # ray.client("local").connect() should always create a new cluster even if
    # there's one running.
    p1 = run_string_as_driver_nonblocking(blocking_local_script)
    wait_for_condition(
        lambda: len(ray._private.services.find_redis_address()) == 2,
        retry_interval_ms=1000)
    p1.kill()
    subprocess.check_output("ray stop --force", shell=True)
Beispiel #25
0
def test_e2e_complex(call_ray_start, tmp_path):
    """Test multiple runtime_env options across multiple client connections.

    1.  Run a Ray Client job with both working_dir and pip specified. Check the
        environment using imports and file reads in tasks and actors.
    2.  On the same cluster, run a job as above but using the Ray Summit
        2021 demo's pip requirements.txt.  Also, check that per-task and
        per-actor pip requirements work, all using the job's working_dir.
    """
    # Create a file to use to test working_dir
    specific_path = tmp_path / "test"
    specific_path.write_text("Hello")

    # Job 1: working_dir plus an inline pip requirement list.
    with ray.client(f"localhost:{CLIENT_SERVER_PORT}").env({
            "working_dir":
            str(tmp_path),
            "pip": ["pip-install-test"]
    }).connect():

        # Test that a task is started in the working_dir.
        @ray.remote
        def test_read():
            return Path("./test").read_text()

        assert ray.get(test_read.remote()) == "Hello"

        # Check a task has the job's pip requirements and working_dir.
        @ray.remote
        def test_pip():
            import pip_install_test  # noqa
            import ray  # noqa

            return Path("./test").read_text()

        assert ray.get(test_pip.remote()) == "Hello"

        # Check an actor has the job's pip requirements and working_dir.
        @ray.remote
        class TestActor:
            def test(self):
                import pip_install_test  # noqa

                return Path("./test").read_text()

        a = TestActor.remote()
        assert ray.get(a.test.remote()) == "Hello"

    # pip requirements file from Ray Summit 2021 demo.
    requirement_path = tmp_path / "requirements.txt"
    requirement_path.write_text("\n".join([
        "ray[serve, tune]",
        "texthero",
        "PyGithub",
        "xgboost_ray",
        "pandas==1.1",  # pandas 1.2.4 in the demo, but not supported on py36
        "typer",
        "aiofiles",
    ]))

    # Start a new job on the same cluster using the Summit 2021 requirements.
    # Here "pip" is the filename of the requirements file written above into
    # the working_dir.
    with ray.client(f"localhost:{CLIENT_SERVER_PORT}").env({
            "working_dir":
            str(tmp_path),
            "pip":
            "requirements.txt"
    }).connect():

        @ray.remote
        def test_read():
            return Path("./test").read_text()

        assert ray.get(test_read.remote()) == "Hello"

        # Check that a task has the job's pip requirements and working_dir.
        @ray.remote
        def test_import():
            import ray  # noqa
            from ray import serve  # noqa
            from ray import tune  # noqa
            import typer  # noqa
            import xgboost_ray  # noqa

            return Path("./test").read_text()

        assert ray.get(test_import.remote()) == "Hello"

        # Check that an actor has the job's pip requirements and working_dir.
        @ray.remote
        class TestActor:
            def test(self):
                import ray  # noqa
                from ray import serve  # noqa
                from ray import tune  # noqa
                import typer  # noqa
                import xgboost_ray  # noqa

                return Path("./test").read_text()

        # The actor is created with its own runtime_env naming the same
        # requirements file.
        a = TestActor.options(runtime_env={"pip": "requirements.txt"}).remote()
        assert ray.get(a.test.remote()) == "Hello"

        # Check that per-task pip specification works and that the job's
        # working_dir is still inherited.
        @ray.remote
        def test_pip():
            import pip_install_test  # noqa

            return Path("./test").read_text()

        assert ray.get(
            test_pip.options(runtime_env={
                "pip": ["pip-install-test"]
            }).remote()) == "Hello"

        # Check that pip_install_test is not in the job's pip requirements.
        with pytest.raises(ray.exceptions.RayTaskError) as excinfo:
            ray.get(test_pip.remote())
        assert "ModuleNotFoundError" in str(excinfo.value)

        # Check that per-actor pip specification works and that the job's
        # working_dir is still inherited.
        @ray.remote
        class TestActor:
            def test(self):
                import pip_install_test  # noqa

                return Path("./test").read_text()

        a = TestActor.options(runtime_env={
            "pip": ["pip-install-test"]
        }).remote()
        assert ray.get(a.test.remote()) == "Hello"
Beispiel #26
0
"""
import json
import os
import time

import ray
from ray.test_utils import wait_for_num_nodes
from xgboost_ray import RayParams

from ray.util.xgboost.release_test_util import train_ray

if __name__ == "__main__":
    addr = os.environ.get("RAY_ADDRESS")
    job_name = os.environ.get("RAY_JOB_NAME", "train_small")
    # RAY_ADDRESS may be unset, in which case `addr` is None. Guard the
    # prefix check so the script falls through to ray.init(address="auto")
    # instead of raising AttributeError.
    if addr and addr.startswith("anyscale://"):
        ray.client(address=addr).job_name(job_name).connect()
    else:
        ray.init(address="auto")

    # Wait for the head node plus the configured minimum number of workers,
    # passing 600 as the second argument.
    wait_for_num_nodes(
        int(os.environ.get("RAY_RELEASE_MIN_WORKERS", 0)) + 1, 600)

    # Both paths are required; a missing variable raises KeyError here.
    output = os.environ["TEST_OUTPUT_JSON"]
    state = os.environ["TEST_STATE_JSON"]
    ray_params = RayParams(elastic_training=False,
                           max_actor_restarts=2,
                           num_actors=4,
                           cpus_per_actor=4,
                           gpus_per_actor=0)

    start = time.time()
Beispiel #27
0
def test_client_deprecation_warn():
    """
    Tests that calling ray.client directly raises a deprecation warning with
    a copy pasteable replacement for the client().connect() call converted
    to ray.init style.

    Each case records warnings with ``warnings.catch_warnings(record=True)``
    and checks them with ``has_client_deprecation_warn`` against the expected
    replacement string.
    """
    # Test warning when local client mode is used
    with warnings.catch_warnings(record=True) as w:
        ray.client().connect()
        assert any(
            has_client_deprecation_warn(warning, "ray.init()")
            for warning in w)
        ray.shutdown()

    # Local client mode with both a namespace and a runtime env.
    with warnings.catch_warnings(record=True) as w:
        ray.client().namespace("nmspc").env({"pip": ["requests"]}).connect()
    expected = 'ray.init(namespace="nmspc", runtime_env=<your_runtime_env>)'  # noqa E501
    assert any(
        has_client_deprecation_warn(warning, expected)  # noqa E501
        for warning in w)
    ray.shutdown()

    # The remaining cases connect to an explicit client server address.
    server = ray_client_server.serve("localhost:50055")

    # Test warning when namespace and runtime env aren't specified
    with warnings.catch_warnings(record=True) as w:
        with ray.client("localhost:50055").connect():
            pass
    assert any(
        has_client_deprecation_warn(
            warning, 'ray.init("ray://localhost:50055")') for warning in w)

    # Test warning when just namespace specified
    with warnings.catch_warnings(record=True) as w:
        with ray.client("localhost:50055").namespace("nmspc").connect():
            pass
    assert any(
        has_client_deprecation_warn(
            warning, 'ray.init("ray://localhost:50055", namespace="nmspc")')
        for warning in w)

    # Test that passing namespace through env doesn't add namespace to the
    # replacement
    with warnings.catch_warnings(record=True) as w, \
            patch.dict(os.environ, {"RAY_NAMESPACE": "aksdj"}):
        with ray.client("localhost:50055").connect():
            pass
    assert any(
        has_client_deprecation_warn(
            warning, 'ray.init("ray://localhost:50055")') for warning in w)

    # Skip actually connecting on these, since updating the runtime env is
    # time consuming
    # NOTE(review): mock_connect is defined outside this block; the
    # `except ConnectionError` clauses below suggest it raises
    # ConnectionError — confirm.
    with patch("ray.util.client_connect.connect", mock_connect):
        # Test warning when just runtime_env specified
        with warnings.catch_warnings(record=True) as w:
            try:
                ray.client("localhost:50055") \
                    .env({"pip": ["requests"]}).connect()
            except ConnectionError:
                pass
        expected = 'ray.init("ray://localhost:50055", runtime_env=<your_runtime_env>)'  # noqa E501
        assert any(
            has_client_deprecation_warn(warning, expected) for warning in w)

        # Test warning works if both runtime env and namespace specified
        with warnings.catch_warnings(record=True) as w:
            try:
                ray.client("localhost:50055").namespace("nmspc") \
                    .env({"pip": ["requests"]}).connect()
            except ConnectionError:
                pass
        expected = 'ray.init("ray://localhost:50055", namespace="nmspc", runtime_env=<your_runtime_env>)'  # noqa E501
        assert any(
            has_client_deprecation_warn(warning, expected) for warning in w)

        # We don't expect namespace to appear in the warning message, since
        # it was configured through an env var
        with warnings.catch_warnings(record=True) as w, \
                patch.dict(os.environ, {"RAY_NAMESPACE": "abcdef"}):
            try:
                ray.client("localhost:50055") \
                    .env({"pip": ["requests"]}).connect()
            except ConnectionError:
                pass
        expected = 'ray.init("ray://localhost:50055", runtime_env=<your_runtime_env>)'  # noqa E501
        assert any(
            has_client_deprecation_warn(warning, expected) for warning in w)

    # cleanup
    server.stop(0)
    subprocess.check_output("ray stop --force", shell=True)
Beispiel #28
0
    ],
    indirect=True,
)
def test_correct_num_clients(call_ray_start):
    """
    Verify that `num_clients`, as returned on connect, follows clients
    connecting to and disconnecting from the server.
    """
    client_info = ray.client("localhost:25005").connect()
    # This process is the first (and so far only) client.
    assert client_info._num_clients == 1

    # A second driver joins while this client is still connected.
    while_connected = check_we_are_second.format(num_clients=2)
    run_string_as_driver(while_connected)

    # Once this client has disconnected, the next driver is formatted with 1.
    ray.util.disconnect()
    after_disconnect = check_we_are_second.format(num_clients=1)
    run_string_as_driver(after_disconnect)


# Driver script source: connects to the client server on localhost:25010 and
# asserts that the client worker's log thread is alive.
check_connection = """
import ray
ray.client("localhost:25010").connect()
assert ray.util.client.ray.worker.log_client.log_thread.is_alive()
"""


@pytest.mark.skipif(
    sys.platform != "linux",
    reason="PSUtil does not work the same on windows & MacOS if flaky.",
)
def test_delay_in_rewriting_environment(shutdown_only):
    """
    Check that a delay in `ray_client_server_env_prep` does not break
    a Client connecting.
    """
Beispiel #29
0
 def wrapped(*a, **kw):
     # Connect with default client settings if Ray is not initialized yet,
     # then forward all arguments to `f` (a name from the enclosing scope,
     # which is not visible here).
     if not ray.is_initialized():
         ray.client().connect()
     return f(*a, **kw)
Beispiel #30
0
                               num_actors=4,
                               cpus_per_actor=2),
          num_boost_round=100,
          evals=evallist)


# Script entry point: connect, run main(), and write the elapsed time as JSON.
if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument("--smoke-test",
                        action="store_true",
                        help="Finish quickly for testing.")
    args = parser.parse_args()

    # The timer covers connecting as well as main().
    start = time.time()

    client_builder = ray.client()
    if is_anyscale_connect():
        # Only set a job name when running under Anyscale Connect.
        job_name = os.environ.get("RAY_JOB_NAME", "modin_xgboost_test")
        client_builder.job_name(job_name)
    client_builder.connect()

    main()

    taken = time.time() - start
    result = {
        "time_taken": taken,
    }
    # Output path comes from TEST_OUTPUT_JSON, with a /tmp fallback.
    test_output_json = os.environ.get("TEST_OUTPUT_JSON",
                                      "/tmp/modin_xgboost_test.json")
    with open(test_output_json, "wt") as f:
        json.dump(result, f)