Example #1
def run(
    config_or_import_path: str,
    runtime_env: str,
    runtime_env_json: str,
    working_dir: str,
    app_dir: str,
    address: str,
    host: str,
    port: int,
    blocking: bool,
):
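    # Add the app directory to sys.path so import paths can be resolved.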
    sys.path.insert(0, app_dir)

    final_runtime_env = parse_runtime_env_args(
        runtime_env=runtime_env,
        runtime_env_json=runtime_env_json,
        working_dir=working_dir,
    )

    app_or_node = None
    if pathlib.Path(config_or_import_path).is_file():
        config_path = config_or_import_path
        cli_logger.print(f"Deploying from config file: '{config_path}'.")
        with open(config_path, "r") as config_file:
            app_or_node = Application.from_yaml(config_file)
    else:
        import_path = config_or_import_path
        cli_logger.print(f"Deploying from import path: '{import_path}'.")
        app_or_node = import_attr(import_path)

    # Setting the runtime_env here will set defaults for the deployments.
    ray.init(address=address, namespace="serve", runtime_env=final_runtime_env)

    try:
        serve.run(app_or_node, host=host, port=port)
        cli_logger.success("Deployed successfully.")

        if blocking:
            while True:
                # Block, letting Ray print logs to the terminal.
                time.sleep(10)

    except KeyboardInterrupt:
        cli_logger.info("Got KeyboardInterrupt, shutting down...")
        serve.shutdown()
        sys.exit()
Example #2
def test_dedicated_cpu(controller_cpu, num_proxy_cpus, ray_cluster):
    cluster = ray_cluster
    num_cluster_cpus = 8
    head_node = cluster.add_node(num_cpus=num_cluster_cpus)

    ray.init(head_node.address)
    wait_for_condition(lambda: ray.cluster_resources().get("CPU") == num_cluster_cpus)

    num_cpus_used = int(controller_cpu) + num_proxy_cpus

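    # Reserve CPUs for the Serve controller and the HTTP proxies.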
    serve.start(
        dedicated_cpu=controller_cpu, http_options=HTTPOptions(num_cpus=num_proxy_cpus)
    )
    available_cpus = num_cluster_cpus - num_cpus_used
    wait_for_condition(lambda: (ray.available_resources().get("CPU") == available_cpus))
    serve.shutdown()
    ray.shutdown()
Example #3
def test_no_http(ray_shutdown):
    # The following options should all have the same effect.
    options = [
        {"http_host": None},
        {"http_options": {"host": None}},
        {"http_options": {"location": None}},
        {"http_options": {"location": "NoServer"}},
    ]

    ray.init(num_cpus=16)
    for i, option in enumerate(options):
        print(f"[{i+1}/{len(options)}] Running with {option}")
        serve.start(**option)

        # Only the controller actor should exist.
        live_actors = [
            actor for actor in ray.state.actors().values()
            if actor["State"] == ray.gcs_utils.ActorTableData.ALIVE
        ]
        assert len(live_actors) == 1
        controller = serve.api._global_client._controller
        assert len(ray.get(controller.get_http_proxies.remote())) == 0

        # Test that the handle still works.
        @serve.deployment
        def hello(*args):
            return "hello"

        hello.deploy()

        assert ray.get(hello.get_handle().remote()) == "hello"
        serve.shutdown()
Example #4
def test_serve_application_to_schema_to_serve_application():
    @serve.deployment(
        num_replicas=1,
        route_prefix="/hello",
    )
    def f1():
        # The body of this function doesn't matter. See the comment in
        # test_deployment_to_schema_to_deployment.
        pass

    @serve.deployment(
        num_replicas=2,
        route_prefix="/hi",
    )
    def f2():
        pass

    f1._func_or_class = "ray.serve.tests.test_schema.global_f"
    f2._func_or_class = "ray.serve.tests.test_schema.global_f"

    deployments = schema_to_serve_application(serve_application_to_schema([f1, f2]))

    assert deployments[0].num_replicas == 1
    assert deployments[0].route_prefix == "/hello"
    assert deployments[1].num_replicas == 2
    assert deployments[1].route_prefix == "/hi"

    serve.start()

    deployments[0].deploy()
    deployments[1].deploy()
    assert ray.get(deployments[0].get_handle().remote()) == "Hello world!"
    assert requests.get("http://localhost:8000/hello").text == "Hello world!"
    assert ray.get(deployments[1].get_handle().remote()) == "Hello world!"
    assert requests.get("http://localhost:8000/hi").text == "Hello world!"

    # Check statuses
    statuses = serve_application_status_to_schema(get_deployment_statuses()).statuses
    deployment_names = {"f1", "f2"}
    for deployment_status in statuses:
        assert deployment_status.status in {"UPDATING", "HEALTHY"}
        assert deployment_status.name in deployment_names
        deployment_names.remove(deployment_status.name)
    assert len(deployment_names) == 0

    serve.shutdown()
Example #5
def test_shutdown(ray_shutdown):
    ray.init(num_cpus=16)
    serve.start(http_options=dict(port=8003))

    @serve.deployment
    def f():
        pass

    serve.run(f.bind())

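    # Collect the controller and HTTP proxy actor names so we can track their lifecycle.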
    serve_controller_name = serve.context._global_client._controller_name
    actor_names = [
        serve_controller_name,
        format_actor_name(
            SERVE_PROXY_NAME,
            serve.context._global_client._controller_name,
            get_all_node_ids()[0][0],
        ),
    ]

    def check_alive():
        alive = True
        for actor_name in actor_names:
            try:
                ray.get_actor(actor_name, namespace=SERVE_NAMESPACE)
            except ValueError:
                alive = False
        return alive

    wait_for_condition(check_alive)

    serve.shutdown()
    with pytest.raises(RayServeException):
        serve.list_deployments()

    def check_dead():
        for actor_name in actor_names:
            try:
                ray.get_actor(actor_name, namespace=SERVE_NAMESPACE)
                return False
            except ValueError:
                pass
        return True

    wait_for_condition(check_dead)
Example #6
def test_deployment_to_schema_to_deployment():
    @serve.deployment(
        num_replicas=3,
        route_prefix="/hello",
        ray_actor_options={
            "runtime_env": {
                "working_dir": (
                    "https://github.com/shrekris-anyscale/"
                    "test_module/archive/HEAD.zip"
                ),
                "py_modules": [
                    (
                        "https://github.com/shrekris-anyscale/"
                        "test_deploy_group/archive/HEAD.zip"
                    ),
                ],
            }
        },
    )
    def f():
        # The body of this function doesn't matter. It gets replaced by
        # global_f() when the import path in f._func_or_class is overwritten.
        # This function is used as a convenience to apply the @serve.deployment
        # decorator without converting global_f() into a Deployment object.
        pass

    f._func_or_class = "ray.serve.tests.test_schema.global_f"

    deployment = schema_to_deployment(deployment_to_schema(f))

    assert deployment.num_replicas == 3
    assert deployment.route_prefix == "/hello"
    assert deployment.ray_actor_options["runtime_env"]["working_dir"] == (
        "https://github.com/shrekris-anyscale/test_module/archive/HEAD.zip"
    )
    assert deployment.ray_actor_options["runtime_env"]["py_modules"] == [
        "https://github.com/shrekris-anyscale/test_deploy_group/archive/HEAD.zip",
        "https://github.com/shrekris-anyscale/test_module/archive/HEAD.zip",
    ]

    serve.start()
    deployment.deploy()
    assert ray.get(deployment.get_handle().remote()) == "Hello world!"
    assert requests.get("http://localhost:8000/hello").text == "Hello world!"
    serve.shutdown()
Example #7
def test_serve_namespace(shutdown_ray, detached, ray_namespace):
    """Test that Serve starts in SERVE_NAMESPACE regardless of driver namespace."""

    with ray.init(namespace=ray_namespace):

        @serve.deployment
        def f(*args):
            return "got f"

        serve.run(f.bind())

        actors = ray.util.list_named_actors(all_namespaces=True)

        assert len(actors) == 3
        assert all(actor["namespace"] == SERVE_NAMESPACE for actor in actors)
        assert requests.get("http://localhost:8000/f").text == "got f"

        serve.shutdown()
Example #8
def test_get_serve_status(shutdown_ray):

    ray.init()
    client = serve.start()

    @serve.deployment
    def f(*args):
        return "Hello world"

    f.deploy()

    status_info_1 = client.get_serve_status()
    assert status_info_1.app_status.status == "RUNNING"
    assert status_info_1.deployment_statuses[0].name == "f"
    assert status_info_1.deployment_statuses[0].status in {"UPDATING", "HEALTHY"}

    serve.shutdown()
    ray.shutdown()
Example #9
def test_standalone_actor_outside_serve():
    # https://github.com/ray-project/ray/issues/20066

    ray.init(num_cpus=8, namespace="serve")

    @ray.remote
    class MyActor:
        def ready(self):
            return

    a = MyActor.options(name="my_actor").remote()
    ray.get(a.ready.remote())

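    # Starting and shutting down Serve must not affect the user's standalone actor.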
    serve.start()
    serve.shutdown()

    ray.get(a.ready.remote())
    ray.shutdown()
Example #10
def test_controller_deserialization_args_and_kwargs():
    """Ensures init_args and init_kwargs stay serialized in controller."""

    ray.init()
    client = serve.start()

    class PidBasedString(str):
        pass

    def generate_pid_based_deserializer(pid, raw_deserializer):
        """Cannot be deserialized by the process with specified pid."""
        def deserializer(*args):

            import os

            if os.getpid() == pid:
                raise RuntimeError("Cannot be deserialized by this process!")
            else:
                return raw_deserializer(*args)

        return deserializer

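    # Patch __reduce__ so PidBasedString cannot be deserialized inside the controller process.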
    PidBasedString.__reduce__ = generate_pid_based_deserializer(
        ray.get(client._controller.get_pid.remote()),
        PidBasedString.__reduce__)

    @serve.deployment
    class Echo:
        def __init__(self, arg_str, kwarg_str="failed"):
            self.arg_str = arg_str
            self.kwarg_str = kwarg_str

        def __call__(self, request):
            return self.arg_str + self.kwarg_str

    serve.run(
        Echo.bind(PidBasedString("hello "),
                  kwarg_str=PidBasedString("world!")))

    assert requests.get("http://localhost:8000/Echo").text == "hello world!"

    serve.shutdown()
    ray.shutdown()
Example #11
def test_run_graph_task_uses_zero_cpus():
    """Check that the run_graph() task uses zero CPUs."""

    ray.init(num_cpus=2)
    client = serve.start(detached=True)

    config = {"import_path": "ray.serve.tests.test_standalone.WaiterNode"}
    config = ServeApplicationSchema.parse_obj(config)
    client.deploy_app(config)

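    # The run_graph() task should not consume CPU, so available CPUs never drop
    # below 1.9 and this wait is expected to time out.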
    with pytest.raises(RuntimeError):
        wait_for_condition(lambda: ray.available_resources()["CPU"] < 1.9,
                           timeout=5)

    wait_for_condition(
        lambda: requests.get("http://localhost:8000/Waiter").text
        == "May I take your order?"
    )

    serve.shutdown()
    ray.shutdown()
Example #12
def test_update_num_replicas_anonymous_namespace(shutdown_ray, detached):
    """Test updating num_replicas with anonymous namespace."""

    ray.init()
    serve.start(detached=detached)

    @serve.deployment(num_replicas=1)
    def f(*args):
        return "got f"

    f.deploy()

    num_actors = len(ray.util.list_named_actors(all_namespaces=True))

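    # Re-deploying should reuse the existing replica rather than creating new actors.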
    for _ in range(5):
        f.deploy()
        assert num_actors == len(ray.util.list_named_actors(all_namespaces=True))

    serve.shutdown()
Example #13
    def test_controller_recover_and_deploy(self, client: ServeControllerClient):
        """Ensure that in-progress deploy can finish even after controller dies."""

        config = ServeApplicationSchema.parse_obj(self.get_test_config())
        client.deploy_app(config)

        # Wait for app to deploy
        wait_for_condition(
            lambda: requests.post("http://localhost:8000/", json=["ADD", 2]).json()
            == "4 pizzas please!"
        )
        wait_for_condition(
            lambda: requests.post("http://localhost:8000/", json=["MUL", 3]).json()
            == "9 pizzas please!"
        )
        deployment_timestamp = client.get_serve_status().app_status.deployment_timestamp

        # Delete all deployments, but don't update config
        client.delete_deployments(
            ["Router", "Multiplier", "Adder", "create_order", "DAGDriver"]
        )

        ray.kill(client._controller, no_restart=False)

        # When controller restarts, it should redeploy config automatically
        wait_for_condition(
            lambda: requests.post("http://localhost:8000/", json=["ADD", 2]).json()
            == "4 pizzas please!"
        )
        wait_for_condition(
            lambda: requests.post("http://localhost:8000/", json=["MUL", 3]).json()
            == "9 pizzas please!"
        )
        assert (
            deployment_timestamp
            == client.get_serve_status().app_status.deployment_timestamp
        )

        serve.shutdown()
        client = serve.start(detached=True)

        # Ensure config checkpoint has been deleted
        assert client.get_serve_status().app_status.deployment_timestamp == 0
Example #14
def test_deploy_with_overriden_namespace(shutdown_ray, detached):
    """Test deployments with overriden namespace."""

    ray_namespace = "ray_namespace"
    controller_namespace = "controller_namespace"

    ray.init(namespace=ray_namespace)
    serve.start(detached=detached, _override_controller_namespace=controller_namespace)

    for iteration in range(2):

        @serve.deployment
        def f(*args):
            return f"{iteration}"

        f.deploy()
        assert requests.get("http://localhost:8000/f").text == f"{iteration}"

    serve.shutdown()
Example #15
def test_shutdown(ray_shutdown):
    ray.init(num_cpus=16)
    serve.start(http_port=8003)

    @serve.deployment
    def f():
        pass

    f.deploy()

    actor_names = [
        serve.api._global_client._controller_name,
        format_actor_name(SERVE_PROXY_NAME,
                          serve.api._global_client._controller_name,
                          get_all_node_ids()[0][0])
    ]

    def check_alive():
        alive = True
        for actor_name in actor_names:
            try:
                ray.get_actor(actor_name)
            except ValueError:
                alive = False
        return alive

    wait_for_condition(check_alive)

    serve.shutdown()
    with pytest.raises(RayServeException):
        serve.list_backends()

    def check_dead():
        for actor_name in actor_names:
            try:
                ray.get_actor(actor_name)
                return False
            except ValueError:
                pass
        return True

    wait_for_condition(check_dead)
Example #16
def test_serve_shutdown(ray_shutdown):
    serve.start(detached=True)

    @serve.deployment
    class A:
        def __call__(self, *args):
            return "hi"

    A.deploy()

    assert len(serve.list_deployments()) == 1

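    # Deployments should not survive a full Serve shutdown and restart.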
    serve.shutdown()
    serve.start(detached=True)

    assert len(serve.list_deployments()) == 0

    A.deploy()

    assert len(serve.list_deployments()) == 1
Example #17
def test_serve_namespace(ray_start_stop):
    """
    Check that the Dashboard's Serve can interact with the Python API
    when they both start in the "serve namespace"
    """

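    # Deploy one deployment over HTTP, then verify it is visible from the Python API.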
    one = dict(
        name="one",
        num_replicas=1,
        route_prefix="/one",
        ray_actor_options={"runtime_env": {"py_modules": [test_module_uri]}},
        import_path="test_module.test.one",
    )
    put_response = requests.put(GET_OR_PUT_URL, json={"deployments": [one]}, timeout=30)
    assert put_response.status_code == 200
    ray.init(address="auto", namespace="serve")
    serve.start()
    deployments = serve.list_deployments()
    assert len(deployments) == 1
    assert "one" in deployments
    serve.shutdown()
Example #18
def test_http_root_url(ray_shutdown):
    @serve.deployment
    def f(_):
        pass

    root_url = "https://my.domain.dev/prefix"

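    # With SERVE_ROOT_URL_ENV_KEY set, deployment URLs should use the custom root URL.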
    port = new_port()
    os.environ[SERVE_ROOT_URL_ENV_KEY] = root_url
    serve.start(http_options=dict(port=port))
    f.deploy()
    assert f.url == root_url + "/f"
    serve.shutdown()
    del os.environ[SERVE_ROOT_URL_ENV_KEY]

    port = new_port()
    serve.start(http_options=dict(port=port))
    f.deploy()
    assert f.url != root_url + "/f"
    assert f.url == f"http://127.0.0.1:{port}/f"
    serve.shutdown()
Example #19
def test_serve_shutdown(ray_shutdown):
    ray.init(namespace="serve")
    serve.start(detached=True)

    @serve.deployment
    class A:
        def __call__(self, *args):
            return "hi"

    serve.run(A.bind())

    assert len(serve.list_deployments()) == 1

    serve.shutdown()
    serve.start(detached=True)

    assert len(serve.list_deployments()) == 0

    serve.run(A.bind())

    assert len(serve.list_deployments()) == 1
Example #20
def test_fixed_number_proxies(ray_cluster):
    cluster = ray_cluster
    head_node = cluster.add_node(num_cpus=4)
    cluster.add_node(num_cpus=4)
    cluster.add_node(num_cpus=4)

    ray.init(head_node.address)
    node_ids = ray._private.state.node_ids()
    assert len(node_ids) == 3

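    # "FixedNumber" proxy placement requires the fixed_number_replicas parameter.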
    with pytest.raises(
        pydantic.ValidationError,
        match="you must specify the `fixed_number_replicas` parameter.",
    ):
        serve.start(
            http_options={
                "location": "FixedNumber",
            }
        )

    serve.start(
        http_options={
            "port": new_port(),
            "location": "FixedNumber",
            "fixed_number_replicas": 2,
        }
    )

    # Only the controller and two HTTP proxies should be started.
    controller_handle = get_global_client()._controller
    node_to_http_actors = ray.get(controller_handle.get_http_proxies.remote())
    assert len(node_to_http_actors) == 2

    proxy_names_bytes = ray.get(controller_handle.get_http_proxy_names.remote())
    proxy_names = ActorNameList.FromString(proxy_names_bytes)
    assert len(proxy_names.names) == 2

    serve.shutdown()
    ray.shutdown()
    cluster.shutdown()
Example #21
def test_http_root_url(ray_shutdown):
    @serve.deployment
    def f(_):
        pass

    root_url = "https://my.domain.dev/prefix"

    port = new_port()
    os.environ[SERVE_ROOT_URL_ENV_KEY] = root_url
    serve.start(http_options=dict(port=port))
    serve.run(f.bind())
    assert f.url == root_url + "/f"
    serve.shutdown()
    ray.shutdown()
    del os.environ[SERVE_ROOT_URL_ENV_KEY]

    port = new_port()
    serve.start(http_options=dict(port=port))
    serve.run(f.bind())
    assert f.url != root_url + "/f"
    assert f.url == f"http://127.0.0.1:{port}/f"
    serve.shutdown()
    ray.shutdown()

    ray.init(runtime_env={"env_vars": {SERVE_ROOT_URL_ENV_KEY: root_url}})
    port = new_port()
    serve.start(http_options=dict(port=port))
    serve.run(f.bind())
    assert f.url == root_url + "/f"
    serve.shutdown()
    ray.shutdown()
Example #22
def test_controller_recover_and_delete():
    """Ensure that in-progress deletion can finish even after controller dies."""

    ray.init()
    client = serve.start()

    @serve.deployment(
        num_replicas=50,
        ray_actor_options={"num_cpus": 0.001},
    )
    def f():
        pass

    f.deploy()

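    # Snapshot the running actors, then delete all replicas without blocking.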
    actors = ray.util.list_named_actors(all_namespaces=True)
    client.delete_deployments(["f"], blocking=False)

    wait_for_condition(
        lambda: len(ray.util.list_named_actors(all_namespaces=True)) < len(actors)
    )
    ray.kill(client._controller, no_restart=False)

    # There should still be replicas remaining
    assert len(ray.util.list_named_actors(all_namespaces=True)) > 2

    # All replicas should be removed once the controller revives
    wait_for_condition(
        lambda: len(ray.util.list_named_actors(all_namespaces=True)) == len(actors) - 50
    )

    # The deployment should be deleted, meaning its state should not be stored
    # in the DeploymentStateManager. This can be checked by attempting to
    # retrieve the deployment's status through the controller.
    assert client.get_serve_status().get_deployment_status("f") is None

    serve.shutdown()
    ray.shutdown()
Example #23
def test_idempotence_after_controller_death(ray_start_stop, use_command: bool):
    """Check that CLI is idempotent even if controller dies."""

    config_file_name = os.path.join(os.path.dirname(__file__),
                                    "test_config_files", "basic_graph.yaml")
    success_message_fragment = b"Sent deploy request successfully!"
    deploy_response = subprocess.check_output(
        ["serve", "deploy", config_file_name])
    assert success_message_fragment in deploy_response

    ray.init(address="auto", namespace=SERVE_NAMESPACE)
    serve.start(detached=True)
    wait_for_condition(
        lambda: len(ray.util.list_named_actors(all_namespaces=True)) == 4,
        timeout=15)

    # Kill controller
    if use_command:
        subprocess.check_output(["serve", "shutdown", "-y"])
    else:
        serve.shutdown()

    status_response = subprocess.check_output(["serve", "status"])
    status_info = yaml.safe_load(status_response)

    assert len(status_info["deployment_statuses"]) == 0

    deploy_response = subprocess.check_output(
        ["serve", "deploy", config_file_name])
    assert success_message_fragment in deploy_response

    # Restore testing controller
    serve.start(detached=True)
    wait_for_condition(
        lambda: len(ray.util.list_named_actors(all_namespaces=True)) == 4,
        timeout=15)
    serve.shutdown()
    ray.shutdown()
Example #24
def test_status_schema_helpers():
    @serve.deployment(
        num_replicas=1,
        route_prefix="/hello",
    )
    def f1():
        # The body of this function doesn't matter. See the comment in
        # test_deployment_to_schema_to_deployment.
        pass

    @serve.deployment(
        num_replicas=2,
        route_prefix="/hi",
    )
    def f2():
        pass

    f1._func_or_class = "ray.serve.tests.test_schema.global_f"
    f2._func_or_class = "ray.serve.tests.test_schema.global_f"

    serve.start()

    f1.deploy()
    f2.deploy()

    # Check statuses
    statuses = serve_application_status_to_schema(
        get_deployment_statuses()).statuses
    deployment_names = {"f1", "f2"}
    for deployment_status in statuses:
        assert deployment_status.status in {"UPDATING", "HEALTHY"}
        assert deployment_status.name in deployment_names
        deployment_names.remove(deployment_status.name)
    assert len(deployment_names) == 0

    serve.shutdown()
Example #25
def test_idempotence_after_controller_death(ray_start_stop, use_command: bool):
    """Check that CLI is idempotent even if controller dies."""

    config_file_name = os.path.join(os.path.dirname(__file__),
                                    "test_config_files",
                                    "two_deployments.yaml")
    success_message_fragment = b"Sent deploy request successfully!"
    deploy_response = subprocess.check_output(
        ["serve", "deploy", config_file_name])
    assert success_message_fragment in deploy_response

    ray.init(address="auto", namespace="serve")
    serve.start(detached=True)
    assert len(serve.list_deployments()) == 2

    # Kill controller
    if use_command:
        subprocess.check_output(["serve", "shutdown"])
    else:
        serve.shutdown()

    info_response = subprocess.check_output(["serve", "config"])
    info = yaml.safe_load(info_response)

    assert "deployments" in info
    assert len(info["deployments"]) == 0

    deploy_response = subprocess.check_output(
        ["serve", "deploy", config_file_name])
    assert success_message_fragment in deploy_response

    # Restore testing controller
    serve.start(detached=True)
    assert len(serve.list_deployments()) == 2
    serve.shutdown()
    ray.shutdown()
Example #26
def ray_cluster():
    cluster = Cluster()
    yield cluster
    serve.shutdown()
    ray.shutdown()
    cluster.shutdown()
Example #27
def ray_shutdown():
    yield
    serve.shutdown()
    ray.shutdown()
Example #28
def crash():
    subprocess.call(["ray", "stop", "--force"])
    ray.shutdown()
    serve.shutdown()
Example #29
def shutdown_ray():
    yield
    serve.shutdown()
    ray.shutdown()
Example #30
def shutdown():
    serve.api._connect()
    serve.shutdown()