Beispiel #1
0
def test_init_args(serve_instance):
    """Check how constructor arguments behave across redeployments.

    The deployment is first deployed with ``"1"``, then fetched by name and
    redeployed twice: once with no arguments (the test expects ``"1"`` to
    still be returned) and once with ``"2"``. Each redeploy is expected to
    be served from a different process id than the previous one.
    """
    name = "test"

    @serve.deployment(name=name)
    class D:
        def __init__(self, val):
            self._val = val

        def __call__(self, *arg):
            return self._val, os.getpid()

    def query(deployment):
        # One request through the deployment's handle -> (value, replica pid).
        return ray.get(deployment.get_handle().remote())

    D.deploy("1")
    first_val, first_pid = query(D)
    assert first_val == "1"

    # From here on the deployment is only reachable through its name.
    del D

    redeployed = serve.get_deployment(name=name)
    redeployed.deploy()
    second_val, second_pid = query(redeployed)
    assert second_val == "1"
    assert second_pid != first_pid

    updated = serve.get_deployment(name=name)
    updated.deploy("2")
    third_val, third_pid = query(updated)
    assert third_val == "2"
    assert third_pid != second_pid
Beispiel #2
0
 def delete_deployment(self, name):
     """Delete the deployment called ``name`` if it is currently listed.

     Args:
         name: Deployment name. It is compared against the ``"name"`` entry
             of every item returned by ``self.list_deployments()``.

     Returns:
         None. The outcome (deleted / not found) is only reported via the
         module logger.
     """
     if not any(name == d["name"] for d in self.list_deployments()):
         logger.info("Model with name %s does not exist.", name)
         return

     serve.get_deployment(name).delete()
     # Return right after the success message so that the "does not exist"
     # message is never logged for a deployment that was just deleted.
     logger.info("Deleted model with name: %s", name)
Beispiel #3
0
 def update_deployment(self, name, model_uri=None, flavor=None, config=None):
     """Update a deployment's options in place, or replace its model.

     When ``model_uri`` is None, the deployment registered under ``name`` is
     redeployed with ``config`` applied as deployment options. Otherwise the
     existing deployment is deleted and a new one is created from
     ``model_uri``.

     Args:
         name: Name of the deployment to update.
         model_uri: URI of the replacement model, or None to keep the model
             and only change options.
         flavor: Passed through to ``self.create_deployment`` when the model
             is replaced.
         config: Mapping of deployment options, or None for no overrides.

     Returns:
         A dict with the keys ``"name"``, ``"config"`` (exactly as passed in)
         and ``"flavor"`` (always ``"python_function"``).
     """
     if model_uri is None:
         # ``config`` defaults to None and ``**None`` raises TypeError, so a
         # missing config is treated as "no option overrides".
         serve.get_deployment(name).options(**(config or {})).deploy()
     else:
         self.delete_deployment(name)
         self.create_deployment(name, model_uri, flavor, config)
     return {"name": name, "config": config, "flavor": "python_function"}
Beispiel #4
0
    def test_get_after_delete(self, serve_instance):
        """Check that a deleted deployment can no longer be fetched by name."""

        @serve.deployment(version="1")
        def d(*args):
            return "1", os.getpid()

        d.deploy()
        del d

        # Look the deployment up by name and delete it without keeping a
        # reference to the returned object.
        serve.get_deployment("d").delete()

        with pytest.raises(KeyError):
            serve.get_deployment("d")
Beispiel #5
0
    def create_deployment(self):
        """Deploy a handler under a fresh random name.

        If the number of tracked deployments equals ``self.max_deployments``,
        the last tracked name is popped and its deployment deleted first. The
        new ten-letter name is appended to ``self.deployments`` after the
        handler has been deployed.
        """
        at_capacity = len(self.deployments) == self.max_deployments
        if at_capacity:
            evicted_name = self.deployments.pop()
            serve.get_deployment(evicted_name).delete()

        new_name = "".join(
            random.choice(string.ascii_letters) for _ in range(10))

        @serve.deployment(name=new_name)
        def handler(self, *args):
            return new_name

        handler.deploy()

        self.deployments.append(new_name)
Beispiel #6
0
 def remote(self, *args, **kwargs):
     """Forward a call to the deployment handle, resolving it on first use.

     While ``self.handle`` is falsy, a handle for ``self.deployment_name`` is
     fetched and configured with ``self.handle_options.method_name``; the
     result is stored on ``self.handle`` for subsequent calls.
     """
     if not self.handle:
         base_handle = serve.get_deployment(self.deployment_name).get_handle()
         method_name = self.handle_options.method_name
         self.handle = base_handle.options(method_name=method_name)
     # TODO (jiaodong): Polish async handles later for serve pipeline
     return self.handle.remote(*args, **kwargs)
Beispiel #7
0
    def call():
        """Query the deployment and return the first two "|"-separated fields.

        The request goes through a deployment handle when ``use_handle`` is
        truthy and over HTTP otherwise.
        """
        if not use_handle:
            response = requests.get(f"http://localhost:8000/{name}").text
        else:
            deployment_handle = serve.get_deployment(name).get_handle()
            response = ray.get(deployment_handle.handler.remote())

        fields = response.split("|")
        return fields[0], fields[1]
Beispiel #8
0
    def test_import_path_deployment_decorated(self, serve_instance):
        """Deploy two targets given as import paths and check their options.

        Both import paths are wrapped with ``serve.deployment`` under explicit
        names, deployed, and their responses checked. Afterwards the test
        asserts that ``max_concurrent_queries`` of each deployment is not 17.
        """
        import_paths = {
            "decorated_func": "ray.serve.tests.test_application.decorated_func",
            "decorated_clss": "ray.serve.tests.test_application.DecoratedClass",
        }
        deployments = [
            serve.deployment(name=deployment_name)(import_path)
            for deployment_name, import_path in import_paths.items()
        ]

        self.deploy_and_check_responses(
            deployments, ["got decorated func", "got decorated class"]
        )

        # Check that non-default decorated values were overwritten
        for deployment_name in import_paths:
            assert serve.get_deployment(deployment_name).max_concurrent_queries != 17
Beispiel #9
0
    def test_basic_get(self, serve_instance):
        """Check that a deployment fetched by name reaches the same replica.

        Fetching ``"d"`` before it is deployed must raise ``KeyError``. After
        deploying, a request through the original object and a request
        through the object returned by ``serve.get_deployment`` must return
        the same value and the same process id.
        """

        @serve.deployment(version="1")
        def d(*args):
            return "1", os.getpid()

        with pytest.raises(KeyError):
            serve.get_deployment("d")

        d.deploy()
        original_val, original_pid = ray.get(d.get_handle().remote())
        assert original_val == "1"

        del d

        fetched = serve.get_deployment("d")
        fetched_val, fetched_pid = ray.get(fetched.get_handle().remote())
        assert fetched_val == "1"
        assert fetched_pid == original_pid
Beispiel #10
0
    def call(block=False):
        """Query the deployment and return the first two "|"-separated fields.

        ``block`` is forwarded as the handle argument when ``use_handle`` is
        truthy, and as the ``block`` query parameter of the HTTP request
        otherwise.
        """
        if not use_handle:
            response = requests.get(
                f"http://localhost:8000/{name}", params={"block": block}
            ).text
        else:
            deployment_handle = serve.get_deployment(name).get_handle()
            response = ray.get(deployment_handle.handler.remote(block))

        fields = response.split("|")
        return fields[0], fields[1]
Beispiel #11
0
def test_basic_get(serve_instance):
    """Check that a deployment fetched by name reaches the same replica.

    Fetching the deployment before it is running must raise ``KeyError``.
    Once it is started with ``serve.run``, a request through the returned
    handle and a request through ``serve.get_deployment(...).get_handle()``
    must return the same value and the same process id.
    """
    name = "test"

    @serve.deployment(name=name, version="1")
    def d(*args):
        return "1", os.getpid()

    with pytest.raises(KeyError):
        serve.get_deployment(name=name)

    run_handle = serve.run(d.bind())
    original_val, original_pid = ray.get(run_handle.remote())
    assert original_val == "1"

    del d

    fetched = serve.get_deployment(name=name)
    fetched_val, fetched_pid = ray.get(fetched.get_handle().remote())
    assert fetched_val == "1"
    assert fetched_pid == original_pid
Beispiel #12
0
def test_controller_starts_java_replica(shutdown_only):  # noqa: F811
    """Deploy a Java deployment through the controller and call it.

    The deployment is queried twice: once by sending serialized request
    protos straight to the first running replica's actor handle, and once
    through a regular handle obtained from ``serve.get_deployment``. It is
    deleted again at the end.
    """
    ray.init(
        num_cpus=8,
        namespace="default_test_namespace",
        # A dummy code search path to enable cross language.
        job_config=JobConfig(code_search_path=["."]),
    )
    client = serve.start(detached=True)
    controller = client._controller

    java_config = DeploymentConfig()
    java_config.deployment_language = JAVA
    java_config.is_cross_language = True

    java_replica_config = ReplicaConfig.create(
        "io.ray.serve.util.ExampleEchoDeployment",
        init_args=["my_prefix "],
    )

    # Deploy it
    deployment_name = "my_java"
    deploy_ref = controller.deploy.remote(
        name=deployment_name,
        deployment_config_proto_bytes=java_config.to_proto_bytes(),
        replica_config_proto_bytes=java_replica_config.to_proto_bytes(),
        route_prefix=None,
        deployer_job_id=ray.get_runtime_context().job_id,
    )
    updating = ray.get(deploy_ref)
    assert updating
    client._wait_for_deployment_healthy(deployment_name)

    # Let's try to call it!
    running_replicas = ray.get(controller._all_running_replicas.remote())
    replica_actor = running_replicas[deployment_name][0].actor_handle
    metadata_bytes = RequestMetadata(
        request_id="id-1",
        endpoint="endpoint",
        call_method="call",
    ).SerializeToString()
    body_bytes = RequestWrapper(body=msgpack_serialize("hello")).SerializeToString()
    replica_out = replica_actor.handleRequest.remote(metadata_bytes, body_bytes)
    assert ray.get(replica_out) == "my_prefix hello"

    # Same deployment, this time through the public handle API.
    handle = serve.get_deployment(deployment_name).get_handle()
    handle_out = handle.remote("hello handle")
    assert ray.get(handle_out) == "my_prefix hello handle"

    ray.get(controller.delete_deployment.remote(deployment_name))
    client._wait_for_deployment_deleted(deployment_name)
Beispiel #13
0
def test_repeated_get_handle_cached(serve_instance):
    """Check that repeated ``get_handle()`` calls yield a single handle.

    100 handles are collected into a set, first from the deployment object
    itself and then from objects returned by ``serve.get_deployment("f")``;
    each set must end up with exactly one element.
    """

    @serve.deployment
    def f(_):
        return ""

    f.deploy()

    handles_from_object = set()
    for _ in range(100):
        handles_from_object.add(f.get_handle())
    assert len(handles_from_object) == 1

    handles_from_lookup = set()
    for _ in range(100):
        handles_from_lookup.add(serve.get_deployment("f").get_handle())
    assert len(handles_from_lookup) == 1
Beispiel #14
0
def _validate_consistent_python_output(
    deployment, dag, handle_by_name, input=None, output=None
):
    """Assert that the same input leads to the same output across:

    1) the deployment handle returned by ``deployment.get_handle()``,
    2) the original executable Ray DAG (``dag.execute``),
    3) the deployment handle obtained through the public
       ``serve.get_deployment()`` API, looked up with ``handle_by_name``.
    """
    instance_handle = deployment.get_handle()
    assert ray.get(instance_handle.remote(input)) == output

    assert ray.get(dag.execute(input)) == output

    looked_up_handle = serve.get_deployment(handle_by_name).get_handle()
    assert ray.get(looked_up_handle.remote(input)) == output
Beispiel #15
0
    def test_deploy_empty_version(self, serve_instance):
        """Check that redeploying a deployment without a version changes its pid.

        The deployment is declared without a ``version``. After fetching it
        by name and deploying again, the test expects the same value but a
        different process id.
        """

        @serve.deployment
        def d(*args):
            return "1", os.getpid()

        d.deploy()
        original_val, original_pid = ray.get(d.get_handle().remote())
        assert original_val == "1"

        del d

        fetched = serve.get_deployment("d")
        fetched.deploy()
        redeployed_val, redeployed_pid = ray.get(fetched.get_handle().remote())
        assert redeployed_val == "1"
        assert redeployed_pid != original_pid
Beispiel #16
0
def test_new_driver(serve_instance):
    """Deploy from a separately executed script, then query from this process.

    The script connects to the address taken from
    ``ray.worker._global_node._redis_address`` and deploys ``driver``; the
    test then fetches that deployment by name and checks its response.
    """
    script_template = """
import ray
ray.init(address="{}")

from ray import serve

@serve.deployment
def driver(starlette_request):
    return "OK!"

driver.deploy()
"""
    redis_address = ray.worker._global_node._redis_address
    ray.test_utils.run_string_as_driver(script_template.format(redis_address))

    driver_handle = serve.get_deployment("driver").get_handle()
    assert ray.get(driver_handle.remote()) == "OK!"
Beispiel #17
0
def test_new_driver(serve_instance):
    """Deploy from a separately executed script, then query from this process.

    The script connects to ``ray.worker._global_node.address`` inside the
    ``default_test_namespace`` namespace and deploys ``driver``; the test
    then fetches that deployment by name and checks its response.
    """
    script_template = """
import ray
ray.init(address="{}", namespace="default_test_namespace")

from ray import serve

@serve.deployment
def driver():
    return "OK!"

driver.deploy()
"""
    node_address = ray.worker._global_node.address
    run_string_as_driver(script_template.format(node_address))

    driver_handle = serve.get_deployment("driver").get_handle()
    assert ray.get(driver_handle.remote()) == "OK!"
Beispiel #18
0
    def test_scale_replicas(self, serve_instance):
        """Check that redeploying with ``num_replicas=2`` adds a second process.

        The number of replicas is measured as the number of distinct process
        ids seen across 50 requests.
        """

        @serve.deployment
        def d(*args):
            return os.getpid()

        def check_num_replicas(num):
            d_handle = serve.get_deployment("d").get_handle()
            # 50 requests; the distinct pids returned must number exactly `num`.
            assert num == len(
                set(ray.get([d_handle.remote() for _ in range(50)]))
            )

        d.deploy()
        check_num_replicas(1)
        del d

        fetched = serve.get_deployment("d")
        scaled = fetched.options(num_replicas=2)
        scaled.deploy()
        check_num_replicas(2)
Beispiel #19
0
def test_ray_client(ray_client_instance):
    """Start, deploy to and delete from Serve via separately executed scripts.

    Three scripts are run in sequence, each connecting to
    ``ray_client_instance``: one starts a detached Serve instance, one
    deploys ``test1`` under ``/hello``, and one deletes it. After the deploy
    and delete steps the test checks the backend and endpoint listings of
    this process's Serve connection.
    """
    ray.util.connect(ray_client_instance)

    start_script = """
import ray
ray.util.connect("{}")

from ray import serve

serve.start(detached=True)
"""
    run_string_as_driver(start_script.format(ray_client_instance))

    serve.connect()

    deploy_script = """
import ray
ray.util.connect("{}")

from ray import serve

@serve.deployment(name="test1", route_prefix="/hello")
def f(*args):
    return "hello"

f.deploy()
"""
    run_string_as_driver(deploy_script.format(ray_client_instance))

    assert "test1" in serve.list_backends()
    assert "test1" in serve.list_endpoints()
    assert requests.get("http://localhost:8000/hello").text == "hello"

    delete_script = """
import ray
ray.util.connect("{}")

from ray import serve

serve.get_deployment("test1").delete()
"""
    run_string_as_driver(delete_script.format(ray_client_instance))

    assert "test1" not in serve.list_backends()
    assert "test1" not in serve.list_endpoints()
Beispiel #20
0
def test_deploy_new_version(serve_instance):
    """Check that redeploying under a new version changes the serving pid.

    The deployment is first deployed with version "1", then fetched by name
    and redeployed with ``version="2"``. The returned value must stay "1"
    while the process id must differ.
    """
    name = "test"

    @serve.deployment(name=name, version="1")
    def d(*args):
        return "1", os.getpid()

    d.deploy()
    original_val, original_pid = ray.get(d.get_handle().remote())
    assert original_val == "1"

    del d

    fetched = serve.get_deployment(name=name)
    fetched.options(version="2").deploy()
    redeployed_val, redeployed_pid = ray.get(fetched.get_handle().remote())
    assert redeployed_val == "1"
    assert redeployed_pid != original_pid
Beispiel #21
0
def test_single_class_with_valid_ray_options(serve_instance):
    """Check that Ray options set on the DAG node reach the deployment.

    A single-class DAG built with ``num_cpus=1, memory=1000`` is transformed
    into a serve DAG, its only deployment is deployed and validated, and the
    ``ray_actor_options`` of the deployment fetched by name are checked.
    """
    model = Model.options(num_cpus=1, memory=1000)._bind(2, ratio=0.3)
    ray_dag = model.forward._bind(InputNode())

    serve_root_dag = ray_dag._apply_recursive(transform_ray_dag_to_serve_dag)
    deployments = extract_deployments_from_serve_dag(serve_root_dag)
    assert len(deployments) == 1
    only_deployment = deployments[0]
    only_deployment.deploy()
    _validate_consistent_output(
        only_deployment, ray_dag, only_deployment.name, input=1, output=0.6
    )

    fetched = serve.get_deployment(only_deployment.name)
    expected_options = (("num_cpus", 1), ("memory", 1000), ("runtime_env", {}))
    for option, expected in expected_options:
        assert fetched.ray_actor_options.get(option) == expected
Beispiel #22
0
def test_single_class_with_valid_ray_options(serve_instance):
    """Check that Ray options set on the DAG node reach the deployment.

    A single-class DAG built with ``num_cpus=1, memory=1000`` is transformed
    into a serve DAG, its only deployment is deployed and validated, and the
    ``ray_actor_options`` of the deployment fetched by name are checked.
    """
    with InputNode() as dag_input:
        model = Model.options(num_cpus=1, memory=1000).bind(2, ratio=0.3)
        ray_dag = model.forward.bind(dag_input)

    with _DAGNodeNameGenerator() as node_name_generator:

        def to_serve_node(node):
            return transform_ray_dag_to_serve_dag(node, node_name_generator)

        serve_root_dag = ray_dag.apply_recursive(to_serve_node)
    deployments = extract_deployments_from_serve_dag(serve_root_dag)
    assert len(deployments) == 1
    only_deployment = deployments[0]
    only_deployment.deploy()
    _validate_consistent_python_output(
        only_deployment, ray_dag, only_deployment.name, input=1, output=0.6
    )

    fetched = serve.get_deployment(only_deployment.name)
    expected_options = (("num_cpus", 1), ("memory", 1000), ("runtime_env", {}))
    for option, expected in expected_options:
        assert fetched.ray_actor_options.get(option) == expected
Beispiel #23
0
def test_handle_cache_out_of_scope(serve_instance):
    """Check that the handle cache does not grow when handles are re-fetched.

    After the first ``get_handle()`` the global client's handle cache must
    hold exactly one more entry than before; 30 further lookups inside a
    helper function must return the very same handle object and leave the
    cache size unchanged.
    """
    # https://github.com/ray-project/ray/issues/18980
    initial_num_cached = len(get_global_client().handle_cache)
    expected_num_cached = initial_num_cached + 1

    @serve.deployment(name="f")
    def f():
        return "hi"

    f.deploy()
    handle = serve.get_deployment("f").get_handle()

    handle_cache = get_global_client().handle_cache
    assert len(handle_cache) == expected_num_cached

    def sender_where_handle_goes_out_of_scope():
        short_lived_handle = serve.get_deployment("f").get_handle()
        assert short_lived_handle is handle
        assert ray.get(short_lived_handle.remote()) == "hi"

    for _ in range(30):
        sender_where_handle_goes_out_of_scope()
    assert len(handle_cache) == expected_num_cached
Beispiel #24
0
 def get_deployment(self, name, use_list_api):
     """Return the deployment called ``name``.

     With a truthy ``use_list_api`` the deployment is read out of
     ``serve.list_deployments()``; otherwise ``serve.get_deployment`` is
     called directly.
     """
     if not use_list_api:
         return serve.get_deployment(name)
     return serve.list_deployments()[name]
Beispiel #25
0
 def __init__(self):
     """Store a handle for ``deployment_name``, requested with ``sync=False``."""
     deployment = serve.get_deployment(deployment_name)
     self.handle = deployment.get_handle(sync=False)
Beispiel #26
0
def test_deploy(ray_start_stop):
    """Deploy valid config files through the CLI and check the deployments.

    Every config file is submitted with ``serve deploy``; the test then waits
    for each expected deployment to answer over HTTP and compares the running
    deployments' names and replica counts with the expected values. The whole
    sequence runs twice to check idempotence. Each round ends by deploying a
    config after which ``/shallow`` must return 404 over HTTP while still
    answering through a deployment handle.
    """
    # Initialize serve in test to enable calling serve.list_deployments()
    ray.init(address="auto", namespace=RAY_INTERNAL_DASHBOARD_NAMESPACE)
    serve.start(detached=True)

    # Create absolute file names to YAML config files
    config_dir = os.path.join(os.path.dirname(__file__), "test_config_files")
    three_deployments = os.path.join(config_dir, "three_deployments.yaml")
    two_deployments = os.path.join(config_dir, "two_deployments.yaml")
    deny_deployment = os.path.join(config_dir, "deny_access.yaml")

    # Dictionary mapping test config file names to expected deployment names
    # and configurations. These should match the values specified in the YAML
    # files.
    expected_by_config = {
        three_deployments: {
            "shallow": {"num_replicas": 1, "response": "Hello shallow world!"},
            "deep": {"num_replicas": 1, "response": "Hello deep world!"},
            "one": {"num_replicas": 2, "response": "2"},
        },
        two_deployments: {
            "shallow": {"num_replicas": 3, "response": "Hello shallow world!"},
            "one": {"num_replicas": 2, "response": "2"},
        },
    }

    request_url = "http://localhost:8000/"
    success_message_fragment = b"Sent deploy request successfully!"

    # Check idempotence:
    for _ in range(2):
        for config_file_name, expected_deployments in expected_by_config.items():
            deploy_command = ["serve", "deploy", config_file_name]
            deploy_response = subprocess.check_output(deploy_command)
            assert success_message_fragment in deploy_response

            for name, deployment_config in expected_deployments.items():

                def responds_as_expected():
                    reply = requests.get(f"{request_url}{name}").text
                    return reply == deployment_config["response"]

                wait_for_condition(responds_as_expected, timeout=15)

            running_deployments = serve.list_deployments()

            # Check that running deployment names match expected deployment names
            assert set(running_deployments.keys()) == expected_deployments.keys()

            for name, deployment in running_deployments.items():
                expected_replicas = expected_deployments[name]["num_replicas"]
                assert deployment.num_replicas == expected_replicas

        # Deploy a deployment without HTTP access
        deploy_response = subprocess.check_output(["serve", "deploy", deny_deployment])
        assert success_message_fragment in deploy_response

        def shallow_route_is_gone():
            return requests.get(f"{request_url}shallow").status_code == 404

        wait_for_condition(shallow_route_is_gone, timeout=15)
        assert (
            ray.get(serve.get_deployment("shallow").get_handle().remote())
            == "Hello shallow world!"
        )

    ray.shutdown()
Beispiel #27
0
 async def __call__(self, req):
     """Forward ``req`` to the downstream deployment and return its result.

     The handle for ``deployment_name`` is fetched (with ``sync=False``) the
     first time it is needed, i.e. while ``self.handle`` is None.
     """
     if self.handle is None:
         deployment = serve.get_deployment(deployment_name)
         self.handle = deployment.get_handle(sync=False)
     # The first await yields the object ref, the second one its value.
     return await (await self.handle.remote(req))
Beispiel #28
0
from ray import serve
import os
import ray

# Connect to the running cluster in the "serve" namespace.
ray.init(address="auto", namespace="serve")

# Replica count comes from the NUM_REPLICAS environment variable ("1" if unset).
num_replicas = os.environ.get("NUM_REPLICAS", "1")

# Redeploy the existing "rl" deployment with the requested replica count.
rl_deployment = serve.get_deployment("rl")
rl_deployment.options(num_replicas=int(num_replicas)).deploy()
Beispiel #29
0
    # Deploy two named deployments, each with two replicas and its own model
    # file. Method 1 further down queries them through handles.
    Deployment.options(name="rep-1", num_replicas=2).deploy("/model/rep-1.pkl")
    Deployment.options(name="rep-2", num_replicas=2).deploy("/model/rep-2.pkl")

    # Get the current list of deployments
    print(serve.list_deployments())

    deployment_names = ["rep-1", "rep-2"]

    print("ServerHandle API responses: " + "--" * 5)

    # Method 1) Access each deployment using the handle API
    for _ in range(2):
        for d_name in deployment_names:
            # Get a handle to each deployment and invoke its method.
            # Which replica the request is dispatched to is determined
            # by the Router actor.
            handle = serve.get_deployment(d_name).get_handle()
            print(f"handle name : {d_name}")
            prediction = ray.get(handle.remote(random()))
            print(f"prediction  : {prediction}")
            print("-" * 2)

    print("HTTP responses: " + "--" * 5)

    # Method 2) Access each deployment via an HTTP request
    for _ in range(2):
        for d_name in deployment_names:
            # Send HTTP request along with data payload
            url = f"http://127.0.0.1:8000/{d_name}"
            print(f"handle name : {d_name}")
            response_text = requests.get(url, params={"data": random()}).text
            print(f"prediction  : {response_text}")
Beispiel #30
0
 def check_num_replicas(num):
     """Assert that 50 requests produce exactly ``num`` distinct results."""
     deployment_handle = serve.get_deployment(name=name).get_handle()
     assert num == len(
         set(ray.get([deployment_handle.remote() for _ in range(50)]))
     )