Exemple #1
0
def test_start_idempotent(serve_instance):
    @serve.deployment(name="start")
    def func(*args):
        pass

    func.deploy()

    assert "start" in serve.list_deployments()
    serve.start(detached=True)
    serve.start()
    serve.start(detached=True)
    serve.start()
    assert "start" in serve.list_deployments()
Exemple #2
0
    def run(self, logger: Union[Logger, _CliLogger] = logger):
        """Deploys all deployments in this Application and logs status.

        This function keeps looping and printing status, so it must be manually
        killed (e.g. using ctrl-C). When it recieves a kill signal, it tears
        down all deployments that it deployed. If there are no deployments
        left, it also shuts down the rest of Serve, including the controller.
        This is meant to help interactive development.

        Args:
            logger: Any Python object that implements the standard logger
                interface.
        """

        try:
            serve.start(detached=True)
            self.deploy()

            logger.info("\nDeployed successfully!\n")

            while True:
                statuses = serve_application_status_to_schema(
                    get_deployment_statuses()
                ).json(indent=4)
                logger.info(f"{statuses}")
                time.sleep(10)

        except KeyboardInterrupt:
            logger.info("Got SIGINT (KeyboardInterrupt). Removing deployments.")
            for deployment in self._deployments.values():
                deployment.delete()
            if len(serve.list_deployments()) == 0:
                logger.info("No deployments left. Shutting down Serve.")
                serve.shutdown()
            sys.exit()
Exemple #3
0
def serve_instance(_shared_serve_instance):
    yield _shared_serve_instance
    # Clear all state between tests to avoid naming collisions.
    for deployment in serve.list_deployments().values():
        deployment.delete()
    # Clear the ServeHandle cache between tests to avoid them piling up.
    _shared_serve_instance.handle_cache.clear()
Exemple #4
0
def test_serve_namespace(ray_start_stop):
    """
    Check that the Dashboard's Serve can interact with the Python API
    when they both start in the "serve namespace"
    """

    one = dict(
        name="one",
        num_replicas=1,
        route_prefix="/one",
        ray_actor_options={"runtime_env": {
            "py_modules": [test_module_uri]
        }},
        import_path="test_module.test.one",
    )
    put_response = requests.put(GET_OR_PUT_URL,
                                json={"deployments": [one]},
                                timeout=30)
    assert put_response.status_code == 200
    ray.init(address="auto", namespace="serve")
    serve.start()
    deployments = serve.list_deployments()
    assert len(deployments) == 1
    assert "one" in deployments
    serve.shutdown()
Exemple #5
0
 async def get_all_deployments(self, req: Request) -> Response:
     deployments = list(serve.list_deployments().values())
     serve_application_schema = serve_application_to_schema(
         deployments=deployments)
     return Response(
         text=serve_application_schema.json(),
         content_type="application/json",
     )
Exemple #6
0
def serve_instance(_shared_serve_instance):
    yield _shared_serve_instance
    # Clear all state between tests to avoid naming collisions.
    _shared_serve_instance.delete_deployments(serve.list_deployments().keys())
    # Clear the ServeHandle cache between tests to avoid them piling up.
    _shared_serve_instance.handle_cache.clear()
    # Clear deployment generation shared state between tests
    DeploymentNameGenerator.reset()
Exemple #7
0
def test_shutdown(ray_shutdown):
    ray.init(num_cpus=16)
    serve.start(http_options=dict(port=8003))

    @serve.deployment
    def f():
        pass

    serve.run(f.bind())

    serve_controller_name = serve.context._global_client._controller_name
    actor_names = [
        serve_controller_name,
        format_actor_name(
            SERVE_PROXY_NAME,
            serve.context._global_client._controller_name,
            get_all_node_ids()[0][0],
        ),
    ]

    def check_alive():
        alive = True
        for actor_name in actor_names:
            try:
                ray.get_actor(actor_name, namespace=SERVE_NAMESPACE)
            except ValueError:
                alive = False
        return alive

    wait_for_condition(check_alive)

    serve.shutdown()
    with pytest.raises(RayServeException):
        serve.list_deployments()

    def check_dead():
        for actor_name in actor_names:
            try:
                ray.get_actor(actor_name, namespace=SERVE_NAMESPACE)
                return False
            except ValueError:
                pass
        return True

    wait_for_condition(check_dead)
Exemple #8
0
def test_run_delete_old_deployments(serve_instance):
    """Check that serve.run() can remove all old deployments"""
    @serve.deployment(name="f", route_prefix="/test1")
    def f():
        return "got f"

    @serve.deployment(name="g", route_prefix="/test2")
    def g():
        return "got g"

    ingress_handle = serve.run(f.bind())
    assert ray.get(ingress_handle.remote()) == "got f"

    ingress_handle = serve.run(g.bind())
    assert ray.get(ingress_handle.remote()) == "got g"

    assert "g" in serve.list_deployments()
    assert "f" not in serve.list_deployments()
Exemple #9
0
def main(num_replicas: Optional[int], trial_length: Optional[str],
         max_batch_size: Optional[int]):
    # Give default cluster parameter values based on smoke_test config
    # if user provided values explicitly, use them instead.
    # IS_SMOKE_TEST is set by args of releaser's e2e.py
    smoke_test = os.environ.get("IS_SMOKE_TEST", "1")
    if smoke_test == "1":
        num_replicas = num_replicas or DEFAULT_SMOKE_TEST_NUM_REPLICA
        trial_length = trial_length or DEFAULT_SMOKE_TEST_TRIAL_LENGTH
        logger.info(
            f"Running local / smoke test with {num_replicas} replicas ..\n")

        # Choose cluster setup based on user config. Local test uses Cluster()
        # to mock actors that requires # of nodes to be specified, but ray
        # client doesn't need to
        num_nodes = int(math.ceil(num_replicas / NUM_CPU_PER_NODE))
        logger.info(
            f"Setting up local ray cluster with {num_nodes} nodes ..\n")
        serve_client = setup_local_single_node_cluster(num_nodes)[0]
    else:
        num_replicas = num_replicas or DEFAULT_FULL_TEST_NUM_REPLICA
        trial_length = trial_length or DEFAULT_FULL_TEST_TRIAL_LENGTH
        logger.info(f"Running full test with {num_replicas} replicas ..\n")
        logger.info("Setting up anyscale ray cluster .. \n")
        serve_client = setup_anyscale_cluster()

    http_host = str(serve_client._http_config.host)
    http_port = str(serve_client._http_config.port)
    logger.info(f"Ray serve http_host: {http_host}, http_port: {http_port}")

    logger.info(f"Deploying with {num_replicas} target replicas ....\n")
    deploy_replicas(num_replicas, max_batch_size)

    logger.info("Warming up cluster ....\n")
    warm_up_one_cluster.remote(10, http_host, http_port, "echo")

    logger.info(f"Starting wrk trial on all nodes for {trial_length} ....\n")
    # For detailed discussion, see https://github.com/wg/wrk/issues/205
    # TODO:(jiaodong) What's the best number to use here ?
    all_endpoints = list(serve.list_deployments().keys())
    all_metrics, all_wrk_stdout = run_wrk_on_all_nodes(
        trial_length,
        NUM_CONNECTIONS,
        http_host,
        http_port,
        all_endpoints=all_endpoints)

    aggregated_metrics = aggregate_all_metrics(all_metrics)
    logger.info("Wrk stdout on each node: ")
    for wrk_stdout in all_wrk_stdout:
        logger.info(wrk_stdout)
    logger.info("Final aggregated metrics: ")
    for key, val in aggregated_metrics.items():
        logger.info(f"{key}: {val}")
    save_test_results(
        aggregated_metrics,
        default_output_file="/tmp/single_deployment_1k_noop_replica.json")
Exemple #10
0
def test_serve_shutdown(ray_shutdown):
    serve.start(detached=True)

    @serve.deployment
    class A:
        def __call__(self, *args):
            return "hi"

    A.deploy()

    assert len(serve.list_deployments()) == 1

    serve.shutdown()
    serve.start(detached=True)

    assert len(serve.list_deployments()) == 0

    A.deploy()

    assert len(serve.list_deployments()) == 1
Exemple #11
0
 async def get_all_deployments(self, req: Request) -> Response:
     deployments = list(serve.list_deployments().values())
     # statuses = get_deployment_statuses()
     # serve_application_schema = serve_application_to_schema(
     #     deployments=deployments, statuses=statuses
     # )
     serve_application_schema = serve_application_to_schema(deployments=deployments)
     return Response(
         text=json.dumps(serve_application_schema.json()),
         content_type="application/json",
     )
        def get_random_async_handle(self):
            # sync get_handle() and expected to be called only a few times
            # during deployment warmup so each deployment has reference to
            # all other handles to send recursive inference call
            if len(self.all_deployment_async_handles) < len(all_deployment_names):
                deployments = list(serve.list_deployments().values())
                self.all_deployment_async_handles = [
                    deployment.get_handle(sync=False) for deployment in deployments
                ]

            return random.choice(self.all_deployment_async_handles)
Exemple #13
0
def test_serve_shutdown(ray_shutdown):
    ray.init(namespace="serve")
    serve.start(detached=True)

    @serve.deployment
    class A:
        def __call__(self, *args):
            return "hi"

    serve.run(A.bind())

    assert len(serve.list_deployments()) == 1

    serve.shutdown()
    serve.start(detached=True)

    assert len(serve.list_deployments()) == 0

    serve.run(A.bind())

    assert len(serve.list_deployments()) == 1
Exemple #14
0
def test_connect(detached, ray_shutdown):
    # Check that you can make API calls from within a deployment for both
    # detached and non-detached instances.
    ray.init(num_cpus=16, namespace="serve")
    serve.start(detached=detached)

    @serve.deployment
    def connect_in_deployment(*args):
        connect_in_deployment.options(name="deployment-ception").deploy()

    connect_in_deployment.deploy()
    ray.get(connect_in_deployment.get_handle().remote())
    assert "deployment-ception" in serve.list_deployments()
Exemple #15
0
    async def put_all_deployments(self, req: Request) -> Response:
        app = Application.from_dict(await req.json())
        serve.run(app, _blocking=False)

        new_names = set()
        for deployment in app.deployments.values():
            new_names.add(deployment.name)

        all_deployments = serve.list_deployments()
        all_names = set(all_deployments.keys())
        names_to_delete = all_names.difference(new_names)
        internal_get_global_client().delete_deployments(names_to_delete)

        return Response()
Exemple #16
0
    async def put_all_deployments(self, req: Request) -> Response:
        app = Application.from_dict(await req.json())
        app.deploy(blocking=False)

        new_names = set()
        for deployment in app:
            new_names.add(deployment.name)

        all_deployments = serve.list_deployments()
        all_names = set(all_deployments.keys())
        names_to_delete = all_names.difference(new_names)
        for name in names_to_delete:
            all_deployments[name].delete()

        return Response()
Exemple #17
0
def test_idempotence_after_controller_death(ray_start_stop, use_command: bool):
    """Check that CLI is idempotent even if controller dies."""

    config_file_name = os.path.join(os.path.dirname(__file__),
                                    "test_config_files",
                                    "two_deployments.yaml")
    success_message_fragment = b"Sent deploy request successfully!"
    deploy_response = subprocess.check_output(
        ["serve", "deploy", config_file_name])
    assert success_message_fragment in deploy_response

    ray.init(address="auto", namespace="serve")
    serve.start(detached=True)
    assert len(serve.list_deployments()) == 2

    # Kill controller
    if use_command:
        subprocess.check_output(["serve", "shutdown"])
    else:
        serve.shutdown()

    info_response = subprocess.check_output(["serve", "config"])
    info = yaml.safe_load(info_response)

    assert "deployments" in info
    assert len(info["deployments"]) == 0

    deploy_response = subprocess.check_output(
        ["serve", "deploy", config_file_name])
    assert success_message_fragment in deploy_response

    # Restore testing controller
    serve.start(detached=True)
    assert len(serve.list_deployments()) == 2
    serve.shutdown()
    ray.shutdown()
Exemple #18
0
    async def put_all_deployments(self, req: Request) -> Response:
        serve_application_text = await req.text()
        serve_application_schema = ServeApplicationSchema.parse_raw(
            serve_application_text, content_type="application/json")
        deployments = schema_to_serve_application(serve_application_schema)

        deploy_group(deployments, _blocking=False)

        new_names = set()
        for deployment in serve_application_schema.deployments:
            new_names.add(deployment.name)

        all_deployments = serve.list_deployments()
        all_names = set(all_deployments.keys())
        names_to_delete = all_names.difference(new_names)
        for name in names_to_delete:
            all_deployments[name].delete()

        return Response()
Exemple #19
0
 def get_deployment(self, name, use_list_api):
     if use_list_api:
         return serve.list_deployments()[name]
     else:
         return serve.get_deployment(name)
Exemple #20
0
def test_deploy(ray_start_stop):
    # Deploys some valid config files and checks that the deployments work

    # Initialize serve in test to enable calling serve.list_deployments()
    ray.init(address="auto", namespace=RAY_INTERNAL_DASHBOARD_NAMESPACE)
    serve.start(detached=True)

    # Create absolute file names to YAML config files
    three_deployments = os.path.join(
        os.path.dirname(__file__), "test_config_files", "three_deployments.yaml"
    )
    two_deployments = os.path.join(
        os.path.dirname(__file__), "test_config_files", "two_deployments.yaml"
    )
    deny_deployment = os.path.join(
        os.path.dirname(__file__), "test_config_files", "deny_access.yaml"
    )

    # Dictionary mapping test config file names to expected deployment names
    # and configurations. These should match the values specified in the YAML
    # files.
    configs = {
        three_deployments: {
            "shallow": {
                "num_replicas": 1,
                "response": "Hello shallow world!",
            },
            "deep": {
                "num_replicas": 1,
                "response": "Hello deep world!",
            },
            "one": {
                "num_replicas": 2,
                "response": "2",
            },
        },
        two_deployments: {
            "shallow": {
                "num_replicas": 3,
                "response": "Hello shallow world!",
            },
            "one": {
                "num_replicas": 2,
                "response": "2",
            },
        },
    }

    request_url = "http://localhost:8000/"
    success_message_fragment = b"Sent deploy request successfully!"

    # Check idempotence:
    for _ in range(2):
        for config_file_name, expected_deployments in configs.items():
            deploy_response = subprocess.check_output(
                ["serve", "deploy", config_file_name]
            )
            assert success_message_fragment in deploy_response

            for name, deployment_config in expected_deployments.items():
                wait_for_condition(
                    lambda: (
                        requests.get(f"{request_url}{name}").text
                        == deployment_config["response"]
                    ),
                    timeout=15,
                )

            running_deployments = serve.list_deployments()

            # Check that running deployment names match expected deployment names
            assert set(running_deployments.keys()) == expected_deployments.keys()

            for name, deployment in running_deployments.items():
                assert (
                    deployment.num_replicas
                    == expected_deployments[name]["num_replicas"]
                )

        # Deploy a deployment without HTTP access
        deploy_response = subprocess.check_output(["serve", "deploy", deny_deployment])
        assert success_message_fragment in deploy_response

        wait_for_condition(
            lambda: requests.get(f"{request_url}shallow").status_code == 404, timeout=15
        )
        assert (
            ray.get(serve.get_deployment("shallow").get_handle().remote())
            == "Hello shallow world!"
        )

    ray.shutdown()
def main(
    min_replicas: Optional[int],
    max_replicas: Optional[int],
    num_deployments: Optional[int],
    trial_length: Optional[str],
):
    # Give default cluster parameter values based on smoke_test config
    # if user provided values explicitly, use them instead.
    # IS_SMOKE_TEST is set by args of releaser's e2e.py
    if is_smoke_test():
        min_replicas = min_replicas or DEFAULT_SMOKE_TEST_MIN_NUM_REPLICA
        max_replicas = max_replicas or DEFAULT_SMOKE_TEST_MAX_NUM_REPLICA
        num_deployments = num_deployments or DEFAULT_SMOKE_TEST_NUM_DEPLOYMENTS
        trial_length = trial_length or DEFAULT_SMOKE_TEST_TRIAL_LENGTH
        logger.info(f"Running smoke test with min {min_replicas} and max "
                    f"{max_replicas} replicas, {num_deployments} deployments "
                    f".. \n")
        # Choose cluster setup based on user config. Local test uses Cluster()
        # to mock actors that requires # of nodes to be specified, but ray
        # client doesn't need to
        num_nodes = int(math.ceil(max_replicas / NUM_CPU_PER_NODE))
        logger.info(
            f"Setting up local ray cluster with {num_nodes} nodes .. \n")
        serve_client = setup_local_single_node_cluster(num_nodes)[0]
    else:
        min_replicas = min_replicas or DEFAULT_FULL_TEST_MIN_NUM_REPLICA
        max_replicas = max_replicas or DEFAULT_FULL_TEST_MAX_NUM_REPLICA
        num_deployments = num_deployments or DEFAULT_FULL_TEST_NUM_DEPLOYMENTS
        trial_length = trial_length or DEFAULT_FULL_TEST_TRIAL_LENGTH
        logger.info(f"Running full test with min {min_replicas} and max "
                    f"{max_replicas} replicas, {num_deployments} deployments "
                    f".. \n")
        logger.info("Setting up anyscale ray cluster .. \n")
        serve_client = setup_anyscale_cluster()

    http_host = str(serve_client._http_config.host)
    http_port = str(serve_client._http_config.port)
    logger.info(f"Ray serve http_host: {http_host}, http_port: {http_port}")

    logger.info(f"Deploying with min {min_replicas} and max {max_replicas}"
                f"target replicas ....\n")
    setup_multi_deployment_replicas(min_replicas, max_replicas,
                                    num_deployments)

    logger.info("Warming up cluster ....\n")
    endpoint_refs = []
    all_endpoints = list(serve.list_deployments().keys())
    for endpoint in all_endpoints:
        endpoint_refs.append(
            warm_up_one_cluster.options(num_cpus=0).remote(
                10, http_host, http_port, endpoint))
    for endpoint in ray.get(endpoint_refs):
        logger.info(f"Finished warming up {endpoint}")

    logger.info(f"Starting wrk trial on all nodes for {trial_length} ....\n")
    # For detailed discussion, see https://github.com/wg/wrk/issues/205
    # TODO:(jiaodong) What's the best number to use here ?
    all_metrics, all_wrk_stdout = run_wrk_on_all_nodes(
        trial_length,
        NUM_CONNECTIONS,
        http_host,
        http_port,
        all_endpoints=all_endpoints)

    aggregated_metrics = aggregate_all_metrics(all_metrics)
    logger.info("Wrk stdout on each node: ")
    for wrk_stdout in all_wrk_stdout:
        logger.info(wrk_stdout)
    logger.info("Final aggregated metrics: ")
    for key, val in aggregated_metrics.items():
        logger.info(f"{key}: {val}")
    save_test_results(
        aggregated_metrics,
        default_output_file="/tmp/autoscaling_multi_deployment.json")
Exemple #22
0
 async def get_all_deployments(self, req: Request) -> Response:
     app = Application(list(serve.list_deployments().values()))
     return Response(
         text=json.dumps(app.to_dict()),
         content_type="application/json",
     )
Exemple #23
0
def serve_instance(_shared_serve_instance):
    yield _shared_serve_instance
    # Clear all state between tests to avoid naming collisions.
    for deployment in serve.list_deployments().values():
        deployment.delete()
if __name__ == '__main__':

    # Start a Ray Serve instance. This will automatically start
    # or connect to an existing Ray cluster.
    serve.start()

    # Create two distinct deployments of the same class as
    # two replicas. Associate each deployment with a unique 'name'.
    # This name can be used as to fetch its respective serve handle.
    # See code below for method 1.
    Deployment.options(name="rep-1", num_replicas=2).deploy("/model/rep-1.pkl")
    Deployment.options(name="rep-2", num_replicas=2).deploy("/model/rep-2.pkl")

    # Get the current list of deployments
    print(serve.list_deployments())

    print("ServerHandle API responses: " + "--" * 5)

    # Method 1) Access each deployment using the ServerHandle API
    for _ in range(2):
        for d_name in ["rep-1", "rep-2"]:
            # Get handle to the each deployment and invoke its method.
            # Which replica the request is dispatched to is determined
            # by the Router actor.
            handle = serve.get_deployment(d_name).get_handle()
            print(f"handle name : {d_name}")
            print(f"prediction  : {ray.get(handle.remote(random()))}")
            print("-" * 2)

    print("HTTP responses: " + "--" * 5)
Exemple #25
0
 def list_deployments(self, **kwargs):
     return [{"name": name, "info": info} for name, info in serve.list_deployments().items()]
Exemple #26
0
 def get_deployment(self, name):
     try:
         return {"name": name, "info": serve.list_deployments()[name]}
     except KeyError:
         raise MlflowException(f"No deployment with name {name} found")
Exemple #27
0
def test_deploy(ray_start_stop):
    # Deploys two valid config files and checks that the deployments work

    # Initialize serve in test to enable calling serve.list_deployments()
    ray.init(address="auto", namespace=RAY_INTERNAL_DASHBOARD_NAMESPACE)
    serve.start(detached=True)

    # Create absolute file names to YAML config files
    three_deployments = os.path.join(os.path.dirname(__file__),
                                     "test_config_files",
                                     "three_deployments.yaml")
    two_deployments = os.path.join(os.path.dirname(__file__),
                                   "test_config_files", "two_deployments.yaml")

    # Dictionary mapping test config file names to expected deployment names
    # and configurations. These should match the values specified in the YAML
    # files.
    configs = {
        three_deployments: {
            "shallow": {
                "num_replicas": 1,
                "response": "Hello shallow world!",
            },
            "deep": {
                "num_replicas": 1,
                "response": "Hello deep world!",
            },
            "one": {
                "num_replicas": 2,
                "response": "2",
            },
        },
        two_deployments: {
            "shallow": {
                "num_replicas": 3,
                "response": "Hello shallow world!",
            },
            "one": {
                "num_replicas": 2,
                "response": "2",
            },
        },
    }

    request_url = "http://localhost:8000/"
    success_message_fragment = b"Sent deploy request successfully!"
    for config_file_name, expected_deployments in configs.items():
        deploy_response = subprocess.check_output(
            ["serve", "deploy", config_file_name])
        assert success_message_fragment in deploy_response

        running_deployments = serve.list_deployments()

        # Check that running deployment names match expected deployment names
        assert set(running_deployments.keys()) == expected_deployments.keys()

        for name, deployment in running_deployments.items():
            assert deployment.num_replicas == expected_deployments[name][
                "num_replicas"]

        for name, deployment_config in expected_deployments.items():
            assert (requests.get(f"{request_url}{name}").text ==
                    deployment_config["response"])
Exemple #28
0
def test_ray_client(ray_client_instance):
    ray.util.connect(ray_client_instance, namespace="default_test_namespace")

    start = """
import ray
ray.util.connect("{}", namespace="default_test_namespace")

from ray import serve

serve.start(detached=True)
""".format(ray_client_instance)
    run_string_as_driver(start)

    deploy = """
import ray
ray.util.connect("{}", namespace="default_test_namespace")

from ray import serve

@serve.deployment(name="test1", route_prefix="/hello")
def f(*args):
    return "hello"

f.deploy()
""".format(ray_client_instance)
    run_string_as_driver(deploy)

    assert "test1" in serve.list_deployments()
    assert requests.get("http://*****:*****@app.get("/")
def hello():
    return "hello"

@serve.deployment
@serve.ingress(app)
class A:
    pass

A.deploy()
""".format(ray_client_instance)
    run_string_as_driver(fastapi)

    assert requests.get("http://localhost:8000/A").json() == "hello"

    serve.shutdown()
    ray.util.disconnect()