Esempio n. 1
0
def test_list_endpoints(serve_instance):
    """Verify serve.list_endpoints() across endpoint creation and deletion.

    Two endpoints are created: "endpoint" with a route and no traffic, and
    "endpoint2" without a route but with all traffic sent to "backend".
    Their listed configs are compared with the expected dicts, then both
    endpoints are deleted one after the other.
    """
    serve.init()

    def f():
        pass

    serve.create_endpoint("endpoint", "/api", methods=["GET", "POST"])
    serve.create_endpoint("endpoint2", methods=["POST"])
    serve.create_backend("backend", f)
    serve.set_traffic("endpoint2", {"backend": 1.0})

    # Expected listing, keyed by endpoint name (checked in this order).
    expected_configs = {
        "endpoint": {
            "route": "/api",
            "methods": ["GET", "POST"],
            "traffic": {},
        },
        "endpoint2": {
            "route": None,
            "methods": ["POST"],
            "traffic": {"backend": 1.0},
        },
    }
    listed = serve.list_endpoints()
    for name, config in expected_configs.items():
        assert name in listed
        assert listed[name] == config

    # Removing one endpoint must leave the other one listed.
    serve.delete_endpoint("endpoint")
    remaining = serve.list_endpoints()
    assert "endpoint2" in remaining

    serve.delete_endpoint("endpoint2")
    assert len(serve.list_endpoints()) == 0
def serve_new_model(model_dir, checkpoint, config, metrics, day, gpu=False):
    """Create a backend for ``checkpoint`` and point the "mnist" endpoint at it.

    The checkpoint is handed to ``_move_checkpoint_to_model_dir`` and the
    returned path is used to create a backend named ``mnist:day_<day>``.
    The "mnist" endpoint is created on first use; otherwise all of its
    traffic is switched to the new backend. Every other backend whose name
    starts with ``mnist:day`` is deleted afterwards.

    Returns:
        Always ``True``.
    """
    print("Serving checkpoint: {}".format(checkpoint))

    checkpoint_path = _move_checkpoint_to_model_dir(
        model_dir, checkpoint, config, metrics)

    serve.init()
    backend_name = "mnist:day_{}".format(day)
    serve.create_backend(
        backend_name, MNISTBackend, checkpoint_path, config, metrics, gpu)

    if "mnist" in serve.list_endpoints():
        # The endpoint already exists: send all traffic to the new model.
        # An incremental rollout could be implemented here instead, sending
        # only part of the traffic to the new backend and the rest to the
        # existing ones.
        serve.set_traffic("mnist", {backend_name: 1.0})
    else:
        # First time a model is served: create the endpoint.
        serve.create_endpoint(
            "mnist",
            backend=backend_name,
            route="/mnist",
            methods=["POST"])

    # Drop every earlier mnist backend, keeping only the one just created.
    stale_backends = [
        name for name in serve.list_backends()
        if name.startswith("mnist:day") and name != backend_name
    ]
    for name in stale_backends:
        serve.delete_backend(name)

    return True
Esempio n. 3
0
def test_ray_client(ray_client_instance):
    """Drive Serve from separate driver scripts connected to one address.

    Each script is run through ``run_string_as_driver`` and connects to
    ``ray_client_instance``. The scripts start Serve detached, deploy
    "test1" under the "/hello" route prefix, and finally delete it; the
    test process checks the backend/endpoint listings and the HTTP
    response in between.
    """
    ray.util.connect(ray_client_instance)

    # Every driver script begins with the same connect/import preamble.
    preamble = """
import ray
ray.util.connect("{}")

from ray import serve

""".format(ray_client_instance)

    def run_driver(body):
        # Prepend the shared preamble and execute the script as a driver.
        run_string_as_driver(preamble + body)

    run_driver("""\
serve.start(detached=True)
""")

    serve.connect()

    run_driver("""\
@serve.deployment(name="test1", route_prefix="/hello")
def f(*args):
    return "hello"

f.deploy()
""")

    assert "test1" in serve.list_backends()
    assert "test1" in serve.list_endpoints()
    response = requests.get("http://localhost:8000/hello")
    assert response.text == "hello"

    run_driver("""\
serve.get_deployment("test1").delete()
""")

    assert "test1" not in serve.list_backends()
    assert "test1" not in serve.list_endpoints()
def main(num_replicas: Optional[int], trial_length: Optional[str],
         max_batch_size: Optional[int]):
    """Deploy replicas, run a wrk trial against them and save the metrics.

    The ``IS_SMOKE_TEST`` environment variable (default ``"1"``) selects
    between a local cluster and an Anyscale cluster, and also which default
    values are used for arguments that were not provided.

    Args:
        num_replicas: Target number of replicas; a falsy value selects the
            default for the chosen mode.
        trial_length: Trial length passed to ``run_wrk_on_all_nodes``; a
            falsy value selects the default for the chosen mode.
        max_batch_size: Forwarded unchanged to ``deploy_replicas``.
    """
    # Explicit user values win over the per-mode defaults.
    # IS_SMOKE_TEST is set by args of releaser's e2e.py
    is_full_test = os.environ.get("IS_SMOKE_TEST", "1") != "1"
    if is_full_test:
        num_replicas = num_replicas or DEFAULT_FULL_TEST_NUM_REPLICA
        trial_length = trial_length or DEFAULT_FULL_TEST_TRIAL_LENGTH
        logger.info(f"Running full test with {num_replicas} replicas ..\n")
        logger.info("Setting up anyscale ray cluster .. \n")
        serve_client = setup_anyscale_cluster()
    else:
        num_replicas = num_replicas or DEFAULT_SMOKE_TEST_NUM_REPLICA
        trial_length = trial_length or DEFAULT_SMOKE_TEST_TRIAL_LENGTH
        logger.info(
            f"Running local / smoke test with {num_replicas} replicas ..\n")

        # The local test uses Cluster() to mock actors, which needs the
        # number of nodes up front; the ray client path does not.
        num_nodes = int(math.ceil(num_replicas / NUM_CPU_PER_NODE))
        logger.info(
            f"Setting up local ray cluster with {num_nodes} nodes ..\n")
        serve_client = setup_local_single_node_cluster(num_nodes)

    http_config = serve_client._http_config
    http_host = str(http_config.host)
    http_port = str(http_config.port)
    logger.info(f"Ray serve http_host: {http_host}, http_port: {http_port}")

    logger.info(f"Deploying with {num_replicas} target replicas ....\n")
    deploy_replicas(num_replicas, max_batch_size)

    logger.info("Warming up cluster ....\n")
    # NOTE(review): the value returned by .remote() is discarded, so this
    # function does not wait on the warm-up itself -- confirm intended.
    warm_up_one_cluster.remote(10, http_host, http_port, "echo")

    logger.info(f"Starting wrk trial on all nodes for {trial_length} ....\n")
    # For detailed discussion, see https://github.com/wg/wrk/issues/205
    # TODO:(jiaodong) What's the best number to use here ?
    endpoint_names = list(serve.list_endpoints().keys())
    all_metrics, all_wrk_stdout = run_wrk_on_all_nodes(
        trial_length,
        NUM_CONNECTIONS,
        http_host,
        http_port,
        all_endpoints=endpoint_names)

    aggregated_metrics = aggregate_all_metrics(all_metrics)

    logger.info("Wrk stdout on each node: ")
    for node_stdout in all_wrk_stdout:
        logger.info(node_stdout)

    logger.info("Final aggregated metrics: ")
    for metric_name, metric_value in aggregated_metrics.items():
        logger.info(f"{metric_name}: {metric_value}")

    save_test_results(
        aggregated_metrics,
        default_output_file="/tmp/single_deployment_1k_noop_replica.json")
Esempio n. 5
0
def test_list_endpoints(serve_instance):
    """Verify serve.list_endpoints() including traffic and shadow traffic.

    Three backends and two endpoints are created, "backend3" is registered
    as shadow traffic for "endpoint" at 0.5, and the listed configs are
    compared with the expected dicts. Both endpoints are then deleted one
    after the other.
    """

    def f():
        pass

    for backend_tag in ("backend", "backend2", "backend3"):
        serve.create_backend(backend_tag, f)

    serve.create_endpoint(
        "endpoint", backend="backend", route="/api", methods=["GET", "POST"])
    serve.create_endpoint("endpoint2", backend="backend2", methods=["POST"])
    serve.shadow_traffic("endpoint", "backend3", 0.5)

    # Expected listing, keyed by endpoint name (checked in this order).
    expected_configs = {
        "endpoint": {
            "route": "/api",
            "methods": ["GET", "POST"],
            "traffic": {"backend": 1.0},
            "shadows": {"backend3": 0.5},
            "python_methods": [],
        },
        "endpoint2": {
            "route": None,
            "methods": ["POST"],
            "traffic": {"backend2": 1.0},
            "shadows": {},
            "python_methods": [],
        },
    }
    listed = serve.list_endpoints()
    for name, config in expected_configs.items():
        assert name in listed
        assert listed[name] == config

    # Removing one endpoint must leave the other one listed.
    serve.delete_endpoint("endpoint")
    remaining = serve.list_endpoints()
    assert "endpoint2" in remaining

    serve.delete_endpoint("endpoint2")
    assert len(serve.list_endpoints()) == 0
    def fetch_resources(self):
        """Return the resources read from ``self.counter``, or ``{}``.

        An empty dict is returned without reading the counter when there is
        no client, when listing the endpoints raises ``AttributeError``, or
        when no listed endpoint has any backend in its traffic config.
        Otherwise the counter is read and, if ``self.tui`` is set, the
        result is also stored on ``self.tui.resources_by_endpoint``.

        Returns:
            The value obtained from ``self.counter.read.remote()`` via
            ``ray.get``, or ``{}`` in the cases above.
        """
        if not self.client:
            return {}

        try:
            configs = serve.list_endpoints().items()
        except AttributeError:
            # NOTE(review): presumably Serve is not usable yet at this
            # point -- confirm which failure this is meant to cover.
            return {}

        # An endpoint may be listed with an empty traffic dict; indexing its
        # first backend would raise IndexError, so only check whether any
        # endpoint actually has a backend assigned.
        has_routed_endpoint = any(
            endpoint_config["traffic"] for _, endpoint_config in configs)
        if not has_routed_endpoint:
            return {}

        resources = ray.get(self.counter.read.remote())

        if self.tui:
            self.tui.resources_by_endpoint = resources
        return resources
Esempio n. 7
0
def test_ray_client(ray_client_instance):
    """Drive Serve from separate driver scripts sharing the "" namespace.

    Each script is run through ``run_string_as_driver`` and connects to
    ``ray_client_instance`` with ``namespace=""``. The scripts start Serve
    detached, deploy "test1" under "/hello", delete it, and finally deploy
    a FastAPI-backed class ``A``; the test process checks the listings and
    HTTP responses in between.
    """
    ray.util.connect(ray_client_instance, namespace="")

    start = """
import ray
ray.util.connect("{}", namespace="")

from ray import serve

serve.start(detached=True)
""".format(ray_client_instance)
    run_string_as_driver(start)

    serve.connect()

    deploy = """
import ray
ray.util.connect("{}", namespace="")

from ray import serve

@serve.deployment(name="test1", route_prefix="/hello")
def f(*args):
    return "hello"

f.deploy()
""".format(ray_client_instance)
    run_string_as_driver(deploy)

    assert "test1" in serve.list_backends()
    assert "test1" in serve.list_endpoints()
    # NOTE(review): the original text from this request up to the
    # ``@app.get("/")`` line of the FastAPI script was lost to a redaction
    # artifact. The URL, the delete step and the start of the FastAPI
    # script below are a restoration -- confirm against upstream.
    assert requests.get("http://localhost:8000/hello").text == "hello"

    delete = """
import ray
ray.util.connect("{}", namespace="")

from ray import serve

serve.get_deployment("test1").delete()
""".format(ray_client_instance)
    run_string_as_driver(delete)

    assert "test1" not in serve.list_backends()
    assert "test1" not in serve.list_endpoints()

    fastapi = """
import ray
ray.util.connect("{}", namespace="")

from ray import serve
from fastapi import FastAPI

app = FastAPI()

@app.get("/")
def hello():
    return "hello"

@serve.deployment
@serve.ingress(app)
class A:
    pass

A.deploy()
""".format(ray_client_instance)
    run_string_as_driver(fastapi)

    assert requests.get("http://localhost:8000/A").json() == "hello"