async def main():
    """Run the micro benchmark and persist whatever it reports.

    A smoke test calls ``setup_local_single_node_cluster(1)``; any other run
    calls ``setup_anyscale_cluster()``. The value awaited from
    ``benchmark_main()`` is logged and then passed to ``save_test_results``.
    """
    # IS_SMOKE_TEST is set by args of releaser's e2e.py
    if not is_smoke_test():
        setup_anyscale_cluster()
    else:
        setup_local_single_node_cluster(1)

    benchmark_output = await benchmark_main()
    logger.info(benchmark_output)
    save_test_results(
        benchmark_output,
        default_output_file="/tmp/micro_benchmark.json",
    )
def main(
    min_replicas: Optional[int],
    max_replicas: Optional[int],
    num_deployments: Optional[int],
    trial_length: Optional[str],
):
    """Run the autoscaling multi-deployment wrk benchmark and save its metrics.

    Missing parameters are filled from the smoke-test or full-test defaults,
    depending on ``is_smoke_test()``.

    Args:
        min_replicas: Lower replica bound handed to
            ``setup_multi_deployment_replicas``. ``None`` selects the default;
            an explicit ``0`` is honored rather than replaced.
        max_replicas: Upper replica bound; a falsy value selects the default.
        num_deployments: Number of deployments; a falsy value selects the
            default.
        trial_length: Trial duration handed to ``run_wrk_on_all_nodes``; a
            falsy value selects the default.
    """
    # IS_SMOKE_TEST is set by args of releaser's e2e.py
    if is_smoke_test():
        # Compare against None so that a user-provided 0 is not mistaken for
        # "not provided" and overwritten by the default.
        if min_replicas is None:
            min_replicas = DEFAULT_SMOKE_TEST_MIN_NUM_REPLICA
        max_replicas = max_replicas or DEFAULT_SMOKE_TEST_MAX_NUM_REPLICA
        num_deployments = num_deployments or DEFAULT_SMOKE_TEST_NUM_DEPLOYMENTS
        trial_length = trial_length or DEFAULT_SMOKE_TEST_TRIAL_LENGTH
        logger.info(f"Running smoke test with min {min_replicas} and max "
                    f"{max_replicas} replicas, {num_deployments} deployments "
                    f".. \n")
        # Choose cluster setup based on user config. Local test uses Cluster()
        # to mock actors that requires # of nodes to be specified, but ray
        # client doesn't need to
        num_nodes = int(math.ceil(max_replicas / NUM_CPU_PER_NODE))
        logger.info(
            f"Setting up local ray cluster with {num_nodes} nodes .. \n")
        serve_client = setup_local_single_node_cluster(num_nodes)[0]
    else:
        if min_replicas is None:
            min_replicas = DEFAULT_FULL_TEST_MIN_NUM_REPLICA
        max_replicas = max_replicas or DEFAULT_FULL_TEST_MAX_NUM_REPLICA
        num_deployments = num_deployments or DEFAULT_FULL_TEST_NUM_DEPLOYMENTS
        trial_length = trial_length or DEFAULT_FULL_TEST_TRIAL_LENGTH
        logger.info(f"Running full test with min {min_replicas} and max "
                    f"{max_replicas} replicas, {num_deployments} deployments "
                    f".. \n")
        logger.info("Setting up anyscale ray cluster .. \n")
        serve_client = setup_anyscale_cluster()

    http_host = str(serve_client._http_config.host)
    http_port = str(serve_client._http_config.port)
    logger.info(f"Ray serve http_host: {http_host}, http_port: {http_port}")

    # The trailing space keeps the two f-string pieces from running together
    # ("max 10target replicas").
    logger.info(f"Deploying with min {min_replicas} and max {max_replicas} "
                f"target replicas ....\n")
    setup_multi_deployment_replicas(min_replicas, max_replicas,
                                    num_deployments)

    logger.info("Warming up cluster ....\n")
    all_endpoints = list(serve.list_deployments().keys())
    # Launch every warm-up task first, then wait on all of them together.
    endpoint_refs = [
        warm_up_one_cluster.options(num_cpus=0).remote(
            10, http_host, http_port, endpoint)
        for endpoint in all_endpoints
    ]
    for endpoint in ray.get(endpoint_refs):
        logger.info(f"Finished warming up {endpoint}")

    logger.info(f"Starting wrk trial on all nodes for {trial_length} ....\n")
    # For detailed discussion, see https://github.com/wg/wrk/issues/205
    # TODO:(jiaodong) What's the best number to use here ?
    all_metrics, all_wrk_stdout = run_wrk_on_all_nodes(
        trial_length,
        NUM_CONNECTIONS,
        http_host,
        http_port,
        all_endpoints=all_endpoints)

    aggregated_metrics = aggregate_all_metrics(all_metrics)
    logger.info("Wrk stdout on each node: ")
    for wrk_stdout in all_wrk_stdout:
        logger.info(wrk_stdout)
    logger.info("Final aggregated metrics: ")
    for key, val in aggregated_metrics.items():
        logger.info(f"{key}: {val}")
    save_test_results(
        aggregated_metrics,
        default_output_file="/tmp/autoscaling_multi_deployment.json")
def main():
    """Check that Serve deployments survive a cluster restart via checkpoint.

    Deploys two copies of ``hello`` ("hello" and "world"), verifies both
    endpoints, tears the cluster down, starts a new cluster with the same
    namespace and checkpoint path, and verifies the endpoints answer again
    without any new ``deploy()`` call. Writes a success marker through
    ``save_test_results`` when every assertion passed.
    """
    # Setup local cluster, note this cluster setup is the same for both
    # local and product ray cluster env.
    # Each test uses different ray namespace, thus kv storage key for each
    # checkpoint is different to avoid collision.
    namespace = uuid.uuid4().hex

    # IS_SMOKE_TEST is set by args of releaser's e2e.py
    if is_smoke_test():
        path = Path("checkpoint.db")
        checkpoint_path = f"file://{path}"
        # Remove a checkpoint file left behind by an earlier local run.
        if path.exists():
            path.unlink()
    else:
        checkpoint_path = (
            "s3://serve-nightly-tests/fault-tolerant-test-checkpoint"  # noqa: E501
        )

    _, cluster = setup_local_single_node_cluster(
        1, checkpoint_path=checkpoint_path, namespace=namespace)

    # Deploy for the first time
    @serve.deployment(num_replicas=DEFAULT_NUM_REPLICAS)
    def hello():
        return serve.get_replica_context().deployment

    for name in ["hello", "world"]:
        hello.options(name=name).deploy()

        for _ in range(5):
            response = request_with_retries(f"/{name}/", timeout=3)
            assert response.text == name

    logger.info("Initial deployment successful with working endpoint.")

    # Kill current cluster, recover from remote checkpoint and ensure endpoint
    # is still available with expected results

    ray.kill(serve.context._global_client._controller, no_restart=True)
    ray.shutdown()
    cluster.shutdown()
    serve.context.set_global_client(None)

    # Start another ray cluster with same namespace to resume from previous
    # checkpoints with no new deploy() call. Rebind ``cluster`` so the final
    # shutdown below stops this new cluster instead of the one that was
    # already shut down above.
    _, cluster = setup_local_single_node_cluster(
        1, checkpoint_path=checkpoint_path, namespace=namespace)

    for name in ["hello", "world"]:
        for _ in range(5):
            response = request_with_retries(f"/{name}/", timeout=3)
            assert response.text == name

    logger.info("Deployment recovery from s3 checkpoint is successful "
                "with working endpoint.")

    # Tear everything down. Checkpoints are not deleted here: it's up to the
    # TTL policy on s3 to clean them up, and leftovers won't collide with
    # subsequent tests since each test runs in a different uuid namespace.
    serve.shutdown()
    ray.shutdown()
    cluster.shutdown()

    # Checkpoints in S3 bucket are moved after 7 days with explicit lifecycle
    # rules. Each checkpoint is ~260 Bytes in size from this test.

    # Save results
    save_test_results(
        {"result": "success"},
        default_output_file="/tmp/serve_cluster_fault_tolerance.json",
    )
def main(
    num_replicas: Optional[int],
    trial_length: Optional[str],
    max_batch_size: Optional[int],
):
    """Benchmark a single deployment with wrk and save the aggregated metrics.

    Args:
        num_replicas: Target replica count; a falsy value selects the
            smoke-test or full-test default.
        trial_length: Duration of the measured wrk trial; a falsy value
            selects the smoke-test or full-test default.
        max_batch_size: Forwarded unchanged to ``deploy_replicas``.
    """
    # IS_SMOKE_TEST is set by args of releaser's e2e.py
    if not is_smoke_test():
        if not num_replicas:
            num_replicas = DEFAULT_FULL_TEST_NUM_REPLICA
        if not trial_length:
            trial_length = DEFAULT_FULL_TEST_TRIAL_LENGTH
        logger.info(f"Running full test with {num_replicas} replicas ..\n")
        logger.info("Setting up anyscale ray cluster .. \n")
        serve_client = setup_anyscale_cluster()
    else:
        if not num_replicas:
            num_replicas = DEFAULT_SMOKE_TEST_NUM_REPLICA
        if not trial_length:
            trial_length = DEFAULT_SMOKE_TEST_TRIAL_LENGTH
        logger.info(
            f"Running local / smoke test with {num_replicas} replicas ..\n")

        # The local Cluster() needs an explicit node count, derived here from
        # the replica count; the ray client path above does not.
        node_count = int(math.ceil(num_replicas / NUM_CPU_PER_NODE))
        logger.info(
            f"Setting up local ray cluster with {node_count} nodes ..\n")
        local_setup = setup_local_single_node_cluster(node_count)
        serve_client = local_setup[0]

    http_config = serve_client._http_config
    host = str(http_config.host)
    port = str(http_config.port)
    logger.info(f"Ray serve http_host: {host}, http_port: {port}")

    logger.info(f"Deploying with {num_replicas} target replicas ....\n")
    endpoints = deploy_replicas(num_replicas, max_batch_size)

    # Warm-up pass: always uses the smoke-test trial length and discards
    # the output.
    logger.info("Warming up cluster ...\n")
    run_wrk_on_all_nodes(
        DEFAULT_SMOKE_TEST_TRIAL_LENGTH,
        NUM_CONNECTIONS,
        host,
        port,
        all_endpoints=endpoints,
        ignore_output=True,
    )

    logger.info(f"Starting wrk trial on all nodes for {trial_length} ....\n")
    # For detailed discussion, see https://github.com/wg/wrk/issues/205
    # TODO:(jiaodong) What's the best number to use here ?
    per_node_metrics, per_node_stdout = run_wrk_on_all_nodes(
        trial_length,
        NUM_CONNECTIONS,
        host,
        port,
        all_endpoints=endpoints,
    )

    summary = aggregate_all_metrics(per_node_metrics)
    logger.info("Wrk stdout on each node: ")
    for node_stdout in per_node_stdout:
        logger.info(node_stdout)
    logger.info("Final aggregated metrics: ")
    for metric_name, metric_value in summary.items():
        logger.info(f"{metric_name}: {metric_value}")
    save_test_results(
        summary,
        default_output_file="/tmp/single_deployment_1k_noop_replica.json",
    )
# Example #5
# 0
def main():
    """Check that a Serve deployment survives a cluster restart via checkpoint.

    Deploys the ``echo`` deployment, verifies its endpoint, tears the cluster
    down, starts a new cluster with the same namespace and checkpoint path,
    and verifies the endpoint answers again without any new ``deploy()``
    call. Writes a success marker through ``save_test_results`` when every
    assertion passed.
    """
    # Setup local cluster, note this cluster setup is the same for both
    # local and product ray cluster env.
    # Each test uses different ray namespace, thus kv storage key for each
    # checkpoint is different to avoid collision.
    namespace = uuid.uuid4().hex

    # IS_SMOKE_TEST is set by args of releaser's e2e.py
    if is_smoke_test():
        checkpoint_path = "file://checkpoint.db"
    else:
        checkpoint_path = (
            "gs://kazi_test/test/fault-tolerant-test-checkpoint"  # noqa: E501
        )

    _, cluster = setup_local_single_node_cluster(
        1, checkpoint_path=checkpoint_path, namespace=namespace
    )

    # Deploy for the first time
    @serve.deployment(name="echo", num_replicas=DEFAULT_NUM_REPLICAS)
    class Echo:
        def __init__(self):
            # __init__ must return None; returning any other value makes
            # instantiation raise TypeError.
            pass

        def __call__(self, request):
            return "hii"

    Echo.deploy()

    # Ensure endpoint is working
    for _ in range(5):
        response = request_with_retries("/echo/", timeout=3)
        assert response.text == "hii"

    logger.info("Initial deployment successful with working endpoint.")

    # Kill current cluster, recover from remote checkpoint and ensure endpoint
    # is still available with expected results

    ray.kill(serve.context._global_client._controller, no_restart=True)
    ray.shutdown()
    cluster.shutdown()
    serve.context.set_global_client(None)

    # Start another ray cluster with same namespace to resume from previous
    # checkpoints with no new deploy() call. Rebind ``cluster`` so the final
    # shutdown below stops this new cluster instead of the one that was
    # already shut down above.
    _, cluster = setup_local_single_node_cluster(
        1, checkpoint_path=checkpoint_path, namespace=namespace
    )

    for _ in range(5):
        response = request_with_retries("/echo/", timeout=3)
        assert response.text == "hii"

    logger.info(
        "Deployment recovery from Google Cloud Storage checkpoint "
        "is successful with working endpoint."
    )

    # Tear everything down. Checkpoints are not deleted here: it's up to the
    # TTL policy on GCS to clean them up, and leftovers won't collide with
    # subsequent tests since each test runs in a different uuid namespace.
    serve.shutdown()
    ray.shutdown()
    cluster.shutdown()

    # Checkpoints in GCS bucket are moved after 7 days with explicit lifecycle
    # rules. Each checkpoint is ~260 Bytes in size from this test.

    # Save results
    save_test_results(
        {"result": "success"},
        default_output_file="/tmp/serve_cluster_fault_tolerance.json",
    )