async def main():
    # Choose the cluster setup based on the smoke test config.
    # IS_SMOKE_TEST is set by the args of releaser's e2e.py.
    if is_smoke_test():
        setup_local_single_node_cluster(1)
    else:
        setup_anyscale_cluster()

    result_json = await benchmark_main()
    logger.info(result_json)

    save_test_results(result_json, default_output_file="/tmp/micro_benchmark.json")
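# A minimal sketch of how this async entrypoint might be invoked. Whether the
# real benchmark script uses asyncio.run() or an explicit event loop is an
# assumption; the __main__ guard below is illustrative only.
if __name__ == "__main__":
    import asyncio

    # Run the micro benchmark end to end and block until results are saved.
    asyncio.run(main())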
def main(num_replicas: Optional[int], trial_length: Optional[str],
         max_batch_size: Optional[int]):
    # Give default cluster parameter values based on the smoke_test config;
    # if the user provided values explicitly, use them instead.
    # IS_SMOKE_TEST is set by the args of releaser's e2e.py.
    smoke_test = os.environ.get("IS_SMOKE_TEST", "1")
    if smoke_test == "1":
        num_replicas = num_replicas or DEFAULT_SMOKE_TEST_NUM_REPLICA
        trial_length = trial_length or DEFAULT_SMOKE_TEST_TRIAL_LENGTH
        logger.info(
            f"Running local / smoke test with {num_replicas} replicas ..\n")

        # Choose the cluster setup based on user config. The local test uses
        # Cluster() to mock a multi-node cluster, which needs an explicit node
        # count; the Ray client path does not.
        num_nodes = int(math.ceil(num_replicas / NUM_CPU_PER_NODE))
        logger.info(
            f"Setting up local ray cluster with {num_nodes} nodes ..\n")
        serve_client = setup_local_single_node_cluster(num_nodes)[0]
    else:
        num_replicas = num_replicas or DEFAULT_FULL_TEST_NUM_REPLICA
        trial_length = trial_length or DEFAULT_FULL_TEST_TRIAL_LENGTH
        logger.info(f"Running full test with {num_replicas} replicas ..\n")
        logger.info("Setting up anyscale ray cluster .. \n")
        serve_client = setup_anyscale_cluster()

    http_host = str(serve_client._http_config.host)
    http_port = str(serve_client._http_config.port)
    logger.info(f"Ray serve http_host: {http_host}, http_port: {http_port}")

    logger.info(f"Deploying with {num_replicas} target replicas ....\n")
    deploy_replicas(num_replicas, max_batch_size)

    logger.info("Warming up cluster ....\n")
    warm_up_one_cluster.remote(10, http_host, http_port, "echo")

    logger.info(f"Starting wrk trial on all nodes for {trial_length} ....\n")
    # For detailed discussion, see https://github.com/wg/wrk/issues/205
    # TODO(jiaodong): What's the best number of connections to use here?
    all_endpoints = list(serve.list_deployments().keys())
    all_metrics, all_wrk_stdout = run_wrk_on_all_nodes(
        trial_length,
        NUM_CONNECTIONS,
        http_host,
        http_port,
        all_endpoints=all_endpoints)

    aggregated_metrics = aggregate_all_metrics(all_metrics)

    logger.info("Wrk stdout on each node: ")
    for wrk_stdout in all_wrk_stdout:
        logger.info(wrk_stdout)

    logger.info("Final aggregated metrics: ")
    for key, val in aggregated_metrics.items():
        logger.info(f"{key}: {val}")

    save_test_results(
        aggregated_metrics,
        default_output_file="/tmp/single_deployment_1k_noop_replica.json")
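# `warm_up_one_cluster` above is launched as a fire-and-forget Ray task before
# the wrk trial. A minimal, self-contained sketch of what such a warm-up task
# might look like is below; the real helper lives in the shared serve test
# utilities, so the body (request count handling, error swallowing) and the
# `_sketch` name are assumptions, not the actual implementation.
import ray
import requests


@ray.remote
def warm_up_one_cluster_sketch(num_warmup_requests: int, http_host: str,
                               http_port: str, endpoint: str) -> str:
    # Hit the deployed endpoint a few times so routing and the HTTP proxy are
    # exercised before wrk starts measuring.
    url = f"http://{http_host}:{http_port}/{endpoint}"
    for _ in range(num_warmup_requests):
        try:
            requests.get(url, timeout=5)
        except requests.RequestException:
            # Warm-up failures are non-fatal; the real trial surfaces errors.
            pass
    return endpoint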
def main(
    min_replicas: Optional[int],
    max_replicas: Optional[int],
    num_deployments: Optional[int],
    trial_length: Optional[str],
):
    # Give default cluster parameter values based on the smoke_test config;
    # if the user provided values explicitly, use them instead.
    # IS_SMOKE_TEST is set by the args of releaser's e2e.py.
    if is_smoke_test():
        min_replicas = min_replicas or DEFAULT_SMOKE_TEST_MIN_NUM_REPLICA
        max_replicas = max_replicas or DEFAULT_SMOKE_TEST_MAX_NUM_REPLICA
        num_deployments = num_deployments or DEFAULT_SMOKE_TEST_NUM_DEPLOYMENTS
        trial_length = trial_length or DEFAULT_SMOKE_TEST_TRIAL_LENGTH
        logger.info(f"Running smoke test with min {min_replicas} and max "
                    f"{max_replicas} replicas, {num_deployments} deployments "
                    f".. \n")

        # Choose the cluster setup based on user config. The local test uses
        # Cluster() to mock a multi-node cluster, which needs an explicit node
        # count; the Ray client path does not.
        num_nodes = int(math.ceil(max_replicas / NUM_CPU_PER_NODE))
        logger.info(
            f"Setting up local ray cluster with {num_nodes} nodes .. \n")
        serve_client = setup_local_single_node_cluster(num_nodes)[0]
    else:
        min_replicas = min_replicas or DEFAULT_FULL_TEST_MIN_NUM_REPLICA
        max_replicas = max_replicas or DEFAULT_FULL_TEST_MAX_NUM_REPLICA
        num_deployments = num_deployments or DEFAULT_FULL_TEST_NUM_DEPLOYMENTS
        trial_length = trial_length or DEFAULT_FULL_TEST_TRIAL_LENGTH
        logger.info(f"Running full test with min {min_replicas} and max "
                    f"{max_replicas} replicas, {num_deployments} deployments "
                    f".. \n")
        logger.info("Setting up anyscale ray cluster .. \n")
        serve_client = setup_anyscale_cluster()

    http_host = str(serve_client._http_config.host)
    http_port = str(serve_client._http_config.port)
    logger.info(f"Ray serve http_host: {http_host}, http_port: {http_port}")

    logger.info(f"Deploying with min {min_replicas} and max {max_replicas} "
                f"target replicas ....\n")
    setup_multi_deployment_replicas(min_replicas, max_replicas,
                                    num_deployments)

    logger.info("Warming up cluster ....\n")
    endpoint_refs = []
    all_endpoints = list(serve.list_deployments().keys())
    for endpoint in all_endpoints:
        endpoint_refs.append(
            warm_up_one_cluster.options(num_cpus=0).remote(
                10, http_host, http_port, endpoint))
    for endpoint in ray.get(endpoint_refs):
        logger.info(f"Finished warming up {endpoint}")

    logger.info(f"Starting wrk trial on all nodes for {trial_length} ....\n")
    # For detailed discussion, see https://github.com/wg/wrk/issues/205
    # TODO(jiaodong): What's the best number of connections to use here?
    all_metrics, all_wrk_stdout = run_wrk_on_all_nodes(
        trial_length,
        NUM_CONNECTIONS,
        http_host,
        http_port,
        all_endpoints=all_endpoints)

    aggregated_metrics = aggregate_all_metrics(all_metrics)

    logger.info("Wrk stdout on each node: ")
    for wrk_stdout in all_wrk_stdout:
        logger.info(wrk_stdout)

    logger.info("Final aggregated metrics: ")
    for key, val in aggregated_metrics.items():
        logger.info(f"{key}: {val}")

    save_test_results(
        aggregated_metrics,
        default_output_file="/tmp/autoscaling_multi_deployment.json")
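# `setup_multi_deployment_replicas` is defined elsewhere in this test. A
# minimal sketch of the kind of autoscaling deployment it might create is
# below; only the min/max replica plumbing mirrors the arguments used above.
# The handler body, the deployment name, and the exact deploy call are
# assumptions (depending on the Serve version this may be `.deploy()` or
# `serve.run()`).
from ray import serve


def deploy_autoscaling_echo_sketch(min_replicas: int, max_replicas: int,
                                   name: str = "echo_0") -> None:
    @serve.deployment(
        name=name,
        autoscaling_config={
            "min_replicas": min_replicas,
            "max_replicas": max_replicas,
        },
    )
    class Echo:
        async def __call__(self, request):
            # No-op handler so the benchmark measures Serve overhead only.
            return "echo"

    Echo.deploy()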
def main(
    fanout_degree: Optional[int],
    init_delay_secs: Optional[int],
    compute_delay_secs: Optional[int],
    num_requests_per_client: Optional[int],
    num_clients: Optional[int],
    throughput_trial_duration_secs: Optional[int],
    local_test: Optional[bool],
):
    if local_test:
        setup_local_single_node_cluster(1, num_cpu_per_node=8)
    else:
        setup_anyscale_cluster()

    serve_dag = test_wide_fanout_deployment_graph(
        fanout_degree,
        init_delay_secs=init_delay_secs,
        compute_delay_secs=compute_delay_secs,
    )
    dag_handle = serve.run(serve_dag)

    # 0 + 1 + 2 + 3 + 4 + ... + (fanout_degree - 1)
    expected = ((0 + fanout_degree - 1) * fanout_degree) / 2
    assert ray.get(dag_handle.predict.remote(0)) == expected

    loop = asyncio.get_event_loop()
    throughput_mean_tps, throughput_std_tps = loop.run_until_complete(
        benchmark_throughput_tps(
            dag_handle,
            expected,
            duration_secs=throughput_trial_duration_secs,
            num_clients=num_clients,
        ))
    latency_mean_ms, latency_std_ms = loop.run_until_complete(
        benchmark_latency_ms(
            dag_handle,
            expected,
            num_requests=num_requests_per_client,
            num_clients=num_clients,
        ))

    print(f"fanout_degree: {fanout_degree}, num_clients: {num_clients}")
    print(f"latency_mean_ms: {latency_mean_ms}, "
          f"latency_std_ms: {latency_std_ms}")
    print(f"throughput_mean_tps: {throughput_mean_tps}, "
          f"throughput_std_tps: {throughput_std_tps}")

    results = {
        "fanout_degree": fanout_degree,
        "init_delay_secs": init_delay_secs,
        "compute_delay_secs": compute_delay_secs,
        "local_test": local_test,
    }
    results["perf_metrics"] = [
        {
            "perf_metric_name": "throughput_mean_tps",
            "perf_metric_value": throughput_mean_tps,
            "perf_metric_type": "THROUGHPUT",
        },
        {
            "perf_metric_name": "throughput_std_tps",
            "perf_metric_value": throughput_std_tps,
            "perf_metric_type": "THROUGHPUT",
        },
        {
            "perf_metric_name": "latency_mean_ms",
            "perf_metric_value": latency_mean_ms,
            "perf_metric_type": "LATENCY",
        },
        {
            "perf_metric_name": "latency_std_ms",
            "perf_metric_value": latency_std_ms,
            "perf_metric_type": "LATENCY",
        },
    ]
    save_test_results(results)
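# The `expected` value above is the closed form of 0 + 1 + ... +
# (fanout_degree - 1): each of the fanout nodes contributes its index, and the
# combiner sums them. A quick self-contained check of that arithmetic; the
# helper name is hypothetical and used only for this check.
def _expected_fanout_sum(fanout_degree: int) -> float:
    return ((0 + fanout_degree - 1) * fanout_degree) / 2


assert _expected_fanout_sum(4) == sum(range(4)) == 6
assert _expected_fanout_sum(1000) == sum(range(1000)) == 499500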
def main(
    chain_length: Optional[int],
    init_delay_secs: Optional[int],
    compute_delay_secs: Optional[int],
    num_requests_per_client: Optional[int],
    num_clients: Optional[int],
    throughput_trial_duration_secs: Optional[int],
    local_test: Optional[bool],
):
    if local_test:
        setup_local_single_node_cluster(1, num_cpu_per_node=8)
    else:
        setup_anyscale_cluster()

    serve_dag = test_long_chain_deployment_graph(
        chain_length,
        init_delay_secs=init_delay_secs,
        compute_delay_secs=compute_delay_secs,
    )
    dag_handle = serve.run(serve_dag)

    # With an input of 0, the chain is expected to return chain_length.
    assert ray.get(dag_handle.predict.remote(0)) == chain_length

    loop = asyncio.get_event_loop()
    throughput_mean_tps, throughput_std_tps = loop.run_until_complete(
        benchmark_throughput_tps(
            dag_handle,
            chain_length,
            duration_secs=throughput_trial_duration_secs,
            num_clients=num_clients,
        )
    )
    latency_mean_ms, latency_std_ms = loop.run_until_complete(
        benchmark_latency_ms(
            dag_handle,
            chain_length,
            num_requests=num_requests_per_client,
            num_clients=num_clients,
        )
    )

    print(f"chain_length: {chain_length}, num_clients: {num_clients}")
    print(f"latency_mean_ms: {latency_mean_ms}, "
          f"latency_std_ms: {latency_std_ms}")
    print(
        f"throughput_mean_tps: {throughput_mean_tps}, "
        f"throughput_std_tps: {throughput_std_tps}"
    )

    results = {
        "chain_length": chain_length,
        "init_delay_secs": init_delay_secs,
        "compute_delay_secs": compute_delay_secs,
        "local_test": local_test,
    }
    results["perf_metrics"] = [
        {
            "perf_metric_name": "throughput_mean_tps",
            "perf_metric_value": throughput_mean_tps,
            "perf_metric_type": "THROUGHPUT",
        },
        {
            "perf_metric_name": "throughput_std_tps",
            "perf_metric_value": throughput_std_tps,
            "perf_metric_type": "THROUGHPUT",
        },
        {
            "perf_metric_name": "latency_mean_ms",
            "perf_metric_value": latency_mean_ms,
            "perf_metric_type": "LATENCY",
        },
        {
            "perf_metric_name": "latency_std_ms",
            "perf_metric_value": latency_std_ms,
            "perf_metric_type": "LATENCY",
        },
    ]
    save_test_results(results)
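# `benchmark_latency_ms` and `benchmark_throughput_tps` come from the shared
# benchmark utilities used by both deployment-graph tests above. A minimal,
# self-contained sketch of the latency half is below, written against a
# generic async callable rather than a Serve handle, since the exact
# handle-await semantics depend on the Ray version; all names here are
# hypothetical and the real helpers may aggregate differently.
import asyncio
import time
from typing import Awaitable, Callable, List, Tuple


async def benchmark_latency_ms_sketch(
    call_once: Callable[[], Awaitable[None]],
    num_requests: int,
    num_clients: int,
) -> Tuple[float, float]:
    latencies_ms: List[float] = []

    async def client() -> None:
        # Each client issues num_requests sequential calls and records the
        # wall-clock latency of every call in milliseconds.
        for _ in range(num_requests):
            start = time.perf_counter()
            await call_once()
            latencies_ms.append((time.perf_counter() - start) * 1000)

    # Run all clients concurrently, then report mean and standard deviation
    # over the pooled per-request latencies.
    await asyncio.gather(*[client() for _ in range(num_clients)])
    mean = sum(latencies_ms) / len(latencies_ms)
    variance = sum((x - mean) ** 2 for x in latencies_ms) / len(latencies_ms)
    return mean, variance ** 0.5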