예제 #1
0
def test_startup_error_yields_clean_result(shutdown_only):
    """
    Check that an error while preparing the environment yields an actionable,
    clear error on the *client side*.

    `proxier.ray_client_server_env_prep` is patched with a function that
    always raises, and the `get_error` driver string is then executed through
    `run_string_as_driver`.
    """
    ray_instance = ray.init()
    server = proxier.serve_proxier("localhost:25030",
                                   ray_instance["redis_address"],
                                   session_dir=ray_instance["session_dir"])

    def raise_not_rewrite(input: JobConfig):
        raise RuntimeError("WEIRD_ERROR")

    # Stop the proxier even when the driver run fails; otherwise a failing
    # test would leave the server running.
    try:
        with patch.object(proxier, "ray_client_server_env_prep",
                          raise_not_rewrite):
            run_string_as_driver(get_error)
    finally:
        server.stop(0)
예제 #2
0
def test_delay_in_rewriting_environment(shutdown_only):
    """
    Check that a delay in `ray_client_server_env_prep` does not break
    a Client connecting.

    `proxier.ray_client_server_env_prep` is patched with a function that
    sleeps for 6 seconds before returning its input unchanged, and the
    `check_connection` driver string is then executed through
    `run_string_as_driver`.
    """
    # NOTE: these module-level overrides are not restored after the test.
    proxier.LOGSTREAM_RETRIES = 3
    proxier.LOGSTREAM_RETRY_INTERVAL_SEC = 1
    ray_instance = ray.init()
    server = proxier.serve_proxier("localhost:25010",
                                   ray_instance["redis_address"],
                                   session_dir=ray_instance["session_dir"])

    def delay_in_rewrite(_input: JobConfig):
        time.sleep(6)
        return _input

    # Stop the proxier even when the driver run fails; otherwise a failing
    # test would leave the server running.
    try:
        with patch.object(proxier, "ray_client_server_env_prep",
                          delay_in_rewrite):
            run_string_as_driver(check_connection)
    finally:
        server.stop(0)
예제 #3
0
def main():
    """Run the Ray Client server from the command line.

    Parses the command-line options (unrecognized arguments are discarded),
    starts either the proxier (``--mode proxy``) or a plain server (any other
    mode), and then loops forever. Once per second the loop writes a JSON
    health report containing the current time under the key
    ``"ray_client_server"`` in the internal KV store; failures to do so are
    logged and otherwise ignored.

    In ``specific-server`` mode the loop also counts down while
    ``server.data_servicer.num_clients`` is not positive, resets the counter
    whenever it is positive, and stops the server once the counter reaches
    zero. A ``KeyboardInterrupt`` stops the server in every mode.
    """
    import argparse

    cli = argparse.ArgumentParser()
    cli.add_argument(
        "--host", type=str, default="0.0.0.0", help="Host IP to bind to")
    cli.add_argument(
        "-p", "--port", type=int, default=10001, help="Port to bind to")
    cli.add_argument(
        "--mode",
        type=str,
        choices=["proxy", "legacy", "specific-server"],
        default="proxy",
    )
    cli.add_argument(
        "--address",
        required=False,
        type=str,
        help="Address to use to connect to Ray",
    )
    cli.add_argument(
        "--redis-password",
        required=False,
        type=str,
        help="Password for connecting to Redis",
    )
    cli.add_argument(
        "--metrics-agent-port",
        required=False,
        type=int,
        default=0,
        help="The port to use for connecting to the runtime_env agent.",
    )
    args, _unknown = cli.parse_known_args()
    setup_logger(ray_constants.LOGGER_LEVEL, ray_constants.LOGGER_FORMAT)

    # The handler is built in every mode, although only the non-proxy
    # branch below passes it on.
    ray_connect_handler = create_ray_handler(args.address, args.redis_password)

    hostport = f"{args.host}:{args.port:d}"
    logger.info(f"Starting Ray Client server on {hostport}")
    if args.mode != "proxy":
        server = serve(hostport, ray_connect_handler)
    else:
        server = serve_proxier(
            hostport,
            args.address,
            redis_password=args.redis_password,
            runtime_env_agent_port=args.metrics_agent_port,
        )

    try:
        idle_checks_remaining = TIMEOUT_FOR_SPECIFIC_SERVER_S
        while True:
            health_report = {"time": time.time()}

            try:
                kv = ray.experimental.internal_kv
                # Initialize the internal KV on first use.
                if not kv._internal_kv_initialized():
                    kv._initialize_internal_kv(
                        try_create_gcs_client(args.address,
                                              args.redis_password))
                kv._internal_kv_put(
                    "ray_client_server",
                    json.dumps(health_report),
                    namespace=ray_constants.KV_NAMESPACE_HEALTHCHECK,
                )
            except Exception as e:
                # Best effort: a failed health report must not stop the loop.
                failure_msg = (f"[{args.mode}] Failed to put health check "
                               f"on {args.address}")
                logger.error(failure_msg)
                logger.exception(e)

            time.sleep(1)
            if args.mode != "specific-server":
                continue

            # Idle-shutdown bookkeeping, specific-server mode only.
            if server.data_servicer.num_clients > 0:
                idle_checks_remaining = TIMEOUT_FOR_SPECIFIC_SERVER_S
            else:
                idle_checks_remaining -= 1

            if idle_checks_remaining == 0:
                # Reuse the interrupt path below to stop the server.
                raise KeyboardInterrupt()

            counting_down = (
                idle_checks_remaining != TIMEOUT_FOR_SPECIFIC_SERVER_S)
            if idle_checks_remaining % 5 == 0 and counting_down:
                logger.info(
                    f"{idle_checks_remaining} idle checks before shutdown.")

    except KeyboardInterrupt:
        server.stop(0)
예제 #4
0
def main():
    """Run the Ray Client server from the command line (Redis health check).

    Parses the command-line options, starts either the proxier
    (``--mode proxy``) or a plain server (any other mode), and then loops
    forever. Once per second the loop writes a JSON health report containing
    the current time to the ``"value"`` field of the Redis hash
    ``"healthcheck:ray_client_server"``; failures to do so are logged and
    otherwise ignored.

    In ``specific-server`` mode the loop also counts down while
    ``server.data_servicer.num_clients`` is not positive, resets the counter
    whenever it is positive, and stops the server once the counter reaches
    zero. A ``KeyboardInterrupt`` stops the server in every mode.
    """
    import argparse

    cli = argparse.ArgumentParser()
    cli.add_argument(
        "--host", type=str, default="0.0.0.0", help="Host IP to bind to")
    cli.add_argument(
        "-p", "--port", type=int, default=10001, help="Port to bind to")
    cli.add_argument(
        "--mode",
        type=str,
        choices=["proxy", "legacy", "specific-server"],
        default="proxy",
    )
    cli.add_argument(
        "--redis-address",
        required=False,
        type=str,
        help="Address to use to connect to Ray",
    )
    cli.add_argument(
        "--redis-password",
        required=False,
        type=str,
        help="Password for connecting to Redis",
    )
    cli.add_argument(
        "--worker-shim-pid",
        required=False,
        type=int,
        default=0,
        help="The PID of the process for setup worker runtime env.",
    )
    args = cli.parse_args()
    logging.basicConfig(level="INFO")

    # Health checks go through a dedicated redis client rather than
    # `internal_kv`, because `internal_kv` requires `ray.init` to be called,
    # which only connect handlers should do. The client is created lazily
    # inside the loop below.
    redis_client = None

    # The handler is built in every mode, although only the non-proxy
    # branch below passes it on.
    ray_connect_handler = create_ray_handler(args.redis_address,
                                             args.redis_password)

    hostport = f"{args.host}:{args.port:d}"
    logger.info(f"Starting Ray Client server on {hostport}")
    if args.mode != "proxy":
        server = serve(hostport, ray_connect_handler)
    else:
        server = serve_proxier(
            hostport,
            args.redis_address,
            redis_password=args.redis_password,
        )

    try:
        idle_checks_remaining = TIMEOUT_FOR_SPECIFIC_SERVER_S
        while True:
            health_report = {"time": time.time()}

            try:
                # Retry client creation on each pass until one exists.
                if not redis_client:
                    redis_client = try_create_redis_client(
                        args.redis_address, args.redis_password)
                redis_client.hset(
                    "healthcheck:ray_client_server",
                    "value",
                    json.dumps(health_report),
                )
            except Exception as e:
                # Best effort: a failed health report must not stop the loop.
                failure_msg = (f"[{args.mode}] Failed to put health check "
                               f"on {args.redis_address}")
                logger.error(failure_msg)
                logger.exception(e)

            time.sleep(1)
            if args.mode != "specific-server":
                continue

            # Idle-shutdown bookkeeping, specific-server mode only.
            if server.data_servicer.num_clients > 0:
                idle_checks_remaining = TIMEOUT_FOR_SPECIFIC_SERVER_S
            else:
                idle_checks_remaining -= 1

            if idle_checks_remaining == 0:
                # Reuse the interrupt path below to stop the server.
                raise KeyboardInterrupt()

            counting_down = (
                idle_checks_remaining != TIMEOUT_FOR_SPECIFIC_SERVER_S)
            if idle_checks_remaining % 5 == 0 and counting_down:
                logger.info(
                    f"{idle_checks_remaining} idle checks before shutdown.")

    except KeyboardInterrupt:
        server.stop(0)