def test_startup_error_yields_clean_result(shutdown_only):
    """
    Check that an error while preparing the environment yields an actionable,
    clear error on the *client side*.

    A proxier is served on ``localhost:25030`` against a freshly initialised
    Ray instance, ``proxier.ray_client_server_env_prep`` is patched with a
    function that always raises ``RuntimeError("WEIRD_ERROR")``, and the
    module-level ``get_error`` script is handed to ``run_string_as_driver``.
    """
    ray_instance = ray.init()
    server = proxier.serve_proxier(
        "localhost:25030",
        ray_instance["redis_address"],
        session_dir=ray_instance["session_dir"])

    def raise_not_rewrite(input: JobConfig):
        # Stand-in for the env-prep hook: fail unconditionally.
        raise RuntimeError("WEIRD_ERROR")

    try:
        with patch.object(proxier, "ray_client_server_env_prep",
                          raise_not_rewrite):
            run_string_as_driver(get_error)
    finally:
        # Stop the proxier even if the driver script raised, so a failing
        # run does not leave the server from this test running.
        server.stop(0)
def test_delay_in_rewriting_environment(shutdown_only):
    """
    Check that a delay in `ray_client_server_env_prep` does not break
    a Client connecting.

    A proxier is served on ``localhost:25010`` against a freshly initialised
    Ray instance, ``proxier.ray_client_server_env_prep`` is patched with a
    function that sleeps before returning its argument unchanged, and the
    module-level ``check_connection`` script is handed to
    ``run_string_as_driver``.
    """
    # NOTE(review): these overwrite module-level attributes on `proxier` and
    # are not restored afterwards, so they remain in effect for later tests
    # in the same process.
    proxier.LOGSTREAM_RETRIES = 3
    proxier.LOGSTREAM_RETRY_INTERVAL_SEC = 1
    ray_instance = ray.init()
    server = proxier.serve_proxier(
        "localhost:25010",
        ray_instance["redis_address"],
        session_dir=ray_instance["session_dir"])

    def delay_in_rewrite(_input: JobConfig):
        # Presumably chosen to outlast the 3 x 1 s retry settings above --
        # TODO confirm against how proxier uses LOGSTREAM_RETRIES.
        time.sleep(6)
        return _input

    try:
        with patch.object(proxier, "ray_client_server_env_prep",
                          delay_in_rewrite):
            run_string_as_driver(check_connection)
    finally:
        # Stop the proxier even if the driver script raised, so a failing
        # run does not leave the server from this test running.
        server.stop(0)
def main():
    """Command-line entry point for the Ray Client server.

    Parses the command line (unrecognised arguments are ignored), configures
    logging, and starts a server bound to ``<host>:<port>``: ``serve_proxier``
    when ``--mode`` is ``proxy``, otherwise ``serve`` with a handler built by
    ``create_ray_handler``.

    The function then loops, once per one-second sleep:

    * a JSON health report holding the current time is written to the
      internal KV store under the key ``"ray_client_server"``; any exception
      raised while doing so is logged and the loop carries on;
    * in ``specific-server`` mode an idle counter is reset whenever the
      server reports connected clients and decremented otherwise; when it
      reaches zero a ``KeyboardInterrupt`` is raised internally.

    A ``KeyboardInterrupt`` raised inside the loop (from the idle countdown
    or from anywhere else in it) stops the server with ``server.stop(0)``.
    """
    import argparse

    arg_parser = argparse.ArgumentParser()
    arg_parser.add_argument(
        "--host", type=str, default="0.0.0.0", help="Host IP to bind to")
    arg_parser.add_argument(
        "-p", "--port", type=int, default=10001, help="Port to bind to")
    arg_parser.add_argument(
        "--mode",
        type=str,
        choices=["proxy", "legacy", "specific-server"],
        default="proxy",
    )
    arg_parser.add_argument(
        "--address",
        required=False,
        type=str,
        help="Address to use to connect to Ray")
    arg_parser.add_argument(
        "--redis-password",
        required=False,
        type=str,
        help="Password for connecting to Redis",
    )
    arg_parser.add_argument(
        "--metrics-agent-port",
        required=False,
        type=int,
        default=0,
        help="The port to use for connecting to the runtime_env agent.",
    )
    args, _unrecognised = arg_parser.parse_known_args()
    setup_logger(ray_constants.LOGGER_LEVEL, ray_constants.LOGGER_FORMAT)

    ray_connect_handler = create_ray_handler(args.address,
                                             args.redis_password)

    hostport = f"{args.host}:{args.port:d}"
    logger.info(f"Starting Ray Client server on {hostport}")

    proxy_mode = args.mode == "proxy"
    if proxy_mode:
        server = serve_proxier(
            hostport,
            args.address,
            redis_password=args.redis_password,
            runtime_env_agent_port=args.metrics_agent_port,
        )
    else:
        server = serve(hostport, ray_connect_handler)

    def publish_health_report():
        """Write one timestamped health report; log and swallow failures."""
        report = {"time": time.time()}
        try:
            kv = ray.experimental.internal_kv
            if not kv._internal_kv_initialized():
                # Lazily connect the KV store the first time it is needed.
                kv._initialize_internal_kv(
                    try_create_gcs_client(args.address, args.redis_password))
            kv._internal_kv_put(
                "ray_client_server",
                json.dumps(report),
                namespace=ray_constants.KV_NAMESPACE_HEALTHCHECK,
            )
        except Exception as err:
            logger.error(f"[{args.mode}] Failed to put health check "
                         f"on {args.address}")
            logger.exception(err)

    try:
        idle_budget = TIMEOUT_FOR_SPECIFIC_SERVER_S
        while True:
            publish_health_report()
            time.sleep(1)

            # Only specific-server mode shuts itself down when idle.
            if args.mode != "specific-server":
                continue

            if server.data_servicer.num_clients > 0:
                idle_budget = TIMEOUT_FOR_SPECIFIC_SERVER_S
            else:
                idle_budget -= 1

            if idle_budget == 0:
                # Reuse the Ctrl-C path below to stop the server.
                raise KeyboardInterrupt()

            budget_is_full = idle_budget == TIMEOUT_FOR_SPECIFIC_SERVER_S
            if idle_budget % 5 == 0 and not budget_is_full:
                logger.info(f"{idle_budget} idle checks before shutdown.")
    except KeyboardInterrupt:
        server.stop(0)
def main():
    """Command-line entry point for the Ray Client server.

    Parses the command line, configures logging at ``INFO`` level, and starts
    a server bound to ``<host>:<port>``: ``serve_proxier`` when ``--mode`` is
    ``proxy``, otherwise ``serve`` with a handler built by
    ``create_ray_handler``. ``--worker-shim-pid`` is accepted but not read
    anywhere in this function.

    The function then loops, once per one-second sleep:

    * a JSON health report holding the current time is stored with ``hset``
      in the redis hash ``"healthcheck:ray_client_server"``; any exception
      raised while doing so is logged and the loop carries on;
    * in ``specific-server`` mode an idle counter is reset whenever the
      server reports connected clients and decremented otherwise; when it
      reaches zero a ``KeyboardInterrupt`` is raised internally.

    A ``KeyboardInterrupt`` raised inside the loop (from the idle countdown
    or from anywhere else in it) stops the server with ``server.stop(0)``.
    """
    import argparse

    arg_parser = argparse.ArgumentParser()
    arg_parser.add_argument(
        "--host", type=str, default="0.0.0.0", help="Host IP to bind to")
    arg_parser.add_argument(
        "-p", "--port", type=int, default=10001, help="Port to bind to")
    arg_parser.add_argument(
        "--mode",
        type=str,
        choices=["proxy", "legacy", "specific-server"],
        default="proxy")
    arg_parser.add_argument(
        "--redis-address",
        required=False,
        type=str,
        help="Address to use to connect to Ray")
    arg_parser.add_argument(
        "--redis-password",
        required=False,
        type=str,
        help="Password for connecting to Redis")
    arg_parser.add_argument(
        "--worker-shim-pid",
        required=False,
        type=int,
        default=0,
        help="The PID of the process for setup worker runtime env.")
    args = arg_parser.parse_args()
    logging.basicConfig(level="INFO")

    # Health checks go through a dedicated redis client. `internal_kv` cannot
    # be used here because it needs `ray.init` to have been called, and only
    # connect handlers should do that.
    redis_client = None

    ray_connect_handler = create_ray_handler(args.redis_address,
                                             args.redis_password)

    hostport = f"{args.host}:{args.port:d}"
    logger.info(f"Starting Ray Client server on {hostport}")

    proxy_mode = args.mode == "proxy"
    if proxy_mode:
        server = serve_proxier(
            hostport, args.redis_address, redis_password=args.redis_password)
    else:
        server = serve(hostport, ray_connect_handler)

    def publish_health_report():
        """Write one timestamped health report; log and swallow failures."""
        nonlocal redis_client
        report = {"time": time.time()}
        try:
            if not redis_client:
                # Created on first use and retried on every pass until it
                # succeeds.
                redis_client = try_create_redis_client(
                    args.redis_address, args.redis_password)
            redis_client.hset("healthcheck:ray_client_server", "value",
                              json.dumps(report))
        except Exception as err:
            logger.error(f"[{args.mode}] Failed to put health check "
                         f"on {args.redis_address}")
            logger.exception(err)

    try:
        idle_budget = TIMEOUT_FOR_SPECIFIC_SERVER_S
        while True:
            publish_health_report()
            time.sleep(1)

            # Only specific-server mode shuts itself down when idle.
            if args.mode != "specific-server":
                continue

            if server.data_servicer.num_clients > 0:
                idle_budget = TIMEOUT_FOR_SPECIFIC_SERVER_S
            else:
                idle_budget -= 1

            if idle_budget == 0:
                # Reuse the Ctrl-C path below to stop the server.
                raise KeyboardInterrupt()

            budget_is_full = idle_budget == TIMEOUT_FOR_SPECIFIC_SERVER_S
            if idle_budget % 5 == 0 and not budget_is_full:
                logger.info(f"{idle_budget} idle checks before shutdown.")
    except KeyboardInterrupt:
        server.stop(0)