def __init__(self,
             redis_address,
             autoscaling_config,
             redis_password=None,
             prefix_cluster_info=False):
    """Connect to Redis and the GCS and set up the monitor's initial state.

    Args:
        redis_address: Redis address as a "host:port" string; it is split
            on ":" to obtain the host and the integer port.
        autoscaling_config: Stored unchanged on ``self.autoscaling_config``.
        redis_password: Optional password forwarded to the global state,
            the Redis client and the GCS client created here.
        prefix_cluster_info: Stored unchanged on
            ``self.prefix_cluster_info``.
    """
    # Initialize the global state first, then the Redis client.
    ray.state.state._initialize_global_state(
        redis_address, redis_password=redis_password)
    redis_client = ray._private.services.create_redis_client(
        redis_address, password=redis_password)
    self.redis = redis_client

    redis_host, redis_port = redis_address.split(":")
    self.gcs_client = connect_to_gcs(
        redis_host, int(redis_port), redis_password)

    # The GCS server address is looked up in Redis; the stub built on top
    # of it is used for getting all node resource usage.
    gcs_address = redis_client.get("GcsServerAddress").decode("utf-8")
    channel = grpc.insecure_channel(
        gcs_address, options=(("grpc.enable_http_proxy", 0), ))
    self.gcs_node_resources_stub = (
        gcs_service_pb2_grpc.NodeResourceInfoGcsServiceStub(channel))

    # Set the redis client and mode so _internal_kv works for autoscaler.
    global_worker = ray.worker.global_worker
    global_worker.redis_client = redis_client
    global_worker.gcs_client = self.gcs_client
    global_worker.mode = 0

    # The host part of the Redis address is used as the head node IP.
    self.load_metrics = LoadMetrics(local_ip=redis_host)
    self.event_summarizer = EventSummarizer()

    self.last_avail_resources = None
    self.prefix_cluster_info = prefix_cluster_info
    self.autoscaling_config = autoscaling_config
    # No autoscaler is created here; the attribute starts out empty.
    self.autoscaler = None
    logger.info("Monitor: Started")
def __init__(self,
             redis_address,
             autoscaling_config,
             redis_password=None,
             prefix_cluster_info=False,
             monitor_ip=None,
             stop_event: Optional[Event] = None):
    """Connect to Redis and the GCS and set up the monitor's initial state.

    Args:
        redis_address: Redis address as a "host:port" string; it is split
            on ":" to obtain the host and the integer port.
        autoscaling_config: Stored unchanged on ``self.autoscaling_config``.
        redis_password: Optional password forwarded to the global state,
            the Redis client and the GCS client created here.
        prefix_cluster_info: Stored unchanged on
            ``self.prefix_cluster_info``.
        monitor_ip: When truthy, the metrics address
            ``"<monitor_ip>:<AUTOSCALER_METRIC_PORT>"`` is written to Redis
            and a Prometheus HTTP server is started on that port.
        stop_event: Optional event stored on ``self.stop_event``; it can
            be used to signal graceful exit from the monitor loop.
    """
    # Initialize the global state first, then the Redis client.
    ray.state.state._initialize_global_state(
        redis_address, redis_password=redis_password)
    redis_client = ray._private.services.create_redis_client(
        redis_address, password=redis_password)
    self.redis = redis_client

    if monitor_ip:
        # Record in Redis where the autoscaler metrics are served.
        redis_client.set("AutoscalerMetricsAddress",
                         f"{monitor_ip}:{AUTOSCALER_METRIC_PORT}")

    redis_host, redis_port = redis_address.split(":")
    self.gcs_client = connect_to_gcs(
        redis_host, int(redis_port), redis_password)

    # The GCS server address is looked up in Redis; the stub built on top
    # of it is used for getting all node resource usage.
    gcs_address = redis_client.get("GcsServerAddress").decode("utf-8")
    channel = grpc.insecure_channel(
        gcs_address, options=(("grpc.enable_http_proxy", 0), ))
    self.gcs_node_resources_stub = (
        gcs_service_pb2_grpc.NodeResourceInfoGcsServiceStub(channel))

    # Set the redis client and mode so _internal_kv works for autoscaler.
    global_worker = ray.worker.global_worker
    global_worker.redis_client = redis_client
    global_worker.gcs_client = self.gcs_client
    global_worker.mode = 0

    self.redis_address = redis_address
    self.redis_password = redis_password

    # The host part of the Redis address is used as the head node IP.
    self.load_metrics = LoadMetrics(local_ip=redis_host)
    self.last_avail_resources = None
    self.event_summarizer = EventSummarizer()
    self.prefix_cluster_info = prefix_cluster_info

    # Can be used to signal graceful exit from monitor loop.
    self.stop_event = stop_event
    self.autoscaling_config = autoscaling_config
    # No autoscaler is created here; the attribute starts out empty.
    self.autoscaler = None

    self.prom_metrics = AutoscalerPrometheusMetrics()
    if monitor_ip:
        # Without a monitor_ip the metrics server is not started, which
        # keeps behavior identical to before metrics were introduced.
        # Startup is best effort: a failure is logged, never raised.
        try:
            logger.info(
                "Starting autoscaler metrics server on port {}".format(
                    AUTOSCALER_METRIC_PORT))
            prometheus_client.start_http_server(
                AUTOSCALER_METRIC_PORT,
                registry=self.prom_metrics.registry)
        except Exception:
            logger.exception(
                "An exception occurred while starting the metrics server.")

    logger.info("Monitor: Started")
def __init__(self,
             node_ip_address,
             redis_address,
             dashboard_agent_port,
             redis_password=None,
             temp_dir=None,
             session_dir=None,
             runtime_env_dir=None,
             log_dir=None,
             metrics_export_port=None,
             node_manager_port=None,
             listen_port=0,
             object_store_name=None,
             raylet_name=None,
             logging_params=None):
    """Initialize the DashboardAgent object.

    Stores the given settings as public attributes, reads the node id
    (and, off Windows, the raylet pid) from the environment, binds the
    agent's gRPC server, opens a channel to the raylet and connects to
    the GCS.

    Args:
        node_ip_address: Stored as ``self.ip``; also the host of the
            raylet channel.
        redis_address: "host:port" string. It is kept as
            ``dashboard_utils.address_tuple(redis_address)`` and also split
            on ":" for the GCS connection.
        dashboard_agent_port: Port the agent's gRPC server is bound to.
        redis_password: Optional password forwarded to the GCS connection.
        node_manager_port: Port of the raylet channel.

    All remaining parameters are stored unchanged on attributes of the
    same name.

    Raises:
        KeyError: If ``RAY_NODE_ID`` (or, off Windows, ``RAY_RAYLET_PID``)
            is missing from the environment.
    """
    # Public attributes are accessible for all agent modules.
    self.ip = node_ip_address
    self.redis_address = dashboard_utils.address_tuple(redis_address)
    self.redis_password = redis_password

    self.temp_dir = temp_dir
    self.session_dir = session_dir
    self.runtime_env_dir = runtime_env_dir
    self.log_dir = log_dir

    self.dashboard_agent_port = dashboard_agent_port
    self.metrics_export_port = metrics_export_port
    self.node_manager_port = node_manager_port
    self.listen_port = listen_port

    self.object_store_name = object_store_name
    self.raylet_name = raylet_name
    self.logging_params = logging_params
    self.node_id = os.environ["RAY_NODE_ID"]

    # TODO(edoakes): RAY_RAYLET_PID isn't properly set on Windows. This is
    # only used for fate-sharing with the raylet and we need a different
    # fate-sharing mechanism for Windows anyways.
    on_windows = sys.platform in ("win32", "cygwin")
    if not on_windows:
        self.ppid = int(os.environ["RAY_RAYLET_PID"])
        assert self.ppid > 0
        logger.info("Parent pid is %s", self.ppid)

    # gRPC server of the agent, bound on all interfaces.
    self.server = aiogrpc.server(options=(("grpc.so_reuseport", 0), ))
    self.grpc_port = self.server.add_insecure_port(
        f"[::]:{self.dashboard_agent_port}")
    logger.info("Dashboard agent grpc address: %s:%s", self.ip,
                self.grpc_port)

    # Not created here; starts out empty.
    self.aioredis_client = None

    raylet_target = f"{self.ip}:{self.node_manager_port}"
    self.aiogrpc_raylet_channel = aiogrpc.insecure_channel(
        raylet_target, options=(("grpc.enable_http_proxy", 0), ))

    # Not created here; starts out empty.
    self.http_session = None

    gcs_host, gcs_port = redis_address.split(":")
    self.gcs_client = connect_to_gcs(
        gcs_host, int(gcs_port), redis_password)
def __init__(self, http_host, http_port, http_port_retries, redis_address,
             redis_password, log_dir):
    """Store the head's settings, connect to the GCS and bind a gRPC port.

    Args:
        http_host: HTTP host; the literal "localhost" is replaced by
            "127.0.0.1", any other value is stored as given.
        http_port: Stored unchanged on ``self.http_port``.
        http_port_retries: Stored unchanged on ``self.http_port_retries``.
        redis_address: "host:port" string. It is kept as
            ``dashboard_utils.address_tuple(redis_address)`` and also split
            on ":" for the GCS connection.
        redis_password: Password forwarded to the GCS connection.
        log_dir: Stored unchanged on ``self.log_dir``.
    """
    self.health_check_thread: GCSHealthCheckThread = None
    self._gcs_rpc_error_counter = 0

    # Public attributes are accessible for all head modules.
    # Workaround for https://github.com/ray-project/ray/issues/7084
    if http_host == "localhost":
        self.http_host = "127.0.0.1"
    else:
        self.http_host = http_host
    self.http_port = http_port
    self.http_port_retries = http_port_retries
    self.redis_address = dashboard_utils.address_tuple(redis_address)
    self.redis_password = redis_password
    self.log_dir = log_dir

    # None of these are created here; they start out empty.
    self.aioredis_client = None
    self.aiogrpc_gcs_channel = None
    self.http_session = None

    self.ip = ray.util.get_node_ip_address()

    gcs_host, gcs_port = redis_address.split(":")
    self.gcs_client = connect_to_gcs(
        gcs_host, int(gcs_port), redis_password)

    # Bind the gRPC server to "[::]:0" and remember the port it reports.
    self.server = aiogrpc.server(options=(("grpc.so_reuseport", 0), ))
    self.grpc_port = self.server.add_insecure_port("[::]:0")
    logger.info("Dashboard head grpc address: %s:%s", self.ip,
                self.grpc_port)
def serve_proxier(connection_str: str,
                  redis_address: Optional[str],
                  *,
                  redis_password: Optional[str] = None,
                  session_dir: Optional[str] = None,
                  runtime_env_agent_port: int = 0):
    """Start the proxying gRPC server and return a handle to it.

    Args:
        connection_str: Address the server is bound to via
            ``add_insecure_port``.
        redis_address: "host:port" string, or None. Forwarded to the
            ``ProxyManager``.
        redis_password: Redis password, or None. Forwarded to the
            ``ProxyManager``.
        session_dir: Forwarded to the ``ProxyManager``.
        runtime_env_agent_port: Forwarded to the ``ProxyManager``.

    Returns:
        A ``ClientServerHandle`` holding the three proxy servicers and the
        started gRPC server.
    """
    # Initialize internal KV to be used to upload and download working_dir
    # before calling ray.init within the RayletServicers.
    # NOTE(edoakes): redis_address and redis_password should only be None in
    # tests.
    have_redis = not (redis_address is None or redis_password is None)
    if have_redis:
        gcs_host, gcs_port = redis_address.split(":")
        kv_client = connect_to_gcs(gcs_host, int(gcs_port), redis_password)
        ray.experimental.internal_kv._initialize_internal_kv(kv_client)

    executor = futures.ThreadPoolExecutor(
        max_workers=CLIENT_SERVER_MAX_THREADS)
    grpc_server = grpc.server(executor, options=GRPC_OPTIONS)

    manager = ProxyManager(
        redis_address,
        session_dir=session_dir,
        redis_password=redis_password,
        runtime_env_agent_port=runtime_env_agent_port)

    # One proxy servicer per client-facing service.
    task_proxy = RayletServicerProxy(None, manager)
    data_proxy = DataServicerProxy(manager)
    logs_proxy = LogstreamServicerProxy(manager)

    ray_client_pb2_grpc.add_RayletDriverServicer_to_server(
        task_proxy, grpc_server)
    ray_client_pb2_grpc.add_RayletDataStreamerServicer_to_server(
        data_proxy, grpc_server)
    ray_client_pb2_grpc.add_RayletLogStreamerServicer_to_server(
        logs_proxy, grpc_server)

    grpc_server.add_insecure_port(connection_str)
    grpc_server.start()

    return ClientServerHandle(
        task_servicer=task_proxy,
        data_servicer=data_proxy,
        logs_servicer=logs_proxy,
        grpc_server=grpc_server,
    )
def test_gcs_client_address(ray_start_cluster_head):
    """Run the KV test through a GCS client built from the cluster address.

    The cluster's "host:port" address is split on ":" and, together with
    the default Redis password, handed to ``connect_to_gcs``.
    """
    host, port = ray_start_cluster_head.address.split(":")
    default_password = ray_constants.REDIS_DEFAULT_PASSWORD
    client = connect_to_gcs(host, int(port), default_password)
    run_kv_test(client)