コード例 #1
0
    def __init__(self,
                 redis_address,
                 autoscaling_config,
                 redis_password=None,
                 prefix_cluster_info=False):
        """Connect to Redis and the GCS and set up autoscaler bookkeeping.

        Args:
            redis_address: Redis address as a "host:port" string.
            autoscaling_config: Stored on the instance; not interpreted here.
            redis_password: Password handed to the Redis and GCS clients.
            prefix_cluster_info: Stored on the instance; not interpreted here.

        Raises:
            ValueError: If ``redis_address`` does not split into exactly two
                parts on ":" or the port part is not an integer.
            RuntimeError: If Redis holds no "GcsServerAddress" entry.
        """
        # Initialize the Redis clients.
        ray.state.state._initialize_global_state(redis_address,
                                                 redis_password=redis_password)
        self.redis = ray._private.services.create_redis_client(
            redis_address, password=redis_password)

        # The host part of the Redis address doubles as the head node IP.
        head_node_ip, redis_port = redis_address.split(":")
        self.gcs_client = connect_to_gcs(head_node_ip, int(redis_port),
                                         redis_password)

        # Initialize the gcs stub for getting all node resource usage.
        # A missing key comes back as None; fail with a clear message instead
        # of an AttributeError on ``.decode``.
        raw_gcs_address = self.redis.get("GcsServerAddress")
        if raw_gcs_address is None:
            raise RuntimeError(
                "GcsServerAddress is not set in Redis at {}; cannot connect "
                "to the GCS server.".format(redis_address))
        gcs_address = raw_gcs_address.decode("utf-8")

        options = (("grpc.enable_http_proxy", 0), )
        gcs_channel = grpc.insecure_channel(gcs_address, options=options)
        self.gcs_node_resources_stub = \
            gcs_service_pb2_grpc.NodeResourceInfoGcsServiceStub(gcs_channel)

        # Set the redis client and mode so _internal_kv works for autoscaler.
        worker = ray.worker.global_worker
        worker.redis_client = self.redis
        worker.gcs_client = self.gcs_client
        worker.mode = 0

        self.load_metrics = LoadMetrics(local_ip=head_node_ip)
        self.last_avail_resources = None
        self.event_summarizer = EventSummarizer()
        self.prefix_cluster_info = prefix_cluster_info
        self.autoscaling_config = autoscaling_config
        # No autoscaler is created here; the attribute starts out unset.
        self.autoscaler = None

        logger.info("Monitor: Started")
コード例 #2
0
ファイル: monitor.py プロジェクト: yncxcw/ray
    def __init__(self,
                 redis_address,
                 autoscaling_config,
                 redis_password=None,
                 prefix_cluster_info=False,
                 monitor_ip=None,
                 stop_event: Optional[Event] = None):
        """Connect to Redis and the GCS and prepare autoscaler state.

        Args:
            redis_address: Redis address as a "host:port" string.
            autoscaling_config: Stored on the instance; not interpreted here.
            redis_password: Password handed to the Redis and GCS clients.
            prefix_cluster_info: Stored on the instance; not interpreted here.
            monitor_ip: When truthy, "<monitor_ip>:<AUTOSCALER_METRIC_PORT>"
                is written to Redis under "AutoscalerMetricsAddress" and the
                Prometheus HTTP server is started on AUTOSCALER_METRIC_PORT.
            stop_event: Optional event that can be used to signal a graceful
                exit from the monitor loop.

        Raises:
            ValueError: If ``redis_address`` does not split into exactly two
                parts on ":" or the port part is not an integer.
            RuntimeError: If Redis holds no "GcsServerAddress" entry.
        """
        # Initialize the Redis clients.
        ray.state.state._initialize_global_state(redis_address,
                                                 redis_password=redis_password)
        self.redis = ray._private.services.create_redis_client(
            redis_address, password=redis_password)
        if monitor_ip:
            self.redis.set("AutoscalerMetricsAddress",
                           f"{monitor_ip}:{AUTOSCALER_METRIC_PORT}")

        # The host part of the Redis address doubles as the head node IP.
        head_node_ip, redis_port = redis_address.split(":")
        self.gcs_client = connect_to_gcs(head_node_ip, int(redis_port),
                                         redis_password)

        # Initialize the gcs stub for getting all node resource usage.
        # A missing key comes back as None; fail with a clear message instead
        # of an AttributeError on ``.decode``.
        raw_gcs_address = self.redis.get("GcsServerAddress")
        if raw_gcs_address is None:
            raise RuntimeError(
                f"GcsServerAddress is not set in Redis at {redis_address}; "
                "cannot connect to the GCS server.")
        gcs_address = raw_gcs_address.decode("utf-8")

        options = (("grpc.enable_http_proxy", 0), )
        gcs_channel = grpc.insecure_channel(gcs_address, options=options)
        self.gcs_node_resources_stub = \
            gcs_service_pb2_grpc.NodeResourceInfoGcsServiceStub(gcs_channel)

        # Set the redis client and mode so _internal_kv works for autoscaler.
        worker = ray.worker.global_worker
        worker.redis_client = self.redis
        worker.gcs_client = self.gcs_client
        worker.mode = 0

        self.redis_address = redis_address
        self.redis_password = redis_password
        self.load_metrics = LoadMetrics(local_ip=head_node_ip)
        self.last_avail_resources = None
        self.event_summarizer = EventSummarizer()
        self.prefix_cluster_info = prefix_cluster_info
        # Can be used to signal graceful exit from monitor loop.
        self.stop_event = stop_event  # type: Optional[Event]
        self.autoscaling_config = autoscaling_config
        # No autoscaler is created here; the attribute starts out unset.
        self.autoscaler = None

        self.prom_metrics = AutoscalerPrometheusMetrics()
        if monitor_ip:
            # If monitor_ip wasn't passed in, then don't attempt to start the
            # metric server to keep behavior identical to before metrics were
            # introduced
            try:
                logger.info(
                    "Starting autoscaler metrics server on port {}".format(
                        AUTOSCALER_METRIC_PORT))
                prometheus_client.start_http_server(
                    AUTOSCALER_METRIC_PORT,
                    registry=self.prom_metrics.registry)
            except Exception:
                # Best effort: a failed metrics server is logged and must not
                # prevent the monitor from starting.
                logger.exception(
                    "An exception occurred while starting the metrics server.")

        logger.info("Monitor: Started")
コード例 #3
0
 def __init__(self,
              node_ip_address,
              redis_address,
              dashboard_agent_port,
              redis_password=None,
              temp_dir=None,
              session_dir=None,
              runtime_env_dir=None,
              log_dir=None,
              metrics_export_port=None,
              node_manager_port=None,
              listen_port=0,
              object_store_name=None,
              raylet_name=None,
              logging_params=None):
     """Set up the dashboard agent's state, gRPC endpoints and GCS client.

     Every constructor argument is stored on the instance (``redis_address``
     after passing through ``dashboard_utils.address_tuple``). The node id is
     read from the ``RAY_NODE_ID`` environment variable and, outside of
     Windows/Cygwin, the parent pid from ``RAY_RAYLET_PID``.

     Raises:
         KeyError: If a required environment variable is missing.
         ValueError: If ``redis_address`` does not split into exactly two
             parts on ":" or a numeric field cannot be converted.
     """
     # Attributes without a leading underscore are visible to every agent
     # module.
     self.ip = node_ip_address
     self.redis_address = dashboard_utils.address_tuple(redis_address)
     self.redis_password = redis_password

     # Directories.
     self.temp_dir = temp_dir
     self.session_dir = session_dir
     self.runtime_env_dir = runtime_env_dir
     self.log_dir = log_dir

     # Ports.
     self.dashboard_agent_port = dashboard_agent_port
     self.metrics_export_port = metrics_export_port
     self.node_manager_port = node_manager_port
     self.listen_port = listen_port

     self.object_store_name = object_store_name
     self.raylet_name = raylet_name
     self.logging_params = logging_params
     self.node_id = os.environ["RAY_NODE_ID"]

     # TODO(edoakes): RAY_RAYLET_PID isn't properly set on Windows. It is
     # only needed for fate-sharing with the raylet, and Windows needs a
     # different fate-sharing mechanism anyway.
     on_windows = sys.platform in ("win32", "cygwin")
     if not on_windows:
         self.ppid = int(os.environ["RAY_RAYLET_PID"])
         assert self.ppid > 0
         logger.info("Parent pid is %s", self.ppid)

     # gRPC server for this agent.
     self.server = aiogrpc.server(options=(("grpc.so_reuseport", 0), ))
     listen_address = f"[::]:{self.dashboard_agent_port}"
     self.grpc_port = self.server.add_insecure_port(listen_address)
     logger.info("Dashboard agent grpc address: %s:%s", self.ip,
                 self.grpc_port)

     self.aioredis_client = None

     # Channel to the raylet on this node.
     channel_options = (("grpc.enable_http_proxy", 0), )
     raylet_address = f"{self.ip}:{self.node_manager_port}"
     self.aiogrpc_raylet_channel = aiogrpc.insecure_channel(
         raylet_address, options=channel_options)

     self.http_session = None

     redis_ip, redis_port = redis_address.split(":")
     self.gcs_client = connect_to_gcs(redis_ip, int(redis_port),
                                      redis_password)
コード例 #4
0
 def __init__(self, http_host, http_port, http_port_retries, redis_address,
              redis_password, log_dir):
     """Set up the dashboard head's state, GCS client and gRPC server.

     ``http_host`` is stored as "127.0.0.1" when it equals "localhost";
     ``redis_address`` is stored after passing through
     ``dashboard_utils.address_tuple``. The remaining arguments are stored
     unchanged.

     Raises:
         ValueError: If ``redis_address`` does not split into exactly two
             parts on ":" or the port part is not an integer.
     """
     self.health_check_thread: GCSHealthCheckThread = None
     self._gcs_rpc_error_counter = 0

     # Attributes without a leading underscore are visible to every head
     # module.
     # Workaround for https://github.com/ray-project/ray/issues/7084
     if http_host == "localhost":
         self.http_host = "127.0.0.1"
     else:
         self.http_host = http_host
     self.http_port = http_port
     self.http_port_retries = http_port_retries
     self.redis_address = dashboard_utils.address_tuple(redis_address)
     self.redis_password = redis_password
     self.log_dir = log_dir

     # These start out unset in the constructor.
     self.aioredis_client = None
     self.aiogrpc_gcs_channel = None
     self.http_session = None

     self.ip = ray.util.get_node_ip_address()

     redis_ip, redis_port = redis_address.split(":")
     self.gcs_client = connect_to_gcs(redis_ip, int(redis_port),
                                      redis_password)

     # Port 0 in the bind address; the port actually returned is logged.
     server_options = (("grpc.so_reuseport", 0), )
     self.server = aiogrpc.server(options=server_options)
     self.grpc_port = self.server.add_insecure_port("[::]:0")
     logger.info("Dashboard head grpc address: %s:%s", self.ip,
                 self.grpc_port)
コード例 #5
0
def serve_proxier(connection_str: str,
                  redis_address: Optional[str],
                  *,
                  redis_password: Optional[str] = None,
                  session_dir: Optional[str] = None,
                  runtime_env_agent_port: int = 0):
    """Start the proxying gRPC server and return a handle to it.

    Args:
        connection_str: Address the gRPC server is bound to (insecure port).
        redis_address: Redis address as "host:port", or None.
        redis_password: Redis password, or None.
        session_dir: Forwarded to ``ProxyManager``.
        runtime_env_agent_port: Forwarded to ``ProxyManager``.

    Returns:
        A ``ClientServerHandle`` holding the three proxy servicers and the
        started gRPC server.
    """
    # The internal KV must be ready before ray.init is called within the
    # RayletServicers, since it is used to upload and download working_dir.
    # NOTE(edoakes): redis_address and redis_password should only be None in
    # tests.
    if not (redis_address is None or redis_password is None):
        redis_ip, redis_port = redis_address.split(":")
        ray.experimental.internal_kv._initialize_internal_kv(
            connect_to_gcs(redis_ip, int(redis_port), redis_password))

    thread_pool = futures.ThreadPoolExecutor(
        max_workers=CLIENT_SERVER_MAX_THREADS)
    grpc_server = grpc.server(thread_pool, options=GRPC_OPTIONS)

    manager = ProxyManager(redis_address,
                           session_dir=session_dir,
                           redis_password=redis_password,
                           runtime_env_agent_port=runtime_env_agent_port)
    task_proxy = RayletServicerProxy(None, manager)
    data_proxy = DataServicerProxy(manager)
    logs_proxy = LogstreamServicerProxy(manager)

    # Register each servicer with the server: task, data, then logs.
    registrations = (
        (ray_client_pb2_grpc.add_RayletDriverServicer_to_server,
         task_proxy),
        (ray_client_pb2_grpc.add_RayletDataStreamerServicer_to_server,
         data_proxy),
        (ray_client_pb2_grpc.add_RayletLogStreamerServicer_to_server,
         logs_proxy),
    )
    for register, servicer in registrations:
        register(servicer, grpc_server)

    grpc_server.add_insecure_port(connection_str)
    grpc_server.start()
    return ClientServerHandle(
        task_servicer=task_proxy,
        data_servicer=data_proxy,
        logs_servicer=logs_proxy,
        grpc_server=grpc_server,
    )
コード例 #6
0
def test_gcs_client_address(ray_start_cluster_head):
    """Connect to the GCS via the cluster's address and run the KV test."""
    host, port_text = ray_start_cluster_head.address.split(":")
    default_password = ray_constants.REDIS_DEFAULT_PASSWORD
    client = connect_to_gcs(host, int(port_text), default_password)
    run_kv_test(client)