async def run(self, server):
    """Register this servicer on ``server`` and start reporting via GCS.

    The GCS address is taken from the dashboard agent and must not be
    ``None``. A ``GcsAioPublisher`` bound to that address is passed to
    ``_perform_iteration``.
    """
    reporter_pb2_grpc.add_ReporterServiceServicer_to_server(self, server)

    gcs_address = self._dashboard_agent.gcs_address
    assert gcs_address is not None

    await self._perform_iteration(GcsAioPublisher(address=gcs_address))
async def test_aio_publish_and_subscribe_logs(ray_start_regular):
    """Publish one log batch through GCS and expect to poll the same batch."""
    # The GCS server address is looked up through the cluster's Redis.
    redis_client = ray._private.services.create_redis_client(
        ray_start_regular["redis_address"],
        password=ray.ray_constants.REDIS_DEFAULT_PASSWORD,
    )
    gcs_address = gcs_utils.get_gcs_address_from_redis(redis_client)

    # Subscribe before publishing.
    log_subscriber = GcsAioSubscriber(address=gcs_address)
    await log_subscriber.subscribe_logs()

    log_publisher = GcsAioPublisher(address=gcs_address)
    expected_batch = {
        "ip": "127.0.0.1",
        "pid": "gcs",
        "job": "0001",
        "is_err": False,
        "lines": ["line 1", "line 2"],
        "actor_name": "test actor",
        "task_name": "test task",
    }
    await log_publisher.publish_logs(expected_batch)

    received_batch = await log_subscriber.poll_logs()
    assert received_batch == expected_batch

    await log_subscriber.close()
async def run(self, server):
    """Register this servicer on ``server`` and start reporting.

    A ``publish(key, data)`` coroutine is built and handed to
    ``_perform_iteration``. When ``gcs_pubsub_enabled()`` is true it
    publishes through a ``GcsAioPublisher``; otherwise it publishes through
    an aioredis connection pool.

    Args:
        server: The gRPC server this servicer is added to.
    """
    reporter_pb2_grpc.add_ReporterServiceServicer_to_server(self, server)

    if gcs_pubsub_enabled():
        gcs_addr = self._dashboard_agent.gcs_address
        if gcs_addr is None:
            # The agent has no GCS address; read it from Redis instead.
            aioredis_client = await aioredis.create_redis_pool(
                address=self._dashboard_agent.redis_address,
                password=self._dashboard_agent.redis_password,
            )
            try:
                gcs_addr = await aioredis_client.get("GcsServerAddress")
            finally:
                # This pool is only used for the single lookup above, so
                # release its connections instead of leaking them.
                aioredis_client.close()
                await aioredis_client.wait_closed()
            gcs_addr = gcs_addr.decode()
        publisher = GcsAioPublisher(address=gcs_addr)

        async def publish(key: str, data: str):
            await publisher.publish_resource_usage(key, data)

    else:
        # This pool backs the publish closure, so it must stay open.
        aioredis_client = await aioredis.create_redis_pool(
            address=self._dashboard_agent.redis_address,
            password=self._dashboard_agent.redis_password,
        )

        async def publish(key: str, data: str):
            await aioredis_client.publish(key, data)

    await self._perform_iteration(publish)
async def test_aio_publish_and_subscribe_resource_usage(ray_start_regular):
    """Publish two resource-usage messages and expect them back in order."""
    gcs_address = ray_start_regular["gcs_address"]

    # Subscribe before publishing.
    usage_subscriber = GcsAioResourceUsageSubscriber(address=gcs_address)
    await usage_subscriber.subscribe()

    usage_publisher = GcsAioPublisher(address=gcs_address)
    messages = [
        ("aaa_id", '{"cpu": 1}'),
        ("bbb_id", '{"cpu": 2}'),
    ]
    for key, payload in messages:
        await usage_publisher.publish_resource_usage(key, payload)

    # Each poll is compared against the next published (key, payload) pair.
    for expected in messages:
        assert await usage_subscriber.poll() == expected

    await usage_subscriber.close()
async def test_aio_publish_and_subscribe_error_info(ray_start_regular):
    """Publish two error records and expect them back in order."""
    gcs_address = ray_start_regular["gcs_address"]

    # Subscribe before publishing.
    error_subscriber = GcsAioErrorSubscriber(address=gcs_address)
    await error_subscriber.subscribe()

    error_publisher = GcsAioPublisher(address=gcs_address)
    published = [
        (b"aaa_id", ErrorTableData(error_message="test error message 1")),
        (b"bbb_id", ErrorTableData(error_message="test error message 2")),
    ]
    for error_id, error_data in published:
        await error_publisher.publish_error(error_id, error_data)

    # Each poll is compared against the next published (id, error) pair.
    for expected in published:
        assert await error_subscriber.poll() == expected

    await error_subscriber.close()
async def test_aio_publish_and_subscribe_error_info(ray_start_regular):
    """Publish two error records and expect them back in order."""
    # The GCS server address is looked up through the cluster's Redis.
    redis_client = ray._private.services.create_redis_client(
        ray_start_regular["redis_address"],
        password=ray.ray_constants.REDIS_DEFAULT_PASSWORD,
    )
    gcs_address = gcs_utils.get_gcs_address_from_redis(redis_client)

    # Subscribe before publishing.
    error_subscriber = GcsAioSubscriber(address=gcs_address)
    await error_subscriber.subscribe_error()

    error_publisher = GcsAioPublisher(address=gcs_address)
    published = [
        (b"aaa_id", ErrorTableData(error_message="test error message 1")),
        (b"bbb_id", ErrorTableData(error_message="test error message 2")),
    ]
    for error_id, error_data in published:
        await error_publisher.publish_error(error_id, error_data)

    # Each poll is compared against the next published (id, error) pair.
    for expected in published:
        assert await error_subscriber.poll_error() == expected

    await error_subscriber.close()
async def test_aio_publish_and_subscribe_logs(ray_start_regular):
    """Publish one log batch through GCS and expect to poll the same batch."""
    gcs_address = ray_start_regular["gcs_address"]

    # Subscribe before publishing.
    log_subscriber = GcsAioLogSubscriber(address=gcs_address)
    await log_subscriber.subscribe()

    log_publisher = GcsAioPublisher(address=gcs_address)
    expected_batch = {
        "ip": "127.0.0.1",
        "pid": "gcs",
        "job": "0001",
        "is_err": False,
        "lines": ["line 1", "line 2"],
        "actor_name": "test actor",
        "task_name": "test task",
    }
    await log_publisher.publish_logs(expected_batch)

    received_batch = await log_subscriber.poll()
    assert received_batch == expected_batch

    await log_subscriber.close()
def __init__(
    self,
    node_ip_address,
    dashboard_agent_port,
    gcs_address,
    minimal,
    temp_dir=None,
    session_dir=None,
    runtime_env_dir=None,
    log_dir=None,
    metrics_export_port=None,
    node_manager_port=None,
    listen_port=0,
    object_store_name=None,
    raylet_name=None,
    logging_params=None,
    disable_metrics_collection: bool = False,
):
    """Initialize the DashboardAgent object."""
    # Public attributes are accessible for all agent modules.
    self.ip = node_ip_address
    self.minimal = minimal

    assert gcs_address is not None
    self.gcs_address = gcs_address

    self.temp_dir = temp_dir
    self.session_dir = session_dir
    self.runtime_env_dir = runtime_env_dir
    self.log_dir = log_dir

    self.dashboard_agent_port = dashboard_agent_port
    self.metrics_export_port = metrics_export_port
    self.node_manager_port = node_manager_port
    self.listen_port = listen_port

    self.object_store_name = object_store_name
    self.raylet_name = raylet_name
    self.logging_params = logging_params
    self.node_id = os.environ["RAY_NODE_ID"]
    self.metrics_collection_disabled = disable_metrics_collection

    # TODO(edoakes): RAY_RAYLET_PID isn't properly set on Windows. This is
    # only used for fate-sharing with the raylet and we need a different
    # fate-sharing mechanism for Windows anyways.
    if sys.platform not in ("win32", "cygwin"):
        self.ppid = int(os.environ["RAY_RAYLET_PID"])
        assert self.ppid > 0
        logger.info("Parent pid is %s", self.ppid)

    # Setup raylet channel
    grpc_options = ray_constants.GLOBAL_GRPC_OPTIONS
    raylet_target = f"{self.ip}:{self.node_manager_port}"
    self.aiogrpc_raylet_channel = ray._private.utils.init_grpc_channel(
        raylet_target, grpc_options, asynchronous=True
    )

    # Setup grpc server
    self.server = aiogrpc.server(options=(("grpc.so_reuseport", 0),))
    # Bind to loopback when the node IP is loopback, else to all interfaces.
    if self.ip == "127.0.0.1":
        bind_ip = "127.0.0.1"
    else:
        bind_ip = "0.0.0.0"
    bind_target = f"{bind_ip}:{self.dashboard_agent_port}"
    try:
        self.grpc_port = ray._private.tls_utils.add_port_to_grpc_server(
            self.server, bind_target
        )
    except Exception:
        # TODO(SongGuyang): Catch the exception here because there is a
        # port conflict issue caused by the static port. We should
        # remove this after we find better port resolution.
        logger.exception(
            "Failed to add port to grpc server. Agent will stay alive but "
            "disable the grpc service."
        )
        self.server = None
        self.grpc_port = None
    else:
        logger.info(
            "Dashboard agent grpc address: %s:%s", bind_ip, self.grpc_port
        )

    # If the agent is started as non-minimal version, http server should
    # be configured to communicate with the dashboard in a head node.
    self.http_server = None

    # Used by the agent and sub-modules.
    # TODO(architkulkarni): Remove gcs_client once the agent exclusively uses
    # gcs_aio_client and not gcs_client.
    self.gcs_client = GcsClient(address=self.gcs_address)
    _initialize_internal_kv(self.gcs_client)
    assert _internal_kv_initialized()
    self.gcs_aio_client = GcsAioClient(address=self.gcs_address)
    self.publisher = GcsAioPublisher(address=self.gcs_address)