Ejemplo n.º 1
0
    async def run(self, server):
        """Register this servicer on ``server`` and hand off to the iteration.

        The GCS address is read from the owning dashboard agent and must be
        set; it is used to build the ``GcsAioPublisher`` that is passed to
        ``_perform_iteration``.

        Args:
            server: gRPC server object the reporter service is attached to.
        """
        reporter_pb2_grpc.add_ReporterServiceServicer_to_server(self, server)

        agent_gcs_address = self._dashboard_agent.gcs_address
        assert agent_gcs_address is not None
        await self._perform_iteration(
            GcsAioPublisher(address=agent_gcs_address))
Ejemplo n.º 2
0
async def test_aio_publish_and_subscribe_logs(ray_start_regular):
    """Check that a published log batch is received unchanged by a subscriber.

    The GCS address is looked up through Redis, a subscriber is registered
    for logs, one batch is published, and the polled value is compared with
    the batch that was sent.
    """
    address_info = ray_start_regular
    redis = ray._private.services.create_redis_client(
        address_info["redis_address"],
        password=ray.ray_constants.REDIS_DEFAULT_PASSWORD)

    gcs_server_addr = gcs_utils.get_gcs_address_from_redis(redis)

    subscriber = GcsAioSubscriber(address=gcs_server_addr)
    await subscriber.subscribe_logs()

    # Close the subscriber even if publishing or the assertion fails, so a
    # failing run does not leave the subscription open.
    try:
        publisher = GcsAioPublisher(address=gcs_server_addr)
        log_batch = {
            "ip": "127.0.0.1",
            "pid": "gcs",
            "job": "0001",
            "is_err": False,
            "lines": ["line 1", "line 2"],
            "actor_name": "test actor",
            "task_name": "test task",
        }
        await publisher.publish_logs(log_batch)

        assert await subscriber.poll_logs() == log_batch
    finally:
        await subscriber.close()
Ejemplo n.º 3
0
    async def run(self, server):
        """Attach the reporter service to ``server`` and start publishing.

        Depending on ``gcs_pubsub_enabled()``, data is published either
        through a ``GcsAioPublisher`` or through a Redis connection pool. The
        chosen ``publish(key, data)`` coroutine function is handed to
        ``_perform_iteration``.

        Args:
            server: gRPC server the reporter servicer is registered on.

        Raises:
            RuntimeError: If the agent has no GCS address and Redis holds no
                value under ``"GcsServerAddress"``.
        """
        reporter_pb2_grpc.add_ReporterServiceServicer_to_server(self, server)
        if gcs_pubsub_enabled():
            gcs_addr = self._dashboard_agent.gcs_address
            if gcs_addr is None:
                # This pool is only needed for a single lookup; close it
                # afterwards instead of leaking the connection.
                aioredis_client = await aioredis.create_redis_pool(
                    address=self._dashboard_agent.redis_address,
                    password=self._dashboard_agent.redis_password,
                )
                try:
                    gcs_addr = await aioredis_client.get("GcsServerAddress")
                finally:
                    aioredis_client.close()
                    await aioredis_client.wait_closed()
                if gcs_addr is None:
                    # Fail with a clear message rather than an
                    # AttributeError from calling .decode() on None.
                    raise RuntimeError(
                        "GCS server address was not found in Redis.")
                gcs_addr = gcs_addr.decode()
            publisher = GcsAioPublisher(address=gcs_addr)

            async def publish(key: str, data: str):
                await publisher.publish_resource_usage(key, data)

        else:
            # This pool is captured by ``publish`` below, so it has to stay
            # open for as long as publishing goes on.
            aioredis_client = await aioredis.create_redis_pool(
                address=self._dashboard_agent.redis_address,
                password=self._dashboard_agent.redis_password,
            )

            async def publish(key: str, data: str):
                await aioredis_client.publish(key, data)

        await self._perform_iteration(publish)
Ejemplo n.º 4
0
async def test_aio_publish_and_subscribe_resource_usage(ray_start_regular):
    """Check that resource-usage messages are delivered in publish order.

    Two ``(key, json_string)`` messages are published and the subscriber is
    expected to return them from ``poll()`` in the same order.
    """
    address_info = ray_start_regular
    gcs_server_addr = address_info["gcs_address"]

    subscriber = GcsAioResourceUsageSubscriber(address=gcs_server_addr)
    await subscriber.subscribe()

    # Close the subscriber even if publishing or an assertion fails, so a
    # failing run does not leave the subscription open.
    try:
        publisher = GcsAioPublisher(address=gcs_server_addr)
        await publisher.publish_resource_usage("aaa_id", '{"cpu": 1}')
        await publisher.publish_resource_usage("bbb_id", '{"cpu": 2}')

        assert await subscriber.poll() == ("aaa_id", '{"cpu": 1}')
        assert await subscriber.poll() == ("bbb_id", '{"cpu": 2}')
    finally:
        await subscriber.close()
Ejemplo n.º 5
0
async def test_aio_publish_and_subscribe_error_info(ray_start_regular):
    """Check that published error messages are delivered in publish order.

    Two ``ErrorTableData`` messages are published under distinct byte keys
    and the subscriber is expected to return the same ``(key, error)`` pairs
    from ``poll()`` in the same order.
    """
    address_info = ray_start_regular
    gcs_server_addr = address_info["gcs_address"]

    subscriber = GcsAioErrorSubscriber(address=gcs_server_addr)
    await subscriber.subscribe()

    # Close the subscriber even if publishing or an assertion fails, so a
    # failing run does not leave the subscription open.
    try:
        publisher = GcsAioPublisher(address=gcs_server_addr)
        err1 = ErrorTableData(error_message="test error message 1")
        err2 = ErrorTableData(error_message="test error message 2")
        await publisher.publish_error(b"aaa_id", err1)
        await publisher.publish_error(b"bbb_id", err2)

        assert await subscriber.poll() == (b"aaa_id", err1)
        assert await subscriber.poll() == (b"bbb_id", err2)
    finally:
        await subscriber.close()
Ejemplo n.º 6
0
async def test_aio_publish_and_subscribe_error_info(ray_start_regular):
    """Check that published error messages are delivered in publish order.

    The GCS address is looked up through Redis. Two ``ErrorTableData``
    messages are published under distinct byte keys and the subscriber is
    expected to return the same ``(key, error)`` pairs from ``poll_error()``
    in the same order.
    """
    address_info = ray_start_regular
    redis = ray._private.services.create_redis_client(
        address_info["redis_address"],
        password=ray.ray_constants.REDIS_DEFAULT_PASSWORD)

    gcs_server_addr = gcs_utils.get_gcs_address_from_redis(redis)

    subscriber = GcsAioSubscriber(address=gcs_server_addr)
    await subscriber.subscribe_error()

    # Close the subscriber even if publishing or an assertion fails, so a
    # failing run does not leave the subscription open.
    try:
        publisher = GcsAioPublisher(address=gcs_server_addr)
        err1 = ErrorTableData(error_message="test error message 1")
        err2 = ErrorTableData(error_message="test error message 2")
        await publisher.publish_error(b"aaa_id", err1)
        await publisher.publish_error(b"bbb_id", err2)

        assert await subscriber.poll_error() == (b"aaa_id", err1)
        assert await subscriber.poll_error() == (b"bbb_id", err2)
    finally:
        await subscriber.close()
Ejemplo n.º 7
0
async def test_aio_publish_and_subscribe_logs(ray_start_regular):
    """Check that a published log batch is received unchanged by a subscriber.

    A log subscriber is registered, one batch is published, and the value
    returned by ``poll()`` is compared with the batch that was sent.
    """
    address_info = ray_start_regular
    gcs_server_addr = address_info["gcs_address"]

    subscriber = GcsAioLogSubscriber(address=gcs_server_addr)
    await subscriber.subscribe()

    # Close the subscriber even if publishing or the assertion fails, so a
    # failing run does not leave the subscription open.
    try:
        publisher = GcsAioPublisher(address=gcs_server_addr)
        log_batch = {
            "ip": "127.0.0.1",
            "pid": "gcs",
            "job": "0001",
            "is_err": False,
            "lines": ["line 1", "line 2"],
            "actor_name": "test actor",
            "task_name": "test task",
        }
        await publisher.publish_logs(log_batch)

        assert await subscriber.poll() == log_batch
    finally:
        await subscriber.close()
Ejemplo n.º 8
0
    def __init__(
        self,
        node_ip_address,
        dashboard_agent_port,
        gcs_address,
        minimal,
        temp_dir=None,
        session_dir=None,
        runtime_env_dir=None,
        log_dir=None,
        metrics_export_port=None,
        node_manager_port=None,
        listen_port=0,
        object_store_name=None,
        raylet_name=None,
        logging_params=None,
        disable_metrics_collection: bool = False,
    ):
        """Store the agent configuration and create its channels and clients.

        Besides copying the constructor arguments onto the instance, this
        opens an asynchronous gRPC channel to the raylet, tries to bind the
        agent's own gRPC server, and creates the GCS clients and publisher.

        Raises:
            AssertionError: If ``gcs_address`` is ``None`` or the parent pid
                read from the environment is not positive.
            KeyError: If ``RAY_NODE_ID`` is not set in the environment, or
                ``RAY_RAYLET_PID`` is not set on a non-Windows platform.
        """
        # Public attributes are accessible for all agent modules.
        self.ip = node_ip_address
        self.minimal = minimal

        assert gcs_address is not None
        self.gcs_address = gcs_address

        # Directories.
        self.temp_dir = temp_dir
        self.session_dir = session_dir
        self.runtime_env_dir = runtime_env_dir
        self.log_dir = log_dir

        # Ports.
        self.dashboard_agent_port = dashboard_agent_port
        self.metrics_export_port = metrics_export_port
        self.node_manager_port = node_manager_port
        self.listen_port = listen_port

        # Names and miscellaneous settings.
        self.object_store_name = object_store_name
        self.raylet_name = raylet_name
        self.logging_params = logging_params
        self.node_id = os.environ["RAY_NODE_ID"]
        self.metrics_collection_disabled = disable_metrics_collection

        # TODO(edoakes): RAY_RAYLET_PID isn't properly set on Windows. This is
        # only used for fate-sharing with the raylet and we need a different
        # fate-sharing mechanism for Windows anyways.
        on_windows = sys.platform in ("win32", "cygwin")
        if not on_windows:
            self.ppid = int(os.environ["RAY_RAYLET_PID"])
            assert self.ppid > 0
            logger.info("Parent pid is %s", self.ppid)

        # Setup raylet channel
        grpc_options = ray_constants.GLOBAL_GRPC_OPTIONS
        raylet_address = f"{self.ip}:{self.node_manager_port}"
        self.aiogrpc_raylet_channel = ray._private.utils.init_grpc_channel(
            raylet_address, grpc_options, asynchronous=True)

        # Setup grpc server
        self.server = aiogrpc.server(options=(("grpc.so_reuseport", 0), ))
        # Bind to loopback only when the node itself is on loopback;
        # otherwise listen on all interfaces.
        if self.ip == "127.0.0.1":
            grpc_ip = "127.0.0.1"
        else:
            grpc_ip = "0.0.0.0"
        try:
            self.grpc_port = ray._private.tls_utils.add_port_to_grpc_server(
                self.server, f"{grpc_ip}:{self.dashboard_agent_port}")
        except Exception:
            # TODO(SongGuyang): Catch the exception here because there is
            # port conflict issue which brought from static port. We should
            # remove this after we find better port resolution.
            logger.exception(
                "Failed to add port to grpc server. Agent will stay alive but "
                "disable the grpc service.")
            self.server = None
            self.grpc_port = None
        else:
            logger.info("Dashboard agent grpc address: %s:%s", grpc_ip,
                        self.grpc_port)

        # If the agent is started as non-minimal version, http server should
        # be configured to communicate with the dashboard in a head node.
        self.http_server = None

        # Used by the agent and sub-modules.
        # TODO(architkulkarni): Remove gcs_client once the agent exclusively uses
        # gcs_aio_client and not gcs_client.
        self.gcs_client = GcsClient(address=self.gcs_address)
        _initialize_internal_kv(self.gcs_client)
        assert _internal_kv_initialized()
        self.gcs_aio_client = GcsAioClient(address=self.gcs_address)
        self.publisher = GcsAioPublisher(address=self.gcs_address)