async def send_heartbeats(websocket, redis_conn):
    # First update the local scheduler info locally.
    client_keys = await redis_conn.execute("keys", "CL:*")
    for client_key in client_keys:
        client_fields = await hgetall_as_dict(redis_conn, client_key)
        if client_fields[b"client_type"] == b"local_scheduler":
            local_scheduler_id = hex_identifier(client_fields[b"ray_client_id"])
            local_schedulers[local_scheduler_id] = {
                "node_ip_address":
                    client_fields[b"node_ip_address"].decode("ascii"),
                "local_scheduler_socket_name":
                    client_fields[b"local_scheduler_socket_name"].decode("ascii"),
                "aux_address": client_fields[b"aux_address"].decode("ascii"),
                "last_heartbeat": -1 * np.inf}

    # Subscribe to local scheduler heartbeats.
    await redis_conn.execute_pubsub("subscribe", "local_schedulers")

    # Start a method in the background to periodically update the frontend.
    asyncio.ensure_future(send_heartbeat_payload(websocket))

    while True:
        msg = await redis_conn.pubsub_channels["local_schedulers"].get()
        heartbeat = LocalSchedulerInfoMessage.GetRootAsLocalSchedulerInfoMessage(
            msg, 0)
        local_scheduler_id_bytes = heartbeat.DbClientId()
        local_scheduler_id = hex_identifier(local_scheduler_id_bytes)
        if local_scheduler_id not in local_schedulers:
            # A new local scheduler has joined the cluster. Ignore it. This
            # won't be displayed in the UI until the page is refreshed.
            continue
        local_schedulers[local_scheduler_id]["last_heartbeat"] = time.time()
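The coroutine above relies on a module-level local_schedulers dict and on two small Redis helpers, hgetall_as_dict and hex_identifier, that are defined elsewhere in the module. A minimal sketch of what those helpers might look like, assuming an aioredis-style connection whose HGETALL reply is a flat field/value list, is shown below; the actual implementations may differ.

import binascii


# Module-level cache of local scheduler info, keyed by hex client ID
# (assumed to be defined elsewhere in the module).
local_schedulers = {}


def hex_identifier(identifier):
    # Convert a binary client ID into a printable hex string.
    return binascii.hexlify(identifier).decode("ascii")


async def hgetall_as_dict(redis_conn, key):
    # Assumes HGETALL returns a flat [field, value, field, value, ...] reply
    # on this connection type; fold it into a bytes -> bytes dict.
    flat = await redis_conn.execute("hgetall", key)
    return {flat[i]: flat[i + 1] for i in range(0, len(flat), 2)}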
def local_scheduler_info_handler(self, unused_channel, data):
    """Handle a local scheduler heartbeat from Redis."""
    message = LocalSchedulerInfoMessage.GetRootAsLocalSchedulerInfoMessage(
        data, 0)
    num_resources = message.DynamicResourcesLength()
    static_resources = {}
    dynamic_resources = {}
    for i in range(num_resources):
        dyn = message.DynamicResources(i)
        static = message.StaticResources(i)
        dynamic_resources[dyn.Key().decode("utf-8")] = dyn.Value()
        static_resources[static.Key().decode("utf-8")] = static.Value()

    client_id = binascii.hexlify(message.DbClientId()).decode("utf-8")
    clients = ray.global_state.client_table()
    local_schedulers = [
        entry for client in clients.values() for entry in client
        if (entry["ClientType"] == "local_scheduler" and not entry["Deleted"])
    ]
    ip = None
    for ls in local_schedulers:
        if ls["DBClientID"] == client_id:
            ip = ls["AuxAddress"].split(":")[0]
    if ip:
        self.load_metrics.update(ip, static_resources, dynamic_resources)
    else:
        print("Warning: could not find ip for client {} in {}".format(
            client_id, local_schedulers))
def local_scheduler_info_handler(self, unused_channel, data):
    """Handle a local scheduler heartbeat from Redis."""
    message = LocalSchedulerInfoMessage.GetRootAsLocalSchedulerInfoMessage(
        data, 0)
    num_resources = message.DynamicResourcesLength()
    static_resources = {}
    dynamic_resources = {}
    for i in range(num_resources):
        dyn = message.DynamicResources(i)
        static = message.StaticResources(i)
        dynamic_resources[dyn.Key().decode("utf-8")] = dyn.Value()
        static_resources[static.Key().decode("utf-8")] = static.Value()

    # Update the load metrics for this local scheduler.
    client_id = binascii.hexlify(message.DbClientId()).decode("utf-8")
    ip = self.local_scheduler_id_to_ip_map.get(client_id)
    if ip:
        self.load_metrics.update(ip, static_resources, dynamic_resources)
    else:
        print(
            "Warning: could not find ip for client {}.".format(client_id))
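This version of the handler assumes that self.local_scheduler_id_to_ip_map is kept up to date elsewhere, presumably by periodically rebuilding it from the client table rather than scanning it on every heartbeat as the earlier version does. A rough sketch of such a refresh step, with a hypothetical method name, might look like the following.

def update_local_scheduler_map(self):
    # Hypothetical refresh step: rebuild the client-ID -> IP mapping from the
    # global client table so the heartbeat handler can look up IPs without
    # querying the client table on every message.
    clients = ray.global_state.client_table()
    local_schedulers = [
        entry for client in clients.values() for entry in client
        if (entry["ClientType"] == "local_scheduler" and not entry["Deleted"])
    ]
    self.local_scheduler_id_to_ip_map = {
        ls["DBClientID"]: ls["AuxAddress"].split(":")[0]
        for ls in local_schedulers
    }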