Esempio n. 1
0
 def __init__(self, dashboard_agent):
     """Set up per-node state and the gauges used to export metrics.

     Args:
         dashboard_agent: The owning agent. It is forwarded to the base
             class and supplies ``metrics_export_port``; ``node_id`` is
             read back through ``self._dashboard_agent`` (presumably set
             by the base-class initializer -- confirm).
     """
     super().__init__(dashboard_agent)

     # First value: psutil's default CPU count; second value: the count
     # with logical=False.
     default_cpus = psutil.cpu_count()
     non_logical_cpus = psutil.cpu_count(logical=False)
     self._cpu_counts = (default_cpus, non_logical_cpus)

     self._ip = ray._private.services.get_node_ip_address()
     self._hostname = socket.gethostname()
     self._workers = set()
     # History entries have the form: time, (sent, recv).
     self._network_stats_hist = [(0, (0.0, 0.0))]
     self._metrics_agent = MetricsAgent(dashboard_agent.metrics_export_port)
     self._key = (f"{reporter_consts.REPORTER_PREFIX}"
                  f"{self._dashboard_agent.node_id}")

     # One row per exported gauge: (name, description, unit, tag keys).
     gauge_specs = (
         ("node_cpu", "Total CPU usage on a ray node", "percentage",
          ["ip"]),
         ("node_mem", "Total memory usage on a ray node", "mb", ["ip"]),
         ("raylet_cpu", "CPU usage of the raylet on a node.", "percentage",
          ["ip", "pid"]),
         ("raylet_mem", "Memory usage of the raylet on a node", "mb",
          ["ip", "pid"]),
     )
     # Gauges to record and export metrics, keyed by metric name.
     self._gauges = {
         name: Gauge(name, description, unit, tag_keys)
         for name, description, unit, tag_keys in gauge_specs
     }
Esempio n. 2
0
def test_gauge():
    """Check that a Gauge reports its constructor arguments via __dict__()."""
    tag_keys = [tag_key_module.TagKey(str(index)) for index in range(10)]
    expected = {
        "name": "name",
        "description": "description",
        "units": "units",
        "tags": tag_keys,
    }
    gauge = Gauge(expected["name"], expected["description"],
                  expected["units"], expected["tags"])
    # __dict__ is invoked as a method here, once per checked field.
    for field, wanted in expected.items():
        assert gauge.__dict__()[field] == wanted
Esempio n. 3
0
 def __init__(self, dashboard_agent):
     """Set up per-node state and the gauges used to export metrics.

     Args:
         dashboard_agent: The owning agent. It is forwarded to the base
             class and supplies ``metrics_export_port``; ``node_id`` is
             read back through ``self._dashboard_agent`` (presumably set
             by the base-class initializer -- confirm).
     """
     super().__init__(dashboard_agent)

     # First value: psutil's default CPU count; second value: the count
     # with logical=False.
     default_cpus = psutil.cpu_count()
     non_logical_cpus = psutil.cpu_count(logical=False)
     self._cpu_counts = (default_cpus, non_logical_cpus)

     self._ip = ray._private.services.get_node_ip_address()
     self._hostname = socket.gethostname()
     self._workers = set()
     # History entries have the form: time, (sent, recv).
     self._network_stats_hist = [(0, (0.0, 0.0))]
     self._metrics_agent = MetricsAgent(dashboard_agent.metrics_export_port)
     self._key = (f"{reporter_consts.REPORTER_PREFIX}"
                  f"{self._dashboard_agent.node_id}")

     # One row per exported gauge: (name, description, unit, tag keys).
     gauge_specs = (
         # CPU
         ("node_cpu_utilization", "Total CPU usage on a ray node",
          "percentage", ["ip"]),
         ("node_cpu_count", "Total CPUs available on a ray node", "cores",
          ["ip"]),
         # Memory
         ("node_mem_used", "Memory usage on a ray node", "bytes", ["ip"]),
         ("node_mem_available", "Memory available on a ray node", "bytes",
          ["ip"]),
         ("node_mem_total", "Total memory on a ray node", "bytes", ["ip"]),
         # GPU
         # NOTE(review): "percentage" looks odd for a GPU count -- confirm
         # the intended unit before changing exported metadata.
         ("node_gpus_available", "Total GPUs available on a ray node",
          "percentage", ["ip"]),
         ("node_gpus_utilization", "Total GPUs usage on a ray node",
          "percentage", ["ip"]),
         ("node_gram_used", "Total GPU RAM usage on a ray node", "bytes",
          ["ip"]),
         ("node_gram_available", "Total GPU RAM available on a ray node",
          "bytes", ["ip"]),
         # Disk
         ("node_disk_usage", "Total disk usage (bytes) on a ray node",
          "bytes", ["ip"]),
         ("node_disk_utilization_percentage",
          "Total disk utilization (percentage) on a ray node",
          "percentage", ["ip"]),
         # Network
         ("node_network_sent", "Total network sent", "bytes", ["ip"]),
         ("node_network_received", "Total network received", "bytes",
          ["ip"]),
         ("node_network_send_speed", "Network send speed", "bytes/sec",
          ["ip"]),
         ("node_network_receive_speed", "Network receive speed",
          "bytes/sec", ["ip"]),
         # Raylet process (additionally tagged by pid)
         ("raylet_cpu", "CPU usage of the raylet on a node.", "percentage",
          ["ip", "pid"]),
         # NOTE(review): unit is "mb" while the node memory gauges use
         # "bytes" -- confirm against the code that records the value.
         ("raylet_mem", "Memory usage of the raylet on a node", "mb",
          ["ip", "pid"]),
     )
     # Gauges to record and export metrics, keyed by metric name.
     self._gauges = {
         name: Gauge(name, description, unit, tag_keys)
         for name, description, unit, tag_keys in gauge_specs
     }
Esempio n. 4
0
    if isinstance(o, dict):
        D = {k: recursive_asdict(v) for k, v in o.items()}
        return D

    return o


def jsonify_asdict(o):
    """Return ``o`` as a JSON string.

    ``o`` is first passed through ``recursive_asdict``, the result is
    handed to ``dashboard_utils.to_google_style`` (presumably a key-style
    conversion -- confirm against that helper), and the outcome is
    serialized with ``json.dumps``.
    """
    as_plain_data = recursive_asdict(o)
    google_styled = dashboard_utils.to_google_style(as_plain_data)
    return json.dumps(google_styled)


# Gauges used to record and export metrics, keyed by metric name.
METRICS_GAUGES = {
    "node_cpu_utilization":
    Gauge("node_cpu_utilization", "Total CPU usage on a ray node",
          "percentage", ["ip"]),
    "node_cpu_count":
    Gauge("node_cpu_count", "Total CPUs available on a ray node", "cores",
          ["ip"]),
    "node_mem_used":
    Gauge("node_mem_used", "Memory usage on a ray node", "bytes", ["ip"]),
    "node_mem_available":
    Gauge("node_mem_available", "Memory available on a ray node", "bytes",
          ["ip"]),
    "node_mem_total":
    Gauge("node_mem_total", "Total memory on a ray node", "bytes", ["ip"]),
    "node_gpus_available":
    Gauge("node_gpus_available", "Total GPUs available on a ray node",
          "percentage", ["ip"]),
    "node_gpus_utilization":
    Gauge("node_gpus_utilization", "Total GPUs usage on a ray node",