Esempio n. 1
0
 def test_case_stats_exist():
     """Report whether the node and raylet CPU/memory metrics were exported.

     Fetches the metric names via ``fetch_prometheus(prom_addresses)`` and
     returns True only if every required name is among them.
     """
     _, metric_names, _ = fetch_prometheus(prom_addresses)
     required_metrics = (
         "ray_node_cpu",
         "ray_node_mem",
         "ray_raylet_cpu",
         "ray_raylet_mem",
     )
     return all(name in metric_names for name in required_metrics)
Esempio n. 2
0
    def test_cases():
        """Assert that the fetched Prometheus data has the expected contents.

        Checks, in order: which components are reported per node, that the
        user-defined metrics and every entry of ``_METRICS`` are present, and
        that the counter and histogram samples carry the expected values.
        Raises AssertionError on the first expectation that does not hold.
        """
        fetched = fetch_prometheus(prom_addresses)
        components_dict, metric_names, metric_samples = fetched
        per_node_components = components_dict.values()

        # Every node must report a raylet.
        assert all("raylet" in comps for comps in per_node_components)

        # The GCS server and a core worker only need to be reported by at
        # least one node.
        for component in ("gcs_server", "core_worker"):
            assert any(component in comps for comps in per_node_components)

        # The user-defined metrics must appear (possibly as a substring of a
        # longer exported name).
        user_metrics = ("test_counter", "test_histogram", "test_driver_counter")
        for user_metric in user_metrics:
            assert any(user_metric in full_name for full_name in metric_names)

        # Every built-in metric listed in _METRICS must be exported verbatim.
        for metric in _METRICS:
            assert metric in metric_names, \
                f"metric {metric} not in {metric_names}"

        # The first sample matching each counter must hold the expected value.
        expected_counters = (
            ("test_counter", 2.0),
            ("test_driver_counter", 1.0),
        )
        for counter_name, expected_value in expected_counters:
            matching = [s for s in metric_samples if counter_name in s.name]
            assert matching[0].value == expected_value

        histogram_samples = [
            s for s in metric_samples if "test_histogram" in s.name
        ]
        # Map each bucket's upper bound (the "le" label) to its count.
        buckets = {}
        for sample in histogram_samples:
            if "_bucket" in sample.name:
                upper_bound = sample.labels["le"]
                buckets[upper_bound] = sample.value
        # A single observation of 1.5 is expected. Prometheus histogram
        # buckets are cumulative, so that observation is counted in both the
        # 1.6 and +Inf buckets while the 0.1 bucket stays empty.
        assert buckets == {"0.1": 0.0, "1.6": 1.0, "+Inf": 1.0}

        count_samples = [s for s in histogram_samples if "_count" in s.name]
        hist_count = count_samples[0].value
        sum_samples = [s for s in histogram_samples if "_sum" in s.name]
        hist_sum = sum_samples[0].value
        assert hist_count == 1
        assert hist_sum == 1.5
Esempio n. 3
0
 def test_case_ip_correct():
     """Report whether the exported raylet pid matches the local raylet.

     Despite the name, this compares process ids: the ``pid`` label of the
     first ``ray_raylet_cpu`` sample is compared (as a string) with the pid
     of the first raylet process registered on ``ray.worker._global_node``.
     Returns False when no ``ray_raylet_cpu`` sample is present.
     """
     _, _, metric_samples = fetch_prometheus(prom_addresses)
     raylet_processes = ray.worker._global_node.all_processes[
         ray_constants.PROCESS_TYPE_RAYLET]
     raylet_proc = raylet_processes[0]
     # Take the pid tag from the first matching sample, or None if absent.
     reported_pid = next(
         (sample.labels["pid"] for sample in metric_samples
          if sample.name == "ray_raylet_cpu"),
         None,
     )
     return str(raylet_proc.process.pid) == str(reported_pid)
Esempio n. 4
0
 def test_case_stats_exist():
     """Report whether all expected node and raylet metrics were exported.

     Fetches the metric names via ``fetch_prometheus(prom_addresses)`` and
     returns True only if none of the CPU, memory, disk and network metric
     names listed below is missing from them.
     """
     _, metric_names, _ = fetch_prometheus(prom_addresses)
     required_metrics = (
         # CPU and memory, per node and per raylet.
         "ray_node_cpu",
         "ray_node_mem",
         "ray_raylet_cpu",
         "ray_raylet_mem",
         # Disk.
         "ray_node_disk_usage",
         "ray_node_disk_utilization_percentage",
         # Network.
         "ray_node_network_sent",
         "ray_node_network_received",
         "ray_node_network_send_speed",
         "ray_node_network_receive_speed",
     )
     missing = [name for name in required_metrics if name not in metric_names]
     return not missing