Beispiel #1
0
    def test_process_nodes_status(self):
        """Processing the sample node list leaves the gauge with samples.

        Loads ``data/nodes_list.json``, feeds it to
        ``watchdog.process_nodes_status`` together with a gauge created by
        ``watchdog.gen_pai_node_gauge`` and checks that the gauge holds at
        least one sample afterwards.
        """
        nodes = json.loads(self.get_data_test_input("data/nodes_list.json"))

        gauge = watchdog.gen_pai_node_gauge()
        watchdog.process_nodes_status(gauge, nodes)

        # assertGreater reports the actual sample count when the check fails,
        # whereas assertTrue(len(...) > 0) would only say "False is not true".
        self.assertGreater(len(gauge.samples), 0)
Beispiel #2
0
    def test_process_dlws_nodes_status_with_unscheduable(self):
        """Check the gauges produced for the unschedulable-node fixture.

        Loads ``data/dlws_nodes_list_with_unschedulable.json``, registers a
        single ``PodInfo`` under the key ``192.168.255.1`` and verifies the
        five gauges returned by ``watchdog.process_nodes_status``: their
        names, their sample counts, the ``unschedulable`` label on the node
        count gauge, the GPU values, and the ``host_ip`` label on every gauge
        after the first.
        """
        obj = json.loads(
            self.get_data_test_input(
                "data/dlws_nodes_list_with_unschedulable.json"))

        pod_info = collections.defaultdict(list)
        pod_info["192.168.255.1"].append(watchdog.PodInfo("job1", False, 2))
        gauges = watchdog.process_nodes_status(obj, pod_info,
                                               watchdog.ClusterGPUInfo())

        self.assertEqual(5, len(gauges))

        self.assertEqual("k8s_node_count", gauges[0].name)
        self.assertEqual(1, len(gauges[0].samples))
        self.assertEqual("true", gauges[0].samples[0].labels["unschedulable"])

        # (gauge name, value of its single sample), in the order the gauges
        # are returned after the node-count gauge.
        expected_gpu_gauges = [
            ("k8s_node_gpu_available", 0),
            ("k8s_node_preemptable_gpu_available", 0),
            ("k8s_node_gpu_total", 4),
            ("k8s_node_gpu_allocatable", 0),
        ]
        # The length assertion above guarantees zip() pairs every expectation
        # with a gauge instead of silently truncating.
        for gauge, (name, value) in zip(gauges[1:], expected_gpu_gauges):
            self.assertEqual(name, gauge.name)
            self.assertEqual(1, len(gauge.samples))
            self.assertEqual(value, gauge.samples[0].value)

        for gauge in gauges:
            self.assertGreater(len(gauge.samples), 0)

        for gauge in gauges[1:]:
            self.assertEqual("192.168.255.1",
                             gauge.samples[0].labels["host_ip"])
    def test_process_dlws_nodes_status(self):
        """Check the gauges produced for the DLWS node-list fixture.

        Loads ``data/dlws_nodes_list.json``, registers a single ``PodInfo``
        under the key ``192.168.255.1`` and verifies the four gauges returned
        by ``watchdog.process_nodes_status``: names, sample counts and values
        of gauges 1-3, that every gauge has samples, and the ``host_ip``
        label on every gauge after the first.
        """
        obj = json.loads(self.get_data_test_input("data/dlws_nodes_list.json"))

        pod_info = collections.defaultdict(list)
        pod_info["192.168.255.1"].append(watchdog.PodInfo("job1", 2))
        gauges = watchdog.process_nodes_status(obj, pod_info)

        # assertEqual shows the actual length on failure, unlike
        # assertTrue(len(gauges) == 4).
        self.assertEqual(4, len(gauges))

        # (gauge name, value of its single sample) for gauges[1:], in order.
        expected_gpu_gauges = [
            ("k8s_node_gpu_available", 2),
            ("k8s_node_gpu_total", 4),
            ("k8s_node_gpu_reserved", 0),
        ]
        # The length assertion above guarantees zip() pairs every expectation
        # with a gauge instead of silently truncating.
        for gauge, (name, value) in zip(gauges[1:], expected_gpu_gauges):
            self.assertEqual(name, gauge.name)
            self.assertEqual(1, len(gauge.samples))
            self.assertEqual(value, gauge.samples[0].value)

        # Also covers gauges[0], which has no dedicated assertions above.
        for gauge in gauges:
            self.assertGreater(len(gauge.samples), 0)

        for gauge in gauges[1:]:
            self.assertEqual("192.168.255.1",
                             gauge.samples[0].labels["host_ip"])
Beispiel #4
0
    def test_process_nodes_status(self):
        """Processing the sample node list yields four non-empty gauges.

        Loads ``data/nodes_list.json``, passes it to
        ``watchdog.process_nodes_status`` with an empty pod-info mapping and
        checks the number of returned gauges and that each has samples.
        """
        nodes = json.loads(self.get_data_test_input("data/nodes_list.json"))

        gauges = watchdog.process_nodes_status(nodes, {})

        # assertEqual / assertGreater report the offending values on failure,
        # which a bare assertTrue on a comparison does not.
        self.assertEqual(4, len(gauges))

        for gauge in gauges:
            self.assertGreater(len(gauge.samples), 0)
Beispiel #5
0
    def test_process_nodes_status(self):
        """Processing the sample node list yields five non-empty gauges.

        Loads ``data/nodes_list.json``, passes it to
        ``watchdog.process_nodes_status`` with an empty pod-info mapping and
        a fresh ``watchdog.ClusterGPUInfo`` and checks the number of returned
        gauges and that each has samples.
        """
        nodes = json.loads(self.get_data_test_input("data/nodes_list.json"))

        gauges = watchdog.process_nodes_status(nodes, {},
                                               watchdog.ClusterGPUInfo())

        self.assertEqual(5, len(gauges))

        # assertGreater reports the actual sample count when the check fails.
        for gauge in gauges:
            self.assertGreater(len(gauge.samples), 0)