Exemple #1
0
    def test_parse_monitor_response_time(self):
        """Check the endpoints collected from the response-time monitor fixture.

        Loads ``data/pods_with_response_time_monitor.json``, passes it to
        ``watchdog.process_pods_status`` together with an empty endpoint list,
        and expects that list to end up with exactly two entries whose
        ``name``, ``ip``, ``port``, ``path`` and ``timeout`` match the table
        below.
        """
        raw = self.get_data_test_input(
            "data/pods_with_response_time_monitor.json")
        pods = json.loads(raw)

        collected = []
        watchdog.process_pods_status(
            pods,
            watchdog.gen_pai_pod_gauge(),
            watchdog.gen_pai_container_gauge(),
            collections.defaultdict(list),
            collected,
            watchdog.VcUsage(),
        )

        self.assertEqual(2, len(collected))
        first, second = collected

        # (attribute, expected on first endpoint, expected on second endpoint)
        expectations = (
            ("name", "job-exporter", "job-exporter"),
            ("ip", "10.151.40.231", "10.151.40.227"),
            ("port", 9102, 9102),
            ("path", "/healthz", "/healthz"),
            ("timeout", 10, 10),
        )
        # Each attribute is checked on the first endpoint, then on the second.
        for attr, want_first, want_second in expectations:
            self.assertEqual(want_first, getattr(first, attr))
            self.assertEqual(want_second, getattr(second, attr))
Exemple #2
0
    def test_parse_pods_status(self):
        """Check the first sample of each gauge after processing ``pods_list.json``.

        The pod, container and job-pod gauges must each hold at least one
        sample, and the labels of their first sample must match the expected
        values asserted below.
        """
        pods = json.loads(self.get_data_test_input("data/pods_list.json"))

        pod_metrics = watchdog.gen_pai_pod_gauge()
        container_metrics = watchdog.gen_pai_container_gauge()
        job_pod_metrics = watchdog.gen_pai_job_pod_gauge()

        watchdog.process_pods_status(
            pods, pod_metrics, container_metrics, job_pod_metrics,
            collections.defaultdict(list))

        # Pod gauge: only the first sample is inspected.
        self.assertTrue(len(pod_metrics.samples) > 0)
        pod_labels = pod_metrics.samples[0].labels
        self.assertEqual('10.151.40.4', pod_labels['host_ip'])
        self.assertEqual('running', pod_labels['phase'])
        self.assertEqual('true', pod_labels['pod_scheduled'])
        self.assertEqual('true', pod_labels['initialized'])

        # Container gauge: only the first sample is inspected.
        self.assertTrue(len(container_metrics.samples) > 0)
        container_labels = container_metrics.samples[0].labels
        self.assertEqual('10.151.40.4', container_labels['host_ip'])
        self.assertEqual('running', container_labels['state'])
        self.assertEqual('default', container_labels['namespace'])
        self.assertEqual('nvidia-drivers', container_labels['name'])

        # Job-pod gauge: only the first sample is inspected.
        self.assertTrue(len(job_pod_metrics.samples) > 0)
        job_pod_labels = job_pod_metrics.samples[0].labels
        self.assertEqual('10.1.3.29', job_pod_labels['host_ip'])
        self.assertEqual('pending', job_pod_labels['phase'])
        self.assertEqual('true', job_pod_labels['pod_bound'])
        self.assertEqual('true', job_pod_labels['initialized'])
Exemple #3
0
    def test_process_pods_with_vc_usage(self):
        """Check ``VcUsage.map`` after parsing two pod fixtures in sequence.

        First ``data/dlts_non_preemptable_pod.json`` is parsed and the map is
        expected to hold one ``"P40"`` entry under ``"some_vc_name"``. Then
        ``data/dlts_preemptable_pod.json`` is parsed with the same ``VcUsage``
        and a ``"P80"`` entry is expected to appear while ``"P40"`` is
        unchanged.
        """
        vc_name = "some_vc_name"

        non_preemptable = json.loads(
            self.get_data_test_input("data/dlts_non_preemptable_pod.json"))

        pod_metrics = watchdog.gen_pai_pod_gauge()
        container_metrics = watchdog.gen_pai_container_gauge()
        pods_by_key = collections.defaultdict(list)
        usage = watchdog.VcUsage()

        watchdog.parse_pod_item(non_preemptable, pod_metrics,
                                container_metrics, pods_by_key, [], usage)

        self.assertEqual(1, len(usage.map))
        per_gpu = usage.map[vc_name]
        self.assertEqual(1, len(per_gpu))
        p40 = per_gpu["P40"]
        self.assertEqual(1, p40[0])
        self.assertEqual(1, p40[1])

        preemptable = json.loads(
            self.get_data_test_input("data/dlts_preemptable_pod.json"))
        watchdog.parse_pod_item(preemptable, pod_metrics, container_metrics,
                                pods_by_key, [], usage)

        self.assertEqual(1, len(usage.map))
        # Look the entry up again rather than reusing the earlier reference.
        per_gpu = usage.map[vc_name]
        self.assertEqual(2, len(per_gpu))

        # The P40 entry must be unchanged: the preemptable pod uses P80.
        p40 = per_gpu["P40"]
        self.assertEqual(1, p40[0])
        self.assertEqual(1, p40[1])

        p80 = per_gpu["P80"]
        self.assertEqual(1, p80[0])
        self.assertEqual(0, p80[1])
    def test_process_unscheduled_pods(self):
        """Check GPU availability after processing the unscheduled-pod fixture.

        Every item of ``data/dlts_unscheduled_pods.json`` is parsed with
        ``watchdog.parse_pod_item``; the resulting pod info must contain a
        single key, ``"unscheduled"``, holding two entries. Starting from 10
        available and 10 preemptable-available GPUs,
        ``watchdog.process_unscheduled_pods`` is expected to leave 9 and 8
        respectively.
        """
        pods = json.loads(
            self.get_data_test_input("data/dlts_unscheduled_pods.json"))

        pod_metrics = watchdog.gen_pai_pod_gauge()
        container_metrics = watchdog.gen_pai_container_gauge()
        pods_info = collections.defaultdict(list)
        usage = watchdog.VcUsage()

        for pod in pods:
            # A fresh endpoint list is handed over for every pod.
            watchdog.parse_pod_item(pod, pod_metrics, container_metrics,
                                    pods_info, [], usage)

        self.assertEqual(1, len(pods_info))
        self.assertEqual(2, len(pods_info["unscheduled"]))

        gpu_info = watchdog.ClusterGPUInfo()
        gpu_info.available = gpu_info.preemptable_available = 10

        watchdog.process_unscheduled_pods(pods_info, gpu_info)

        self.assertEqual(9, gpu_info.available)
        self.assertEqual(8, gpu_info.preemptable_available)
Exemple #5
0
            def collect(self):
                """Yield the pod gauge, then the container gauge.

                Both gauges are created here and passed, together with ``obj``
                (a name taken from an enclosing scope, not defined in this
                method), to ``watchdog.process_pods_status`` before being
                yielded.
                """
                pods = watchdog.gen_pai_pod_gauge()
                containers = watchdog.gen_pai_container_gauge()

                watchdog.process_pods_status(pods, containers, obj)

                for gauge in (pods, containers):
                    yield gauge
Exemple #6
0
    def test_process_pods_with_no_condition(self):
        """Check gauge contents after processing ``data/no_condtion_pod.json``.

        After ``watchdog.process_pods_status`` runs on the fixture, the pod
        gauge must hold at least one sample and the container gauge must hold
        none.
        """
        # The fixture file name is spelled "condtion" on disk; keep it as is.
        obj = json.loads(self.get_data_test_input("data/no_condtion_pod.json"))

        pod_gauge = watchdog.gen_pai_pod_gauge()
        container_gauge = watchdog.gen_pai_container_gauge()

        watchdog.process_pods_status(pod_gauge, container_gauge, obj)

        self.assertTrue(len(pod_gauge.samples) > 0)
        # assertEqual instead of the deprecated assertEquals alias, which was
        # removed from unittest in Python 3.12.
        self.assertEqual(0, len(container_gauge.samples))
Exemple #7
0
    def test_parse_pods_status(self):
        """Check that both gauges get samples from ``data/pods_list.json``.

        The fixture is passed to ``watchdog.process_pods_status``; afterwards
        each of the two gauges must contain at least one sample.
        """
        pods = json.loads(self.get_data_test_input("data/pods_list.json"))

        # NOTE(review): this gauge is used as the pod gauge but is created by
        # gen_pai_node_gauge(); presumably gen_pai_pod_gauge() was intended.
        # Confirm against the watchdog module before changing it.
        pod_metrics = watchdog.gen_pai_node_gauge()
        container_metrics = watchdog.gen_pai_container_gauge()

        watchdog.process_pods_status(pod_metrics, container_metrics, pods)

        for gauge in (pod_metrics, container_metrics):
            self.assertTrue(len(gauge.samples) > 0)
            def collect(self):
                """Yield the pod gauge, then the container gauge.

                Both gauges and an empty list-valued ``defaultdict`` are
                created here and passed, after ``obj`` (a name taken from an
                enclosing scope, not defined in this method), to
                ``watchdog.process_pods_status`` before the gauges are yielded.
                """
                pods = watchdog.gen_pai_pod_gauge()
                containers = watchdog.gen_pai_container_gauge()
                info = collections.defaultdict(list)

                watchdog.process_pods_status(obj, pods, containers, info)

                for gauge in (pods, containers):
                    yield gauge
    def test_process_pods_with_no_condition(self):
        """Check gauge contents after processing ``data/no_condtion_pod.json``.

        After ``watchdog.process_pods_status`` runs on the fixture, the pod
        gauge must hold at least one sample and the container gauge must hold
        none.
        """
        raw = self.get_data_test_input("data/no_condtion_pod.json")
        pods = json.loads(raw)

        pod_metrics = watchdog.gen_pai_pod_gauge()
        container_metrics = watchdog.gen_pai_container_gauge()

        watchdog.process_pods_status(
            pods, pod_metrics, container_metrics,
            collections.defaultdict(list))

        self.assertTrue(len(pod_metrics.samples) > 0)
        self.assertEqual(0, len(container_metrics.samples))
Exemple #10
0
    def test_parse_pods_status(self):
        """Check that both gauges get samples from ``data/pods_list.json``.

        The fixture is passed to ``watchdog.process_pods_status`` along with
        an empty endpoint list and a fresh ``VcUsage``; afterwards the pod and
        container gauges must each contain at least one sample.
        """
        raw = self.get_data_test_input("data/pods_list.json")
        pods = json.loads(raw)

        pod_metrics = watchdog.gen_pai_pod_gauge()
        container_metrics = watchdog.gen_pai_container_gauge()
        info = collections.defaultdict(list)
        usage = watchdog.VcUsage()

        watchdog.process_pods_status(pods, pod_metrics, container_metrics,
                                     info, [], usage)

        for gauge in (pod_metrics, container_metrics):
            self.assertTrue(len(gauge.samples) > 0)