def test_parse_smi_xml_result(self):
        """Check the parsed sample nvidia-smi XML against the expected status map.

        The expected mapping exposes each GPU's status object under two keys:
        its short "0"/"1" string and its "GPU-..." identifier string.
        """
        with open("data/nvidia_smi_sample.xml", "r") as xml_file:
            raw_xml = xml_file.read()
        parsed = nvidia.parse_smi_xml_result(raw_xml)

        uuid_zero = "GPU-e511a7b2-f9d5-ba47-9b98-853732ca6c1b"
        uuid_one = "GPU-28daffaf-8abe-aaf8-c298-4bd13aecb5e6"

        # The first GPU's EccError is built with explicit counter values;
        # the second GPU uses a default-constructed EccError.
        ecc_zero = nvidia.EccError(
            volatile_single=1,
            volatile_double=2,
            aggregated_single=3,
            aggregated_double=4,
        )
        status_zero = nvidia.NvidiaGpuStatus(
            100, 25, [1357, 2384, 3093], ecc_zero, "0", uuid_zero, 60.0)
        status_one = nvidia.NvidiaGpuStatus(
            98, 50, [3093], nvidia.EccError(), "1", uuid_one, 59.0)

        # Register the same status object under both of its keys.
        expected = {}
        for status, keys in ((status_one, ("1", uuid_one)),
                             (status_zero, ("0", uuid_zero))):
            for key in keys:
                expected[key] = status

        self.assertEqual(expected, parsed)
예제 #2
0
 def test_exporter_will_not_report_unsupported_gpu(self):
     """Parsing the outdated-GPU sample XML must yield an empty mapping."""
     with open("data/nvidia_smi_outdated_gpu.xml", "r") as xml_file:
         raw_xml = xml_file.read()

     parsed = nvidia.parse_smi_xml_result(raw_xml)

     # No entry at all is expected for this sample.
     expected = {}
     self.assertEqual(expected, parsed)
예제 #3
0
 def test_parse_smi_xml_result(self):
     """Check the parsed sample nvidia-smi XML against two expected GPU statuses.

     The expected mapping is keyed by the strings "0" and "1"; each value is
     an NvidiaGpuStatus built with a default-constructed EccError.
     """
     with open("data/nvidia_smi_sample.xml", "r") as xml_file:
         raw_xml = xml_file.read()
     parsed = nvidia.parse_smi_xml_result(raw_xml)

     status_one = nvidia.NvidiaGpuStatus(98, 50, [3093], nvidia.EccError())
     status_zero = nvidia.NvidiaGpuStatus(
         100, 25, [1357, 2384, 3093], nvidia.EccError())

     expected = {"1": status_one, "0": status_zero}
     self.assertEqual(expected, parsed)
예제 #4
0
    def test_parse_smi_out_of_order_xml_result(self):
        """Check per-GPU utilization parsed from the out-of-order sample XML.

        The parser is invoked with the LAUNCHER_TYPE environment variable set
        to "k8s" (presumably consulted by the parser -- confirm against the
        nvidia module). The variable is restored to its prior state even when
        parsing raises or an assertion fails, so the override cannot leak into
        other tests running in the same process.
        """
        sample_path = "data/nvidia_smi_out_of_order.xml"
        with open(sample_path, "r") as f:
            nvidia_smi_result = f.read()

        # Remember any pre-existing value so the test leaves the environment
        # exactly as it found it.
        previous_launcher_type = os.environ.get("LAUNCHER_TYPE")
        os.environ["LAUNCHER_TYPE"] = "k8s"
        try:
            nvidia_smi_parse_result = nvidia.parse_smi_xml_result(
                nvidia_smi_result)

            self.assertEqual(nvidia_smi_parse_result["0"].gpu_util, 99.0)
            self.assertEqual(nvidia_smi_parse_result["1"].gpu_util, 99.0)
            self.assertEqual(nvidia_smi_parse_result["2"].gpu_util, 0.0)
            self.assertEqual(nvidia_smi_parse_result["3"].gpu_util, 0.0)
        finally:
            # Runs on success and on failure alike.
            if previous_launcher_type is None:
                os.environ.pop("LAUNCHER_TYPE", None)
            else:
                os.environ["LAUNCHER_TYPE"] = previous_launcher_type
예제 #5
0
 def test_parse_smi_xml_result(self):
     """Check the parsed sample nvidia-smi XML against plain-dict expectations.

     Each GPU key ('0', '1') is expected to map to a dict holding the
     'gpu_util' and 'gpu_mem_util' values.
     """
     with open("data/nvidia_smi_sample.xml", "r") as xml_file:
         raw_xml = xml_file.read()
     parsed = nvidia.parse_smi_xml_result(raw_xml)

     # (gpu key, gpu_util, gpu_mem_util) for every GPU in the sample.
     expected_rows = (
         ('1', 98, 50),
         ('0', 100, 25),
     )
     expected = {
         gpu_key: {'gpu_util': util, 'gpu_mem_util': mem_util}
         for gpu_key, util, mem_util in expected_rows
     }

     self.assertEqual(expected, parsed)
예제 #6
0
    def test_parse_smi_new_xml_result(self):
        """Check parsing of the "ecc_unsupported" sample nvidia-smi XML.

        Both GPUs are expected to carry a default-constructed EccError, and
        each status object must be reachable under its "0"/"1" key as well as
        its "GPU-..." identifier string.
        """
        with open("data/nvidia_smi_sample_ecc_unsupported.xml", "r") as xml_file:
            raw_xml = xml_file.read()
        parsed = nvidia.parse_smi_xml_result(raw_xml)

        uuid_zero = "GPU-57567e11-0be2-381b-5132-2ad95c262e58"
        uuid_one = "GPU-ef1d0068-5bfd-f1e4-7e79-ff35d71d44b8"

        status_zero = nvidia.NvidiaGpuStatus(
            0.000, 0.000, [], nvidia.EccError(), "0", uuid_zero, 24.0)
        status_one = nvidia.NvidiaGpuStatus(
            0.000, 0.000, [], nvidia.EccError(), "1", uuid_one, 24.0)

        # Register the same status object under both of its keys.
        expected = {}
        for status, keys in ((status_zero, ("0", uuid_zero)),
                             (status_one, ("1", uuid_one))):
            for key in keys:
                expected[key] = status

        self.assertEqual(expected, parsed)
    def test_get_retired_page_count(self):
        """Check ECC and page-retirement counters parsed from the retired-pages sample.

        The expected status starts from a default-constructed EccError whose
        counters are then set attribute by attribute before the comparison.
        """
        with open("data/nvidia_smi_retired_pages.xml", "r") as xml_file:
            raw_xml = xml_file.read()
        parsed = nvidia.parse_smi_xml_result(raw_xml)

        gpu_uuid = "GPU-ef23ffa6-c9fd-93d5-aeb8-612c087255ff"
        expected_status = nvidia.NvidiaGpuStatus(
            0.0, 0.0, [], nvidia.EccError(), "0", gpu_uuid, 33.0)

        # Counter values expected on the status' ecc_errors object, applied
        # in this order.
        expected_counters = (
            ("single_retirement", 0),
            ("double_retirement", 2),
            ("volatile_single", 1),
            ("volatile_double", 1),
            ("aggregated_single", 291),
            ("aggregated_double", 7627),
        )
        for attr_name, count in expected_counters:
            setattr(expected_status.ecc_errors, attr_name, count)

        expected = {"0": expected_status, gpu_uuid: expected_status}
        self.assertEqual(expected, parsed)