Exemple #1
0
def test_resource_monitor_store_to_file(test_output_dirs: OutputFolderForTests) -> None:
    """
    Check that metrics written by ResourceMonitor.store_to_file can be read back
    via read_aggregate_metrics, and that writing twice yields a single set of metrics.
    """
    # The same folder is used for both the Tensorboard and the CSV outputs.
    output_folder = test_output_dirs.root_dir
    monitor = ResourceMonitor(
        interval_seconds=5,
        tensorboard_folder=output_folder,
        csv_results_folder=output_folder,
    )

    # Inject hand-crafted utilization records for a single GPU with id 1.
    aggregate_utilization = GpuUtilization(
        id=1, mem_util=1, load=2, mem_reserved_gb=30.0, mem_allocated_gb=40.0, count=10
    )
    max_utilization = GpuUtilization(
        id=1, mem_util=0.4, load=0.5, mem_reserved_gb=6.0, mem_allocated_gb=7.0, count=10
    )
    monitor.gpu_aggregates = {1: aggregate_utilization}
    monitor.gpu_max = {1: max_utilization}

    # Store twice: the second write is expected to overwrite the first,
    # so that only one set of metrics is present when reading back.
    for _ in range(2):
        monitor.store_to_file()

    # The expected values are consistent with the aggregates being divided by `count`
    # and utilization fractions being reported as percentages.
    expected_metrics = {
        "GPU1": {
            "MemUtil_Percent": 10.0,
            "Load_Percent": 20.0,
            "MemReserved_GB": 3.0,
            "MemAllocated_GB": 4.0,
            "MaxMemUtil_Percent": 40.0,
            "MaxLoad_Percent": 50.0,
            "MaxMemReserved_GB": 6.0,
            "MaxMemAllocated_GB": 7.0,
        },
    }
    parsed_metrics = monitor.read_aggregate_metrics()
    assert parsed_metrics == expected_metrics
def test_resource_monitor_store_to_file(
        test_output_dirs: TestOutputDirectories) -> None:
    """
    Check that metrics written by ResourceMonitor.store_to_file can be read back
    via read_aggregate_metrics, and that writing twice yields a single set of metrics.
    """
    output_folder = Path(test_output_dirs.root_dir)
    monitor = ResourceMonitor(interval_seconds=5, tensorboard_folder=output_folder)

    # Hand-crafted utilization records for a single GPU with id 1.
    aggregate_values = dict(id=1,
                            mem_util=1,
                            load=2,
                            mem_reserved_gb=30.0,
                            mem_allocated_gb=40.0,
                            count=10)
    max_values = dict(id=1,
                      mem_util=0.4,
                      load=0.5,
                      mem_reserved_gb=6.0,
                      mem_allocated_gb=7.0,
                      count=10)
    monitor.gpu_aggregates = {1: GpuUtilization(**aggregate_values)}
    monitor.gpu_max = {1: GpuUtilization(**max_values)}

    # Store twice: the second write is expected to overwrite the first,
    # so that only one set of metrics is present when reading back.
    for _ in range(2):
        monitor.store_to_file()

    # The expected values are consistent with the aggregates being divided by `count`
    # and utilization fractions being reported as percentages.
    expected_metrics = [
        ("GPU1/MemUtil_Percent", 10.0),
        ("GPU1/Load_Percent", 20.0),
        ("GPU1/MemReserved_GB", 3.0),
        ("GPU1/MemAllocated_GB", 4.0),
        ("GPU1/MaxMemUtil_Percent", 40.0),
        ("GPU1/MaxLoad_Percent", 50.0),
        ("GPU1/MaxMemReserved_GB", 6.0),
        ("GPU1/MaxMemAllocated_GB", 7.0),
    ]
    parsed_metrics = monitor.read_aggregate_metrics()
    assert parsed_metrics == expected_metrics