def get_device_total_memory(index=0):
    """
    Return total memory of CUDA device with index or with device identifier UUID.

    Parameters
    ----------
    index: int or str
        Index or UUID of the GPU device. A falsy value, or one whose string
        form is purely numeric, is passed to NVML as a device index; any
        other value is treated as a device UUID.

    Returns
    -------
    The ``total`` field of the memory info NVML reports for the device.
    """
    pynvml.nvmlInit()

    looks_like_index = not index or str(index).isnumeric()
    if looks_like_index:
        # Plain device index: hand it to NVML untouched.
        handle = pynvml.nvmlDeviceGetHandleByIndex(index)
    else:
        # Anything else is a UUID; this lookup works for both MIG and
        # non-MIG device UUIDs. NVML is given the UUID as bytes.
        uuid_bytes = str(index).encode()
        handle = pynvml.nvmlDeviceGetHandleByUUID(uuid_bytes)

    memory_info = pynvml.nvmlDeviceGetMemoryInfo(handle)
    return memory_info.total
def get_cpu_affinity(device_index=None):
    """Get a list containing the CPU indices to which a GPU is directly connected.

    Use either the device index or the specified device identifier UUID.

    Parameters
    ----------
    device_index: int or str
        Index or UUID of the GPU device. A truthy value whose string form is
        not purely numeric is treated as a UUID; anything else is passed to
        NVML as a device index.

    Returns
    -------
    list of int
        The unpacked CPU affinity bitmask of the device. If NVML raises an
        ``NVMLError`` a warning is issued and the default affinity
        ``list(range(get_cpu_count()))`` is returned instead.

    Examples
    --------
    >>> from dask_cuda.utils import get_cpu_affinity
    >>> get_cpu_affinity(0)  # DGX-1 has GPUs 0-3 connected to CPUs [0-19, 20-39]
    [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19,
     40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59]
    >>> get_cpu_affinity(5)  # DGX-1 has GPUs 5-7 connected to CPUs [20-39, 60-79]
    [20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39,
     60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79]
    >>> get_cpu_affinity(1000)  # DGX-1 has no device on index 1000
    dask_cuda/utils.py:96: UserWarning: Cannot get CPU affinity for device with index 1000, setting default affinity
    [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19,
     20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39,
     40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59,
     60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79]
    """
    pynvml.nvmlInit()

    try:
        if device_index and not str(device_index).isnumeric():
            # This means device_index is UUID.
            # This works for both MIG and non-MIG device UUIDs.
            handle = pynvml.nvmlDeviceGetHandleByUUID(str.encode(device_index))
            if pynvml.nvmlDeviceIsMigDeviceHandle(handle):
                # Additionally get parent device handle
                # if the device itself is a MIG instance
                handle = pynvml.nvmlDeviceGetDeviceHandleFromMigDeviceHandle(handle)
        else:
            handle = pynvml.nvmlDeviceGetHandleByIndex(device_index)

        # Result is a list of 64-bit integers, thus ceil(get_cpu_count() / 64)
        affinity = pynvml.nvmlDeviceGetCpuAffinity(
            handle,
            math.ceil(get_cpu_count() / 64),
        )
        return unpack_bitmask(affinity)
    except pynvml.NVMLError:
        # "%s" rather than "%d": device_index may be a UUID string, and "%d"
        # would raise TypeError here, masking the NVML failure and skipping
        # the fallback. For integer indices the rendered message is unchanged.
        warnings.warn(
            "Cannot get CPU affinity for device with index %s, setting default affinity"
            % device_index
        )
        return list(range(get_cpu_count()))
def test_nvmlDeviceGetHandleByUUID(ngpus, uuids):
    """Look up a device handle for each of the first ``ngpus`` entries of ``uuids``.

    ``ngpus`` and ``uuids`` are presumably pytest fixtures defined elsewhere;
    confirm against the surrounding test module.
    """
    handles = []
    for gpu_idx in range(ngpus):
        uuid = uuids[gpu_idx]
        handles.append(pynvml.nvmlDeviceGetHandleByUUID(uuid))

    # Every lookup must have produced a handle without raising.
    assert len(handles) == ngpus