def get_gpu_memory(device_idx):
    assert device_idx < NvidiaSmi.total_devices, \
        "device index {} should be less than total devices {}".format(
            device_idx, NvidiaSmi.total_devices)
    handle = nvidia_smi.nvmlDeviceGetHandleByIndex(device_idx)
    res = nvidia_smi.nvmlDeviceGetMemoryInfo(handle)
    M = 1024 ** 2  # bytes per MiB
    return res.free / M, res.total / M, res.used / M
def get_gpu_memory():
    import nvidia_smi
    nvidia_smi.nvmlInit()
    handle = nvidia_smi.nvmlDeviceGetHandleByIndex(0)
    info = nvidia_smi.nvmlDeviceGetMemoryInfo(handle)
    print("Used GPU memory: {}%".format((info.used * 100) // info.total))
    nvidia_smi.nvmlShutdown()
def checkGPUsAvailability(n_gpus=1):
    '''
    Check that the first `n_gpus` GPUs have free memory.
    OUT:
        True: if they do
        False: if not
    '''
    # For every GPU to check
    for i_gpu in range(n_gpus):
        # Access the memory used by the i-th GPU
        try:
            nvidia_smi.nvmlInit()
            handle = nvidia_smi.nvmlDeviceGetHandleByIndex(i_gpu)
            mem_res = nvidia_smi.nvmlDeviceGetMemoryInfo(handle)
        except Exception:
            print('Warning: GPU could not be accessed')
            break
        # If more than 1 GB is taken, then stop
        if (mem_res.used / (1024. ** 3) > 1.0):  # greater than 1 GB of VRAM
            # Report it
            print('Memory used (gpu-%i): %.2f GB' % (i_gpu, mem_res.used / (1024 ** 3)), end='')
            print(' - on total: %.2f GB' % (mem_res.total / (1024 ** 3)))
            return False
    return True
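# A minimal usage sketch for checkGPUsAvailability above, assuming the
# `nvidia_smi` bindings are installed; `launch_training()` is a hypothetical
# placeholder, not part of the original snippet.
import nvidia_smi

if checkGPUsAvailability(n_gpus=2):
    print('GPUs look free, starting the job')
    # launch_training()  # hypothetical entry point
else:
    print('GPUs are busy, try again later')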
def Stop(self):
    totalTime = time.perf_counter() - self.StartTime
    mem_res = nvidia_smi.nvmlDeviceGetMemoryInfo(self.handle)
    self.ElapsedTime.append(totalTime)
    return (totalTime, psutil.cpu_percent(), psutil.virtual_memory()[2],
            mem_res.used / mem_res.total, mem_res.used / mem_res.total)
def get_gpu_mem(concat_string=''):
    # res = nvidia_smi.nvmlDeviceGetUtilizationRates(handle)
    # print(f'gpu: {res.gpu}%, gpu-mem: {res.memory}%')
    res = nvidia_smi.nvmlDeviceGetMemoryInfo(handle)
    # print(f'mem: {res.used / (1024**2)} (MiB)')  # usage in MiB
    print(concat_string, f'mem: {100 * (res.used / res.total):.3f}%')  # percentage
def show_memory_usage():
    nvidia_smi.nvmlInit()
    handle = nvidia_smi.nvmlDeviceGetHandleByIndex(0)  # GPU number
    mem_res = nvidia_smi.nvmlDeviceGetMemoryInfo(handle)
    # print('=' * 50)
    # print(f'mem: {mem_res.used / (1024 ** 3)} (GiB)')  # usage in GiB
    print(f'mem usage: {100 * (mem_res.used / mem_res.total):.3f}%')  # percentage
def print_gpu_info(idx=0):
    nvidia_smi.nvmlInit()
    handle = nvidia_smi.nvmlDeviceGetHandleByIndex(idx)
    info = nvidia_smi.nvmlDeviceGetMemoryInfo(handle)
    print("Total memory:", info.total)
    print("Free memory:", info.free)
    print("Used memory:", info.used)
    nvidia_smi.nvmlShutdown()
def get_mem_info(device_id):
    gpu_list = [device_id]
    nvidia_smi.nvmlInit()
    handle = [nvidia_smi.nvmlDeviceGetHandleByIndex(i) for i in gpu_list]
    res = [nvidia_smi.nvmlDeviceGetMemoryInfo(item) for item in handle]
    res = [100 * item.used / item.total for item in res]
    nvidia_smi.nvmlShutdown()
    return res[0]
def Watch_fin():
    nvidia_smi.nvmlInit()
    handle = nvidia_smi.nvmlDeviceGetHandleByIndex(1)
    res = nvidia_smi.nvmlDeviceGetMemoryInfo(handle)
    time.sleep(1)
    if res.used == 0:
        return 0
    else:
        return 1
def memory_check():
    nvidia_smi.nvmlInit()
    # card id 0 hardcoded here, there is also a call to get all available card ids, so we could iterate
    handle = nvidia_smi.nvmlDeviceGetHandleByIndex(0)
    mem_res = nvidia_smi.nvmlDeviceGetMemoryInfo(handle)
    mbs = mem_res.used / (1024**2)
    percent = mem_res.used / mem_res.total
    return mbs, percent
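# A minimal sketch of the "iterate over all available cards" variant hinted at
# in the comment above, assuming the same `nvidia_smi` module; the name
# `memory_check_all` is ours, not from the original snippet.
def memory_check_all():
    nvidia_smi.nvmlInit()
    stats = []
    for i in range(nvidia_smi.nvmlDeviceGetCount()):
        handle = nvidia_smi.nvmlDeviceGetHandleByIndex(i)
        mem_res = nvidia_smi.nvmlDeviceGetMemoryInfo(handle)
        stats.append((i, mem_res.used / (1024 ** 2), mem_res.used / mem_res.total))
    nvidia_smi.nvmlShutdown()
    return stats  # list of (device index, used MiB, used fraction)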
def stats(self, n_iter):
    if self.eps is not None:
        self.log.add_scalar('eps', self.eps(), n_iter)
    if self.handle is not None:
        res = nvidia_smi.nvmlDeviceGetUtilizationRates(self.handle)
        self.log.add_scalar('nvidia/load', res.gpu, n_iter)
        res = nvidia_smi.nvmlDeviceGetMemoryInfo(self.handle)
        self.log.add_scalar('nvidia/mem_gb', res.used / (1024**3), n_iter)
def Available_GPUs(self):
    available = []
    for i in range(self.total_gpus):
        handle = nvidia_smi.nvmlDeviceGetHandleByIndex(i)
        res = nvidia_smi.nvmlDeviceGetUtilizationRates(handle)
        mem_res = nvidia_smi.nvmlDeviceGetMemoryInfo(handle)
        if res.gpu < 30 and (mem_res.used / mem_res.total * 100) < 30:
            available.append(i)
    return available
def gpu_memory_tracker():
    """Returns the percentage of NVIDIA GPU memory currently in use."""
    nvidia_smi.nvmlInit()
    handle = nvidia_smi.nvmlDeviceGetHandleByIndex(0)
    info = nvidia_smi.nvmlDeviceGetMemoryInfo(handle)
    used = info.used
    total = info.total
    percent = used / total * 100
    return percent
def get_usage(gpu_list=None, **kwargs):
    """ Track GPU memory usage. """
    _ = kwargs
    gpu_list = gpu_list or [0]
    nvidia_smi.nvmlInit()
    handle = [nvidia_smi.nvmlDeviceGetHandleByIndex(i) for i in gpu_list]
    res = [nvidia_smi.nvmlDeviceGetMemoryInfo(item) for item in handle]
    res = [100 * item.used / item.total for item in res]
    nvidia_smi.nvmlShutdown()
    return res
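# Hypothetical usage of get_usage above, assuming GPUs 0 and 1 exist on the
# machine; it returns one used-memory percentage per requested device.
percentages = get_usage(gpu_list=[0, 1])
print(', '.join(f'GPU{i}: {p:.1f}% used' for i, p in zip([0, 1], percentages)))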
def log_gpu_memory_to_tensorboard():
    '''Log every GPU's current free memory to TensorBoard.'''
    for i in range(nvidia_smi.nvmlDeviceGetCount()):
        info = nvidia_smi.nvmlDeviceGetMemoryInfo(gpus[i])
        with loggers[i].as_default():
            tl.summary({'free': np.array(info.free) / (1024**3)},
                       step=int(time.time()), name='GPUs')
def use_gpu():
    nvidia_smi.nvmlInit()
    handle = nvidia_smi.nvmlDeviceGetHandleByIndex(0)
    info = nvidia_smi.nvmlDeviceGetMemoryInfo(handle)
    nvidia_smi.nvmlShutdown()
    if info.used > 1000000000:  # more than ~1 GB of GPU memory already in use
        return True
    else:
        return False
def gpu_usage():
    nvidia_smi.nvmlInit()
    # card id 0 hardcoded here, there is also a call to get all available card ids, so we could iterate
    handle = nvidia_smi.nvmlDeviceGetHandleByIndex(0)
    info = nvidia_smi.nvmlDeviceGetMemoryInfo(handle)
    usage = info.used
    nvidia_smi.nvmlShutdown()
    return usage
def measure_memory(fn, handle):
    """Run `fn` and return the GPU memory in use afterwards, in GB (NaN if `fn` raises)."""
    try:
        _ = fn()
    except Exception as e:
        print(e)
        return np.nan
    info = nvidia_smi.nvmlDeviceGetMemoryInfo(handle)
    memory = info.used / 1000. / 1000. / 1000.
    print('.', end='')
    return memory
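# Hypothetical usage of measure_memory above: `model` and `batch` are
# placeholders for whatever workload you want to profile, and the NVML handle
# is obtained the same way as in the other snippets here.
nvidia_smi.nvmlInit()
handle = nvidia_smi.nvmlDeviceGetHandleByIndex(0)
used_gb = measure_memory(lambda: model(batch), handle)  # model/batch are placeholders
print(f'{used_gb:.2f} GB of GPU memory in use after the call')
nvidia_smi.nvmlShutdown()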
def available_GPUs(total_gpus):
    available_gpus = []
    for i in range(total_gpus):
        handle = nvidia_smi.nvmlDeviceGetHandleByIndex(i)
        res = nvidia_smi.nvmlDeviceGetUtilizationRates(handle)
        mem_res = nvidia_smi.nvmlDeviceGetMemoryInfo(handle)
        # Jon heuristically defines what it means for a GPU to be available
        if res.gpu < 30 and (mem_res.used / mem_res.total * 100) < 30:
            available_gpus.append(i)
    return available_gpus
def check_cuda_memory():
    nvidia_smi.nvmlInit()
    deviceCount = nvidia_smi.nvmlDeviceGetCount()
    for i in range(deviceCount):
        handle = nvidia_smi.nvmlDeviceGetHandleByIndex(i)
        info = nvidia_smi.nvmlDeviceGetMemoryInfo(handle)
        print("Device {}: {}, Memory ({:.2f}% free): {} (total), {} (free), {} (used)"
              .format(i, nvidia_smi.nvmlDeviceGetName(handle),
                      100 * info.free / info.total,
                      info.total, info.free, info.used))
    nvidia_smi.nvmlShutdown()
    return
def get_max_data_group_size():
    nvidia_smi.nvmlInit()
    handle = nvidia_smi.nvmlDeviceGetHandleByIndex(0)
    info = nvidia_smi.nvmlDeviceGetMemoryInfo(handle)
    total_memory = info.total
    if total_memory >= 12 * (10 ** 9):
        return 2 ** 12
    elif total_memory >= 6 * (10 ** 9):
        return 2 ** 11
    else:
        # too little GPU memory: hide the GPUs and fall back to CPU
        os.environ["CUDA_VISIBLE_DEVICES"] = ""
        return 2 ** 12
def get_device(gpuID=False):
    """Checks available GPUs and selects the one with the most available memory

    Parameters
    ----------
    gpuID: bool or int
        whether to use GPU, or the device ID of a specific GPU to use. If False,
        use only CPU. If True, attempts to find the GPU with most available memory.

    Returns
    -------
    device : jax.device
        handle to gpu or cpu device selected
    """
    import jax

    if gpuID is False:
        return jax.devices('cpu')[0]
    try:
        gpus = jax.devices('gpu')
        # did the user request a specific GPU?
        if isinstance(gpuID, int) and gpuID < len(gpus):
            return gpus[gpuID]
        if isinstance(gpuID, int):
            from desc.backend import TextColors
            # ID was not valid
            warnings.warn(TextColors.WARNING +
                          'gpuID did not match any found devices, trying default gpu option' +
                          TextColors.ENDC)
        # find all available options and see which has the most space
        import nvidia_smi
        nvidia_smi.nvmlInit()
        maxmem = 0
        gpu = gpus[0]
        for i in range(len(gpus)):
            handle = nvidia_smi.nvmlDeviceGetHandleByIndex(i)
            info = nvidia_smi.nvmlDeviceGetMemoryInfo(handle)
            if info.free > maxmem:
                maxmem = info.free
                gpu = gpus[i]
        nvidia_smi.nvmlShutdown()
        return gpu
    except:
        from desc.backend import TextColors
        warnings.warn(TextColors.WARNING + 'No GPU found, falling back to CPU' +
                      TextColors.ENDC)
        return jax.devices('cpu')[0]
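# Hypothetical usage of get_device above: request the GPU with the most free
# memory (falling back to CPU if none is found) and place a JAX array on it.
import jax.numpy as jnp
from jax import device_put

device = get_device(True)
x = device_put(jnp.ones((1024, 1024)), device)
print('selected device:', device)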
def on_train_batch_begin(self, batch, logs=None):
    nvidia_smi.nvmlInit()
    # card id 0 hardcoded here, there is also a call to get all available card ids, so we could iterate
    handle = nvidia_smi.nvmlDeviceGetHandleByIndex(0)
    res = nvidia_smi.nvmlDeviceGetUtilizationRates(handle)
    res1 = nvidia_smi.nvmlDeviceGetMemoryInfo(handle)
    # GPUs = GPU.getGPUs()
    # gpu = GPUs[0]
    print(f'gpu: {res.gpu}%, gpu-mem: {res.memory}%')
def get_gpu_info(gpu_id=None):
    """
    Get GPU info for gpu_id
    :param gpu_id: gpu bus id
    :return mem_used: used memory in MiB
    :return mem_total: total memory in MiB
    :return gpu_id: the queried device id
    """
    if gpu_id is None:
        gpu_id = int(os.environ["CUDA_VISIBLE_DEVICES"])
    handle = nvidia_smi.nvmlDeviceGetHandleByIndex(int(gpu_id))
    mem_res = nvidia_smi.nvmlDeviceGetMemoryInfo(handle)
    mem_used = mem_res.used / (1024**2)
    mem_total = mem_res.total / (1024**2)
    return mem_used, mem_total, gpu_id
def output(self, data_dict, n_iter):
    if self.handle is not None:
        res = nvidia_smi.nvmlDeviceGetUtilizationRates(self.handle)
        self.log.add_scalar('nvidia/load', res.gpu, n_iter)
        res = nvidia_smi.nvmlDeviceGetMemoryInfo(self.handle)
        self.log.add_scalar('nvidia/mem_gb', res.used / (1024 ** 3), n_iter)
    for key, val in data_dict.items():
        if hasattr(val, 'shape') and np.prod(val.shape) > 1:
            self.log.add_histogram(key, val, n_iter)
        else:
            self.log.add_scalar(key, val, n_iter)
def run(data_loader, engine):
    batch_time = AverageMeter()
    gpu = AverageMeter()
    gpu_mem = AverageMeter()
    # Allocate buffers and create a CUDA stream.
    h_input, d_input, h_output, d_output, stream = allocate_buffers(engine)
    # Contexts are used to perform inference.
    input = torch.rand((args.batch_size, ) + ModelData.INPUT_SHAPE)
    # if data_loader != 0:
    with engine.create_execution_context() as context:
        end = time.time()
        # for i in range(args.loop):
        for i, (input, target) in enumerate(data_loader):
            if i == args.loop:
                break
            np.copyto(h_input, input.reshape(-1))
            do_inference(context, h_input, d_input, h_output, d_output, stream)
            batch_time.update(time.time() - end)
            end = time.time()
            # https://pypi.org/project/py3nvml/
            util_rate = nvidia_smi.nvmlDeviceGetUtilizationRates(handle)
            # print(util_rate.gpu, util_rate.memory)
            gpu.update(util_rate.gpu)
            mem_info = nvidia_smi.nvmlDeviceGetMemoryInfo(handle)
            gpu_mem.update(mem_info.used >> 20)
            print("predict:", h_output)
            print("predict:", h_output.shape)
            print("target:", target)
            print("target:", target.shape)
            if i % args.print_freq == 0 and not args.csv:
                print('[{}/{}] batch time {batch_time.val:.3f} s (avg:{batch_time.avg:.3f})'
                      .format(i, args.loop, batch_time=batch_time))
    # print summary
    if args.csv:
        print("{}, {:.3f}, {:.3f}, {:.3f}, {}".format(
            args.batch_size,
            args.loop * args.batch_size / batch_time.sum,
            batch_time.avg, gpu.avg, gpu_mem.avg))
    else:
        print("batchsize: {}".format(args.batch_size))
        print("throughput: {:.3f} img/sec".format(args.loop * args.batch_size / batch_time.sum))
        print("Latency: {:.3f} sec".format(batch_time.avg))
        # see https://forums.fast.ai/t/show-gpu-utilization-metrics-inside-training-loop-without-subprocess-call/26594
        # show GPU utilization metrics inside the training loop
        print("GPU util: {:.3f} %, GPU mem: {} MiB".format(gpu.avg, gpu_mem.avg))
def predict_age(file_path="/media/original/data/vtps/sub-CC00050XX01_ses-7201_hemi-L_inflated_reduce50.vtp"):
    torch.manual_seed(0)
    if osp.isfile(file_path):
        # mesh = read(file_path)
        # reader = vtk.vtkPolyDataReader()
        reader = vtk.vtkXMLPolyDataReader()
        reader.SetFileName(file_path)
        reader.Update()
        # output = reader.GetOutput()
        points = torch.tensor(np.array(reader.GetOutput().GetPoints().GetData()))
        local_features = ['corrected_thickness', 'curvature', 'sulcal_depth']
        x = get_features(local_features, reader)
        transform = T.NormalizeScale()
        # transform_samp = T.FixedPoints(10000)
        data = Data(batch=torch.zeros_like(x[:, 0]).long(), x=x, pos=points)
        data = transform(data)
        # data = transform_samp(data)
        # data = Data(batch=torch.zeros_like(x[:, 0]).long(), x=x, pos=points)
        # data = Data(x=x, pos=points)
        try:
            nvidia_smi.nvmlInit()
            handle = nvidia_smi.nvmlDeviceGetHandleByIndex(0)
            mem_res = nvidia_smi.nvmlDeviceGetMemoryInfo(handle)
            free_mem = mem_res.free / 1024 ** 2
        except:
            free_mem = 0
        device = torch.device('cuda' if torch.cuda.is_available() and free_mem >= GPU_MEM_LIMIT else 'cpu')
        numb_local_features = x.size(1)
        numb_global_features = 0
        model = Net(numb_local_features, numb_global_features).to(device)
        model.load_state_dict(torch.load(MODEL_PATH, map_location=device))
        model.eval()
        # data_loader = DataLoader([data], batch_size=1, shuffle=False)
        # print(len(data_loader))
        # pred = model(next(iter(data_loader)).to(device))
        pred = model(data.to(device))
        return pred.item()
    else:
        return 'Unable to predict..'
def get_free_gpu_mem(gpu_index):
    nvidia_smi.nvmlInit()
    handle = nvidia_smi.nvmlDeviceGetHandleByIndex(gpu_index)  # query the requested card
    info = nvidia_smi.nvmlDeviceGetMemoryInfo(handle)
    # logging.info("Total GPU memory: {}".format(info.total))
    # logging.info("Free GPU memory: {}".format(info.free))
    # logging.info("Used GPU memory: {}".format(info.used))
    nvidia_smi.nvmlShutdown()
    return info.free
def watch(memory_max):
    nvidia_smi.nvmlInit()
    handle = nvidia_smi.nvmlDeviceGetHandleByIndex(1)
    res = nvidia_smi.nvmlDeviceGetMemoryInfo(handle)
    time.sleep(1)
    if memory_max < res.used:
        memory_max = res.used
    with open("/mnt/mqs02/data/ogawa/BERT/preprocess-for-BERT/conbination/NUMAS/vocab_cost/recipe/32000/memory_cost_3.txt",
              "a", encoding="utf-8") as f:
        result = str(memory_max) + "\n"
        f.write(result)
    print(memory_max)
    return memory_max
def getMem():
    data = psutil.virtual_memory()
    total = data.total  # bytes
    free = data.available
    # print("total virtual memory " + str(total / 1024 / 1024 / 1024))
    # print("free virtual memory " + str(free / 1024 / 1024 / 1024))
    info = nvidia_smi.nvmlDeviceGetMemoryInfo(handle)
    GPU_memory = "GPU Memory usage: " + str(info.used / 1024 / 1024 / 1024) + "GB\n"
    memory = ("CPU Memory usage: " + str((total - free) / 1024 / 1024 / 1024) + "GB\n"
              "CPU Memory usage percent: %d" % (int(round(data.percent))) + "%" + "\n")
    cpu = "CPU usage percent: %0.2f" % psutil.cpu_percent(interval=1) + "%"
    # return memory + cpu
    return GPU_memory + memory + cpu