def get_gpu_memory():
    import nvidia_smi
    nvidia_smi.nvmlInit()
    handle = nvidia_smi.nvmlDeviceGetHandleByIndex(0)
    info = nvidia_smi.nvmlDeviceGetMemoryInfo(handle)
    print("Used GPU memory: {}%".format((info.used * 100) // info.total))
    nvidia_smi.nvmlShutdown()
def get_mem_info(device_id):
    """Return the used-memory percentage of the GPU at `device_id`."""
    gpu_list = [device_id]
    nvidia_smi.nvmlInit()
    handle = [nvidia_smi.nvmlDeviceGetHandleByIndex(i) for i in gpu_list]
    res = [nvidia_smi.nvmlDeviceGetMemoryInfo(item) for item in handle]
    res = [100 * item.used / item.total for item in res]
    nvidia_smi.nvmlShutdown()
    return res[0]
def print_gpu_info(idx=0):
    nvidia_smi.nvmlInit()
    handle = nvidia_smi.nvmlDeviceGetHandleByIndex(idx)
    info = nvidia_smi.nvmlDeviceGetMemoryInfo(handle)
    print("Total memory:", info.total)
    print("Free memory:", info.free)
    print("Used memory:", info.used)
    nvidia_smi.nvmlShutdown()
def get_usage(gpu_list=None, **kwargs):
    """Track GPU memory usage.

    Returns the used-memory percentage of each device in `gpu_list` (defaults to GPU 0).
    """
    _ = kwargs
    gpu_list = gpu_list or [0]
    nvidia_smi.nvmlInit()
    handle = [nvidia_smi.nvmlDeviceGetHandleByIndex(i) for i in gpu_list]
    res = [nvidia_smi.nvmlDeviceGetMemoryInfo(item) for item in handle]
    res = [100 * item.used / item.total for item in res]
    nvidia_smi.nvmlShutdown()
    return res
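# Usage sketch for get_usage (assumptions: `import nvidia_smi` at module level
# and two visible GPUs; adjust gpu_list to your machine). It returns one
# used-memory percentage per requested device.
for idx, pct in zip([0, 1], get_usage(gpu_list=[0, 1])):
    print("GPU {}: {:.1f}% of memory in use".format(idx, pct))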
def use_gpu():
    """Return True if GPU 0 already has more than ~1 GB of memory in use."""
    nvidia_smi.nvmlInit()
    handle = nvidia_smi.nvmlDeviceGetHandleByIndex(0)
    info = nvidia_smi.nvmlDeviceGetMemoryInfo(handle)
    nvidia_smi.nvmlShutdown()
    return info.used > 1000000000
def gpu_usage():
    nvidia_smi.nvmlInit()
    # Card id 0 is hardcoded here; there is also a call to get all available
    # card ids, so we could iterate.
    handle = nvidia_smi.nvmlDeviceGetHandleByIndex(0)
    info = nvidia_smi.nvmlDeviceGetMemoryInfo(handle)
    usage = info.used
    nvidia_smi.nvmlShutdown()
    return usage
def check_cuda_memory():
    nvidia_smi.nvmlInit()
    deviceCount = nvidia_smi.nvmlDeviceGetCount()
    for i in range(deviceCount):
        handle = nvidia_smi.nvmlDeviceGetHandleByIndex(i)
        info = nvidia_smi.nvmlDeviceGetMemoryInfo(handle)
        print("Device {}: {}, Memory : ({:.2f}% free): {} (total), {} (free), {} (used)"
              .format(i, nvidia_smi.nvmlDeviceGetName(handle),
                      100 * info.free / info.total,
                      info.total, info.free, info.used))
    nvidia_smi.nvmlShutdown()
    return
def get_device(gpuID=False):
    """Checks available GPUs and selects the one with the most available memory

    Parameters
    ----------
    gpuID: bool or int
        whether to use GPU, or the device ID of a specific GPU to use. If False,
        use only CPU. If True, attempts to find the GPU with most available memory.

    Returns
    -------
    device : jax.device
        handle to gpu or cpu device selected

    """
    import jax

    if gpuID is False:
        return jax.devices('cpu')[0]

    try:
        gpus = jax.devices('gpu')
        # did the user request a specific GPU?
        if isinstance(gpuID, int) and gpuID < len(gpus):
            return gpus[gpuID]
        if isinstance(gpuID, int):
            from desc.backend import TextColors
            # ID was not valid
            warnings.warn(TextColors.WARNING +
                          'gpuID did not match any found devices, trying default gpu option' +
                          TextColors.ENDC)
        # find all available options and see which has the most space
        import nvidia_smi
        nvidia_smi.nvmlInit()
        maxmem = 0
        gpu = gpus[0]
        for i in range(len(gpus)):
            handle = nvidia_smi.nvmlDeviceGetHandleByIndex(i)
            info = nvidia_smi.nvmlDeviceGetMemoryInfo(handle)
            if info.free > maxmem:
                maxmem = info.free
                gpu = gpus[i]
        nvidia_smi.nvmlShutdown()
        return gpu

    except:
        from desc.backend import TextColors
        warnings.warn(TextColors.WARNING +
                      'No GPU found, falling back to CPU' + TextColors.ENDC)
        return jax.devices('cpu')[0]
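# Usage sketch for get_device (assumptions: JAX is installed and the surrounding
# project provides the desc.backend.TextColors used above; the array is purely
# illustrative).
import jax
import jax.numpy as jnp

device = get_device(gpuID=True)            # GPU with the most free memory, else CPU
x = jax.device_put(jnp.ones((1024, 1024)), device)
print("array placed on:", device)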
def get_free_gpu_mem(gpu_index):
    """Return the free memory (in bytes) of the GPU at `gpu_index`."""
    nvidia_smi.nvmlInit()
    handle = nvidia_smi.nvmlDeviceGetHandleByIndex(gpu_index)
    info = nvidia_smi.nvmlDeviceGetMemoryInfo(handle)
    # logging.info("Total GPU memory: {}".format(info.total))
    # logging.info("Free GPU memory: {}".format(info.free))
    # logging.info("Used GPU memory: {}".format(info.used))
    nvidia_smi.nvmlShutdown()
    return info.free
def kill(self):
    self.log.debug("GPUsmanager try to stop")
    self._stopevent.set()
    nvidia_smi.nvmlShutdown()
    for id in list(self.gpusActiveList):
        try:
            self.log.debug("GPUsmanager try stop gpu: " + str(id))
            self.gpusActiveList[id].kill()
        except:
            self.log.debug('GPUsmanager Error stop gpu: ' + str(id))
    time.sleep(4)
    for id in list(self.gpusActiveList):
        try:
            ready = self.gpusActiveList[id].id
        except:
            self.log.debug('GPUsmanager Error stop gpu: ' + str(id))
            del self.gpusActiveList[id]
    if len(self.gpusActiveList) == 0:
        self.log.debug('GPUsmanager No active gpus now...')
    else:
        self.log.error('GPUsmanager Still active gpus: ' +
                       " ".join([str(key) for key in self.gpusActiveList.keys()]))
def nvapi():
    # Assumes the bare NVML calls below come from a star import of the NVML
    # bindings (e.g. `from pynvml import *`) and that MB is a module-level
    # byte constant (e.g. 1024 ** 2).
    nvmlInit()
    ret = {}
    n_gpus = int(nvmlDeviceGetCount())
    ret['n_gpus'] = n_gpus
    for i in range(n_gpus):
        gpu_str = '{}.'.format(i)
        gpu_obj = nvmlDeviceGetHandleByIndex(i)
        ret[gpu_str + 'temp'] = nvmlDeviceGetTemperature(gpu_obj, NVML_TEMPERATURE_GPU)
        this_ram = nvmlDeviceGetMemoryInfo(gpu_obj)
        ret[gpu_str + 'ram.used'] = this_ram.used / MB
        ret[gpu_str + 'ram.total'] = this_ram.total / MB
        ret[gpu_str + 'power.current'] = nvmlDeviceGetPowerUsage(gpu_obj) / 1000.0
        ret[gpu_str + 'power.limit'] = nvmlDeviceGetEnforcedPowerLimit(gpu_obj) / 1.0
        ret[gpu_str + 'util'] = nvmlDeviceGetUtilizationRates(gpu_obj).gpu / 1.0
    nvmlShutdown()
    return ret
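# Usage sketch for nvapi (same assumptions as noted in the function above).
# Each per-GPU metric is keyed as "<index>.<name>".
stats = nvapi()
print("GPUs found:", stats['n_gpus'])
for key in ('0.temp', '0.ram.used', '0.util'):
    print(key, stats.get(key))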
log.info('%s gen = %s', arch, genotype)
if genotype in trained_gen.values():
    same_arch = list(trained_gen.keys())[list(trained_gen.values()).index(genotype)]
    log.info("Skipping arch %s because its genotype was already trained by %s",
             arch, same_arch)
else:
    trained_gen[arch] = genotype
    batch_size = calculate_batch_size(batch_model, gpu_memory, genotype,
                                      args.init_channels, CIFAR_CLASSES,
                                      args.layers, args.auxiliary)
    args.arch = arch
    trained = False
    while not trained:
        try:
            args.batch_size = batch_size
            train.main(args)
            trained = True
        except RuntimeError as e:
            if "out of memory" in str(e) and batch_size > 5:
                log.error(e)
                batch_size -= 5
                log.info(f"Retrying to train with smaller batch size of {batch_size}")
            else:
                raise e
nvidia_smi.nvmlShutdown()
def inference():
    useGpu = True
    fileName = "run_"
    if not useGpu:
        fileName += "cpu_"
    device = torch.device("cuda:0" if useGpu and torch.cuda.is_available() else "cpu")
    os.makedirs('results', exist_ok=True)
    f = open("results/" + fileName + str(int(round(time.time() * 1000))) + ".txt", "w+")
    f.write('=== Start time: ' + str(datetime.now()) + '\n')
    p = psutil.Process(os.getpid())
    nvidia_smi.nvmlInit()
    handle = nvidia_smi.nvmlDeviceGetHandleByIndex(0)
    model = RedNet_model.RedNet(pretrained=False)
    load_ckpt(model, None, args.last_ckpt, device)
    model.eval()
    model.to(device)
    print('Starting list image files')
    filesCount = 0
    files = glob.glob("datasets/mestrado/**/rgb/*.png", recursive=True)
    files.extend(glob.glob("datasets/mestrado/**/rgb/*.jpg", recursive=True))
    cpuTimes = [0.0, 0.0, 0.0, 0.0]
    gpuTimes = 0.0
    gpuMemTimes = 0.0
    maxNumThreads = 0
    memUsageTimes = 0
    for imagePath in files:
        print('imagePath: ' + imagePath)
        pathRgb = Path(imagePath)
        datasetName = osp.basename(str(pathRgb.parent.parent))
        # print('datasetName: ' + datasetName)
        parentDatasetDir = str(pathRgb.parent.parent)
        # print('parentDatasetDir: ' + parentDatasetDir)
        depthImageName = os.path.basename(imagePath).replace('jpg', 'png')
        image = imageio.imread(imagePath)
        depth = imageio.imread(parentDatasetDir + '/depth/' + depthImageName)
        if datasetName == "active_vision" or datasetName == "putkk":
            image = image[0:1080, 240:1680]
            depth = depth[0:1080, 240:1680]
        elif datasetName == "semantics3d_mod":
            image = image[270:1080, 0:1080]
            depth = depth[270:1080, 0:1080]
        elif datasetName == "semantics3d_raw":
            image = image[64:1024, 0:1280]
            depth = depth[64:1024, 0:1280]
        # Bi-linear
        image = skimage.transform.resize(image, (image_h, image_w), order=1,
                                         mode='reflect', preserve_range=True)
        # Nearest-neighbor
        depth = skimage.transform.resize(depth, (image_h, image_w), order=0,
                                         mode='reflect', preserve_range=True)
        image = image / 255
        image = torch.from_numpy(image).float()
        depth = torch.from_numpy(depth).float()
        image = image.permute(2, 0, 1)
        depth.unsqueeze_(0)
        image = torchvision.transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                                 std=[0.229, 0.224, 0.225])(image)
        depth = torchvision.transforms.Normalize(mean=[19050], std=[9650])(depth)
        image = image.to(device).unsqueeze_(0)
        depth = depth.to(device).unsqueeze_(0)
        pred = model(image, depth)
        res = nvidia_smi.nvmlDeviceGetUtilizationRates(handle)
        mem_res = nvidia_smi.nvmlDeviceGetMemoryInfo(handle)
        curGpuTime = res.gpu
        # curGpuMemTime = res.memory  # (in percent)
        curGpuMemTime = mem_res.used / 1e+6
        gpuTimes += curGpuTime
        gpuMemTimes += curGpuMemTime
        f.write('GPU Usage Percent: ' + str(curGpuTime) + '\n')
        f.write('GPU Mem Usage (MB): ' + str(curGpuMemTime) + '\n')
        curProcessCpuPerU = p.cpu_percent()  # gives a single float value
        curCpusPerU = psutil.cpu_percent(interval=None, percpu=True)
        for i in range(len(cpuTimes)):
            curProcessCpu = curProcessCpuPerU
            curCpu = curCpusPerU[i]
            cpuTimes[i] += curCpu
            f.write('Process CPU Percent: ' + str(curProcessCpu) +
                    ' --- CPU Percent: ' + str(curCpu) + '\n')
        # you can convert that object to a dictionary
        memInfo = dict(p.memory_full_info()._asdict())
        curMemUsage = memInfo['uss']
        memUsageTimes += curMemUsage
        f.write('Process memory usage: ' + str(curMemUsage / 1e+6) + '\n')
        f.write('Memory information: ' + str(memInfo) + '\n')
        if maxNumThreads < p.num_threads():
            maxNumThreads = p.num_threads()
        # print('############## Index: ')
        # print(index)
        os.makedirs('results/' + datasetName, exist_ok=True)
        output = utils.to_label(torch.max(pred, 1)[1] + 1)
        # output = utils.to_label(torch.max(pred, 1)[1] + 1)[0]
        # imageio.imsave('results/' + datasetName + '/' + depthImageName, output.cpu().numpy().transpose((1, 2, 0)))
        # imageio.imsave('results/' + datasetName + '/' + depthImageName, output)
        lbl_pil = PIL.Image.fromarray(output.astype(np.uint8), mode='P')
        lbl_pil.save('results/' + datasetName + '/' + depthImageName)
        filesCount = filesCount + 1
        del image, depth, pred, output
        torch.cuda.empty_cache()
    nvidia_smi.nvmlShutdown()
    start = time.time()
    for imagePath in files:
        pathRgb = Path(imagePath)
        datasetName = osp.basename(str(pathRgb.parent.parent))
        parentDatasetDir = str(pathRgb.parent.parent)
        depthImageName = os.path.basename(imagePath).replace('jpg', 'png')
        image = imageio.imread(imagePath)
        depth = imageio.imread(parentDatasetDir + '/depth/' + depthImageName)
        if datasetName == "active_vision" or datasetName == "putkk":
            image = image[0:1080, 240:1680]
            depth = depth[0:1080, 240:1680]
        elif datasetName == "semantics3d_mod":
            image = image[270:1080, 0:1080]
            depth = depth[270:1080, 0:1080]
        elif datasetName == "semantics3d_raw":
            image = image[64:1024, 0:1280]
            depth = depth[64:1024, 0:1280]
        # Bi-linear
        image = skimage.transform.resize(image, (image_h, image_w), order=1,
                                         mode='reflect', preserve_range=True)
        # Nearest-neighbor
        depth = skimage.transform.resize(depth, (image_h, image_w), order=0,
                                         mode='reflect', preserve_range=True)
        image = image / 255
        image = torch.from_numpy(image).float()
        depth = torch.from_numpy(depth).float()
        image = image.permute(2, 0, 1)
        depth.unsqueeze_(0)
        image = torchvision.transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                                 std=[0.229, 0.224, 0.225])(image)
        depth = torchvision.transforms.Normalize(mean=[19050], std=[9650])(depth)
        image = image.to(device).unsqueeze_(0)
        depth = depth.to(device).unsqueeze_(0)
        pred = model(image, depth)
        del image, depth, pred
        # torch.cuda.empty_cache()
    end = time.time()
    f.write('=== Mean GPU Usage Percent: ' + str(gpuTimes / filesCount) + '\n')
    f.write('=== Mean GPU Mem Usage (MB): ' + str(gpuMemTimes / filesCount) + '\n')
    for i in range(len(cpuTimes)):
        f.write("=== Mean cpu" + str(i) + " usage: " + str(cpuTimes[i] / filesCount) + '\n')
    f.write("=== Mean memory usage (MB): " + str((memUsageTimes / filesCount) / 1e+6) + '\n')
    f.write("=== Total images predicted: " + str(filesCount) + '\n')
    f.write("=== Seconds per image: " + str((end - start) / filesCount) + '\n')
    f.write("=== Max num threads: " + str(maxNumThreads) + '\n')
    f.write('=== End time: ' + str(datetime.now()) + '\n')
    f.close()
def __exit__(self, exc_type, exc_val, exc_tb):
    nvidia_smi.nvmlShutdown()
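# A minimal sketch of the context manager this __exit__ presumably belongs to
# (the class name NVMLContext and the __enter__ body are assumptions, not the
# original code): __enter__ initialises NVML and returns the module, and
# __exit__ shuts NVML down even if the body raises.
import nvidia_smi

class NVMLContext:
    def __enter__(self):
        nvidia_smi.nvmlInit()        # start an NVML session
        return nvidia_smi

    def __exit__(self, exc_type, exc_val, exc_tb):
        nvidia_smi.nvmlShutdown()    # always release the NVML session

# Example use:
# with NVMLContext() as smi:
#     handle = smi.nvmlDeviceGetHandleByIndex(0)
#     print(smi.nvmlDeviceGetMemoryInfo(handle).used)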
def get_gpu_memory():
    """Return the NVML memory-info struct (total/free/used, in bytes) for GPU 0."""
    nvidia_smi.nvmlInit()
    handle = nvidia_smi.nvmlDeviceGetHandleByIndex(0)
    info = nvidia_smi.nvmlDeviceGetMemoryInfo(handle)
    nvidia_smi.nvmlShutdown()
    return info
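# Usage sketch (assumptions: `import nvidia_smi` at module level and at least
# one visible NVIDIA GPU). The returned NVML struct reports byte counts.
info = get_gpu_memory()
print("GPU 0: {:.0f} MiB used of {:.0f} MiB".format(info.used / 2**20, info.total / 2**20))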