def train(self, epoch):
    self.model.train()
    print("Epoch {0}/{1}".format(epoch, self.n_epochs))
    t = tqdm(self.train_loader)
    loss_avg = 0.0
    n = 1
    for param_group in self.optimizer.param_groups:
        current_lr = param_group['lr']
    for batch_idx, (stokes, phys) in enumerate(t):
        stokes = stokes.to(self.device)
        phys = phys.to(self.device)
        self.optimizer.zero_grad()
        out_phys = self.model(stokes)

        # Loss
        loss = torch.mean(self.weights[None, :] * (out_phys - phys)**2)
        loss.backward()
        self.optimizer.step()

        loss_avg = self.smooth * loss.item() + (1.0 - self.smooth) * loss_avg

        tmp = nvidia_smi.nvmlDeviceGetUtilizationRates(self.handle)
        t.set_postfix(loss=loss_avg, lr=current_lr, gpu=tmp.gpu, mem=tmp.memory)

    self.loss.append(loss_avg)
def check_gpu_stat():
    nvidia_smi.nvmlInit()
    deviceCount = nvidia_smi.nvmlDeviceGetCount()
    for i in range(deviceCount):
        handle = nvidia_smi.nvmlDeviceGetHandleByIndex(i)
        res = nvidia_smi.nvmlDeviceGetUtilizationRates(handle)
        print(f'gpu{i}: {res.gpu}%, gpu-mem: {res.memory}%')
def test(self):
    self.model.eval()
    loss_L2_avg = 0.0
    n = 1
    t = tqdm(self.test_loader)
    for param_group in self.optimizer.param_groups:
        current_lr = param_group['lr']
    with torch.no_grad():
        for batch_idx, (data, target) in enumerate(t):
            if self.cuda:
                data, target = data.to(self.device), target.to(self.device)
            output = self.model(data)

            # sum up batch loss
            loss_L2 = self.lossfn_L2(output, target)
            loss_L2_avg += (loss_L2.item() - loss_L2_avg) / n
            n += 1

            self.loss_L2_val.append(loss_L2_avg)

            if self.cuda:
                tmp = nvidia_smi.nvmlDeviceGetUtilizationRates(self.handle)
                t.set_postfix(loss=loss_L2_avg, lr=current_lr, gpu=tmp.gpu, mem=tmp.memory)
            else:
                t.set_postfix(loss=loss_L2_avg, lr=current_lr)
def train(self, epoch): """ Train for one epoch """ # Set model in training mode self.model.train() print("Epoch {0}/{1}".format(epoch, self.n_epochs)) t = tqdm(self.train_loader) loss_avg = 0.0 # Get current learning rate for param_group in self.optimizer.param_groups: current_lr = param_group['lr'] for batch_idx, (images, images_ft, variance) in enumerate(t): # Move all data to GPU/CPU images, images_ft, variance = images.to(self.device), images_ft.to( self.device), variance.to(self.device) # Zero the gradients in the optimizer self.optimizer.zero_grad() # Evaluate the model coeff, numerator, denominator, psf, psf_ft, loss = self.model( images, images_ft, variance) # Backpropagate loss.backward() if (batch_idx == 0): loss_avg = loss.item() else: loss_avg = self.smooth * loss.item() + (1.0 - self.smooth) * loss_avg # Update the weights according to the optimizer self.optimizer.step() # Get GPU usage for printing gpu_usage = '' memory_usage = '' if (NVIDIA_SMI): tmp = nvidia_smi.nvmlDeviceGetUtilizationRates(self.handle) gpu_usage = gpu_usage + f' {tmp.gpu}' memory_usage = memory_usage + f' {tmp.memory}' t.set_postfix(loss=loss.item(), loss_avg=loss_avg, lr=current_lr, gpu=gpu_usage, mem=memory_usage) else: t.set_postfix(loss=loss.ite(), loss_avg=loss_avg, lr=current_lr) self.loss.append(loss_avg)
def train(self, epoch):
    self.model.train()
    print("Epoch {0}/{1}".format(epoch, self.n_epochs))
    t = tqdm(self.train_loader)
    loss_avg = 0.0
    n = 1
    for param_group in self.optimizer.param_groups:
        current_lr = param_group['lr']
    for batch_idx, (inputs, outputs) in enumerate(t):
        inputs = inputs.to(self.device)
        outputs = outputs.to(self.device)
        self.optimizer.zero_grad()
        out = self.model(inputs)

        # Loss
        loss = self.loss_fn(out, outputs)
        loss.backward()
        self.optimizer.step()

        loss_avg = self.smooth * loss.item() + (1.0 - self.smooth) * loss_avg

        if (NVIDIA_SMI):
            tmp = nvidia_smi.nvmlDeviceGetUtilizationRates(self.handle)
            t.set_postfix(loss=loss_avg, lr=current_lr, gpu=tmp.gpu, mem=tmp.memory)
        else:
            t.set_postfix(loss=loss_avg, lr=current_lr)

    self.loss.append(loss_avg)
def stats(self, n_iter):
    if self.eps is not None:
        self.log.add_scalar('eps', self.eps(), n_iter)
    if self.handle is not None:
        res = nvidia_smi.nvmlDeviceGetUtilizationRates(self.handle)
        self.log.add_scalar('nvidia/load', res.gpu, n_iter)
        res = nvidia_smi.nvmlDeviceGetMemoryInfo(self.handle)
        self.log.add_scalar('nvidia/mem_gb', res.used / (1024**3), n_iter)
def get():
    handles = []
    output = []
    for device_id in range(nvidia_smi.nvmlDeviceGetCount()):
        handles.append(nvidia_smi.nvmlDeviceGetHandleByIndex(device_id))
    for handle in handles:
        res = nvidia_smi.nvmlDeviceGetUtilizationRates(handle)
        output.append({'usage': res.gpu, 'memory': res.memory})
    return output
def Available_GPUs(self):
    available = []
    for i in range(self.total_gpus):
        handle = nvidia_smi.nvmlDeviceGetHandleByIndex(i)
        res = nvidia_smi.nvmlDeviceGetUtilizationRates(handle)
        mem_res = nvidia_smi.nvmlDeviceGetMemoryInfo(handle)
        if res.gpu < 30 and (mem_res.used / mem_res.total * 100) < 30:
            available.append(i)
    return available
def get_usage(gpu_list=None, **kwargs):
    """ Track GPU memory utilization. """
    _ = kwargs
    gpu_list = gpu_list or [0]

    nvidia_smi.nvmlInit()
    handle = [nvidia_smi.nvmlDeviceGetHandleByIndex(i) for i in gpu_list]
    res = [nvidia_smi.nvmlDeviceGetUtilizationRates(item) for item in handle]
    return [item.memory for item in res]
def available_GPUs(total_gpus):
    available_gpus = []
    for i in range(total_gpus):
        handle = nvidia_smi.nvmlDeviceGetHandleByIndex(i)
        res = nvidia_smi.nvmlDeviceGetUtilizationRates(handle)
        mem_res = nvidia_smi.nvmlDeviceGetMemoryInfo(handle)
        # Jon heuristically defines what it means for a GPU to be available
        if res.gpu < 30 and (mem_res.used / mem_res.total * 100) < 30:
            available_gpus.append(i)
    return available_gpus
def on_train_batch_begin(self, batch, logs=None):
    nvidia_smi.nvmlInit()
    # card id 0 hardcoded here, there is also a call to get all available card ids, so we could iterate
    handle = nvidia_smi.nvmlDeviceGetHandleByIndex(0)
    res = nvidia_smi.nvmlDeviceGetUtilizationRates(handle)
    res1 = nvidia_smi.nvmlDeviceGetMemoryInfo(handle)
    # GPUs = GPU.getGPUs()
    # gpu = GPUs[0]
    print(f'gpu: {res.gpu}%, gpu-mem: {res.memory}%')
def output(self, data_dict, n_iter):
    if self.handle is not None:
        res = nvidia_smi.nvmlDeviceGetUtilizationRates(self.handle)
        self.log.add_scalar('nvidia/load', res.gpu, n_iter)
        res = nvidia_smi.nvmlDeviceGetMemoryInfo(self.handle)
        self.log.add_scalar('nvidia/mem_gb', res.used / (1024 ** 3), n_iter)
    for key, val in data_dict.items():
        if hasattr(val, 'shape') and np.prod(val.shape) > 1:
            self.log.add_histogram(key, val, n_iter)
        else:
            self.log.add_scalar(key, val, n_iter)
def run(data_loader, engine):
    batch_time = AverageMeter()
    gpu = AverageMeter()
    gpu_mem = AverageMeter()

    # Allocate buffers and create a CUDA stream.
    h_input, d_input, h_output, d_output, stream = allocate_buffers(engine)

    # Contexts are used to perform inference.
    input = torch.rand((args.batch_size, ) + ModelData.INPUT_SHAPE)

    # if data_loader != 0:
    with engine.create_execution_context() as context:
        end = time.time()
        # for i in range(args.loop):
        for i, (input, target) in enumerate(data_loader):
            if i == args.loop:
                break
            np.copyto(h_input, input.reshape(-1))
            do_inference(context, h_input, d_input, h_output, d_output, stream)
            batch_time.update(time.time() - end)
            end = time.time()

            # https://pypi.org/project/py3nvml/
            util_rate = nvidia_smi.nvmlDeviceGetUtilizationRates(handle)
            # print(util_rate.gpu, util_rate.memory)
            gpu.update(util_rate.gpu)
            mem_info = nvidia_smi.nvmlDeviceGetMemoryInfo(handle)
            gpu_mem.update(mem_info.used >> 20)

            print("predict:", h_output)
            print("predict: ", h_output.shape)
            print("target:", target)
            print("target:", target.shape)

            if i % args.print_freq == 0 and not args.csv:
                print('[{}/{}] batch time {batch_time.val:.3f} s (avg:{batch_time.avg:.3f})'
                      .format(i, args.loop, batch_time=batch_time))

    # print summary
    if args.csv:
        print("{}, {:.3f}, {:.3f}, {:.3f}, {}".format(
            args.batch_size, args.loop * args.batch_size / batch_time.sum,
            batch_time.avg, gpu.avg, gpu_mem.avg))
    else:
        print("batchsize: {} ".format(args.batch_size))
        print("throughput: {:.3f} img/sec".format(args.loop * args.batch_size / batch_time.sum))
        print("Latency: {:.3f} sec".format(batch_time.avg))
        # see https://forums.fast.ai/t/show-gpu-utilization-metrics-inside-training-loop-without-subprocess-call/26594
        # show GPU utilization metrics inside training loop
        print("GPU util: {:.3f} %, GPU mem: {} MiB".format(gpu.avg, gpu_mem.avg))
def train(self, epoch):
    self.model.train()
    print("Epoch {0}/{1}".format(epoch, self.n_epochs))
    t = tqdm(self.train_loader)
    loss_avg = 0.0
    n = 1
    for param_group in self.optimizer.param_groups:
        current_lr = param_group['lr']
    for batch_idx, (Phi_split, surface, clouds, rho, d_split) in enumerate(t):
        Phi_split, surface, clouds, rho, d_split = Phi_split.to(
            self.device), surface.to(self.device), clouds.to(
                self.device), rho.to(self.device), d_split.to(self.device)

        self.optimizer.zero_grad()
        surf, clouds, out_surface, out_clouds = self.model(
            d_split, self.surf0, self.clouds0, Phi_split, rho, n_epochs=5)

        # Loss
        loss = 0.0
        for i in range(self.K):
            loss += self.loss_fn(out_surface[i], surface)
            # loss += self.loss_fn(out_clouds[i], clouds)

        loss.backward()
        self.optimizer.step()

        if (batch_idx == 0):
            loss_avg = loss.item()
        else:
            loss_avg = self.smooth * loss.item() + (1.0 - self.smooth) * loss_avg

        if (NVIDIA_SMI):
            tmp = nvidia_smi.nvmlDeviceGetUtilizationRates(self.handle)
            t.set_postfix(loss=loss_avg, lr=current_lr, gpu=tmp.gpu, mem=tmp.memory)
        else:
            t.set_postfix(loss=loss_avg, lr=current_lr)

    self.loss.append(loss_avg)
def cal_gpu_util(job):
    ct = 0
    gpu = 0
    nvidia_smi.nvmlInit()
    for key in job.gpus_loc.keys():
        for i in job.gpus_loc[key]:
            ct += 1
            handle = nvidia_smi.nvmlDeviceGetHandleByIndex(i)
            res = nvidia_smi.nvmlDeviceGetUtilizationRates(handle)
            gpu += res.gpu
    if ct > 0:
        avg = gpu / ct
        return avg
    else:
        print('job no gpu')
        return 0
def get_SystemStats(process, NVIDIA_GPU):
    if NVIDIA_GPU:
        deviceCount = nvidia_smi.nvmlDeviceGetCount()
        gpu_memory = []
        gpu_utilization = []
        for i in range(0, deviceCount):
            handle = nvidia_smi.nvmlDeviceGetHandleByIndex(i)
            gpu_stat = nvidia_smi.nvmlDeviceGetUtilizationRates(handle)
            gpu_memory.append(gpu_stat.memory)
            gpu_utilization.append(gpu_stat.gpu)
    else:
        gpu_memory = []
        gpu_utilization = []
    sys_memory = process.memory_info()[0] / 2. ** 30
    return gpu_memory, gpu_utilization, sys_memory
def get_gpu():
    n_gpus = gs.n_gpus()
    G, M = [], []
    for i in xrange(n_gpus):
        bus_id = gs.bus_id(i)
        h = nvidia_smi.nvmlDeviceGetHandleByPciBusId("0000:%d:00.0" % bus_id)
        memutil, gpuutil = [], []
        for k in xrange(100):
            util = nvidia_smi.nvmlDeviceGetUtilizationRates(h)
            memutil.append(util.memory)
            gpuutil.append(util.gpu)
            time.sleep(.01)
        G.append(np.mean(gpuutil))
        M.append(np.mean(memutil))
    print "GPU Utilization:", G
    print "Mem Utilization:", M
    return np.argmin(2 * np.array(G) + np.array(M))
def Waypoint(self):
    elapsedTime = time.perf_counter() - self.WaypointStartTime
    self.WaypointStartTime = time.perf_counter()

    memoryUsage = psutil.virtual_memory()[2]
    cpuUsage = psutil.cpu_percent()

    self.MemoryUsage.append(memoryUsage)
    self.CpuUsage.append(cpuUsage)
    self.ElapsedTime.append(elapsedTime)

    mem_res = nvidia_smi.nvmlDeviceGetMemoryInfo(self.handle)
    res = nvidia_smi.nvmlDeviceGetUtilizationRates(self.handle)
    self.gpu_mem.append(mem_res.used / mem_res.total)
    self.gpu_usage.append(res.gpu)

    return elapsedTime, cpuUsage, memoryUsage, mem_res.used / mem_res.total, res.gpu
def getHardwareStatus(self):
    res = {}
    try:
        c_t = int(psutil.sensors_temperatures()['i350bb'][0].current)
        # c_t = 0
        res = {'cpu': [int(psutil.cpu_percent()), int(psutil.virtual_memory().percent),
                       c_t, len(self.camsList)]}
        if self.isGPU:
            if self.gpuInfo:
                for name in self.gpuInfo:
                    index = self.gpuInfo[name]
                    handle = nvidia_smi.nvmlDeviceGetHandleByIndex(index)
                    r = nvidia_smi.nvmlDeviceGetUtilizationRates(handle)
                    # temperature = 0
                    temperature = nvidia_smi.nvmlDeviceGetTemperature(handle, nvidia_smi.NVML_TEMPERATURE_GPU)
                    num = len(self.gpusActiveList[name].cams)
                    res[name] = [int(r.gpu), int(r.memory), int(temperature), num]
    except:
        print("get hardware")
        print(sys.exc_info())
    return res
def get_vals(self):
    # cmd = ['nvidia-settings', '-t', '-q', 'GPUUtilization']
    # gpu_util = subprocess.check_output(cmd).strip().decode('utf-8').split(",")
    # gpu_util = dict([f.strip().split("=") for f in gpu_util])
    # cmd[-1] = 'UsedDedicatedGPUMemory'
    # gpu_used_mem = subprocess.check_output(cmd).strip().decode('utf-8')
    nvidia_smi.nvmlInit()
    # card id 0 hardcoded here, there is also a call to get all available card ids, so we could iterate
    self.gpu_handle = nvidia_smi.nvmlDeviceGetHandleByIndex(0)
    util_res = nvidia_smi.nvmlDeviceGetUtilizationRates(self.gpu_handle)
    # mem_res = nvidia_smi.nvmlDeviceGetMemoryInfo(self.gpu_handle)
    # current_vals = {"gpu_mem_alloc": mem_res.used / (1024**2), "gpu_graphics_util": int(gpu_util['graphics']),
    #                 "gpu_mem_util": gpu_util['memory'], "time": time.time()}
    current_vals = {
        "gpu_graphics_util": float(util_res.gpu),
        "time": time.time()
    }
    return current_vals
def nvapi():
    nvmlInit()
    ret = {}
    n_gpus = int(nvmlDeviceGetCount())
    ret['n_gpus'] = n_gpus
    for i in range(n_gpus):
        gpu_str = '{}.'.format(i)
        gpu_obj = nvmlDeviceGetHandleByIndex(i)
        ret[gpu_str + 'temp'] = nvmlDeviceGetTemperature(gpu_obj, NVML_TEMPERATURE_GPU)
        this_ram = nvmlDeviceGetMemoryInfo(gpu_obj)
        ret[gpu_str + 'ram.used'] = this_ram.used / MB
        ret[gpu_str + 'ram.total'] = this_ram.total / MB
        ret[gpu_str + 'power.current'] = nvmlDeviceGetPowerUsage(gpu_obj) / 1000.0
        ret[gpu_str + 'power.limit'] = nvmlDeviceGetEnforcedPowerLimit(gpu_obj) / 1.0
        ret[gpu_str + 'util'] = nvmlDeviceGetUtilizationRates(gpu_obj).gpu / 1.0
    nvmlShutdown()
    return ret
def run(model):
    batch_time = AverageMeter()
    gpu = AverageMeter()
    gpu_mem = AverageMeter()

    input = torch.rand((args.batch_size,) + ModelData.INPUT_SHAPE)

    with torch.no_grad():
        end = time.time()
        for i in range(args.loop):
            input_cuda = input.cuda(non_blocking=True)
            model(input_cuda)
            torch.cuda.synchronize()
            batch_time.update(time.time() - end)
            end = time.time()

            # https://pypi.org/project/py3nvml/
            util_rate = nvidia_smi.nvmlDeviceGetUtilizationRates(handle)
            # print(util_rate.gpu, util_rate.memory)
            gpu.update(util_rate.gpu)
            mem_info = nvidia_smi.nvmlDeviceGetMemoryInfo(handle)
            gpu_mem.update(mem_info.used >> 20)

            if i % args.print_freq == 0 and not args.csv:
                print('[{}/{}] batch time {batch_time.val:.3f} s (avg: {batch_time.avg:.3f})'.format(
                    i, args.loop, batch_time=batch_time))

    # print summary
    if args.csv:
        print("{}, {:.3f}, {:.3f}, {:.3f}, {}".format(
            args.batch_size, args.loop * args.batch_size / batch_time.sum,
            batch_time.avg, gpu.avg, gpu_mem.avg))
    else:
        print("batchsize: {} ".format(args.batch_size))
        print("throughput: {:.3f} img/sec".format(args.loop * args.batch_size / batch_time.sum))
        print("Latency: {:.3f} sec".format(batch_time.avg))
        # see https://forums.fast.ai/t/show-gpu-utilization-metrics-inside-training-loop-without-subprocess-call/26594
        # show GPU utilization metrics inside training loop
        print("GPU util: {:.3f} %, GPU mem: {} MiB".format(gpu.avg, gpu_mem.avg))
def train(self, epoch):
    self.model.train()
    print("Epoch {0}/{1} - {2}".format(epoch, self.n_epochs, time.strftime("%Y_%m_%d-%H_%M_%S")))
    t = tqdm(self.train_loader)
    loss_L2_avg = 0.0
    n = 1
    for param_group in self.optimizer.param_groups:
        current_lr = param_group['lr']
    for batch_idx, (data, target) in enumerate(t):
        if self.cuda:
            data, target = data.to(self.device), target.to(self.device)
        self.optimizer.zero_grad()
        output = self.model(data)

        loss_L2 = self.lossfn_L2(output, target)
        loss_L2_avg += (loss_L2.item() - loss_L2_avg) / n
        n += 1
        self.loss_L2.append(loss_L2_avg)

        loss_L2.backward()
        self.optimizer.step()

        if self.cuda:
            tmp = nvidia_smi.nvmlDeviceGetUtilizationRates(self.handle)
            t.set_postfix(loss=loss_L2_avg, lr=current_lr, gpu=tmp.gpu, mem=tmp.memory)
        else:
            t.set_postfix(loss=loss_L2_avg, lr=current_lr)
def show_GPU(self):
    res = nvidia_smi.nvmlDeviceGetUtilizationRates(self.handle)
    # print(f'gpu: {res.gpu}%, gpu-mem: {res.memory}%')
    return res.memory
#!/usr/bin/env python3
# coding: utf8
# author: Tian Xia ([email protected])

import nvidia_smi

if __name__ == "__main__":
    nvidia_smi.nvmlInit()
    for gpu_id in range(8):
        handle = nvidia_smi.nvmlDeviceGetHandleByIndex(gpu_id)
        res = nvidia_smi.nvmlDeviceGetUtilizationRates(handle)
        print(f'gpu: {res.gpu}%, gpu-mem: {res.memory}%')
def resource_util(pid, interval):
    '''
    arg:
        pid: process id (int)

    example return:
    {
        'pid': 24832,
        'cpu': 0.0,
        'mem_total': 3371,
        'mem_shared': 502,
        'mem_data': 3039,
        'gpu_id': 0,
        'gpu_mem': 5985.0,
        'gpu_usage': 100,
        'result': [24832, 0.0, 3371, 502, 3039, 0, 5985.0, 100]
    }
    '''
    nvidia_smi.nvmlInit()

    # Get resources used by process
    p = psutil.Process(pid)
    usage = {'pid': pid}
    result = [pid]

    # CPU usage of current PID
    usage['cpu'] = p.cpu_percent(interval=interval)
    result.append(usage['cpu'])

    # Memory usage of current PID
    mem = p.memory_info()
    # print(mem, type(mem))
    usage['mem_total'] = mem.rss >> 20
    result.append(usage['mem_total'])
    usage['mem_shared'] = mem.shared >> 20
    result.append(usage['mem_shared'])
    usage['mem_data'] = mem.data >> 20
    result.append(usage['mem_data'])

    for process in nvsmi.get_gpu_processes():
        # print(process.pid, process.gpu_id, process.used_memory)
        if process.pid == pid:
            usage['gpu_id'] = int(process.gpu_id)
            result.append(usage['gpu_id'])
            usage['gpu_mem'] = process.used_memory
            result.append(usage['gpu_mem'])
            handle = nvidia_smi.nvmlDeviceGetHandleByIndex(int(process.gpu_id))
            res = nvidia_smi.nvmlDeviceGetUtilizationRates(handle)
            usage['gpu_usage'] = res.gpu  # GPU utilization, may not be due only to this process
            result.append(usage['gpu_usage'])
            break
    else:
        usage['gpu_id'] = None
        result.append(usage['gpu_id'])
        usage['gpu_mem'] = None
        result.append(usage['gpu_mem'])
        usage['gpu_usage'] = None  # GPU utilization, may not be due only to this process
        result.append(usage['gpu_usage'])

    usage['result'] = result
    return usage
def run_scenario(self, path, path1):
    """
    Trigger the start of the scenario and wait for it to finish/fail
    """
    print("ScenarioManager: Running scenario {}".format(self.scenario_tree.name))
    self.start_system_time = time.time()
    start_game_time = GameTime.get_time()
    total_risk_score = []

    self._watchdog.start()
    self._running = True
    x = 0
    while self._running:
        timestamp = None
        world = CarlaDataProvider.get_world()
        if world:
            snapshot = world.get_snapshot()
            if snapshot:
                timestamp = snapshot.timestamp
        if timestamp:
            self._tick_scenario(timestamp)
        x += 1
        if (x % 60 == 1):
            res = nvidia_smi.nvmlDeviceGetUtilizationRates(self.handle)
            mem_res = nvidia_smi.nvmlDeviceGetMemoryInfo(self.handle)
            # print(f'mem: {mem_res.used / (1024**2)} (GiB)')  # usage in GiB
            print(f'mem: {100 * (mem_res.used / mem_res.total):.3f}%')  # percentage usage
            cpu = psutil.cpu_percent()  # CPU utilization stats
            mem = psutil.virtual_memory()  # virtual memory stats
            print(f'gpu: {res.gpu}%, gpu-mem: {res.memory}%')
            fields = ['GPU Utilization', 'GPU Memory', 'CPU Utilization', 'CPU Memory']
            dict = [{'GPU Utilization': res.gpu,
                     'GPU Memory': 100 * (mem_res.used / mem_res.total),
                     'CPU Utilization': cpu,
                     'CPU Memory': 100 * (mem.used / mem.total)}]
            file_exists = os.path.isfile(path1)
            with open(path1, 'a') as csvfile:
                # creating a csv dict writer object
                writer = csv.DictWriter(csvfile, fieldnames=fields)
                if not file_exists:
                    writer.writeheader()
                writer.writerows(dict)
            # total_risk_score.append(risk_score)

    # print("--------------------------------------------------------------------------")
    # print("Average Risk Score:%f" % (float(sum(total_risk_score)) / len(total_risk_score)))
    # print("--------------------------------------------------------------------------")

    self._watchdog.stop()
    self.end_system_time = time.time()
    end_game_time = GameTime.get_time()

    self.scenario_duration_system = self.end_system_time - \
        self.start_system_time
    self.scenario_duration_game = end_game_time - start_game_time

    fields = ['Route Completed', 'Collisions']
    route_completed, collisions = self._console_message()
    dict = [{'Route Completed': route_completed, 'Collisions': collisions}]
    file_exists = os.path.isfile(path)
    with open(path, 'a') as csvfile:
        # creating a csv dict writer object
        writer = csv.DictWriter(csvfile, fieldnames=fields)
        if not file_exists:
            writer.writeheader()
        writer.writerows(dict)
def __get_gpu_info(self):
    def parse_unit(val, scale=1000):
        unit_ls = ['B', 'KB', 'MB', 'GB']
        unit_lv = 0
        while val >= scale:
            val /= scale
            unit_lv += 1
            if unit_lv == len(unit_ls) - 1:
                break
        return '{:.2f} {}'.format(val, unit_ls[unit_lv])

    sum_info = []
    process_ls = []
    nv.nvmlInit()
    gpu_num = nv.nvmlDeviceGetCount()
    # Iterate over every card
    for gpu_idx in range(gpu_num):
        h = nv.nvmlDeviceGetHandleByIndex(gpu_idx)
        dev_name = nv.nvmlDeviceGetName(h).decode()
        raw_total_mem = nv.nvmlDeviceGetMemoryInfo(h).total
        total_mem = parse_unit(raw_total_mem, 1024)
        raw_used_mem = nv.nvmlDeviceGetMemoryInfo(h).used
        used_mem = parse_unit(raw_used_mem, 1024)
        gpu_util = '{:.2f}'.format(nv.nvmlDeviceGetUtilizationRates(h).gpu)
        gpu_mem_util = '{:.2f}'.format(raw_used_mem * 100 / raw_total_mem)

        tmp = {}
        tmp['gpu_idx'] = str(gpu_idx)
        tmp['dev_name'] = dev_name
        tmp['total_mem'] = total_mem
        tmp['used_mem'] = used_mem
        tmp['gpu_util'] = gpu_util
        tmp['gpu_mem_util'] = gpu_mem_util
        sum_info.append(tmp)

        running_process_obj_ls = nv.nvmlDeviceGetComputeRunningProcesses(h)
        for obj in running_process_obj_ls:
            process_pid = obj.pid
            process_type = 'C'
            process_raw_gpu_mem = obj.usedGpuMemory
            process_name = nv.nvmlSystemGetProcessName(process_pid).decode()
            ctan_name = self.get_ctan_name_by_pid(process_pid)

            tmp = {}
            tmp['gpu_idx'] = str(gpu_idx)
            tmp['dev_name'] = dev_name
            tmp['process_pid'] = str(process_pid)
            tmp['process_type'] = process_type
            tmp['process_name'] = process_name
            tmp['process_gpu_mem'] = parse_unit(process_raw_gpu_mem, 1024)
            tmp['ctan_name'] = ctan_name
            process_ls.append(tmp)

        running_process_obj_ls = nv.nvmlDeviceGetGraphicsRunningProcesses(h)
        for obj in running_process_obj_ls:
            process_pid = obj.pid
            process_type = 'G'
            process_raw_gpu_mem = obj.usedGpuMemory
            process_name = nv.nvmlSystemGetProcessName(process_pid).decode()
            ctan_name = self.get_ctan_name_by_pid(process_pid)

            tmp = {}
            tmp['gpu_idx'] = str(gpu_idx)
            tmp['dev_name'] = dev_name
            tmp['process_pid'] = str(process_pid)
            tmp['process_type'] = process_type
            tmp['process_name'] = process_name
            tmp['process_gpu_mem'] = parse_unit(process_raw_gpu_mem, 1024)
            tmp['ctan_name'] = ctan_name
            process_ls.append(tmp)

    return sum_info, process_ls
def run(self):
    import random
    self.time_step = 0.01
    counter = 0
    print_counter = 0
    while (self.running):
        res = []
        for i in range(self.deviceCount):
            res.append(nvidia_smi.nvmlDeviceGetUtilizationRates(self.GPUs[i]))
        # Print every self.print_time #
        if print_counter == int(self.print_time / self.time_step):
            # Print current #
            if self.print_current:
                s = "\t[GPU] "
                for i in range(self.deviceCount):
                    s += "Device %d %s : utilization : %d%%, memory : %d%%\t" % (
                        i, nvmlDeviceGetName(self.GPUs[i]), res[i].gpu, res[i].memory)
                logging.info(s)
            # Print avg #
            if self.print_time < 60:
                logging.info("\n[GPU] Occupation over the last %d seconds" % self.print_time)
            else:
                minutes = self.print_time // 60
                seconds = self.print_time % 60
                logging.info("\n[GPU] Occupation over the last %d minutes, %d seconds" % (minutes, seconds))
            s = "[GPU] "
            for i in range(self.deviceCount):
                self.occAvgStep[i] /= (print_counter * self.time_step)
                self.memAvgStep[i] /= (print_counter * self.time_step)
                s += "Device %d %s : utilization : %d%%, memory : %d%%\t" % (
                    i, nvmlDeviceGetName(self.GPUs[i]), self.occAvgStep[i], self.memAvgStep[i])
                # Reinitialize average #
                self.occAvgStep[i] = 0
                self.memAvgStep[i] = 0
            logging.info(s)
            # reset printing counter #
            print_counter = 0
        # Add to total and step #
        for i in range(self.deviceCount):
            self.occAvgTot[i] += res[i].gpu * self.time_step
            self.occAvgStep[i] += res[i].gpu * self.time_step
            self.memAvgTot[i] += res[i].memory * self.time_step
            self.memAvgStep[i] += res[i].memory * self.time_step
        # Sleep and counters #
        print_counter += 1
        counter += 1
        sleep(self.time_step)
    # Print total #
    logging.info("[GPU] Average occupation over whole period")
    s = "[GPU] "
    for i in range(self.deviceCount):
        self.occAvgTot[i] /= (counter * self.time_step)
        self.memAvgTot[i] /= (counter * self.time_step)
        s += "Device %d %s : utilization : %d%%, memory : %d%%\t" % (
            i, nvmlDeviceGetName(self.GPUs[i]), self.occAvgTot[i], self.memAvgTot[i])
    logging.info(s)
def gpu_mem_usage():
    return nvidia_smi.nvmlDeviceGetUtilizationRates(handle).memory / 100
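# The gpu_mem_usage() one-liner above assumes NVML is already initialized and that a
# module-level `handle` exists. Below is a minimal, self-contained sketch of that assumed
# setup (device index 0 and the surrounding names are illustrative, not from the original
# source). Note that UtilizationRates.memory reports the memory-controller utilization
# percentage over the last sample period, not the fraction of memory currently allocated.
import nvidia_smi

nvidia_smi.nvmlInit()
handle = nvidia_smi.nvmlDeviceGetHandleByIndex(0)  # assumed: query the first GPU

def gpu_mem_usage():
    # Convert the reported percentage to a 0-1 fraction.
    return nvidia_smi.nvmlDeviceGetUtilizationRates(handle).memory / 100

print(gpu_mem_usage())
nvidia_smi.nvmlShutdown()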
def inference():
    useGpu = True
    fileName = "run_"
    if not useGpu:
        fileName += "cpu_"

    device = torch.device("cuda:0" if useGpu and torch.cuda.is_available() else "cpu")

    os.makedirs('results', exist_ok=True)
    f = open("results/" + fileName + str(int(round(time.time() * 1000))) + ".txt", "w+")
    f.write('=== Start time: ' + str(datetime.now()) + '\n')

    p = psutil.Process(os.getpid())
    nvidia_smi.nvmlInit()
    handle = nvidia_smi.nvmlDeviceGetHandleByIndex(0)

    model = RedNet_model.RedNet(pretrained=False)
    load_ckpt(model, None, args.last_ckpt, device)
    model.eval()
    model.to(device)

    print('Starting list image files')
    filesCount = 0
    files = glob.glob("datasets/mestrado/**/rgb/*.png", recursive=True)
    files.extend(glob.glob("datasets/mestrado/**/rgb/*.jpg", recursive=True))

    cpuTimes = [0.0, 0.0, 0.0, 0.0]
    gpuTimes = 0.0
    gpuMemTimes = 0.0
    maxNumThreads = 0
    memUsageTimes = 0

    for imagePath in files:
        print('imagePath: ' + imagePath)
        pathRgb = Path(imagePath)
        datasetName = osp.basename(str(pathRgb.parent.parent))
        # print('datasetName: ' + datasetName)
        parentDatasetDir = str(pathRgb.parent.parent)
        # print('parentDatasetDir: ' + parentDatasetDir)
        depthImageName = os.path.basename(imagePath).replace('jpg', 'png')

        image = imageio.imread(imagePath)
        depth = imageio.imread(parentDatasetDir + '/depth/' + depthImageName)

        if datasetName == "active_vision" or datasetName == "putkk":
            image = image[0:1080, 240:1680]
            depth = depth[0:1080, 240:1680]
        elif datasetName == "semantics3d_mod":
            image = image[270:1080, 0:1080]
            depth = depth[270:1080, 0:1080]
        elif datasetName == "semantics3d_raw":
            image = image[64:1024, 0:1280]
            depth = depth[64:1024, 0:1280]

        # Bi-linear
        image = skimage.transform.resize(image, (image_h, image_w), order=1,
                                         mode='reflect', preserve_range=True)
        # Nearest-neighbor
        depth = skimage.transform.resize(depth, (image_h, image_w), order=0,
                                         mode='reflect', preserve_range=True)

        image = image / 255
        image = torch.from_numpy(image).float()
        depth = torch.from_numpy(depth).float()
        image = image.permute(2, 0, 1)
        depth.unsqueeze_(0)

        image = torchvision.transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                                 std=[0.229, 0.224, 0.225])(image)
        depth = torchvision.transforms.Normalize(mean=[19050], std=[9650])(depth)

        image = image.to(device).unsqueeze_(0)
        depth = depth.to(device).unsqueeze_(0)

        pred = model(image, depth)

        res = nvidia_smi.nvmlDeviceGetUtilizationRates(handle)
        mem_res = nvidia_smi.nvmlDeviceGetMemoryInfo(handle)
        curGpuTime = res.gpu
        # curGpuMemTime = res.memory  # (in percent)
        curGpuMemTime = mem_res.used / 1e+6
        gpuTimes += curGpuTime
        gpuMemTimes += curGpuMemTime
        f.write('GPU Usage Percent: ' + str(curGpuTime) + '\n')
        f.write('GPU Mem Usage (MB): ' + str(curGpuMemTime) + '\n')

        curProcessCpuPerU = p.cpu_percent()
        curCpusPerU = psutil.cpu_percent(interval=None, percpu=True)  # gives one float per CPU
        for i in range(len(cpuTimes)):
            curProcessCpu = curProcessCpuPerU
            curCpu = curCpusPerU[i]
            cpuTimes[i] += curCpu
            f.write('Process CPU Percent: ' + str(curProcessCpu) + ' --- CPU Percent: ' + str(curCpu) + '\n')

        # you can convert that object to a dictionary
        memInfo = dict(p.memory_full_info()._asdict())
        curMemUsage = memInfo['uss']
        memUsageTimes += curMemUsage
        f.write('Process memory usage: ' + str(curMemUsage / 1e+6) + '\n')
        f.write('Memory information: ' + str(memInfo) + '\n')

        if maxNumThreads < p.num_threads():
            maxNumThreads = p.num_threads()

        # print('############## Index: ')
        # print(index)

        os.makedirs('results/' + datasetName, exist_ok=True)
        output = utils.to_label(torch.max(pred, 1)[1] + 1)
        # output = utils.to_label(torch.max(pred, 1)[1] + 1)[0]
        # imageio.imsave('results/' + datasetName + '/' + depthImageName, output.cpu().numpy().transpose((1, 2, 0)))
        # imageio.imsave('results/' + datasetName + '/' + depthImageName, output)
        lbl_pil = PIL.Image.fromarray(output.astype(np.uint8), mode='P')
        lbl_pil.save('results/' + datasetName + '/' + depthImageName)
        filesCount = filesCount + 1

        del image, depth, pred, output
        torch.cuda.empty_cache()

    nvidia_smi.nvmlShutdown()

    start = time.time()
    for imagePath in files:
        pathRgb = Path(imagePath)
        datasetName = osp.basename(str(pathRgb.parent.parent))
        parentDatasetDir = str(pathRgb.parent.parent)
        depthImageName = os.path.basename(imagePath).replace('jpg', 'png')

        image = imageio.imread(imagePath)
        depth = imageio.imread(parentDatasetDir + '/depth/' + depthImageName)

        if datasetName == "active_vision" or datasetName == "putkk":
            image = image[0:1080, 240:1680]
            depth = depth[0:1080, 240:1680]
        elif datasetName == "semantics3d_mod":
            image = image[270:1080, 0:1080]
            depth = depth[270:1080, 0:1080]
        elif datasetName == "semantics3d_raw":
            image = image[64:1024, 0:1280]
            depth = depth[64:1024, 0:1280]

        # Bi-linear
        image = skimage.transform.resize(image, (image_h, image_w), order=1,
                                         mode='reflect', preserve_range=True)
        # Nearest-neighbor
        depth = skimage.transform.resize(depth, (image_h, image_w), order=0,
                                         mode='reflect', preserve_range=True)

        image = image / 255
        image = torch.from_numpy(image).float()
        depth = torch.from_numpy(depth).float()
        image = image.permute(2, 0, 1)
        depth.unsqueeze_(0)

        image = torchvision.transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                                 std=[0.229, 0.224, 0.225])(image)
        depth = torchvision.transforms.Normalize(mean=[19050], std=[9650])(depth)

        image = image.to(device).unsqueeze_(0)
        depth = depth.to(device).unsqueeze_(0)

        pred = model(image, depth)

        del image, depth, pred
        # torch.cuda.empty_cache()
    end = time.time()

    f.write('=== Mean GPU Usage Percent: ' + str(gpuTimes / filesCount) + '\n')
    f.write('=== Mean GPU Mem Usage (MB): ' + str(gpuMemTimes / filesCount) + '\n')
    for i in range(len(cpuTimes)):
        f.write("=== Mean cpu" + str(i) + " usage: " + str(cpuTimes[i] / filesCount) + '\n')
    f.write("=== Mean memory usage (MB): " + str((memUsageTimes / filesCount) / 1e+6) + '\n')
    f.write("=== Total image predicted: " + str(filesCount) + '\n')
    f.write("=== Seconds per image: " + str(((end - start) / filesCount)) + '\n')
    f.write("=== Max num threads: " + str(maxNumThreads) + '\n')
    f.write('=== End time: ' + str(datetime.now()) + '\n')
    f.close()