def main(args):
    # get all paths
    print('chk1')
    GPUtil.showUtilization()
    model = GecBERTModel(vocab_path=args.vocab_path,
                         model_paths=args.model_path,
                         max_len=args.max_len,
                         min_len=args.min_len,
                         iterations=args.iteration_count,
                         min_error_probability=args.min_error_probability,
                         min_probability=args.min_error_probability,
                         lowercase_tokens=args.lowercase_tokens,
                         model_name=args.transformer_model,
                         special_tokens_fix=args.special_tokens_fix,
                         log=False,
                         confidence=args.additional_confidence,
                         is_ensemble=args.is_ensemble,
                         weigths=args.weights,  # 'weigths' matches the (misspelled) GecBERTModel kwarg
                         prune_amount=args.prune_amount,
                         num_layers_to_keep=args.keep)
    GPUtil.showUtilization()
    print('chk2')

    cnt_corrections = predict_for_file(args.input_file, args.output_file, model,
                                       batch_size=args.batch_size)
    # evaluate with m2 or ERRANT
    print(f"Produced overall corrections: {cnt_corrections}")
def configure_tf_devices(visible_ids=None):
    # Do nothing if no visible GPU IDs
    if not visible_ids or visible_ids[0] == -1:
        return
    try:
        deviceIDs = GPUtil.getAvailable(order='load', limit=100, maxLoad=0.5,
                                        maxMemory=0.5, includeNan=False,
                                        excludeID=[], excludeUUID=[])
    except ValueError:
        cprint(NO_NVIDIA_GPUS, 'yellow')
        return

    deviceIDs = [id_ for id_ in deviceIDs if id_ in visible_ids]
    if not deviceIDs:
        cprint("Error: Currently, no GPU is eligible "
               "(available memory and load at <=50%)", "red")
        GPUtil.showUtilization()
        return
    cprint("GPUs with utilization and memory load <50%: {}".format(
        ', '.join(str(x) for x in deviceIDs)), "green")
    return deviceIDs
def on_epoch_end(self, epoch, logs=None):
    x, y = self.test_data
    loss, acc = self.model.evaluate(x, y, batch_size=2, verbose=0)
    self.test_loss.append(loss)
    self.test_acc.append(acc)
    GPUtil.showUtilization()
    print('\nTesting loss: {}, acc: {}\n'.format(loss, acc))
def on_train_batch_begin(self, batch, logs=None):
    if self.record and batch in (2, 20):
        with open('logs/BRNN_GPU_Utils.txt', 'a') as f:
            with contextlib.redirect_stdout(f):
                print('Batch {} Begin.'.format(batch))
                GPUtil.showUtilization()
def on_epoch_begin(self, epoch, logs=None):
    if epoch == 5:
        self.record = True
        with open('logs/BRNN_GPU_Utils.txt', 'a') as f:
            with contextlib.redirect_stdout(f):
                print('Epoch {} Begin.'.format(epoch))
                GPUtil.showUtilization()
def get_dev(n=1, ok=range(8), mem=(0.1, 0.45), sleep=20):
    import GPUtil
    import time

    def _limit(devs, ok):
        return [int(dev) for dev in devs if int(dev) in ok]

    def get_dev_one(mem):
        devs = GPUtil.getAvailable(order='memory', maxLoad=1, maxMemory=mem, limit=n)
        devs = _limit(devs, ok)
        if len(devs) >= n:
            logging.info('available {}'.format(devs))
            return devs
        return []

    logging.info('Auto select gpu')
    GPUtil.showUtilization()
    devs = []
    while len(devs) < n:
        # try the strict memory threshold first, then the relaxed one
        devs = get_dev_one(mem[0])
        if devs:
            return devs
        devs = get_dev_one(mem[1])
        if devs:
            return devs
        print('not enough devices available')
        GPUtil.showUtilization()
        time.sleep(sleep)
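# A minimal usage sketch of get_dev above (the logging setup and GPU counts
# are illustrative assumptions): block until two of the first four GPUs have
# at most 10% (then, as a fallback, 45%) of their memory in use.
import logging
logging.basicConfig(level=logging.INFO)

devs = get_dev(n=2, ok=range(4))
print('selected GPUs:', devs)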
def forward(self, x):
    print("Net10a GPU")
    GPUtil.showUtilization()
    print("Net10a pre assigned x: " + str(x))
    x = self.features(x)
    # do not flatten
    print("Net10a post assigned x: " + str(x))
    return x
def _make_layers(self, batch_norm=True):
    layers = []
    in_channels = self.in_channels
    for tup in self.cfg:
        assert len(tup) == 2
        print("adding cluster layer")
        GPUtil.showUtilization()
        out, dilation = tup
        sz = self.conv_size
        stride = 1
        pad = self.pad  # to avoid shrinking
        if out == 'M':
            layers += [nn.MaxPool2d(kernel_size=2, stride=2)]
        elif out == 'A':
            layers += [nn.AvgPool2d(kernel_size=2, stride=2)]
        else:
            conv2d = nn.Conv2d(in_channels, out, kernel_size=sz, stride=stride,
                               padding=pad, dilation=dilation, bias=False)
            if batch_norm:
                layers += [conv2d,
                           nn.BatchNorm2d(out, track_running_stats=self.batchnorm_track),
                           nn.ReLU(inplace=True)]
            else:
                layers += [conv2d, nn.ReLU(inplace=True)]
            in_channels = out
    return nn.Sequential(*layers)
def train(self):
    r"""Training function."""
    # setup
    self.model.train()
    self.monitor.reset()
    self.optimizer.zero_grad()

    for iteration in range(self.total_iter_nums):
        iter_total = self.start_iter + iteration
        start = time.perf_counter()

        # load data
        batch = next(self.dataloader)
        volume, target = batch
        time1 = time.perf_counter()

        target_vis = target
        volume = volume.to(self.device, dtype=torch.float)
        volume = volume.unsqueeze(1)
        target = target[0].to(self.device, dtype=torch.long)
        target = target.squeeze(axis=1)

        pred = self.model(volume)
        pred_vis = pred.argmax(1)
        pred_vis = pred_vis.unsqueeze(0).to(self.device, dtype=torch.float)
        loss = self.criterion(pred, target)

        # compute gradient
        loss.backward()
        if (iteration + 1) % self.cfg.SOLVER.ITERATION_STEP == 0:
            self.optimizer.step()
            self.optimizer.zero_grad()

        # logging and update record
        do_vis = self.monitor.update(self.lr_scheduler, iter_total, loss,
                                     self.optimizer.param_groups[0]['lr'])
        if do_vis:
            self.monitor.visualize(self.cfg, volume, target_vis, pred_vis, iter_total)
            # Display GPU stats using the GPUtil package.
            GPUtil.showUtilization(all=True)

        # save model
        if (iter_total + 1) % self.cfg.SOLVER.ITERATION_SAVE == 0:
            self.save_checkpoint(iter_total)

        # update learning rate
        if self.cfg.SOLVER.LR_SCHEDULER_NAME == 'ReduceLROnPlateau':
            self.lr_scheduler.step(loss)
        else:
            self.lr_scheduler.step()

        end = time.perf_counter()
        print('[Iteration %05d] Data time: %.5f, Iter time: %.5f' %
              (iter_total, time1 - start, end - start))

        # Release some GPU memory and ensure the same GPU usage in consecutive
        # iterations, according to
        # https://discuss.pytorch.org/t/gpu-memory-consumption-increases-while-training/2770
        del loss, pred
def on_epoch_end(self, epoch, logs=None):
    if self.record:
        self.record = False
        with open('logs/BRNN_GPU_Utils.txt', 'a') as f:
            with contextlib.redirect_stdout(f):
                print('Epoch {} End.'.format(epoch))
                GPUtil.showUtilization()
                print('---------------')
def check_gpu_usage():
    # GPUtil.showUtilization() prints a table to stdout and returns None,
    # so capture the printed output and parse the last column.
    old_stdout = sys.stdout
    sys.stdout = mystdout = StringIO()
    GPUtil.showUtilization()
    sys.stdout = old_stdout
    gpu_usage = mystdout.getvalue().strip().split('|')[-2].strip()
    return gpu_usage
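# Parsing the table printed by showUtilization(), as check_gpu_usage above
# does, is brittle. A minimal alternative sketch (the function name is
# illustrative; assumes at least one visible GPU): GPUtil.getGPUs() exposes
# the same numbers as attributes on GPU objects.
import GPUtil

def check_gpu_usage_structured():
    gpus = GPUtil.getGPUs()  # GPU objects with numeric .load and .memoryUtil
    if not gpus:
        return None
    return '{:.0f}%'.format(gpus[0].load * 100)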
def main(args):
    pvutils.set_gpus_to_use(args)

    logging.info("Loading config file: {}".format(args.config))
    # args.config is a path string (see os.path.dirname below), so open it first
    with open(args.config) as conf_file:
        config = json.load(conf_file)

    logdir = pvorg.get_logdir_name(project=config['pyvision']['project_name'],
                                   bench=args.bench,
                                   cfg_file=args.config,
                                   prefix=args.name,
                                   timestamp=args.timestamp)
    pvorg.init_logdir(config, args.config, logdir)

    logging.info("Model initialized in:")
    logging.info(logdir)

    if args.wait:
        import GPUtil
        while GPUtil.getGPUs()[0].memoryUtil > 0.1:
            logging.info("GPU 0 is being used.")
            GPUtil.showUtilization()
            sleep(60)

    if args.debug or args.train:
        sfile = config['pyvision']['entry_point']
        model_file = os.path.realpath(
            os.path.join(os.path.dirname(args.config), sfile))
        assert os.path.exists(model_file)

        m = imp.load_source('model', model_file)
        mymodel = m.create_pyvision_model(config, logdir=logdir, debug=args.debug)

        if args.debug:
            restarts = 0
        else:
            restarts = args.restarts

        pvutils.robust_training(mymodel, restarts=restarts, subprocess=False)

        # Do forward pass
        # img_var = Variable(sample['image']).cuda()  # NOQA
        # prediction = mymodel(img_var)
    else:
        logging.info("Initializing only mode. [Try train.py --train ]")
        logging.info("To start training run:")
        logging.info("    pv2 train {} --gpus".format(logdir))

    return logdir
def forward(self, x):
    results = []
    for i in range(self.num_sub_heads):
        print("GPU pre head forward")
        GPUtil.showUtilization()
        x_i = self.heads[i](x)
        x_i = F.interpolate(x_i, size=self.input_sz, mode="bilinear")
        results.append(x_i)
    return results
def parse(self):
    if not self.initialized:
        self.initialize()
    self.opt = self.parser.parse_args()

    # === processing options === begin ===
    # Determine which GPU to use: 'auto' picks one automatically and fails
    # when no GPU is available.
    if self.opt.gpu_ids == 'auto':
        GPUtil.showUtilization()
        deviceIDs = GPUtil.getAvailable(order='first', limit=4, maxLoad=0.5,
                                        maxMemory=0.5, excludeID=[], excludeUUID=[])
        deviceID_costs = [-1 * x for x in deviceIDs]
        # Reorder the device IDs according to computational capacity, i.e. total
        # memory size. Memory size is floor-divided by 1000 to ignore small fluctuations.
        gpus = GPUtil.getGPUs()
        memory_size_costs = [-1 * (gpu.memoryTotal // 1000) for gpu in gpus
                             if gpu.load < 0.5 and gpu.memoryUtil < 0.5]
        names = [gpu.name for gpu in gpus
                 if gpu.load < 0.5 and gpu.memoryUtil < 0.5]
        sorted_idx = np.lexsort((deviceID_costs, memory_size_costs))
        self.opt.gpu_ids = [deviceIDs[sorted_idx[0]]]
        print('### selected GPU PCI_ID: %d, Name: %s ###' %
              (self.opt.gpu_ids[0], names[sorted_idx[0]]))
    else:
        # split into an integer list, manual or multi-gpu
        self.opt.gpu_ids = list(map(int, self.opt.gpu_ids.split(',')))

    self.opt.device = torch.device(
        "cuda:%d" % self.opt.gpu_ids[0]
        if (torch.cuda.is_available() and len(self.opt.gpu_ids) >= 1) else "cpu")
    # cuda.select_device(self.opt.gpu_ids[0])
    # torch.cuda.set_device(self.opt.gpu_ids[0])

    # set a unique display_id
    self.opt.display_id = int(self.opt.display_id + 100 * self.opt.gpu_ids[0])

    # ensure that the 2D and 3D rotation options do not conflict
    assert not (self.opt.rot_3d and self.opt.rot_horizontal)
    # === processing options === end ===

    args = vars(self.opt)
    print('------------ Options -------------')
    for k, v in sorted(args.items()):
        print('%s: %s' % (str(k), str(v)))
    print('-------------- End ----------------')

    # save to disk
    expr_dir = os.path.join(self.opt.checkpoints_dir, self.opt.name)
    util.mkdirs(expr_dir)
    file_name = os.path.join(expr_dir, 'opt.txt')
    with open(file_name, 'wt') as opt_file:
        opt_file.write('------------ Options -------------\n')
        for k, v in sorted(args.items()):
            opt_file.write('%s: %s\n' % (str(k), str(v)))
        opt_file.write('-------------- End ----------------\n')
    return self.opt
def pick_device():
    try:
        GPUtil.showUtilization()
        # Get the first available GPU
        DEVICE_ID_LIST = GPUtil.getFirstAvailable()
        DEVICE_ID = DEVICE_ID_LIST[0]  # grab the first element from the list
        # Set CUDA_VISIBLE_DEVICES to mask out all GPUs other than the first
        # available device id
        os.environ["CUDA_VISIBLE_DEVICES"] = str(DEVICE_ID)
        logging.debug('Device ID (unmasked): ' + str(DEVICE_ID))
    except Exception:
        logging.exception('Cannot detect GPUs')
def train(self):
    optimizer = torch.optim.Adam(
        itertools.chain(self.encoder.parameters(), self.out.parameters()),
        lr=self.config.learning_rate)
    criterion = torch.nn.CrossEntropyLoss()  # alternative: torch.nn.MSELoss()
    self.encoder.train()
    self.out.train()
    for e in range(1, self.config.epoch_size + 1):
        print(f'Start {e} epoch')
        for i, (content, target) in enumerate(self.train_loader):
            content = content.cuda()
            target = target.cuda()
            latent_feature = self.encoder(content)
            classification = self.out(latent_feature)
            loss = criterion(classification, target)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            if i % self.config.log_interval == 0:
                import GPUtil
                GPUtil.showUtilization()
                now = datetime.datetime.now()
                timestamp = now.strftime("%Y-%m-%d %H:%M:%S")
                print(timestamp)
                print('epoch: ', e, ' iter: ', i)
                print('loss:', loss.cpu().item())
                # evaluate on the current batch, then switch back to train mode
                self.encoder.eval()
                self.out.eval()
                pred = self.out(self.encoder(content))
                pred = torch.argmax(pred, -1)
                acc = torch.sum((pred == target).float()) / target.shape[0]
                print('accuracy :', acc.item())
                torch.save(
                    {
                        'encoder': self.encoder.state_dict(),
                        'out': self.out.state_dict()
                    },
                    f'{self.model_state_dir}/epoch_{e}-iter_{i}.pth')
                self.encoder.train()
                self.out.train()
def get_gpu_info():
    """Collect per-GPU memory stats plus the IDs of available GPUs."""
    gpulist = []
    GPUtil.showUtilization()
    # Gpus is assumed to be set at module level via Gpus = GPUtil.getGPUs().
    # Collect the info of every GPU into a list.
    for gpu in Gpus:
        print('GPU.id:', gpu.id)
        print('GPU total memory:', gpu.memoryTotal)
        print('GPU memory used:', gpu.memoryUsed)
        print('GPU memory used (%):', gpu.memoryUtil * 100)
        # append the info of each GPU
        gpulist.append(
            [gpu.id, gpu.memoryTotal, gpu.memoryUsed, gpu.memoryUtil * 100])

    # Return the list of available GPU ids, filtered by load and memory usage.
    #   order='first': return the available ids in ascending order
    #   limit:         maximum number of ids to return
    #   maxLoad:       maximum allowed GPU load (GPUs above this are excluded)
    #   maxMemory:     maximum allowed memory usage ratio (GPUs above this are excluded)
    #   includeNan:    whether to include GPUs whose load or memory usage is NaN
    #   excludeID:     list of GPU ids to exclude
    #   excludeUUID:   like excludeID, but with UUIDs instead of ids
    GPUavailable = GPUtil.getAvailable(order='first', limit=1, maxLoad=0.5,
                                       maxMemory=0.5, includeNan=False,
                                       excludeID=[], excludeUUID=[])
    gpulist.append(GPUavailable)

    # Return the first available GPU id; raises when no GPU is available.
    # The getAvailable parameters apply here as well, plus:
    #   attempts: how many times to retry when no GPU is available
    #   interval: seconds to wait between attempts
    #   verbose:  whether to print the attempt count once a GPU is found
    GPUfirstavailable = GPUtil.getFirstAvailable(order='first', attempts=1,
                                                 interval=900, verbose=False)
    gpulist.append(GPUfirstavailable)
    return gpulist
def _initialize_weights(self, mode='fan_in'):
    for m in self.modules():
        print("GPU pre module")
        GPUtil.showUtilization()
        if isinstance(m, nn.Conv2d):
            nn.init.kaiming_normal_(m.weight, mode=mode, nonlinearity='relu')
            if m.bias is not None:
                m.bias.data.zero_()
        elif isinstance(m, (nn.BatchNorm2d, nn.BatchNorm1d)):
            assert m.track_running_stats == self.batchnorm_track
            m.weight.data.fill_(1)
            m.bias.data.zero_()
        elif isinstance(m, nn.Linear):
            m.weight.data.normal_(0, 0.01)
            m.bias.data.zero_()
def test_render_rendering_cleaning():
    for i in range(5):
        renderer = MeshRenderer(width=800, height=600)
        renderer.load_object(
            os.path.join(dir, 'mesh/bed1a77d92d64f5cbbaaae4feed64ec1_new.obj'))
        renderer.add_instance(0)
        renderer.set_camera([0, 0, 1.2], [0, 1, 1.2], [0, 1, 0])
        renderer.set_fov(90)
        rgb, _, seg, _ = renderer.render()
        assert np.allclose(np.mean(rgb, axis=(0, 1)),
                           np.array([0.51661223, 0.5035339, 0.4777793, 1.]),
                           rtol=1e-3)
        GPUtil.showUtilization()
        renderer.release()
        GPUtil.showUtilization()
def run_fixed_lambda_bbcluster(train_cluster_data, val_cluster_data, test_cluster_data,
                               output_path, train_batch_size, eval_steps, num_epochs,
                               warmup_frac, lambda_val, reg, beta, loss_name,
                               use_model_device, model_name='distilbert-base-uncased',
                               out_features=256):
    task = Task.init(project_name='BB Clustering', task_name='bbclustering_fixed_lambda')
    config_dict = {'lambda_val': lambda_val, 'reg': reg}
    config_dict = task.connect(config_dict)
    if torch.cuda.is_available():
        device = torch.device('cuda')
        print('CUDA is available and using device: ' + str(device))
    else:
        device = torch.device('cpu')
        print('CUDA not available, using device: ' + str(device))

    # Configure sentence transformers for training and train on the provided dataset.
    # Use a Huggingface/transformers model (like BERT, RoBERTa, XLNet, XLM-R)
    # for mapping tokens to embeddings.
    word_embedding_model = models.Transformer(model_name)
    # Apply mean pooling to get one fixed-sized sentence vector
    pooling_model = models.Pooling(word_embedding_model.get_word_embedding_dimension(),
                                   pooling_mode_mean_tokens=True,
                                   pooling_mode_cls_token=False,
                                   pooling_mode_max_tokens=False)
    doc_dense_model = models.Dense(in_features=pooling_model.get_sentence_embedding_dimension(),
                                   out_features=out_features,
                                   activation_function=nn.Tanh())
    model = CustomSentenceTransformer(modules=[word_embedding_model, pooling_model,
                                               doc_dense_model])
    GPUtil.showUtilization()

    if loss_name == 'bbspec':
        loss_model = BBSpectralClusterLossModel(
            model=model, device=device,
            lambda_val=config_dict.get('lambda_val', lambda_val),
            reg_const=config_dict.get('reg', reg), beta=beta)
    else:
        loss_model = BBClusterLossModel(
            model=model, device=device,
            lambda_val=config_dict.get('lambda_val', lambda_val),
            reg_const=config_dict.get('reg', reg))

    train_dataloader = DataLoader(train_cluster_data, shuffle=True,
                                  batch_size=train_batch_size)
    GPUtil.showUtilization()
    evaluator = ClusterEvaluator.from_input_examples(val_cluster_data, use_model_device)
    test_evaluator = ClusterEvaluator.from_input_examples(test_cluster_data, use_model_device)
    GPUtil.showUtilization()
    # warm up over warmup_frac of the training steps
    warmup_steps = int(len(train_dataloader) * num_epochs * warmup_frac)

    print("Raw BERT embedding performance")
    model.to(device)
    evaluator(model, output_path)
    GPUtil.showUtilization()

    # Train the model
    model.fit(train_objectives=[(train_dataloader, loss_model)],
              evaluator=evaluator,
              test_evaluator=test_evaluator,
              epochs=num_epochs,
              evaluation_steps=eval_steps,
              warmup_steps=warmup_steps,
              output_path=output_path)
def checkgpu():
    '''Check GPU availability and utilization (assumes a single-GPU machine;
    see the multi-GPU variant below).'''
    card = gpu.getGPUs()
    # getAvailability returns one 0/1 flag per GPU
    isavailable = gpu.getAvailability(card, maxLoad=.6)
    print(time.ctime())
    if isavailable == [1]:
        print("can mine")
        time.sleep(5)
        return 'isavailable'
    if isavailable == [0]:
        print("gpu in use")
        gpu.showUtilization()
        time.sleep(5)
        return 'notavailable'
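# Because getAvailability returns one flag per GPU, the list comparisons in
# checkgpu above only behave as intended with exactly one GPU. A sketch of a
# multi-GPU-safe variant (the name and threshold are illustrative assumptions):
import time
import GPUtil as gpu

def checkgpu_multi(max_load=0.6):
    # one 0/1 flag per detected GPU; available only when every card is idle enough
    flags = gpu.getAvailability(gpu.getGPUs(), maxLoad=max_load)
    print(time.ctime())
    if flags and all(flags):
        print("can mine")
        return 'isavailable'
    print("gpu in use")
    gpu.showUtilization()
    return 'notavailable'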
def get_gpu_info():
    '''Collect id, total memory, used memory, and usage percentage per GPU.'''
    gpulist = []
    GPUtil.showUtilization()
    # Gpus is assumed to be set at module level via Gpus = GPUtil.getGPUs().
    for gpu in Gpus:
        print('gpu.id:', gpu.id)
        print('Total GPU memory:', gpu.memoryTotal)
        print('GPU memory used:', gpu.memoryUsed)
        print('GPU memory used (%):', gpu.memoryUtil * 100)
        gpulist.append(
            [gpu.id, gpu.memoryTotal, gpu.memoryUsed, gpu.memoryUtil * 100])
    return gpulist
def get_gpu_memory_owned():
    try:
        # GPUtil.showUtilization() only prints and returns None; read the
        # stats from the first GPU object instead.
        gpu_usage = GPUtil.getGPUs()[0]
        COMPONENTS_INFO["gpu_memory_owned"]["message"] = (
            f"GPU memory - Total: {gpu_usage.memoryTotal / 1000:.2f}GB")
    except IndexError:
        COMPONENTS_INFO["gpu_memory_owned"]["message"] = "GPU memory - Total: None"
def get_gpu_info():
    '''Collect id, total memory, used memory, and usage percentage per GPU.'''
    gpulist = []
    GPUtil.showUtilization()
    # Collect the info of every GPU into a list; Gpus is assumed to be set at
    # module level via Gpus = GPUtil.getGPUs().
    for gpu in Gpus:
        print('gpu.id:', gpu.id)
        print('GPU total memory:', gpu.memoryTotal)
        print('GPU memory used:', gpu.memoryUsed)
        print('GPU memory used (%):', gpu.memoryUtil * 100)
        # append the info of each GPU
        gpulist.append(
            [gpu.id, gpu.memoryTotal, gpu.memoryUsed, gpu.memoryUtil * 100])
    return gpulist
def test_render_rendering_cleaning():
    download_assets()
    test_dir = os.path.join(gibson2.assets_path, 'test')
    for i in range(5):
        renderer = MeshRenderer(width=800, height=600)
        renderer.load_object(
            os.path.join(test_dir, 'mesh/bed1a77d92d64f5cbbaaae4feed64ec1_new.obj'))
        renderer.add_instance(0)
        renderer.set_camera([0, 0, 1.2], [0, 1, 1.2], [0, 1, 0])
        renderer.set_fov(90)
        rgb = renderer.render(('rgb'))[0]
        assert np.sum(rgb, axis=(0, 1, 2)) > 0
        GPUtil.showUtilization()
        renderer.release()
        GPUtil.showUtilization()
def _train_misc(self, loss, pred, volume, target, weight,
                iter_total, losses_vis):
    self.backward_pass(loss)  # backward pass

    # logging and update record
    if hasattr(self, 'monitor'):
        do_vis = self.monitor.update(iter_total, loss, losses_vis,
                                     self.optimizer.param_groups[0]['lr'])
        if do_vis:
            self.monitor.visualize(volume, target, pred, weight, iter_total)
            if torch.cuda.is_available():
                GPUtil.showUtilization(all=True)

    # save model
    if (iter_total + 1) % self.cfg.SOLVER.ITERATION_SAVE == 0:
        self.save_checkpoint(iter_total)
    if (iter_total + 1) % self.cfg.SOLVER.ITERATION_VAL == 0:
        self.validate(iter_total)

    # update learning rate
    self.maybe_update_swa_model(iter_total)
    self.scheduler_step(iter_total, loss)

    if self.is_main_process:
        self.iter_time = time.perf_counter() - self.start_time
        self.total_time += self.iter_time
        avg_iter_time = self.total_time / (iter_total + 1 - self.start_iter)
        est_time_left = avg_iter_time * \
            (self.total_iter_nums + self.start_iter - iter_total - 1) / 3600.0
        info = ['[Iteration %05d]' % iter_total,
                'Data time: %.4fs,' % self.data_time,
                'Iter time: %.4fs,' % self.iter_time,
                'Avg iter time: %.4fs,' % avg_iter_time,
                'Time Left %.2fh.' % est_time_left]
        print(' '.join(info))

    # Release some GPU memory and ensure the same GPU usage in consecutive
    # iterations, according to
    # https://discuss.pytorch.org/t/gpu-memory-consumption-increases-while-training/2770
    del volume, target, pred, weight, loss, losses_vis
def determine(args):
    def get_or_else(val, default):
        if val is not None:
            return val
        return default

    def generate_random(n):
        return ''.join(random.choice(string.ascii_lowercase) for _ in range(n))

    description_fuscated = ''.join(
        ch for ch in args.description if ch.isalnum()).lower()
    base_name = int((args.max_len_name * 3) / 4)
    rest = int(args.max_len_name - base_name)
    custom_name = (''.join(description_fuscated.split(' '))[0:base_name]
                   + "-" + generate_random(rest))

    model_name = get_or_else(args.model_name, custom_name)
    docker_image = get_or_else(args.docker_image, custom_name)
    description = args.description

    available_gpus = GPUtil.getAvailable(order='first', limit=1, maxLoad=0.2,
                                         maxMemory=0.2, includeNan=False)
    if len(available_gpus) == 0:
        print("Currently there is no GPU available, printing load")
        GPUtil.showUtilization()
        exit(EXIT_CODE)
    id_free_gpu = available_gpus[0]

    print('Determined starting arguments:')
    print(f'description = [{description}]')
    print(f'model_name = [{model_name}]')
    print(f'docker_image = [{docker_image}]')
    print(f'id_free_gpu = [{id_free_gpu}]')
    GPUtil.showUtilization()

    if not query_yes_no('Do you accept these?'):
        print("Right, start again [might want to --help then]")
        exit(EXIT_CODE)

    return (description_fuscated + '.' + model_name), model_name, docker_image, id_free_gpu
def get_gpu_memory_usage():
    while True:
        try:
            # GPUtil.showUtilization() only prints and returns None; read the
            # stats from the first GPU object instead.
            gpu_usage = GPUtil.getGPUs()[0]
            COMPONENTS_INFO["gpu_memory_usage"]["message"] = (
                f"Using: {gpu_usage.memoryUsed / 1000:.2f}GB "
                f"({gpu_usage.memoryUsed / gpu_usage.memoryTotal * 100:.1f}%)")
        except IndexError:
            COMPONENTS_INFO["gpu_memory_usage"]["message"] = "Using: None"
            break
        time.sleep(config.SLEEP_IN_SEC)
def run(clock):
    while True:
        print("Time: " + time.strftime("%Y-%m-%d %H:%M:%S", time.gmtime()))
        p = psutil.Process(int(pid))
        pinfo = p.as_dict(ad_value=psutil.AccessDenied)

        print(bcolors.WARNING + "**Process CPU Info**" + bcolors.ENDC)
        print("Memory: " + str_ntuple(pinfo['memory_full_info'], bytes2human=True))
        print("Memory %: " + str(p.memory_percent()))
        print("cpu time: " + str_ntuple(pinfo['cpu_times']))
        print("cpu %: " + str(p.cpu_percent(interval=1)))

        print(bcolors.WARNING + "**Process GPU Info**" + bcolors.ENDC)
        print("gpu index: " + gpu_index(pid))
        print("process type: " + gpu_type(pid))
        print("gpu Memory: " + gpu_mem(pid))

        print(bcolors.WARNING + "**Global GPU Info**" + bcolors.ENDC)
        GPUtil.showUtilization()
        print("")
        time.sleep(clock)
def get_gpu_usage():
    while True:
        try:
            # GPUtil.showUtilization() only prints and returns None; read the
            # load from the first GPU object instead.
            gpu_usage = GPUtil.getGPUs()[0]
            COMPONENTS_INFO["gpu_usage"]["message"] = (
                f"GPU - Using: {gpu_usage.load * 100:.1f}%")
        except IndexError:
            print(termcolor.colored("Can't find GPU\n", "red"))
            COMPONENTS_INFO["gpu_usage"]["message"] = "GPU - Using: None"
            break
        time.sleep(config.SLEEP_IN_SEC)