def gpu_info(): """Return a list of namedtuples representing attributes of each GPU device. """ GPUInfo = namedtuple('GPUInfo', ['name', 'driver', 'totalmem', 'freemem']) gpus = GPUtil.getGPUs() info = [] for g in gpus: info.append(GPUInfo(g.name, g.driver, g.memoryTotal, g.memoryFree)) return info
def get_sweep_cmds(yaml_file):
    configs = load_from_yaml(yaml_file)
    base_cmd = configs['cmd']
    hparams = configs['hparams']
    hparams_combo = get_hparams_combo(hparams)

    cmds = []
    for idx, hps in enumerate(hparams_combo):
        cmd = base_cmd + ' ' + cmd_for_hparams(hps)
        cmds.append(cmd)

    all_gpus_stats = GPUtil.getGPUs()
    exclude_gpus = configs['exclude_gpus']
    gpu_mem_per_job = configs['gpu_memory_per_job']
    gpu_mem_pct_per_job = float(gpu_mem_per_job) / all_gpus_stats[0].memoryTotal
    if exclude_gpus == 'None':
        exclude_gpus = []
    gpus_to_use = GPUtil.getAvailable(order='first',
                                      limit=100,
                                      maxLoad=0.8,
                                      maxMemory=1 - gpu_mem_pct_per_job,
                                      includeNan=False,
                                      excludeID=exclude_gpus,
                                      excludeUUID=[])

    num_exps = len(cmds)
    gpus_free_mem = [all_gpus_stats[x].memoryFree for x in gpus_to_use]
    sorted_gpu_ids = np.argsort(gpus_free_mem)[::-1]
    allowable_gpu_jobs = [
        int(math.floor(x / gpu_mem_per_job)) for x in gpus_free_mem
    ]
    jobs_run_on_gpu = [0 for i in range(len(gpus_to_use))]
    can_run_on_gpu = [True for i in range(len(gpus_to_use))]

    gpu_id = 0
    final_cmds = []
    for idx in range(num_exps):
        if not any(can_run_on_gpu):
            logger.warning('Ran out of GPUs!')
            break
        sorted_gpu_id = sorted_gpu_ids[gpu_id]
        while not can_run_on_gpu[sorted_gpu_id]:
            gpu_id = (gpu_id + 1) % len(gpus_to_use)
            sorted_gpu_id = sorted_gpu_ids[gpu_id]
        final_cmds.append(cmds[idx] +
                          f' --device=cuda:{gpus_to_use[sorted_gpu_id]}')
        jobs_run_on_gpu[sorted_gpu_id] += 1
        can_run_on_gpu[sorted_gpu_id] = (
            jobs_run_on_gpu[sorted_gpu_id] < allowable_gpu_jobs[sorted_gpu_id])
        gpu_id = (gpu_id + 1) % len(gpus_to_use)
    return final_cmds
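# A minimal usage sketch for get_sweep_cmds. The YAML keys ('cmd', 'hparams',
# 'exclude_gpus', 'gpu_memory_per_job') are the ones read above; the file name
# 'sweep.yaml' and the subprocess-based launch are illustrative assumptions,
# not part of the original code.
#
#   # sweep.yaml
#   # cmd: python train.py
#   # hparams:
#   #   lr: [0.001, 0.01]
#   #   batch_size: [32, 64]
#   # exclude_gpus: None
#   # gpu_memory_per_job: 4000
def launch_sweep(yaml_file='sweep.yaml'):
    import subprocess
    procs = []
    for cmd in get_sweep_cmds(yaml_file):
        # Each command already carries its --device=cuda:<id> suffix
        procs.append(subprocess.Popen(cmd, shell=True))
    return procs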
def nvidia_measure(self, host_rank):
    GPUs = GPU.getGPUs()
    if len(GPUs) > 1:
        gpu_host = int(host_rank)
        gpu = GPUs[gpu_host]
    else:
        gpu_host = int(os.environ['SM_CURRENT_HOST'].split('-')[1]) - 1
        gpu = GPUs[0]
    gpu_perform = [
        gpu_host, gpu.memoryFree, gpu.memoryUsed, gpu.memoryUtil * 100,
        gpu.memoryTotal
    ]
    return gpu_perform
def hook(self):
    logging.debug("Registering resources utilization")
    GPUs = GPUtil.getGPUs()
    gpu = GPUs[0]
    utilization_metrics = {
        "colab_GPU_mem_free": gpu.memoryFree,
        "colab_GPU_mem_used": gpu.memoryUsed,
        "colab_GPU_mem_util_percentage": gpu.memoryUtil * 100,
        "colab_GPU_mem_total": gpu.memoryTotal,
        "colab_RAM_used_percentage": psutil.virtual_memory().percent,
        "colab_RAM_total_MB": psutil.virtual_memory().total / (1024 * 1024),
    }
    self.monitor("pull_metrics", utilization_metrics)
def _f(self):
    """
    Until a stop signal is encountered, poll every `every` seconds and
    record the maximum GPU load observed on the monitored device.
    """
    start_time = time.monotonic()
    while not self.stop_f:
        # GPU load as a percentage
        gpu_perc = GPUtil.getGPUs()[self.gpu_id].load * 100
        if gpu_perc > self.max_usage:
            self.max_usage = gpu_perc
        time.sleep(self.every - ((time.monotonic() - start_time) % self.every))
def printm():
    GPUs = GPU.getGPUs()
    # XXX: assumes a single GPU, as on Colab, and even that isn't guaranteed
    gpu = GPUs[0]
    process = psutil.Process(os.getpid())
    print(
        "Gen RAM Free: " +
        humanize.naturalsize(psutil.virtual_memory().available),
        " | Proc size: " + humanize.naturalsize(process.memory_info().rss))
    print(
        "GPU RAM Free: {0:.0f}MB | Used: {1:.0f}MB | Util {2:3.0f}% | Total {3:.0f}MB"
        .format(gpu.memoryFree, gpu.memoryUsed, gpu.memoryUtil * 100,
                gpu.memoryTotal))
def get_available_device(max_memory=0.49):
    GPUs = GPUtil.getGPUs()
    freeMemory = 0
    available = -1
    for GPU in GPUs:
        # print('GPU.memoryUtil type =>', type(GPU.memoryUtil))
        # print('max_memory type =>', type(max_memory))
        if GPU.memoryUtil > max_memory:
            continue
        if GPU.memoryFree >= freeMemory:
            freeMemory = GPU.memoryFree
            available = GPU.id
    return available
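# A small usage sketch, assuming get_available_device is called to pin the
# process to the least-utilised GPU before any CUDA context is created; the
# CUDA_VISIBLE_DEVICES approach is one common pattern, not part of the
# original function.
def pin_least_used_gpu(max_memory=0.49):
    import os
    device_id = get_available_device(max_memory=max_memory)
    if device_id == -1:
        # No GPU below the memory-utilisation threshold; fall back to CPU
        os.environ['CUDA_VISIBLE_DEVICES'] = ''
    else:
        os.environ['CUDA_VISIBLE_DEVICES'] = str(device_id)
    return device_id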
def GetDynamicStatsDict(self):
    # Gets Dynamic Stats And Puts Them Into A Dictionary #

    # Get CPU Realtime Statistics #
    CPUInfo = psutil.cpu_freq()
    self.SystemHardware.update({'CPUFrequency': CPUInfo.current})
    CPUUsage = []
    for _, Percent in enumerate(psutil.cpu_percent(percpu=True, interval=1)):
        CPUUsage.append(Percent)
    self.SystemHardware.update({'CPUUsage': CPUUsage})

    # Get Memory Info #
    PhysRAM = psutil.virtual_memory()
    Swap = psutil.swap_memory()
    self.SystemHardware.update({'RAMUsage': PhysRAM.used})
    self.SystemHardware.update({'RAMFree': PhysRAM.free})
    self.SystemHardware.update({'RAMPercent': PhysRAM.percent})
    self.SystemHardware.update({'SWAPFree': Swap.free})
    self.SystemHardware.update({'SWAPUsage': Swap.used})
    self.SystemHardware.update({'SWAPPercent': Swap.percent})

    # Get Realtime Network Info #
    NetInfo = psutil.net_io_counters()
    self.SystemHardware.update({'BytesSent': NetInfo.bytes_sent})
    self.SystemHardware.update({'BytesRecv': NetInfo.bytes_recv})

    # Get Realtime GPU Info #
    GPUs = GPUtil.getGPUs()
    GPUUsage = []
    GPUMem = []
    GPUTemps = []
    for GPU in GPUs:
        GPUUsage.append(GPU.load * 100)
        GPUMem.append(GPU.memoryFree)
        GPUTemps.append(GPU.temperature)
    self.SystemHardware.update({'GPUUsage': GPUUsage})
    self.SystemHardware.update({'GPUMem': GPUMem})
    self.SystemHardware.update({'GPUTemps': GPUTemps})
def run(self):
    # disk_previo = 0
    while not self.stopped:
        # disk_previo = 0
        # gpu.showUtilization()
        gpu_usage = 0.0
        gpus = gpu.getGPUs()
        for GPU in gpus:
            gpu_usage = GPU.load * 100
            gpu_memory = GPU.memoryUtil * 100
            # print("GPU ID: ", GPU.id)
            # print("GPU load %: ", GPU.load * 100)
            # print("GPU mem %: ", GPU.memoryUtil * 100)
        cpu_usage = psutil.cpu_percent()
        memory_usage = psutil.virtual_memory().percent
        disk_usage = psutil.disk_usage('/').percent
        # disk_status_w = psutil.disk_io_counters().write_bytes
        # disk_status = (disk_status_w - disk_previo) / (self.delay * 10**6)

        if cpu_usage >= 90.0:
            report["cpu"] = (True, cpu_usage)
        if gpu_usage >= 90.0:
            report["gpu"] = (True, gpu_usage)
        if memory_usage >= 90.0:
            report["ram"] = (True, memory_usage)
        if disk_usage >= 90.0:
            report["disk"] = (True, disk_usage)

        cout = ""
        for key in report:
            if True in report[key]:
                cout = f"Status not OK \n {key} usage at {report[key][1]}%"
            else:
                cout = "All good"
        print(cout)
        # print("cpu %: ", cpu_usage)
        # print("ram %: ", memory_usage)
        # print("disk used %: ", disk_usage)
        # print("MB written per second: ", disk_status, "\n")
        # disk_previo = disk_status_w
        time.sleep(self.delay)
def start_training(epochs, general_seed, xgboost_seed, cuda,
                   single_precision_histogram, training_data, test_data):
    avail_gpus = GPUtil.getGPUs()
    use_cuda = True if cuda == 'True' and len(avail_gpus) > 0 else False
    if use_cuda:
        click.echo(click.style(f'Using {len(avail_gpus)} GPUs!', fg='blue'))
    else:
        click.echo(click.style('No GPUs detected. Running on the CPU', fg='blue'))

    with mlflow.start_run():
        # Fetch and prepare data
        training_data, test_data = load_train_test_data(training_data, test_data)

        # Enable the logging of all parameters, metrics and models to mlflow
        mlflow.xgboost.autolog()

        # Set XGBoost parameters
        param = {'objective': 'multi:softmax',
                 'num_class': 8,
                 'single_precision_histogram': True if single_precision_histogram == 'True' else False,
                 'subsample': 0.5,
                 'colsample_bytree': 0.5,
                 'colsample_bylevel': 0.5}

        # Set random seeds
        set_general_random_seeds(general_seed)
        set_xgboost_random_seeds(xgboost_seed, param)

        # Set CPU or GPU as training device
        if use_cuda:
            param['tree_method'] = 'gpu_hist'
        else:
            param['tree_method'] = 'hist'

        # Train on the chosen device
        results = {}
        runtime = time.time()
        booster = xgb.train(param, training_data.DM, epochs,
                            evals=[(test_data.DM, 'test')],
                            evals_result=results)
        device = 'GPU' if use_cuda else 'CPU'
        if use_cuda:
            click.echo(click.style(
                f'{device} Run Time: {str(time.time() - runtime)} seconds',
                fg='green'))

        # Perform some predictions on the test data, evaluate and log them
        print('[bold blue]Performing predictions on test data.')
        test_predictions = np.round(booster.predict(test_data.DM))
        calculate_log_metrics(test_data.y, test_predictions)

        # Log hardware and software
        log_sys_intel_conda_env()
def gpu_info() -> list:
    result = []
    gpus = GPUtil.getGPUs()
    # gpu.id, gpu.name, gpu.uuid
    for gpu in gpus:
        result.append({
            "id": gpu.id,
            "name": gpu.name,
            "load_percent": round(gpu.load * 100, 3),
            "memory_percent": round(gpu.memoryUsed / gpu.memoryTotal * 100, 3),
            "temperature": gpu.temperature,
        })
    return result
def __queryGPUStats(self):
    """
    Query stats for all GPUs.

    Returns:
        gpuStats (list): GPU statistics.
    """
    gpus = GPUtil.getGPUs()
    if gpus:
        return [
            GPUStat(gpu.id, gpu.uuid, gpu.name, gpu.memoryTotal,
                    gpu.memoryUsed, gpu.memoryFree, gpu.memoryUtil,
                    gpu.temperature) for gpu in gpus
        ]
    return []
def _create_computer():
    tot_m, used_m, free_m = memory()
    tot_d, used_d, free_d = disk(ROOT_FOLDER)
    computer = Computer(name=socket.gethostname(),
                        gpu=len(GPUtil.getGPUs()),
                        cpu=cpu_count(),
                        memory=tot_m,
                        ip=IP,
                        port=PORT,
                        user=get_username(),
                        disk=tot_d,
                        root_folder=ROOT_FOLDER,
                        sync_with_this_computer=SYNC_WITH_THIS_COMPUTER,
                        can_process_tasks=CAN_PROCESS_TASKS)
    ComputerProvider(_session).create_or_update(computer, 'name')
def build_sys_uuid(use=""):
    uname = platform.uname().system
    mach = platform.uname().machine
    gpu_id = GPUtil.getGPUs()[0].uuid
    # This is the string that will be hashed.
    # Change as necessary for whichever definition of computer hardware you
    # wish to use as a unique identifier.
    if not use:
        comb = uname.encode('ascii') + mach.encode('ascii') + gpu_id.encode('ascii')
    else:
        comb = (use[:int(len(use) / 2)].encode('ascii') +
                uname.encode('ascii') + mach.encode('ascii') +
                gpu_id.encode('ascii') +
                use[int(len(use) / 2):].encode('ascii'))
    hasher = blake2b()
    hasher.update(comb)
    dig = hasher.hexdigest()
    return [dig[0:32], dig[32:64], dig[64:96], dig[96:128]]
def get_system_specs():
    str_system = ""
    cpu_info = cpuinfo.get_cpu_info()
    str_system += "CPU {} {}. ".format(cpu_info.get('brand', 'Unknown'),
                                       cpu_info.get('family', 'Unknown'))
    memory_info = psutil.virtual_memory()
    str_system += "{:03.2f} GB RAM memory. ".format(
        memory_info.total / (1024 * 1024 * 1024))
    nvidia_cmd = shutil.which("nvidia-smi")
    if nvidia_cmd:
        str_system += "GPU "
        gpu_info = GPUtil.getGPUs()
        for gpu in gpu_info:
            str_system += "{} ".format(gpu.name)
    return str_system
def gpu_load(wproc=0.5, wmem=0.5):
    """Return a list of namedtuples representing the current load for
    each GPU device. The processor and memory loads are fractions
    between 0 and 1. The weighted load represents a weighted average
    of processor and memory loads using the parameters `wproc` and
    `wmem` respectively.
    """

    GPULoad = namedtuple('GPULoad', ['processor', 'memory', 'weighted'])
    gpus = GPUtil.getGPUs()
    load = []
    for g in gpus:
        wload = (wproc * g.load + wmem * g.memoryUtil) / (wproc + wmem)
        load.append(GPULoad(g.load, g.memoryUtil, wload))
    return load
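# A brief sketch showing how the weighted load returned by gpu_load might be
# used to pick the least busy device; the argmin-by-weighted-load policy is an
# illustrative assumption, not taken from the original function.
def least_loaded_gpu(wproc=0.5, wmem=0.5):
    load = gpu_load(wproc=wproc, wmem=wmem)
    if not load:
        return None
    # Index of the GPU with the smallest weighted load
    return min(range(len(load)), key=lambda i: load[i].weighted)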
def update_processor_log(step='null'):
    log_file = os.path.join(get_log_dir(), 'processor_util.csv')
    if not os.path.exists(log_file):
        clear_processor_log()
    with open(log_file, 'a') as f:
        log_line = [datetime.now().isoformat(), step]
        for gpu in GPUtil.getGPUs():
            log_line.append(f'{gpu.load*100:.2f}')
        for cpu_util in psutil.cpu_percent(percpu=True):
            log_line.append(f'{cpu_util:.2f}')
        f.write(f'{",".join(map(str, log_line))}\n')
def get_gpu_stats():
    gpus = GPUtil.getGPUs()
    list_gpus = []
    for gpu in gpus:
        # name of GPU
        gpu_name = gpu.name
        # GPU utilisation as a percentage
        gpu_load = gpu.load * 100
        # GPU temperature in Celsius
        gpu_temperature = f"{gpu.temperature} °C"
        gpu_entry = [gpu_name, gpu_load, gpu_temperature]
        list_gpus.append(gpu_entry)
    return list_gpus
def _get_device_map(self):
    self.logger.info('get devices')
    run_on_gpu = False
    device_map = [-1] * self.num_worker
    if not self.args.cpu:
        try:
            import GPUtil
            num_all_gpu = len(GPUtil.getGPUs())
            avail_gpu = GPUtil.getAvailable(order='memory',
                                            limit=min(num_all_gpu,
                                                      self.num_worker),
                                            maxMemory=0.9,
                                            maxLoad=0.9)
            num_avail_gpu = len(avail_gpu)

            if num_avail_gpu >= self.num_worker:
                run_on_gpu = True
            elif 0 < num_avail_gpu < self.num_worker:
                self.logger.warning(
                    'only %d out of %d GPU(s) is available/free, but "-num_worker=%d"'
                    % (num_avail_gpu, num_all_gpu, self.num_worker))
                if not self.args.device_map:
                    self.logger.warning(
                        'multiple workers will be allocated to one GPU, '
                        'may not scale well and may raise out-of-memory')
                else:
                    self.logger.warning(
                        'workers will be allocated based on "-device_map=%s", '
                        'may not scale well and may raise out-of-memory' %
                        self.args.device_map)
                run_on_gpu = True
            else:
                self.logger.warning('no GPU available, fall back to CPU')

            if run_on_gpu:
                device_map = ((self.args.device_map or avail_gpu) *
                              self.num_worker)[:self.num_worker]
        except FileNotFoundError:
            self.logger.warning(
                'nvidia-smi is missing, often means no gpu on this machine. '
                'fall back to cpu!')
    self.logger.info('device map: \n\t\t%s' % '\n\t\t'.join(
        'worker %2d -> %s' %
        (w_id, ('gpu %2d' % g_id) if g_id >= 0 else 'cpu')
        for w_id, g_id in enumerate(device_map)))
    return device_map
def run(self) -> None:
    if self._interval_seconds <= 0:
        logging.warning(
            "Resource monitoring requires an interval that is larger than 0 "
            "seconds, but got: {}. Exiting.".format(self._interval_seconds))
        return
    logging.info("Process ({}) started with pid: {}".format(
        self.name, self.pid))
    # create the TB writers and AML run context for this process
    writer = tensorboardX.SummaryWriter(self._tb_log_file_path)
    run_context = Run.get_context()
    is_offline_run = is_offline_run_context(run_context)
    current_iteration = 0

    def log_to_azure_and_tb(label: str, value: float) -> None:
        writer.add_scalar(label, value, current_iteration)
        if not is_offline_run:
            run_context.log(label, value)

    gpu_available = is_gpu_available()
    while True:
        if gpu_available:
            gpus: List[GPU] = GPUtil.getGPUs()
            if len(gpus) > 0:
                for gpu in gpus:
                    log_to_azure_and_tb(
                        'Diagnostics/GPU_{}_Load_Percent'.format(gpu.id),
                        gpu.load * 100)
                    log_to_azure_and_tb(
                        'Diagnostics/GPU_{}_MemUtil_Percent'.format(gpu.id),
                        gpu.memoryUtil * 100)
                # log the average GPU usage
                log_to_azure_and_tb(
                    'Diagnostics/Average_GPU_Load_Percent',
                    statistics.mean(map(lambda x: x.load, gpus)) * 100)
                log_to_azure_and_tb(
                    'Diagnostics/Average_GPU_MemUtil_Percent',
                    statistics.mean(map(lambda x: x.memoryUtil, gpus)) * 100)

        # log the CPU util
        log_to_azure_and_tb('Diagnostics/CPU_Util_Percent',
                            psutil.cpu_percent(interval=None))
        log_to_azure_and_tb('Diagnostics/CPU_MemUtil_Percent',
                            psutil.virtual_memory()[2])

        current_iteration += 1
        # pause the thread for the requested delay
        time.sleep(self._interval_seconds)
def checkgpu():
    '''Check GPU availability and utilization.'''
    card = gpu.getGPUs()
    isavailable = gpu.getAvailability(card, maxLoad=.6)
    print(time.ctime())
    if isavailable == [1]:
        print("can mine")
        time.sleep(5)
        return 'isavailable'
    if isavailable == [0]:
        print("gpu in use")
        gpu.showUtilization()
        time.sleep(5)
        return 'notavailable'
def log_chat(gpu_util, question, answer, generation_time):
    GPUs = GPUtil.getGPUs()
    for GPU in GPUs:
        print('sep')
        logging.basicConfig(
            filename='/KoGPT2chatbot/log/chat/{}_chat.log'.format(
                datetime.datetime.now().strftime('%Y-%m-%d')),
            level=logging.DEBUG)
        print('sep2')
        print('/KoGPT2chatbot/log/chat/{}_chat.log'.format(
            datetime.datetime.now().strftime('%Y-%m-%d')))
        logging.debug(
            '{}, {}, used_memory={}/{}, max_utilization={}, q={}, a={}, generation_time={}'
            .format(datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S'),
                    GPU.name, GPU.memoryUsed, GPU.memoryTotal, gpu_util,
                    question, answer, generation_time))
def write_gpu_status(self):
    self.GPU = GPUtil.getGPUs()[self.gpu_id]
    GPU_load = self.GPU.load * 100
    GPU_memoryUsed = self.GPU.memoryUsed / self.GPU_memoryTotal * 100
    GPU_memoryFree = self.GPU.memoryFree / self.GPU_memoryTotal * 100
    self.writer.add_scalars(
        "device/GPU",
        {
            "GPU_load (%)": GPU_load,
            "GPU_memory_used (%)": GPU_memoryUsed,
            "GPU_memory_free (%)": GPU_memoryFree,
        },
        self.count,
    )
    self.GPU_memoryUsed.append(GPU_memoryUsed)
    self.GPU_memoryFree.append(GPU_memoryFree)
def get_current_memory_mb(gpu_id=None):
    pid = os.getpid()
    p = psutil.Process(pid)
    info = p.memory_full_info()
    cpu_mem = info.uss / 1024. / 1024.
    gpu_mem = 0
    gpu_percent = 0
    if gpu_id is not None:
        GPUs = GPUtil.getGPUs()
        gpu_load = GPUs[gpu_id].load
        gpu_percent = gpu_load
        pynvml.nvmlInit()
        handle = pynvml.nvmlDeviceGetHandleByIndex(gpu_id)
        meminfo = pynvml.nvmlDeviceGetMemoryInfo(handle)
        gpu_mem = meminfo.used / 1024. / 1024.
    return cpu_mem, gpu_mem, gpu_percent
def auto_select_gpu():
    try:
        import GPUtil
    except ImportError:
        wlog("please install GPUtil for automatically selecting GPU")
        return ""

    if len(GPUtil.getGPUs()) == 0:
        return ""

    id_list = GPUtil.getAvailable(order="load",
                                  maxLoad=0.7,
                                  maxMemory=0.9,
                                  limit=8)
    if len(id_list) == 0:
        print("GPU memory is not enough for predicted usage")
        raise NotImplementedError
    return str(id_list[0])
def select_computational_device():
    print('Select the computational device to run the clustering on')
    gpus = gputil.getGPUs()
    if len(gpus) == 0:
        print("So sorry! You don't have any GPU; computation will be done "
              "on your CPU :(")
        return -1
    else:
        for i in range(len(gpus)):
            print('GPU with ID: ', i)
            print('Information: \n')
        selected_device = int(input("Type the device id for the selected GPU"))
        return gpus[selected_device]
def __init__(
    self,
    *,
    name: str = "generic",
    base_uri: str = None,
    language: str = "en",
    namespaces: dict = None,
    import_graph: typing.Optional[GraphLike] = None,
) -> None:
    """
    Constructor for a `KnowledgeGraph` object.

    name:
        optional, internal name for this graph

    base_uri:
        the default [*base URI*](https://tools.ietf.org/html/rfc3986#section-5.1) for this RDF graph

    language:
        the default [*language tag*](https://www.w3.org/TR/rdf11-concepts/#dfn-language-tag), e.g., used for [*language indexing*](https://www.w3.org/TR/json-ld11/#language-indexing)

    namespaces:
        a dictionary of [*namespace*s](https://rdflib.readthedocs.io/en/stable/apidocs/rdflib.html?highlight=namespace#rdflib.Namespace) (dict values) and their corresponding *prefix* strings (dict keys) to add as *controlled vocabularies* available to use in the RDF graph, binding each prefix to the given namespace

    import_graph:
        optionally, another existing RDF graph to be used as a starting point
    """
    self.name = name
    self.base_uri = base_uri
    self.language = language
    self.gpus = GPUtil.getGPUs()

    # import relations from another existing RDF graph, or start from blank
    if import_graph:
        self._g = import_graph
    else:
        self._g = rdflib.Graph()

    # initialize the namespaces
    self._ns: dict = {}

    for prefix, iri in self._DEFAULT_NAMESPACES.items():
        self.add_ns(prefix, iri)

    if namespaces:
        for prefix, iri in namespaces.items():
            self.add_ns(prefix, iri)
def _monitor_container(container, poll_interval=1, use_gpu=False):
    max_cpu_mem = 0
    max_gpu_mem = 0
    while True:
        try:
            container.wait(timeout=1)
            break
        except Exception:
            # The container is still running; keep polling.
            pass
        stats = container.stats(stream=False)
        memory_stats = stats["memory_stats"]
        memory_usage = memory_stats.get("usage")
        if memory_usage is not None:
            max_cpu_mem = max(max_cpu_mem, float(memory_usage / 1000000))
        if use_gpu:
            max_gpu_mem = max(max_gpu_mem,
                              float(GPUtil.getGPUs()[0].memoryUsed))
    return max_cpu_mem, max_gpu_mem
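# An illustrative sketch of how _monitor_container might be driven with the
# Docker SDK. The image and command arguments are placeholders, and running
# the monitor in the calling thread (rather than a separate one) is an
# assumption made for brevity.
def run_and_measure(image, command, use_gpu=False):
    import docker
    client = docker.from_env()
    container = client.containers.run(image, command, detach=True)
    max_cpu_mem, max_gpu_mem = _monitor_container(container, use_gpu=use_gpu)
    container.remove(force=True)
    return max_cpu_mem, max_gpu_mem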
def gpu_info(usage=True):
    """ gpu info """
    info_all = {'Num GPUs': 0}  # in case of no GPU
    for i, gpu in enumerate(GPUtil.getGPUs()):
        info = {'ID': gpu.id,
                'UUID': gpu.uuid,
                'Name': gpu.name,
                'Serial': gpu.serial,
                'Total memory': gpu.memoryTotal}
        if usage:
            info.update({'Free memory': f'{gpu.memoryFree} MB',
                         'Used memory': f'{gpu.memoryUsed} MB',
                         'Current load': f'{gpu.load * 100:.2f}%',
                         'Temperature': f'{gpu.temperature}°C'})
        info_all[f'GPU{i}'] = info
    info_all['Num GPUs'] = len(info_all) - 1
    return info_all
def check_GPU_usage():
    """ Check and print the amount of GPU memory currently free and used. """
    gpu = GPU.getGPUs()[0]
    gpu_stats = [
        gpu.memoryFree, gpu.memoryUsed, gpu.memoryUtil * 100, gpu.memoryTotal
    ]

    # process = psutil.Process(os.getpid())
    # ram_free = humanize.naturalsize(psutil.virtual_memory().available)
    # proc_size = humanize.naturalsize(process.memory_info().rss)
    #
    # print("Gen RAM Free: %s | Proc size: %s" % (ram_free, proc_size))
    print(
        "GPU RAM Free: {:.0f}MB | Used: {:.0f}MB | Util {:.0f}% | Total {:.0f}MB"
        .format(*gpu_stats))
def run(self):
    while not self.stopped:
        for gpu in GPUtil.getGPUs():
            if gpu.id == DEVICE_ID:
                print('|'.join([
                    f'{"ID": ^5}', f'{"GPU util.": ^10}',
                    f'{"Memory util.": ^14}', f'{"Memory used": ^14}',
                    f'{"Memory total": ^14}', f'{"T": ^6}'
                ]))
                print('|'.join([
                    f'{gpu.id: ^5}',
                    f'{f"{int(gpu.load * 100)}%": ^10}',
                    f'{f"{int(gpu.memoryUtil * 100)}%": ^14}',
                    f'{f"{int(gpu.memoryUsed)}MB": ^14}',
                    f'{f"{int(gpu.memoryTotal)}MB": ^14}',
                    f'{f"{int(gpu.temperature)}°C": ^6}'
                ]))
        time.sleep(self.delay)
def is_nvidia_gpu_present():
    try:
        import GPUtil
    except ImportError:  # py36 ModuleNotFoundError
        try:
            import gpu_dfcc
        except ImportError:  # py36 ModuleNotFoundError
            # who knows?
            return False
        else:
            return gpu_dfcc.cudaGetDeviceCount() > 0
    else:
        try:
            ngpu = len(GPUtil.getGPUs())
        except OSError:  # py3 FileNotFoundError
            # no `nvidia-smi`
            return False
        else:
            return ngpu > 0
try:
    # Try to import cupy
    import cupy as cp
    import cupyx.scipy.linalg as cpxl
    # Try to access a device
    cp.cuda.Device(0).compute_capability
    # Flag indicating successful import
    have_cupy = True
    # Import appropriate versions of utility functions
    from ._cp_util import *
    try:
        # Try to import GPUtil
        import GPUtil
        # Check whether GPUtil is functional
        gpus = GPUtil.getGPUs()
        if gpus:
            have_gputil = True
        else:
            have_gputil = False
    except ImportError:
        have_gputil = False
    except ValueError:
        have_gputil = False
    if have_gputil:
        from ._gputil import *
    else:
        from ._nogputil import *
except Exception:
    # If cupy import or device access fails, import numpy to the same alias
    import numpy as cp