def __init__(self, pcidev):
    """Look up the NVML handle for ``pcidev`` and record whether it is a GPU.

    The PCI slot name of ``pcidev`` is passed to
    ``nv.nvmlDeviceGetHandleByPciBusId``.  On success ``self.nvh`` holds
    the returned handle, ``self.nv_id`` the index reported by
    ``nv.nvmlDeviceGetIndex`` and ``self.gpu_flag`` is True.  On failure
    ``self.nvh`` stays None; ``self.gpu_flag`` is still set to True when
    the error code is ``NVML_ERROR_GPU_IS_LOST``.

    Args:
        pcidev: PCI device object; must expose a ``slot_name`` string.
            It is also forwarded to the parent class initializer.
    """
    self.nvh = None
    self.display = ':0'
    self.gpu_flag = False
    super(GpuNV, self).__init__(pcidev)
    nv.nvmlInit()
    # Reset after the parent initializer ran, as the original code did.
    self.nvh = None
    self.name = 'NV '
    # Stored before the lookup so the attribute exists on every path,
    # including the "GPU is lost" case where gpu_flag ends up True.
    self.pcidev = pcidev
    try:
        self.nvh = nv.nvmlDeviceGetHandleByPciBusId(
            pcidev.slot_name.encode())
        self.nv_id = nv.nvmlDeviceGetIndex(self.nvh)
        self.gpu_flag = True
    except Exception as e:
        # Not every exception carries an NVML error code; reading it with
        # getattr keeps the handler from raising AttributeError itself.
        err_code = getattr(e, 'value', None)
        if err_code == nv.NVML_ERROR_GPU_IS_LOST:
            self.gpu_flag = True
        # Each GPU has an audio and a video PCI device; suppress the
        # message for the audio device (reported as INVALID_ARGUMENT).
        if err_code != nv.NVML_ERROR_INVALID_ARGUMENT:
            # Use the argument directly: this block never assigns
            # ``self.pci_dev``, so relying on it could fail while logging.
            logger.debug(
                f'[{pcidev.slot_name}/{self.name}] {str(e)}')
    self.curve = GpuNV.CURVE
    self.temp_delta = GpuNV.TEMP_DELTA
def is_working(self):
    """Return True when NVML can still resolve this device's PCI bus id.

    The handle is looked up again from ``self.pcidev.slot_name``; any
    exception raised during that lookup yields False.
    """
    try:
        bus_id = self.pcidev.slot_name.encode()
        nv.nvmlDeviceGetHandleByPciBusId(bus_id)
    except Exception:
        return False
    else:
        return True
def test_nvmlDeviceGetHandleByPciBusId(ngpus, pci_info):
    """Fetch a handle by PCI bus id for each of the first ``ngpus`` entries.

    ``pci_info`` is indexed from 0 to ``ngpus - 1`` and each entry's
    ``busId`` is passed to ``pynvml.nvmlDeviceGetHandleByPciBusId``.  The
    test fails if any lookup raises, and finally checks that one handle
    was collected per device.
    """
    handles = []
    for idx in range(ngpus):
        bus_id = pci_info[idx].busId
        handles.append(pynvml.nvmlDeviceGetHandleByPciBusId(bus_id))
    assert len(handles) == ngpus