Code example #1
File: gpu.py, Project: sanders41/codecarbon
def get_graphics_processes(handle):
    """Returns the list of processes ids having a graphics context on the
    device with the memory used
    https://docs.nvidia.com/deploy/nvml-api/group__nvmlDeviceQueries.html#group__nvmlDeviceQueries_1g7eacf7fa7ba4f4485d166736bf31195e
    """
    processes = pynvml.nvmlDeviceGetGraphicsRunningProcesses(handle)

    return [{"pid": p.pid, "used_memory": p.usedGpuMemory} for p in processes]
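
A minimal usage sketch for the helper above (a sketch, assuming pynvml is installed and a working NVIDIA driver; device index 0 is illustrative):

import pynvml

pynvml.nvmlInit()
try:
    handle = pynvml.nvmlDeviceGetHandleByIndex(0)
    for proc in get_graphics_processes(handle):
        # used_memory is in bytes and may be None when the driver
        # cannot report per-process memory (e.g. Windows WDDM mode).
        print(proc["pid"], proc["used_memory"])
finally:
    pynvml.nvmlShutdown()
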
Code example #2
    def sample_processes_information(self, handle: DeviceHandle) -> GPUProcesses:
        try:
            # List processes with a compute context (e.g. CUDA applications)
            compute_processes = nvmlDeviceGetComputeRunningProcesses(handle)
            # List processes with a graphics context (e.g. applications using OpenGL, DirectX)
            graphics_processes = nvmlDeviceGetGraphicsRunningProcesses(handle)
            # Note: a single process may have both the graphics and compute context active at the same time
        except NVMLError as err:
            self.logger.warning(nvml_error_to_string(err))
            return {}

        processes = {}
        for p in compute_processes + graphics_processes:
            mem = p.usedGpuMemory
            processes[p.pid] = None if mem is None else mem / MiB

        self.log_debug(f"Sampled processes ({len(processes)}): {processes}")
        return processes
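
The MiB constant and the DeviceHandle/GPUProcesses aliases come from elsewhere in that project; plausible stand-in definitions (assumptions, not the project's actual code):

from typing import Any, Dict, Optional

MiB = 1024 * 1024
DeviceHandle = Any  # opaque NVML device handle from nvmlDeviceGetHandleByIndex
GPUProcesses = Dict[int, Optional[float]]  # pid -> GPU memory used, in MiB
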
Code example #3
def get_usage(device_index, my_pid):
    # N is assumed to be pynvml (import pynvml as N) and MB = 1024 * 1024.
    N.nvmlInit()
    try:
        handle = N.nvmlDeviceGetHandleByIndex(int(device_index))

        # A PID may appear in both the compute and the graphics list, so
        # collect into a dict to deduplicate. usedGpuMemory can be None
        # when the driver cannot report it; treat that as 0.
        usage = {
            nv_process.pid: (nv_process.usedGpuMemory or 0) // MB
            for nv_process in N.nvmlDeviceGetComputeRunningProcesses(handle) +
            N.nvmlDeviceGetGraphicsRunningProcesses(handle)
        }

        if my_pid not in usage:
            raise KeyError("PID not found")

        return usage[my_pid]
    finally:
        N.nvmlShutdown()
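
Usage sketch (assumes the N and MB names above are defined, and that the calling process holds an active context on device 0; otherwise KeyError is raised):

import os

print(get_usage(0, os.getpid()))
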
Code example #4
File: stats.py, Project: coolalexzb/pyhf-benchmark
def gpu_in_use_by_this_process(gpu_handle):
    if not psutil:
        return False

    base_process = psutil.Process().parent() or psutil.Process()

    our_processes = base_process.children(recursive=True)
    our_processes.append(base_process)

    our_pids = {process.pid for process in our_processes}

    compute_pids = {
        process.pid
        for process in pynvml.nvmlDeviceGetComputeRunningProcesses(gpu_handle)
    }
    graphics_pids = {
        process.pid
        for process in pynvml.nvmlDeviceGetGraphicsRunningProcesses(gpu_handle)
    }

    pids_using_device = compute_pids | graphics_pids

    return len(pids_using_device & our_pids) > 0
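
A hedged usage sketch for the check above (NVML initialization and the device index are assumptions for illustration):

import pynvml

pynvml.nvmlInit()
handle = pynvml.nvmlDeviceGetHandleByIndex(0)
print("GPU 0 in use by this process tree:", gpu_in_use_by_this_process(handle))
pynvml.nvmlShutdown()
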
Code example #5
File: nvidia.py, Project: lfwa/carbontracker
    def _get_handles_by_pid(self):
        """Returns handles of GPU running at least one process from PIDS.

        Note:
            GPUs need to have started work before showing any processes.
            Requires NVML to be initialized.
            Bug: Containers need to be started with --pid=host for NVML to show
            processes: https://github.com/NVIDIA/nvidia-docker/issues/179.
        """
        device_count = pynvml.nvmlDeviceGetCount()
        devices = []

        for index in range(device_count):
            handle = pynvml.nvmlDeviceGetHandleByIndex(index)
            gpu_pids = [
                p.pid
                for p in pynvml.nvmlDeviceGetComputeRunningProcesses(handle) +
                pynvml.nvmlDeviceGetGraphicsRunningProcesses(handle)
            ]

            if set(gpu_pids).intersection(self.pids):
                devices.append(handle)

        return devices
Code example #6
File: core.py, Project: IgnacioJPickering/gpustat
        def get_gpu_info(handle):
            """Get one GPU information specified by nvml handle"""
            def get_process_info(nv_process):
                """Get the process information of specific pid"""
                process = {}
                ps_process = psutil.Process(pid=nv_process.pid)
                process['username'] = ps_process.username()
                # cmdline returns full path;
                # as in `ps -o comm`, get short cmdnames.
                _cmdline = ps_process.cmdline()
                if not _cmdline:
                    # sometimes, zombie or unknown (e.g. [kworker/8:2H])
                    process['command'] = '?'
                else:
                    process['command'] = os.path.basename(_cmdline[0])
                # Bytes to MBytes (usedGpuMemory may be None when the
                # driver cannot report per-process memory)
                process['gpu_memory_usage'] = (nv_process.usedGpuMemory // MB
                                               if nv_process.usedGpuMemory
                                               else None)
                process['pid'] = nv_process.pid
                return process

            name = _decode(N.nvmlDeviceGetName(handle))
            uuid = _decode(N.nvmlDeviceGetUUID(handle))

            try:
                temperature = N.nvmlDeviceGetTemperature(
                    handle, N.NVML_TEMPERATURE_GPU)
            except N.NVMLError:
                temperature = None  # Not supported

            try:
                fan_speed = N.nvmlDeviceGetFanSpeed(handle)
            except N.NVMLError:
                fan_speed = None  # Not supported

            try:
                memory = N.nvmlDeviceGetMemoryInfo(handle)  # in Bytes
            except N.NVMLError:
                memory = None  # Not supported

            try:
                utilization = N.nvmlDeviceGetUtilizationRates(handle)
            except N.NVMLError:
                utilization = None  # Not supported

            try:
                power = N.nvmlDeviceGetPowerUsage(handle)
            except N.NVMLError:
                power = None

            try:
                power_limit = N.nvmlDeviceGetEnforcedPowerLimit(handle)
            except N.NVMLError:
                power_limit = None

            try:
                nv_comp_processes = \
                    N.nvmlDeviceGetComputeRunningProcesses(handle)
            except N.NVMLError:
                nv_comp_processes = None  # Not supported
            try:
                nv_graphics_processes = \
                    N.nvmlDeviceGetGraphicsRunningProcesses(handle)
            except N.NVMLError:
                nv_graphics_processes = None  # Not supported

            if nv_comp_processes is None and nv_graphics_processes is None:
                processes = None
            else:
                processes = []
                nv_comp_processes = nv_comp_processes or []
                nv_graphics_processes = nv_graphics_processes or []
                for nv_process in nv_comp_processes + nv_graphics_processes:
                    # TODO: could be more information such as system memory
                    # usage, CPU percentage, create time etc.
                    try:
                        process = get_process_info(nv_process)
                        processes.append(process)
                    except psutil.NoSuchProcess:
                        # TODO: add some reminder for NVML broken context
                        # e.g. nvidia-smi reset  or  reboot the system
                        pass

            index = N.nvmlDeviceGetIndex(handle)
            gpu_info = {
                'index':
                index,
                'uuid':
                uuid,
                'name':
                name,
                'temperature.gpu':
                temperature,
                'fan.speed':
                fan_speed,
                'utilization.gpu':
                utilization.gpu if utilization else None,
                'power.draw':
                power // 1000 if power is not None else None,
                'enforced.power.limit':
                power_limit // 1000 if power_limit is not None else None,
                # Convert bytes into MBytes
                'memory.used':
                memory.used // MB if memory else None,
                'memory.total':
                memory.total // MB if memory else None,
                'processes':
                processes,
            }
            return gpu_info
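
The gpustat snippets rely on module-level helpers defined elsewhere in core.py; plausible stand-ins (the _decode body matches the one defined inline in code examples #10 and #13 below; N and MB are assumptions consistent with how they are used):

import pynvml as N

MB = 1024 * 1024

def _decode(b):
    return b.decode() if isinstance(b, bytes) else b
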
Code example #7
File: core.py, Project: vinthony/gpustat
        def get_gpu_info(handle):
            """Get one GPU information specified by nvml handle"""
            def get_process_info(nv_process):
                """Get the process information of specific pid"""
                process = {}
                if nv_process.pid not in GPUStatCollection.global_processes:
                    GPUStatCollection.global_processes[nv_process.pid] = \
                        psutil.Process(pid=nv_process.pid)
                ps_process = GPUStatCollection.global_processes[nv_process.pid]
                process['username'] = ps_process.username()
                # cmdline returns full path;
                # as in `ps -o comm`, get short cmdnames.
                _cmdline = ps_process.cmdline()
                if not _cmdline:
                    # sometimes, zombie or unknown (e.g. [kworker/8:2H])
                    process['command'] = '?'
                    process['full_command'] = ['?']
                else:
                    process['command'] = os.path.basename(_cmdline[0])
                    process['full_command'] = _cmdline
                # Bytes to MBytes
                # If the driver is not in TCC mode, this will be None.
                usedmem = nv_process.usedGpuMemory // MB if \
                          nv_process.usedGpuMemory else None
                process['gpu_memory_usage'] = usedmem
                process['cpu_percent'] = ps_process.cpu_percent()
                process['cpu_memory_usage'] = \
                    round((ps_process.memory_percent() / 100.0) *
                          psutil.virtual_memory().total)
                process['pid'] = nv_process.pid
                return process

            name = _decode(N.nvmlDeviceGetName(handle))
            uuid = _decode(N.nvmlDeviceGetUUID(handle))

            try:
                temperature = N.nvmlDeviceGetTemperature(
                    handle, N.NVML_TEMPERATURE_GPU)
            except N.NVMLError:
                temperature = None  # Not supported

            try:
                fan_speed = N.nvmlDeviceGetFanSpeed(handle)
            except N.NVMLError:
                fan_speed = None  # Not supported

            try:
                memory = N.nvmlDeviceGetMemoryInfo(handle)  # in Bytes
            except N.NVMLError:
                memory = None  # Not supported

            try:
                utilization = N.nvmlDeviceGetUtilizationRates(handle)
            except N.NVMLError:
                utilization = None  # Not supported

            try:
                power = N.nvmlDeviceGetPowerUsage(handle)
            except N.NVMLError:
                power = None

            try:
                power_limit = N.nvmlDeviceGetEnforcedPowerLimit(handle)
            except N.NVMLError:
                power_limit = None

            try:
                nv_comp_processes = \
                    N.nvmlDeviceGetComputeRunningProcesses(handle)
            except N.NVMLError:
                nv_comp_processes = None  # Not supported
            try:
                nv_graphics_processes = \
                    N.nvmlDeviceGetGraphicsRunningProcesses(handle)
            except N.NVMLError:
                nv_graphics_processes = None  # Not supported

            if nv_comp_processes is None and nv_graphics_processes is None:
                processes = None
            else:
                processes = []
                nv_comp_processes = nv_comp_processes or []
                nv_graphics_processes = nv_graphics_processes or []
                for nv_process in nv_comp_processes + nv_graphics_processes:
                    try:
                        process = get_process_info(nv_process)
                        processes.append(process)
                    except psutil.NoSuchProcess:
                        # TODO: add some reminder for NVML broken context
                        # e.g. nvidia-smi reset  or  reboot the system
                        pass

                # TODO: Do not block if full process info is not requested
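                # cpu_percent() measures CPU usage between successive calls:
                # the call inside get_process_info primed the counter, and
                # this short sleep lets the second call below return a
                # meaningful value.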
                time.sleep(0.1)
                for process in processes:
                    pid = process['pid']
                    cache_process = GPUStatCollection.global_processes[pid]
                    process['cpu_percent'] = cache_process.cpu_percent()

            index = N.nvmlDeviceGetIndex(handle)
            gpu_info = {
                'index':
                index,
                'uuid':
                uuid,
                'name':
                name,
                'temperature.gpu':
                temperature,
                'fan.speed':
                fan_speed,
                'utilization.gpu':
                utilization.gpu if utilization else None,
                'power.draw':
                power // 1000 if power is not None else None,
                'enforced.power.limit':
                power_limit // 1000 if power_limit is not None else None,
                # Convert bytes into MBytes
                'memory.used':
                memory.used // MB if memory else None,
                'memory.total':
                memory.total // MB if memory else None,
                'processes':
                processes,
            }
            GPUStatCollection.clean_processes()
            return gpu_info
Code example #8
File: gpustat.py, Project: Yusnows/sermonitor
        def get_gpu_info(handle):
            """Get one GPU information specified by nvml handle"""

            def get_process_info(nv_process):
                """Get the process information of specific pid"""
                process = {}
                if nv_process.pid not in GPUStatCollection.global_processes:
                    GPUStatCollection.global_processes[nv_process.pid] = \
                        psutil.Process(pid=nv_process.pid)
                ps_process = GPUStatCollection.global_processes[nv_process.pid]

                # TODO: ps_process is being cached, but the dict below is not.
                process['username'] = ps_process.username()
                # cmdline returns full path;
                # as in `ps -o comm`, get short cmdnames.
                _cmdline = ps_process.cmdline()
                if not _cmdline:
                    # sometimes, zombie or unknown (e.g. [kworker/8:2H])
                    process['command'] = '?'
                    process['full_command'] = ['?']
                else:
                    process['command'] = os.path.basename(_cmdline[0])
                    process['full_command'] = _cmdline
                # Bytes to MBytes
                # If the driver is not in TCC mode, this will be None.
                usedmem = (nv_process.usedGpuMemory // MB if
                           nv_process.usedGpuMemory else None)
                process['gpu_memory_usage'] = usedmem
                # process['gpu_memory_usage'] = ("%d MiB" % usedmem if usedmem is not None else usedmem)
                process['cpu_percent'] = ps_process.cpu_percent()
                # process['cpu_memory_usage'] = "%d MiB" % (
                #     round((ps_process.memory_percent() / 100.0) * psutil.virtual_memory().total) // MB)
                process['cpu_memory_usage'] = (
                    round((ps_process.memory_percent() / 100.0) * psutil.virtual_memory().total) // MB)
                process['pid'] = nv_process.pid
                return process

            name = _decode(N.nvmlDeviceGetName(handle))
            uuid = _decode(N.nvmlDeviceGetUUID(handle))

            try:
                temperature = N.nvmlDeviceGetTemperature(
                    handle, N.NVML_TEMPERATURE_GPU
                )
            except N.NVMLError:
                temperature = None  # Not supported

            try:
                fan_speed = N.nvmlDeviceGetFanSpeed(handle)
            except N.NVMLError:
                fan_speed = None  # Not supported

            try:
                memory = N.nvmlDeviceGetMemoryInfo(handle)  # in Bytes
            except N.NVMLError:
                memory = None  # Not supported

            try:
                utilization = N.nvmlDeviceGetUtilizationRates(handle)
            except N.NVMLError:
                utilization = None  # Not supported

            try:
                utilization_enc = N.nvmlDeviceGetEncoderUtilization(handle)
            except N.NVMLError:
                utilization_enc = None  # Not supported

            try:
                utilization_dec = N.nvmlDeviceGetDecoderUtilization(handle)
            except N.NVMLError:
                utilization_dec = None  # Not supported

            try:
                power = N.nvmlDeviceGetPowerUsage(handle)
            except N.NVMLError:
                power = None

            try:
                power_limit = N.nvmlDeviceGetEnforcedPowerLimit(handle)
            except N.NVMLError:
                power_limit = None

            try:
                nv_comp_processes = \
                    N.nvmlDeviceGetComputeRunningProcesses(handle)
            except N.NVMLError:
                nv_comp_processes = None  # Not supported
            try:
                nv_graphics_processes = \
                    N.nvmlDeviceGetGraphicsRunningProcesses(handle)
            except N.NVMLError:
                nv_graphics_processes = None  # Not supported

            if nv_comp_processes is None and nv_graphics_processes is None:
                processes = None
            else:
                processes = []
                nv_comp_processes = nv_comp_processes or []
                nv_graphics_processes = nv_graphics_processes or []
                # A single process might run in both graphics and compute mode;
                # however, we display the process only once.
                seen_pids = set()
                for nv_process in nv_comp_processes + nv_graphics_processes:
                    if nv_process.pid in seen_pids:
                        continue
                    seen_pids.add(nv_process.pid)
                    try:
                        process = get_process_info(nv_process)
                        processes.append(process)
                    except psutil.NoSuchProcess:
                        # TODO: add some reminder for NVML broken context
                        # e.g. nvidia-smi reset  or  reboot the system
                        pass
                    except FileNotFoundError:
                        # Ignore the exception, which probably occurred
                        # inside psutil due to a non-existent PID (see #95).
                        # The exception should have been translated, but
                        # there appears to be a bug in psutil; it is unlikely
                        # that FileNotFoundError is thrown in other situations.
                        pass

                # TODO: Do not block if full process info is not requested
                time.sleep(0.1)
                for process in processes:
                    pid = process['pid']
                    cache_process = GPUStatCollection.global_processes[pid]
                    try:
                        process['cpu_percent'] = cache_process.cpu_percent()
                    except psutil.NoSuchProcess:
                        process['cpu_percent'] = 0.0
                    except FileNotFoundError:
                        # Ignore the exception, which probably occurred
                        # inside psutil due to a non-existent PID (see #95).
                        # The exception should have been translated, but
                        # there appears to be a bug in psutil; it is unlikely
                        # that FileNotFoundError is thrown in other situations.
                        process['cpu_percent'] = 0.0

            index = N.nvmlDeviceGetIndex(handle)
            gpu_info = {
                'index': index,
                'uuid': uuid,
                'name': name,
                'temperature.gpu': temperature,
                'fan.speed': fan_speed,
                'utilization.gpu': utilization.gpu if utilization else 0,
                'utilization.enc':
                    utilization_enc[0] if utilization_enc else None,
                'utilization.dec':
                    utilization_dec[0] if utilization_dec else None,
                'power.draw': power // 1000 if power is not None else 0,
                'enforced.power.limit': power_limit // 1000
                if power_limit is not None else 0,
                # Convert bytes into MBytes
                'memory.used': memory.used // MB if memory else 0,
                'memory.total': memory.total // MB if memory else 0,
                'processes': processes,
            }
            GPUStatCollection.clean_processes()
            return gpu_info
Code example #9
        def get_gpu_info(handle):
            """Get one GPU information specified by nvml handle"""
            def get_last_used(index):
                last_useds = []
                if not os.path.exists('gpu_history.pkl'):
                    with open('gpu_history.pkl', 'wb') as fw:
                        pickle.dump({}, fw)
                with open('gpu_history.pkl', 'rb') as f:
                    history = pickle.load(f)
                    if platform.node() in history:
                        for user, last_used in history[
                                platform.node()][index].items():
                            # 1 day = 24 hours, 1 hour = 3600 seconds
                            used_before = (datetime.now() - last_used['last_used']).days * 24 + \
                                          (datetime.now() - last_used['last_used']).seconds / 3600
                            last_useds.append((user, used_before))
                        return last_useds
                    else:
                        return []

            def get_process_info(nv_process):
                """Get the process information of specific pid"""
                process = {}
                if nv_process.pid not in GPUStatCollection.global_processes:
                    GPUStatCollection.global_processes[nv_process.pid] = \
                        psutil.Process(pid=nv_process.pid)
                ps_process = GPUStatCollection.global_processes[nv_process.pid]
                process['username'] = ps_process.username()
                # cmdline returns full path;
                # as in `ps -o comm`, get short cmdnames.
                _cmdline = ps_process.cmdline()
                if not _cmdline:
                    # sometimes, zombie or unknown (e.g. [kworker/8:2H])
                    process['command'] = '?'
                    process['full_command'] = ['?']
                else:
                    process['command'] = os.path.basename(_cmdline[0])
                    process['full_command'] = _cmdline
                # Bytes to MBytes (usedGpuMemory may be None when the
                # driver cannot report per-process memory)
                process['gpu_memory_usage'] = (nv_process.usedGpuMemory // MB
                                               if nv_process.usedGpuMemory
                                               else None)
                process['cpu_percent'] = ps_process.cpu_percent()
                process['cpu_memory_usage'] = \
                    round((ps_process.memory_percent() / 100.0) *
                          psutil.virtual_memory().total)
                process['pid'] = nv_process.pid
                return process

            name = _decode(N.nvmlDeviceGetName(handle))
            uuid = _decode(N.nvmlDeviceGetUUID(handle))

            try:
                temperature = N.nvmlDeviceGetTemperature(
                    handle, N.NVML_TEMPERATURE_GPU)
            except N.NVMLError:
                temperature = None  # Not supported

            try:
                fan_speed = N.nvmlDeviceGetFanSpeed(handle)
            except N.NVMLError:
                fan_speed = None  # Not supported

            try:
                memory = N.nvmlDeviceGetMemoryInfo(handle)  # in Bytes
            except N.NVMLError:
                memory = None  # Not supported

            try:
                utilization = N.nvmlDeviceGetUtilizationRates(handle)
            except N.NVMLError:
                utilization = None  # Not supported

            try:
                power = N.nvmlDeviceGetPowerUsage(handle)
            except N.NVMLError:
                power = None

            try:
                power_limit = N.nvmlDeviceGetEnforcedPowerLimit(handle)
            except N.NVMLError:
                power_limit = None

            try:
                nv_comp_processes = \
                    N.nvmlDeviceGetComputeRunningProcesses(handle)
            except N.NVMLError:
                nv_comp_processes = None  # Not supported
            try:
                nv_graphics_processes = \
                    N.nvmlDeviceGetGraphicsRunningProcesses(handle)
            except N.NVMLError:
                nv_graphics_processes = None  # Not supported

            if nv_comp_processes is None and nv_graphics_processes is None:
                processes = None
            else:
                processes = []
                nv_comp_processes = nv_comp_processes or []
                nv_graphics_processes = nv_graphics_processes or []
                for nv_process in nv_comp_processes + nv_graphics_processes:
                    try:
                        process = get_process_info(nv_process)
                        processes.append(process)
                    except psutil.NoSuchProcess:
                        # TODO: add some reminder for NVML broken context
                        # e.g. nvidia-smi reset  or  reboot the system
                        pass

                # TODO: Do not block if full process info is not requested
                time.sleep(0.1)
                for process in processes:
                    pid = process['pid']
                    cache_process = GPUStatCollection.global_processes[pid]
                    process['cpu_percent'] = cache_process.cpu_percent()

            index = N.nvmlDeviceGetIndex(handle)
            last_used = get_last_used(index)
            gpu_info = {
                'index':
                index,
                'uuid':
                uuid,
                'name':
                name,
                'temperature.gpu':
                temperature,
                'fan.speed':
                fan_speed,
                'utilization.gpu':
                utilization.gpu if utilization else None,
                'power.draw':
                power // 1000 if power is not None else None,
                'enforced.power.limit':
                power_limit // 1000 if power_limit is not None else None,
                # Convert bytes into MBytes
                'memory.used':
                memory.used // MB if memory else None,
                'memory.total':
                memory.total // MB if memory else None,
                'processes':
                processes,
                'last_used':
                last_used,
            }
            GPUStatCollection.clean_processes()
            return gpu_info
Code example #10
File: gpustat.py, Project: asdkant/gpustat
        def get_gpu_info(handle):
            """Get one GPU information specified by nvml handle"""
            def get_process_info(pid):
                """Get the process information of specific pid"""
                process = {}
                ps_process = psutil.Process(pid=pid)
                process['username'] = ps_process.username()
                # cmdline returns full path; as in `ps -o comm`, get short cmdnames.
                _cmdline = ps_process.cmdline()
                # cmdline is empty for zombie or unknown processes (e.g. [kworker/8:2H]).
                process['command'] = (os.path.basename(_cmdline[0])
                                      if _cmdline else '?')
                # Bytes to MBytes; nv_process comes from the enclosing loop,
                # and usedGpuMemory may be None.
                process['gpu_memory_usage'] = (int(nv_process.usedGpuMemory /
                                                   1024 / 1024)
                                               if nv_process.usedGpuMemory
                                               else None)
                process['pid'] = nv_process.pid
                return process

            def _decode(b):
                if isinstance(b, bytes):
                    return b.decode()  # for python3, to unicode
                return b

            name = _decode(N.nvmlDeviceGetName(handle))
            uuid = _decode(N.nvmlDeviceGetUUID(handle))

            try:
                temperature = N.nvmlDeviceGetTemperature(
                    handle, N.NVML_TEMPERATURE_GPU)
            except N.NVMLError:
                temperature = None  # Not supported

            try:
                memory = N.nvmlDeviceGetMemoryInfo(handle)  # in Bytes
            except N.NVMLError:
                memory = None  # Not supported

            try:
                utilization = N.nvmlDeviceGetUtilizationRates(handle)
            except N.NVMLError:
                utilization = None  # Not supported

            processes = []
            try:
                nv_comp_processes = N.nvmlDeviceGetComputeRunningProcesses(
                    handle)
            except N.NVMLError:
                nv_comp_processes = None  # Not supported
            try:
                nv_graphics_processes = N.nvmlDeviceGetGraphicsRunningProcesses(
                    handle)
            except N.NVMLError:
                nv_graphics_processes = None  # Not supported

            if nv_comp_processes is None and nv_graphics_processes is None:
                processes = None  # Not supported (in both cases)
            else:
                nv_comp_processes = nv_comp_processes or []
                nv_graphics_processes = nv_graphics_processes or []
                for nv_process in (nv_comp_processes + nv_graphics_processes):
                    # TODO: could be more information such as system memory usage,
                    # CPU percentage, create time etc.
                    try:
                        process = get_process_info(nv_process.pid)
                        processes.append(process)
                    except psutil.NoSuchProcess:
                        # TODO: add some reminder for NVML broken context
                        # e.g. nvidia-smi reset  or  reboot the system
                        pass

            gpu_info = {
                'index': index,
                'uuid': uuid,
                'name': name,
                'temperature.gpu': temperature,
                'utilization.gpu': utilization.gpu if utilization else None,
                # Convert bytes into MBytes
                'memory.used':
                int(memory.used / 1024 / 1024) if memory else None,
                'memory.total':
                int(memory.total / 1024 / 1024) if memory else None,
                'processes': processes,
            }
            return gpu_info
Code example #11
File: gpu_Monitor.py, Project: AbuShawarib/gpuMonitor
 def info_refresh(self):
     
     try:
         stat = open("/proc/stat")
         self.statlines = stat.read().splitlines()[1:-1]
         stat.close()
         
     except IOError:
         print("Problem opening /proc/stat, exiting..")
         pynvml.nvmlShutdown()
         quit()
     
     for i in range(self.corecount):
         for j in self.statlines[i].split()[1:]:  # skip the leading "cpuN" label
             self.total[i] += int(j)
         self.idle[i] = int(self.statlines[i].split()[4])

     for i in range(self.corecount):
         if (self.total[i] - self.prev_total[i]) == 0:
             # No delta for this core yet; record the counters, reset the
             # accumulators, and skip just this core.
             self.prev_idle[i] = self.idle[i]
             self.prev_total[i] = self.total[i]
             self.idle[i] = 0
             self.total[i] = 0
             continue

         self.cpu_prog_bars[i].set_fraction(1 - ((self.idle[i] - self.prev_idle[i]) / (self.total[i] - self.prev_total[i])))
         self.prev_idle[i] = self.idle[i]
         self.prev_total[i] = self.total[i]
         self.idle[i] = 0
         self.total[i] = 0
     
     for i in range(self.deviceCount):
         
         util = pynvml.nvmlDeviceGetUtilizationRates(self.gpu_handles[i])
         temp = pynvml.nvmlDeviceGetTemperature(self.gpu_handles[i], pynvml.NVML_TEMPERATURE_GPU)
         memInfo = pynvml.nvmlDeviceGetMemoryInfo(self.gpu_handles[i])
         (encoder_util, sPeriod) = pynvml.nvmlDeviceGetEncoderUtilization(self.gpu_handles[i])
         (decoder_util, sPeriod) = pynvml.nvmlDeviceGetDecoderUtilization(self.gpu_handles[i])
         
         mem_total = memInfo.total / 1024 / 1024
         mem_used = memInfo.used / 1024 / 1024
         
         self.gpu_prog_bars[i*6].set_text("GPU: %d%%" % util.gpu)
         self.gpu_prog_bars[i*6].set_fraction(util.gpu / 100)
         ########
         self.util_history.append(util.gpu)
         self.util_graph.queue_draw()
         
         self.temp_history.append(temp)
         self.temp_graph.queue_draw()
         ########
         self.gpu_prog_bars[i*6 +1].set_text("Memory Utilization: %d%%" % util.memory)
         self.gpu_prog_bars[i*6 +1].set_fraction(util.memory / 100)
         
         self.gpu_prog_bars[i*6 +4].set_text("Encoder: %d%%" % encoder_util)
         self.gpu_prog_bars[i*6 +5].set_text("Decoder: %d%%" % decoder_util)
         self.gpu_prog_bars[i*6 +4].set_fraction(encoder_util / 100)
         self.gpu_prog_bars[i*6 +5].set_fraction(decoder_util / 100)
         
         self.gpu_prog_bars[i*6 +2].set_text("Memory Usage: %d MiB/%d MiB" % (mem_used, mem_total))
         self.gpu_prog_bars[i*6 +2].set_fraction(mem_used / mem_total)
         
         self.gpu_prog_bars[i*6 +3].set_text("Temperature: %d °C" % temp)
         if temp > 100:
            temp = 100
         elif temp < 0:
             temp = 0
         self.gpu_prog_bars[i*6 +3].set_fraction(temp / 100)
         
         
     #--proc--
     procs = pynvml.nvmlDeviceGetGraphicsRunningProcesses(self.gpu_handles[0])
     
     proc_liststore = Gtk.ListStore(int, str, int)
     
     for p in procs:
         pid = p.pid
         try:
             path = pynvml.nvmlSystemGetProcessName(p.pid).decode('utf-8')
         except pynvml.NVMLError:
             # The process may have vanished between the two NVML calls.
             self.exit()
         if p.usedGpuMemory is None:
             mem = 0
         else:
             # The Gtk.ListStore column was declared int, so use integer MiB.
             mem = p.usedGpuMemory // (1024 * 1024)
         proc_liststore.append([pid, path, mem])
     self.tree.set_model(proc_liststore)
     return True
Code example #12
File: core.py, Project: wookayin/gpustat
        def get_gpu_info(handle):
            """Get one GPU information specified by nvml handle"""

            def get_process_info(nv_process):
                """Get the process information of specific pid"""
                process = {}
                ps_process = psutil.Process(pid=nv_process.pid)
                process['username'] = ps_process.username()
                # cmdline returns full path;
                # as in `ps -o comm`, get short cmdnames.
                _cmdline = ps_process.cmdline()
                if not _cmdline:
                    # sometimes, zombie or unknown (e.g. [kworker/8:2H])
                    process['command'] = '?'
                else:
                    process['command'] = os.path.basename(_cmdline[0])
                # Bytes to MBytes (usedGpuMemory may be None when the
                # driver cannot report per-process memory)
                process['gpu_memory_usage'] = (nv_process.usedGpuMemory // MB
                                               if nv_process.usedGpuMemory
                                               else None)
                process['pid'] = nv_process.pid
                return process

            name = _decode(N.nvmlDeviceGetName(handle))
            uuid = _decode(N.nvmlDeviceGetUUID(handle))

            try:
                temperature = N.nvmlDeviceGetTemperature(
                    handle, N.NVML_TEMPERATURE_GPU
                )
            except N.NVMLError:
                temperature = None  # Not supported

            try:
                memory = N.nvmlDeviceGetMemoryInfo(handle)  # in Bytes
            except N.NVMLError:
                memory = None  # Not supported

            try:
                utilization = N.nvmlDeviceGetUtilizationRates(handle)
            except N.NVMLError:
                utilization = None  # Not supported

            try:
                power = N.nvmlDeviceGetPowerUsage(handle)
            except N.NVMLError:
                power = None

            try:
                power_limit = N.nvmlDeviceGetEnforcedPowerLimit(handle)
            except N.NVMLError:
                power_limit = None

            try:
                nv_comp_processes = \
                    N.nvmlDeviceGetComputeRunningProcesses(handle)
            except N.NVMLError:
                nv_comp_processes = None  # Not supported
            try:
                nv_graphics_processes = \
                    N.nvmlDeviceGetGraphicsRunningProcesses(handle)
            except N.NVMLError:
                nv_graphics_processes = None  # Not supported

            if nv_comp_processes is None and nv_graphics_processes is None:
                processes = None
            else:
                processes = []
                nv_comp_processes = nv_comp_processes or []
                nv_graphics_processes = nv_graphics_processes or []
                for nv_process in nv_comp_processes + nv_graphics_processes:
                    # TODO: could be more information such as system memory
                    # usage, CPU percentage, create time etc.
                    try:
                        process = get_process_info(nv_process)
                        processes.append(process)
                    except psutil.NoSuchProcess:
                        # TODO: add some reminder for NVML broken context
                        # e.g. nvidia-smi reset  or  reboot the system
                        pass

            index = N.nvmlDeviceGetIndex(handle)
            gpu_info = {
                'index': index,
                'uuid': uuid,
                'name': name,
                'temperature.gpu': temperature,
                'utilization.gpu': utilization.gpu if utilization else None,
                'power.draw': power // 1000 if power is not None else None,
                'enforced.power.limit': power_limit // 1000
                if power_limit is not None else None,
                # Convert bytes into MBytes
                'memory.used': memory.used // MB if memory else None,
                'memory.total': memory.total // MB if memory else None,
                'processes': processes,
            }
            return gpu_info
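
For day-to-day use, gpustat wraps this machinery behind a public query helper; a hedged usage sketch (method names taken from gpustat's README and assumed current):

import gpustat

stats = gpustat.new_query()  # builds a GPUStatCollection via get_gpu_info above
stats.print_formatted()      # the familiar one-line-per-GPU console output
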
Code example #13
    def get_gpu_info(handle):
        """Get one GPU information specified by nvml handle"""
        def get_process_info(pid):
            """Get the process information of specific pid"""
            process = {}
            ps_process = psutil.Process(pid=pid)
            process['username'] = ps_process.username()
            # cmdline returns full path; as in `ps -o comm`, get short cmdnames.
            _cmdline = ps_process.cmdline()
            if not _cmdline:  # sometimes, zombie or unknown (e.g. [kworker/8:2H])
                process['command'] = '?'
            else:
                process['command'] = os.path.basename(_cmdline[0])
            # Bytes to MBytes; nv_process comes from the enclosing loop, and
            # usedGpuMemory may be None when the driver cannot report it.
            process['gpu_memory_usage'] = (int(nv_process.usedGpuMemory /
                                               1024 / 1024)
                                           if nv_process.usedGpuMemory else None)
            process['pid'] = nv_process.pid
            return process

        def _decode(b):
            if isinstance(b, bytes):
                return b.decode()  # for python3, to unicode
            return b

        name = _decode(N.nvmlDeviceGetName(handle))
        uuid = _decode(N.nvmlDeviceGetUUID(handle))

        try:
            minor = int(N.nvmlDeviceGetMinorNumber(handle))
        except N.NVMLError:
            minor = None  # Not supported

        try:
            bus_id = _decode(N.nvmlDeviceGetPciInfo(handle).busId)
        except N.NVMLError:
            bus_id = None  # Not supported

        try:
            serial = _decode(N.nvmlDeviceGetSerial(handle))
        except N.NVMLError:
            serial = None  # Not supported

        try:
            temperature = N.nvmlDeviceGetTemperature(handle,
                                                     N.NVML_TEMPERATURE_GPU)
        except N.NVMLError:
            temperature = None  # Not supported

        try:
            memory = N.nvmlDeviceGetMemoryInfo(handle)  # in Bytes
        except N.NVMLError:
            memory = None  # Not supported

        try:
            utilization = N.nvmlDeviceGetUtilizationRates(handle)
        except N.NVMLError:
            utilization = None  # Not supported

        try:
            power = N.nvmlDeviceGetPowerUsage(handle)
        except N.NVMLError:  # NVMLError_NotSupported is a subclass
            power = None

        try:
            power_limit = N.nvmlDeviceGetEnforcedPowerLimit(handle)
        except N.NVMLError:
            power_limit = None

        processes = []
        try:
            nv_comp_processes = N.nvmlDeviceGetComputeRunningProcesses(handle)
        except N.NVMLError:
            nv_comp_processes = None  # Not supported
        try:
            nv_graphics_processes = N.nvmlDeviceGetGraphicsRunningProcesses(
                handle)
        except N.NVMLError:
            nv_graphics_processes = None  # Not supported

        if nv_comp_processes is None and nv_graphics_processes is None:
            processes = None  # Not supported (in both cases)
        else:
            nv_comp_processes = nv_comp_processes or []
            nv_graphics_processes = nv_graphics_processes or []
            for nv_process in (nv_comp_processes + nv_graphics_processes):
                # TODO: could be more information such as system memory usage,
                # CPU percentage, create time etc.
                try:
                    process = get_process_info(nv_process.pid)
                    processes.append(process)
                except psutil.NoSuchProcess:
                    # TODO: add some reminder for NVML broken context
                    # e.g. nvidia-smi reset  or  reboot the system
                    pass

        gpu_info = {
            'index': index,
            'uuid': uuid,
            'name': name,
            'minor': minor,
            'bus_id': bus_id,
            'serial': serial,
            'temperature_gpu': temperature,
            'utilization_gpu': utilization.gpu if utilization else None,
            'power_draw': int(power / 1000) if power is not None else None,
            'power_limit':
            int(power_limit / 1000) if power_limit is not None else None,
            'memory_free': int(memory.free) if memory else None,
            'memory_used': int(memory.used) if memory else None,
            'memory_total': int(memory.total) if memory else None,
            'memory_utilization': utilization.memory if utilization else None,
            'processes': processes,
        }
        return gpu_info
Code example #14
        def get_gpu_info(handle):
            """Get one GPU information specified by nvml handle"""
            def get_process_info(pid):
                """Get the process information of specific pid"""
                process = {}
                ps_process = psutil.Process(pid=pid)
                process['username'] = ps_process.username()
                # cmdline returns full path; as in `ps -o comm`, get short cmdnames.
                _cmdline = ps_process.cmdline()
                if not _cmdline:  # sometimes, zombie or unknown (e.g. [kworker/8:2H])
                    process['command'] = '?'
                else:
                    process['command'] = os.path.basename(_cmdline[0])
                # Bytes to MBytes; nv_process comes from the enclosing loop,
                # and usedGpuMemory may be None.
                process['gpu_memory_usage'] = (int(nv_process.usedGpuMemory /
                                                   1024 / 1024)
                                               if nv_process.usedGpuMemory
                                               else None)
                process['pid'] = nv_process.pid

                # For docker: derive the container id from the process cgroup
                # (fragile: assumes cgroup v1 paths and docker's layout).
                cmd = 'cat /proc/{}/cgroup'.format(nv_process.pid)
                ret = subprocess.check_output(cmd.split())
                container_id = str(ret).split('/')[2][:12]
                process['container_id'] = container_id

                cmd = 'docker ps -a'
                ret = subprocess.check_output(cmd.split())
                docker_data = str(ret).split('\\n')[1:-1]
                for personal in docker_data:
                    personal_data = personal.split()
                    if container_id == personal_data[0]:
                        process['container_user_name'] = personal_data[-1]

                return process

            def _decode(b):
                if isinstance(b, bytes):
                    return b.decode()  # for python3, to unicode
                return b

            name = _decode(N.nvmlDeviceGetName(handle))
            uuid = _decode(N.nvmlDeviceGetUUID(handle))

            try:
                temperature = N.nvmlDeviceGetTemperature(
                    handle, N.NVML_TEMPERATURE_GPU)
            except N.NVMLError:
                temperature = None  # Not supported

            try:
                memory = N.nvmlDeviceGetMemoryInfo(handle)  # in Bytes
            except N.NVMLError:
                memory = None  # Not supported

            try:
                utilization = N.nvmlDeviceGetUtilizationRates(handle)
            except N.NVMLError:
                utilization = None  # Not supported

            try:
                power = N.nvmlDeviceGetPowerUsage(handle)
            except N.NVMLError:
                power = None

            try:
                power_limit = N.nvmlDeviceGetEnforcedPowerLimit(handle)
            except N.NVMLError:
                power_limit = None

            processes = []
            try:
                nv_comp_processes = N.nvmlDeviceGetComputeRunningProcesses(
                    handle)
            except N.NVMLError:
                nv_comp_processes = None  # Not supported
            try:
                nv_graphics_processes = N.nvmlDeviceGetGraphicsRunningProcesses(
                    handle)
            except N.NVMLError:
                nv_graphics_processes = None  # Not supported

            if nv_comp_processes is None and nv_graphics_processes is None:
                processes = None  # Not supported (in both cases)
            else:
                nv_comp_processes = nv_comp_processes or []
                nv_graphics_processes = nv_graphics_processes or []
                for nv_process in (nv_comp_processes + nv_graphics_processes):
                    # TODO: could be more information such as system memory usage,
                    # CPU percentage, create time etc.
                    try:
                        process = get_process_info(nv_process.pid)
                        processes.append(process)
                    except psutil.NoSuchProcess:
                        # TODO: add some reminder for NVML broken context
                        # e.g. nvidia-smi reset  or  reboot the system
                        pass

            gpu_info = {
                'index':
                index,
                'uuid':
                uuid,
                'name':
                name,
                'temperature.gpu':
                temperature,
                'utilization.gpu':
                utilization.gpu if utilization else None,
                'power.draw':
                int(power / 1000) if power is not None else None,
                'enforced.power.limit':
                int(power_limit / 1000) if power_limit is not None else None,
                # Convert bytes into MBytes
                'memory.used':
                int(memory.used / 1024 / 1024) if memory else None,
                'memory.total':
                int(memory.total / 1024 / 1024) if memory else None,
                'processes':
                processes,
            }
            return gpu_info
Code example #15
        # power state
        power_used = nvmlDeviceGetPowerUsage(handle) / 1000
        power_limit = nvmlDeviceGetPowerManagementDefaultLimit(handle) / 1000
        power_used = int(power_used)
        power_limit = int(power_limit)
        power_rate = int(power_used / power_limit * 100)
        msg = pack_msg([power_used, power_limit], 'W')
        info['Power Util'] = getBar(power_rate, msg)

        # fan speed, temperature
        fan_speed = nvmlDeviceGetFanSpeed(handle)
        temp = nvmlDeviceGetTemperature(handle, NVML_TEMPERATURE_GPU)  # sensor 0
        msg = f"{temp}C"
        info['Fan Speed'] = getBar(fan_speed, msg)

        message = [f"{k} \t{v}" for k, v in info.items()]
        print('\n'.join(message))

        # graphics processes
        graphic_processes = nvmlDeviceGetGraphicsRunningProcesses(handle)
        header = "\n=== Graphic Processes ==="
        show_process(header, graphic_processes)

        # compute processes
        compute_processes = nvmlDeviceGetComputeRunningProcesses(handle)
        header = "\n=== Compute Processes ==="
        show_process(header, compute_processes)

    nvmlShutdown()
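
The fragment above assumes helpers defined earlier in its file (info, getBar, pack_msg, show_process). A plausible show_process, offered as an assumption rather than the original:

def show_process(header, processes):
    # One line per process: PID and GPU memory used, in MiB.
    print(header)
    for p in processes:
        mem = p.usedGpuMemory
        mem_mib = 'N/A' if mem is None else mem // (1024 * 1024)
        print(f"pid={p.pid}  used_memory={mem_mib} MiB")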