Beispiel #1
0
    def check(zone=None, slowdown=80, _max=90):
        """Report the CPU temperature read from the sysfs thermal zones.

        Args:
            zone: Index of the thermal zone to read. When None, zones 0-2 are
                probed and the hottest reading is used.
            slowdown: Temperature (°C) at or above which the result is
                reported as unhealthy.
            _max: Temperature (°C) used as the upper end of the bar.

        Returns:
            dict with the keys "value", "bar_min", "bar_max", "bar_percent",
            "healthy" and "reason".

        Raises:
            Exception: If no zone yielded a reading above 0 °C.
        """
        # Compare against None explicitly: zone 0 is a valid index but falsy,
        # so a plain truthiness test would silently scan all zones instead.
        zones = range(3) if zone is None else [zone]

        hottest = 0
        for x in zones:
            try:
                with open("/sys/class/thermal/thermal_zone%s/temp" % x) as f:
                    reading = f.read()
                # The file holds thousandths of a degree. Convert numerically
                # (truncating toward zero) rather than slicing off the last
                # three characters, which fails on readings shorter than four
                # characters.
                value = int(int(reading.strip()) / 1000)
            except (OSError, ValueError):
                # Zone missing, unreadable, or not reporting a number: skip it.
                continue
            if value > hottest:
                hottest = value

        # A hottest value of 0 means nothing usable was read (readings at or
        # below 0 °C are never recorded by the loop above).
        if not hottest:
            raise Exception("Could not find any thermal zone")

        status = {
            "value": "%s°C" % hottest,
            "bar_min": "15°C",
            "bar_max": "%s°C" % _max,
            "bar_percent": bar_percent(hottest, _max, 15),
        }

        if hottest < slowdown:
            status["healthy"] = True
            status["reason"] = "CPU temperature nominal"
        else:
            status["healthy"] = False
            status["reason"] = "CPU overheated"

        return status
Beispiel #2
0
 def check(_min=216, _max=253):
     """Report the UPS line voltage and whether it lies strictly inside the limits.

     Args:
         _min: Lower voltage limit; also the lower end of the bar.
         _max: Upper voltage limit; also the upper end of the bar.

     Returns:
         dict with the keys "bar_min", "bar_max", "bar_percent", "value" and
         "healthy".
     """
     ups = get_ups_data()

     status = {}
     status["bar_min"] = "%sV" % _min
     status["bar_max"] = "%sV" % _max

     line_voltage = ups["LINEV"]
     status["bar_percent"] = bar_percent(line_voltage, _max, _min)
     status["value"] = "%sV" % line_voltage
     # Healthy only while strictly between both limits.
     status["healthy"] = (line_voltage < _max) and (line_voltage > _min)
     return status
Beispiel #3
0
    def check(host, _min=10, _max=35):
        """Fetch a temperature from ``http://<host>/fresh.xml`` and rate it.

        The reading is the ``val`` attribute of the first child of the XML
        root element, truncated to an integer.

        Args:
            host: Host name or address that serves ``fresh.xml``.
            _min: Lower limit; also the lower end of the bar.
            _max: Upper limit; also the upper end of the bar.

        Returns:
            dict with the keys "value", "bar_min", "bar_max", "bar_percent"
            and "healthy".
        """
        response = requests.get("http://%s/fresh.xml" % host, timeout=5)
        # Turn HTTP error statuses into exceptions before parsing the body.
        response.raise_for_status()

        first_child = ET.fromstring(response.text)[0]
        degrees = int(float(first_child.attrib["val"]))

        status = {}
        status["value"] = "%s&deg;C" % degrees
        status["bar_min"] = "%s&deg;C" % _min
        status["bar_max"] = "%s&deg;C" % _max
        status["bar_percent"] = bar_percent(degrees, _max, _min)
        # Healthy only while strictly between both limits.
        status["healthy"] = degrees < _max and degrees > _min
        return status
Beispiel #4
0
    def check(slowdown=88, _max=93):
        """Report the GPU temperature as printed by ``nvidia-smi``.

        Runs ``nvidia-smi -q -d TEMPERATURE`` and collects every output line
        of the form ``key : value``. If the output provides numeric slowdown
        and shutdown temperatures, they replace the ``slowdown`` / ``_max``
        arguments.

        Args:
            slowdown: Temperature at or above which the GPU is reported as
                unhealthy, used when the tool reports no usable value.
            _max: Upper end of the bar, used when the tool reports no usable
                shutdown temperature.

        Returns:
            dict with the keys "value", "bar_min", "bar_max", "bar_percent",
            "healthy" and "reason".

        Raises:
            KeyError: If the output has no "GPU Current Temp" entry.
            ValueError: If the current temperature is not numeric.
            Whatever ``run`` raises on a non-zero exit status (``check=True``)
            or when the 10 second timeout expires.
        """
        result = run(
            ["nvidia-smi", "-q", "-d", "TEMPERATURE"],
            timeout=10,
            check=True,
            stdout=PIPE,
        )

        state = {}
        for line in result.stdout.decode().splitlines():
            try:
                key, val = line.split(":")
            except ValueError:
                # Not exactly one colon on this line: not a key/value pair.
                continue
            state[key.strip()] = val.strip()

        # [:-2] drops the trailing two characters of the value, presumably
        # the " C" unit suffix.
        value = int(state["GPU Current Temp"][:-2])

        try:
            slowdown = int(state["GPU Slowdown Temp"][:-2])
            _max = int(state["GPU Shutdown Temp"][:-2])
        except (KeyError, ValueError):
            # Threshold missing from the output or not numeric, so don't
            # change the defaults.
            pass

        status = {
            "value": "%s&deg;C" % value,
            "bar_min": "15&deg;C",
            "bar_max": "%s&deg;C" % _max,
            "bar_percent": bar_percent(value, _max, 15),
        }

        if value < slowdown:
            status["healthy"] = True
            status["reason"] = "GPU temperature nominal"
        else:
            status["healthy"] = False
            status["reason"] = "GPU overheated"

        return status
Beispiel #5
0
    def check(_max=300):
        """Report the load average as a percentage of the CPU count.

        The first value returned by ``os.getloadavg()`` is divided by the
        number of CPUs and expressed as an integer percentage.

        Args:
            _max: Percentage at or above which the system is reported as
                overloaded.

        Returns:
            dict with the keys "value", "bar_min", "bar_max", "bar_percent",
            "healthy" and "reason".
        """
        # "return normalised % load (avg num of processes waiting per processor)"
        recent_load = os.getloadavg()[0]
        percent = int(recent_load / multiprocessing.cpu_count() * 100)

        status = {
            "value": "%s%%" % percent,
            "bar_min": "0%",
            "bar_max": "100%",
            "bar_percent": bar_percent(percent, 100),
        }

        if percent < _max:
            verdict = (True, "CPU usage nominal")
        else:
            verdict = (False, "CPU overloaded")
        status["healthy"], status["reason"] = verdict

        return status
Beispiel #6
0
    def check(mountpoint="/"):
        """Report how much of the filesystem at ``mountpoint`` is in use.

        Usage is the total size minus the space available to unprivileged
        users (``f_bavail``). The filesystem is reported as unhealthy once
        usage reaches 90% of the total size.

        Args:
            mountpoint: Path on the filesystem to inspect.

        Returns:
            dict with the keys "value", "bytes", "bar_min", "bar_max",
            "bar_percent", "healthy" and "reason".

        Raises:
            OSError: If ``os.statvfs`` fails for ``mountpoint``.
        """
        s = os.statvfs(mountpoint)
        # POSIX expresses f_blocks and f_bavail in units of f_frsize (the
        # fragment size), not f_bsize (the preferred I/O block size). Fall
        # back to f_bsize should a platform report f_frsize as 0.
        unit = s.f_frsize or s.f_bsize
        free = unit * s.f_bavail
        total = unit * s.f_blocks
        usage = total - free
        percent = bar_percent(usage, total)

        status = {
            "value": human_bytes(usage),
            "bytes": usage,
            "bar_min": "0 GB",
            "bar_max": human_bytes(total),
            "bar_percent": percent,
        }

        if usage < 0.9 * total:
            status["healthy"] = True
            status["reason"] = "Disk usage nominal"
        else:
            status["healthy"] = False
            status["reason"] = "Disk is nearly full"

        return status
Beispiel #7
0
    def check():
        """Report RAM used by applications, excluding buffers and page cache.

        Parses ``/proc/meminfo`` and computes
        ``MemTotal - MemFree - Buffers - Cached``. Usage is reported as
        unhealthy once it reaches 90% of ``MemTotal``.

        Returns:
            dict with the keys "value", "bytes", "bar_min", "bar_max",
            "bar_percent", "healthy" and "reason".

        Raises:
            OSError: If ``/proc/meminfo`` cannot be opened.
            KeyError: If one of the four fields above is missing.
        """
        # http://www.linuxatemyram.com/

        # in kB
        info = {}

        with open("/proc/meminfo") as f:
            for line in f:
                # Raw string: \w, \s and \d are regex escapes, not valid
                # Python string escapes (SyntaxWarning on Python 3.12+).
                m = re.search(r"(\w+):\s*(\d+)", line)
                if m:
                    info[m.group(1)] = int(m.group(2))

        used = info["MemTotal"] - info["MemFree"] - info["Buffers"] - info["Cached"]

        total = info["MemTotal"] * 1024

        # used by applications, not cache/buffers
        value = used * 1024

        status = {
            "value": human_bytes(value),
            "bytes": value,
            "bar_min": "0 GB",
            "bar_max": human_bytes(total),
            "bar_percent": bar_percent(value, total),
        }

        if value < 0.9 * total:
            status["healthy"] = True
            status["reason"] = "RAM usage nominal"
        else:
            status["healthy"] = False
            status["reason"] = "RAM usage too high"

        return status