Example #1
class SelfTestAlertSource(AlertSource):
    level = AlertLevel.WARNING
    title = "Self-test error"

    schedule = IntervalSchedule(timedelta(minutes=5))

    async def check(self):
        alerts = []
        regexp = re.compile(r"\[(.*)\] (.*)")
        if os.path.exists(ALERT_FILE):
            with open(ALERT_FILE) as f:
                for line in f:
                    line = line.rstrip()
                    # Line looks like [PASS|FAIL]<text>, maybe other tags
                    match = regexp.match(line)
                    level = AlertLevel.WARNING
                    if match:
                        if match.group(1) in (TEST_WARNING,):
                            level = AlertLevel.WARNING
                        elif match.group(1) in (TEST_FAIL, TEST_CRITICAL):
                            level = AlertLevel.CRITICAL
                        elif match.group(1) in (TEST_PASS,):
                            continue
                        alerts.append(Alert(match.group(2), level=level))
                    else:
                        alerts.append(Alert(line, level=level))

        return alerts
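For reference, the regular expression in this example matches lines of the form "[TAG] message". A quick self-contained illustration (the sample lines below are hypothetical; the real tags are the TEST_PASS/TEST_WARNING/TEST_FAIL/TEST_CRITICAL constants referenced above):

import re

regexp = re.compile(r"\[(.*)\] (.*)")

# Hypothetical self-test output lines
for line in ["[PASS] ada0 short self-test completed", "[FAIL] ada1 read error"]:
    match = regexp.match(line)
    if match:
        print(match.group(1), "->", match.group(2))  # e.g. "FAIL -> ada1 read error"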
Example #2
class UpdateFailedAlertSource(FilePresenceAlertSource):
    level = AlertLevel.CRITICAL
    title = "Update failed. Check /data/update.failed for further details"

    schedule = IntervalSchedule(timedelta(hours=1))

    path = "/data/update.failed"
Example #3
class VolumeVersionAlertSource(ThreadedAlertSource):
    level = AlertLevel.WARNING
    title = "ZFS version is out of date"

    schedule = IntervalSchedule(timedelta(minutes=5))

    def check_sync(self):
        alerts = []
        for pool in self.middleware.call_sync("pool.query"):
            if not self.is_upgraded(pool):
                alerts.append(Alert(
                    "New feature flags are available for volume %s. Refer "
                    "to the \"Upgrading a ZFS Pool\" section of the User "
                    "Guide for instructions.",
                    pool["name"],
                ))

        proc = subprocess.Popen(
            "zfs upgrade | grep FILESYSTEM",
            shell=True,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            encoding="utf8",
        )
        output = proc.communicate()[0].strip(" ").strip("\n")
        if output:
            alerts.append(Alert(
                "ZFS filesystem version is out of date. Consider upgrading"
                " using \"zfs upgrade\" command line."
            ))

        return alerts

    def is_upgraded(self, pool):
        if not pool["is_decrypted"]:
            return True

        try:
            version = self.middleware.call_sync("notifier.zpool_version", pool["name"])
        except ValueError:
            return True

        if version == "-":
            proc = subprocess.Popen([
                "zpool",
                "get",
                "-H", "-o", "property,value",
                "all",
                pool["name"],
            ], stdout=subprocess.PIPE, stderr=subprocess.PIPE, encoding="utf8")
            data = proc.communicate()[0].strip("\n")
            for line in data.split("\n"):
                if not line.startswith("feature") or "\t" not in line:
                    continue
                prop, value = line.split("\t", 1)
                if value not in ("active", "enabled"):
                    return False
            return True

        return False
Example #4
class HasUpdateAlertSource(ThreadedAlertSource):
    level = AlertLevel.INFO
    title = "There is a new update available"

    schedule = IntervalSchedule(timedelta(hours=1))

    def check_sync(self):
        try:
            self.middleware.call_sync("datastore.query", "system.update", None, {"get": True})
        except IndexError:
            self.middleware.call_sync("datastore.insert", "system.update", {
                "upd_autocheck": True,
                "upd_train": "",
            })

        path = self.middleware.call_sync("update.get_update_location")
        if not path:
            return

        try:
            updates = PendingUpdates(path)
        except Exception:
            updates = None

        if updates:
            return Alert("There is a new update available! Apply it in System -> Update tab.")
Example #5
class IPMISELSpaceLeftAlertSource(AlertSource):
    schedule = IntervalSchedule(timedelta(minutes=5))

    async def check(self):
        if not has_ipmi():
            return

        return self._produce_alert_for_ipmitool_output(await ipmitool(
            ["sel", "info"]))

    def _produce_alert_for_ipmitool_output(self, output):
        sel_information = parse_sel_information(output)
        try:
            percent_used = int(sel_information["Percent Used"].rstrip("%"))
        except ValueError:
            return

        if percent_used > 90:
            return Alert(
                IPMISELSpaceLeftAlertClass,
                {
                    "free": sel_information["Free Space"],
                    "used": sel_information["Percent Used"],
                },
                key=None,
            )
Example #6
class HTTPD_SSL_AlertSource(AlertSource):
    level = AlertLevel.WARNING
    title = "FreeNAS HTTP server SSL misconfiguration"

    schedule = IntervalSchedule(timedelta(minutes=5))

    async def check(self):
        alerts = []

        if os.path.exists("/tmp/alert_invalid_ssl_nginx"):
            alerts.append(
                Alert(
                    "FreeNAS does not support certificates with keys shorter than 1024 bits. "
                    "HTTPS will not be enabled until a certificate having at least 1024 bit "
                    "keylength is provided", ))

        for cert_name in glob.glob("/var/tmp/alert_invalidcert_*"):
            alerts.append(
                Alert(
                    "The Certificate: %(cert_name)s is either malformed "
                    "or invalid and cannot be used for any services. "
                    "This Alert will remain here until the certificate is deleted",
                    {"cert_name": cert_name.split("_", 2)[-1]},
                ))

        for CA_name in glob.glob("/var/tmp/alert_invalidCA_*"):
            alerts.append(
                Alert(
                    "The Certificate Authority(CA): %(CA_name)s is either "
                    "malformed or invalid and cannot be used for any services. "
                    "This Alert will remain here until the CA is deleted",
                    {"CA_name": CA_name.split("_", 2)[-1]},
                ))

        return alerts
Example #7
class VolumeVersionAlertSource(ThreadedAlertSource):
    level = AlertLevel.WARNING
    title = "ZFS version is out of date"

    schedule = IntervalSchedule(timedelta(minutes=5))

    def check_sync(self):
        alerts = []
        for pool in self.middleware.call_sync("pool.query"):
            if not self.middleware.call_sync('pool.is_upgraded', pool["id"]):
                alerts.append(
                    Alert(
                        "New feature flags are available for volume %s. Refer "
                        "to the \"Upgrading a ZFS Pool\" subsection in the "
                        "User Guide \"Installing and Upgrading\" chapter "
                        "and \"Upgrading\" section for more instructions.",
                        pool["name"],
                    ))

        proc = subprocess.Popen(
            "zfs upgrade | grep FILESYSTEM",
            shell=True,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            encoding="utf8",
        )
        output = proc.communicate()[0].strip(" ").strip("\n")
        if output:
            alerts.append(
                Alert(
                    "ZFS filesystem version is out of date. Consider upgrading"
                    " using \"zfs upgrade\" command line."))

        return alerts
Example #8
class NTPHealthCheckAlertSource(AlertSource):
    schedule = IntervalSchedule(timedelta(hours=12))
    run_on_backup_node = False

    async def check(self):
        uptime_seconds = time.clock_gettime(time.CLOCK_MONOTONIC_RAW)
        if uptime_seconds < 300:
            return

        try:
            peers = await self.middleware.call("system.ntpserver.peers")
        except Exception:
            self.middleware.logger.warning("Failed to retrieve peers.", exc_info=True)
            peers = []

        if not peers:
            return

        active_peer = [x for x in peers if x['status'].endswith('PEER')]
        if not active_peer:
            return Alert(
                NTPHealthCheckAlertClass,
                {'reason': f'No NTP peers: {[{x["remote"]: x["status"]} for x in peers]}'}
            )

        peer = active_peer[0]
        # The offset is reported in milliseconds; anything within 5 minutes (300000 ms) is fine
        if abs(peer['offset']) < 300000:
            return

        return Alert(
            NTPHealthCheckAlertClass,
            {'reason': f'{peer["remote"]} has an offset of {peer["offset"]}, which exceeds permitted value of 5 minutes.'}
        )
Example #9
class IPMISELAlertSource(AlertSource, DismissableAlertSource):
    level = AlertLevel.WARNING
    title = "IPMI System Event"

    schedule = IntervalSchedule(timedelta(minutes=5))

    dismissed_datetime_kv_key = "alert:ipmi_sel:dismissed_datetime"

    async def check(self):
        if not has_ipmi():
            return

        return await self._produce_alerts_for_ipmitool_output(
            (await run(["ipmitool", "-c", "sel", "elist"],
                       encoding="utf8")).stdout)

    async def dismiss(self, alerts):
        await self.middleware.call("keyvalue.set",
                                   self.dismissed_datetime_kv_key,
                                   max(alert.datetime for alert in alerts))
        return []

    async def _produce_alerts_for_ipmitool_output(self, output):
        alerts = []

        records = parse_ipmitool_output(output)

        if records:
            if await self.middleware.call("keyvalue.has_key",
                                          self.dismissed_datetime_kv_key):
                dismissed_datetime = (await self.middleware.call(
                    "keyvalue.get",
                    self.dismissed_datetime_kv_key)).replace(tzinfo=None)
            else:
                # Prevent notifying about existing alerts on first install/upgrade
                dismissed_datetime = max(record.datetime for record in records)
                await self.middleware.call("keyvalue.set",
                                           self.dismissed_datetime_kv_key,
                                           dismissed_datetime)

            for record in records:
                if record.datetime <= dismissed_datetime:
                    continue

                title = "%(sensor)s %(direction)s %(event)s"
                if record.verbose is not None:
                    title += ": %(verbose)s"

                args = dict(record._asdict())
                args.pop("id")
                args.pop("datetime")

                alerts.append(
                    Alert(
                        title=title,
                        args=args,
                        datetime=record.datetime,
                    ))

        return alerts
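The DismissableAlertSource mixin used in this example is not defined on this page; judging from its use here, it only requires a dismiss() coroutine that receives the currently raised alerts and returns the ones that should remain. A hypothetical minimal declaration (the real middlewared class may differ):

class DismissableAlertSource:
    async def dismiss(self, alerts):
        # Return the alerts that should stay active after the user dismisses them.
        raise NotImplementedError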
Example #10
class ISCSIPortalIPAlertSource(AlertSource):
    schedule = IntervalSchedule(timedelta(minutes=60))

    async def check(self):
        if not await self.middleware.call('service.started', 'iscsitarget'):
            return

        in_use_ips = {
            i['address']
            for i in await self.middleware.call('interface.ip_in_use',
                                                {'any': True})
        }
        portals = {
            p['id']: p
            for p in await self.middleware.call('iscsi.portal.query')
        }
        ips = []
        for target in await self.middleware.call('iscsi.target.query'):
            for group in target['groups']:
                ips.extend(
                    map(
                        lambda ip: ip['ip'],
                        filter(lambda a: a['ip'] not in in_use_ips,
                               portals[group['portal']]['listen'])))

        if ips:
            return Alert(ISCSIPortalIPAlertClass, ', '.join(ips))
Example #11
class HasUpdateAlertSource(ThreadedAlertSource):
    schedule = IntervalSchedule(timedelta(hours=1))

    run_on_backup_node = False

    def check_sync(self):
        try:
            self.middleware.call_sync("datastore.query", "system.update", [],
                                      {"get": True})
        except IndexError:
            self.middleware.call_sync("datastore.insert", "system.update", {
                "upd_autocheck": True,
                "upd_train": "",
            })

        path = self.middleware.call_sync("update.get_update_location")
        if not path:
            return

        updates = None
        try:
            if PendingUpdates:
                updates = PendingUpdates(path)
        except Exception:
            pass

        if updates:
            return Alert(HasUpdateAlertClass)
Example #12
class ZpoolCapacityAlertSource(AlertSource):
    schedule = IntervalSchedule(timedelta(minutes=5))

    async def check(self):
        alerts = []
        for pool in await self.middleware.call("zfs.pool.query"):
            try:
                capacity = int(pool["properties"]["capacity"]["parsed"])
            except (KeyError, ValueError):
                continue

            for target_capacity, klass in [
                (90, ZpoolCapacityCriticalAlertClass),
                (80, ZpoolCapacityWarningAlertClass),
                (70, ZpoolCapacityNoticeAlertClass),
            ]:
                if capacity >= target_capacity:
                    alerts.append(
                        Alert(
                            klass,
                            {
                                "volume": pool["name"],
                                "capacity": capacity,
                            },
                            key=[pool["name"]],
                        ))
                    break
                elif capacity == target_capacity - 1:
                    # If pool capacity is 89%, 79% or 69%, leave the alert in its previous state.
                    # In other words, don't flap the alert if pool capacity is oscillating around a threshold value.
                    raise UnavailableException()

        return alerts
Example #13
class LDAPBindAlertSource(AlertSource):
    schedule = IntervalSchedule(timedelta(minutes=10))
    run_on_backup_node = False

    async def check(self):
        if (await self.middleware.call('ldap.get_state')) == 'DISABLED':
            return

        try:
            await self.middleware.call("ldap.started")
        except Exception as e:
            return Alert(LDAPBindAlertClass, {'ldaperr': str(e)}, key=None)
Example #14
class ZpoolCapacityAlertSource(ThreadedAlertSource):
    level = AlertLevel.WARNING
    title = "The capacity for the volume is above recommended value"

    schedule = IntervalSchedule(timedelta(minutes=5))

    def check_sync(self):
        alerts = []
        pools = [
            pool["name"] for pool in self.middleware.call_sync("pool.query")
        ] + ["freenas-boot"]
        for pool in pools:
            proc = subprocess.Popen([
                "zpool",
                "list",
                "-H",
                "-o",
                "cap",
                pool.encode("utf8"),
            ],
                                    stdout=subprocess.PIPE,
                                    stderr=subprocess.PIPE,
                                    encoding="utf8")
            data = proc.communicate()[0]
            if proc.returncode != 0:
                continue
            try:
                cap = int(data.strip("\n").replace("%", ""))
            except ValueError:
                continue

            msg = (
                "The capacity for the volume \"%(volume)s\" is currently at "
                "%(capacity)d%%, while the recommended value is below 80%%.")
            level = None
            if cap >= 90:
                level = AlertLevel.CRITICAL
            elif cap >= 80:
                level = AlertLevel.WARNING
            if level:
                alerts.append(
                    Alert(
                        msg,
                        {
                            "volume": pool,
                            "capacity": cap,
                        },
                        key=[pool, level.name],
                        level=level,
                    ))

        return alerts
Example #15
class VolumeVersionAlertSource(AlertSource):
    schedule = IntervalSchedule(timedelta(minutes=5))

    async def check(self):
        alerts = []
        for pool in await self.middleware.call("pool.query"):
            if not await self.middleware.call("pool.is_upgraded", pool["id"]):
                alerts.append(Alert(
                    VolumeVersionAlertClass,
                    pool["name"],
                ))

        return alerts
Example #16
class VolumeVersionAlertSource(ThreadedAlertSource):
    schedule = IntervalSchedule(timedelta(minutes=5))

    def check_sync(self):
        alerts = []
        for pool in self.middleware.call_sync("pool.query"):
            if not self.middleware.call_sync('pool.is_upgraded', pool["id"]):
                alerts.append(Alert(
                    VolumeVersionAlertClass,
                    pool["name"],
                ))

        return alerts
Example #17
class ActiveDirectoryDomainBindAlertSource(AlertSource):
    schedule = IntervalSchedule(timedelta(minutes=10))

    async def check(self):
        if (await self.middleware.call('activedirectory.get_state')
            ) == 'DISABLED':
            return

        try:
            await self.middleware.call("activedirectory.started")
        except Exception as e:
            return Alert(ActiveDirectoryDomainBindAlertClass,
                         {'wberr': str(e)},
                         key=None)
Example #18
class AlertSource:
    schedule = IntervalSchedule(timedelta())

    run_on_backup_node = True

    def __init__(self, middleware):
        self.middleware = middleware

    @property
    def name(self):
        return self.__class__.__name__.replace("AlertSource", "")

    async def check(self):
        raise NotImplementedError
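This is the base class the sources on this page ultimately derive from: a periodic IntervalSchedule plus an async check() that returns nothing when all is well, or an Alert (or list of Alerts) otherwise; the ThreadedAlertSource variants implement a synchronous check_sync() instead. A minimal illustrative subclass follows; the ExampleSentinelAlertSource and ExampleAlertClass names are made up for this sketch.

import os
from datetime import timedelta


class ExampleSentinelAlertSource(AlertSource):
    schedule = IntervalSchedule(timedelta(minutes=15))
    run_on_backup_node = False

    async def check(self):
        # Returning None (implicitly) means "no alert".
        if os.path.exists("/tmp/example_sentinel"):
            return Alert(ExampleAlertClass, {"path": "/tmp/example_sentinel"})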
Example #19
class IPMISELAlertSource(AlertSource):
    schedule = IntervalSchedule(timedelta(minutes=5))

    dismissed_datetime_kv_key = "alert:ipmi_sel:dismissed_datetime"

    async def check(self):
        if not has_ipmi():
            return

        return await self._produce_alerts_for_ipmitool_output(
            (await run(["ipmitool", "-c", "sel", "elist"],
                       encoding="utf8")).stdout)

    async def _produce_alerts_for_ipmitool_output(self, output):
        alerts = []

        records = parse_ipmitool_output(output)

        if records:
            if await self.middleware.call("keyvalue.has_key",
                                          self.dismissed_datetime_kv_key):
                dismissed_datetime = ((await self.middleware.call(
                    "keyvalue.get",
                    self.dismissed_datetime_kv_key)).replace(tzinfo=None))
            else:
                # Prevent notifying about existing alerts on first install/upgrade
                dismissed_datetime = max(record.datetime for record in records)
                await self.middleware.call("keyvalue.set",
                                           self.dismissed_datetime_kv_key,
                                           dismissed_datetime)

            for record in records:
                if record.datetime <= dismissed_datetime:
                    continue

                args = dict(record._asdict())
                args.pop("id")
                args.pop("datetime")

                alerts.append(
                    Alert(
                        IPMISELAlertClass,
                        args,
                        key=[args, record.datetime.isoformat()],
                        datetime=record.datetime,
                    ))

        return alerts
Example #20
class AlertSource:
    schedule = IntervalSchedule(timedelta())

    products = ("CORE", "ENTERPRISE", "SCALE", "SCALE_ENTERPRISE")
    failover_related = False
    run_on_backup_node = True

    def __init__(self, middleware):
        self.middleware = middleware

    @property
    def name(self):
        return self.__class__.__name__.replace("AlertSource", "")

    async def check(self):
        raise NotImplementedError
Example #21
class NISBindAlertSource(AlertSource):
    schedule = IntervalSchedule(timedelta(minutes=10))
    run_on_backup_node = False

    async def check(self):
        if (await self.middleware.call('nis.get_state')) == 'DISABLED':
            return

        try:
            await self.middleware.call("nis.started")
        except Exception as e:
            await self.middleware.call('nis.set_state', DSStatus['FAULTED'])
            return Alert(
                NISBindAlertClass,
                {'niserr': str(e)},
                key=None
            )
Example #22
class ZpoolCapacityAlertSource(ThreadedAlertSource):
    schedule = IntervalSchedule(timedelta(minutes=5))

    def check_sync(self):
        alerts = []
        pools = [
            pool["name"] for pool in self.middleware.call_sync("pool.query")
        ] + ["freenas-boot"]
        for pool in pools:
            proc = subprocess.Popen([
                "zpool",
                "list",
                "-H",
                "-o",
                "cap",
                pool.encode("utf8"),
            ],
                                    stdout=subprocess.PIPE,
                                    stderr=subprocess.PIPE,
                                    encoding="utf8")
            data = proc.communicate()[0]
            if proc.returncode != 0:
                continue
            try:
                cap = int(data.strip("\n").replace("%", ""))
            except ValueError:
                continue

            klass = None
            if cap >= 90:
                klass = ZpoolCapacityCriticalAlertClass
            elif cap >= 80:
                klass = ZpoolCapacityWarningAlertClass
            if klass:
                alerts.append(
                    Alert(
                        klass,
                        {
                            "volume": pool,
                            "capacity": cap,
                        },
                        key=[pool],
                    ))

        return alerts
Example #23
class UpdateNotAppliedAlertSource(ThreadedAlertSource):
    schedule = IntervalSchedule(timedelta(minutes=10))

    def check_sync(self):
        if os.path.exists(UPDATE_APPLIED_SENTINEL):
            try:
                with open(UPDATE_APPLIED_SENTINEL, "rb") as f:
                    data = json.loads(f.read().decode("utf8"))
            except Exception:
                log.error(
                    "Could not load UPDATE APPLIED SENTINEL located at {0}".
                    format(UPDATE_APPLIED_SENTINEL),
                    exc_info=True)
                return

            update_applied, msg = is_update_applied(data["update_version"],
                                                    create_alert=False)
            if update_applied:
                return Alert(UpdateNotAppliedAlertClass, msg)
Example #24
class AlertSource:
    level = NotImplemented
    title = NotImplemented

    hardware = False

    onetime = False
    schedule = IntervalSchedule(timedelta())

    run_on_passive_node = True

    def __init__(self, middleware):
        self.middleware = middleware

    @property
    def name(self):
        return self.__class__.__name__.replace("AlertSource", "")

    async def check(self):
        raise NotImplementedError
Example #25
class IPMISELSpaceLeftAlertSource(AlertSource):
    schedule = IntervalSchedule(timedelta(minutes=5))

    async def check(self):
        if not has_ipmi():
            return

        return self._produce_alert_for_ipmitool_output(
            (await run(["ipmitool", "sel", "info"], encoding="utf8")).stdout)

    def _produce_alert_for_ipmitool_output(self, output):
        sel_information = parse_sel_information(output)
        if int(sel_information["Percent Used"].rstrip("%")) > 90:
            return Alert(
                IPMISELSpaceLeftAlertClass,
                {
                    "free": sel_information["Free Space"],
                    "used": sel_information["Percent Used"],
                },
                key=None,
            )
Example #26
class SMARTAlertSource(ThreadedAlertSource):
    level = AlertLevel.CRITICAL
    title = "SMART error"

    hardware = True

    schedule = IntervalSchedule(timedelta(minutes=5))

    def check_sync(self):
        alerts = []

        with SmartAlert() as sa:
            for msgs in sa.data.values():
                if not msgs:
                    continue
                for msg in msgs:
                    if msg is None:
                        continue
                    alerts.append(Alert(msg))

        return alerts
Example #27
class NVDIMMAlertSource(ThreadedAlertSource):
    schedule = IntervalSchedule(timedelta(minutes=5))

    products = ("ENTERPRISE", )

    def check_sync(self):
        alerts = []

        i = 0
        while True:
            try:
                critical_health = sysctl.filter(
                    f"dev.nvdimm.{i}.critical_health")[0].value
                nvdimm_health = sysctl.filter(
                    f"dev.nvdimm.{i}.nvdimm_health")[0].value
                es_health = sysctl.filter(f"dev.nvdimm.{i}.es_health")[0].value
            except IndexError:
                return alerts
            else:
                alerts.extend(
                    produce_nvdimm_alerts(i, critical_health, nvdimm_health,
                                          es_health))
                i += 1
Example #28
class VolumeVersionAlertSource(ThreadedAlertSource):
    schedule = IntervalSchedule(timedelta(minutes=5))

    def check_sync(self):
        alerts = []
        for pool in self.middleware.call_sync("pool.query"):
            if not self.middleware.call_sync('pool.is_upgraded', pool["id"]):
                alerts.append(Alert(
                    VolumeVersionAlertClass,
                    pool["name"],
                ))

        proc = subprocess.Popen(
            "zfs upgrade | grep FILESYSTEM",
            shell=True,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            encoding="utf8",
        )
        output = proc.communicate()[0].strip(" ").strip("\n")
        if output:
            alerts.append(Alert(ZfsVersionOutOfDateAlertClass))

        return alerts
Example #29
class QuotaAlertSource(ThreadedAlertSource):
    schedule = IntervalSchedule(timedelta(minutes=5))

    def check_sync(self):
        alerts = []

        datasets = self.middleware.call_sync(
            "zfs.dataset.query_for_quota_alert")

        pool_sizes = {}
        for d in datasets:
            d["name"] = d["name"]["rawvalue"]

            if "/" not in d["name"]:
                pool_sizes[d["name"]] = int(d["available"]["rawvalue"]) + int(
                    d["used"]["rawvalue"])

            for k, default in [("org.freenas:quota_warning", 80),
                               ("org.freenas:quota_critical", 95),
                               ("org.freenas:refquota_warning", 80),
                               ("org.freenas:refquota_critical", 95)]:
                try:
                    d[k] = int(d[k]["rawvalue"])
                except (KeyError, ValueError):
                    d[k] = default

        # call this outside the for loop since we don't need to check it
        # for every dataset that could potentially be out of quota
        hostname = self.middleware.call_sync("system.hostname")
        datasets = sorted(datasets, key=lambda ds: ds["name"])
        for dataset in datasets:
            for quota_property in ["quota", "refquota"]:
                try:
                    quota_value = int(dataset[quota_property]["rawvalue"])
                except (AttributeError, KeyError, ValueError):
                    continue

                if quota_value == 0:
                    continue

                if quota_property == "quota":
                    # We can't use "used" property since it includes refreservation

                    # But if "refquota" is smaller than "quota", then "available" will be reported with regards to
                    # that smaller value, and we will get false positive
                    try:
                        refquota_value = int(dataset["refquota"]["rawvalue"])
                    except (AttributeError, KeyError, ValueError):
                        continue
                    else:
                        if refquota_value and refquota_value < quota_value:
                            continue

                    # A quota larger than the dataset's available size will never be exceeded,
                    # but it would break our logic below
                    if quota_value > pool_sizes[dataset["name"].split("/")[0]]:
                        continue

                    used = quota_value - int(dataset["available"]["rawvalue"])
                elif quota_property == "refquota":
                    used = int(dataset["usedbydataset"]["rawvalue"])
                else:
                    raise RuntimeError()

                used_fraction = 100 * used / quota_value

                critical_threshold = dataset[
                    f"org.freenas:{quota_property}_critical"]
                warning_threshold = dataset[
                    f"org.freenas:{quota_property}_warning"]
                if critical_threshold != 0 and used_fraction >= critical_threshold:
                    klass = QuotaCriticalAlertClass
                elif warning_threshold != 0 and used_fraction >= warning_threshold:
                    klass = QuotaWarningAlertClass
                else:
                    continue

                quota_name = quota_property[0].upper() + quota_property[1:]
                args = {
                    "name": quota_name,
                    "dataset": dataset["name"],
                    "used_fraction": used_fraction,
                    "used": format_size(used),
                    "quota_value": format_size(quota_value),
                }

                mail = None
                owner = self._get_owner(dataset)
                if owner != 0:
                    try:
                        self.middleware.call_sync('user.get_user_obj',
                                                  {'uid': owner})
                        user_exists = True
                    except KeyError:
                        user_exists = False
                        to = None
                        logger.debug("Unable to query bsduser with uid %r",
                                     owner)

                    if user_exists:
                        try:
                            bsduser = self.middleware.call_sync(
                                "datastore.query",
                                "account.bsdusers",
                                [["bsdusr_uid", "=", owner]],
                                {"get": True},
                            )
                            to = bsduser["bsdusr_email"] or None
                        except IndexError:
                            to = None

                    if to is not None:
                        mail = {
                            "to": [to],
                            "subject":
                            f"{hostname}: {quota_name} exceeded on dataset {dataset['name']}",
                            "text": klass.text % args
                        }

                alerts.append(
                    Alert(
                        klass,
                        args=args,
                        key=[dataset["name"], quota_property],
                        mail=mail,
                    ))

        return alerts

    def _get_owner(self, dataset):
        mountpoint = None
        if dataset["mounted"]["value"] == "yes":
            if dataset["mountpoint"]["value"] == "legacy":
                for m in (getmntinfo() if getmntinfo else []):
                    if m.source == dataset["name"]:
                        mountpoint = m.dest
                        break
            else:
                mountpoint = dataset["mountpoint"]["value"]
        if mountpoint is None:
            logger.debug(
                "Unable to get mountpoint for dataset %r, assuming owner = root",
                dataset["name"])
            uid = 0
        else:
            try:
                stat_info = os.stat(mountpoint)
            except Exception:
                logger.debug(
                    "Unable to stat mountpoint %r, assuming owner = root",
                    mountpoint)
                uid = 0
            else:
                uid = stat_info.st_uid

        return uid
Example #30
class IPMISELAlertSource(AlertSource):
    schedule = IntervalSchedule(timedelta(minutes=5))

    dismissed_datetime_kv_key = "alert:ipmi_sel:dismissed_datetime"

    # https://github.com/openbmc/ipmitool/blob/master/include/ipmitool/ipmi_sel.h#L297

    IPMI_SENSORS = (
        "Redundancy State",
        "Temperature",
        "Voltage",
        "Current",
        "Fan",
        "Physical Security",
        "Platform Security",
        "Processor",
        "Power Supply",
        "Memory",
        "System Firmware Error",
        "Critical Interrupt",
        "Management Subsystem Health",
        "Battery",
    )

    IPMI_EVENTS_WHITELIST = (
        ("Power Unit", "Soft-power control failure"),
        ("Power Unit", "Failure detected"),
        ("Power Unit", "Predictive failure"),
        ("Event Logging Disabled", "Log full"),
        ("Event Logging Disabled", "Log almost full"),
        ("System Event", "Undetermined system hardware failure"),
        ("Cable/Interconnect", "Config Error"),
    )

    IPMI_EVENTS_BLACKLIST = (
        ("Redundancy State", "Fully Redundant"),
        ("Processor", "Presence detected"),
        ("Power Supply", "Presence detected"),
    )

    async def check(self):
        if not has_ipmi():
            return

        return await self._produce_alerts_for_ipmitool_output(await ipmitool(
            ["-c", "sel", "elist"]))

    async def _produce_alerts_for_ipmitool_output(self, output):
        alerts = []

        records = parse_ipmitool_output(output)

        records = [
            record for record in records
            if ((any(
                record.sensor.startswith(f"{sensor} #0x")
                for sensor in self.IPMI_SENSORS) or any(
                    record.sensor.startswith(f"{sensor} #0x")
                    and record.event == event
                    for sensor, event in self.IPMI_EVENTS_WHITELIST))
                and not any(
                    record.sensor.startswith(f"{sensor} #0x")
                    and record.event == event
                    for sensor, event in self.IPMI_EVENTS_BLACKLIST))
        ]

        if records:
            if await self.middleware.call("keyvalue.has_key",
                                          self.dismissed_datetime_kv_key):
                dismissed_datetime = ((await self.middleware.call(
                    "keyvalue.get",
                    self.dismissed_datetime_kv_key)).replace(tzinfo=None))
            else:
                # Prevent notifying about existing alerts on first install/upgrade
                dismissed_datetime = max(record.datetime for record in records)
                await self.middleware.call("keyvalue.set",
                                           self.dismissed_datetime_kv_key,
                                           dismissed_datetime)

            for record in records:
                if record.datetime <= dismissed_datetime:
                    continue

                args = dict(record._asdict())
                args.pop("id")
                args.pop("datetime")

                alerts.append(
                    Alert(
                        IPMISELAlertClass,
                        args,
                        key=[args, record.datetime.isoformat()],
                        datetime=record.datetime,
                    ))

        return alerts