async def check(self):
    """Check the health of the HA failover pair and return any alerts.

    Returns:
        A list of alerts. It is empty when failover is not licensed or
        when no problem was detected.

    Raises:
        UnavailableException: If the remote node reports that it is not
            ready yet.
    """
    middleware = self.middleware

    if not await middleware.call('failover.licensed'):
        return []
    if not await middleware.call('failover.internal_interfaces'):
        return [Alert(FailoverInterfaceNotFoundAlertClass)]

    try:
        remote_ready = await middleware.call('failover.call_remote', 'system.ready')
        if not remote_ready:
            raise UnavailableException()

        version_here = await middleware.call('system.version')
        version_there = await middleware.call('failover.call_remote', 'system.version')
        if version_here != version_there:
            return [Alert(TrueNASVersionsMismatchAlertClass)]

        states_here = await middleware.call('failover.vip.get_states')
        states_there = await middleware.call('failover.call_remote', 'failover.vip.get_states')
        disagreements = await middleware.call(
            'failover.vip.check_states', states_here, states_there
        )
        if disagreements:
            # One alert per reported disagreement.
            state_alerts = []
            for problem in disagreements:
                state_alerts.append(Alert(VRRPStatesDoNotAgreeAlertClass, {'error': problem}))
            return state_alerts
    except CallError as exc:
        # A refused connection is tolerated here; the overall failover
        # status queried below decides whether an alert is raised.
        if exc.errno != errno.ECONNREFUSED:
            return [Alert(FailoverStatusCheckFailedAlertClass, [str(exc)])]

    failover_status = await middleware.call('failover.status')
    if failover_status in ('ERROR', 'UNKNOWN'):
        return [Alert(FailoverFailedAlertClass)]

    return []
async def check(self):
    """Build capacity alerts for every pool returned by ``zfs.pool.query``.

    A pool produces at most one alert, for the highest threshold
    (90, 80 or 70 percent) that its capacity reaches. Pools whose capacity
    is missing or not an integer are skipped.

    Returns:
        A list of alerts, one per pool at or above a threshold, each keyed
        by the pool name.

    Raises:
        UnavailableException: If any pool sits exactly one percent below a
            threshold (89, 79 or 69), so that existing alerts keep their
            previous state instead of flapping.
    """
    alerts = []
    for pool in await self.middleware.call("zfs.pool.query"):
        try:
            capacity = int(pool["properties"]["capacity"]["parsed"])
        except (KeyError, TypeError, ValueError):
            # TypeError covers a None value anywhere along the lookup
            # (e.g. "parsed" is None), which would otherwise abort the
            # whole check for every pool.
            continue

        for target_capacity, klass in [
            (90, ZpoolCapacityCriticalAlertClass),
            (80, ZpoolCapacityWarningAlertClass),
            (70, ZpoolCapacityNoticeAlertClass),
        ]:
            if capacity >= target_capacity:
                alerts.append(
                    Alert(
                        klass,
                        {
                            "volume": pool["name"],
                            "capacity": capacity,
                        },
                        key=[pool["name"]],
                    )
                )
                break
            elif capacity == target_capacity - 1:
                # If pool capacity is 89%, 79%, 69%, leave the alert in its previous state.
                # In other words, don't flap the alert when pool capacity is oscillating
                # around a threshold value.
                raise UnavailableException()

    return alerts
def check_sync(self):
    """Alert when the reporting database uses more than 1 GiB.

    The size is the ``used`` figure that ``psutil.disk_usage`` reports for
    ``/var/db/collectd/rrd``.

    Returns:
        An alert when usage is above the threshold, otherwise ``None``.

    Raises:
        UnavailableException: If the path does not exist.
    """
    rrd_size_alert_threshold = 1073741824  # 1 GiB, in bytes

    try:
        used = psutil.disk_usage('/var/db/collectd/rrd').used
    except FileNotFoundError:
        raise UnavailableException()

    if used > rrd_size_alert_threshold:
        # The message states the limit in GiB, so format the measured size
        # in binary units as well instead of the decimal default.
        return Alert(
            'Reporting database size (%s) is above 1 GiB',
            args=[humanfriendly.format_size(used, binary=True)],
        )
def check_sync(self):
    """Alert when the reporting database uses more than 1073741824 bytes.

    Returns:
        A ``ReportingDbAlertClass`` alert carrying the formatted size when
        usage is above the limit, otherwise ``None``.

    Raises:
        UnavailableException: If ``/var/db/collectd/rrd`` does not exist.
    """
    limit_bytes = 1073741824

    try:
        usage = psutil.disk_usage('/var/db/collectd/rrd')
    except FileNotFoundError:
        raise UnavailableException()

    used_bytes = usage.used
    if used_bytes <= limit_bytes:
        return None

    return Alert(
        ReportingDbAlertClass,
        args=humanfriendly.format_size(used_bytes),
        key=None,
    )
async def __call__(self, args):
    """Run ``ipmitool`` with ``args`` and return its standard output.

    ``self.errors`` counts consecutive failures: it is reset on success and
    incremented on every non-zero exit code.

    Args:
        args: List of command-line arguments appended to ``["ipmitool"]``.

    Returns:
        The captured standard output of the command.

    Raises:
        UnavailableException: On a failure while fewer than five
            consecutive failures have been recorded.
        CallError: On the fifth and every later consecutive failure.
    """
    command = ["ipmitool"] + args
    result = await run(command, check=False, encoding="utf8", errors="ignore")

    if result.returncode == 0:
        self.errors = 0
        return result.stdout

    self.errors += 1
    if self.errors >= 5:
        raise CallError(
            f"ipmitool failed (code={result.returncode}): {result.stderr}")

    raise UnavailableException()
def check_sync(self):
    """Alert when the reporting database exceeds a fixed size threshold.

    Returns:
        A ``ReportingDbAlertClass`` alert with the formatted ``used`` and
        ``threshold`` sizes when usage is above the threshold, otherwise
        ``None``.

    Raises:
        UnavailableException: If ``/var/db/collectd/rrd`` does not exist.
    """
    limit_bytes = 1610611911  # bytes (just under 1.5 GiB)

    try:
        used_bytes = shutil.disk_usage('/var/db/collectd/rrd').used
    except FileNotFoundError:
        raise UnavailableException()

    if used_bytes <= limit_bytes:
        return None

    # zfs list reports sizes in kibi/mebi/gibi-bytes, whereas format_size()
    # defaults to kilo/mega/giga. Use binary units so the numbers shown to
    # the user match what zfs list reports.
    details = {
        'used': format_size(used_bytes, binary=True),
        'threshold': format_size(limit_bytes, binary=True),
    }
    return Alert(ReportingDbAlertClass, details, key=None)
def check_sync(self):
    """Alert when the reporting database exceeds a per-system threshold.

    The threshold is 1073741824 bytes plus 1024 * 1024 bytes for every
    entry returned by ``disk.query``.

    Returns:
        A ``ReportingDbAlertClass`` alert with the formatted ``used`` and
        ``threshold`` sizes when usage is above the threshold, otherwise
        ``None``.

    Raises:
        UnavailableException: If ``/var/db/collectd/rrd`` does not exist.
    """
    try:
        used_bytes = shutil.disk_usage('/var/db/collectd/rrd').used
    except FileNotFoundError:
        raise UnavailableException()

    # Queried only after the directory is known to exist.
    disk_count = len(self.middleware.call_sync('disk.query'))
    limit_bytes = 1073741824 + disk_count * 1024 * 1024

    if used_bytes <= limit_bytes:
        return None

    # zfs list reports sizes in kibi/mebi/gibi-bytes, whereas format_size()
    # defaults to kilo/mega/giga. Use binary units so the numbers shown to
    # the user match what zfs list reports.
    details = {
        'used': format_size(used_bytes, binary=True),
        'threshold': format_size(limit_bytes, binary=True),
    }
    return Alert(ReportingDbAlertClass, details, key=None)
def check_sync(self):
    """Run the failover health checks and return the resulting alerts.

    The checks run in order: license and internal interface presence,
    remote reachability / version / VIP state agreement, overall failover
    status, the internal link (via ``ifconfig``), the
    ``kern.cam.ctl.ha_link`` sysctl, and, on the BACKUP node, the presence
    of keys for passphrase-protected pools.

    Returns:
        A list of alerts; empty when failover is not licensed or nothing
        was found.

    Raises:
        UnavailableException: If the remote node reports it is not ready.
    """
    alerts = []

    if not self.middleware.call_sync('failover.licensed'):
        return alerts

    if not self.middleware.call_sync('failover.internal_interfaces'):
        alerts.append(Alert(FailoverInterfaceNotFoundAlertClass))
        return alerts

    try:
        # Result is discarded; the call is made only so that a failure
        # surfaces as an exception.
        self.middleware.call_sync('failover.call_remote', 'core.ping')

        local_version = self.middleware.call_sync('system.version')
        remote_version = self.middleware.call_sync('failover.call_remote',
                                                   'system.version')
        if local_version != remote_version:
            # A version mismatch replaces any alerts collected so far.
            return [Alert(TrueNASVersionsMismatchAlertClass)]

        if not self.middleware.call_sync('failover.call_remote',
                                         'system.ready'):
            raise UnavailableException()

        local = self.middleware.call_sync('failover.vip.get_states')
        remote = self.middleware.call_sync('failover.call_remote',
                                           'failover.vip.get_states')

        errors = self.middleware.call_sync('failover.vip.check_states', local,
                                           remote)
        for error in errors:
            alerts.append(
                Alert(
                    CARPStatesDoNotAgreeAlertClass,
                    {"error": error},
                ))

    except CallError as e:
        # ECONNREFUSED is tolerated; execution continues with the status
        # checks below. Any other CallError ends the check with one alert.
        if e.errno != errno.ECONNREFUSED:
            return [Alert(FailoverStatusCheckFailedAlertClass, [str(e)])]

    status = self.middleware.call_sync('failover.status')

    if status == 'ERROR':
        errmsg = None
        # Use the contents of the marker file as the error message when it
        # exists and is non-empty.
        if os.path.exists('/tmp/.failover_failed'):
            with open('/tmp/.failover_failed', 'r') as fh:
                errmsg = fh.read()
        if not errmsg:
            errmsg = 'Unknown error'

        alerts.append(Alert(FailoverFailedAlertClass, [errmsg]))

    elif status not in ('MASTER', 'BACKUP', 'SINGLE'):
        alerts.append(Alert(ExternalFailoverLinkStatusAlertClass))

    internal_ifaces = self.middleware.call_sync(
        'failover.internal_interfaces')
    if internal_ifaces:
        # NOTE(review): the interface name is interpolated into a shell
        # pipeline (shell=True); it comes from the middleware, not from
        # user input as far as this block shows - confirm.
        p1 = subprocess.Popen(
            "/sbin/ifconfig %s|grep -E 'vhid (10|20) '|grep 'carp:'"
            % internal_ifaces[0],
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            shell=True,
            encoding='utf8',
        )
        stdout = p1.communicate()[0].strip()
        # After strip(), exactly one newline means exactly two matching
        # lines; any other count raises the internal link alert.
        if status != "SINGLE" and stdout.count("\n") != 1:
            alerts.append(Alert(InternalFailoverLinkStatusAlertClass))

    if status != "SINGLE":
        try:
            # presumably a value of 1 marks a problem with the CTL HA link
            # - TODO confirm against the sysctl documentation
            if sysctl.filter('kern.cam.ctl.ha_link')[0].value == 1:
                alerts.append(Alert(CTLHALinkAlertClass))
        except Exception:
            # Best effort: any failure reading the sysctl is ignored.
            pass

    if status == 'BACKUP':
        fobj = None
        try:
            with open(FAILOVER_JSON, 'r') as f:
                fobj = json.loads(f.read())
        except Exception:
            # Best effort: fobj stays None when the file cannot be read or
            # parsed.
            pass

        try:
            # NOTE(review): when fobj is None the subscript below raises
            # TypeError, which the broad except at the end of this block
            # swallows, so the key check is silently skipped.
            if len(fobj['phrasedvolumes']) > 0:
                keys = self.middleware.call_sync(
                    'failover.encryption_keys')['geli']
                not_found = False
                for pool in fobj['phrasedvolumes']:
                    if pool not in keys:
                        not_found = True
                        alerts.append(
                            Alert(NoFailoverPassphraseKeysAlertClass,
                                  {'pool': pool}))
                if not_found:
                    # At least one key is missing locally: ask the remote
                    # node to sync its keys to this node.
                    self.middleware.call_sync(
                        'failover.call_remote',
                        'failover.sync_keys_to_remote_node')
        except Exception:
            pass

    return alerts