def __init__(self, id: int, args: Dict[str, str],
             monitor_def: ActiveMonitorDef, state: str, state_ts: float,
             msg: str, alert_id: Union[int, None], checks_enabled: bool,
             alerts_enabled: bool, manager: ActiveMonitorManager) -> None:
    """Store the monitor's configuration and initialise its runtime state.

    A falsy ``state_ts`` is replaced with the current time. Once all
    attributes are set, a CREATE_ACTIVE_MONITOR event is fired with this
    instance and the 'num_monitors' stat is incremented.
    """
    # Identity and configuration handed in by the caller.
    self.id = id
    self.args = args
    self.monitor_def = monitor_def
    self.state = state
    self.manager = manager

    # Bookkeeping for check results.
    self.last_check_state = None  # type: Optional[str]
    self.consecutive_checks = 0
    self.last_check = time.time()
    self.msg = msg
    self.alert_id = alert_id
    # Fall back to "now" when no usable state timestamp was supplied.
    self.state_ts = state_ts or time.time()

    # Runtime flags.
    self.monitoring = False
    self.deleted = False
    self.checks_enabled = checks_enabled
    self.alerts_enabled = alerts_enabled
    self._pending_reset = False

    # No job is scheduled yet.
    self.scheduled_job = None  # type: Optional[asyncio.Handle]
    self.scheduled_job_ts = 0.0

    # Announce the new monitor only after it is fully initialised, since
    # the instance itself is passed along with the event.
    event.running('CREATE_ACTIVE_MONITOR', monitor=self)
    stats.inc('num_monitors', 'ACT_MON')
async def fetch_row(self, query: str, args: Optional[Iterable]=None) -> List:
    """Execute ``query`` with ``args`` and return the first row fetched.

    A connection is acquired from ``self.pool`` for the duration of the
    call, and the 'queries' stat is incremented once per call.
    """
    stats.inc('queries', 'SQL')
    async with self.pool.acquire() as connection, \
            connection.cursor() as cursor:
        await cursor.execute(query, args)
        row = await cursor.fetchone()
    return row
async def operation(self, query: str, args: Optional[Iterable]=None) -> Any:
    """Execute a modifying statement (insert, update, ...) and commit it.

    Not intended for selects. Returns the cursor's ``lastrowid`` as read
    right after execution, i.e. the id of the created row if there is one.
    """
    stats.inc('queries', 'SQL')
    async with self.pool.acquire() as connection, \
            connection.cursor() as cursor:
        await cursor.execute(query, args)
        # Read the row id before committing, as the original code does.
        row_id = cursor.lastrowid
        await connection.commit()
    return row_id
def listen(self, callback: Callable, *,
           event_filter: Optional[List[str]] = None,
           active_monitor_filter: Optional[List[Union[str, int]]] = None
           ) -> EventListener:
    """Register ``callback`` as an event receiver and return its listener.

    Both filters are optional and are handed to the EventListener as-is:

    event_filter: list of event names that must match.
    active_monitor_filter: list of active monitor ids that must match.
    """
    stats.inc('num_listeners', 'EVENT')
    new_listener = EventListener(
        self,
        callback,
        event_filter=event_filter,
        active_monitor_filter=active_monitor_filter,
    )
    self.listeners.add(new_listener)
    return new_listener
async def transact(self, func: Callable[..., Any], *args: Any, **kwargs: Any) -> Any:
    """Run ``func(cursor, *args, **kwargs)`` as a simulated transaction.

    A cursor is opened on a pooled connection and passed to the callback.
    If the callback returns, the connection is committed and the
    callback's result is returned. If the callback raises anything, the
    connection is rolled back and the exception is re-raised.
    """
    stats.inc('transactions', 'SQL')
    async with self.pool.acquire() as connection, \
            connection.cursor() as cursor:
        try:
            result = await func(cursor, *args, **kwargs)
        except BaseException:
            # Deliberately catches everything: any failure in the callback
            # must undo its work before propagating.
            await connection.rollback()
            raise
        await connection.commit()
    return result
def running(self, event_name: str, **kwargs: Any) -> None:
    """Fire an event and dispatch it to every interested listener.

    Each listener whose ``wants_event`` accepts the event has its
    callback invoked as:
        callback(listener, event-name, timestamp, arg-dict)
    The callback's result is scheduled with ``asyncio.ensure_future``.
    A failure while invoking or scheduling a callback is logged and
    does not stop delivery to the remaining listeners.
    """
    stats.inc('events_fired', 'EVENT')
    if not self.listeners:
        return
    # All listeners receive the same timestamp for one event.
    fired_at = time.time()
    for listener in self.listeners:
        if listener.wants_event(event_name, kwargs):
            try:
                pending = listener.callback(
                    listener, event_name, fired_at, kwargs)
                asyncio.ensure_future(pending)
            except Exception as e:
                log.msg('Failed to run event listener callback: %s' % str(e))
async def _run_monitor(self, monitor_id: int) -> None:
    """Run the scheduled job for the monitor with id ``monitor_id``.

    Does nothing (beyond a debug log) if the monitor is no longer in
    ``self.monitors``. If too many jobs are already running, the monitor
    is rescheduled with a random delay of 10-30 and the run is skipped.
    Otherwise the monitor's ``run()`` coroutine is awaited while the
    running-job counter and the 'cur_running_jobs' stat are held.

    Raises:
        Exception: anything raised by ``monitor.run()`` is logged and
            re-raised, after the monitor has been rescheduled with
            DEFAULT_MONITOR_INTERVAL if it has no job scheduled.
    """
    monitor = self.monitors.get(monitor_id)
    if not monitor:
        log.debug('Skipping scheduled job for missing monitor %s' % monitor_id)
        return None
    # The scheduled job is the one executing now, so clear the handle.
    monitor.scheduled_job = None
    # NOTE(review): '>' admits one job more than max_concurrent_jobs before
    # deferring - confirm whether '>=' was intended.
    if self.num_running_jobs > self.max_concurrent_jobs:
        # NOTE(review): "to to many" is a typo in this runtime log message;
        # left unchanged in case log consumers match on the text.
        log.msg('Deferred monitor %s due to to many running jobs' % monitor)
        self.schedule_monitor(monitor, random.randint(10, 30))
        stats.inc('jobs_deferred', 'ACT_MON')
        return None
    self.num_running_jobs += 1
    stats.inc('total_jobs_run', 'ACT_MON')
    stats.inc('cur_running_jobs', 'ACT_MON')
    try:
        await monitor.run()
    except Exception as e:
        log.msg('Monitor run raised error: %s' % (str(e)))
        # Keep the monitor alive: make sure it will run again even though
        # this run failed.
        if not monitor.scheduled_job:
            self.schedule_monitor(monitor, DEFAULT_MONITOR_INTERVAL)
        raise
    finally:
        # Release the job slot on every exit path. This includes
        # cancellation, which is not an ``Exception`` subclass on
        # Python 3.8+ and would otherwise leak the counter and the stat.
        self.num_running_jobs -= 1
        stats.dec('cur_running_jobs', 'ACT_MON')
async def middleware_handler(request: web.Request) -> web.Response:
    """Count and log the incoming request, then delegate to ``handler``.

    ``handler`` comes from the enclosing scope; its response is returned
    unchanged.
    """
    stats.inc('num_calls', 'WEBMGMT')
    log.msg('Received request: %s' % request, 'WEBMGMT')
    response = await handler(request)
    return response
async def handle_check_result(self, check_state: str, msg: str) -> None:
    """Act on the outcome of a check.

    Depending on ``check_state`` and the monitor's current state, this
    reschedules the monitor through the manager, changes the monitor's
    state where warranted, and increments the matching check counter.
    An ACTIVE_MONITOR_CHECK_RESULT event is always fired at the end,
    including for a ``check_state`` other than UP/DOWN/UNKNOWN.
    """
    if check_state == 'UP':
        if self.state == 'UP':
            # Everything is fine: jitter the interval slightly so that
            # service checks spread out over time.
            self.manager.schedule_monitor(
                self, DEFAULT_MONITOR_INTERVAL + random.randint(-5, 5))
        else:
            # Recovered: reschedule first, then record the state change.
            self.manager.schedule_monitor(self, DEFAULT_MONITOR_INTERVAL)
            await self.state_change('UP', msg)
        stats.inc('checks_up', 'ACT_MON')
    elif check_state == 'DOWN':
        if self.state == 'DOWN':
            self.manager.schedule_monitor(self, DEFAULT_MONITOR_INTERVAL)
        elif (self.state == 'UNKNOWN'
                or self.consecutive_checks >= DOWN_THRESHOLD):
            # From UNKNOWN the change is immediate; from any other state
            # it requires DOWN_THRESHOLD consecutive checks.
            await self.state_change('DOWN', msg)
            self.manager.schedule_monitor(self, DEFAULT_MONITOR_INTERVAL)
        else:
            # Threshold not reached yet: reschedule with the fixed
            # interval of 30 instead of the default.
            self.manager.schedule_monitor(self, 30)
        stats.inc('checks_down', 'ACT_MON')
    elif check_state == 'UNKNOWN':
        if self.state == 'UNKNOWN':
            self.manager.schedule_monitor(self, DEFAULT_MONITOR_INTERVAL)
        elif self.consecutive_checks >= UNKNOWN_THRESHOLD:
            await self.state_change('UNKNOWN', msg)
            self.manager.schedule_monitor(self, DEFAULT_MONITOR_INTERVAL)
        else:
            # Threshold not reached yet: reschedule with the fixed
            # interval of 120 instead of the default.
            self.manager.schedule_monitor(self, 120)
        stats.inc('checks_unknown', 'ACT_MON')
    event.running('ACTIVE_MONITOR_CHECK_RESULT', monitor=self,
                  check_state=check_state, msg=msg)