class PagerDutyDaemon(Daemon):
    """Daemon that runs a PagerDutyMessage worker and sends periodic heartbeats."""

    # Option defaults registered with the shared config at construction time.
    pagerduty_opts = {
        'pagerduty_subdomain': '',
        'pagerduty_api_key': ''
    }

    def __init__(self, prog, **kwargs):
        """Register the PagerDuty options, then initialise the base Daemon."""
        config.register_opts(PagerDutyDaemon.pagerduty_opts)

        # NOTE(review): kwargs is forwarded as a single positional dict, not
        # unpacked -- confirm this matches what Daemon.__init__ expects.
        Daemon.__init__(self, prog, kwargs)

    def run(self):
        """Start the worker, then send a heartbeat every CONF.loop_every seconds.

        A failed heartbeat is logged and the loop carries on.  On
        KeyboardInterrupt or SystemExit the worker's ``should_stop`` flag is
        set and the method returns.
        """
        worker = PagerDutyMessage()
        worker.start()

        self.api = ApiClient()

        try:
            while True:
                LOG.debug('Send heartbeat...')
                beat = Heartbeat(tags=[__version__])
                try:
                    self.api.send(beat)
                except Exception as err:
                    LOG.warning('Failed to send heartbeat: %s', err)

                time.sleep(CONF.loop_every)
        except (KeyboardInterrupt, SystemExit):
            worker.should_stop = True
def run(self):
    """Start the mail worker threads, then heartbeat until interrupted.

    A LeakyBucket, a MailerMessage and a MailSender are started; the last two
    share the same on-hold dict and the bucket.  Heartbeat failures are logged
    and do not stop the loop.
    """
    pending = {}

    # Start token bucket thread
    bucket = LeakyBucket(tokens=20, rate=30)
    bucket.start()

    mailer = MailerMessage(pending, bucket)
    mailer.start()

    sender = MailSender(pending, bucket)
    sender.start()

    self.api = ApiClient()

    try:
        while True:
            LOG.debug('Send heartbeat...')
            beat = Heartbeat(tags=[__version__])
            try:
                self.api.send(beat)
            except Exception as err:
                LOG.warning('Failed to send heartbeat: %s', err)

            time.sleep(CONF.loop_every)
    except (KeyboardInterrupt, SystemExit):
        # Only the MailerMessage thread is flagged here; the sender and the
        # token bucket are not touched by this handler.
        mailer.should_stop = True
class MailerDaemon(Daemon):
    """Daemon that runs the mail worker threads and sends periodic heartbeats."""

    def run(self):
        """Start the bucket, mailer and sender threads, then heartbeat forever.

        The loop sleeps CONF.loop_every seconds between heartbeats.  A failed
        heartbeat is only logged.  KeyboardInterrupt / SystemExit end the loop
        and set ``should_stop`` on the MailerMessage thread.
        """
        held_back = {}

        # Start token bucket thread
        rate_limiter = LeakyBucket(tokens=20, rate=30)
        rate_limiter.start()

        mailer = MailerMessage(held_back, rate_limiter)
        mailer.start()

        sender = MailSender(held_back, rate_limiter)
        sender.start()

        self.api = ApiClient()

        def send_heartbeat():
            # One heartbeat attempt; failures are reported, never raised.
            LOG.debug('Send heartbeat...')
            beat = Heartbeat(tags=[__version__])
            try:
                self.api.send(beat)
            except Exception as err:
                LOG.warning('Failed to send heartbeat: %s', err)

        try:
            while True:
                send_heartbeat()
                time.sleep(CONF.loop_every)
        except (KeyboardInterrupt, SystemExit):
            # Only the mailer is flagged; sender and bucket are left as they are.
            mailer.should_stop = True
class LoggerDaemon(Daemon):
    """Daemon that runs a LoggerMessage worker and sends periodic heartbeats."""

    # Option defaults registered with the shared config at construction time.
    logger_opts = {
        'es_host': 'localhost',
        'es_port': 9200,
        'es_index': 'alerta-%Y.%m.%d',  # NB. Kibana config must match this index
    }

    def __init__(self, prog, **kwargs):
        """Register the logger options, then initialise the base Daemon."""
        config.register_opts(LoggerDaemon.logger_opts)

        # NOTE(review): kwargs is forwarded as a single positional dict, not
        # unpacked -- confirm this matches what Daemon.__init__ expects.
        Daemon.__init__(self, prog, kwargs)

    def run(self):
        """Start the worker, then heartbeat every CONF.loop_every seconds.

        Heartbeat failures are logged and ignored.  KeyboardInterrupt or
        SystemExit set the worker's ``should_stop`` flag and end the loop.
        """
        worker = LoggerMessage()
        worker.start()

        self.api = ApiClient()

        try:
            while True:
                LOG.debug('Send heartbeat...')
                beat = Heartbeat(tags=[__version__])
                try:
                    self.api.send(beat)
                except Exception as err:
                    LOG.warning('Failed to send heartbeat: %s', err)

                time.sleep(CONF.loop_every)
        except (KeyboardInterrupt, SystemExit):
            worker.should_stop = True
class PagerDutyDaemon(Daemon):
    """Run the PagerDuty message worker alongside a heartbeat loop."""

    # Defaults for the options this daemon adds to the shared config.
    pagerduty_opts = {'pagerduty_subdomain': '', 'pagerduty_api_key': ''}

    def __init__(self, prog, **kwargs):
        """Register ``pagerduty_opts`` and delegate to ``Daemon.__init__``."""
        config.register_opts(PagerDutyDaemon.pagerduty_opts)

        # NOTE(review): the kwargs dict is passed positionally (not as
        # **kwargs) -- verify against the Daemon constructor.
        Daemon.__init__(self, prog, kwargs)

    def run(self):
        """Start PagerDutyMessage and send heartbeats until interrupted."""
        pd_thread = PagerDutyMessage()
        pd_thread.start()

        self.api = ApiClient()

        def send_heartbeat():
            # A failed send is logged only, so the loop keeps running.
            LOG.debug('Send heartbeat...')
            beat = Heartbeat(tags=[__version__])
            try:
                self.api.send(beat)
            except Exception as err:
                LOG.warning('Failed to send heartbeat: %s', err)

        try:
            while True:
                send_heartbeat()
                time.sleep(CONF.loop_every)
        except (KeyboardInterrupt, SystemExit):
            pd_thread.should_stop = True
def run(self):
    """Start the LoggerMessage worker, then heartbeat until interrupted.

    A heartbeat is sent every CONF.loop_every seconds.  A failure to send one
    is logged as a warning and the loop continues, so a temporary API problem
    does not end the daemon while the worker thread is still running.
    KeyboardInterrupt or SystemExit set the worker's ``should_stop`` flag and
    end the loop.
    """
    logger = LoggerMessage()
    logger.start()

    api = ApiClient()

    try:
        while True:
            LOG.debug('Send heartbeat...')
            heartbeat = Heartbeat(origin=__name__, tags=[__version__])
            try:
                api.send(heartbeat)
            except Exception as e:
                # Best effort: a missed heartbeat must not stop the daemon.
                LOG.warning('Failed to send heartbeat: %s', e)

            time.sleep(CONF.loop_every)
    except (KeyboardInterrupt, SystemExit):
        logger.should_stop = True
def run(self):
    """Set up shared state and start CONF.server_threads worker threads.

    A worker that fails to start is logged as an error and skipped; the
    remaining workers are still started.
    """
    self.running = True

    # Internal queue handed to every worker
    self.queue = Queue.Queue()

    self.api = ApiClient()
    self.dedup = DeDup()
    self.carbon = Carbon()  # graphite metrics

    # Initialise ping targets (the result is not used in the code shown here)
    ping_list = init_targets()

    # Start worker threads
    LOG.debug('Starting %s worker threads...', CONF.server_threads)
    for index in range(CONF.server_threads):
        worker = WorkerThread(self.api, self.queue, self.dedup, self.carbon)
        try:
            worker.start()
        except Exception as err:
            LOG.error('Worker thread #%s did not start: %s', index, err)
        else:
            LOG.info('Started worker thread: %s', worker.getName())
def run(self):
    """Poll Dynect and send heartbeats until ``self.shuttingdown`` is set.

    Each pass queries Dynect, raises alerts (and records the last seen info)
    when ``self.updating`` is set, sends a heartbeat and then sleeps for
    CONF.loop_every seconds.  KeyboardInterrupt or SystemExit raised during a
    pass set ``self.shuttingdown`` so the loop ends.
    """
    self.running = True

    self.api = ApiClient()

    while not self.shuttingdown:
        try:
            self.queryDynect()

            if self.updating:
                self.alertDynect()
                self.last_info = self.info

            LOG.debug('Send heartbeat...')
            beat = Heartbeat(tags=[__version__])
            try:
                self.api.send(beat)
            except Exception as err:
                # A failed heartbeat is logged only.
                LOG.warning('Failed to send heartbeat: %s', err)

            LOG.debug('Waiting for next check run...')
            time.sleep(CONF.loop_every)
        except (KeyboardInterrupt, SystemExit):
            self.shuttingdown = True
class IrcbotServer(threading.Thread, irc.bot.SingleServerIRCBot):
    """IRC bot, run as a thread, that accepts simple alert commands in a channel."""

    def __init__(self):
        """Initialise the bot for CONF.irc_host:CONF.irc_port and create an API client."""
        LOG.info('Connecting to IRC server %s:%s', CONF.irc_host, CONF.irc_port)

        servers = [(CONF.irc_host, CONF.irc_port)]
        # CONF.irc_user is passed for both trailing constructor arguments.
        irc.bot.SingleServerIRCBot.__init__(self, servers, CONF.irc_user, CONF.irc_user)
        threading.Thread.__init__(self)

        self.channel = CONF.irc_channel
        self.api = ApiClient()

    def run(self):
        """Thread body: connect, then call ``start()`` on the class that follows
        SingleServerIRCBot in the method resolution order."""
        self._connect()
        super(irc.bot.SingleServerIRCBot, self).start()
        LOG.info('Connected to %s:%s', CONF.irc_host, CONF.irc_port)

    def on_welcome(self, connection, event):
        """Join the configured channel."""
        connection.join(self.channel)
        LOG.info('Joined %s', self.channel)

    def on_pubmsg(self, connection, event):
        """Split a channel message into command and argument and dispatch it.

        ``args`` is None when the message contains no space.
        """
        message = event.arguments[0]
        cmd, separator, remainder = message.partition(' ')
        args = remainder if separator else None
        self.do_command(event, cmd, args)

    def do_command(self, event, cmd, args):
        """Run one bot command; anything unrecognised is answered with "huh?".

        "ack" and "delete" are only recognised when ``args`` is non-empty.
        """
        if cmd == "disconnect":
            self.disconnect()
            return
        if cmd == "die":
            self.die()
            return
        if cmd == "ack" and args:
            self.api.ack_alert(args)
            return
        if cmd == "delete" and args:
            self.api.delete_alert(args)
            return

        self.connection.privmsg(self.channel, "huh?")
class SnmpTrapHandler(object):
    """Turn one SNMP trap read from stdin into an alert, then send a heartbeat."""

    def __init__(self, prog, disable_flag=None):
        """Store the program name and disable-flag path.

        ``disable_flag`` falls back to CONF.disable_flag when not given.
        """
        self.prog = prog
        self.disable_flag = disable_flag or CONF.disable_flag

    def start(self):
        """Log start-up, exit early if the disable flag exists, otherwise run."""
        LOG.info('Starting %s...' % self.prog)
        self.skip_on_disable()
        self.run()

    def skip_on_disable(self):
        """Exit with status 0 when the disable flag file exists."""
        if not os.path.isfile(self.disable_flag):
            return

        LOG.warning('Disable flag %s exists. Skipping...', self.disable_flag)
        sys.exit(0)

    def run(self):
        """Read the trap from stdin, send the parsed alert and a heartbeat.

        Failures to send either message are logged as warnings and do not
        raise.
        """
        self.statsd = StatsD()  # graphite metrics

        raw = sys.stdin.read()
        LOG.info('snmptrapd -> %r', raw)

        # Bytes that are not valid UTF-8 are dropped.
        decoded = unicode(raw, 'utf-8', errors='ignore')
        LOG.debug('unicoded -> %s', decoded)

        snmptrapAlert = SnmpTrapHandler.parse_snmptrap(decoded)

        self.api = ApiClient()

        if snmptrapAlert:
            try:
                self.api.send(snmptrapAlert)
            except Exception as err:
                LOG.warning('Failed to send alert: %s', err)
            # Counted whether or not the send succeeded.
            self.statsd.metric_send('alert.snmptrap.alerts.total', 1)

        LOG.debug('Send heartbeat...')
        beat = Heartbeat(tags=[__version__])
        try:
            self.api.send(beat)
        except Exception as err:
            LOG.warning('Failed to send heartbeat: %s', err)
def run(self):
    """Start the PagerDutyMessage worker, then heartbeat until interrupted.

    The loop sleeps CONF.loop_every seconds between heartbeats and logs (but
    otherwise ignores) a failed send.  KeyboardInterrupt or SystemExit set
    the worker's ``should_stop`` flag and end the loop.
    """
    notifier = PagerDutyMessage()
    notifier.start()

    self.api = ApiClient()

    try:
        while True:
            LOG.debug('Send heartbeat...')
            beat = Heartbeat(tags=[__version__])
            try:
                self.api.send(beat)
            except Exception as err:
                LOG.warning('Failed to send heartbeat: %s', err)

            time.sleep(CONF.loop_every)
    except (KeyboardInterrupt, SystemExit):
        notifier.should_stop = True
def __init__(self):
    """Initialise the IRC bot and thread base classes and create an API client."""
    LOG.info('Connecting to IRC server %s:%s', CONF.irc_host, CONF.irc_port)

    server_list = [(CONF.irc_host, CONF.irc_port)]
    # CONF.irc_user is passed for both trailing constructor arguments.
    irc.bot.SingleServerIRCBot.__init__(self, server_list, CONF.irc_user, CONF.irc_user)
    threading.Thread.__init__(self)

    self.channel = CONF.irc_channel
    self.api = ApiClient()
class DynectDaemon(Daemon):
    """Daemon that polls Dynect, raises alerts on updates and sends heartbeats."""

    # Option defaults registered with the shared config at construction time.
    dynect_opts = {
        'dynect_customer': '',
        'dynect_username': '',
        'dynect_password': '',
    }

    def __init__(self, prog, **kwargs):
        """Register the Dynect options and set up the polling state."""
        config.register_opts(DynectDaemon.dynect_opts)

        # NOTE(review): kwargs is forwarded as a single positional dict, not
        # unpacked -- confirm this matches what Daemon.__init__ expects.
        Daemon.__init__(self, prog, kwargs)

        self.info = {}
        self.last_info = {}
        self.updating = False
        self.dedup = DeDup(threshold=10)

    def run(self):
        """Poll and heartbeat every CONF.loop_every seconds until shut down.

        ``self.shuttingdown`` is not assigned in this class before the loop
        reads it; presumably the Daemon base class provides it.
        KeyboardInterrupt or SystemExit during a pass set it and end the loop.
        """
        self.running = True

        self.api = ApiClient()

        while not self.shuttingdown:
            try:
                self.queryDynect()

                if self.updating:
                    self.alertDynect()
                    self.last_info = self.info

                LOG.debug('Send heartbeat...')
                beat = Heartbeat(tags=[__version__])
                try:
                    self.api.send(beat)
                except Exception as err:
                    # A failed heartbeat is logged only.
                    LOG.warning('Failed to send heartbeat: %s', err)

                LOG.debug('Waiting for next check run...')
                time.sleep(CONF.loop_every)
            except (KeyboardInterrupt, SystemExit):
                self.shuttingdown = True
def run(self):
    """Start the IRC bot and its message thread, then heartbeat until interrupted.

    A heartbeat is sent every CONF.loop_every seconds.  A failure to send one
    is logged as a warning and the loop continues, so a temporary API problem
    does not end the daemon while the bot threads are still running.
    KeyboardInterrupt or SystemExit set ``ircbot.should_stop`` and end the
    loop.
    """
    ircbot = IrcbotServer()

    mq = IrcbotMessage(ircbot)
    mq.start()

    ircbot.start()

    api = ApiClient()

    try:
        while True:
            LOG.debug('Send heartbeat...')
            heartbeat = Heartbeat(tags=[__version__])
            try:
                api.send(heartbeat)
            except Exception as e:
                # Best effort: a missed heartbeat must not stop the daemon.
                LOG.warning('Failed to send heartbeat: %s', e)

            time.sleep(CONF.loop_every)
    except (KeyboardInterrupt, SystemExit):
        ircbot.should_stop = True
def run(self):
    """Read one SNMP trap from stdin, send it as an alert, then send a heartbeat.

    Failures to send the alert or the heartbeat are logged as warnings rather
    than raised, so a failed alert no longer prevents the heartbeat from
    being attempted.
    """
    self.statsd = StatsD()  # graphite metrics

    data = sys.stdin.read()
    LOG.info('snmptrapd -> %r', data)

    # Bytes that are not valid UTF-8 are dropped.
    data = unicode(data, 'utf-8', errors='ignore')
    LOG.debug('unicoded -> %s', data)

    snmptrapAlert = SnmpTrapHandler.parse_snmptrap(data)

    self.api = ApiClient()

    if snmptrapAlert:
        try:
            self.api.send(snmptrapAlert)
        except Exception as e:
            LOG.warning('Failed to send alert: %s', e)
        # Counted for every parsed trap, whether or not the send succeeded.
        self.statsd.metric_send('alert.snmptrap.alerts.total', 1)

    LOG.debug('Send heartbeat...')
    heartbeat = Heartbeat(version=Version)
    try:
        self.api.send(heartbeat)
    except Exception as e:
        LOG.warning('Failed to send heartbeat: %s', e)
class IrcbotDaemon(Daemon):
    """Daemon that runs the IRC bot, its message thread and a heartbeat loop."""

    # Option defaults registered with the shared config at construction time.
    ircbot_opts = {
        'irc_host': 'localhost',
        'irc_port': 6667,
        'irc_channel': '#alerts',
        'irc_user': '******',
    }

    def __init__(self, prog, **kwargs):
        """Register the IRC options, then initialise the base Daemon."""
        config.register_opts(IrcbotDaemon.ircbot_opts)

        # NOTE(review): kwargs is forwarded as a single positional dict, not
        # unpacked -- confirm this matches what Daemon.__init__ expects.
        Daemon.__init__(self, prog, kwargs)

    def run(self):
        """Start the bot threads, then heartbeat every CONF.loop_every seconds.

        The message thread is started before the bot itself.  Heartbeat
        failures are logged and ignored.  KeyboardInterrupt or SystemExit set
        ``should_stop`` on the bot and end the loop.
        """
        bot = IrcbotServer()

        message_thread = IrcbotMessage(bot)
        message_thread.start()

        bot.start()

        self.api = ApiClient()

        try:
            while True:
                LOG.debug('Send heartbeat...')
                beat = Heartbeat(tags=[__version__])
                try:
                    self.api.send(beat)
                except Exception as err:
                    LOG.warning('Failed to send heartbeat: %s', err)

                time.sleep(CONF.loop_every)
        except (KeyboardInterrupt, SystemExit):
            bot.should_stop = True
def run(self):
    """Read one SNMP trap from stdin, send it as an alert, then send a heartbeat.

    Failures to send the alert or the heartbeat are logged as warnings rather
    than raised, so a failed alert no longer prevents the heartbeat from
    being attempted.
    """
    data = sys.stdin.read()
    LOG.info('snmptrapd -> %r', data)

    # Bytes that are not valid UTF-8 are dropped.
    data = unicode(data, 'utf-8', errors='ignore')
    LOG.debug('unicoded -> %s', data)

    snmptrapAlert = SnmpTrapHandler.parse_snmptrap(data)

    self.api = ApiClient()

    if snmptrapAlert:
        try:
            self.api.send(snmptrapAlert)
        except Exception as e:
            LOG.warning('Failed to send alert: %s', e)

    LOG.debug('Send heartbeat...')
    heartbeat = Heartbeat(version=Version)
    try:
        self.api.send(heartbeat)
    except Exception as e:
        LOG.warning('Failed to send heartbeat: %s', e)
def main(self):
    """Send a heartbeat or an alert built from CONF and return its id.

    When CONF.heartbeat is set a Heartbeat is sent; otherwise an Alert is
    assembled from the CONF values and sent.  The return value is whatever
    the sent message's ``get_id()`` returns.
    """
    if not CONF.heartbeat:
        alert = Alert(
            resource=CONF.resource,
            event=CONF.event,
            correlate=CONF.correlate,
            group=CONF.group,
            value=CONF.value,
            status=CONF.status,
            severity=CONF.severity,
            environment=CONF.environment,
            service=CONF.service,
            text=CONF.text,
            event_type=CONF.event_type,
            tags=CONF.tags,
            origin=CONF.origin,
            threshold_info='n/a',  # TODO(nsatterl): make this configurable?
            summary=CONF.summary,
            timeout=CONF.timeout,
            raw_data='n/a',  # TODO(nsatterl): make this configurable?
            more_info=CONF.more_info,
            graph_urls=CONF.graph_urls,
        )
        LOG.debug(repr(alert))

        client = ApiClient()
        client.send(alert)
        return alert.get_id()

    # Version comes from the 'Version' tag when present, else the module default.
    beat = Heartbeat(
        origin=CONF.origin,
        version=CONF.tags.get('Version', Version),
        timeout=CONF.timeout
    )
    LOG.debug(beat)

    client = ApiClient()
    client.send(beat)
    return beat.get_id()
def run(self):
    """Start the LoggerMessage worker, then heartbeat until interrupted.

    The loop sleeps CONF.loop_every seconds between heartbeats and logs (but
    otherwise ignores) a failed send.  KeyboardInterrupt or SystemExit set
    the worker's ``should_stop`` flag and end the loop.
    """
    log_worker = LoggerMessage()
    log_worker.start()

    self.api = ApiClient()

    try:
        while True:
            LOG.debug('Send heartbeat...')
            beat = Heartbeat(tags=[__version__])
            try:
                self.api.send(beat)
            except Exception as err:
                LOG.warning('Failed to send heartbeat: %s', err)

            time.sleep(CONF.loop_every)
    except (KeyboardInterrupt, SystemExit):
        log_worker.should_stop = True
def run(self):
    """Read one SNMP trap from stdin and send the parsed alert.

    Nothing is sent when parsing yields a falsy result.  A failed send is
    logged as a warning; the statsd counter is incremented either way.
    """
    self.statsd = StatsD()  # graphite metrics

    raw = sys.stdin.read()
    LOG.info('snmptrapd -> %r', raw)

    # Bytes that are not valid UTF-8 are dropped.
    decoded = unicode(raw, 'utf-8', errors='ignore')
    LOG.debug('unicoded -> %s', decoded)

    snmptrapAlert = SnmpTrapHandler.parse_snmptrap(decoded)

    self.api = ApiClient()

    if not snmptrapAlert:
        return

    try:
        self.api.send(snmptrapAlert)
    except Exception as err:
        LOG.warning('Failed to send alert: %s', err)
    self.statsd.metric_send('alert.snmptrap.alerts.total', 1)
def main(self):
    """Send a heartbeat or an alert built from CONF and return its id.

    When CONF.heartbeat is set a Heartbeat is sent; otherwise an Alert is
    assembled from the CONF values and sent.  The return value is whatever
    the sent message's ``get_id()`` returns.
    """
    if not CONF.heartbeat:
        alert = Alert(
            resource=CONF.resource,
            event=CONF.event,
            correlate=CONF.correlate,
            group=CONF.group,
            value=CONF.value,
            status=CONF.status,
            severity=CONF.severity,
            environment=CONF.environment,
            service=CONF.service,
            text=CONF.text,
            event_type=CONF.event_type,
            tags=CONF.tags,
            origin=CONF.origin,
            threshold_info='n/a',  # TODO(nsatterl): make this configurable?
            summary=CONF.summary,
            timeout=CONF.timeout,
            raw_data='n/a',  # TODO(nsatterl): make this configurable?
            more_info=CONF.more_info,
            graph_urls=CONF.graph_urls,
        )
        LOG.debug(repr(alert))

        client = ApiClient()
        client.send(alert)
        return alert.get_id()

    # All tags are concatenated (no separator) to form the version string;
    # the module-level Version is used when that yields nothing.
    if CONF.tags:
        vtag = ''.join(CONF.tags)
    else:
        vtag = None

    beat = Heartbeat(origin=CONF.origin, version=vtag or Version, timeout=CONF.timeout)
    LOG.debug(repr(beat))

    client = ApiClient()
    client.send(beat)
    return beat.get_id()
class CheckerClient(object):
    """Run a Nagios plugin (or build a heartbeat) and send the result to the API."""

    # Option defaults registered with the shared config at construction time.
    nagios_opts = {
        'nagios_plugins': '/usr/lib64/nagios/plugins',
    }

    def __init__(self):
        """Register the Nagios options with the shared config."""
        config.register_opts(CheckerClient.nagios_opts)

    def main(self):
        """Build a heartbeat or a Nagios-check alert, send it, and return its id.

        With CONF.heartbeat set, a Heartbeat is built.  Otherwise the plugin
        named by CONF.nagios_cmd is executed, its exit code is mapped to a
        severity and its stdout is split into short text, long text and
        performance data.  With CONF.dry_run the message is printed instead of
        being sent.  The process exits with status 1 if the plugin cannot be
        started.
        """
        if CONF.heartbeat:
            msg = Heartbeat(version=Version)
        else:
            # Run Nagios plugin check
            command_line = os.path.join(CONF.nagios_plugins, CONF.nagios_cmd)
            argv = shlex.split(command_line)
            LOG.info('Running %s', ' '.join(argv))

            try:
                proc = subprocess.Popen(argv, stdout=subprocess.PIPE)
            except Exception as err:
                LOG.error('Nagios check did not execute: %s', err)
                sys.exit(1)

            stdout = proc.communicate()[0]
            rc = proc.returncode
            LOG.debug('Nagios plugin %s => %s (rc=%d)', CONF.nagios_cmd, stdout, rc)

            # Exit codes 0-3 map to a severity; anything else is reported as
            # rc=-1 with an indeterminate severity.
            severity_by_rc = {
                0: severity_code.NORMAL,
                1: severity_code.WARNING,
                2: severity_code.CRITICAL,
                3: severity_code.UNKNOWN,
            }
            if rc in severity_by_rc:
                severity = severity_by_rc[rc]
            else:
                rc = -1
                severity = severity_code.INDETERMINATE

            # Parse Nagios plugin check output
            output_lines = stdout.split('\n')

            # First line: short text, optionally followed by '|' and perf data.
            first_line = output_lines[0]
            first_parts = first_line.split('|')
            if len(first_parts) > 1:
                text = first_parts[0].rstrip(' ')
                perf_data = first_parts[1]
                value = perf_data.split(';')[0].lstrip(' ')
            else:
                text = first_line
                perf_data = ''
                value = 'rc=%s' % rc

            # Remaining lines are long text until a '|' is seen; lines after
            # that are appended to the perf data.
            long_text = ''
            extra_perf_data = False
            for line in output_lines[1:]:
                parts = line.split('|')
                if len(parts) > 1:
                    long_text += parts[0]
                    perf_data += parts[1]
                    extra_perf_data = True
                elif extra_perf_data is False:
                    long_text += line
                else:
                    perf_data += line

            LOG.debug('Short Output: %s', text)
            LOG.debug('Long Output: %s', long_text)
            LOG.debug('Perf Data: %s', perf_data)

            msg = Alert(
                resource=CONF.resource,
                event=CONF.event,
                correlate=CONF.correlate,
                group=CONF.group,
                value=value,
                severity=severity,
                environment=CONF.environment,
                service=CONF.service,
                text=text + ' ' + long_text,
                event_type='nagiosAlert',
                tags=CONF.tags,
                threshold_info=CONF.nagios_cmd,
                timeout=CONF.timeout,
                raw_data=stdout,
                more_info=perf_data,
                graph_urls=None,
            )

        if CONF.dry_run:
            print(msg)
        else:
            LOG.debug('Message => %s', repr(msg))

            client = ApiClient()
            client.send(msg)

        return msg.get_id()
def main(self):
    """Query the API for alerts matching the CONF filters and print them.

    A query dict is built from the CONF options, an optional header is
    printed, and the API is then queried in a loop.  Each returned alert is
    either deleted (CONF.delete), dumped as JSON, added to a table, printed
    with CONF.format, or printed in the default pipe-separated layout with
    optional detail sections selected by CONF.show.  With CONF.watch the
    query is repeated every CONF.interval seconds; otherwise the loop ends
    after one pass.  The process exits with status 1 when the API reports an
    error and with status 0 on KeyboardInterrupt / SystemExit.
    """

    api = ApiClient()
    query = dict()

    self.tag_is_key_value = True
    self.now = datetime.datetime.utcnow()
    from_time = self.now

    # 'from-date' is formatted as an ISO timestamp with millisecond precision
    # and a trailing 'Z'.
    if CONF.minutes or CONF.hours or CONF.days:
        from_time = self.now - datetime.timedelta(
            days=CONF.days, minutes=CONF.minutes + CONF.hours * 60)
        query['from-date'] = from_time.replace(microsecond=0).isoformat(
        ) + ".%03dZ" % (from_time.microsecond // 1000)
    elif CONF.watch:
        query['from-date'] = from_time.replace(microsecond=0).isoformat(
        ) + ".%03dZ" % (from_time.microsecond // 1000)

    # Both timestamps are naive UTC up to here; mark them as UTC explicitly.
    self.now = self.now.replace(tzinfo=pytz.utc)
    from_time = from_time.replace(tzinfo=pytz.utc)

    # Translate each filter option into a query parameter; a trailing '!' on
    # the key is used for the negated ("not_") variant.
    if CONF.alertid:
        query['id'] = CONF.alertid

    if CONF.environment:
        query['environment'] = CONF.environment
    if CONF.not_environment:
        query['environment!'] = CONF.not_environment

    if CONF.service:
        query['service'] = CONF.service
    if CONF.not_service:
        query['service!'] = CONF.not_service

    if CONF.resource:
        query['resource'] = CONF.resource
    if CONF.not_resource:
        query['resource!'] = CONF.not_resource

    if CONF.severity:
        query['severity'] = CONF.severity
    if CONF.not_severity:
        query['severity!'] = CONF.not_severity

    # With no explicit status filter, open, acknowledged and closed are requested.
    if not CONF.status:
        query['status'] = [
            status_code.OPEN, status_code.ACK, status_code.CLOSED
        ]
    if CONF.status:
        query['status'] = CONF.status
    if CONF.not_status:
        query['status!'] = CONF.not_status

    if CONF.event:
        query['event'] = CONF.event
    if CONF.not_event:
        query['event!'] = CONF.not_event

    if CONF.group:
        query['group'] = CONF.group
    if CONF.not_group:
        query['group!'] = CONF.not_group

    if CONF.value:
        query['value'] = CONF.value
    if CONF.not_value:
        query['value!'] = CONF.not_value

    if CONF.origin:
        query['origin'] = CONF.origin
    if CONF.not_origin:
        query['origin!'] = CONF.not_origin

    # Tags are given as 'key=value'; a tag without exactly one '=' makes the
    # tuple unpacking raise ValueError.
    if CONF.tags:
        for tag in CONF.tags:
            key, value = tag.split('=')
            query['tags.' + key] = value
    if CONF.not_tags:
        for tag in CONF.not_tags:
            key, value = tag.split('=')
            query['tags.' + key + '!'] = value

    if CONF.text:
        query['text'] = CONF.text
    if CONF.not_text:
        query['text!'] = CONF.not_text

    if CONF.event_type:
        query['type'] = CONF.event_type
    if CONF.not_event_type:
        query['type!'] = CONF.not_event_type

    if CONF.repeat:
        query['repeat'] = CONF.repeat

    if CONF.sortby:
        query['sort-by'] = CONF.sortby

    if CONF.limit:
        query['limit'] = CONF.limit

    if CONF.show == ['counts']:
        query['hide-alert-details'] = 'true'

    # --oneline is shorthand for a fixed format string.
    if CONF.oneline:
        CONF.format = '{i} {rd} {sa} {E} {S} {r} {g} {e} {v} {t}'

    if CONF.query:
        query['q'] = CONF.query

    if CONF.json:
        CONF.output = 'json'

    self.tz = pytz.timezone(CONF.timezone)

    if CONF.output == 'table':
        # NOTE(review): pt and col_text are filled in below but never rendered
        # within the code shown here.
        pt = prettytable.PrettyTable([
            "Alert ID", "Last Receive Time", "Severity", "Dupl.",
            "Environment", "Service", "Resource", "Group", "Event", "Value"
        ])
        col_text = []
    elif not CONF.noheader:
        # Report header: one line per active option.
        print "Alerta Report Tool"
        print " api server: %s:%s" % (CONF.api_host, CONF.api_port)
        print " timezone: %s" % CONF.timezone
        if CONF.minutes or CONF.hours or CONF.days:
            print " interval: %s - %s" % (self._format_date(from_time), self._format_date(self.now))
        if CONF.show:
            print " show: %s" % ','.join(CONF.show)
        if CONF.sortby:
            print " sort by: %s" % CONF.sortby
        if CONF.alertid:
            print " alert id: ^%s" % ','.join(CONF.alertid)
        if CONF.environment:
            print " environment: %s" % ','.join(CONF.environment)
        if CONF.not_environment:
            print " environment: (not) %s" % ','.join(CONF.not_environment)
        if CONF.service:
            print " service: %s" % ','.join(CONF.service)
        if CONF.not_service:
            print " service: (not) %s" % ','.join(CONF.not_service)
        if CONF.resource:
            print " resource: %s" % ','.join(CONF.resource)
        if CONF.not_resource:
            print " resource: (not) %s" % ','.join(CONF.not_resource)
        if CONF.origin:
            print " origin: %s" % ','.join(CONF.origin)
        if CONF.not_origin:
            print " origin: (not) %s" % ','.join(CONF.not_origin)
        if CONF.severity:
            print " severity: %s" % ','.join(CONF.severity)
        if CONF.not_severity:
            print " severity: (not) %s" % ','.join(CONF.not_severity)
        if CONF.status:
            print " status: %s" % ','.join(CONF.status)
        if CONF.not_status:
            print " status: (not) %s" % ','.join(CONF.not_status)
        if CONF.event:
            print " event: %s" % ','.join(CONF.event)
        if CONF.not_event:
            print " event: (not) %s" % ','.join(CONF.not_event)
        if CONF.group:
            print " group: %s" % ','.join(CONF.group)
        if CONF.not_group:
            print " group: (not) %s" % ','.join(CONF.not_group)
        if CONF.value:
            print " value: %s" % ','.join(CONF.value)
        if CONF.not_value:
            print " value: (not) %s" % ','.join(CONF.not_value)
        if CONF.text:
            print " text: %s" % ','.join(CONF.text)
        if CONF.not_text:
            print " text: (not) %s" % ','.join(CONF.not_text)
        if CONF.tags:
            print " tags: %s" % ','.join(CONF.tags)
        if CONF.not_tags:
            print " tags: (not) %s" % ','.join(CONF.not_tags)
        if CONF.event_type:
            print " event type: %s" % ','.join(CONF.event_type)
        if CONF.not_event_type:
            print " event type: (not) %s" % ','.join(CONF.not_event_type)
        if CONF.repeat:
            print " repeats: %s" % CONF.repeat
        if CONF.limit:
            print " count: %d" % CONF.limit
        if CONF.query:
            print " query: %s" % CONF.query
        if CONF.delete:
            print " action: DELETE"
        print

    # 'some' and 'all' are shorthands that expand into individual sections
    # (CONF.show is extended in place).
    if 'some' in CONF.show:
        CONF.show.append('text')
        CONF.show.append('details')
    elif 'all' in CONF.show:
        CONF.show.append('text')
        CONF.show.append('attributes')
        CONF.show.append('times')
        CONF.show.append('details')
        CONF.show.append('tags')

    line_color = ''
    end_color = ''
    if 'color' in CONF.show or CONF.color or os.environ.get(
            'CLICOLOR', None):
        end_color = severity_code.ENDC

    # Query API for alerts
    while True:

        # NOTE(review): start/end bracket the query but are not read again in
        # the code shown here.
        start = time.time()
        try:
            response = api.query(query)
        except (KeyboardInterrupt, SystemExit):
            sys.exit(0)
        end = time.time()

        if response['status'] == 'error':
            print "ERROR: %s" % (response['message'])
            LOG.error('%s', response['message'])
            sys.exit(1)

        # The returned order is reversed when sorting by one of the time fields.
        if CONF.sortby in ['createTime', 'receiveTime', 'lastReceiveTime']:
            alertDetails = reversed(response['alerts']['alertDetails'])
        else:
            alertDetails = response['alerts']['alertDetails']

        count = 0
        for alert in alertDetails:
            # Pull every field out of the alert dict up front.
            resource = alert.get('resource', None)
            event = alert.get('event', None)
            correlate = alert.get('correlatedEvents', None)
            group = alert.get('group', None)
            value = alert.get('value', None)
            current_status = status_code.parse_status(
                alert.get('status', None))
            current_severity = severity_code.parse_severity(
                alert.get('severity', None))
            previous_severity = severity_code.parse_severity(
                alert.get('previousSeverity', None))
            environment = alert.get('environment', None)
            service = alert.get('service', None)
            text = alert.get('text', None)
            event_type = alert.get('type', None)
            tags = alert.get('tags', None)
            origin = alert.get('origin', None)
            repeat = alert.get('repeat', False)
            duplicate_count = int(alert.get('duplicateCount', 0))
            threshold_info = alert.get('thresholdInfo', None)
            summary = alert.get('summary', None)
            timeout = alert.get('timeout', 0)
            alertid = alert.get('id', None)
            raw_data = alert.get('rawData', None)
            last_receive_id = alert.get('lastReceiveId', None)

            # The four timestamps are parsed unconditionally, so a missing
            # one makes strptime fail on None.
            # NOTE(review): time.mktime interprets a time tuple as local
            # time, while these tuples hold UTC values -- confirm the
            # *_epoch values are what is intended.
            create_time = datetime.datetime.strptime(
                alert.get('createTime', None), '%Y-%m-%dT%H:%M:%S.%fZ')
            create_time = create_time.replace(tzinfo=pytz.utc)
            create_time_epoch = time.mktime(create_time.timetuple())

            receive_time = datetime.datetime.strptime(
                alert.get('receiveTime', None), '%Y-%m-%dT%H:%M:%S.%fZ')
            receive_time = receive_time.replace(tzinfo=pytz.utc)
            receive_time_epoch = time.mktime(receive_time.timetuple())

            last_receive_time = datetime.datetime.strptime(
                alert.get('lastReceiveTime', None), '%Y-%m-%dT%H:%M:%S.%fZ')
            last_receive_time = last_receive_time.replace(tzinfo=pytz.utc)
            last_receive_time_epoch = time.mktime(
                last_receive_time.timetuple())

            expire_time = datetime.datetime.strptime(
                alert.get('expireTime', None), '%Y-%m-%dT%H:%M:%S.%fZ')
            expire_time = expire_time.replace(tzinfo=pytz.utc)
            expire_time_epoch = time.mktime(expire_time.timetuple())

            trend_indication = alert.get('trendIndication', None)
            more_info = alert.get('moreInfo', 'n/a')
            graph_urls = alert.get('graphUrls', ['n/a'])

            # Latency between creation and receipt, in milliseconds.
            delta = receive_time - create_time
            latency = int(delta.days * 24 * 60 * 60 * 1000 +
                          delta.seconds * 1000 + delta.microseconds / 1000)

            # Placeholders available to CONF.format.  Built for every alert,
            # so the ','.join(...) and slicing below require the
            # corresponding fields to be present.
            format_kwargs = {
                'I': alertid,
                'i': alertid[0:8],
                'r': resource,
                'e': event,
                'C': ','.join(correlate),
                'g': group,
                'v': value,
                'st': current_status.capitalize(),
                's': current_severity.capitalize(),
                'sa': severity_code._ABBREV_SEVERITY_MAP.get(
                    current_severity, '****'),
                'sc': severity_code.name_to_code(current_severity),
                'sP': previous_severity.capitalize(),
                'sPa': severity_code._ABBREV_SEVERITY_MAP.get(
                    previous_severity, '****'),
                'sPc': severity_code.name_to_code(previous_severity),
                'E': ','.join(environment),
                'S': ','.join(service),
                't': text.encode('utf-8'),
                'eT': event_type,
                'T': ','.join(tags),
                'O': origin,
                'R': repeat,
                'D': duplicate_count,
                'th': threshold_info,
                'y': summary,
                'o': timeout,
                'B': raw_data,
                'ti': trend_indication,
                'm': more_info,
                'u': ','.join(graph_urls),
                'L': latency,
                'lrI': last_receive_id,
                'lri': last_receive_id[0:8],
                'ci': create_time.replace(microsecond=0).isoformat() +
                ".%03dZ" % (create_time.microsecond // 1000),
                'ct': create_time_epoch,
                'cd': self._format_date(create_time),
                'cD': utils.formatdate(create_time_epoch),
                'ri': receive_time.replace(microsecond=0).isoformat() +
                ".%03dZ" % (receive_time.microsecond // 1000),
                'rt': receive_time_epoch,
                'rd': self._format_date(receive_time),
                'rD': utils.formatdate(receive_time_epoch),
                'li': last_receive_time.replace(microsecond=0).isoformat() +
                ".%03dZ" % (last_receive_time.microsecond // 1000),
                'lt': last_receive_time_epoch,
                'ld': self._format_date(last_receive_time),
                'lD': utils.formatdate(last_receive_time_epoch),
                'ei': expire_time.replace(microsecond=0).isoformat() +
                ".%03dZ" % (expire_time.microsecond // 1000),
                'et': expire_time_epoch,
                'ed': self._format_date(expire_time),
                'eD': utils.formatdate(expire_time_epoch),
                'n': '\n',
            }

            count += 1

            # Delete mode: remove the alert, report the result, skip output.
            # NOTE(review): this rebinds `response`, which watch mode reads
            # again after the loop -- confirm delete + watch is not combined.
            if CONF.delete:
                try:
                    response = api.delete(alertid)
                except (KeyboardInterrupt, SystemExit):
                    sys.exit(0)
                print(line_color + 'DELETE %s %s' %
                      (alertid, response['status']) + end_color)
                continue

            if 'color' in CONF.show or CONF.color or os.environ.get(
                    'CLICOLOR', None):
                line_color = severity_code._COLOR_MAP[current_severity]

            if CONF.output == 'json':
                print(line_color + json.dumps(alert, indent=4) + end_color)
                continue

            # The timestamp shown follows the sort key; last receive time is
            # the default.
            if CONF.sortby == 'createTime':
                displayTime = create_time
            elif CONF.sortby == 'receiveTime':
                displayTime = receive_time
            else:
                displayTime = last_receive_time

            if CONF.output == 'table':
                pt.add_row([
                    alertid,
                    self._format_date(displayTime),
                    severity_code._ABBREV_SEVERITY_MAP.get(
                        current_severity, '****'),
                    duplicate_count,
                    ','.join(environment),
                    ','.join(service),
                    resource,
                    group,
                    event,
                    value
                ])
                if 'text' in CONF.show:
                    col_text.append(text)
                continue

            # User-supplied format string; an unknown placeholder is reported
            # and the alert is skipped.
            if CONF.format:
                try:
                    print line_color + CONF.format.format(
                        **format_kwargs) + end_color
                except (KeyError, IndexError), e:
                    print 'Format error: %s' % e
                    LOG.error('Format error: %s', e)
                continue

            # Default output: either the summary or one pipe-separated line.
            if 'summary' in CONF.show:
                print(line_color + '%s' % summary + end_color)
            else:
                print(
                    line_color +
                    '%s|%s|%s|%5d|%-5s|%-10s|%-18s|%12s|%16s|%12s' %
                    (alertid[0:8], self._format_date(displayTime),
                     severity_code._ABBREV_SEVERITY_MAP.get(
                         current_severity, '****'), duplicate_count,
                     ','.join(environment), ','.join(service), resource,
                     group, event, value) + end_color)

            if 'text' in CONF.show:
                print(line_color + ' |%s' % (text.encode('utf-8')) +
                      end_color)

            if 'attributes' in CONF.show:
                print(
                    line_color + ' severity | %s (%s) -> %s (%s)' %
                    (previous_severity.capitalize(),
                     severity_code.name_to_code(previous_severity),
                     current_severity.capitalize(),
                     severity_code.name_to_code(current_severity)) +
                    end_color)
                print(line_color + ' trend | %s' % trend_indication +
                      end_color)
                print(line_color + ' status | %s' %
                      current_status.capitalize() + end_color)
                print(line_color + ' resource | %s' % resource + end_color)
                print(line_color + ' group | %s' % group + end_color)
                print(line_color + ' event | %s' % event + end_color)
                print(line_color + ' value | %s' % value + end_color)

            if 'times' in CONF.show:
                print(
                    line_color + ' time created | %s' %
                    (self._format_date(create_time)) + end_color)
                print(
                    line_color + ' time received | %s' %
                    (self._format_date(receive_time)) + end_color)
                print(
                    line_color + ' last received | %s' %
                    (self._format_date(last_receive_time)) + end_color)
                print(line_color + ' latency | %sms' % latency + end_color)
                print(line_color + ' timeout | %ss' % timeout + end_color)
                # expire_time is always a datetime at this point (it was
                # parsed above), so this test is always true.
                if expire_time:
                    print(
                        line_color + ' expire time | %s' %
                        (self._format_date(expire_time)) + end_color)

            if 'details' in CONF.show:
                print(line_color + ' alert id | %s' % alertid + end_color)
                print(line_color + ' last recv id | %s' % last_receive_id +
                      end_color)
                print(
                    line_color + ' environment | %s' %
                    (','.join(environment)) + end_color)
                print(line_color + ' service | %s' % (','.join(service)) +
                      end_color)
                print(line_color + ' resource | %s' % resource + end_color)
                print(line_color + ' type | %s' % event_type + end_color)
                print(line_color + ' repeat | %s' % repeat + end_color)
                print(line_color + ' origin | %s' % origin + end_color)
                print(line_color + ' more info | %s' % more_info + end_color)
                print(line_color + ' threshold | %s' % threshold_info +
                      end_color)
                print(
                    line_color + ' correlate | %s' %
                    (','.join(correlate)) + end_color)
                print(
                    line_color + ' graphs | %s' %
                    (','.join(graph_urls)) + end_color)

            # Tags may be a list (printed with their index) or a mapping
            # (printed as key / value).
            if 'tags' in CONF.show and tags:
                if isinstance(tags, list):
                    self.tag_is_key_value = False
                    for tag in enumerate(tags):
                        print(line_color + ' tag %6s | %s' % tag + end_color)
                else:
                    for tag in tags.items():
                        print(line_color + ' tag %6s | %s' % tag + end_color)

            if 'raw' in CONF.show and raw_data:
                print(line_color + ' | %s' % raw_data + end_color)

            # History entries carrying 'event' are printed like alerts; those
            # carrying 'status' are printed as status changes.  The event
            # branch rebinds alertid, create_time, event, receive_time, value
            # and text for the rest of this iteration.
            if 'history' in CONF.show:
                for hist in alert['history']:
                    if 'event' in hist:
                        alertid = hist['id']
                        create_time = datetime.datetime.strptime(
                            hist['createTime'], '%Y-%m-%dT%H:%M:%S.%fZ')
                        create_time = create_time.replace(tzinfo=pytz.utc)
                        event = hist['event']
                        receive_time = datetime.datetime.strptime(
                            hist['receiveTime'], '%Y-%m-%dT%H:%M:%S.%fZ')
                        receive_time = receive_time.replace(
                            tzinfo=pytz.utc)
                        value = hist['value']
                        text = hist['text']
                        print(
                            line_color + ' %s|%s|%s|%-18s|%12s|%16s|%12s' %
                            (alertid[0:8], self._format_date(receive_time),
                             severity_code._ABBREV_SEVERITY_MAP[
                                 hist['severity']], resource, group, event,
                             value) + end_color)
                        print(line_color + ' |%s' %
                              (text.encode('utf-8')) + end_color)
                    if 'status' in hist:
                        update_time = datetime.datetime.strptime(
                            hist['updateTime'], '%Y-%m-%dT%H:%M:%S.%fZ')
                        update_time = update_time.replace(tzinfo=pytz.utc)
                        print(
                            line_color + ' %s|%-8s %s' %
                            (self._format_date(update_time), hist['status'],
                             hist['text']) + end_color)

        # Watch mode: wait, then ask only for alerts newer than the last batch.
        if CONF.watch:
            try:
                time.sleep(CONF.interval)
            except (KeyboardInterrupt, SystemExit):
                sys.exit(0)
            query['from-date'] = response['alerts']['lastTime']
        else:
            break
from flask import request, render_template, send_from_directory

from alerta.dashboard.v2 import app
from alerta.common import config
from alerta.common import log as logging
from alerta.common.api import ApiClient

Version = '2.1.0'

LOG = logging.getLogger(__name__)
CONF = config.CONF

# Instantiated for its side effect only; the instance is discarded.
ApiClient()  # set API variables eg. api_host, api_port


# Only use when running API in stand-alone mode during testing
@app.route('/alerta/dashboard/v2/assets/<path:filename>')
def assets(filename):
    """Serve the file ``filename`` from the directory CONF.dashboard_dir."""
    return send_from_directory(CONF.dashboard_dir, filename)


@app.route('/alerta/dashboard/v2/<name>')
def console(name):
    """Render the template named by the URL, passing CONF to it as ``config``."""
    # NOTE(review): the template name comes straight from the request path --
    # confirm the template loader restricts what can be rendered.
    return render_template(name, config=CONF)


@app.route('/alerta/widgets/v2/severity')
def severity_widget():
def main(self):
    """Query the API for alerts selected by CONF and print, ACK or delete them.

    Builds the query dict from the CONF filter options, prints a report
    header (unless table output or ``CONF.noheader``), then calls
    ``api.query()`` once, or repeatedly every ``CONF.interval`` seconds when
    ``CONF.watch`` is set.  Each returned alert is acknowledged
    (``CONF.ack``), deleted (``CONF.delete``) or rendered according to
    ``CONF.output``, ``CONF.format`` and ``CONF.show``.

    Exits the process with status 1 when the API response has status
    'error', and with status 0 on KeyboardInterrupt/SystemExit raised while
    querying, acknowledging, deleting or sleeping.
    """

    def parse_utc(stamp):
        """Parse an API timestamp string into a UTC-aware datetime."""
        parsed = datetime.datetime.strptime(stamp, '%Y-%m-%dT%H:%M:%S.%fZ')
        return parsed.replace(tzinfo=pytz.utc)

    def iso_millis(moment):
        """Format a datetime as ISO-8601 with millisecond precision and 'Z'.

        tzinfo is dropped first: isoformat() on an aware datetime appends
        '+00:00', which used to yield strings like '...+00:00.123Z'.
        """
        return moment.replace(tzinfo=None, microsecond=0).isoformat() + ".%03dZ" % (moment.microsecond // 1000)

    api = ApiClient()
    query = dict()

    self.tag_is_key_value = True

    self.now = datetime.datetime.utcnow()
    from_time = self.now

    if CONF.minutes or CONF.hours or CONF.days:
        from_time = self.now - datetime.timedelta(days=CONF.days, minutes=CONF.minutes + CONF.hours * 60)
        query['from-date'] = iso_millis(from_time)
    elif CONF.watch:
        query['from-date'] = iso_millis(from_time)

    self.now = self.now.replace(tzinfo=pytz.utc)
    from_time = from_time.replace(tzinfo=pytz.utc)

    # CONF option name -> query parameter, copied verbatim when the option is set.
    simple_filters = [
        ('alertid', 'id'),
        ('environment', 'environment'), ('not_environment', 'environment!'),
        ('service', 'service'), ('not_service', 'service!'),
        ('resource', 'resource'), ('not_resource', 'resource!'),
        ('severity', 'severity'), ('not_severity', 'severity!'),
        ('not_status', 'status!'),
        ('event', 'event'), ('not_event', 'event!'),
        ('group', 'group'), ('not_group', 'group!'),
        ('value', 'value'), ('not_value', 'value!'),
        ('origin', 'origin'), ('not_origin', 'origin!'),
        ('text', 'text'), ('not_text', 'text!'),
        ('event_type', 'type'), ('not_event_type', 'type!'),
        ('repeat', 'repeat'),
        ('sortby', 'sort-by'),
        ('limit', 'limit'),
        ('query', 'q'),
    ]
    for option, param in simple_filters:
        setting = getattr(CONF, option)
        if setting:
            query[param] = setting

    # Without an explicit status filter, query these three statuses.
    if CONF.status:
        query['status'] = CONF.status
    else:
        query['status'] = [status_code.OPEN, status_code.ACK, status_code.CLOSED]

    # Tags are given as key=value; split on the first '=' only so that
    # values which themselves contain '=' do not raise on unpacking.
    if CONF.tags:
        for tag in CONF.tags:
            key, value = tag.split('=', 1)
            query['tags.' + key] = value
    if CONF.not_tags:
        for tag in CONF.not_tags:
            key, value = tag.split('=', 1)
            query['tags.' + key + '!'] = value

    if CONF.show == ['counts']:
        query['hide-alert-details'] = 'true'
    if CONF.oneline:
        CONF.format = '{i} {rd} {sa} {E} {S} {r} {g} {e} {v} {t}'
    if CONF.json:
        CONF.output = 'json'

    self.tz = pytz.timezone(CONF.timezone)

    if CONF.output == 'table':
        # NOTE(review): rows are accumulated in pt / col_text below, but
        # nothing in this method prints the table.
        pt = prettytable.PrettyTable(["Alert ID", "Last Receive Time", "Severity", "Dupl.", "Environment",
                                      "Service", "Resource", "Group", "Event", "Value"])
        col_text = []
    elif not CONF.noheader:
        print("Alerta Report Tool")
        print(" api server: %s:%s" % (CONF.api_host, CONF.api_port))
        print(" timezone: %s" % CONF.timezone)
        if CONF.minutes or CONF.hours or CONF.days:
            print(" interval: %s - %s" % (
                self._format_date(from_time),
                self._format_date(self.now)))
        if CONF.show:
            print(" show: %s" % ','.join(CONF.show))
        if CONF.sortby:
            print(" sort by: %s" % CONF.sortby)

        # List-valued options, echoed comma-joined in this order.
        joined_header_lines = [
            ('alertid', " alert id: ^%s"),
            ('environment', " environment: %s"),
            ('not_environment', " environment: (not) %s"),
            ('service', " service: %s"),
            ('not_service', " service: (not) %s"),
            ('resource', " resource: %s"),
            ('not_resource', " resource: (not) %s"),
            ('origin', " origin: %s"),
            ('not_origin', " origin: (not) %s"),
            ('severity', " severity: %s"),
            ('not_severity', " severity: (not) %s"),
            ('status', " status: %s"),
            ('not_status', " status: (not) %s"),
            ('event', " event: %s"),
            ('not_event', " event: (not) %s"),
            ('group', " group: %s"),
            ('not_group', " group: (not) %s"),
            ('value', " value: %s"),
            ('not_value', " value: (not) %s"),
            ('text', " text: %s"),
            ('not_text', " text: (not) %s"),
            ('tags', " tags: %s"),
            ('not_tags', " tags: (not) %s"),
            ('event_type', " event type: %s"),
            ('not_event_type', " event type: (not) %s"),
        ]
        for option, template in joined_header_lines:
            setting = getattr(CONF, option)
            if setting:
                print(template % ','.join(setting))

        if CONF.repeat:
            print(" repeats: %s" % CONF.repeat)
        if CONF.limit:
            print(" count: %d" % CONF.limit)
        if CONF.query:
            print(" query: %s" % CONF.query)
        if CONF.ack:
            print(" action: ACK")
        if CONF.delete:
            print(" action: DELETE")
        print('')

    # 'some' and 'all' are shorthands that expand to several show options.
    if 'some' in CONF.show:
        CONF.show.append('text')
        CONF.show.append('details')
    elif 'all' in CONF.show:
        CONF.show.append('text')
        CONF.show.append('attributes')
        CONF.show.append('times')
        CONF.show.append('details')
        CONF.show.append('tags')

    use_color = 'color' in CONF.show or CONF.color or os.environ.get('CLICOLOR', None)
    line_color = ''
    end_color = ''
    if use_color:
        end_color = severity_code.ENDC

    # Query API for alerts
    while True:
        try:
            response = api.query(query)
        except (KeyboardInterrupt, SystemExit):
            sys.exit(0)

        if response['status'] == 'error':
            print("ERROR: %s" % (response['message']))
            LOG.error('%s', response['message'])
            sys.exit(1)

        if CONF.sortby in ['createTime', 'receiveTime', 'lastReceiveTime']:
            alertDetails = reversed(response['alerts']['alertDetails'])
        else:
            alertDetails = response['alerts']['alertDetails']

        for alert in alertDetails:
            resource = alert.get('resource', None)
            event = alert.get('event', None)
            correlate = alert.get('correlatedEvents', None)
            group = alert.get('group', None)
            value = alert.get('value', None)
            current_status = status_code.parse_status(alert.get('status', None))
            current_severity = severity_code.parse_severity(alert.get('severity', None))
            previous_severity = severity_code.parse_severity(alert.get('previousSeverity', None))
            environment = alert.get('environment', None)
            service = alert.get('service', None)
            text = alert.get('text', None)
            event_type = alert.get('type', None)
            tags = alert.get('tags', None)
            origin = alert.get('origin', None)
            repeat = alert.get('repeat', False)
            duplicate_count = int(alert.get('duplicateCount', 0))
            threshold_info = alert.get('thresholdInfo', None)
            summary = alert.get('summary', None)
            timeout = alert.get('timeout', 0)
            alertid = alert.get('id', None)
            raw_data = alert.get('rawData', None)
            last_receive_id = alert.get('lastReceiveId', None)

            # NOTE(review): time.mktime() interprets its argument as local
            # time while these datetimes are UTC; left unchanged here.
            create_time = parse_utc(alert.get('createTime', None))
            create_time_epoch = time.mktime(create_time.timetuple())

            receive_time = parse_utc(alert.get('receiveTime', None))
            receive_time_epoch = time.mktime(receive_time.timetuple())

            last_receive_time = parse_utc(alert.get('lastReceiveTime', None))
            last_receive_time_epoch = time.mktime(last_receive_time.timetuple())

            # The 'times' display below guards on expire_time, so tolerate
            # an alert without one instead of crashing inside strptime().
            expire_stamp = alert.get('expireTime', None)
            if expire_stamp:
                expire_time = parse_utc(expire_stamp)
                expire_time_epoch = time.mktime(expire_time.timetuple())
            else:
                expire_time = None
                expire_time_epoch = None

            trend_indication = alert.get('trendIndication', None)
            more_info = alert.get('moreInfo', 'n/a')
            graph_urls = alert.get('graphUrls', ['n/a'])

            delta = receive_time - create_time
            latency = int(delta.days * 24 * 60 * 60 * 1000 + delta.seconds * 1000 + delta.microseconds / 1000)

            format_kwargs = {
                'I': alertid,
                'i': alertid[0:8],
                'r': resource,
                'e': event,
                'C': ','.join(correlate),
                'g': group,
                'v': value,
                'st': current_status.capitalize(),
                's': current_severity.capitalize(),
                'sa': severity_code._ABBREV_SEVERITY_MAP.get(current_severity, '****'),
                'sc': severity_code.name_to_code(current_severity),
                'sP': previous_severity.capitalize(),
                'sPa': severity_code._ABBREV_SEVERITY_MAP.get(previous_severity, '****'),
                'sPc': severity_code.name_to_code(previous_severity),
                'E': ','.join(environment),
                'S': ','.join(service),
                't': text.encode('utf-8'),
                'eT': event_type,
                'T': ','.join(tags),
                'O': origin,
                'R': repeat,
                'D': duplicate_count,
                'th': threshold_info,
                'y': summary,
                'o': timeout,
                'B': raw_data,
                'ti': trend_indication,
                'm': more_info,
                'u': ','.join(graph_urls),
                'L': latency,
                'lrI': last_receive_id,
                'lri': last_receive_id[0:8],
                'ci': iso_millis(create_time),
                'ct': create_time_epoch,
                'cd': self._format_date(create_time),
                'cD': utils.formatdate(create_time_epoch),
                'ri': iso_millis(receive_time),
                'rt': receive_time_epoch,
                'rd': self._format_date(receive_time),
                'rD': utils.formatdate(receive_time_epoch),
                'li': iso_millis(last_receive_time),
                'lt': last_receive_time_epoch,
                'ld': self._format_date(last_receive_time),
                'lD': utils.formatdate(last_receive_time_epoch),
                'ei': iso_millis(expire_time) if expire_time else None,
                'et': expire_time_epoch,
                'ed': self._format_date(expire_time) if expire_time else None,
                'eD': utils.formatdate(expire_time_epoch) if expire_time else None,
                'n': '\n',
            }

            # The action replies get their own name: overwriting `response`
            # would break the watch-mode read of response['alerts'] below.
            if CONF.ack:
                try:
                    action_response = api.ack(alertid)
                except (KeyboardInterrupt, SystemExit):
                    sys.exit(0)
                print(line_color + 'ACK %s %s' % (alertid, action_response['status']) + end_color)
                continue

            if CONF.delete:
                try:
                    action_response = api.delete(alertid)
                except (KeyboardInterrupt, SystemExit):
                    sys.exit(0)
                print(line_color + 'DELETE %s %s' % (alertid, action_response['status']) + end_color)
                continue

            if use_color:
                line_color = severity_code._COLOR_MAP[current_severity]

            if CONF.output == 'json':
                print(line_color + json.dumps(alert, indent=4) + end_color)
                continue

            # The timestamp column follows the sort key when that is a time.
            if CONF.sortby == 'createTime':
                displayTime = create_time
            elif CONF.sortby == 'receiveTime':
                displayTime = receive_time
            else:
                displayTime = last_receive_time

            if CONF.output == 'table':
                pt.add_row([
                    alertid,
                    self._format_date(displayTime),
                    severity_code._ABBREV_SEVERITY_MAP.get(current_severity, '****'),
                    duplicate_count,
                    ','.join(environment),
                    ','.join(service),
                    resource,
                    group,
                    event,
                    value]
                )
                if 'text' in CONF.show:
                    col_text.append(text)
                continue

            if CONF.format:
                try:
                    print(line_color + CONF.format.format(**format_kwargs) + end_color)
                except (KeyError, IndexError) as e:
                    print('Format error: %s' % e)
                    LOG.error('Format error: %s', e)
                continue

            if 'summary' in CONF.show:
                print(line_color + '%s' % summary + end_color)
            else:
                print(line_color + '%s|%s|%s|%5d|%-5s|%-10s|%-18s|%12s|%16s|%12s' % (
                    alertid[0:8],
                    self._format_date(displayTime),
                    severity_code._ABBREV_SEVERITY_MAP.get(current_severity, '****'),
                    duplicate_count,
                    ','.join(environment),
                    ','.join(service),
                    resource,
                    group,
                    event,
                    value) + end_color)

            if 'text' in CONF.show:
                print(line_color + ' |%s' % (text.encode('utf-8')) + end_color)

            if 'attributes' in CONF.show:
                print(line_color + ' severity | %s (%s) -> %s (%s)' % (
                    previous_severity.capitalize(), severity_code.name_to_code(previous_severity),
                    current_severity.capitalize(), severity_code.name_to_code(current_severity)) + end_color)
                print(line_color + ' trend | %s' % trend_indication + end_color)
                print(line_color + ' status | %s' % current_status.capitalize() + end_color)
                print(line_color + ' resource | %s' % resource + end_color)
                print(line_color + ' group | %s' % group + end_color)
                print(line_color + ' event | %s' % event + end_color)
                print(line_color + ' value | %s' % value + end_color)

            if 'times' in CONF.show:
                print(line_color + ' time created | %s' % (
                    self._format_date(create_time)) + end_color)
                print(line_color + ' time received | %s' % (
                    self._format_date(receive_time)) + end_color)
                print(line_color + ' last received | %s' % (
                    self._format_date(last_receive_time)) + end_color)
                print(line_color + ' latency | %sms' % latency + end_color)
                print(line_color + ' timeout | %ss' % timeout + end_color)
                if expire_time:
                    print(line_color + ' expire time | %s' % (
                        self._format_date(expire_time)) + end_color)

            if 'details' in CONF.show:
                print(line_color + ' alert id | %s' % alertid + end_color)
                print(line_color + ' last recv id | %s' % last_receive_id + end_color)
                print(line_color + ' environment | %s' % (','.join(environment)) + end_color)
                print(line_color + ' service | %s' % (','.join(service)) + end_color)
                print(line_color + ' resource | %s' % resource + end_color)
                print(line_color + ' type | %s' % event_type + end_color)
                print(line_color + ' repeat | %s' % repeat + end_color)
                print(line_color + ' origin | %s' % origin + end_color)
                print(line_color + ' more info | %s' % more_info + end_color)
                print(line_color + ' threshold | %s' % threshold_info + end_color)
                print(line_color + ' correlate | %s' % (','.join(correlate)) + end_color)
                print(line_color + ' graphs | %s' % (','.join(graph_urls)) + end_color)

            if 'tags' in CONF.show and tags:
                if isinstance(tags, list):
                    # List-style tags are printed against their index.
                    self.tag_is_key_value = False
                    for tag in enumerate(tags):
                        print(line_color + ' tag %6s | %s' % tag + end_color)
                else:
                    for tag in tags.items():
                        print(line_color + ' tag %6s | %s' % tag + end_color)

            if 'raw' in CONF.show and raw_data:
                print(line_color + ' | %s' % raw_data + end_color)

            if 'history' in CONF.show:
                for hist in alert['history']:
                    if 'event' in hist:
                        hist_received = parse_utc(hist['receiveTime'])
                        print(line_color + ' %s|%s|%s|%-18s|%12s|%16s|%12s' % (
                            hist['id'][0:8],
                            self._format_date(hist_received),
                            severity_code._ABBREV_SEVERITY_MAP[hist['severity']],
                            resource,
                            group,
                            hist['event'],
                            hist['value']) + end_color)
                        print(line_color + ' |%s' % (hist['text'].encode('utf-8')) + end_color)
                    if 'status' in hist:
                        update_time = parse_utc(hist['updateTime'])
                        print(line_color + ' %s|%-8s %s' % (
                            self._format_date(update_time), hist['status'], hist['text']) + end_color)

        if CONF.watch:
            try:
                time.sleep(CONF.interval)
            except (KeyboardInterrupt, SystemExit):
                sys.exit(0)
            # Next poll resumes from the last time reported by this query.
            query['from-date'] = response['alerts']['lastTime']
        else:
            break
class DynectDaemon(Daemon):
    """Daemon that polls DynECT for GSLB and pool state and sends alerts.

    Each cycle runs queryDynect() to refresh ``self.info``; when that
    succeeded (``self.updating``), alertDynect() turns the collected state
    into alerts and a heartbeat is sent.
    """

    # Options registered with the shared config when the daemon is created.
    dynect_opts = {
        'dynect_customer': '',
        'dynect_username': '',
        'dynect_password': '',
    }

    def __init__(self, prog, **kwargs):
        config.register_opts(DynectDaemon.dynect_opts)

        Daemon.__init__(self, prog, kwargs)

        self.info = {}        # resource name -> {'status', 'gslb', 'rawData'}
        self.last_info = {}   # state recorded after the previous alert run
        self.updating = False  # True only when the last discovery succeeded
        self.dedup = DeDup(threshold=10)

    def run(self):
        """Poll DynECT every ``CONF.loop_every`` seconds until shut down."""
        self.running = True

        self.api = ApiClient()

        while not self.shuttingdown:
            try:
                self.queryDynect()

                if self.updating:
                    self.alertDynect()
                    # NOTE(review): this aliases self.info rather than
                    # copying it, so later in-place updates by queryDynect()
                    # are visible through last_info as well - confirm intent.
                    self.last_info = self.info

                    LOG.debug('Send heartbeat...')
                    heartbeat = Heartbeat(tags=[__version__])
                    self.api.send(heartbeat)

                LOG.debug('Waiting for next check run...')
                time.sleep(CONF.loop_every)
            except (KeyboardInterrupt, SystemExit):
                self.shuttingdown = True

        self.running = False

    def alertDynect(self):
        """Send an alert for every known resource that was also seen last run.

        Resources named 'gslb-*' and 'pool-*' are mapped to an event,
        severity and text; anything else is logged and skipped.  Alerts
        suppressed by the transformers or rejected by de-duplication are
        not sent.
        """
        for resource in self.info:

            if resource not in self.last_info:
                continue

            status = self.info[resource]['status']

            if resource.startswith('gslb-'):

                # gslb status = ok | unk | trouble | failover

                text = 'GSLB status is %s.' % status

                if status == 'ok':
                    event = 'GslbOK'
                    severity = severity_code.NORMAL
                else:
                    event = 'GslbNotOK'
                    severity = severity_code.CRITICAL
                correlate = ['GslbOK', 'GslbNotOK']

            elif resource.startswith('pool-'):

                # pool status = up | unk | down
                # pool serve_mode = obey | always | remove | no
                # pool weight (1-15)

                if 'down' in status:
                    event = 'PoolDown'
                    severity = severity_code.MAJOR
                    text = 'Pool is down'
                elif 'obey' not in status:
                    event = 'PoolServe'
                    severity = severity_code.MAJOR
                    text = 'Pool with an incorrect serve mode'
                elif self.check_weight(self.info[resource]['gslb'], resource) is False:
                    event = 'PoolWeightError'
                    severity = severity_code.MINOR
                    text = 'Pool with an incorrect weight'
                else:
                    event = 'PoolUp'
                    severity = severity_code.NORMAL
                    text = 'Pool status is normal'
                correlate = ['PoolUp', 'PoolDown', 'PoolServe', 'PoolWeightError']

            else:
                LOG.warning('Unknown resource type: %s', resource)
                continue

            # Defaults
            group = 'GSLB'
            value = status
            environment = 'PROD'
            service = ['Network']
            tags = list()
            timeout = None
            raw_data = self.info[resource]['rawData']

            dynectAlert = Alert(
                resource=resource,
                event=event,
                correlate=correlate,
                group=group,
                value=value,
                severity=severity,
                environment=environment,
                service=service,
                text=text,
                event_type='serviceAlert',
                tags=tags,
                timeout=timeout,
                raw_data=raw_data,
            )

            suppress = Transformers.normalise_alert(dynectAlert)
            if suppress:
                LOG.info('Suppressing %s alert', dynectAlert.event)
                LOG.debug('%s', dynectAlert)
                continue

            if self.dedup.is_send(dynectAlert):
                self.api.send(dynectAlert)

    def check_weight(self, parent, resource):
        """Return False if a serving pool of ``parent`` has a different weight.

        The weight is the third ':'-separated field of the pool status
        string ('status:serve_mode:weight').  Pools whose serve mode is
        'no' are skipped.  Returns True when all remaining pools of the
        same GSLB carry the same weight as ``resource``.
        """
        weight = self.info[resource]['status'].split(':')[2]

        # A distinct loop name avoids rebinding the `resource` parameter
        # (list-comprehension variables leak into the function in Python 2).
        siblings = [name for name in self.info
                    if name.startswith('pool') and self.info[name]['gslb'] == parent]

        for pool in siblings:
            fields = self.info[pool]['status'].split(':')
            if fields[1] == 'no':
                LOG.warning('Skipping %s because not serving for pool %s', pool, self.info[pool]['status'])
                continue

            LOG.debug('pool %s weight %s <=> %s', pool, fields[2], weight)
            if fields[2] != weight:
                return False
        return True

    def queryDynect(self):
        """Discover all GSLBs and their pools via the DynECT REST API.

        Results are stored in ``self.info`` under 'gslb-<fqdn>' and
        'pool-<fqdn>-<label>'.  ``self.updating`` is set to True only when
        the whole discovery completed; it is False when the session could
        not be created or any exception was raised.
        """
        LOG.info('Query DynECT to get the state of GSLBs')
        try:
            rest_iface = DynectRest()
            if CONF.debug and CONF.use_stderr:
                rest_iface.verbose = True

            # login
            credentials = {
                'customer_name': CONF.dynect_customer,
                'user_name': CONF.dynect_username,
                'password': CONF.dynect_password,
            }
            # Never write the real password to the log.
            LOG.debug('credentials = %s', dict(credentials, password='********'))
            response = rest_iface.execute('/Session/', 'POST', credentials)

            if response['status'] != 'success':
                LOG.error('Failed to create API session: %s', response['msgs'][0]['INFO'])
                self.updating = False
                return

            # Discover all the Zones in DynECT
            response = rest_iface.execute('/Zone/', 'GET')
            LOG.debug('/Zone/ => %s', json.dumps(response, indent=4))
            zone_resources = response['data']

            # Discover all the LoadBalancers
            for resource in zone_resources:
                zone = resource.split('/')[3]  # eg. /REST/Zone/guardiannews.com/
                response = rest_iface.execute('/LoadBalance/' + zone + '/', 'GET')
                LOG.debug('/LoadBalance/%s/ => %s', zone, json.dumps(response, indent=4))
                gslb = response['data']

                # Discover LoadBalancer pool information.
                for lb in gslb:
                    fqdn = lb.split('/')[4]  # eg. /REST/LoadBalance/guardiannews.com/id.guardiannews.com/
                    response = rest_iface.execute('/LoadBalance/' + zone + '/' + fqdn + '/', 'GET')
                    LOG.debug('/LoadBalance/%s/%s/ => %s', zone, fqdn, json.dumps(response, indent=4))
                    status = response['data']['status']
                    monitor = response['data']['monitor']
                    self.info['gslb-' + fqdn] = {'status': status, 'gslb': fqdn, 'rawData': monitor}

                    for pool in response['data']['pool']:
                        name = '%s-%s' % (fqdn, pool['label'].replace(' ', '-'))
                        status = '%s:%s:%s' % (pool['status'], pool['serve_mode'], pool['weight'])
                        self.info['pool-' + name] = {'status': status, 'gslb': fqdn, 'rawData': pool}

            LOG.info('Finished object discovery query.')
            LOG.debug('GSLBs and Pools: %s', json.dumps(self.info, indent=4))

            # logout
            rest_iface.execute('/Session/', 'DELETE')

        except Exception as e:
            LOG.error('Failed to discover GSLBs: %s', e)
            self.updating = False
        else:
            # Only reached when no exception occurred and the session was
            # created; previously this ran after a failure too and
            # overwrote the False set in the handler above.
            self.updating = True
class SnmpTrapHandler(object):
    """Read one snmptrapd trap from stdin, convert it to an alert and send it."""

    def __init__(self, prog, disable_flag=None):
        self.prog = prog
        self.disable_flag = disable_flag or CONF.disable_flag

    def start(self):
        """Run the handler unless the disable flag file exists."""
        LOG.info('Starting %s...' % self.prog)
        self.skip_on_disable()
        self.run()

    def skip_on_disable(self):
        """Exit the process with status 0 if the disable flag file exists."""
        if os.path.isfile(self.disable_flag):
            LOG.warning('Disable flag %s exists. Skipping...', self.disable_flag)
            sys.exit(0)

    def run(self):
        """Parse the trap on stdin, send the resulting alert and a heartbeat."""
        self.statsd = StatsD()  # graphite metrics

        data = sys.stdin.read()
        LOG.info('snmptrapd -> %r', data)
        data = unicode(data, 'utf-8', errors='ignore')
        LOG.debug('unicoded -> %s', data)

        snmptrapAlert = SnmpTrapHandler.parse_snmptrap(data)

        self.api = ApiClient()

        # parse_snmptrap() returns None for suppressed traps.
        if snmptrapAlert:
            self.api.send(snmptrapAlert)
            self.statsd.metric_send('alert.snmptrap.alerts.total', 1)

        LOG.debug('Send heartbeat...')
        heartbeat = Heartbeat(version=Version)
        self.api.send(heartbeat)

    @staticmethod
    def parse_snmptrap(data):
        """Convert raw trap text into an Alert (or Heartbeat).

        ``data`` holds leading '$x value' lines followed by varbinds
        separated by '~%~'.  The '$x' values and the numbered varbinds
        ('$1', '$2', ...) are collected in ``trapvars``.

        Returns the alert, a Heartbeat when the transformed alert's type is
        'Heartbeat', or None when the transform suppresses the trap.

        NOTE(review): ``version`` is only assigned when a '$s' line is
        present; without one the later uses of ``version`` fail.
        """
        pdu_data = data.splitlines()
        varbind_list = pdu_data[:]

        trapvars = dict()
        for line in pdu_data:
            if line.startswith('$'):
                # A special with no value ('$N' alone on the line) yields a
                # single field; store '' like the varbind parser below does
                # instead of failing to unpack.
                fields = line.split(None, 1)
                special = fields[0]
                value = fields[1] if len(fields) > 1 else ''
                trapvars[special] = value
                varbind_list.pop(0)

        if '$s' in trapvars:
            snmp_versions = {
                '0': 'SNMPv1',
                '1': 'SNMPv2c',
                '2': 'SNMPv2u',  # not supported
            }
            version = snmp_versions.get(trapvars['$s'], 'SNMPv3')
            trapvars['$s'] = version

        # Get varbinds
        varbinds = dict()
        idx = 0
        for varbind in '\n'.join(varbind_list).split('~%~'):
            if varbind == '':
                break
            idx += 1
            try:
                oid, value = varbind.split(None, 1)
            except ValueError:
                oid = varbind
                value = ''
            varbinds[oid] = value
            trapvars['$' + str(idx)] = value  # $n
            LOG.debug('$%s %s', str(idx), value)

        trapvars['$q'] = trapvars['$q'].lstrip('.')  # if numeric, remove leading '.'
        trapvars['$#'] = str(idx)

        LOG.debug('varbinds = %s', varbinds)

        LOG.debug('version = %s', version)

        correlate = list()

        if version == 'SNMPv1':
            # generic-trap number -> (trap name, correlated events or None)
            generic_traps = {
                '0': ('coldStart', ['coldStart', 'warmStart']),
                '1': ('warmStart', ['coldStart', 'warmStart']),
                '2': ('linkDown', ['linkUp', 'linkDown']),
                '3': ('linkUp', ['linkUp', 'linkDown']),
                '4': ('authenticationFailure', None),
                '5': ('egpNeighborLoss', None),
            }
            generic = trapvars['$w']
            if generic in generic_traps:
                trap_name, related = generic_traps[generic]
                trapvars['$O'] = trap_name
                if related is not None:
                    correlate = related
            elif generic == '6':  # enterpriseSpecific(6)
                if trapvars['$q'].isdigit():  # XXX - specific trap number was not decoded
                    trapvars['$O'] = '%s.0.%s' % (trapvars['$N'], trapvars['$q'])
                else:
                    trapvars['$O'] = trapvars['$q']

        elif version == 'SNMPv2c':
            # Checked in order; the first name found in $2 wins.
            known_traps = [
                ('coldStart', '0', 'Cold Start'),
                ('warmStart', '1', 'Warm Start'),
                ('linkDown', '2', 'Link Down'),
                ('linkUp', '3', 'Link Up'),
                ('authenticationFailure', '4', 'Authentication Failure'),
                ('egpNeighborLoss', '5', 'EGP Neighbor Loss'),
            ]
            for trap_name, code, label in known_traps:
                if trap_name in trapvars['$2']:
                    trapvars['$w'] = code
                    trapvars['$W'] = label
                    break
            else:
                trapvars['$w'] = '6'
                trapvars['$W'] = 'Enterprise Specific'
            trapvars['$O'] = trapvars['$2']  # SNMPv2-MIB::snmpTrapOID.0

        LOG.debug('trapvars = %s', trapvars)

        LOG.info('%s-Trap-PDU %s from %s at %s %s', version, trapvars['$O'], trapvars['$B'], trapvars['$x'], trapvars['$X'])

        # Resource: $B unless '<UNKNOWN>', then $A unless '0.0.0.0',
        # finally the IPv4 address embedded in $b.
        if trapvars['$B'] != '<UNKNOWN>':
            resource = trapvars['$B']
        elif trapvars['$A'] != '0.0.0.0':
            resource = trapvars['$A']
        else:
            m = re.match(r'UDP: \[(\d+\.\d+\.\d+\.\d+)\]', trapvars['$b'])
            if m:
                resource = m.group(1)
            else:
                resource = '<NONE>'

        # Defaults
        event = trapvars['$O']
        severity = severity_code.NORMAL
        group = 'SNMP'
        value = trapvars['$w']
        text = trapvars['$W']
        environment = ['INFRA']
        service = ['Network']
        tags = {'Version': version}
        timeout = None
        threshold_info = None
        summary = None
        create_time = datetime.datetime.strptime('%sT%s.000Z' % (trapvars['$x'], trapvars['$X']), '%Y-%m-%dT%H:%M:%S.%fZ')

        snmptrapAlert = Alert(
            resource=resource,
            event=event,
            correlate=correlate,
            group=group,
            value=value,
            severity=severity,
            environment=environment,
            service=service,
            text=text,
            event_type='snmptrapAlert',
            tags=tags,
            timeout=timeout,
            threshold_info=threshold_info,
            summary=summary,
            create_time=create_time,
            raw_data=data,
        )

        suppress = snmptrapAlert.transform_alert(trapoid=trapvars['$O'], trapvars=trapvars, varbinds=varbinds)
        if suppress:
            LOG.info('Suppressing %s SNMP trap', snmptrapAlert.event)
            LOG.debug('%s', snmptrapAlert)
            return

        snmptrapAlert.translate_alert(trapvars)

        if snmptrapAlert.get_type() == 'Heartbeat':
            snmptrapAlert = Heartbeat(origin=snmptrapAlert.origin, version='n/a', timeout=snmptrapAlert.timeout)

        return snmptrapAlert