def on_message(self, headers, body):
    """Parse one inbound broker message and put the result on the queue.

    A message whose headers lack "type" or "correlation-id" is logged,
    counted via the 'alerta.alerts.rejected' metric and dropped. A message
    of type "Heartbeat" is parsed with ``Heartbeat.parse_heartbeat``; every
    other type is parsed with ``Alert.parse_alert`` (a ``ValueError`` there
    is also counted as rejected). Truthy parse results get ``receive_now()``
    called on them and are put on ``self.queue``.
    """
    malformed = 'type' not in headers or 'correlation-id' not in headers
    if malformed:
        LOG.warning(
            'Malformed header missing "type" or "correlation-id": %s',
            headers)
        self.statsd.metric_send('alerta.alerts.rejected', 1)
        return

    msg_type = headers['type']
    LOG.info("Received %s %s", msg_type, headers['correlation-id'])
    LOG.debug("Received body : %s", body)

    if msg_type == 'Heartbeat':
        parsed = Heartbeat.parse_heartbeat(body)
        if not parsed:
            return
        parsed.receive_now()
        LOG.debug('Queueing successfully parsed heartbeat %s',
                  parsed.get_body())
        self.queue.put(parsed)
        return

    # Anything that is not a heartbeat is handled as an alert.
    try:
        parsed = Alert.parse_alert(body)
    except ValueError:
        self.statsd.metric_send('alerta.alerts.rejected', 1)
        return

    if not parsed:
        return
    parsed.receive_now()
    LOG.debug('Queueing successfully parsed alert %s', parsed.get_body())
    self.queue.put(parsed)
def run(self):
    """Run the metric check loop until a shutdown is requested.

    Connects to the message queue, then repeatedly re-reads the rules,
    runs ``metric_check`` on them, sends a heartbeat and sleeps for
    ``CONF.loop_every`` seconds. KeyboardInterrupt/SystemExit end the
    loop, after which the broker connection is closed.
    """
    self.running = True

    # Connect to message queue
    self.mq = Messaging()
    self.mq.connect(callback=GangliaMessage(self.mq))

    while not self.shuttingdown:
        try:
            # Rule config is re-read on every pass
            current_rules = init_rules()
            self.metric_check(current_rules)

            LOG.debug('Send heartbeat...')
            self.mq.send(Heartbeat(version=Version))

            LOG.debug('Waiting for next check run...')
            time.sleep(CONF.loop_every)
        except (KeyboardInterrupt, SystemExit):
            self.shuttingdown = True

    LOG.info('Shutdown request received...')
    self.running = False

    LOG.info('Disconnecting from message broker...')
    self.mq.disconnect()
def run(self):
    """Run the Dynect polling loop until a shutdown is requested.

    Each pass calls ``queryDynect()``; when ``self.updating`` is set it
    calls ``alertDynect()`` and stores ``self.info`` as ``self.last_info``.
    A heartbeat is then sent and the loop sleeps for ``CONF.loop_every``
    seconds. KeyboardInterrupt/SystemExit end the loop.
    """
    self.running = True

    # Connect to message queue
    self.mq = Messaging()
    self.mq.connect(callback=DynectMessage(self.mq))

    while not self.shuttingdown:
        try:
            self.queryDynect()

            if self.updating:
                self.alertDynect()
                self.last_info = self.info

            LOG.debug('Send heartbeat...')
            self.mq.send(Heartbeat(version=Version))

            LOG.debug('Waiting for next check run...')
            time.sleep(CONF.loop_every)
        except (KeyboardInterrupt, SystemExit):
            self.shuttingdown = True

    self.running = False
def on_message(self, headers, body):
    """Queue the heartbeat or alert carried by one broker message.

    Both the "type" and "correlation-id" headers are required; if either
    is absent the message is logged, counted as rejected and ignored.
    "Heartbeat" messages go through ``Heartbeat.parse_heartbeat``; all other
    types go through ``Alert.parse_alert``, where a ``ValueError`` counts as
    a rejection. Truthy results have ``receive_now()`` called on them and
    are put on ``self.queue``.
    """
    required = ('type', 'correlation-id')
    absent = [name for name in required if name not in headers]
    if absent:
        LOG.warning('Malformed header missing "type" or "correlation-id": %s', headers)
        self.statsd.metric_send('alerta.alerts.rejected', 1)
        return

    LOG.info("Received %s %s", headers['type'], headers['correlation-id'])
    LOG.debug("Received body : %s", body)

    is_heartbeat = headers['type'] == 'Heartbeat'

    if is_heartbeat:
        item = Heartbeat.parse_heartbeat(body)
        if item:
            item.receive_now()
            LOG.debug('Queueing successfully parsed heartbeat %s', item.get_body())
            self.queue.put(item)
        return

    try:
        item = Alert.parse_alert(body)
    except ValueError:
        # Unparseable alerts are only counted, not logged here.
        self.statsd.metric_send('alerta.alerts.rejected', 1)
        return

    if item:
        item.receive_now()
        LOG.debug('Queueing successfully parsed alert %s', item.get_body())
        self.queue.put(item)
def run(self):
    """Subscribe to the outbound queue and send heartbeats until shutdown.

    Message handling is delegated to the ``LoggerMessage`` callback; this
    loop only sleeps for ``CONF.loop_every`` seconds and then sends a
    heartbeat. KeyboardInterrupt/SystemExit end the loop, after which the
    broker connection is closed.
    """
    self.running = True

    # Connect to message queue
    self.mq = Messaging()
    self.mq.connect(callback=LoggerMessage(self.mq))
    self.mq.subscribe(destination=CONF.outbound_queue)

    while not self.shuttingdown:
        try:
            LOG.debug('Waiting for log messages...')
            time.sleep(CONF.loop_every)

            LOG.debug('Send heartbeat...')
            self.mq.send(Heartbeat(version=Version))
        except (KeyboardInterrupt, SystemExit):
            self.shuttingdown = True

    LOG.info('Shutdown request received...')
    self.running = False

    LOG.info('Disconnecting from message broker...')
    self.mq.disconnect()
def create_heartbeat():
    """Parse the request body as a heartbeat.

    Any exception raised while parsing is reported to the client as a
    JSON error response carrying the exception text.
    """
    # Create a new heartbeat
    try:
        heartbeat = Heartbeat.parse_heartbeat(request.data)
    except Exception as e:
        failure = {"status": "error", "message": str(e)}
        return jsonify(response=failure)
def run(self):
    """Start the mailer helper threads and send heartbeats forever.

    A ``LeakyBucket``, a ``MailerMessage`` and a ``MailSender`` are started
    sharing one on-hold dict and the token bucket. The loop then sends a
    heartbeat through the API every ``CONF.loop_every`` seconds; a failed
    send is only logged. KeyboardInterrupt/SystemExit set
    ``should_stop`` on the ``MailerMessage`` instance.
    """
    onhold = {}

    # Start token bucket thread
    tokens = LeakyBucket(tokens=20, rate=30)
    tokens.start()

    mailer = MailerMessage(onhold, tokens)
    mailer.start()

    sender = MailSender(onhold, tokens)
    sender.start()

    self.api = ApiClient()

    try:
        while True:
            LOG.debug('Send heartbeat...')
            beat = Heartbeat(tags=[__version__])
            try:
                self.api.send(beat)
            except Exception as e:
                # Best effort: a failed heartbeat must not stop the loop.
                LOG.warning('Failed to send heartbeat: %s', e)
            time.sleep(CONF.loop_every)
    except (KeyboardInterrupt, SystemExit):
        mailer.should_stop = True
def main(self):
    """Send a heartbeat or an alert built from CONF and return its id.

    When ``CONF.heartbeat`` is set a ``Heartbeat`` is sent whose version is
    the concatenation of ``CONF.tags`` (falling back to ``Version`` when
    that is empty); otherwise an ``Alert`` populated from the CONF options
    is sent. The value returned is the sent object's ``get_id()``.
    """
    if CONF.heartbeat:
        version_tag = Version
        if CONF.tags:
            version_tag = ''.join(CONF.tags) or Version

        message = Heartbeat(
            origin=CONF.origin,
            version=version_tag,
            timeout=CONF.timeout
        )
    else:
        message = Alert(
            resource=CONF.resource,
            event=CONF.event,
            correlate=CONF.correlate,
            group=CONF.group,
            value=CONF.value,
            status=CONF.status,
            severity=CONF.severity,
            environment=CONF.environment,
            service=CONF.service,
            text=CONF.text,
            event_type=CONF.event_type,
            tags=CONF.tags,
            origin=CONF.origin,
            threshold_info='n/a',  # TODO(nsatterl): make this configurable?
            summary=CONF.summary,
            timeout=CONF.timeout,
            raw_data='n/a',  # TODO(nsatterl): make this configurable?
            more_info=CONF.more_info,
            graph_urls=CONF.graph_urls,
        )

    # Both message kinds are logged, sent and identified the same way.
    LOG.debug(repr(message))

    api = ApiClient()
    api.send(message)

    return message.get_id()
def run(self):
    """Mail out on-hold alerts whose hold time has passed, until shutdown.

    Starts a ``LeakyBucket``, connects to the broker with a
    ``MailerMessage`` callback sharing ``self.onhold`` and the bucket, and
    subscribes to ``CONF.outbound_topic``. Each pass walks ``self.onhold``
    (alert id -> (alert, hold time)); entries whose hold time is in the
    past are mailed if a token is available and then removed. The loop
    then sleeps ``CONF.loop_every`` seconds and sends a heartbeat.
    """
    self.running = True

    # Start token bucket thread
    self.tokens = LeakyBucket(tokens=20, rate=30)
    self.tokens.start()

    self.onhold = dict()

    # Connect to message queue
    self.mq = Messaging()
    self.mq.connect(
        callback=MailerMessage(self.mq, self.onhold, self.tokens))
    self.mq.subscribe(destination=CONF.outbound_topic)

    while not self.shuttingdown:
        try:
            LOG.debug('Send email messages...')
            # keys() is kept (not direct dict iteration) because entries are
            # deleted while walking; the KeyError guards tolerate entries
            # disappearing underneath us.
            for alertid in self.onhold.keys():
                try:
                    (mailAlert, hold_time) = self.onhold[alertid]
                except KeyError:
                    continue

                if not time.time() > hold_time:
                    continue

                if not self.tokens.get_token():
                    LOG.warning(
                        '%s : No tokens left, rate limiting this alert',
                        alertid)
                    continue

                recipients = CONF.mail_list.split(',')
                email = Mailer(mailAlert)
                if 'mailto' in mailAlert.tags:
                    recipients.append(mailAlert.tags['mailto'])
                email.send(mail_to=recipients)

                try:
                    del self.onhold[alertid]
                except KeyError:
                    continue

            time.sleep(CONF.loop_every)

            LOG.debug('Send heartbeat...')
            self.mq.send(Heartbeat(version=Version))
        except (KeyboardInterrupt, SystemExit):
            self.shuttingdown = True

    LOG.info('Shutdown request received...')
    self.running = False
    self.tokens.shutdown()

    LOG.info('Disconnecting from message broker...')
    self.mq.disconnect()
def main(self):
    """Send a heartbeat or an alert built from CONF and return its id.

    With ``CONF.heartbeat`` set, a ``Heartbeat`` is sent whose version is
    ``CONF.tags['Version']`` when present, else ``Version``. Otherwise an
    ``Alert`` populated from the CONF options is sent. Returns the sent
    object's ``get_id()``.
    """
    if CONF.heartbeat:
        message = Heartbeat(
            origin=CONF.origin,
            version=CONF.tags.get('Version', Version),
            timeout=CONF.timeout)
        # The heartbeat is logged as-is, the alert below via repr().
        LOG.debug(message)
    else:
        message = Alert(
            resource=CONF.resource,
            event=CONF.event,
            correlate=CONF.correlate,
            group=CONF.group,
            value=CONF.value,
            status=CONF.status,
            severity=CONF.severity,
            environment=CONF.environment,
            service=CONF.service,
            text=CONF.text,
            event_type=CONF.event_type,
            tags=CONF.tags,
            origin=CONF.origin,
            threshold_info='n/a',  # TODO(nsatterl): make this configurable?
            summary=CONF.summary,
            timeout=CONF.timeout,
            raw_data='n/a',  # TODO(nsatterl): make this configurable?
            more_info=CONF.more_info,
            graph_urls=CONF.graph_urls,
        )
        LOG.debug(repr(message))

    api = ApiClient()
    api.send(message)

    return message.get_id()
def run(self):
    """Run the notify daemon loop until a shutdown is requested.

    Loads the alert config, starts the token top-up and release threads,
    connects to the broker and subscribes to ``CONF.outbound_topic``. Each
    pass reloads the config if the modification time of
    ``CONF.yaml_config`` changed, sleeps ``CONF.loop_every`` seconds and
    sends a heartbeat. KeyboardInterrupt/SystemExit stop both helper
    threads and end the loop, after which the broker connection is closed.
    """
    self.running = True

    # Initialise alert config and remember the mtime it was loaded at.
    # config_mod_time is assigned in this function, so it is a local name
    # and must be bound before the loop compares against it; otherwise the
    # first comparison raises UnboundLocalError.
    init_config()
    config_mod_time = os.path.getmtime(CONF.yaml_config)

    # Start token bucket thread
    _TokenThread = TokenTopUp()
    _TokenThread.start()

    # Start notify thread
    _NotifyThread = ReleaseThread()
    _NotifyThread.start()

    # Connect to message queue
    self.mq = Messaging()
    self.mq.connect(callback=NotifyMessage(self.mq))
    self.mq.subscribe(destination=CONF.outbound_topic)

    while not self.shuttingdown:
        try:
            # Read (or re-read) config as necessary. The mtime is sampled
            # once so the stored value is the one that triggered the reload.
            current_mod_time = os.path.getmtime(CONF.yaml_config)
            if current_mod_time != config_mod_time:
                init_config()
                config_mod_time = current_mod_time

            LOG.debug('Waiting for email messages...')
            time.sleep(CONF.loop_every)

            LOG.debug('Send heartbeat...')
            heartbeat = Heartbeat(version=Version)
            self.mq.send(heartbeat)

        except (KeyboardInterrupt, SystemExit):
            self.shuttingdown = True
            _TokenThread.shutdown()
            _NotifyThread.shutdown()

    LOG.info('Shutdown request received...')
    self.running = False

    LOG.info('Disconnecting from message broker...')
    self.mq.disconnect()
def run(self):
    """Start the PagerDuty message handler and send heartbeats forever.

    A heartbeat is sent through the API every ``CONF.loop_every`` seconds;
    a failed send is only logged. KeyboardInterrupt/SystemExit set
    ``should_stop`` on the ``PagerDutyMessage`` instance.
    """
    pd = PagerDutyMessage()
    pd.start()

    self.api = ApiClient()

    try:
        while True:
            LOG.debug('Send heartbeat...')
            beat = Heartbeat(tags=[__version__])
            try:
                self.api.send(beat)
            except Exception as e:
                # Best effort: a failed heartbeat must not stop the loop.
                LOG.warning('Failed to send heartbeat: %s', e)
            time.sleep(CONF.loop_every)
    except (KeyboardInterrupt, SystemExit):
        pd.should_stop = True
def run(self):
    """Read one trap from stdin, forward it as an alert, then heartbeat.

    The raw stdin data is decoded as UTF-8 (undecodable bytes ignored) and
    handed to ``SnmpTrapHandler.parse_snmptrap``. A truthy result is sent
    through the API and counted in statsd. A heartbeat is always sent
    afterwards.
    """
    self.statsd = StatsD()  # graphite metrics

    raw = sys.stdin.read()
    LOG.info('snmptrapd -> %r', raw)

    decoded = unicode(raw, 'utf-8', errors='ignore')
    LOG.debug('unicoded -> %s', decoded)

    snmptrapAlert = SnmpTrapHandler.parse_snmptrap(decoded)

    self.api = ApiClient()

    if snmptrapAlert:
        self.api.send(snmptrapAlert)
        self.statsd.metric_send('alert.snmptrap.alerts.total', 1)

    LOG.debug('Send heartbeat...')
    self.api.send(Heartbeat(version=Version))
def on_message(self, headers, body):
    """Parse one inbound broker message and put the result on the queue.

    Messages whose headers lack "type" or "correlation-id" are logged,
    counted via the 'alerta.alerts.rejected' metric and dropped instead of
    raising ``KeyError`` inside the broker callback. A "Heartbeat" type is
    parsed with ``Heartbeat.parse_heartbeat``; a type ending in "Alert" is
    parsed with ``Alert.parse_alert`` (a ``ValueError`` there is counted as
    rejected). Any other type is ignored. Truthy parse results get
    ``receive_now()`` called on them and are put on ``self.queue``.
    """
    # Validate the headers up front: both are dereferenced below.
    if 'type' not in headers or 'correlation-id' not in headers:
        LOG.warning('Malformed header missing "type" or "correlation-id": %s', headers)
        self.statsd.metric_send('alerta.alerts.rejected', 1)
        return

    LOG.info("Received %s %s", headers['type'], headers['correlation-id'])
    LOG.debug("Received body : %s", body)

    if headers['type'] == 'Heartbeat':
        heartbeat = Heartbeat.parse_heartbeat(body)
        if heartbeat:
            heartbeat.receive_now()
            LOG.debug('Queueing successfully parsed heartbeat %s', heartbeat.get_body())
            self.queue.put(heartbeat)

    elif headers['type'].endswith('Alert'):
        try:
            alert = Alert.parse_alert(body)
        except ValueError:
            self.statsd.metric_send('alerta.alerts.rejected', 1)
            return
        if alert:
            alert.receive_now()
            LOG.debug('Queueing successfully parsed alert %s', alert.get_body())
            self.queue.put(alert)
def parse_snmptrap(data):
    """Turn the text of one SNMP trap into an alert (or heartbeat).

    ``data`` consists of leading lines of the form ``$<token> <value>``
    followed by the varbinds, which are separated by ``~%~``. The ``$``
    tokens and the numbered varbind values (``$1``, ``$2``, ...) are
    collected in ``trapvars``; the trap name is derived into ``$O`` from the
    generic trap number (SNMPv1) or from ``$2`` (SNMPv2c).

    Returns ``None`` when ``transform_alert`` asks for suppression,
    a ``Heartbeat`` when the transformed alert reports type 'Heartbeat',
    and the ``Alert`` otherwise.

    NOTE(review): ``version`` is only bound when a ``$s`` line is present;
    input without one fails further down. Behaviour kept as-is.
    """
    pdu_data = data.splitlines()
    varbind_list = pdu_data[:]

    # Collect the '$' lines; each one found drops the head of varbind_list,
    # which leaves only varbind lines provided the '$' lines come first.
    trapvars = dict()
    for line in pdu_data:
        if not line.startswith('$'):
            continue
        special, value = line.split(None, 1)
        trapvars[special] = value
        varbind_list.pop(0)

    if '$s' in trapvars:
        version_names = {
            '0': 'SNMPv1',
            '1': 'SNMPv2c',
            '2': 'SNMPv2u',  # not supported
        }
        version = version_names.get(trapvars['$s'], 'SNMPv3')
        trapvars['$s'] = version

    # Get varbinds
    varbinds = dict()
    idx = 0
    for varbind in '\n'.join(varbind_list).split('~%~'):
        if varbind == '':
            break
        idx += 1
        pieces = varbind.split(None, 1)
        if len(pieces) == 2:
            oid, value = pieces
        else:
            # No value part: the whole text is the OID.
            oid = varbind
            value = ''
        varbinds[oid] = value
        trapvars['$' + str(idx)] = value  # $n
        LOG.debug('$%s %s', str(idx), value)

    trapvars['$q'] = trapvars['$q'].lstrip('.')  # if numeric, remove leading '.'
    trapvars['$#'] = str(idx)

    LOG.debug('varbinds = %s', varbinds)

    LOG.debug('version = %s', version)

    correlate = list()

    if version == 'SNMPv1':
        # generic trap number -> (trap name, correlated events or None)
        generic_traps = {
            '0': ('coldStart', ['coldStart', 'warmStart']),
            '1': ('warmStart', ['coldStart', 'warmStart']),
            '2': ('linkDown', ['linkUp', 'linkDown']),
            '3': ('linkUp', ['linkUp', 'linkDown']),
            '4': ('authenticationFailure', None),
            '5': ('egpNeighborLoss', None),
        }
        generic = trapvars['$w']
        if generic in generic_traps:
            trap_name, related = generic_traps[generic]
            trapvars['$O'] = trap_name
            if related is not None:
                correlate = related
        elif generic == '6':  # enterpriseSpecific(6)
            if trapvars['$q'].isdigit():  # XXX - specific trap number was not decoded
                trapvars['$O'] = '%s.0.%s' % (trapvars['$N'], trapvars['$q'])
            else:
                trapvars['$O'] = trapvars['$q']

    elif version == 'SNMPv2c':
        # (substring looked for in $2, generic trap number, description);
        # checked in this order, first match wins.
        known_traps = (
            ('coldStart', '0', 'Cold Start'),
            ('warmStart', '1', 'Warm Start'),
            ('linkDown', '2', 'Link Down'),
            ('linkUp', '3', 'Link Up'),
            ('authenticationFailure', '4', 'Authentication Failure'),
            ('egpNeighborLoss', '5', 'EGP Neighbor Loss'),
        )
        trap_oid = trapvars['$2']
        for marker, number, description in known_traps:
            if marker in trap_oid:
                trapvars['$w'] = number
                trapvars['$W'] = description
                break
        else:
            trapvars['$w'] = '6'
            trapvars['$W'] = 'Enterprise Specific'
        trapvars['$O'] = trapvars['$2']  # SNMPv2-MIB::snmpTrapOID.0

    LOG.debug('trapvars = %s', trapvars)

    LOG.info('%s-Trap-PDU %s from %s at %s %s', version, trapvars['$O'], trapvars['$B'], trapvars['$x'], trapvars['$X'])

    # Resource preference: $B, then $A, then the address embedded in $b.
    if trapvars['$B'] != '<UNKNOWN>':
        resource = trapvars['$B']
    elif trapvars['$A'] != '0.0.0.0':
        resource = trapvars['$A']
    else:
        m = re.match(r'UDP: \[(\d+\.\d+\.\d+\.\d+)\]', trapvars['$b'])
        resource = m.group(1) if m else '<NONE>'

    # Defaults
    event = trapvars['$O']
    value = trapvars['$w']
    text = trapvars['$W']
    create_time = datetime.datetime.strptime('%sT%s.000Z' % (trapvars['$x'], trapvars['$X']), '%Y-%m-%dT%H:%M:%S.%fZ')

    snmptrapAlert = Alert(
        resource=resource,
        event=event,
        correlate=correlate,
        group='SNMP',
        value=value,
        severity=severity_code.NORMAL,
        environment=['INFRA'],
        service=['Network'],
        text=text,
        event_type='snmptrapAlert',
        tags={'Version': version},
        timeout=None,
        threshold_info=None,
        summary=None,
        create_time=create_time,
        raw_data=data,
    )

    suppress = snmptrapAlert.transform_alert(trapoid=trapvars['$O'], trapvars=trapvars, varbinds=varbinds)
    if suppress:
        LOG.info('Suppressing %s SNMP trap', snmptrapAlert.event)
        LOG.debug('%s', snmptrapAlert)
        return

    snmptrapAlert.translate_alert(trapvars)

    if snmptrapAlert.get_type() == 'Heartbeat':
        snmptrapAlert = Heartbeat(origin=snmptrapAlert.origin, version='n/a', timeout=snmptrapAlert.timeout)

    return snmptrapAlert
return jsonify(application="alerta", time=int(time.time() * 1000), heartbeats=heartbeats)


@app.route('/alerta/api/v2/heartbeats/heartbeat.json', methods=['POST'])
@jsonp
def create_heartbeat():
    """Create a heartbeat from the JSON request body and send it to the queue.

    The body is decoded with ``json.loads``; any exception raised while
    decoding is returned to the client as an error response carrying the
    exception text. The ``origin``, ``version``, ``id`` and ``timeout``
    fields are read from the decoded data (each defaulting to ``None``) and
    used to build the ``Heartbeat``, which is then sent via ``mq``.

    Responds with status "ok" and the heartbeat id when the heartbeat
    object is truthy, otherwise with a generic error.
    """

    # Create a new heartbeat
    try:
        data = json.loads(request.data)
    except Exception, e:
        return jsonify(response={"status": "error", "message": str(e)})

    # NOTE(review): assumes the decoded JSON is an object (dict); a JSON
    # list or scalar has no .get() - confirm against callers.
    heartbeat = Heartbeat(
        origin=data.get('origin', None),
        version=data.get('version', None),
        heartbeatid=data.get('id', None),
        timeout=data.get('timeout', None),
    )
    LOG.debug('New heartbeat %s', heartbeat)
    mq.send(heartbeat)

    # The heartbeat has already been sent by the time it is checked here.
    if heartbeat:
        return jsonify(response={"status": "ok", "id": heartbeat.get_id()})
    else:
        return jsonify(response={"status": "error", "message": "something went wrong"})


@app.route('/alerta/widgets/v2/severity')
def severity_widget():

    label = request.args.get('label', 'Alert Severity')
def main(self):
    """Build a heartbeat, or run a Nagios plugin and build an alert from it.

    With ``CONF.heartbeat`` set only a ``Heartbeat`` is created. Otherwise
    the plugin ``CONF.nagios_cmd`` under ``CONF.nagios_plugins`` is run,
    its exit code is mapped to a severity (0 normal, 1 warning, 2 critical,
    3 unknown, anything else indeterminate with rc forced to -1) and its
    output is split into short text, long text and performance data
    (separated by '|') to populate an ``Alert``.

    The process exits with status 1 if the plugin cannot be started.
    """
    if CONF.heartbeat:
        msg = Heartbeat(version=Version)
    else:
        # Run Nagios plugin check
        command = os.path.join(CONF.nagios_plugins, CONF.nagios_cmd)
        args = shlex.split(command)
        LOG.info('Running %s', ' '.join(args))
        try:
            check = subprocess.Popen(args, stdout=subprocess.PIPE)
        except Exception as e:
            LOG.error('Nagios check did not execute: %s', e)
            sys.exit(1)

        stdout = check.communicate()[0]
        rc = check.returncode
        LOG.debug('Nagios plugin %s => %s (rc=%d)', CONF.nagios_cmd, stdout, rc)

        severity_by_rc = {
            0: severity_code.NORMAL,
            1: severity_code.WARNING,
            2: severity_code.CRITICAL,
            3: severity_code.UNKNOWN,
        }
        if rc in severity_by_rc:
            severity = severity_by_rc[rc]
        else:
            rc = -1
            severity = severity_code.INDETERMINATE

        # Parse Nagios plugin check output
        text = ''
        long_text = ''
        perf_data = ''
        extra_perf_data = False

        output_lines = stdout.split('\n')
        first_line = output_lines[0]
        remaining_lines = output_lines[1:]

        # First line: "<text>|<perf data>" or just "<text>".
        if '|' in first_line:
            fields = first_line.split('|')
            text = fields[0].rstrip(' ')
            perf_data = fields[1]
            value = perf_data.split(';')[0].lstrip(' ')
        else:
            text = first_line
            value = 'rc=%s' % rc

        # Later lines are long text until a '|' is seen; everything after
        # that point is appended to the perf data.
        for line in remaining_lines:
            if '|' in line:
                fields = line.split('|')
                long_text += fields[0]
                perf_data += fields[1]
                extra_perf_data = True
            elif not extra_perf_data:
                long_text += line
            else:
                perf_data += line

        LOG.debug('Short Output: %s', text)
        LOG.debug('Long Output: %s', long_text)
        LOG.debug('Perf Data: %s', perf_data)

        graph_urls = None

        msg = Alert(
            resource=CONF.resource,
            event=CONF.event,
            correlate=CONF.correlate,
            group=CONF.group,
            value=value,
            severity=severity,
            environment=CONF.environment,
            service=CONF.service,
            text=text + ' ' + long_text,
            event_type='nagiosAlert',
            tags=CONF.tags,
            threshold_info=CONF.nagios_cmd,
            timeout=CONF.timeout,
            raw_data=stdout,
            more_info=perf_data,
            graph_urls=graph_urls,
        )
class PingerDaemon(Daemon):
    """Daemon that queues ping targets for a pool of worker threads."""

    # Options registered with the config module when the daemon is created.
    pinger_opts = {
        'ping_file': '/etc/alerta/alert-pinger.targets',
        'ping_max_timeout': 15,  # seconds
        'ping_max_retries': 2,
        'ping_slow_warning': 5,  # ms
        'ping_slow_critical': 10,  # ms
        'server_threads': 20,
    }

    def __init__(self, prog, **kwargs):
        """Register the pinger options and initialise the base daemon."""
        config.register_opts(PingerDaemon.pinger_opts)

        Daemon.__init__(self, prog, kwargs)

    def run(self):
        """Queue every configured ping target once per loop until shutdown.

        Starts ``CONF.server_threads`` worker threads that consume
        ``self.queue``. Each pass puts one
        ``(environment, service, target, retries, time)`` tuple per target
        on the queue, sends a heartbeat, sleeps ``CONF.loop_every`` seconds
        and reports the queue length. On shutdown every worker that was
        actually started is sent a ``None`` item and joined before the
        broker connection is closed.
        """
        self.running = True

        # Create internal queue
        self.queue = Queue.Queue()

        # Connect to message queue
        self.mq = Messaging()
        self.mq.connect(callback=PingerMessage(self.mq))

        self.dedup = DeDup()

        self.carbon = Carbon()  # graphite metrics

        # Initialise ping targets
        ping_list = init_targets()

        # Start worker threads, remembering only those that really started
        # so that shutdown never joins an unstarted thread.
        LOG.debug('Starting %s worker threads...', CONF.server_threads)
        workers = []
        for i in range(CONF.server_threads):
            w = WorkerThread(self.mq, self.queue, self.dedup, self.carbon)
            try:
                w.start()
            except Exception as e:
                LOG.error('Worker thread #%s did not start: %s', i, e)
                continue
            workers.append(w)
            LOG.info('Started worker thread: %s', w.getName())

        while not self.shuttingdown:
            try:
                for p in ping_list:
                    if 'targets' in p and p['targets']:
                        for target in p['targets']:
                            environment = p['environment']
                            service = p['service']
                            retries = p.get('retries', CONF.ping_max_retries)
                            self.queue.put(
                                (environment, service, target, retries, time.time()))

                LOG.debug('Send heartbeat...')
                heartbeat = Heartbeat(version=Version)
                self.mq.send(heartbeat)

                time.sleep(CONF.loop_every)
                LOG.info('Ping queue length is %d', self.queue.qsize())
                self.carbon.metric_send('alert.pinger.queueLength', self.queue.qsize())

            except (KeyboardInterrupt, SystemExit):
                self.shuttingdown = True

        LOG.info('Shutdown request received...')
        self.running = False

        # One None item per started worker (presumably the stop signal
        # WorkerThread waits for - confirm), then wait for all of them
        # rather than only the last one created.
        for _ in workers:
            self.queue.put(None)
        for worker in workers:
            worker.join()

        LOG.info('Disconnecting from message broker...')
        self.mq.disconnect()
class AwsDaemon(Daemon):
    """Daemon that periodically runs the EC2 status check."""

    # Options registered with the config module when the daemon is created.
    aws_opts = {
        'fog_file': '/etc/fog/alerta.conf',
        'ec2_regions': ['eu-west-1', 'us-east-1'],
        'http_proxy': None,
        'https_proxy': None,
    }

    def __init__(self, prog, **kwargs):
        """Register the AWS options and initialise daemon state."""
        config.register_opts(AwsDaemon.aws_opts)

        Daemon.__init__(self, prog, kwargs)

        self.info = {}
        self.last = {}
        self.lookup = {}
        self.dedup = DeDup()

    def run(self):
        """Load AWS credentials, then run status checks until shutdown.

        The credentials are read from the YAML file ``CONF.fog_file``; the
        process exits with status 1 if the file cannot be read or yields
        nothing. ``CONF.http_proxy`` / ``CONF.https_proxy`` are exported to
        the environment when set. Each pass runs ``ec2_status_check()``,
        sends a heartbeat and sleeps ``CONF.loop_every`` seconds.
        """
        self.running = True

        # Read in FOG config file
        try:
            # The context manager closes the file even if reading fails.
            with open(CONF.fog_file) as fog_file:
                # NOTE(review): yaml.load can construct arbitrary Python
                # objects; consider yaml.safe_load if this file could ever
                # come from an untrusted source.
                self.fog = yaml.load(fog_file.read())
        except IOError as e:
            LOG.error('Could not read AWS credentials file %s: %s', CONF.fog_file, e)
            sys.exit(1)

        if not self.fog:
            LOG.error('No AWS credentials found in FOG file %s. Exiting...', CONF.fog_file)
            sys.exit(1)

        # Connect to message queue
        self.mq = Messaging()
        self.mq.connect(callback=AwsMessage(self.mq))

        if CONF.http_proxy:
            os.environ['http_proxy'] = CONF.http_proxy
        if CONF.https_proxy:
            os.environ['https_proxy'] = CONF.https_proxy

        while not self.shuttingdown:
            try:
                self.ec2_status_check()

                LOG.debug('Send heartbeat...')
                heartbeat = Heartbeat(version=Version)
                self.mq.send(heartbeat)

                LOG.debug('Waiting for next check run...')
                time.sleep(CONF.loop_every)
            except (KeyboardInterrupt, SystemExit):
                self.shuttingdown = True

        LOG.info('Shutdown request received...')
        self.running = False

        LOG.info('Disconnecting from message broker...')
        self.mq.disconnect()
class AlertaDaemon(Daemon):
    """Server daemon that receives messages and hands them to worker threads."""

    # Options registered with the config module when the daemon is created.
    alerta_opts = {
        'forward_duplicate': 'no',
    }

    def __init__(self, prog, **kwargs):
        """Register the server options and initialise the base daemon."""
        config.register_opts(AlertaDaemon.alerta_opts)

        Daemon.__init__(self, prog, kwargs)

    def run(self):
        """Start the workers and report queue metrics until shutdown.

        Subscribes to the broker with a ``ServerMessage`` callback that
        shares ``self.queue``, and starts ``CONF.server_threads`` worker
        threads consuming it. Each pass sends a heartbeat (timeout
        ``CONF.loop_every``), sleeps ``CONF.loop_every`` seconds and
        publishes the queue length to the log, carbon and the database.
        On shutdown every worker that was actually started is sent a
        ``None`` item and joined before the broker connection is closed.
        """
        self.running = True

        self.queue = Queue.Queue()  # Create internal queue
        self.db = Mongo()       # mongo database
        self.carbon = Carbon()  # carbon metrics
        self.statsd = StatsD()  # graphite metrics

        # Connect to message queue
        self.mq = Messaging()
        self.mq.connect(
            callback=ServerMessage(self.mq, self.queue, self.statsd))
        self.mq.subscribe()

        # Start worker threads, remembering only those that really started
        # so that shutdown never joins an unstarted thread.
        LOG.debug('Starting %s worker threads...', CONF.server_threads)
        workers = []
        for i in range(CONF.server_threads):
            w = WorkerThread(self.mq, self.queue, self.statsd)
            try:
                w.start()
            except Exception as e:
                LOG.error('Worker thread #%s did not start: %s', i, e)
                continue
            workers.append(w)
            LOG.info('Started worker thread: %s', w.getName())

        while not self.shuttingdown:
            try:
                LOG.debug('Send heartbeat...')
                heartbeat = Heartbeat(version=Version, timeout=CONF.loop_every)
                self.mq.send(heartbeat)

                time.sleep(CONF.loop_every)
                LOG.info('Alert processing queue length is %d', self.queue.qsize())
                self.carbon.metric_send('alerta.alerts.queueLength', self.queue.qsize())
                self.db.update_queue_metric(self.queue.qsize())

            except (KeyboardInterrupt, SystemExit):
                self.shuttingdown = True

        LOG.info('Shutdown request received...')
        self.running = False

        # One None item per started worker (presumably the stop signal
        # WorkerThread waits for - confirm), then wait for all of them
        # rather than only the last one created.
        for _ in workers:
            self.queue.put(None)
        for worker in workers:
            worker.join()

        LOG.info('Disconnecting from message broker...')
        self.mq.disconnect()
class IrcbotDaemon(Daemon):
    """Daemon that joins an IRC channel and reacts to messages sent there."""

    # Options registered with the config module when the daemon is created.
    ircbot_opts = {
        'irc_host': 'localhost',
        'irc_port': 6667,
        'irc_channel': '#alerts',
        'irc_user': '******',
    }

    def __init__(self, prog, **kwargs):
        """Register the IRC bot options and initialise the base daemon."""
        config.register_opts(IrcbotDaemon.ircbot_opts)

        Daemon.__init__(self, prog, kwargs)

    def _handle_irc_data(self, irc, data):
        """React to one chunk of text received from the IRC server.

        Text containing 'ERROR' is logged and ends the process via
        ``sys.exit(1)`` (the resulting SystemExit is caught by the loop in
        ``run``). PING is answered with PONG; text containing 'ack' or
        'delete' acknowledges or deletes the alert named by the fifth
        whitespace-separated word; '!alerta quit' sends QUIT. Text that
        matches a command but has too few words falls through to the next
        check instead of raising IndexError.
        """
        if len(data) > 0:
            if 'ERROR' in data:
                LOG.error('%s. Exiting...', data)
                sys.exit(1)
            else:
                LOG.debug('%s', data)
        else:
            # NOTE(review): an empty read usually means the peer closed the
            # connection; the loop keeps polling regardless - confirm intent.
            LOG.warning('IRC server sent no data')

        words = data.split()
        lowered = data.lower()

        if 'PING' in data and len(words) > 1:
            LOG.info('IRC PING received -> PONG ' + words[1])
            irc.send('PONG ' + words[1] + '\r\n')

        elif 'ack' in lowered and len(words) > 4:
            LOG.info('Request to ACK %s by %s', words[4], words[0])
            ack_alert(words[4])

        elif 'delete' in lowered and len(words) > 4:
            LOG.info('Request to DELETE %s by %s', words[4], words[0])
            delete_alert(words[4])

        elif data.find('!alerta quit') != -1:
            irc.send('QUIT\r\n')

        else:
            LOG.warning('IRC: %s', data)

    def run(self):
        """Connect to IRC and the broker, then service both until shutdown.

        Exits with status 1 if the IRC connection cannot be set up. The
        main loop waits up to ``CONF.loop_every`` seconds for IRC data; when
        data arrives it is handled, and when the wait times out a heartbeat
        is sent instead.
        """
        self.running = True

        # An IRC client may send 1 message every 2 seconds
        # See section 5.8 in http://datatracker.ietf.org/doc/rfc2813/
        tokens = LeakyBucket(tokens=20, rate=2)
        tokens.start()

        # Connect to IRC server
        try:
            irc = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
            irc.connect((CONF.irc_host, CONF.irc_port))
            time.sleep(1)
            irc.send('NICK %s\r\n' % CONF.irc_user)
            time.sleep(1)
            irc.send('USER %s 8 * : %s\r\n' % (CONF.irc_user, CONF.irc_user))
            LOG.debug('USER -> %s', irc.recv(4096))
            time.sleep(1)
            irc.send('JOIN %s\r\n' % CONF.irc_channel)
            LOG.debug('JOIN -> %s', irc.recv(4096))
        except Exception as e:
            LOG.error('IRC connection error: %s', e)
            sys.exit(1)

        LOG.info('Joined IRC channel %s on %s as USER %s', CONF.irc_channel, CONF.irc_host, CONF.irc_user)

        # Connect to message queue
        self.mq = Messaging()
        self.mq.connect(callback=IrcbotMessage(self.mq, irc, tokens))
        self.mq.subscribe(destination=CONF.outbound_topic)

        while not self.shuttingdown:
            try:
                LOG.debug('Waiting for IRC messages...')
                ip, op, rdy = select.select([irc], [], [], CONF.loop_every)
                if ip:
                    for i in ip:
                        if i == irc:
                            data = irc.recv(4096).rstrip('\r\n')
                            self._handle_irc_data(irc, data)
                        else:
                            # socket.recv() requires a buffer size.
                            i.recv(4096)
                else:
                    # select() timed out with nothing to read
                    LOG.debug('Send heartbeat...')
                    heartbeat = Heartbeat(version=Version)
                    self.mq.send(heartbeat)

            except (KeyboardInterrupt, SystemExit):
                self.shuttingdown = True

        LOG.info('Shutdown request received...')
        self.running = False
        tokens.shutdown()

        LOG.info('Disconnecting from message broker...')
        self.mq.disconnect()
def parse_events(self, data):
    """Convert SolarWinds event rows into a list of alerts/heartbeats.

    ``data`` is indexed to find its first item: an ``IndexError`` yields an
    empty list, and a first item without a ``c0`` attribute causes ``data``
    itself to be treated as a single row. Each row's ``c1``..``c7``
    attributes populate an ``Alert``. Alerts that ``transform_alert()``
    asks to suppress are skipped, and alerts that report type 'Heartbeat'
    are replaced by a ``Heartbeat``.
    """
    LOG.debug('Parsing solarwinds event data...')
    LOG.debug(data)

    try:
        first = data[0]
    except IndexError:
        return []

    try:
        first.c0
    except AttributeError:
        # Not a sequence of rows: wrap the single row.
        data = [data]

    parsed = list()

    for row in data:
        LOG.debug(row)

        event = row.c4.replace(" ", "")
        correlate = SOLAR_WINDS_CORRELATED_EVENTS.get(event, None)
        resource = '%s:%s' % (row.c2, row.c3.lower())
        severity = SOLAR_WINDS_SEVERITY_LEVELS.get(row.c7, None)
        value = '%s' % row.c6
        text = '%s' % row.c5
        raw_data = repr(row)
        # The last five characters of c1 are dropped and 'Z' appended
        # before parsing.
        create_time = datetime.datetime.strptime(row.c1[:-5] + 'Z', '%Y-%m-%dT%H:%M:%S.%fZ')

        item = Alert(
            resource=resource,
            event=event,
            correlate=correlate,
            group='Orion',
            value=value,
            severity=severity,
            environment=['INFRA'],
            service=['Network'],
            text=text,
            event_type='solarwindsAlert',
            tags=None,
            threshold_info=None,
            summary=None,
            timeout=None,
            create_time=create_time,
            raw_data=raw_data,
        )

        if item.transform_alert():
            LOG.info('Suppressing %s alert', item.event)
            LOG.debug('%s', item)
            continue

        if item.get_type() == 'Heartbeat':
            item = Heartbeat(origin=item.origin, version='n/a', timeout=item.timeout)

        parsed.append(item)

    return parsed
def create_heartbeat():
    """Parse the request body as a heartbeat.

    A ``ValueError`` raised while parsing is reported to the client as a
    JSON error response carrying the exception text.
    """
    try:
        heartbeat = Heartbeat.parse_heartbeat(request.data)
    except ValueError as e:
        reason = str(e)
        return jsonify(status="error", message=reason)
class SolarWindsDaemon(Daemon):
    """Daemon that polls SolarWinds for events and forwards them as alerts."""

    # Options registered with the config module when the daemon is created.
    solarwinds_opts = {
        'solarwinds_host': 'localhost',
        'solarwinds_username': '******',
        'solarwinds_password': '',
        'solarwinds_group': 'websys',
    }

    def __init__(self, prog, **kwargs):
        """Register the SolarWinds options and initialise the base daemon."""
        config.register_opts(SolarWindsDaemon.solarwinds_opts)

        Daemon.__init__(self, prog, kwargs)

    def run(self):
        """Poll SolarWinds and forward de-duplicated alerts until shutdown.

        Creating the ``SwisClient`` is retried every 30 seconds until it
        succeeds. Each pass fetches the NPM events and then the UCS events,
        parses them with ``parse_events`` and sends those that
        ``self.dedup.is_send`` accepts. A heartbeat is sent only if neither
        fetch raised ``IOError``. The loop sleeps ``CONF.loop_every``
        seconds between passes.
        """
        self.running = True

        # Keep trying until a client can be created.
        while True:
            try:
                swis = SwisClient(username=CONF.solarwinds_username, password=CONF.solarwinds_password)
                break
            except Exception as e:
                LOG.error('SolarWinds SWIS Client error: %s', e)
                time.sleep(30)

        LOG.info('Polling for SolarWinds events on %s' % CONF.solarwinds_host)

        # Connect to message queue
        self.mq = Messaging()
        self.mq.connect(callback=SolarWindsMessage(self.mq))

        self.dedup = DeDup(by_value=True)

        # Network/interface/volume events first, then Cisco UCS events.
        event_sources = ('get_npm_events', 'get_ucs_events')

        while not self.shuttingdown:
            try:
                LOG.debug('Polling SolarWinds...')
                send_heartbeat = True

                for source in event_sources:
                    try:
                        events = getattr(swis, source)()
                    except IOError:
                        events = []
                        send_heartbeat = False

                    for solarwindsAlert in self.parse_events(events):
                        if self.dedup.is_send(solarwindsAlert):
                            self.mq.send(solarwindsAlert)

                if not send_heartbeat:
                    LOG.error('SolarWinds failure. Skipping heartbeat.')
                else:
                    LOG.debug('Send heartbeat...')
                    self.mq.send(Heartbeat(version=Version))

                time.sleep(CONF.loop_every)

            except (KeyboardInterrupt, SystemExit):
                self.shuttingdown = True

        LOG.info('Shutdown request received...')
        self.running = False

        LOG.info('Disconnecting from message broker...')
        self.mq.disconnect()
class UrlmonDaemon(Daemon):
    """Daemon that queues URL checks for a pool of worker threads."""

    # Options registered with the config module when the daemon is created.
    urlmon_opts = {
        'urlmon_file': '/etc/alerta/alert-urlmon.targets',
        'urlmon_max_timeout': 15,  # seconds
        'urlmon_slow_warning': 2000,   # ms
        'urlmon_slow_critical': 5000,  # ms
    }

    def __init__(self, prog, **kwargs):
        """Register the urlmon options and initialise the base daemon."""
        config.register_opts(UrlmonDaemon.urlmon_opts)

        Daemon.__init__(self, prog, kwargs)

    def run(self):
        """Queue every configured URL once per loop until shutdown.

        Starts ``CONF.server_threads`` worker threads that consume
        ``self.queue``. Each pass puts one ``(url, time)`` tuple per URL on
        the queue, sends a heartbeat, sleeps ``CONF.loop_every`` seconds and
        reports the queue length. On shutdown every worker that was
        actually started is sent a ``None`` item and joined before the
        broker connection is closed.
        """
        self.running = True

        # Create internal queue
        self.queue = Queue.Queue()

        # Connect to message queue
        self.mq = Messaging()
        self.mq.connect(callback=UrlmonMessage(self.mq))

        self.dedup = DeDup()

        self.carbon = Carbon()  # graphite metrics

        # Initialise alert rules
        urls = init_urls()

        # Start worker threads, remembering only those that really started
        # so that shutdown never joins an unstarted thread.
        LOG.debug('Starting %s worker threads...', CONF.server_threads)
        workers = []
        for i in range(CONF.server_threads):
            w = WorkerThread(self.mq, self.queue, self.dedup, self.carbon)
            try:
                w.start()
            except Exception as e:
                LOG.error('Worker thread #%s did not start: %s', i, e)
                continue
            workers.append(w)
            LOG.info('Started worker thread: %s', w.getName())

        while not self.shuttingdown:
            try:
                for url in urls:
                    self.queue.put((url, time.time()))

                LOG.debug('Send heartbeat...')
                heartbeat = Heartbeat(version=Version)
                self.mq.send(heartbeat)

                time.sleep(CONF.loop_every)
                LOG.info('URL check queue length is %d', self.queue.qsize())
                self.carbon.metric_send('alert.urlmon.queueLength', self.queue.qsize())

            except (KeyboardInterrupt, SystemExit):
                self.shuttingdown = True

        LOG.info('Shutdown request received...')
        self.running = False

        # One None item per started worker (presumably the stop signal
        # WorkerThread waits for - confirm), then wait for all of them
        # rather than only the last one created.
        for _ in workers:
            self.queue.put(None)
        for worker in workers:
            worker.join()

        LOG.info('Disconnecting from message broker...')
        self.mq.disconnect()
LOG.info('Waiting for CloudWatch alarms...') try: m = q.read(wait_time_seconds=20) except boto.exception.SQSError, e: LOG.warning('Could not read from queue: %s', e) time.sleep(20) if m: message = m.get_body() cloudwatchAlert = self.parse_notification(message) if self.dedup.is_send(cloudwatchAlert): self.mq.send(cloudwatchAlert) q.delete_message(m) LOG.debug('Send heartbeat...') heartbeat = Heartbeat(version=Version) self.mq.send(heartbeat) except (KeyboardInterrupt, SystemExit): self.shuttingdown = True LOG.info('Shutdown request received...') self.running = False LOG.info('Disconnecting from message broker...') self.mq.disconnect() def parse_notification(self, message): LOG.debug('Parsing CloudWatch notification message...')