def run(self):
    """Poll the alarm queue forever, relaying notifications and heartbeats.

    Each pass reads from ``self.sqs`` with a 20 second wait. A notification
    that arrives is converted with ``self.parse_notification`` and passed to
    ``self.api.send``; it is then deleted from the queue whether or not the
    send succeeded. A ``Heartbeat`` tagged with ``__version__`` is sent at
    the end of every pass whose read did not fail.

    The loop has no exit condition. Send failures are only logged.
    """
    while True:
        LOG.debug('Waiting for CloudWatch alarms on %s...', AWS_SQS_QUEUE)

        try:
            message = self.sqs.read(wait_time_seconds=20)
        except boto.exception.SQSError as read_error:
            # Pause before the next attempt; this pass sends no heartbeat.
            LOG.warning('Could not read from queue: %s', read_error)
            time.sleep(20)
            continue

        if message:
            alert = self.parse_notification(message)
            try:
                self.api.send(alert)
            except Exception as send_error:
                LOG.warning('Failed to send alert: %s', send_error)
            # Removed from the queue regardless of the send outcome.
            self.sqs.delete_message(message)

        LOG.debug('Send heartbeat...')
        beat = Heartbeat(tags=[__version__])
        try:
            self.api.send(beat)
        except Exception as send_error:
            LOG.warning('Failed to send heartbeat: %s', send_error)
class SnmpTrapHandler(object):
    """Relay one SNMP trap read from standard input to the alert API."""

    def __init__(self):
        # Replaced by an ApiClient instance at the start of run().
        self.api = None

    def run(self):
        """Read a trap from stdin, send the parsed alert, then a heartbeat.

        The API endpoint and key come from the ``ALERTA_ENDPOINT`` and
        ``ALERTA_API_KEY`` environment variables; the endpoint defaults to
        ``http://localhost:8080`` and the key to ``None``.

        Nothing is sent for the trap when ``parse_snmptrap`` returns a falsy
        value. The heartbeat is attempted either way. Send failures are
        logged as warnings and not re-raised.
        """
        endpoint = os.environ.get('ALERTA_ENDPOINT', 'http://localhost:8080')
        key = os.environ.get('ALERTA_API_KEY', None)
        self.api = ApiClient(endpoint=endpoint, key=key)

        data = sys.stdin.read()
        LOG.info('snmptrapd -> %r', data)

        # Python 2 hands back a byte string here, Python 3 already-decoded
        # text. Decode only in the former case; undecodable bytes are dropped.
        if isinstance(data, bytes):
            data = data.decode('utf-8', 'ignore')
        LOG.debug('unicoded -> %s', data)

        snmptrapAlert = SnmpTrapHandler.parse_snmptrap(data)

        if snmptrapAlert:
            try:
                self.api.send(snmptrapAlert)
            except Exception as e:
                LOG.warning('Failed to send alert: %s', e)

        LOG.debug('Send heartbeat...')
        heartbeat = Heartbeat(tags=[__version__])
        try:
            self.api.send(heartbeat)
        except Exception as e:
            LOG.warning('Failed to send heartbeat: %s', e)
def main():
    """Run the event-listener loop, turning each event into an alert.

    Each pass announces ``READY``, reads one header line from stdin, then
    reads ``len`` characters of payload. Both are space-separated
    ``key:value`` tokens. Events whose name starts with ``TICK`` become a
    ``Heartbeat``; every other event becomes an ``Alert`` whose severity is
    chosen from the event-name suffix. The outcome of the send is reported
    back with ``RESULT 2\\nOK`` or ``RESULT 4\\nFAIL``.

    The loop has no exit condition.

    Raises:
        KeyError: if an expected header or payload key is absent.
        ValueError: if a token contains no ``:`` separator, or ``len`` is
            not an integer.
    """
    listener = Listener()

    while True:
        listener.send_cmd('READY\n')

        # Split each token on the FIRST colon only, so that a value that
        # itself contains ':' still yields exactly one key/value pair.
        header_line = sys.stdin.readline()
        headers = dict(token.split(':', 1) for token in header_line.split())

        payload = sys.stdin.read(int(headers['len']))
        body = dict(token.split(':', 1) for token in payload.split())

        event = headers['eventname']

        if event.startswith('TICK'):
            supervisorAlert = Heartbeat(origin='supervisord', tags=[headers['ver'], event])
        else:
            if event.endswith('FATAL'):
                severity = 'critical'
            elif event.endswith('BACKOFF'):
                severity = 'warning'
            elif event.endswith('EXITED'):
                severity = 'minor'
            else:
                severity = 'normal'

            supervisorAlert = Alert(
                resource=body['processname'],
                environment='Production',
                service=['supervisord'],
                event=event,
                correlate=[
                    'PROCESS_STATE_STARTING',
                    'PROCESS_STATE_RUNNING',
                    'PROCESS_STATE_BACKOFF',
                    'PROCESS_STATE_STOPPING',
                    'PROCESS_STATE_EXITED',
                    'PROCESS_STATE_STOPPED',
                    'PROCESS_STATE_FATAL',
                    'PROCESS_STATE_UNKNOWN'
                ],
                value='serial=%s' % headers['serial'],
                severity=severity,
                origin=headers['server'],
                text='State changed from %s to %s.' % (body['from_state'], event),
                raw_data='%s\n\n%s' % (json.dumps(headers), json.dumps(body)))

        try:
            listener.api.send(supervisorAlert)
        except Exception as e:
            listener.log_stderr(e)
            listener.send_cmd('RESULT 4\nFAIL')
        else:
            listener.send_cmd('RESULT 2\nOK')
def create_heartbeat():
    """Store the heartbeat described by the request body.

    Returns:
        ``(response, 400)`` when the body cannot be parsed (``ValueError``),
        ``(response, 500)`` when saving raises, otherwise
        ``(response, 201, headers)`` where ``headers`` carries a ``Location``
        built from ``request.base_url`` and the new heartbeat id.
    """
    try:
        parsed = Heartbeat.parse_heartbeat(request.data)
    except ValueError as parse_error:
        return jsonify(status="error", message=str(parse_error)), 400

    try:
        saved = db.save_heartbeat(parsed)
    except Exception as save_error:
        return jsonify(status="error", message=str(save_error)), 500

    payload = saved.get_body()
    # The same link goes into the JSON body and the Location header.
    link = "%s/%s" % (request.base_url, saved.id)
    payload['href'] = link

    response = jsonify(status="ok", id=saved.id, heartbeat=payload)
    return response, 201, {'Location': link}
def create_heartbeat():
    """Parse, save and acknowledge a heartbeat sent in the request body.

    A ``ValueError`` while parsing yields a 400 response. Any exception
    while saving yields a 500 response. On success the reply has status 201,
    the heartbeat body (with an added ``href``) and a ``Location`` header
    pointing at ``<request.base_url>/<heartbeat id>``.
    """
    try:
        incoming = Heartbeat.parse_heartbeat(request.data)
    except ValueError as err:
        return jsonify(status="error", message=str(err)), 400

    try:
        stored = db.save_heartbeat(incoming)
    except Exception as err:
        return jsonify(status="error", message=str(err)), 500

    body = stored.get_body()
    href = "%s/%s" % (request.base_url, stored.id)
    body['href'] = href

    headers = {'Location': href}
    return jsonify(status="ok", id=stored.id, heartbeat=body), 201, headers
def create_heartbeat():
    """Store the heartbeat in the request body, scoped to the caller.

    Unless ``g`` reports the role ``admin``, the heartbeat's ``customer`` is
    overwritten with the ``customer`` value held in ``g`` (``None`` if
    absent).

    Returns:
        ``(response, 400)`` on a parse ``ValueError``, ``(response, 500)``
        when saving raises, otherwise ``(response, 201, headers)`` with a
        ``Location`` header equal to the ``href`` placed in the body.
    """
    try:
        parsed = Heartbeat.parse_heartbeat(request.data)
    except ValueError as parse_error:
        return jsonify(status="error", message=str(parse_error)), 400

    role = g.get('role', None)
    if role != 'admin':
        parsed.customer = g.get('customer', None)

    try:
        saved = db.save_heartbeat(parsed)
    except Exception as save_error:
        return jsonify(status="error", message=str(save_error)), 500

    payload = saved.get_body()
    location = absolute_url('/heartbeat/' + saved.id)
    payload['href'] = location

    response = jsonify(status="ok", id=saved.id, heartbeat=payload)
    return response, 201, {'Location': location}
def create_heartbeat():
    """Parse, save and acknowledge a heartbeat on behalf of the caller.

    Callers whose role in ``g`` is not ``admin`` have the heartbeat's
    ``customer`` replaced by the ``customer`` recorded in ``g``. Parsing
    errors (``ValueError``) give a 400 response and save errors a 500
    response. Success gives 201 with the heartbeat body and a ``Location``
    header that repeats the body's ``href``.
    """
    try:
        incoming = Heartbeat.parse_heartbeat(request.data)
    except ValueError as err:
        return jsonify(status="error", message=str(err)), 400

    caller_is_admin = g.get('role', None) == 'admin'
    if not caller_is_admin:
        incoming.customer = g.get('customer', None)

    try:
        stored = db.save_heartbeat(incoming)
    except Exception as err:
        return jsonify(status="error", message=str(err)), 500

    body = stored.get_body()
    body['href'] = absolute_url('/heartbeat/' + stored.id)

    reply = jsonify(status="ok", id=stored.id, heartbeat=body)
    headers = {'Location': body['href']}
    return reply, 201, headers
class UrlmonDaemon(object):
    """Feed the configured URL checks to worker threads on a fixed interval."""

    def __init__(self):
        # Set to True by run() when an interrupt or exit request is caught.
        self.shuttingdown = False

    def run(self):
        """Start the workers, then queue checks and send heartbeats in a loop.

        ``SERVER_THREADS`` workers are created, all sharing one queue and
        one API client. A worker that fails to start is logged and skipped.
        Each pass of the main loop queues every entry of ``settings.checks``
        with the current time, sends a ``Heartbeat``, sleeps ``LOOP_EVERY``
        seconds and logs the queue length. The loop ends once
        ``KeyboardInterrupt`` or ``SystemExit`` is caught.
        """
        self.running = True

        self.queue = Queue.Queue()
        self.api = ApiClient(endpoint=settings.ENDPOINT, key=settings.API_KEY)

        # Start worker threads
        LOG.debug('Starting %s worker threads...', SERVER_THREADS)
        for i in range(SERVER_THREADS):
            w = WorkerThread(self.queue, self.api)
            try:
                w.start()
            except Exception as e:
                LOG.error('Worker thread #%s did not start: %s', i, e)
                continue
            LOG.info('Started worker thread: %s', w.getName())

        while not self.shuttingdown:
            try:
                for check in settings.checks:
                    self.queue.put((check, time.time()))

                LOG.debug('Send heartbeat...')
                heartbeat = Heartbeat(tags=[__version__])
                try:
                    self.api.send(heartbeat)
                except Exception as e:
                    LOG.warning('Failed to send heartbeat: %s', e)

                time.sleep(LOOP_EVERY)
                LOG.info('URL check queue length is %d', self.queue.qsize())

            except (KeyboardInterrupt, SystemExit):
                self.shuttingdown = True
def heartbeat(self, args):
    """Send a heartbeat built from ``args`` and print the id it was given.

    ``args`` must provide ``origin``, ``tags`` and ``timeout``. Any
    exception while building or sending the heartbeat, or a response whose
    ``status`` is not ``ok``, is logged as an error and ends the process
    with exit status 1.
    """
    try:
        beat = Heartbeat(origin=args.origin, tags=args.tags, timeout=args.timeout)
        response = self.api.send(beat)
    except Exception as err:
        LOG.error(err)
        sys.exit(1)

    if response['status'] != 'ok':
        LOG.error(response['message'])
        sys.exit(1)

    print(response['id'])
def create_heartbeat(tenant):
    """Store the heartbeat in the request body for the given tenant.

    ``tenant`` is first mapped through ``generateDBName``; the result is
    what ``db.save_heartbeat`` receives. Unless ``g`` reports the role
    ``admin``, the heartbeat's ``customer`` is overwritten with the
    ``customer`` value held in ``g``.

    Returns:
        ``(response, 400)`` on a parse ``ValueError``, ``(response, 500)``
        when saving raises, otherwise ``(response, 201, headers)`` with a
        ``Location`` built from ``request.base_url`` and the heartbeat id.
    """
    db_name = generateDBName(tenant)

    try:
        parsed = Heartbeat.parse_heartbeat(request.data)
    except ValueError as parse_error:
        return jsonify(status="error", message=str(parse_error)), 400

    role = g.get('role', None)
    if role != 'admin':
        parsed.customer = g.get('customer', None)

    try:
        saved = db.save_heartbeat(db_name, parsed)
    except Exception as save_error:
        return jsonify(status="error", message=str(save_error)), 500

    payload = saved.get_body()
    # The same link goes into the JSON body and the Location header.
    link = "%s/%s" % (request.base_url, saved.id)
    payload['href'] = link

    response = jsonify(status="ok", id=saved.id, heartbeat=payload)
    return response, 201, {'Location': link}
def parse_snmptrap(data):
    """Turn the text of one trap into an ``Alert`` (or a ``Heartbeat``).

    Lines starting with ``$`` are read as ``name value`` pairs into
    ``trapvars``. The remaining text is split on ``~%~`` into varbinds,
    whose values are also stored as ``$1``, ``$2``, ... and counted in
    ``$#``. ``$s`` selects the SNMP version and ``$O`` (the event name) is
    derived from ``$w``/``$q``/``$N`` for SNMPv1 or from ``$2`` for SNMPv2c.

    Args:
        data: the complete trap text.

    Returns:
        ``None`` when no ``$s`` line is present; otherwise the alert after
        ``SnmpTrapHandler.translate_alert`` has processed it, replaced by a
        ``Heartbeat`` when the alert reports its type as ``'Heartbeat'``.

    Raises:
        KeyError: if a ``$`` variable the code reads is absent.
        ValueError: if a ``$`` line has no value, or ``$x``/``$X`` do not
            form a timestamp in the expected format.
    """
    pdu_data = data.splitlines()
    varbind_list = pdu_data[:]

    trapvars = dict()
    for line in pdu_data:
        if not line.startswith('$'):
            continue
        special, value = line.split(None, 1)
        trapvars[special] = value
        # Drop one leading line from the varbind text per '$' line consumed.
        varbind_list.pop(0)

    if '$s' not in trapvars:
        LOG.warning('Failed to parse unknown trap type.')
        return

    versions = {
        '0': 'SNMPv1',
        '1': 'SNMPv2c',
        '2': 'SNMPv2u',  # not supported
    }
    version = versions.get(trapvars['$s'], 'SNMPv3')
    trapvars['$s'] = version

    # Get varbinds
    varbinds = dict()
    idx = 0
    for varbind in '\n'.join(varbind_list).split('~%~'):
        if varbind == '':
            break
        idx += 1
        pieces = varbind.split(None, 1)
        if len(pieces) == 2:
            oid, value = pieces
        else:
            oid, value = varbind, ''
        varbinds[oid] = value
        trapvars['$' + str(idx)] = value  # $n
        LOG.debug('$%s %s', str(idx), value)

    trapvars['$q'] = trapvars['$q'].lstrip('.')  # if numeric, remove leading '.'
    trapvars['$#'] = str(idx)

    LOG.debug('varbinds = %s', varbinds)
    LOG.debug('version = %s', version)

    correlate = list()

    if version == 'SNMPv1':
        # Generic trap number -> (event name, events it correlates with).
        generic_traps = {
            '0': ('coldStart', ['coldStart', 'warmStart']),
            '1': ('warmStart', ['coldStart', 'warmStart']),
            '2': ('linkDown', ['linkUp', 'linkDown']),
            '3': ('linkUp', ['linkUp', 'linkDown']),
            '4': ('authenticationFailure', []),
            '5': ('egpNeighborLoss', []),
        }
        generic = trapvars['$w']
        if generic in generic_traps:
            trapvars['$O'], correlate = generic_traps[generic]
        elif generic == '6':  # enterpriseSpecific(6)
            specific = trapvars['$q']
            if specific.isdigit():  # XXX - specific trap number was not decoded
                trapvars['$O'] = '%s.0.%s' % (trapvars['$N'], specific)
            else:
                trapvars['$O'] = specific

    elif version == 'SNMPv2c':
        trap_oid = trapvars['$2']
        # Checked in this order; the first keyword found in $2 wins.
        known_traps = (
            ('coldStart', '0', 'Cold Start'),
            ('warmStart', '1', 'Warm Start'),
            ('linkDown', '2', 'Link Down'),
            ('linkUp', '3', 'Link Up'),
            ('authenticationFailure', '4', 'Authentication Failure'),
            ('egpNeighborLoss', '5', 'EGP Neighbor Loss'),
        )
        for keyword, generic, label in known_traps:
            if keyword in trap_oid:
                trapvars['$w'] = generic
                trapvars['$W'] = label
                break
        else:
            trapvars['$w'] = '6'
            trapvars['$W'] = 'Enterprise Specific'
        trapvars['$O'] = trap_oid  # SNMPv2-MIB::snmpTrapOID.0

    LOG.debug('trapvars = %s', trapvars)

    LOG.info('%s-Trap-PDU %s from %s at %s %s', version, trapvars['$O'],
             trapvars['$B'], trapvars['$x'], trapvars['$X'])

    # Resource preference: $B, then $A, then an address found in $b.
    if trapvars['$B'] != '<UNKNOWN>':
        resource = trapvars['$B']
    elif trapvars['$A'] != '0.0.0.0':
        resource = trapvars['$A']
    else:
        found = re.match(r'UDP: \[(\d+\.\d+\.\d+\.\d+)\]', trapvars['$b'])
        resource = found.group(1) if found else '<NONE>'

    # Defaults
    event = trapvars['$O']
    value = trapvars['$w']
    text = trapvars['$W']
    attributes = {'source': trapvars['$B']}
    stamp = '%sT%s.000Z' % (trapvars['$x'], trapvars['$X'])
    create_time = datetime.datetime.strptime(stamp, '%Y-%m-%dT%H:%M:%S.%fZ')

    snmptrapAlert = Alert(
        resource=resource,
        event=event,
        correlate=correlate,
        group='SNMP',
        value=value,
        severity='normal',
        environment='Production',
        service=['Network'],
        text=text,
        event_type='snmptrapAlert',
        attributes=attributes,
        tags=[version],
        timeout=None,
        create_time=create_time,
        raw_data=data,
    )

    SnmpTrapHandler.translate_alert(snmptrapAlert, trapvars)

    if snmptrapAlert.get_type() == 'Heartbeat':
        snmptrapAlert = Heartbeat(origin=snmptrapAlert.origin, tags=[__version__],
                                  timeout=snmptrapAlert.timeout)

    return snmptrapAlert
def parse_syslog(self, addr, data):
    """Convert newline-separated syslog messages into alerts.

    Each non-empty line is matched, in this order, against an RFC 5424
    pattern, an RFC 3164 pattern and a Cisco ``%FACILITY-SEVERITY-MNEMONIC``
    pattern. Lines containing ``last message repeated`` and lines that fit
    no pattern are skipped. For Cisco messages the event is the ``%...``
    token and the resource is ``<host>:<facility>``, where the host comes
    from a reverse lookup of ``addr`` (falling back to ``addr`` itself).
    All other messages get an event built from the decoded priority and a
    resource built from the hostname and tag.

    Args:
        addr: address of the sender, used as the resource of Cisco messages.
        data: received text holding one or more messages.

    Returns:
        A list with one ``Alert`` or ``Heartbeat`` per message that was
        parsed and not suppressed by ``Transformers.normalise_alert``.
    """
    LOG.debug('Parsing syslog message...')
    syslogAlerts = list()

    for msg in data.split('\n'):

        # NOTE: if syslog msgs aren't being split on newlines and #012 appears instead then
        #       try adding "$EscapeControlCharactersOnReceive off" to rsyslog.conf

        if not msg or 'last message repeated' in msg:
            continue

        # Start every message from a clean slate. Without this reset the
        # event and resource of the first message in a packet would be
        # reused by every message after it.
        event = None
        resource = None
        HOSTNAME = None
        TAG = None

        if re.match(r'<\d+>1', msg):
            # Parse RFC 5424 compliant message
            m = re.match(r'<(\d+)>1 (\S+) (\S+) (\S+) (\S+) (\S+) (.*)', msg)
            if m:
                PRI = int(m.group(1))
                HOSTNAME = m.group(3)
                APPNAME = m.group(4)
                PROCID = m.group(5)
                MSGID = m.group(6)
                TAG = '%s[%s] %s' % (APPNAME, PROCID, MSGID)
                MSG = m.group(7)
                LOG.info("Parsed RFC 5424 message OK")
            else:
                LOG.error("Could not parse RFC 5424 syslog message: %s", msg)
                continue

        elif re.match(r'<(\d{1,3})>\S{3}\s', msg):
            # Parse RFC 3164 compliant message
            m = re.match(r'<(\d{1,3})>\S{3}\s{1,2}\d?\d \d{2}:\d{2}:\d{2} (\S+)( (\S+):)? (.*)', msg)
            if m:
                PRI = int(m.group(1))
                HOSTNAME = m.group(2)
                TAG = m.group(4)  # None when the optional "tag:" part is absent
                MSG = m.group(5)
                LOG.info("Parsed RFC 3164 message OK")
            else:
                LOG.error("Could not parse RFC 3164 syslog message: %s", msg)
                continue

        elif re.match(r'<\d+>.*%[A-Z0-9_-]+', msg):
            # Parse Cisco Syslog message
            m = re.match(r'<(\d+)>.*(%([A-Z0-9_-]+)):? (.*)', msg)
            if m:
                LOG.debug(m.groups())
                PRI = int(m.group(1))
                CISCO_SYSLOG = m.group(2)
                try:
                    CISCO_FACILITY, _, CISCO_MNEMONIC = m.group(3).split('-')
                except ValueError as e:
                    LOG.error('Could not parse Cisco syslog - %s: %s', e, m.group(3))
                    CISCO_FACILITY = CISCO_MNEMONIC = 'na'

                TAG = CISCO_MNEMONIC
                MSG = m.group(4)
                event = CISCO_SYSLOG

                # replace IP address with a hostname, if necessary
                try:
                    socket.inet_aton(addr)
                    (resource, _, _) = socket.gethostbyaddr(addr)
                except (socket.error, socket.herror):
                    resource = addr

                resource = '%s:%s' % (resource, CISCO_FACILITY)
            else:
                LOG.error("Could not parse Cisco syslog message: %s", msg)
                continue

        else:
            # No known format: skip the line instead of continuing with
            # unset (or previous-message) priority and text.
            LOG.error("Could not parse syslog message: %s", msg)
            continue

        facility, level = decode_priority(PRI)

        # Defaults
        event = event or '%s%s' % (facility.capitalize(), level.capitalize())
        resource = resource or '%s%s' % (HOSTNAME, ':' + TAG if TAG else '')
        severity = priority_to_code(level)
        group = 'Syslog'
        value = level
        text = MSG
        environment = 'Production'
        service = ['Platform']
        tags = ['%s.%s' % (facility, level)]
        correlate = list()
        timeout = None
        raw_data = msg

        syslogAlert = Alert(
            resource=resource,
            event=event,
            correlate=correlate,
            group=group,
            value=value,
            severity=severity,
            environment=environment,
            service=service,
            text=text,
            event_type='syslogAlert',
            tags=tags,
            timeout=timeout,
            raw_data=raw_data,
        )

        suppress = False
        try:
            suppress = Transformers.normalise_alert(syslogAlert, facility=facility, level=level)
        except RuntimeWarning:
            # Deliberately ignored: the alert is then kept, not suppressed.
            pass

        if suppress:
            LOG.info('Suppressing %s.%s alert', facility, level)
            LOG.debug('%s', syslogAlert)
            continue

        if syslogAlert.get_type() == 'Heartbeat':
            syslogAlert = Heartbeat(origin=syslogAlert.origin, timeout=syslogAlert.timeout)

        syslogAlerts.append(syslogAlert)

    return syslogAlerts
data = client.recv(4096) data = unicode(data, 'utf-8', errors='ignore') client.close() LOG.debug('Syslog TCP data received from %s: %s', addr, data) syslogAlerts = self.parse_syslog(addr[0], data) for syslogAlert in syslogAlerts: try: self.api.send(syslogAlert) except Exception, e: LOG.warning('Failed to send alert: %s', e) count += 1 if not ip or count % 5 == 0: LOG.debug('Send heartbeat...') heartbeat = Heartbeat(tags=[__version__]) try: self.api.send(heartbeat) except Exception, e: LOG.warning('Failed to send heartbeat: %s', e) except (KeyboardInterrupt, SystemExit): self.shuttingdown = True LOG.info('Shutdown request received...') def parse_syslog(self, addr, data): LOG.debug('Parsing syslog message...') syslogAlerts = list()