def run(self):
    """Poll the SQS queue forever, forwarding notifications as alerts.

    Each iteration reads from the queue (waiting up to 20 seconds), sends
    any received notification as an alert, deletes it from the queue, and
    then sends a heartbeat. This method never returns.
    """
    while True:
        LOG.debug('Waiting for CloudWatch alarms on %s...', AWS_SQS_QUEUE)
        try:
            message = self.sqs.read(wait_time_seconds=20)
        except boto.exception.SQSError as err:
            # Back off before polling again when the queue is unreadable.
            LOG.warning('Could not read from queue: %s', err)
            time.sleep(20)
            continue

        if message:
            alert = self.parse_notification(message)
            try:
                self.api.send(alert)
            except Exception as err:
                LOG.warning('Failed to send alert: %s', err)
            # The message is removed whether or not the send succeeded.
            self.sqs.delete_message(message)

        LOG.debug('Send heartbeat...')
        try:
            self.api.send(Heartbeat(tags=[__version__]))
        except Exception as err:
            LOG.warning('Failed to send heartbeat: %s', err)
def run(self):
    """Send e-mails for held alerts once their hold time has expired.

    Loops until ``self.should_stop`` is set. Every pass walks ``on_hold``,
    e-mails each alert whose hold time is in the past and removes it, then
    sleeps for 2 seconds. Roughly every tenth pass a heartbeat is sent; if
    that fails the loop sleeps 5 seconds and retries on the next pass.
    """
    api = ApiClient(endpoint=OPTIONS['endpoint'], key=OPTIONS['key'])
    keep_alive = 0

    while not self.should_stop:
        # Iterate over a snapshot of the keys: entries are deleted inside
        # the loop, and on Python 3 mutating a dict while iterating its
        # live keys() view raises RuntimeError.
        for alertid in list(on_hold.keys()):
            try:
                (alert, hold_time) = on_hold[alertid]
            except KeyError:
                # Entry disappeared since the snapshot was taken.
                continue
            if time.time() > hold_time:
                self.send_email(alert)
                try:
                    del on_hold[alertid]
                except KeyError:
                    continue

        if keep_alive >= 10:
            tag = OPTIONS['smtp_host'] or 'alerta-mailer'
            try:
                api.send(Heartbeat(tags=[tag]))
            except Exception:
                # Best effort: leave keep_alive untouched so the heartbeat
                # is attempted again on the next pass.
                time.sleep(5)
                continue
            keep_alive = 0
        keep_alive += 1
        time.sleep(2)
class SnmpTrapHandler(object):
    """Read one trap from standard input and forward it through the API."""

    def __init__(self):
        # The API client is created lazily in run().
        self.api = None

    def run(self):
        """Parse the trap data on stdin, send it, then send a heartbeat.

        The endpoint and API key are taken from the ``ALERTA_ENDPOINT`` and
        ``ALERTA_API_KEY`` environment variables. Send failures are logged
        and otherwise ignored.
        """
        endpoint = os.environ.get('ALERTA_ENDPOINT', 'http://localhost:8080')
        key = os.environ.get('ALERTA_API_KEY', None)

        self.api = ApiClient(endpoint=endpoint, key=key)

        data = sys.stdin.read()
        LOG.info('snmptrapd -> %r', data)
        # Decode only when stdin yielded bytes (always the case on Python 2);
        # on Python 3 the text is already decoded and `unicode` does not exist.
        if isinstance(data, bytes):
            data = data.decode('utf-8', 'ignore')
        LOG.debug('unicoded -> %s', data)

        snmptrapAlert = SnmpTrapHandler.parse_snmptrap(data)

        if snmptrapAlert:
            try:
                self.api.send(snmptrapAlert)
            except Exception as e:
                LOG.warning('Failed to send alert: %s', e)

        LOG.debug('Send heartbeat...')
        heartbeat = Heartbeat(tags=[__version__])
        try:
            self.api.send(heartbeat)
        except Exception as e:
            LOG.warning('Failed to send heartbeat: %s', e)
def createheartbeat():
    """Send a heartbeat, retrying up to ``max_retries`` times.

    The response of the first successful send is printed. After each
    failed attempt the error is printed and the function sleeps for
    ``sleep`` seconds. If every attempt fails, "api is down" is printed.
    """
    heartbeat = Heartbeat(origin=origin, timeout=timeout)

    for _attempt in range(max_retries):
        try:
            print(api.send(heartbeat))
        except Exception as err:
            print("HTTP Error: {}".format(err))
            time.sleep(sleep)
            continue
        # Sent successfully; nothing more to do.
        return

    print("api is down")
class PingerDaemon(object):
    """Queue ping targets for worker threads and send periodic heartbeats."""

    def __init__(self):
        # Set to True to make the main loop in run() exit.
        self.shuttingdown = False

    def run(self):
        """Start the worker threads, then queue every target each cycle.

        Each cycle puts one ``(environment, service, target, retries,
        timestamp)`` tuple on the queue per configured target, sends a
        heartbeat, and sleeps for ``LOOP_EVERY`` seconds. The loop ends when
        ``self.shuttingdown`` is set, which also happens on
        KeyboardInterrupt or SystemExit.
        """
        self.running = True

        # Create internal queue
        self.queue = Queue.Queue()

        self.api = ApiClient()

        # Initialise ping targets
        ping_list = init_targets()

        # Start worker threads
        LOG.debug('Starting %s worker threads...', SERVER_THREAD_COUNT)
        for i in range(SERVER_THREAD_COUNT):
            w = WorkerThread(self.api, self.queue)
            try:
                w.start()
            except Exception as e:
                LOG.error('Worker thread #%s did not start: %s', i, e)
                continue
            LOG.info('Started worker thread: %s', w.getName())

        while not self.shuttingdown:
            try:
                for p in ping_list:
                    targets = p.get('targets')
                    if not targets:
                        continue
                    # These are the same for every target of this entry,
                    # so look them up once rather than per target.
                    environment = p['environment']
                    service = p['service']
                    retries = p.get('retries', PING_MAX_RETRIES)
                    for target in targets:
                        self.queue.put((environment, service, target, retries, time.time()))

                LOG.debug('Send heartbeat...')
                heartbeat = Heartbeat(tags=[__version__])
                try:
                    self.api.send(heartbeat)
                except Exception as e:
                    LOG.warning('Failed to send heartbeat: %s', e)

                time.sleep(LOOP_EVERY)
                LOG.info('Ping queue length is %d', self.queue.qsize())

            except (KeyboardInterrupt, SystemExit):
                self.shuttingdown = True
def main():
    """Run the event-listener loop, forwarding each event through the API.

    For every event received from the listener: events whose name starts
    with 'TICK' are sent as heartbeats, all others as alerts whose severity
    is derived from the event-name suffix. The listener is then told
    'RESULT 2\\nOK' on success or 'RESULT 4\\nFAIL' if the send raised.
    This function never returns.
    """
    # Checked in order; the first matching suffix decides the severity.
    severity_by_suffix = (
        ('FATAL', 'critical'),
        ('BACKOFF', 'warning'),
        ('EXITED', 'minor'),
    )
    process_states = (
        'PROCESS_STATE_STARTING',
        'PROCESS_STATE_RUNNING',
        'PROCESS_STATE_BACKOFF',
        'PROCESS_STATE_STOPPING',
        'PROCESS_STATE_EXITED',
        'PROCESS_STATE_STOPPED',
        'PROCESS_STATE_FATAL',
        'PROCESS_STATE_UNKNOWN',
    )

    api = ApiClient()
    listener = Listener()

    while True:
        listener.send_cmd('READY\n')
        headers, body = listener.wait()
        event = headers['eventname']

        if event.startswith('TICK'):
            message = Heartbeat(origin='supervisord', tags=[headers['ver'], event])
        else:
            severity = next(
                (level for suffix, level in severity_by_suffix if event.endswith(suffix)),
                'normal',
            )
            message = Alert(
                resource='%s:%s' % (platform.uname()[1], body['processname']),
                environment='Production',
                service=['supervisord'],
                event=event,
                correlate=list(process_states),
                value='serial=%s' % headers['serial'],
                severity=severity,
                origin=headers['server'],
                text='State changed from %s to %s.' % (body['from_state'], event),
                raw_data='%s\n\n%s' % (json.dumps(headers), json.dumps(body)),
            )

        try:
            api.send(message)
        except Exception as err:
            listener.log_stderr(err)
            listener.send_cmd('RESULT 4\nFAIL')
        else:
            listener.send_cmd('RESULT 2\nOK')
class UrlmonDaemon(object):
    """Queue URL checks for worker threads and send periodic heartbeats."""

    def __init__(self):
        # Set to True to make the main loop in run() exit.
        self.shuttingdown = False

    def run(self):
        """Start the worker threads, then queue every check each cycle.

        Each cycle puts one ``(check, timestamp)`` tuple on the queue per
        entry in ``settings.checks``, sends a heartbeat, and sleeps for
        ``LOOP_EVERY`` seconds. The loop ends when ``self.shuttingdown`` is
        set, which also happens on KeyboardInterrupt or SystemExit.
        """
        self.running = True

        self.queue = Queue.Queue()
        # A single assignment suffices; the original chained the same
        # attribute twice.
        self.api = ApiClient(endpoint=settings.ENDPOINT, key=settings.API_KEY)

        # Start worker threads
        LOG.debug('Starting %s worker threads...', SERVER_THREADS)
        for i in range(SERVER_THREADS):
            w = WorkerThread(self.queue, self.api)
            try:
                w.start()
            except Exception as e:
                LOG.error('Worker thread #%s did not start: %s', i, e)
                continue
            LOG.info('Started worker thread: %s', w.getName())

        while not self.shuttingdown:
            try:
                for check in settings.checks:
                    self.queue.put((check, time.time()))

                LOG.debug('Send heartbeat...')
                heartbeat = Heartbeat(tags=[__version__])
                try:
                    self.api.send(heartbeat)
                except Exception as e:
                    LOG.warning('Failed to send heartbeat: %s', e)

                time.sleep(LOOP_EVERY)
                LOG.info('URL check queue length is %d', self.queue.qsize())

            except (KeyboardInterrupt, SystemExit):
                self.shuttingdown = True
def run(self): count = 0 while not self.shuttingdown: try: LOG.debug('Waiting for syslog messages...') ip, op, rdy = select.select([self.udp, self.tcp], [], [], LOOP_EVERY) if ip: for i in ip: if i == self.udp: data, addr = self.udp.recvfrom(4096) data = unicode(data, 'utf-8', errors='ignore') LOG.debug('Syslog UDP data received from %s: %s', addr, data) if i == self.tcp: client, addr = self.tcp.accept() data = client.recv(4096) data = unicode(data, 'utf-8', errors='ignore') client.close() LOG.debug('Syslog TCP data received from %s: %s', addr, data) alerts = self.parse_syslog(ip=addr[0], data=data) for alert in alerts: try: self.api.send(alert) except Exception, e: LOG.warning('Failed to send alert: %s', e) count += 1 if not ip or count % 5 == 0: LOG.debug('Send heartbeat...') heartbeat = Heartbeat(tags=[__version__]) try: self.api.send(heartbeat) except Exception, e: LOG.warning('Failed to send heartbeat: %s', e)
def parse_snmptrap(data):
    """Turn raw trap text into an Alert (or Heartbeat).

    Lines starting with '$' are stored as trap variables keyed by their
    first token. The remaining text is split on '~%~' into varbinds, whose
    values are also stored as '$1', '$2', ... in the trap variables.

    Returns None (after logging a warning) when no '$s' variable is
    present. Otherwise returns the Alert built from the trap variables
    after ``SnmpTrapHandler.translate_alert`` has processed it, or a
    Heartbeat if the alert reports its type as 'Heartbeat'.
    """
    lines = data.splitlines()

    # Gather the "$name value" special variables.
    trapvars = {}
    special_count = 0
    for line in lines:
        if not line.startswith('$'):
            continue
        name, value = line.split(None, 1)
        trapvars[name] = value
        special_count += 1
    # One leading line is discarded for every special variable found.
    varbind_lines = lines[special_count:]

    if '$s' not in trapvars:
        LOG.warning('Failed to parse unknown trap type.')
        return

    # '2' maps to SNMPv2u, which is not supported further down.
    version_names = {'0': 'SNMPv1', '1': 'SNMPv2c', '2': 'SNMPv2u'}
    version = version_names.get(trapvars['$s'], 'SNMPv3')
    trapvars['$s'] = version

    # Get varbinds
    varbinds = {}
    count = 0
    for chunk in '\n'.join(varbind_lines).split('~%~'):
        if chunk == '':
            break
        count += 1
        parts = chunk.split(None, 1)
        if len(parts) == 2:
            oid, value = parts
        else:
            # No value part: the whole chunk is the OID.
            oid, value = chunk, ''
        varbinds[oid] = value
        trapvars['$' + str(count)] = value  # $n
        LOG.debug('$%s %s', str(count), value)

    trapvars['$q'] = trapvars['$q'].lstrip('.')  # if numeric, remove leading '.'
    trapvars['$#'] = str(count)

    LOG.debug('varbinds = %s', varbinds)
    LOG.debug('version = %s', version)

    correlate = []

    if version == 'SNMPv1':
        # Generic trap number -> (event name, events to correlate with).
        generic_traps = {
            '0': ('coldStart', ['coldStart', 'warmStart']),
            '1': ('warmStart', ['coldStart', 'warmStart']),
            '2': ('linkDown', ['linkUp', 'linkDown']),
            '3': ('linkUp', ['linkUp', 'linkDown']),
            '4': ('authenticationFailure', []),
            '5': ('egpNeighborLoss', []),
        }
        generic = trapvars['$w']
        if generic in generic_traps:
            trapvars['$O'], correlate = generic_traps[generic]
        elif generic == '6':  # enterpriseSpecific(6)
            specific = trapvars['$q']
            if specific.isdigit():  # XXX - specific trap number was not decoded
                trapvars['$O'] = '%s.0.%s' % (trapvars['$N'], specific)
            else:
                trapvars['$O'] = specific

    elif version == 'SNMPv2c':
        # Checked in order; the first name found in the trap OID wins.
        standard_traps = (
            ('coldStart', '0', 'Cold Start'),
            ('warmStart', '1', 'Warm Start'),
            ('linkDown', '2', 'Link Down'),
            ('linkUp', '3', 'Link Up'),
            ('authenticationFailure', '4', 'Authentication Failure'),
            ('egpNeighborLoss', '5', 'EGP Neighbor Loss'),
        )
        trap_oid = trapvars['$2']
        for needle, code, label in standard_traps:
            if needle in trap_oid:
                break
        else:
            code, label = '6', 'Enterprise Specific'
        trapvars['$w'] = code
        trapvars['$W'] = label
        trapvars['$O'] = trap_oid  # SNMPv2-MIB::snmpTrapOID.0

    LOG.debug('trapvars = %s', trapvars)

    LOG.info('%s-Trap-PDU %s from %s at %s %s', version, trapvars['$O'],
             trapvars['$B'], trapvars['$x'], trapvars['$X'])

    # Pick the resource: $B, else $A, else the address embedded in $b.
    if trapvars['$B'] != '<UNKNOWN>':
        resource = trapvars['$B']
    elif trapvars['$A'] != '0.0.0.0':
        resource = trapvars['$A']
    else:
        match = re.match(r'UDP: \[(\d+\.\d+\.\d+\.\d+)\]', trapvars['$b'])
        resource = match.group(1) if match else '<NONE>'

    # Defaults
    event = trapvars['$O']
    value = trapvars['$w']
    text = trapvars['$W']
    attributes = {'source': trapvars['$B']}
    create_time = datetime.datetime.strptime(
        '%sT%s.000Z' % (trapvars['$x'], trapvars['$X']),
        '%Y-%m-%dT%H:%M:%S.%fZ')

    snmptrapAlert = Alert(
        resource=resource,
        event=event,
        correlate=correlate,
        group='SNMP',
        value=value,
        severity='indeterminate',
        environment='Production',
        service=['Network'],
        text=text,
        event_type='snmptrapAlert',
        attributes=attributes,
        tags=[version],
        create_time=create_time,
        raw_data=data,
    )

    SnmpTrapHandler.translate_alert(snmptrapAlert, trapvars)

    if snmptrapAlert.get_type() == 'Heartbeat':
        snmptrapAlert = Heartbeat(origin=snmptrapAlert.origin, tags=[__version__])

    return snmptrapAlert