class SnmpTrapHandler(object):
    """Forward one snmptrapd notification, read from stdin, to the Alerta API."""

    def __init__(self):
        # The client is created in run(), once the environment has been read.
        self.api = None

    def run(self):
        """Read a trap from stdin, send it as an alert, then send a heartbeat.

        The endpoint and API key come from the ``ALERTA_ENDPOINT`` and
        ``ALERTA_API_KEY`` environment variables. Failures to send the alert
        or the heartbeat are logged as warnings and do not raise.
        """
        endpoint = os.environ.get('ALERTA_ENDPOINT', 'http://localhost:8080')
        key = os.environ.get('ALERTA_API_KEY', None)
        self.api = ApiClient(endpoint=endpoint, key=key)

        data = sys.stdin.read()
        LOG.info('snmptrapd -> %r', data)
        # Decode byte strings, dropping undecodable bytes. On Python 3 stdin
        # already yields text, so there is nothing to decode.
        if isinstance(data, bytes):
            data = data.decode('utf-8', 'ignore')
        LOG.debug('unicoded -> %s', data)

        snmptrapAlert = SnmpTrapHandler.parse_snmptrap(data)
        if snmptrapAlert:
            try:
                self.api.send(snmptrapAlert)
            except Exception as e:
                LOG.warning('Failed to send alert: %s', e)

        # The heartbeat is sent whether or not the trap produced an alert.
        LOG.debug('Send heartbeat...')
        heartbeat = Heartbeat(tags=[__version__])
        try:
            self.api.send(heartbeat)
        except Exception as e:
            LOG.warning('Failed to send heartbeat: %s', e)
def alert_tunnel_up(outside_ip, status_message, last_status_change,
                    aws_acc=None, gwid=None, gwip=None, vpnid=None):
    """Send a 'TunnelUp' alert, but only when a '.down' marker file exists.

    The marker file name is derived from the VPN id, gateway id and outside
    IP. When the alert is sent successfully the marker file is removed; any
    exception raised while sending or removing is printed and swallowed.
    """
    api = ApiClient(endpoint=alerta_endpoint)
    alertres = vpnid + ',' + gwid + ',' + outside_ip
    status_file = tempdir + '/' + alertres.replace(',', '_') + '.down'

    # No marker file means no down alert was recorded, so there is nothing
    # to clear.
    if not os.path.exists(status_file):
        return

    message = (aws_acc + ' : Tunnel ' + outside_ip + ' up since '
               + last_status_change + '.' + 'endpoint: ' + gwip)
    details = {
        'customer': 'The Guardian',
        'account': aws_acc,
        'GatewayId': gwid + ' [ ' + gwip + ' ]',
        'vpnId': vpnid,
        'TunnelOutsideIp': outside_ip,
    }
    alert = Alert(
        resource=alertres,
        event='TunnelUp',
        correlate=['TunnelDown'],
        group='aws',
        environment='Production',
        service=[aws_acc],
        severity='normal',
        value=status_message,
        text=message,
        tags=['aws'],
        attributes=details,
    )

    try:
        api.send(alert)
        os.remove(status_file)
    except Exception as e:
        print(e)
def alert(resource, event, text, value, severity, status, go=False):
    """Send an alert in group 'ir' / environment 'Production' to the Alerta API.

    Unless *go* is true, the alert that was sent and the API response are
    printed to stdout.
    """
    # NOTE(review): the endpoint and API key are hard-coded in source; they
    # should come from configuration or the environment.
    api = ApiClient(endpoint='http://alert.localhost/api', key='UszE5hI_hx5pXKcsCP_2&1DIs&9_Ve*k')

    # Original author's note on the timeout: "2h expired". The value is
    # passed as the string '2880', unchanged.
    alert_info = Alert(
        resource=resource,
        event=event,
        text=text,
        group='ir',
        environment="Production",
        service=["localhost"],
        status=status,
        timeout='2880',
        value=value,
        severity=severity,
    )
    response = api.send(alert_info)

    # NOTE(review): the source's indentation was lost; both prints are assumed
    # to belong to the `if not go` branch.
    if not go:
        # Single-argument print() calls give the same output on Python 2 and 3.
        print('alert info: %s' % (alert_info,))
        print(response)
def alert_rawData(resource, event, text, value, rawData, severity, status, go=False):
    """Send an alert carrying *rawData* in group 'ir' to the Alerta API.

    Identical to a plain alert except that *rawData* is passed through to the
    ``Alert`` as its ``rawData`` keyword. Unless *go* is true, the alert that
    was sent and the API response are printed to stdout.
    """
    # NOTE(review): the endpoint and API key are hard-coded in source; they
    # should come from configuration or the environment.
    api = ApiClient(endpoint='http://alert.localhost/api', key='UszE5hI_hx5pXKcsCP_2&1DIs&9_Ve*k')

    # Original author's note on the timeout: "2h expired". The value is
    # passed as the string '2880', unchanged.
    alert_info = Alert(
        resource=resource,
        event=event,
        text=text,
        rawData=rawData,
        group='ir',
        environment="Production",
        service=["localhost"],
        status=status,
        timeout='2880',
        value=value,
        severity=severity,
    )
    response = api.send(alert_info)

    # NOTE(review): the source's indentation was lost; both prints are assumed
    # to belong to the `if not go` branch.
    if not go:
        # Single-argument print() calls give the same output on Python 2 and 3.
        print('alert info: %s' % (alert_info,))
        print(response)
def main():
    """Run the supervisord event-listener loop, relaying events to Alerta.

    Each event read from the listener becomes either a Heartbeat (event names
    starting with 'TICK') or an Alert whose severity depends on the event
    name's suffix. The listener is told 'OK' or 'FAIL' depending on whether
    the send succeeded.
    """
    # Checked in order; the first matching suffix wins, otherwise 'normal'.
    suffix_severity = (
        ('FATAL', 'critical'),
        ('BACKOFF', 'warning'),
        ('EXITED', 'minor'),
    )
    process_states = [
        'PROCESS_STATE_STARTING',
        'PROCESS_STATE_RUNNING',
        'PROCESS_STATE_BACKOFF',
        'PROCESS_STATE_STOPPING',
        'PROCESS_STATE_EXITED',
        'PROCESS_STATE_STOPPED',
        'PROCESS_STATE_FATAL',
        'PROCESS_STATE_UNKNOWN'
    ]

    api = ApiClient()
    listener = Listener()

    while True:
        listener.send_cmd('READY\n')
        headers, body = listener.wait()
        event = headers['eventname']

        if event.startswith('TICK'):
            message = Heartbeat(origin='supervisord', tags=[headers['ver'], event])
        else:
            severity = next(
                (level for suffix, level in suffix_severity if event.endswith(suffix)),
                'normal',
            )
            message = Alert(
                resource=body['processname'],
                environment='Production',
                service=['supervisord'],
                event=event,
                correlate=process_states,
                value='serial=%s' % headers['serial'],
                severity=severity,
                origin=headers['server'],
                text='State changed from %s to %s.' % (body['from_state'], event),
                raw_data='%s\n\n%s' % (json.dumps(headers), json.dumps(body))
            )

        try:
            api.send(message)
        except Exception as e:
            listener.log_stderr(e)
            listener.send_cmd('RESULT 4\nFAIL')
        else:
            listener.send_cmd('RESULT 2\nOK')
def run(self):
    """Read one snmptrapd notification from stdin and send it to Alerta.

    The endpoint and API key come from the ``ALERTA_ENDPOINT`` and
    ``ALERTA_API_KEY`` environment variables. A failure to send the alert is
    logged as a warning and does not raise.
    """
    endpoint = os.environ.get('ALERTA_ENDPOINT', 'http://localhost:8080')
    key = os.environ.get('ALERTA_API_KEY', None)
    self.api = ApiClient(endpoint=endpoint, key=key)

    data = sys.stdin.read()
    LOG.info('snmptrapd -> %r', data)
    # Decode byte strings, dropping undecodable bytes. On Python 3 stdin
    # already yields text, so there is nothing to decode.
    if isinstance(data, bytes):
        data = data.decode('utf-8', 'ignore')
    LOG.debug('unicoded -> %s', data)

    snmptrapAlert = SnmpTrapHandler.parse_snmptrap(data)
    if snmptrapAlert:
        try:
            self.api.send(snmptrapAlert)
        except Exception as e:
            LOG.warning('Failed to send alert: %s', e)
def __init__(self):
    """Create the Alerta API client and open the SQS queue.

    Exits the process with status 1 if the SQS connection or the queue
    cannot be set up.
    """
    self.api = ApiClient()

    try:
        connection = boto.sqs.connect_to_region(
            AWS_REGION,
            aws_access_key_id=AWS_ACCESS_KEY_ID,
            aws_secret_access_key=AWS_SECRET_ACCESS_KEY)
    except boto.exception.SQSError as e:
        LOG.error('SQS API call failed: %s', e)
        sys.exit(1)

    # connect_to_region() returns None instead of raising when the region
    # name is not recognised; fail clearly rather than with AttributeError.
    if connection is None:
        LOG.error('SQS API call failed: unknown region %s', AWS_REGION)
        sys.exit(1)

    try:
        self.sqs = connection.create_queue(AWS_SQS_QUEUE)
        # Read message bodies as-is.
        self.sqs.set_message_class(RawMessage)
    except boto.exception.SQSError as e:
        LOG.error('SQS queue error: %s', e)
        sys.exit(1)
def alert_tunnel_down(outside_ip, status_message, last_status_change,
                      aws_acc=None, gwid=None, gwip=None, vpnid=None,
                      severity='minor'):
    """Send a 'TunnelDown' alert, throttled through a '.down' marker file.

    If the marker file already exists and the severity is not 'critical',
    the stored counter is incremented and recorded, and the alert is only
    re-sent once the counter reaches 5 (the counter then restarts at 1).
    After a successful send the counter is written to the marker file; any
    exception raised while sending or recording is printed and swallowed.
    """
    api = ApiClient(endpoint=alerta_endpoint)
    alertres = vpnid + ',' + gwid + ',' + outside_ip
    status_file = tempdir + '/' + alertres.replace(',', '_') + '.down'
    count = 1

    if os.path.exists(status_file) and severity != 'critical':
        # A down alert has been sent before: bump the stored counter.
        try:
            count = int(get_down_count(status_file)) + 1
        except Exception:
            # Unreadable counter: use a value that forces a re-send below.
            count = 10
        record_status(status_file, str(count))
        # Original author's note: cron is assumed to run every 2 min, so a
        # threshold of 5 re-sends the down alert every 10 min.
        if count < 5:
            return
        count = 1

    message = (aws_acc + ' : Tunnel ' + outside_ip + ' Down since '
               + last_status_change + '.' + ' endpoint: ' + gwip)
    details = {
        'customer': 'The Guardian',
        'account': aws_acc,
        'GatewayId': gwid + ' [ ' + gwip + ' ]',
        'vpnId': vpnid,
        'TunnelOutsideIp': outside_ip,
    }
    alert = Alert(
        resource=alertres,
        event='TunnelDown',
        correlate=['TunnelUp'],
        group='aws',
        environment='Production',
        service=[aws_acc],
        severity=severity,
        value=status_message,
        text=message,
        tags=['aws'],
        attributes=details,
    )

    try:
        api.send(alert)
        record_status(status_file, str(count))
    except Exception as e:
        print(e)
def run(self):
    """Send held alerts by e-mail once their hold time passes; emit heartbeats.

    Loops until ``self.should_stop`` is set, sleeping 2 seconds per pass.
    A heartbeat is sent once the pass counter reaches 10, after which the
    counter restarts.
    """
    api = ApiClient(endpoint=OPTIONS["endpoint"], key=OPTIONS["key"])
    keep_alive = 0

    while not self.should_stop:
        # Iterate over a snapshot of the keys: entries are deleted inside the
        # loop, and iterating a live key view while the dict changes size
        # raises RuntimeError on Python 3.
        for alertid in list(on_hold.keys()):
            try:
                (alert, hold_time) = on_hold[alertid]
            except KeyError:
                # The entry disappeared after the snapshot was taken.
                continue
            if time.time() > hold_time:
                self.send_email(alert)
                try:
                    del on_hold[alertid]
                except KeyError:
                    continue

        if keep_alive >= 10:
            tag = OPTIONS["smtp_host"] or "alerta-mailer"
            api.send(Heartbeat(tags=[tag]))
            keep_alive = 0
        keep_alive += 1
        time.sleep(2)
def run(self):
    """Create the work queue and API client, then start the worker threads.

    A worker that fails to start is logged as an error and skipped; the
    remaining workers are still started.
    """
    self.running = True
    self.queue = Queue.Queue()
    self.api = ApiClient(endpoint=settings.ENDPOINT, key=settings.API_KEY)

    # Start worker threads
    LOG.debug('Starting %s worker threads...', SERVER_THREADS)
    for i in range(SERVER_THREADS):
        w = WorkerThread(self.queue, self.api)
        try:
            w.start()
        except Exception as e:
            LOG.error('Worker thread #%s did not start: %s', i, e)
            continue
        LOG.info('Started worker thread: %s', w.getName())
def __init__(self):
    """Create the Alerta API client and open the SQS queue.

    Exits the process with status 1 if the SQS connection or the queue
    cannot be set up.
    """
    self.api = ApiClient()

    try:
        connection = boto.sqs.connect_to_region(
            AWS_REGION,
            aws_access_key_id=AWS_ACCESS_KEY_ID,
            aws_secret_access_key=AWS_SECRET_ACCESS_KEY
        )
    except boto.exception.SQSError as e:
        LOG.error('SQS API call failed: %s', e)
        sys.exit(1)

    # connect_to_region() returns None instead of raising when the region
    # name is not recognised; fail clearly rather than with AttributeError.
    if connection is None:
        LOG.error('SQS API call failed: unknown region %s', AWS_REGION)
        sys.exit(1)

    try:
        self.sqs = connection.create_queue(AWS_SQS_QUEUE)
        # Read message bodies as-is.
        self.sqs.set_message_class(RawMessage)
    except boto.exception.SQSError as e:
        LOG.error('SQS queue error: %s', e)
        sys.exit(1)
def __init__(self):
    """Attach an ``ApiClient`` built with no arguments as ``self.api``."""
    self.api = ApiClient()
def set(self, endpoint, key):
    """Replace ``self.api`` with a client bound to *endpoint* and *key*."""
    self.api = ApiClient(
        endpoint=endpoint,
        key=key,
    )
class AlertCommand(object):
    """Implementation of the command-line sub-commands of the alerta client.

    Each public method receives the parsed command-line ``args`` namespace,
    talks to the Alerta API through ``self.api`` and prints its result to
    stdout. Most API failures are logged and end the process with status 1.
    """

    def __init__(self):
        self.api = ApiClient()

    def set(self, endpoint, key):
        """Replace the API client with one bound to *endpoint* and *key*."""
        self.api = ApiClient(endpoint=endpoint, key=key)

    # ------------------------------------------------------------------
    # sending
    # ------------------------------------------------------------------

    def send(self, args):
        """Build an alert from *args*, send it and print its id and outcome."""
        try:
            alert = Alert(
                resource=args.resource,
                event=args.event,
                environment=args.environment,
                severity=args.severity,
                correlate=args.correlate,
                status=args.status,
                service=args.service,
                group=args.group,
                value=args.value,
                text=args.text,
                tags=args.tags,
                # Split on the first '=' only, so values may contain '='.
                attributes=dict(attrib.split('=', 1) for attrib in args.attributes),
                origin=args.origin,
                event_type=args.event_type,
                timeout=args.timeout,
                raw_data=args.raw_data
            )
        except Exception as e:
            LOG.error(e)
            sys.exit(1)

        try:
            response = self.api.send(alert)
        except Exception as e:
            LOG.error(e)
            sys.exit(1)

        if response['status'] == 'ok':
            if 'alert' not in response:
                info = response['message']
            elif response['alert']['repeat']:
                info = "%s duplicates" % response['alert']['duplicateCount']
            else:
                info = "%s -> %s" % (response['alert']['previousSeverity'],
                                     response['alert']['severity'])
            print("{} ({})".format(response['id'], info))
        else:
            LOG.error(response['message'])
            sys.exit(1)

    def heartbeat(self, args):
        """Send a heartbeat built from *args* and print its id."""
        try:
            heartbeat = Heartbeat(origin=args.origin, tags=args.tags, timeout=args.timeout)
        except Exception as e:
            LOG.error(e)
            sys.exit(1)

        try:
            response = self.api.send(heartbeat)
        except Exception as e:
            LOG.error(e)
            sys.exit(1)

        if response['status'] == 'ok':
            print(response['id'])
        else:
            LOG.error(response['message'])
            sys.exit(1)

    # ------------------------------------------------------------------
    # querying / display
    # ------------------------------------------------------------------

    @staticmethod
    def _emit(line_color, text):
        """Print *text* wrapped in *line_color* and the reset sequence."""
        print(line_color + text + _ENDC)

    def query(self, args, from_date=None):
        """Print alerts matching ``args.filters``, oldest first.

        With ``args.output == "json"`` the alerts are dumped as JSON and the
        process exits with status 0. Returns the response's ``lastTime``
        value ('' if absent).
        """
        response = self._alerts(args.filters, from_date)
        alerts = response['alerts']

        if args.output == "json":
            print(json.dumps(alerts, indent=4))
            sys.exit(0)

        for alert in reversed(alerts):
            a = AlertDocument.parse_alert(alert)
            line_color = ''
            if args.color:
                line_color = _COLOR_MAP.get(a.severity, _COLOR_MAP['unknown'])

            self._emit(
                line_color,
                '{0}|{1}|{2}|{3:5d}|{4}|{5:<5s}|{6:<10s}|{7:<18s}|{8:12s}|{9:16s}|{10:12s}'.format(
                    a.id[0:8],
                    a.get_date('last_receive_time', 'local', args.timezone),
                    a.severity,
                    a.duplicate_count,
                    a.customer or "-",
                    a.environment,
                    ','.join(a.service),
                    a.resource,
                    a.group,
                    a.event,
                    a.value))
            self._emit(line_color, ' |{}'.format(a.text))

            if args.details:
                self._emit(line_color, ' severity | {} -> {}'.format(a.previous_severity, a.severity))
                self._emit(line_color, ' trend | {}'.format(a.trend_indication))
                self._emit(line_color, ' status | {}'.format(a.status))
                self._emit(line_color, ' resource | {}'.format(a.resource))
                self._emit(line_color, ' group | {}'.format(a.group))
                self._emit(line_color, ' event | {}'.format(a.event))
                self._emit(line_color, ' value | {}'.format(a.value))
                self._emit(line_color, ' tags | {}'.format(' '.join(a.tags)))

                for key, value in a.attributes.items():
                    self._emit(line_color, ' {} | {}'.format(key.ljust(10), value))

                latency = a.receive_time - a.create_time

                self._emit(line_color, ' time created | {}'.format(
                    a.get_date('create_time', 'iso', args.timezone)))
                self._emit(line_color, ' time received | {}'.format(
                    a.get_date('receive_time', 'iso', args.timezone)))
                self._emit(line_color, ' last received | {}'.format(
                    a.get_date('last_receive_time', 'iso', args.timezone)))
                self._emit(line_color, ' latency | {}ms'.format((latency.microseconds / 1000)))
                self._emit(line_color, ' timeout | {}s'.format(a.timeout))

                self._emit(line_color, ' alert id | {}'.format(a.id))
                self._emit(line_color, ' last recv id | {}'.format(a.last_receive_id))
                self._emit(line_color, ' customer | {}'.format(a.customer))
                self._emit(line_color, ' environment | {}'.format(a.environment))
                self._emit(line_color, ' service | {}'.format(','.join(a.service)))
                self._emit(line_color, ' resource | {}'.format(a.resource))
                self._emit(line_color, ' type | {}'.format(a.event_type))
                self._emit(line_color, ' repeat | {}'.format(a.repeat))
                self._emit(line_color, ' origin | {}'.format(a.origin))
                self._emit(line_color, ' correlate | {}'.format(','.join(a.correlate)))

        return response.get('lastTime', '')

    def watch(self, args):
        """Re-run :meth:`query` every 2 seconds, starting from the last time seen."""
        from_date = None
        while True:
            from_date = self.query(args, from_date)
            try:
                time.sleep(2)
            except (KeyboardInterrupt, SystemExit):
                sys.exit(0)

    def top(self, args):
        """Run the full-screen view until it fails or is interrupted."""
        screen = Screen(endpoint=args.endpoint, key=args.key)
        try:
            screen.run()
        except RuntimeError as e:
            screen._reset()
            print(e)
            sys.exit(1)
        except (KeyboardInterrupt, SystemExit):
            screen.w.running = False
            screen._reset()
            print('Exiting...')
            sys.exit(0)

    def raw(self, args):
        """Print the ``rawData`` of each matching alert, oldest first."""
        response = self._alerts(args.filters)
        alerts = response['alerts']

        if args.output == "json":
            print(json.dumps(alerts, indent=4))
            sys.exit(0)

        for alert in reversed(alerts):
            self._emit('', '%s' % alert['rawData'])

    def history(self, args):
        """Print severity and status changes for alerts matching the filters."""
        response = self._history(args.filters)
        history = response['history']

        if args.output == "json":
            print(json.dumps(history, indent=4))
            sys.exit(0)

        row_format = '%s|%s|%s|%s|%-5s|%-10s|%-18s|%s|%s|%s|%s'
        for hist in history:
            line_color = ''
            update_time = datetime.strptime(hist.get('updateTime', None), '%Y-%m-%dT%H:%M:%S.%fZ')

            if 'severity' in hist:
                if args.color:
                    line_color = _COLOR_MAP.get(hist['severity'], _COLOR_MAP['unknown'])
                self._emit(line_color, row_format % (
                    hist['id'][0:8],
                    update_time.strftime('%Y/%m/%d %H:%M:%S'),
                    hist['severity'],
                    hist['customer'],
                    hist['environment'],
                    ','.join(hist['service']),
                    hist['resource'],
                    hist['group'],
                    hist['event'],
                    hist['value'],
                    hist['text']))

            if 'status' in hist:
                # Status-change rows have no value; 'n/a' fills that column.
                self._emit(line_color, row_format % (
                    hist['id'][0:8],
                    update_time.strftime('%Y/%m/%d %H:%M:%S'),
                    hist['status'],
                    hist['customer'],
                    hist['environment'],
                    ','.join(hist['service']),
                    hist['resource'],
                    hist['group'],
                    hist['event'],
                    'n/a',
                    hist['text']))

    # ------------------------------------------------------------------
    # bulk operations
    # ------------------------------------------------------------------

    @staticmethod
    def _apply_with_progress(alerts, total, action):
        """Call ``action(alert_id)`` for each alert, drawing a progress counter.

        The first failing call is logged and ends the process with status 1.
        """
        for i, alert in enumerate(alerts):
            # Guard against a zero total so the percentage cannot divide by 0.
            pct = int(100.0 * i / total) if total else 0
            sys.stdout.write("%3d%% (%d/%d)" % (pct, i, total))
            sys.stdout.flush()
            # Backspace over the counter so the next one overwrites it.
            sys.stdout.write("\b" * (8 + len(str(i)) + len(str(total))))
            try:
                action(alert['id'])
            except Exception as e:
                print()
                LOG.error(e)
                sys.exit(1)

        sys.stdout.write("100%% (%d/%d), done.\n" % (total, total))

    def _bulk_from_alerts(self, filters, label, action):
        """Apply *action* to matching alerts, taking the total from the alert query."""
        sys.stdout.write("Counting alerts: ")
        response = self._alerts(filters)
        alerts = response['alerts']
        total = response['total']
        sys.stdout.write("%s, done.\n" % total)

        sys.stdout.write(label)
        self._apply_with_progress(alerts, total, action)

    def _bulk_from_counts(self, filters, label, action):
        """Apply *action* to matching alerts, taking the total from the count query."""
        sys.stdout.write("Counting alerts: ")
        response = self._counts(filters)
        total = response['total']
        sys.stdout.write("%s, done.\n" % total)

        sys.stdout.write(label)
        response = self._alerts(filters)
        alerts = response['alerts']
        self._apply_with_progress(alerts, total, action)

    def tag(self, args):
        """Add ``args.tags`` to every alert matching ``args.filters``."""
        self._bulk_from_alerts(
            args.filters, "Tagging alerts: ",
            lambda alert_id: self.api.tag_alert(alert_id, args.tags))

    def untag(self, args):
        """Remove ``args.tags`` from every alert matching ``args.filters``."""
        self._bulk_from_alerts(
            args.filters, "Un-tagging alerts: ",
            lambda alert_id: self.api.untag_alert(alert_id, args.tags))

    def ack(self, args):
        """Acknowledge every alert matching ``args.filters``."""
        self._bulk_from_counts(
            args.filters, "Acking alerts: ",
            lambda alert_id: self.api.ack_alert(alert_id))

    def unack(self, args):
        """Un-acknowledge every alert matching ``args.filters``."""
        self._bulk_from_counts(
            args.filters, "un-Acking alerts: ",
            lambda alert_id: self.api.unack_alert(alert_id))

    def close(self, args):
        """Close every alert matching ``args.filters``."""
        self._bulk_from_counts(
            args.filters, "Closing alerts: ",
            lambda alert_id: self.api.close_alert(alert_id))

    def delete(self, args):
        """Delete every alert matching ``args.filters``."""
        self._bulk_from_counts(
            args.filters, "Deleting alerts: ",
            lambda alert_id: self.api.delete_alert(alert_id))

    # ------------------------------------------------------------------
    # server status
    # ------------------------------------------------------------------

    def status(self, args):
        """Print the metrics returned by the status call as a table."""
        response = self._status()
        metrics = response['metrics']

        print('{:<28} {:<8} {:<26} {:10} {}'.format('METRIC', 'TYPE', 'NAME', 'VALUE', 'AVG'))

        for metric in [m for m in metrics if m['type'] in ['gauge', 'counter', 'timer']]:
            if metric['type'] == 'gauge':
                print('{0:<28} {1:<8} {2:<26} {3:<10}'.format(
                    metric['title'],
                    metric['type'],
                    metric['group'] + '.' + metric['name'],
                    metric['value']))
            else:
                value = metric.get('count', 0)
                total_time = int(metric['totalTime'])
                count = int(metric['count'])
                # A metric with no samples has no meaningful average.
                avg = total_time * 1.0 / count if count else 0.0
                print('{0:<28} {1:<8} {2:<26} {3:<10} {4:-3.2f} ms'.format(
                    metric['title'],
                    metric['type'],
                    metric['group'] + '.' + metric['name'],
                    value,
                    avg))

        for metric in [m for m in metrics if m['type'] == 'text']:
            print('{0:<28} {1:<8} {2:<26} {3:<10}'.format(
                metric['title'],
                metric['type'],
                metric['group'] + '.' + metric['name'],
                metric['value']))

    def heartbeats(self, args):
        """Print all heartbeats; with ``args.alert`` also raise an alert for each.

        A heartbeat is flagged when its latency exceeds ``MAX_LATENCY`` or
        when the time since it was received exceeds its timeout.
        """
        response = self._heartbeats()
        heartbeats = response['heartbeats']

        print('{:<28} {:<26} {:<19} {:>8} {:7} {}'.format(
            'ORIGIN', 'TAGS', 'CREATED', 'LATENCY', 'TIMEOUT', 'SINCE'))

        for heartbeat in heartbeats:
            hb = HeartbeatDocument.parse_heartbeat(heartbeat)
            latency = (hb.receive_time - hb.create_time).microseconds / 1000
            since = datetime.utcnow() - hb.receive_time
            since = since - timedelta(microseconds=since.microseconds)

            latency_exceeded = latency > MAX_LATENCY
            # total_seconds() includes whole days; `.seconds` alone wraps
            # every 24h and would miss heartbeats that are days overdue.
            timeout_exceeded = since.total_seconds() > hb.timeout

            print('{:<28} {:<26} {} {}{:6}ms {:6}s {}{}'.format(
                hb.origin,
                ' '.join(hb.tags),
                hb.get_date('create_time', 'local', args.timezone),
                '*' if latency_exceeded else ' ',
                latency,
                hb.timeout,
                '*' if timeout_exceeded else ' ',
                since))

            if args.alert:
                common = dict(
                    resource=hb.origin,
                    correlate=['HeartbeatFail', 'HeartbeatSlow', 'HeartbeatOK'],
                    group='System',
                    environment='Production',
                    service=['Alerta'],
                    tags=hb.tags,
                    type='heartbeatAlert')
                if timeout_exceeded:
                    alert = Alert(
                        event='HeartbeatFail',
                        severity='major',
                        value='{}'.format(since),
                        text='Heartbeat not received in {} seconds'.format(hb.timeout),
                        **common)
                elif latency_exceeded:
                    alert = Alert(
                        event='HeartbeatSlow',
                        severity='major',
                        value='{}ms'.format(latency),
                        text='Heartbeat took more than {}ms to be processed'.format(MAX_LATENCY),
                        **common)
                else:
                    alert = Alert(
                        event='HeartbeatOK',
                        severity='normal',
                        value='',
                        text='Heartbeat OK',
                        **common)
                # send() reads the same attribute names from the Alert that
                # it reads from a parsed-args namespace.
                self.send(alert)

    # ------------------------------------------------------------------
    # blackouts
    # ------------------------------------------------------------------

    def blackout(self, args):
        """Create a blackout period from *args* and print the result."""
        # Add a milliseconds part when the start time has none.
        if '.' not in args.start:
            args.start = args.start.replace('Z', '.000Z')

        try:
            blackout = {
                "environment": args.environment,
                "resource": args.resource,
                "service": args.service,
                "event": args.event,
                "group": args.group,
                "tags": args.tags,
                "startTime": args.start,
                "duration": args.duration
            }
        except Exception as e:
            LOG.error(e)
            sys.exit(1)

        try:
            response = self.api.blackout_alerts(blackout)
        except Exception as e:
            LOG.error(e)
            sys.exit(1)

        if response['status'] == 'ok':
            print(response['blackout'])
        else:
            LOG.error(response['message'])
            sys.exit(1)

    def blackouts(self, args):
        """List blackout periods; with ``args.purge`` delete the expired ones."""
        response = self.api.get_blackouts()
        blackouts = response['blackouts']

        print('{:<8} {:<16} {:<16} {:<16} {:<16} {:16} {:16} {:24} {:8} {:19} {}'.format(
            'ID', 'CUSTOMER', 'ENVIRONMENT', 'SERVICE', 'RESOURCE', 'EVENT',
            'GROUP', 'TAGS', 'STATUS', 'START', 'DURATION'))

        for blackout in blackouts:
            start_time = datetime.strptime(blackout['startTime'], '%Y-%m-%dT%H:%M:%S.%fZ')
            tz = pytz.timezone(args.timezone)

            if args.purge and blackout['status'] == 'expired':
                response = self.api.delete_blackout(blackout['id'])
                if response['status'] == 'ok':
                    blackout['status'] = 'deleted'
                else:
                    blackout['status'] = 'error'

            print('{:<8} {:<16} {:<16} {:16} {:16} {:16} {:16} {:24} {:8} {} {}s'.format(
                blackout['id'][:8],
                blackout.get('customer', '*'),
                blackout.get('environment', '*'),
                ','.join(blackout.get('service', '*')),
                blackout.get('resource', '*'),
                blackout.get('event', '*'),
                blackout.get('group', '*'),
                ' '.join(blackout.get('tags', '*')),
                blackout['status'],
                start_time.replace(tzinfo=pytz.UTC).astimezone(tz).strftime('%Y/%m/%d %H:%M:%S'),
                blackout['duration']))

    # ------------------------------------------------------------------
    # API helpers
    # ------------------------------------------------------------------

    @staticmethod
    def _build(filters, from_date=None, to_date=None):
        """Turn ``key=value`` filter strings into a list of query tuples.

        Filters without '=' are ignored. ``from-date`` / ``to-date`` are
        appended when given, and a default sort on ``lastReceiveTime`` is
        added unless the filters already contain a ``sort-by`` key.
        """
        if filters:
            query = [tuple(x.split('=', 1)) for x in filters if '=' in x]
        else:
            query = list()

        if from_date:
            query.append(('from-date', from_date))

        if to_date:
            query.append(('to-date', to_date))

        # The query holds (key, value) tuples, so the keys must be compared;
        # testing the bare string against the list never matches.
        if not any(param[0] == 'sort-by' for param in query):
            query.append(('sort-by', 'lastReceiveTime'))

        return query

    def _call_api(self, method_name, *call_args):
        """Call ``self.api.<method_name>(*call_args)``; log and exit(1) on failure."""
        try:
            return getattr(self.api, method_name)(*call_args)
        except Exception as e:
            LOG.error(e)
            sys.exit(1)

    @staticmethod
    def _fail_on_error(response):
        """Return *response*, or log its message and exit(1) if its status is 'error'."""
        if response['status'] == "error":
            LOG.error(response['message'])
            sys.exit(1)
        return response

    def _alerts(self, filters, from_date=None, to_date=None):
        """Fetch alerts matching the filters."""
        query = self._build(filters, from_date, to_date)
        return self._fail_on_error(self._call_api('get_alerts', query))

    def _counts(self, filters, from_date=None, to_date=None):
        """Fetch alert counts matching the filters."""
        query = self._build(filters, from_date, to_date)
        return self._fail_on_error(self._call_api('get_counts', query))

    def _history(self, filters, from_date=None, to_date=None):
        """Fetch alert history matching the filters."""
        query = self._build(filters, from_date, to_date)
        return self._fail_on_error(self._call_api('get_history', query))

    def _heartbeats(self):
        """Fetch all heartbeats."""
        return self._fail_on_error(self._call_api('get_heartbeats'))

    def _status(self):
        """Fetch the server status; the response's status field is not checked."""
        return self._call_api('get_status')

    # ------------------------------------------------------------------
    # misc
    # ------------------------------------------------------------------

    def help(self, args):
        pass

    def uptime(self, args):
        """Print the server time and uptime as ``HH:MM up D days HH:MM``."""
        response = self._status()

        now = datetime.fromtimestamp(int(response['time']) / 1000.0)
        # Read days/hours/minutes from the timedelta itself; adding it to a
        # date and reading `.day` wraps once the uptime exceeds a month.
        up = timedelta(seconds=int(response['uptime']) / 1000.0)
        hours, remainder = divmod(up.seconds, 3600)
        minutes = remainder // 60

        print('{0} up {1} days {2:02d}:{3:02d}'.format(
            now.strftime('%H:%M'), up.days, hours, minutes))

    def version(self, args):
        """Print the server, client and ``requests`` library versions."""
        response = self._status()

        print('{0} {1}'.format(
            response['application'],
            response['version'],
        ))
        print('alerta client {0}'.format(__version__))
        print('requests {0}'.format(requests.__version__))
sys.exit(2) LOG.info('Listening on syslog port %s/udp' % SYSLOG_UDP_PORT) LOG.info('Starting TCP listener...') # Set up syslog TCP listener try: tcp = socket.socket(socket.AF_INET, socket.SOCK_STREAM) tcp.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1) tcp.bind(('', SYSLOG_TCP_PORT)) tcp.listen(5) except socket.error, e: LOG.error('Syslog TCP error: %s', e) sys.exit(2) LOG.info('Listening on syslog port %s/tcp' % SYSLOG_TCP_PORT) self.api = self.api = ApiClient(endpoint=settings.ENDPOINT, key=settings.API_KEY) count = 0 while not self.shuttingdown: try: LOG.debug('Waiting for syslog messages...') ip, op, rdy = select.select([udp, tcp], [], [], LOOP_EVERY) if ip: for i in ip: if i == udp: data, addr = udp.recvfrom(4096) data = unicode(data, 'utf-8', errors='ignore') LOG.debug('Syslog UDP data received from %s: %s', addr, data) if i == tcp: client, addr = tcp.accept() data = client.recv(4096)
) # In[ ]: get_ipython().system( u' cd $ALERTA_TEST_DIR && ./miniconda2/bin/alerta --endpoint-url "http://localhost:8090" delete' ) # ### Same Thing, Python style # In[ ]: from alerta.api import ApiClient from alerta.alert import Alert api = ApiClient(endpoint='http://localhost:8090') alert = Alert(resource='localhost', event='VolUnavailable', service=['Filesystem'], environment='Production', value='ERROR', severity='minor') res = api.send(alert) # ## Custom Alerts # ### Remember, you can do amazing stuff… # In[ ]: import utils
class CloudWatch(object):
    """Read CloudWatch alarm notifications from an SQS queue and send them to Alerta."""

    def __init__(self):
        """Create the Alerta API client and open the SQS queue.

        Exits the process with status 1 if the SQS connection or the queue
        cannot be set up.
        """
        self.api = ApiClient()

        try:
            connection = boto.sqs.connect_to_region(
                AWS_REGION,
                aws_access_key_id=AWS_ACCESS_KEY_ID,
                aws_secret_access_key=AWS_SECRET_ACCESS_KEY
            )
        except boto.exception.SQSError as e:
            LOG.error('SQS API call failed: %s', e)
            sys.exit(1)

        # connect_to_region() returns None instead of raising when the region
        # name is not recognised; fail clearly rather than with AttributeError.
        if connection is None:
            LOG.error('SQS API call failed: unknown region %s', AWS_REGION)
            sys.exit(1)

        try:
            self.sqs = connection.create_queue(AWS_SQS_QUEUE)
            self.sqs.set_message_class(RawMessage)
        except boto.exception.SQSError as e:
            LOG.error('SQS queue error: %s', e)
            sys.exit(1)

    def run(self):
        """Poll the queue forever, forwarding alarms and sending a heartbeat per pass.

        A message is deleted from the queue once it has been handled, whether
        or not it produced an alert or the send succeeded.
        """
        while True:
            LOG.debug('Waiting for CloudWatch alarms on %s...', AWS_SQS_QUEUE)
            try:
                notification = self.sqs.read(wait_time_seconds=20)
            except boto.exception.SQSError as e:
                LOG.warning('Could not read from queue: %s', e)
                time.sleep(20)
                continue

            if notification:
                cloudwatchAlert = self.parse_notification(notification)
                if cloudwatchAlert is None:
                    # Not an alarm state change (no 'Trigger'): nothing to send.
                    LOG.debug('Ignoring notification without a Trigger')
                else:
                    try:
                        self.api.send(cloudwatchAlert)
                    except Exception as e:
                        LOG.warning('Failed to send alert: %s', e)
                self.sqs.delete_message(notification)

            LOG.debug('Send heartbeat...')
            heartbeat = Heartbeat(tags=[__version__])
            try:
                self.api.send(heartbeat)
            except Exception as e:
                LOG.warning('Failed to send heartbeat: %s', e)

    def parse_notification(self, notification):
        """Convert a queue message into an ``Alert``.

        The message body is JSON whose 'Message' field is itself a JSON
        document describing the alarm. Returns ``None`` when the alarm
        document has no 'Trigger' key.
        """
        notification = json.loads(notification.get_body())
        alarm = json.loads(notification['Message'])

        if 'Trigger' not in alarm:
            return

        trigger = alarm['Trigger']
        # Only the first dimension is used to name the resource.
        dimension = trigger['Dimensions'][0]
        account_id = alarm['AWSAccountId']

        return Alert(
            resource='%s:%s' % (dimension['name'], dimension['value']),
            event=alarm['AlarmName'],
            correlate=list(),
            group='CloudWatch',
            value=trigger['MetricName'],
            severity=self.cw_state_to_severity(alarm['NewStateValue']),
            environment='Production',
            service=[AWS_ACCOUNT_ID.get(account_id, 'AWSAccountId:' + account_id)],
            text=alarm['AlarmDescription'],
            event_type='cloudwatchAlarm',
            tags=[trigger['Namespace']],
            attributes={
                'awsMessageId': notification['MessageId'],
                'awsRegion': alarm['Region'],
                'thresholdInfo': alarm['NewStateReason']
            },
            origin=notification['TopicArn'],
            timeout=None,
            create_time=datetime.datetime.strptime(notification['Timestamp'], '%Y-%m-%dT%H:%M:%S.%fZ'),
            raw_data=notification['Message'],
        )

    @staticmethod
    def cw_state_to_severity(state):
        """Map a CloudWatch alarm state to a severity ('unknown' if unrecognised)."""
        if state == 'ALARM':
            return 'major'
        if state == 'INSUFFICIENT_DATA':
            return 'warning'
        if state == 'OK':
            return 'normal'
        return 'unknown'
import argparse import threading from os.path import dirname, basename, join import urllib3 import requests from packaging.version import Version from alerta.api import ApiClient from alerta.alert import Alert import utils urllib3.disable_warnings() alerta_endpoint = 'http://localhost:8090' api = ApiClient(endpoint=alerta_endpoint) DRY_RUN = False # # alert monitoring functions, kind of wrapping functions in utils # def alert_volume_not_existing(path): """ Alert if a volume does not exist, delete previous alert if it does. Command-line alternative (replace path argument): alerta send -r localhost -e VolumeUnavailable -E Localhost \ -S Filesystem -s minor -t "Volume not available." -v <path>
class CloudWatch(object):
    """Read CloudWatch alarm notifications from an SQS queue and send them to Alerta."""

    def __init__(self):
        """Create the Alerta API client and open the SQS queue.

        Exits the process with status 1 if the SQS connection or the queue
        cannot be set up.
        """
        self.api = ApiClient()

        try:
            connection = boto.sqs.connect_to_region(
                AWS_REGION,
                aws_access_key_id=AWS_ACCESS_KEY_ID,
                aws_secret_access_key=AWS_SECRET_ACCESS_KEY)
        except boto.exception.SQSError as e:
            LOG.error('SQS API call failed: %s', e)
            sys.exit(1)

        # connect_to_region() returns None instead of raising when the region
        # name is not recognised; fail clearly rather than with AttributeError.
        if connection is None:
            LOG.error('SQS API call failed: unknown region %s', AWS_REGION)
            sys.exit(1)

        try:
            self.sqs = connection.create_queue(AWS_SQS_QUEUE)
            self.sqs.set_message_class(RawMessage)
        except boto.exception.SQSError as e:
            LOG.error('SQS queue error: %s', e)
            sys.exit(1)

    def run(self):
        """Poll the queue forever, forwarding alarms and sending a heartbeat per pass.

        A message is deleted from the queue once it has been handled, whether
        or not it produced an alert or the send succeeded.
        """
        while True:
            LOG.debug('Waiting for CloudWatch alarms on %s...', AWS_SQS_QUEUE)
            try:
                notification = self.sqs.read(wait_time_seconds=20)
            except boto.exception.SQSError as e:
                LOG.warning('Could not read from queue: %s', e)
                time.sleep(20)
                continue

            if notification:
                cloudwatchAlert = self.parse_notification(notification)
                if cloudwatchAlert is None:
                    # Not an alarm state change (no 'Trigger'): nothing to send.
                    LOG.debug('Ignoring notification without a Trigger')
                else:
                    try:
                        self.api.send(cloudwatchAlert)
                    except Exception as e:
                        LOG.warning('Failed to send alert: %s', e)
                self.sqs.delete_message(notification)

            LOG.debug('Send heartbeat...')
            heartbeat = Heartbeat(tags=[__version__])
            try:
                self.api.send(heartbeat)
            except Exception as e:
                LOG.warning('Failed to send heartbeat: %s', e)

    def parse_notification(self, notification):
        """Convert a queue message into an ``Alert``.

        The message body is JSON whose 'Message' field is itself a JSON
        document describing the alarm. Returns ``None`` when the alarm
        document has no 'Trigger' key.
        """
        notification = json.loads(notification.get_body())
        alarm = json.loads(notification['Message'])

        if 'Trigger' not in alarm:
            return

        trigger = alarm['Trigger']
        # Only the first dimension is used to name the resource.
        dimension = trigger['Dimensions'][0]
        account_id = alarm['AWSAccountId']

        return Alert(
            resource='%s:%s' % (dimension['name'], dimension['value']),
            event=alarm['AlarmName'],
            correlate=list(),
            group='CloudWatch',
            value=trigger['MetricName'],
            severity=self.cw_state_to_severity(alarm['NewStateValue']),
            environment='Production',
            service=[AWS_ACCOUNT_ID.get(account_id, 'AWSAccountId:' + account_id)],
            text=alarm['AlarmDescription'],
            event_type='cloudwatchAlarm',
            tags=[trigger['Namespace']],
            attributes={
                'awsMessageId': notification['MessageId'],
                'awsRegion': alarm['Region'],
                'thresholdInfo': alarm['NewStateReason']
            },
            origin=notification['TopicArn'],
            timeout=None,
            create_time=datetime.datetime.strptime(notification['Timestamp'], '%Y-%m-%dT%H:%M:%S.%fZ'),
            raw_data=notification['Message'],
        )

    @staticmethod
    def cw_state_to_severity(state):
        """Map a CloudWatch alarm state to a severity ('unknown' if unrecognised)."""
        if state == 'ALARM':
            return 'major'
        if state == 'INSUFFICIENT_DATA':
            return 'warning'
        if state == 'OK':
            return 'normal'
        return 'unknown'
get_ipython().system(u' cd $ALERTA_TEST_DIR && ./miniconda2/bin/alerta --endpoint-url "http://localhost:8090" send -E Production -r localhost -e VolUnavailable -S Filesystem -v ERROR -s minor -t "/Volumes/XYZ not available."') # In[ ]: get_ipython().system(u' cd $ALERTA_TEST_DIR && ./miniconda2/bin/alerta --endpoint-url "http://localhost:8090" delete') # ### Same Thing, Python style # In[ ]: from alerta.api import ApiClient from alerta.alert import Alert api = ApiClient(endpoint='http://localhost:8090') alert = Alert(resource='localhost', event='VolUnavailable', service=['Filesystem'], environment='Production', value='ERROR', severity='minor') res = api.send(alert) # ## Custom Alerts # ### Remember, you can do amazing stuff… # In[ ]: import utils utils.volume_is_mounted('/Volumes/Intenso64')
#!/usr/bin/env python
# coding=utf-8
"""Send a test 'searchServerDown' alert for resource 'irdev' and print the result."""
from alerta.api import ApiClient
from alerta.alert import Alert

# NOTE(review): the endpoint and API key are hard-coded in source; they should
# come from configuration or the environment.
api = ApiClient(endpoint='http://alert.localhost/api', key='UszE5hI_hx5pXKcsCP_2&1DIs&9_Ve*k')

# Production variant of the same alert, kept for reference:
#alert = Alert(resource='irdev', event='searchServerDown',text='The search server is down.',group='ir',environment="Production",service=["localhost"],status='open',timeout=86400,value="query1",severity="major")
alert = Alert(
    resource='irdev',
    event='searchServerDown',
    text='The search server is down.',
    group='ir',
    environment="Development",
    service=["localhost"],
    status='open',
    timeout=86400,
    value="query1",
    severity="major",
)

# Single-argument print() calls give the same output on Python 2 and 3.
print(alert)
t = api.send(alert)
print(t)
class AlertCommand(object):
    """Implementation of the alerta command-line sub-commands.

    Every public method receives the parsed command-line namespace ``args``,
    talks to the Alerta API through ``self.api`` and writes its result to
    stdout. API failures are logged through ``LOG`` and terminate the process
    with exit status 1.
    """

    def __init__(self):
        self.api = ApiClient()

    def set(self, endpoint, key):
        """Replace the API client with one bound to ``endpoint`` and ``key``."""
        self.api = ApiClient(endpoint=endpoint, key=key)

    def send(self, args):
        """Build an alert from the attributes of ``args`` and send it.

        ``args.attributes`` is an iterable of ``key=value`` strings. Prints
        ``<id> (<info>)`` on success; logs the error and exits with status 1
        otherwise.
        """
        try:
            alert = Alert(
                resource=args.resource,
                event=args.event,
                environment=args.environment,
                severity=args.severity,
                correlate=args.correlate,
                status=args.status,
                service=args.service,
                group=args.group,
                value=args.value,
                text=args.text,
                tags=args.tags,
                # Split on the first '=' only so values may contain '='.
                attributes=dict(attrib.split('=', 1) for attrib in args.attributes),
                origin=args.origin,
                event_type=args.event_type,
                timeout=args.timeout,
                raw_data=args.raw_data
            )
        except Exception as e:
            LOG.error(e)
            sys.exit(1)

        try:
            response = self.api.send(alert)
        except Exception as e:
            LOG.error(e)
            sys.exit(1)

        if response['status'] == 'ok':
            if 'alert' not in response:
                info = response['message']
            elif response['alert']['repeat']:
                info = "%s duplicates" % response['alert']['duplicateCount']
            else:
                info = "%s -> %s" % (response['alert']['previousSeverity'], response['alert']['severity'])
            print("{} ({})".format(response['id'], info))
        else:
            LOG.error(response['message'])
            sys.exit(1)

    def heartbeat(self, args):
        """Send a heartbeat built from ``args`` and print its id."""
        try:
            heartbeat = Heartbeat(
                origin=args.origin,
                tags=args.tags,
                timeout=args.timeout
            )
        except Exception as e:
            LOG.error(e)
            sys.exit(1)

        try:
            response = self.api.send(heartbeat)
        except Exception as e:
            LOG.error(e)
            sys.exit(1)

        if response['status'] == 'ok':
            print(response['id'])
        else:
            LOG.error(response['message'])
            sys.exit(1)

    def query(self, args, from_date=None):
        """Print the alerts matching ``args.filters``, oldest first.

        With ``args.output == "json"`` the raw alert list is dumped and the
        process exits with status 0. Returns the ``lastTime`` value of the
        response (``''`` if absent) so that ``watch`` can poll incrementally.
        """
        response = self._alerts(args.filters, from_date)
        alerts = response['alerts']

        if args.output == "json":
            print(json.dumps(alerts, indent=4))
            sys.exit(0)

        for alert in reversed(alerts):
            a = AlertDocument.parse_alert(alert)
            line_color = ''
            end_color = _ENDC
            if args.color:
                line_color = _COLOR_MAP.get(a.severity, _COLOR_MAP['unknown'])

            print(line_color + '{0}|{1}|{2}|{3:5d}|{4}|{5:<5s}|{6:<10s}|{7:<18s}|{8:12s}|{9:16s}|{10:12s}'.format(
                a.id[0:8],
                a.get_date('last_receive_time', 'local', args.timezone),
                a.severity,
                a.duplicate_count,
                a.customer or "-",
                a.environment,
                ','.join(a.service),
                a.resource,
                a.group,
                a.event,
                a.value) + end_color)
            print(line_color + ' |{}'.format(a.text) + end_color)

            if args.details:
                print(line_color + ' severity | {} -> {}'.format(a.previous_severity, a.severity) + end_color)
                print(line_color + ' trend | {}'.format(a.trend_indication) + end_color)
                print(line_color + ' status | {}'.format(a.status) + end_color)
                print(line_color + ' resource | {}'.format(a.resource) + end_color)
                print(line_color + ' group | {}'.format(a.group) + end_color)
                print(line_color + ' event | {}'.format(a.event) + end_color)
                print(line_color + ' value | {}'.format(a.value) + end_color)
                print(line_color + ' tags | {}'.format(' '.join(a.tags)) + end_color)

                for key, value in a.attributes.items():
                    print(line_color + ' {} | {}'.format(key.ljust(10), value) + end_color)

                # total_seconds() keeps the whole-second part of the delay;
                # timedelta.microseconds alone would wrap at one second.
                latency_ms = int((a.receive_time - a.create_time).total_seconds() * 1000)

                print(line_color + ' time created | {}'.format(a.get_date('create_time', 'iso', args.timezone)) + end_color)
                print(line_color + ' time received | {}'.format(a.get_date('receive_time', 'iso', args.timezone)) + end_color)
                print(line_color + ' last received | {}'.format(a.get_date('last_receive_time', 'iso', args.timezone)) + end_color)
                print(line_color + ' latency | {}ms'.format(latency_ms) + end_color)
                print(line_color + ' timeout | {}s'.format(a.timeout) + end_color)

                print(line_color + ' alert id | {}'.format(a.id) + end_color)
                print(line_color + ' last recv id | {}'.format(a.last_receive_id) + end_color)
                print(line_color + ' customer | {}'.format(a.customer) + end_color)
                print(line_color + ' environment | {}'.format(a.environment) + end_color)
                print(line_color + ' service | {}'.format(','.join(a.service)) + end_color)
                print(line_color + ' resource | {}'.format(a.resource) + end_color)
                print(line_color + ' type | {}'.format(a.event_type) + end_color)
                print(line_color + ' repeat | {}'.format(a.repeat) + end_color)
                print(line_color + ' origin | {}'.format(a.origin) + end_color)
                print(line_color + ' correlate | {}'.format(','.join(a.correlate)) + end_color)

        return response.get('lastTime', '')

    def watch(self, args):
        """Run ``query`` every two seconds until interrupted."""
        from_date = None
        while True:
            from_date = self.query(args, from_date)
            try:
                time.sleep(2)
            except (KeyboardInterrupt, SystemExit):
                sys.exit(0)

    def top(self, args):
        """Run the full-screen ``Screen`` view until it fails or is interrupted."""
        screen = Screen(endpoint=args.endpoint, key=args.key)
        try:
            screen.run()
        except RuntimeError as e:
            screen._reset()
            print(e)
            sys.exit(1)
        except (KeyboardInterrupt, SystemExit):
            screen.w.running = False
            screen._reset()
            print('Exiting...')
            sys.exit(0)

    def raw(self, args):
        """Print the ``rawData`` field of every matching alert, oldest first."""
        response = self._alerts(args.filters)
        alerts = response['alerts']

        if args.output == "json":
            print(json.dumps(alerts, indent=4))
            sys.exit(0)

        for alert in reversed(alerts):
            line_color = ''
            end_color = _ENDC
            print(line_color + '%s' % alert['rawData'] + end_color)

    def history(self, args):
        """Print one line per severity and/or status change in the alert history."""
        response = self._history(args.filters)
        history = response['history']

        if args.output == "json":
            print(json.dumps(history, indent=4))
            sys.exit(0)

        for hist in history:
            line_color = ''
            end_color = _ENDC

            update_time = datetime.strptime(hist.get('updateTime', None), '%Y-%m-%dT%H:%M:%S.%fZ')

            if 'severity' in hist:
                if args.color:
                    line_color = _COLOR_MAP.get(hist['severity'], _COLOR_MAP['unknown'])
                print(line_color + '%s|%s|%s|%s|%-5s|%-10s|%-18s|%s|%s|%s|%s' % (
                    hist['id'][0:8],
                    update_time.strftime('%Y/%m/%d %H:%M:%S'),
                    hist['severity'],
                    hist['customer'],
                    hist['environment'],
                    ','.join(hist['service']),
                    hist['resource'],
                    hist['group'],
                    hist['event'],
                    hist['value'],
                    hist['text']
                ) + end_color)

            if 'status' in hist:
                print(line_color + '%s|%s|%s|%s|%-5s|%-10s|%-18s|%s|%s|%s|%s' % (
                    hist['id'][0:8],
                    update_time.strftime('%Y/%m/%d %H:%M:%S'),
                    hist['status'],
                    hist['customer'],
                    hist['environment'],
                    ','.join(hist['service']),
                    hist['resource'],
                    hist['group'],
                    hist['event'],
                    'n/a',
                    hist['text']
                ) + end_color)

    def _apply_to_alerts(self, alerts, total, action):
        """Call ``action(alert_id)`` for every alert while drawing a progress counter.

        The counter is rewritten in place using backspaces. Any exception
        raised by ``action`` is logged and ends the process with status 1.
        """
        for i, alert in enumerate(alerts):
            # Guard against a reported total of zero for a non-empty list.
            pct = int(100.0 * i / total) if total else 0
            sys.stdout.write("%3d%% (%d/%d)" % (pct, i, total))
            sys.stdout.flush()
            sys.stdout.write("\b" * (8 + len(str(i)) + len(str(total))))
            try:
                action(alert['id'])
            except Exception as e:
                print()
                LOG.error(e)
                sys.exit(1)

        sys.stdout.write("100%% (%d/%d), done.\n" % (total, total))

    def tag(self, args):
        """Add ``args.tags`` to every alert matching ``args.filters``."""
        sys.stdout.write("Counting alerts: ")
        response = self._alerts(args.filters)
        alerts = response['alerts']
        total = response['total']
        sys.stdout.write("%s, done.\n" % total)

        sys.stdout.write("Tagging alerts: ")
        self._apply_to_alerts(alerts, total, lambda alert_id: self.api.tag_alert(alert_id, args.tags))

    def untag(self, args):
        """Remove ``args.tags`` from every alert matching ``args.filters``."""
        sys.stdout.write("Counting alerts: ")
        response = self._alerts(args.filters)
        alerts = response['alerts']
        total = response['total']
        sys.stdout.write("%s, done.\n" % total)

        sys.stdout.write("Un-tagging alerts: ")
        self._apply_to_alerts(alerts, total, lambda alert_id: self.api.untag_alert(alert_id, args.tags))

    def ack(self, args):
        """Acknowledge every alert matching ``args.filters``."""
        sys.stdout.write("Counting alerts: ")
        response = self._counts(args.filters)
        total = response['total']
        sys.stdout.write("%s, done.\n" % total)

        sys.stdout.write("Acking alerts: ")
        response = self._alerts(args.filters)
        alerts = response['alerts']
        self._apply_to_alerts(alerts, total, lambda alert_id: self.api.ack_alert(alert_id))

    def unack(self, args):
        """Un-acknowledge every alert matching ``args.filters``."""
        sys.stdout.write("Counting alerts: ")
        response = self._counts(args.filters)
        total = response['total']
        sys.stdout.write("%s, done.\n" % total)

        sys.stdout.write("un-Acking alerts: ")
        response = self._alerts(args.filters)
        alerts = response['alerts']
        self._apply_to_alerts(alerts, total, lambda alert_id: self.api.unack_alert(alert_id))

    def close(self, args):
        """Close every alert matching ``args.filters``."""
        sys.stdout.write("Counting alerts: ")
        response = self._counts(args.filters)
        total = response['total']
        sys.stdout.write("%s, done.\n" % total)

        sys.stdout.write("Closing alerts: ")
        response = self._alerts(args.filters)
        alerts = response['alerts']
        self._apply_to_alerts(alerts, total, lambda alert_id: self.api.close_alert(alert_id))

    def delete(self, args):
        """Delete every alert matching ``args.filters``."""
        sys.stdout.write("Counting alerts: ")
        response = self._counts(args.filters)
        total = response['total']
        sys.stdout.write("%s, done.\n" % total)

        sys.stdout.write("Deleting alerts: ")
        response = self._alerts(args.filters)
        alerts = response['alerts']
        self._apply_to_alerts(alerts, total, lambda alert_id: self.api.delete_alert(alert_id))

    def status(self, args):
        """Print the server metrics: gauges, counters and timers first, then text metrics."""
        response = self._status()
        metrics = response['metrics']

        print('{:<28} {:<8} {:<26} {:10} {}'.format('METRIC', 'TYPE', 'NAME', 'VALUE', 'AVG'))

        for metric in [m for m in metrics if m['type'] in ['gauge', 'counter', 'timer']]:
            if metric['type'] == 'gauge':
                print('{0:<28} {1:<8} {2:<26} {3:<10}'.format(metric['title'], metric['type'], metric['group'] + '.' + metric['name'], metric['value']))
            else:
                value = metric.get('count', 0)
                count = int(value)
                # A metric that has not been hit yet has count 0; report an
                # average of 0 instead of dividing by zero.
                avg = (int(metric.get('totalTime', 0)) * 1.0 / count) if count else 0.0
                print('{0:<28} {1:<8} {2:<26} {3:<10} {4:-3.2f} ms'.format(metric['title'], metric['type'], metric['group'] + '.' + metric['name'], value, avg))

        for metric in [m for m in metrics if m['type'] == 'text']:
            print('{0:<28} {1:<8} {2:<26} {3:<10}'.format(metric['title'], metric['type'], metric['group'] + '.' + metric['name'], metric['value']))

    def heartbeats(self, args):
        """List all heartbeats, marking slow and expired ones with ``*``.

        With ``args.alert`` set, a HeartbeatFail, HeartbeatSlow or HeartbeatOK
        alert is sent for every heartbeat via ``send``.
        """
        response = self._heartbeats()
        heartbeats = response['heartbeats']

        print('{:<28} {:<26} {:<19} {:>8} {:7} {}'.format('ORIGIN', 'TAGS', 'CREATED', 'LATENCY', 'TIMEOUT', 'SINCE'))

        for heartbeat in heartbeats:
            hb = HeartbeatDocument.parse_heartbeat(heartbeat)
            # Use total_seconds(): timedelta.microseconds never reaches one
            # second, so a latency of a second or more would be under-reported.
            latency = int((hb.receive_time - hb.create_time).total_seconds() * 1000)
            since = datetime.utcnow() - hb.receive_time
            since = since - timedelta(microseconds=since.microseconds)

            latency_exceeded = latency > MAX_LATENCY
            # timedelta.seconds ignores whole days; total_seconds() does not.
            timeout_exceeded = since.total_seconds() > hb.timeout

            print('{:<28} {:<26} {} {}{:6}ms {:6}s {}{}'.format(
                hb.origin,
                ' '.join(hb.tags),
                hb.get_date('create_time', 'local', args.timezone),
                '*' if latency_exceeded else ' ',
                latency,
                hb.timeout,
                '*' if timeout_exceeded else ' ',
                since
            ))

            if args.alert:
                if timeout_exceeded:
                    alert = Alert(
                        resource=hb.origin,
                        event='HeartbeatFail',
                        correlate=['HeartbeatFail', 'HeartbeatSlow', 'HeartbeatOK'],
                        group='System',
                        environment='Production',
                        service=['Alerta'],
                        severity='major',
                        value='{}'.format(since),
                        text='Heartbeat not received in {} seconds'.format(hb.timeout),
                        tags=hb.tags,
                        type='heartbeatAlert'
                    )
                elif latency_exceeded:
                    alert = Alert(
                        resource=hb.origin,
                        event='HeartbeatSlow',
                        correlate=['HeartbeatFail', 'HeartbeatSlow', 'HeartbeatOK'],
                        group='System',
                        environment='Production',
                        service=['Alerta'],
                        severity='major',
                        value='{}ms'.format(latency),
                        text='Heartbeat took more than {}ms to be processed'.format(MAX_LATENCY),
                        tags=hb.tags,
                        type='heartbeatAlert'
                    )
                else:
                    alert = Alert(
                        resource=hb.origin,
                        event='HeartbeatOK',
                        correlate=['HeartbeatFail', 'HeartbeatSlow', 'HeartbeatOK'],
                        group='System',
                        environment='Production',
                        service=['Alerta'],
                        severity='normal',
                        value='',
                        text='Heartbeat OK',
                        tags=hb.tags,
                        type='heartbeatAlert'
                    )
                # send() only reads attributes, so the Alert object itself is
                # passed in place of an argparse namespace.
                self.send(alert)

    def blackout(self, args):
        """Create a blackout period from ``args`` and print the API's answer."""
        # Add a milliseconds part when the start time has none.
        if '.' not in args.start:
            args.start = args.start.replace('Z', '.000Z')

        try:
            blackout = {
                "environment": args.environment,
                "resource": args.resource,
                "service": args.service,
                "event": args.event,
                "group": args.group,
                "tags": args.tags,
                "startTime": args.start,
                "duration": args.duration
            }
        except Exception as e:
            LOG.error(e)
            sys.exit(1)

        try:
            response = self.api.blackout_alerts(blackout)
        except Exception as e:
            LOG.error(e)
            sys.exit(1)

        if response['status'] == 'ok':
            print(response['blackout'])
        else:
            LOG.error(response['message'])
            sys.exit(1)

    def blackouts(self, args):
        """List blackout periods; with ``args.purge`` delete the expired ones."""
        response = self.api.get_blackouts()
        blackouts = response['blackouts']

        print('{:<8} {:<16} {:<16} {:<16} {:<16} {:16} {:16} {:24} {:8} {:19} {}'.format('ID', 'CUSTOMER', 'ENVIRONMENT', 'SERVICE', 'RESOURCE', 'EVENT', 'GROUP', 'TAGS', 'STATUS', 'START', 'DURATION'))

        for blackout in blackouts:
            start_time = datetime.strptime(blackout['startTime'], '%Y-%m-%dT%H:%M:%S.%fZ')
            tz = pytz.timezone(args.timezone)

            if args.purge and blackout['status'] == 'expired':
                response = self.api.delete_blackout(blackout['id'])
                if response['status'] == 'ok':
                    blackout['status'] = 'deleted'
                else:
                    blackout['status'] = 'error'

            print('{:<8} {:<16} {:<16} {:16} {:16} {:16} {:16} {:24} {:8} {} {}s'.format(
                blackout['id'][:8],
                blackout.get('customer', '*'),
                blackout.get('environment', '*'),
                ','.join(blackout.get('service', '*')),
                blackout.get('resource', '*'),
                blackout.get('event', '*'),
                blackout.get('group', '*'),
                ' '.join(blackout.get('tags', '*')),
                blackout['status'],
                start_time.replace(tzinfo=pytz.UTC).astimezone(tz).strftime('%Y/%m/%d %H:%M:%S'),
                blackout['duration']
            ))

    @staticmethod
    def _build(filters, from_date=None, to_date=None):
        """Turn ``key=value`` filter strings into a list of query tuples.

        Entries without ``=`` are dropped. ``from-date`` and ``to-date`` are
        appended when given, and ``sort-by=lastReceiveTime`` is appended
        unless the filters already contain a ``sort-by`` key.
        """
        if filters:
            query = [tuple(x.split('=', 1)) for x in filters if '=' in x]
        else:
            query = list()

        if from_date:
            query.append(('from-date', from_date))

        if to_date:
            query.append(('to-date', to_date))

        # The query holds (key, value) tuples, so the key must be compared
        # explicitly; testing the bare string against the list never matches.
        if not any(item[0] == 'sort-by' for item in query):
            query.append(('sort-by', 'lastReceiveTime'))

        return query

    def _alerts(self, filters, from_date=None, to_date=None):
        """Fetch alerts for ``filters``; exit with status 1 on any error."""
        query = self._build(filters, from_date, to_date)
        try:
            response = self.api.get_alerts(query)
        except Exception as e:
            LOG.error(e)
            sys.exit(1)

        if response['status'] == "error":
            LOG.error(response['message'])
            sys.exit(1)

        return response

    def _counts(self, filters, from_date=None, to_date=None):
        """Fetch alert counts for ``filters``; exit with status 1 on any error."""
        query = self._build(filters, from_date, to_date)
        try:
            response = self.api.get_counts(query)
        except Exception as e:
            LOG.error(e)
            sys.exit(1)

        if response['status'] == "error":
            LOG.error(response['message'])
            sys.exit(1)

        return response

    def _history(self, filters, from_date=None, to_date=None):
        """Fetch alert history for ``filters``; exit with status 1 on any error."""
        query = self._build(filters, from_date, to_date)
        try:
            response = self.api.get_history(query)
        except Exception as e:
            LOG.error(e)
            sys.exit(1)

        if response['status'] == "error":
            LOG.error(response['message'])
            sys.exit(1)

        return response

    def _heartbeats(self):
        """Fetch all heartbeats; exit with status 1 on any error."""
        try:
            response = self.api.get_heartbeats()
        except Exception as e:
            LOG.error(e)
            sys.exit(1)

        if response['status'] == "error":
            LOG.error(response['message'])
            sys.exit(1)

        return response

    def _status(self):
        """Fetch the server status document; exit with status 1 if the call raises."""
        try:
            response = self.api.get_status()
        except Exception as e:
            LOG.error(e)
            sys.exit(1)

        return response

    def help(self, args):
        """Do nothing; placeholder for the ``help`` sub-command."""
        pass

    def uptime(self, args):
        """Print the server's clock time and how long it has been up."""
        response = self._status()

        now = datetime.fromtimestamp(int(response['time']) / 1000.0)
        # Work on the timedelta directly: adding it to datetime(1, 1, 1) and
        # reading .day wraps as soon as the uptime exceeds one calendar month.
        up = timedelta(seconds=int(response['uptime']) / 1000.0)
        hours, remainder = divmod(up.seconds, 3600)
        minutes = remainder // 60

        print('{0} up {1} days {2:02d}:{3:02d}'.format(
            now.strftime('%H:%M'),
            up.days,
            hours,
            minutes
        ))

    def version(self, args):
        """Print the server, client and ``requests`` library versions."""
        response = self._status()

        print('{0} {1}'.format(
            response['application'],
            response['version'],
        ))
        print('alerta client {0}'.format(__version__))
        print('requests {0}'.format(requests.__version__))