class AlertCommand(object):
    """Handlers for the command-line sub-commands.

    Every public method receives the parsed argument namespace, does its
    work through ``self.api`` and prints the result to stdout. Failures are
    reported with ``LOG.error`` and end the process via ``sys.exit(1)``.
    """

    def __init__(self):
        self.api = ApiClient()

    def set(self, endpoint, key):
        """Rebind ``self.api`` to a client for *endpoint* using *key*."""
        self.api = ApiClient(endpoint=endpoint, key=key)

    def send(self, args):
        """Create an ``Alert`` from *args*, send it and print a summary.

        Prints ``<id> (<info>)`` when the response status is ``'ok'``.
        Logs the error and exits with status 1 if the alert cannot be built,
        the request raises, or the response status is anything else.
        """
        try:
            alert = Alert(
                resource=args.resource,
                event=args.event,
                environment=args.environment,
                severity=args.severity,
                correlate=args.correlate,
                status=args.status,
                service=args.service,
                group=args.group,
                value=args.value,
                text=args.text,
                tags=args.tags,
                # Split on the first '=' only so that attribute values may
                # themselves contain '='.
                attributes=dict(
                    attrib.split('=', 1) for attrib in args.attributes
                ),
                origin=args.origin,
                event_type=args.event_type,
                timeout=args.timeout,
                raw_data=args.raw_data
            )
        except Exception as e:
            LOG.error(e)
            sys.exit(1)

        response = self._request(self.api.send, alert)

        if response['status'] != 'ok':
            LOG.error(response['message'])
            sys.exit(1)

        if 'alert' not in response:
            info = response['message']
        elif response['alert']['repeat']:
            info = "%s duplicates" % response['alert']['duplicateCount']
        else:
            info = "%s -> %s" % (response['alert']['previousSeverity'],
                                 response['alert']['severity'])
        print("{} ({})".format(response['id'], info))

    def heartbeat(self, args):
        """Send a ``Heartbeat`` built from *args* and print its id."""
        try:
            heartbeat = Heartbeat(
                origin=args.origin,
                tags=args.tags,
                timeout=args.timeout
            )
        except Exception as e:
            LOG.error(e)
            sys.exit(1)

        response = self._request(self.api.send, heartbeat)

        if response['status'] == 'ok':
            print(response['id'])
        else:
            LOG.error(response['message'])
            sys.exit(1)

    def query(self, args, from_date=None):
        """Print alerts matching ``args.filters``.

        With ``args.output == "json"`` the alert list is dumped as JSON and
        the process exits with status 0. Otherwise the alerts are printed in
        the reverse of the order returned by the API: a summary line and a
        text line each, followed by a detail section when ``args.details``
        is set.

        Returns ``response['lastTime']`` (``''`` when absent); ``watch``
        feeds that value back in as *from_date*.
        """
        response = self._alerts(args.filters, from_date)
        alerts = response['alerts']

        if args.output == "json":
            print(json.dumps(alerts, indent=4))
            sys.exit(0)

        for alert in reversed(alerts):
            a = AlertDocument.parse_alert(alert)
            line_color = ''
            end_color = _ENDC
            if args.color:
                line_color = _COLOR_MAP.get(a.severity, _COLOR_MAP['unknown'])

            # Defaults bind the current colours so every line of this alert
            # is wrapped the same way.
            def show(text, prefix=line_color, suffix=end_color):
                print(prefix + text + suffix)

            show('{0}|{1}|{2}|{3:5d}|{4}|{5:<5s}|{6:<10s}|{7:<18s}|{8:12s}|{9:16s}|{10:12s}'.format(
                a.id[0:8],
                a.get_date('last_receive_time', 'local', args.timezone),
                a.severity,
                a.duplicate_count,
                a.customer or "-",
                a.environment,
                ','.join(a.service),
                a.resource,
                a.group,
                a.event,
                a.value))
            show(' |{}'.format(a.text))

            if not args.details:
                continue

            show(' severity | {} -> {}'.format(a.previous_severity, a.severity))
            show(' trend | {}'.format(a.trend_indication))
            show(' status | {}'.format(a.status))
            show(' resource | {}'.format(a.resource))
            show(' group | {}'.format(a.group))
            show(' event | {}'.format(a.event))
            show(' value | {}'.format(a.value))
            show(' tags | {}'.format(' '.join(a.tags)))

            for key, value in a.attributes.items():
                show(' {} | {}'.format(key.ljust(10), value))

            # total_seconds() covers the whole interval; .microseconds alone
            # would silently drop any full seconds of latency.
            latency = a.receive_time - a.create_time
            latency_ms = int(latency.total_seconds() * 1000)

            show(' time created | {}'.format(
                a.get_date('create_time', 'iso', args.timezone)))
            show(' time received | {}'.format(
                a.get_date('receive_time', 'iso', args.timezone)))
            show(' last received | {}'.format(
                a.get_date('last_receive_time', 'iso', args.timezone)))
            show(' latency | {}ms'.format(latency_ms))
            show(' timeout | {}s'.format(a.timeout))

            show(' alert id | {}'.format(a.id))
            show(' last recv id | {}'.format(a.last_receive_id))
            show(' customer | {}'.format(a.customer))
            show(' environment | {}'.format(a.environment))
            show(' service | {}'.format(','.join(a.service)))
            show(' resource | {}'.format(a.resource))
            show(' type | {}'.format(a.event_type))
            show(' repeat | {}'.format(a.repeat))
            show(' origin | {}'.format(a.origin))
            show(' correlate | {}'.format(','.join(a.correlate)))

        return response.get('lastTime', '')

    def watch(self, args):
        """Run ``query`` every two seconds until interrupted."""
        from_date = None
        while True:
            from_date = self.query(args, from_date)
            try:
                time.sleep(2)
            except (KeyboardInterrupt, SystemExit):
                sys.exit(0)

    def top(self, args):
        """Run the ``Screen`` display until it fails or is interrupted."""
        screen = Screen(endpoint=args.endpoint, key=args.key)
        try:
            screen.run()
        except RuntimeError as e:
            screen._reset()
            print(e)
            sys.exit(1)
        except (KeyboardInterrupt, SystemExit):
            screen.w.running = False
            screen._reset()
            print('Exiting...')
            sys.exit(0)

    def raw(self, args):
        """Print the ``rawData`` field of each alert matching the filters."""
        response = self._alerts(args.filters)
        alerts = response['alerts']

        if args.output == "json":
            print(json.dumps(alerts, indent=4))
            sys.exit(0)

        for alert in reversed(alerts):
            line_color = ''
            end_color = _ENDC
            print(line_color + '%s' % alert['rawData'] + end_color)

    def history(self, args):
        """Print severity and status changes for alerts matching the filters."""
        response = self._history(args.filters)
        history = response['history']

        if args.output == "json":
            print(json.dumps(history, indent=4))
            sys.exit(0)

        row_format = '%s|%s|%s|%s|%-5s|%-10s|%-18s|%s|%s|%s|%s'

        for hist in history:
            line_color = ''
            end_color = _ENDC

            update_time = datetime.strptime(
                hist.get('updateTime', None), '%Y-%m-%dT%H:%M:%S.%fZ')
            stamp = update_time.strftime('%Y/%m/%d %H:%M:%S')

            if 'severity' in hist:
                if args.color:
                    line_color = _COLOR_MAP.get(hist['severity'],
                                                _COLOR_MAP['unknown'])
                print(line_color + row_format % (
                    hist['id'][0:8],
                    stamp,
                    hist['severity'],
                    hist['customer'],
                    hist['environment'],
                    ','.join(hist['service']),
                    hist['resource'],
                    hist['group'],
                    hist['event'],
                    hist['value'],
                    hist['text']
                ) + end_color)

            if 'status' in hist:
                # Status entries have no value of their own, hence 'n/a'.
                print(line_color + row_format % (
                    hist['id'][0:8],
                    stamp,
                    hist['status'],
                    hist['customer'],
                    hist['environment'],
                    ','.join(hist['service']),
                    hist['resource'],
                    hist['group'],
                    hist['event'],
                    'n/a',
                    hist['text']
                ) + end_color)

    def _apply_with_progress(self, alerts, total, action):
        """Call ``action(alert['id'])`` per alert, redrawing a progress counter.

        Any exception from *action* is logged and ends the process with
        status 1.
        """
        for i, alert in enumerate(alerts):
            # total may be 0 while alerts is non-empty when the count and the
            # alert list come from separate requests; avoid dividing by zero.
            pct = int(100.0 * i / total) if total else 0
            progress = "%3d%% (%d/%d)" % (pct, i, total)
            sys.stdout.write(progress)
            sys.stdout.flush()
            # Back up over what was just written so the next update
            # overwrites it in place.
            sys.stdout.write("\b" * len(progress))
            try:
                action(alert['id'])
            except Exception as e:
                print()
                LOG.error(e)
                sys.exit(1)

        sys.stdout.write("100%% (%d/%d), done.\n" % (total, total))

    def _tag_update(self, args, label, action):
        """Shared body of ``tag``/``untag``: one request gives count and alerts."""
        sys.stdout.write("Counting alerts: ")
        response = self._alerts(args.filters)
        alerts = response['alerts']
        total = response['total']
        sys.stdout.write("%s, done.\n" % total)

        sys.stdout.write("%s alerts: " % label)
        self._apply_with_progress(
            alerts, total, lambda alert_id: action(alert_id, args.tags))

    def _status_update(self, args, label, action):
        """Shared body of ``ack``/``unack``/``close``/``delete``.

        The total comes from ``_counts`` and the alerts from a separate
        ``_alerts`` request.
        """
        sys.stdout.write("Counting alerts: ")
        response = self._counts(args.filters)
        total = response['total']
        sys.stdout.write("%s, done.\n" % total)

        sys.stdout.write("%s alerts: " % label)
        response = self._alerts(args.filters)
        alerts = response['alerts']
        self._apply_with_progress(alerts, total, action)

    def tag(self, args):
        """Add ``args.tags`` to every alert matching ``args.filters``."""
        self._tag_update(args, "Tagging", self.api.tag_alert)

    def untag(self, args):
        """Remove ``args.tags`` from every alert matching ``args.filters``."""
        self._tag_update(args, "Un-tagging", self.api.untag_alert)

    def ack(self, args):
        """Call ``api.ack_alert`` for every alert matching ``args.filters``."""
        self._status_update(args, "Acking", self.api.ack_alert)

    def unack(self, args):
        """Call ``api.unack_alert`` for every alert matching ``args.filters``."""
        self._status_update(args, "un-Acking", self.api.unack_alert)

    def close(self, args):
        """Call ``api.close_alert`` for every alert matching ``args.filters``."""
        self._status_update(args, "Closing", self.api.close_alert)

    def delete(self, args):
        """Call ``api.delete_alert`` for every alert matching ``args.filters``."""
        self._status_update(args, "Deleting", self.api.delete_alert)

    def status(self, args):
        """Print a table of the metrics returned by the status request."""
        response = self._status()
        metrics = response['metrics']

        print('{:<28} {:<8} {:<26} {:10} {}'.format(
            'METRIC', 'TYPE', 'NAME', 'VALUE', 'AVG'))

        for metric in [m for m in metrics
                       if m['type'] in ['gauge', 'counter', 'timer']]:
            name = metric['group'] + '.' + metric['name']
            if metric['type'] == 'gauge':
                print('{0:<28} {1:<8} {2:<26} {3:<10}'.format(
                    metric['title'], metric['type'], name, metric['value']))
            else:
                value = metric.get('count', 0)
                count = int(value)
                # A metric that has never fired has count 0 (or no count or
                # totalTime at all); report a zero average rather than
                # raising.
                if count:
                    avg = int(metric.get('totalTime', 0)) * 1.0 / count
                else:
                    avg = 0.0
                print('{0:<28} {1:<8} {2:<26} {3:<10} {4:-3.2f} ms'.format(
                    metric['title'], metric['type'], name, value, avg))

        for metric in [m for m in metrics if m['type'] == 'text']:
            print('{0:<28} {1:<8} {2:<26} {3:<10}'.format(
                metric['title'], metric['type'],
                metric['group'] + '.' + metric['name'], metric['value']))

    def heartbeats(self, args):
        """Print a table of heartbeats, optionally raising alerts for them.

        A ``*`` marks a latency above ``MAX_LATENCY`` or a heartbeat whose
        age exceeds its timeout. When ``args.alert`` is set, one
        HeartbeatFail / HeartbeatSlow / HeartbeatOK alert is sent per
        heartbeat through ``send``.
        """
        response = self._heartbeats()
        heartbeats = response['heartbeats']

        print('{:<28} {:<26} {:<19} {:>8} {:7} {}'.format(
            'ORIGIN', 'TAGS', 'CREATED', 'LATENCY', 'TIMEOUT', 'SINCE'))

        for heartbeat in heartbeats:
            hb = HeartbeatDocument.parse_heartbeat(heartbeat)
            # total_seconds() spans the whole interval; .microseconds and
            # .seconds only hold the sub-second / sub-day remainder and would
            # under-report long latencies and long-dead heartbeats.
            latency = int(
                (hb.receive_time - hb.create_time).total_seconds() * 1000)
            since = datetime.utcnow() - hb.receive_time
            # Drop the microseconds so the age prints as H:MM:SS.
            since = since - timedelta(microseconds=since.microseconds)

            latency_exceeded = latency > MAX_LATENCY
            timeout_exceeded = since.total_seconds() > hb.timeout

            print('{:<28} {:<26} {} {}{:6}ms {:6}s {}{}'.format(
                hb.origin,
                ' '.join(hb.tags),
                hb.get_date('create_time', 'local', args.timezone),
                '*' if latency_exceeded else ' ',
                latency,
                hb.timeout,
                '*' if timeout_exceeded else ' ',
                since
            ))

            if not args.alert:
                continue

            if timeout_exceeded:
                event = 'HeartbeatFail'
                severity = 'major'
                value = '{}'.format(since)
                text = 'Heartbeat not received in {} seconds'.format(
                    hb.timeout)
            elif latency_exceeded:
                event = 'HeartbeatSlow'
                severity = 'major'
                value = '{}ms'.format(latency)
                text = 'Heartbeat took more than {}ms to be processed'.format(
                    MAX_LATENCY)
            else:
                event = 'HeartbeatOK'
                severity = 'normal'
                value = ''
                text = 'Heartbeat OK'

            alert = Alert(
                resource=hb.origin,
                event=event,
                correlate=['HeartbeatFail', 'HeartbeatSlow', 'HeartbeatOK'],
                group='System',
                environment='Production',
                service=['Alerta'],
                severity=severity,
                value=value,
                text=text,
                tags=hb.tags,
                type='heartbeatAlert'
            )
            # NOTE(review): send() reads its fields as attributes of its
            # argument, so this relies on Alert exposing the same attribute
            # names as the parsed CLI namespace -- confirm against Alert.
            self.send(alert)

    def blackout(self, args):
        """Create a blackout from *args* and print the returned blackout."""
        # Give a start time without fractional seconds a '.000' part.
        if '.' not in args.start:
            args.start = args.start.replace('Z', '.000Z')

        try:
            blackout = {
                "environment": args.environment,
                "resource": args.resource,
                "service": args.service,
                "event": args.event,
                "group": args.group,
                "tags": args.tags,
                "startTime": args.start,
                "duration": args.duration
            }
        except Exception as e:
            LOG.error(e)
            sys.exit(1)

        response = self._request(self.api.blackout_alerts, blackout)

        if response['status'] == 'ok':
            print(response['blackout'])
        else:
            LOG.error(response['message'])
            sys.exit(1)

    def blackouts(self, args):
        """Print a table of blackouts; with ``args.purge`` delete expired ones."""
        # Same failure handling as every other API call in this class.
        response = self._request(self.api.get_blackouts)
        blackouts = response['blackouts']

        print('{:<8} {:<16} {:<16} {:<16} {:<16} {:16} {:16} {:24} {:8} {:19} {}'.format(
            'ID', 'CUSTOMER', 'ENVIRONMENT', 'SERVICE', 'RESOURCE', 'EVENT',
            'GROUP', 'TAGS', 'STATUS', 'START', 'DURATION'))

        for blackout in blackouts:
            start_time = datetime.strptime(
                blackout['startTime'], '%Y-%m-%dT%H:%M:%S.%fZ')
            tz = pytz.timezone(args.timezone)

            if args.purge and blackout['status'] == 'expired':
                response = self.api.delete_blackout(blackout['id'])
                if response['status'] == 'ok':
                    blackout['status'] = 'deleted'
                else:
                    blackout['status'] = 'error'

            print('{:<8} {:<16} {:<16} {:16} {:16} {:16} {:16} {:24} {:8} {} {}s'.format(
                blackout['id'][:8],
                blackout.get('customer', '*'),
                blackout.get('environment', '*'),
                ','.join(blackout.get('service', '*')),
                blackout.get('resource', '*'),
                blackout.get('event', '*'),
                blackout.get('group', '*'),
                ' '.join(blackout.get('tags', '*')),
                blackout['status'],
                start_time.replace(tzinfo=pytz.UTC).astimezone(tz).strftime(
                    '%Y/%m/%d %H:%M:%S'),
                blackout['duration']
            ))

    @staticmethod
    def _build(filters, from_date=None, to_date=None):
        """Turn ``key=value`` filter strings into a list of query tuples.

        Filters without ``'='`` are ignored. ``from-date`` / ``to-date`` are
        appended when given, and ``sort-by=lastReceiveTime`` is appended
        unless the filters already contain a ``sort-by`` key.
        """
        if filters:
            query = [tuple(x.split('=', 1)) for x in filters if '=' in x]
        else:
            query = list()

        if from_date:
            query.append(('from-date', from_date))

        if to_date:
            query.append(('to-date', to_date))

        # query holds (key, value) tuples, so look at the keys; testing the
        # bare string against the list never matches.
        if not any(name == 'sort-by' for name, _ in query):
            query.append(('sort-by', 'lastReceiveTime'))

        return query

    def _request(self, api_call, *params):
        """Return ``api_call(*params)``; log and exit(1) if it raises."""
        try:
            return api_call(*params)
        except Exception as e:
            LOG.error(e)
            sys.exit(1)

    @staticmethod
    def _checked(response):
        """Return *response*, or log its message and exit(1) on ``"error"``."""
        if response['status'] == "error":
            LOG.error(response['message'])
            sys.exit(1)
        return response

    def _alerts(self, filters, from_date=None, to_date=None):
        """Fetch the alerts matching the given filters and date range."""
        query = self._build(filters, from_date, to_date)
        return self._checked(self._request(self.api.get_alerts, query))

    def _counts(self, filters, from_date=None, to_date=None):
        """Fetch the counts for the given filters and date range."""
        query = self._build(filters, from_date, to_date)
        return self._checked(self._request(self.api.get_counts, query))

    def _history(self, filters, from_date=None, to_date=None):
        """Fetch the history for the given filters and date range."""
        query = self._build(filters, from_date, to_date)
        return self._checked(self._request(self.api.get_history, query))

    def _heartbeats(self):
        """Fetch all heartbeats."""
        return self._checked(self._request(self.api.get_heartbeats))

    def _status(self):
        """Fetch the status response; its ``status`` field is not checked."""
        return self._request(self.api.get_status)

    def help(self, args):
        pass

    def uptime(self, args):
        """Print the reported time and uptime as ``HH:MM up D days HH:MM``."""
        response = self._status()

        # 'time' and 'uptime' are divided by 1000 before use, i.e. they are
        # treated as milliseconds.
        now = datetime.fromtimestamp(int(response['time']) / 1000.0)
        up = timedelta(milliseconds=int(response['uptime']))
        # Read the fields off the timedelta directly; adding it to a dummy
        # date and using the day-of-month wraps once uptime passes a month.
        hours, remainder = divmod(up.seconds, 3600)
        minutes = remainder // 60

        print('{0} up {1} days {2:02d}:{3:02d}'.format(
            now.strftime('%H:%M'),
            up.days,
            hours,
            minutes
        ))

    def version(self, args):
        """Print the application, client and ``requests`` versions."""
        response = self._status()

        print('{0} {1}'.format(
            response['application'],
            response['version'],
        ))
        print('alerta client {0}'.format(__version__))
        print('requests {0}'.format(requests.__version__))
class AlertCommand(object):
    """Handlers for the command-line sub-commands.

    Every public method receives the parsed argument namespace, does its
    work through ``self.api`` and prints the result to stdout. Failures are
    reported with ``LOG.error`` and end the process via ``sys.exit(1)``.
    """

    def __init__(self):
        self.api = ApiClient()

    def set(self, endpoint, key):
        """Rebind ``self.api`` to a client for *endpoint* using *key*."""
        self.api = ApiClient(endpoint=endpoint, key=key)

    def send(self, args):
        """Create an ``Alert`` from *args*, send it and print a summary.

        Prints ``<id> (<info>)`` when the response status is ``'ok'``.
        Logs the error and exits with status 1 if the alert cannot be built,
        the request raises, or the response status is anything else.
        """
        try:
            alert = Alert(
                resource=args.resource,
                event=args.event,
                environment=args.environment,
                severity=args.severity,
                correlate=args.correlate,
                status=args.status,
                service=args.service,
                group=args.group,
                value=args.value,
                text=args.text,
                tags=args.tags,
                # Split on the first '=' only so that attribute values may
                # themselves contain '='.
                attributes=dict(
                    attrib.split('=', 1) for attrib in args.attributes
                ),
                origin=args.origin,
                event_type=args.event_type,
                timeout=args.timeout,
                raw_data=args.raw_data
            )
        except Exception as e:
            LOG.error(e)
            sys.exit(1)

        response = self._request(self.api.send, alert)

        if response['status'] != 'ok':
            LOG.error(response['message'])
            sys.exit(1)

        if 'alert' not in response:
            info = response['message']
        elif response['alert']['repeat']:
            info = "%s duplicates" % response['alert']['duplicateCount']
        else:
            info = "%s -> %s" % (response['alert']['previousSeverity'],
                                 response['alert']['severity'])
        print("{} ({})".format(response['id'], info))

    def heartbeat(self, args):
        """Send a ``Heartbeat`` built from *args* and print its id."""
        try:
            heartbeat = Heartbeat(
                origin=args.origin,
                tags=args.tags,
                timeout=args.timeout
            )
        except Exception as e:
            LOG.error(e)
            sys.exit(1)

        response = self._request(self.api.send, heartbeat)

        if response['status'] == 'ok':
            print(response['id'])
        else:
            LOG.error(response['message'])
            sys.exit(1)

    def query(self, args, from_date=None):
        """Print alerts matching ``args.filters``.

        With ``args.output == "json"`` the alert list is dumped as JSON and
        the process exits with status 0. Otherwise the alerts are printed in
        the reverse of the order returned by the API: a summary line and a
        text line each, followed by a detail section when ``args.details``
        is set.

        Returns ``response['lastTime']`` (``''`` when absent); ``watch``
        feeds that value back in as *from_date*.
        """
        response = self._alerts(args.filters, from_date)
        alerts = response['alerts']

        if args.output == "json":
            print(json.dumps(alerts, indent=4))
            sys.exit(0)

        for alert in reversed(alerts):
            a = AlertDocument.parse_alert(alert)
            line_color = ''
            end_color = _ENDC
            if args.color:
                line_color = _COLOR_MAP.get(a.severity, _COLOR_MAP['unknown'])

            # Defaults bind the current colours so every line of this alert
            # is wrapped the same way.
            def show(text, prefix=line_color, suffix=end_color):
                print(prefix + text + suffix)

            show('{0}|{1}|{2}|{3:5d}|{4}|{5:<5s}|{6:<10s}|{7:<18s}|{8:12s}|{9:16s}|{10:12s}'.format(
                a.id[0:8],
                a.get_date('last_receive_time', 'local', args.timezone),
                a.severity,
                a.duplicate_count,
                a.customer or "-",
                a.environment,
                ','.join(a.service),
                a.resource,
                a.group,
                a.event,
                a.value))
            show(' |{}'.format(a.text))

            if not args.details:
                continue

            show(' severity | {} -> {}'.format(a.previous_severity, a.severity))
            show(' trend | {}'.format(a.trend_indication))
            show(' status | {}'.format(a.status))
            show(' resource | {}'.format(a.resource))
            show(' group | {}'.format(a.group))
            show(' event | {}'.format(a.event))
            show(' value | {}'.format(a.value))
            show(' tags | {}'.format(' '.join(a.tags)))

            for key, value in a.attributes.items():
                show(' {} | {}'.format(key.ljust(10), value))

            # total_seconds() covers the whole interval; .microseconds alone
            # would silently drop any full seconds of latency.
            latency = a.receive_time - a.create_time
            latency_ms = int(latency.total_seconds() * 1000)

            show(' time created | {}'.format(
                a.get_date('create_time', 'iso', args.timezone)))
            show(' time received | {}'.format(
                a.get_date('receive_time', 'iso', args.timezone)))
            show(' last received | {}'.format(
                a.get_date('last_receive_time', 'iso', args.timezone)))
            show(' latency | {}ms'.format(latency_ms))
            show(' timeout | {}s'.format(a.timeout))

            show(' alert id | {}'.format(a.id))
            show(' last recv id | {}'.format(a.last_receive_id))
            show(' customer | {}'.format(a.customer))
            show(' environment | {}'.format(a.environment))
            show(' service | {}'.format(','.join(a.service)))
            show(' resource | {}'.format(a.resource))
            show(' type | {}'.format(a.event_type))
            show(' repeat | {}'.format(a.repeat))
            show(' origin | {}'.format(a.origin))
            show(' correlate | {}'.format(','.join(a.correlate)))

        return response.get('lastTime', '')

    def watch(self, args):
        """Run ``query`` every two seconds until interrupted."""
        from_date = None
        while True:
            from_date = self.query(args, from_date)
            try:
                time.sleep(2)
            except (KeyboardInterrupt, SystemExit):
                sys.exit(0)

    def top(self, args):
        """Run the ``Screen`` display until it fails or is interrupted."""
        screen = Screen(endpoint=args.endpoint, key=args.key)
        try:
            screen.run()
        except RuntimeError as e:
            screen._reset()
            print(e)
            sys.exit(1)
        except (KeyboardInterrupt, SystemExit):
            screen.w.running = False
            screen._reset()
            print('Exiting...')
            sys.exit(0)

    def raw(self, args):
        """Print the ``rawData`` field of each alert matching the filters."""
        response = self._alerts(args.filters)
        alerts = response['alerts']

        if args.output == "json":
            print(json.dumps(alerts, indent=4))
            sys.exit(0)

        for alert in reversed(alerts):
            line_color = ''
            end_color = _ENDC
            print(line_color + '%s' % alert['rawData'] + end_color)

    def history(self, args):
        """Print severity and status changes for alerts matching the filters."""
        response = self._history(args.filters)
        history = response['history']

        if args.output == "json":
            print(json.dumps(history, indent=4))
            sys.exit(0)

        row_format = '%s|%s|%s|%s|%-5s|%-10s|%-18s|%s|%s|%s|%s'

        for hist in history:
            line_color = ''
            end_color = _ENDC

            update_time = datetime.strptime(
                hist.get('updateTime', None), '%Y-%m-%dT%H:%M:%S.%fZ')
            stamp = update_time.strftime('%Y/%m/%d %H:%M:%S')

            if 'severity' in hist:
                if args.color:
                    line_color = _COLOR_MAP.get(hist['severity'],
                                                _COLOR_MAP['unknown'])
                print(line_color + row_format % (
                    hist['id'][0:8],
                    stamp,
                    hist['severity'],
                    hist['customer'],
                    hist['environment'],
                    ','.join(hist['service']),
                    hist['resource'],
                    hist['group'],
                    hist['event'],
                    hist['value'],
                    hist['text']
                ) + end_color)

            if 'status' in hist:
                # Status entries have no value of their own, hence 'n/a'.
                print(line_color + row_format % (
                    hist['id'][0:8],
                    stamp,
                    hist['status'],
                    hist['customer'],
                    hist['environment'],
                    ','.join(hist['service']),
                    hist['resource'],
                    hist['group'],
                    hist['event'],
                    'n/a',
                    hist['text']
                ) + end_color)

    def _apply_with_progress(self, alerts, total, action):
        """Call ``action(alert['id'])`` per alert, redrawing a progress counter.

        Any exception from *action* is logged and ends the process with
        status 1.
        """
        for i, alert in enumerate(alerts):
            # total may be 0 while alerts is non-empty when the count and the
            # alert list come from separate requests; avoid dividing by zero.
            pct = int(100.0 * i / total) if total else 0
            progress = "%3d%% (%d/%d)" % (pct, i, total)
            sys.stdout.write(progress)
            sys.stdout.flush()
            # Back up over what was just written so the next update
            # overwrites it in place.
            sys.stdout.write("\b" * len(progress))
            try:
                action(alert['id'])
            except Exception as e:
                print()
                LOG.error(e)
                sys.exit(1)

        sys.stdout.write("100%% (%d/%d), done.\n" % (total, total))

    def _tag_update(self, args, label, action):
        """Shared body of ``tag``/``untag``: one request gives count and alerts."""
        sys.stdout.write("Counting alerts: ")
        response = self._alerts(args.filters)
        alerts = response['alerts']
        total = response['total']
        sys.stdout.write("%s, done.\n" % total)

        sys.stdout.write("%s alerts: " % label)
        self._apply_with_progress(
            alerts, total, lambda alert_id: action(alert_id, args.tags))

    def _status_update(self, args, label, action):
        """Shared body of ``ack``/``unack``/``close``/``delete``.

        The total comes from ``_counts`` and the alerts from a separate
        ``_alerts`` request.
        """
        sys.stdout.write("Counting alerts: ")
        response = self._counts(args.filters)
        total = response['total']
        sys.stdout.write("%s, done.\n" % total)

        sys.stdout.write("%s alerts: " % label)
        response = self._alerts(args.filters)
        alerts = response['alerts']
        self._apply_with_progress(alerts, total, action)

    def tag(self, args):
        """Add ``args.tags`` to every alert matching ``args.filters``."""
        self._tag_update(args, "Tagging", self.api.tag_alert)

    def untag(self, args):
        """Remove ``args.tags`` from every alert matching ``args.filters``."""
        self._tag_update(args, "Un-tagging", self.api.untag_alert)

    def ack(self, args):
        """Call ``api.ack_alert`` for every alert matching ``args.filters``."""
        self._status_update(args, "Acking", self.api.ack_alert)

    def unack(self, args):
        """Call ``api.unack_alert`` for every alert matching ``args.filters``."""
        self._status_update(args, "un-Acking", self.api.unack_alert)

    def close(self, args):
        """Call ``api.close_alert`` for every alert matching ``args.filters``."""
        self._status_update(args, "Closing", self.api.close_alert)

    def delete(self, args):
        """Call ``api.delete_alert`` for every alert matching ``args.filters``."""
        self._status_update(args, "Deleting", self.api.delete_alert)

    def status(self, args):
        """Print a table of the metrics returned by the status request."""
        response = self._status()
        metrics = response['metrics']

        print('{:<28} {:<8} {:<26} {:10} {}'.format(
            'METRIC', 'TYPE', 'NAME', 'VALUE', 'AVG'))

        for metric in [m for m in metrics
                       if m['type'] in ['gauge', 'counter', 'timer']]:
            name = metric['group'] + '.' + metric['name']
            if metric['type'] == 'gauge':
                print('{0:<28} {1:<8} {2:<26} {3:<10}'.format(
                    metric['title'], metric['type'], name, metric['value']))
            else:
                value = metric.get('count', 0)
                count = int(value)
                # A metric that has never fired has count 0 (or no count or
                # totalTime at all); report a zero average rather than
                # raising.
                if count:
                    avg = int(metric.get('totalTime', 0)) * 1.0 / count
                else:
                    avg = 0.0
                print('{0:<28} {1:<8} {2:<26} {3:<10} {4:-3.2f} ms'.format(
                    metric['title'], metric['type'], name, value, avg))

        for metric in [m for m in metrics if m['type'] == 'text']:
            print('{0:<28} {1:<8} {2:<26} {3:<10}'.format(
                metric['title'], metric['type'],
                metric['group'] + '.' + metric['name'], metric['value']))

    def heartbeats(self, args):
        """Print a table of heartbeats, optionally raising alerts for them.

        A ``*`` marks a latency above ``MAX_LATENCY`` or a heartbeat whose
        age exceeds its timeout. When ``args.alert`` is set, one
        HeartbeatFail / HeartbeatSlow / HeartbeatOK alert is sent per
        heartbeat through ``send``.
        """
        response = self._heartbeats()
        heartbeats = response['heartbeats']

        print('{:<28} {:<26} {:<19} {:>8} {:7} {}'.format(
            'ORIGIN', 'TAGS', 'CREATED', 'LATENCY', 'TIMEOUT', 'SINCE'))

        for heartbeat in heartbeats:
            hb = HeartbeatDocument.parse_heartbeat(heartbeat)
            # total_seconds() spans the whole interval; .microseconds and
            # .seconds only hold the sub-second / sub-day remainder and would
            # under-report long latencies and long-dead heartbeats.
            latency = int(
                (hb.receive_time - hb.create_time).total_seconds() * 1000)
            since = datetime.utcnow() - hb.receive_time
            # Drop the microseconds so the age prints as H:MM:SS.
            since = since - timedelta(microseconds=since.microseconds)

            latency_exceeded = latency > MAX_LATENCY
            timeout_exceeded = since.total_seconds() > hb.timeout

            print('{:<28} {:<26} {} {}{:6}ms {:6}s {}{}'.format(
                hb.origin,
                ' '.join(hb.tags),
                hb.get_date('create_time', 'local', args.timezone),
                '*' if latency_exceeded else ' ',
                latency,
                hb.timeout,
                '*' if timeout_exceeded else ' ',
                since
            ))

            if not args.alert:
                continue

            if timeout_exceeded:
                event = 'HeartbeatFail'
                severity = 'major'
                value = '{}'.format(since)
                text = 'Heartbeat not received in {} seconds'.format(
                    hb.timeout)
            elif latency_exceeded:
                event = 'HeartbeatSlow'
                severity = 'major'
                value = '{}ms'.format(latency)
                text = 'Heartbeat took more than {}ms to be processed'.format(
                    MAX_LATENCY)
            else:
                event = 'HeartbeatOK'
                severity = 'normal'
                value = ''
                text = 'Heartbeat OK'

            alert = Alert(
                resource=hb.origin,
                event=event,
                correlate=['HeartbeatFail', 'HeartbeatSlow', 'HeartbeatOK'],
                group='System',
                environment='Production',
                service=['Alerta'],
                severity=severity,
                value=value,
                text=text,
                tags=hb.tags,
                type='heartbeatAlert'
            )
            # NOTE(review): send() reads its fields as attributes of its
            # argument, so this relies on Alert exposing the same attribute
            # names as the parsed CLI namespace -- confirm against Alert.
            self.send(alert)

    def blackout(self, args):
        """Create a blackout from *args* and print the returned blackout."""
        # Give a start time without fractional seconds a '.000' part.
        if '.' not in args.start:
            args.start = args.start.replace('Z', '.000Z')

        try:
            blackout = {
                "environment": args.environment,
                "resource": args.resource,
                "service": args.service,
                "event": args.event,
                "group": args.group,
                "tags": args.tags,
                "startTime": args.start,
                "duration": args.duration
            }
        except Exception as e:
            LOG.error(e)
            sys.exit(1)

        response = self._request(self.api.blackout_alerts, blackout)

        if response['status'] == 'ok':
            print(response['blackout'])
        else:
            LOG.error(response['message'])
            sys.exit(1)

    def blackouts(self, args):
        """Print a table of blackouts; with ``args.purge`` delete expired ones."""
        # Same failure handling as every other API call in this class.
        response = self._request(self.api.get_blackouts)
        blackouts = response['blackouts']

        print('{:<8} {:<16} {:<16} {:<16} {:<16} {:16} {:16} {:24} {:8} {:19} {}'.format(
            'ID', 'CUSTOMER', 'ENVIRONMENT', 'SERVICE', 'RESOURCE', 'EVENT',
            'GROUP', 'TAGS', 'STATUS', 'START', 'DURATION'))

        for blackout in blackouts:
            start_time = datetime.strptime(
                blackout['startTime'], '%Y-%m-%dT%H:%M:%S.%fZ')
            tz = pytz.timezone(args.timezone)

            if args.purge and blackout['status'] == 'expired':
                response = self.api.delete_blackout(blackout['id'])
                if response['status'] == 'ok':
                    blackout['status'] = 'deleted'
                else:
                    blackout['status'] = 'error'

            print('{:<8} {:<16} {:<16} {:16} {:16} {:16} {:16} {:24} {:8} {} {}s'.format(
                blackout['id'][:8],
                blackout.get('customer', '*'),
                blackout.get('environment', '*'),
                ','.join(blackout.get('service', '*')),
                blackout.get('resource', '*'),
                blackout.get('event', '*'),
                blackout.get('group', '*'),
                ' '.join(blackout.get('tags', '*')),
                blackout['status'],
                start_time.replace(tzinfo=pytz.UTC).astimezone(tz).strftime(
                    '%Y/%m/%d %H:%M:%S'),
                blackout['duration']
            ))

    @staticmethod
    def _build(filters, from_date=None, to_date=None):
        """Turn ``key=value`` filter strings into a list of query tuples.

        Filters without ``'='`` are ignored. ``from-date`` / ``to-date`` are
        appended when given, and ``sort-by=lastReceiveTime`` is appended
        unless the filters already contain a ``sort-by`` key.
        """
        if filters:
            query = [tuple(x.split('=', 1)) for x in filters if '=' in x]
        else:
            query = list()

        if from_date:
            query.append(('from-date', from_date))

        if to_date:
            query.append(('to-date', to_date))

        # query holds (key, value) tuples, so look at the keys; testing the
        # bare string against the list never matches.
        if not any(name == 'sort-by' for name, _ in query):
            query.append(('sort-by', 'lastReceiveTime'))

        return query

    def _request(self, api_call, *params):
        """Return ``api_call(*params)``; log and exit(1) if it raises."""
        try:
            return api_call(*params)
        except Exception as e:
            LOG.error(e)
            sys.exit(1)

    @staticmethod
    def _checked(response):
        """Return *response*, or log its message and exit(1) on ``"error"``."""
        if response['status'] == "error":
            LOG.error(response['message'])
            sys.exit(1)
        return response

    def _alerts(self, filters, from_date=None, to_date=None):
        """Fetch the alerts matching the given filters and date range."""
        query = self._build(filters, from_date, to_date)
        return self._checked(self._request(self.api.get_alerts, query))

    def _counts(self, filters, from_date=None, to_date=None):
        """Fetch the counts for the given filters and date range."""
        query = self._build(filters, from_date, to_date)
        return self._checked(self._request(self.api.get_counts, query))

    def _history(self, filters, from_date=None, to_date=None):
        """Fetch the history for the given filters and date range."""
        query = self._build(filters, from_date, to_date)
        return self._checked(self._request(self.api.get_history, query))

    def _heartbeats(self):
        """Fetch all heartbeats."""
        return self._checked(self._request(self.api.get_heartbeats))

    def _status(self):
        """Fetch the status response; its ``status`` field is not checked."""
        return self._request(self.api.get_status)

    def help(self, args):
        pass

    def uptime(self, args):
        """Print the reported time and uptime as ``HH:MM up D days HH:MM``."""
        response = self._status()

        # 'time' and 'uptime' are divided by 1000 before use, i.e. they are
        # treated as milliseconds.
        now = datetime.fromtimestamp(int(response['time']) / 1000.0)
        up = timedelta(milliseconds=int(response['uptime']))
        # Read the fields off the timedelta directly; adding it to a dummy
        # date and using the day-of-month wraps once uptime passes a month.
        hours, remainder = divmod(up.seconds, 3600)
        minutes = remainder // 60

        print('{0} up {1} days {2:02d}:{3:02d}'.format(
            now.strftime('%H:%M'),
            up.days,
            hours,
            minutes
        ))

    def version(self, args):
        """Print the application, client and ``requests`` versions."""
        response = self._status()

        print('{0} {1}'.format(
            response['application'],
            response['version'],
        ))
        print('alerta client {0}'.format(__version__))
        print('requests {0}'.format(requests.__version__))