Example #1
0
class SnmpTrapHandler(object):
    """Forward one snmptrapd notification, read from stdin, to Alerta."""

    def __init__(self):

        # The client is built in run(), after the environment is consulted.
        self.api = None

    def run(self):
        """Read a trap from stdin, send it as an alert, then send a heartbeat.

        The endpoint and API key are taken from the ``ALERTA_ENDPOINT`` and
        ``ALERTA_API_KEY`` environment variables.  Failures while sending are
        logged as warnings and never propagate to the caller.
        """
        endpoint = os.environ.get('ALERTA_ENDPOINT', 'http://localhost:8080')
        key = os.environ.get('ALERTA_API_KEY', None)

        self.api = ApiClient(endpoint=endpoint, key=key)

        data = sys.stdin.read()
        LOG.info('snmptrapd -> %r', data)
        # Decode byte strings only: on Python 2 stdin yields bytes, on
        # Python 3 it already yields text and ``unicode`` does not exist.
        if isinstance(data, bytes):
            data = data.decode('utf-8', 'ignore')
        LOG.debug('unicoded -> %s', data)

        # parse_snmptrap() is defined outside this excerpt; a falsy result
        # means there is nothing to send.
        snmptrapAlert = SnmpTrapHandler.parse_snmptrap(data)

        if snmptrapAlert:
            try:
                self.api.send(snmptrapAlert)
            except Exception as e:
                LOG.warning('Failed to send alert: %s', e)

        # The heartbeat is sent whether or not an alert was produced.
        LOG.debug('Send heartbeat...')
        heartbeat = Heartbeat(tags=[__version__])
        try:
            self.api.send(heartbeat)
        except Exception as e:
            LOG.warning('Failed to send heartbeat: %s', e)
Example #2
0
class SnmpTrapHandler(object):
    """Forward one snmptrapd notification, read from stdin, to Alerta."""

    def __init__(self):

        # The client is built in run(), after the environment is consulted.
        self.api = None

    def run(self):
        """Read a trap from stdin, send it as an alert, then send a heartbeat.

        The endpoint and API key are taken from the ``ALERTA_ENDPOINT`` and
        ``ALERTA_API_KEY`` environment variables.  Failures while sending are
        logged as warnings and never propagate to the caller.
        """
        endpoint = os.environ.get('ALERTA_ENDPOINT', 'http://localhost:8080')
        key = os.environ.get('ALERTA_API_KEY', None)

        self.api = ApiClient(endpoint=endpoint, key=key)

        data = sys.stdin.read()
        LOG.info('snmptrapd -> %r', data)
        # Decode byte strings only: on Python 2 stdin yields bytes, on
        # Python 3 it already yields text and ``unicode`` does not exist.
        if isinstance(data, bytes):
            data = data.decode('utf-8', 'ignore')
        LOG.debug('unicoded -> %s', data)

        # parse_snmptrap() is defined outside this excerpt; a falsy result
        # means there is nothing to send.
        snmptrapAlert = SnmpTrapHandler.parse_snmptrap(data)

        if snmptrapAlert:
            try:
                self.api.send(snmptrapAlert)
            except Exception as e:
                LOG.warning('Failed to send alert: %s', e)

        # The heartbeat is sent whether or not an alert was produced.
        LOG.debug('Send heartbeat...')
        heartbeat = Heartbeat(tags=[__version__])
        try:
            self.api.send(heartbeat)
        except Exception as e:
            LOG.warning('Failed to send heartbeat: %s', e)
Example #3
0
def alert_tunnel_up(outside_ip, status_message, last_status_change,
                    aws_acc=None, gwid=None, gwip=None, vpnid=None):
    '''
    Report tunnel up status to alerta
    only if a down status was sent.

    A "<vpnid>_<gwid>_<outside_ip>.down" marker file under ``tempdir``
    records that a down alert was sent.  When the marker is missing this
    function returns without contacting alerta; otherwise it sends a
    'TunnelUp' alert and removes the marker.

    aws_acc, gwid, gwip and vpnid default to None but are concatenated
    into strings below, so callers must supply all of them.

    Errors from sending the alert or removing the marker are printed,
    not raised.
    '''
    api = ApiClient(endpoint=alerta_endpoint)
    alertres = vpnid + ',' + gwid + ',' + outside_ip
    status_file = tempdir + '/' + alertres.replace(',', '_') + '.down'
    if not os.path.exists(status_file):
        # if file does not exist it means it wasn't down. no point in sending the alert.
        return
    alert = Alert(
        resource=alertres,
        event='TunnelUp',
        correlate=['TunnelDown'],
        group='aws',
        environment='Production',
        service=[aws_acc],
        severity='normal',
        value=status_message,
        text=aws_acc+' : Tunnel '+outside_ip +' up since '+last_status_change+'.'+'endpoint: '+gwip,
        tags=['aws'],
        attributes={'customer': 'The Guardian', 'account' : aws_acc,'GatewayId' : gwid+' [ '+gwip+' ]','vpnId' : vpnid, 'TunnelOutsideIp' : outside_ip}
    )
    try:
        api.send(alert)
        # Only clear the marker once the up alert has been accepted.
        os.remove(status_file)
    except Exception as e:
        print(e)
Example #4
0
def alert(resource, event, text, value, severity, status, go=False):
    """Send an alert for the 'ir' group and optionally echo the result.

    The alert is always sent with environment "Production" and service
    ["localhost"].  Unless ``go`` is true, the alert and the value returned
    by ``api.send`` are printed.
    """
    # NOTE(review): the endpoint and API key are hard-coded in source;
    # consider moving them to configuration.
    api = ApiClient(endpoint='http://alert.localhost/api', key='UszE5hI_hx5pXKcsCP_2&1DIs&9_Ve*k')
    # The original note here read "2h expired", but the timeout passed is
    # '2880' -- NOTE(review): confirm the unit the server expects.
    alert_info = Alert(resource=resource, event=event,text=text,group='ir',environment="Production",service=["localhost"],status=status,timeout='2880',value=value,severity=severity)
    t = api.send(alert_info)
    if not go:
        # Single-argument print() behaves the same on Python 2 and 3.
        print('alert info: %s' % (alert_info,))
        print(t)
Example #5
0
def alert_rawData(resource, event, text, value, rawData, severity, status, go=False):
    """Send an alert for the 'ir' group carrying ``rawData``; optionally echo it.

    The alert is always sent with environment "Production" and service
    ["localhost"].  Unless ``go`` is true, the alert and the value returned
    by ``api.send`` are printed.
    """
    # NOTE(review): the endpoint and API key are hard-coded in source;
    # consider moving them to configuration.
    api = ApiClient(endpoint='http://alert.localhost/api', key='UszE5hI_hx5pXKcsCP_2&1DIs&9_Ve*k')
    # The original note here read "2h expired", but the timeout passed is
    # '2880' -- NOTE(review): confirm the unit the server expects.
    # NOTE(review): other callers in this file pass ``raw_data=``; confirm
    # which keyword this Alert class accepts.
    alert_info = Alert(resource=resource, event=event,text=text,rawData=rawData,group='ir',environment="Production",service=["localhost"],status=status,timeout='2880',value=value,severity=severity)
    t = api.send(alert_info)
    if not go:
        # Single-argument print() behaves the same on Python 2 and 3.
        print('alert info: %s' % (alert_info,))
        print(t)
Example #6
0
def main():
    """Relay events received by a ``Listener`` to the API, forever.

    Events whose name starts with 'TICK' become heartbeats; every other
    event becomes an alert whose severity is derived from the event-name
    suffix.  After each send attempt the listener is told whether it
    succeeded.
    """
    client = ApiClient()
    supervisor = Listener()

    # Checked in order; the first matching suffix decides the severity.
    severity_by_suffix = (
        ('FATAL', 'critical'),
        ('BACKOFF', 'warning'),
        ('EXITED', 'minor'),
    )

    while True:
        supervisor.send_cmd('READY\n')
        headers, body = supervisor.wait()
        event = headers['eventname']

        if not event.startswith('TICK'):
            severity = 'normal'
            for suffix, level in severity_by_suffix:
                if event.endswith(suffix):
                    severity = level
                    break

            message = Alert(
                resource=body['processname'],
                environment='Production',
                service=['supervisord'],
                event=event,
                correlate=[
                    'PROCESS_STATE_STARTING',
                    'PROCESS_STATE_RUNNING',
                    'PROCESS_STATE_BACKOFF',
                    'PROCESS_STATE_STOPPING',
                    'PROCESS_STATE_EXITED',
                    'PROCESS_STATE_STOPPED',
                    'PROCESS_STATE_FATAL',
                    'PROCESS_STATE_UNKNOWN'
                ],
                value='serial=%s' % headers['serial'],
                severity=severity,
                origin=headers['server'],
                text='State changed from %s to %s.' % (body['from_state'], event),
                raw_data='%s\n\n%s' % (json.dumps(headers), json.dumps(body))
            )
        else:
            message = Heartbeat(
                origin='supervisord',
                tags=[headers['ver'], event]
            )

        try:
            client.send(message)
        except Exception as err:
            supervisor.log_stderr(err)
            supervisor.send_cmd('RESULT 4\nFAIL')
        else:
            supervisor.send_cmd('RESULT 2\nOK')
Example #7
0
    def run(self):
        """Read a trap from stdin and send it to the API as an alert.

        The endpoint and API key are taken from the ``ALERTA_ENDPOINT`` and
        ``ALERTA_API_KEY`` environment variables.  A failure while sending
        is logged as a warning and does not propagate.
        """
        endpoint = os.environ.get('ALERTA_ENDPOINT', 'http://localhost:8080')
        key = os.environ.get('ALERTA_API_KEY', None)

        self.api = ApiClient(endpoint=endpoint, key=key)

        data = sys.stdin.read()
        LOG.info('snmptrapd -> %r', data)
        # Decode byte strings only: on Python 2 stdin yields bytes, on
        # Python 3 it already yields text and ``unicode`` does not exist.
        if isinstance(data, bytes):
            data = data.decode('utf-8', 'ignore')
        LOG.debug('unicoded -> %s', data)

        # A falsy parse result means there is nothing to send.
        snmptrapAlert = SnmpTrapHandler.parse_snmptrap(data)

        if snmptrapAlert:
            try:
                self.api.send(snmptrapAlert)
            except Exception as e:
                LOG.warning('Failed to send alert: %s', e)
Example #8
0
    def __init__(self):
        """Create the API client and open the SQS queue.

        Exits the process with status 1 when the SQS connection cannot be
        established or the queue cannot be opened.
        """
        self.api = ApiClient()

        try:
            connection = boto.sqs.connect_to_region(
                AWS_REGION,
                aws_access_key_id=AWS_ACCESS_KEY_ID,
                aws_secret_access_key=AWS_SECRET_ACCESS_KEY)
        except boto.exception.SQSError as e:
            LOG.error('SQS API call failed: %s', e)
            sys.exit(1)

        # connect_to_region() returns None rather than raising when the
        # region name is unknown; fail here instead of with an
        # AttributeError on the create_queue() call below.
        if connection is None:
            LOG.error('SQS API call failed: unknown region %s', AWS_REGION)
            sys.exit(1)

        try:
            self.sqs = connection.create_queue(AWS_SQS_QUEUE)
            # Deliver message bodies as-is instead of the default decoding.
            self.sqs.set_message_class(RawMessage)
        except boto.exception.SQSError as e:
            LOG.error('SQS queue error: %s', e)
            sys.exit(1)
Example #9
0
def alert_tunnel_down(outside_ip, status_message, last_status_change,
                      aws_acc=None, gwid=None, gwip=None, vpnid=None,
                      severity='minor'):
    '''
    Report tunnel down status to alerta
    only if we haven't already sent an alert.

    A "<vpnid>_<gwid>_<outside_ip>.down" marker file under ``tempdir``
    holds a counter of consecutive down reports.  While the marker exists
    and the severity is not 'critical', the counter is incremented and the
    alert is re-sent only when it reaches 5; 'critical' alerts are always
    sent.

    aws_acc, gwid, gwip and vpnid default to None but are concatenated
    into strings below, so callers must supply all of them.

    Errors from sending the alert or writing the marker are printed,
    not raised.
    '''
    api = ApiClient(endpoint=alerta_endpoint)
    alertres = vpnid + ',' + gwid + ',' + outside_ip
    status_file = tempdir + '/' + alertres.replace(',', '_') + '.down'
    count = 1
    if os.path.exists(status_file) and severity != 'critical':
        # if file does exist, it means it's already down and an alert has been sent,
        # check number of counts and send again after 10min
        try:
            count = get_down_count(status_file)
            count = int(count) + 1
        except Exception:
            # An unreadable counter is treated as overdue so the alert is re-sent.
            count = 10
        record_status(status_file, str(count))
        if count >= 5:
            # assume cron is running every 2 min, send down alert every 10 min.
            count = 1
        else:
            return
    alert = Alert(
        resource=alertres,
        event='TunnelDown',
        correlate=['TunnelUp'],
        group='aws',
        environment='Production',
        service=[aws_acc],
        severity=severity,
        value=status_message,
        text=aws_acc+' : Tunnel '+outside_ip +' Down since '+last_status_change+'.'+' endpoint: '+gwip,
        tags=['aws'],
        attributes={'customer': 'The Guardian', 'account' : aws_acc,'GatewayId' : gwid+' [ '+gwip+' ]','vpnId' : vpnid, 'TunnelOutsideIp' : outside_ip}
    )
    try:
        api.send(alert)
        # Record the (possibly reset) counter only after a successful send.
        record_status(status_file, str(count))
    except Exception as e:
        print(e)
Example #10
0
    def run(self):
        """Email held alerts once their hold time passes; send periodic heartbeats.

        Loops until ``self.should_stop`` is true, sleeping two seconds per
        pass.  A heartbeat is sent once the pass counter reaches 10, after
        which the counter restarts.
        """
        api = ApiClient(endpoint=OPTIONS["endpoint"], key=OPTIONS["key"])
        keep_alive = 0

        while not self.should_stop:
            # Iterate over a snapshot of the keys: entries are deleted
            # inside the loop, and on Python 3 mutating a dict while
            # iterating its live key view raises RuntimeError.
            for alertid in list(on_hold.keys()):
                try:
                    (alert, hold_time) = on_hold[alertid]
                except KeyError:
                    # The entry disappeared since the snapshot was taken.
                    continue
                if time.time() > hold_time:
                    self.send_email(alert)
                    try:
                        del on_hold[alertid]
                    except KeyError:
                        continue
            if keep_alive >= 10:
                tag = OPTIONS["smtp_host"] or "alerta-mailer"
                api.send(Heartbeat(tags=[tag]))
                keep_alive = 0
            keep_alive += 1
            time.sleep(2)
Example #11
0
    def run(self):
        """Create the shared queue and API client, then start the worker threads.

        A worker that fails to start is logged as an error and skipped;
        the remaining workers are still started.
        """
        self.running = True

        self.queue = Queue.Queue()
        self.api = ApiClient(endpoint=settings.ENDPOINT,
                             key=settings.API_KEY)

        # Start worker threads
        LOG.debug('Starting %s worker threads...', SERVER_THREADS)
        for i in range(SERVER_THREADS):
            # Every worker shares the same queue and API client.
            w = WorkerThread(self.queue, self.api)
            try:
                w.start()
            except Exception as e:
                LOG.error('Worker thread #%s did not start: %s', i, e)
                continue
            LOG.info('Started worker thread: %s', w.getName())
Example #12
0
    def run(self):
        """Read a trap from stdin and send it to the API as an alert.

        The endpoint and API key are taken from the ``ALERTA_ENDPOINT`` and
        ``ALERTA_API_KEY`` environment variables.  A failure while sending
        is logged as a warning and does not propagate.
        """
        endpoint = os.environ.get('ALERTA_ENDPOINT', 'http://localhost:8080')
        key = os.environ.get('ALERTA_API_KEY', None)

        self.api = ApiClient(endpoint=endpoint, key=key)

        data = sys.stdin.read()
        LOG.info('snmptrapd -> %r', data)
        # Decode byte strings only: on Python 2 stdin yields bytes, on
        # Python 3 it already yields text and ``unicode`` does not exist.
        if isinstance(data, bytes):
            data = data.decode('utf-8', 'ignore')
        LOG.debug('unicoded -> %s', data)

        # A falsy parse result means there is nothing to send.
        snmptrapAlert = SnmpTrapHandler.parse_snmptrap(data)

        if snmptrapAlert:
            try:
                self.api.send(snmptrapAlert)
            except Exception as e:
                LOG.warning('Failed to send alert: %s', e)
Example #13
0
    def __init__(self):
        """Create the API client and open the SQS queue.

        Exits the process with status 1 when the SQS connection cannot be
        established or the queue cannot be opened.
        """
        self.api = ApiClient()

        try:
            connection = boto.sqs.connect_to_region(
                AWS_REGION,
                aws_access_key_id=AWS_ACCESS_KEY_ID,
                aws_secret_access_key=AWS_SECRET_ACCESS_KEY
            )
        except boto.exception.SQSError as e:
            LOG.error('SQS API call failed: %s', e)
            sys.exit(1)

        # connect_to_region() returns None rather than raising when the
        # region name is unknown; fail here instead of with an
        # AttributeError on the create_queue() call below.
        if connection is None:
            LOG.error('SQS API call failed: unknown region %s', AWS_REGION)
            sys.exit(1)

        try:
            self.sqs = connection.create_queue(AWS_SQS_QUEUE)
            # Deliver message bodies as-is instead of the default decoding.
            self.sqs.set_message_class(RawMessage)
        except boto.exception.SQSError as e:
            LOG.error('SQS queue error: %s', e)
            sys.exit(1)
Example #14
0
    def __init__(self):
        """Start with an ``ApiClient`` constructed with no arguments."""
        client = ApiClient()
        self.api = client
Example #15
0
    def set(self, endpoint, key):
        """Replace the API client with one built for ``endpoint`` and ``key``."""
        client = ApiClient(endpoint=endpoint, key=key)
        self.api = client
Example #16
0
class AlertCommand(object):
    def __init__(self):
        """Start with an ``ApiClient`` constructed with no arguments."""
        client = ApiClient()
        self.api = client

    def set(self, endpoint, key):
        """Replace the API client with one built for ``endpoint`` and ``key``."""
        client = ApiClient(endpoint=endpoint, key=key)
        self.api = client

    def send(self, args):
        """Build an alert from ``args``, send it and print a one-line summary.

        ``args.attributes`` is an iterable of ``key=value`` strings.  Only
        the first '=' separates key from value, so values may themselves
        contain '='.

        Exits the process with status 1 if the alert cannot be built, the
        send raises, or the response status is not 'ok'.
        """
        try:
            alert = Alert(resource=args.resource,
                          event=args.event,
                          environment=args.environment,
                          severity=args.severity,
                          correlate=args.correlate,
                          status=args.status,
                          service=args.service,
                          group=args.group,
                          value=args.value,
                          text=args.text,
                          tags=args.tags,
                          attributes=dict(
                              attrib.split('=', 1) for attrib in args.attributes
                          ),
                          origin=args.origin,
                          event_type=args.event_type,
                          timeout=args.timeout,
                          raw_data=args.raw_data)
        except Exception as e:
            LOG.error(e)
            sys.exit(1)

        try:
            response = self.api.send(alert)
        except Exception as e:
            LOG.error(e)
            sys.exit(1)

        if response['status'] == 'ok':
            if 'alert' not in response:
                # No alert document in the reply; show the server message.
                info = response['message']
            elif response['alert']['repeat']:
                info = "%s duplicates" % response['alert']['duplicateCount']
            else:
                info = "%s -> %s" % (response['alert']['previousSeverity'],
                                     response['alert']['severity'])
            print("{} ({})".format(response['id'], info))
        else:
            LOG.error(response['message'])
            sys.exit(1)

    def heartbeat(self, args):
        """Send a heartbeat built from ``args`` and print the id in the reply.

        Uses ``args.origin``, ``args.tags`` and ``args.timeout``.  Exits the
        process with status 1 if building or sending the heartbeat raises,
        or if the response status is not 'ok'.
        """
        try:
            beat = Heartbeat(origin=args.origin,
                             tags=args.tags,
                             timeout=args.timeout)
            reply = self.api.send(beat)
        except Exception as err:
            LOG.error(err)
            sys.exit(1)

        if reply['status'] != 'ok':
            LOG.error(reply['message'])
            sys.exit(1)

        print(reply['id'])

    def query(self, args, from_date=None):
        """Print the alerts matching ``args.filters``; return the reply's ``lastTime``.

        With ``args.output == "json"`` the alerts are dumped as JSON and the
        process exits with status 0.  Otherwise alerts are printed in
        reverse of the order received, one summary line plus one text line
        each, followed by a detail section when ``args.details`` is set.
        Lines are prefixed with a severity colour when ``args.color`` is set.

        Returns ``response.get('lastTime', '')``.
        """
        response = self._alerts(args.filters, from_date)
        alerts = response['alerts']

        if args.output == "json":
            print(json.dumps(alerts, indent=4))
            sys.exit(0)

        for alert in reversed(alerts):

            a = AlertDocument.parse_alert(alert)

            line_color = ''
            end_color = _ENDC

            if args.color:
                line_color = _COLOR_MAP.get(a.severity, _COLOR_MAP['unknown'])

            print(
                line_color +
                '{0}|{1}|{2}|{3:5d}|{4}|{5:<5s}|{6:<10s}|{7:<18s}|{8:12s}|{9:16s}|{10:12s}'
                .format(
                    a.id[0:8],
                    a.get_date('last_receive_time', 'local',
                               args.timezone), a.severity, a.duplicate_count,
                    a.customer or "-", a.environment, ','.join(a.service),
                    a.resource, a.group, a.event, a.value) + end_color)
            print(line_color + '   |{}'.format(a.text) + end_color)

            if not args.details:
                continue

            # total_seconds() covers the whole interval; the
            # ``microseconds`` attribute alone drops whole seconds.
            latency = a.receive_time - a.create_time
            latency_ms = int(latency.total_seconds() * 1000)

            details = [
                '    severity   | {} -> {}'.format(a.previous_severity,
                                                   a.severity),
                '    trend      | {}'.format(a.trend_indication),
                '    status     | {}'.format(a.status),
                '    resource   | {}'.format(a.resource),
                '    group      | {}'.format(a.group),
                '    event      | {}'.format(a.event),
                '    value      | {}'.format(a.value),
                '    tags       | {}'.format(' '.join(a.tags)),
            ]
            details.extend(
                '    {} | {}'.format(key.ljust(10), value)
                for key, value in a.attributes.items())
            details.extend([
                '        time created  | {}'.format(
                    a.get_date('create_time', 'iso', args.timezone)),
                '        time received | {}'.format(
                    a.get_date('receive_time', 'iso', args.timezone)),
                '        last received | {}'.format(
                    a.get_date('last_receive_time', 'iso', args.timezone)),
                '        latency       | {}ms'.format(latency_ms),
                '        timeout       | {}s'.format(a.timeout),
                '            alert id     | {}'.format(a.id),
                '            last recv id | {}'.format(a.last_receive_id),
                '            customer     | {}'.format(a.customer),
                '            environment  | {}'.format(a.environment),
                '            service      | {}'.format(','.join(a.service)),
                '            resource     | {}'.format(a.resource),
                '            type         | {}'.format(a.event_type),
                '            repeat       | {}'.format(a.repeat),
                '            origin       | {}'.format(a.origin),
                '            correlate    | {}'.format(
                    ','.join(a.correlate)),
            ])

            for text in details:
                print(line_color + text + end_color)

        return response.get('lastTime', '')

    def watch(self, args):

        from_date = None
        while True:
            from_date = self.query(args, from_date)
            try:
                time.sleep(2)
            except (KeyboardInterrupt, SystemExit):
                sys.exit(0)

    def top(self, args):
        """Run a ``Screen`` built from ``args.endpoint`` and ``args.key``.

        A ``RuntimeError`` resets the screen, prints the error and exits
        with status 1.  An interrupt or exit request stops the screen's
        worker, resets the screen and exits with status 0.
        """
        display = Screen(endpoint=args.endpoint, key=args.key)

        try:
            display.run()
        except (KeyboardInterrupt, SystemExit):
            display.w.running = False
            display._reset()
            print('Exiting...')
            sys.exit(0)
        except RuntimeError as err:
            display._reset()
            print(err)
            sys.exit(1)

    def raw(self, args):

        response = self._alerts(args.filters)
        alerts = response['alerts']

        if args.output == "json":
            print(json.dumps(alerts, indent=4))
            sys.exit(0)

        for alert in reversed(alerts):
            line_color = ''
            end_color = _ENDC

            print(line_color + '%s' % alert['rawData'] + end_color)

    def history(self, args):

        response = self._history(args.filters)
        history = response['history']

        if args.output == "json":
            print(json.dumps(history, indent=4))
            sys.exit(0)

        for hist in history:

            line_color = ''
            end_color = _ENDC

            update_time = datetime.strptime(hist.get('updateTime', None),
                                            '%Y-%m-%dT%H:%M:%S.%fZ')

            if 'severity' in hist:
                if args.color:
                    line_color = _COLOR_MAP.get(hist['severity'],
                                                _COLOR_MAP['unknown'])
                print(line_color + '%s|%s|%s|%s|%-5s|%-10s|%-18s|%s|%s|%s|%s' %
                      (hist['id'][0:8],
                       update_time.strftime('%Y/%m/%d %H:%M:%S'),
                       hist['severity'], hist['customer'], hist['environment'],
                       ','.join(hist['service']), hist['resource'],
                       hist['group'], hist['event'], hist['value'],
                       hist['text']) + end_color)

            if 'status' in hist:
                print(line_color + '%s|%s|%s|%s|%-5s|%-10s|%-18s|%s|%s|%s|%s' %
                      (hist['id'][0:8],
                       update_time.strftime('%Y/%m/%d %H:%M:%S'),
                       hist['status'], hist['customer'], hist['environment'],
                       ','.join(hist['service']), hist['resource'],
                       hist['group'], hist['event'], 'n/a', hist['text']) +
                      end_color)

    def tag(self, args):

        sys.stdout.write("Counting alerts: ")
        response = self._alerts(args.filters)
        alerts = response['alerts']
        total = response['total']
        sys.stdout.write("%s, done.\n" % total)

        sys.stdout.write("Tagging alerts: ")
        for i, alert in enumerate(alerts):
            pct = int(100.0 * i / total)
            sys.stdout.write("%3d%% (%d/%d)" % (pct, i, total))
            sys.stdout.flush()
            sys.stdout.write("\b" * (8 + len(str(i)) + len(str(total))))
            try:
                self.api.tag_alert(alert['id'], args.tags)
            except Exception as e:
                print()
                LOG.error(e)
                sys.exit(1)

        sys.stdout.write("100%% (%d/%d), done.\n" % (total, total))

    def untag(self, args):

        sys.stdout.write("Counting alerts: ")
        response = self._alerts(args.filters)
        alerts = response['alerts']
        total = response['total']
        sys.stdout.write("%s, done.\n" % total)

        sys.stdout.write("Un-tagging alerts: ")
        for i, alert in enumerate(alerts):
            pct = int(100.0 * i / total)
            sys.stdout.write("%3d%% (%d/%d)" % (pct, i, total))
            sys.stdout.flush()
            sys.stdout.write("\b" * (8 + len(str(i)) + len(str(total))))
            try:
                self.api.untag_alert(alert['id'], args.tags)
            except Exception as e:
                print()
                LOG.error(e)
                sys.exit(1)

        sys.stdout.write("100%% (%d/%d), done.\n" % (total, total))

    def ack(self, args):

        sys.stdout.write("Counting alerts: ")
        response = self._counts(args.filters)
        total = response['total']
        sys.stdout.write("%s, done.\n" % total)

        sys.stdout.write("Acking alerts: ")
        response = self._alerts(args.filters)
        alerts = response['alerts']
        for i, alert in enumerate(alerts):
            pct = int(100.0 * i / total)
            sys.stdout.write("%3d%% (%d/%d)" % (pct, i, total))
            sys.stdout.flush()
            sys.stdout.write("\b" * (8 + len(str(i)) + len(str(total))))
            try:
                self.api.ack_alert(alert['id'])
            except Exception as e:
                print()
                LOG.error(e)
                sys.exit(1)

        sys.stdout.write("100%% (%d/%d), done.\n" % (total, total))

    def unack(self, args):

        sys.stdout.write("Counting alerts: ")
        response = self._counts(args.filters)
        total = response['total']
        sys.stdout.write("%s, done.\n" % total)

        sys.stdout.write("un-Acking alerts: ")
        response = self._alerts(args.filters)
        alerts = response['alerts']
        for i, alert in enumerate(alerts):
            pct = int(100.0 * i / total)
            sys.stdout.write("%3d%% (%d/%d)" % (pct, i, total))
            sys.stdout.flush()
            sys.stdout.write("\b" * (8 + len(str(i)) + len(str(total))))
            try:
                self.api.unack_alert(alert['id'])
            except Exception as e:
                print()
                LOG.error(e)
                sys.exit(1)

        sys.stdout.write("100%% (%d/%d), done.\n" % (total, total))

    def close(self, args):

        sys.stdout.write("Counting alerts: ")
        response = self._counts(args.filters)
        total = response['total']
        sys.stdout.write("%s, done.\n" % total)

        sys.stdout.write("Closing alerts: ")
        response = self._alerts(args.filters)
        alerts = response['alerts']
        for i, alert in enumerate(alerts):
            pct = int(100.0 * i / total)
            sys.stdout.write("%3d%% (%d/%d)" % (pct, i, total))
            sys.stdout.flush()
            sys.stdout.write("\b" * (8 + len(str(i)) + len(str(total))))
            try:
                self.api.close_alert(alert['id'])
            except Exception as e:
                print()
                LOG.error(e)
                sys.exit(1)

        sys.stdout.write("100%% (%d/%d), done.\n" % (total, total))

    def delete(self, args):

        sys.stdout.write("Counting alerts: ")
        response = self._counts(args.filters)
        total = response['total']
        sys.stdout.write("%s, done.\n" % total)

        sys.stdout.write("Deleting alerts: ")
        response = self._alerts(args.filters)
        alerts = response['alerts']
        for i, alert in enumerate(alerts):
            pct = int(100.0 * i / total)
            sys.stdout.write("%3d%% (%d/%d)" % (pct, i, total))
            sys.stdout.flush()
            sys.stdout.write("\b" * (8 + len(str(i)) + len(str(total))))
            try:
                self.api.delete_alert(alert['id'])
            except Exception as e:
                print()
                LOG.error(e)
                sys.exit(1)

        sys.stdout.write("100%% (%d/%d), done.\n" % (total, total))

    def status(self, args):

        response = self._status()
        metrics = response['metrics']

        print('{:<28} {:<8} {:<26} {:10} {}'.format('METRIC', 'TYPE', 'NAME',
                                                    'VALUE', 'AVG'))

        for metric in [
                m for m in metrics
                if m['type'] in ['gauge', 'counter', 'timer']
        ]:
            if metric['type'] == 'gauge':
                print('{0:<28} {1:<8} {2:<26} {3:<10}'.format(
                    metric['title'], metric['type'],
                    metric['group'] + '.' + metric['name'], metric['value']))
            else:
                value = metric.get('count', 0)
                avg = int(metric['totalTime']) * 1.0 / int(metric['count'])
                print('{0:<28} {1:<8} {2:<26} {3:<10} {4:-3.2f} ms'.format(
                    metric['title'], metric['type'],
                    metric['group'] + '.' + metric['name'], value, avg))

        for metric in [m for m in metrics if m['type'] == 'text']:
            print('{0:<28} {1:<8} {2:<26} {3:<10}'.format(
                metric['title'], metric['type'],
                metric['group'] + '.' + metric['name'], metric['value']))

    def heartbeats(self, args):
        """Print a table of heartbeats; optionally raise alerts about them.

        For each heartbeat the latency (receive time minus create time, in
        milliseconds) and the time since it was received are shown.  A '*'
        marks a latency above ``MAX_LATENCY`` or an age above the
        heartbeat's own timeout.

        When ``args.alert`` is set, one alert per heartbeat is passed to
        :meth:`send`: 'HeartbeatFail' if the timeout was exceeded, else
        'HeartbeatSlow' if the latency was exceeded, else 'HeartbeatOK'.
        """
        response = self._heartbeats()
        heartbeats = response['heartbeats']

        print('{:<28} {:<26} {:<19} {:>8} {:7} {}'.format(
            'ORIGIN', 'TAGS', 'CREATED', 'LATENCY', 'TIMEOUT', 'SINCE'))

        for heartbeat in heartbeats:
            hb = HeartbeatDocument.parse_heartbeat(heartbeat)
            # total_seconds() covers the whole interval; ``microseconds``
            # alone would drop whole seconds of latency.
            latency = int(
                (hb.receive_time - hb.create_time).total_seconds() * 1000)
            since = datetime.utcnow() - hb.receive_time
            # Drop the sub-second part so the age prints cleanly.
            since = since - timedelta(microseconds=since.microseconds)

            latency_exceeded = latency > MAX_LATENCY
            # ``since.seconds`` ignores whole days, which would make a
            # heartbeat that is more than a day stale look healthy.
            timeout_exceeded = since.total_seconds() > hb.timeout

            print('{:<28} {:<26} {} {}{:6}ms {:6}s {}{}'.format(
                hb.origin, ' '.join(hb.tags),
                hb.get_date('create_time', 'local',
                            args.timezone), '*' if latency_exceeded else ' ',
                latency, hb.timeout, '*' if timeout_exceeded else ' ', since))

            if not args.alert:
                continue

            if timeout_exceeded:
                event = 'HeartbeatFail'
                severity = 'major'
                value = '{}'.format(since)
                text = 'Heartbeat not received in {} seconds'.format(
                    hb.timeout)
            elif latency_exceeded:
                event = 'HeartbeatSlow'
                severity = 'major'
                value = '{}ms'.format(latency)
                text = 'Heartbeat took more than {}ms to be processed'.format(
                    MAX_LATENCY)
            else:
                event = 'HeartbeatOK'
                severity = 'normal'
                value = ''
                text = 'Heartbeat OK'

            alert = Alert(
                resource=hb.origin,
                event=event,
                correlate=['HeartbeatFail', 'HeartbeatSlow', 'HeartbeatOK'],
                group='System',
                environment='Production',
                service=['Alerta'],
                severity=severity,
                value=value,
                text=text,
                tags=hb.tags,
                type='heartbeatAlert')
            # NOTE(review): send() reads its argument like an argument
            # namespace (args.resource, args.attributes, ...); confirm that
            # Alert exposes the same attribute names.
            self.send(alert)

    def blackout(self, args):

        if '.' not in args.start:
            args.start = args.start.replace('Z', '.000Z')

        try:
            blackout = {
                "environment": args.environment,
                "resource": args.resource,
                "service": args.service,
                "event": args.event,
                "group": args.group,
                "tags": args.tags,
                "startTime": args.start,
                "duration": args.duration
            }
        except Exception as e:
            LOG.error(e)
            sys.exit(1)

        try:
            response = self.api.blackout_alerts(blackout)
        except Exception as e:
            LOG.error(e)
            sys.exit(1)

        if response['status'] == 'ok':
            print(response['blackout'])
        else:
            LOG.error(response['message'])
            sys.exit(1)

    def blackouts(self, args):
        """Print a table of blackouts, optionally purging expired ones.

        Start times are parsed as UTC and shown in ``args.timezone``.  With
        ``args.purge`` set, each blackout whose status is 'expired' is
        deleted through the API and shown as 'deleted', or as 'error' when
        the delete response status is not 'ok'.  Fields missing from a
        blackout are shown as '*'.
        """
        listing = self.api.get_blackouts()
        entries = listing['blackouts']

        header_format = (
            '{:<8} {:<16} {:<16} {:<16} {:<16} {:16} {:16} {:24} {:8} {:19} {}')
        row_format = (
            '{:<8} {:<16} {:<16} {:16} {:16} {:16} {:16} {:24} {:8} {} {}s')

        print(header_format.format(
            'ID', 'CUSTOMER', 'ENVIRONMENT', 'SERVICE', 'RESOURCE', 'EVENT',
            'GROUP', 'TAGS', 'STATUS', 'START', 'DURATION'))

        for entry in entries:
            started_utc = datetime.strptime(entry['startTime'],
                                            '%Y-%m-%dT%H:%M:%S.%fZ')
            zone = pytz.timezone(args.timezone)

            if args.purge and entry['status'] == 'expired':
                outcome = self.api.delete_blackout(entry['id'])
                if outcome['status'] == 'ok':
                    entry['status'] = 'deleted'
                else:
                    entry['status'] = 'error'

            row = (
                entry['id'][:8],
                entry.get('customer', '*'),
                entry.get('environment', '*'),
                ','.join(entry.get('service', '*')),
                entry.get('resource', '*'),
                entry.get('event', '*'),
                entry.get('group', '*'),
                ' '.join(entry.get('tags', '*')),
                entry['status'],
                started_utc.replace(tzinfo=pytz.UTC).astimezone(
                    zone).strftime('%Y/%m/%d %H:%M:%S'),
                entry['duration'],
            )
            print(row_format.format(*row))

    @staticmethod
    def _build(filters, from_date=None, to_date=None):
        """Turn ``key=value`` filter strings into a list of query tuples.

        :param filters: iterable of strings; entries without ``=`` are
            ignored, and only the first ``=`` separates key from value.
            May be ``None`` or empty.
        :param from_date: if truthy, appended as ``('from-date', from_date)``.
        :param to_date: if truthy, appended as ``('to-date', to_date)``.
        :return: list of ``(key, value)`` tuples; a default
            ``('sort-by', 'lastReceiveTime')`` is appended only when the
            filters did not already supply a ``sort-by`` key.
        """
        query = [tuple(x.split('=', 1)) for x in filters or () if '=' in x]

        if from_date:
            query.append(('from-date', from_date))

        if to_date:
            query.append(('to-date', to_date))

        # query holds (key, value) tuples, so the keys must be inspected;
        # testing the bare string against the list never matches.
        if not any(key == 'sort-by' for key, _ in query):
            query.append(('sort-by', 'lastReceiveTime'))

        return query

    def _alerts(self, filters, from_date=None, to_date=None):
        """Fetch alerts matching the filters and return the raw API response.

        Logs and exits with status 1 if the API call raises or the response
        status is ``error``.
        """
        params = self._build(filters, from_date, to_date)

        try:
            result = self.api.get_alerts(params)
        except Exception as err:
            LOG.error(err)
            sys.exit(1)

        if result['status'] != "error":
            return result

        LOG.error(result['message'])
        sys.exit(1)

    def _counts(self, filters, from_date=None, to_date=None):
        """Fetch alert counts matching the filters and return the raw response.

        Logs and exits with status 1 if the API call raises or the response
        status is ``error``.
        """
        params = self._build(filters, from_date, to_date)

        try:
            result = self.api.get_counts(params)
        except Exception as err:
            LOG.error(err)
            sys.exit(1)

        if result['status'] != "error":
            return result

        LOG.error(result['message'])
        sys.exit(1)

    def _history(self, filters, from_date=None, to_date=None):
        """Fetch alert history matching the filters and return the raw response.

        Logs and exits with status 1 if the API call raises or the response
        status is ``error``.
        """
        params = self._build(filters, from_date, to_date)

        try:
            result = self.api.get_history(params)
        except Exception as err:
            LOG.error(err)
            sys.exit(1)

        if result['status'] != "error":
            return result

        LOG.error(result['message'])
        sys.exit(1)

    def _heartbeats(self):
        """Fetch all heartbeats and return the raw API response.

        Logs and exits with status 1 if the API call raises or the response
        status is ``error``.
        """
        try:
            result = self.api.get_heartbeats()
        except Exception as err:
            LOG.error(err)
            sys.exit(1)

        if result['status'] != "error":
            return result

        LOG.error(result['message'])
        sys.exit(1)

    def _status(self):
        """Return the API status response; log and exit 1 if the call raises."""
        try:
            return self.api.get_status()
        except Exception as err:
            LOG.error(err)
            sys.exit(1)

    def help(self, args):
        """Handler for the ``help`` command; currently does nothing."""

        pass

    def uptime(self, args):
        """Print the server's current time and how long it has been up.

        Reads ``time`` and ``uptime`` from the status response; both are
        divided by 1000 to obtain seconds. Output has the form
        ``HH:MM up D days HH:MM``.
        """
        response = self._status()

        now = datetime.fromtimestamp(int(response['time']) / 1000.0)

        # Read days/hours/minutes off the timedelta itself. Adding it to
        # datetime(1, 1, 1) and using the day-of-month wraps once the uptime
        # exceeds 31 days.
        up = timedelta(seconds=int(response['uptime']) / 1000.0)
        hours, remainder = divmod(up.seconds, 3600)
        minutes = remainder // 60

        print('{0} up {1} days {2:02d}:{3:02d}'.format(now.strftime('%H:%M'),
                                                       up.days, hours,
                                                       minutes))

    def version(self, args):
        """Print the server application/version, then client and requests versions."""
        status = self._status()

        server_line = '{0} {1}'.format(status['application'], status['version'])
        print(server_line)

        client_line = 'alerta client {0}'.format(__version__)
        print(client_line)

        requests_line = 'requests {0}'.format(requests.__version__)
        print(requests_line)
Example #17
0
    def set(self, endpoint, key):
        """Replace ``self.api`` with a new ``ApiClient`` for ``endpoint`` and ``key``."""

        self.api = ApiClient(endpoint=endpoint, key=key)
Example #18
0
            sys.exit(2)
        LOG.info('Listening on syslog port %s/udp' % SYSLOG_UDP_PORT)

        LOG.info('Starting TCP listener...')
        # Set up syslog TCP listener
        try:
            tcp = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
            tcp.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
            tcp.bind(('', SYSLOG_TCP_PORT))
            tcp.listen(5)
        except socket.error, e:
            LOG.error('Syslog TCP error: %s', e)
            sys.exit(2)
        LOG.info('Listening on syslog port %s/tcp' % SYSLOG_TCP_PORT)

        self.api = self.api = ApiClient(endpoint=settings.ENDPOINT, key=settings.API_KEY)

        count = 0
        while not self.shuttingdown:
            try:
                LOG.debug('Waiting for syslog messages...')
                ip, op, rdy = select.select([udp, tcp], [], [], LOOP_EVERY)
                if ip:
                    for i in ip:
                        if i == udp:
                            data, addr = udp.recvfrom(4096)
                            data = unicode(data, 'utf-8', errors='ignore')
                            LOG.debug('Syslog UDP data received from %s: %s', addr, data)
                        if i == tcp:
                            client, addr = tcp.accept()
                            data = client.recv(4096)
Example #19
0
)

# In[ ]:

get_ipython().system(
    u' cd $ALERTA_TEST_DIR && ./miniconda2/bin/alerta     --endpoint-url "http://localhost:8090"     delete'
)

# ### Same Thing, Python style

# In[ ]:

from alerta.api import ApiClient
from alerta.alert import Alert

# Build a client for the server at localhost:8090, describe a minor
# 'VolUnavailable' alert for resource 'localhost', and send it.
api = ApiClient(endpoint='http://localhost:8090')
alert = Alert(resource='localhost',
              event='VolUnavailable',
              service=['Filesystem'],
              environment='Production',
              value='ERROR',
              severity='minor')
# res holds whatever api.send() returns for the submitted alert.
res = api.send(alert)

# ## Custom Alerts

# ### Remember, you can do amazing stuff…

# In[ ]:

import utils
Example #20
0
    def __init__(self):
        """Create an ``ApiClient`` with its default arguments as ``self.api``."""

        self.api = ApiClient()
Example #21
0
class CloudWatch(object):
    """Forward CloudWatch alarm notifications read from an SQS queue as alerts."""

    def __init__(self):
        """Create the API client and open the SQS queue.

        Exits with status 1 if connecting to SQS or setting up the queue
        raises ``SQSError``.
        """
        self.api = ApiClient()

        try:
            connection = boto.sqs.connect_to_region(
                AWS_REGION,
                aws_access_key_id=AWS_ACCESS_KEY_ID,
                aws_secret_access_key=AWS_SECRET_ACCESS_KEY
            )
        except boto.exception.SQSError as e:
            LOG.error('SQS API call failed: %s', e)
            sys.exit(1)

        try:
            self.sqs = connection.create_queue(AWS_SQS_QUEUE)
            self.sqs.set_message_class(RawMessage)
        except boto.exception.SQSError as e:
            LOG.error('SQS queue error: %s', e)
            sys.exit(1)

    def run(self):
        """Poll the queue forever, sending one alert per alarm plus a heartbeat.

        Every message read is deleted from the queue afterwards, whether or
        not it could be parsed or sent.
        """
        while True:
            LOG.debug('Waiting for CloudWatch alarms on %s...', AWS_SQS_QUEUE)
            try:
                notification = self.sqs.read(wait_time_seconds=20)
            except boto.exception.SQSError as e:
                LOG.warning('Could not read from queue: %s', e)
                time.sleep(20)
                continue

            if notification:
                try:
                    cloudwatchAlert = self.parse_notification(notification)
                except (KeyError, IndexError, TypeError, ValueError) as e:
                    # A malformed message must not kill the polling loop; it
                    # is logged and then deleted below like any other message.
                    LOG.warning('Failed to parse notification: %s', e)
                    cloudwatchAlert = None

                # parse_notification returns None for messages that carry no
                # alarm trigger; there is nothing to send for those.
                if cloudwatchAlert is not None:
                    try:
                        self.api.send(cloudwatchAlert)
                    except Exception as e:
                        LOG.warning('Failed to send alert: %s', e)
                self.sqs.delete_message(notification)

            LOG.debug('Send heartbeat...')
            heartbeat = Heartbeat(tags=[__version__])
            try:
                self.api.send(heartbeat)
            except Exception as e:
                LOG.warning('Failed to send heartbeat: %s', e)

    def parse_notification(self, notification):
        """Convert one queue message into an ``Alert``.

        The message body is JSON whose ``Message`` field is itself a JSON
        encoded alarm.

        :param notification: queue message exposing ``get_body()``.
        :return: an ``Alert``, or ``None`` when the alarm has no ``Trigger``.
        :raises KeyError, IndexError, ValueError: if the payload is malformed
            or the trigger has no dimensions.
        """
        notification = json.loads(notification.get_body())
        alarm = json.loads(notification['Message'])

        if 'Trigger' not in alarm:
            return None

        trigger = alarm['Trigger']
        # Only the first dimension is used to name the resource.
        dimension = trigger['Dimensions'][0]

        # Defaults
        resource = '%s:%s' % (dimension['name'], dimension['value'])
        event = alarm['AlarmName']
        severity = self.cw_state_to_severity(alarm['NewStateValue'])
        group = 'CloudWatch'
        value = trigger['MetricName']
        text = alarm['AlarmDescription']
        # Fall back to the raw account id when no mapping is configured.
        service = [AWS_ACCOUNT_ID.get(alarm['AWSAccountId'], 'AWSAccountId:' + alarm['AWSAccountId'])]
        tags = [trigger['Namespace']]
        correlate = list()
        origin = notification['TopicArn']
        timeout = None
        create_time = datetime.datetime.strptime(notification['Timestamp'], '%Y-%m-%dT%H:%M:%S.%fZ')
        raw_data = notification['Message']

        cloudwatchAlert = Alert(
            resource=resource,
            event=event,
            correlate=correlate,
            group=group,
            value=value,
            severity=severity,
            environment='Production',
            service=service,
            text=text,
            event_type='cloudwatchAlarm',
            tags=tags,
            attributes={
                'awsMessageId': notification['MessageId'],
                'awsRegion': alarm['Region'],
                'thresholdInfo': alarm['NewStateReason']
            },
            origin=origin,
            timeout=timeout,
            create_time=create_time,
            raw_data=raw_data,
        )

        return cloudwatchAlert

    @staticmethod
    def cw_state_to_severity(state):
        """Map a CloudWatch alarm state name to an alert severity string.

        Unrecognised states map to ``'unknown'``.
        """
        if state == 'ALARM':
            return 'major'
        elif state == 'INSUFFICIENT_DATA':
            return 'warning'
        elif state == 'OK':
            return 'normal'
        else:
            return 'unknown'
Example #22
0
import argparse
import threading
from os.path import dirname, basename, join

import urllib3
import requests
from packaging.version import Version
from alerta.api import ApiClient
from alerta.alert import Alert

import utils

# Turn off urllib3's warnings for the whole process.
urllib3.disable_warnings()

# Module-wide client pointing at the Alerta server on localhost:8090.
alerta_endpoint = 'http://localhost:8090'
api = ApiClient(endpoint=alerta_endpoint)
# NOTE(review): presumably suppresses real alert submission when True; its
# use is not visible here, so confirm against the functions that read it.
DRY_RUN = False

#
# alert monitoring functions, kind of wrapping functions in utils
#


def alert_volume_not_existing(path):
    """
    Alert if a volume does not exist, delete previous alert if it does.
    
    Command-line alternative (replace path argument):

        alerta send -r localhost -e VolumeUnavailable -E Localhost \
            -S Filesystem -s minor -t "Volume not available." -v <path>
Example #23
0
class CloudWatch(object):
    """Read CloudWatch alarm notifications from SQS and relay them as alerts."""

    def __init__(self):
        """Set up the API client and the SQS queue handle.

        Exits with status 1 when the SQS connection or the queue setup raises
        ``SQSError``.
        """
        self.api = ApiClient()

        try:
            connection = boto.sqs.connect_to_region(
                AWS_REGION,
                aws_access_key_id=AWS_ACCESS_KEY_ID,
                aws_secret_access_key=AWS_SECRET_ACCESS_KEY)
        except boto.exception.SQSError as e:
            LOG.error('SQS API call failed: %s', e)
            sys.exit(1)

        try:
            self.sqs = connection.create_queue(AWS_SQS_QUEUE)
            self.sqs.set_message_class(RawMessage)
        except boto.exception.SQSError as e:
            LOG.error('SQS queue error: %s', e)
            sys.exit(1)

    def run(self):
        """Loop forever: read a message, relay it, then send a heartbeat.

        A message is always deleted after it has been handled, even when it
        could not be parsed or the alert could not be sent.
        """
        while True:
            LOG.debug('Waiting for CloudWatch alarms on %s...', AWS_SQS_QUEUE)
            try:
                notification = self.sqs.read(wait_time_seconds=20)
            except boto.exception.SQSError as e:
                LOG.warning('Could not read from queue: %s', e)
                time.sleep(20)
                continue

            if notification:
                self._relay(notification)
                self.sqs.delete_message(notification)

            LOG.debug('Send heartbeat...')
            heartbeat = Heartbeat(tags=[__version__])
            try:
                self.api.send(heartbeat)
            except Exception as e:
                LOG.warning('Failed to send heartbeat: %s', e)

    def _relay(self, notification):
        """Parse one message and send the resulting alert, logging any failure."""
        try:
            cloudwatchAlert = self.parse_notification(notification)
        except (KeyError, IndexError, TypeError, ValueError) as e:
            # Unparseable payloads are logged instead of crashing the loop.
            LOG.warning('Failed to parse notification: %s', e)
            return

        # None means the message was not an alarm trigger: nothing to send.
        if cloudwatchAlert is None:
            return

        try:
            self.api.send(cloudwatchAlert)
        except Exception as e:
            LOG.warning('Failed to send alert: %s', e)

    def parse_notification(self, notification):
        """Build an ``Alert`` from a queue message.

        The body is decoded as JSON, and its ``Message`` field is decoded as
        JSON again to obtain the alarm.

        :param notification: queue message exposing ``get_body()``.
        :return: an ``Alert``, or ``None`` if the alarm has no ``Trigger``.
        :raises KeyError, IndexError, ValueError: on a malformed payload or a
            trigger with no dimensions.
        """
        notification = json.loads(notification.get_body())
        alarm = json.loads(notification['Message'])

        if 'Trigger' not in alarm:
            return None

        trigger = alarm['Trigger']
        # The resource name is derived from the first dimension only.
        first_dimension = trigger['Dimensions'][0]

        # Defaults
        resource = '%s:%s' % (first_dimension['name'],
                              first_dimension['value'])
        event = alarm['AlarmName']
        severity = self.cw_state_to_severity(alarm['NewStateValue'])
        group = 'CloudWatch'
        value = trigger['MetricName']
        text = alarm['AlarmDescription']
        # Unmapped accounts are reported by their raw id.
        service = [
            AWS_ACCOUNT_ID.get(alarm['AWSAccountId'],
                               'AWSAccountId:' + alarm['AWSAccountId'])
        ]
        tags = [trigger['Namespace']]
        correlate = list()
        origin = notification['TopicArn']
        timeout = None
        create_time = datetime.datetime.strptime(notification['Timestamp'],
                                                 '%Y-%m-%dT%H:%M:%S.%fZ')
        raw_data = notification['Message']

        cloudwatchAlert = Alert(
            resource=resource,
            event=event,
            correlate=correlate,
            group=group,
            value=value,
            severity=severity,
            environment='Production',
            service=service,
            text=text,
            event_type='cloudwatchAlarm',
            tags=tags,
            attributes={
                'awsMessageId': notification['MessageId'],
                'awsRegion': alarm['Region'],
                'thresholdInfo': alarm['NewStateReason']
            },
            origin=origin,
            timeout=timeout,
            create_time=create_time,
            raw_data=raw_data,
        )

        return cloudwatchAlert

    @staticmethod
    def cw_state_to_severity(state):
        """Translate a CloudWatch state name into a severity string.

        Any state other than ALARM, INSUFFICIENT_DATA or OK yields ``'unknown'``.
        """
        if state == 'ALARM':
            return 'major'
        elif state == 'INSUFFICIENT_DATA':
            return 'warning'
        elif state == 'OK':
            return 'normal'
        else:
            return 'unknown'
Example #24
0
get_ipython().system(u' cd $ALERTA_TEST_DIR && ./miniconda2/bin/alerta     --endpoint-url "http://localhost:8090"     send -E Production -r localhost -e VolUnavailable          -S Filesystem -v ERROR -s minor          -t "/Volumes/XYZ not available."')


# In[ ]:

get_ipython().system(u' cd $ALERTA_TEST_DIR && ./miniconda2/bin/alerta     --endpoint-url "http://localhost:8090"     delete')


# ### Same Thing, Python style

# In[ ]:

from alerta.api import ApiClient
from alerta.alert import Alert

# Create a client for the server at localhost:8090, build a minor
# 'VolUnavailable' alert for resource 'localhost', and send it.
api = ApiClient(endpoint='http://localhost:8090')
alert = Alert(resource='localhost', event='VolUnavailable',
              service=['Filesystem'], environment='Production',
              value='ERROR', severity='minor')
# res holds whatever api.send() returns for the submitted alert.
res = api.send(alert)


# ## Custom Alerts

# ### Remember, you can do amazing stuff…

# In[ ]:

import utils
utils.volume_is_mounted('/Volumes/Intenso64')
Example #25
0
#!/usr/bin/env python
# coding=utf-8
from alerta.api import ApiClient
from alerta.alert import Alert

# NOTE(review): the API key is hard-coded in source. Load it from the
# environment or a config file, and rotate this key before sharing the script.
api = ApiClient(endpoint='http://alert.localhost/api', key='UszE5hI_hx5pXKcsCP_2&1DIs&9_Ve*k')

# Sent against the "Development" environment; switch environment to
# "Production" to raise the same alert for the live system.
alert = Alert(resource='irdev', event='searchServerDown',text='The search server is down.',group='ir',environment="Development",service=["localhost"],status='open',timeout=86400,value="query1",severity="major")

# print() with a single argument behaves the same on Python 2 and Python 3,
# whereas the print statement is a syntax error on Python 3.
print(alert)
t = api.send(alert)
print(t)
Example #26
0
class AlertCommand(object):

    def __init__(self):
        """Create an ``ApiClient`` with its default arguments as ``self.api``."""

        self.api = ApiClient()

    def set(self, endpoint, key):
        """Replace ``self.api`` with a new ``ApiClient`` for ``endpoint`` and ``key``."""

        self.api = ApiClient(endpoint=endpoint, key=key)

    def send(self, args):
        """Build an alert from ``args``, send it and print a one-line result.

        ``args.attributes`` is an iterable of ``key=value`` strings (or
        ``None``). On success the alert id is printed together with either
        the server message, the duplicate count, or the severity transition.
        Any failure to build or send the alert, or a non-ok response, is
        logged and the process exits with status 1.
        """
        try:
            # Split on the first '=' only so attribute values may themselves
            # contain '='; a missing attribute list is treated as empty.
            attributes = dict(
                attrib.split('=', 1) for attrib in args.attributes or [])
            alert = Alert(
                resource=args.resource,
                event=args.event,
                environment=args.environment,
                severity=args.severity,
                correlate=args.correlate,
                status=args.status,
                service=args.service,
                group=args.group,
                value=args.value,
                text=args.text,
                tags=args.tags,
                attributes=attributes,
                origin=args.origin,
                event_type=args.event_type,
                timeout=args.timeout,
                raw_data=args.raw_data
            )
        except Exception as e:
            LOG.error(e)
            sys.exit(1)

        try:
            response = self.api.send(alert)
        except Exception as e:
            LOG.error(e)
            sys.exit(1)

        if response['status'] == 'ok':
            if 'alert' not in response:
                info = response['message']
            elif response['alert']['repeat']:
                info = "%s duplicates" % response['alert']['duplicateCount']
            else:
                info = "%s -> %s" % (response['alert']['previousSeverity'], response['alert']['severity'])
            print("{} ({})".format(response['id'], info))
        else:
            LOG.error(response['message'])
            sys.exit(1)

    def heartbeat(self, args):
        """Send a heartbeat built from ``args`` and print the returned id.

        Logs and exits with status 1 if building or sending the heartbeat
        raises, or if the response status is not ``ok``.
        """
        try:
            beat = Heartbeat(origin=args.origin,
                             tags=args.tags,
                             timeout=args.timeout)
            response = self.api.send(beat)
        except Exception as err:
            LOG.error(err)
            sys.exit(1)

        if response['status'] != 'ok':
            LOG.error(response['message'])
            sys.exit(1)

        print(response['id'])

    def query(self, args, from_date=None):
        """Print alerts matching ``args.filters``, oldest entry of the response last.

        With ``args.output == "json"`` the alerts are dumped as JSON and the
        process exits 0. Otherwise each alert is printed as a summary line
        plus its text, followed by a detail section when ``args.details`` is
        set. Lines are prefixed with a severity colour when ``args.color`` is
        set and always end with the ``_ENDC`` sequence.

        :param from_date: passed through to the alert lookup.
        :return: the response's ``lastTime`` value, or ``''`` if absent.
        """
        response = self._alerts(args.filters, from_date)
        alerts = response['alerts']

        if args.output == "json":
            print(json.dumps(alerts, indent=4))
            sys.exit(0)

        for alert in reversed(alerts):

            a = AlertDocument.parse_alert(alert)

            line_color = ''
            end_color = _ENDC

            if args.color:
                line_color = _COLOR_MAP.get(a.severity, _COLOR_MAP['unknown'])

            def emit(text, prefix=line_color, suffix=end_color):
                # Wrap every output line in the same colour/reset pair.
                print(prefix + text + suffix)

            emit('{0}|{1}|{2}|{3:5d}|{4}|{5:<5s}|{6:<10s}|{7:<18s}|{8:12s}|{9:16s}|{10:12s}'.format(
                a.id[0:8],
                a.get_date('last_receive_time', 'local', args.timezone),
                a.severity,
                a.duplicate_count,
                a.customer or "-",
                a.environment,
                ','.join(a.service),
                a.resource,
                a.group,
                a.event,
                a.value))
            emit('   |{}'.format(a.text))

            if args.details:
                emit('    severity   | {} -> {}'.format(a.previous_severity, a.severity))
                emit('    trend      | {}'.format(a.trend_indication))
                emit('    status     | {}'.format(a.status))
                emit('    resource   | {}'.format(a.resource))
                emit('    group      | {}'.format(a.group))
                emit('    event      | {}'.format(a.event))
                emit('    value      | {}'.format(a.value))
                emit('    tags       | {}'.format(' '.join(a.tags)))

                for key, value in a.attributes.items():
                    emit('    {} | {}'.format(key.ljust(10), value))

                # total_seconds() covers the whole interval; .microseconds is
                # only the sub-second component and would hide any latency of
                # a second or more.
                latency = a.receive_time - a.create_time
                latency_ms = int(latency.total_seconds() * 1000)

                emit('        time created  | {}'.format(a.get_date('create_time', 'iso', args.timezone)))
                emit('        time received | {}'.format(a.get_date('receive_time', 'iso', args.timezone)))
                emit('        last received | {}'.format(a.get_date('last_receive_time', 'iso', args.timezone)))
                emit('        latency       | {}ms'.format(latency_ms))
                emit('        timeout       | {}s'.format(a.timeout))

                emit('            alert id     | {}'.format(a.id))
                emit('            last recv id | {}'.format(a.last_receive_id))
                emit('            customer     | {}'.format(a.customer))
                emit('            environment  | {}'.format(a.environment))
                emit('            service      | {}'.format(','.join(a.service)))
                emit('            resource     | {}'.format(a.resource))
                emit('            type         | {}'.format(a.event_type))
                emit('            repeat       | {}'.format(a.repeat))
                emit('            origin       | {}'.format(a.origin))
                emit('            correlate    | {}'.format(','.join(a.correlate)))

        return response.get('lastTime', '')

    def watch(self, args):
        """Re-run ``query`` every two seconds until interrupted.

        Each pass starts from the value returned by the previous ``query``
        call. Ctrl-C exits with status 0 whether it arrives during the query
        or during the pause; a ``SystemExit`` raised by ``query`` itself keeps
        its own exit code.
        """
        from_date = None
        try:
            while True:
                from_date = self.query(args, from_date)
                try:
                    time.sleep(2)
                except SystemExit:
                    sys.exit(0)
        except KeyboardInterrupt:
            # Covers an interrupt during the query as well as during the sleep.
            sys.exit(0)

    def top(self, args):
        """Run a ``Screen`` built from ``args.endpoint`` and ``args.key``.

        A ``RuntimeError`` resets the screen, prints the error and exits with
        status 1. ``KeyboardInterrupt``/``SystemExit`` clears the screen's
        ``w.running`` flag, resets the screen, prints a message and exits 0.
        """

        screen = Screen(endpoint=args.endpoint, key=args.key)

        try:
            screen.run()
        except RuntimeError as e:
            screen._reset()
            print(e)
            sys.exit(1)
        except (KeyboardInterrupt, SystemExit):
            screen.w.running = False
            screen._reset()
            print('Exiting...')
            sys.exit(0)

    def raw(self, args):
        """Print the ``rawData`` of each matching alert, last response entry first.

        With ``args.output == "json"`` the alerts are dumped as JSON and the
        process exits 0 instead.
        """
        alerts = self._alerts(args.filters)['alerts']

        if args.output == "json":
            print(json.dumps(alerts, indent=4))
            sys.exit(0)

        for alert in reversed(alerts):
            # No colour prefix is used here; only the reset sequence is appended.
            terminator = _ENDC
            print('%s' % alert['rawData'] + terminator)

    def history(self, args):
        """Print the history entries matching ``args.filters``.

        With ``args.output == "json"`` the entries are dumped as JSON and the
        process exits 0. Otherwise an entry produces one line if it has a
        ``severity`` and one line if it has a ``status``; the status line
        shows ``n/a`` in the value column.
        """
        history = self._history(args.filters)['history']

        if args.output == "json":
            print(json.dumps(history, indent=4))
            sys.exit(0)

        row = '%s|%s|%s|%s|%-5s|%-10s|%-18s|%s|%s|%s|%s'

        for hist in history:

            line_color = ''
            end_color = _ENDC

            update_time = datetime.strptime(hist.get('updateTime', None), '%Y-%m-%dT%H:%M:%S.%fZ')

            def render(state, value):
                # Shared column layout for severity and status lines.
                return row % (
                    hist['id'][0:8],
                    update_time.strftime('%Y/%m/%d %H:%M:%S'),
                    state,
                    hist['customer'],
                    hist['environment'],
                    ','.join(hist['service']),
                    hist['resource'],
                    hist['group'],
                    hist['event'],
                    value,
                    hist['text']
                )

            if 'severity' in hist:
                if args.color:
                    line_color = _COLOR_MAP.get(hist['severity'], _COLOR_MAP['unknown'])
                print(line_color + render(hist['severity'], hist['value']) + end_color)

            if 'status' in hist:
                # Reuses whatever colour the severity branch selected, if any.
                print(line_color + render(hist['status'], 'n/a') + end_color)

    def tag(self, args):
        """Add ``args.tags`` to every alert matching ``args.filters``.

        Writes an in-place progress indicator to stdout. If tagging an alert
        raises, the error is logged and the process exits with status 1.
        """
        out = sys.stdout

        out.write("Counting alerts: ")
        found = self._alerts(args.filters)
        alerts, total = found['alerts'], found['total']
        out.write("%s, done.\n" % total)

        out.write("Tagging alerts: ")
        for done, alert in enumerate(alerts):
            percent = int(100.0 * done / total)
            out.write("%3d%% (%d/%d)" % (percent, done, total))
            out.flush()
            # Backspace over the progress text so the next update overwrites it.
            out.write("\b" * (8 + len(str(done)) + len(str(total))))
            try:
                self.api.tag_alert(alert['id'], args.tags)
            except Exception as err:
                print()
                LOG.error(err)
                sys.exit(1)

        out.write("100%% (%d/%d), done.\n" % (total, total))

    def untag(self, args):
        """Remove ``args.tags`` from every alert matching ``args.filters``.

        Writes an in-place progress indicator to stdout. If un-tagging an
        alert raises, the error is logged and the process exits with status 1.
        """
        out = sys.stdout

        out.write("Counting alerts: ")
        found = self._alerts(args.filters)
        alerts, total = found['alerts'], found['total']
        out.write("%s, done.\n" % total)

        out.write("Un-tagging alerts: ")
        for done, alert in enumerate(alerts):
            percent = int(100.0 * done / total)
            out.write("%3d%% (%d/%d)" % (percent, done, total))
            out.flush()
            # Backspace over the progress text so the next update overwrites it.
            out.write("\b" * (8 + len(str(done)) + len(str(total))))
            try:
                self.api.untag_alert(alert['id'], args.tags)
            except Exception as err:
                print()
                LOG.error(err)
                sys.exit(1)

        out.write("100%% (%d/%d), done.\n" % (total, total))

    def ack(self, args):
        """Acknowledge every alert matching ``args.filters``.

        The total comes from the counts lookup and the alerts from a separate
        alerts lookup. Writes an in-place progress indicator to stdout; if an
        API call raises, the error is logged and the process exits 1.
        """
        out = sys.stdout

        out.write("Counting alerts: ")
        total = self._counts(args.filters)['total']
        out.write("%s, done.\n" % total)

        out.write("Acking alerts: ")
        alerts = self._alerts(args.filters)['alerts']
        for done, alert in enumerate(alerts):
            percent = int(100.0 * done / total)
            out.write("%3d%% (%d/%d)" % (percent, done, total))
            out.flush()
            # Backspace over the progress text so the next update overwrites it.
            out.write("\b" * (8 + len(str(done)) + len(str(total))))
            try:
                self.api.ack_alert(alert['id'])
            except Exception as err:
                print()
                LOG.error(err)
                sys.exit(1)

        out.write("100%% (%d/%d), done.\n" % (total, total))

    def unack(self, args):
        """Un-acknowledge every alert matching ``args.filters``.

        The total comes from the counts lookup and the alerts from a separate
        alerts lookup. Writes an in-place progress indicator to stdout; if an
        API call raises, the error is logged and the process exits 1.
        """
        out = sys.stdout

        out.write("Counting alerts: ")
        total = self._counts(args.filters)['total']
        out.write("%s, done.\n" % total)

        out.write("un-Acking alerts: ")
        alerts = self._alerts(args.filters)['alerts']
        for done, alert in enumerate(alerts):
            percent = int(100.0 * done / total)
            out.write("%3d%% (%d/%d)" % (percent, done, total))
            out.flush()
            # Backspace over the progress text so the next update overwrites it.
            out.write("\b" * (8 + len(str(done)) + len(str(total))))
            try:
                self.api.unack_alert(alert['id'])
            except Exception as err:
                print()
                LOG.error(err)
                sys.exit(1)

        out.write("100%% (%d/%d), done.\n" % (total, total))

    def close(self, args):
        """Close every alert matching ``args.filters``.

        The total comes from the counts lookup and the alerts from a separate
        alerts lookup. Writes an in-place progress indicator to stdout; if an
        API call raises, the error is logged and the process exits 1.
        """
        out = sys.stdout

        out.write("Counting alerts: ")
        total = self._counts(args.filters)['total']
        out.write("%s, done.\n" % total)

        out.write("Closing alerts: ")
        alerts = self._alerts(args.filters)['alerts']
        for done, alert in enumerate(alerts):
            percent = int(100.0 * done / total)
            out.write("%3d%% (%d/%d)" % (percent, done, total))
            out.flush()
            # Backspace over the progress text so the next update overwrites it.
            out.write("\b" * (8 + len(str(done)) + len(str(total))))
            try:
                self.api.close_alert(alert['id'])
            except Exception as err:
                print()
                LOG.error(err)
                sys.exit(1)

        out.write("100%% (%d/%d), done.\n" % (total, total))

    def delete(self, args):
        """Delete every alert matching ``args.filters``.

        The total comes from the counts lookup and the alerts from a separate
        alerts lookup. Writes an in-place progress indicator to stdout; if an
        API call raises, the error is logged and the process exits 1.
        """
        out = sys.stdout

        out.write("Counting alerts: ")
        total = self._counts(args.filters)['total']
        out.write("%s, done.\n" % total)

        out.write("Deleting alerts: ")
        alerts = self._alerts(args.filters)['alerts']
        for done, alert in enumerate(alerts):
            percent = int(100.0 * done / total)
            out.write("%3d%% (%d/%d)" % (percent, done, total))
            out.flush()
            # Backspace over the progress text so the next update overwrites it.
            out.write("\b" * (8 + len(str(done)) + len(str(total))))
            try:
                self.api.delete_alert(alert['id'])
            except Exception as err:
                print()
                LOG.error(err)
                sys.exit(1)

        out.write("100%% (%d/%d), done.\n" % (total, total))

    def status(self, args):
        """Print the server metrics from the status response as a table.

        Gauges show their value. Counters and timers show their count and the
        average ``totalTime / count``; the average is reported as 0.00 when
        the count is zero or missing. Text metrics are listed last.
        """
        response = self._status()
        metrics = response['metrics']

        print('{:<28} {:<8} {:<26} {:10} {}'.format('METRIC', 'TYPE', 'NAME', 'VALUE', 'AVG'))

        for metric in [m for m in metrics if m['type'] in ['gauge', 'counter', 'timer']]:
            name = metric['group'] + '.' + metric['name']
            if metric['type'] == 'gauge':
                print('{0:<28} {1:<8} {2:<26} {3:<10}'.format(metric['title'], metric['type'], name, metric['value']))
            else:
                value = metric.get('count', 0)
                count = int(value)
                # A counter/timer that has never fired has count == 0, which
                # must not be used as a divisor.
                avg = int(metric.get('totalTime', 0)) * 1.0 / count if count else 0.0
                print('{0:<28} {1:<8} {2:<26} {3:<10} {4:-3.2f} ms'.format(metric['title'], metric['type'], name, value, avg))

        for metric in [m for m in metrics if m['type'] == 'text']:
            print('{0:<28} {1:<8} {2:<26} {3:<10}'.format(metric['title'], metric['type'], metric['group'] + '.' + metric['name'], metric['value']))

    def heartbeats(self, args):
        """Print all heartbeats as a table and optionally raise alerts for them.

        A ``*`` marks a latency above ``MAX_LATENCY`` (milliseconds) and a
        heartbeat whose age exceeds its timeout. When ``args.alert`` is set,
        one alert per heartbeat is sent: ``HeartbeatFail`` if the timeout is
        exceeded, else ``HeartbeatSlow`` if the latency is exceeded, else
        ``HeartbeatOK``.
        """
        response = self._heartbeats()
        heartbeats = response['heartbeats']

        print('{:<28} {:<26} {:<19} {:>8} {:7} {}'.format('ORIGIN', 'TAGS', 'CREATED', 'LATENCY', 'TIMEOUT', 'SINCE'))

        for heartbeat in heartbeats:
            hb = HeartbeatDocument.parse_heartbeat(heartbeat)
            # Use total_seconds(): .microseconds and .seconds are only the
            # sub-second and sub-day components of a timedelta, so they hide
            # latencies of a second or more and staleness of a day or more.
            latency = int((hb.receive_time - hb.create_time).total_seconds() * 1000)
            since = datetime.utcnow() - hb.receive_time
            # Drop the fractional part so the age prints as whole seconds.
            since = since - timedelta(microseconds=since.microseconds)

            latency_exceeded = latency > MAX_LATENCY
            timeout_exceeded = since.total_seconds() > hb.timeout

            print('{:<28} {:<26} {} {}{:6}ms {:6}s {}{}'.format(
                hb.origin,
                ' '.join(hb.tags),
                hb.get_date('create_time', 'local', args.timezone),
                '*' if latency_exceeded else ' ', latency,
                hb.timeout,
                '*' if timeout_exceeded else ' ', since
            ))

            if args.alert:
                if timeout_exceeded:
                    event, severity = 'HeartbeatFail', 'major'
                    value = '{}'.format(since)
                    text = 'Heartbeat not received in {} seconds'.format(hb.timeout)
                elif latency_exceeded:
                    event, severity = 'HeartbeatSlow', 'major'
                    value = '{}ms'.format(latency)
                    text = 'Heartbeat took more than {}ms to be processed'.format(MAX_LATENCY)
                else:
                    event, severity = 'HeartbeatOK', 'normal'
                    value = ''
                    text = 'Heartbeat OK'

                # The three events correlate with each other so that one
                # replaces another for the same resource.
                alert = Alert(
                    resource=hb.origin,
                    event=event,
                    correlate=['HeartbeatFail', 'HeartbeatSlow', 'HeartbeatOK'],
                    group='System',
                    environment='Production',
                    service=['Alerta'],
                    severity=severity,
                    value=value,
                    text=text,
                    tags=hb.tags,
                    type='heartbeatAlert'
                )
                self.send(alert)

    def blackout(self, args):
        """Create a blackout from the parsed arguments and print the result.

        ``args.start`` is rewritten in place to carry a ``.000`` fraction when
        it contains no ``.``.  The request is sent with
        ``self.api.blackout_alerts``; on an ``ok`` status the returned
        ``blackout`` value is printed.  Any exception, or any other status, is
        logged and the process exits with status 1.

        :param args: parsed arguments providing ``environment``, ``resource``,
            ``service``, ``event``, ``group``, ``tags``, ``start`` and
            ``duration``.
        """
        has_fraction = '.' in args.start
        if not has_fraction:
            args.start = args.start.replace('Z', '.000Z')

        try:
            payload = dict(
                environment=args.environment,
                resource=args.resource,
                service=args.service,
                event=args.event,
                group=args.group,
                tags=args.tags,
                startTime=args.start,
                duration=args.duration,
            )
        except Exception as err:
            LOG.error(err)
            sys.exit(1)

        try:
            reply = self.api.blackout_alerts(payload)
        except Exception as err:
            LOG.error(err)
            sys.exit(1)

        if reply['status'] != 'ok':
            LOG.error(reply['message'])
            sys.exit(1)
        else:
            print(reply['blackout'])

    def blackouts(self, args):
        """Print a table of blackouts, optionally deleting expired ones.

        Blackouts come from ``self.api.get_blackouts()``.  When ``args.purge``
        is truthy, each blackout whose status is ``expired`` is deleted through
        ``self.api.delete_blackout`` and shown as ``deleted`` (or ``error`` if
        the delete did not return ``ok``).  Start times are parsed as UTC and
        displayed in ``args.timezone``.

        :param args: parsed arguments providing ``timezone`` and ``purge``.
        """
        listing = self.api.get_blackouts()
        entries = listing['blackouts']

        header = '{:<8} {:<16} {:<16} {:<16} {:<16} {:16} {:16} {:24} {:8} {:19} {}'.format(
            'ID', 'CUSTOMER', 'ENVIRONMENT', 'SERVICE', 'RESOURCE', 'EVENT',
            'GROUP', 'TAGS', 'STATUS', 'START', 'DURATION')
        print(header)

        row_layout = '{:<8} {:<16} {:<16} {:16} {:16} {:16} {:16} {:24} {:8} {} {}s'

        for entry in entries:
            started = datetime.strptime(entry['startTime'], '%Y-%m-%dT%H:%M:%S.%fZ')
            zone = pytz.timezone(args.timezone)

            if args.purge and entry['status'] == 'expired':
                outcome = self.api.delete_blackout(entry['id'])
                entry['status'] = 'deleted' if outcome['status'] == 'ok' else 'error'

            # Missing fields are shown as '*'.
            columns = (
                entry['id'][:8],
                entry.get('customer', '*'),
                entry.get('environment', '*'),
                ','.join(entry.get('service', '*')),
                entry.get('resource', '*'),
                entry.get('event', '*'),
                entry.get('group', '*'),
                ' '.join(entry.get('tags', '*')),
                entry['status'],
                started.replace(tzinfo=pytz.UTC).astimezone(zone).strftime('%Y/%m/%d %H:%M:%S'),
                entry['duration'],
            )
            print(row_layout.format(*columns))

    @staticmethod
    def _build(filters, from_date=None, to_date=None):

        if filters:
            query = [tuple(x.split('=', 1)) for x in filters if '=' in x]
        else:
            query = list()

        if from_date:
            query.append(('from-date', from_date))

        if to_date:
            query.append(('to-date', to_date))

        if 'sort-by' not in query:
            query.append(('sort-by', 'lastReceiveTime'))

        return query

    def _alerts(self, filters, from_date=None, to_date=None):

        query = self._build(filters, from_date, to_date)

        try:
            response = self.api.get_alerts(query)
        except Exception as e:
            LOG.error(e)
            sys.exit(1)

        if response['status'] == "error":
            LOG.error(response['message'])
            sys.exit(1)

        return response

    def _counts(self, filters, from_date=None, to_date=None):

        query = self._build(filters, from_date, to_date)

        try:
            response = self.api.get_counts(query)
        except Exception as e:
            LOG.error(e)
            sys.exit(1)

        if response['status'] == "error":
            LOG.error(response['message'])
            sys.exit(1)

        return response

    def _history(self, filters, from_date=None, to_date=None):

        query = self._build(filters, from_date, to_date)

        try:
            response = self.api.get_history(query)
        except Exception as e:
            LOG.error(e)
            sys.exit(1)

        if response['status'] == "error":
            LOG.error(response['message'])
            sys.exit(1)

        return response

    def _heartbeats(self):

        try:
            response = self.api.get_heartbeats()
        except Exception as e:
            LOG.error(e)
            sys.exit(1)

        if response['status'] == "error":
            LOG.error(response['message'])
            sys.exit(1)

        return response

    def _status(self):

        try:
            response = self.api.get_status()
        except Exception as e:
            LOG.error(e)
            sys.exit(1)

        return response

    def help(self, args):
        """Do nothing; takes ``args`` like the other command methods and returns None."""
        return None

    def uptime(self, args):
        """Print the server's current time and how long it has been up.

        Reads ``time`` and ``uptime`` from ``self._status()``.  Both are
        treated as milliseconds.  Output looks like ``HH:MM up D days HH:MM``,
        where the first time is ``time`` rendered in the local timezone.

        :param args: parsed arguments (unused).
        """
        response = self._status()

        now = datetime.fromtimestamp(int(response['time']) / 1000.0)

        # Read days/hours/minutes from the timedelta itself.  Adding the delta
        # to datetime(1, 1, 1) and using the day-of-month made the day count
        # wrap around once the uptime exceeded 31 days.
        up = timedelta(milliseconds=int(response['uptime']))
        hours, leftover = divmod(up.seconds, 3600)
        minutes = leftover // 60

        print('{0} up {1} days {2:02d}:{3:02d}'.format(
            now.strftime('%H:%M'),
            up.days, hours, minutes
        ))

    def version(self, args):
        """Print server, client and ``requests`` library versions.

        The first line shows the ``application`` and ``version`` fields of
        ``self._status()``; the next two show this module's ``__version__``
        and ``requests.__version__``.

        :param args: parsed arguments (unused).
        """
        status = self._status()

        server_line = '{0} {1}'.format(status['application'], status['version'])
        print(server_line)

        client_line = 'alerta client {0}'.format(__version__)
        print(client_line)

        library_line = 'requests {0}'.format(requests.__version__)
        print(library_line)