Example #1
0
    def on_message(self, headers, body):
        """Parse an inbound message and queue the result for processing.

        Messages without a "type" or "correlation-id" header are counted as
        rejected and dropped. A "Heartbeat" type is parsed as a heartbeat;
        any other type is parsed as an alert. An alert whose parsing raises
        ValueError is counted as rejected and dropped.
        """
        has_required_headers = (
            'type' in headers and 'correlation-id' in headers)
        if not has_required_headers:
            LOG.warning(
                'Malformed header missing "type" or "correlation-id": %s',
                headers)
            self.statsd.metric_send('alerta.alerts.rejected', 1)
            return

        msg_type = headers['type']
        LOG.info("Received %s %s", msg_type, headers['correlation-id'])
        LOG.debug("Received body : %s", body)

        if msg_type == 'Heartbeat':
            parsed = Heartbeat.parse_heartbeat(body)
            if not parsed:
                return
            # Stamp the receive time before handing over to the queue
            parsed.receive_now()
            LOG.debug('Queueing successfully parsed heartbeat %s',
                      parsed.get_body())
            self.queue.put(parsed)
            return

        try:
            parsed = Alert.parse_alert(body)
        except ValueError:
            self.statsd.metric_send('alerta.alerts.rejected', 1)
            return
        if not parsed:
            return
        parsed.receive_now()
        LOG.debug('Queueing successfully parsed alert %s',
                  parsed.get_body())
        self.queue.put(parsed)
Example #2
0
    def run(self):
        """Run the check loop until a shutdown is requested.

        Each pass re-reads the rules, runs ``metric_check`` against them,
        sends a heartbeat and sleeps for ``CONF.loop_every``.
        KeyboardInterrupt or SystemExit sets ``shuttingdown``, after which
        the broker connection is closed.
        """
        self.running = True

        # Connect to message queue
        self.mq = Messaging()
        self.mq.connect(callback=GangliaMessage(self.mq))

        while not self.shuttingdown:
            try:
                # Rule config is re-read on every pass
                self.metric_check(init_rules())

                LOG.debug('Send heartbeat...')
                self.mq.send(Heartbeat(version=Version))

                LOG.debug('Waiting for next check run...')
                time.sleep(CONF.loop_every)
            except (KeyboardInterrupt, SystemExit):
                self.shuttingdown = True

        LOG.info('Shutdown request received...')
        self.running = False

        LOG.info('Disconnecting from message broker...')
        self.mq.disconnect()
Example #3
0
    def run(self):
        """Poll Dynect in a loop until a shutdown is requested.

        Each pass calls ``queryDynect``. Only when ``self.updating`` is
        truthy does it raise alerts, remember the current info and send a
        heartbeat. The loop then sleeps for ``CONF.loop_every``.
        """
        self.running = True

        # Connect to message queue
        self.mq = Messaging()
        self.mq.connect(callback=DynectMessage(self.mq))

        while not self.shuttingdown:
            try:
                self.queryDynect()

                if self.updating:
                    self.alertDynect()
                    self.last_info = self.info

                    # Heartbeats are only emitted on passes that updated
                    LOG.debug('Send heartbeat...')
                    self.mq.send(Heartbeat(version=Version))

                LOG.debug('Waiting for next check run...')
                time.sleep(CONF.loop_every)
            except (KeyboardInterrupt, SystemExit):
                self.shuttingdown = True

        self.running = False
Example #4
0
    def on_message(self, headers, body):
        """Turn a received message into a Heartbeat or Alert and queue it.

        Messages missing the "type" or "correlation-id" header, and alerts
        whose parsing raises ValueError, are counted as rejected and
        dropped. Every type other than "Heartbeat" is treated as an alert.
        """
        if not ('type' in headers and 'correlation-id' in headers):
            LOG.warning('Malformed header missing "type" or "correlation-id": %s', headers)
            self.statsd.metric_send('alerta.alerts.rejected', 1)
            return

        kind = headers['type']
        LOG.info("Received %s %s", kind, headers['correlation-id'])
        LOG.debug("Received body : %s", body)

        if kind != 'Heartbeat':
            try:
                message = Alert.parse_alert(body)
            except ValueError:
                self.statsd.metric_send('alerta.alerts.rejected', 1)
                return
            queued_fmt = 'Queueing successfully parsed alert %s'
        else:
            message = Heartbeat.parse_heartbeat(body)
            queued_fmt = 'Queueing successfully parsed heartbeat %s'

        # A falsy parse result is silently ignored
        if message:
            message.receive_now()
            LOG.debug(queued_fmt, message.get_body())
            self.queue.put(message)
Example #5
0
    def run(self):
        """Subscribe to the outbound queue and send heartbeats until shutdown.

        Messages are handled by the ``LoggerMessage`` callback. This loop
        only sleeps for ``CONF.loop_every`` and then sends a heartbeat,
        until KeyboardInterrupt or SystemExit sets ``shuttingdown``.
        """
        self.running = True

        # Connect to message queue
        self.mq = Messaging()
        self.mq.connect(callback=LoggerMessage(self.mq))
        self.mq.subscribe(destination=CONF.outbound_queue)

        while not self.shuttingdown:
            try:
                LOG.debug('Waiting for log messages...')
                time.sleep(CONF.loop_every)

                # The heartbeat goes out after the wait, not before it
                LOG.debug('Send heartbeat...')
                self.mq.send(Heartbeat(version=Version))
            except (KeyboardInterrupt, SystemExit):
                self.shuttingdown = True

        LOG.info('Shutdown request received...')
        self.running = False

        LOG.info('Disconnecting from message broker...')
        self.mq.disconnect()
Example #6
0
def create_heartbeat():
    """Parse a heartbeat from the body of the current request.

    Returns a JSON error response carrying the exception text when the
    request body cannot be parsed.
    """
    # NOTE(review): the visible code ends after the error path; the success
    # path (using ``heartbeat``) is not shown here.

    # Create a new heartbeat
    try:
        heartbeat = Heartbeat.parse_heartbeat(request.data)
    except Exception as e:  # "as" form is valid on Python 2.6+ and Python 3
        return jsonify(response={"status": "error", "message": str(e)})
Example #7
0
def create_heartbeat():
    """Parse a heartbeat from the body of the current request.

    Returns a JSON error response carrying the exception text when the
    request body cannot be parsed.
    """
    # NOTE(review): the visible code ends after the error path; the success
    # path (using ``heartbeat``) is not shown here.

    # Create a new heartbeat
    try:
        heartbeat = Heartbeat.parse_heartbeat(request.data)
    except Exception as e:  # "as" form is valid on Python 2.6+ and Python 3
        return jsonify(response={"status": "error", "message": str(e)})
Example #8
0
    def run(self):
        """Start the mailer helpers, then send heartbeats until interrupted.

        Starts a token bucket, a ``MailerMessage`` and a ``MailSender`` that
        share one ``onhold`` dict, then sends a heartbeat through the API
        every ``CONF.loop_every`` seconds. A heartbeat that fails to send is
        logged and the loop carries on.
        """
        onhold = dict()

        # Start token bucket thread
        tokens = LeakyBucket(tokens=20, rate=30)
        tokens.start()

        mailer = MailerMessage(onhold, tokens)
        mailer.start()

        sender = MailSender(onhold, tokens)
        sender.start()

        self.api = ApiClient()

        try:
            while True:
                LOG.debug('Send heartbeat...')
                heartbeat = Heartbeat(tags=[__version__])
                try:
                    self.api.send(heartbeat)
                except Exception as e:
                    # Heartbeats are best-effort: log and keep looping
                    LOG.warning('Failed to send heartbeat: %s', e)
                time.sleep(CONF.loop_every)
        except (KeyboardInterrupt, SystemExit):
            # NOTE(review): only ``mailer`` is told to stop here; ``sender``
            # and ``tokens`` are not signalled -- confirm this is intended.
            mailer.should_stop = True
Example #9
0
    def main(self):
        """Send one heartbeat or one alert built from CONF; return its id.

        With ``CONF.heartbeat`` set, a Heartbeat is sent whose version is
        the concatenated ``CONF.tags`` (falling back to ``Version``).
        Otherwise an Alert is assembled from the CONF options and sent.
        """
        if CONF.heartbeat:
            joined_tags = None
            if CONF.tags:
                joined_tags = ''.join(CONF.tags)

            message = Heartbeat(
                origin=CONF.origin,
                version=joined_tags or Version,
                timeout=CONF.timeout
            )
        else:
            message = Alert(
                resource=CONF.resource,
                event=CONF.event,
                correlate=CONF.correlate,
                group=CONF.group,
                value=CONF.value,
                status=CONF.status,
                severity=CONF.severity,
                environment=CONF.environment,
                service=CONF.service,
                text=CONF.text,
                event_type=CONF.event_type,
                tags=CONF.tags,
                origin=CONF.origin,
                threshold_info='n/a',   # TODO(nsatterl): make this configurable?
                summary=CONF.summary,
                timeout=CONF.timeout,
                raw_data='n/a',  # TODO(nsatterl): make this configurable?
                more_info=CONF.more_info,
                graph_urls=CONF.graph_urls,
            )

        # Both message kinds are logged, sent and identified the same way
        LOG.debug(repr(message))

        ApiClient().send(message)

        return message.get_id()
Example #10
0
    def run(self):
        """Send held email alerts and heartbeats until shutdown.

        Alerts are placed in ``self.onhold`` as ``(alert, hold_time)``
        pairs keyed by alert id. On every pass, each alert whose hold time
        has passed is mailed, provided the token bucket grants a token, and
        is then removed from the hold list. A heartbeat is sent after each
        ``CONF.loop_every`` sleep.
        """
        self.running = True

        # Start token bucket thread
        self.tokens = LeakyBucket(tokens=20, rate=30)
        self.tokens.start()

        self.onhold = dict()

        # Connect to message queue
        self.mq = Messaging()
        self.mq.connect(
            callback=MailerMessage(self.mq, self.onhold, self.tokens))
        self.mq.subscribe(destination=CONF.outbound_topic)

        while not self.shuttingdown:
            try:
                LOG.debug('Send email messages...')
                # Iterate over a snapshot of the keys: entries are removed
                # inside the loop, and the dict is shared with the message
                # callback (presumably updated from another thread).
                for alertid in list(self.onhold.keys()):
                    try:
                        (mailAlert, hold_time) = self.onhold[alertid]
                    except KeyError:
                        # Entry disappeared since the snapshot was taken
                        continue

                    if time.time() > hold_time:
                        if not self.tokens.get_token():
                            LOG.warning(
                                '%s : No tokens left, rate limiting this alert',
                                alertid)
                            continue

                        email = Mailer(mailAlert)
                        mail_to = CONF.mail_list.split(',')

                        if 'mailto' in mailAlert.tags:
                            mail_to.append(mailAlert.tags['mailto'])
                        email.send(mail_to=mail_to)
                        # Tolerate the entry having been removed already
                        self.onhold.pop(alertid, None)

                time.sleep(CONF.loop_every)

                LOG.debug('Send heartbeat...')
                heartbeat = Heartbeat(version=Version)
                self.mq.send(heartbeat)

            except (KeyboardInterrupt, SystemExit):
                self.shuttingdown = True

        LOG.info('Shutdown request received...')
        self.running = False
        self.tokens.shutdown()

        LOG.info('Disconnecting from message broker...')
        self.mq.disconnect()
Example #11
0
    def main(self):
        """Send one heartbeat or one alert built from CONF; return its id.

        With ``CONF.heartbeat`` set, a Heartbeat is sent whose version is
        the 'Version' entry of ``CONF.tags`` (falling back to ``Version``).
        Otherwise an Alert is assembled from the CONF options and sent.
        """
        if CONF.heartbeat:
            beat = Heartbeat(origin=CONF.origin,
                             version=CONF.tags.get('Version', Version),
                             timeout=CONF.timeout)

            LOG.debug(beat)

            ApiClient().send(beat)
            return beat.get_id()

        # Not a heartbeat request: build and send an alert instead
        alert = Alert(
            resource=CONF.resource,
            event=CONF.event,
            correlate=CONF.correlate,
            group=CONF.group,
            value=CONF.value,
            status=CONF.status,
            severity=CONF.severity,
            environment=CONF.environment,
            service=CONF.service,
            text=CONF.text,
            event_type=CONF.event_type,
            tags=CONF.tags,
            origin=CONF.origin,
            threshold_info='n/a',  # TODO(nsatterl): make this configurable?
            summary=CONF.summary,
            timeout=CONF.timeout,
            raw_data='n/a',  # TODO(nsatterl): make this configurable?
            more_info=CONF.more_info,
            graph_urls=CONF.graph_urls,
        )

        LOG.debug(repr(alert))

        ApiClient().send(alert)
        return alert.get_id()
Example #12
0
    def run(self):
        """Run the notify loop, reloading config when its file changes.

        Starts the token top-up and release threads, subscribes to the
        outbound topic, then on every pass re-reads the YAML config if its
        modification time changed, sleeps for ``CONF.loop_every`` and sends
        a heartbeat. Both helper threads are shut down on exit.
        """
        self.running = True

        # Initialise alert config
        init_config()
        # Remember the mtime of the config just loaded. ``config_mod_time``
        # is a local of this method, so it must be bound before the loop
        # compares against it (otherwise the first pass raises
        # UnboundLocalError).
        config_mod_time = os.path.getmtime(CONF.yaml_config)

        # Start token bucket thread
        _TokenThread = TokenTopUp()
        _TokenThread.start()

        # Start notify thread
        _NotifyThread = ReleaseThread()
        _NotifyThread.start()

        # Connect to message queue
        self.mq = Messaging()
        self.mq.connect(callback=NotifyMessage(self.mq))
        self.mq.subscribe(destination=CONF.outbound_topic)

        while not self.shuttingdown:
            try:
                # Read (or re-read) config as necessary
                if os.path.getmtime(CONF.yaml_config) != config_mod_time:
                    init_config()
                    config_mod_time = os.path.getmtime(CONF.yaml_config)

                LOG.debug('Waiting for email messages...')
                time.sleep(CONF.loop_every)

                LOG.debug('Send heartbeat...')
                heartbeat = Heartbeat(version=Version)
                self.mq.send(heartbeat)

            except (KeyboardInterrupt, SystemExit):
                self.shuttingdown = True

        _TokenThread.shutdown()
        _NotifyThread.shutdown()

        LOG.info('Shutdown request received...')
        self.running = False

        LOG.info('Disconnecting from message broker...')
        self.mq.disconnect()
Example #13
0
    def run(self):
        """Start the PagerDuty message handler and send heartbeats.

        A heartbeat is sent through the API every ``CONF.loop_every``
        seconds. A heartbeat that fails to send is logged and the loop
        carries on. KeyboardInterrupt or SystemExit flags the handler to
        stop.
        """
        pd = PagerDutyMessage()
        pd.start()

        self.api = ApiClient()

        try:
            while True:
                LOG.debug('Send heartbeat...')
                heartbeat = Heartbeat(tags=[__version__])
                try:
                    self.api.send(heartbeat)
                except Exception as e:
                    # Heartbeats are best-effort: log and keep looping
                    LOG.warning('Failed to send heartbeat: %s', e)
                time.sleep(CONF.loop_every)
        except (KeyboardInterrupt, SystemExit):
            pd.should_stop = True
Example #14
0
    def run(self):
        """Read one trap from stdin, forward it as an alert, send a heartbeat.

        The raw input is decoded as UTF-8 (undecodable bytes are ignored)
        and handed to ``SnmpTrapHandler.parse_snmptrap``. A truthy result is
        sent through the API and counted. A heartbeat is always sent
        afterwards.
        """
        self.statsd = StatsD()  # graphite metrics

        raw = sys.stdin.read()
        LOG.info('snmptrapd -> %r', raw)
        # ``unicode`` is the Python 2 builtin text type
        decoded = unicode(raw, 'utf-8', errors='ignore')
        LOG.debug('unicoded -> %s', decoded)

        trap = SnmpTrapHandler.parse_snmptrap(decoded)

        self.api = ApiClient()

        # Nothing is forwarded when the parser returns a falsy result
        if trap:
            self.api.send(trap)
            self.statsd.metric_send('alert.snmptrap.alerts.total', 1)

        LOG.debug('Send heartbeat...')
        self.api.send(Heartbeat(version=Version))
Example #15
0
File: daemon.py Project: ob3/alerta
    def on_message(self, headers, body):
        """Parse an inbound message and queue the result for processing.

        Messages without a "type" or "correlation-id" header are counted as
        rejected and dropped instead of raising KeyError. A "Heartbeat" type
        is parsed as a heartbeat, and a type ending in "Alert" is parsed as
        an alert. Any other type is ignored. An alert whose parsing raises
        ValueError is counted as rejected and dropped.
        """
        # Guard the header lookups below against malformed messages
        if 'type' not in headers or 'correlation-id' not in headers:
            LOG.warning('Malformed header missing "type" or "correlation-id": %s', headers)
            self.statsd.metric_send('alerta.alerts.rejected', 1)
            return

        LOG.info("Received %s %s", headers['type'], headers['correlation-id'])
        LOG.debug("Received body : %s", body)

        if headers['type'] == 'Heartbeat':
            heartbeat = Heartbeat.parse_heartbeat(body)
            if heartbeat:
                heartbeat.receive_now()
                LOG.debug('Queueing successfully parsed heartbeat %s', heartbeat.get_body())
                self.queue.put(heartbeat)
        elif headers['type'].endswith('Alert'):
            try:
                alert = Alert.parse_alert(body)
            except ValueError:
                self.statsd.metric_send('alerta.alerts.rejected', 1)
                return
            if alert:
                alert.receive_now()
                LOG.debug('Queueing successfully parsed alert %s', alert.get_body())
                self.queue.put(alert)
Example #16
0
    def parse_snmptrap(data):
        """Build an Alert (or Heartbeat) from formatted trap text.

        ``data`` is text whose lines starting with '$' each carry one
        "special" variable (name, whitespace, value). The remaining lines
        hold the varbinds, separated by '~%~'.

        Returns the resulting alert, a Heartbeat when the translated alert
        reports type 'Heartbeat', or None when ``transform_alert`` asks for
        the trap to be suppressed.
        """

        pdu_data = data.splitlines()
        varbind_list = pdu_data[:]

        # Collect the '$x value' lines into trapvars. One leading entry is
        # dropped from varbind_list per special line, which presumably
        # assumes all '$' lines come before the varbinds -- TODO confirm.
        trapvars = dict()
        for line in pdu_data:
            if line.startswith('$'):
                special, value = line.split(None, 1)
                trapvars[special] = value
                varbind_list.pop(0)

        # Replace the numeric '$s' code with a version name
        # NOTE(review): ``version`` is only bound when '$s' is present but is
        # used unconditionally further down.
        if '$s' in trapvars:
            if trapvars['$s'] == '0':
                version = 'SNMPv1'
            elif trapvars['$s'] == '1':
                version = 'SNMPv2c'
            elif trapvars['$s'] == '2':
                version = 'SNMPv2u'  # not supported
            else:
                version = 'SNMPv3'
            trapvars['$s'] = version

        # Get varbinds
        varbinds = dict()
        idx = 0
        for varbind in '\n'.join(varbind_list).split('~%~'):
            # Stop at the first empty chunk; anything after it is ignored
            if varbind == '':
                break
            idx += 1
            try:
                oid, value = varbind.split(None, 1)
            except ValueError:
                # Varbind without a value part
                oid = varbind
                value = ''
            varbinds[oid] = value
            trapvars['$' + str(idx)] = value  # $n
            LOG.debug('$%s %s', str(idx), value)

        # NOTE(review): raises KeyError when the input carries no '$q' line
        trapvars['$q'] = trapvars['$q'].lstrip('.')  # if numeric, remove leading '.'
        trapvars['$#'] = str(idx)  # number of varbinds seen

        LOG.debug('varbinds = %s', varbinds)

        LOG.debug('version = %s', version)

        correlate = list()

        # SNMPv1: derive the trap name '$O' from the '$w' code
        # NOTE(review): for a '$w' outside '0'-'6', '$O' is not set here
        if version == 'SNMPv1':
            if trapvars['$w'] == '0':
                trapvars['$O'] = 'coldStart'
                correlate = ['coldStart', 'warmStart']
            elif trapvars['$w'] == '1':
                trapvars['$O'] = 'warmStart'
                correlate = ['coldStart', 'warmStart']
            elif trapvars['$w'] == '2':
                trapvars['$O'] = 'linkDown'
                correlate = ['linkUp', 'linkDown']
            elif trapvars['$w'] == '3':
                trapvars['$O'] = 'linkUp'
                correlate = ['linkUp', 'linkDown']
            elif trapvars['$w'] == '4':
                trapvars['$O'] = 'authenticationFailure'
            elif trapvars['$w'] == '5':
                trapvars['$O'] = 'egpNeighborLoss'
            elif trapvars['$w'] == '6':  # enterpriseSpecific(6)
                if trapvars['$q'].isdigit():  # XXX - specific trap number was not decoded
                    trapvars['$O'] = '%s.0.%s' % (trapvars['$N'], trapvars['$q'])
                else:
                    trapvars['$O'] = trapvars['$q']

        # SNMPv2c: derive '$w'/'$W' from the second varbind, which also
        # becomes the trap name '$O'
        elif version == 'SNMPv2c':
            if 'coldStart' in trapvars['$2']:
                trapvars['$w'] = '0'
                trapvars['$W'] = 'Cold Start'
            elif 'warmStart' in trapvars['$2']:
                trapvars['$w'] = '1'
                trapvars['$W'] = 'Warm Start'
            elif 'linkDown' in trapvars['$2']:
                trapvars['$w'] = '2'
                trapvars['$W'] = 'Link Down'
            elif 'linkUp' in trapvars['$2']:
                trapvars['$w'] = '3'
                trapvars['$W'] = 'Link Up'
            elif 'authenticationFailure' in trapvars['$2']:
                trapvars['$w'] = '4'
                trapvars['$W'] = 'Authentication Failure'
            elif 'egpNeighborLoss' in trapvars['$2']:
                trapvars['$w'] = '5'
                trapvars['$W'] = 'EGP Neighbor Loss'
            else:
                trapvars['$w'] = '6'
                trapvars['$W'] = 'Enterprise Specific'
            trapvars['$O'] = trapvars['$2']  # SNMPv2-MIB::snmpTrapOID.0

        LOG.debug('trapvars = %s', trapvars)

        LOG.info('%s-Trap-PDU %s from %s at %s %s', version, trapvars['$O'], trapvars['$B'], trapvars['$x'], trapvars['$X'])

        # Pick the resource: '$B' unless it is '<UNKNOWN>', then '$A' unless
        # it is '0.0.0.0', then an IPv4 address parsed out of '$b'
        if trapvars['$B'] != '<UNKNOWN>':
            resource = trapvars['$B']
        elif trapvars['$A'] != '0.0.0.0':
            resource = trapvars['$A']
        else:
            m = re.match(r'UDP: \[(\d+\.\d+\.\d+\.\d+)\]', trapvars['$b'])
            if m:
                resource = m.group(1)
            else:
                resource = '<NONE>'

        # Defaults
        event = trapvars['$O']
        severity = severity_code.NORMAL
        group = 'SNMP'
        value = trapvars['$w']
        text = trapvars['$W']
        environment = ['INFRA']
        service = ['Network']
        tags = {'Version': version}
        timeout = None
        threshold_info = None
        summary = None
        # '$x' and '$X' are combined as date and time of the trap
        create_time = datetime.datetime.strptime('%sT%s.000Z' % (trapvars['$x'], trapvars['$X']), '%Y-%m-%dT%H:%M:%S.%fZ')

        snmptrapAlert = Alert(
            resource=resource,
            event=event,
            correlate=correlate,
            group=group,
            value=value,
            severity=severity,
            environment=environment,
            service=service,
            text=text,
            event_type='snmptrapAlert',
            tags=tags,
            timeout=timeout,
            threshold_info=threshold_info,
            summary=summary,
            create_time=create_time,
            raw_data=data,
        )

        # A truthy result from transform_alert means: drop this trap
        suppress = snmptrapAlert.transform_alert(trapoid=trapvars['$O'], trapvars=trapvars, varbinds=varbinds)
        if suppress:
            LOG.info('Suppressing %s SNMP trap', snmptrapAlert.event)
            LOG.debug('%s', snmptrapAlert)
            return

        snmptrapAlert.translate_alert(trapvars)

        # The alert may report itself as a heartbeat; send one in its place
        if snmptrapAlert.get_type() == 'Heartbeat':
            snmptrapAlert = Heartbeat(origin=snmptrapAlert.origin, version='n/a', timeout=snmptrapAlert.timeout)

        return snmptrapAlert
Example #17
0
File: views.py Project: ob3/alerta
    return jsonify(application="alerta", time=int(time.time() * 1000), heartbeats=heartbeats)


@app.route('/alerta/api/v2/heartbeats/heartbeat.json', methods=['POST'])
@jsonp
def create_heartbeat():
    """Create a heartbeat from the JSON request body and send it to the queue.

    Reads the optional ``origin``, ``version``, ``id`` and ``timeout``
    fields. Returns a JSON response with status "ok" and the heartbeat id,
    or status "error" and a message when the body is not valid JSON or is
    not a JSON object.
    """

    # Create a new heartbeat
    try:
        data = json.loads(request.data)
    except Exception as e:
        return jsonify(response={"status": "error", "message": str(e)})

    # Valid JSON that is not an object (a list, string or number) has no
    # ``.get``; report it as a client error instead of crashing.
    if not isinstance(data, dict):
        return jsonify(response={"status": "error", "message": "heartbeat must be a JSON object"})

    heartbeat = Heartbeat(
        origin=data.get('origin', None),
        version=data.get('version', None),
        heartbeatid=data.get('id', None),
        timeout=data.get('timeout', None),
    )
    LOG.debug('New heartbeat %s', heartbeat)
    mq.send(heartbeat)

    if heartbeat:
        return jsonify(response={"status": "ok", "id": heartbeat.get_id()})
    else:
        return jsonify(response={"status": "error", "message": "something went wrong"})


@app.route('/alerta/widgets/v2/severity')
def severity_widget():

    label = request.args.get('label', 'Alert Severity')
Example #18
0
    def main(self):
        """Build a heartbeat, or run a Nagios plugin and build an alert.

        With ``CONF.heartbeat`` set, only a Heartbeat is created. Otherwise
        the plugin ``CONF.nagios_cmd`` under ``CONF.nagios_plugins`` is run,
        its return code is mapped to a severity and its output is split
        into short text, long text and performance data for the alert.
        The process exits with status 1 if the plugin cannot be started.
        """
        # NOTE(review): the visible code ends once ``msg`` is built; what is
        # done with it afterwards is not shown here.

        if CONF.heartbeat:
            msg = Heartbeat(version=Version)
        else:
            # Run Nagios plugin check
            args = shlex.split(
                os.path.join(CONF.nagios_plugins, CONF.nagios_cmd))
            LOG.info('Running %s', ' '.join(args))
            try:
                check = subprocess.Popen(args, stdout=subprocess.PIPE)
            except Exception as e:
                LOG.error('Nagios check did not execute: %s', e)
                sys.exit(1)

            stdout = check.communicate()[0]
            rc = check.returncode
            LOG.debug('Nagios plugin %s => %s (rc=%d)', CONF.nagios_cmd,
                      stdout, rc)

            # Map the plugin return code to a severity
            if rc == 0:
                severity = severity_code.NORMAL
            elif rc == 1:
                severity = severity_code.WARNING
            elif rc == 2:
                severity = severity_code.CRITICAL
            elif rc == 3:
                severity = severity_code.UNKNOWN
            else:
                # Any other return code is reported as -1 / indeterminate
                rc = -1
                severity = severity_code.INDETERMINATE

            # Parse Nagios plugin check output
            text = ''
            long_text = ''
            perf_data = ''
            extra_perf_data = False

            for num, line in enumerate(stdout.split('\n'), start=1):
                if num == 1:
                    # First line: "text | perfdata"; the alert value is the
                    # first ';'-separated field of the perf data
                    if '|' in line:
                        text = line.split('|')[0].rstrip(' ')
                        perf_data = line.split('|')[1]
                        value = perf_data.split(';')[0].lstrip(' ')
                    else:
                        text = line
                        value = 'rc=%s' % rc
                else:
                    # Later lines are long text until a '|' is seen; from
                    # then on everything is additional perf data
                    if '|' in line:
                        long_text += line.split('|')[0]
                        perf_data += line.split('|')[1]
                        extra_perf_data = True
                    elif extra_perf_data is False:
                        long_text += line
                    else:
                        perf_data += line

            LOG.debug('Short Output: %s', text)
            LOG.debug('Long Output: %s', long_text)
            LOG.debug('Perf Data: %s', perf_data)

            graph_urls = None

            msg = Alert(
                resource=CONF.resource,
                event=CONF.event,
                correlate=CONF.correlate,
                group=CONF.group,
                value=value,
                severity=severity,
                environment=CONF.environment,
                service=CONF.service,
                text=text + ' ' + long_text,
                event_type='nagiosAlert',
                tags=CONF.tags,
                threshold_info=CONF.nagios_cmd,
                timeout=CONF.timeout,
                raw_data=stdout,
                more_info=perf_data,
                graph_urls=graph_urls,
            )
Example #19
0
class PingerDaemon(Daemon):
    """Daemon that periodically queues ping targets for worker threads."""

    # Default options registered with the config module
    pinger_opts = {
        'ping_file': '/etc/alerta/alert-pinger.targets',
        'ping_max_timeout': 15,  # seconds
        'ping_max_retries': 2,
        'ping_slow_warning': 5,  # ms
        'ping_slow_critical': 10,  # ms
        'server_threads': 20,
    }

    def __init__(self, prog, **kwargs):
        """Register the pinger options and initialise the base Daemon."""

        config.register_opts(PingerDaemon.pinger_opts)

        Daemon.__init__(self, prog, kwargs)

    def run(self):
        """Queue every configured target each cycle until shutdown.

        Starts ``CONF.server_threads`` worker threads, then on every pass
        puts one ``(environment, service, target, retries, time)`` tuple
        per target on the internal queue, sends a heartbeat, sleeps for
        ``CONF.loop_every`` and reports the queue length. On shutdown one
        ``None`` per configured thread is queued and all started workers
        are joined.
        """

        self.running = True

        # Create internal queue
        self.queue = Queue.Queue()

        # Connect to message queue
        self.mq = Messaging()
        self.mq.connect(callback=PingerMessage(self.mq))

        self.dedup = DeDup()

        self.carbon = Carbon()  # graphite metrics

        # Initialise ping targets
        ping_list = init_targets()

        # Start worker threads
        LOG.debug('Starting %s worker threads...', CONF.server_threads)
        workers = []  # only threads that actually started
        for i in range(CONF.server_threads):
            w = WorkerThread(self.mq, self.queue, self.dedup, self.carbon)
            try:
                w.start()
            except Exception as e:
                LOG.error('Worker thread #%s did not start: %s', i, e)
                continue
            workers.append(w)
            LOG.info('Started worker thread: %s', w.getName())

        while not self.shuttingdown:
            try:
                for p in ping_list:
                    if 'targets' in p and p['targets']:
                        for target in p['targets']:
                            environment = p['environment']
                            service = p['service']
                            retries = p.get('retries', CONF.ping_max_retries)
                            self.queue.put(
                                (environment, service, target, retries,
                                 time.time()))

                LOG.debug('Send heartbeat...')
                heartbeat = Heartbeat(version=Version)
                self.mq.send(heartbeat)

                time.sleep(CONF.loop_every)
                LOG.info('Ping queue length is %d', self.queue.qsize())
                self.carbon.metric_send('alert.pinger.queueLength',
                                        self.queue.qsize())

            except (KeyboardInterrupt, SystemExit):
                self.shuttingdown = True

        LOG.info('Shutdown request received...')
        self.running = False

        # One None per configured thread (presumably the workers' stop
        # signal), then wait for every started worker rather than only the
        # last one created.
        for i in range(CONF.server_threads):
            self.queue.put(None)
        for w in workers:
            w.join()

        LOG.info('Disconnecting from message broker...')
        self.mq.disconnect()
Example #20
0
class AwsDaemon(Daemon):
    """Daemon that runs periodic EC2 status checks and sends heartbeats."""

    # Default options registered with the config module
    aws_opts = {
        'fog_file': '/etc/fog/alerta.conf',
        'ec2_regions': ['eu-west-1', 'us-east-1'],
        'http_proxy': None,
        'https_proxy': None,
    }

    def __init__(self, prog, **kwargs):
        """Register the AWS options and initialise the base Daemon."""

        config.register_opts(AwsDaemon.aws_opts)

        Daemon.__init__(self, prog, kwargs)

        self.info = {}
        self.last = {}
        self.lookup = {}
        self.dedup = DeDup()

    def run(self):
        """Load the credentials file, then run status checks until shutdown.

        Exits with status 1 when the FOG file cannot be read or is empty.
        Otherwise each pass runs ``ec2_status_check``, sends a heartbeat
        and sleeps for ``CONF.loop_every``.
        """

        self.running = True

        # Read in FOG config file
        try:
            # Close the file deterministically instead of leaking the handle
            with open(CONF.fog_file) as fog_file:
                # NOTE(review): yaml.load can construct arbitrary objects;
                # prefer yaml.safe_load unless the file needs custom tags.
                self.fog = yaml.load(fog_file.read())
        except IOError as e:
            LOG.error('Could not read AWS credentials file %s: %s',
                      CONF.fog_file, e)
            sys.exit(1)

        if not self.fog:
            LOG.error('No AWS credentials found in FOG file %s. Exiting...',
                      CONF.fog_file)
            sys.exit(1)

        # Connect to message queue
        self.mq = Messaging()
        self.mq.connect(callback=AwsMessage(self.mq))

        # Export proxy settings through the process environment
        if CONF.http_proxy:
            os.environ['http_proxy'] = CONF.http_proxy
        if CONF.https_proxy:
            os.environ['https_proxy'] = CONF.https_proxy

        while not self.shuttingdown:
            try:
                self.ec2_status_check()

                LOG.debug('Send heartbeat...')
                heartbeat = Heartbeat(version=Version)
                self.mq.send(heartbeat)

                LOG.debug('Waiting for next check run...')
                time.sleep(CONF.loop_every)
            except (KeyboardInterrupt, SystemExit):
                self.shuttingdown = True

        LOG.info('Shutdown request received...')
        self.running = False

        LOG.info('Disconnecting from message broker...')
        self.mq.disconnect()
Example #21
0
class AlertaDaemon(Daemon):
    """Server daemon that queues received messages for worker threads."""

    # Default options registered with the config module
    alerta_opts = {
        'forward_duplicate': 'no',
    }

    def __init__(self, prog, **kwargs):
        """Register the server options and initialise the base Daemon."""

        config.register_opts(AlertaDaemon.alerta_opts)

        Daemon.__init__(self, prog, kwargs)

    def run(self):
        """Start the workers, then send heartbeats and metrics until shutdown.

        Received messages are put on the internal queue by the
        ``ServerMessage`` callback. Each pass sends a heartbeat, sleeps for
        ``CONF.loop_every`` and reports the queue length to the log, carbon
        and the database. On shutdown one ``None`` per configured thread is
        queued and all started workers are joined.
        """

        self.running = True

        self.queue = Queue.Queue()  # Create internal queue
        self.db = Mongo()  # mongo database
        self.carbon = Carbon()  # carbon metrics
        self.statsd = StatsD()  # graphite metrics

        # Connect to message queue
        self.mq = Messaging()
        self.mq.connect(
            callback=ServerMessage(self.mq, self.queue, self.statsd))
        self.mq.subscribe()

        # Start worker threads
        LOG.debug('Starting %s worker threads...', CONF.server_threads)
        workers = []  # only threads that actually started
        for i in range(CONF.server_threads):
            w = WorkerThread(self.mq, self.queue, self.statsd)
            try:
                w.start()
            except Exception as e:
                LOG.error('Worker thread #%s did not start: %s', i, e)
                continue
            workers.append(w)
            LOG.info('Started worker thread: %s', w.getName())

        while not self.shuttingdown:
            try:
                LOG.debug('Send heartbeat...')
                heartbeat = Heartbeat(version=Version, timeout=CONF.loop_every)
                self.mq.send(heartbeat)

                time.sleep(CONF.loop_every)
                LOG.info('Alert processing queue length is %d',
                         self.queue.qsize())
                self.carbon.metric_send('alerta.alerts.queueLength',
                                        self.queue.qsize())
                self.db.update_queue_metric(self.queue.qsize())

            except (KeyboardInterrupt, SystemExit):
                self.shuttingdown = True

        LOG.info('Shutdown request received...')
        self.running = False

        # One None per configured thread (presumably the workers' stop
        # signal), then wait for every started worker rather than only the
        # last one created.
        for i in range(CONF.server_threads):
            self.queue.put(None)
        for w in workers:
            w.join()

        LOG.info('Disconnecting from message broker...')
        self.mq.disconnect()
Example #22
0
class IrcbotDaemon(Daemon):
    """Daemon that joins an IRC channel and bridges it with the message queue.

    It connects to ``CONF.irc_host``/``CONF.irc_port`` as ``CONF.irc_user``,
    joins ``CONF.irc_channel``, subscribes to ``CONF.outbound_topic`` with an
    ``IrcbotMessage`` callback, and reacts to PING, "ack", "delete" and
    "!alerta quit" text received from the IRC server.
    """

    ircbot_opts = {
        'irc_host': 'localhost',
        'irc_port': 6667,
        'irc_channel': '#alerts',
        'irc_user': '******',
    }

    def __init__(self, prog, **kwargs):
        """Register the IRC bot's default options and initialise the daemon."""

        config.register_opts(IrcbotDaemon.ircbot_opts)

        Daemon.__init__(self, prog, kwargs)

    def run(self):
        """Connect to IRC and the message queue, then loop until shutdown.

        While the IRC socket is idle for ``CONF.loop_every`` seconds a
        heartbeat is sent; otherwise the received data is handled by
        ``_handle_irc_data``. A failed IRC connection, an IRC ``ERROR``
        message or the server closing the connection ends the process via
        ``sys.exit(1)`` (inside the main loop this triggers the normal
        shutdown path).
        """

        self.running = True

        # An IRC client may send 1 message every 2 seconds
        # See section 5.8 in http://datatracker.ietf.org/doc/rfc2813/
        tokens = LeakyBucket(tokens=20, rate=2)
        tokens.start()

        # Connect to IRC server
        try:
            irc = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
            irc.connect((CONF.irc_host, CONF.irc_port))
            time.sleep(1)
            irc.send('NICK %s\r\n' % CONF.irc_user)
            time.sleep(1)
            irc.send('USER %s 8 * : %s\r\n' % (CONF.irc_user, CONF.irc_user))
            LOG.debug('USER -> %s', irc.recv(4096))
            time.sleep(1)
            irc.send('JOIN %s\r\n' % CONF.irc_channel)
            LOG.debug('JOIN ->  %s', irc.recv(4096))
        except Exception as e:
            LOG.error('IRC connection error: %s', e)
            # Mirror the normal shutdown path so the started token bucket is
            # not left running when we bail out.
            tokens.shutdown()
            sys.exit(1)

        LOG.info('Joined IRC channel %s on %s as USER %s', CONF.irc_channel, CONF.irc_host, CONF.irc_user)

        # Connect to message queue
        self.mq = Messaging()
        self.mq.connect(callback=IrcbotMessage(self.mq, irc, tokens))
        self.mq.subscribe(destination=CONF.outbound_topic)

        while not self.shuttingdown:
            try:
                LOG.debug('Waiting for IRC messages...')
                readable, _, _ = select.select([irc], [], [], CONF.loop_every)
                if readable:
                    # Only the IRC socket is watched, so it is the one ready.
                    raw = irc.recv(4096)
                    if not raw:
                        # An empty read means the peer closed the connection.
                        # Without this check select() would report the socket
                        # readable forever and the loop would spin.
                        LOG.error('IRC server closed the connection. Exiting...')
                        sys.exit(1)
                    self._handle_irc_data(irc, raw.rstrip('\r\n'))
                else:
                    LOG.debug('Send heartbeat...')
                    heartbeat = Heartbeat(version=Version)
                    self.mq.send(heartbeat)

            except (KeyboardInterrupt, SystemExit):
                self.shuttingdown = True

        LOG.info('Shutdown request received...')
        self.running = False
        tokens.shutdown()

        LOG.info('Disconnecting from message broker...')
        self.mq.disconnect()

        # Release the IRC socket once nothing is subscribed any more.
        irc.close()

    def _handle_irc_data(self, irc, data):
        """Log one chunk of IRC data and act on the command it contains.

        ``data`` is the received text with trailing CR/LF removed. Raises
        ``SystemExit`` when the text contains ``ERROR``. Lines that match a
        command keyword but carry too few whitespace-separated tokens are
        logged and ignored instead of raising ``IndexError``.
        """
        if len(data) > 0:
            if 'ERROR' in data:
                LOG.error('%s. Exiting...', data)
                sys.exit(1)
            else:
                LOG.debug('%s', data)
        else:
            LOG.warning('IRC server sent no data')

        words = data.split()
        lowered = data.lower()

        if 'PING' in data:
            # The second token is echoed back in the PONG reply.
            if len(words) < 2:
                LOG.warning('Ignoring IRC PING without a token: %s', data)
                return
            LOG.info('IRC PING received -> PONG ' + words[1])
            irc.send('PONG ' + words[1] + '\r\n')
        elif 'ack' in lowered:
            # Fifth token is used as the alert id, first as the requester.
            if len(words) < 5:
                LOG.warning('Ignoring IRC ACK request without an alert id: %s', data)
                return
            LOG.info('Request to ACK %s by %s', words[4], words[0])
            ack_alert(words[4])
        elif 'delete' in lowered:
            if len(words) < 5:
                LOG.warning('Ignoring IRC DELETE request without an alert id: %s', data)
                return
            LOG.info('Request to DELETE %s by %s', words[4], words[0])
            delete_alert(words[4])
        elif data.find('!alerta quit') != -1:
            irc.send('QUIT\r\n')
        else:
            LOG.warning('IRC: %s', data)
Example #23
0
    def parse_events(self, data):
        """Turn SolarWinds event rows into a list of alerts and heartbeats.

        ``data`` is indexed to find its first element: an ``IndexError``
        yields an empty list, and if that element has no ``c0`` attribute
        ``data`` itself is treated as a single row. For every row an ``Alert``
        is built from the ``c1``..``c7`` columns; alerts for which
        ``transform_alert()`` returns a true value are dropped, and alerts
        whose ``get_type()`` is ``'Heartbeat'`` are replaced by a
        ``Heartbeat``.
        """

        LOG.debug('Parsing solarwinds event data...')
        LOG.debug(data)

        try:
            first = data[0]
        except IndexError:
            return []

        try:
            first.c0
        except AttributeError:
            # The first element is not row-like, so wrap ``data`` as one row.
            data = [data]

        parsed = []

        for row in data:
            LOG.debug(row)

            event_name = row.c4.replace(" ", "")
            alert_fields = {
                'event': event_name,
                'correlate': SOLAR_WINDS_CORRELATED_EVENTS.get(event_name, None),
                'resource': '%s:%s' % (row.c2, row.c3.lower()),
                'severity': SOLAR_WINDS_SEVERITY_LEVELS.get(row.c7, None),
                'group': 'Orion',
                'value': '%s' % row.c6,
                'text': '%s' % row.c5,
                'environment': ['INFRA'],
                'service': ['Network'],
                'tags': None,
                'timeout': None,
                'threshold_info': None,
                'summary': None,
                'raw_data': repr(row),
                # The last five characters of c1 are replaced by 'Z' before
                # parsing the timestamp.
                'create_time': datetime.datetime.strptime(
                    row.c1[:-5] + 'Z', '%Y-%m-%dT%H:%M:%S.%fZ'),
                'event_type': 'solarwindsAlert',
            }
            item = Alert(**alert_fields)

            if item.transform_alert():
                LOG.info('Suppressing %s alert', item.event)
                LOG.debug('%s', item)
                continue

            if item.get_type() == 'Heartbeat':
                item = Heartbeat(origin=item.origin,
                                 version='n/a',
                                 timeout=item.timeout)

            parsed.append(item)

        return parsed
Example #24
0
def create_heartbeat():
    """Parse the request body as a heartbeat, reporting parse errors as JSON.

    When ``Heartbeat.parse_heartbeat`` raises ``ValueError`` a ``jsonify``
    response with ``status="error"`` and the exception text is returned.
    As written here, a successfully parsed heartbeat is not used further and
    the function falls through, returning ``None``.
    """
    try:
        heartbeat = Heartbeat.parse_heartbeat(request.data)
    except ValueError as err:
        reason = str(err)
        return jsonify(status="error", message=reason)
Example #25
0
class SolarWindsDaemon(Daemon):
    """Daemon that polls SolarWinds for events and forwards them as alerts.

    Each pass fetches NPM events and then Cisco UCS events through a
    ``SwisClient``, parses them with ``self.parse_events`` and sends those
    that pass the by-value ``DeDup`` filter to the message queue.
    """

    solarwinds_opts = {
        'solarwinds_host': 'localhost',
        'solarwinds_username': '******',
        'solarwinds_password': '',
        'solarwinds_group': 'websys',
    }

    def __init__(self, prog, **kwargs):
        """Register the SolarWinds default options and initialise the daemon."""
        defaults = SolarWindsDaemon.solarwinds_opts
        config.register_opts(defaults)

        Daemon.__init__(self, prog, kwargs)

    def run(self):
        """Poll SolarWinds every ``CONF.loop_every`` seconds until shutdown.

        Creating the ``SwisClient`` is retried every 30 seconds until it
        succeeds. A heartbeat is sent after a pass only if neither event
        fetch raised ``IOError``.
        """
        self.running = True

        # Keep trying until a client could be created.
        while True:
            try:
                swis = SwisClient(username=CONF.solarwinds_username,
                                  password=CONF.solarwinds_password)
                break
            except Exception as e:
                LOG.error('SolarWinds SWIS Client error: %s', e)
                time.sleep(30)
        LOG.info('Polling for SolarWinds events on %s' % CONF.solarwinds_host)

        # Connect to message queue
        self.mq = Messaging()
        self.mq.connect(callback=SolarWindsMessage(self.mq))

        self.dedup = DeDup(by_value=True)

        # Network/interface/volume events first, then Cisco UCS events.
        fetchers = ('get_npm_events', 'get_ucs_events')

        while not self.shuttingdown:
            try:
                LOG.debug('Polling SolarWinds...')
                all_fetches_ok = True

                for fetcher in fetchers:
                    try:
                        events = getattr(swis, fetcher)()
                    except IOError:
                        # Carry on with no events, but remember the failure
                        # so the heartbeat is withheld for this pass.
                        events = []
                        all_fetches_ok = False

                    for parsed in self.parse_events(events):
                        if self.dedup.is_send(parsed):
                            self.mq.send(parsed)

                if not all_fetches_ok:
                    LOG.error('SolarWinds failure. Skipping heartbeat.')
                else:
                    LOG.debug('Send heartbeat...')
                    heartbeat = Heartbeat(version=Version)
                    self.mq.send(heartbeat)

                time.sleep(CONF.loop_every)

            except (KeyboardInterrupt, SystemExit):
                self.shuttingdown = True

        LOG.info('Shutdown request received...')
        self.running = False

        LOG.info('Disconnecting from message broker...')
        self.mq.disconnect()
Example #26
0
class UrlmonDaemon(Daemon):
    """Daemon that periodically queues URL checks for a pool of workers.

    Every ``CONF.loop_every`` seconds each URL returned by ``init_urls()`` is
    put on an internal queue together with the current time, a heartbeat is
    sent, and the queue length is logged and reported to carbon.
    """

    urlmon_opts = {
        'urlmon_file': '/etc/alerta/alert-urlmon.targets',
        'urlmon_max_timeout': 15,  # seconds
        'urlmon_slow_warning': 2000,  # ms
        'urlmon_slow_critical': 5000,  # ms
    }

    def __init__(self, prog, **kwargs):
        """Register the URL monitor's default options and initialise the daemon."""

        config.register_opts(UrlmonDaemon.urlmon_opts)

        Daemon.__init__(self, prog, kwargs)

    def run(self):
        """Start the workers and feed them URL checks until shutdown.

        On ``KeyboardInterrupt``/``SystemExit`` the loop ends, one ``None`` is
        queued per started worker, every started worker is joined, and the
        broker connection is closed.
        """

        self.running = True

        # Create internal queue
        self.queue = Queue.Queue()

        # Connect to message queue
        self.mq = Messaging()
        self.mq.connect(callback=UrlmonMessage(self.mq))

        self.dedup = DeDup()

        self.carbon = Carbon()  # graphite metrics

        # Initialise alert rules
        urls = init_urls()

        # Start worker threads. Only threads that really started are kept, so
        # that shutdown never tries to join a thread that is not running.
        workers = []
        LOG.debug('Starting %s worker threads...', CONF.server_threads)
        for i in range(CONF.server_threads):
            w = WorkerThread(self.mq, self.queue, self.dedup, self.carbon)
            try:
                w.start()
            except Exception as e:
                LOG.error('Worker thread #%s did not start: %s', i, e)
                continue
            workers.append(w)
            LOG.info('Started worker thread: %s', w.getName())

        while not self.shuttingdown:
            try:
                for url in urls:
                    self.queue.put((url, time.time()))

                LOG.debug('Send heartbeat...')
                heartbeat = Heartbeat(version=Version)
                self.mq.send(heartbeat)

                time.sleep(CONF.loop_every)
                LOG.info('URL check queue length is %d', self.queue.qsize())
                self.carbon.metric_send('alert.urlmon.queueLength',
                                        self.queue.qsize())

            except (KeyboardInterrupt, SystemExit):
                self.shuttingdown = True

        LOG.info('Shutdown request received...')
        self.running = False

        # One None per started worker (presumably the stop marker each
        # WorkerThread consumes -- confirm against WorkerThread.run).
        for _ in workers:
            self.queue.put(None)
        # Wait for every worker, not just the last one created; this is also
        # safe when no worker was configured or none could be started.
        for w in workers:
            w.join()

        LOG.info('Disconnecting from message broker...')
        self.mq.disconnect()
Example #27
0
                LOG.info('Waiting for CloudWatch alarms...')
                try:
                    m = q.read(wait_time_seconds=20)
                except boto.exception.SQSError, e:
                    LOG.warning('Could not read from queue: %s', e)
                    time.sleep(20)

                if m:
                    message = m.get_body()
                    cloudwatchAlert = self.parse_notification(message)
                    if self.dedup.is_send(cloudwatchAlert):
                        self.mq.send(cloudwatchAlert)
                    q.delete_message(m)

                LOG.debug('Send heartbeat...')
                heartbeat = Heartbeat(version=Version)
                self.mq.send(heartbeat)

            except (KeyboardInterrupt, SystemExit):
                self.shuttingdown = True

        LOG.info('Shutdown request received...')
        self.running = False

        LOG.info('Disconnecting from message broker...')
        self.mq.disconnect()

    def parse_notification(self, message):

        LOG.debug('Parsing CloudWatch notification message...')