Ejemplo n.º 1
0
    def run(self):

        while True:
            LOG.debug('Waiting for CloudWatch alarms on %s...', AWS_SQS_QUEUE)
            try:
                notification = self.sqs.read(wait_time_seconds=20)
            except boto.exception.SQSError as e:
                LOG.warning('Could not read from queue: %s', e)
                time.sleep(20)
                continue

            if notification:
                cloudwatchAlert = self.parse_notification(notification)
                try:
                    self.api.send(cloudwatchAlert)
                except Exception as e:
                    LOG.warning('Failed to send alert: %s', e)
                self.sqs.delete_message(notification)

            LOG.debug('Send heartbeat...')
            heartbeat = Heartbeat(tags=[__version__])
            try:
                self.api.send(heartbeat)
            except Exception as e:
                LOG.warning('Failed to send heartbeat: %s', e)
Ejemplo n.º 2
0
    def run(self):

        api = ApiClient(endpoint=OPTIONS['endpoint'], key=OPTIONS['key'])
        keep_alive = 0

        while not self.should_stop:
            for alertid in on_hold.keys():
                try:
                    (alert, hold_time) = on_hold[alertid]
                except KeyError:
                    continue
                if time.time() > hold_time:
                    self.send_email(alert)
                    try:
                        del on_hold[alertid]
                    except KeyError:
                        continue

            if keep_alive >= 10:
                tag = OPTIONS['smtp_host'] or 'alerta-mailer'
                try:
                    api.send(Heartbeat(tags=[tag]))
                except Exception as e:
                    time.sleep(5)
                    continue
                keep_alive = 0
            keep_alive += 1
            time.sleep(2)
Ejemplo n.º 3
0
class SnmpTrapHandler(object):
    def __init__(self):

        self.api = None

    def run(self):

        endpoint = os.environ.get('ALERTA_ENDPOINT', 'http://localhost:8080')
        key = os.environ.get('ALERTA_API_KEY', None)

        self.api = ApiClient(endpoint=endpoint, key=key)

        data = sys.stdin.read()
        LOG.info('snmptrapd -> %r', data)
        data = unicode(data, 'utf-8', errors='ignore')
        LOG.debug('unicoded -> %s', data)

        snmptrapAlert = SnmpTrapHandler.parse_snmptrap(data)

        if snmptrapAlert:
            try:
                self.api.send(snmptrapAlert)
            except Exception, e:
                LOG.warning('Failed to send alert: %s', e)

        LOG.debug('Send heartbeat...')
        heartbeat = Heartbeat(tags=[__version__])
        try:
            self.api.send(heartbeat)
        except Exception, e:
            LOG.warning('Failed to send heartbeat: %s', e)
Ejemplo n.º 4
0
def createheartbeat():
    hb = Heartbeat(origin=origin, timeout=timeout)
    for i in range(max_retries):
        try:
            print(api.send(hb))
        except Exception as e:
            print("HTTP Error: {}".format(e))
            time.sleep(sleep)
            continue
        else:
            break
    else:
        print("api is down")
Ejemplo n.º 5
0
class PingerDaemon(object):

    def __init__(self):

        self.shuttingdown = False

    def run(self):

        self.running = True

        # Create internal queue
        self.queue = Queue.Queue()

        self.api = ApiClient()

        # Initialiase ping targets
        ping_list = init_targets()

        # Start worker threads
        LOG.debug('Starting %s worker threads...', SERVER_THREAD_COUNT)
        for i in range(SERVER_THREAD_COUNT):
            w = WorkerThread(self.api, self.queue)
            try:
                w.start()
            except Exception, e:
                LOG.error('Worker thread #%s did not start: %s', i, e)
                continue
            LOG.info('Started worker thread: %s', w.getName())

        while not self.shuttingdown:
            try:
                for p in ping_list:
                    if 'targets' in p and p['targets']:
                        for target in p['targets']:
                            environment = p['environment']
                            service = p['service']
                            retries = p.get('retries', PING_MAX_RETRIES)
                            self.queue.put((environment, service, target, retries, time.time()))

                LOG.debug('Send heartbeat...')
                heartbeat = Heartbeat(tags=[__version__])
                try:
                    self.api.send(heartbeat)
                except Exception, e:
                    LOG.warning('Failed to send heartbeat: %s', e)

                time.sleep(LOOP_EVERY)
                LOG.info('Ping queue length is %d', self.queue.qsize())

            except (KeyboardInterrupt, SystemExit):
                self.shuttingdown = True
Ejemplo n.º 6
0
def main():

    api = ApiClient()
    listener = Listener()

    while True:
        listener.send_cmd('READY\n')
        headers, body = listener.wait()

        event = headers['eventname']
        if event.startswith('TICK'):
            supervisorAlert = Heartbeat(origin='supervisord',
                                        tags=[headers['ver'], event])
        else:
            if event.endswith('FATAL'):
                severity = 'critical'
            elif event.endswith('BACKOFF'):
                severity = 'warning'
            elif event.endswith('EXITED'):
                severity = 'minor'
            else:
                severity = 'normal'

            supervisorAlert = Alert(
                resource='%s:%s' % (platform.uname()[1], body['processname']),
                environment='Production',
                service=['supervisord'],
                event=event,
                correlate=[
                    'PROCESS_STATE_STARTING', 'PROCESS_STATE_RUNNING',
                    'PROCESS_STATE_BACKOFF', 'PROCESS_STATE_STOPPING',
                    'PROCESS_STATE_EXITED', 'PROCESS_STATE_STOPPED',
                    'PROCESS_STATE_FATAL', 'PROCESS_STATE_UNKNOWN'
                ],
                value='serial=%s' % headers['serial'],
                severity=severity,
                origin=headers['server'],
                text='State changed from %s to %s.' %
                (body['from_state'], event),
                raw_data='%s\n\n%s' % (json.dumps(headers), json.dumps(body)))
        try:
            api.send(supervisorAlert)
        except Exception as e:
            listener.log_stderr(e)
            listener.send_cmd('RESULT 4\nFAIL')
        else:
            listener.send_cmd('RESULT 2\nOK')
Ejemplo n.º 7
0
class UrlmonDaemon(object):
    def __init__(self):

        self.shuttingdown = False

    def run(self):

        self.running = True

        self.queue = Queue.Queue()
        self.api = self.api = ApiClient(endpoint=settings.ENDPOINT,
                                        key=settings.API_KEY)

        # Start worker threads
        LOG.debug('Starting %s worker threads...', SERVER_THREADS)
        for i in range(SERVER_THREADS):
            w = WorkerThread(self.queue, self.api)
            try:
                w.start()
            except Exception, e:
                LOG.error('Worker thread #%s did not start: %s', i, e)
                continue
            LOG.info('Started worker thread: %s', w.getName())

        while not self.shuttingdown:
            try:
                for check in settings.checks:
                    self.queue.put((check, time.time()))

                LOG.debug('Send heartbeat...')
                heartbeat = Heartbeat(tags=[__version__])
                try:
                    self.api.send(heartbeat)
                except Exception, e:
                    LOG.warning('Failed to send heartbeat: %s', e)

                time.sleep(LOOP_EVERY)
                LOG.info('URL check queue length is %d', self.queue.qsize())

            except (KeyboardInterrupt, SystemExit):
                self.shuttingdown = True
Ejemplo n.º 8
0
    def run(self):

        count = 0
        while not self.shuttingdown:
            try:
                LOG.debug('Waiting for syslog messages...')
                ip, op, rdy = select.select([self.udp, self.tcp], [], [],
                                            LOOP_EVERY)
                if ip:
                    for i in ip:
                        if i == self.udp:
                            data, addr = self.udp.recvfrom(4096)
                            data = unicode(data, 'utf-8', errors='ignore')
                            LOG.debug('Syslog UDP data received from %s: %s',
                                      addr, data)
                        if i == self.tcp:
                            client, addr = self.tcp.accept()
                            data = client.recv(4096)
                            data = unicode(data, 'utf-8', errors='ignore')
                            client.close()
                            LOG.debug('Syslog TCP data received from %s: %s',
                                      addr, data)

                        alerts = self.parse_syslog(ip=addr[0], data=data)
                        for alert in alerts:
                            try:
                                self.api.send(alert)
                            except Exception, e:
                                LOG.warning('Failed to send alert: %s', e)

                    count += 1
                if not ip or count % 5 == 0:
                    LOG.debug('Send heartbeat...')
                    heartbeat = Heartbeat(tags=[__version__])
                    try:
                        self.api.send(heartbeat)
                    except Exception, e:
                        LOG.warning('Failed to send heartbeat: %s', e)
Ejemplo n.º 9
0
    def parse_snmptrap(data):

        pdu_data = data.splitlines()
        varbind_list = pdu_data[:]

        trapvars = dict()
        for line in pdu_data:
            if line.startswith('$'):
                special, value = line.split(None, 1)
                trapvars[special] = value
                varbind_list.pop(0)

        if '$s' in trapvars:
            if trapvars['$s'] == '0':
                version = 'SNMPv1'
            elif trapvars['$s'] == '1':
                version = 'SNMPv2c'
            elif trapvars['$s'] == '2':
                version = 'SNMPv2u'  # not supported
            else:
                version = 'SNMPv3'
            trapvars['$s'] = version
        else:
            LOG.warning('Failed to parse unknown trap type.')
            return

        # Get varbinds
        varbinds = dict()
        idx = 0
        for varbind in '\n'.join(varbind_list).split('~%~'):
            if varbind == '':
                break
            idx += 1
            try:
                oid, value = varbind.split(None, 1)
            except ValueError:
                oid = varbind
                value = ''
            varbinds[oid] = value
            trapvars['$' + str(idx)] = value  # $n
            LOG.debug('$%s %s', str(idx), value)

        trapvars['$q'] = trapvars['$q'].lstrip(
            '.')  # if numeric, remove leading '.'
        trapvars['$#'] = str(idx)

        LOG.debug('varbinds = %s', varbinds)

        LOG.debug('version = %s', version)

        correlate = list()

        if version == 'SNMPv1':
            if trapvars['$w'] == '0':
                trapvars['$O'] = 'coldStart'
                correlate = ['coldStart', 'warmStart']
            elif trapvars['$w'] == '1':
                trapvars['$O'] = 'warmStart'
                correlate = ['coldStart', 'warmStart']
            elif trapvars['$w'] == '2':
                trapvars['$O'] = 'linkDown'
                correlate = ['linkUp', 'linkDown']
            elif trapvars['$w'] == '3':
                trapvars['$O'] = 'linkUp'
                correlate = ['linkUp', 'linkDown']
            elif trapvars['$w'] == '4':
                trapvars['$O'] = 'authenticationFailure'
            elif trapvars['$w'] == '5':
                trapvars['$O'] = 'egpNeighborLoss'
            elif trapvars['$w'] == '6':  # enterpriseSpecific(6)
                if trapvars['$q'].isdigit(
                ):  # XXX - specific trap number was not decoded
                    trapvars['$O'] = '%s.0.%s' % (trapvars['$N'],
                                                  trapvars['$q'])
                else:
                    trapvars['$O'] = trapvars['$q']

        elif version == 'SNMPv2c':
            if 'coldStart' in trapvars['$2']:
                trapvars['$w'] = '0'
                trapvars['$W'] = 'Cold Start'
            elif 'warmStart' in trapvars['$2']:
                trapvars['$w'] = '1'
                trapvars['$W'] = 'Warm Start'
            elif 'linkDown' in trapvars['$2']:
                trapvars['$w'] = '2'
                trapvars['$W'] = 'Link Down'
            elif 'linkUp' in trapvars['$2']:
                trapvars['$w'] = '3'
                trapvars['$W'] = 'Link Up'
            elif 'authenticationFailure' in trapvars['$2']:
                trapvars['$w'] = '4'
                trapvars['$W'] = 'Authentication Failure'
            elif 'egpNeighborLoss' in trapvars['$2']:
                trapvars['$w'] = '5'
                trapvars['$W'] = 'EGP Neighbor Loss'
            else:
                trapvars['$w'] = '6'
                trapvars['$W'] = 'Enterprise Specific'
            trapvars['$O'] = trapvars['$2']  # SNMPv2-MIB::snmpTrapOID.0

        LOG.debug('trapvars = %s', trapvars)

        LOG.info('%s-Trap-PDU %s from %s at %s %s', version, trapvars['$O'],
                 trapvars['$B'], trapvars['$x'], trapvars['$X'])

        if trapvars['$B'] != '<UNKNOWN>':
            resource = trapvars['$B']
        elif trapvars['$A'] != '0.0.0.0':
            resource = trapvars['$A']
        else:
            m = re.match(r'UDP: \[(\d+\.\d+\.\d+\.\d+)\]', trapvars['$b'])
            if m:
                resource = m.group(1)
            else:
                resource = '<NONE>'

        # Defaults
        event = trapvars['$O']
        severity = 'indeterminate'
        group = 'SNMP'
        value = trapvars['$w']
        text = trapvars['$W']
        environment = 'Production'
        service = ['Network']
        attributes = {'source': trapvars['$B']}
        tags = [version]
        create_time = datetime.datetime.strptime(
            '%sT%s.000Z' % (trapvars['$x'], trapvars['$X']),
            '%Y-%m-%dT%H:%M:%S.%fZ')

        snmptrapAlert = Alert(
            resource=resource,
            event=event,
            correlate=correlate,
            group=group,
            value=value,
            severity=severity,
            environment=environment,
            service=service,
            text=text,
            event_type='snmptrapAlert',
            attributes=attributes,
            tags=tags,
            create_time=create_time,
            raw_data=data,
        )

        SnmpTrapHandler.translate_alert(snmptrapAlert, trapvars)

        if snmptrapAlert.get_type() == 'Heartbeat':
            snmptrapAlert = Heartbeat(origin=snmptrapAlert.origin,
                                      tags=[__version__])

        return snmptrapAlert