Exemple #1
0
    def run(self, conf):
        """Fork ``workers`` StalkerRunner processes and keep them running.

        The parent installs SIGTERM/SIGHUP handlers, forks children until
        ``workers`` of them are alive, then reaps dead children with
        ``os.wait()`` and forks replacements. The loop ends once a signal
        handler clears the running flag, or on KeyboardInterrupt.

        :param conf: config mapping exposing ``get(key, default)``; the keys
                     read here are ``log_type``, ``log_path`` and ``workers``.
        :raises OSError: if ``os.wait()`` fails with an errno other than
                         EINTR or ECHILD.
        """
        name = 'stalker-agent'
        log_type = conf.get('log_type', 'syslog')
        log_file = conf.get('log_path', '/var/log/stalker/stalker-runner.log')
        if log_type == 'syslog':
            logger = get_syslogger(conf, name)
        else:
            logger = get_logger(name, log_path=log_file)

        def spawn_worker():
            """Run a StalkerRunner forever, restarting it after any error."""
            sr = StalkerRunner(conf)
            while 1:
                try:
                    sr.start()
                except Exception as err:
                    # Log and loop again: the same runner instance is
                    # restarted immediately, with no back-off.
                    logger.info(err)

        worker_count = int(conf.get('workers', '1'))

        def kill_children(*args):
            """Kills the entire process group."""
            logger.error('SIGTERM received')
            # Ignore SIGTERM in this process first so the group-wide signal
            # sent below does not re-enter this handler.
            signal.signal(signal.SIGTERM, signal.SIG_IGN)
            running[0] = False
            os.killpg(0, signal.SIGTERM)

        def hup(*args):
            """Stop the supervision loop without signalling the children."""
            logger.error('SIGHUP received')
            signal.signal(signal.SIGHUP, signal.SIG_IGN)
            running[0] = False

        # One-element list so the nested handlers can flip the flag without
        # needing ``nonlocal`` (unavailable on Python 2).
        running = [True]
        signal.signal(signal.SIGTERM, kill_children)
        signal.signal(signal.SIGHUP, hup)
        children = []
        while running[0]:
            # Top the pool back up to the configured size.
            while len(children) < worker_count:
                pid = os.fork()
                if pid == 0:
                    # Child: drop the parent's handlers before doing work.
                    signal.signal(signal.SIGHUP, signal.SIG_DFL)
                    signal.signal(signal.SIGTERM, signal.SIG_DFL)
                    spawn_worker()
                    # spawn_worker() loops without a break, so this is only
                    # reached if that loop is ever given an exit path.
                    logger.info('Child %d exiting normally' % os.getpid())
                    return
                else:
                    logger.info('Started child %s' % pid)
                    children.append(pid)
            try:
                pid, status = os.wait()
                if os.WIFEXITED(status) or os.WIFSIGNALED(status):
                    logger.error('Removing dead child %s' % pid)
                    if pid in children:
                        children.remove(pid)
            except OSError as err:
                # EINTR: wait() was interrupted by a signal; ECHILD: there
                # are no children to wait for. Both just re-check the flag.
                if err.errno not in (errno.EINTR, errno.ECHILD):
                    raise
            except KeyboardInterrupt:
                break
Exemple #2
0
 def __init__(self, fullconf):
     """Configure the agent from the ``main`` section of ``fullconf``.

     Sets up logging, listen address/port, SSL paths, master URL and
     scheme, keys, script directory, default interval/priority, hostname
     and roles, then builds the check list.

     :param fullconf: mapping with a ``main`` entry that exposes
                      ``get(key, default)``; kept as ``self.fullconf``.
     :raises Exception: if the configured script directory does not exist.
     """
     settings = fullconf['main']
     self.fullconf = fullconf

     # Logging: a file logger plus a wrapper around it for request logs.
     log_target = settings.get('log_path',
                               '/var/log/stalker/stalker-agent.log')
     self.logger = get_logger('stalker_agent', log_path=log_target)
     self.request_logger = FileLikeLogger(self.logger)

     # Listener and SSL material.
     self.listen_addr = settings.get('listen_addr', '')
     self.listen_port = int(settings.get('listen_port', '5050'))
     self.ssl_crt_path = settings.get('ssl_cert', '/etc/stalker/ssl.crt')
     self.ssl_key_path = settings.get('ssl_key', '/etc/stalker/ssl.key')

     # Master endpoint; the scheme is derived from the URL prefix.
     self.master_url = settings.get('master_url', 'http://localhost:5000')
     uses_tls = self.master_url.startswith('https://')
     self.master_scheme = 'https' if uses_tls else 'http'

     self.register_key = settings.get('register_key', 'itsamario')
     self.check_key = settings.get('check_key', 'canhazstatus')
     self.script_dir = settings.get('script_dir', '/etc/stalker/scripts')
     self.default_interval = int(settings.get('default_interval', '300'))
     self.default_priority = int(settings.get('default_priority', '1'))
     self.scripts = {}

     # Default hostname: the first ``hostname_parts`` labels of the FQDN.
     label_count = int(settings.get('hostname_parts', '1'))
     fqdn_prefix = '.'.join(getfqdn().split('.')[:label_count])
     self.hostname = settings.get('hostname', fqdn_prefix)

     # Comma-separated role list, whitespace-trimmed.
     raw_roles = settings.get('roles', 'server')
     self.roles = [role.strip() for role in raw_roles.split(',')]

     script_dir_present = os.path.exists(self.script_dir)
     if not script_dir_present:
         raise Exception("No script dir: %s" % self.script_dir)
     self._build_check_list()
     self.logger.info('Found checks: %s' % self.scripts)
Exemple #3
0
 def __init__(self, conf):
     """Initialise a runner: logger, green pool, redis/mongo handles,
     integer thresholds, notification plugins and statsd.

     :param conf: config mapping exposing ``get(key, default)``; kept as
                  ``self.conf``.
     """
     self.conf = conf
     self.name = 'stalker-runner-%d' % os.getpid()

     # NOTE(review): the file path is read from 'log_file' here; confirm
     # this is the intended key (vs. 'log_path') for file logging.
     wants_syslog = conf.get('log_type', 'syslog') == 'syslog'
     file_target = conf.get('log_file',
                            '/var/log/stalker/stalker-runner.log')
     if wants_syslog:
         self.logger = get_syslogger(conf, self.name)
     else:
         self.logger = get_logger(self.name, log_path=file_target)

     self.pool = eventlet.GreenPool()
     self.check_key = conf.get('check_key', 'canhazstatus')

     # Redis client; ``self.wq`` holds the configured worker id.
     r_host = conf.get('redis_host', '127.0.0.1')
     r_port = int(conf.get('redis_port', '6379'))
     r_password = conf.get('redis_password', '')
     r_socket = conf.get('redis_socket', None)
     self.wq = conf.get('worker_id', 'worker1')
     self.rc = redis.Redis(r_host, r_port, password=r_password,
                           unix_socket_path=r_socket)

     # Mongo client, database and the collections used by the runner.
     m_host = conf.get('mongo_host', '127.0.0.1')
     m_port = int(conf.get('mongo_port', '27017'))
     database = conf.get('db_name', 'stalkerweb')
     self.c = MongoClient(host=m_host, port=m_port)
     self.debug = False
     self.db = self.c[database]
     for collection in ('checks', 'state_log', 'notifications'):
         setattr(self, collection, self.db[collection])

     # Integer settings as (attribute, config key, default) triples,
     # applied in this order.
     int_settings = (
         ('host_window', 'host_flood_window', '60'),
         ('host_threshold', 'host_flood_threshold', '5'),
         ('flood_window', 'dc_flood_window', '120'),
         ('flood_threshold', 'dc_flood_threshold', '100'),
         ('flap_window', 'flap_window', '1200'),
         ('flap_threshold', 'flap_threshold', '5'),
         ('alert_threshold', 'alert_threshold', '3'),
         ('urlopen_timeout', 'urlopen_timeout', '15'),
     )
     for attr, key, default in int_settings:
         setattr(self, attr, int(conf.get(key, default)))

     self.notify_plugins = {}
     self._load_notification_plugins(conf)
     self.statsd = StatsdEvent(conf, self.logger, 'stalker_runner.')
Exemple #4
0
 def __init__(self, conf):
     """Initialise a manager: logger, redis/mongo handles, scan settings,
     statsd and zeroed metric counters.

     :param conf: config mapping exposing ``get(key, default)``; kept as
                  ``self.conf``.
     """
     self.conf = conf

     log_target = conf.get('log_path',
                           '/var/log/stalker/stalker-manager.log')
     self.logger = get_logger('stalker_manager', log_path=log_target)

     # Redis client; ``self.wq`` holds the configured queue name.
     r_host = conf.get('redis_host', '127.0.0.1')
     r_port = int(conf.get('redis_port', '6379'))
     r_password = conf.get('redis_password', '')
     r_socket = conf.get('redis_socket', None)
     self.wq = conf.get('qname', 'worker1')
     self.rc = redis.Redis(r_host, r_port, password=r_password,
                           unix_socket_path=r_socket)

     # Mongo client, database and the collections used by the manager.
     m_host = conf.get('mongo_host', '127.0.0.1')
     m_port = int(conf.get('mongo_port', '27017'))
     database = conf.get('db_name', 'stalkerweb')
     self.c = MongoClient(host=m_host, port=m_port)
     self.db = self.c[database]
     for collection in ('checks', 'notifications'):
         setattr(self, collection, self.db[collection])

     self.scan_interval = int(conf.get('scan_interval', '5'))
     self.pause_file = conf.get('pause_file', '/tmp/.sm-pause')
     self.shuffle_on_start = True
     self.statsd = StatsdEvent(conf, self.logger, 'stalker_manager.')

     # Every metric counter starts at zero.
     metric_names = ('checks', 'pending', 'suspended',
                     'failing', 'flapping', 'qsize')
     self.metrics = dict.fromkeys(metric_names, 0)
Exemple #5
0
from stalkerweb.auth import is_valid_login, login_required, remove_user
from stalkerweb.stutils import jsonify, genPrimaryKey64
from stalkerweb import app, rc, rdb
from stalker.stalker_utils import get_logger
from flask.ext.wtf import Form, Required, TextField, PasswordField, \
    BooleanField
from werkzeug.contrib.cache import RedisCache
import rethinkdb as r
from rethinkdb.errors import RqlDriverError, RqlRuntimeError

# State names treated as valid (presumably check states; the code that
# consumes this list is outside this view).
VALID_STATES = [
    'alerting',
    'pending',
    'in_maintenance',
    'suspended',
]

# Redis-backed cache configured from the Flask app settings.
cache = RedisCache(
    host=app.config['REDIS_HOST'],
    port=app.config['REDIS_PORT'],
    default_timeout=app.config['CACHE_TTL'],
)

# Module-level logger; name, path and count come from app config.
logger = get_logger(
    app.config['LOG_NAME'],
    log_path=app.config['LOG_FILE'],
    count=app.config['LOG_COUNT'],
)


class SignInForm(Form):
    """Sign-in form with username, password and remember-me fields."""
    # Both credentials carry a Required() validator.
    username = TextField(validators=[Required()])
    password = PasswordField(validators=[Required()])
    # Declared without validators.
    remember_me = BooleanField()


def _get_local_metrics():
    metrics = {}
    mkeys = ['checks', 'failing', 'flapping', 'pending', 'qsize',
             'suspended']
    try:
        values = rc.mget(mkeys)