def run(self, conf):
    """Supervise a pool of forked ``StalkerRunner`` worker processes.

    Forks children until ``workers`` of them are alive, then blocks in
    ``os.wait()`` and forgets any child that exited or was killed so the
    next loop iteration forks a replacement.

    :param conf: settings mapping; ``log_type``, ``log_path`` and
                 ``workers`` are read here, and the whole mapping is
                 handed to ``StalkerRunner`` in each child.
    :raises OSError: if ``os.wait()`` fails with anything other than
                     ``EINTR`` or ``ECHILD``.
    """
    name = 'stalker-agent'
    log_type = conf.get('log_type', 'syslog')
    log_file = conf.get('log_path', '/var/log/stalker/stalker-runner.log')
    if log_type == 'syslog':
        logger = get_syslogger(conf, name)
    else:
        logger = get_logger(name, log_path=log_file)

    def spawn_worker():
        """Run a StalkerRunner forever, logging and retrying on errors."""
        sr = StalkerRunner(conf)
        while 1:
            try:
                sr.start()
            except Exception as err:
                logger.info(err)

    worker_count = int(conf.get('workers', '1'))

    def kill_children(*args):
        """Kills the entire process group."""
        logger.error('SIGTERM received')
        # Ignore SIGTERM ourselves first: killpg(0, ...) also signals this
        # process, and the handler must not be re-entered.
        signal.signal(signal.SIGTERM, signal.SIG_IGN)
        running[0] = False
        os.killpg(0, signal.SIGTERM)

    def hup(*args):
        """Shuts down the server, but allow running requests to complete"""
        logger.error('SIGHUP received')
        signal.signal(signal.SIGHUP, signal.SIG_IGN)
        running[0] = False

    # One-element list so the nested handlers can flip the flag without
    # needing ``nonlocal`` (unavailable on Python 2).
    running = [True]
    signal.signal(signal.SIGTERM, kill_children)
    signal.signal(signal.SIGHUP, hup)
    children = []
    while running[0]:
        while len(children) < worker_count:
            pid = os.fork()
            if pid == 0:
                # Child: restore default signal handling so the parent's
                # SIGTERM to the process group actually terminates it.
                signal.signal(signal.SIGHUP, signal.SIG_DFL)
                signal.signal(signal.SIGTERM, signal.SIG_DFL)
                spawn_worker()
                # spawn_worker() loops without a break, so this is only
                # reached if that loop is ever changed to terminate.
                logger.info('Child %d exiting normally' % os.getpid())
                return
            else:
                logger.info('Started child %s' % pid)
                children.append(pid)
        try:
            pid, status = os.wait()
            if os.WIFEXITED(status) or os.WIFSIGNALED(status):
                logger.error('Removing dead child %s' % pid)
                if pid in children:
                    children.remove(pid)
        except OSError as err:
            # EINTR: wait() was interrupted by one of our signal handlers.
            # ECHILD: there is no child left to wait for.
            if err.errno not in (errno.EINTR, errno.ECHILD):
                raise
        except KeyboardInterrupt:
            break
def __init__(self, fullconf):
    """Configure the agent from the ``main`` section of *fullconf*.

    Sets up logging, listener/SSL settings, master URL and scheme, shared
    keys, check defaults, hostname and roles, then builds the check list
    from the script directory.

    :param fullconf: mapping with at least a ``main`` section; the whole
                     mapping is kept on ``self.fullconf``.
    :raises Exception: if the configured script directory does not exist.
    """
    settings = fullconf['main']
    self.fullconf = fullconf

    # Logging
    self.logger = get_logger(
        'stalker_agent',
        log_path=settings.get('log_path',
                              '/var/log/stalker/stalker-agent.log'))
    self.request_logger = FileLikeLogger(self.logger)

    # Listener and SSL material
    self.listen_addr = settings.get('listen_addr', '')
    self.listen_port = int(settings.get('listen_port', '5050'))
    self.ssl_crt_path = settings.get('ssl_cert', '/etc/stalker/ssl.crt')
    self.ssl_key_path = settings.get('ssl_key', '/etc/stalker/ssl.key')

    # Master endpoint; the scheme is derived from the URL prefix.
    self.master_url = settings.get('master_url', 'http://localhost:5000')
    uses_tls = self.master_url.startswith('https://')
    self.master_scheme = 'https' if uses_tls else 'http'

    # Shared keys and check defaults
    self.register_key = settings.get('register_key', 'itsamario')
    self.check_key = settings.get('check_key', 'canhazstatus')
    self.script_dir = settings.get('script_dir', '/etc/stalker/scripts')
    self.default_interval = int(settings.get('default_interval', '300'))
    self.default_priority = int(settings.get('default_priority', '1'))
    self.scripts = {}

    # Default hostname is the first ``hostname_parts`` labels of the FQDN;
    # it is computed even when ``hostname`` is set explicitly.
    label_count = int(settings.get('hostname_parts', '1'))
    fqdn_labels = getfqdn().split('.')
    fallback_hostname = '.'.join(fqdn_labels[:label_count])
    self.hostname = settings.get('hostname', fallback_hostname)

    raw_roles = settings.get('roles', 'server')
    self.roles = [role.strip() for role in raw_roles.split(',')]

    if not os.path.exists(self.script_dir):
        raise Exception("No script dir: %s" % self.script_dir)

    self._build_check_list()
    self.logger.info('Found checks: %s' % self.scripts)
def __init__(self, conf):
    """Set up one runner worker: logging, Redis, MongoDB and thresholds.

    :param conf: settings mapping. Keys read here: ``log_type``,
                 ``log_file`` (falling back to ``log_path``),
                 ``check_key``, ``redis_host``, ``redis_port``,
                 ``redis_password``, ``redis_socket``, ``worker_id``,
                 ``mongo_host``, ``mongo_port``, ``db_name``,
                 ``host_flood_window``, ``host_flood_threshold``,
                 ``dc_flood_window``, ``dc_flood_threshold``,
                 ``flap_window``, ``flap_threshold``, ``alert_threshold``
                 and ``urlopen_timeout``. The mapping is also passed to
                 the notification plugin loader and to ``StatsdEvent``.
    """
    self.conf = conf
    self.name = 'stalker-runner-%d' % os.getpid()
    log_type = conf.get('log_type', 'syslog')
    # ``log_file`` is the key this constructor has always read, so it
    # still wins. ``log_path`` is the key the rest of this file uses for
    # the same setting; honour it as a fallback so a configured path is
    # not silently replaced by the built-in default.
    log_file = conf.get(
        'log_file',
        conf.get('log_path', '/var/log/stalker/stalker-runner.log'))
    if log_type == 'syslog':
        self.logger = get_syslogger(conf, self.name)
    else:
        self.logger = get_logger(self.name, log_path=log_file)
    self.pool = eventlet.GreenPool()
    self.check_key = conf.get('check_key', 'canhazstatus')

    # Redis connection and the queue name stored on ``self.wq``.
    redis_host = conf.get('redis_host', '127.0.0.1')
    redis_port = int(conf.get('redis_port', '6379'))
    redis_pass = conf.get('redis_password', '')
    redis_usock = conf.get('redis_socket', None)
    self.wq = conf.get('worker_id', 'worker1')
    self.rc = redis.Redis(redis_host, redis_port, password=redis_pass,
                          unix_socket_path=redis_usock)

    # MongoDB connection and the collections used by the runner.
    mongo_host = conf.get('mongo_host', '127.0.0.1')
    mongo_port = int(conf.get('mongo_port', '27017'))
    db_name = conf.get('db_name', 'stalkerweb')
    self.c = MongoClient(host=mongo_host, port=mongo_port)
    self.debug = False
    self.db = self.c[db_name]
    self.checks = self.db['checks']
    self.state_log = self.db['state_log']
    self.notifications = self.db['notifications']

    # Flood, flap and alert tuning; all values are parsed as integers.
    self.host_window = int(conf.get('host_flood_window', '60'))
    self.host_threshold = int(conf.get('host_flood_threshold', '5'))
    self.flood_window = int(conf.get('dc_flood_window', '120'))
    self.flood_threshold = int(conf.get('dc_flood_threshold', '100'))
    self.flap_window = int(conf.get('flap_window', '1200'))
    self.flap_threshold = int(conf.get('flap_threshold', '5'))
    self.alert_threshold = int(conf.get('alert_threshold', '3'))
    self.urlopen_timeout = int(conf.get('urlopen_timeout', '15'))

    self.notify_plugins = {}
    self._load_notification_plugins(conf)
    self.statsd = StatsdEvent(conf, self.logger, 'stalker_runner.')
def __init__(self, conf):
    """Set up the manager: logging, Redis, MongoDB, scan settings, metrics.

    :param conf: settings mapping; it is stored on ``self.conf`` and also
                 passed to ``StatsdEvent``.
    """
    self.conf = conf

    self.logger = get_logger(
        'stalker_manager',
        log_path=conf.get('log_path',
                          '/var/log/stalker/stalker-manager.log'))

    # Redis client; connection arguments come straight from the config.
    self.rc = redis.Redis(
        conf.get('redis_host', '127.0.0.1'),
        int(conf.get('redis_port', '6379')),
        password=conf.get('redis_password', ''),
        unix_socket_path=conf.get('redis_socket', None))
    self.wq = conf.get('qname', 'worker1')

    # MongoDB client, database handle and the two collections used here.
    self.c = MongoClient(host=conf.get('mongo_host', '127.0.0.1'),
                         port=int(conf.get('mongo_port', '27017')))
    self.db = self.c[conf.get('db_name', 'stalkerweb')]
    self.checks = self.db['checks']
    self.notifications = self.db['notifications']

    self.scan_interval = int(conf.get('scan_interval', '5'))
    self.pause_file = conf.get('pause_file', '/tmp/.sm-pause')
    self.shuffle_on_start = True
    self.statsd = StatsdEvent(conf, self.logger, 'stalker_manager.')

    # Every counter starts at zero.
    metric_names = ('checks', 'pending', 'suspended', 'failing',
                    'flapping', 'qsize')
    self.metrics = dict.fromkeys(metric_names, 0)
from stalkerweb.auth import is_valid_login, login_required, remove_user
from stalkerweb.stutils import jsonify, genPrimaryKey64
from stalkerweb import app, rc, rdb
from stalker.stalker_utils import get_logger
from flask.ext.wtf import Form, Required, TextField, PasswordField, \
    BooleanField
from werkzeug.contrib.cache import RedisCache
import rethinkdb as r
from rethinkdb.errors import RqlDriverError, RqlRuntimeError

# State names; presumably the check states the views accept -- confirm
# against the code that consumes this list.
VALID_STATES = ['alerting', 'pending', 'in_maintenance', 'suspended']

# Redis-backed cache; host, port and default timeout come from app.config.
cache = RedisCache(host=app.config['REDIS_HOST'],
                   port=app.config['REDIS_PORT'],
                   default_timeout=app.config['CACHE_TTL'])

# Module-level logger; name, file path and count come from app.config.
logger = get_logger(app.config['LOG_NAME'],
                    log_path=app.config['LOG_FILE'],
                    count=app.config['LOG_COUNT'])


class SignInForm(Form):
    """Form with a required username, a required password and a
    ``remember_me`` boolean field."""

    username = TextField(validators=[Required()])
    password = PasswordField(validators=[Required()])
    remember_me = BooleanField()


def _get_local_metrics():
    metrics = {}
    mkeys = ['checks', 'failing', 'flapping', 'pending', 'qsize',
             'suspended']
    try:
        values = rc.mget(mkeys)