コード例 #1
0
ファイル: vps_util.py プロジェクト: ekeyme/lantern_aws
def delete_q_backups():
    """
    Remove the `<domain>:srvq.bak` backup keys generated by `fix_queue`.

    The domains visited are the union of the 'user-regions' and
    'cloudmasters' redis sets.
    """
    user_regions = redis_shell.smembers('user-regions')
    masters = redis_shell.smembers('cloudmasters')
    for owner in user_regions | masters:
        backup_key = owner + ":srvq.bak"
        if not redis_shell.exists(backup_key):
            # Nothing was backed up for this domain.
            continue
        redis_shell.delete(backup_key)
コード例 #2
0
def delete_q_backups():
    """
    Drop the ":srvq.bak" queue backups that `fix_queue` generates.
    """
    domains = redis_shell.smembers('user-regions')
    domains = domains | redis_shell.smembers('cloudmasters')
    # Lazily derive the backup key of each user region / cloudmaster.
    backup_keys = (name + ":srvq.bak" for name in domains)
    for key in backup_keys:
        if redis_shell.exists(key):
            redis_shell.delete(key)
コード例 #3
0
def fallbacks_and_honeypots_in_srv_table(region, cfgbysrv):
    """
    Report fallbacks and honeypots of `region` that are missing from
    `cfgbysrv`.

    Members of the `<region>:fallbacks` and `<region>:honeypots` redis sets
    are tested for membership in `cfgbysrv`.  Returns a list with one error
    message per missing server, fallbacks first.
    """
    checks = (
        (':fallbacks', "Fallback server %s for region %s is not in srv->cfg"),
        (':honeypots', "Honeypot server %s for region %s is not in srv->cfg"),
    )
    problems = []
    for suffix, template in checks:
        members = redis_shell.smembers(region + suffix)
        problems.extend(template % (name, region)
                        for name in members
                        if name not in cfgbysrv)
    return problems
コード例 #4
0
ファイル: vps_sanity_checks.py プロジェクト: AS-D/lantern_aws
def fallbacks_and_honeypots_in_srv_table(region, srv2cfg):
    """
    Return error messages for the servers listed in the region's
    ':fallbacks' and ':honeypots' redis sets that are not keys of `srv2cfg`.

    Fallback errors come first, followed by honeypot errors.
    """
    missing_fallbacks = [
        "Fallback server %s for region %s is not in srv->cfg" % (name, region)
        for name in redis_shell.smembers(region + ':fallbacks')
        if name not in srv2cfg]
    missing_honeypots = [
        "Honeypot server %s for region %s is not in srv->cfg" % (name, region)
        for name in redis_shell.smembers(region + ':honeypots')
        if name not in srv2cfg]
    return missing_fallbacks + missing_honeypots
コード例 #5
0
def fix_queues():
    """
    Run `fix_queue` on the ':srvq' queue of every user region and
    cloudmaster.

    A progress line is printed per domain; domains whose queue key does not
    exist are reported and skipped.
    """
    domains = (redis_shell.smembers('user-regions')
               | redis_shell.smembers('cloudmasters'))
    for domain in domains:
        queue_key = domain + ':srvq'
        if not redis_shell.exists(queue_key):
            print("no queue for %s." % domain)
            continue
        print("fixing queue for %s..." % domain)
        fix_queue(queue_key)
コード例 #6
0
ファイル: vps_util.py プロジェクト: AS-D/lantern_aws
def fix_queues():
    """
    Fix the server queue (`<domain>:srvq`) of each member of the
    'user-regions' and 'cloudmasters' sets, printing what was done.
    """
    user_regions = redis_shell.smembers('user-regions')
    masters = redis_shell.smembers('cloudmasters')
    for name in user_regions | masters:
        srvq_key = name + ':srvq'
        queue_present = redis_shell.exists(srvq_key)
        if queue_present:
            print("fixing queue for %s..." % name)
            fix_queue(srvq_key)
        else:
            print("no queue for %s." % name)
コード例 #7
0
ファイル: vps_util.py プロジェクト: ekeyme/lantern_aws
def fix_all_configs(fix_fn):
    """
    Generic utility to repair broken configurations.

    `fix_fn` should take an unparsed config string and return a
    (needs_fixing, good_config) tuple.  It is handed to `fix_queue` for
    every existing `<domain>:srvq` queue and finally to `fix_live_servers`.
    """
    domains = (redis_shell.smembers('user-regions')
               | redis_shell.smembers('cloudmasters'))
    for domain in domains:
        queue_key = domain + ':srvq'
        if not redis_shell.exists(queue_key):
            print("Skipping nonexistant queue %s..." % queue_key)
            continue
        fix_queue(queue_key, fix_fn)
    fix_live_servers(fix_fn)
コード例 #8
0
def fix_all_configs(fix_fn):
    """
    Apply a config-fixing function to all queued and live servers.

    fix_fn should take an unparsed config string and return a
    (needs_fixing, good_config) tuple.
    """
    user_regions = redis_shell.smembers('user-regions')
    masters = redis_shell.smembers('cloudmasters')
    for name in user_regions | masters:
        srvq_key = name + ':srvq'
        queue_present = redis_shell.exists(srvq_key)
        if queue_present:
            fix_queue(srvq_key, fix_fn)
        else:
            print("Skipping nonexistant queue %s..." % srvq_key)
    # Queues are done; now handle the servers that are already live.
    fix_live_servers(fix_fn)
コード例 #9
0
def run_all_checks():
    """
    Gather the errors found by every sanity check and pass them to `report`.

    The srv->cfg table is checked first, then srv->cfg membership for each
    user region, then the server queue size for each user region.
    """
    srv_configs = redis_shell.hgetall('srv->cfg')
    problems = configs_start_with_newline(srv_configs)
    user_regions = redis_shell.smembers('user-regions')
    for name in user_regions:
        problems.extend(srvs_in_cfgbysrv(name, srv_configs))
    for name in user_regions:
        problems.extend(check_srvq_size(name))
    report(problems)
コード例 #10
0
def run_all_checks():
    """
    Run all checks against redis state and report the accumulated errors.
    """
    table = redis_shell.hgetall("srv->cfg")
    found = configs_start_with_newline(table)
    region_names = redis_shell.smembers("user-regions")
    # Each check is run over all regions before the next check starts, so
    # errors stay grouped by check rather than by region.
    per_region_checks = (
        lambda r: srvs_in_cfgbysrv(r, table),
        lambda r: check_srvq_size(r),
    )
    for check in per_region_checks:
        for r in region_names:
            found.extend(check(r))
    report(found)
コード例 #11
0
def run_all_checks():
    """
    Run every sanity check, printing progress, and pass all collected error
    messages to `report`.

    Checks that are wrapped in a `try` are best-effort: if one of them
    raises, the failure is sent through `alert_exception` and the remaining
    checks still run.  Only `Exception` subclasses are treated this way, so
    Ctrl-C (`KeyboardInterrupt`) and `SystemExit` still stop the run.
    """
    print("Fetching config data...")
    srv2cfg = redis_shell.hgetall('srv->cfg')
    print("Performing checks...")
    cache = model.make_cache()
    # This is new code, so let's test it in a cushion to start with.
    try:
        print("Checking that srv->cfg table is consistent with the VPS listing...")
        errors = srv2cfg_consistent_with_vps_list(srv2cfg, cache)
    except Exception:
        alert_exception("trying to check consistency between srv->cfg and all_vpss")
        # Start from an empty list so the remaining checks can still extend it.
        errors = []
    try:
        print("Check that we don't have duplicate names")
        errors.extend(no_duplicate_names(cache))
    except Exception:
        alert_exception("trying to check for duplicate VPS names")
    print("Checking that configs start with a newline...")
    errors.extend(configs_start_with_newline(srv2cfg))
    regions = redis_shell.smembers('user-regions')
    print("Checking that slice server entries are in srv->cfg...")
    for region in regions:
        print("    (region %s)..." % region)
        errors.extend(slice_srvs_in_srv2cfg(region, srv2cfg))
    print("Checking server queue size...")
    for region in regions:
        print("    (region %s)..." % region)
        errors.extend(srvq_size(region))
    print("Checking server queue integrity...")
    for region in regions:
        print("    (region %s)..." % region)
        try:
            errors.extend(srvq_integrity(region, cache=cache))
        except Exception:
            # One bad region must not prevent the others from being checked.
            alert_exception("trying to check server queue integrity")
    print("Check that regional fallbacks and honeypots are in srv->cfg...")
    for region in regions:
        print("    (region %s)..." % region)
        errors.extend(fallbacks_and_honeypots_in_srv_table(region, srv2cfg))
    report(errors)
コード例 #12
0
ファイル: refill_srvq.py プロジェクト: JJediny/lantern_aws
def run():
    """
    Serve the `<QPREFIX>:srvreqq` request queue forever.

    Each iteration of the main loop (one every 10 seconds):

    1. flushes quarantined VPS names into the `<CM>:destroyq` list when the
       request queue holds nothing but its sentinel entry;
    2. drains `procq`, handling the result of each finished launch
       (quarantine blocked servers, enqueue the config of unblocked ones);
    3. if fewer than MAXPROCS launches are pending, takes the next request
       and starts a `launch_one_server` process for it; otherwise kills
       launches that have been pending for longer than LAUNCH_TIMEOUT.

    The outer loop has no exit condition, so this function only ends by
    raising.
    """
    qname = QPREFIX + ":srvreqq"
    print "Serving queue", qname, ", MAXPROCS:", repr(MAXPROCS)
    quarantine = CM + ":quarantined_vpss"
    reqq = redisq.Queue(qname, redis_shell, LAUNCH_TIMEOUT)
    # Passed to every launcher process; results are read back from it below.
    procq = multiprocessing.Queue()
    # reqid -> {'name', 'proc', 'starttime', 'remove_req'} for each launch
    # that has been started and not yet cleaned up.
    pending = {}
    def kill_task(reqid):
        """
        Forget request `reqid`, terminate its launcher process and destroy
        its VPS in a separate daemon process.
        """
        print "Killing timed out process and vps..."
        task = pending.pop(reqid)
        task['proc'].terminate()
        proc = multiprocessing.Process(target=vps_shell.destroy_vps,
                                       args=(task['name'],))
        proc.daemon = True
        proc.start()
    while True:
        # If the request queue is totally empty (no tasks enqueued or even in
        # progress), flush the quarantine queue into the destroy queue.
        if redis_shell.llen(qname) == 1:  # 1 for the redisq sentinel entry
            names = redis_shell.smembers(quarantine)
            if names:
                print "Flushing %s VPSs from quarantine." % len(names)
                # Remove from quarantine and push to the destroy queue in a
                # single pipeline execution.
                p = redis_shell.pipeline()
                p.srem(quarantine, *names)
                p.lpush(CM + ":destroyq", *names)
                p.execute()
        while not procq.empty():
            try:
                # Non-blocking get; raises Empty if nothing is available.
                result = procq.get(False)
                print "Got result:", result
                task = pending.get(result['reqid'])
                # Results whose request is no longer pending, or whose name
                # does not match the pending task's name, are ignored.
                if task and task['name'] == result['name']:
                    p = redis_shell.pipeline()
                    if result['blocked']:
                        print "Quarantining %(name)s (%(ip)s)." % result
                        p.sadd(quarantine, result['name'])
                        p.incr(CM + ":blocked_vps_count")  # stats
                        # We'll remove the original request anyway because we
                        # don't want it to stay around until timeout. Insert a
                        # new one to replace it instead.
                        # The new id is allocated right away (this call is not
                        # part of the pipeline); only the push is pipelined.
                        reqid = redis_shell.incr('srvcount')
                        p.lpush(qname, reqid)
                        # NOTE(review): the entry for result['reqid'] is not
                        # removed from `pending` in this branch -- confirm
                        # that this is intended.
                    else:
                        p.incr(CM + ":unblocked_vps_count")  # stats
                        del pending[result['reqid']]
                        vps_util.enqueue_cfg(result['name'], result['access_data'], result['srvq'])
                        register_vps(task['name'])
                    # Queue the removal of the original request in the same
                    # pipeline as the updates above.
                    task['remove_req'](p)
                    p.execute()
            except Empty:
                print "Wat?"
                break
        if len(pending) < MAXPROCS:
            req_string, remover = reqq.next_job()
            if req_string:
                print "Got request", req_string
                req = json.loads(req_string)
                if isinstance(req, int):
                    # Transition: support the old format while we are updating
                    # the config server etc.
                    req = {'id': req, 'srvq': QPREFIX + ':srvq'}
                    req_string = json.dumps(req)
                reqid = req['id']
                # Getting a request whose id is already pending is treated as
                # a queue timeout (presumably redisq hands a job out again
                # after LAUNCH_TIMEOUT -- TODO confirm): the old launch is
                # killed and a fresh one is started below.
                if reqid in pending:
                    print "Killing task %s because of queue timeout" % reqid
                    kill_task(reqid)
                name = new_proxy_name(req)
                proc = multiprocessing.Process(target=launch_one_server,
                                               args=(procq,
                                                     reqid,
                                                     name,
                                                     req_string))
                proc.daemon = True
                pending[reqid] = {
                    'name': name,
                    'proc': proc,
                    'starttime': time.time(),
                    'remove_req': remover}
                print "Starting process to launch", name
                proc.start()
        else:
            # Since we're not checking the queue when we've maxed out our
            # processes, we need to manually check for expired tasks.
            # NOTE: kill_task() pops from `pending` during this loop; that is
            # safe because Python 2's dict.items() returns a list copy.
            for reqid, d in pending.items():
                if time.time() - d['starttime'] > LAUNCH_TIMEOUT:
                    print "Killing task %s because of local timeout" % reqid
                    kill_task(reqid)
        time.sleep(10)
コード例 #13
0
def regions():
    """Return the members of the 'user-regions' redis set."""
    user_regions = redis_shell.smembers('user-regions')
    return user_regions
コード例 #14
0
ファイル: checkfallbacks.py プロジェクト: AS-D/lantern_aws
import json
import subprocess
import yaml

from alert import alert
from redis_util import redis_shell


# Base name of the local cache files: <prefix>.json holds the fallbacks to
# check and <prefix>-version holds the 'srvcount' value they were built from.
prefix = 'fallbacks-to-check'
try:
    with open(prefix + '-version') as version_file:
        local_version = version_file.read()
except IOError:
    # No readable version file: force a refresh below.
    local_version = None
remote_version = redis_shell.get('srvcount')
if local_version != remote_version:
    suppress = redis_shell.smembers('checkfallbacks-suppress')
    # NOTE(review): yaml.load can construct arbitrary Python objects; this
    # assumes the srv->cfg table only holds trusted data.  Consider
    # yaml.safe_load if that is not guaranteed.
    fallbacks = [yaml.load(cfg).values()[0]
                 for srv, cfg in redis_shell.hgetall('srv->cfg').iteritems()
                 if srv not in suppress]
    # Close (and therefore flush) both files explicitly so the checkfallbacks
    # subprocess below is guaranteed to see the complete JSON, instead of
    # relying on the garbage collector to close the handles.
    with open(prefix + '.json', 'w') as fallbacks_file:
        json.dump(fallbacks, fallbacks_file)
    with open(prefix + '-version', 'w') as version_file:
        version_file.write(remote_version)

# Keep only the output lines that report a failed fallback check.  The raw
# string keeps the backslashes of the grep pattern intact.
cmd = subprocess.Popen(r"checkfallbacks -fallbacks %s.json -connections 20 | grep '\[failed fallback check\]'" % prefix,
                       shell=True,
                       stdout=subprocess.PIPE)
errors = list(cmd.stdout)
if errors:
    for error in errors:
        print error
    alert(type='checkfallbacks-failures',
          details={'errors': errors},