예제 #1
0
def processEC2(ec2Kittens):
    """Count running EC2 instances per instance type and stop idle ones.

    Per-type records are seeded from the ``counts:*`` hashes found in
    ``db``.  For every kitten the running tally of its instance type is
    bumped when the host is enabled and running.  A kitten that carries
    ``lastseen`` data with ``since`` greater than 3600 is shut down: a
    graceful shutdown is attempted first, then the instance is stopped
    through the EC2 API.  Failures to stop are logged, not raised.

    Args:
        ec2Kittens: iterable of ``(kitten, r)`` pairs.  ``r`` is a dict
            holding a ``'host'`` object (exposing ``info`` and
            ``graceful_shutdown()``) and, optionally, a ``'lastseen'``
            dict with ``hours``, ``minutes``, ``seconds`` and ``since``.

    Returns:
        None.
    """
    counts = {}

    for item in db.keys('counts:*'):
        instanceType = item.replace('counts:', '')
        # Start from a zero tally, then overlay whatever the hash stores.
        # NOTE(review): a stored 'current' field replaces the zero above
        # as-is -- confirm it is numeric, since it is incremented below.
        counts[instanceType] = {'current': 0}
        counts[instanceType].update(db.hgetall(item))

    for kitten, r in ec2Kittens:
        host         = r['host']
        instanceType = host.info['class']
        if instanceType not in counts:
            log.error('%s has a instance type [%s] not found in our counts, assuming minimum of 2 and max of 50' % (kitten, instanceType))
            # The entry must be created here; assigning into
            # counts[instanceType][...] directly would raise KeyError.
            counts[instanceType] = {'max': 50, 'min': 2, 'current': 0}

        isRunning = host.info['enabled'] and host.info['state'] == 'running'
        if isRunning:
            counts[instanceType]['current'] += 1

        # 'lastseen' may be absent or present-but-None; neither carries
        # idle information, so there is nothing more to do for this kitten.
        lastseen = r.get('lastseen')
        if lastseen is None:
            continue

        log.info('%s: count = %d idle: %dh %dm %ss' % (instanceType, counts[instanceType]['current'], lastseen['hours'], lastseen['minutes'], lastseen['seconds']))

        if lastseen['since'] > 3600:
            if isRunning:
                log.info('shutting down ec2 instance')
                # if we can ssh to host, then try and do normal shutdowns
                if host.graceful_shutdown():
                    log.info("instance was graceful'd")
                try:
                    conn = connect_to_region(host.info['region'],
                                             aws_access_key_id=getPassword('aws_access_key_id'),
                                             aws_secret_access_key=getPassword('aws_secret_access_key'))
                    conn.stop_instances(instance_ids=[host.info['id']])
                except Exception:
                    # Best effort: log and move on to the next kitten, but
                    # let SystemExit/KeyboardInterrupt propagate.
                    log.error('unable to stop ec2 instance %s [%s]' % (kitten, host.info['id']), exc_info=True)
            else:
                log.error('ec2 instance flagged for reboot/recovery but it is not running')
예제 #2
0
def processEC2(ec2Kittens):
    """Count running EC2 instances per instance type and stop idle ones.

    Per-type records are seeded from the ``counts:*`` hashes found in
    ``db``.  For every kitten the running tally of its instance type is
    bumped when the host is enabled and running.  A kitten that carries
    ``lastseen`` data with ``since`` greater than 3600 is shut down: a
    graceful shutdown is attempted first, then the instance is stopped
    through the EC2 API.  Failures to stop are logged, not raised.

    Args:
        ec2Kittens: iterable of ``(kitten, r)`` pairs.  ``r`` is a dict
            holding a ``'host'`` object (exposing ``info`` and
            ``graceful_shutdown()``) and, optionally, a ``'lastseen'``
            dict with ``hours``, ``minutes``, ``seconds`` and ``since``.

    Returns:
        None.
    """
    counts = {}

    for item in db.keys('counts:*'):
        instanceType = item.replace('counts:', '')
        # Start from a zero tally, then overlay whatever the hash stores.
        # NOTE(review): a stored 'current' field replaces the zero above
        # as-is -- confirm it is numeric, since it is incremented below.
        counts[instanceType] = {'current': 0}
        counts[instanceType].update(db.hgetall(item))

    for kitten, r in ec2Kittens:
        host         = r['host']
        instanceType = host.info['class']
        if instanceType not in counts:
            log.error('%s has a instance type [%s] not found in our counts, assuming minimum of 2 and max of 50' % (kitten, instanceType))
            # The entry must be created here; assigning into
            # counts[instanceType][...] directly would raise KeyError.
            counts[instanceType] = {'max': 50, 'min': 2, 'current': 0}

        isRunning = host.info['enabled'] and host.info['state'] == 'running'
        if isRunning:
            counts[instanceType]['current'] += 1

        # 'lastseen' may be absent or present-but-None; neither carries
        # idle information, so there is nothing more to do for this kitten.
        lastseen = r.get('lastseen')
        if lastseen is None:
            continue

        log.info('%s: count = %d idle: %dh %dm %ss' % (instanceType, counts[instanceType]['current'], lastseen['hours'], lastseen['minutes'], lastseen['seconds']))

        if lastseen['since'] > 3600:
            if isRunning:
                log.info('shutting down ec2 instance')
                # if we can ssh to host, then try and do normal shutdowns
                if host.graceful_shutdown():
                    log.info("instance was graceful'd")
                try:
                    conn = connect_to_region(host.info['region'],
                                             aws_access_key_id=getPassword('aws_access_key_id'),
                                             aws_secret_access_key=getPassword('aws_secret_access_key'))
                    conn.stop_instances(instance_ids=[host.info['id']])
                except Exception:
                    # Best effort: log and move on to the next kitten, but
                    # let SystemExit/KeyboardInterrupt propagate.
                    log.error('unable to stop ec2 instance %s [%s]' % (kitten, host.info['id']), exc_info=True)
            else:
                log.error('ec2 instance flagged for reboot/recovery but it is not running')
예제 #3
0
def processKittens(options, jobs, results):
    """Worker loop: pull kitten names from ``jobs`` and check each one.

    For every name taken off ``jobs`` a ``(job, r)`` tuple is put on
    ``results``; ``r`` is the check result dict, or ``{}`` when the kitten
    was skipped.  The loop has no exit condition of its own.

    Args:
        options: object providing ``tools``, ``environ``, ``force``,
            ``verbose`` and ``dryrun`` attributes.
        jobs: queue-like object; ``jobs.get(False)`` must raise ``Empty``
            when nothing is available.
        results: queue-like object exposing ``put()``.
    """
    remoteEnv = releng.remote.RemoteEnvironment(options.tools, db=db)
    # The date/hour used in the result keys are fixed once, at start-up.
    dNow      = datetime.datetime.now()
    dDate     = dNow.strftime('%Y-%m-%d')
    dHour     = dNow.strftime('%H')

    while True:
        # NOTE(review): non-blocking get with no sleep means this loop
        # spins while the queue is empty -- confirm that is intended.
        try:
            job = jobs.get(False)
        except Empty:
            continue
        if job is None:
            continue

        results.put((job, _checkKitten(remoteEnv, options, job, dDate, dHour)))


def _lastSeenAsDict(td):
    """Return the JSON-serialisable dict form of the timedelta ``td``."""
    # timedelta.seconds alone wraps every 24 hours, so whole days must be
    # folded in or a host idle for more than a day looks freshly seen.
    secs             = td.days * 86400 + td.seconds
    hours, remainder = divmod(secs, 3600)
    minutes, seconds = divmod(remainder, 60)
    return { 'hours':    hours,
             'minutes':  minutes,
             'seconds':  seconds,
             'relative': relative(td),
             'since':    secs,
           }


def _checkKitten(remoteEnv, options, job, dDate, dHour):
    """Check a single kitten and return its result dict ({} if skipped)."""
    if job not in remoteEnv.hosts:
        if options.verbose:
            # No exception is being handled here, so exc_info is omitted;
            # passing it would attach an empty or unrelated traceback.
            log.error('%s not listed in slavealloc, skipping' % job)
        return {}

    info = remoteEnv.hosts[job]
    if info['environment'] != options.environ:
        if options.verbose:
            log.info('%s not in requested environment %s (%s), skipping' % (job, options.environ, info['environment']))
        return {}

    if not info['enabled'] and not options.force:
        if options.verbose:
            log.info('%s not enabled, skipping' % job)
        return {}

    if len(info['notes']) > 0 and not options.force:
        if options.verbose:
            log.info('%s has a slavealloc notes field, skipping' % job)
        return {}

    log.info(job)
    host = remoteEnv.getHost(job)
    if host is None:
        log.error('unknown host for %s' % job)
        return {}

    r = remoteEnv.check(host, indent='    ', dryrun=options.dryrun, verbose=options.verbose)
    d = remoteEnv.rebootIfNeeded(host, lastSeen=r['lastseen'], indent='    ', dryrun=options.dryrun, verbose=options.verbose)

    # Fold the reboot outcome into the check result.
    for s in ('reboot', 'recovery', 'ipmi', 'pdu'):
        r[s] = d[s]
    r['output'] += d['output']

    # Persist every field under a per-run, per-host key that expires.
    hostKey = 'kittenherder:%s.%s:%s' % (dDate, dHour, job)
    for key in r:
        db.hset(hostKey, key, r[key])
    db.expire(hostKey, _keyExpire)

    # all this because json cannot dumps() the timedelta object
    td = r['lastseen']
    if td is not None:
        r['lastseen'] = _lastSeenAsDict(td)
    log.info('%s: %s' % (job, json.dumps(r)))

    if (host.farm == 'ec2') and (r['reboot'] or r['recovery']):
        log.info('shutting down ec2 instance')
        try:
            conn = connect_to_region(host.info['region'],
                                     aws_access_key_id=getPassword('aws_access_key_id'),
                                     aws_secret_access_key=getPassword('aws_secret_access_key'))
            conn.stop_instances(instance_ids=[host.info['id']])
        except Exception:
            # Best effort: the result is still reported to the caller.
            log.error('unable to stop ec2 instance %s [%s]' % (job, host.info['id']), exc_info=True)

    return r
예제 #4
0
def processKittens(options, jobs, results):
    """Worker loop: pull kitten names from ``jobs`` and check each one.

    For every name taken off ``jobs`` a ``(job, r)`` tuple is put on
    ``results``; ``r`` is the check result dict, or ``{}`` when the kitten
    was skipped.  The loop has no exit condition of its own.

    Args:
        options: object providing ``tools``, ``environ``, ``force``,
            ``verbose`` and ``dryrun`` attributes.
        jobs: queue-like object; ``jobs.get(False)`` must raise ``Empty``
            when nothing is available.
        results: queue-like object exposing ``put()``.
    """
    remoteEnv = releng.remote.RemoteEnvironment(options.tools, db=db)
    # The date/hour used in the result keys are fixed once, at start-up.
    dNow = datetime.datetime.now()
    dDate = dNow.strftime('%Y-%m-%d')
    dHour = dNow.strftime('%H')

    while True:
        # NOTE(review): non-blocking get with no sleep means this loop
        # spins while the queue is empty -- confirm that is intended.
        try:
            job = jobs.get(False)
        except Empty:
            continue
        if job is None:
            continue

        results.put(
            (job, _checkKitten(remoteEnv, options, job, dDate, dHour)))


def _lastSeenAsDict(td):
    """Return the JSON-serialisable dict form of the timedelta ``td``."""
    # timedelta.seconds alone wraps every 24 hours, so whole days must be
    # folded in or a host idle for more than a day looks freshly seen.
    secs = td.days * 86400 + td.seconds
    hours, remainder = divmod(secs, 3600)
    minutes, seconds = divmod(remainder, 60)
    return {
        'hours': hours,
        'minutes': minutes,
        'seconds': seconds,
        'relative': relative(td),
        'since': secs,
    }


def _checkKitten(remoteEnv, options, job, dDate, dHour):
    """Check a single kitten and return its result dict ({} if skipped)."""
    if job not in remoteEnv.hosts:
        if options.verbose:
            # No exception is being handled here, so exc_info is omitted;
            # passing it would attach an empty or unrelated traceback.
            log.error('%s not listed in slavealloc, skipping' % job)
        return {}

    info = remoteEnv.hosts[job]
    if info['environment'] != options.environ:
        if options.verbose:
            log.info(
                '%s not in requested environment %s (%s), skipping'
                % (job, options.environ, info['environment']))
        return {}

    if not info['enabled'] and not options.force:
        if options.verbose:
            log.info('%s not enabled, skipping' % job)
        return {}

    if len(info['notes']) > 0 and not options.force:
        if options.verbose:
            log.info(
                '%s has a slavealloc notes field, skipping' %
                job)
        return {}

    log.info(job)
    host = remoteEnv.getHost(job)
    if host is None:
        log.error('unknown host for %s' % job)
        return {}

    r = remoteEnv.check(host,
                        indent='    ',
                        dryrun=options.dryrun,
                        verbose=options.verbose)
    d = remoteEnv.rebootIfNeeded(host,
                                 lastSeen=r['lastseen'],
                                 indent='    ',
                                 dryrun=options.dryrun,
                                 verbose=options.verbose)

    # Fold the reboot outcome into the check result.
    for s in ('reboot', 'recovery', 'ipmi', 'pdu'):
        r[s] = d[s]
    r['output'] += d['output']

    # Persist every field under a per-run, per-host key that expires.
    hostKey = 'kittenherder:%s.%s:%s' % (dDate, dHour, job)
    for key in r:
        db.hset(hostKey, key, r[key])
    db.expire(hostKey, _keyExpire)

    # all this because json cannot dumps() the timedelta object
    td = r['lastseen']
    if td is not None:
        r['lastseen'] = _lastSeenAsDict(td)
    log.info('%s: %s' % (job, json.dumps(r)))

    if (host.farm == 'ec2') and (r['reboot'] or r['recovery']):
        log.info('shutting down ec2 instance')
        try:
            conn = connect_to_region(
                host.info['region'],
                aws_access_key_id=getPassword('aws_access_key_id'),
                aws_secret_access_key=getPassword('aws_secret_access_key'))
            conn.stop_instances(instance_ids=[host.info['id']])
        except Exception:
            # Best effort: the result is still reported to the caller.
            log.error(
                'unable to stop ec2 instance %s [%s]' %
                (job, host.info['id']),
                exc_info=True)

    return r