Ejemplo n.º 1
0
def do_summary(ctx):
    """
    Print a table of lock counts grouped by owner and machine type.

    Locks are fetched with ``query.list_locks``, restricted to
    ``ctx.machine_type`` when that attribute is truthy.  Each row shows the
    machine type, how many machines are in the group, how many of those are
    up, and the owner ("(free)" for machines that are not locked).  A totals
    line is printed at the end.

    :param ctx: object providing a ``machine_type`` attribute
    """
    # (owner, machine_type) -> [machine count, machines up, machine_type]
    lockd = collections.defaultdict(lambda: [0, 0, 'unknown'])
    if ctx.machine_type:
        locks = query.list_locks(machine_type=ctx.machine_type)
    else:
        locks = query.list_locks()
    for lock in locks:
        owner = lock['locked_by'] if lock['locked'] == 1 else '(free)'
        # Group by owner *and* machine type so that one owner holding
        # several machine types gets one row per type.
        who = (owner, lock['machine_type'])
        stats = lockd[who]
        stats[0] += 1
        stats[1] += 1 if lock['up'] else 0
        stats[2] = lock['machine_type']

    # Order rows by machine type, then by machine count.
    rows = sorted(lockd.items(), key=lambda item: (item[1][2], item[1][0]))
    total_count, total_up = 0, 0
    # Single-argument print(...) behaves the same on Python 2 and Python 3.
    print("TYPE     COUNT  UP  OWNER")

    for (who, (count, upcount, machinetype)) in rows:
        print("{machinetype:8s} {count:3d}  {up:3d}  {owner}".format(
            count=count, up=upcount, owner=who[0], machinetype=machinetype))
        total_count += count
        total_up += upcount

    print("         ---  ---")
    print("{cnt:12d}  {up:3d}".format(cnt=total_count, up=total_up))
Ejemplo n.º 2
0
def do_summary(ctx):
    """
    Print a table of lock counts grouped by owner and machine type.

    Locks are fetched with ``query.list_locks``, restricted to
    ``ctx.machine_type`` when that attribute is truthy.  Each row shows the
    machine type, how many machines are in the group, how many of those are
    up, and the owner ("(free)" for machines that are not locked).  A totals
    line is printed at the end.

    :param ctx: object providing a ``machine_type`` attribute
    """
    # (owner, machine_type) -> [machine count, machines up, machine_type]
    lockd = collections.defaultdict(lambda: [0, 0, 'unknown'])
    if ctx.machine_type:
        locks = query.list_locks(machine_type=ctx.machine_type)
    else:
        locks = query.list_locks()
    for lock in locks:
        owner = lock['locked_by'] if lock['locked'] == 1 else '(free)'
        # Group by owner *and* machine type so that one owner holding
        # several machine types gets one row per type.
        who = (owner, lock['machine_type'])
        stats = lockd[who]
        stats[0] += 1
        stats[1] += 1 if lock['up'] else 0
        stats[2] = lock['machine_type']

    # items() instead of iteritems(): the latter does not exist on Python 3,
    # and sorted() materialises the pairs either way.
    rows = sorted(lockd.items(), key=lambda item: (item[1][2], item[1][0]))
    total_count, total_up = 0, 0
    # Single-argument print(...) behaves the same on Python 2 and Python 3.
    print("TYPE     COUNT  UP  OWNER")

    for (who, (count, upcount, machinetype)) in rows:
        print("{machinetype:8s} {count:3d}  {up:3d}  {owner}".format(
            count=count, up=upcount, owner=who[0],
            machinetype=machinetype))
        total_count += count
        total_up += upcount

    print("         ---  ---")
    print("{cnt:12d}  {up:3d}".format(cnt=total_count, up=total_up))
Ejemplo n.º 3
0
def stale_openstack(ctx):
    """
    Run the stale-resource checks for OpenStack instances, nodes and volumes.

    Builds a mapping of OpenStack instances keyed by their ``'ID'`` field and
    fetches the currently locked nodes keyed by name, then hands both to the
    instance and node checks.  Volumes are checked separately.  Unless
    ``ctx.dry_run`` is set, ``openstack_remove_again()`` is called last.

    :param ctx: object providing ``dry_run``; also passed to the helpers
    """
    instances_by_id = {
        instance['ID']: instance for instance in OpenStack.list_instances()
    }
    locked_nodes = list_locks(keyed_by_name=True, locked=True)

    stale_openstack_instances(ctx, instances_by_id, locked_nodes)
    stale_openstack_nodes(ctx, instances_by_id, locked_nodes)
    stale_openstack_volumes(ctx, OpenStack.list_volumes())

    if ctx.dry_run:
        return
    openstack_remove_again()
Ejemplo n.º 4
0
def stale_openstack(ctx):
    """
    Check OpenStack instances, nodes and volumes for stale entries.

    The instance list is indexed by each instance's ``'ID'`` and the locked
    nodes are fetched keyed by name; both are given to the instance and node
    checks, after which the volume check runs on the volume list.
    ``openstack_remove_again()`` is only called when ``ctx.dry_run`` is
    falsy.

    :param ctx: object providing ``dry_run``; also passed to the helpers
    """
    targets = {}
    for instance in OpenStack.list_instances():
        # A later instance with the same ID replaces an earlier one.
        targets[instance['ID']] = instance
    locked = list_locks(keyed_by_name=True, locked=True)

    stale_openstack_instances(ctx, targets, locked)
    stale_openstack_nodes(ctx, targets, locked)
    stale_openstack_volumes(ctx, OpenStack.list_volumes())

    dry_run = ctx.dry_run
    if not dry_run:
        openstack_remove_again()
Ejemplo n.º 5
0
def nuke(ctx, should_unlock, sync_clocks=True, noipmi=False, keep_logs=False,
         should_reboot=True):
    """
    Nuke every target in ``ctx.config['targets']`` in parallel.

    Does nothing when the config has no ``'targets'`` entry.  When
    ``ctx.name`` is set, the targets are first filtered against the current
    locks: a target is dropped from ``ctx.config['targets']`` if ``ctx.name``
    is not contained in its lock description, or if its lock reports
    ``up`` as ``False``.  Each remaining target is handed to ``nuke_one`` via
    ``parallel()``; targets reported back as un-nuked are logged as an error.

    :param ctx:           object providing ``config`` and ``name``
    :param should_unlock: forwarded to ``nuke_one``
    :param sync_clocks:   forwarded to ``nuke_one``
    :param noipmi:        forwarded to ``nuke_one``
    :param keep_logs:     forwarded to ``nuke_one``
    :param should_reboot: forwarded to ``nuke_one``
    """
    if 'targets' not in ctx.config:
        return
    total_unnuked = {}
    # Snapshot of the targets; ctx.config['targets'] is mutated below.
    targets = dict(ctx.config['targets'])
    if ctx.name:
        log.info('Checking targets against current locks')
        # Remove targets whose description doesn't match the archive name.
        for lock in list_locks():
            name = lock['name']
            # Direct membership test instead of scanning every target for
            # every lock.
            if name not in targets:
                continue
            if ctx.name not in lock['description']:
                del ctx.config['targets'][name]
                log.info(
                    "Not nuking %s because description doesn't match",
                    name)
            elif lock.get('up') is False:
                del ctx.config['targets'][name]
                log.info(
                    "Not nuking %s because it is down",
                    name)
    with parallel() as p:
        for target, hostkey in ctx.config['targets'].items():
            p.spawn(
                nuke_one,
                ctx,
                {target: hostkey},
                should_unlock,
                sync_clocks,
                ctx.config.get('check-locks', True),
                noipmi,
                keep_logs,
                should_reboot,
            )
        # Iterating the parallel context yields each spawned call's result.
        for unnuked in p:
            if unnuked:
                total_unnuked.update(unnuked)
    if total_unnuked:
        log.error('Could not nuke the following targets:\n' +
                  '\n  '.join(['targets:', ] +
                              yaml.safe_dump(
                                  total_unnuked,
                                  default_flow_style=False).splitlines()))
Ejemplo n.º 6
0
def nuke(ctx, should_unlock, sync_clocks=True, reboot_all=True, noipmi=False):
    """
    Nuke every target in ``ctx.config['targets']`` in parallel.

    Does nothing when the config has no ``'targets'`` entry.  When
    ``ctx.name`` is set, the targets are first filtered against the current
    locks: a target is dropped from ``ctx.config['targets']`` if ``ctx.name``
    is not contained in its lock description.  Each remaining target is
    handed to ``nuke_one`` via ``parallel()``; targets reported back as
    un-nuked are logged as an error.

    :param ctx:           object providing ``config`` and ``name``
    :param should_unlock: forwarded to ``nuke_one``
    :param sync_clocks:   forwarded to ``nuke_one``
    :param reboot_all:    forwarded to ``nuke_one``
    :param noipmi:        forwarded to ``nuke_one``
    """
    if 'targets' not in ctx.config:
        return
    total_unnuked = {}
    # Snapshot of the targets; ctx.config['targets'] is mutated below.
    targets = dict(ctx.config['targets'])
    if ctx.name:
        log.info('Checking targets against current locks')
        # Remove targets whose description doesn't match the archive name.
        for lock in list_locks():
            name = lock['name']
            # Direct membership test instead of scanning every target for
            # every lock.
            if name not in targets:
                continue
            if ctx.name not in lock['description']:
                del ctx.config['targets'][name]
                log.info(
                    "Not nuking %s because description doesn't match",
                    name)
    with parallel() as p:
        # items() rather than iteritems(), which does not exist on Python 3.
        for target, hostkey in ctx.config['targets'].items():
            p.spawn(
                nuke_one,
                ctx,
                {target: hostkey},
                should_unlock,
                sync_clocks,
                reboot_all,
                ctx.config.get('check-locks', True),
                noipmi,
            )
        # Iterating the parallel context yields each spawned call's result.
        for unnuked in p:
            if unnuked:
                total_unnuked.update(unnuked)
    if total_unnuked:
        log.error('Could not nuke the following targets:\n' +
                  '\n  '.join(['targets:', ] +
                              yaml.safe_dump(
                                  total_unnuked,
                                  default_flow_style=False).splitlines()))
Ejemplo n.º 7
0
def do_update_keys(machines, all_=False, _raise=True):
    """
    Scan SSH host keys for machines and push them via ``push_new_keys``.

    :param machines: machines to scan; ignored when ``all_`` is true
    :param all_:     when true, scan every machine name returned by
                     ``query.list_locks(keyed_by_name=True)`` instead
    :param _raise:   forwarded to ``misc.ssh_keyscan``
    :returns:        a tuple of (result of ``push_new_keys``, scanned keys)
    """
    known_locks = query.list_locks(keyed_by_name=True)
    hosts = known_locks.keys() if all_ else machines
    scanned_keys = misc.ssh_keyscan(hosts, _raise=_raise)
    push_result = push_new_keys(scanned_keys, known_locks)
    return push_result, scanned_keys
Ejemplo n.º 8
0
def block_and_lock_machines(ctx, total_requested, machine_type, reimage=True):
    """
    Lock ``total_requested`` machines of ``machine_type``, retrying as needed.

    The job status is pushed as 'waiting' while locking is in progress and
    back to 'running' once every machine is locked.  On success the locked
    machines are stored in ``ctx.config['targets']``.  If any of them are
    virtual machines, the function first waits until an SSH keyscan answers
    for all of them (recreating unresponsive ones every 40 scans) and stores
    each target's ``ssh_pub_key`` as reported by ``get_status``.

    When ``ctx.block`` is true the function keeps waiting and retrying until
    enough machines are available; otherwise it fails immediately.

    :param ctx:             object providing ``config``, ``block``,
                            ``owner`` and ``archive``
    :param total_requested: number of machines to lock in total
    :param machine_type:    machine type passed to the lock queries
    :param reimage:         forwarded to ``lock_many``
    :raises RuntimeError:   listing machines failed and ``ctx.block`` is false
    :raises AssertionError: ``config.reserve_machines`` is invalid, or not
                            enough machines are available and ``ctx.block``
                            is false
    """
    # It's OK for os_type and os_version to be None here.  If we're trying
    # to lock a bare metal machine, we'll take whatever is available.  If
    # we want a vps, defaults will be provided by misc.get_distro and
    # misc.get_distro_version in provision.create_if_vm
    os_type = ctx.config.get("os_type")
    os_version = ctx.config.get("os_version")
    arch = ctx.config.get('arch')
    reserved = config.reserve_machines
    assert isinstance(reserved, int), 'reserve_machines must be integer'
    assert (reserved >= 0), 'reserve_machines should >= 0'

    log.info('Locking machines...')
    # change the status during the locking process
    report.try_push_job_info(ctx.config, dict(status='waiting'))

    all_locked = dict()
    requested = total_requested
    while True:
        # get a candidate list of machines
        machines = query.list_locks(machine_type=machine_type, up=True,
                                    locked=False, count=requested + reserved)
        if machines is None:
            if ctx.block:
                log.error('Error listing machines, trying again')
                time.sleep(20)
                continue
            else:
                raise RuntimeError('Error listing machines')

        # make sure there are machines for non-automated jobs to run
        if len(machines) < reserved + requested \
                and ctx.owner.startswith('scheduled'):
            if ctx.block:
                log.info(
                    'waiting for more %s machines to be free (need %s + %s, have %s)...',
                    machine_type,
                    reserved,
                    requested,
                    len(machines),
                )
                time.sleep(10)
                continue
            else:
                assert 0, ('not enough machines free; need %s + %s, have %s' %
                           (reserved, requested, len(machines)))

        try:
            newly_locked = lock_many(ctx, requested, machine_type,
                                     ctx.owner, ctx.archive, os_type,
                                     os_version, arch, reimage=reimage)
        except Exception:
            # Lock failures should map to the 'dead' status instead of 'fail'
            if 'summary' in ctx:
                set_status(ctx.summary, 'dead')
            raise
        all_locked.update(newly_locked)
        log.info(
            '{newly_locked} {mtype} machines locked this try, '
            '{total_locked}/{total_requested} locked so far'.format(
                newly_locked=len(newly_locked),
                mtype=machine_type,
                total_locked=len(all_locked),
                total_requested=total_requested,
            )
        )
        if len(all_locked) == total_requested:
            vmlist = [lmach for lmach in all_locked
                      if teuthology.lock.query.is_vm(lmach)]
            if vmlist:
                log.info('Waiting for virtual machines to come up')
                keys_dict = dict()
                loopcount = 0
                while len(keys_dict) != len(vmlist):
                    loopcount += 1
                    time.sleep(10)
                    keys_dict = misc.ssh_keyscan(vmlist)
                    if len(keys_dict) == len(vmlist):
                        # Every VM answered the scan; re-checking the loop
                        # condition ends the wait without logging a
                        # misleading "still unavailable" message.
                        continue
                    log.info('virtual machine is still unavailable')
                    if loopcount == 40:
                        loopcount = 0
                        log.info('virtual machine(s) still not up, ' +
                                 'recreating unresponsive ones.')
                        for guest in vmlist:
                            if guest not in keys_dict:
                                log.info('recreating: ' + guest)
                                full_name = misc.canonicalize_hostname(guest)
                                teuthology.provision.destroy_if_vm(ctx, full_name)
                                teuthology.provision.create_if_vm(ctx, full_name)
                # A truthy first element of the result is reported as a key
                # error but does not abort the run.
                if teuthology.lock.ops.do_update_keys(keys_dict)[0]:
                    log.info("Error in virtual machine keys")
                newscandict = {}
                for dkey in all_locked:
                    stats = teuthology.lock.query.get_status(dkey)
                    newscandict[dkey] = stats['ssh_pub_key']
                ctx.config['targets'] = newscandict
            else:
                ctx.config['targets'] = all_locked
            locked_targets = yaml.safe_dump(
                ctx.config['targets'],
                default_flow_style=False
            ).splitlines()
            log.info('\n  '.join(['Locked targets:', ] + locked_targets))
            # successfully locked machines, change status back to running
            report.try_push_job_info(ctx.config, dict(status='running'))
            break
        elif not ctx.block:
            assert 0, 'not enough machines are available'
        else:
            requested = requested - len(newly_locked)
            # The two literals are concatenated, so the separating space
            # must be part of the first one.
            assert requested > 0, "lock_machines: requested counter went " \
                                  "negative, this shouldn't happen"

        log.info(
            "{total} machines locked ({new} new); need {more} more".format(
                total=len(all_locked), new=len(newly_locked), more=requested)
        )
        # warning() rather than the deprecated warn() alias.
        log.warning('Could not lock enough machines, waiting...')
        time.sleep(10)