Example #1
def main(ctx):
    if ctx.owner is None:
        ctx.owner = 'scheduled_{user}'.format(user=get_user())
    read_config(ctx)

    beanstalk = teuthology.queue.connect(ctx)

    tube = ctx.worker
    beanstalk.use(tube)

    if ctx.show:
        for job_id in ctx.show:
            job = beanstalk.peek(job_id)
            if job is None:
                if ctx.verbose:
                    print 'job {jid} is not in the queue'.format(jid=job_id)
            else:
                print '--- job {jid} priority {prio} ---\n'.format(
                    jid=job_id,
                    prio=job.stats()['pri']), job.body
        return

    if ctx.delete:
        for job_id in ctx.delete:
            job = beanstalk.peek(job_id)
            if job is None:
                print 'job {jid} is not in the queue'.format(jid=job_id)
            else:
                job.delete()
        return

    # strip out targets; the worker will allocate new ones when we run
    # the job with --lock.
    if ctx.config.get('targets'):
        del ctx.config['targets']

    job_config = dict(
        name=ctx.name,
        last_in_suite=ctx.last_in_suite,
        email=ctx.email,
        description=ctx.description,
        owner=ctx.owner,
        verbose=ctx.verbose,
        machine_type=ctx.worker,
    )
    # Merge job_config and ctx.config
    job_config.update(ctx.config)
    if ctx.timeout is not None:
        job_config['results_timeout'] = ctx.timeout

    job = yaml.safe_dump(job_config)
    num = ctx.num
    while num > 0:
        jid = beanstalk.put(
            job,
            ttr=60 * 60 * 24,
            priority=ctx.priority,
        )
        print 'Job scheduled with name {name} and ID {jid}'.format(
            name=ctx.name, jid=jid)
        num -= 1
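
# A consumer-side sketch for the payload scheduled above: a worker watching
# the same tube reserves the job and decodes the YAML body. A minimal
# illustration assuming the beanstalkc client; host, port, and tube name are
# placeholders, not values from the original code.
import beanstalkc
import yaml

conn = beanstalkc.Connection(host='localhost', port=11300)
conn.watch('plana')    # the tube the scheduler selected with beanstalk.use()
conn.ignore('default')
job = conn.reserve(timeout=60)
if job is not None:
    job_config = yaml.safe_load(job.body)
    print 'reserved job for owner {owner}'.format(owner=job_config['owner'])
    job.delete()       # acknowledge the job once it has been handled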
Example #2
def main():
    from gevent import monkey
    monkey.patch_all()
    from .orchestra import monkey
    monkey.patch_all()

    import logging

    log = logging.getLogger(__name__)

    ctx = parse_args()

    loglevel = logging.INFO
    if ctx.verbose:
        loglevel = logging.DEBUG

    logging.basicConfig(level=loglevel)

    from teuthology.misc import read_config
    read_config(ctx)

    log.info('\n  '.join([
        'targets:',
    ] + yaml.safe_dump(ctx.config['targets'],
                       default_flow_style=False).splitlines()))

    if ctx.owner is None:
        from teuthology.misc import get_user
        ctx.owner = get_user()

    nuke(ctx, log)
Example #3
def build_config(args):
    """
    Given a dict of arguments, build a job config
    """
    config_paths = args.get('<conf_file>', list())
    conf_dict = merge_configs(config_paths)
    # strip out targets; the worker will allocate new ones when we run
    # the job with --lock.
    if 'targets' in conf_dict:
        del conf_dict['targets']
    args['config'] = conf_dict

    owner = args['--owner']
    if owner is None:
        owner = 'scheduled_{user}'.format(user=get_user())

    job_config = dict(
        name=args['--name'],
        last_in_suite=args['--last-in-suite'],
        email=args['--email'],
        description=args['--description'],
        owner=owner,
        verbose=args['--verbose'],
        machine_type=args['--worker'],
        tube=args['--worker'],
        priority=int(args['--priority']),
    )
    # Update the dict we just created, and not the other way around, to let
    # settings in the yaml override what's passed on the command line. This is
    # primarily to accommodate jobs with multiple machine types.
    job_config.update(conf_dict)
    if args['--timeout'] is not None:
        job_config['results_timeout'] = args['--timeout']
    return job_config
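
# A sketch of driving build_config directly: docopt-style keys carry the CLI
# flags, and '<conf_file>' lists YAML fragments for merge_configs. All file
# names and values below are placeholders.
args = {
    '<conf_file>': ['basic.yaml', 'overrides.yaml'],
    '--name': 'example-suite-run',
    '--last-in-suite': False,
    '--email': None,
    '--description': None,
    '--owner': None,        # None falls back to 'scheduled_<user>'
    '--verbose': False,
    '--worker': 'plana',    # becomes both machine_type and tube
    '--priority': '1000',   # parsed with int() above
    '--timeout': None,
}
job_config = build_config(args)
assert job_config['tube'] == 'plana'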
Example #4
def main():
    from gevent import monkey; monkey.patch_all()
    from .orchestra import monkey; monkey.patch_all()

    import logging

    log = logging.getLogger(__name__)

    ctx = parse_args()

    loglevel = logging.INFO
    if ctx.verbose:
        loglevel = logging.DEBUG

    logging.basicConfig(
        level=loglevel,
        )

    from teuthology.misc import read_config
    read_config(ctx)

    log.info('\n  '.join(['targets:', ] + yaml.safe_dump(ctx.config['targets'], default_flow_style=False).splitlines()))

    if ctx.owner is None:
        from teuthology.misc import get_user
        ctx.owner = get_user()

    nuke(ctx, log, ctx.unlock, ctx.synch_clocks, ctx.reboot_all)
Example #5
def main():
    from gevent import monkey
    monkey.patch_all(dns=False)
    from .orchestra import monkey
    monkey.patch_all()
    from teuthology.run import config_file
    import os

    import logging

    log = logging.getLogger(__name__)

    ctx = parse_args()

    loglevel = logging.INFO
    if ctx.verbose:
        loglevel = logging.DEBUG

    logging.basicConfig(level=loglevel)

    info = {}
    if ctx.archive:
        ctx.config = config_file(ctx.archive + '/config.yaml')
        ifn = os.path.join(ctx.archive, 'info.yaml')
        if os.path.exists(ifn):
            with open(ifn, 'r') as fd:
                info = yaml.safe_load(fd.read())
        if not ctx.pid:
            ctx.pid = info.get('pid')
            if not ctx.pid:
                ctx.pid = int(open(ctx.archive + '/pid').read().rstrip('\n'))
        if not ctx.owner:
            ctx.owner = info.get('owner')
            if not ctx.owner:
                ctx.owner = open(ctx.archive + '/owner').read().rstrip('\n')
        ctx.name = info.get('name')

    from teuthology.misc import read_config
    read_config(ctx)

    log.info('\n  '.join([
        'targets:',
    ] + yaml.safe_dump(ctx.config['targets'],
                       default_flow_style=False).splitlines()))

    if ctx.owner is None:
        from teuthology.misc import get_user
        ctx.owner = get_user()

    if ctx.pid:
        if ctx.archive:
            log.info('Killing teuthology process at pid %d', ctx.pid)
            os.system('grep -q %s /proc/%d/cmdline && sudo kill %d' %
                      (ctx.archive, ctx.pid, ctx.pid))
        else:
            import subprocess
            subprocess.check_call(["kill", "-9", str(ctx.pid)])

    nuke(ctx, log, ctx.unlock, ctx.synch_clocks, ctx.reboot_all, ctx.noipmi)
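
# The kill/nuke mains in this collection read a run's archive directory:
# config.yaml plus pid and owner files, and optionally info.yaml. A sketch
# that writes a matching skeleton; every path and value is a placeholder.
import os
import yaml

archive = '/tmp/archive/example-run'
os.makedirs(archive)
with open(os.path.join(archive, 'config.yaml'), 'w') as f:
    yaml.safe_dump({'targets': {'ubuntu@host1': 'ssh-rsa AAAA...'}}, f)
with open(os.path.join(archive, 'pid'), 'w') as f:
    f.write('12345\n')
with open(os.path.join(archive, 'owner'), 'w') as f:
    f.write('someone@example.com\n')
with open(os.path.join(archive, 'info.yaml'), 'w') as f:
    yaml.safe_dump(dict(pid=12345, owner='someone@example.com',
                        name='example-run'), f)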
Example #6
def main(args):
    ctx = FakeNamespace(args)
    if ctx.verbose:
        teuthology.log.setLevel(logging.DEBUG)

    info = {}
    if ctx.archive:
        ctx.config = config_file(ctx.archive + '/config.yaml')
        ifn = os.path.join(ctx.archive, 'info.yaml')
        if os.path.exists(ifn):
            with open(ifn, 'r') as fd:
                info = yaml.safe_load(fd.read())
        if not ctx.pid:
            ctx.pid = info.get('pid')
            if not ctx.pid:
                ctx.pid = int(open(ctx.archive + '/pid').read().rstrip('\n'))
        if not ctx.owner:
            ctx.owner = info.get('owner')
            if not ctx.owner:
                ctx.owner = open(ctx.archive + '/owner').read().rstrip('\n')

    if ctx.targets:
        ctx.config = merge_configs(ctx.targets)

    if ctx.stale:
        stale_nodes = find_stale_locks(ctx.owner)
        targets = dict()
        for node in stale_nodes:
            targets[node['name']] = node['ssh_pub_key']
        ctx.config = dict(targets=targets)

    if ctx.stale_openstack:
        stale_openstack(ctx)
        return

    log.info(
        '\n  '.join(
            ['targets:', ] + yaml.safe_dump(
                ctx.config['targets'],
                default_flow_style=False).splitlines()))

    if ctx.dry_run:
        log.info("Not actually nuking anything since --dry-run was passed")
        return

    if ctx.owner is None:
        ctx.owner = get_user()

    if ctx.pid:
        if ctx.archive:
            log.info('Killing teuthology process at pid %d', ctx.pid)
            os.system('grep -q %s /proc/%d/cmdline && sudo kill -9 %d' % (
                ctx.archive,
                ctx.pid,
                ctx.pid))
        else:
            subprocess.check_call(["kill", "-9", str(ctx.pid)])

    nuke(ctx, ctx.unlock, ctx.synch_clocks, ctx.noipmi, ctx.keep_logs, not ctx.no_reboot)
Example #7
def main():
    from gevent import monkey; monkey.patch_all(dns=False)
    from .orchestra import monkey; monkey.patch_all()
    from teuthology.run import config_file
    import os

    import logging

    log = logging.getLogger(__name__)

    ctx = parse_args()

    loglevel = logging.INFO
    if ctx.verbose:
        loglevel = logging.DEBUG

    logging.basicConfig(
        level=loglevel,
        )

    info = {}
    if ctx.archive:
        ctx.config = config_file(ctx.archive + '/config.yaml')
        ifn = os.path.join(ctx.archive, 'info.yaml')
        if os.path.exists(ifn):
            with open(ifn, 'r') as fd:
                info = yaml.safe_load(fd.read())
        if not ctx.pid:
            ctx.pid = info.get('pid')
            if not ctx.pid:
                ctx.pid = int(open(ctx.archive + '/pid').read().rstrip('\n'))
        if not ctx.owner:
            ctx.owner = info.get('owner')
            if not ctx.owner:
                ctx.owner = open(ctx.archive + '/owner').read().rstrip('\n')
        ctx.name = info.get('name')

    from teuthology.misc import read_config
    read_config(ctx)

    log.info('\n  '.join(['targets:', ] + yaml.safe_dump(ctx.config['targets'], default_flow_style=False).splitlines()))

    if ctx.owner is None:
        from teuthology.misc import get_user
        ctx.owner = get_user()

    if ctx.pid:
        if ctx.archive:
            log.info('Killing teuthology process at pid %d', ctx.pid)
            os.system('grep -q %s /proc/%d/cmdline && sudo kill %d' % (
                    ctx.archive,
                    ctx.pid,
                    ctx.pid))
        else:
            import subprocess
            subprocess.check_call(["kill", "-9", str(ctx.pid)]);

    nuke(ctx, log, ctx.unlock, ctx.synch_clocks, ctx.reboot_all, ctx.noipmi)
Example #8
def unlock(ctx, name, user=None):
    if user is None:
        user = teuthology.get_user()
    success, _ = send_request('DELETE', _lock_url(ctx) + '/' + name + '?' +
                              urllib.urlencode(dict(user=user)))
    if success:
        log.debug('unlocked %s', name)
    else:
        log.error('failed to unlock %s', name)
    return success
Example #9
def lock(ctx, name, user=None):
    if user is None:
        user = teuthology.get_user()
    success, _, _ = ls.send_request('POST', ls._lock_url(ctx) + '/' + name,
                                    urllib.urlencode(dict(user=user)))
    if success:
        log.debug('locked %s as %s', name, user)
    else:
        log.error('failed to lock %s', name)
    return success
Example #10
def unlock(ctx, name, user=None):
    if user is None:
        user = teuthology.get_user()
    success, _, _ = send_request('DELETE', _lock_url(ctx) + '/' + name + '?' +
                                 urllib.urlencode(dict(user=user)))
    if success:
        log.debug('unlocked %s', name)
    else:
        log.error('failed to unlock %s', name)
    return success
Example #11
def lock_many(ctx, num, user=None):
    if user is None:
        user = teuthology.get_user()
    success, content = send_request('POST', _lock_url(ctx),
                                    urllib.urlencode(dict(user=user, num=num)))
    if success:
        machines = json.loads(content)
        log.debug('locked {machines}'.format(machines=', '.join(machines.keys())))
        return machines
    log.warn('Could not lock %d nodes', num)
    return []
Example #12
def lock_many(ctx, num, user=None):
    if user is None:
        user = teuthology.get_user()
    success, content = send_request('POST', _lock_url(ctx),
                                    urllib.urlencode(dict(user=user, num=num)))
    if success:
        machines = json.loads(content)
        log.debug(
            'locked {machines}'.format(machines=', '.join(machines.keys())))
        return machines
    log.warn('Could not lock %d nodes', num)
    return []
Example #13
def lock_many(ctx, num, user=None):
    if user is None:
        user = teuthology.get_user()
    success, content, status = send_request(
        'POST', _lock_url(ctx),
        urllib.urlencode(dict(user=user, num=num)))
    if success:
        machines = json.loads(content)
        log.debug('locked {machines}'.format(machines=', '.join(machines.keys())))
        return machines
    if status == 503:
        log.error('Insufficient nodes available to lock %d nodes.', num)
    else:
        log.error('Could not lock %d nodes, reason: unknown.', num)
    return []
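
# The three lock_many variants above differ only in send_request's return
# shape: the first two unpack (success, content), the third adds a status
# code so a 503 can be reported separately. A hypothetical wrapper with the
# three-tuple shape, for illustration only; not the teuthology
# implementation:
import urllib2

def send_request(method, url, body=None):
    request = urllib2.Request(url, data=body)
    request.get_method = lambda: method
    try:
        response = urllib2.urlopen(request)
        return True, response.read(), response.getcode()
    except urllib2.HTTPError as e:
        return False, None, e.code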
Example #14
def main():
    from gevent import monkey

    monkey.patch_all()
    from .orchestra import monkey

    monkey.patch_all()
    from teuthology.run import config_file

    import logging

    log = logging.getLogger(__name__)

    ctx = parse_args()

    loglevel = logging.INFO
    if ctx.verbose:
        loglevel = logging.DEBUG

    logging.basicConfig(level=loglevel)

    if ctx.archive:
        ctx.config = config_file(ctx.archive + "/config.yaml")
        if not ctx.pid:
            ctx.pid = int(open(ctx.archive + "/pid").read().rstrip("\n"))
        if not ctx.owner:
            ctx.owner = open(ctx.archive + "/owner").read().rstrip("\n")

    from teuthology.misc import read_config

    read_config(ctx)

    log.info("\n  ".join(["targets:"] + yaml.safe_dump(ctx.config["targets"], default_flow_style=False).splitlines()))

    if ctx.owner is None:
        from teuthology.misc import get_user

        ctx.owner = get_user()

    if ctx.pid:
        if ctx.archive:
            import os

            log.info("Killing teuthology process at pid %d", ctx.pid)
            os.system("grep -q %s /proc/%d/cmdline && sudo kill %d" % (ctx.archive, ctx.pid, ctx.pid))
        else:
            import subprocess

            subprocess.check_call(["kill", "-9", str(ctx.pid)])

    nuke(ctx, log, ctx.unlock, ctx.synch_clocks, ctx.reboot_all)
Example #15
def create_fake_context(job_config, block=False):
    owner = job_config.get('owner', get_user())
    os_version = job_config.get('os_version', None)

    ctx_args = {
        'config': job_config,
        'block': block,
        'owner': owner,
        'archive': job_config['archive_path'],
        'machine_type': job_config['machine_type'],
        'os_type': job_config.get('os_type', 'ubuntu'),
        'os_version': os_version,
        'name': job_config['name'],
    }

    return FakeNamespace(ctx_args)
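
# A sketch of calling create_fake_context: the keys indexed directly above
# (archive_path, machine_type, name) must be present in job_config. All
# values here are placeholders.
job_config = {
    'name': 'example-run',
    'archive_path': '/tmp/archive/example-run/1',
    'machine_type': 'plana',
    'os_type': 'centos',
    'owner': 'scheduled_someone',
}
ctx = create_fake_context(job_config, block=True)
assert ctx.owner == 'scheduled_someone'
assert ctx.os_version is None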
Example #16
def main(args):
    if args['--verbose']:
        teuthology.log.setLevel(logging.DEBUG)

    ctx = argparse.Namespace()
    ctx.os_type = args['--os-type']
    ctx.os_version = args['--os-version']

    nodes = args['<nodes>']

    reimage_types = get_reimage_types()
    statuses = query.get_statuses(nodes)
    owner = args['--owner'] or get_user()
    unlocked = [shortname(_['name']) for _ in statuses if not _['locked']]
    if unlocked:
        log.error("Some of the nodes are not locked: %s", unlocked)
        exit(1)

    improper = [
        shortname(_['name']) for _ in statuses if _['locked_by'] != owner
    ]
    if improper:
        log.error("Some of the nodes are not owned by '%s': %s", owner,
                  improper)
        exit(1)

    irreimageable = [
        shortname(_['name']) for _ in statuses
        if _['machine_type'] not in reimage_types
    ]
    if irreimageable:
        log.error(
            "Following nodes cannot be reimaged because theirs machine type "
            "is not reimageable: %s", irreimageable)
        exit(1)

    def reimage_node(ctx, machine_name, machine_type):
        ops.update_nodes([machine_name], True)
        reimage(ctx, machine_name, machine_type)
        ops.update_nodes([machine_name])
        log.debug("Node '%s' reimaging is complete", machine_name)

    with parallel() as p:
        for node in statuses:
            log.debug("Start node '%s' reimaging", node['name'])
            p.spawn(reimage_node, ctx, shortname(node['name']),
                    node['machine_type'])
Example #17
def lock_one(name, user=None, description=None):
    name = misc.canonicalize_hostname(name, user=None)
    if user is None:
        user = misc.get_user()
    request = dict(name=name, locked=True, locked_by=user,
                   description=description)
    uri = os.path.join(config.lock_server, 'nodes', name, 'lock', '')
    response = requests.put(uri, json.dumps(request))
    success = response.ok
    if success:
        log.debug('locked %s as %s', name, user)
    else:
        try:
            reason = response.json().get('message')
        except ValueError:
            reason = str(response.status_code)
        log.error('failed to lock {node}. reason: {reason}'.format(
            node=name, reason=reason))
    return response
Example #18
def lock_one(name, user=None, description=None):
    name = misc.canonicalize_hostname(name, user=None)
    if user is None:
        user = misc.get_user()
    request = dict(name=name, locked=True, locked_by=user,
                   description=description)
    uri = os.path.join(config.lock_server, 'nodes', name, 'lock', '')
    response = requests.put(uri, json.dumps(request))
    success = response.ok
    if success:
        log.debug('locked %s as %s', name, user)
    else:
        try:
            reason = response.json().get('message')
        except ValueError:
            reason = str(response.status_code)
        log.error('failed to lock {node}. reason: {reason}'.format(
            node=name, reason=reason))
    return response
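
# A sketch of driving lock_one and unpacking its response the same way the
# CLI code elsewhere in this collection does; the hostname, user, and
# description are placeholders.
response = lock_one('smithi001', user='someone', description='manual testing')
if response.ok:
    machine_status = response.json()
    print 'locked, machine_type: %s' % machine_status['machine_type']
else:
    print 'lock failed with status %s' % response.status_code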
Example #19
def schedule():
    parser = argparse.ArgumentParser(description='Schedule ceph integration tests')
    parser.add_argument(
        'config',
        metavar='CONFFILE',
        nargs='*',
        type=config_file,
        action=MergeConfig,
        default={},
        help='config file to read',
        )
    parser.add_argument(
        '--name',
        help='name of suite run the job is part of',
        )
    parser.add_argument(
        '--last-in-suite',
        action='store_true',
        default=False,
        help='mark the last job in a suite so suite post-processing can be run',
        )
    parser.add_argument(
        '--email',
        help='where to send the results of a suite (only applies to the last job in a suite)',
        )
    parser.add_argument(
        '--timeout',
        help='how many seconds to wait for jobs to finish before emailing results (only applies to the last job in a suite)',
        type=int,
        )
    parser.add_argument(
        '--description',
        help='job description',
        )
    parser.add_argument(
        '--owner',
        help='job owner',
        )
    parser.add_argument(
        '--delete',
        metavar='JOBID',
        type=int,
        nargs='*',
        help='list of jobs to remove from the queue',
        )
    parser.add_argument(
        '-v', '--verbose',
        action='store_true',
        default=False,
        help='be more verbose',
        )

    ctx = parser.parse_args()
    if not ctx.last_in_suite:
        assert not ctx.email, '--email is only applicable to the last job in a suite'
        assert not ctx.timeout, '--timeout is only applicable to the last job in a suite'

    from teuthology.misc import read_config, get_user
    if ctx.owner is None:
        ctx.owner = 'scheduled_{user}'.format(user=get_user())
    read_config(ctx)

    import teuthology.queue
    beanstalk = teuthology.queue.connect(ctx)

    beanstalk.use('teuthology')

    if ctx.delete:
        for jobid in ctx.delete:
            job = beanstalk.peek(jobid)
            if job is None:
                print 'job {jid} is not in the queue'.format(jid=jobid)
            else:
                job.delete()
        return

    job_config = dict(
            config=ctx.config,
            name=ctx.name,
            last_in_suite=ctx.last_in_suite,
            email=ctx.email,
            description=ctx.description,
            owner=ctx.owner,
            verbose=ctx.verbose,
            )
    if ctx.timeout is not None:
        job_config['results_timeout'] = ctx.timeout

    job = yaml.safe_dump(job_config)
    jid = beanstalk.put(job, ttr=60*60*24)
    print 'Job scheduled with ID {jid}'.format(jid=jid)
Example #20
def main():
    from gevent import monkey
    monkey.patch_all(dns=False)
    from .orchestra import monkey
    monkey.patch_all()
    import logging

    ctx = parse_args()
    set_up_logging(ctx)
    log = logging.getLogger(__name__)

    if ctx.owner is None:
        from teuthology.misc import get_user
        ctx.owner = get_user()

    write_initial_metadata(ctx)

    if 'targets' in ctx.config and 'roles' in ctx.config:
        targets = len(ctx.config['targets'])
        roles = len(ctx.config['roles'])
        assert targets >= roles, \
            '%d targets are needed for all roles but found %d listed.' % (roles, targets)

    machine_type = ctx.machine_type
    if machine_type is None:
        fallback_default = ctx.config.get('machine_type', 'plana')
        machine_type = ctx.config.get('machine-type', fallback_default)

    if ctx.block:
        assert ctx.lock, \
            'the --block option is only supported with the --lock option'

    from teuthology.misc import read_config
    read_config(ctx)

    log.debug('\n  '.join(['Config:', ] + yaml.safe_dump(ctx.config, default_flow_style=False).splitlines()))

    ctx.summary = dict(success=True)

    ctx.summary['owner'] = ctx.owner

    if ctx.description is not None:
        ctx.summary['description'] = ctx.description

    for task in ctx.config['tasks']:
        assert 'kernel' not in task, \
            'kernel installation should be a base-level item, not part of the tasks list'

    init_tasks = []
    if ctx.lock:
        assert 'targets' not in ctx.config, \
            'You cannot specify targets in a config file when using the --lock option'
        init_tasks.append({'internal.lock_machines': (len(ctx.config['roles']), machine_type)})

    init_tasks.extend([
            {'internal.save_config': None},
            {'internal.check_lock': None},
            {'internal.connect': None},
            {'internal.check_conflict': None},
            {'internal.check_ceph_data': None},
            {'internal.vm_setup': None},
            ])
    if 'kernel' in ctx.config:
        from teuthology.misc import get_distro
        distro = get_distro(ctx)
        if distro == 'ubuntu':
            init_tasks.append({'kernel': ctx.config['kernel']})
    init_tasks.extend([
            {'internal.base': None},
            {'internal.archive': None},
            {'internal.coredump': None},
            {'internal.sudo': None},
            {'internal.syslog': None},
            {'internal.timer': None},
            ])

    ctx.config['tasks'][:0] = init_tasks

    from teuthology.run_tasks import run_tasks
    try:
        run_tasks(tasks=ctx.config['tasks'], ctx=ctx)
    finally:
        if not ctx.summary.get('success') and ctx.config.get('nuke-on-error'):
            from teuthology.nuke import nuke
            # only unlock if we locked them in the first place
            nuke(ctx, log, ctx.lock)
        if ctx.archive is not None:
            with open(os.path.join(ctx.archive, 'summary.yaml'), 'w') as f:
                yaml.safe_dump(ctx.summary, f, default_flow_style=False)
        with contextlib.closing(StringIO.StringIO()) as f:
            yaml.safe_dump(ctx.summary, f)
            log.info('Summary data:\n%s' % f.getvalue())
        with contextlib.closing(StringIO.StringIO()) as f:
            if 'email-on-error' in ctx.config and not ctx.summary.get('success', False):
                yaml.safe_dump(ctx.summary, f)
                yaml.safe_dump(ctx.config, f)
                emsg = f.getvalue()
                subject = "Teuthology error -- %s" % ctx.summary['failure_reason']
                from teuthology.suite import email_results
                email_results(subject,"Teuthology",ctx.config['email-on-error'],emsg)
        if ctx.summary.get('success', True):
            log.info('pass')
        else:
            log.info('FAIL')
            import sys
            sys.exit(1)
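
# The run mains above assemble the task list with a slice assignment:
# `tasks[:0] = init_tasks` prepends in place, so internal setup tasks run
# before everything from the user's config. A tiny illustration:
tasks = [{'ceph': None}, {'rados': None}]
init_tasks = [{'internal.lock_machines': (2, 'plana')},
              {'internal.connect': None}]
tasks[:0] = init_tasks
assert [t.keys()[0] for t in tasks] == [
    'internal.lock_machines', 'internal.connect', 'ceph', 'rados']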
Example #21
        assert ctx.owner is None, \
            '--all and --owner are mutually exclusive'
        assert not machines, \
            '--all and listing specific machines are incompatible'

    if ctx.list or ctx.list_targets:
        assert ctx.desc is None, '--desc does nothing with --list'

        if machines:
            statuses = [get_status(ctx, machine) for machine in machines]
        else:
            statuses = list_locks(ctx)

        if statuses:
            if not machines and ctx.owner is None and not ctx.all:
                ctx.owner = teuthology.get_user()
            if ctx.owner is not None:
                statuses = [status for status in statuses \
                                if status['locked_by'] == ctx.owner]
            if ctx.status is not None:
                statuses = [status for status in statuses \
                                if status['up'] == (ctx.status == 'up')]
            if ctx.list:
                print json.dumps(statuses, indent=4)
            else:
                frag = {'targets': {}}
                for f in statuses:
                    frag['targets'][f['name']] = f['sshpubkey']
                print yaml.safe_dump(frag, default_flow_style=False)
        else:
            log.error('error retrieving lock statuses')
Example #22
def main(ctx):
    if ctx.verbose:
        teuthology.log.setLevel(logging.DEBUG)

    set_config_attr(ctx)

    ret = 0
    user = ctx.owner
    machines = [
        misc.canonicalize_hostname(m, user=False) for m in ctx.machines
    ]
    machines_to_update = []

    if ctx.targets:
        try:
            with open(ctx.targets) as f:
                g = yaml.safe_load_all(f)
                for new in g:
                    if 'targets' in new:
                        for t in new['targets'].iterkeys():
                            machines.append(t)
        except IOError as e:
            raise argparse.ArgumentTypeError(str(e))

    if ctx.f:
        assert ctx.lock or ctx.unlock, \
            '-f is only supported by --lock and --unlock'
    if machines:
        assert ctx.lock or ctx.unlock or ctx.list or ctx.list_targets \
            or ctx.update or ctx.brief, \
            'machines cannot be specified with that operation'
    else:
        if ctx.lock:
            log.error("--lock requires specific machines passed as arguments")
        else:
            # This condition might never be hit, but it's not clear.
            assert ctx.num_to_lock or ctx.list or ctx.list_targets or \
                ctx.summary or ctx.brief, \
                'machines must be specified for that operation'
    if ctx.all:
        assert ctx.list or ctx.list_targets or ctx.brief, \
            '--all can only be used with --list, --list-targets, and --brief'
        assert ctx.owner is None, \
            '--all and --owner are mutually exclusive'
        assert not machines, \
            '--all and listing specific machines are incompatible'
    if ctx.num_to_lock:
        assert ctx.machine_type, \
            'must specify machine type to lock'

    if ctx.brief or ctx.list or ctx.list_targets:
        assert ctx.desc is None, '--desc does nothing with --list/--brief'

        # we may need to update host keys for vms.  Don't do it for
        # every vm; however, update any vms included in the list given
        # to the CLI (machines), or any owned by the specified owner or
        # invoking user if no machines are specified.
        vmachines = []
        statuses = query.get_statuses(machines)
        owner = ctx.owner or misc.get_user()
        for machine in statuses:
            if query.is_vm(status=machine) and machine['locked'] and \
               (machines or machine['locked_by'] == owner):
                vmachines.append(machine['name'])
        if vmachines:
            log.info("updating host keys for %s", ' '.join(sorted(vmachines)))
            keys.do_update_keys(vmachines, _raise=False)
            # get statuses again to refresh any updated keys
            statuses = query.get_statuses(machines)
        if statuses:
            statuses = util.winnow(statuses, ctx.machine_type, 'machine_type')
            if not machines and ctx.owner is None and not ctx.all:
                ctx.owner = misc.get_user()
            statuses = util.winnow(statuses, ctx.owner, 'locked_by')
            statuses = util.winnow(statuses, ctx.status, 'up',
                                   lambda s: s['up'] == (ctx.status == 'up'))
            statuses = util.winnow(
                statuses, ctx.locked, 'locked', lambda s: s['locked'] ==
                (ctx.locked == 'true'))
            statuses = util.winnow(statuses, ctx.desc, 'description')
            statuses = util.winnow(statuses, ctx.desc_pattern, 'description',
                                   lambda s: s['description'] and
                                   ctx.desc_pattern in s['description'])
            if ctx.json_query:
                statuses = util.json_matching_statuses(ctx.json_query,
                                                       statuses)
            statuses = util.winnow(statuses, ctx.os_type, 'os_type')
            statuses = util.winnow(statuses, ctx.os_version, 'os_version')

            # When listing, only show the vm_host's name, not every detail
            for s in statuses:
                if not query.is_vm(status=s):
                    continue
                # with an OpenStack API, there is no host for a VM
                if s['vm_host'] is None:
                    continue
                vm_host_name = s.get('vm_host', dict())['name']
                if vm_host_name:
                    s['vm_host'] = vm_host_name
            if ctx.list:
                print json.dumps(statuses, indent=4)

            elif ctx.brief:
                for s in sorted(statuses, key=lambda s: s.get('name')):
                    locked = "un" if s['locked'] == 0 else "  "
                    mo = re.match(r'\w+@(\w+?)\..*', s['name'])
                    host = mo.group(1) if mo else s['name']
                    print '{host} {locked}locked {owner} "{desc}"'.format(
                        locked=locked,
                        host=host,
                        owner=s['locked_by'],
                        desc=s['description'])

            else:
                frag = {'targets': {}}
                for f in statuses:
                    frag['targets'][f['name']] = f['ssh_pub_key']
                print yaml.safe_dump(frag, default_flow_style=False)
        else:
            log.error('error retrieving lock statuses')
            ret = 1

    elif ctx.summary:
        do_summary(ctx)
        return 0

    elif ctx.lock:
        if not util.vps_version_or_type_valid(ctx.machine_type, ctx.os_type,
                                              ctx.os_version):
            log.error('Invalid os-type or version detected -- lock failed')
            return 1
        reimage_types = teuthology.provision.fog.get_types()
        reimage_machines = list()
        updatekeys_machines = list()
        for machine in machines:
            resp = ops.lock_one(machine, user, ctx.desc)
            if resp.ok:
                machine_status = resp.json()
                machine_type = machine_status['machine_type']
            if not resp.ok:
                ret = 1
                if not ctx.f:
                    return ret
            elif not query.is_vm(machine, machine_status):
                if machine_type in reimage_types:
                    # Reimage in parallel just below here
                    reimage_machines.append(machine)
                # Update keys last
                updatekeys_machines.append(machine)
            else:
                machines_to_update.append(machine)
                teuthology.provision.create_if_vm(
                    ctx,
                    misc.canonicalize_hostname(machine),
                )
        with teuthology.parallel.parallel() as p:
            for machine in reimage_machines:
                p.spawn(teuthology.provision.reimage, ctx, machine)
        for machine in updatekeys_machines:
            keys.do_update_keys([machine])

    elif ctx.unlock:
        if ctx.owner is None and user is None:
            user = misc.get_user()
        # If none of them are vpm, do them all in one shot
        if not filter(query.is_vm, machines):
            res = ops.unlock_many(machines, user)
            return 0 if res else 1
        for machine in machines:
            if not ops.unlock_one(ctx, machine, user):
                ret = 1
                if not ctx.f:
                    return ret
            else:
                machines_to_update.append(machine)
    elif ctx.num_to_lock:
        result = ops.lock_many(ctx, ctx.num_to_lock, ctx.machine_type, user,
                               ctx.desc, ctx.os_type, ctx.os_version, ctx.arch)
        if not result:
            ret = 1
        else:
            machines_to_update = result.keys()
            if ctx.machine_type == 'vps':
                shortnames = ' '.join([
                    misc.decanonicalize_hostname(name)
                    for name in result.keys()
                ])
                if len(result) < ctx.num_to_lock:
                    log.error("Locking failed.")
                    for machine in result:
                        ops.unlock_one(ctx, machine, user)
                    ret = 1
                else:
                    log.info("Successfully Locked:\n%s\n" % shortnames)
                    log.info("Unable to display keys at this time (virtual " +
                             "machines are booting).")
                    log.info(
                        "Please run teuthology-lock --list-targets %s once " +
                        "these machines come up.", shortnames)
            else:
                print yaml.safe_dump(dict(targets=result),
                                     default_flow_style=False)
    elif ctx.update:
        assert ctx.desc is not None or ctx.status is not None, \
            'you must specify description or status to update'
        assert ctx.owner is None, 'only description and status may be updated'
        machines_to_update = machines

        if ctx.desc is not None or ctx.status is not None:
            for machine in machines_to_update:
                ops.update_lock(machine, ctx.desc, ctx.status)

    return ret
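
# The lock CLI main above (and its variant in Example #24) chains util.winnow
# to filter the status dicts. A plausible reconstruction of its contract
# (keep everything when the filter value is None, otherwise match on a key
# or a predicate), offered as an assumption rather than the verbatim
# teuthology implementation:
def winnow(statuses, arg, status_key, func=None):
    if arg is None or not statuses:
        return statuses
    if func is not None:
        return [s for s in statuses if func(s)]
    return [s for s in statuses if s.get(status_key) == arg]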
Example #23
def main():
    from gevent import monkey; monkey.patch_all()
    from .orchestra import monkey; monkey.patch_all()

    import time
    import logging

    log = logging.getLogger(__name__)
    ctx = parse_args()

    loglevel = logging.INFO
    if ctx.verbose:
        loglevel = logging.DEBUG

    logging.basicConfig(
        level=loglevel,
        )

    if ctx.block:
        assert ctx.lock, \
            'the --block option is only supported with the --lock option'

    from teuthology.misc import read_config
    read_config(ctx)

    if ctx.archive is not None:
        os.mkdir(ctx.archive)

        handler = logging.FileHandler(
            filename=os.path.join(ctx.archive, 'teuthology.log'),
            )
        formatter = logging.Formatter(
            fmt='%(asctime)s.%(msecs)03d %(levelname)s:%(name)s:%(message)s',
            datefmt='%Y-%m-%dT%H:%M:%S',
            )
        handler.setFormatter(formatter)
        logging.getLogger().addHandler(handler)

        with open(os.path.join(ctx.archive, 'pid'), 'w') as f:
            f.write('%d' % os.getpid())

    log.debug('\n  '.join(['Config:', ] + yaml.safe_dump(ctx.config, default_flow_style=False).splitlines()))

    ctx.summary = dict(success=True)

    if ctx.owner is None:
        from teuthology.misc import get_user
        ctx.owner = get_user()
    ctx.summary['owner'] = ctx.owner

    if ctx.description is not None:
        ctx.summary['description'] = ctx.description

    for task in ctx.config['tasks']:
        assert 'kernel' not in task, \
            'kernel installation should be a base-level item, not part of the tasks list'

    init_tasks = []
    if ctx.lock:
        assert 'targets' not in ctx.config, \
            'You cannot specify targets in a config file when using the --lock option'
        init_tasks.append({'internal.lock_machines': len(ctx.config['roles'])})

    init_tasks.extend([
            {'internal.save_config': None},
            {'internal.check_lock': None},
            {'internal.connect': None},
            {'internal.check_conflict': None},
            ])
    if 'kernel' in ctx.config:
        init_tasks.append({'kernel': ctx.config['kernel']})
    init_tasks.extend([
            {'internal.base': None},
            {'internal.archive': None},
            {'internal.coredump': None},
            {'internal.syslog': None},
            ])

    ctx.config['tasks'][:0] = init_tasks

    start_time = time.time()

    from teuthology.run_tasks import run_tasks
    try:
        run_tasks(tasks=ctx.config['tasks'], ctx=ctx)
    finally:
        end_time = time.time()
        duration = end_time - start_time
        ctx.summary['duration'] = duration
        log.info("Duration was %f seconds" % duration)
        if not ctx.summary.get('success') and ctx.config.get('nuke-on-error'):
            from teuthology.parallel import parallel
            with parallel() as p:
                for target, hostkey in ctx.config['targets'].iteritems():
                    p.spawn(
                        nuke,
                        targets={target: hostkey},
                        owner=ctx.owner,
                        log=log,
                        teuth_config=ctx.teuthology_config,
                        # only unlock if we locked them in the first place
                        should_unlock=ctx.lock,
                        )
        if ctx.archive is not None:
            with open(os.path.join(ctx.archive, 'summary.yaml'), 'w') as f:
                yaml.safe_dump(ctx.summary, f, default_flow_style=False)
Example #24
def main(ctx):
    if ctx.verbose:
        teuthology.log.setLevel(logging.DEBUG)

    set_config_attr(ctx)

    ret = 0
    user = ctx.owner
    machines = [misc.canonicalize_hostname(m, user=False)
                for m in ctx.machines]
    machines_to_update = []

    if ctx.targets:
        try:
            with open(ctx.targets) as f:
                g = yaml.safe_load_all(f)
                for new in g:
                    if 'targets' in new:
                        for t in new['targets'].iterkeys():
                            machines.append(t)
        except IOError as e:
            raise argparse.ArgumentTypeError(str(e))

    if ctx.f:
        assert ctx.lock or ctx.unlock, \
            '-f is only supported by --lock and --unlock'
    if machines:
        assert ctx.lock or ctx.unlock or ctx.list or ctx.list_targets \
            or ctx.update or ctx.brief, \
            'machines cannot be specified with that operation'
    else:
        if ctx.lock:
            log.error("--lock requires specific machines passed as arguments")
        else:
            # This condition might never be hit, but it's not clear.
            assert ctx.num_to_lock or ctx.list or ctx.list_targets or \
                ctx.summary or ctx.brief, \
                'machines must be specified for that operation'
    if ctx.all:
        assert ctx.list or ctx.list_targets or ctx.brief, \
            '--all can only be used with --list, --list-targets, and --brief'
        assert ctx.owner is None, \
            '--all and --owner are mutually exclusive'
        assert not machines, \
            '--all and listing specific machines are incompatible'
    if ctx.num_to_lock:
        assert ctx.machine_type, \
            'must specify machine type to lock'

    if ctx.brief or ctx.list or ctx.list_targets:
        assert ctx.desc is None, '--desc does nothing with --list/--brief'

        # we may need to update host keys for vms.  Don't do it for
        # every vm; however, update any vms included in the list given
        # to the CLI (machines), or any owned by the specified owner or
        # invoking user if no machines are specified.
        vmachines = []
        statuses = query.get_statuses(machines)
        owner = ctx.owner or misc.get_user()
        for machine in statuses:
            if query.is_vm(status=machine) and machine['locked'] and \
               (machines or machine['locked_by'] == owner):
                vmachines.append(machine['name'])
        if vmachines:
            log.info("updating host keys for %s", ' '.join(sorted(vmachines)))
            keys.do_update_keys(vmachines, _raise=False)
            # get statuses again to refresh any updated keys
            statuses = query.get_statuses(machines)
        if statuses:
            statuses = util.winnow(statuses, ctx.machine_type, 'machine_type')
            if not machines and ctx.owner is None and not ctx.all:
                ctx.owner = misc.get_user()
            statuses = util.winnow(statuses, ctx.owner, 'locked_by')
            statuses = util.winnow(statuses, ctx.status, 'up',
                                lambda s: s['up'] == (ctx.status == 'up'))
            statuses = util.winnow(statuses, ctx.locked, 'locked',
                                lambda s: s['locked'] == (ctx.locked == 'true'))
            statuses = util.winnow(statuses, ctx.desc, 'description')
            statuses = util.winnow(statuses, ctx.desc_pattern, 'description',
                                   lambda s: s['description'] and
                                   ctx.desc_pattern in s['description'])
            if ctx.json_query:
                statuses = util.json_matching_statuses(ctx.json_query, statuses)
            statuses = util.winnow(statuses, ctx.os_type, 'os_type')
            statuses = util.winnow(statuses, ctx.os_version, 'os_version')

            # When listing, only show the vm_host's name, not every detail
            for s in statuses:
                if not query.is_vm(status=s):
                    continue
                # with an OpenStack API, there is no host for a VM
                if s['vm_host'] is None:
                    continue
                vm_host_name = s.get('vm_host', dict())['name']
                if vm_host_name:
                    s['vm_host'] = vm_host_name
            if ctx.list:
                print json.dumps(statuses, indent=4)

            elif ctx.brief:
                for s in sorted(statuses, key=lambda s: s.get('name')):
                    locked = "un" if s['locked'] == 0 else "  "
                    mo = re.match(r'\w+@(\w+?)\..*', s['name'])
                    host = mo.group(1) if mo else s['name']
                    print '{host} {locked}locked {owner} "{desc}"'.format(
                        locked=locked, host=host,
                        owner=s['locked_by'], desc=s['description'])

            else:
                frag = {'targets': {}}
                for f in statuses:
                    frag['targets'][f['name']] = f['ssh_pub_key']
                print yaml.safe_dump(frag, default_flow_style=False)
        else:
            log.error('error retrieving lock statuses')
            ret = 1

    elif ctx.summary:
        do_summary(ctx)
        return 0

    elif ctx.lock:
        if not util.vps_version_or_type_valid(
                ctx.machine_type, ctx.os_type, ctx.os_version):
            log.error('Invalid os-type or version detected -- lock failed')
            return 1
        reimage_types = teuthology.provision.fog.get_types()
        reimage_machines = list()
        updatekeys_machines = list()
        for machine in machines:
            resp = ops.lock_one(machine, user, ctx.desc)
            if resp.ok:
                machine_status = resp.json()
                machine_type = machine_status['machine_type']
            if not resp.ok:
                ret = 1
                if not ctx.f:
                    return ret
            elif not query.is_vm(machine, machine_status):
                if machine_type in reimage_types:
                    # Reimage in parallel just below here
                    reimage_machines.append(machine)
                # Update keys last
                updatekeys_machines.append(machine)
            else:
                machines_to_update.append(machine)
                teuthology.provision.create_if_vm(
                    ctx,
                    misc.canonicalize_hostname(machine),
                )
        with teuthology.parallel.parallel() as p:
            for machine in reimage_machines:
                p.spawn(teuthology.provision.reimage, ctx, machine)
        for machine in updatekeys_machines:
            keys.do_update_keys([machine])

    elif ctx.unlock:
        if ctx.owner is None and user is None:
            user = misc.get_user()
        # If none of them are vpm, do them all in one shot
        if not filter(query.is_vm, machines):
            res = ops.unlock_many(machines, user)
            return 0 if res else 1
        for machine in machines:
            if not ops.unlock_one(ctx, machine, user):
                ret = 1
                if not ctx.f:
                    return ret
            else:
                machines_to_update.append(machine)
    elif ctx.num_to_lock:
        result = ops.lock_many(ctx, ctx.num_to_lock, ctx.machine_type, user,
                               ctx.desc, ctx.os_type, ctx.os_version, ctx.arch)
        if not result:
            ret = 1
        else:
            machines_to_update = result.keys()
            if ctx.machine_type == 'vps':
                shortnames = ' '.join(
                    [misc.decanonicalize_hostname(name) for name in
                     result.keys()]
                )
                if len(result) < ctx.num_to_lock:
                    log.error("Locking failed.")
                    for machine in result:
                        ops.unlock_one(ctx, machine, user)
                    ret = 1
                else:
                    log.info("Successfully Locked:\n%s\n" % shortnames)
                    log.info(
                        "Unable to display keys at this time (virtual " +
                        "machines are booting).")
                    log.info(
                        "Please run teuthology-lock --list-targets %s once " +
                        "these machines come up.",
                        shortnames)
            else:
                print yaml.safe_dump(
                    dict(targets=result),
                    default_flow_style=False)
    elif ctx.update:
        assert ctx.desc is not None or ctx.status is not None, \
            'you must specify description or status to update'
        assert ctx.owner is None, 'only description and status may be updated'
        machines_to_update = machines

        if ctx.desc is not None or ctx.status is not None:
            for machine in machines_to_update:
                ops.update_lock(machine, ctx.desc, ctx.status)

    return ret
Example #25
def schedule():
    parser = argparse.ArgumentParser(description='Schedule ceph integration tests')
    parser.add_argument(
        'config',
        metavar='CONFFILE',
        nargs='*',
        type=config_file,
        action=MergeConfig,
        default={},
        help='config file to read',
        )
    parser.add_argument(
        '--name',
        help='name of suite run the job is part of',
        )
    parser.add_argument(
        '--last-in-suite',
        action='store_true',
        default=False,
        help='mark the last job in a suite so suite post-processing can be run',
        )
    parser.add_argument(
        '--email',
        help='where to send the results of a suite (only applies to the last job in a suite)',
        )
    parser.add_argument(
        '--timeout',
        help='how many seconds to wait for jobs to finish before emailing results (only applies to the last job in a suite)',
        type=int,
        )
    parser.add_argument(
        '--description',
        help='job description',
        )
    parser.add_argument(
        '--owner',
        help='job owner',
        )
    parser.add_argument(
        '--delete',
        metavar='JOBID',
        type=int,
        nargs='*',
        help='list of jobs to remove from the queue',
        )
    parser.add_argument(
        '-n', '--num',
        default=1,
        type=int,
        help='number of times to run/queue the job'
        )
    parser.add_argument(
        '-v', '--verbose',
        action='store_true',
        default=False,
        help='be more verbose',
        )
    parser.add_argument(
        '-b', '--branch',
        default='master',
        help='which branch of teuthology to use',
        )
    parser.add_argument(
        '-s', '--show',
        metavar='JOBID',
        type=int,
        nargs='*',
        help='output the contents of specified jobs in the queue',
        )

    ctx = parser.parse_args()
    if not ctx.last_in_suite:
        assert not ctx.email, '--email is only applicable to the last job in a suite'
        assert not ctx.timeout, '--timeout is only applicable to the last job in a suite'

    from teuthology.misc import read_config, get_user
    if ctx.owner is None:
        ctx.owner = 'scheduled_{user}'.format(user=get_user())
    read_config(ctx)

    import teuthology.queue
    beanstalk = teuthology.queue.connect(ctx)

    tube = 'teuthology'
    if ctx.branch != 'master':
        tube += '-' + ctx.branch
    beanstalk.use(tube)

    if ctx.show:
        for jobid in ctx.show:
            job = beanstalk.peek(jobid)
            if job is None:
                if ctx.verbose:
                    print 'job {jid} is not in the queue'.format(jid=jobid)
            else:
                print 'job {jid} contains: '.format(jid=jobid), job.body
        return

    if ctx.delete:
        for jobid in ctx.delete:
            job = beanstalk.peek(jobid)
            if job is None:
                print 'job {jid} is not in the queue'.format(jid=jobid)
            else:
                job.delete()
        return

    job_config = dict(
            config=ctx.config,
            name=ctx.name,
            last_in_suite=ctx.last_in_suite,
            email=ctx.email,
            description=ctx.description,
            owner=ctx.owner,
            verbose=ctx.verbose,
            )
    if ctx.timeout is not None:
        job_config['results_timeout'] = ctx.timeout

    job = yaml.safe_dump(job_config)
    num = ctx.num
    while num > 0:
        jid = beanstalk.put(job, ttr=60*60*24)
        print 'Job scheduled with ID {jid}'.format(jid=jid)
        num -= 1
Example #26
def lock_many(ctx,
              num,
              machine_type,
              user=None,
              description=None,
              os_type=None,
              os_version=None,
              arch=None):
    if user is None:
        user = misc.get_user()

    if not util.vps_version_or_type_valid(ctx.machine_type, os_type,
                                          os_version):
        log.error('Invalid os-type or version detected -- lock failed')
        return

    # In the for loop below we can safely query for all bare-metal machine_type
    # values at once. So, if we're being asked for 'plana,mira,burnupi', do it
    # all in one shot. If we are passed 'plana,mira,burnupi,vps', do one query
    # for 'plana,mira,burnupi' and one for 'vps'
    machine_types_list = misc.get_multi_machine_types(machine_type)
    if machine_types_list == ['vps']:
        machine_types = machine_types_list
    elif machine_types_list == ['openstack']:
        return lock_many_openstack(ctx,
                                   num,
                                   machine_type,
                                   user=user,
                                   description=description,
                                   arch=arch)
    elif 'vps' in machine_types_list:
        machine_types_non_vps = list(machine_types_list)
        machine_types_non_vps.remove('vps')
        machine_types_non_vps = '|'.join(machine_types_non_vps)
        machine_types = [machine_types_non_vps, 'vps']
    else:
        machine_types_str = '|'.join(machine_types_list)
        machine_types = [
            machine_types_str,
        ]

    for machine_type in machine_types:
        uri = os.path.join(config.lock_server, 'nodes', 'lock_many', '')
        data = dict(
            locked_by=user,
            count=num,
            machine_type=machine_type,
            description=description,
        )
        # Only query for os_type/os_version if non-vps and non-libcloud, since
        # in that case we just create them.
        vm_types = ['vps'] + teuthology.provision.cloud.get_types()
        reimage_types = teuthology.provision.get_reimage_types()
        if machine_type not in vm_types + reimage_types:
            if os_type:
                data['os_type'] = os_type
            if os_version:
                data['os_version'] = os_version
        if arch:
            data['arch'] = arch
        log.debug("lock_many request: %s", repr(data))
        response = requests.post(
            uri,
            data=json.dumps(data),
            headers={'content-type': 'application/json'},
        )
        if response.ok:
            machines = {
                misc.canonicalize_hostname(machine['name']):
                machine['ssh_pub_key']
                for machine in response.json()
            }
            log.debug('locked {machines}'.format(
                machines=', '.join(machines.keys())))
            if machine_type in vm_types:
                ok_machs = {}
                update_nodes(machines, True)
                for machine in machines:
                    if teuthology.provision.create_if_vm(ctx, machine):
                        ok_machs[machine] = machines[machine]
                    else:
                        log.error('Unable to create virtual machine: %s',
                                  machine)
                        unlock_one(ctx, machine, user)
                ok_machs = do_update_keys(list(ok_machs.keys()))[1]
                update_nodes(ok_machs)
                return ok_machs
            elif machine_type in reimage_types:
                reimaged = dict()
                console_log_conf = dict(
                    logfile_name='{shortname}_reimage.log',
                    remotes=[
                        teuthology.orchestra.remote.Remote(machine)
                        for machine in machines
                    ],
                )
                with console_log.task(ctx, console_log_conf):
                    update_nodes(machines, True)
                    with teuthology.parallel.parallel() as p:
                        for machine in machines:
                            p.spawn(teuthology.provision.reimage, ctx, machine,
                                    machine_type)
                            reimaged[machine] = machines[machine]
                reimaged = do_update_keys(reimaged.keys())[1]
                update_nodes(reimaged)
                return reimaged
            return machines
        elif response.status_code == 503:
            log.error('Insufficient nodes available to lock %d %s nodes.', num,
                      machine_type)
            log.error(response.text)
        else:
            log.error('Could not lock %d %s nodes, reason: unknown.', num,
                      machine_type)
    return []
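
# Both lock_many variants here depend on misc.get_multi_machine_types to
# split a spec like 'plana,mira,burnupi' into a list. A plausible
# reconstruction assuming comma-, space-, or tab-separated input; not
# guaranteed to match teuthology's exact implementation:
def get_multi_machine_types(machinetype):
    machinetypes = []
    for sep in (',', ' ', '\t'):
        if sep in machinetype:
            machinetypes = machinetype.split(sep)
            break
    if not machinetypes:
        machinetypes = [machinetype]
    return machinetypes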
Example #27
def lock_many(ctx, num, machine_type, user=None, description=None,
              os_type=None, os_version=None, arch=None):
    if user is None:
        user = misc.get_user()

    if not util.vps_version_or_type_valid(
            ctx.machine_type,
            os_type,
            os_version
    ):
        log.error('Invalid os-type or version detected -- lock failed')
        return

    # In the for loop below we can safely query for all bare-metal machine_type
    # values at once. So, if we're being asked for 'plana,mira,burnupi', do it
    # all in one shot. If we are passed 'plana,mira,burnupi,vps', do one query
    # for 'plana,mira,burnupi' and one for 'vps'
    machine_types_list = misc.get_multi_machine_types(machine_type)
    if machine_types_list == ['vps']:
        machine_types = machine_types_list
    elif machine_types_list == ['openstack']:
        return lock_many_openstack(ctx, num, machine_type,
                                   user=user,
                                   description=description,
                                   arch=arch)
    elif 'vps' in machine_types_list:
        machine_types_non_vps = list(machine_types_list)
        machine_types_non_vps.remove('vps')
        machine_types_non_vps = '|'.join(machine_types_non_vps)
        machine_types = [machine_types_non_vps, 'vps']
    else:
        machine_types_str = '|'.join(machine_types_list)
        machine_types = [machine_types_str, ]

    for machine_type in machine_types:
        uri = os.path.join(config.lock_server, 'nodes', 'lock_many', '')
        data = dict(
            locked_by=user,
            count=num,
            machine_type=machine_type,
            description=description,
        )
        # Only send os_type/os_version for bare-metal queries; for vm and
        # reimage types the OS is set when we provision the node ourselves.
        vm_types = ['vps'] + teuthology.provision.cloud.get_types()
        reimage_types = teuthology.provision.fog.get_types()
        if machine_type not in vm_types + reimage_types:
            if os_type:
                data['os_type'] = os_type
            if os_version:
                data['os_version'] = os_version
        if arch:
            data['arch'] = arch
        log.debug("lock_many request: %s", repr(data))
        response = requests.post(
            uri,
            data=json.dumps(data),
            headers={'content-type': 'application/json'},
        )
        if response.ok:
            machines = {misc.canonicalize_hostname(machine['name']):
                        machine['ssh_pub_key'] for machine in response.json()}
            log.debug('locked {machines}'.format(
                machines=', '.join(machines.keys())))
            if machine_type in vm_types:
                ok_machs = {}
                for machine in machines:
                    if teuthology.provision.create_if_vm(ctx, machine):
                        ok_machs[machine] = machines[machine]
                    else:
                        log.error('Unable to create virtual machine: %s',
                                  machine)
                        unlock_one(ctx, machine, user)
                ok_machs = keys.do_update_keys(ok_machs.keys())[1]
                return ok_machs
            elif machine_type in reimage_types:
                reimaged = dict()
                console_log_conf = dict(
                    logfile_name='{shortname}_reimage.log',
                    remotes=[teuthology.orchestra.remote.Remote(machine)
                             for machine in machines],
                )
                with console_log.task(
                        ctx, console_log_conf):
                    with teuthology.parallel.parallel() as p:
                        for machine in machines:
                            p.spawn(teuthology.provision.reimage, ctx, machine)
                            reimaged[machine] = machines[machine]
                reimaged = keys.do_update_keys(reimaged.keys())[1]
                return reimaged
            return machines
        elif response.status_code == 503:
            log.error('Insufficient nodes available to lock %d %s nodes.',
                      num, machine_type)
            log.error(response.text)
        else:
            log.error('Could not lock %d %s nodes, reason: unknown.',
                      num, machine_type)
    return []
Example #28
def test_owner(self):
    args = self.basic_args
    args['--owner'] = None
    job_dict = build_config(args)
    assert job_dict['owner'] == 'scheduled_%s' % get_user()
Example #29
def main(ctx):
    if ctx.owner is None:
        ctx.owner = 'scheduled_{user}'.format(user=get_user())
    read_config(ctx)

    beanstalk = teuthology.beanstalk.connect()

    tube = ctx.worker
    beanstalk.use(tube)

    if ctx.show:
        for job_id in ctx.show:
            job = beanstalk.peek(job_id)
            if job is None:
                if ctx.verbose:
                    print 'job {jid} is not in the queue'.format(jid=job_id)
            else:
                print '--- job {jid} priority {prio} ---\n'.format(
                    jid=job_id, prio=job.stats()['pri']), job.body
        return

    if ctx.delete:
        for job_id in ctx.delete:
            job = beanstalk.peek(job_id)
            if job is None:
                print 'job {jid} is not in the queue'.format(jid=job_id)
            else:
                job.delete()
                name = yaml.safe_load(job.body).get('name')
                if name:
                    report.try_delete_jobs(name, job_id)
        return

    # strip out targets; the worker will allocate new ones when we run
    # the job with --lock.
    if ctx.config.get('targets'):
        del ctx.config['targets']

    job_config = dict(
        name=ctx.name,
        last_in_suite=ctx.last_in_suite,
        email=ctx.email,
        description=ctx.description,
        owner=ctx.owner,
        verbose=ctx.verbose,
        machine_type=ctx.worker,
    )
    # Merge job_config and ctx.config
    job_config.update(ctx.config)
    if ctx.timeout is not None:
        job_config['results_timeout'] = ctx.timeout

    job = yaml.safe_dump(job_config)
    num = ctx.num
    while num > 0:
        jid = beanstalk.put(
            job,
            ttr=60 * 60 * 24,
            priority=ctx.priority,
        )
        print 'Job scheduled with name {name} and ID {jid}'.format(
            name=ctx.name, jid=jid)
        job_config['job_id'] = str(jid)
        report.try_push_job_info(job_config, dict(status='queued'))
        num -= 1
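The scheduling loop above is a plain beanstalk producer. A stand-alone sketch of the same put/peek/delete lifecycle, assuming the beanstalkc client that teuthology.beanstalk wraps (host, port, and tube name here are made up):

import yaml
import beanstalkc

conn = beanstalkc.Connection(host='localhost', port=11300)
conn.use('plana')  # producers write into the tube a worker watches

body = yaml.safe_dump(dict(name='demo-suite', description='example'))
jid = conn.put(body, priority=1000, ttr=60 * 60 * 24)  # same ttr as above

job = conn.peek(jid)  # inspect without reserving, as the --show branch does
if job is not None:
    print(job.stats()['pri'])  # the priority printed above
    print(job.body)
    job.delete()               # remove from the queue, as --delete does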
Example #30
def main():
    from gevent import monkey; monkey.patch_all(dns=False)
    from .orchestra import monkey; monkey.patch_all()

    import logging

    log = logging.getLogger(__name__)
    ctx = parse_args()

    loglevel = logging.INFO
    if ctx.verbose:
        loglevel = logging.DEBUG

    logging.basicConfig(
        level=loglevel,
        )

    if 'targets' in ctx.config and 'roles' in ctx.config:
        targets = len(ctx.config['targets'])
        roles = len(ctx.config['roles'])
        assert targets >= roles, \
            '%d targets are needed for all roles but found %d listed.' % (roles, targets)

    if ctx.block:
        assert ctx.lock, \
            'the --block option is only supported with the --lock option'

    from teuthology.misc import read_config
    read_config(ctx)

    log.debug('\n  '.join(['Config:', ] + yaml.safe_dump(ctx.config, default_flow_style=False).splitlines()))

    ctx.summary = dict(success=True)

    if ctx.owner is None:
        from teuthology.misc import get_user
        ctx.owner = get_user()
    ctx.summary['owner'] = ctx.owner

    if ctx.description is not None:
        ctx.summary['description'] = ctx.description

    if ctx.archive is not None:
        os.mkdir(ctx.archive)

        handler = logging.FileHandler(
            filename=os.path.join(ctx.archive, 'teuthology.log'),
            )
        formatter = logging.Formatter(
            fmt='%(asctime)s.%(msecs)03d %(levelname)s:%(name)s:%(message)s',
            datefmt='%Y-%m-%dT%H:%M:%S',
            )
        handler.setFormatter(formatter)
        logging.getLogger().addHandler(handler)

        with file(os.path.join(ctx.archive, 'pid'), 'w') as f:
            f.write('%d' % os.getpid())

        with file(os.path.join(ctx.archive, 'owner'), 'w') as f:
            f.write(ctx.owner + '\n')

        with file(os.path.join(ctx.archive, 'orig.config.yaml'), 'w') as f:
            yaml.safe_dump(ctx.config, f, default_flow_style=False)

    for task in ctx.config['tasks']:
        assert 'kernel' not in task, \
            'kernel installation should be a base-level item, not part of the tasks list'

    init_tasks = []
    if ctx.lock:
        assert 'targets' not in ctx.config, \
            'You cannot specify targets in a config file when using the --lock option'
        init_tasks.append({'internal.lock_machines': len(ctx.config['roles'])})

    init_tasks.extend([
            {'internal.save_config': None},
            {'internal.check_lock': None},
            {'internal.connect': None},
            {'internal.check_conflict': None},
            ])
    if 'kernel' in ctx.config:
        init_tasks.append({'kernel': ctx.config['kernel']})
    init_tasks.extend([
            {'internal.base': None},
            {'internal.archive': None},
            {'internal.coredump': None},
            {'internal.syslog': None},
            {'internal.timer': None},
            ])

    ctx.config['tasks'][:0] = init_tasks

    from teuthology.run_tasks import run_tasks
    try:
        run_tasks(tasks=ctx.config['tasks'], ctx=ctx)
    finally:
        if not ctx.summary.get('success') and ctx.config.get('nuke-on-error'):
            from teuthology.nuke import nuke
            # only unlock if we locked them in the first place
            nuke(ctx, log, ctx.lock)
        if ctx.archive is not None:
            with file(os.path.join(ctx.archive, 'summary.yaml'), 'w') as f:
                yaml.safe_dump(ctx.summary, f, default_flow_style=False)

    if not ctx.summary.get('success', True):
        import sys
        sys.exit(1)
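The line ctx.config['tasks'][:0] = init_tasks above prepends with slice assignment, mutating the task list in place instead of rebinding the name. A tiny self-contained illustration of the idiom:

# slice assignment at [:0] inserts before index 0, in place, so other
# references to the same list object see the prepended tasks too
tasks = [{'ceph': None}, {'interactive': None}]
init_tasks = [{'internal.lock_machines': 2}, {'internal.connect': None}]
tasks[:0] = init_tasks
assert tasks[0] == {'internal.lock_machines': 2}
assert tasks[-1] == {'interactive': None}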
Example #31
def main():
    from gevent import monkey
    monkey.patch_all(dns=False)
    from .orchestra import monkey
    monkey.patch_all()
    import logging

    ctx = parse_args()
    set_up_logging(ctx)
    log = logging.getLogger(__name__)

    if ctx.owner is None:
        from teuthology.misc import get_user
        ctx.owner = get_user()

    write_initial_metadata(ctx)

    if 'targets' in ctx.config and 'roles' in ctx.config:
        targets = len(ctx.config['targets'])
        roles = len(ctx.config['roles'])
        assert targets >= roles, \
            '%d targets are needed for all roles but found %d listed.' % (roles, targets)

    machine_type = ctx.machine_type
    if machine_type is None:
        fallback_default = ctx.config.get('machine_type', 'plana')
        machine_type = ctx.config.get('machine-type', fallback_default)

    if ctx.block:
        assert ctx.lock, \
            'the --block option is only supported with the --lock option'

    from teuthology.misc import read_config
    read_config(ctx)

    log.debug('\n  '.join([
        'Config:',
    ] + yaml.safe_dump(ctx.config, default_flow_style=False).splitlines()))

    ctx.summary = dict(success=True)

    ctx.summary['owner'] = ctx.owner

    if ctx.description is not None:
        ctx.summary['description'] = ctx.description

    for task in ctx.config['tasks']:
        assert 'kernel' not in task, \
            'kernel installation should be a base-level item, not part of the tasks list'

    init_tasks = []
    if ctx.lock:
        assert 'targets' not in ctx.config, \
            'You cannot specify targets in a config file when using the --lock option'
        init_tasks.append({
            'internal.lock_machines': (len(ctx.config['roles']), machine_type)
        })

    init_tasks.extend([
        {'internal.save_config': None},
        {'internal.check_lock': None},
        {'internal.connect': None},
        {'internal.check_conflict': None},
        {'internal.check_ceph_data': None},
        {'internal.vm_setup': None},
    ])
    if 'kernel' in ctx.config:
        from teuthology.misc import get_distro
        distro = get_distro(ctx)
        if distro == 'ubuntu':
            init_tasks.append({'kernel': ctx.config['kernel']})
    init_tasks.extend([
        {'internal.base': None},
        {'internal.archive': None},
        {'internal.coredump': None},
        {'internal.sudo': None},
        {'internal.syslog': None},
        {'internal.timer': None},
    ])

    ctx.config['tasks'][:0] = init_tasks

    from teuthology.run_tasks import run_tasks
    try:
        run_tasks(tasks=ctx.config['tasks'], ctx=ctx)
    finally:
        if not ctx.summary.get('success') and ctx.config.get('nuke-on-error'):
            from teuthology.nuke import nuke
            # only unlock if we locked them in the first place
            nuke(ctx, log, ctx.lock)
        if ctx.archive is not None:
            with file(os.path.join(ctx.archive, 'summary.yaml'), 'w') as f:
                yaml.safe_dump(ctx.summary, f, default_flow_style=False)
        with contextlib.closing(StringIO.StringIO()) as f:
            yaml.safe_dump(ctx.summary, f)
            log.info('Summary data:\n%s' % f.getvalue())
        with contextlib.closing(StringIO.StringIO()) as f:
            if 'email-on-error' in ctx.config and not ctx.summary.get(
                    'success', False):
                yaml.safe_dump(ctx.summary, f)
                yaml.safe_dump(ctx.config, f)
                emsg = f.getvalue()
                subject = "Teuthology error -- %s" % ctx.summary[
                    'failure_reason']
                from teuthology.suite import email_results
                email_results(subject, "Teuthology",
                              ctx.config['email-on-error'], emsg)
        if ctx.summary.get('success', True):
            log.info('pass')
        else:
            log.info('FAIL')
            import sys
            sys.exit(1)
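A note on the StringIO buffers above: yaml.safe_dump returns the document as a string when no stream is passed, so the summary logging admits a shorter equivalent (a sketch; the summary values are made up):

import yaml

summary = dict(success=True, owner='scheduled_sage')
text = yaml.safe_dump(summary, default_flow_style=False)  # no stream: returns str
print('Summary data:\n%s' % text)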
Example #32
            '--all and listing specific machines are incompatible'

    if ctx.brief:
        assert ctx.list, '--brief only applies to --list'

    if ctx.list or ctx.list_targets:
        assert ctx.desc is None, '--desc does nothing with --list'

        if machines:
            statuses = [get_status(ctx, machine) for machine in machines]
        else:
            statuses = list_locks(ctx)

        if statuses:
            if not machines and ctx.owner is None and not ctx.all:
                ctx.owner = teuthology.get_user()
            if ctx.owner is not None:
                statuses = [status for status in statuses
                            if status['locked_by'] == ctx.owner]
            if ctx.status is not None:
                statuses = [status for status in statuses
                            if status['up'] == (ctx.status == 'up')]
            if ctx.locked is not None:
                statuses = [status for status in statuses
                            if status['locked'] == (ctx.locked == 'true')]
            if ctx.list:
                if ctx.brief:
                    for s in statuses:
                        locked = "un" if s['locked'] == 0 else "  "
                        mo = re.match(r'\w+@(\w+?)\..*', s['name'])
                        host = mo.group(1) if mo else s['name']
Example #33
def schedule():
    parser = argparse.ArgumentParser(
        description='Schedule ceph integration tests')
    parser.add_argument(
        'config',
        metavar='CONFFILE',
        nargs='*',
        type=config_file,
        action=MergeConfig,
        default={},
        help='config file to read',
    )
    parser.add_argument(
        '--name',
        help='name of suite run the job is part of',
    )
    parser.add_argument(
        '--last-in-suite',
        action='store_true',
        default=False,
        help='mark the last job in a suite so suite post-processing can be run',
    )
    parser.add_argument(
        '--email',
        help='where to send the results of a suite (only applies to the last job in a suite)',
    )
    parser.add_argument(
        '--timeout',
        help='how many seconds to wait for jobs to finish before emailing results (only applies to the last job in a suite)',
        type=int,
    )
    parser.add_argument(
        '--description',
        help='job description',
    )
    parser.add_argument(
        '--owner',
        help='job owner',
    )
    parser.add_argument(
        '--delete',
        metavar='JOBID',
        type=int,
        nargs='*',
        help='list of jobs to remove from the queue',
    )
    parser.add_argument('-n',
                        '--num',
                        default=1,
                        type=int,
                        help='number of times to run/queue the job')
    parser.add_argument('-p',
                        '--priority',
                        default=1000,
                        type=int,
                        help='beanstalk priority (lower is sooner)')
    parser.add_argument(
        '-v',
        '--verbose',
        action='store_true',
        default=False,
        help='be more verbose',
    )
    parser.add_argument(
        '-w',
        '--worker',
        default='plana',
        help='which worker to use (type of machine)',
    )
    parser.add_argument(
        '-s',
        '--show',
        metavar='JOBID',
        type=int,
        nargs='*',
        help='output the contents of specified jobs in the queue',
    )

    ctx = parser.parse_args()
    if not ctx.last_in_suite:
        assert not ctx.email, '--email is only applicable to the last job in a suite'
        assert not ctx.timeout, '--timeout is only applicable to the last job in a suite'

    from teuthology.misc import read_config, get_user
    if ctx.owner is None:
        ctx.owner = 'scheduled_{user}'.format(user=get_user())
    read_config(ctx)

    import teuthology.queue
    beanstalk = teuthology.queue.connect(ctx)

    tube = ctx.worker
    beanstalk.use(tube)

    if ctx.show:
        for job_id in ctx.show:
            job = beanstalk.peek(job_id)
            if job is None:
                if ctx.verbose:
                    print 'job {jid} is not in the queue'.format(jid=job_id)
            else:
                print '--- job {jid} priority {prio} ---\n'.format(
                    jid=job_id, prio=job.stats()['pri']), job.body
        return

    if ctx.delete:
        for job_id in ctx.delete:
            job = beanstalk.peek(job_id)
            if job is None:
                print 'job {jid} is not in the queue'.format(jid=job_id)
            else:
                job.delete()
        return

    # strip out targets; the worker will allocate new ones when we run
    # the job with --lock.
    if ctx.config.get('targets'):
        del ctx.config['targets']

    job_config = dict(
        name=ctx.name,
        last_in_suite=ctx.last_in_suite,
        email=ctx.email,
        description=ctx.description,
        owner=ctx.owner,
        verbose=ctx.verbose,
    )
    # Merge job_config and ctx.config
    job_config.update(ctx.config)
    if ctx.timeout is not None:
        job_config['results_timeout'] = ctx.timeout

    job = yaml.safe_dump(job_config)
    num = ctx.num
    while num > 0:
        jid = beanstalk.put(
            job,
            ttr=60 * 60 * 24,
            priority=ctx.priority,
        )
        print 'Job scheduled with ID {jid}'.format(jid=jid)
        num -= 1
Example #34
def main(args):
    verbose = args["--verbose"]
    archive = args["--archive"]
    owner = args["--owner"]
    config = args["<config>"]
    name = args["--name"]
    description = args["--description"]
    machine_type = args["--machine-type"]
    block = args["--block"]
    lock = args["--lock"]
    suite_path = args["--suite-path"]
    os_type = args["--os-type"]
    os_version = args["--os-version"]

    set_up_logging(verbose, archive)

    # print the command being run
    log.debug("Teuthology command: {0}".format(get_teuthology_command(args)))

    if owner is None:
        args["--owner"] = owner = get_user()

    config = setup_config(config)

    if archive is not None and 'archive_path' not in config:
        config['archive_path'] = archive

    write_initial_metadata(archive, config, name, description, owner)
    report.try_push_job_info(config, dict(status='running'))

    machine_type = get_machine_type(machine_type, config)
    args["--machine-type"] = machine_type

    if block:
        assert lock, \
            'the --block option is only supported with the --lock option'

    log.info('\n  '.join([
        'Config:',
    ] + yaml.safe_dump(config, default_flow_style=False).splitlines()))

    args["summary"] = get_summary(owner, description)

    ceph_repo = config.get('repo')
    if ceph_repo:
        teuth_config.ceph_git_url = ceph_repo
    suite_repo = config.get('suite_repo')
    if suite_repo:
        teuth_config.ceph_qa_suite_git_url = suite_repo

    # overwrite the config values of os_{type,version} if corresponding
    # command-line arguments are provided
    if os_type:
        config["os_type"] = os_type
    if os_version:
        config["os_version"] = os_version

    config["tasks"] = validate_tasks(config)

    init_tasks = get_initial_tasks(lock, config, machine_type)

    # prepend init_tasks to the front of the task list
    config['tasks'][:0] = init_tasks

    if suite_path is not None:
        config['suite_path'] = suite_path

    # fetches the tasks and returns a new suite_path if needed
    config["suite_path"] = fetch_tasks_if_needed(config)

    # If the job has a 'use_shaman' key, use that value to override the global
    # config's value.
    if config.get('use_shaman') is not None:
        teuth_config.use_shaman = config['use_shaman']

    # create a FakeNamespace instance that mimics the old argparse way of
    # doing things; we do this so we can pass it to run_tasks without porting
    # those tasks to the new way of doing things right now
    args["<config>"] = config
    fake_ctx = FakeNamespace(args)

    # store on global config if interactive-on-error, for contextutil.nested()
    # FIXME this should become more generic, and the keys should use
    # '_' uniformly
    if fake_ctx.config.get('interactive-on-error'):
        teuthology.config.config.ctx = fake_ctx

    try:
        run_tasks(tasks=config['tasks'], ctx=fake_ctx)
    finally:
        # print to stdout the results and possibly send an email on any errors
        report_outcome(config, archive, fake_ctx.summary, fake_ctx)
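FakeNamespace itself is not shown in these examples; a minimal stand-in for the idea (exposing docopt-style '--option' and '<arg>' keys as plain attributes, the way argparse would) could look like the following. This is an assumption for illustration, not the real teuthology class:

class FakeNamespace(object):
    # hypothetical sketch: normalize each docopt key to an identifier
    # and attach it as an attribute
    def __init__(self, args):
        for key, value in args.items():
            name = key.lstrip('-').strip('<>').replace('-', '_')
            setattr(self, name, value)

ns = FakeNamespace({'--verbose': True, '<config>': {'tasks': []}})
assert ns.verbose is True
assert ns.config == {'tasks': []}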
Example #35
def schedule():
    parser = argparse.ArgumentParser(description='Schedule ceph integration tests')
    parser.add_argument(
        'config',
        metavar='CONFFILE',
        nargs='*',
        type=config_file,
        action=MergeConfig,
        default={},
        help='config file to read',
        )
    parser.add_argument(
        '--name',
        help='name of suite run the job is part of',
        )
    parser.add_argument(
        '--last-in-suite',
        action='store_true',
        default=False,
        help='mark the last job in a suite so suite post-processing can be run',
        )
    parser.add_argument(
        '--email',
        help='where to send the results of a suite (only applies to the last job in a suite)',
        )
    parser.add_argument(
        '--timeout',
        help='how many seconds to wait for jobs to finish before emailing results (only applies to the last job in a suite)',
        type=int,
        )
    parser.add_argument(
        '--description',
        help='job description',
        )
    parser.add_argument(
        '--owner',
        help='job owner',
        )
    parser.add_argument(
        '--delete',
        metavar='JOBID',
        type=int,
        nargs='*',
        help='list of jobs to remove from the queue',
        )
    parser.add_argument(
        '-n', '--num',
        default=1,
        type=int,
        help='number of times to run/queue the job'
        )
    parser.add_argument(
        '-p', '--priority',
        default=1000,
        type=int,
        help='beanstalk priority (lower is sooner)'
        )
    parser.add_argument(
        '-v', '--verbose',
        action='store_true',
        default=False,
        help='be more verbose',
        )
    parser.add_argument(
        '-w', '--worker',
        default='plana',
        help='which worker to use (type of machine)',
        )
    parser.add_argument(
        '-s', '--show',
        metavar='JOBID',
        type=int,
        nargs='*',
        help='output the contents of specified jobs in the queue',
        )

    ctx = parser.parse_args()
    if not ctx.last_in_suite:
        assert not ctx.email, '--email is only applicable to the last job in a suite'
        assert not ctx.timeout, '--timeout is only applicable to the last job in a suite'

    from teuthology.misc import read_config, get_user
    if ctx.owner is None:
        ctx.owner = 'scheduled_{user}'.format(user=get_user())
    read_config(ctx)

    import teuthology.queue
    beanstalk = teuthology.queue.connect(ctx)

    tube = ctx.worker
    beanstalk.use(tube)

    if ctx.show:
        for job_id in ctx.show:
            job = beanstalk.peek(job_id)
            if job is None:
                if ctx.verbose:
                    print 'job {jid} is not in the queue'.format(jid=job_id)
            else:
                print '--- job {jid} priority {prio} ---\n'.format(
                    jid=job_id,
                    prio=job.stats()['pri']), job.body
        return

    if ctx.delete:
        for job_id in ctx.delete:
            job = beanstalk.peek(job_id)
            if job is None:
                print 'job {jid} is not in the queue'.format(jid=job_id)
            else:
                job.delete()
        return

    # strip out targets; the worker will allocate new ones when we run
    # the job with --lock.
    if ctx.config.get('targets'):
        del ctx.config['targets']

    job_config = dict(
            name=ctx.name,
            last_in_suite=ctx.last_in_suite,
            email=ctx.email,
            description=ctx.description,
            owner=ctx.owner,
            verbose=ctx.verbose,
            )
    # Merge job_config and ctx.config
    job_config.update(ctx.config)
    if ctx.timeout is not None:
        job_config['results_timeout'] = ctx.timeout

    job = yaml.safe_dump(job_config)
    num = ctx.num
    while num > 0:
        jid = beanstalk.put(
            job,
            ttr=60*60*24,
            priority=ctx.priority,
            )
        print 'Job scheduled with ID {jid}'.format(jid=jid)
        num -= 1