def main(ctx): if ctx.owner is None: ctx.owner = 'scheduled_{user}'.format(user=get_user()) read_config(ctx) beanstalk = teuthology.queue.connect(ctx) tube = ctx.worker beanstalk.use(tube) if ctx.show: for job_id in ctx.show: job = beanstalk.peek(job_id) if job is None and ctx.verbose: print 'job {jid} is not in the queue'.format(jid=job_id) else: print '--- job {jid} priority {prio} ---\n'.format( jid=job_id, prio=job.stats()['pri']), job.body return if ctx.delete: for job_id in ctx.delete: job = beanstalk.peek(job_id) if job is None: print 'job {jid} is not in the queue'.format(jid=job_id) else: job.delete() return # strip out targets; the worker will allocate new ones when we run # the job with --lock. if ctx.config.get('targets'): del ctx.config['targets'] job_config = dict( name=ctx.name, last_in_suite=ctx.last_in_suite, email=ctx.email, description=ctx.description, owner=ctx.owner, verbose=ctx.verbose, machine_type=ctx.worker, ) # Merge job_config and ctx.config job_config.update(ctx.config) if ctx.timeout is not None: job_config['results_timeout'] = ctx.timeout job = yaml.safe_dump(job_config) num = ctx.num while num > 0: jid = beanstalk.put( job, ttr=60 * 60 * 24, priority=ctx.priority, ) print 'Job scheduled with name {name} and ID {jid}'.format( name=ctx.name, jid=jid) num -= 1
def main():
    from gevent import monkey
    monkey.patch_all()
    from .orchestra import monkey
    monkey.patch_all()
    import logging
    log = logging.getLogger(__name__)
    ctx = parse_args()

    loglevel = logging.INFO
    if ctx.verbose:
        loglevel = logging.DEBUG

    logging.basicConfig(level=loglevel)

    from teuthology.misc import read_config
    read_config(ctx)

    log.info('\n '.join(['targets:', ] + yaml.safe_dump(
        ctx.config['targets'], default_flow_style=False).splitlines()))

    if ctx.owner is None:
        from teuthology.misc import get_user
        ctx.owner = get_user()

    nuke(ctx, log)
def build_config(args):
    """
    Given a dict of arguments, build a job config
    """
    config_paths = args.get('<conf_file>', list())
    conf_dict = merge_configs(config_paths)
    # strip out targets; the worker will allocate new ones when we run
    # the job with --lock.
    if 'targets' in conf_dict:
        del conf_dict['targets']
    args['config'] = conf_dict

    owner = args['--owner']
    if owner is None:
        owner = 'scheduled_{user}'.format(user=get_user())

    job_config = dict(
        name=args['--name'],
        last_in_suite=args['--last-in-suite'],
        email=args['--email'],
        description=args['--description'],
        owner=owner,
        verbose=args['--verbose'],
        machine_type=args['--worker'],
        tube=args['--worker'],
        priority=int(args['--priority']),
    )
    # Update the dict we just created, and not the other way around, to let
    # settings in the yaml override what's passed on the command line. This is
    # primarily to accommodate jobs with multiple machine types.
    job_config.update(conf_dict)
    if args['--timeout'] is not None:
        job_config['results_timeout'] = args['--timeout']
    return job_config
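# A minimal usage sketch for build_config: the docopt-style keys mirror the
# ones the function reads; the values are hypothetical placeholders.
example_args = {
    '<conf_file>': [],            # no extra YAML fragments in this sketch
    '--owner': None,              # None falls back to 'scheduled_<user>'
    '--name': 'example-run',
    '--last-in-suite': False,
    '--email': None,
    '--description': None,
    '--verbose': False,
    '--worker': 'plana',
    '--priority': '1000',         # build_config converts this with int()
    '--timeout': None,
}
example_job_config = build_config(example_args)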
def main():
    from gevent import monkey
    monkey.patch_all()
    from .orchestra import monkey
    monkey.patch_all()
    import logging
    log = logging.getLogger(__name__)
    ctx = parse_args()

    loglevel = logging.INFO
    if ctx.verbose:
        loglevel = logging.DEBUG

    logging.basicConfig(level=loglevel)

    from teuthology.misc import read_config
    read_config(ctx)

    log.info('\n '.join(['targets:', ] + yaml.safe_dump(
        ctx.config['targets'], default_flow_style=False).splitlines()))

    if ctx.owner is None:
        from teuthology.misc import get_user
        ctx.owner = get_user()

    nuke(ctx, log, ctx.unlock, ctx.synch_clocks, ctx.reboot_all)
def main(): from gevent import monkey monkey.patch_all(dns=False) from .orchestra import monkey monkey.patch_all() from teuthology.run import config_file import os import logging log = logging.getLogger(__name__) ctx = parse_args() loglevel = logging.INFO if ctx.verbose: loglevel = logging.DEBUG logging.basicConfig(level=loglevel, ) info = {} if ctx.archive: ctx.config = config_file(ctx.archive + '/config.yaml') ifn = os.path.join(ctx.archive, 'info.yaml') if os.path.exists(ifn): with file(ifn, 'r') as fd: info = yaml.load(fd.read()) if not ctx.pid: ctx.pid = info.get('pid') if not ctx.pid: ctx.pid = int(open(ctx.archive + '/pid').read().rstrip('\n')) if not ctx.owner: ctx.owner = info.get('owner') if not ctx.owner: ctx.owner = open(ctx.archive + '/owner').read().rstrip('\n') ctx.name = info.get('name') from teuthology.misc import read_config read_config(ctx) log.info('\n '.join([ 'targets:', ] + yaml.safe_dump(ctx.config['targets'], default_flow_style=False).splitlines())) if ctx.owner is None: from teuthology.misc import get_user ctx.owner = get_user() if ctx.pid: if ctx.archive: log.info('Killing teuthology process at pid %d', ctx.pid) os.system('grep -q %s /proc/%d/cmdline && sudo kill %d' % (ctx.archive, ctx.pid, ctx.pid)) else: import subprocess subprocess.check_call(["kill", "-9", str(ctx.pid)]) nuke(ctx, log, ctx.unlock, ctx.synch_clocks, ctx.reboot_all, ctx.noipmi)
def main(args): ctx = FakeNamespace(args) if ctx.verbose: teuthology.log.setLevel(logging.DEBUG) info = {} if ctx.archive: ctx.config = config_file(ctx.archive + '/config.yaml') ifn = os.path.join(ctx.archive, 'info.yaml') if os.path.exists(ifn): with open(ifn, 'r') as fd: info = yaml.safe_load(fd.read()) if not ctx.pid: ctx.pid = info.get('pid') if not ctx.pid: ctx.pid = int(open(ctx.archive + '/pid').read().rstrip('\n')) if not ctx.owner: ctx.owner = info.get('owner') if not ctx.owner: ctx.owner = open(ctx.archive + '/owner').read().rstrip('\n') if ctx.targets: ctx.config = merge_configs(ctx.targets) if ctx.stale: stale_nodes = find_stale_locks(ctx.owner) targets = dict() for node in stale_nodes: targets[node['name']] = node['ssh_pub_key'] ctx.config = dict(targets=targets) if ctx.stale_openstack: stale_openstack(ctx) return log.info( '\n '.join( ['targets:', ] + yaml.safe_dump( ctx.config['targets'], default_flow_style=False).splitlines())) if ctx.dry_run: log.info("Not actually nuking anything since --dry-run was passed") return if ctx.owner is None: ctx.owner = get_user() if ctx.pid: if ctx.archive: log.info('Killing teuthology process at pid %d', ctx.pid) os.system('grep -q %s /proc/%d/cmdline && sudo kill -9 %d' % ( ctx.archive, ctx.pid, ctx.pid)) else: subprocess.check_call(["kill", "-9", str(ctx.pid)]) nuke(ctx, ctx.unlock, ctx.synch_clocks, ctx.noipmi, ctx.keep_logs, not ctx.no_reboot)
def main(): from gevent import monkey; monkey.patch_all(dns=False) from .orchestra import monkey; monkey.patch_all() from teuthology.run import config_file import os import logging log = logging.getLogger(__name__) ctx = parse_args() loglevel = logging.INFO if ctx.verbose: loglevel = logging.DEBUG logging.basicConfig( level=loglevel, ) info = {} if ctx.archive: ctx.config = config_file(ctx.archive + '/config.yaml') ifn = os.path.join(ctx.archive, 'info.yaml') if os.path.exists(ifn): with file(ifn, 'r') as fd: info = yaml.load(fd.read()) if not ctx.pid: ctx.pid = info.get('pid') if not ctx.pid: ctx.pid = int(open(ctx.archive + '/pid').read().rstrip('\n')) if not ctx.owner: ctx.owner = info.get('owner') if not ctx.owner: ctx.owner = open(ctx.archive + '/owner').read().rstrip('\n') ctx.name = info.get('name') from teuthology.misc import read_config read_config(ctx) log.info('\n '.join(['targets:', ] + yaml.safe_dump(ctx.config['targets'], default_flow_style=False).splitlines())) if ctx.owner is None: from teuthology.misc import get_user ctx.owner = get_user() if ctx.pid: if ctx.archive: log.info('Killing teuthology process at pid %d', ctx.pid) os.system('grep -q %s /proc/%d/cmdline && sudo kill %d' % ( ctx.archive, ctx.pid, ctx.pid)) else: import subprocess subprocess.check_call(["kill", "-9", str(ctx.pid)]); nuke(ctx, log, ctx.unlock, ctx.synch_clocks, ctx.reboot_all, ctx.noipmi)
def unlock(ctx, name, user=None):
    if user is None:
        user = teuthology.get_user()
    success, _ = send_request('DELETE', _lock_url(ctx) + '/' + name + '?' +
                              urllib.urlencode(dict(user=user)))
    if success:
        log.debug('unlocked %s', name)
    else:
        log.error('failed to unlock %s', name)
    return success
def lock(ctx, name, user=None):
    if user is None:
        user = teuthology.get_user()
    success, _, _ = ls.send_request('POST', ls._lock_url(ctx) + '/' + name,
                                    urllib.urlencode(dict(user=user)))
    if success:
        log.debug('locked %s as %s', name, user)
    else:
        log.error('failed to lock %s', name)
    return success
def unlock(ctx, name, user=None):
    if user is None:
        user = teuthology.get_user()
    success, _, _ = send_request('DELETE', _lock_url(ctx) + '/' + name + '?' +
                                 urllib.urlencode(dict(user=user)))
    if success:
        log.debug('unlocked %s', name)
    else:
        log.error('failed to unlock %s', name)
    return success
def lock_many(ctx, num, user=None):
    if user is None:
        user = teuthology.get_user()
    success, content = send_request('POST', _lock_url(ctx),
                                    urllib.urlencode(dict(user=user, num=num)))
    if success:
        machines = json.loads(content)
        log.debug('locked {machines}'.format(
            machines=', '.join(machines.keys())))
        return machines
    log.warn('Could not lock %d nodes', num)
    return []
def lock_many(ctx, num, user=None):
    if user is None:
        user = teuthology.get_user()
    success, content = send_request('POST', _lock_url(ctx),
                                    urllib.urlencode(dict(user=user, num=num)))
    if success:
        machines = json.loads(content)
        log.debug(
            'locked {machines}'.format(machines=', '.join(machines.keys())))
        return machines
    log.warn('Could not lock %d nodes', num)
    return []
def lock_many(ctx, num, user=None):
    if user is None:
        user = teuthology.get_user()
    success, content, status = send_request(
        'POST', _lock_url(ctx),
        urllib.urlencode(dict(user=user, num=num)))
    if success:
        machines = json.loads(content)
        log.debug('locked {machines}'.format(
            machines=', '.join(machines.keys())))
        return machines
    if status == 503:
        log.error('Insufficient nodes available to lock %d nodes.', num)
    else:
        log.error('Could not lock %d nodes, reason: unknown.', num)
    return []
def main():
    from gevent import monkey
    monkey.patch_all()
    from .orchestra import monkey
    monkey.patch_all()
    from teuthology.run import config_file
    import logging

    log = logging.getLogger(__name__)

    ctx = parse_args()

    loglevel = logging.INFO
    if ctx.verbose:
        loglevel = logging.DEBUG

    logging.basicConfig(level=loglevel)

    if ctx.archive:
        ctx.config = config_file(ctx.archive + "/config.yaml")
        if not ctx.pid:
            ctx.pid = int(open(ctx.archive + "/pid").read().rstrip("\n"))
        if not ctx.owner:
            ctx.owner = open(ctx.archive + "/owner").read().rstrip("\n")

    from teuthology.misc import read_config
    read_config(ctx)

    log.info("\n ".join(["targets:"] + yaml.safe_dump(
        ctx.config["targets"], default_flow_style=False).splitlines()))

    if ctx.owner is None:
        from teuthology.misc import get_user
        ctx.owner = get_user()

    if ctx.pid:
        if ctx.archive:
            import os
            log.info("Killing teuthology process at pid %d", ctx.pid)
            os.system("grep -q %s /proc/%d/cmdline && sudo kill %d" % (
                ctx.archive, ctx.pid, ctx.pid))
        else:
            subprocess.check_call(["kill", "-9", str(ctx.pid)])

    nuke(ctx, log, ctx.unlock, ctx.synch_clocks, ctx.reboot_all)
def create_fake_context(job_config, block=False):
    owner = job_config.get('owner', get_user())
    os_version = job_config.get('os_version', None)

    ctx_args = {
        'config': job_config,
        'block': block,
        'owner': owner,
        'archive': job_config['archive_path'],
        'machine_type': job_config['machine_type'],
        'os_type': job_config.get('os_type', 'ubuntu'),
        'os_version': os_version,
        'name': job_config['name'],
    }
    return FakeNamespace(ctx_args)
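# A minimal usage sketch for create_fake_context; the job_config values are
# hypothetical placeholders covering only the keys the function reads.
example_job_config = {
    'owner': 'scheduled_example',
    'archive_path': '/tmp/example-archive',
    'machine_type': 'smithi',
    'os_type': 'ubuntu',
    'name': 'example-run',
}
fake_ctx = create_fake_context(example_job_config, block=False)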
def main(args):
    if args['--verbose']:
        teuthology.log.setLevel(logging.DEBUG)

    ctx = argparse.Namespace()
    ctx.os_type = args['--os-type']
    ctx.os_version = args['--os-version']

    nodes = args['<nodes>']

    reimage_types = get_reimage_types()
    statuses = query.get_statuses(nodes)
    owner = args['--owner'] or get_user()
    unlocked = [shortname(_['name']) for _ in statuses if not _['locked']]
    if unlocked:
        log.error("Some of the nodes are not locked: %s", unlocked)
        exit(1)

    improper = [shortname(_['name']) for _ in statuses
                if _['locked_by'] != owner]
    if improper:
        log.error("Some of the nodes are not owned by '%s': %s",
                  owner, improper)
        exit(1)

    irreimageable = [shortname(_['name']) for _ in statuses
                     if _['machine_type'] not in reimage_types]
    if irreimageable:
        log.error(
            "Following nodes cannot be reimaged because their machine type "
            "is not reimageable: %s", irreimageable)
        exit(1)

    def reimage_node(ctx, machine_name, machine_type):
        ops.update_nodes([machine_name], True)
        reimage(ctx, machine_name, machine_type)
        ops.update_nodes([machine_name])
        log.debug("Node '%s' reimaging is complete", machine_name)

    with parallel() as p:
        for node in statuses:
            log.debug("Start node '%s' reimaging", node['name'])
            p.spawn(reimage_node, ctx, shortname(node['name']),
                    node['machine_type'])
def lock_one(name, user=None, description=None):
    name = misc.canonicalize_hostname(name, user=None)
    if user is None:
        user = misc.get_user()
    request = dict(name=name, locked=True, locked_by=user,
                   description=description)
    uri = os.path.join(config.lock_server, 'nodes', name, 'lock', '')
    response = requests.put(uri, json.dumps(request))
    success = response.ok
    if success:
        log.debug('locked %s as %s', name, user)
    else:
        try:
            reason = response.json().get('message')
        except ValueError:
            reason = str(response.status_code)
        log.error('failed to lock {node}. reason: {reason}'.format(
            node=name, reason=reason))
    return response
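# A hedged usage sketch for lock_one: the hostname is a placeholder and the
# call assumes config.lock_server points at a reachable lock server.
response = lock_one('smithi001.example.com', user='example-user',
                    description='manual maintenance')
if not response.ok:
    log.error('lock attempt failed')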
def schedule(): parser = argparse.ArgumentParser(description='Schedule ceph integration tests') parser.add_argument( 'config', metavar='CONFFILE', nargs='*', type=config_file, action=MergeConfig, default={}, help='config file to read', ) parser.add_argument( '--name', help='name of suite run the job is part of', ) parser.add_argument( '--last-in-suite', action='store_true', default=False, help='mark the last job in a suite so suite post-processing can be run', ) parser.add_argument( '--email', help='where to send the results of a suite (only applies to the last job in a suite)', ) parser.add_argument( '--timeout', help='how many seconds to wait for jobs to finish before emailing results (only applies to the last job in a suite', type=int, ) parser.add_argument( '--description', help='job description', ) parser.add_argument( '--owner', help='job owner', ) parser.add_argument( '--delete', metavar='JOBID', type=int, nargs='*', help='list of jobs to remove from the queue', ) parser.add_argument( '-v', '--verbose', action='store_true', default=False, help='be more verbose', ) ctx = parser.parse_args() if not ctx.last_in_suite: assert not ctx.email, '--email is only applicable to the last job in a suite' assert not ctx.timeout, '--timeout is only applicable to the last job in a suite' from teuthology.misc import read_config, get_user if ctx.owner is None: ctx.owner = 'scheduled_{user}'.format(user=get_user()) read_config(ctx) import teuthology.queue beanstalk = teuthology.queue.connect(ctx) beanstalk.use('teuthology') if ctx.delete: for jobid in ctx.delete: job = beanstalk.peek(jobid) if job is None: print 'job {jid} is not in the queue'.format(jid=jobid) else: job.delete() return job_config = dict( config=ctx.config, name=ctx.name, last_in_suite=ctx.last_in_suite, email=ctx.email, description=ctx.description, owner=ctx.owner, verbose=ctx.verbose, ) if ctx.timeout is not None: job_config['results_timeout'] = ctx.timeout job = yaml.safe_dump(job_config) jid = beanstalk.put(job, ttr=60*60*24) print 'Job scheduled with ID {jid}'.format(jid=jid)
def main(): from gevent import monkey monkey.patch_all(dns=False) from .orchestra import monkey monkey.patch_all() import logging ctx = parse_args() set_up_logging(ctx) log = logging.getLogger(__name__) if ctx.owner is None: from teuthology.misc import get_user ctx.owner = get_user() write_initial_metadata(ctx) if 'targets' in ctx.config and 'roles' in ctx.config: targets = len(ctx.config['targets']) roles = len(ctx.config['roles']) assert targets >= roles, \ '%d targets are needed for all roles but found %d listed.' % (roles, targets) machine_type = ctx.machine_type if machine_type is None: fallback_default = ctx.config.get('machine_type', 'plana') machine_type = ctx.config.get('machine-type', fallback_default) if ctx.block: assert ctx.lock, \ 'the --block option is only supported with the --lock option' from teuthology.misc import read_config read_config(ctx) log.debug('\n '.join(['Config:', ] + yaml.safe_dump(ctx.config, default_flow_style=False).splitlines())) ctx.summary = dict(success=True) ctx.summary['owner'] = ctx.owner if ctx.description is not None: ctx.summary['description'] = ctx.description for task in ctx.config['tasks']: assert 'kernel' not in task, \ 'kernel installation shouldn be a base-level item, not part of the tasks list' init_tasks = [] if ctx.lock: assert 'targets' not in ctx.config, \ 'You cannot specify targets in a config file when using the --lock option' init_tasks.append({'internal.lock_machines': (len(ctx.config['roles']), machine_type)}) init_tasks.extend([ {'internal.save_config': None}, {'internal.check_lock': None}, {'internal.connect': None}, {'internal.check_conflict': None}, {'internal.check_ceph_data': None}, {'internal.vm_setup': None}, ]) if 'kernel' in ctx.config: from teuthology.misc import get_distro distro = get_distro(ctx) if distro == 'ubuntu': init_tasks.append({'kernel': ctx.config['kernel']}) init_tasks.extend([ {'internal.base': None}, {'internal.archive': None}, {'internal.coredump': None}, {'internal.sudo': None}, {'internal.syslog': None}, {'internal.timer': None}, ]) ctx.config['tasks'][:0] = init_tasks from teuthology.run_tasks import run_tasks try: run_tasks(tasks=ctx.config['tasks'], ctx=ctx) finally: if not ctx.summary.get('success') and ctx.config.get('nuke-on-error'): from teuthology.nuke import nuke # only unlock if we locked them in the first place nuke(ctx, log, ctx.lock) if ctx.archive is not None: with file(os.path.join(ctx.archive, 'summary.yaml'), 'w') as f: yaml.safe_dump(ctx.summary, f, default_flow_style=False) with contextlib.closing(StringIO.StringIO()) as f: yaml.safe_dump(ctx.summary, f) log.info('Summary data:\n%s' % f.getvalue()) with contextlib.closing(StringIO.StringIO()) as f: if 'email-on-error' in ctx.config and not ctx.summary.get('success', False): yaml.safe_dump(ctx.summary, f) yaml.safe_dump(ctx.config, f) emsg = f.getvalue() subject = "Teuthology error -- %s" % ctx.summary['failure_reason'] from teuthology.suite import email_results email_results(subject,"Teuthology",ctx.config['email-on-error'],emsg) if ctx.summary.get('success', True): log.info('pass') else: log.info('FAIL') import sys sys.exit(1)
assert ctx.owner is None, \
    '--all and --owner are mutually exclusive'
assert not machines, \
    '--all and listing specific machines are incompatible'

if ctx.list or ctx.list_targets:
    assert ctx.desc is None, '--desc does nothing with --list'

    if machines:
        statuses = [get_status(ctx, machine) for machine in machines]
    else:
        statuses = list_locks(ctx)

    if statuses:
        if not machines and ctx.owner is None and not ctx.all:
            ctx.owner = teuthology.get_user()
        if ctx.owner is not None:
            statuses = [status for status in statuses
                        if status['locked_by'] == ctx.owner]
        if ctx.status is not None:
            statuses = [status for status in statuses
                        if status['up'] == (ctx.status == 'up')]
        if ctx.list:
            print json.dumps(statuses, indent=4)
        else:
            frag = {'targets': {}}
            for f in statuses:
                frag['targets'][f['name']] = f['sshpubkey']
            print yaml.safe_dump(frag, default_flow_style=False)
    else:
        log.error('error retrieving lock statuses')
def main(ctx):
    if ctx.verbose:
        teuthology.log.setLevel(logging.DEBUG)

    set_config_attr(ctx)

    ret = 0
    user = ctx.owner
    machines = [misc.canonicalize_hostname(m, user=False)
                for m in ctx.machines]
    machines_to_update = []

    if ctx.targets:
        try:
            with open(ctx.targets) as f:
                g = yaml.safe_load_all(f)
                for new in g:
                    if 'targets' in new:
                        for t in new['targets'].iterkeys():
                            machines.append(t)
        except IOError as e:
            raise argparse.ArgumentTypeError(str(e))

    if ctx.f:
        assert ctx.lock or ctx.unlock, \
            '-f is only supported by --lock and --unlock'
    if machines:
        assert ctx.lock or ctx.unlock or ctx.list or ctx.list_targets \
            or ctx.update or ctx.brief, \
            'machines cannot be specified with that operation'
    else:
        if ctx.lock:
            log.error("--lock requires specific machines passed as arguments")
        else:
            # This condition might never be hit, but it's not clear.
            assert ctx.num_to_lock or ctx.list or ctx.list_targets or \
                ctx.summary or ctx.brief, \
                'machines must be specified for that operation'
    if ctx.all:
        assert ctx.list or ctx.list_targets or ctx.brief, \
            '--all can only be used with --list, --list-targets, and --brief'
        assert ctx.owner is None, \
            '--all and --owner are mutually exclusive'
        assert not machines, \
            '--all and listing specific machines are incompatible'
    if ctx.num_to_lock:
        assert ctx.machine_type, \
            'must specify machine type to lock'

    if ctx.brief or ctx.list or ctx.list_targets:
        assert ctx.desc is None, '--desc does nothing with --list/--brief'

        # we may need to update host keys for vms.  Don't do it for
        # every vm; however, update any vms included in the list given
        # to the CLI (machines), or any owned by the specified owner or
        # invoking user if no machines are specified.
        vmachines = []
        statuses = query.get_statuses(machines)
        owner = ctx.owner or misc.get_user()
        for machine in statuses:
            if query.is_vm(status=machine) and machine['locked'] and \
                    (machines or machine['locked_by'] == owner):
                vmachines.append(machine['name'])
        if vmachines:
            log.info("updating host keys for %s", ' '.join(sorted(vmachines)))
            keys.do_update_keys(vmachines, _raise=False)
            # get statuses again to refresh any updated keys
            statuses = query.get_statuses(machines)
        if statuses:
            statuses = util.winnow(statuses, ctx.machine_type, 'machine_type')
            if not machines and ctx.owner is None and not ctx.all:
                ctx.owner = misc.get_user()
            statuses = util.winnow(statuses, ctx.owner, 'locked_by')
            statuses = util.winnow(statuses, ctx.status, 'up',
                                   lambda s: s['up'] == (ctx.status == 'up'))
            statuses = util.winnow(statuses, ctx.locked, 'locked',
                                   lambda s: s['locked'] == (ctx.locked == 'true'))
            statuses = util.winnow(statuses, ctx.desc, 'description')
            statuses = util.winnow(statuses, ctx.desc_pattern, 'description',
                                   lambda s: s['description'] and
                                   ctx.desc_pattern in s['description'])
            if ctx.json_query:
                statuses = util.json_matching_statuses(ctx.json_query,
                                                       statuses)
            statuses = util.winnow(statuses, ctx.os_type, 'os_type')
            statuses = util.winnow(statuses, ctx.os_version, 'os_version')

            # When listing, only show the vm_host's name, not every detail
            for s in statuses:
                if not query.is_vm(status=s):
                    continue
                # with an OpenStack API, there is no host for a VM
                if s['vm_host'] is None:
                    continue
                vm_host_name = s.get('vm_host', dict())['name']
                if vm_host_name:
                    s['vm_host'] = vm_host_name
            if ctx.list:
                print json.dumps(statuses, indent=4)

            elif ctx.brief:
                for s in sorted(statuses, key=lambda s: s.get('name')):
                    locked = "un" if s['locked'] == 0 else " "
                    mo = re.match('\w+@(\w+?)\..*', s['name'])
                    host = mo.group(1) if mo else s['name']
                    print '{host} {locked}locked {owner} "{desc}"'.format(
                        locked=locked, host=host,
                        owner=s['locked_by'], desc=s['description'])

            else:
                frag = {'targets': {}}
                for f in statuses:
                    frag['targets'][f['name']] = f['ssh_pub_key']
                print yaml.safe_dump(frag, default_flow_style=False)
        else:
            log.error('error retrieving lock statuses')
            ret = 1

    elif ctx.summary:
        do_summary(ctx)
        return 0

    elif ctx.lock:
        if not util.vps_version_or_type_valid(ctx.machine_type, ctx.os_type,
                                              ctx.os_version):
            log.error('Invalid os-type or version detected -- lock failed')
            return 1
        reimage_types = teuthology.provision.fog.get_types()
        reimage_machines = list()
        updatekeys_machines = list()
        for machine in machines:
            resp = ops.lock_one(machine, user, ctx.desc)
            if resp.ok:
                machine_status = resp.json()
                machine_type = machine_status['machine_type']
            if not resp.ok:
                ret = 1
                if not ctx.f:
                    return ret
            elif not query.is_vm(machine, machine_status):
                if machine_type in reimage_types:
                    # Reimage in parallel just below here
                    reimage_machines.append(machine)
                # Update keys last
                updatekeys_machines = list()
            else:
                machines_to_update.append(machine)
                teuthology.provision.create_if_vm(
                    ctx,
                    misc.canonicalize_hostname(machine),
                )
        with teuthology.parallel.parallel() as p:
            for machine in reimage_machines:
                p.spawn(teuthology.provision.reimage, ctx, machine)
        for machine in updatekeys_machines:
            keys.do_update_keys([machine])

    elif ctx.unlock:
        if ctx.owner is None and user is None:
            user = misc.get_user()
        # If none of them are vpm, do them all in one shot
        if not filter(query.is_vm, machines):
            res = ops.unlock_many(machines, user)
            return 0 if res else 1
        for machine in machines:
            if not ops.unlock_one(ctx, machine, user):
                ret = 1
                if not ctx.f:
                    return ret
            else:
                machines_to_update.append(machine)

    elif ctx.num_to_lock:
        result = ops.lock_many(ctx, ctx.num_to_lock, ctx.machine_type, user,
                               ctx.desc, ctx.os_type, ctx.os_version, ctx.arch)
        if not result:
            ret = 1
        else:
            machines_to_update = result.keys()
            if ctx.machine_type == 'vps':
                shortnames = ' '.join([
                    misc.decanonicalize_hostname(name)
                    for name in result.keys()
                ])
                if len(result) < ctx.num_to_lock:
                    log.error("Locking failed.")
                    for machine in result:
                        ops.unlock_one(ctx, machine, user)
                    ret = 1
                else:
                    log.info("Successfully Locked:\n%s\n" % shortnames)
                    log.info("Unable to display keys at this time (virtual " +
                             "machines are booting).")
                    log.info(
                        "Please run teuthology-lock --list-targets %s once " +
                        "these machines come up.", shortnames)
            else:
                print yaml.safe_dump(dict(targets=result),
                                     default_flow_style=False)

    elif ctx.update:
        assert ctx.desc is not None or ctx.status is not None, \
            'you must specify description or status to update'
        assert ctx.owner is None, 'only description and status may be updated'
        machines_to_update = machines

        if ctx.desc is not None or ctx.status is not None:
            for machine in machines_to_update:
                ops.update_lock(machine, ctx.desc, ctx.status)

    return ret
def main(): from gevent import monkey; monkey.patch_all() from .orchestra import monkey; monkey.patch_all() import time import logging log = logging.getLogger(__name__) ctx = parse_args() loglevel = logging.INFO if ctx.verbose: loglevel = logging.DEBUG logging.basicConfig( level=loglevel, ) if ctx.block: assert ctx.lock, \ 'the --block option is only supported with the --lock option' from teuthology.misc import read_config read_config(ctx) if ctx.archive is not None: os.mkdir(ctx.archive) handler = logging.FileHandler( filename=os.path.join(ctx.archive, 'teuthology.log'), ) formatter = logging.Formatter( fmt='%(asctime)s.%(msecs)03d %(levelname)s:%(name)s:%(message)s', datefmt='%Y-%m-%dT%H:%M:%S', ) handler.setFormatter(formatter) logging.getLogger().addHandler(handler) with file(os.path.join(ctx.archive, 'pid'), 'w') as f: f.write('%d' % os.getpid()) log.debug('\n '.join(['Config:', ] + yaml.safe_dump(ctx.config, default_flow_style=False).splitlines())) ctx.summary = dict(success=True) if ctx.owner is None: from teuthology.misc import get_user ctx.owner = get_user() ctx.summary['owner'] = ctx.owner if ctx.description is not None: ctx.summary['description'] = ctx.description for task in ctx.config['tasks']: assert 'kernel' not in task, \ 'kernel installation shouldn be a base-level item, not part of the tasks list' init_tasks = [] if ctx.lock: assert 'targets' not in ctx.config, \ 'You cannot specify targets in a config file when using the --lock option' init_tasks.append({'internal.lock_machines': len(ctx.config['roles'])}) init_tasks.extend([ {'internal.save_config': None}, {'internal.check_lock': None}, {'internal.connect': None}, {'internal.check_conflict': None}, ]) if 'kernel' in ctx.config: init_tasks.append({'kernel': ctx.config['kernel']}) init_tasks.extend([ {'internal.base': None}, {'internal.archive': None}, {'internal.coredump': None}, {'internal.syslog': None}, ]) ctx.config['tasks'][:0] = init_tasks start_time = time.time() from teuthology.run_tasks import run_tasks try: run_tasks(tasks=ctx.config['tasks'], ctx=ctx) finally: end_time = time.time() duration = end_time - start_time ctx.summary['duration'] = duration log.info("Duration was %f seconds" % duration) if not ctx.summary.get('success') and ctx.config.get('nuke-on-error'): from teuthology.parallel import parallel with parallel() as p: for target, hostkey in ctx.config['targets'].iteritems(): p.spawn( nuke, targets={target: hostkey}, owner=ctx.owner, log=log, teuth_config=ctx.teuthology_config, # only unlock if we locked them in the first place should_unlock=ctx.lock, ) if ctx.archive is not None: with file(os.path.join(ctx.archive, 'summary.yaml'), 'w') as f: yaml.safe_dump(ctx.summary, f, default_flow_style=False)
def main(ctx):
    if ctx.verbose:
        teuthology.log.setLevel(logging.DEBUG)

    set_config_attr(ctx)

    ret = 0
    user = ctx.owner
    machines = [misc.canonicalize_hostname(m, user=False)
                for m in ctx.machines]
    machines_to_update = []

    if ctx.targets:
        try:
            with file(ctx.targets) as f:
                g = yaml.safe_load_all(f)
                for new in g:
                    if 'targets' in new:
                        for t in new['targets'].iterkeys():
                            machines.append(t)
        except IOError as e:
            raise argparse.ArgumentTypeError(str(e))

    if ctx.f:
        assert ctx.lock or ctx.unlock, \
            '-f is only supported by --lock and --unlock'
    if machines:
        assert ctx.lock or ctx.unlock or ctx.list or ctx.list_targets \
            or ctx.update or ctx.brief, \
            'machines cannot be specified with that operation'
    else:
        if ctx.lock:
            log.error("--lock requires specific machines passed as arguments")
        else:
            # This condition might never be hit, but it's not clear.
            assert ctx.num_to_lock or ctx.list or ctx.list_targets or \
                ctx.summary or ctx.brief, \
                'machines must be specified for that operation'
    if ctx.all:
        assert ctx.list or ctx.list_targets or ctx.brief, \
            '--all can only be used with --list, --list-targets, and --brief'
        assert ctx.owner is None, \
            '--all and --owner are mutually exclusive'
        assert not machines, \
            '--all and listing specific machines are incompatible'
    if ctx.num_to_lock:
        assert ctx.machine_type, \
            'must specify machine type to lock'

    if ctx.brief or ctx.list or ctx.list_targets:
        assert ctx.desc is None, '--desc does nothing with --list/--brief'

        # we may need to update host keys for vms.  Don't do it for
        # every vm; however, update any vms included in the list given
        # to the CLI (machines), or any owned by the specified owner or
        # invoking user if no machines are specified.
        vmachines = []
        statuses = query.get_statuses(machines)
        owner = ctx.owner or misc.get_user()
        for machine in statuses:
            if query.is_vm(status=machine) and machine['locked'] and \
                    (machines or machine['locked_by'] == owner):
                vmachines.append(machine['name'])
        if vmachines:
            log.info("updating host keys for %s", ' '.join(sorted(vmachines)))
            keys.do_update_keys(vmachines, _raise=False)
            # get statuses again to refresh any updated keys
            statuses = query.get_statuses(machines)
        if statuses:
            statuses = util.winnow(statuses, ctx.machine_type, 'machine_type')
            if not machines and ctx.owner is None and not ctx.all:
                ctx.owner = misc.get_user()
            statuses = util.winnow(statuses, ctx.owner, 'locked_by')
            statuses = util.winnow(statuses, ctx.status, 'up',
                                   lambda s: s['up'] == (ctx.status == 'up'))
            statuses = util.winnow(statuses, ctx.locked, 'locked',
                                   lambda s: s['locked'] == (ctx.locked == 'true'))
            statuses = util.winnow(statuses, ctx.desc, 'description')
            statuses = util.winnow(statuses, ctx.desc_pattern, 'description',
                                   lambda s: s['description'] and
                                   ctx.desc_pattern in s['description'])
            if ctx.json_query:
                statuses = util.json_matching_statuses(ctx.json_query,
                                                       statuses)
            statuses = util.winnow(statuses, ctx.os_type, 'os_type')
            statuses = util.winnow(statuses, ctx.os_version, 'os_version')

            # When listing, only show the vm_host's name, not every detail
            for s in statuses:
                if not query.is_vm(status=s):
                    continue
                # with an OpenStack API, there is no host for a VM
                if s['vm_host'] is None:
                    continue
                vm_host_name = s.get('vm_host', dict())['name']
                if vm_host_name:
                    s['vm_host'] = vm_host_name
            if ctx.list:
                print json.dumps(statuses, indent=4)

            elif ctx.brief:
                for s in sorted(statuses, key=lambda s: s.get('name')):
                    locked = "un" if s['locked'] == 0 else " "
                    mo = re.match('\w+@(\w+?)\..*', s['name'])
                    host = mo.group(1) if mo else s['name']
                    print '{host} {locked}locked {owner} "{desc}"'.format(
                        locked=locked, host=host,
                        owner=s['locked_by'], desc=s['description'])

            else:
                frag = {'targets': {}}
                for f in statuses:
                    frag['targets'][f['name']] = f['ssh_pub_key']
                print yaml.safe_dump(frag, default_flow_style=False)
        else:
            log.error('error retrieving lock statuses')
            ret = 1

    elif ctx.summary:
        do_summary(ctx)
        return 0

    elif ctx.lock:
        if not util.vps_version_or_type_valid(
                ctx.machine_type, ctx.os_type, ctx.os_version):
            log.error('Invalid os-type or version detected -- lock failed')
            return 1
        reimage_types = teuthology.provision.fog.get_types()
        reimage_machines = list()
        updatekeys_machines = list()
        for machine in machines:
            resp = ops.lock_one(machine, user, ctx.desc)
            if resp.ok:
                machine_status = resp.json()
                machine_type = machine_status['machine_type']
            if not resp.ok:
                ret = 1
                if not ctx.f:
                    return ret
            elif not query.is_vm(machine, machine_status):
                if machine_type in reimage_types:
                    # Reimage in parallel just below here
                    reimage_machines.append(machine)
                # Update keys last
                updatekeys_machines = list()
            else:
                machines_to_update.append(machine)
                teuthology.provision.create_if_vm(
                    ctx,
                    misc.canonicalize_hostname(machine),
                )
        with teuthology.parallel.parallel() as p:
            for machine in reimage_machines:
                p.spawn(teuthology.provision.reimage, ctx, machine)
        for machine in updatekeys_machines:
            keys.do_update_keys([machine])

    elif ctx.unlock:
        if ctx.owner is None and user is None:
            user = misc.get_user()
        # If none of them are vpm, do them all in one shot
        if not filter(query.is_vm, machines):
            res = ops.unlock_many(machines, user)
            return 0 if res else 1
        for machine in machines:
            if not ops.unlock_one(ctx, machine, user):
                ret = 1
                if not ctx.f:
                    return ret
            else:
                machines_to_update.append(machine)

    elif ctx.num_to_lock:
        result = ops.lock_many(ctx, ctx.num_to_lock, ctx.machine_type, user,
                               ctx.desc, ctx.os_type, ctx.os_version, ctx.arch)
        if not result:
            ret = 1
        else:
            machines_to_update = result.keys()
            if ctx.machine_type == 'vps':
                shortnames = ' '.join(
                    [misc.decanonicalize_hostname(name)
                     for name in result.keys()]
                )
                if len(result) < ctx.num_to_lock:
                    log.error("Locking failed.")
                    for machine in result:
                        ops.unlock_one(ctx, machine, user)
                    ret = 1
                else:
                    log.info("Successfully Locked:\n%s\n" % shortnames)
                    log.info(
                        "Unable to display keys at this time (virtual " +
                        "machines are booting).")
                    log.info(
                        "Please run teuthology-lock --list-targets %s once " +
                        "these machines come up.", shortnames)
            else:
                print yaml.safe_dump(
                    dict(targets=result), default_flow_style=False)

    elif ctx.update:
        assert ctx.desc is not None or ctx.status is not None, \
            'you must specify description or status to update'
        assert ctx.owner is None, 'only description and status may be updated'
        machines_to_update = machines

        if ctx.desc is not None or ctx.status is not None:
            for machine in machines_to_update:
                ops.update_lock(machine, ctx.desc, ctx.status)

    return ret
def schedule(): parser = argparse.ArgumentParser(description='Schedule ceph integration tests') parser.add_argument( 'config', metavar='CONFFILE', nargs='*', type=config_file, action=MergeConfig, default={}, help='config file to read', ) parser.add_argument( '--name', help='name of suite run the job is part of', ) parser.add_argument( '--last-in-suite', action='store_true', default=False, help='mark the last job in a suite so suite post-processing can be run', ) parser.add_argument( '--email', help='where to send the results of a suite (only applies to the last job in a suite)', ) parser.add_argument( '--timeout', help='how many seconds to wait for jobs to finish before emailing results (only applies to the last job in a suite', type=int, ) parser.add_argument( '--description', help='job description', ) parser.add_argument( '--owner', help='job owner', ) parser.add_argument( '--delete', metavar='JOBID', type=int, nargs='*', help='list of jobs to remove from the queue', ) parser.add_argument( '-n', '--num', default=1, type=int, help='number of times to run/queue the job' ) parser.add_argument( '-v', '--verbose', action='store_true', default=False, help='be more verbose', ) parser.add_argument( '-b', '--branch', default='master', help='which branch of teuthology to use', ) parser.add_argument( '-s', '--show', metavar='JOBID', type=int, nargs='*', help='output the contents of specified jobs in the queue', ) ctx = parser.parse_args() if not ctx.last_in_suite: assert not ctx.email, '--email is only applicable to the last job in a suite' assert not ctx.timeout, '--timeout is only applicable to the last job in a suite' from teuthology.misc import read_config, get_user if ctx.owner is None: ctx.owner = 'scheduled_{user}'.format(user=get_user()) read_config(ctx) import teuthology.queue beanstalk = teuthology.queue.connect(ctx) tube = 'teuthology' if ctx.branch != 'master': tube += '-' + ctx.branch beanstalk.use(tube) if ctx.show: for jobid in ctx.show: job = beanstalk.peek(jobid) if job is None and ctx.verbose: print 'job {jid} is not in the queue'.format(jid=jobid) else: print 'job {jid} contains: '.format(jid=jobid), job.body return if ctx.delete: for jobid in ctx.delete: job = beanstalk.peek(jobid) if job is None: print 'job {jid} is not in the queue'.format(jid=jobid) else: job.delete() return job_config = dict( config=ctx.config, name=ctx.name, last_in_suite=ctx.last_in_suite, email=ctx.email, description=ctx.description, owner=ctx.owner, verbose=ctx.verbose, ) if ctx.timeout is not None: job_config['results_timeout'] = ctx.timeout job = yaml.safe_dump(job_config) num = ctx.num while num > 0: jid = beanstalk.put(job, ttr=60*60*24) print 'Job scheduled with ID {jid}'.format(jid=jid) num -= 1
def lock_many(ctx, num, machine_type, user=None, description=None, os_type=None, os_version=None, arch=None): if user is None: user = misc.get_user() if not util.vps_version_or_type_valid(ctx.machine_type, os_type, os_version): log.error('Invalid os-type or version detected -- lock failed') return # In the for loop below we can safely query for all bare-metal machine_type # values at once. So, if we're being asked for 'plana,mira,burnupi', do it # all in one shot. If we are passed 'plana,mira,burnupi,vps', do one query # for 'plana,mira,burnupi' and one for 'vps' machine_types_list = misc.get_multi_machine_types(machine_type) if machine_types_list == ['vps']: machine_types = machine_types_list elif machine_types_list == ['openstack']: return lock_many_openstack(ctx, num, machine_type, user=user, description=description, arch=arch) elif 'vps' in machine_types_list: machine_types_non_vps = list(machine_types_list) machine_types_non_vps.remove('vps') machine_types_non_vps = '|'.join(machine_types_non_vps) machine_types = [machine_types_non_vps, 'vps'] else: machine_types_str = '|'.join(machine_types_list) machine_types = [ machine_types_str, ] for machine_type in machine_types: uri = os.path.join(config.lock_server, 'nodes', 'lock_many', '') data = dict( locked_by=user, count=num, machine_type=machine_type, description=description, ) # Only query for os_type/os_version if non-vps and non-libcloud, since # in that case we just create them. vm_types = ['vps'] + teuthology.provision.cloud.get_types() reimage_types = teuthology.provision.get_reimage_types() if machine_type not in vm_types + reimage_types: if os_type: data['os_type'] = os_type if os_version: data['os_version'] = os_version if arch: data['arch'] = arch log.debug("lock_many request: %s", repr(data)) response = requests.post( uri, data=json.dumps(data), headers={'content-type': 'application/json'}, ) if response.ok: machines = { misc.canonicalize_hostname(machine['name']): machine['ssh_pub_key'] for machine in response.json() } log.debug('locked {machines}'.format( machines=', '.join(machines.keys()))) if machine_type in vm_types: ok_machs = {} update_nodes(machines, True) for machine in machines: if teuthology.provision.create_if_vm(ctx, machine): ok_machs[machine] = machines[machine] else: log.error('Unable to create virtual machine: %s', machine) unlock_one(ctx, machine, user) ok_machs = do_update_keys(list(ok_machs.keys()))[1] update_nodes(ok_machs) return ok_machs elif machine_type in reimage_types: reimaged = dict() console_log_conf = dict( logfile_name='{shortname}_reimage.log', remotes=[ teuthology.orchestra.remote.Remote(machine) for machine in machines ], ) with console_log.task(ctx, console_log_conf): update_nodes(reimaged, True) with teuthology.parallel.parallel() as p: for machine in machines: p.spawn(teuthology.provision.reimage, ctx, machine, machine_type) reimaged[machine] = machines[machine] reimaged = do_update_keys(reimaged.keys())[1] update_nodes(reimaged) return reimaged return machines elif response.status_code == 503: log.error('Insufficient nodes available to lock %d %s nodes.', num, machine_type) log.error(response.text) else: log.error('Could not lock %d %s nodes, reason: unknown.', num, machine_type) return []
def lock_many(ctx, num, machine_type, user=None, description=None, os_type=None, os_version=None, arch=None): if user is None: user = misc.get_user() if not util.vps_version_or_type_valid( ctx.machine_type, os_type, os_version ): log.error('Invalid os-type or version detected -- lock failed') return # In the for loop below we can safely query for all bare-metal machine_type # values at once. So, if we're being asked for 'plana,mira,burnupi', do it # all in one shot. If we are passed 'plana,mira,burnupi,vps', do one query # for 'plana,mira,burnupi' and one for 'vps' machine_types_list = misc.get_multi_machine_types(machine_type) if machine_types_list == ['vps']: machine_types = machine_types_list elif machine_types_list == ['openstack']: return lock_many_openstack(ctx, num, machine_type, user=user, description=description, arch=arch) elif 'vps' in machine_types_list: machine_types_non_vps = list(machine_types_list) machine_types_non_vps.remove('vps') machine_types_non_vps = '|'.join(machine_types_non_vps) machine_types = [machine_types_non_vps, 'vps'] else: machine_types_str = '|'.join(machine_types_list) machine_types = [machine_types_str, ] for machine_type in machine_types: uri = os.path.join(config.lock_server, 'nodes', 'lock_many', '') data = dict( locked_by=user, count=num, machine_type=machine_type, description=description, ) # Only query for os_type/os_version if non-vps and non-libcloud, since # in that case we just create them. vm_types = ['vps'] + teuthology.provision.cloud.get_types() reimage_types = teuthology.provision.fog.get_types() if machine_type not in vm_types + reimage_types: if os_type: data['os_type'] = os_type if os_version: data['os_version'] = os_version if arch: data['arch'] = arch log.debug("lock_many request: %s", repr(data)) response = requests.post( uri, data=json.dumps(data), headers={'content-type': 'application/json'}, ) if response.ok: machines = {misc.canonicalize_hostname(machine['name']): machine['ssh_pub_key'] for machine in response.json()} log.debug('locked {machines}'.format( machines=', '.join(machines.keys()))) if machine_type in vm_types: ok_machs = {} for machine in machines: if teuthology.provision.create_if_vm(ctx, machine): ok_machs[machine] = machines[machine] else: log.error('Unable to create virtual machine: %s', machine) unlock_one(ctx, machine, user) ok_machs = keys.do_update_keys(ok_machs.keys())[1] return ok_machs elif machine_type in reimage_types: reimaged = dict() console_log_conf = dict( logfile_name='{shortname}_reimage.log', remotes=[teuthology.orchestra.remote.Remote(machine) for machine in machines], ) with console_log.task( ctx, console_log_conf): with teuthology.parallel.parallel() as p: for machine in machines: p.spawn(teuthology.provision.reimage, ctx, machine) reimaged[machine] = machines[machine] reimaged = keys.do_update_keys(reimaged.keys())[1] return reimaged return machines elif response.status_code == 503: log.error('Insufficient nodes available to lock %d %s nodes.', num, machine_type) log.error(response.text) else: log.error('Could not lock %d %s nodes, reason: unknown.', num, machine_type) return []
def test_owner(self):
    args = self.basic_args
    args['--owner'] = None
    job_dict = build_config(self.basic_args)
    assert job_dict['owner'] == 'scheduled_%s' % get_user()
def main(ctx): if ctx.owner is None: ctx.owner = 'scheduled_{user}'.format(user=get_user()) read_config(ctx) beanstalk = teuthology.beanstalk.connect() tube = ctx.worker beanstalk.use(tube) if ctx.show: for job_id in ctx.show: job = beanstalk.peek(job_id) if job is None and ctx.verbose: print 'job {jid} is not in the queue'.format(jid=job_id) else: print '--- job {jid} priority {prio} ---\n'.format( jid=job_id, prio=job.stats()['pri']), job.body return if ctx.delete: for job_id in ctx.delete: job = beanstalk.peek(job_id) if job is None: print 'job {jid} is not in the queue'.format(jid=job_id) else: job.delete() name = yaml.safe_load(job.body).get('name') if name: report.try_delete_jobs(name, job_id) return # strip out targets; the worker will allocate new ones when we run # the job with --lock. if ctx.config.get('targets'): del ctx.config['targets'] job_config = dict( name=ctx.name, last_in_suite=ctx.last_in_suite, email=ctx.email, description=ctx.description, owner=ctx.owner, verbose=ctx.verbose, machine_type=ctx.worker, ) # Merge job_config and ctx.config job_config.update(ctx.config) if ctx.timeout is not None: job_config['results_timeout'] = ctx.timeout job = yaml.safe_dump(job_config) num = ctx.num while num > 0: jid = beanstalk.put( job, ttr=60 * 60 * 24, priority=ctx.priority, ) print 'Job scheduled with name {name} and ID {jid}'.format( name=ctx.name, jid=jid) job_config['job_id'] = str(jid) report.try_push_job_info(job_config, dict(status='queued')) num -= 1
def main(): from gevent import monkey; monkey.patch_all(dns=False) from .orchestra import monkey; monkey.patch_all() import logging log = logging.getLogger(__name__) ctx = parse_args() loglevel = logging.INFO if ctx.verbose: loglevel = logging.DEBUG logging.basicConfig( level=loglevel, ) if 'targets' in ctx.config and 'roles' in ctx.config: targets = len(ctx.config['targets']) roles = len(ctx.config['roles']) assert targets >= roles, \ '%d targets are needed for all roles but found %d listed.' % (roles, targets) if ctx.block: assert ctx.lock, \ 'the --block option is only supported with the --lock option' from teuthology.misc import read_config read_config(ctx) log.debug('\n '.join(['Config:', ] + yaml.safe_dump(ctx.config, default_flow_style=False).splitlines())) ctx.summary = dict(success=True) if ctx.owner is None: from teuthology.misc import get_user ctx.owner = get_user() ctx.summary['owner'] = ctx.owner if ctx.description is not None: ctx.summary['description'] = ctx.description if ctx.archive is not None: os.mkdir(ctx.archive) handler = logging.FileHandler( filename=os.path.join(ctx.archive, 'teuthology.log'), ) formatter = logging.Formatter( fmt='%(asctime)s.%(msecs)03d %(levelname)s:%(name)s:%(message)s', datefmt='%Y-%m-%dT%H:%M:%S', ) handler.setFormatter(formatter) logging.getLogger().addHandler(handler) with file(os.path.join(ctx.archive, 'pid'), 'w') as f: f.write('%d' % os.getpid()) with file(os.path.join(ctx.archive, 'owner'), 'w') as f: f.write(ctx.owner + '\n') with file(os.path.join(ctx.archive, 'orig.config.yaml'), 'w') as f: yaml.safe_dump(ctx.config, f, default_flow_style=False) for task in ctx.config['tasks']: assert 'kernel' not in task, \ 'kernel installation shouldn be a base-level item, not part of the tasks list' init_tasks = [] if ctx.lock: assert 'targets' not in ctx.config, \ 'You cannot specify targets in a config file when using the --lock option' init_tasks.append({'internal.lock_machines': len(ctx.config['roles'])}) init_tasks.extend([ {'internal.save_config': None}, {'internal.check_lock': None}, {'internal.connect': None}, {'internal.check_conflict': None}, ]) if 'kernel' in ctx.config: init_tasks.append({'kernel': ctx.config['kernel']}) init_tasks.extend([ {'internal.base': None}, {'internal.archive': None}, {'internal.coredump': None}, {'internal.syslog': None}, {'internal.timer': None}, ]) ctx.config['tasks'][:0] = init_tasks from teuthology.run_tasks import run_tasks try: run_tasks(tasks=ctx.config['tasks'], ctx=ctx) finally: if not ctx.summary.get('success') and ctx.config.get('nuke-on-error'): from teuthology.nuke import nuke # only unlock if we locked them in the first place nuke(ctx, log, ctx.lock) if ctx.archive is not None: with file(os.path.join(ctx.archive, 'summary.yaml'), 'w') as f: yaml.safe_dump(ctx.summary, f, default_flow_style=False) if not ctx.summary.get('success', True): import sys sys.exit(1)
def main(): from gevent import monkey monkey.patch_all(dns=False) from .orchestra import monkey monkey.patch_all() import logging ctx = parse_args() set_up_logging(ctx) log = logging.getLogger(__name__) if ctx.owner is None: from teuthology.misc import get_user ctx.owner = get_user() write_initial_metadata(ctx) if 'targets' in ctx.config and 'roles' in ctx.config: targets = len(ctx.config['targets']) roles = len(ctx.config['roles']) assert targets >= roles, \ '%d targets are needed for all roles but found %d listed.' % (roles, targets) machine_type = ctx.machine_type if machine_type is None: fallback_default = ctx.config.get('machine_type', 'plana') machine_type = ctx.config.get('machine-type', fallback_default) if ctx.block: assert ctx.lock, \ 'the --block option is only supported with the --lock option' from teuthology.misc import read_config read_config(ctx) log.debug('\n '.join([ 'Config:', ] + yaml.safe_dump(ctx.config, default_flow_style=False).splitlines())) ctx.summary = dict(success=True) ctx.summary['owner'] = ctx.owner if ctx.description is not None: ctx.summary['description'] = ctx.description for task in ctx.config['tasks']: assert 'kernel' not in task, \ 'kernel installation shouldn be a base-level item, not part of the tasks list' init_tasks = [] if ctx.lock: assert 'targets' not in ctx.config, \ 'You cannot specify targets in a config file when using the --lock option' init_tasks.append({ 'internal.lock_machines': (len(ctx.config['roles']), machine_type) }) init_tasks.extend([ { 'internal.save_config': None }, { 'internal.check_lock': None }, { 'internal.connect': None }, { 'internal.check_conflict': None }, { 'internal.check_ceph_data': None }, { 'internal.vm_setup': None }, ]) if 'kernel' in ctx.config: from teuthology.misc import get_distro distro = get_distro(ctx) if distro == 'ubuntu': init_tasks.append({'kernel': ctx.config['kernel']}) init_tasks.extend([ { 'internal.base': None }, { 'internal.archive': None }, { 'internal.coredump': None }, { 'internal.sudo': None }, { 'internal.syslog': None }, { 'internal.timer': None }, ]) ctx.config['tasks'][:0] = init_tasks from teuthology.run_tasks import run_tasks try: run_tasks(tasks=ctx.config['tasks'], ctx=ctx) finally: if not ctx.summary.get('success') and ctx.config.get('nuke-on-error'): from teuthology.nuke import nuke # only unlock if we locked them in the first place nuke(ctx, log, ctx.lock) if ctx.archive is not None: with file(os.path.join(ctx.archive, 'summary.yaml'), 'w') as f: yaml.safe_dump(ctx.summary, f, default_flow_style=False) with contextlib.closing(StringIO.StringIO()) as f: yaml.safe_dump(ctx.summary, f) log.info('Summary data:\n%s' % f.getvalue()) with contextlib.closing(StringIO.StringIO()) as f: if 'email-on-error' in ctx.config and not ctx.summary.get( 'success', False): yaml.safe_dump(ctx.summary, f) yaml.safe_dump(ctx.config, f) emsg = f.getvalue() subject = "Teuthology error -- %s" % ctx.summary[ 'failure_reason'] from teuthology.suite import email_results email_results(subject, "Teuthology", ctx.config['email-on-error'], emsg) if ctx.summary.get('success', True): log.info('pass') else: log.info('FAIL') import sys sys.exit(1)
'--all and listing specific machines are incompatible'

if ctx.brief:
    assert ctx.list, '--brief only applies to --list'

if ctx.list or ctx.list_targets:
    assert ctx.desc is None, '--desc does nothing with --list'

    if machines:
        statuses = [get_status(ctx, machine) for machine in machines]
    else:
        statuses = list_locks(ctx)

    if statuses:
        if not machines and ctx.owner is None and not ctx.all:
            ctx.owner = teuthology.get_user()
        if ctx.owner is not None:
            statuses = [status for status in statuses
                        if status['locked_by'] == ctx.owner]
        if ctx.status is not None:
            statuses = [status for status in statuses
                        if status['up'] == (ctx.status == 'up')]
        if ctx.locked is not None:
            statuses = [status for status in statuses
                        if status['locked'] == (ctx.locked == 'true')]
        if ctx.list:
            if ctx.brief:
                for s in statuses:
                    locked = "un" if s['locked'] == 0 else " "
                    mo = re.match('\w+@(\w+?)\..*', s['name'])
                    host = mo.group(1) if mo else s['name']
def schedule(): parser = argparse.ArgumentParser( description='Schedule ceph integration tests') parser.add_argument( 'config', metavar='CONFFILE', nargs='*', type=config_file, action=MergeConfig, default={}, help='config file to read', ) parser.add_argument( '--name', help='name of suite run the job is part of', ) parser.add_argument( '--last-in-suite', action='store_true', default=False, help='mark the last job in a suite so suite post-processing can be run', ) parser.add_argument( '--email', help= 'where to send the results of a suite (only applies to the last job in a suite)', ) parser.add_argument( '--timeout', help= 'how many seconds to wait for jobs to finish before emailing results (only applies to the last job in a suite', type=int, ) parser.add_argument( '--description', help='job description', ) parser.add_argument( '--owner', help='job owner', ) parser.add_argument( '--delete', metavar='JOBID', type=int, nargs='*', help='list of jobs to remove from the queue', ) parser.add_argument('-n', '--num', default=1, type=int, help='number of times to run/queue the job') parser.add_argument('-p', '--priority', default=1000, type=int, help='beanstalk priority (lower is sooner)') parser.add_argument( '-v', '--verbose', action='store_true', default=False, help='be more verbose', ) parser.add_argument( '-w', '--worker', default='plana', help='which worker to use (type of machine)', ) parser.add_argument( '-s', '--show', metavar='JOBID', type=int, nargs='*', help='output the contents of specified jobs in the queue', ) ctx = parser.parse_args() if not ctx.last_in_suite: assert not ctx.email, '--email is only applicable to the last job in a suite' assert not ctx.timeout, '--timeout is only applicable to the last job in a suite' from teuthology.misc import read_config, get_user if ctx.owner is None: ctx.owner = 'scheduled_{user}'.format(user=get_user()) read_config(ctx) import teuthology.queue beanstalk = teuthology.queue.connect(ctx) tube = ctx.worker beanstalk.use(tube) if ctx.show: for job_id in ctx.show: job = beanstalk.peek(job_id) if job is None and ctx.verbose: print 'job {jid} is not in the queue'.format(jid=job_id) else: print '--- job {jid} priority {prio} ---\n'.format( jid=job_id, prio=job.stats()['pri']), job.body return if ctx.delete: for job_id in ctx.delete: job = beanstalk.peek(job_id) if job is None: print 'job {jid} is not in the queue'.format(jid=job_id) else: job.delete() return # strip out targets; the worker will allocate new ones when we run # the job with --lock. if ctx.config.get('targets'): del ctx.config['targets'] job_config = dict( name=ctx.name, last_in_suite=ctx.last_in_suite, email=ctx.email, description=ctx.description, owner=ctx.owner, verbose=ctx.verbose, ) # Merge job_config and ctx.config job_config.update(ctx.config) if ctx.timeout is not None: job_config['results_timeout'] = ctx.timeout job = yaml.safe_dump(job_config) num = ctx.num while num > 0: jid = beanstalk.put( job, ttr=60 * 60 * 24, priority=ctx.priority, ) print 'Job scheduled with ID {jid}'.format(jid=jid) num -= 1
def main(args): verbose = args["--verbose"] archive = args["--archive"] owner = args["--owner"] config = args["<config>"] name = args["--name"] description = args["--description"] machine_type = args["--machine-type"] block = args["--block"] lock = args["--lock"] suite_path = args["--suite-path"] os_type = args["--os-type"] os_version = args["--os-version"] set_up_logging(verbose, archive) # print the command being ran log.debug("Teuthology command: {0}".format(get_teuthology_command(args))) if owner is None: args["--owner"] = owner = get_user() config = setup_config(config) if archive is not None and 'archive_path' not in config: config['archive_path'] = archive write_initial_metadata(archive, config, name, description, owner) report.try_push_job_info(config, dict(status='running')) machine_type = get_machine_type(machine_type, config) args["--machine-type"] = machine_type if block: assert lock, \ 'the --block option is only supported with the --lock option' log.info('\n '.join([ 'Config:', ] + yaml.safe_dump(config, default_flow_style=False).splitlines())) args["summary"] = get_summary(owner, description) ceph_repo = config.get('repo') if ceph_repo: teuth_config.ceph_git_url = ceph_repo suite_repo = config.get('suite_repo') if suite_repo: teuth_config.ceph_qa_suite_git_url = suite_repo # overwrite the config values of os_{type,version} if corresponding # command-line arguments are provided if os_type: config["os_type"] = os_type if os_version: config["os_version"] = os_version config["tasks"] = validate_tasks(config) init_tasks = get_initial_tasks(lock, config, machine_type) # prepend init_tasks to the front of the task list config['tasks'][:0] = init_tasks if suite_path is not None: config['suite_path'] = suite_path # fetches the tasks and returns a new suite_path if needed config["suite_path"] = fetch_tasks_if_needed(config) # If the job has a 'use_shaman' key, use that value to override the global # config's value. if config.get('use_shaman') is not None: teuth_config.use_shaman = config['use_shaman'] # create a FakeNamespace instance that mimics the old argparse way of doing # things we do this so we can pass it to run_tasks without porting those # tasks to the new way of doing things right now args["<config>"] = config fake_ctx = FakeNamespace(args) # store on global config if interactive-on-error, for contextutil.nested() # FIXME this should become more generic, and the keys should use # '_' uniformly if fake_ctx.config.get('interactive-on-error'): teuthology.config.config.ctx = fake_ctx try: run_tasks(tasks=config['tasks'], ctx=fake_ctx) finally: # print to stdout the results and possibly send an email on any errors report_outcome(config, archive, fake_ctx.summary, fake_ctx)
def schedule(): parser = argparse.ArgumentParser(description='Schedule ceph integration tests') parser.add_argument( 'config', metavar='CONFFILE', nargs='*', type=config_file, action=MergeConfig, default={}, help='config file to read', ) parser.add_argument( '--name', help='name of suite run the job is part of', ) parser.add_argument( '--last-in-suite', action='store_true', default=False, help='mark the last job in a suite so suite post-processing can be run', ) parser.add_argument( '--email', help='where to send the results of a suite (only applies to the last job in a suite)', ) parser.add_argument( '--timeout', help='how many seconds to wait for jobs to finish before emailing results (only applies to the last job in a suite', type=int, ) parser.add_argument( '--description', help='job description', ) parser.add_argument( '--owner', help='job owner', ) parser.add_argument( '--delete', metavar='JOBID', type=int, nargs='*', help='list of jobs to remove from the queue', ) parser.add_argument( '-n', '--num', default=1, type=int, help='number of times to run/queue the job' ) parser.add_argument( '-p', '--priority', default=1000, type=int, help='beanstalk priority (lower is sooner)' ) parser.add_argument( '-v', '--verbose', action='store_true', default=False, help='be more verbose', ) parser.add_argument( '-w', '--worker', default='plana', help='which worker to use (type of machine)', ) parser.add_argument( '-s', '--show', metavar='JOBID', type=int, nargs='*', help='output the contents of specified jobs in the queue', ) ctx = parser.parse_args() if not ctx.last_in_suite: assert not ctx.email, '--email is only applicable to the last job in a suite' assert not ctx.timeout, '--timeout is only applicable to the last job in a suite' from teuthology.misc import read_config, get_user if ctx.owner is None: ctx.owner = 'scheduled_{user}'.format(user=get_user()) read_config(ctx) import teuthology.queue beanstalk = teuthology.queue.connect(ctx) tube=ctx.worker beanstalk.use(tube) if ctx.show: for job_id in ctx.show: job = beanstalk.peek(job_id) if job is None and ctx.verbose: print 'job {jid} is not in the queue'.format(jid=job_id) else: print '--- job {jid} priority {prio} ---\n'.format( jid=job_id, prio=job.stats()['pri']), job.body return if ctx.delete: for job_id in ctx.delete: job = beanstalk.peek(job_id) if job is None: print 'job {jid} is not in the queue'.format(jid=job_id) else: job.delete() return # strip out targets; the worker will allocate new ones when we run # the job with --lock. if ctx.config.get('targets'): del ctx.config['targets'] job_config = dict( name=ctx.name, last_in_suite=ctx.last_in_suite, email=ctx.email, description=ctx.description, owner=ctx.owner, verbose=ctx.verbose, ) # Merge job_config and ctx.config job_config.update(ctx.config) if ctx.timeout is not None: job_config['results_timeout'] = ctx.timeout job = yaml.safe_dump(job_config) num = ctx.num while num > 0: jid = beanstalk.put( job, ttr=60*60*24, priority=ctx.priority, ) print 'Job scheduled with ID {jid}'.format(jid=jid) num -= 1