def run_rest_api_daemon(ctx, api_clients):
    """
    Wrapper that starts the rest api daemons
    """
    if not hasattr(ctx, 'daemons'):
        ctx.daemons = DaemonGroup()
    remotes = ctx.cluster.only(teuthology.is_type('client')).remotes
    for rems, roles in remotes.iteritems():
        for whole_id_ in roles:
            if whole_id_ in api_clients:
                id_ = whole_id_[len('clients'):]
                run_cmd = [
                    'sudo',
                    'daemon-helper',
                    'kill',
                    'ceph-rest-api',
                    '-n',
                    'client.rest{id}'.format(id=id_),
                ]
                cl_rest_id = 'client.rest{id}'.format(id=id_)
                ctx.daemons.add_daemon(rems, 'restapi',
                                       cl_rest_id,
                                       args=run_cmd,
                                       logger=log.getChild(cl_rest_id),
                                       stdin=run.PIPE,
                                       wait=False,
                                       )
                for i in range(1, 12):
                    log.info('testing for ceph-rest-api try {0}'.format(i))
                    run_cmd = [
                        'wget',
                        '-O',
                        '/dev/null',
                        '-q',
                        'http://localhost:5000/api/v0.1/status'
                    ]
                    proc = rems.run(
                        args=run_cmd,
                        check_status=False
                    )
                    if proc.exitstatus == 0:
                        break
                    time.sleep(5)
                if proc.exitstatus != 0:
                    raise RuntimeError('Cannot contact ceph-rest-api')
    try:
        yield

    finally:
        """
        TO DO: destroy daemons started -- modify iter_daemons_of_role
        """
        teuthology.stop_daemons_of_type(ctx, 'restapi')
def run_pykmip(ctx, config):
    assert isinstance(config, dict)
    if hasattr(ctx, 'daemons'):
        pass
    elif has_ceph_task(ctx.config['tasks']):
        log.info('Delay start pykmip so ceph can do once-only daemon logic')
        try:
            yield
        finally:
            pass
    else:
        ctx.daemons = DaemonGroup()
        log.info('Running pykmip...')

    pykmipdir = get_pykmip_dir(ctx)

    for (client, _) in config.items():
        (remote,) = ctx.cluster.only(client).remotes.keys()
        cluster_name, _, client_id = teuthology.split_role(client)

        # start the public endpoint
        client_public_with_id = 'pykmip.public' + '.' + client_id

        run_cmd = 'cd ' + pykmipdir + ' && ' + \
                  '. .pykmipenv/bin/activate && ' + \
                  'HOME={}'.format(pykmipdir) + ' && ' + \
                  'exec pykmip-server -f pykmip.conf -l ' + \
                  pykmipdir + '/pykmip.log & { read; kill %1; }'

        ctx.daemons.add_daemon(
            remote, 'pykmip', client_public_with_id,
            cluster=cluster_name,
            args=['bash', '-c', run_cmd],
            logger=log.getChild(client),
            stdin=run.PIPE,
            cwd=pykmipdir,
            wait=False,
            check_status=False,
        )

    # sleep driven synchronization
    time.sleep(10)
    try:
        yield
    finally:
        log.info('Stopping PyKMIP instance')
        ctx.daemons.get_daemon('pykmip', client_public_with_id,
                               cluster_name).stop()
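# The run_cmd above backgrounds pykmip-server and leaves a foreground
# "{ read; kill %1; }" waiting on the daemon's stdin (wired up as run.PIPE):
# when that pipe is closed or receives a line, `read` returns and `kill %1`
# terminates the background server. A minimal local sketch of that
# stop-on-stdin idiom, with `sleep 300` standing in for pykmip-server
# (illustrative only, not part of the teuthology API):
import subprocess
import time

cmd = 'sleep 300 & { read; kill %1; }'
proc = subprocess.Popen(['bash', '-c', cmd], stdin=subprocess.PIPE)

time.sleep(2)          # the background "server" is running at this point
proc.stdin.close()     # EOF ends the foreground `read`, which then kills %1
proc.wait(timeout=10)  # the wrapper shell exits once the job has been killed
print('stopped, wrapper exit code:', proc.returncode)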
def __init__(self):
    self.config = {}
    self.teuthology_config = teuth_config
    self.cluster = LocalCluster()
    self.daemons = DaemonGroup()

    # Shove some LocalDaemons into the ctx.daemons DaemonGroup instance so that any
    # tests that want to look these up via ctx can do so.
    # Inspect ceph.conf to see what roles exist
    for conf_line in open("ceph.conf").readlines():
        for svc_type in ["mon", "osd", "mds", "mgr"]:
            if svc_type not in self.daemons.daemons:
                self.daemons.daemons[svc_type] = {}
            match = re.match("^\[{0}\.(.+)\]$".format(svc_type), conf_line)
            if match:
                svc_id = match.group(1)
                self.daemons.daemons[svc_type][svc_id] = LocalDaemon(svc_type, svc_id)
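# The ceph.conf scan above builds a nested dict keyed by service type and then
# by service id, with a LocalDaemon handle as each value. A minimal standalone
# sketch of that parsing, using made-up conf contents (real runs read
# "ceph.conf" from disk; the placeholder strings below stand in for LocalDaemon
# objects):
import re

conf_text = """\
[global]
fsid = 00000000-0000-0000-0000-000000000000
[mon.a]
host = node1
[osd.0]
host = node1
[mds.x]
host = node2
"""

daemons = {}
for conf_line in conf_text.splitlines():
    for svc_type in ("mon", "osd", "mds", "mgr"):
        daemons.setdefault(svc_type, {})
        match = re.match(r"^\[{0}\.(.+)\]$".format(svc_type), conf_line)
        if match:
            svc_id = match.group(1)
            daemons[svc_type][svc_id] = "<LocalDaemon {} {}>".format(svc_type, svc_id)

print(daemons["mon"])  # {'a': '<LocalDaemon mon a>'}
print(daemons["osd"])  # {'0': '<LocalDaemon osd 0>'}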
def initialize_config(ctx, config):
    cluster_name = config['cluster']
    testdir = teuthology.get_testdir(ctx)

    ctx.ceph[cluster_name].thrashers = []
    # fixme: setup watchdog, ala ceph.py

    ctx.ceph[cluster_name].roleless = False  # see below

    first_ceph_cluster = False
    if not hasattr(ctx, 'daemons'):
        first_ceph_cluster = True

    # cephadm mode?
    if 'cephadm_mode' not in config:
        config['cephadm_mode'] = 'root'
    assert config['cephadm_mode'] in ['root', 'cephadm-package']
    if config['cephadm_mode'] == 'root':
        ctx.cephadm = testdir + '/cephadm'
    else:
        ctx.cephadm = 'cephadm'  # in the path

    if first_ceph_cluster:
        # FIXME: this is global for all clusters
        ctx.daemons = DaemonGroup(
            use_cephadm=ctx.cephadm)

    # uuid
    fsid = str(uuid.uuid1())
    log.info('Cluster fsid is %s' % fsid)
    ctx.ceph[cluster_name].fsid = fsid

    # mon ips
    log.info('Choosing monitor IPs and ports...')
    remotes_and_roles = ctx.cluster.remotes.items()
    ips = [host for (host, port) in
           (remote.ssh.get_transport().getpeername()
            for (remote, role_list) in remotes_and_roles)]

    if config.get('roleless', False):
        # mons will be named after hosts
        first_mon = None
        for remote, _ in remotes_and_roles:
            ctx.cluster.remotes[remote].append('mon.' + remote.shortname)
            if not first_mon:
                first_mon = remote.shortname
                bootstrap_remote = remote
        log.info('No mon roles; fabricating mons')

    roles = [role_list for (remote, role_list) in ctx.cluster.remotes.items()]

    ctx.ceph[cluster_name].mons = get_mons(
        roles, ips, cluster_name,
        mon_bind_msgr2=config.get('mon_bind_msgr2', True),
        mon_bind_addrvec=config.get('mon_bind_addrvec', True),
    )
    log.info('Monitor IPs: %s' % ctx.ceph[cluster_name].mons)

    if config.get('roleless', False):
        ctx.ceph[cluster_name].roleless = True
        ctx.ceph[cluster_name].bootstrap_remote = bootstrap_remote
        ctx.ceph[cluster_name].first_mon = first_mon
        ctx.ceph[cluster_name].first_mon_role = 'mon.' + first_mon
    else:
        first_mon_role = sorted(ctx.ceph[cluster_name].mons.keys())[0]
        _, _, first_mon = teuthology.split_role(first_mon_role)
        (bootstrap_remote,) = ctx.cluster.only(first_mon_role).remotes.keys()
        log.info('First mon is mon.%s on %s' % (first_mon,
                                                bootstrap_remote.shortname))
        ctx.ceph[cluster_name].bootstrap_remote = bootstrap_remote
        ctx.ceph[cluster_name].first_mon = first_mon
        ctx.ceph[cluster_name].first_mon_role = first_mon_role

        others = ctx.cluster.remotes[bootstrap_remote]
        mgrs = sorted([r for r in others
                       if teuthology.is_type('mgr', cluster_name)(r)])
        if not mgrs:
            raise RuntimeError('no mgrs on the same host as first mon %s' % first_mon)
        _, _, first_mgr = teuthology.split_role(mgrs[0])
        log.info('First mgr is %s' % (first_mgr))
        ctx.ceph[cluster_name].first_mgr = first_mgr

    yield
def task(ctx, config):
    """
    Setup samba smbd with ceph vfs module.  This task assumes the samba
    package has already been installed via the install task.

    The config is optional and defaults to starting samba on all nodes.
    If a config is given, it is expected to be a list of
    samba nodes to start smbd servers on.

    Example that starts smbd on all samba nodes::

        tasks:
        - install:
        - install:
            project: samba
            extra_packages: ['samba']
        - ceph:
        - samba:
        - interactive:

    Example that starts smbd on just one of the samba nodes and cifs on the other::

        tasks:
        - samba: [samba.0]
        - cifs: [samba.1]

    An optional backend can be specified, and requires a path which smbd will
    use as the backend storage location:

        roles:
            - [osd.0, osd.1, osd.2, mon.0, mon.1, mon.2, mds.a]
            - [client.0, samba.0]

        tasks:
        - ceph:
        - ceph-fuse: [client.0]
        - samba:
            samba.0:
                cephfuse: "{testdir}/mnt.0"

    This mounts ceph to {testdir}/mnt.0 using fuse, and starts smbd with
    a UNC of //localhost/cephfuse.  Access through that UNC will be on
    the ceph fuse mount point.

    If no arguments are specified in the samba
    role, the default behavior is to enable the ceph UNC //localhost/ceph
    and use the ceph vfs module as the smbd backend.

    :param ctx: Context
    :param config: Configuration
    """
    log.info("Setting up smbd with ceph vfs...")
    assert config is None or isinstance(config, list) or isinstance(config, dict), \
        "task samba got invalid config"

    if config is None:
        config = dict(('samba.{id}'.format(id=id_), None)
                      for id_ in teuthology.all_roles_of_type(ctx.cluster, 'samba'))
    elif isinstance(config, list):
        config = dict((name, None) for name in config)

    samba_servers = list(get_sambas(ctx=ctx, roles=config.keys()))

    testdir = teuthology.get_testdir(ctx)

    if not hasattr(ctx, 'daemons'):
        ctx.daemons = DaemonGroup()

    for id_, remote in samba_servers:

        rolestr = "samba.{id_}".format(id_=id_)

        confextras = """vfs objects = ceph
  ceph:config_file = /etc/ceph/ceph.conf"""

        unc = "ceph"
        backend = "/"

        if config[rolestr] is not None:
            # verify that there's just one parameter in role
            if len(config[rolestr]) != 1:
                log.error("samba config for role samba.{id_} must have only one parameter".format(id_=id_))
                raise Exception('invalid config')
            confextras = ""
            (unc, backendstr) = config[rolestr].items()[0]
            backend = backendstr.format(testdir=testdir)

        # on first samba role, set ownership and permissions of ceph root
        # so that samba tests succeed
        if config[rolestr] is None and id_ == samba_servers[0][0]:
            remote.run(
                args=[
                    'mkdir', '-p', '/tmp/cmnt', run.Raw('&&'),
                    'sudo', 'ceph-fuse', '/tmp/cmnt', run.Raw('&&'),
                    'sudo', 'chown', 'ubuntu:ubuntu', '/tmp/cmnt/', run.Raw('&&'),
                    'sudo', 'chmod', '1777', '/tmp/cmnt/', run.Raw('&&'),
                    'sudo', 'umount', '/tmp/cmnt/', run.Raw('&&'),
                    'rm', '-rf', '/tmp/cmnt',
                ],
            )
        else:
            remote.run(
                args=[
                    'sudo', 'chown', 'ubuntu:ubuntu', backend, run.Raw('&&'),
                    'sudo', 'chmod', '1777', backend,
                ],
            )

        teuthology.sudo_write_file(remote, "/usr/local/samba/etc/smb.conf", """
[global]
  workgroup = WORKGROUP
  netbios name = DOMAIN

[{unc}]
  path = {backend}
  {extras}
  writeable = yes
  valid users = ubuntu
""".format(extras=confextras, unc=unc, backend=backend))

        # create ubuntu user
        remote.run(
            args=[
                'sudo', '/usr/local/samba/bin/smbpasswd', '-e', 'ubuntu',
                run.Raw('||'),
                'printf', run.Raw('"ubuntu\nubuntu\n"'),
                run.Raw('|'),
                'sudo', '/usr/local/samba/bin/smbpasswd', '-s', '-a', 'ubuntu'
            ])

        smbd_cmd = [
            'sudo',
            'daemon-helper',
            'term',
            'nostdin',
            '/usr/local/samba/sbin/smbd',
            '-F',
        ]
        ctx.daemons.add_daemon(remote, 'smbd', id_,
                               args=smbd_cmd,
                               logger=log.getChild("smbd.{id_}".format(id_=id_)),
                               stdin=run.PIPE,
                               wait=False,
                               )

    # let smbd initialize, probably a better way...
    seconds_to_sleep = 100
    log.info('Sleeping for %s seconds...' % seconds_to_sleep)
    time.sleep(seconds_to_sleep)
    log.info('Sleeping stopped...')

    try:
        yield
    finally:
        log.info('Stopping smbd processes...')
        exc_info = (None, None, None)
        for d in ctx.daemons.iter_daemons_of_role('smbd'):
            try:
                d.stop()
            except (run.CommandFailedError,
                    run.CommandCrashedError,
                    run.ConnectionLostError):
                exc_info = sys.exc_info()
                log.exception('Saw exception from %s.%s', d.role, d.id_)
        if exc_info != (None, None, None):
            raise exc_info[0], exc_info[1], exc_info[2]

        for id_, remote in samba_servers:
            remote.run(
                args=[
                    'sudo',
                    'rm', '-rf',
                    '/usr/local/samba/etc/smb.conf',
                    '/usr/local/samba/private/*',
                    '/usr/local/samba/var/run/',
                    '/usr/local/samba/var/locks',
                    '/usr/local/samba/var/lock',
                ],
            )
            # make sure daemons are gone
            try:
                remote.run(
                    args=[
                        'while',
                        'sudo', 'killall', '-9', 'smbd',
                        run.Raw(';'),
                        'do', 'sleep', '1',
                        run.Raw(';'),
                        'done',
                    ],
                )
                remote.run(
                    args=[
                        'sudo',
                        'lsof',
                        backend,
                    ],
                    check_status=False
                )
                remote.run(
                    args=[
                        'sudo',
                        'fuser',
                        '-M',
                        backend,
                    ],
                    check_status=False
                )
            except Exception:
                log.exception("Saw exception")
                pass
def task(ctx, config):
    if config is None:
        config = {}

    assert isinstance(config, dict), \
        "task only supports a dictionary for configuration"

    overrides = ctx.config.get('overrides', {})
    teuthology.deep_merge(config, overrides.get('ceph', {}))
    log.info('Config: ' + str(config))

    testdir = teuthology.get_testdir(ctx)

    # set up cluster context
    first_ceph_cluster = False
    if not hasattr(ctx, 'daemons'):
        first_ceph_cluster = True
    if not hasattr(ctx, 'ceph'):
        ctx.ceph = {}
        ctx.managers = {}
    if 'cluster' not in config:
        config['cluster'] = 'ceph'
    cluster_name = config['cluster']
    ctx.ceph[cluster_name] = argparse.Namespace()

    ctx.ceph[cluster_name].thrashers = []
    # fixme: setup watchdog, ala ceph.py

    # cephadm mode?
    if 'cephadm_mode' not in config:
        config['cephadm_mode'] = 'root'
    assert config['cephadm_mode'] in ['root', 'cephadm-package']
    if config['cephadm_mode'] == 'root':
        ctx.cephadm = testdir + '/cephadm'
    else:
        ctx.cephadm = 'cephadm'  # in the path

    if first_ceph_cluster:
        # FIXME: this is global for all clusters
        ctx.daemons = DaemonGroup(use_cephadm=ctx.cephadm)

    # image
    ctx.ceph[cluster_name].image = config.get('image')
    ref = None
    if not ctx.ceph[cluster_name].image:
        sha1 = config.get('sha1')
        if sha1:
            ctx.ceph[cluster_name].image = 'quay.io/ceph-ci/ceph:%s' % sha1
            ref = sha1
        else:
            # hmm, fall back to branch?
            branch = config.get('branch', 'master')
            ref = branch
            ctx.ceph[cluster_name].image = 'quay.io/ceph-ci/ceph:%s' % branch
    log.info('Cluster image is %s' % ctx.ceph[cluster_name].image)

    # uuid
    fsid = str(uuid.uuid1())
    log.info('Cluster fsid is %s' % fsid)
    ctx.ceph[cluster_name].fsid = fsid

    # mon ips
    log.info('Choosing monitor IPs and ports...')
    remotes_and_roles = ctx.cluster.remotes.items()
    roles = [role_list for (remote, role_list) in remotes_and_roles]
    ips = [host for (host, port) in
           (remote.ssh.get_transport().getpeername()
            for (remote, role_list) in remotes_and_roles)]
    ctx.ceph[cluster_name].mons = get_mons(
        roles, ips, cluster_name,
        mon_bind_msgr2=config.get('mon_bind_msgr2', True),
        mon_bind_addrvec=config.get('mon_bind_addrvec', True),
    )
    log.info('Monitor IPs: %s' % ctx.ceph[cluster_name].mons)

    with contextutil.nested(
            lambda: ceph_initial(),
            lambda: normalize_hostnames(ctx=ctx),
            lambda: download_cephadm(ctx=ctx, config=config, ref=ref),
            lambda: ceph_log(ctx=ctx, config=config),
            lambda: ceph_crash(ctx=ctx, config=config),
            lambda: ceph_bootstrap(ctx=ctx, config=config),
            lambda: crush_setup(ctx=ctx, config=config),
            lambda: ceph_mons(ctx=ctx, config=config),
            lambda: ceph_mgrs(ctx=ctx, config=config),
            lambda: ceph_osds(ctx=ctx, config=config),
            lambda: ceph_mdss(ctx=ctx, config=config),
            lambda: ceph_clients(ctx=ctx, config=config),
            lambda: distribute_config_and_admin_keyring(ctx=ctx, config=config),
    ):
        ctx.managers[cluster_name] = CephManager(
            ctx.ceph[cluster_name].bootstrap_remote,
            ctx=ctx,
            logger=log.getChild('ceph_manager.' + cluster_name),
            cluster=cluster_name,
            cephadm=True,
        )

        try:
            if config.get('wait-for-healthy', True):
                healthy(ctx=ctx, config=config)

            log.info('Setup complete, yielding')
            yield

        finally:
            log.info('Teardown begin')
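# The contextutil.nested(...) call above composes the sub-tasks: each lambda
# returns a context manager, they are entered in order, and they are torn down
# in reverse order even if a later stage fails. A simplified stand-in for that
# pattern (a sketch built on contextlib.ExitStack, not teuthology's actual
# implementation; the subtask names are illustrative):
import contextlib

@contextlib.contextmanager
def subtask(name):
    print('setup', name)
    try:
        yield
    finally:
        print('teardown', name)

@contextlib.contextmanager
def nested(*factories):
    with contextlib.ExitStack() as stack:
        for factory in factories:
            stack.enter_context(factory())
        yield

with nested(
        lambda: subtask('bootstrap'),
        lambda: subtask('mons'),
        lambda: subtask('osds'),
):
    print('cluster is up; yielding to the next task')
# Teardown runs in reverse order: osds, mons, bootstrap.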
def task(ctx, config): """ Set up and tear down a Ceph cluster. For example:: tasks: - ceph: - interactive: You can also specify what branch to run:: tasks: - ceph: branch: foo Or a tag:: tasks: - ceph: tag: v0.42.13 Or a sha1:: tasks: - ceph: sha1: 1376a5ab0c89780eab39ffbbe436f6a6092314ed Or a local source dir:: tasks: - ceph: path: /home/sage/ceph To capture code coverage data, use:: tasks: - ceph: coverage: true To use btrfs, ext4, or xfs on the target's scratch disks, use:: tasks: - ceph: fs: xfs mkfs_options: [-b,size=65536,-l,logdev=/dev/sdc1] mount_options: [nobarrier, inode64] Note, this will cause the task to check the /scratch_devs file on each node for available devices. If no such file is found, /dev/sdb will be used. To run some daemons under valgrind, include their names and the tool/args to use in a valgrind section:: tasks: - ceph: valgrind: mds.1: --tool=memcheck osd.1: [--tool=memcheck, --leak-check=no] Those nodes which are using memcheck or valgrind will get checked for bad results. To adjust or modify config options, use:: tasks: - ceph: conf: section: key: value For example:: tasks: - ceph: conf: mds.0: some option: value other key: other value client.0: debug client: 10 debug ms: 1 By default, the cluster log is checked for errors and warnings, and the run marked failed if any appear. You can ignore log entries by giving a list of egrep compatible regexes, i.e.: tasks: - ceph: log-whitelist: ['foo.*bar', 'bad message'] To run multiple ceph clusters, use multiple ceph tasks, and roles with a cluster name prefix, e.g. cluster1.client.0. Roles with no cluster use the default cluster name, 'ceph'. OSDs from separate clusters must be on separate hosts. Clients and non-osd daemons from multiple clusters may be colocated. For each cluster, add an instance of the ceph task with the cluster name specified, e.g.:: roles: - [mon.a, osd.0, osd.1] - [backup.mon.a, backup.osd.0, backup.osd.1] - [client.0, backup.client.0] tasks: - ceph: cluster: ceph - ceph: cluster: backup :param ctx: Context :param config: Configuration """ if config is None: config = {} assert isinstance(config, dict), \ "task ceph only supports a dictionary for configuration" overrides = ctx.config.get('overrides', {}) teuthology.deep_merge(config, overrides.get('ceph', {})) first_ceph_cluster = False if not hasattr(ctx, 'daemons'): first_ceph_cluster = True ctx.daemons = DaemonGroup() testdir = teuthology.get_testdir(ctx) if config.get('coverage'): coverage_dir = '{tdir}/archive/coverage'.format(tdir=testdir) log.info('Creating coverage directory...') run.wait( ctx.cluster.run( args=[ 'install', '-d', '-m0755', '--', coverage_dir, ], wait=False, )) if 'cluster' not in config: config['cluster'] = 'ceph' validate_config(ctx, config) subtasks = [] if first_ceph_cluster: # these tasks handle general log setup and parsing on all hosts, # so they should only be run once subtasks = [ lambda: ceph_log(ctx=ctx, config=None), lambda: valgrind_post(ctx=ctx, config=config), ] subtasks += [ lambda: cluster(ctx=ctx, config=dict( conf=config.get('conf', {}), fs=config.get('fs', None), mkfs_options=config.get('mkfs_options', None), mount_options=config.get('mount_options', None), block_journal=config.get('block_journal', None), tmpfs_journal=config.get('tmpfs_journal', None), log_whitelist=config.get('log-whitelist', []), cpu_profile=set(config.get('cpu_profile', []), ), cluster=config['cluster'], )), lambda: run_daemon(ctx=ctx, config=config, type_='mon'), lambda: crush_setup(ctx=ctx, config=config), lambda: 
run_daemon(ctx=ctx, config=config, type_='osd'), lambda: cephfs_setup(ctx=ctx, config=config), lambda: run_daemon(ctx=ctx, config=config, type_='mds'), ] with contextutil.nested(*subtasks): try: if config.get('wait-for-healthy', True): healthy(ctx=ctx, config=dict(cluster=config['cluster'])) first_mon = teuthology.get_first_mon(ctx, config, config['cluster']) (mon, ) = ctx.cluster.only(first_mon).remotes.iterkeys() if not hasattr(ctx, 'managers'): ctx.managers = {} ctx.managers[config['cluster']] = CephManager( mon, ctx=ctx, logger=log.getChild('ceph_manager.' + config['cluster']), cluster=config['cluster'], ) yield finally: if config.get('wait-for-scrub', True): osd_scrub_pgs(ctx, config)
def _fix_roles_map(self):
    ctx = self.ctx
    # Find rhbuild from config and check for cluster version
    # In case 4.x, support multiple RGW daemons in single node
    # else, continue
    multi_rgw_support = False
    try:
        if str(ctx.config.get('redhat', str()).get("rhbuild")).startswith("4"):
            multi_rgw_support = True
    except AttributeError:
        pass
    if not hasattr(ctx, 'managers'):
        ctx.managers = {}
    ctx.daemons = DaemonGroup(use_systemd=True)
    if not hasattr(ctx, 'new_remote_role'):
        new_remote_role = dict()
        ctx.new_remote_role = new_remote_role
    else:
        new_remote_role = ctx.new_remote_role
    for remote, roles in self.ready_cluster.remotes.items():
        new_remote_role[remote] = []
        generate_osd_list = True
        rgw_count = 0
        for role in roles:
            cluster, rol, id = misc.split_role(role)
            if rol.startswith('osd'):
                if generate_osd_list:
                    # gather osd ids as seen on host
                    out = StringIO()
                    remote.run(args=['ps', '-eaf',
                                     run.Raw('|'), 'grep', 'ceph-osd',
                                     run.Raw('|'),
                                     run.Raw('awk {\'print $13\'}')],
                               stdout=out)
                    osd_list_all = out.getvalue().split('\n')
                    generate_osd_list = False
                    osd_list = []
                    for osd_id in osd_list_all:
                        try:
                            if isinstance(int(osd_id), int):
                                osd_list.append(osd_id)
                        except ValueError:
                            # ignore any empty lines as part of output
                            pass
                id = osd_list.pop()
                log.info("Registering Daemon {rol} {id}".format(rol=rol, id=id))
                ctx.daemons.add_daemon(remote, rol, id)
                if len(role.split('.')) == 2:
                    osd_role = "{rol}.{id}".format(rol=rol, id=id)
                else:
                    osd_role = "{c}.{rol}.{id}".format(c=cluster, rol=rol, id=id)
                new_remote_role[remote].append(osd_role)
            elif rol.startswith('mon') or rol.startswith('mgr') or rol.startswith('mds'):
                hostname = remote.shortname
                new_remote_role[remote].append(role)
                log.info("Registering Daemon {rol} {id}".format(rol=rol, id=id))
                ctx.daemons.add_daemon(remote, rol, hostname)
            elif rol.startswith('rgw'):
                hostname = remote.shortname
                new_remote_role[remote].append(role)
                id_ = "{}.{}".format('rgw', hostname)
                if multi_rgw_support:
                    id_ = "{}.{}.rgw{}".format('rgw', hostname, rgw_count)
                    rgw_count += 1
                log.info("Registering Daemon {rol} {id}".format(rol=rol, id=id_))
                ctx.daemons.add_daemon(remote, rol, id_=id_)
            else:
                new_remote_role[remote].append(role)
    self.each_cluster.remotes.update(new_remote_role)
    (ceph_first_mon,) = iter(
        self.ctx.cluster.only(
            misc.get_first_mon(self.ctx, self.config,
                               self.cluster_name)).remotes.keys())
    from tasks.ceph_manager import CephManager
    ctx.managers['ceph'] = CephManager(
        ceph_first_mon,
        ctx=ctx,
        logger=log.getChild('ceph_manager.' + 'ceph'),
    )
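# The `ps -eaf | grep ceph-osd | awk '{print $13}'` pipeline above returns a
# mix of OSD ids, empty lines, and stray tokens (for example from the grep
# process itself); the try/int filter keeps only numeric entries. A small
# sketch of that filtering with made-up command output:
raw_output = "0\n2\n\n--setuser\n5\n"

osd_list = []
for osd_id in raw_output.split('\n'):
    try:
        int(osd_id)           # same effect as the isinstance(int(osd_id), int) check
        osd_list.append(osd_id)
    except ValueError:
        pass                  # ignore empty lines and non-numeric fields

print(osd_list)  # ['0', '2', '5']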
def task(ctx, config):
    if config is None:
        config = {}

    assert isinstance(config, dict), \
        "task only supports a dictionary for configuration"

    overrides = ctx.config.get('overrides', {})
    teuthology.deep_merge(config, overrides.get('ceph', {}))
    log.info('Config: ' + str(config))

    testdir = teuthology.get_testdir(ctx)

    # set up cluster context
    first_ceph_cluster = False
    if not hasattr(ctx, 'daemons'):
        first_ceph_cluster = True
        ctx.daemons = DaemonGroup(
            use_ceph_daemon='{}/ceph-daemon'.format(testdir))
    if not hasattr(ctx, 'ceph'):
        ctx.ceph = {}
        ctx.managers = {}
    if 'cluster' not in config:
        config['cluster'] = 'ceph'
    cluster_name = config['cluster']
    ctx.ceph[cluster_name] = argparse.Namespace()

    #validate_config(ctx, config)

    # image
    ctx.image = config.get('image')
    ref = None
    if not ctx.image:
        sha1 = config.get('sha1')
        if sha1:
            ctx.image = 'quay.io/ceph-ci/ceph:%s' % sha1
            ref = sha1
        else:
            # hmm, fall back to branch?
            branch = config.get('branch', 'master')
            ref = branch
            # FIXME when ceph-ci builds all branches
            if branch in ['master', 'nautilus']:
                ctx.image = 'ceph/daemon-base:latest-%s-devel' % branch
            else:
                ctx.image = 'quay.io/ceph-ci/ceph:%s' % branch
    log.info('Cluster image is %s' % ctx.image)

    # uuid
    fsid = str(uuid.uuid1())
    log.info('Cluster fsid is %s' % fsid)
    ctx.ceph[cluster_name].fsid = fsid

    # mon ips
    log.info('Choosing monitor IPs and ports...')
    remotes_and_roles = ctx.cluster.remotes.items()
    roles = [role_list for (remote, role_list) in remotes_and_roles]
    ips = [host for (host, port) in
           (remote.ssh.get_transport().getpeername()
            for (remote, role_list) in remotes_and_roles)]
    ctx.ceph[cluster_name].mons = get_mons(
        roles, ips, cluster_name,
        mon_bind_msgr2=config.get('mon_bind_msgr2', True),
        mon_bind_addrvec=config.get('mon_bind_addrvec', True),
    )
    log.info('Monitor IPs: %s' % ctx.ceph[cluster_name].mons)

    with contextutil.nested(
            lambda: ceph_initial(),
            lambda: normalize_hostnames(ctx=ctx),
            lambda: download_ceph_daemon(ctx=ctx, config=config, ref=ref),
            lambda: ceph_log(ctx=ctx, config=config),
            lambda: ceph_crash(ctx=ctx, config=config),
            lambda: ceph_bootstrap(ctx=ctx, config=config),
            lambda: ceph_mons(ctx=ctx, config=config),
            lambda: ceph_mgrs(ctx=ctx, config=config),
            lambda: ceph_osds(ctx=ctx, config=config),
            lambda: ceph_mdss(ctx=ctx, config=config),
            lambda: distribute_config_and_admin_keyring(ctx=ctx, config=config),
    ):
        ctx.managers[cluster_name] = CephManager(
            ctx.ceph[cluster_name].bootstrap_remote,
            ctx=ctx,
            logger=log.getChild('ceph_manager.' + cluster_name),
            cluster=cluster_name,
            ceph_daemon=True,
        )

        try:
            if config.get('wait-for-healthy', True):
                healthy(ctx=ctx, config=config)

            log.info('Setup complete, yielding')
            yield

        finally:
            log.info('Teardown begin')
def task(ctx, config):
    if config is None:
        config = {}

    assert isinstance(config, dict), \
        "task only supports a dictionary for configuration"

    overrides = ctx.config.get('overrides', {})
    teuthology.deep_merge(config, overrides.get('ceph', {}))
    log.info('Config: ' + str(config))

    testdir = teuthology.get_testdir(ctx)

    # set up cluster context
    first_ceph_cluster = False
    if not hasattr(ctx, 'daemons'):
        first_ceph_cluster = True
    if not hasattr(ctx, 'ceph'):
        ctx.ceph = {}
        ctx.managers = {}
    if 'cluster' not in config:
        config['cluster'] = 'ceph'
    cluster_name = config['cluster']
    ctx.ceph[cluster_name] = argparse.Namespace()

    ctx.ceph[cluster_name].thrashers = []
    # fixme: setup watchdog, ala ceph.py

    ctx.ceph[cluster_name].roleless = False  # see below

    # cephadm mode?
    if 'cephadm_mode' not in config:
        config['cephadm_mode'] = 'root'
    assert config['cephadm_mode'] in ['root', 'cephadm-package']
    if config['cephadm_mode'] == 'root':
        ctx.cephadm = testdir + '/cephadm'
    else:
        ctx.cephadm = 'cephadm'  # in the path

    if first_ceph_cluster:
        # FIXME: this is global for all clusters
        ctx.daemons = DaemonGroup(use_cephadm=ctx.cephadm)

    # image
    ctx.ceph[cluster_name].image = config.get('image')
    ref = None
    if not ctx.ceph[cluster_name].image:
        sha1 = config.get('sha1')
        if sha1:
            ctx.ceph[cluster_name].image = 'quay.io/ceph-ci/ceph:%s' % sha1
            ref = sha1
        else:
            # hmm, fall back to branch?
            branch = config.get('branch', 'master')
            ref = branch
            ctx.ceph[cluster_name].image = 'quay.io/ceph-ci/ceph:%s' % branch
    log.info('Cluster image is %s' % ctx.ceph[cluster_name].image)

    # uuid
    fsid = str(uuid.uuid1())
    log.info('Cluster fsid is %s' % fsid)
    ctx.ceph[cluster_name].fsid = fsid

    # mon ips
    log.info('Choosing monitor IPs and ports...')
    remotes_and_roles = ctx.cluster.remotes.items()
    roles = [role_list for (remote, role_list) in remotes_and_roles]
    ips = [host for (host, port) in
           (remote.ssh.get_transport().getpeername()
            for (remote, role_list) in remotes_and_roles)]

    if config.get('roleless', False):
        # mons will be named after hosts
        n = len(roles)
        roles = []
        first_mon = None
        for remote, _ in remotes_and_roles:
            roles.append(['mon.' + remote.shortname])
            if not first_mon:
                first_mon = remote.shortname
                bootstrap_remote = remote
        log.info('No roles; fabricating mons %s' % roles)

    ctx.ceph[cluster_name].mons = get_mons(
        roles, ips, cluster_name,
        mon_bind_msgr2=config.get('mon_bind_msgr2', True),
        mon_bind_addrvec=config.get('mon_bind_addrvec', True),
    )
    log.info('Monitor IPs: %s' % ctx.ceph[cluster_name].mons)

    if config.get('roleless', False):
        ctx.ceph[cluster_name].roleless = True
        ctx.ceph[cluster_name].bootstrap_remote = bootstrap_remote
        ctx.ceph[cluster_name].first_mon = first_mon
        ctx.ceph[cluster_name].first_mon_role = 'mon.' + first_mon
    else:
        first_mon_role = sorted(ctx.ceph[cluster_name].mons.keys())[0]
        _, _, first_mon = teuthology.split_role(first_mon_role)
        (bootstrap_remote,) = ctx.cluster.only(first_mon_role).remotes.keys()
        log.info('First mon is mon.%s on %s' % (first_mon,
                                                bootstrap_remote.shortname))
        ctx.ceph[cluster_name].bootstrap_remote = bootstrap_remote
        ctx.ceph[cluster_name].first_mon = first_mon
        ctx.ceph[cluster_name].first_mon_role = first_mon_role

        others = ctx.cluster.remotes[bootstrap_remote]
        mgrs = sorted([r for r in others
                       if teuthology.is_type('mgr', cluster_name)(r)])
        if not mgrs:
            raise RuntimeError('no mgrs on the same host as first mon %s' % first_mon)
        _, _, first_mgr = teuthology.split_role(mgrs[0])
        log.info('First mgr is %s' % (first_mgr))
        ctx.ceph[cluster_name].first_mgr = first_mgr

    with contextutil.nested(
            lambda: ceph_initial(),
            lambda: normalize_hostnames(ctx=ctx),
            lambda: download_cephadm(ctx=ctx, config=config, ref=ref),
            lambda: ceph_log(ctx=ctx, config=config),
            lambda: ceph_crash(ctx=ctx, config=config),
            lambda: ceph_bootstrap(ctx=ctx, config=config),
            lambda: crush_setup(ctx=ctx, config=config),
            lambda: ceph_mons(ctx=ctx, config=config),
            lambda: distribute_config_and_admin_keyring(ctx=ctx, config=config),
            lambda: ceph_mgrs(ctx=ctx, config=config),
            lambda: ceph_osds(ctx=ctx, config=config),
            lambda: ceph_mdss(ctx=ctx, config=config),
            lambda: ceph_rgw(ctx=ctx, config=config),
            lambda: ceph_monitoring('prometheus', ctx=ctx, config=config),
            lambda: ceph_monitoring('node-exporter', ctx=ctx, config=config),
            lambda: ceph_monitoring('alertmanager', ctx=ctx, config=config),
            lambda: ceph_monitoring('grafana', ctx=ctx, config=config),
            lambda: ceph_clients(ctx=ctx, config=config),
    ):
        ctx.managers[cluster_name] = CephManager(
            ctx.ceph[cluster_name].bootstrap_remote,
            ctx=ctx,
            logger=log.getChild('ceph_manager.' + cluster_name),
            cluster=cluster_name,
            cephadm=True,
        )

        try:
            if config.get('wait-for-healthy', True):
                healthy(ctx=ctx, config=config)

            log.info('Setup complete, yielding')
            yield

        finally:
            log.info('Teardown begin')