def test_standby_for_invalid_fscid(self):
    # Set an invalid standby_fscid on one daemon while the others use
    # standby_rank; stopping the active mds service should not end up
    # in a mon crash.
    # Get configured mons in the cluster
    first_mon = teuthology.get_first_mon(self.ctx, self.configs_set)
    (mon,) = self.ctx.cluster.only(first_mon).remotes.keys()
    manager = CephManager(
        mon,
        ctx=self.ctx,
        logger=log.getChild('ceph_manager'),
    )
    configured_mons = manager.get_mon_quorum()

    use_daemons = sorted(self.mds_cluster.mds_ids[0:3])
    mds_a, mds_b, mds_c = use_daemons
    log.info("Using MDS daemons: {0}".format(use_daemons))

    def set_standby_for_rank(leader_rank, follower_id):
        self.set_conf("mds.{0}".format(follower_id),
                      "mds_standby_for_rank", leader_rank)

    # Create one fs
    fs_a = self.mds_cluster.newfs("cephfs")

    # Set all the daemons to have a rank assignment but no other
    # standby preferences.
    set_standby_for_rank(0, mds_a)
    set_standby_for_rank(0, mds_b)

    # Set the third daemon to have an invalid fscid assignment and no
    # other standby preferences
    invalid_fscid = 123
    self.set_conf("mds.{0}".format(mds_c),
                  "mds_standby_for_fscid", invalid_fscid)

    # Restart all the daemons so the standby preferences take effect
    self.mds_cluster.mds_restart(mds_a)
    self.mds_cluster.mds_restart(mds_b)
    self.mds_cluster.mds_restart(mds_c)
    self.wait_for_daemon_start([mds_a, mds_b, mds_c])

    # Stop the active mds daemon of the fs and fail it over; pick
    # whichever of the two rank-0 candidates is currently active
    if fs_a.get_active_names() == [mds_a]:
        self.mds_cluster.mds_stop(mds_a)
        self.mds_cluster.mds_fail(mds_a)
    else:
        self.mds_cluster.mds_stop(mds_b)
        self.mds_cluster.mds_fail(mds_b)
    fs_a.wait_for_daemons()

    # Get active mons from cluster
    active_mons = manager.get_mon_quorum()

    # Check that the active quorum matches the originally configured mons
    self.assertEqual(
        active_mons, configured_mons,
        "Not all mons are in quorum; invalid standby fscid test failed!"
    )
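# For reference, the conf fragment the test above writes via set_conf()
# amounts to the following (daemon ids a/b/c stand in for the real
# mds_ids; a sketch, not captured output):
#
#   [mds.a]
#   mds_standby_for_rank = 0
#   [mds.b]
#   mds_standby_for_rank = 0
#   [mds.c]
#   mds_standby_for_fscid = 123    # deliberately invalid fscid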
def task(ctx, config):
    if config is None:
        config = {}
    assert isinstance(config, dict), \
        "task only supports a dictionary for configuration"

    overrides = ctx.config.get('overrides', {})
    teuthology.deep_merge(config, overrides.get('ceph', {}))
    log.info('Config: ' + str(config))

    testdir = teuthology.get_testdir(ctx)

    # set up cluster context
    if not hasattr(ctx, 'ceph'):
        ctx.ceph = {}
        ctx.managers = {}
    if 'cluster' not in config:
        config['cluster'] = 'ceph'
    cluster_name = config['cluster']
    if cluster_name not in ctx.ceph:
        ctx.ceph[cluster_name] = argparse.Namespace()
        ctx.ceph[cluster_name].bootstrapped = False

    # image
    if not hasattr(ctx.ceph[cluster_name], 'image'):
        ctx.ceph[cluster_name].image = config.get('image')
    ref = None
    if not ctx.ceph[cluster_name].image:
        sha1 = config.get('sha1')
        if sha1:
            ctx.ceph[cluster_name].image = 'quay.io/ceph-ci/ceph:%s' % sha1
            ref = sha1
        else:
            # hmm, fall back to branch?
            branch = config.get('branch', 'master')
            ref = branch
            ctx.ceph[cluster_name].image = 'quay.io/ceph-ci/ceph:%s' % branch
    log.info('Cluster image is %s' % ctx.ceph[cluster_name].image)

    with contextutil.nested(
            # if the cluster is already bootstrapped, bypass the
            # corresponding methods
            lambda: _bypass() if ctx.ceph[cluster_name].bootstrapped
            else initialize_config(ctx=ctx, config=config),
            lambda: ceph_initial(),
            lambda: normalize_hostnames(ctx=ctx),
            lambda: _bypass() if ctx.ceph[cluster_name].bootstrapped
            else download_cephadm(ctx=ctx, config=config, ref=ref),
            lambda: ceph_log(ctx=ctx, config=config),
            lambda: ceph_crash(ctx=ctx, config=config),
            lambda: _bypass() if ctx.ceph[cluster_name].bootstrapped
            else ceph_bootstrap(ctx=ctx, config=config),
            lambda: crush_setup(ctx=ctx, config=config),
            lambda: ceph_mons(ctx=ctx, config=config),
            lambda: distribute_config_and_admin_keyring(ctx=ctx, config=config),
            lambda: ceph_mgrs(ctx=ctx, config=config),
            lambda: ceph_osds(ctx=ctx, config=config),
            lambda: ceph_mdss(ctx=ctx, config=config),
            lambda: ceph_rgw(ctx=ctx, config=config),
            lambda: ceph_monitoring('prometheus', ctx=ctx, config=config),
            lambda: ceph_monitoring('node-exporter', ctx=ctx, config=config),
            lambda: ceph_monitoring('alertmanager', ctx=ctx, config=config),
            lambda: ceph_monitoring('grafana', ctx=ctx, config=config),
            lambda: ceph_clients(ctx=ctx, config=config),
    ):
        ctx.managers[cluster_name] = CephManager(
            ctx.ceph[cluster_name].bootstrap_remote,
            ctx=ctx,
            logger=log.getChild('ceph_manager.' + cluster_name),
            cluster=cluster_name,
            cephadm=True,
        )
        try:
            if config.get('wait-for-healthy', True):
                healthy(ctx=ctx, config=config)
            log.info('Setup complete, yielding')
            yield
        finally:
            log.info('Teardown begin')
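# The nested() chain above alternates between _bypass() and the real setup
# steps. A minimal sketch of such a no-op bypass, assuming the conventional
# contextmanager definition (the real helper may also log):
import contextlib

@contextlib.contextmanager
def _bypass():
    # do nothing; stands in for a setup step that was already performed
    yield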
def task(ctx, config): """ Deploy ceph cluster using cephadm Setup containers' mirrors before the bootstrap, if corresponding config provided in teuthology server config yaml file. For example, teuthology.yaml can contain the 'defaults' section: defaults: cephadm: containers: registry_mirrors: docker.io: 'registry.mirror.example.com:5000' image: 'quay.io/ceph-ci/ceph' Using overrides makes it possible to customize it per run. The equivalent 'overrides' section looks like: overrides: cephadm: containers: registry_mirrors: docker.io: 'registry.mirror.example.com:5000' image: 'quay.io/ceph-ci/ceph' :param ctx: the argparse.Namespace object :param config: the config dict """ if config is None: config = {} assert isinstance(config, dict), \ "task only supports a dictionary for configuration" overrides = ctx.config.get('overrides', {}) teuthology.deep_merge(config, overrides.get('ceph', {})) teuthology.deep_merge(config, overrides.get('cephadm', {})) log.info('Config: ' + str(config)) # set up cluster context if not hasattr(ctx, 'ceph'): ctx.ceph = {} ctx.managers = {} if 'cluster' not in config: config['cluster'] = 'ceph' cluster_name = config['cluster'] if cluster_name not in ctx.ceph: ctx.ceph[cluster_name] = argparse.Namespace() ctx.ceph[cluster_name].bootstrapped = False # image teuth_defaults = teuth_config.get('defaults', {}) cephadm_defaults = teuth_defaults.get('cephadm', {}) containers_defaults = cephadm_defaults.get('containers', {}) mirrors_defaults = containers_defaults.get('registry_mirrors', {}) container_registry_mirror = mirrors_defaults.get('docker.io', None) container_image_name = containers_defaults.get('image', None) containers = config.get('containers', {}) mirrors = containers.get('registry_mirrors', {}) container_image_name = containers.get('image', container_image_name) container_registry_mirror = mirrors.get('docker.io', container_registry_mirror) if not hasattr(ctx.ceph[cluster_name], 'image'): ctx.ceph[cluster_name].image = config.get('image') ref = None if not ctx.ceph[cluster_name].image: if not container_image_name: raise Exception( "Configuration error occurred. " "The 'image' value is undefined for 'cephadm' task. " "Please provide corresponding options in the task's " "config, task 'overrides', or teuthology 'defaults' " "section.") sha1 = config.get('sha1') flavor = config.get('flavor', 'default') if sha1: if flavor == "crimson": ctx.ceph[ cluster_name].image = container_image_name + ':' + sha1 + '-' + flavor else: ctx.ceph[ cluster_name].image = container_image_name + ':' + sha1 ref = sha1 else: # hmm, fall back to branch? 
branch = config.get('branch', 'master') ref = branch ctx.ceph[cluster_name].image = container_image_name + ':' + branch log.info('Cluster image is %s' % ctx.ceph[cluster_name].image) with contextutil.nested( #if the cluster is already bootstrapped bypass corresponding methods lambda: _bypass() if (ctx.ceph[cluster_name].bootstrapped)\ else initialize_config(ctx=ctx, config=config), lambda: ceph_initial(), lambda: normalize_hostnames(ctx=ctx), lambda: _bypass() if (ctx.ceph[cluster_name].bootstrapped)\ else download_cephadm(ctx=ctx, config=config, ref=ref), lambda: ceph_log(ctx=ctx, config=config), lambda: ceph_crash(ctx=ctx, config=config), lambda: _bypass() if (ctx.ceph[cluster_name].bootstrapped)\ else ceph_bootstrap(ctx, config, container_registry_mirror), lambda: crush_setup(ctx=ctx, config=config), lambda: ceph_mons(ctx=ctx, config=config), lambda: distribute_config_and_admin_keyring(ctx=ctx, config=config), lambda: ceph_mgrs(ctx=ctx, config=config), lambda: ceph_osds(ctx=ctx, config=config), lambda: ceph_mdss(ctx=ctx, config=config), lambda: ceph_rgw(ctx=ctx, config=config), lambda: ceph_monitoring('prometheus', ctx=ctx, config=config), lambda: ceph_monitoring('node-exporter', ctx=ctx, config=config), lambda: ceph_monitoring('alertmanager', ctx=ctx, config=config), lambda: ceph_monitoring('grafana', ctx=ctx, config=config), lambda: ceph_clients(ctx=ctx, config=config), ): ctx.managers[cluster_name] = CephManager( ctx.ceph[cluster_name].bootstrap_remote, ctx=ctx, logger=log.getChild('ceph_manager.' + cluster_name), cluster=cluster_name, cephadm=True, ) try: if config.get('wait-for-healthy', True): healthy(ctx=ctx, config=config) log.info('Setup complete, yielding') yield finally: log.info('Teardown begin')
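# How the image tag is composed above, illustrated (sha1 value is
# hypothetical; the image name comes from the teuthology defaults shown
# in the docstring):
#
#   sha1: 0123abcd, flavor: default  -> quay.io/ceph-ci/ceph:0123abcd
#   sha1: 0123abcd, flavor: crimson  -> quay.io/ceph-ci/ceph:0123abcd-crimson
#   no sha1, branch: master          -> quay.io/ceph-ci/ceph:master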
def task(ctx, config): """ Deploy rook-ceph cluster tasks: - kubeadm: - rook: branch: wip-foo spec: mon: count: 1 The spec item is deep-merged against the cluster.yaml. The branch, sha1, or image items are used to determine the Ceph container image. """ if not config: config = {} assert isinstance(config, dict), \ "task only supports a dictionary for configuration" log.info('Rook start') overrides = ctx.config.get('overrides', {}) teuthology.deep_merge(config, overrides.get('ceph', {})) teuthology.deep_merge(config, overrides.get('rook', {})) log.info('Config: ' + str(config)) # set up cluster context if not hasattr(ctx, 'rook'): ctx.rook = {} if 'cluster' not in config: config['cluster'] = 'ceph' cluster_name = config['cluster'] if cluster_name not in ctx.rook: ctx.rook[cluster_name] = argparse.Namespace() ctx.rook[cluster_name].remote = list(ctx.cluster.remotes.keys())[0] # image teuth_defaults = teuth_config.get('defaults', {}) cephadm_defaults = teuth_defaults.get('cephadm', {}) containers_defaults = cephadm_defaults.get('containers', {}) container_image_name = containers_defaults.get('image', None) if 'image' in config: ctx.rook[cluster_name].image = config.get('image') else: sha1 = config.get('sha1') flavor = config.get('flavor', 'default') if sha1: if flavor == "crimson": ctx.rook[ cluster_name].image = container_image_name + ':' + sha1 + '-' + flavor else: ctx.rook[ cluster_name].image = container_image_name + ':' + sha1 else: # hmm, fall back to branch? branch = config.get('branch', 'master') ctx.rook[cluster_name].image = container_image_name + ':' + branch log.info('Ceph image is %s' % ctx.rook[cluster_name].image) with contextutil.nested( lambda: rook_operator(ctx, config), lambda: ceph_log(ctx, config), lambda: rook_cluster(ctx, config), lambda: rook_toolbox(ctx, config), lambda: wait_for_orch(ctx, config), lambda: rook_post_config(ctx, config), lambda: wait_for_osds(ctx, config), lambda: ceph_config_keyring(ctx, config), lambda: ceph_clients(ctx, config), ): if not hasattr(ctx, 'managers'): ctx.managers = {} ctx.managers[cluster_name] = CephManager( ctx.rook[cluster_name].remote, ctx=ctx, logger=log.getChild('ceph_manager.' + cluster_name), cluster=cluster_name, rook=True, ) try: if config.get('wait-for-healthy', True): healthy(ctx=ctx, config=config) log.info('Rook complete, yielding') yield finally: to_remove = [] ret = _shell(ctx, config, ['ceph', 'orch', 'ls', '-f', 'json'], stdout=BytesIO()) if ret.exitstatus == 0: r = json.loads(ret.stdout.getvalue().decode('utf-8')) for service in r: if service['service_type'] in [ 'rgw', 'mds', 'nfs', 'rbd-mirror' ]: _shell(ctx, config, ['ceph', 'orch', 'rm', service['service_name']]) to_remove.append(service['service_name']) with safe_while( sleep=10, tries=90, action="waiting for service removal") as proceed: while proceed(): ret = _shell(ctx, config, ['ceph', 'orch', 'ls', '-f', 'json'], stdout=BytesIO()) if ret.exitstatus == 0: r = json.loads( ret.stdout.getvalue().decode('utf-8')) still_up = [ service['service_name'] for service in r ] matches = set(still_up).intersection(to_remove) if not matches: break log.info('Tearing down rook')
def task(ctx, config): """ Deploy rook-ceph cluster tasks: - kubeadm: - rook: branch: wip-foo spec: mon: count: 1 The spec item is deep-merged against the cluster.yaml. The branch, sha1, or image items are used to determine the Ceph container image. """ if not config: config = {} assert isinstance(config, dict), \ "task only supports a dictionary for configuration" log.info('Rook start') overrides = ctx.config.get('overrides', {}) teuthology.deep_merge(config, overrides.get('ceph', {})) teuthology.deep_merge(config, overrides.get('rook', {})) log.info('Config: ' + str(config)) # set up cluster context if not hasattr(ctx, 'rook'): ctx.rook = {} if 'cluster' not in config: config['cluster'] = 'ceph' cluster_name = config['cluster'] if cluster_name not in ctx.rook: ctx.rook[cluster_name] = argparse.Namespace() ctx.rook[cluster_name].remote = list(ctx.cluster.remotes.keys())[0] # image teuth_defaults = teuth_config.get('defaults', {}) cephadm_defaults = teuth_defaults.get('cephadm', {}) containers_defaults = cephadm_defaults.get('containers', {}) container_image_name = containers_defaults.get('image', None) if 'image' in config: ctx.rook[cluster_name].image = config.get('image') else: sha1 = config.get('sha1') flavor = config.get('flavor', 'default') if sha1: if flavor == "crimson": ctx.rook[cluster_name].image = container_image_name + ':' + sha1 + '-' + flavor else: ctx.rook[cluster_name].image = container_image_name + ':' + sha1 else: # hmm, fall back to branch? branch = config.get('branch', 'master') ctx.rook[cluster_name].image = container_image_name + ':' + branch log.info('Ceph image is %s' % ctx.rook[cluster_name].image) with contextutil.nested( lambda: rook_operator(ctx, config), lambda: ceph_log(ctx, config), lambda: rook_cluster(ctx, config), lambda: rook_toolbox(ctx, config), lambda: wait_for_osds(ctx, config), lambda: ceph_config_keyring(ctx, config), lambda: ceph_clients(ctx, config), ): if not hasattr(ctx, 'managers'): ctx.managers = {} ctx.managers[cluster_name] = CephManager( ctx.rook[cluster_name].remote, ctx=ctx, logger=log.getChild('ceph_manager.' + cluster_name), cluster=cluster_name, rook=True, ) try: if config.get('wait-for-healthy', True): healthy(ctx=ctx, config=config) log.info('Rook complete, yielding') yield finally: log.info('Tearing down rook')
def task(ctx, config):
    if config is None:
        config = {}
    assert isinstance(config, dict), \
        "task only supports a dictionary for configuration"

    overrides = ctx.config.get('overrides', {})
    teuthology.deep_merge(config, overrides.get('ceph', {}))
    log.info('Config: ' + str(config))

    testdir = teuthology.get_testdir(ctx)

    # set up cluster context
    first_ceph_cluster = False
    if not hasattr(ctx, 'daemons'):
        first_ceph_cluster = True
    if not hasattr(ctx, 'ceph'):
        ctx.ceph = {}
        ctx.managers = {}
    if 'cluster' not in config:
        config['cluster'] = 'ceph'
    cluster_name = config['cluster']
    ctx.ceph[cluster_name] = argparse.Namespace()
    ctx.ceph[cluster_name].thrashers = []
    # fixme: setup watchdog, ala ceph.py

    # cephadm mode?
    if 'cephadm_mode' not in config:
        config['cephadm_mode'] = 'root'
    assert config['cephadm_mode'] in ['root', 'cephadm-package']
    if config['cephadm_mode'] == 'root':
        ctx.cephadm = testdir + '/cephadm'
    else:
        ctx.cephadm = 'cephadm'  # in the path

    if first_ceph_cluster:
        # FIXME: this is global for all clusters
        ctx.daemons = DaemonGroup(use_cephadm=ctx.cephadm)

    # image
    ctx.ceph[cluster_name].image = config.get('image')
    ref = None
    if not ctx.ceph[cluster_name].image:
        sha1 = config.get('sha1')
        if sha1:
            ctx.ceph[cluster_name].image = 'quay.io/ceph-ci/ceph:%s' % sha1
            ref = sha1
        else:
            # hmm, fall back to branch?
            branch = config.get('branch', 'master')
            ref = branch
            ctx.ceph[cluster_name].image = 'quay.io/ceph-ci/ceph:%s' % branch
    log.info('Cluster image is %s' % ctx.ceph[cluster_name].image)

    # uuid
    fsid = str(uuid.uuid1())
    log.info('Cluster fsid is %s' % fsid)
    ctx.ceph[cluster_name].fsid = fsid

    # mon ips
    log.info('Choosing monitor IPs and ports...')
    remotes_and_roles = ctx.cluster.remotes.items()
    roles = [role_list for (remote, role_list) in remotes_and_roles]
    ips = [host for (host, port) in
           (remote.ssh.get_transport().getpeername()
            for (remote, role_list) in remotes_and_roles)]
    ctx.ceph[cluster_name].mons = get_mons(
        roles, ips, cluster_name,
        mon_bind_msgr2=config.get('mon_bind_msgr2', True),
        mon_bind_addrvec=config.get('mon_bind_addrvec', True),
    )
    log.info('Monitor IPs: %s' % ctx.ceph[cluster_name].mons)

    with contextutil.nested(
            lambda: ceph_initial(),
            lambda: normalize_hostnames(ctx=ctx),
            lambda: download_cephadm(ctx=ctx, config=config, ref=ref),
            lambda: ceph_log(ctx=ctx, config=config),
            lambda: ceph_crash(ctx=ctx, config=config),
            lambda: ceph_bootstrap(ctx=ctx, config=config),
            lambda: crush_setup(ctx=ctx, config=config),
            lambda: ceph_mons(ctx=ctx, config=config),
            lambda: ceph_mgrs(ctx=ctx, config=config),
            lambda: ceph_osds(ctx=ctx, config=config),
            lambda: ceph_mdss(ctx=ctx, config=config),
            lambda: ceph_rgw(ctx=ctx, config=config),
            lambda: ceph_monitoring('prometheus', ctx=ctx, config=config),
            lambda: ceph_monitoring('node-exporter', ctx=ctx, config=config),
            lambda: ceph_monitoring('alertmanager', ctx=ctx, config=config),
            lambda: ceph_monitoring('grafana', ctx=ctx, config=config),
            lambda: ceph_clients(ctx=ctx, config=config),
    ):
        ctx.managers[cluster_name] = CephManager(
            ctx.ceph[cluster_name].bootstrap_remote,
            ctx=ctx,
            logger=log.getChild('ceph_manager.' + cluster_name),
            cluster=cluster_name,
            cephadm=True,
        )
        try:
            if config.get('wait-for-healthy', True):
                healthy(ctx=ctx, config=config)
            log.info('Setup complete, yielding')
            yield
        finally:
            log.info('Teardown begin')
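# A standalone illustration of the `ips` comprehension above: the SSH
# transport's getpeername() returns (host, port) tuples, and only the
# host part is kept (sample addresses are hypothetical):
peernames = [('172.21.15.1', 22), ('172.21.15.2', 22)]
ips_example = [host for (host, port) in peernames]
assert ips_example == ['172.21.15.1', '172.21.15.2']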
def _fix_roles_map(self):
    ctx = self.ctx
    # Find rhbuild from config and check the cluster version.
    # For 4.x, support multiple RGW daemons on a single node;
    # otherwise, continue as before.
    multi_rgw_support = False
    try:
        if str(ctx.config.get('redhat', str()).get("rhbuild")).startswith("4"):
            multi_rgw_support = True
    except AttributeError:
        pass
    if not hasattr(ctx, 'managers'):
        ctx.managers = {}
    ctx.daemons = DaemonGroup(use_systemd=True)
    if not hasattr(ctx, 'new_remote_role'):
        new_remote_role = dict()
        ctx.new_remote_role = new_remote_role
    else:
        new_remote_role = ctx.new_remote_role
    for remote, roles in self.ready_cluster.remotes.items():
        new_remote_role[remote] = []
        generate_osd_list = True
        rgw_count = 0
        for role in roles:
            cluster, rol, id = misc.split_role(role)
            if rol.startswith('osd'):
                if generate_osd_list:
                    # gather osd ids as seen on the host
                    out = StringIO()
                    remote.run(args=['ps', '-eaf',
                                     run.Raw('|'), 'grep', 'ceph-osd',
                                     run.Raw('|'),
                                     run.Raw('awk {\'print $13\'}')],
                               stdout=out)
                    osd_list_all = out.getvalue().split('\n')
                    generate_osd_list = False
                    osd_list = []
                    for osd_id in osd_list_all:
                        try:
                            # keep only numeric ids; int() raises
                            # ValueError on empty lines in the output
                            int(osd_id)
                            osd_list.append(osd_id)
                        except ValueError:
                            pass
                id = osd_list.pop()
                log.info("Registering Daemon {rol} {id}".format(rol=rol, id=id))
                ctx.daemons.add_daemon(remote, rol, id)
                if len(role.split('.')) == 2:
                    osd_role = "{rol}.{id}".format(rol=rol, id=id)
                else:
                    osd_role = "{c}.{rol}.{id}".format(c=cluster, rol=rol, id=id)
                new_remote_role[remote].append(osd_role)
            elif rol.startswith('mon') or rol.startswith('mgr') \
                    or rol.startswith('mds'):
                hostname = remote.shortname
                new_remote_role[remote].append(role)
                log.info("Registering Daemon {rol} {id}".format(rol=rol, id=id))
                ctx.daemons.add_daemon(remote, rol, hostname)
            elif rol.startswith('rgw'):
                hostname = remote.shortname
                new_remote_role[remote].append(role)
                id_ = "{}.{}".format('rgw', hostname)
                if multi_rgw_support:
                    id_ = "{}.{}.rgw{}".format('rgw', hostname, rgw_count)
                    rgw_count += 1
                log.info("Registering Daemon {rol} {id}".format(rol=rol, id=id_))
                ctx.daemons.add_daemon(remote, rol, id_=id_)
            else:
                new_remote_role[remote].append(role)
    self.each_cluster.remotes.update(new_remote_role)
    (ceph_first_mon,) = iter(
        self.ctx.cluster.only(
            misc.get_first_mon(self.ctx, self.config,
                               self.cluster_name)).remotes.keys())
    from tasks.ceph_manager import CephManager
    ctx.managers['ceph'] = CephManager(
        ceph_first_mon,
        ctx=ctx,
        logger=log.getChild('ceph_manager.' + 'ceph'),
    )
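# A standalone sketch of the osd-id filter above: awk's $13 yields one
# whitespace-separated column per matching ps line, and int() is used to
# drop blanks and non-numeric entries such as the grep process's own line
# (sample column values are hypothetical):
raw_columns = ['0', '3', '', 'ceph-osd', '']
filtered = []
for osd_id in raw_columns:
    try:
        int(osd_id)            # non-numeric entries raise ValueError
        filtered.append(osd_id)
    except ValueError:
        pass
assert filtered == ['0', '3']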
def task(ctx, config):
    if config is None:
        config = {}
    assert isinstance(config, dict), \
        "task only supports a dictionary for configuration"

    overrides = ctx.config.get('overrides', {})
    teuthology.deep_merge(config, overrides.get('ceph', {}))
    log.info('Config: ' + str(config))

    testdir = teuthology.get_testdir(ctx)

    # set up cluster context
    first_ceph_cluster = False
    if not hasattr(ctx, 'daemons'):
        first_ceph_cluster = True
    if not hasattr(ctx, 'ceph'):
        ctx.ceph = {}
        ctx.managers = {}
    if 'cluster' not in config:
        config['cluster'] = 'ceph'
    cluster_name = config['cluster']
    ctx.ceph[cluster_name] = argparse.Namespace()
    ctx.ceph[cluster_name].thrashers = []
    # fixme: setup watchdog, ala ceph.py
    ctx.ceph[cluster_name].roleless = False  # see below

    # cephadm mode?
    if 'cephadm_mode' not in config:
        config['cephadm_mode'] = 'root'
    assert config['cephadm_mode'] in ['root', 'cephadm-package']
    if config['cephadm_mode'] == 'root':
        ctx.cephadm = testdir + '/cephadm'
    else:
        ctx.cephadm = 'cephadm'  # in the path

    if first_ceph_cluster:
        # FIXME: this is global for all clusters
        ctx.daemons = DaemonGroup(use_cephadm=ctx.cephadm)

    # image
    ctx.ceph[cluster_name].image = config.get('image')
    ref = None
    if not ctx.ceph[cluster_name].image:
        sha1 = config.get('sha1')
        if sha1:
            ctx.ceph[cluster_name].image = 'quay.io/ceph-ci/ceph:%s' % sha1
            ref = sha1
        else:
            # hmm, fall back to branch?
            branch = config.get('branch', 'master')
            ref = branch
            ctx.ceph[cluster_name].image = 'quay.io/ceph-ci/ceph:%s' % branch
    log.info('Cluster image is %s' % ctx.ceph[cluster_name].image)

    # uuid
    fsid = str(uuid.uuid1())
    log.info('Cluster fsid is %s' % fsid)
    ctx.ceph[cluster_name].fsid = fsid

    # mon ips
    log.info('Choosing monitor IPs and ports...')
    remotes_and_roles = ctx.cluster.remotes.items()
    roles = [role_list for (remote, role_list) in remotes_and_roles]
    ips = [host for (host, port) in
           (remote.ssh.get_transport().getpeername()
            for (remote, role_list) in remotes_and_roles)]

    if config.get('roleless', False):
        # mons will be named after hosts
        n = len(roles)
        roles = []
        first_mon = None
        for remote, _ in remotes_and_roles:
            roles.append(['mon.' + remote.shortname])
            if not first_mon:
                first_mon = remote.shortname
                bootstrap_remote = remote
        log.info('No roles; fabricating mons %s' % roles)

    ctx.ceph[cluster_name].mons = get_mons(
        roles, ips, cluster_name,
        mon_bind_msgr2=config.get('mon_bind_msgr2', True),
        mon_bind_addrvec=config.get('mon_bind_addrvec', True),
    )
    log.info('Monitor IPs: %s' % ctx.ceph[cluster_name].mons)

    if config.get('roleless', False):
        ctx.ceph[cluster_name].roleless = True
        ctx.ceph[cluster_name].bootstrap_remote = bootstrap_remote
        ctx.ceph[cluster_name].first_mon = first_mon
        ctx.ceph[cluster_name].first_mon_role = 'mon.' + first_mon
    else:
        first_mon_role = sorted(ctx.ceph[cluster_name].mons.keys())[0]
        _, _, first_mon = teuthology.split_role(first_mon_role)
        (bootstrap_remote,) = ctx.cluster.only(first_mon_role).remotes.keys()
        log.info('First mon is mon.%s on %s' % (first_mon,
                                                bootstrap_remote.shortname))
        ctx.ceph[cluster_name].bootstrap_remote = bootstrap_remote
        ctx.ceph[cluster_name].first_mon = first_mon
        ctx.ceph[cluster_name].first_mon_role = first_mon_role

        others = ctx.cluster.remotes[bootstrap_remote]
        mgrs = sorted([r for r in others
                       if teuthology.is_type('mgr', cluster_name)(r)])
        if not mgrs:
            raise RuntimeError('no mgrs on the same host as first mon %s' %
                               first_mon)
        _, _, first_mgr = teuthology.split_role(mgrs[0])
        log.info('First mgr is %s' % (first_mgr))
        ctx.ceph[cluster_name].first_mgr = first_mgr

    with contextutil.nested(
            lambda: ceph_initial(),
            lambda: normalize_hostnames(ctx=ctx),
            lambda: download_cephadm(ctx=ctx, config=config, ref=ref),
            lambda: ceph_log(ctx=ctx, config=config),
            lambda: ceph_crash(ctx=ctx, config=config),
            lambda: ceph_bootstrap(ctx=ctx, config=config),
            lambda: crush_setup(ctx=ctx, config=config),
            lambda: ceph_mons(ctx=ctx, config=config),
            lambda: distribute_config_and_admin_keyring(ctx=ctx, config=config),
            lambda: ceph_mgrs(ctx=ctx, config=config),
            lambda: ceph_osds(ctx=ctx, config=config),
            lambda: ceph_mdss(ctx=ctx, config=config),
            lambda: ceph_rgw(ctx=ctx, config=config),
            lambda: ceph_monitoring('prometheus', ctx=ctx, config=config),
            lambda: ceph_monitoring('node-exporter', ctx=ctx, config=config),
            lambda: ceph_monitoring('alertmanager', ctx=ctx, config=config),
            lambda: ceph_monitoring('grafana', ctx=ctx, config=config),
            lambda: ceph_clients(ctx=ctx, config=config),
    ):
        ctx.managers[cluster_name] = CephManager(
            ctx.ceph[cluster_name].bootstrap_remote,
            ctx=ctx,
            logger=log.getChild('ceph_manager.' + cluster_name),
            cluster=cluster_name,
            cephadm=True,
        )
        try:
            if config.get('wait-for-healthy', True):
                healthy(ctx=ctx, config=config)
            log.info('Setup complete, yielding')
            yield
        finally:
            log.info('Teardown begin')
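# Roleless mode in the task above is opt-in via config; a minimal
# fragment (per the roleless lookup in the code):
#
#   tasks:
#   - cephadm:
#       roleless: true
#
# With roleless set, one mon role is fabricated per host and the first
# host becomes the bootstrap remote.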