def ceph_clients(ctx, config):
    """
    Create a wide-open auth key for every client role and install the
    resulting keyring on that client's node, then yield to the caller.

    :param ctx: Context
    :param config: Configuration (must contain a 'cluster' name)
    """
    cluster_name = config['cluster']
    log.info('Setting up client nodes...')
    client_nodes = ctx.cluster.only(teuthology.is_type('client', cluster_name))
    for remote, host_roles in client_nodes.remotes.items():
        client_roles = teuthology.cluster_roles_of_type(
            host_roles, 'client', cluster_name)
        for role in client_roles:
            name = teuthology.ceph_role(role)
            keyring_path = '/etc/ceph/{0}.{1}.keyring'.format(
                cluster_name, name)
            # get-or-create is idempotent: reuses the key if it already exists.
            proc = _shell(
                ctx, config,
                args=[
                    'ceph', 'auth', 'get-or-create', name,
                    'mon', 'allow *',
                    'osd', 'allow *',
                    'mds', 'allow *',
                    'mgr', 'allow *',
                ],
                stdout=BytesIO(),
            )
            # World-readable so unprivileged test clients can authenticate.
            remote.write_file(keyring_path, proc.stdout.getvalue(),
                              sudo=True, mode='0644')
    yield
def create_keyring(ctx, cluster_name):
    """
    Set up key ring on remote sites: generate a fresh keyring for every
    client role and make it world-readable.

    :param ctx: Context
    :param cluster_name: name of the ceph cluster
    """
    log.info('Setting up client nodes...')
    client_nodes = ctx.cluster.only(teuthology.is_type('client', cluster_name))
    coverage_dir = '{tdir}/archive/coverage'.format(
        tdir=teuthology.get_testdir(ctx))
    for remote, host_roles in client_nodes.remotes.items():
        for role in teuthology.cluster_roles_of_type(host_roles, 'client',
                                                     cluster_name):
            name = teuthology.ceph_role(role)
            keyring = '/etc/ceph/{0}.{1}.keyring'.format(cluster_name, name)
            cmd = [
                'sudo',
                'adjust-ulimits',
                'ceph-coverage',
                coverage_dir,
                'ceph-authtool',
                '--create-keyring',
                '--gen-key',
                # TODO this --name= is not really obeyed, all unknown "types" are munged to "client"
                '--name={name}'.format(name=name),
                keyring,
                run.Raw('&&'),
                # readable by the test user, not just root
                'sudo',
                'chmod',
                '0644',
                keyring,
            ]
            remote.run(args=cmd)
def create_keyring(ctx, cluster_name):
    """
    Set up key ring on remote sites: generate a keyring for every client
    role and leave it world-readable under /etc/ceph.

    :param ctx: Context
    :param cluster_name: name of the ceph cluster
    """
    log.info('Setting up client nodes...')
    clients = ctx.cluster.only(teuthology.is_type('client', cluster_name))
    testdir = teuthology.get_testdir(ctx)
    coverage_dir = '{tdir}/archive/coverage'.format(tdir=testdir)
    # Fixed: dict.iteritems() is Python-2-only (removed in Python 3);
    # use .items() as the sibling implementations in this file already do.
    for remote, roles_for_host in clients.remotes.items():
        for role in teuthology.cluster_roles_of_type(roles_for_host, 'client',
                                                     cluster_name):
            name = teuthology.ceph_role(role)
            client_keyring = '/etc/ceph/{0}.{1}.keyring'.format(cluster_name,
                                                                name)
            remote.run(
                args=[
                    'sudo',
                    'adjust-ulimits',
                    'ceph-coverage',
                    coverage_dir,
                    'ceph-authtool',
                    '--create-keyring',
                    '--gen-key',
                    # TODO this --name= is not really obeyed, all unknown "types" are munged to "client"
                    '--name={name}'.format(name=name),
                    client_keyring,
                    run.Raw('&&'),
                    'sudo',
                    'chmod',
                    '0644',
                    client_keyring,
                ],
            )
def osd_scrub_pgs(ctx, config):
    """
    Scrub pgs when we exit.  First make sure all pgs are active and
    clean.  Next scrub all osds.  Then periodically check until all
    pgs have scrub time stamps that indicate the last scrub completed.
    Time out if no progress is made here after two minutes.

    :param ctx: Context
    :param config: Configuration (must contain a 'cluster' name)
    """
    retries = 12
    delays = 10
    cluster_name = config['cluster']
    manager = ctx.managers[cluster_name]
    all_clean = False
    # Wait up to retries * delays seconds for every PG to report
    # 'active+clean' before kicking off any scrubs.
    for _ in range(0, retries):
        stats = manager.get_pg_stats()
        states = [stat['state'] for stat in stats]
        if len(set(states)) == 1 and states[0] == 'active+clean':
            all_clean = True
            break
        log.info("Waiting for all osds to be active and clean.")
        time.sleep(delays)
    if not all_clean:
        # Never scrub a cluster that did not settle.
        log.info("Scrubbing terminated -- not all pgs were active and clean.")
        return
    check_time_now = time.localtime()
    time.sleep(1)  # ensure post-scrub stamps compare strictly greater than check_time_now
    # Ask every OSD of this cluster to deep-scrub.
    all_roles = teuthology.all_roles(ctx.cluster)
    for role in teuthology.cluster_roles_of_type(all_roles, 'osd', cluster_name):
        log.info("Scrubbing {osd}".format(osd=role))
        _, _, id_ = teuthology.split_role(role)
        manager.raw_cluster_cmd('osd', 'deep-scrub', id_)
    # Poll until every PG's last_scrub_stamp is newer than check_time_now.
    # prev_good remembers the best count of freshly-scrubbed PGs seen so far;
    # gap_cnt counts consecutive polls with no forward progress and gives up
    # once it exceeds `retries`.
    prev_good = 0
    gap_cnt = 0
    loop = True
    while loop:
        stats = manager.get_pg_stats()
        timez = [stat['last_scrub_stamp'] for stat in stats]
        loop = False
        thiscnt = 0
        for tmval in timez:
            # Stamp looks like 'YYYY-mm-dd HH:MM:SS.ffffff'; strip the
            # fractional part before parsing (struct_time compares as a tuple).
            pgtm = time.strptime(tmval[0:tmval.find('.')], '%Y-%m-%d %H:%M:%S')
            if pgtm > check_time_now:
                thiscnt += 1
            else:
                loop = True  # at least one PG has not finished scrubbing yet
        if thiscnt > prev_good:
            prev_good = thiscnt
            gap_cnt = 0
        else:
            gap_cnt += 1
            if gap_cnt > retries:
                log.info('Exiting scrub checking -- not all pgs scrubbed.')
                return
        if loop:
            log.info('Still waiting for all pgs to be scrubbed.')
            time.sleep(delays)
def osd_scrub_pgs(ctx, config):
    """
    Deep-scrub every PG at exit time.  Wait for the cluster to settle to
    all-active+clean, trigger a deep scrub on every OSD, then poll the
    PG scrub stamps until all of them post-date the trigger time.
    Give up if no new PG finishes scrubbing across too many polls.

    :param ctx: Context
    :param config: Configuration (must contain a 'cluster' name)
    """
    retries = 12
    delays = 10
    cluster_name = config["cluster"]
    manager = ctx.managers[cluster_name]

    # Settle phase: every PG must report active+clean before scrubbing.
    for _ in range(retries):
        pg_states = {stat["state"] for stat in manager.get_pg_stats()}
        if pg_states == {"active+clean"}:
            break
        log.info("Waiting for all osds to be active and clean.")
        time.sleep(delays)
    else:
        log.info("Scrubbing terminated -- not all pgs were active and clean.")
        return

    check_time_now = time.localtime()
    time.sleep(1)  # scrub stamps must compare strictly newer than this mark

    # Trigger a deep scrub on every OSD of this cluster.
    all_roles = teuthology.all_roles(ctx.cluster)
    for role in teuthology.cluster_roles_of_type(all_roles, "osd", cluster_name):
        log.info("Scrubbing {osd}".format(osd=role))
        _, _, osd_id = teuthology.split_role(role)
        manager.raw_cluster_cmd("osd", "deep-scrub", osd_id)

    # Poll phase: best_count is the most freshly-scrubbed PGs observed in a
    # single poll; stalls counts polls without improvement.
    best_count = 0
    stalls = 0
    pending = True
    while pending:
        stamps = [stat["last_scrub_stamp"] for stat in manager.get_pg_stats()]
        fresh = 0
        for stamp in stamps:
            # 'YYYY-mm-dd HH:MM:SS.ffffff' -> drop the fractional seconds.
            parsed = time.strptime(stamp[: stamp.find(".")], "%Y-%m-%d %H:%M:%S")
            if parsed > check_time_now:
                fresh += 1
        pending = fresh < len(stamps)
        if fresh > best_count:
            best_count = fresh
            stalls = 0
        else:
            stalls += 1
            if stalls > retries:
                log.info("Exiting scrub checking -- not all pgs scrubbed.")
                return
        if pending:
            log.info("Still waiting for all pgs to be scrubbed.")
            time.sleep(delays)
def create_keyring(self):
    """
    Set up key ring on remote sites: generate a keyring (with rwx caps on
    mon and osd) for every client role, make it world-readable, and import
    it into ceph auth.
    """
    log.info('Setting up client nodes...')
    clients = self.ctx.cluster.only(
        teuthology.is_type('client', self.cluster_name))
    testdir = teuthology.get_testdir(self.ctx)
    coverage_dir = '{tdir}/archive/coverage'.format(tdir=testdir)
    # Fixed: dict.iteritems() is Python-2-only (removed in Python 3);
    # use .items() as other blocks in this file already do.
    for remote, roles_for_host in clients.remotes.items():
        for role in teuthology.cluster_roles_of_type(
                roles_for_host, 'client', self.cluster_name):
            name = teuthology.ceph_role(role)
            log.info("Creating keyring for {}".format(name))
            client_keyring = '/etc/ceph/ceph.{}.keyring'.format(name)
            remote.run(
                args=[
                    'sudo',
                    'adjust-ulimits',
                    'ceph-coverage',
                    coverage_dir,
                    'ceph-authtool',
                    '--create-keyring',
                    '--gen-key',
                    # TODO this --name= is not really obeyed, all unknown
                    # "types" are munged to "client"
                    '--name={name}'.format(name=name),
                    '--cap',
                    'osd',
                    'allow rwx',
                    '--cap',
                    'mon',
                    'allow rwx',
                    client_keyring,
                    run.Raw('&&'),
                    'sudo',
                    'chmod',
                    '0644',
                    client_keyring,
                    run.Raw('&&'),
                    'sudo',
                    'ls',
                    run.Raw('-l'),
                    client_keyring,
                    run.Raw('&&'),
                    'sudo',
                    'ceph',
                    'auth',
                    'import',
                    run.Raw('-i'),
                    client_keyring,
                ],
            )
def test_cluster_roles_of_type():
    """Table-driven check of misc.cluster_roles_of_type filtering by type
    and (optional) cluster name."""
    cases = [
        (['client.0', 'osd.0', 'ceph.osd.1'], 'osd', 'ceph',
         ['osd.0', 'ceph.osd.1']),
        (['client.0', 'osd.0', 'ceph.osd.1'], 'client', 'ceph',
         ['client.0']),
        (['foo.client.1', 'bar.client.2.3', 'baz.osd.1'], 'mon', None,
         []),
        (['foo.client.1', 'bar.client.2.3', 'baz.osd.1'], 'client', None,
         ['foo.client.1', 'bar.client.2.3']),
        (['foo.client.1', 'bar.client.2.3', 'baz.osd.1'], 'client', 'bar',
         ['bar.client.2.3']),
    ]
    for host_roles, role_type, cluster, want in cases:
        got = list(misc.cluster_roles_of_type(host_roles, role_type, cluster))
        assert got == want
def ceph_clients(ctx, config):
    """
    Create a wide-open auth key for every client role and install the
    resulting keyring on that client's node, then yield to the caller.

    :param ctx: Context
    :param config: Configuration (must contain a 'cluster' name)
    """
    cluster_name = config['cluster']
    log.info('Setting up client nodes...')
    clients = ctx.cluster.only(teuthology.is_type('client', cluster_name))
    # Fixed: dropped a duplicated `testdir = teuthology.get_testdir(ctx)`
    # assignment and the unused `coverage_dir` derived from it -- neither
    # value was referenced anywhere in this function.
    ctx.cluster.run(args=[
        'sudo',
        'mkdir',
        '-p',
        '/etc/ceph',
    ])
    for remote, roles_for_host in clients.remotes.items():
        for role in teuthology.cluster_roles_of_type(roles_for_host, 'client',
                                                     cluster_name):
            name = teuthology.ceph_role(role)
            client_keyring = '/etc/ceph/{0}.{1}.keyring'.format(
                cluster_name, name)
            # get-or-create is idempotent: reuses an existing key if present.
            r = _shell(
                ctx=ctx,
                cluster_name=cluster_name,
                remote=remote,
                args=[
                    'ceph', 'auth', 'get-or-create', name,
                    'mon', 'allow *',
                    'osd', 'allow *',
                    'mds', 'allow *',
                    'mgr', 'allow *',
                ],
                stdout=StringIO(),
            )
            keyring = r.stdout.getvalue()
            # World-readable so unprivileged test clients can authenticate.
            teuthology.sudo_write_file(remote=remote,
                                       path=client_keyring,
                                       data=keyring,
                                       perms='0644')
    yield
def execute(ctx, config):
    """
    Run the blktrace program on remote machines.

    Starts one background blktrace per OSD data device, yields to the
    caller, then stops every tracer by closing its stdin on exit.

    :param ctx: Context
    :param config: Configuration (must contain a 'cluster' name)
    """
    procs = []
    testdir = teuthology.get_testdir(ctx)
    log_dir = '{tdir}/archive/performance/blktrace'.format(tdir=testdir)

    osds = ctx.cluster.only(teuthology.is_type('osd'))
    for remote, roles_for_host in osds.remotes.items():
        roles_to_devs = ctx.disk_config.remote_to_roles_to_dev[remote]
        for role in teuthology.cluster_roles_of_type(roles_for_host, 'osd',
                                                     config['cluster']):
            if roles_to_devs.get(role):
                dev = roles_to_devs[role]
                log.info("running blktrace on %s: %s" % (remote.name, dev))

                proc = remote.run(
                    args=[
                        'cd',
                        log_dir,
                        run.Raw(';'),
                        # daemon-helper keeps the tracer alive until stdin
                        # closes, then delivers daemon_signal to stop it.
                        'daemon-helper',
                        daemon_signal,
                        'sudo',
                        blktrace,
                        '-o',
                        dev.rsplit("/", 1)[1],
                        '-d',
                        dev,
                    ],
                    wait=False,
                    stdin=run.PIPE,
                )
                procs.append(proc)
    try:
        yield
    finally:
        # Fixed: removed a dead `osds = ctx.cluster.only(...)` reassignment
        # whose result was never used, and the "processs" log typo.
        log.info('stopping blktrace processes')
        for proc in procs:
            proc.stdin.close()
def execute(ctx, config):
    """
    Run the blktrace program on remote machines.

    Starts one background blktrace per OSD data device, yields to the
    caller, then stops every tracer by closing its stdin on exit.

    :param ctx: Context
    :param config: Configuration (must contain a 'cluster' name)
    """
    procs = []
    testdir = teuthology.get_testdir(ctx)
    log_dir = '{tdir}/archive/performance/blktrace'.format(tdir=testdir)

    osds = ctx.cluster.only(teuthology.is_type('osd'))
    # Fixed: dict.iteritems() is Python-2-only (removed in Python 3);
    # use .items() as the sibling implementation in this file does.
    for remote, roles_for_host in osds.remotes.items():
        roles_to_devs = ctx.disk_config.remote_to_roles_to_dev[remote]
        for role in teuthology.cluster_roles_of_type(roles_for_host, 'osd',
                                                     config['cluster']):
            if roles_to_devs.get(role):
                dev = roles_to_devs[role]
                log.info("running blktrace on %s: %s" % (remote.name, dev))

                proc = remote.run(
                    args=[
                        'cd',
                        log_dir,
                        run.Raw(';'),
                        # daemon-helper keeps the tracer alive until stdin
                        # closes, then delivers daemon_signal to stop it.
                        'daemon-helper',
                        daemon_signal,
                        'sudo',
                        blktrace,
                        '-o',
                        dev.rsplit("/", 1)[1],
                        '-d',
                        dev,
                    ],
                    wait=False,
                    stdin=run.PIPE,
                )
                procs.append(proc)
    try:
        yield
    finally:
        osds = ctx.cluster.only(teuthology.is_type('osd'))
        log.info('stopping blktrace processs')
        for proc in procs:
            proc.stdin.close()
def cluster(ctx, config):
    """
    Handle the creation and removal of a ceph cluster.

    On startup:
        Create directories needed for the cluster.
        Create remote journals for all osds.
        Create and set keyring.
        Copy the monmap to the test systems.
        Setup mon nodes.
        Setup mds nodes.
        Mkfs osd nodes.
        Add keyring information to monmaps
        Mkfs mon nodes.

    On exit:
        If errors occurred, extract a failure message and store in ctx.summary.
        Unmount all test files and temporary journaling files.
        Save the monitor information and archive all ceph logs.
        Cleanup the keyring setup, and remove all monitor map and data files
        left over.

    :param ctx: Context
    :param config: Configuration
    """
    if ctx.config.get('use_existing_cluster', False) is True:
        log.info("'use_existing_cluster' is true; skipping cluster creation")
        yield
        # Fixed: without this return, teardown of the existing-cluster path
        # fell through into cluster creation and a second yield, which a
        # contextmanager wrapper rejects ("generator didn't stop").
        return

    testdir = teuthology.get_testdir(ctx)
    cluster_name = config['cluster']
    data_dir = '{tdir}/{cluster}.data'.format(tdir=testdir,
                                              cluster=cluster_name)
    log.info('Creating ceph cluster %s...', cluster_name)
    run.wait(
        ctx.cluster.run(
            args=['install', '-d', '-m0755', '--', data_dir],
            wait=False,
        ))
    run.wait(
        ctx.cluster.run(
            args=['sudo', 'install', '-d', '-m0777', '--', '/var/run/ceph'],
            wait=False,
        ))

    # Map scratch devices / journals to OSD roles, host by host.
    # NOTE: throughout this function dict.iteritems()/iterkeys() were
    # replaced with .items()/.keys() -- the Py2 forms no longer exist.
    devs_to_clean = {}
    remote_to_roles_to_devs = {}
    remote_to_roles_to_journals = {}
    osds = ctx.cluster.only(teuthology.is_type('osd', cluster_name))
    for remote, roles_for_host in osds.remotes.items():
        devs = teuthology.get_scratch_devices(remote)
        roles_to_devs = {}
        roles_to_journals = {}
        if config.get('fs'):
            log.info('fs option selected, checking for scratch devs')
            log.info('found devs: %s' % (str(devs), ))
            devs_id_map = teuthology.get_wwn_id_map(remote, devs)
            # list() because dict views cannot be sliced (Python 3).
            iddevs = list(devs_id_map.values())
            roles_to_devs = assign_devs(
                teuthology.cluster_roles_of_type(roles_for_host, 'osd',
                                                 cluster_name), iddevs)
            if len(roles_to_devs) < len(iddevs):
                # Leftover devices become journal candidates below.
                iddevs = iddevs[len(roles_to_devs):]
            devs_to_clean[remote] = []

        if config.get('block_journal'):
            # NOTE(review): `iddevs` is only bound when the 'fs' option is
            # set; 'block_journal' without 'fs' would raise NameError.
            # Preserved as-is -- confirm intended precondition.
            log.info('block journal enabled')
            roles_to_journals = assign_devs(
                teuthology.cluster_roles_of_type(roles_for_host, 'osd',
                                                 cluster_name), iddevs)
            log.info('journal map: %s', roles_to_journals)

        if config.get('tmpfs_journal'):
            log.info('tmpfs journal enabled')
            roles_to_journals = {}
            remote.run(args=['sudo', 'mount', '-t', 'tmpfs', 'tmpfs', '/mnt'])
            for role in teuthology.cluster_roles_of_type(
                    roles_for_host, 'osd', cluster_name):
                tmpfs = '/mnt/' + role
                roles_to_journals[role] = tmpfs
                remote.run(args=['truncate', '-s', '1500M', tmpfs])
            log.info('journal map: %s', roles_to_journals)

        log.info('dev map: %s' % (str(roles_to_devs), ))
        remote_to_roles_to_devs[remote] = roles_to_devs
        remote_to_roles_to_journals[remote] = roles_to_journals

    log.info('Generating config...')
    remotes_and_roles = ctx.cluster.remotes.items()
    roles = [role_list for (remote, role_list) in remotes_and_roles]
    ips = [
        host for (host, port) in (remote.ssh.get_transport().getpeername()
                                  for (remote, role_list) in remotes_and_roles)
    ]
    conf = teuthology.skeleton_config(ctx, roles=roles, ips=ips,
                                      cluster=cluster_name)
    # Record each OSD's journal location in the generated conf.
    for remote, roles_to_journals in remote_to_roles_to_journals.items():
        for role, journal in roles_to_journals.items():
            name = teuthology.ceph_role(role)
            if name not in conf:
                conf[name] = {}
            conf[name]['osd journal'] = journal
    # Overlay user-supplied conf sections on top of the skeleton.
    for section, keys in config['conf'].items():
        for key, value in keys.items():
            log.info("[%s] %s = %s" % (section, key, value))
            if section not in conf:
                conf[section] = {}
            conf[section][key] = value

    if config.get('tmpfs_journal'):
        # NOTE(review): sets a top-level key rather than one under a
        # section; preserved as-is -- confirm against skeleton_config.
        conf['journal dio'] = False

    if not hasattr(ctx, 'ceph'):
        ctx.ceph = {}
    ctx.ceph[cluster_name] = argparse.Namespace()
    ctx.ceph[cluster_name].conf = conf

    default_keyring = '/etc/ceph/{cluster}.keyring'.format(
        cluster=cluster_name)
    keyring_path = config.get('keyring_path', default_keyring)

    coverage_dir = '{tdir}/archive/coverage'.format(tdir=testdir)

    firstmon = teuthology.get_first_mon(ctx, config, cluster_name)

    # Create the cluster keyring and the mon. key on the first monitor.
    log.info('Setting up %s...' % firstmon)
    ctx.cluster.only(firstmon).run(args=[
        'sudo',
        'adjust-ulimits',
        'ceph-coverage',
        coverage_dir,
        'ceph-authtool',
        '--create-keyring',
        keyring_path,
    ], )
    ctx.cluster.only(firstmon).run(args=[
        'sudo',
        'adjust-ulimits',
        'ceph-coverage',
        coverage_dir,
        'ceph-authtool',
        '--gen-key',
        '--name=mon.',
        keyring_path,
    ], )
    ctx.cluster.only(firstmon).run(args=[
        'sudo',
        'chmod',
        '0644',
        keyring_path,
    ], )
    (mon0_remote, ) = ctx.cluster.only(firstmon).remotes.keys()
    monmap_path = '{tdir}/{cluster}.monmap'.format(tdir=testdir,
                                                   cluster=cluster_name)
    fsid = teuthology.create_simple_monmap(
        ctx,
        remote=mon0_remote,
        conf=conf,
        path=monmap_path,
    )
    if 'global' not in conf:
        conf['global'] = {}
    conf['global']['fsid'] = fsid

    default_conf_path = '/etc/ceph/{cluster}.conf'.format(
        cluster=cluster_name)
    conf_path = config.get('conf_path', default_conf_path)
    log.info('Writing %s for FSID %s...' % (conf_path, fsid))
    write_conf(ctx, conf_path, cluster_name)

    log.info('Creating admin key on %s...' % firstmon)
    ctx.cluster.only(firstmon).run(args=[
        'sudo',
        'adjust-ulimits',
        'ceph-coverage',
        coverage_dir,
        'ceph-authtool',
        '--gen-key',
        '--name=client.admin',
        '--set-uid=0',
        '--cap', 'mon', 'allow *',
        '--cap', 'osd', 'allow *',
        '--cap', 'mds', 'allow *',
        keyring_path,
    ], )

    log.info('Copying monmap to all nodes...')
    keyring = teuthology.get_file(
        remote=mon0_remote,
        path=keyring_path,
    )
    monmap = teuthology.get_file(
        remote=mon0_remote,
        path=monmap_path,
    )

    for rem in ctx.cluster.remotes.keys():
        # copy mon key and initial monmap
        log.info('Sending monmap to node {remote}'.format(remote=rem))
        teuthology.sudo_write_file(remote=rem,
                                   path=keyring_path,
                                   data=keyring,
                                   perms='0644')
        teuthology.write_file(
            remote=rem,
            path=monmap_path,
            data=monmap,
        )

    log.info('Setting up mon nodes...')
    mons = ctx.cluster.only(teuthology.is_type('mon', cluster_name))
    osdmap_path = '{tdir}/{cluster}.osdmap'.format(tdir=testdir,
                                                   cluster=cluster_name)
    run.wait(
        mons.run(
            args=[
                'adjust-ulimits',
                'ceph-coverage',
                coverage_dir,
                'osdmaptool',
                '-c', conf_path,
                '--clobber',
                '--createsimple',
                '{num:d}'.format(num=teuthology.num_instances_of_type(
                    ctx.cluster, 'osd', cluster_name), ),
                osdmap_path,
                '--pg_bits', '2',
                '--pgp_bits', '4',
            ],
            wait=False,
        ),
    )

    log.info('Setting up mds nodes...')
    mdss = ctx.cluster.only(teuthology.is_type('mds', cluster_name))
    for remote, roles_for_host in mdss.remotes.items():
        for role in teuthology.cluster_roles_of_type(roles_for_host, 'mds',
                                                     cluster_name):
            _, _, id_ = teuthology.split_role(role)
            mds_dir = '/var/lib/ceph/mds/{cluster}-{id}'.format(
                cluster=cluster_name,
                id=id_,
            )
            remote.run(args=[
                'sudo',
                'mkdir',
                '-p',
                mds_dir,
                run.Raw('&&'),
                'sudo',
                'adjust-ulimits',
                'ceph-coverage',
                coverage_dir,
                'ceph-authtool',
                '--create-keyring',
                '--gen-key',
                '--name=mds.{id}'.format(id=id_),
                mds_dir + '/keyring',
            ], )

    cclient.create_keyring(ctx, cluster_name)
    log.info('Running mkfs on osd nodes...')

    if not hasattr(ctx, 'disk_config'):
        ctx.disk_config = argparse.Namespace()
    if not hasattr(ctx.disk_config, 'remote_to_roles_to_dev'):
        ctx.disk_config.remote_to_roles_to_dev = {}
    if not hasattr(ctx.disk_config, 'remote_to_roles_to_journals'):
        ctx.disk_config.remote_to_roles_to_journals = {}
    if not hasattr(ctx.disk_config, 'remote_to_roles_to_dev_mount_options'):
        ctx.disk_config.remote_to_roles_to_dev_mount_options = {}
    if not hasattr(ctx.disk_config, 'remote_to_roles_to_dev_fstype'):
        ctx.disk_config.remote_to_roles_to_dev_fstype = {}

    teuthology.deep_merge(ctx.disk_config.remote_to_roles_to_dev,
                          remote_to_roles_to_devs)
    teuthology.deep_merge(ctx.disk_config.remote_to_roles_to_journals,
                          remote_to_roles_to_journals)

    log.info("ctx.disk_config.remote_to_roles_to_dev: {r}".format(
        r=str(ctx.disk_config.remote_to_roles_to_dev)))

    for remote, roles_for_host in osds.remotes.items():
        roles_to_devs = remote_to_roles_to_devs[remote]
        roles_to_journals = remote_to_roles_to_journals[remote]

        for role in teuthology.cluster_roles_of_type(roles_for_host, 'osd',
                                                     cluster_name):
            _, _, id_ = teuthology.split_role(role)
            mnt_point = '/var/lib/ceph/osd/{cluster}-{id}'.format(
                cluster=cluster_name, id=id_)
            remote.run(args=[
                'sudo',
                'mkdir',
                '-p',
                mnt_point,
            ])
            log.info(str(roles_to_journals))
            log.info(role)
            if roles_to_devs.get(role):
                dev = roles_to_devs[role]
                fs = config.get('fs')
                package = None
                mkfs_options = config.get('mkfs_options')
                mount_options = config.get('mount_options')
                # Per-filesystem defaults; user-supplied options win.
                if fs == 'btrfs':
                    # package = 'btrfs-tools'
                    if mount_options is None:
                        mount_options = ['noatime', 'user_subvol_rm_allowed']
                    if mkfs_options is None:
                        mkfs_options = [
                            '-m', 'single', '-l', '32768', '-n', '32768'
                        ]
                if fs == 'xfs':
                    # package = 'xfsprogs'
                    if mount_options is None:
                        mount_options = ['noatime']
                    if mkfs_options is None:
                        mkfs_options = ['-f', '-i', 'size=2048']
                if fs == 'ext4' or fs == 'ext3':
                    if mount_options is None:
                        mount_options = ['noatime', 'user_xattr']

                if mount_options is None:
                    mount_options = []
                if mkfs_options is None:
                    mkfs_options = []
                mkfs = ['mkfs.%s' % fs] + mkfs_options
                log.info('%s on %s on %s' % (mkfs, dev, remote))
                if package is not None:
                    remote.run(
                        args=['sudo', 'apt-get', 'install', '-y', package],
                        stdout=StringIO(),
                    )

                try:
                    # 'yes |' answers any interactive overwrite prompt.
                    remote.run(args=['yes', run.Raw('|')] + ['sudo'] + mkfs +
                               [dev])
                except run.CommandFailedError:
                    # Newer btfs-tools doesn't prompt for overwrite, use -f
                    if '-f' not in mount_options:
                        mkfs_options.append('-f')
                        mkfs = ['mkfs.%s' % fs] + mkfs_options
                        log.info('%s on %s on %s' % (mkfs, dev, remote))
                    remote.run(args=['yes', run.Raw('|')] + ['sudo'] + mkfs +
                               [dev])

                log.info('mount %s on %s -o %s' %
                         (dev, remote, ','.join(mount_options)))
                remote.run(args=[
                    'sudo',
                    'mount',
                    '-t', fs,
                    '-o', ','.join(mount_options),
                    dev,
                    mnt_point,
                ])
                if remote not in ctx.disk_config.remote_to_roles_to_dev_mount_options:
                    ctx.disk_config.remote_to_roles_to_dev_mount_options[
                        remote] = {}
                ctx.disk_config.remote_to_roles_to_dev_mount_options[remote][
                    role] = mount_options
                if remote not in ctx.disk_config.remote_to_roles_to_dev_fstype:
                    ctx.disk_config.remote_to_roles_to_dev_fstype[remote] = {}
                ctx.disk_config.remote_to_roles_to_dev_fstype[remote][
                    role] = fs
                devs_to_clean[remote].append(mnt_point)

        for role in teuthology.cluster_roles_of_type(roles_for_host, 'osd',
                                                     cluster_name):
            _, _, id_ = teuthology.split_role(role)
            remote.run(args=[
                'sudo',
                'MALLOC_CHECK_=3',
                'adjust-ulimits',
                'ceph-coverage',
                coverage_dir,
                'ceph-osd',
                '--cluster',
                cluster_name,
                '--mkfs',
                '--mkkey',
                '-i', id_,
                '--monmap', monmap_path,
            ], )

    log.info('Reading keys from all nodes...')
    keys_fp = StringIO()
    keys = []
    for remote, roles_for_host in ctx.cluster.remotes.items():
        for type_ in ['mds', 'osd']:
            for role in teuthology.cluster_roles_of_type(
                    roles_for_host, type_, cluster_name):
                _, _, id_ = teuthology.split_role(role)
                data = teuthology.get_file(
                    remote=remote,
                    path='/var/lib/ceph/{type}/{cluster}-{id}/keyring'.format(
                        type=type_,
                        id=id_,
                        cluster=cluster_name,
                    ),
                    sudo=True,
                )
                keys.append((type_, id_, data))
                keys_fp.write(data)
    for remote, roles_for_host in ctx.cluster.remotes.items():
        for role in teuthology.cluster_roles_of_type(roles_for_host, 'client',
                                                     cluster_name):
            _, _, id_ = teuthology.split_role(role)
            data = teuthology.get_file(
                remote=remote,
                path='/etc/ceph/{cluster}.client.{id}.keyring'.format(
                    id=id_, cluster=cluster_name))
            keys.append(('client', id_, data))
            keys_fp.write(data)

    log.info('Adding keys to all mons...')
    writes = mons.run(
        args=[
            'sudo', 'tee', '-a', keyring_path,
        ],
        stdin=run.PIPE,
        wait=False,
        stdout=StringIO(),
    )
    keys_fp.seek(0)
    teuthology.feed_many_stdins_and_close(keys_fp, writes)
    run.wait(writes)
    for type_, id_, data in keys:
        run.wait(
            mons.run(
                args=[
                    'sudo',
                    'adjust-ulimits',
                    'ceph-coverage',
                    coverage_dir,
                    'ceph-authtool',
                    keyring_path,
                    '--name={type}.{id}'.format(
                        type=type_,
                        id=id_,
                    ),
                ] + list(teuthology.generate_caps(type_)),
                wait=False,
            ),
        )

    log.info('Running mkfs on mon nodes...')
    for remote, roles_for_host in mons.remotes.items():
        for role in teuthology.cluster_roles_of_type(roles_for_host, 'mon',
                                                     cluster_name):
            _, _, id_ = teuthology.split_role(role)
            remote.run(args=[
                'sudo',
                'mkdir',
                '-p',
                '/var/lib/ceph/mon/{cluster}-{id}'.format(
                    id=id_, cluster=cluster_name),
            ], )
            remote.run(args=[
                'sudo',
                'adjust-ulimits',
                'ceph-coverage',
                coverage_dir,
                'ceph-mon',
                '--cluster', cluster_name,
                '--mkfs',
                '-i', id_,
                '--monmap', monmap_path,
                '--osdmap', osdmap_path,
                '--keyring', keyring_path,
            ], )

    run.wait(
        mons.run(
            args=[
                'rm',
                '--',
                monmap_path,
                osdmap_path,
            ],
            wait=False,
        ),
    )

    try:
        yield
    except Exception:
        # we need to know this below
        ctx.summary['success'] = False
        raise
    finally:
        (mon0_remote, ) = ctx.cluster.only(firstmon).remotes.keys()

        log.info('Checking cluster log for badness...')

        def first_in_ceph_log(pattern, excludes):
            """
            Find the first occurrence of the pattern specified in the Ceph log,
            Returns None if none found.

            :param pattern: Pattern scanned for.
            :param excludes: Patterns to ignore.
            :return: First line of text (or None if not found)
            """
            args = [
                'sudo',
                'egrep', pattern,
                '/var/log/ceph/{cluster}.log'.format(cluster=cluster_name),
            ]
            for exclude in excludes:
                args.extend([run.Raw('|'), 'egrep', '-v', exclude])
            args.extend([
                run.Raw('|'),
                'head', '-n', '1',
            ])
            r = mon0_remote.run(
                stdout=StringIO(),
                args=args,
            )
            stdout = r.stdout.getvalue()
            if stdout != '':
                return stdout
            return None

        # Raw strings: the brackets are egrep syntax, and r'...' avoids
        # Python's invalid-escape warnings (same string value).
        if first_in_ceph_log(r'\[ERR\]|\[WRN\]|\[SEC\]',
                             config['log_whitelist']) is not None:
            log.warning('Found errors (ERR|WRN|SEC) in cluster log')
            ctx.summary['success'] = False
            # use the most severe problem as the failure reason
            if 'failure_reason' not in ctx.summary:
                for pattern in [r'\[SEC\]', r'\[ERR\]', r'\[WRN\]']:
                    match = first_in_ceph_log(pattern, config['log_whitelist'])
                    if match is not None:
                        ctx.summary['failure_reason'] = \
                            '"{match}" in cluster log'.format(
                                match=match.rstrip('\n'),
                            )
                        break

        for remote, dirs in devs_to_clean.items():
            for dir_ in dirs:
                log.info('Unmounting %s on %s' % (dir_, remote))
                try:
                    remote.run(args=[
                        'sync',
                        run.Raw('&&'),
                        'sudo', 'umount', '-f', dir_
                    ])
                except Exception:
                    # Dump who is holding the mount, then re-raise with the
                    # original traceback (was `raise e`, which loses it).
                    remote.run(args=[
                        'sudo',
                        run.Raw('PATH=/usr/sbin:$PATH'),
                        'lsof',
                        run.Raw(';'),
                        'ps', 'auxf',
                    ])
                    raise

        if config.get('tmpfs_journal'):
            log.info('tmpfs journal enabled - unmounting tmpfs at /mnt')
            for remote, roles_for_host in osds.remotes.items():
                remote.run(
                    args=['sudo', 'umount', '-f', '/mnt'],
                    check_status=False,
                )

        if ctx.archive is not None and \
                not (ctx.config.get('archive-on-error') and ctx.summary['success']):
            # archive mon data, too
            log.info('Archiving mon data...')
            path = os.path.join(ctx.archive, 'data')
            try:
                os.makedirs(path)
            except OSError as e:
                if e.errno == errno.EEXIST:
                    pass
                else:
                    raise
            for remote, roles in mons.remotes.items():
                for role in roles:
                    is_mon = teuthology.is_type('mon', cluster_name)
                    if is_mon(role):
                        _, _, id_ = teuthology.split_role(role)
                        mon_dir = '/var/lib/ceph/mon/' + \
                            '{0}-{1}'.format(cluster_name, id_)
                        teuthology.pull_directory_tarball(
                            remote, mon_dir, path + '/' + role + '.tgz')

        log.info('Cleaning ceph cluster...')
        run.wait(
            ctx.cluster.run(
                args=[
                    'sudo',
                    'rm',
                    '-rf',
                    '--',
                    conf_path,
                    keyring_path,
                    data_dir,
                    monmap_path,
                    osdmap_path,
                    run.Raw('{tdir}/../*.pid'.format(tdir=testdir)),
                ],
                wait=False,
            ),
        )
def cluster(ctx, config):
    """
    Handle the creation and removal of a ceph cluster.

    On startup:
        Create directories needed for the cluster.
        Create remote journals for all osds.
        Create and set keyring.
        Copy the monmap to the test systems.
        Setup mon nodes.
        Setup mds nodes.
        Mkfs osd nodes.
        Add keyring information to monmaps
        Mkfs mon nodes.

    On exit:
        If errors occurred, extract a failure message and store in ctx.summary.
        Unmount all test files and temporary journaling files.
        Save the monitor information and archive all ceph logs.
        Cleanup the keyring setup, and remove all monitor map and data files
        left over.

    :param ctx: Context
    :param config: Configuration
    """
    if ctx.config.get("use_existing_cluster", False) is True:
        log.info("'use_existing_cluster' is true; skipping cluster creation")
        yield
        # Bug fix: stop the generator here.  Without this return, resuming
        # the generator at teardown fell through into the cluster-creation
        # code below instead of finishing.
        return

    testdir = teuthology.get_testdir(ctx)
    cluster_name = config["cluster"]
    data_dir = "{tdir}/{cluster}.data".format(tdir=testdir, cluster=cluster_name)
    log.info("Creating ceph cluster %s...", cluster_name)
    run.wait(ctx.cluster.run(args=["install", "-d", "-m0755", "--", data_dir], wait=False))
    run.wait(ctx.cluster.run(args=["sudo", "install", "-d", "-m0777", "--", "/var/run/ceph"], wait=False))

    # Map scratch devices and journals to osd roles, per remote.
    devs_to_clean = {}
    remote_to_roles_to_devs = {}
    remote_to_roles_to_journals = {}
    osds = ctx.cluster.only(teuthology.is_type("osd", cluster_name))
    for remote, roles_for_host in osds.remotes.iteritems():
        devs = teuthology.get_scratch_devices(remote)
        roles_to_devs = {}
        roles_to_journals = {}
        if config.get("fs"):
            log.info("fs option selected, checking for scratch devs")
            log.info("found devs: %s" % (str(devs),))
            devs_id_map = teuthology.get_wwn_id_map(remote, devs)
            iddevs = devs_id_map.values()
            roles_to_devs = assign_devs(
                teuthology.cluster_roles_of_type(roles_for_host, "osd", cluster_name), iddevs
            )
            if len(roles_to_devs) < len(iddevs):
                # leftover devices can serve as block journals below
                iddevs = iddevs[len(roles_to_devs):]
            devs_to_clean[remote] = []
        if config.get("block_journal"):
            log.info("block journal enabled")
            roles_to_journals = assign_devs(
                teuthology.cluster_roles_of_type(roles_for_host, "osd", cluster_name), iddevs
            )
            log.info("journal map: %s", roles_to_journals)
        if config.get("tmpfs_journal"):
            log.info("tmpfs journal enabled")
            roles_to_journals = {}
            remote.run(args=["sudo", "mount", "-t", "tmpfs", "tmpfs", "/mnt"])
            for role in teuthology.cluster_roles_of_type(roles_for_host, "osd", cluster_name):
                tmpfs = "/mnt/" + role
                roles_to_journals[role] = tmpfs
                remote.run(args=["truncate", "-s", "1500M", tmpfs])
            log.info("journal map: %s", roles_to_journals)
        log.info("dev map: %s" % (str(roles_to_devs),))
        remote_to_roles_to_devs[remote] = roles_to_devs
        remote_to_roles_to_journals[remote] = roles_to_journals

    # Build the skeleton ceph.conf, then fold in per-role journal paths and
    # the caller-supplied overrides from config['conf'].
    log.info("Generating config...")
    remotes_and_roles = ctx.cluster.remotes.items()
    roles = [role_list for (remote, role_list) in remotes_and_roles]
    ips = [
        host
        for (host, port) in (remote.ssh.get_transport().getpeername() for (remote, role_list) in remotes_and_roles)
    ]
    conf = teuthology.skeleton_config(ctx, roles=roles, ips=ips, cluster=cluster_name)
    for remote, roles_to_journals in remote_to_roles_to_journals.iteritems():
        for role, journal in roles_to_journals.iteritems():
            name = teuthology.ceph_role(role)
            if name not in conf:
                conf[name] = {}
            conf[name]["osd journal"] = journal
    for section, keys in config["conf"].iteritems():
        for key, value in keys.iteritems():
            log.info("[%s] %s = %s" % (section, key, value))
            if section not in conf:
                conf[section] = {}
            conf[section][key] = value
    if config.get("tmpfs_journal"):
        # NOTE(review): stored as a top-level key, not under a section —
        # presumably consumed by write_conf; confirm before changing.
        conf["journal dio"] = False
    if not hasattr(ctx, "ceph"):
        ctx.ceph = {}
    ctx.ceph[cluster_name] = argparse.Namespace()
    ctx.ceph[cluster_name].conf = conf

    default_keyring = "/etc/ceph/{cluster}.keyring".format(cluster=cluster_name)
    keyring_path = config.get("keyring_path", default_keyring)
    coverage_dir = "{tdir}/archive/coverage".format(tdir=testdir)
    firstmon = teuthology.get_first_mon(ctx, config, cluster_name)

    # Create the cluster keyring (holding the mon. key) on the first monitor.
    log.info("Setting up %s..." % firstmon)
    ctx.cluster.only(firstmon).run(
        args=[
            "sudo", "adjust-ulimits", "ceph-coverage", coverage_dir,
            "ceph-authtool", "--create-keyring", keyring_path,
        ]
    )
    ctx.cluster.only(firstmon).run(
        args=[
            "sudo", "adjust-ulimits", "ceph-coverage", coverage_dir,
            "ceph-authtool", "--gen-key", "--name=mon.", keyring_path,
        ]
    )
    ctx.cluster.only(firstmon).run(args=["sudo", "chmod", "0644", keyring_path])
    (mon0_remote,) = ctx.cluster.only(firstmon).remotes.keys()
    monmap_path = "{tdir}/{cluster}.monmap".format(tdir=testdir, cluster=cluster_name)
    fsid = teuthology.create_simple_monmap(ctx, remote=mon0_remote, conf=conf, path=monmap_path)
    if "global" not in conf:
        conf["global"] = {}
    conf["global"]["fsid"] = fsid

    default_conf_path = "/etc/ceph/{cluster}.conf".format(cluster=cluster_name)
    conf_path = config.get("conf_path", default_conf_path)
    log.info("Writing %s for FSID %s..." % (conf_path, fsid))
    write_conf(ctx, conf_path, cluster_name)

    log.info("Creating admin key on %s..." % firstmon)
    ctx.cluster.only(firstmon).run(
        args=[
            "sudo", "adjust-ulimits", "ceph-coverage", coverage_dir,
            "ceph-authtool", "--gen-key", "--name=client.admin", "--set-uid=0",
            "--cap", "mon", "allow *",
            "--cap", "osd", "allow *",
            "--cap", "mds", "allow *",
            keyring_path,
        ]
    )

    # Distribute the keyring and the initial monmap to every remote.
    log.info("Copying monmap to all nodes...")
    keyring = teuthology.get_file(remote=mon0_remote, path=keyring_path)
    monmap = teuthology.get_file(remote=mon0_remote, path=monmap_path)
    for rem in ctx.cluster.remotes.iterkeys():
        # copy mon key and initial monmap
        log.info("Sending monmap to node {remote}".format(remote=rem))
        teuthology.sudo_write_file(remote=rem, path=keyring_path, data=keyring, perms="0644")
        teuthology.write_file(remote=rem, path=monmap_path, data=monmap)

    # Build an initial osdmap sized to the number of osd instances.
    log.info("Setting up mon nodes...")
    mons = ctx.cluster.only(teuthology.is_type("mon", cluster_name))
    osdmap_path = "{tdir}/{cluster}.osdmap".format(tdir=testdir, cluster=cluster_name)
    run.wait(
        mons.run(
            args=[
                "adjust-ulimits", "ceph-coverage", coverage_dir,
                "osdmaptool", "-c", conf_path, "--clobber", "--createsimple",
                "{num:d}".format(num=teuthology.num_instances_of_type(ctx.cluster, "osd", cluster_name)),
                osdmap_path, "--pg_bits", "2", "--pgp_bits", "4",
            ],
            wait=False,
        )
    )

    # Per-daemon keyrings for mgr and mds daemons.
    log.info("Setting up mgr nodes...")
    mgrs = ctx.cluster.only(teuthology.is_type("mgr", cluster_name))
    for remote, roles_for_host in mgrs.remotes.iteritems():
        for role in teuthology.cluster_roles_of_type(roles_for_host, "mgr", cluster_name):
            _, _, id_ = teuthology.split_role(role)
            mgr_dir = "/var/lib/ceph/mgr/{cluster}-{id}".format(cluster=cluster_name, id=id_)
            remote.run(
                args=[
                    "sudo", "mkdir", "-p", mgr_dir,
                    run.Raw("&&"),
                    "sudo", "adjust-ulimits", "ceph-coverage", coverage_dir,
                    "ceph-authtool", "--create-keyring", "--gen-key",
                    "--name=mgr.{id}".format(id=id_),
                    mgr_dir + "/keyring",
                ]
            )

    log.info("Setting up mds nodes...")
    mdss = ctx.cluster.only(teuthology.is_type("mds", cluster_name))
    for remote, roles_for_host in mdss.remotes.iteritems():
        for role in teuthology.cluster_roles_of_type(roles_for_host, "mds", cluster_name):
            _, _, id_ = teuthology.split_role(role)
            mds_dir = "/var/lib/ceph/mds/{cluster}-{id}".format(cluster=cluster_name, id=id_)
            remote.run(
                args=[
                    "sudo", "mkdir", "-p", mds_dir,
                    run.Raw("&&"),
                    "sudo", "adjust-ulimits", "ceph-coverage", coverage_dir,
                    "ceph-authtool", "--create-keyring", "--gen-key",
                    "--name=mds.{id}".format(id=id_),
                    mds_dir + "/keyring",
                ]
            )

    cclient.create_keyring(ctx, cluster_name)
    log.info("Running mkfs on osd nodes...")

    # Record the device/journal layout on ctx.disk_config so later tasks can
    # find (and remount) the same devices.
    if not hasattr(ctx, "disk_config"):
        ctx.disk_config = argparse.Namespace()
    if not hasattr(ctx.disk_config, "remote_to_roles_to_dev"):
        ctx.disk_config.remote_to_roles_to_dev = {}
    if not hasattr(ctx.disk_config, "remote_to_roles_to_journals"):
        ctx.disk_config.remote_to_roles_to_journals = {}
    if not hasattr(ctx.disk_config, "remote_to_roles_to_dev_mount_options"):
        ctx.disk_config.remote_to_roles_to_dev_mount_options = {}
    if not hasattr(ctx.disk_config, "remote_to_roles_to_dev_fstype"):
        ctx.disk_config.remote_to_roles_to_dev_fstype = {}
    teuthology.deep_merge(ctx.disk_config.remote_to_roles_to_dev, remote_to_roles_to_devs)
    teuthology.deep_merge(ctx.disk_config.remote_to_roles_to_journals, remote_to_roles_to_journals)
    log.info("ctx.disk_config.remote_to_roles_to_dev: {r}".format(r=str(ctx.disk_config.remote_to_roles_to_dev)))

    for remote, roles_for_host in osds.remotes.iteritems():
        roles_to_devs = remote_to_roles_to_devs[remote]
        roles_to_journals = remote_to_roles_to_journals[remote]
        for role in teuthology.cluster_roles_of_type(roles_for_host, "osd", cluster_name):
            _, _, id_ = teuthology.split_role(role)
            mnt_point = "/var/lib/ceph/osd/{cluster}-{id}".format(cluster=cluster_name, id=id_)
            remote.run(args=["sudo", "mkdir", "-p", mnt_point])
            log.info(str(roles_to_journals))
            log.info(role)
            if roles_to_devs.get(role):
                # This osd role was assigned a scratch device: mkfs + mount it.
                dev = roles_to_devs[role]
                fs = config.get("fs")
                package = None
                mkfs_options = config.get("mkfs_options")
                mount_options = config.get("mount_options")
                if fs == "btrfs":
                    # package = 'btrfs-tools'
                    if mount_options is None:
                        mount_options = ["noatime", "user_subvol_rm_allowed"]
                    if mkfs_options is None:
                        mkfs_options = ["-m", "single", "-l", "32768", "-n", "32768"]
                if fs == "xfs":
                    # package = 'xfsprogs'
                    if mount_options is None:
                        mount_options = ["noatime"]
                    if mkfs_options is None:
                        mkfs_options = ["-f", "-i", "size=2048"]
                if fs == "ext4" or fs == "ext3":
                    if mount_options is None:
                        mount_options = ["noatime", "user_xattr"]
                if mount_options is None:
                    mount_options = []
                if mkfs_options is None:
                    mkfs_options = []
                mkfs = ["mkfs.%s" % fs] + mkfs_options
                log.info("%s on %s on %s" % (mkfs, dev, remote))
                if package is not None:
                    remote.run(args=["sudo", "apt-get", "install", "-y", package], stdout=StringIO())
                try:
                    remote.run(args=["yes", run.Raw("|")] + ["sudo"] + mkfs + [dev])
                except run.CommandFailedError:
                    # Newer btfs-tools doesn't prompt for overwrite, use -f
                    if "-f" not in mount_options:
                        mkfs_options.append("-f")
                        mkfs = ["mkfs.%s" % fs] + mkfs_options
                        log.info("%s on %s on %s" % (mkfs, dev, remote))
                    remote.run(args=["yes", run.Raw("|")] + ["sudo"] + mkfs + [dev])
                log.info("mount %s on %s -o %s" % (dev, remote, ",".join(mount_options)))
                remote.run(args=["sudo", "mount", "-t", fs, "-o", ",".join(mount_options), dev, mnt_point])
                if remote not in ctx.disk_config.remote_to_roles_to_dev_mount_options:
                    ctx.disk_config.remote_to_roles_to_dev_mount_options[remote] = {}
                ctx.disk_config.remote_to_roles_to_dev_mount_options[remote][role] = mount_options
                if remote not in ctx.disk_config.remote_to_roles_to_dev_fstype:
                    ctx.disk_config.remote_to_roles_to_dev_fstype[remote] = {}
                ctx.disk_config.remote_to_roles_to_dev_fstype[remote][role] = fs
                devs_to_clean[remote].append(mnt_point)

        for role in teuthology.cluster_roles_of_type(roles_for_host, "osd", cluster_name):
            _, _, id_ = teuthology.split_role(role)
            remote.run(
                args=[
                    "sudo", "MALLOC_CHECK_=3", "adjust-ulimits", "ceph-coverage",
                    coverage_dir, "ceph-osd", "--cluster", cluster_name,
                    "--mkfs", "--mkkey", "-i", id_, "--monmap", monmap_path,
                ]
            )

    # Collect every daemon/client key, append them all to the mon keyring,
    # and grant each entity its default caps.
    log.info("Reading keys from all nodes...")
    keys_fp = StringIO()
    keys = []
    for remote, roles_for_host in ctx.cluster.remotes.iteritems():
        for type_ in ["mgr", "mds", "osd"]:
            for role in teuthology.cluster_roles_of_type(roles_for_host, type_, cluster_name):
                _, _, id_ = teuthology.split_role(role)
                data = teuthology.get_file(
                    remote=remote,
                    path="/var/lib/ceph/{type}/{cluster}-{id}/keyring".format(type=type_, id=id_, cluster=cluster_name),
                    sudo=True,
                )
                keys.append((type_, id_, data))
                keys_fp.write(data)
    for remote, roles_for_host in ctx.cluster.remotes.iteritems():
        for role in teuthology.cluster_roles_of_type(roles_for_host, "client", cluster_name):
            _, _, id_ = teuthology.split_role(role)
            data = teuthology.get_file(
                remote=remote, path="/etc/ceph/{cluster}.client.{id}.keyring".format(id=id_, cluster=cluster_name)
            )
            keys.append(("client", id_, data))
            keys_fp.write(data)

    log.info("Adding keys to all mons...")
    writes = mons.run(args=["sudo", "tee", "-a", keyring_path], stdin=run.PIPE, wait=False, stdout=StringIO())
    keys_fp.seek(0)
    teuthology.feed_many_stdins_and_close(keys_fp, writes)
    run.wait(writes)
    for type_, id_, data in keys:
        run.wait(
            mons.run(
                args=[
                    "sudo", "adjust-ulimits", "ceph-coverage", coverage_dir,
                    "ceph-authtool", keyring_path,
                    "--name={type}.{id}".format(type=type_, id=id_),
                ]
                + list(generate_caps(type_)),
                wait=False,
            )
        )

    log.info("Running mkfs on mon nodes...")
    for remote, roles_for_host in mons.remotes.iteritems():
        for role in teuthology.cluster_roles_of_type(roles_for_host, "mon", cluster_name):
            _, _, id_ = teuthology.split_role(role)
            remote.run(
                args=["sudo", "mkdir", "-p", "/var/lib/ceph/mon/{cluster}-{id}".format(id=id_, cluster=cluster_name)]
            )
            remote.run(
                args=[
                    "sudo", "adjust-ulimits", "ceph-coverage", coverage_dir,
                    "ceph-mon", "--cluster", cluster_name, "--mkfs", "-i", id_,
                    "--monmap", monmap_path, "--osdmap", osdmap_path,
                    "--keyring", keyring_path,
                ]
            )

    # The bootstrap maps are no longer needed once the mons are mkfs'ed.
    run.wait(mons.run(args=["rm", "--", monmap_path, osdmap_path], wait=False))

    try:
        # Cluster is up; hand control back to the task runner.
        yield
    except Exception:
        # we need to know this below
        ctx.summary["success"] = False
        raise
    finally:
        (mon0_remote,) = ctx.cluster.only(firstmon).remotes.keys()

        log.info("Checking cluster log for badness...")

        def first_in_ceph_log(pattern, excludes):
            """
            Find the first occurrence of the pattern specified in the Ceph log,
            Returns None if none found.

            :param pattern: Pattern scanned for.
            :param excludes: Patterns to ignore.
            :return: First line of text (or None if not found)
            """
            args = ["sudo", "egrep", pattern, "/var/log/ceph/{cluster}.log".format(cluster=cluster_name)]
            for exclude in excludes:
                args.extend([run.Raw("|"), "egrep", "-v", exclude])
            args.extend([run.Raw("|"), "head", "-n", "1"])
            r = mon0_remote.run(stdout=StringIO(), args=args)
            stdout = r.stdout.getvalue()
            if stdout != "":
                return stdout
            return None

        if first_in_ceph_log("\[ERR\]|\[WRN\]|\[SEC\]", config["log_whitelist"]) is not None:
            log.warning("Found errors (ERR|WRN|SEC) in cluster log")
            ctx.summary["success"] = False
            # use the most severe problem as the failure reason
            if "failure_reason" not in ctx.summary:
                for pattern in ["\[SEC\]", "\[ERR\]", "\[WRN\]"]:
                    match = first_in_ceph_log(pattern, config["log_whitelist"])
                    if match is not None:
                        ctx.summary["failure_reason"] = '"{match}" in cluster log'.format(match=match.rstrip("\n"))
                        break

        for remote, dirs in devs_to_clean.iteritems():
            for dir_ in dirs:
                log.info("Unmounting %s on %s" % (dir_, remote))
                try:
                    remote.run(args=["sync", run.Raw("&&"), "sudo", "umount", "-f", dir_])
                except Exception as e:
                    # dump what is holding the mount before propagating
                    remote.run(args=["sudo", run.Raw("PATH=/usr/sbin:$PATH"), "lsof", run.Raw(";"), "ps", "auxf"])
                    raise e

        if config.get("tmpfs_journal"):
            log.info("tmpfs journal enabled - unmounting tmpfs at /mnt")
            for remote, roles_for_host in osds.remotes.iteritems():
                remote.run(args=["sudo", "umount", "-f", "/mnt"], check_status=False)

        if ctx.archive is not None and not (ctx.config.get("archive-on-error") and ctx.summary["success"]):
            # archive mon data, too
            log.info("Archiving mon data...")
            path = os.path.join(ctx.archive, "data")
            try:
                os.makedirs(path)
            except OSError as e:
                if e.errno == errno.EEXIST:
                    pass
                else:
                    raise
            for remote, roles in mons.remotes.iteritems():
                for role in roles:
                    is_mon = teuthology.is_type("mon", cluster_name)
                    if is_mon(role):
                        _, _, id_ = teuthology.split_role(role)
                        mon_dir = "/var/lib/ceph/mon/" + "{0}-{1}".format(cluster_name, id_)
                        teuthology.pull_directory_tarball(remote, mon_dir, path + "/" + role + ".tgz")

        log.info("Cleaning ceph cluster...")
        run.wait(
            ctx.cluster.run(
                args=[
                    "sudo", "rm", "-rf", "--",
                    conf_path, keyring_path, data_dir,
                    monmap_path, osdmap_path,
                    run.Raw("{tdir}/../*.pid".format(tdir=testdir)),
                ],
                wait=False,
            )
        )
def cluster(ctx, config):
    """
    Handle the creation and removal of a ceph cluster.

    On startup:
        Create directories needed for the cluster.
        Create remote journals for all osds.
        Create and set keyring.
        Copy the monmap to the test systems.
        Setup mon nodes.
        Setup mds nodes.
        Mkfs osd nodes.
        Add keyring information to monmaps
        Mkfs mon nodes.

    On exit:
        If errors occurred, extract a failure message and store in ctx.summary.
        Unmount all test files and temporary journaling files.
        Save the monitor information and archive all ceph logs.
        Cleanup the keyring setup, and remove all monitor map and data files
        left over.

    :param ctx: Context
    :param config: Configuration
    """
    if ctx.config.get('use_existing_cluster', False) is True:
        log.info("'use_existing_cluster' is true; skipping cluster creation")
        # NOTE(review): there is no `return` after this yield, so resuming
        # the generator at teardown falls through into the cluster-creation
        # code below — confirm whether that is intended.
        yield

    testdir = teuthology.get_testdir(ctx)
    cluster_name = config['cluster']
    data_dir = '{tdir}/{cluster}.data'.format(tdir=testdir, cluster=cluster_name)
    log.info('Creating ceph cluster %s...', cluster_name)
    run.wait(
        ctx.cluster.run(
            args=[
                'install', '-d', '-m0755', '--',
                data_dir,
            ],
            wait=False,
        )
    )

    run.wait(
        ctx.cluster.run(
            args=[
                'sudo', 'install', '-d', '-m0777', '--', '/var/run/ceph',
            ],
            wait=False,
        )
    )

    # Map scratch devices and journals to osd roles, per remote.
    devs_to_clean = {}
    remote_to_roles_to_devs = {}
    remote_to_roles_to_journals = {}
    osds = ctx.cluster.only(teuthology.is_type('osd', cluster_name))
    for remote, roles_for_host in osds.remotes.iteritems():
        devs = teuthology.get_scratch_devices(remote)
        roles_to_devs = {}
        roles_to_journals = {}
        if config.get('fs'):
            log.info('fs option selected, checking for scratch devs')
            log.info('found devs: %s' % (str(devs),))
            devs_id_map = teuthology.get_wwn_id_map(remote, devs)
            iddevs = devs_id_map.values()
            roles_to_devs = assign_devs(
                teuthology.cluster_roles_of_type(roles_for_host, 'osd', cluster_name), iddevs
            )
            if len(roles_to_devs) < len(iddevs):
                # leftover devices can serve as block journals below
                iddevs = iddevs[len(roles_to_devs):]
            devs_to_clean[remote] = []

        if config.get('block_journal'):
            log.info('block journal enabled')
            roles_to_journals = assign_devs(
                teuthology.cluster_roles_of_type(roles_for_host, 'osd', cluster_name), iddevs
            )
            log.info('journal map: %s', roles_to_journals)

        if config.get('tmpfs_journal'):
            log.info('tmpfs journal enabled')
            roles_to_journals = {}
            remote.run(args=['sudo', 'mount', '-t', 'tmpfs', 'tmpfs', '/mnt'])
            for role in teuthology.cluster_roles_of_type(roles_for_host, 'osd', cluster_name):
                tmpfs = '/mnt/' + role
                roles_to_journals[role] = tmpfs
                remote.run(args=['truncate', '-s', '1500M', tmpfs])
            log.info('journal map: %s', roles_to_journals)

        log.info('dev map: %s' % (str(roles_to_devs),))
        remote_to_roles_to_devs[remote] = roles_to_devs
        remote_to_roles_to_journals[remote] = roles_to_journals

    # Build the skeleton ceph.conf, then fold in per-role journal paths and
    # the caller-supplied overrides from config['conf'].
    log.info('Generating config...')
    remotes_and_roles = ctx.cluster.remotes.items()
    roles = [role_list for (remote, role_list) in remotes_and_roles]
    ips = [host for (host, port) in
           (remote.ssh.get_transport().getpeername()
            for (remote, role_list) in remotes_and_roles)]
    conf = teuthology.skeleton_config(ctx, roles=roles, ips=ips, cluster=cluster_name)
    for remote, roles_to_journals in remote_to_roles_to_journals.iteritems():
        for role, journal in roles_to_journals.iteritems():
            name = teuthology.ceph_role(role)
            if name not in conf:
                conf[name] = {}
            conf[name]['osd journal'] = journal
    for section, keys in config['conf'].iteritems():
        for key, value in keys.iteritems():
            log.info("[%s] %s = %s" % (section, key, value))
            if section not in conf:
                conf[section] = {}
            conf[section][key] = value

    if config.get('tmpfs_journal'):
        # NOTE(review): stored as a top-level key, not under a section —
        # presumably consumed by write_conf; confirm before changing.
        conf['journal dio'] = False

    if not hasattr(ctx, 'ceph'):
        ctx.ceph = {}
    ctx.ceph[cluster_name] = argparse.Namespace()
    ctx.ceph[cluster_name].conf = conf

    default_keyring = '/etc/ceph/{cluster}.keyring'.format(cluster=cluster_name)
    keyring_path = config.get('keyring_path', default_keyring)

    coverage_dir = '{tdir}/archive/coverage'.format(tdir=testdir)

    firstmon = teuthology.get_first_mon(ctx, config, cluster_name)

    # Create the cluster keyring (holding the mon. key) on the first monitor.
    log.info('Setting up %s...' % firstmon)
    ctx.cluster.only(firstmon).run(
        args=[
            'sudo',
            'adjust-ulimits',
            'ceph-coverage',
            coverage_dir,
            'ceph-authtool',
            '--create-keyring',
            keyring_path,
        ],
    )
    ctx.cluster.only(firstmon).run(
        args=[
            'sudo',
            'adjust-ulimits',
            'ceph-coverage',
            coverage_dir,
            'ceph-authtool',
            '--gen-key',
            '--name=mon.',
            keyring_path,
        ],
    )
    ctx.cluster.only(firstmon).run(
        args=[
            'sudo',
            'chmod',
            '0644',
            keyring_path,
        ],
    )
    (mon0_remote,) = ctx.cluster.only(firstmon).remotes.keys()
    monmap_path = '{tdir}/{cluster}.monmap'.format(tdir=testdir, cluster=cluster_name)
    fsid = teuthology.create_simple_monmap(
        ctx,
        remote=mon0_remote,
        conf=conf,
        path=monmap_path,
    )
    if not 'global' in conf:
        conf['global'] = {}
    conf['global']['fsid'] = fsid

    default_conf_path = '/etc/ceph/{cluster}.conf'.format(cluster=cluster_name)
    conf_path = config.get('conf_path', default_conf_path)
    log.info('Writing %s for FSID %s...' % (conf_path, fsid))
    write_conf(ctx, conf_path, cluster_name)

    log.info('Creating admin key on %s...' % firstmon)
    ctx.cluster.only(firstmon).run(
        args=[
            'sudo',
            'adjust-ulimits',
            'ceph-coverage',
            coverage_dir,
            'ceph-authtool',
            '--gen-key',
            '--name=client.admin',
            '--set-uid=0',
            '--cap', 'mon', 'allow *',
            '--cap', 'osd', 'allow *',
            '--cap', 'mds', 'allow *',
            keyring_path,
        ],
    )

    # Distribute the keyring and the initial monmap to every remote.
    log.info('Copying monmap to all nodes...')
    keyring = teuthology.get_file(
        remote=mon0_remote,
        path=keyring_path,
    )
    monmap = teuthology.get_file(
        remote=mon0_remote,
        path=monmap_path,
    )

    for rem in ctx.cluster.remotes.iterkeys():
        # copy mon key and initial monmap
        log.info('Sending monmap to node {remote}'.format(remote=rem))
        teuthology.sudo_write_file(
            remote=rem,
            path=keyring_path,
            data=keyring,
            perms='0644'
        )
        teuthology.write_file(
            remote=rem,
            path=monmap_path,
            data=monmap,
        )

    # Build an initial osdmap sized to the number of osd instances.
    log.info('Setting up mon nodes...')
    mons = ctx.cluster.only(teuthology.is_type('mon', cluster_name))
    osdmap_path = '{tdir}/{cluster}.osdmap'.format(tdir=testdir, cluster=cluster_name)
    run.wait(
        mons.run(
            args=[
                'adjust-ulimits',
                'ceph-coverage',
                coverage_dir,
                'osdmaptool',
                '-c', conf_path,
                '--clobber',
                '--createsimple', '{num:d}'.format(
                    num=teuthology.num_instances_of_type(ctx.cluster, 'osd', cluster_name),
                ),
                osdmap_path,
                '--pg_bits', '2',
                '--pgp_bits', '4',
            ],
            wait=False,
        ),
    )

    # Per-daemon keyrings for mds daemons.
    log.info('Setting up mds nodes...')
    mdss = ctx.cluster.only(teuthology.is_type('mds', cluster_name))
    for remote, roles_for_host in mdss.remotes.iteritems():
        for role in teuthology.cluster_roles_of_type(roles_for_host, 'mds', cluster_name):
            _, _, id_ = teuthology.split_role(role)
            mds_dir = '/var/lib/ceph/mds/{cluster}-{id}'.format(
                cluster=cluster_name,
                id=id_,
            )
            remote.run(
                args=[
                    'sudo',
                    'mkdir',
                    '-p',
                    mds_dir,
                    run.Raw('&&'),
                    'sudo',
                    'adjust-ulimits',
                    'ceph-coverage',
                    coverage_dir,
                    'ceph-authtool',
                    '--create-keyring',
                    '--gen-key',
                    '--name=mds.{id}'.format(id=id_),
                    mds_dir + '/keyring',
                ],
            )

    cclient.create_keyring(ctx, cluster_name)
    log.info('Running mkfs on osd nodes...')

    # Record the device/journal layout on ctx.disk_config so later tasks can
    # find (and remount) the same devices.
    if not hasattr(ctx, 'disk_config'):
        ctx.disk_config = argparse.Namespace()
    if not hasattr(ctx.disk_config, 'remote_to_roles_to_dev'):
        ctx.disk_config.remote_to_roles_to_dev = {}
    if not hasattr(ctx.disk_config, 'remote_to_roles_to_journals'):
        ctx.disk_config.remote_to_roles_to_journals = {}
    if not hasattr(ctx.disk_config, 'remote_to_roles_to_dev_mount_options'):
        ctx.disk_config.remote_to_roles_to_dev_mount_options = {}
    if not hasattr(ctx.disk_config, 'remote_to_roles_to_dev_fstype'):
        ctx.disk_config.remote_to_roles_to_dev_fstype = {}

    teuthology.deep_merge(ctx.disk_config.remote_to_roles_to_dev, remote_to_roles_to_devs)
    teuthology.deep_merge(ctx.disk_config.remote_to_roles_to_journals, remote_to_roles_to_journals)

    log.info("ctx.disk_config.remote_to_roles_to_dev: {r}".format(r=str(ctx.disk_config.remote_to_roles_to_dev)))
    for remote, roles_for_host in osds.remotes.iteritems():
        roles_to_devs = remote_to_roles_to_devs[remote]
        roles_to_journals = remote_to_roles_to_journals[remote]

        for role in teuthology.cluster_roles_of_type(roles_for_host, 'osd', cluster_name):
            _, _, id_ = teuthology.split_role(role)
            mnt_point = '/var/lib/ceph/osd/{cluster}-{id}'.format(cluster=cluster_name, id=id_)
            remote.run(
                args=[
                    'sudo',
                    'mkdir',
                    '-p',
                    mnt_point,
                ])
            log.info(str(roles_to_journals))
            log.info(role)
            if roles_to_devs.get(role):
                # This osd role was assigned a scratch device: mkfs + mount it.
                dev = roles_to_devs[role]
                fs = config.get('fs')
                package = None
                mkfs_options = config.get('mkfs_options')
                mount_options = config.get('mount_options')
                if fs == 'btrfs':
                    # package = 'btrfs-tools'
                    if mount_options is None:
                        mount_options = ['noatime', 'user_subvol_rm_allowed']
                    if mkfs_options is None:
                        mkfs_options = ['-m', 'single', '-l', '32768', '-n', '32768']
                if fs == 'xfs':
                    # package = 'xfsprogs'
                    if mount_options is None:
                        mount_options = ['noatime']
                    if mkfs_options is None:
                        mkfs_options = ['-f', '-i', 'size=2048']
                if fs == 'ext4' or fs == 'ext3':
                    if mount_options is None:
                        mount_options = ['noatime', 'user_xattr']

                if mount_options is None:
                    mount_options = []
                if mkfs_options is None:
                    mkfs_options = []
                mkfs = ['mkfs.%s' % fs] + mkfs_options
                log.info('%s on %s on %s' % (mkfs, dev, remote))
                if package is not None:
                    remote.run(
                        args=[
                            'sudo', 'apt-get', 'install', '-y', package
                        ],
                        stdout=StringIO(),
                    )

                try:
                    remote.run(args=['yes', run.Raw('|')] + ['sudo'] + mkfs + [dev])
                except run.CommandFailedError:
                    # Newer btfs-tools doesn't prompt for overwrite, use -f
                    if '-f' not in mount_options:
                        mkfs_options.append('-f')
                        mkfs = ['mkfs.%s' % fs] + mkfs_options
                        log.info('%s on %s on %s' % (mkfs, dev, remote))
                    remote.run(args=['yes', run.Raw('|')] + ['sudo'] + mkfs + [dev])

                log.info('mount %s on %s -o %s' % (dev, remote, ','.join(mount_options)))
                remote.run(
                    args=[
                        'sudo',
                        'mount',
                        '-t', fs,
                        '-o', ','.join(mount_options),
                        dev,
                        mnt_point,
                    ]
                )
                if not remote in ctx.disk_config.remote_to_roles_to_dev_mount_options:
                    ctx.disk_config.remote_to_roles_to_dev_mount_options[remote] = {}
                ctx.disk_config.remote_to_roles_to_dev_mount_options[remote][role] = mount_options
                if not remote in ctx.disk_config.remote_to_roles_to_dev_fstype:
                    ctx.disk_config.remote_to_roles_to_dev_fstype[remote] = {}
                ctx.disk_config.remote_to_roles_to_dev_fstype[remote][role] = fs
                devs_to_clean[remote].append(mnt_point)

        for role in teuthology.cluster_roles_of_type(roles_for_host, 'osd', cluster_name):
            _, _, id_ = teuthology.split_role(role)
            remote.run(
                args=[
                    'sudo',
                    'MALLOC_CHECK_=3',
                    'adjust-ulimits',
                    'ceph-coverage',
                    coverage_dir,
                    'ceph-osd',
                    '--cluster',
                    cluster_name,
                    '--mkfs',
                    '--mkkey',
                    '-i', id_,
                    '--monmap', monmap_path,
                ],
            )

    # Collect every daemon/client key, append them all to the mon keyring,
    # and grant each entity its default caps.
    log.info('Reading keys from all nodes...')
    keys_fp = StringIO()
    keys = []
    for remote, roles_for_host in ctx.cluster.remotes.iteritems():
        for type_ in ['mds', 'osd']:
            for role in teuthology.cluster_roles_of_type(roles_for_host, type_, cluster_name):
                _, _, id_ = teuthology.split_role(role)
                data = teuthology.get_file(
                    remote=remote,
                    path='/var/lib/ceph/{type}/{cluster}-{id}/keyring'.format(
                        type=type_,
                        id=id_,
                        cluster=cluster_name,
                    ),
                    sudo=True,
                )
                keys.append((type_, id_, data))
                keys_fp.write(data)
    for remote, roles_for_host in ctx.cluster.remotes.iteritems():
        for role in teuthology.cluster_roles_of_type(roles_for_host, 'client', cluster_name):
            _, _, id_ = teuthology.split_role(role)
            data = teuthology.get_file(
                remote=remote,
                path='/etc/ceph/{cluster}.client.{id}.keyring'.format(id=id_, cluster=cluster_name)
            )
            keys.append(('client', id_, data))
            keys_fp.write(data)

    log.info('Adding keys to all mons...')
    writes = mons.run(
        args=[
            'sudo', 'tee', '-a',
            keyring_path,
        ],
        stdin=run.PIPE,
        wait=False,
        stdout=StringIO(),
    )
    keys_fp.seek(0)
    teuthology.feed_many_stdins_and_close(keys_fp, writes)
    run.wait(writes)
    for type_, id_, data in keys:
        run.wait(
            mons.run(
                args=[
                    'sudo',
                    'adjust-ulimits',
                    'ceph-coverage',
                    coverage_dir,
                    'ceph-authtool',
                    keyring_path,
                    '--name={type}.{id}'.format(
                        type=type_,
                        id=id_,
                    ),
                ] + list(teuthology.generate_caps(type_)),
                wait=False,
            ),
        )

    log.info('Running mkfs on mon nodes...')
    for remote, roles_for_host in mons.remotes.iteritems():
        for role in teuthology.cluster_roles_of_type(roles_for_host, 'mon', cluster_name):
            _, _, id_ = teuthology.split_role(role)
            remote.run(
                args=[
                    'sudo',
                    'mkdir',
                    '-p',
                    '/var/lib/ceph/mon/{cluster}-{id}'.format(id=id_, cluster=cluster_name),
                ],
            )
            remote.run(
                args=[
                    'sudo',
                    'adjust-ulimits',
                    'ceph-coverage',
                    coverage_dir,
                    'ceph-mon',
                    '--cluster', cluster_name,
                    '--mkfs',
                    '-i', id_,
                    '--monmap', monmap_path,
                    '--osdmap', osdmap_path,
                    '--keyring', keyring_path,
                ],
            )

    # The bootstrap maps are no longer needed once the mons are mkfs'ed.
    run.wait(
        mons.run(
            args=[
                'rm',
                '--',
                monmap_path,
                osdmap_path,
            ],
            wait=False,
        ),
    )

    try:
        # Cluster is up; hand control back to the task runner.
        yield
    except Exception:
        # we need to know this below
        ctx.summary['success'] = False
        raise
    finally:
        (mon0_remote,) = ctx.cluster.only(firstmon).remotes.keys()

        log.info('Checking cluster log for badness...')

        def first_in_ceph_log(pattern, excludes):
            """
            Find the first occurrence of the pattern specified in the Ceph log,
            Returns None if none found.

            :param pattern: Pattern scanned for.
            :param excludes: Patterns to ignore.
            :return: First line of text (or None if not found)
            """
            args = [
                'sudo',
                'egrep', pattern,
                '/var/log/ceph/{cluster}.log'.format(cluster=cluster_name),
            ]
            for exclude in excludes:
                args.extend([run.Raw('|'), 'egrep', '-v', exclude])
            args.extend([
                run.Raw('|'), 'head', '-n', '1',
            ])
            r = mon0_remote.run(
                stdout=StringIO(),
                args=args,
            )
            stdout = r.stdout.getvalue()
            if stdout != '':
                return stdout
            return None

        if first_in_ceph_log('\[ERR\]|\[WRN\]|\[SEC\]', config['log_whitelist']) is not None:
            log.warning('Found errors (ERR|WRN|SEC) in cluster log')
            ctx.summary['success'] = False
            # use the most severe problem as the failure reason
            if 'failure_reason' not in ctx.summary:
                for pattern in ['\[SEC\]', '\[ERR\]', '\[WRN\]']:
                    match = first_in_ceph_log(pattern, config['log_whitelist'])
                    if match is not None:
                        ctx.summary['failure_reason'] = \
                            '"{match}" in cluster log'.format(
                                match=match.rstrip('\n'),
                            )
                        break

        for remote, dirs in devs_to_clean.iteritems():
            for dir_ in dirs:
                log.info('Unmounting %s on %s' % (dir_, remote))
                try:
                    remote.run(
                        args=[
                            'sync',
                            run.Raw('&&'),
                            'sudo', 'umount', '-f', dir_
                        ]
                    )
                except Exception as e:
                    # dump what is holding the mount before propagating
                    remote.run(args=[
                        'sudo',
                        run.Raw('PATH=/usr/sbin:$PATH'),
                        'lsof',
                        run.Raw(';'),
                        'ps', 'auxf',
                    ])
                    raise e

        if config.get('tmpfs_journal'):
            log.info('tmpfs journal enabled - unmounting tmpfs at /mnt')
            for remote, roles_for_host in osds.remotes.iteritems():
                remote.run(
                    args=['sudo', 'umount', '-f', '/mnt'],
                    check_status=False,
                )

        if ctx.archive is not None and \
                not (ctx.config.get('archive-on-error') and ctx.summary['success']):
            # archive mon data, too
            log.info('Archiving mon data...')
            path = os.path.join(ctx.archive, 'data')
            try:
                os.makedirs(path)
            except OSError as e:
                if e.errno == errno.EEXIST:
                    pass
                else:
                    raise
            for remote, roles in mons.remotes.iteritems():
                for role in roles:
                    is_mon = teuthology.is_type('mon', cluster_name)
                    if is_mon(role):
                        _, _, id_ = teuthology.split_role(role)
                        mon_dir = '/var/lib/ceph/mon/' + \
                                  '{0}-{1}'.format(cluster_name, id_)
                        teuthology.pull_directory_tarball(
                            remote,
                            mon_dir,
                            path + '/' + role + '.tgz')

        log.info('Cleaning ceph cluster...')
        run.wait(
            ctx.cluster.run(
                args=[
                    'sudo', 'rm', '-rf', '--',
                    conf_path,
                    keyring_path,
                    data_dir,
                    monmap_path,
                    osdmap_path,
                    run.Raw('{tdir}/../*.pid'.format(tdir=testdir)),
                ],
                wait=False,
            ),
        )