def task(ctx, config):
    """
    Exercise filestore/filejournal handling of non-idempotent events.

    This is still a kludge: the ceph task has to run first, only so that
    the tarball providing the test binary is installed.

    ``config`` may be ``None`` (every client role), a list of roles, or a
    dict keyed by role.  Only the first role is used.  A scratch directory
    is created on that role's remote, ``run_seed_to.sh`` and
    ``run_seed_to_range.sh`` are downloaded into it, and the range script
    is run with a random seed, a random start and ``start + 50`` as end.
    On a non-zero exit the scratch directory is copied into the archive
    and an ``Exception`` is raised.  The scratch directory is removed in
    every case.

    :param ctx: Context
    :param config: Configuration
    """
    assert config is None or isinstance(config, (list, dict)), \
        "task only supports a list or dictionary for configuration"

    # Looked up unconditionally, even when an explicit config was given.
    default_roles = [
        'client.{id}'.format(id=role_id)
        for role_id in teuthology.all_roles_of_type(ctx.cluster, 'client')
    ]
    if config is None:
        config = default_roles
    if isinstance(config, list):
        config = dict.fromkeys(config)

    # just use the first client...
    client = next(iter(config.keys()))
    (remote,) = ctx.cluster.only(client).remotes.keys()

    testdir = teuthology.get_testdir(ctx)
    workdir = '%s/ceph.data/test.%s' % (testdir, client)

    seed = int(random.uniform(1, 100))
    first = 800 + random.randint(800, 1200)
    last = first + 50

    try:
        log.info('creating a working dir')
        remote.run(args=['mkdir', workdir])

        # Download both helper scripts into the scratch dir and make
        # them executable, all in one remote shell invocation.
        fetch_scripts = [
            'cd', workdir,
            run.Raw('&&'),
            'wget', '-q', '-Orun_seed_to.sh',
            'http://git.ceph.com/?p=ceph.git;a=blob_plain;f=src/test/objectstore/run_seed_to.sh;hb=HEAD',
            run.Raw('&&'),
            'wget', '-q', '-Orun_seed_to_range.sh',
            'http://git.ceph.com/?p=ceph.git;a=blob_plain;f=src/test/objectstore/run_seed_to_range.sh;hb=HEAD',
            run.Raw('&&'),
            'chmod', '+x', 'run_seed_to.sh', 'run_seed_to_range.sh',
        ]
        remote.run(args=fetch_scripts)

        log.info('running a series of tests')
        run_range = [
            'cd', workdir,
            run.Raw('&&'),
            './run_seed_to_range.sh', str(seed), str(first), str(last),
        ]
        # Started without status checking so that the failure artifacts
        # can be archived before the error is raised.
        proc = remote.run(args=run_range, wait=False, check_status=False)
        if proc.wait() != 0:
            failure_dir = '{tdir}/archive/idempotent_failure'.format(
                tdir=testdir)
            remote.run(args=['cp', '-a', workdir, failure_dir])
            raise Exception("./run_seed_to_range.sh errored out")
    finally:
        remote.run(args=['rm', '-rf', '--', workdir])
def coredump(ctx, config):
    """
    Stash a coredump of this system if an error occurs.

    Generator with a single ``yield``; presumably wrapped as a context
    manager by a decorator that is not visible in this view.

    On entry a ``coredump`` directory is created under the archive dir on
    every remote and ``kernel.core_pattern`` is pointed at it.  On exit
    the core pattern is reset to ``core`` and the directory is removed if
    it is empty.  Any remote on which the directory still exists is taken
    to have dumped core: the run is flagged as failed and, if none is set
    yet, a ``failure_reason`` is recorded in ``ctx.summary``.

    :param ctx: Context; ``ctx.cluster`` and ``ctx.summary`` are used
    :param config: Configuration (unused)
    """
    log.info('Enabling coredump saving...')
    archive_dir = misc.get_archive_dir(ctx)
    coredump_dir = '{adir}/coredump'.format(adir=archive_dir)
    run.wait(
        ctx.cluster.run(
            args=[
                'install', '-d', '-m0755', '--',
                coredump_dir,
                run.Raw('&&'),
                'sudo', 'sysctl', '-w',
                'kernel.core_pattern={adir}/coredump/%t.%p.core'.format(
                    adir=archive_dir),
            ],
            wait=False,
        )
    )

    try:
        yield
    finally:
        run.wait(
            ctx.cluster.run(
                args=[
                    'sudo', 'sysctl', '-w', 'kernel.core_pattern=core',
                    run.Raw('&&'),
                    # don't litter the archive dir if there were no cores
                    # dumped
                    'rmdir', '--ignore-fail-on-non-empty', '--',
                    coredump_dir,
                ],
                wait=False,
            )
        )

        # set status = 'fail' if the dir is still there = coredumps were
        # seen
        # NOTE: dict.iterkeys() does not exist on Python 3; .keys() works
        # on both Python 2 and 3.
        for rem in ctx.cluster.remotes.keys():
            r = rem.run(
                args=[
                    'if', 'test', '!', '-e', coredump_dir,
                    run.Raw(';'), 'then',
                    'echo', 'OK',
                    run.Raw(';'), 'fi',
                ],
                stdout=StringIO(),
            )
            # "OK" is printed only when the directory is gone.
            if r.stdout.getvalue() != 'OK\n':
                log.warning('Found coredumps on %s, flagging run as failed',
                            rem)
                set_status(ctx.summary, 'fail')
                if 'failure_reason' not in ctx.summary:
                    ctx.summary['failure_reason'] = \
                        'Found coredumps on {rem}'.format(rem=rem)
def _update_package_list_and_install(ctx, remote, debs, config):
    """
    Runs ``apt-get update`` first, then runs ``apt-get install``, installing
    the requested packages on the remote system.

    Steps, in order: make sure the Ceph autobuild key is known to
    ``apt-key``; install the builder's repo; ``apt-get update``; install
    ``debs`` pinned to the builder's version; install any extra system
    packages; finally ``dpkg -i`` every file found in the local package
    directory, if one is configured.

    TODO: split this into at least two functions.

    :param ctx: the argparse.Namespace object
    :param remote: the teuthology.orchestra.remote.Remote object
    :param debs: list of packages names to install
    :param config: the config dict; ``extra_system_packages`` may be a
                   list, or a dict whose ``deb`` entry holds the list
    """

    # check for ceph release key
    r = remote.run(
        args=[
            'sudo', 'apt-key', 'list', run.Raw('|'), 'grep', 'Ceph',
        ],
        stdout=StringIO(),
        check_status=False,
    )
    if 'Ceph automated package' not in r.stdout.getvalue():
        # if it doesn't exist, add it
        remote.run(
            args=[
                'wget', '-q', '-O-',
                'http://git.ceph.com/?p=ceph.git;a=blob_plain;f=keys/autobuild.asc',  # noqa
                run.Raw('|'),
                'sudo', 'apt-key', 'add', '-',
            ],
            stdout=StringIO(),
        )

    builder = _get_builder_project(ctx, remote, config)
    log.info("Installing packages: {pkglist} on remote deb {arch}".format(
        pkglist=", ".join(debs), arch=builder.arch)
    )

    system_pkglist = config.get('extra_system_packages')
    if isinstance(system_pkglist, dict):
        # A dict is keyed by package flavour; only 'deb' applies here.
        # It may be absent, in which case there is nothing extra to
        # install (previously this crashed in ", ".join(None)).
        system_pkglist = system_pkglist.get('deb')
    if system_pkglist:
        log.info("Installing system (non-project) packages: {pkglist} on remote deb {arch}".format(
            pkglist=", ".join(system_pkglist), arch=builder.arch)
        )

    # get baseurl
    log.info('Pulling from %s', builder.base_url)

    version = builder.version
    log.info('Package version is %s', version)

    builder.install_repo()

    # Best effort: a failing update must not abort the install attempt.
    remote.run(args=['sudo', 'apt-get', 'update'], check_status=False)
    install_cmd = [
        'sudo', 'DEBIAN_FRONTEND=noninteractive', 'apt-get', '-y',
        '--force-yes',
        '-o', run.Raw('Dpkg::Options::="--force-confdef"'),
        '-o', run.Raw('Dpkg::Options::="--force-confold"'),
        'install',
    ]
    install_dep_packages(
        remote,
        args=install_cmd + ['%s=%s' % (d, version) for d in debs],
    )
    if system_pkglist:
        install_dep_packages(
            remote,
            args=install_cmd + system_pkglist,
        )

    ldir = _get_local_dir(config, remote)
    if ldir:
        for fyle in os.listdir(ldir):
            fname = "%s/%s" % (ldir, fyle)
            remote.run(args=['sudo', 'dpkg', '-i', fname],)
def task(ctx, config):
    """
    Test handling of divergent entries during export / import
    to regression test tracker #11184

    overrides:
      ceph:
        conf:
          osd:
            debug osd: 5

    Requires 3 osds on a single test node.

    Outline of the scenario driven below:

    1. Wait for 3 up OSDs, set noout/noin/nodown, create the one-PG
       pool ``foo`` and shrink the PG log limits on every OSD.
    2. Write 100 objects, then blackhole the non-primary OSDs and issue
       overwrites and removes that only the primary (the "divergent"
       OSD) can record.
    3. Kill all OSDs, revive only the non-divergent ones and write one
       more object through them.
    4. Revive the divergent OSD with recovery delayed, let it peer, kill
       it again, and bump ``pg_num`` of ``foo`` to 2.
    5. Export-remove pg 2.0 from the divergent OSD with
       ceph-objectstore-tool and import the export file again.
    6. Revive everything, kick recovery and verify that the first
       ``DIVERGENT_WRITE + DIVERGENT_REMOVE`` objects can be read.

    :param ctx: Context; ``ctx.managers['ceph']`` and ``ctx.cluster``
                are used
    :param config: ``None`` or a dict
    """
    if config is None:
        config = {}
    assert isinstance(config, dict), \
        'divergent_priors task only accepts a dict for configuration'

    manager = ctx.managers['ceph']

    # Poll until all three OSDs report up.
    while len(manager.get_osd_status()['up']) < 3:
        time.sleep(10)
    osds = [0, 1, 2]
    manager.flush_pg_stats(osds)
    manager.raw_cluster_cmd('osd', 'set', 'noout')
    manager.raw_cluster_cmd('osd', 'set', 'noin')
    manager.raw_cluster_cmd('osd', 'set', 'nodown')
    manager.wait_for_clean()

    # something that is always there
    dummyfile = '/etc/fstab'
    dummyfile2 = '/etc/resolv.conf'
    testdir = teuthology.get_testdir(ctx)

    # create 1 pg pool
    log.info('creating foo')
    manager.raw_cluster_cmd('osd', 'pool', 'create', 'foo', '1')
    # The trailing "|| true" tolerates a failure of the enable command.
    manager.raw_cluster_cmd(
        'osd', 'pool', 'application', 'enable',
        'foo', 'rados', run.Raw('||'), 'true')

    # Remove extra pool to simplify log output
    manager.raw_cluster_cmd('osd', 'pool', 'delete', 'rbd', 'rbd',
                            '--yes-i-really-really-mean-it')

    # Small PG log limits on every OSD.
    for i in osds:
        manager.set_config(i, osd_min_pg_log_entries=10)
        manager.set_config(i, osd_max_pg_log_entries=10)
        manager.set_config(i, osd_pg_log_trim_min=5)

    # determine primary
    divergent = manager.get_pg_primary('foo', 0)
    log.info("primary and soon to be divergent is %d", divergent)
    non_divergent = list(osds)
    non_divergent.remove(divergent)

    log.info('writing initial objects')
    first_mon = teuthology.get_first_mon(ctx, config)
    (mon, ) = ctx.cluster.only(first_mon).remotes.keys()
    # write 100 objects
    for i in range(100):
        rados(ctx, mon, ['-p', 'foo', 'put', 'existing_%d' % i, dummyfile])

    manager.wait_for_clean()

    # blackhole non_divergent
    log.info("blackholing osds %s", str(non_divergent))
    for i in non_divergent:
        manager.set_config(i, objectstore_blackhole=1)

    DIVERGENT_WRITE = 5
    DIVERGENT_REMOVE = 5
    # Write some soon to be divergent
    # (wait=False: the clients are not waited for here; they are killed
    # further down)
    log.info('writing divergent objects')
    for i in range(DIVERGENT_WRITE):
        rados(ctx, mon, ['-p', 'foo', 'put', 'existing_%d' % i,
                         dummyfile2], wait=False)
    # Remove some soon to be divergent
    log.info('remove divergent objects')
    for i in range(DIVERGENT_REMOVE):
        rados(ctx, mon, ['-p', 'foo', 'rm',
                         'existing_%d' % (i + DIVERGENT_WRITE)], wait=False)
    time.sleep(10)
    # Get rid of the rados clients started with wait=False above.
    mon.run(
        args=['killall', '-9', 'rados'],
        wait=True,
        check_status=False)

    # kill all the osds but leave divergent in
    log.info('killing all the osds')
    for i in osds:
        manager.kill_osd(i)
    for i in osds:
        manager.mark_down_osd(i)
    for i in non_divergent:
        manager.mark_out_osd(i)

    # bring up non-divergent
    log.info("bringing up non_divergent %s", str(non_divergent))
    for i in non_divergent:
        manager.revive_osd(i)
    for i in non_divergent:
        manager.mark_in_osd(i)

    # write 1 non-divergent object (ensure that old divergent one is divergent)
    objname = "existing_%d" % (DIVERGENT_WRITE + DIVERGENT_REMOVE)
    log.info('writing non-divergent object ' + objname)
    rados(ctx, mon, ['-p', 'foo', 'put', objname, dummyfile2])

    manager.wait_for_recovery()

    # ensure no recovery of up osds first
    log.info('delay recovery')
    for i in non_divergent:
        manager.wait_run_admin_socket(
            'osd', i, ['set_recovery_delay', '100000'])

    # bring in our divergent friend
    # 'noup' is set while the recovery delay is applied to the revived
    # OSD and cleared right afterwards.
    log.info("revive divergent %d", divergent)
    manager.raw_cluster_cmd('osd', 'set', 'noup')
    manager.revive_osd(divergent)

    log.info('delay recovery divergent')
    manager.wait_run_admin_socket(
        'osd', divergent, ['set_recovery_delay', '100000'])

    manager.raw_cluster_cmd('osd', 'unset', 'noup')
    while len(manager.get_osd_status()['up']) < 3:
        time.sleep(10)

    log.info('wait for peering')
    rados(ctx, mon, ['-p', 'foo', 'put', 'foo', dummyfile])

    # At this point the divergent_priors should have been detected

    log.info("killing divergent %d", divergent)
    manager.kill_osd(divergent)

    # Split pgs for pool foo
    manager.raw_cluster_cmd('osd', 'pool', 'set', 'foo', 'pg_num', '2')
    time.sleep(5)

    manager.raw_cluster_cmd('pg', 'dump')

    # Export a pg
    (exp_remote,) = ctx.\
        cluster.only('osd.{o}'.format(o=divergent)).remotes.keys()
    FSPATH = manager.get_filepath()
    JPATH = os.path.join(FSPATH, "journal")
    # Common ceph-objectstore-tool invocation; each op below is appended
    # to it.  The adjacent literals are concatenated before .format() is
    # applied.
    prefix = ("sudo adjust-ulimits ceph-objectstore-tool "
              "--data-path {fpath} --journal-path {jpath} "
              "--log-file="
              "/var/log/ceph/objectstore_tool.$$.log ".
              format(fpath=FSPATH, jpath=JPATH))
    pid = os.getpid()
    expfile = os.path.join(testdir, "exp.{pid}.out".format(pid=pid))
    # NOTE(review): the paths substituted into `prefix` contain their own
    # {id} placeholder, presumably from manager.get_filepath(); that is
    # why id= is passed to .format() here -- confirm against the manager.
    cmd = ((prefix + "--op export-remove --pgid 2.0 --file {file}").
           format(id=divergent, file=expfile))
    try:
        exp_remote.sh(cmd, wait=True)
    except CommandFailedError as e:
        # NOTE(review): this assertion can only hold for an exit status
        # of 0, so a failing command ends up as an AssertionError here --
        # confirm that is the intent.
        assert e.exitstatus == 0

    # Kill one of non-divergent OSDs
    log.info('killing osd.%d' % non_divergent[0])
    manager.kill_osd(non_divergent[0])
    manager.mark_down_osd(non_divergent[0])
    # manager.mark_out_osd(non_divergent[0])

    # An empty collection for pg 2.0 might need to be cleaned up
    cmd = ((prefix + "--force --op remove --pgid 2.0").
           format(id=non_divergent[0]))
    exp_remote.sh(cmd, wait=True, check_status=False)

    cmd = ((prefix + "--op import --file {file}").
           format(id=non_divergent[0], file=expfile))
    try:
        exp_remote.sh(cmd, wait=True)
    except CommandFailedError as e:
        assert e.exitstatus == 0

    # bring in our divergent friend and other node
    log.info("revive divergent %d", divergent)
    manager.revive_osd(divergent)
    manager.mark_in_osd(divergent)
    log.info("revive %d", non_divergent[0])
    manager.revive_osd(non_divergent[0])

    while len(manager.get_osd_status()['up']) < 3:
        time.sleep(10)

    log.info('delay recovery divergent')
    manager.set_config(divergent, osd_recovery_delay_start=100000)
    log.info('mark divergent in')
    manager.mark_in_osd(divergent)

    log.info('wait for peering')
    rados(ctx, mon, ['-p', 'foo', 'put', 'foo', dummyfile])

    log.info("killing divergent %d", divergent)
    manager.kill_osd(divergent)
    log.info("reviving divergent %d", divergent)
    manager.revive_osd(divergent)
    time.sleep(3)

    log.info('allowing recovery')
    # Set osd_recovery_delay_start back to 0 and kick the queue
    for i in osds:
        manager.raw_cluster_cmd('tell', 'osd.%d' % i, 'debug',
                                'kick_recovery_wq', ' 0')

    # Every object touched by the divergent writes/removes must be
    # readable.
    log.info('reading divergent objects')
    for i in range(DIVERGENT_WRITE + DIVERGENT_REMOVE):
        exit_status = rados(
            ctx, mon,
            ['-p', 'foo', 'get', 'existing_%d' % i, '/tmp/existing'])
        assert exit_status == 0

    # Remove the export file from the divergent OSD's host.
    (remote,) = ctx.\
        cluster.only('osd.{o}'.format(o=divergent)).remotes.keys()
    cmd = 'rm {file}'.format(file=expfile)
    remote.run(args=cmd, wait=True)
    log.info("success")
def ceph_bootstrap(ctx, config):
    """
    Bootstrap a cluster with cephadm and add the remaining hosts.

    Generator with a single ``yield``; presumably wrapped as a context
    manager by a decorator that is not visible in this view.

    Setup: create a world-writable ``/etc/ceph`` everywhere, register the
    registry mirror, write a seed config to the bootstrap remote, register
    the first mon (and the first mgr unless the cluster is roleless), run
    ``cephadm bootstrap``, fetch the resulting config and keyrings into
    ``ctx.ceph[cluster]``, install the generated ssh public key for root
    on every remote, and add every other remote to the orchestrator.

    Teardown: remove the seed config and public key from the test dir,
    stop every registered daemon individually, and remove the cluster's
    conf and admin keyring from ``/etc/ceph``.

    :param ctx: Context
    :param config: dict; ``cluster`` is required, ``docker_registry_mirror``
                   and ``skip_dashboard`` are optional
    """
    cluster_name = config['cluster']
    testdir = teuthology.get_testdir(ctx)
    cluster_ctx = ctx.ceph[cluster_name]
    fsid = cluster_ctx.fsid

    bootstrap_remote = cluster_ctx.bootstrap_remote
    first_mon = cluster_ctx.first_mon
    first_mon_role = cluster_ctx.first_mon_role
    mons = cluster_ctx.mons

    # Paths that are referenced several times below.
    seed_conf_path = '{}/seed.{}.conf'.format(testdir, cluster_name)
    pub_key_path = '{}/{}.pub'.format(testdir, cluster_name)
    conf_path = '/etc/ceph/{}.conf'.format(cluster_name)
    admin_keyring_path = '/etc/ceph/{}.client.admin.keyring'.format(
        cluster_name)

    ctx.cluster.run(args=['sudo', 'mkdir', '-p', '/etc/ceph'])
    ctx.cluster.run(args=['sudo', 'chmod', '777', '/etc/ceph'])
    mirror = config.get('docker_registry_mirror',
                        'vossi04.front.sepia.ceph.com:5000')
    add_mirror_to_cluster(ctx, mirror)
    try:
        # write seed config
        log.info('Writing seed config...')
        conf_fp = BytesIO()
        seed_config = build_initial_config(ctx, config)
        seed_config.write(conf_fp)
        seed_bytes = conf_fp.getvalue()
        teuthology.write_file(
            remote=bootstrap_remote,
            path=seed_conf_path,
            data=seed_bytes)
        log.debug('Final config:\n' + seed_bytes.decode())
        cluster_ctx.conf = seed_config

        # register initial daemons
        ctx.daemons.register_daemon(
            bootstrap_remote, 'mon', first_mon,
            cluster=cluster_name,
            fsid=fsid,
            logger=log.getChild('mon.' + first_mon),
            wait=False,
            started=True,
        )
        if not cluster_ctx.roleless:
            first_mgr = cluster_ctx.first_mgr
            ctx.daemons.register_daemon(
                bootstrap_remote, 'mgr', first_mgr,
                cluster=cluster_name,
                fsid=fsid,
                logger=log.getChild('mgr.' + first_mgr),
                wait=False,
                started=True,
            )

        # bootstrap
        log.info('Bootstrapping...')
        cmd = [
            'sudo',
            ctx.cephadm,
            '--image', cluster_ctx.image,
            '-v',
            'bootstrap',
            '--fsid', fsid,
            '--config', seed_conf_path,
            '--output-config', conf_path,
            '--output-keyring', admin_keyring_path,
            '--output-pub-ssh-key', pub_key_path,
        ]
        if not cluster_ctx.roleless:
            cmd.extend([
                '--mon-id', first_mon,
                '--mgr-id', first_mgr,
                '--orphan-initial-daemons',   # we will do it explicitly!
                '--skip-monitoring-stack',    # we'll provision these explicitly
            ])
        # A bracketed value is passed as an address vector, anything
        # else as a plain IP.
        mon_addr = mons[first_mon_role]
        mon_flag = '--mon-addrv' if mon_addr.startswith('[') else '--mon-ip'
        cmd.extend([mon_flag, mon_addr])
        if config.get('skip_dashboard'):
            cmd.append('--skip-dashboard')
        # bootstrap makes the keyring root 0600, so +r it for our purposes
        cmd.extend([
            run.Raw('&&'),
            'sudo', 'chmod', '+r', admin_keyring_path,
        ])
        bootstrap_remote.run(args=cmd)

        # fetch keys and configs
        log.info('Fetching config...')
        cluster_ctx.config_file = teuthology.get_file(
            remote=bootstrap_remote,
            path=conf_path)
        log.info('Fetching client.admin keyring...')
        cluster_ctx.admin_keyring = teuthology.get_file(
            remote=bootstrap_remote,
            path=admin_keyring_path)
        log.info('Fetching mon keyring...')
        cluster_ctx.mon_keyring = teuthology.get_file(
            remote=bootstrap_remote,
            path='/var/lib/ceph/%s/mon.%s/keyring' % (fsid, first_mon),
            sudo=True)

        # fetch ssh key, distribute to additional nodes
        log.info('Fetching pub ssh key...')
        raw_pub_key = teuthology.get_file(
            remote=bootstrap_remote,
            path=pub_key_path)
        ssh_pub_key = raw_pub_key.decode('ascii').strip()

        log.info('Installing pub ssh key for root users...')
        ctx.cluster.run(args=[
            'sudo', 'install', '-d', '-m', '0700', '/root/.ssh',
            run.Raw('&&'),
            'echo', ssh_pub_key,
            run.Raw('|'),
            'sudo', 'tee', '-a', '/root/.ssh/authorized_keys',
            run.Raw('&&'),
            'sudo', 'chmod', '0600', '/root/.ssh/authorized_keys',
        ])

        # set options
        _shell(ctx, cluster_name, bootstrap_remote,
               ['ceph', 'config', 'set', 'mgr', 'mgr/cephadm/allow_ptrace',
                'true'])

        # add other hosts
        for remote in ctx.cluster.remotes.keys():
            if remote == bootstrap_remote:
                continue
            host = remote.shortname
            log.info('Writing (initial) conf and keyring to %s' % host)
            teuthology.write_file(
                remote=remote,
                path=conf_path,
                data=cluster_ctx.config_file)
            teuthology.write_file(
                remote=remote,
                path=admin_keyring_path,
                data=cluster_ctx.admin_keyring)

            log.info('Adding host %s to orchestrator...' % host)
            _shell(ctx, cluster_name, remote,
                   ['ceph', 'orch', 'host', 'add', host])
            # Confirm the orchestrator now lists the host.
            listing = _shell(ctx, cluster_name, remote,
                             ['ceph', 'orch', 'host', 'ls', '--format=json'],
                             stdout=StringIO())
            known_hosts = [
                node['hostname']
                for node in json.loads(listing.stdout.getvalue())
            ]
            assert host in known_hosts

        yield

    finally:
        log.info('Cleaning up testdir ceph.* files...')
        ctx.cluster.run(args=['rm', '-f', seed_conf_path, pub_key_path])

        log.info('Stopping all daemons...')

        # this doesn't block until they are all stopped...
        #ctx.cluster.run(args=['sudo', 'systemctl', 'stop', 'ceph.target'])

        # so, stop them individually
        for role in ctx.daemons.resolve_role_list(None, CEPH_ROLE_TYPES):
            cluster, type_, id_ = teuthology.split_role(role)
            ctx.daemons.get_daemon(type_, id_, cluster).stop()

        # clean up /etc/ceph
        ctx.cluster.run(args=[
            'sudo', 'rm', '-f', conf_path, admin_keyring_path,
        ])
def synch_clocks(remotes):
    """
    Step the clock on every remote and restart its time daemon.

    One shell pipeline is run per remote with a 60 second timeout: stop
    whichever of ntp/ntpd/chronyd exists, sync the clock once with the
    first tool that works, write the result to the hardware clock, and
    start the time daemon again.

    :param remotes: iterable of remotes to run the pipeline on
    """
    log.info('Synchronizing clocks...')

    services = ('ntp.service', 'ntpd.service', 'chronyd.service')

    def either(commands):
        # Chain the given argv lists with shell '||'.
        chained = []
        for command in commands:
            if chained:
                chained.append(run.Raw('||'))
            chained.extend(command)
        return chained

    for remote in remotes:
        stop_daemon = either(
            ['sudo', 'systemctl', 'stop', svc] for svc in services)
        step_clock = either([
            ['sudo', 'ntpdate-debian'],
            ['sudo', 'ntp', '-gq'],
            ['sudo', 'ntpd', '-gq'],
            ['sudo', 'chronyc', 'sources'],
        ])
        save_to_hwclock = ['sudo', 'hwclock', '--systohc', '--utc']
        start_daemon = either(
            [['sudo', 'systemctl', 'start', svc] for svc in services]
            # ignore errors; we may be racing with ntpd startup
            + [['true']])

        pipeline = (
            stop_daemon
            + [run.Raw('&&')] + step_clock
            + [run.Raw('&&')] + save_to_hwclock
            + [run.Raw('&&')] + start_daemon
        )
        remote.run(args=pipeline, timeout=60)
def generate_iso(ctx, config):
    """
    Execute system commands to generate iso

    Generator with a single ``yield``; presumably wrapped as a context
    manager by a decorator that is not visible in this view.

    For every client in ``config`` a cloud-init user-data file is
    assembled from ``userdata_setup.yaml``, one mkfs/mount snippet per
    test disk (only for the ``filesystem`` type), a snippet that runs the
    test script, and ``userdata_teardown.yaml``.  The user-data, the
    static ``metadata.yaml`` and the downloaded test script are then
    packed into ``<testdir>/qemu/<client>.iso`` with ``genisoimage``.
    All four generated files are removed on exit.

    :param ctx: Context; the refspec is taken from ``ctx.config``
                (``branch``, then ``tag``, then ``sha1``, else ``HEAD``)
    :param config: dict mapping client role to its config; each client
                   config must contain ``test`` (a URL template taking
                   ``repo`` and ``branch``) and may contain ``type`` and
                   ``num_rbd``
    """
    log.info('generating iso...')
    testdir = teuthology.get_testdir(ctx)

    # use ctx.config instead of config, because config has been
    # through teuthology.replace_all_with_clients()
    refspec = ctx.config.get('branch')
    if refspec is None:
        refspec = ctx.config.get('tag')
    if refspec is None:
        refspec = ctx.config.get('sha1')
    if refspec is None:
        refspec = 'HEAD'

    # hack: the git_url is always ceph-ci or ceph
    git_url = teuth_config.get_ceph_git_url()
    repo_name = 'ceph.git'
    if git_url.count('ceph-ci'):
        repo_name = 'ceph-ci.git'

    # .items()/range()/open() instead of .iteritems()/xrange()/file():
    # the latter exist only on Python 2.
    for client, client_config in config.items():
        assert 'test' in client_config, 'You must specify a test to run'
        test_url = client_config['test'].format(repo=repo_name,
                                                branch=refspec)
        (remote,) = ctx.cluster.only(client).remotes.keys()
        src_dir = os.path.dirname(__file__)
        userdata_path = os.path.join(testdir, 'qemu', 'userdata.' + client)
        metadata_path = os.path.join(testdir, 'qemu', 'metadata.' + client)

        # The templates are formatted and concatenated as text, so they
        # are read in text mode.
        with open(os.path.join(src_dir, 'userdata_setup.yaml')) as f:
            test_setup = f.read()
            # configuring the commands to setup the nfs mount
            mnt_dir = "/export/{client}".format(client=client)
            test_setup = test_setup.format(mnt_dir=mnt_dir)

        with open(os.path.join(src_dir, 'userdata_teardown.yaml')) as f:
            test_teardown = f.read()

        user_data = test_setup
        if client_config.get('type', 'filesystem') == 'filesystem':
            num_rbd = client_config.get('num_rbd', DEFAULT_NUM_RBD)
            for i in range(0, num_rbd):
                # Test disks are lettered from 'b' (vdb, vdc, ...).
                dev_letter = chr(ord('b') + i)
                user_data += """
- |
  #!/bin/bash
  mkdir /mnt/test_{dev_letter}
  mkfs -t xfs /dev/vd{dev_letter}
  mount -t xfs /dev/vd{dev_letter} /mnt/test_{dev_letter}
""".format(dev_letter=dev_letter)

        # this may change later to pass the directories as args to the
        # script or something. xfstests needs that.
        user_data += """
- |
  #!/bin/bash
  test -d /mnt/test_b && cd /mnt/test_b
  /mnt/cdrom/test.sh > /mnt/log/test.log 2>&1 && touch /mnt/log/success
""" + test_teardown

        teuthology.write_file(remote, userdata_path, StringIO(user_data))

        # The metadata file is passed through untouched, as bytes.
        with open(os.path.join(src_dir, 'metadata.yaml'), 'rb') as f:
            teuthology.write_file(remote, metadata_path, f)

        test_file = '{tdir}/qemu/{client}.test.sh'.format(tdir=testdir,
                                                          client=client)

        log.info('fetching test %s for %s', test_url, client)
        remote.run(
            args=[
                'wget', '-nv', '-O', test_file,
                test_url,
                run.Raw('&&'),
                'chmod', '755', test_file,
            ],
        )
        remote.run(
            args=[
                'genisoimage', '-quiet', '-input-charset', 'utf-8',
                '-volid', 'cidata', '-joliet', '-rock',
                '-o', '{tdir}/qemu/{client}.iso'.format(tdir=testdir,
                                                        client=client),
                '-graft-points',
                'user-data={userdata}'.format(userdata=userdata_path),
                'meta-data={metadata}'.format(metadata=metadata_path),
                'test.sh={file}'.format(file=test_file),
            ],
        )
    try:
        yield
    finally:
        for client in config.keys():
            (remote,) = ctx.cluster.only(client).remotes.keys()
            remote.run(
                args=[
                    'rm', '-f',
                    '{tdir}/qemu/{client}.iso'.format(tdir=testdir,
                                                      client=client),
                    os.path.join(testdir, 'qemu', 'userdata.' + client),
                    os.path.join(testdir, 'qemu', 'metadata.' + client),
                    '{tdir}/qemu/{client}.test.sh'.format(tdir=testdir,
                                                          client=client),
                ],
            )
def run_admin_cmds(ctx, config):
    """
    Running Keycloak Admin commands(kcadm commands) in order to get the
    token, aud value, thumbprint and realm name.

    Generator with a single ``yield``; presumably wrapped as a context
    manager by a decorator that is not visible in this view.

    For every client in ``config`` this logs in to the ``master`` realm,
    creates realm ``demorealm``, client ``my_client`` and user
    ``testuser``, adds the protocol mapper, obtains a user token and a
    client-credentials token, stores the realm certificate in
    ``certificate.crt``, computes its SHA-1 thumbprint and introspects
    the client token.  The results are exported through ``os.environ``
    as ``TOKEN``, ``THUMBPRINT``, ``AUD``, ``SUB``, ``AZP``,
    ``USER_TOKEN`` and ``KC_REALM`` (with several clients the last one
    wins).  On exit the certificate file and ``confi.py`` are removed.

    :param ctx: Context
    :param config: dict keyed by client role
    """
    assert isinstance(config, dict)
    log.info('Running admin commands...')

    server_url = 'http://localhost:8080/auth'
    realm_name = 'demorealm'
    client_name = 'my_client'
    redirect_uris = 'redirectUris=["http://localhost:8080/myapp/*"]'
    form_header = 'Content-Type:application/x-www-form-urlencoded'
    oidc_url = (server_url + '/realms/' + realm_name +
                '/protocol/openid-connect')
    # The test user's name doubles as its password.
    username = 'testuser'
    user_password = username

    for (client, _) in config.items():
        (remote,) = ctx.cluster.only(client).remotes.keys()
        kc_dir = get_keycloak_dir(ctx, config)
        kc_bin = '{tdir}/bin'.format(tdir=kc_dir)
        kcadm = '{tdir}/bin/kcadm.sh'.format(tdir=kc_dir)
        cert_path = '{tdir}/bin/certificate.crt'.format(tdir=kc_dir)

        # Log in as the built-in admin of the master realm.
        remote.run(
            args=[
                kcadm,
                'config', 'credentials',
                '--server', server_url,
                '--realm', 'master',
                '--user', 'admin',
                '--password', 'admin',
                '--client', 'admin-cli',
            ],
        )

        remote.run(
            args=[
                kcadm,
                'create', 'realms',
                '-s', 'realm={}'.format(realm_name),
                '-s', 'enabled=true',
                '-s', 'accessTokenLifespan=1800',
                '-o',
            ],
        )

        # Kept separate from the loop variable `client`, which is a role
        # name.
        remote.run(
            args=[
                kcadm,
                'create', 'clients',
                '-r', realm_name,
                '-s', 'clientId={}'.format(client_name),
                '-s', 'directAccessGrantsEnabled=true',
                '-s', redirect_uris,
            ],
        )

        # Look up the internal id Keycloak assigned to the new client.
        client_uuid = toxvenv_sh(ctx, remote, [
            'cd', kc_bin, run.Raw('&&'),
            './kcadm.sh', 'get', 'clients',
            '-r', realm_name,
            '-F', 'id,clientId', run.Raw('|'),
            'jq', '-r', '.[] | select (.clientId == "my_client") | .id'
        ]).rstrip()
        client_res = "clients/{}".format(client_uuid)

        remote.run(
            args=[
                kcadm,
                'update', client_res,
                '-r', realm_name,
                '-s', 'enabled=true',
                '-s', 'serviceAccountsEnabled=true',
                '-s', redirect_uris,
            ],
        )

        secret_out = toxvenv_sh(ctx, remote, [
            'cd', kc_bin, run.Raw('&&'),
            './kcadm.sh', 'get', client_res + '/client-secret',
            '-r', realm_name,
            '-F', 'value'
        ])
        # NOTE(review): fixed-offset slice of the kcadm output; presumably
        # this cuts the 36-character secret out of the JSON text -- confirm
        # against the actual output format.
        client_secret = secret_out[15:51]

        basic_auth = '{client}:{secret}'.format(client=client_name,
                                                secret=client_secret)
        secret_field = 'client_secret={}'.format(client_secret)
        client_field = 'client_id={}'.format(client_name)
        proto_map = client_res + "/protocol-mappers/models"
        uname = 'username={}'.format(username)
        # Must be a password=<value> form field matching the password set
        # with set-password below; this used to be the literal "******",
        # which is not a valid form field for the password grant.
        upass = 'password={}'.format(user_password)

        remote.run(
            args=[
                kcadm,
                'create', 'users',
                '-s', uname,
                '-s', 'enabled=true',
                '-s', 'attributes.\"https://aws.amazon.com/tags\"=\"{"principal_tags":{"Department":["Engineering", "Marketing"]}}\"',
                '-r', realm_name,
            ],
        )

        remote.run(
            args=[
                kcadm,
                'set-password',
                '-r', realm_name,
                '--username', username,
                '--new-password', user_password,
            ],
        )

        file_path = '{tdir}/scripts/confi.py'.format(
            tdir=teuthology.get_testdir(ctx))

        remote.run(
            args=[
                kcadm,
                'create', proto_map,
                '-r', realm_name,
                '-f', file_path,
            ],
        )

        # Switch kcadm to the new realm, logged in as the test user.
        remote.run(
            args=[
                kcadm,
                'config', 'credentials',
                '--server', server_url,
                '--realm', realm_name,
                '--user', username,
                '--password', user_password,
                '--client', 'admin-cli',
            ],
        )

        # Token for the test user (password grant).
        user_token = toxvenv_sh(ctx, remote, [
            'curl', '-k', '-v',
            '-X', 'POST',
            '-H', form_header,
            '-d', 'scope=openid',
            '-d', 'grant_type=password',
            '-d', client_field,
            '-d', secret_field,
            '-d', uname,
            '-d', upass,
            oidc_url + '/token', run.Raw('|'),
            'jq', '-r', '.access_token'
        ]).rstrip()

        # Token for the client itself (client-credentials grant).
        access_token = toxvenv_sh(ctx, remote, [
            'curl', '-k', '-v',
            '-X', 'POST',
            '-H', form_header,
            '-d', 'scope=openid',
            '-d', 'grant_type=client_credentials',
            '-d', client_field,
            '-d', secret_field,
            oidc_url + '/token', run.Raw('|'),
            'jq', '-r', '.access_token'
        ]).rstrip()
        token_field = 'token={}'.format(access_token)

        cert_value = toxvenv_sh(ctx, remote, [
            'curl', '-k', '-v',
            '-X', 'GET',
            '-H', form_header,
            oidc_url + '/certs', run.Raw('|'),
            'jq', '-r', '.keys[].x5c[]'
        ]).rstrip()

        # Wrap the certificate body in PEM markers so openssl can read it.
        pem = ("-----BEGIN CERTIFICATE-----\n" +
               cert_value +
               "\n-----END CERTIFICATE-----")
        remote.write_file(path=cert_path, data=pem)

        fingerprint_out = toxvenv_sh(ctx, remote, [
            'openssl', 'x509',
            '-in', cert_path,
            '--fingerprint', '--noout', '-sha1'
        ])
        # NOTE(review): fixed-offset slice; presumably the colon-separated
        # hex digest that follows the "...Fingerprint=" label -- confirm.
        thumbprint = fingerprint_out[17:76].replace(':', '')

        # One introspection request per claim of interest, in this order.
        introspect = [
            'curl', '-k', '-v',
            '-X', 'POST',
            '-u', basic_auth,
            '-d', token_field,
            oidc_url + '/token/introspect',
        ]
        claims = {}
        for claim in ('aud', 'sub', 'azp'):
            claims[claim] = toxvenv_sh(
                ctx, remote,
                introspect + [run.Raw('|'), 'jq', '-r', '.' + claim]
            ).rstrip()

        os.environ['TOKEN'] = access_token
        os.environ['THUMBPRINT'] = thumbprint
        os.environ['AUD'] = claims['aud']
        os.environ['SUB'] = claims['sub']
        os.environ['AZP'] = claims['azp']
        os.environ['USER_TOKEN'] = user_token
        os.environ['KC_REALM'] = realm_name

    try:
        yield
    finally:
        log.info('Removing certificate.crt file...')
        for (client, _) in config.items():
            (remote,) = ctx.cluster.only(client).remotes.keys()
            remote.run(
                args=[
                    'rm', '-f',
                    '{tdir}/bin/certificate.crt'.format(
                        tdir=get_keycloak_dir(ctx, config)),
                ],
            )

            # NOTE(review): the mapper file is read from
            # <testdir>/scripts/confi.py above but removed from
            # <testdir>/confi.py here -- confirm which path is intended.
            remote.run(
                args=[
                    'rm', '-f',
                    '{tdir}/confi.py'.format(
                        tdir=teuthology.get_testdir(ctx)),
                ],
            )
def toxvenv_sh(ctx, remote, args, **kwargs):
    """
    Run ``args`` on ``remote`` with the tox virtualenv activated.

    The command is prefixed with ``source <toxvenv>/bin/activate &&`` and
    handed to ``remote.sh``, whose result is returned unchanged.

    :param ctx: Context, used to locate the tox virtualenv directory
    :param remote: remote to run the command on
    :param args: list of command arguments
    :param kwargs: passed through to ``remote.sh``
    """
    activate_script = get_toxvenv_dir(ctx) + '/bin/activate'
    enter_venv = ['source', activate_script, run.Raw('&&')]
    command = enter_venv + args
    return remote.sh(command, **kwargs)
def start_rgw(ctx, config, clients):
    """
    Start rgw on remote sites.

    Generator with a single ``yield``; presumably wrapped as a context
    manager by a decorator that is not visible in this view.

    For each client a ``radosgw`` command line is assembled (frontend
    options, keyring, log and ops-log socket paths, optional keystone,
    DNS and barbican/vault/pykmip settings, optional valgrind wrapper)
    and registered as a daemon.  Afterwards every endpoint is polled
    until it accepts connections.  On exit each daemon is stopped and
    its ops-log socket and the vault token file are removed.

    :param ctx: Context
    :param config: dict mapping client role to its (possibly ``None``)
                   config
    :param clients: client roles to start rgw for
    """
    log.info('Starting rgw...')
    testdir = teuthology.get_testdir(ctx)
    for client in clients:
        (remote,) = ctx.cluster.only(client).remotes.keys()
        cluster_name, daemon_type, client_id = teuthology.split_role(client)
        daemon_name = daemon_type + '.' + client_id
        full_name = cluster_name + '.' + daemon_name

        opts = config.get(client)
        if opts is None:
            opts = {}
        log.info("rgw %s config is %s", client, opts)
        cmd_prefix = [
            'sudo',
            'adjust-ulimits',
            'ceph-coverage',
            '{tdir}/archive/coverage'.format(tdir=testdir),
            'daemon-helper',
            'term',
        ]

        log.info("Using %s as radosgw frontend", ctx.rgw.frontend)

        endpoint = ctx.rgw.role_endpoints[client]

        # Collect the frontend name and its options, then join them
        # with single spaces.
        frontend_parts = [ctx.rgw.frontend]
        frontend_prefix = opts.get('frontend_prefix', None)
        if frontend_prefix:
            frontend_parts.append('prefix={pfx}'.format(pfx=frontend_prefix))

        if not endpoint.cert:
            frontend_parts.append('port={}'.format(endpoint.port))
        else:
            # add the ssl certificate path
            frontend_parts.append(
                'ssl_certificate={}'.format(endpoint.cert.certificate))
            if ctx.rgw.frontend == 'civetweb':
                # civetweb marks an SSL port with a trailing 's'
                frontend_parts.append('port={}s'.format(endpoint.port))
            else:
                frontend_parts.append('ssl_port={}'.format(endpoint.port))
        frontends = ' '.join(frontend_parts)

        rgw_cmd = [
            'radosgw',
            '--rgw-frontends', frontends,
            '-n', daemon_name,
            '--cluster', cluster_name,
            '-k', '/etc/ceph/{client_with_cluster}.keyring'.format(
                client_with_cluster=full_name),
            '--log-file',
            '/var/log/ceph/rgw.{client_with_cluster}.log'.format(
                client_with_cluster=full_name),
            '--rgw_ops_log_socket_path',
            '{tdir}/rgw.opslog.{client_with_cluster}.sock'.format(
                tdir=testdir, client_with_cluster=full_name),
        ]

        keystone_role = opts.get('use-keystone-role', None)
        if keystone_role is not None:
            if not ctx.keystone:
                raise ConfigError('rgw must run after the keystone task')
            swift_url = 'http://{host}:{port}/v1/KEY_$(tenant_id)s'.format(
                host=endpoint.hostname, port=endpoint.port)
            ctx.keystone.create_endpoint(ctx, keystone_role, 'swift',
                                         swift_url)

            keystone_host, keystone_port = \
                ctx.keystone.public_endpoints[keystone_role]
            rgw_cmd += [
                '--rgw_keystone_url',
                'http://{khost}:{kport}'.format(khost=keystone_host,
                                                kport=keystone_port),
            ]

        if opts.get('dns-name') is not None:
            rgw_cmd += ['--rgw-dns-name', endpoint.dns_name]
        if opts.get('dns-s3website-name') is not None:
            rgw_cmd += ['--rgw-dns-s3website-name',
                        endpoint.website_dns_name]

        vault_role = opts.get('use-vault-role', None)
        barbican_role = opts.get('use-barbican-role', None)
        pykmip_role = opts.get('use-pykmip-role', None)

        # Also referenced by the cleanup in the finally block below.
        token_path = teuthology.get_testdir(ctx) + '/vault-token'

        # At most one key-management backend is configured; barbican
        # takes precedence over vault, vault over pykmip.
        if barbican_role is not None:
            if not hasattr(ctx, 'barbican'):
                raise ConfigError('rgw must run after the barbican task')

            barbican_host, barbican_port = \
                ctx.barbican.endpoints[barbican_role]
            log.info("Use barbican url=%s:%s", barbican_host, barbican_port)

            rgw_cmd += [
                '--rgw_barbican_url',
                'http://{bhost}:{bport}'.format(bhost=barbican_host,
                                                bport=barbican_port),
            ]
        elif vault_role is not None:
            if not ctx.vault.root_token:
                raise ConfigError('vault: no "root_token" specified')
            # create token on file
            ctx.cluster.only(client).run(
                args=['echo', '-n', ctx.vault.root_token,
                      run.Raw('>'), token_path])
            log.info("Token file content")
            ctx.cluster.only(client).run(args=['cat', token_path])
            log.info("Restrict access to token file")
            ctx.cluster.only(client).run(args=['chmod', '600', token_path])
            ctx.cluster.only(client).run(
                args=['sudo', 'chown', 'ceph', token_path])

            rgw_cmd += [
                '--rgw_crypt_vault_addr',
                "{}:{}".format(*ctx.vault.endpoints[vault_role]),
                '--rgw_crypt_vault_token_file', token_path,
            ]
        elif pykmip_role is not None:
            if not hasattr(ctx, 'pykmip'):
                raise ConfigError('rgw must run after the pykmip task')
            ctx.rgw.pykmip_role = pykmip_role
            rgw_cmd += [
                '--rgw_crypt_kmip_addr',
                "{}:{}".format(*ctx.pykmip.endpoints[pykmip_role]),
            ]

        # Run in the foreground and tee the output into a log file.
        rgw_cmd += [
            '--foreground',
            run.Raw('|'),
            'sudo',
            'tee',
            '/var/log/ceph/rgw.{client_with_cluster}.stdout'.format(
                client_with_cluster=full_name),
            run.Raw('2>&1'),
        ]

        valgrind_opts = opts.get('valgrind')
        if valgrind_opts:
            cmd_prefix = get_valgrind_args(
                testdir,
                full_name,
                cmd_prefix,
                opts.get('valgrind'),
                # see https://github.com/ceph/teuthology/pull/1600
                exit_on_first_error=False
            )

        run_cmd = list(cmd_prefix) + rgw_cmd

        ctx.daemons.add_daemon(
            remote, 'rgw', daemon_name,
            cluster=cluster_name,
            fsid=ctx.ceph[cluster_name].fsid,
            args=run_cmd,
            logger=log.getChild(client),
            stdin=run.PIPE,
            wait=False,
        )

    # XXX: add_daemon() doesn't let us wait until radosgw finishes startup
    for client in clients:
        url = ctx.rgw.role_endpoints[client].url()
        log.info(
            'Polling {client} until it starts accepting connections on {url}'.
            format(client=client, url=url))
        (remote,) = ctx.cluster.only(client).remotes.keys()
        wait_for_radosgw(url, remote)

    try:
        yield
    finally:
        for client in clients:
            cluster_name, daemon_type, client_id = teuthology.split_role(
                client)
            daemon_name = daemon_type + '.' + client_id
            full_name = cluster_name + '.' + daemon_name
            ctx.daemons.get_daemon('rgw', daemon_name, cluster_name).stop()
            opslog_sock = '{tdir}/rgw.opslog.{client}.sock'.format(
                tdir=testdir, client=full_name)
            ctx.cluster.only(client).run(args=['rm', '-f', opslog_sock])
            ctx.cluster.only(client).run(args=['rm', '-f', token_path])
def task(ctx, config):
    """
    Run Hadoop S3A tests using Ceph

    usage:
     -tasks:
       ceph-ansible:
       s3a-hadoop:
         maven-version: '3.6.3' (default)
         hadoop-version: '2.9.2'
         bucket-name: 's3atest' (default)
         access-key: 'anykey' (uses a default value)
         secret-key: 'secretkey' ( uses a default value)
         role: client.0

    Generator-style task: everything up to the ``yield`` downloads maven,
    clones hadoop, configures S3A and runs the tests; the ``finally``
    clause removes the downloaded/created files from the test directory.

    :param ctx: Context; must already carry ``ctx.rgw`` (set by the rgw task)
    :param config: Configuration dictionary (``None`` is treated as empty)
    """
    if config is None:
        config = {}

    assert isinstance(config, dict), \
        "task only supports a dictionary for configuration"

    assert hasattr(ctx, 'rgw'), 's3a-hadoop must run after the rgw task'

    overrides = ctx.config.get('overrides', {})
    misc.deep_merge(config, overrides.get('s3a-hadoop', {}))
    testdir = misc.get_testdir(ctx)

    role = config.get('role')
    (remote, ) = ctx.cluster.only(role).remotes.keys()
    endpoint = ctx.rgw.role_endpoints.get(role)
    # The message used to say "s3tests:", a leftover from another task.
    assert endpoint, 's3a-hadoop: no rgw endpoint for {}'.format(role)

    # get versions
    maven_major = config.get('maven-major', 'maven-3')
    maven_version = config.get('maven-version', '3.6.3')
    hadoop_ver = config.get('hadoop-version', '2.9.2')
    bucket_name = config.get('bucket-name', 's3atest')
    access_key = config.get('access-key', 'EGAQRD2ULOIFKFSKCT4F')
    secret_key = config.get('secret-key',
                            'zi816w1vZKfaSM85Cl0BxXTwSLyN7zB4RbTswrGb')

    # set versions for cloning the repo
    apache_maven = 'apache-maven-{maven_version}-bin.tar.gz'.format(
        maven_version=maven_version)
    maven_link = 'http://www-us.apache.org/dist/maven/' + \
        '{maven_major}/{maven_version}/binaries/'.format(
            maven_major=maven_major,
            maven_version=maven_version) + apache_maven
    hadoop_git = 'https://github.com/apache/hadoop'
    hadoop_rel = 'hadoop-{ver} rel/release-{ver}'.format(ver=hadoop_ver)
    if hadoop_ver == 'trunk':
        # just checkout a new branch out of trunk
        hadoop_rel = 'hadoop-ceph-trunk'

    install_prereq(remote)
    remote.run(args=[
        'cd', testdir,
        run.Raw('&&'),
        'wget', maven_link,
        run.Raw('&&'),
        'tar', '-xvf', apache_maven,
        run.Raw('&&'),
        'git', 'clone', run.Raw(hadoop_git),
        run.Raw('&&'),
        'cd', 'hadoop',
        run.Raw('&&'),
        'git', 'checkout', '-b', run.Raw(hadoop_rel)
    ])
    configure_s3a(remote, endpoint.dns_name, access_key, secret_key,
                  bucket_name, testdir)
    setup_user_bucket(remote, endpoint.dns_name, access_key, secret_key,
                      bucket_name, testdir)

    if hadoop_ver.startswith('2.8'):
        # test all ITtests but skip AWS test using public bucket landsat-pds
        # which is not available from within this test
        #
        # Joined explicitly so that no source indentation ends up inside
        # the option string (the old backslash-continued literal did that).
        test_options = ' '.join([
            '-Dit.test=ITestS3A*',
            '-Dparallel-tests',
            '-Dscale',
            '-Dfs.s3a.scale.test.timeout=1200',
            '-Dfs.s3a.scale.test.huge.filesize=256M',
            'verify',
        ])
    else:
        test_options = 'test -Dtest=S3a*,TestS3A*'

    try:
        run_s3atest(remote, maven_version, testdir, test_options)
        yield
    finally:
        log.info("Done s3a testing, Cleaning up")
        # The globs are passed raw so the remote shell expands them.
        for fil in ['apache*', 'hadoop*', 'venv*', 'create*']:
            remote.run(args=[
                'rm', run.Raw('-rf'),
                run.Raw('{tdir}/{file}'.format(tdir=testdir, file=fil))
            ])
def cli_test(ctx, config):
    """
    ceph-deploy cli to exercise most commonly use cli's and ensure
    all commands works and also startup the init system.

    Generator-style task: the ceph-deploy command sequence runs before the
    ``yield``; the ``finally`` clause stops ceph, unmounts the three test
    devices, purges the node and removes the working directory.

    :param ctx: Context
    :param config: Configuration dictionary (``None`` is treated as empty);
                   the keys read here are ``rhbuild`` and ``conf``.
    """
    log.info('Ceph-deploy Test')
    if config is None:
        config = {}
    test_branch = ''
    conf_dir = teuthology.get_testdir(ctx) + "/cdtest"

    def execute_cdeploy(admin, cmd, path):
        """
        Execute a ceph-deploy command from within ``conf_dir``.

        Either use the git checkout under ``path`` or, when ``path`` is
        falsy, the ``ceph-deploy`` found on the remote's PATH.

        :raises RuntimeError: when the command exits non-zero
        """
        args = ['cd', conf_dir, run.Raw(';')]
        if path:
            args.append('{path}/ceph-deploy/ceph-deploy'.format(path=path))
        else:
            args.append('ceph-deploy')
        args.append(run.Raw(cmd))
        ec = admin.run(args=args, check_status=False).exitstatus
        if ec != 0:
            raise RuntimeError(
                "failed during ceph-deploy cmd: {cmd} , ec={ec}".format(
                    cmd=cmd, ec=ec))

    if config.get('rhbuild'):
        path = None
    else:
        path = teuthology.get_testdir(ctx)
        # test on branch from config eg: wip-* , master or next etc
        # packages for all distro's should exist for wip*
        if ctx.config.get('branch'):
            branch = ctx.config.get('branch')
            test_branch = ' --dev={branch} '.format(branch=branch)

    mons = ctx.cluster.only(teuthology.is_type('mon'))
    # The last mon remote visited becomes the admin node used below.
    for admin in mons.remotes.keys():
        admin.run(args=['mkdir', conf_dir], check_status=False)
        nodename = admin.shortname
    system_type = teuthology.get_system_type(admin)
    if config.get('rhbuild'):
        admin.run(args=['sudo', 'yum', 'install', 'ceph-deploy', '-y'])
    log.info('system type is %s', system_type)

    osds = ctx.cluster.only(teuthology.is_type('osd'))
    # `remote` and `devs` keep the values of the last osd remote and are
    # used for the disk and health commands further down.
    for remote, roles in osds.remotes.items():
        devs = teuthology.get_scratch_devices(remote)
        log.info("roles %s", roles)
        if len(devs) < 3:
            log.error('Test needs minimum of 3 devices, only found %s',
                      str(devs))
            raise RuntimeError("Needs minimum of 3 devices ")

    conf_path = '{conf_dir}/ceph.conf'.format(conf_dir=conf_dir)
    new_cmd = 'new ' + nodename
    execute_cdeploy(admin, new_cmd, path)
    if config.get('conf') is not None:
        confp = config.get('conf')
        for section, keys in confp.items():
            lines = '[{section}]\n'.format(section=section)
            teuthology.append_lines_to_file(admin, conf_path, lines,
                                            sudo=True)
            for key, value in keys.items():
                log.info("[%s] %s = %s" % (section, key, value))
                lines = '{key} = {value}\n'.format(key=key, value=value)
                teuthology.append_lines_to_file(admin, conf_path, lines,
                                                sudo=True)

    new_mon_install = 'install {branch} --mon '.format(
        branch=test_branch) + nodename
    new_mgr_install = 'install {branch} --mgr '.format(
        branch=test_branch) + nodename
    new_osd_install = 'install {branch} --osd '.format(
        branch=test_branch) + nodename
    new_admin = 'install {branch} --cli '.format(
        branch=test_branch) + nodename
    create_initial = 'mon create-initial '
    # either use create-keys or push command
    push_keys = 'admin ' + nodename
    execute_cdeploy(admin, new_mon_install, path)
    execute_cdeploy(admin, new_mgr_install, path)
    execute_cdeploy(admin, new_osd_install, path)
    execute_cdeploy(admin, new_admin, path)
    execute_cdeploy(admin, create_initial, path)
    execute_cdeploy(admin, push_keys, path)

    for i in range(3):
        zap_disk = 'disk zap ' + "{n}:{d}".format(n=nodename, d=devs[i])
        prepare = 'osd prepare ' + "{n}:{d}".format(n=nodename, d=devs[i])
        execute_cdeploy(admin, zap_disk, path)
        execute_cdeploy(admin, prepare, path)

    log.info("list files for debugging purpose to check file permissions")
    admin.run(args=['ls', run.Raw('-lt'), conf_dir])
    remote.run(args=['sudo', 'ceph', '-s'], check_status=False)
    r = remote.run(args=['sudo', 'ceph', 'health'], stdout=StringIO())
    out = r.stdout.getvalue()
    log.info('Ceph health: %s', out.rstrip('\n'))
    log.info("Waiting for cluster to become healthy")
    with contextutil.safe_while(sleep=10, tries=6,
                                action='check health') as proceed:
        while proceed():
            r = remote.run(args=['sudo', 'ceph', 'health'],
                           stdout=StringIO())
            out = r.stdout.getvalue()
            # Empty output must count as "not healthy yet" rather than
            # blowing up with IndexError on the [0] lookup.
            status = out.split(None, 1)
            if status and status[0] == 'HEALTH_OK':
                break

    rgw_install = 'install {branch} --rgw {node}'.format(
        branch=test_branch,
        node=nodename,
    )
    rgw_create = 'rgw create ' + nodename
    execute_cdeploy(admin, rgw_install, path)
    execute_cdeploy(admin, rgw_create, path)
    log.info('All ceph-deploy cli tests passed')
    try:
        yield
    finally:
        log.info("cleaning up")
        # Try upstart, then sysvinit, then systemd; failures are ignored.
        ctx.cluster.run(args=[
            'sudo', 'stop', 'ceph-all', run.Raw('||'),
            'sudo', 'service', 'ceph', 'stop', run.Raw('||'),
            'sudo', 'systemctl', 'stop', 'ceph.target'
        ], check_status=False)
        time.sleep(4)
        for i in range(3):
            umount_dev = "{d}1".format(d=devs[i])
            remote.run(args=['sudo', 'umount', run.Raw(umount_dev)])
        cmd = 'purge ' + nodename
        execute_cdeploy(admin, cmd, path)
        cmd = 'purgedata ' + nodename
        execute_cdeploy(admin, cmd, path)
        log.info("Removing temporary dir")
        admin.run(args=['rm', run.Raw('-rf'), run.Raw(conf_dir)],
                  check_status=False)
        if config.get('rhbuild'):
            admin.run(args=['sudo', 'yum', 'remove', 'ceph-deploy', '-y'])
def download_ceph_deploy(ctx, config):
    """
    Clone ceph-deploy onto the admin node and run its ``bootstrap`` script.

    The admin node is the remote holding the first mon. The branch cloned
    is ``config['ceph-deploy-branch']`` (``master`` when unset). When
    ``config['python_version']`` is present it must be ``2`` or ``3``; the
    matching Python package plus ``python-virtualenv`` is installed on the
    whole cluster and the version is passed to ``bootstrap``.

    Generator-style task: the checkout is removed again in the ``finally``
    clause after the ``yield``.

    :param ctx: Context
    :param config: Configuration dictionary
    :raises ValueError: for an unsupported ``python_version``
    """
    first_mon = teuthology.get_first_mon(ctx, config)
    ceph_admin = ctx.cluster.only(first_mon)

    requested_python = None
    try:
        requested_python = str(config['python_version'])
    except KeyError:
        pass
    else:
        py_ver = requested_python
        supported_versions = ['2', '3']
        if py_ver not in supported_versions:
            raise ValueError("python_version must be: {}, not {}".format(
                ' or '.join(supported_versions), py_ver))

        log.info("Installing Python")
        for admin in ceph_admin.remotes:
            system_type = teuthology.get_system_type(admin)

        if system_type == 'rpm':
            package = 'python34' if py_ver == '3' else 'python'
            installer = ['sudo', 'yum', '-y', 'install']
        else:
            package = 'python3' if py_ver == '3' else 'python'
            installer = ['sudo', 'apt-get', '-y', '--force-yes', 'install']
        ctx.cluster.run(args=installer + [package, 'python-virtualenv'])

    log.info('Downloading ceph-deploy...')
    testdir = teuthology.get_testdir(ctx)
    checkout_dir = '{tdir}/ceph-deploy'.format(tdir=testdir)
    ceph_deploy_branch = config.get('ceph-deploy-branch', 'master')

    clone_cmd = [
        'git', 'clone', '-b', ceph_deploy_branch,
        teuth_config.ceph_git_base_url + 'ceph-deploy.git',
        checkout_dir,
    ]
    ceph_admin.run(args=clone_cmd)

    bootstrap_cmd = ['cd', checkout_dir, run.Raw('&&'), './bootstrap']
    if requested_python is not None:
        # bootstrap takes the python major version as its only argument
        bootstrap_cmd.append(requested_python)
    ceph_admin.run(args=bootstrap_cmd)

    try:
        yield
    finally:
        log.info('Removing ceph-deploy ...')
        ceph_admin.run(args=['rm', '-rf', checkout_dir])
def build_ceph_cluster(ctx, config):
    """
    Build a ceph cluster with ceph-deploy and tear it down afterwards.

    Generator-style task. Before the ``yield`` it runs, in order:
    ``ceph-deploy new``, optional ceph.conf additions from
    ``config['conf']``, package install (plus ``--tests``), ``mon
    create-initial``, ``ceph-create-keys`` on every mon, ``gatherkeys``,
    ``mgr create``, optional ``mds create`` / ``mon destroy``, disk zap and
    ``osd create`` per device list, and finally distributes ceph.conf and
    keyrings to the client nodes.

    After the ``yield`` (unless ``config['keep_running']`` is set) it stops
    ceph, archives mon data and logs when ``ctx.archive`` is set, and
    purges packages and data.

    :param ctx: Context
    :param config: Configuration dictionary
    :raises RuntimeError: when a mandatory ceph-deploy step fails or too
                          few OSDs were created
    """
    # Expect to find ceph_admin on the first mon by ID, same place that the
    # download task puts it.  Remember this here, because subsequently IDs
    # will change from those in the test config to those that ceph-deploy
    # invents.
    (ceph_admin, ) = ctx.cluster.only(
        teuthology.get_first_mon(ctx, config)).remotes.keys()

    def execute_ceph_deploy(cmd):
        """Remotely execute a ceph_deploy command; return its exit status"""
        return ceph_admin.run(
            args=[
                'cd',
                '{tdir}/ceph-deploy'.format(tdir=testdir),
                run.Raw('&&'),
                run.Raw(cmd),
            ],
            check_status=False,
        ).exitstatus

    try:
        log.info('Building ceph cluster using ceph-deploy...')
        testdir = teuthology.get_testdir(ctx)
        ceph_branch = None
        if config.get('branch') is not None:
            cbranch = config.get('branch')
            # When several entries are given the last one wins.
            for var, val in cbranch.items():
                ceph_branch = '--{var}={val}'.format(var=var, val=val)
        all_nodes = get_all_nodes(ctx, config)
        mds_nodes = get_nodes_using_role(ctx, 'mds')
        mds_nodes = " ".join(mds_nodes)
        mon_node = get_nodes_using_role(ctx, 'mon')
        mon_nodes = " ".join(mon_node)
        mgr_nodes = get_nodes_using_role(ctx, 'mgr')
        mgr_nodes = " ".join(mgr_nodes)
        new_mon = './ceph-deploy new' + " " + mon_nodes
        mgr_create = './ceph-deploy mgr create' + " " + mgr_nodes
        mon_hostname = mon_nodes.split(' ')[0]
        mon_hostname = str(mon_hostname)
        gather_keys = './ceph-deploy gatherkeys' + " " + mon_hostname
        deploy_mds = './ceph-deploy mds create' + " " + mds_nodes
        no_of_osds = 0

        # " ".join() never returns None, so test for the empty string; the
        # previous `is None` comparison could not trigger.
        if not mon_nodes:
            raise RuntimeError("no monitor nodes in the config file")

        estatus_new = execute_ceph_deploy(new_mon)
        if estatus_new != 0:
            raise RuntimeError("ceph-deploy: new command failed")

        log.info('adding config inputs...')
        testdir = teuthology.get_testdir(ctx)
        conf_path = '{tdir}/ceph-deploy/ceph.conf'.format(tdir=testdir)

        if config.get('conf') is not None:
            confp = config.get('conf')
            for section, keys in confp.items():
                lines = '[{section}]\n'.format(section=section)
                teuthology.append_lines_to_file(ceph_admin, conf_path, lines,
                                                sudo=True)
                for key, value in keys.items():
                    log.info("[%s] %s = %s" % (section, key, value))
                    lines = '{key} = {value}\n'.format(key=key, value=value)
                    teuthology.append_lines_to_file(
                        ceph_admin, conf_path, lines, sudo=True)

        # install ceph
        dev_branch = ctx.config['branch']
        branch = '--dev={branch}'.format(branch=dev_branch)
        if ceph_branch:
            option = ceph_branch
        else:
            option = branch
        install_nodes = './ceph-deploy install ' + option + " " + all_nodes
        estatus_install = execute_ceph_deploy(install_nodes)
        if estatus_install != 0:
            raise RuntimeError("ceph-deploy: Failed to install ceph")
        # install ceph-test package too
        install_nodes2 = './ceph-deploy install --tests ' + option + \
                         " " + all_nodes
        estatus_install = execute_ceph_deploy(install_nodes2)
        if estatus_install != 0:
            raise RuntimeError("ceph-deploy: Failed to install ceph-test")

        mon_create_nodes = './ceph-deploy mon create-initial'
        # If the following fails, it is OK, it might just be that the monitors
        # are taking way more than a minute/monitor to form quorum, so lets
        # try the next block which will wait up to 15 minutes to gatherkeys.
        execute_ceph_deploy(mon_create_nodes)

        # create-keys is explicit now
        # http://tracker.ceph.com/issues/16036
        mons = ctx.cluster.only(teuthology.is_type('mon'))
        for remote in mons.remotes.keys():
            remote.run(args=[
                'sudo', 'ceph-create-keys', '--cluster', 'ceph', '--id',
                remote.shortname
            ])

        # Exit statuses of gatherkeys and mgr create are not checked.
        execute_ceph_deploy(gather_keys)

        execute_ceph_deploy(mgr_create)

        if mds_nodes:
            estatus_mds = execute_ceph_deploy(deploy_mds)
            if estatus_mds != 0:
                raise RuntimeError("ceph-deploy: Failed to deploy mds")

        if config.get('test_mon_destroy') is not None:
            # Destroy every mon except the first one.
            for d in range(1, len(mon_node)):
                mon_destroy_nodes = './ceph-deploy mon destroy' + \
                    " " + mon_node[d]
                estatus_mon_d = execute_ceph_deploy(mon_destroy_nodes)
                if estatus_mon_d != 0:
                    raise RuntimeError("ceph-deploy: Failed to delete monitor")

        node_dev_list = get_dev_for_osd(ctx, config)
        for d in node_dev_list:
            # d[0] is the node name, the remaining entries are its disks
            node = d[0]
            for disk in d[1:]:
                zap = './ceph-deploy disk zap ' + node + ':' + disk
                estatus = execute_ceph_deploy(zap)
                if estatus != 0:
                    raise RuntimeError("ceph-deploy: Failed to zap osds")
            osd_create_cmd = './ceph-deploy osd create '
            # first check for filestore, default is bluestore with ceph-deploy
            if config.get('filestore') is not None:
                osd_create_cmd += '--filestore '
            else:
                osd_create_cmd += '--bluestore '
            if config.get('dmcrypt') is not None:
                osd_create_cmd += '--dmcrypt '
            osd_create_cmd += ":".join(d)
            estatus_osd = execute_ceph_deploy(osd_create_cmd)
            if estatus_osd == 0:
                log.info('successfully created osd')
                no_of_osds += 1
            else:
                raise RuntimeError("ceph-deploy: Failed to create osds")

        if config.get('wait-for-healthy', True) and no_of_osds >= 2:
            is_healthy(ctx=ctx, config=None)

            log.info('Setting up client nodes...')
            conf_path = '/etc/ceph/ceph.conf'
            admin_keyring_path = '/etc/ceph/ceph.client.admin.keyring'
            first_mon = teuthology.get_first_mon(ctx, config)
            (mon0_remote, ) = ctx.cluster.only(first_mon).remotes.keys()
            conf_data = teuthology.get_file(
                remote=mon0_remote,
                path=conf_path,
                sudo=True,
            )
            admin_keyring = teuthology.get_file(
                remote=mon0_remote,
                path=admin_keyring_path,
                sudo=True,
            )

            clients = ctx.cluster.only(teuthology.is_type('client'))
            for remot, roles_for_host in clients.remotes.items():
                for id_ in teuthology.roles_of_type(roles_for_host, 'client'):
                    client_keyring = \
                        '/etc/ceph/ceph.client.{id}.keyring'.format(id=id_)
                    # The raw double quotes wrap the whole `ceph auth ...
                    # > keyring` pipeline as the single `bash -c` argument.
                    mon0_remote.run(args=[
                        'cd',
                        '{tdir}'.format(tdir=testdir),
                        run.Raw('&&'),
                        'sudo', 'bash', '-c',
                        run.Raw('"'), 'ceph',
                        'auth',
                        'get-or-create',
                        'client.{id}'.format(id=id_),
                        'mds', 'allow',
                        'mon', 'allow *',
                        'osd', 'allow *',
                        run.Raw('>'),
                        client_keyring,
                        run.Raw('"'),
                    ], )
                    key_data = teuthology.get_file(
                        remote=mon0_remote,
                        path=client_keyring,
                        sudo=True,
                    )
                    teuthology.sudo_write_file(remote=remot,
                                               path=client_keyring,
                                               data=key_data,
                                               perms='0644')
                    teuthology.sudo_write_file(remote=remot,
                                               path=admin_keyring_path,
                                               data=admin_keyring,
                                               perms='0644')
                    teuthology.sudo_write_file(remote=remot,
                                               path=conf_path,
                                               data=conf_data,
                                               perms='0644')

            if mds_nodes:
                log.info('Configuring CephFS...')
                Filesystem(ctx, create=True)
        elif not config.get('only_mon'):
            raise RuntimeError(
                "The cluster is NOT operational due to insufficient OSDs")
        yield

    except Exception:
        log.info(
            "Error encountered, logging exception before tearing down ceph-deploy"
        )
        log.info(traceback.format_exc())
        raise
    finally:
        # NOTE(review): a `return` inside `finally` discards any exception
        # that is propagating, so with keep_running set a failure raised
        # above (or thrown in at the yield) is swallowed here.  Left as-is
        # because callers may rely on it -- confirm whether it is intended.
        if config.get('keep_running'):
            return
        log.info('Stopping ceph...')
        ctx.cluster.run(args=[
            'sudo', 'stop', 'ceph-all', run.Raw('||'),
            'sudo', 'service', 'ceph', 'stop', run.Raw('||'),
            'sudo', 'systemctl', 'stop', 'ceph.target'
        ])

        # Are you really not running anymore?
        # try first with the init tooling
        # ignoring the status so this becomes informational only
        ctx.cluster.run(args=[
            'sudo', 'status', 'ceph-all', run.Raw('||'),
            'sudo', 'service', 'ceph', 'status', run.Raw('||'),
            'sudo', 'systemctl', 'status', 'ceph.target'
        ], check_status=False)

        # and now just check for the processes themselves, as if
        # upstart/sysvinit is lying to us. Ignore errors if the grep fails
        ctx.cluster.run(args=[
            'sudo', 'ps', 'aux', run.Raw('|'),
            'grep', '-v', 'grep', run.Raw('|'),
            'grep', 'ceph'
        ], check_status=False)

        if ctx.archive is not None:
            # archive mon data, too
            log.info('Archiving mon data...')
            path = os.path.join(ctx.archive, 'data')
            os.makedirs(path)
            mons = ctx.cluster.only(teuthology.is_type('mon'))
            for remote, roles in mons.remotes.items():
                for role in roles:
                    if role.startswith('mon.'):
                        teuthology.pull_directory_tarball(
                            remote, '/var/lib/ceph/mon',
                            path + '/' + role + '.tgz')

            log.info('Compressing logs...')
            run.wait(
                ctx.cluster.run(
                    args=[
                        'sudo',
                        'find',
                        '/var/log/ceph',
                        '-name',
                        '*.log',
                        '-print0',
                        run.Raw('|'),
                        'sudo',
                        'xargs',
                        '-0',
                        '--no-run-if-empty',
                        '--',
                        'gzip',
                        '--',
                    ],
                    wait=False,
                ), )

            log.info('Archiving logs...')
            path = os.path.join(ctx.archive, 'remote')
            os.makedirs(path)
            for remote in ctx.cluster.remotes.keys():
                sub = os.path.join(path, remote.shortname)
                os.makedirs(sub)
                teuthology.pull_directory(remote, '/var/log/ceph',
                                          os.path.join(sub, 'log'))

        # Prevent these from being undefined if the try block fails
        all_nodes = get_all_nodes(ctx, config)
        purge_nodes = './ceph-deploy purge' + " " + all_nodes
        purgedata_nodes = './ceph-deploy purgedata' + " " + all_nodes

        log.info('Purging package...')
        execute_ceph_deploy(purge_nodes)
        log.info('Purging data...')
        execute_ceph_deploy(purgedata_nodes)
def shutdown_daemons(ctx):
    """
    Stop ceph services, unmount FUSE mounts and kill leftover daemons.

    Runs two commands on every remote in the cluster: first a service stop
    (upstart, then sysvinit, then systemd; exit status ignored), then one
    shell line that unmounts every ceph-fuse / rbd-fuse entry found in
    /etc/mtab and finally ``killall``s the known ceph binaries.

    :param ctx: Context
    """
    log.info('Unmounting ceph-fuse and killing daemons...')

    stop_services = [
        'sudo', 'stop', 'ceph-all', run.Raw('||'),
        'sudo', 'service', 'ceph', 'stop', run.Raw('||'),
        'sudo', 'systemctl', 'stop', 'ceph.target',
    ]
    ctx.cluster.run(args=stop_services, check_status=False, timeout=180)

    def unmount_all(fuse_name):
        """Shell fragment: fusermount -u each mtab entry of fuse_name."""
        return [
            'if', 'grep', '-q', fuse_name, '/etc/mtab', run.Raw(';'),
            'then',
            'grep', fuse_name, '/etc/mtab', run.Raw('|'),
            'grep', '-o', " /.* fuse", run.Raw('|'),
            'grep', '-o', "/.* ", run.Raw('|'),
            'xargs', '-n', '1', 'sudo', 'fusermount', '-u', run.Raw(';'),
            'fi',
            run.Raw(';'),
        ]

    victims = [
        'ceph-mon', 'ceph-osd', 'ceph-mds', 'ceph-mgr', 'ceph-fuse',
        'ceph-disk', 'radosgw', 'ceph_test_rados', 'rados', 'rbd-fuse',
        'apache2',
    ]

    teardown = []
    for fuse_name in ('ceph-fuse', 'rbd-fuse'):
        teardown.extend(unmount_all(fuse_name))
    teardown.extend(['sudo', 'killall', '--quiet'])
    teardown.extend(victims)
    # ignore errors from ceph binaries not being found
    teardown.extend([run.Raw('||'), 'true'])

    ctx.cluster.run(args=teardown, timeout=120)
    log.info('All daemons killed.')
def coredump(ctx, config):
    """
    Collect core dumps into the archive dir and fail the run if any appear.

    Generator-style task. Setup creates ``<archive>/coredump`` on every
    remote, points ``kernel.core_pattern`` at it and appends the same
    setting to /etc/sysctl.conf. Teardown resets the pattern to ``core``,
    removes the directory when it is empty, and marks the run as failed
    for every remote where the directory still exists.

    :param ctx: Context
    :param config: Configuration (not read here)
    """
    log.info('Enabling coredump saving...')
    archive_dir = misc.get_archive_dir(ctx)
    dump_dir = '{adir}/coredump'.format(adir=archive_dir)
    pattern_setting = 'kernel.core_pattern={adir}/coredump/%t.%p.core'.format(
        adir=archive_dir)

    enable_cmd = [
        'install', '-d', '-m0755', '--', dump_dir,
        run.Raw('&&'),
        'sudo', 'sysctl', '-w', pattern_setting,
        run.Raw('&&'),
        'echo', pattern_setting,
        run.Raw('|'),
        'sudo', 'tee', '-a', '/etc/sysctl.conf',
    ]
    run.wait(ctx.cluster.run(args=enable_cmd, wait=False))

    try:
        yield
    finally:
        disable_cmd = [
            'sudo', 'sysctl', '-w', 'kernel.core_pattern=core',
            run.Raw('&&'),
            # don't litter the archive dir if there were no cores dumped
            'rmdir', '--ignore-fail-on-non-empty', '--', dump_dir,
        ]
        run.wait(ctx.cluster.run(args=disable_cmd, wait=False))

        # set status = 'fail' if the dir is still there = coredumps were
        # seen
        for rem in ctx.cluster.remotes.keys():
            try:
                rem.sh("test -e " + archive_dir + "/coredump")
            except run.CommandFailedError:
                # directory is gone, so nothing was dumped on this remote
                continue
            log.warning('Found coredumps on %s, flagging run as failed', rem)
            set_status(ctx.summary, 'fail')
            if 'failure_reason' not in ctx.summary:
                ctx.summary['failure_reason'] = \
                    'Found coredumps on {rem}'.format(rem=rem)
def remove_ceph_packages(ctx):
    """
    Force-remove ceph and ceph-dependent packages from every remote.

    Force is needed since the node's repo might have changed and in many
    cases autocorrect will not work due to missing packages due to repo
    changes. Stale repo definitions are deleted first; on rpm systems the
    packages go away through ``yum remove``, elsewhere through
    ``dpkg --remove --force-remove-reinstreq``.

    :param ctx: Context
    """
    log.info("Force remove ceph packages")
    pkgs = ' '.join([
        'ceph-common', 'ceph-mon', 'ceph-osd', 'libcephfs1', 'libcephfs2',
        'librados2', 'librgw2', 'librbd1', 'python-rgw', 'ceph-selinux',
        'python-cephfs', 'ceph-base', 'python-rbd', 'python-rados',
        'ceph-mds', 'ceph-mgr', 'libcephfs-java', 'libcephfs-jni',
        'ceph-deploy', 'libapache2-mod-fastcgi',
    ])

    # Repo-file globs, expanded by the remote shell (hence run.Raw).
    yum_repo_globs = [
        "/etc/yum.repos.d/*ceph*",
        "/etc/yum.repos.d/*fcgi*",
        "/etc/yum.repos.d/*samba*",
        "/etc/yum.repos.d/*nfs-ganesha*",
    ]
    apt_repo_globs = [
        "/etc/apt/sources.list.d/*ceph*",
        "/etc/apt/sources.list.d/*samba*",
        "/etc/apt/sources.list.d/*nfs-ganesha*",
    ]

    for remote in ctx.cluster.remotes.keys():
        is_rpm = remote.os.package_type == 'rpm'

        log.info("Remove any broken repos")
        for repo_glob in (yum_repo_globs if is_rpm else apt_repo_globs):
            remote.run(args=['sudo', 'rm', run.Raw(repo_glob)],
                       check_status=False)

        if is_rpm:
            remote.run(args=[
                'sudo', 'rpm', '--rebuilddb',
                run.Raw('&&'),
                'yum', 'clean', 'all',
            ])
            log.info('Remove any ceph packages')
            remote.run(args=['sudo', 'yum', 'remove', '-y', run.Raw(pkgs)],
                       check_status=False)
            continue

        log.info("Autoclean")
        remote.run(args=['sudo', 'apt-get', 'autoclean'],
                   check_status=False)
        log.info('Remove any ceph packages')
        remote.run(
            args=[
                'sudo', 'dpkg', '--remove', '--force-remove-reinstreq',
                run.Raw(pkgs),
            ],
            check_status=False,
        )
        # unlike the first autoclean, this one must succeed
        log.info("Autoclean")
        remote.run(args=['sudo', 'apt-get', 'autoclean'])
def task(ctx, config):
    """
    Execute commands and allow daemon restart with config options.
    Each process executed can output to stdout restart commands of the form:
        restart <role> <id> <conf_key1> <conf_value1> <conf_key2> <conf_value2>
    This will restart the daemon <role>.<id> with the specified config values
    once by modifying the conf file with those values, and then replacing the
    old conf file once the daemon is restarted.
    This task does not kill a running daemon, it assumes the daemon will
    abort on an assert specified in the config.

        tasks:
        - install:
        - ceph:
        - restart:
            exec:
              client.0:
                - test_backtraces.py

    Besides ``restart`` a script may print ``done`` to end the dialogue.
    After each handled ``restart`` the string ``restarted`` is written to
    the script's stdin.

    :param ctx: Context
    :param config: Configuration dictionary; requires an ``exec`` mapping of
                   role to command list, optional ``env`` mapping.
    :raises Exception: when a restart script exits non-zero
    """
    assert isinstance(config, dict), "task kill got invalid config"

    testdir = teuthology.get_testdir(ctx)

    # Pre-bind everything the finally clause reads, so that a failure before
    # the first loop iteration surfaces as itself instead of being masked
    # by an UnboundLocalError raised during cleanup.
    role = None
    commands = None
    remote = None
    srcdir = None

    try:
        assert 'exec' in config, "config requires exec key with <role>: <command> entries"
        for role, commands in config['exec'].items():
            log.info('restart for role {r}'.format(r=role))
            (remote, ) = ctx.cluster.only(role).remotes.keys()
            srcdir, restarts = get_tests(ctx, config, role, remote, testdir)
            log.info('Running command on role %s host %s', role, remote.name)
            # Only the first configured command is used to select scripts.
            spec = '{spec}'.format(spec=commands[0])
            log.info('Restarts list: %s', restarts)
            log.info('Spec is %s', spec)
            to_run = [w for w in restarts
                      if w == commands or w.find(spec) != -1]
            log.info('To run: %s', to_run)
            for c in to_run:
                log.info('Running restart script %s...', c)
                args = [
                    run.Raw('TESTDIR="{tdir}"'.format(tdir=testdir)),
                ]
                env = config.get('env')
                if env is not None:
                    for var, val in env.items():
                        quoted_val = pipes.quote(val)
                        env_arg = '{var}={val}'.format(var=var,
                                                       val=quoted_val)
                        args.append(run.Raw(env_arg))
                args.extend([
                    'adjust-ulimits',
                    'ceph-coverage',
                    '{tdir}/archive/coverage'.format(tdir=testdir),
                    '{srcdir}/{c}'.format(
                        srcdir=srcdir,
                        c=c,
                    ),
                ])
                proc = remote.run(
                    args=args,
                    stdout=tor.PIPE,
                    stdin=tor.PIPE,
                    stderr=log,
                    wait=False,
                )
                log.info('waiting for a command from script')
                while True:
                    line = proc.stdout.readline()
                    if not line:
                        # EOF: the script closed its stdout
                        break
                    log.debug('script command: {c}'.format(c=line))
                    cmd = line.strip().split(' ')
                    if cmd[0] == "done":
                        break
                    assert cmd[0] == 'restart', \
                        "script sent invalid command request to kill task"
                    # cmd should be: restart <role> <id> <conf_key1> <conf_value1> <conf_key2> <conf_value2>
                    # or to clear, just: restart <role> <id>
                    restart_daemon(ctx, config, cmd[1], cmd[2], *cmd[3:])
                    proc.stdin.writelines(['restarted\n'])
                    proc.stdin.flush()
                try:
                    proc.wait()
                except tor.CommandFailedError:
                    raise Exception(
                        'restart task got non-zero exit status from script: {s}'
                        .format(s=c))
    finally:
        if role is not None:
            log.info('Finishing %s on %s...', commands, role)
        # Only the most recently used remote/srcdir pair is cleaned up, and
        # only when get_tests() got far enough to create it.
        if remote is not None and srcdir is not None:
            remote.run(
                logger=log.getChild(role),
                args=[
                    'rm', '-rf', '--',
                    '{tdir}/restarts.list'.format(tdir=testdir),
                    srcdir,
                ],
            )
def run_qemu(ctx, config):
    """
    Setup kvm environment and start qemu

    Generator-style task. For every client in ``config`` it creates the log
    directory, loads the kvm module, sets up the NFS mount and starts one
    qemu process in the background. After the ``yield`` it waits for all
    qemu processes, optionally sleeps ``time_wait`` seconds, tears the NFS
    mounts down and checks that each client left a ``success`` marker file.

    :param ctx: Context
    :param config: mapping of client role to per-client configuration; the
                   keys read here are ``memory``, ``clone``, ``num_rbd`` and
                   ``time_wait``.
    """
    procs = []
    testdir = teuthology.get_testdir(ctx)
    # Pre-bound so the finally clause works even when config is empty.  As
    # before, the value of the last client processed is the one used.
    time_wait = 0
    for client, client_config in config.items():
        (remote, ) = ctx.cluster.only(client).remotes.keys()
        log_dir = '{tdir}/archive/qemu/{client}'.format(tdir=testdir,
                                                        client=client)
        remote.run(args=[
            'mkdir', log_dir, run.Raw('&&'),
            'sudo', 'modprobe', 'kvm',
        ])

        # make an nfs mount to use for logging and to
        # allow to test to tell teuthology the tests outcome
        _setup_nfs_mount(remote, client, log_dir)

        base_file = '{tdir}/qemu/base.{client}.qcow2'.format(tdir=testdir,
                                                             client=client)
        qemu_cmd = 'qemu-system-x86_64'
        if remote.os.package_type == "rpm":
            qemu_cmd = "/usr/libexec/qemu-kvm"
        args = [
            'adjust-ulimits',
            'ceph-coverage',
            '{tdir}/archive/coverage'.format(tdir=testdir),
            'daemon-helper',
            'term',
            qemu_cmd, '-enable-kvm', '-nographic',
            '-m', str(client_config.get('memory', DEFAULT_MEM)),
            # base OS device
            '-drive',
            'file={base},format=qcow2,if=virtio'.format(base=base_file),
            # cd holding metadata for cloud-init
            '-cdrom', '{tdir}/qemu/{client}.iso'.format(tdir=testdir,
                                                        client=client),
        ]

        cachemode = 'none'
        # Work on a copy: update() on the section returned by conf.get()
        # would write client options into the shared [global] section and
        # carry them over to every later client.
        ceph_config = dict(ctx.ceph['ceph'].conf.get('global', {}))
        ceph_config.update(ctx.ceph['ceph'].conf.get('client', {}))
        ceph_config.update(ctx.ceph['ceph'].conf.get(client, {}))
        if ceph_config.get('rbd cache', True):
            if ceph_config.get('rbd cache max dirty', 1) > 0:
                cachemode = 'writeback'
            else:
                cachemode = 'writethrough'

        clone = client_config.get('clone', False)
        for i in range(client_config.get('num_rbd', DEFAULT_NUM_RBD)):
            suffix = '-clone' if clone else ''
            args.extend([
                '-drive',
                'file=rbd:rbd/{img}:id={id},format=raw,if=virtio,cache={cachemode}'
                .format(
                    img='{client}.{num}{suffix}'.format(client=client,
                                                        num=i,
                                                        suffix=suffix),
                    id=client[len('client.'):],
                    cachemode=cachemode,
                ),
            ])
        time_wait = client_config.get('time_wait', 0)

        log.info('starting qemu...')
        procs.append(
            remote.run(
                args=args,
                logger=log.getChild(client),
                stdin=run.PIPE,
                wait=False,
            ))

    try:
        yield
    finally:
        log.info('waiting for qemu tests to finish...')
        run.wait(procs)

        if time_wait > 0:
            log.debug('waiting {time_wait} sec for workloads detect finish...'.
                      format(time_wait=time_wait))
            time.sleep(time_wait)

        log.debug('checking that qemu tests succeeded...')
        for client in config.keys():
            (remote, ) = ctx.cluster.only(client).remotes.keys()

            # teardown nfs mount
            _teardown_nfs_mount(remote, client)
            # check for test status
            remote.run(args=[
                'test', '-f',
                '{tdir}/archive/qemu/{client}/success'.format(tdir=testdir,
                                                              client=client),
            ], )
def task(ctx, config):
    """
    Run all cram tests from the specified paths on the specified
    clients. Each client runs tests in parallel.

    Limitations:
    Tests must have a .t suffix. Tests with duplicate names will
    overwrite each other, so only the last one will run.

    For example::

        tasks:
        - ceph:
        - cram:
            clients:
              client.0:
              - qa/test.t
              - qa/test2.t
              client.1: [qa/test.t]
            branch: foo

    You can also run a list of cram tests on all clients::

        tasks:
        - ceph:
        - cram:
            clients:
              all: [qa/test.t]

    :param ctx: Context
    :param config: Configuration
    """
    assert isinstance(config, dict)
    assert 'clients' in config and isinstance(config['clients'], dict), \
        'configuration must contain a dictionary of clients'

    clients = teuthology.replace_all_with_clients(ctx.cluster,
                                                  config['clients'])
    testdir = teuthology.get_testdir(ctx)

    overrides = ctx.config.get('overrides', {})
    refspec = get_refspec_after_overrides(config, overrides)

    git_url = teuth_config.get_ceph_qa_suite_git_url()
    log.info('Pulling tests from %s ref %s', git_url, refspec)

    try:
        for client, tests in clients.items():
            (remote, ) = ctx.cluster.only(client).remotes.keys()
            client_dir = '{tdir}/archive/cram.{role}'.format(tdir=testdir,
                                                             role=client)
            remote.run(args=[
                'mkdir', '--', client_dir,
                run.Raw('&&'),
                'virtualenv', '{tdir}/virtualenv'.format(tdir=testdir),
                run.Raw('&&'),
                '{tdir}/virtualenv/bin/pip'.format(tdir=testdir),
                'install', 'cram==0.6',
            ], )
            clone_dir = '{tdir}/clone.{role}'.format(tdir=testdir,
                                                     role=client)
            remote.run(args=refspec.clone(git_url, clone_dir))

            for test in tests:
                assert test.endswith('.t'), 'tests must end in .t'
                remote.run(args=[
                    'cp', '--', os.path.join(clone_dir, test), client_dir,
                ], )

        with parallel() as p:
            for role in clients.keys():
                p.spawn(_run_tests, ctx, role)
    finally:
        for client, tests in clients.items():
            (remote, ) = ctx.cluster.only(client).remotes.keys()
            client_dir = '{tdir}/archive/cram.{role}'.format(tdir=testdir,
                                                             role=client)
            # Recompute the clone dir per client.  Reusing the variable left
            # over from the setup loop removed only the last client's clone
            # and raised NameError when setup failed before assigning it.
            clone_dir = '{tdir}/clone.{role}'.format(tdir=testdir,
                                                     role=client)
            # basename also copes with a test path that has no directory
            # part, where rsplit('/', 1)[1] raised IndexError.
            test_files = {os.path.basename(test) for test in tests}

            # remove test files unless they failed
            for test_file in test_files:
                abs_file = os.path.join(client_dir, test_file)
                remote.run(args=[
                    'test', '-f', abs_file + '.err',
                    run.Raw('||'),
                    'rm', '-f', '--', abs_file,
                ], )

            # ignore failure since more than one client may
            # be run on a host, and the client dir should be
            # non-empty if the test failed
            remote.run(args=[
                'rm', '-rf', '--',
                '{tdir}/virtualenv'.format(tdir=testdir),
                clone_dir,
                run.Raw(';'),
                'rmdir', '--ignore-fail-on-non-empty', client_dir,
            ], )
def configure(ctx, config):
    """
    Configure the s3-tests.  This includes the running of the
    bootstrap code and the updating of local conf files.

    Generator-style task: for every client it fills in the ``host`` (and
    optionally ``slow backend``) setting of that client's s3tests
    configuration, runs ``./bootstrap`` in the s3-tests checkout and writes
    the configuration to the archive directory; it then renders
    ``boto.cfg`` from the template next to this module. After the ``yield``
    the ``boto.cfg`` files are removed again.

    :param ctx: Context
    :param config: Configuration dictionary with ``clients`` and
                   ``s3tests_conf`` mappings and an optional
                   ``idle_timeout`` (default 30).
    """
    assert isinstance(config, dict)
    log.info('Configuring s3-tests...')
    testdir = teuthology.get_testdir(ctx)
    for client, properties in config['clients'].items():
        s3tests_conf = config['s3tests_conf'][client]
        if properties is not None and 'rgw_server' in properties:
            host = None
            # targets and roles are walked in parallel; the last target
            # whose role list contains the rgw_server role wins.
            for target, roles in zip(ctx.config['targets'].keys(),
                                     ctx.config['roles']):
                log.info('roles: ' + str(roles))
                log.info('target: ' + str(target))
                if properties['rgw_server'] in roles:
                    _, host = split_user(target)
            assert host is not None, \
                "Invalid client specified as the rgw_server"
            s3tests_conf['DEFAULT']['host'] = host
        else:
            s3tests_conf['DEFAULT']['host'] = 'localhost'

        if properties is not None and 'slow_backend' in properties:
            s3tests_conf['fixtures']['slow backend'] = properties[
                'slow_backend']

        (remote, ) = ctx.cluster.only(client).remotes.keys()
        remote.run(args=[
            'cd',
            '{tdir}/s3-tests'.format(tdir=testdir),
            run.Raw('&&'),
            './bootstrap',
        ], )
        conf_fp = StringIO()
        s3tests_conf.write(conf_fp)
        teuthology.write_file(
            remote=remote,
            path='{tdir}/archive/s3-tests.{client}.conf'.format(
                tdir=testdir, client=client),
            data=conf_fp.getvalue(),
        )

    log.info('Configuring boto...')
    boto_src = os.path.join(os.path.dirname(__file__), 'boto.cfg.template')
    # Read in text mode: str.format() is needed below and bytes has no
    # format() on Python 3.  The rendered text is the same for every
    # client, so the template is read and formatted once.
    with open(boto_src) as f:
        boto_conf = f.read().format(
            idle_timeout=config.get('idle_timeout', 30))
    for client in config['clients'].keys():
        (remote, ) = ctx.cluster.only(client).remotes.keys()
        teuthology.write_file(
            remote=remote,
            path='{tdir}/boto.cfg'.format(tdir=testdir),
            data=boto_conf,
        )

    try:
        yield

    finally:
        log.info('Cleaning up boto...')
        for client in config['clients'].keys():
            (remote, ) = ctx.cluster.only(client).remotes.keys()
            remote.run(args=[
                'rm',
                '{tdir}/boto.cfg'.format(tdir=testdir),
            ], )
def _make_scratch_dir(ctx, role, subdir):
    """
    Create the scratch directory for a role underneath its mount point.

    The mount point itself is created first when ``stat`` reports it as
    missing.

    :param ctx: Context
    :param role: "role.#" where # is used for the role id.
    :param subdir: use this subdir (False if not used); a falsy value
                   selects ``client.<id>``
    :return: True when the mount point had to be created here, else False
    """
    cluster, _, id_ = misc.split_role(role)
    remote = get_remote_for_role(ctx, role)
    dir_owner = remote.user
    mnt = _client_mountpoint(ctx, cluster, id_)

    # if neither kclient nor ceph-fuse are required for a workunit,
    # mnt may not exist. Stat and create the directory if it doesn't.
    try:
        remote.run(args=['stat', '--', mnt])
    except CommandFailedError:
        remote.run(args=['mkdir', '--', mnt])
        log.info('Created dir {dir}'.format(dir=mnt))
        created_mountpoint = True
    else:
        log.info('Did not need to create dir {dir}'.format(dir=mnt))
        created_mountpoint = False

    if not subdir:
        subdir = 'client.{id}'.format(id=id_)

    # cd first so this will fail if the mount point does
    # not exist; pure install -d will silently do the
    # wrong thing
    enter_mnt = ['cd', '--', mnt, run.Raw('&&')]
    if created_mountpoint:
        make_subdir = ['mkdir', '--', subdir]
    else:
        make_subdir = [
            'sudo', 'install', '-d', '-m', '0755',
            '--owner={user}'.format(user=dir_owner),
            '--', subdir,
        ]
    remote.run(args=enter_mnt + make_subdir)

    return created_mountpoint
def ceph_log(ctx, config):
    """
    Check the cluster log for problems and archive the ceph logs afterwards.

    This is a generator: nothing but the fsid lookup happens before the
    ``yield``.  When it is resumed (normally or through an exception) the
    cluster log is scanned for ``[ERR]``, ``[WRN]`` and ``[SEC]`` entries
    that are not excluded by ``log-whitelist``; a hit marks the run as
    failed and, if no failure reason has been recorded yet, stores the most
    severe matching line as ``failure_reason``.  Finally the logs under
    ``/var/log/ceph`` are compressed and pulled into the archive directory.

    :param ctx: Context
    :param config: Configuration dict; ``cluster`` is required,
                   ``log-whitelist`` (list of egrep patterns) is optional.
    """
    cluster_name = config['cluster']
    fsid = ctx.ceph[cluster_name].fsid
    # The whitelist is optional; use the same lookup for every scan so a
    # missing key cannot raise KeyError while errors are being reported.
    excludes = config.get('log-whitelist')

    try:
        yield

    except Exception:
        # we need to know this below
        ctx.summary['success'] = False
        raise

    finally:
        log.info('Checking cluster log for badness...')

        def first_in_ceph_log(pattern, excludes):
            """
            Find the first occurrence of the pattern specified in the Ceph log,
            Returns None if none found.

            :param pattern: Pattern scanned for.
            :param excludes: Patterns to ignore.
            :return: First line of text (or None if not found)
            """
            args = [
                'sudo',
                'egrep', pattern,
                '/var/log/ceph/{fsid}/ceph.log'.format(fsid=fsid),
            ]
            if excludes:
                for exclude in excludes:
                    args.extend([run.Raw('|'), 'egrep', '-v', exclude])
            args.extend([
                run.Raw('|'), 'head', '-n', '1',
            ])
            r = ctx.ceph[cluster_name].bootstrap_remote.run(
                stdout=StringIO(),
                args=args,
            )
            stdout = r.stdout.getvalue()
            if stdout != '':
                return stdout
            return None

        if first_in_ceph_log(r'\[ERR\]|\[WRN\]|\[SEC\]', excludes) is not None:
            log.warning('Found errors (ERR|WRN|SEC) in cluster log')
            ctx.summary['success'] = False
            # use the most severe problem as the failure reason
            if 'failure_reason' not in ctx.summary:
                for pattern in [r'\[SEC\]', r'\[ERR\]', r'\[WRN\]']:
                    match = first_in_ceph_log(pattern, excludes)
                    if match is not None:
                        ctx.summary['failure_reason'] = \
                            '"{match}" in cluster log'.format(
                                match=match.rstrip('\n'),
                            )
                        break

        if ctx.archive is not None and \
                not (ctx.config.get('archive-on-error') and ctx.summary['success']):
            # and logs
            log.info('Compressing logs...')
            run.wait(
                ctx.cluster.run(
                    args=[
                        'sudo',
                        'find',
                        '/var/log/ceph',   # all logs, not just for the cluster
                        '-name',
                        '*.log',
                        '-print0',
                        run.Raw('|'),
                        'sudo',
                        'xargs',
                        '-0',
                        '--no-run-if-empty',
                        '--',
                        'gzip',
                        '--',
                    ],
                    wait=False,
                ),
            )

            log.info('Archiving logs...')
            path = os.path.join(ctx.archive, 'remote')
            try:
                os.makedirs(path)
            except OSError:
                # best effort: the directory may already exist
                pass
            for remote in ctx.cluster.remotes.keys():
                sub = os.path.join(path, remote.name)
                try:
                    os.makedirs(sub)
                except OSError:
                    # best effort: the directory may already exist
                    pass
                try:
                    teuthology.pull_directory(remote, '/var/log/ceph',  # everything
                                              os.path.join(sub, 'log'))
                except ReadError:
                    # deliberately ignored so one unreadable host does not
                    # prevent archiving the remaining ones
                    pass
def _run_tests(ctx, refspec, role, tests, env, subdir=None, timeout=None):
    """
    Run the individual test. Create a scratch directory and then extract the
    workunits from git. Make the executables, and then run the tests.
    Clean up (remove files created) after the tests are finished.

    :param ctx:     Context
    :param refspec: branch, sha1, or version tag used to identify this
                    build
    :param role:    client role ("client.#") the workunits run on.
    :param tests:   specific tests specified.
    :param env:     environment set in yaml file.  Could be None.
    :param subdir:  subdirectory set in yaml file.  Could be None
    :param timeout: Accepted for interface compatibility.  NOTE: the code
                    that wrapped the workunit in 'adjust-ulimits',
                    'ceph-coverage' and 'timeout' is disabled in this
                    function, so the value is currently not applied.
    """
    testdir = misc.get_testdir(ctx)
    assert isinstance(role, basestring)
    cluster, type_, id_ = misc.split_role(role)
    assert type_ == 'client'
    remote = get_remote_for_role(ctx, role)
    mnt = _client_mountpoint(ctx, cluster, id_)

    # subdir so we can remove and recreate this a lot without sudo
    if subdir is not None:
        scratch_tmp = os.path.join(mnt, subdir)
    else:
        scratch_tmp = os.path.join(mnt, 'client.{id}'.format(id=id_), 'tmp')

    srcdir = '{tdir}/workunit.{role}'.format(tdir=testdir, role=role)
    clonedir = '{tdir}/clone.{role}'.format(tdir=testdir, role=role)
    workunits_file = '{tdir}/workunits.list.{role}'.format(tdir=testdir, role=role)
    role_log = log.getChild(role)

    # Fetch the requested revision and copy its workunits next to the clone.
    git_url = teuth_config.get_ceph_git_url()
    remote.run(
        logger=role_log,
        args=[
            'git', 'clone', git_url, clonedir,
            run.Raw('&&'),
            'cd', '--', clonedir,
            run.Raw('&&'),
            'git', 'checkout', refspec,
            run.Raw('&&'),
            'cp', '-r', 'qa/workunits', srcdir,
        ],
    )

    # Build the workunits when a Makefile is present, then record every
    # executable (NUL separated, relative to srcdir) in workunits_file.
    remote.run(
        logger=role_log,
        args=[
            'cd', '--', srcdir,
            run.Raw('&&'),
            'if', 'test', '-e', 'Makefile', run.Raw(';'),
            'then', 'make', run.Raw(';'),
            'fi',
            run.Raw('&&'),
            'find', '-executable', '-type', 'f', '-printf', r'%P\0',
            run.Raw('>' + workunits_file),
        ],
    )

    workunits = sorted(misc.get_file(remote, workunits_file).split('\0'))
    assert workunits

    try:
        assert isinstance(tests, list)
        for spec in tests:
            log.info('Running workunits matching %s on %s...', spec, role)
            # A spec selects either one workunit or a whole directory.
            prefix = '{spec}/'.format(spec=spec)
            to_run = [w for w in workunits if w == spec or w.startswith(prefix)]
            if not to_run:
                raise RuntimeError('Spec did not match any workunits: {spec!r}'.format(spec=spec))
            for workunit in to_run:
                log.info('Running workunit %s...', workunit)
                args = [
                    'mkdir', '-p', '--', scratch_tmp,
                    run.Raw('&&'),
                    'cd', '--', scratch_tmp,
                    run.Raw('&&'),
                    run.Raw('CEPH_CLI_TEST_DUP_COMMAND=1'),
                    run.Raw('CEPH_REF={ref}'.format(ref=refspec)),
                    run.Raw('TESTDIR="{tdir}"'.format(tdir=testdir)),
                    run.Raw('CEPH_ARGS="--cluster {0}"'.format(cluster)),
                    run.Raw('CEPH_ID="{id}"'.format(id=id_)),
                    run.Raw('PATH=$PATH:/usr/sbin'),
                ]
                if env is not None:
                    # user supplied variables are shell-quoted before use
                    args.extend(
                        run.Raw('{var}={val}'.format(var=var,
                                                     val=pipes.quote(val)))
                        for var, val in env.iteritems()
                    )
                args.append('{srcdir}/{workunit}'.format(
                    srcdir=srcdir,
                    workunit=workunit,
                ))

                remote.run(
                    logger=role_log,
                    args=args,
                    label="workunit test {workunit}".format(workunit=workunit)
                )
                # every workunit starts from an empty scratch directory
                remote.run(
                    logger=role_log,
                    args=['sudo', 'rm', '-rf', '--', scratch_tmp],
                )
    finally:
        log.info('Stopping %s on %s...', tests, role)
        remote.run(
            logger=role_log,
            args=['rm', '-rf', '--', workunits_file, srcdir, clonedir],
        )
def download_cephadm(ctx, config, ref):
    """
    Fetch the cephadm script onto every host and remove the cluster on exit.

    Unless ``cephadm_mode`` is ``cephadm-package`` the script is downloaded
    to ``ctx.cephadm`` (via curl for github URLs, ``git archive`` otherwise),
    checked for a plausible size and made executable.  This is a generator:
    after the ``yield`` the cluster is removed with ``cephadm rm-cluster``
    and, in ``root`` mode, the downloaded script is deleted.

    :param ctx: Context
    :param config: Configuration dict; reads ``cluster``, ``cephadm_mode``
                   and ``cephadm_branch``.
    :param ref: git ref to download from unless ``cephadm_branch`` is set.
    """
    cluster_name = config['cluster']
    mode = config.get('cephadm_mode')
    github_prefix = 'https://github.com/'

    if mode != 'cephadm-package':
        ref = config.get('cephadm_branch', ref)
        git_url = teuth_config.get_ceph_git_url()
        log.info('Downloading cephadm (repo %s ref %s)...', git_url, ref)
        if git_url.startswith(github_prefix):
            # git archive doesn't like https:// URLs, which we use with github.
            repo_path = git_url[len(github_prefix):]
            repo_path = re.sub(r'\.git/?$', '', repo_path).strip()  # no .git suffix
            raw_url = '/'.join([
                'https://raw.githubusercontent.com',
                repo_path,
                ref,
                'src/cephadm/cephadm',
            ])
            fetch_cmd = [
                'curl', '--silent',
                raw_url,
                run.Raw('>'),
                ctx.cephadm,
                run.Raw('&&'),
                'ls', '-l',
                ctx.cephadm,
            ]
        else:
            fetch_cmd = [
                'git', 'archive',
                '--remote=' + git_url,
                ref,
                'src/cephadm/cephadm',
                run.Raw('|'),
                'tar', '-xO', 'src/cephadm/cephadm',
                run.Raw('>'),
                ctx.cephadm,
            ]
        ctx.cluster.run(args=fetch_cmd)

        # sanity-check the resulting file and set executable bit
        cephadm_file_size = '$(stat -c%s {})'.format(ctx.cephadm)
        ctx.cluster.run(
            args=[
                'test', '-s', ctx.cephadm,
                run.Raw('&&'),
                'test', run.Raw(cephadm_file_size), "-gt", run.Raw('1000'),
                run.Raw('&&'),
                'chmod', '+x', ctx.cephadm,
            ],
        )

    try:
        yield
    finally:
        log.info('Removing cluster...')
        ctx.cluster.run(args=[
            'sudo',
            ctx.cephadm,
            'rm-cluster',
            '--fsid', ctx.ceph[cluster_name].fsid,
            '--force',
        ])

        if config.get('cephadm_mode') == 'root':
            log.info('Removing cephadm ...')
            ctx.cluster.run(args=['rm', '-rf', ctx.cephadm])
def configure(ctx, config):
    """
    Configure the s3-tests.  This includes the running of the
    bootstrap code and the updating of local conf files.

    For every client the rgw endpoint, optional website endpoint, KMS key
    ids (barbican, vault or the local fallback) and the optional
    ``slow_backend`` fixture are written into the client's s3-tests
    configuration.  ``./bootstrap`` is then run and the configuration is
    stored in the archive directory.  This is a generator: after the
    ``yield`` the ``boto.cfg`` written to each client is removed again.

    :param ctx: Context
    :param config: Configuration dict.  ``clients`` and ``s3tests_conf``
                   are read; ``idle_timeout`` is optional (default 30).
    :raises ConfigError: when a barbican ``kms_key``/``kms_key2`` names a
                         key that is not present in ``ctx.barbican.keys``.
    """
    assert isinstance(config, dict)
    log.info('Configuring s3-tests...')
    testdir = teuthology.get_testdir(ctx)
    for client, properties in config['clients'].iteritems():
        properties = properties or {}
        s3tests_conf = config['s3tests_conf'][client]
        s3tests_conf['DEFAULT']['calling_format'] = properties.get('calling-format', 'ordinary')

        # use rgw_server if given, or default to local client
        role = properties.get('rgw_server', client)

        endpoint = ctx.rgw.role_endpoints.get(role)
        assert endpoint, 's3tests: no rgw endpoint for {}'.format(role)

        s3tests_conf['DEFAULT']['host'] = endpoint.dns_name

        website_role = properties.get('rgw_website_server')
        if website_role:
            website_endpoint = ctx.rgw.role_endpoints.get(website_role)
            assert website_endpoint, \
                's3tests: no rgw endpoint for rgw_website_server {}'.format(website_role)
            assert website_endpoint.website_dns_name, \
                's3tests: no dns-s3website-name for rgw_website_server {}'.format(website_role)
            s3tests_conf['DEFAULT']['s3website_domain'] = website_endpoint.website_dns_name

        # The KMS sections get their own names: rebinding ``properties``
        # here would make the slow_backend lookup below read the barbican or
        # vault section instead of the client properties (and fail outright
        # when that section is None).
        if hasattr(ctx, 'barbican'):
            barbican_props = properties['barbican']
            if barbican_props is not None and 'kms_key' in barbican_props:
                if not (barbican_props['kms_key'] in ctx.barbican.keys):
                    raise ConfigError('Key '+barbican_props['kms_key']+' not defined')

                if not (barbican_props['kms_key2'] in ctx.barbican.keys):
                    raise ConfigError('Key '+barbican_props['kms_key2']+' not defined')

                key = ctx.barbican.keys[barbican_props['kms_key']]
                s3tests_conf['DEFAULT']['kms_keyid'] = key['id']

                key = ctx.barbican.keys[barbican_props['kms_key2']]
                s3tests_conf['DEFAULT']['kms_keyid2'] = key['id']

        elif hasattr(ctx, 'vault'):
            vault_props = properties['vault']
            log.info("Vault Key")
            s3tests_conf['DEFAULT']['kms_keyid'] = vault_props['key_path']
            s3tests_conf['DEFAULT']['kms_keyid2'] = vault_props['key_path2']

        else:
            # Fallback scenario where it's the local (ceph.conf) kms being tested
            s3tests_conf['DEFAULT']['kms_keyid'] = 'testkey-1'
            s3tests_conf['DEFAULT']['kms_keyid2'] = 'testkey-2'

        slow_backend = properties.get('slow_backend')
        if slow_backend:
            s3tests_conf['fixtures']['slow backend'] = slow_backend

        (remote,) = ctx.cluster.only(client).remotes.keys()
        remote.run(
            args=[
                'cd',
                '{tdir}/s3-tests'.format(tdir=testdir),
                run.Raw('&&'),
                './bootstrap',
                ],
            )
        conf_fp = StringIO()
        s3tests_conf.write(conf_fp)
        teuthology.write_file(
            remote=remote,
            path='{tdir}/archive/s3-tests.{client}.conf'.format(tdir=testdir, client=client),
            data=conf_fp.getvalue(),
            )

    log.info('Configuring boto...')
    boto_src = os.path.join(os.path.dirname(__file__), 'boto.cfg.template')
    for client, properties in config['clients'].iteritems():
        # open() instead of the file() constructor: same behaviour, and it
        # is the documented way to open a file
        with open(boto_src, 'rb') as f:
            (remote,) = ctx.cluster.only(client).remotes.keys()
            conf = f.read().format(
                idle_timeout=config.get('idle_timeout', 30)
                )
            teuthology.write_file(
                remote=remote,
                path='{tdir}/boto.cfg'.format(tdir=testdir),
                data=conf,
                )

    try:
        yield

    finally:
        log.info('Cleaning up boto...')
        for client, properties in config['clients'].iteritems():
            (remote,) = ctx.cluster.only(client).remotes.keys()
            remote.run(
                args=[
                    'rm',
                    '{tdir}/boto.cfg'.format(tdir=testdir),
                    ],
                )
def _remove(ctx, config, remote, debs):
    """
    Removes Debian packages from remote, rudely

    TODO: be less rude (e.g. using --force-yes)

    :param ctx: the argparse.Namespace object
    :param config: the config dict
    :param remote: the teuthology.orchestra.remote.Remote object
    :param debs: list of packages names to remove
    """
    log.info("Removing packages: {pkglist} on Debian system.".format(
        pkglist=", ".join(debs)))

    # Non-interactive apt-get invocation shared by the purge and the
    # autoremove step; existing configuration files are kept.
    apt_get = [
        'sudo',
        'DEBIAN_FRONTEND=noninteractive',
        'apt-get',
        '-y',
        '--force-yes',
        '-o', run.Raw('Dpkg::Options::="--force-confdef"'),
        '-o', run.Raw('Dpkg::Options::="--force-confold"'),
    ]

    # first ask nicely: purge the packages one at a time in a shell loop so
    # that a failure for one of them does not stop the others
    purge_loop = ['for', 'd', 'in'] + debs + [run.Raw(';'), 'do']
    purge_loop += apt_get
    purge_loop += [
        'purge',
        run.Raw('$d'),
        run.Raw('||'),
        'true',
        run.Raw(';'),
        'done',
    ]
    remote.run(args=purge_loop)

    # mop up anything that is broken
    remote.run(
        args=[
            'dpkg', '-l',
            run.Raw('|'),
            # Any package that is unpacked or half-installed and also requires
            # reinstallation
            'grep', r'^.\(U\|H\)R',
            run.Raw('|'),
            'awk', '{print $2}',
            run.Raw('|'),
            'sudo',
            'xargs', '--no-run-if-empty',
            'dpkg', '-P', '--force-remove-reinstreq',
        ])

    # then let apt clean up
    remote.run(args=apt_get + ['autoremove'])
def test_objectstore(ctx, config, cli_remote, REP_POOL, REP_NAME, ec=False):
    """
    Exercise ceph-objectstore-tool against the OSDs backing one pool.

    Test objects are created through ``cod_setup*``, every OSD reported as
    up is killed, and then the tool is run per OSD for: ``--op list``,
    get-bytes/set-bytes (replicated pools only), list-attrs/get-attr
    (plus a set-attr round trip for ``hinfo_key``), ``--op info``,
    ``--op log``, ``--op export``, ``--op remove`` and ``--op import``.
    When export, removal and import all succeeded the OSDs are revived and
    every object is read back with ``rados get`` and compared with its
    reference file.

    NOTE(review): ``FSPATH``, ``JPATH``, ``cod_setup*`` and ``rados`` are
    not defined in this function; they are expected to be provided by the
    enclosing module.

    :param ctx: Context
    :param config: Configuration dict; ``objects`` (default 10) is the
                   number of test objects.
    :param cli_remote: remote used for object setup and the final
                       ``rados get`` verification.
    :param REP_POOL: name of the pool under test.
    :param REP_NAME: object name prefix; objects are ``REP_NAME + <n>``.
    :param ec: passed through to ``cod_setup``.
    :returns: number of errors encountered.
    """
    manager = ctx.managers['ceph']

    osds = ctx.cluster.only(teuthology.is_type('osd'))

    TEUTHDIR = teuthology.get_testdir(ctx)
    DATADIR = os.path.join(TEUTHDIR, "ceph.data")
    DATALINECOUNT = 10000
    ERRORS = 0
    NUM_OBJECTS = config.get('objects', 10)
    log.info("objects: {num}".format(num=NUM_OBJECTS))

    pool_dump = manager.get_pool_dump(REP_POOL)
    REPID = pool_dump['pool']

    log.debug("repid={num}".format(num=REPID))

    db = {}

    LOCALDIR = tempfile.mkdtemp("cod")

    cod_setup_local_data(log, ctx, NUM_OBJECTS, LOCALDIR,
                         REP_NAME, DATALINECOUNT)
    allremote = []
    allremote.append(cli_remote)
    allremote += osds.remotes.keys()
    allremote = list(set(allremote))
    for remote in allremote:
        cod_setup_remote_data(log, ctx, remote, NUM_OBJECTS, DATADIR,
                              REP_NAME, DATALINECOUNT)

    ERRORS += cod_setup(log, ctx, cli_remote, NUM_OBJECTS, DATADIR,
                        REP_NAME, DATALINECOUNT, REP_POOL, db, ec)

    # Map each acting OSD to the PGs (or PG shards for EC pools) it holds.
    pgs = {}
    pgid_prefix = str(REPID) + "."
    for stats in manager.get_pg_stats():
        if not stats["pgid"].startswith(pgid_prefix):
            continue
        if pool_dump["type"] == ceph_manager.CephManager.REPLICATED_POOL:
            for osd in stats["acting"]:
                pgs.setdefault(osd, []).append(stats["pgid"])
        elif pool_dump["type"] == ceph_manager.CephManager.ERASURE_CODED_POOL:
            for shard, osd in enumerate(stats["acting"]):
                pgs.setdefault(osd, []).append("{pgid}s{shard}".
                                               format(pgid=stats["pgid"],
                                                      shard=shard))
        else:
            raise Exception("{pool} has an unexpected type {type}".
                            format(pool=REP_POOL, type=pool_dump["type"]))

    log.info(pgs)
    log.info(db)

    for osd in manager.get_osd_status()['up']:
        manager.kill_osd(osd)
    time.sleep(5)

    pgswithobjects = set()
    objsinpg = {}

    # The "{id}" placeholders inside FSPATH/JPATH survive this format call
    # and are filled in per OSD below.
    prefix = ("sudo ceph-objectstore-tool "
              "--data-path {fpath} "
              "--journal-path {jpath} ").format(fpath=FSPATH, jpath=JPATH)

    def osd_ids_on(roles):
        """Yield the numeric id of every ``osd.N`` role in ``roles``."""
        for role in roles:
            if role.startswith("osd."):
                yield int(role.split('.')[1])

    def osds_with_pgs():
        """Yield ``(remote, osdid)`` for each OSD holding a PG of the pool."""
        for remote, roles in osds.remotes.items():
            for osdid in osd_ids_on(roles):
                if osdid in pgs:
                    yield remote, osdid

    def object_cmd(osdid, pg, objspec, tail):
        """Build the argv for an object-level tool call ending in ``tail``."""
        cmd = (prefix + "--pgid {pg}").format(id=osdid, pg=pg).split()
        # the JSON object spec is passed single-quoted and unescaped
        cmd.append(run.Raw("'{json}'".format(json=objspec)))
        return cmd + tail

    # Test --op list and generate json for all objects
    log.info("Test --op list by generating json for all objects")
    for remote, roles in osds.remotes.items():
        log.debug(remote)
        log.debug(roles)
        for osdid in osd_ids_on(roles):
            log.info("process osd.{id} on {remote}".
                     format(id=osdid, remote=remote))
            cmd = (prefix + "--op list").format(id=osdid)
            proc = remote.run(args=cmd.split(), check_status=False,
                              stdout=StringIO())
            if proc.exitstatus != 0:
                log.error("Bad exit status {ret} from --op list request".
                          format(ret=proc.exitstatus))
                ERRORS += 1
                continue
            for pgline in proc.stdout.getvalue().splitlines():
                if not pgline:
                    continue
                (pg, obj) = json.loads(pgline)
                name = obj['oid']
                if name in db:
                    pgswithobjects.add(pg)
                    objsinpg.setdefault(pg, []).append(name)
                    db[name].setdefault("pg2json",
                                        {})[pg] = json.dumps(obj)

    log.info(db)
    log.info(pgswithobjects)
    log.info(objsinpg)

    if pool_dump["type"] == ceph_manager.CephManager.REPLICATED_POOL:
        # Test get-bytes
        log.info("Test get-bytes and set-bytes")
        GETNAME = os.path.join(DATADIR, "get")
        SETNAME = os.path.join(DATADIR, "set")
        for basename in db.keys():
            refpath = os.path.join(DATADIR, basename)

            for remote, osdid in osds_with_pgs():
                for pg, objspec in db[basename]["pg2json"].items():
                    if pg not in pgs[osdid]:
                        continue
                    cmd = object_cmd(osdid, pg, objspec,
                                     "get-bytes {fname}".
                                     format(fname=GETNAME).split())
                    proc = remote.run(args=cmd, check_status=False)
                    if proc.exitstatus != 0:
                        remote.run(args="rm -f {getfile}".
                                   format(getfile=GETNAME).split())
                        log.error("Bad exit status {ret}".
                                  format(ret=proc.exitstatus))
                        ERRORS += 1
                        continue
                    cmd = ("diff -q {file} {getfile}".
                           format(file=refpath, getfile=GETNAME))
                    # check_status=False: a difference must be counted
                    # as a test error below, not raised as an exception
                    proc = remote.run(args=cmd.split(),
                                      check_status=False)
                    if proc.exitstatus != 0:
                        log.error("Data from get-bytes differ")
                        ERRORS += 1
                    remote.run(args="rm -f {getfile}".
                               format(getfile=GETNAME).split())

                    # Overwrite the object with known data ...
                    data = ("put-bytes going into {file}\n".
                            format(file=refpath))
                    teuthology.write_file(remote, SETNAME, data)
                    cmd = object_cmd(osdid, pg, objspec,
                                     "set-bytes {fname}".
                                     format(fname=SETNAME).split())
                    proc = remote.run(args=cmd, check_status=False)
                    proc.wait()
                    if proc.exitstatus != 0:
                        log.info("set-bytes failed for object {obj} "
                                 "in pg {pg} osd.{id} ret={ret}".
                                 format(obj=basename, pg=pg,
                                        id=osdid, ret=proc.exitstatus))
                        ERRORS += 1

                    # ... read it back ...
                    cmd = object_cmd(osdid, pg, objspec,
                                     "get-bytes -".split())
                    proc = remote.run(args=cmd, check_status=False,
                                      stdout=StringIO())
                    proc.wait()
                    if proc.exitstatus != 0:
                        log.error("get-bytes after "
                                  "set-bytes ret={ret}".
                                  format(ret=proc.exitstatus))
                        ERRORS += 1
                    else:
                        if data != proc.stdout.getvalue():
                            log.error("Data inconsistent after "
                                      "set-bytes, got:")
                            log.error(proc.stdout.getvalue())
                            ERRORS += 1

                    # ... and restore the original content.
                    cmd = object_cmd(osdid, pg, objspec,
                                     "set-bytes {fname}".
                                     format(fname=refpath).split())
                    proc = remote.run(args=cmd, check_status=False)
                    proc.wait()
                    if proc.exitstatus != 0:
                        log.info("set-bytes failed for object {obj} "
                                 "in pg {pg} osd.{id} ret={ret}".
                                 format(obj=basename, pg=pg,
                                        id=osdid, ret=proc.exitstatus))
                        ERRORS += 1

    log.info("Test list-attrs get-attr")
    for basename in db.keys():
        for remote, osdid in osds_with_pgs():
            for pg, objspec in db[basename]["pg2json"].items():
                if pg not in pgs[osdid]:
                    continue
                cmd = object_cmd(osdid, pg, objspec, ["list-attrs"])
                proc = remote.run(args=cmd, check_status=False,
                                  stdout=StringIO(), stderr=StringIO())
                proc.wait()
                if proc.exitstatus != 0:
                    log.error("Bad exit status {ret}".
                              format(ret=proc.exitstatus))
                    ERRORS += 1
                    continue
                keys = proc.stdout.getvalue().split()
                # copy: matched keys are popped so leftovers can be reported
                values = dict(db[basename]["xattr"])

                for key in keys:
                    if (key == "_" or
                            key == "snapset" or
                            key == "hinfo_key"):
                        continue
                    key = key.strip("_")
                    if key not in values:
                        log.error("The key {key} should be present".
                                  format(key=key))
                        ERRORS += 1
                        continue
                    exp = values.pop(key)
                    cmd = object_cmd(osdid, pg, objspec,
                                     "get-attr {key}".
                                     format(key="_" + key).split())
                    proc = remote.run(args=cmd, check_status=False,
                                      stdout=StringIO())
                    proc.wait()
                    if proc.exitstatus != 0:
                        log.error("get-attr failed with {ret}".
                                  format(ret=proc.exitstatus))
                        ERRORS += 1
                        continue
                    val = proc.stdout.getvalue()
                    if exp != val:
                        log.error("For key {key} got value {got} "
                                  "instead of {expected}".
                                  format(key=key, got=val,
                                         expected=exp))
                        ERRORS += 1

                if "hinfo_key" in keys:
                    # Round trip: save the attribute, overwrite it with a
                    # placeholder, verify, restore it and verify again.
                    cmd_prefix = prefix.format(id=osdid)
                    cmd = """
      expected=$({prefix} --pgid {pg} '{json}' get-attr {key} | base64)
      echo placeholder | {prefix} --pgid {pg} '{json}' set-attr {key} -
      test $({prefix} --pgid {pg} '{json}' get-attr {key}) = placeholder
      echo $expected | base64 --decode | \
         {prefix} --pgid {pg} '{json}' set-attr {key} -
      test $({prefix} --pgid {pg} '{json}' get-attr {key} | base64) = $expected
                    """.format(prefix=cmd_prefix, pg=pg, json=objspec,
                               key="hinfo_key")
                    log.debug(cmd)
                    proc = remote.run(args=['bash', '-e', '-x',
                                            '-c', cmd],
                                      check_status=False,
                                      stdout=StringIO(),
                                      stderr=StringIO())
                    proc.wait()
                    if proc.exitstatus != 0:
                        log.error("failed with " +
                                  str(proc.exitstatus))
                        log.error(proc.stdout.getvalue() + " " +
                                  proc.stderr.getvalue())
                        ERRORS += 1

                if len(values) != 0:
                    log.error("Not all keys found, remaining keys:")
                    log.error(values)

    log.info("Test pg info")
    for remote, osdid in osds_with_pgs():
        for pg in pgs[osdid]:
            cmd = ((prefix + "--op info --pgid {pg}").
                   format(id=osdid, pg=pg).split())
            proc = remote.run(args=cmd, check_status=False,
                              stdout=StringIO())
            proc.wait()
            if proc.exitstatus != 0:
                # "{ret}" is a named field, so it needs a keyword argument
                log.error("Failure of --op info command with {ret}".
                          format(ret=proc.exitstatus))
                ERRORS += 1
                continue
            info = proc.stdout.getvalue()
            if str(pg) not in info:
                log.error("Bad data from info: {info}".format(info=info))
                ERRORS += 1

    log.info("Test pg logging")
    for remote, osdid in osds_with_pgs():
        for pg in pgs[osdid]:
            cmd = ((prefix + "--op log --pgid {pg}").
                   format(id=osdid, pg=pg).split())
            proc = remote.run(args=cmd, check_status=False,
                              stdout=StringIO())
            proc.wait()
            if proc.exitstatus != 0:
                log.error("Getting log failed for pg {pg} "
                          "from osd.{id} with {ret}".
                          format(pg=pg, id=osdid, ret=proc.exitstatus))
                ERRORS += 1
                continue
            # A PG log must mention "modify" exactly when the PG holds
            # one of the test objects.
            HASOBJ = pg in pgswithobjects
            MODOBJ = "modify" in proc.stdout.getvalue()
            if HASOBJ != MODOBJ:
                log.error("Bad log for pg {pg} from osd.{id}".
                          format(pg=pg, id=osdid))
                MSG = "" if HASOBJ else "NOT "
                log.error("Log should {msg}have a modify entry".
                          format(msg=MSG))
                ERRORS += 1

    log.info("Test pg export")
    EXP_ERRORS = 0
    for remote, osdid in osds_with_pgs():
        for pg in pgs[osdid]:
            fpath = os.path.join(DATADIR, "osd{id}.{pg}".
                                 format(id=osdid, pg=pg))

            cmd = ((prefix + "--op export --pgid {pg} --file {file}").
                   format(id=osdid, pg=pg, file=fpath))
            proc = remote.run(args=cmd, check_status=False,
                              stdout=StringIO())
            proc.wait()
            if proc.exitstatus != 0:
                log.error("Exporting failed for pg {pg} "
                          "on osd.{id} with {ret}".
                          format(pg=pg, id=osdid, ret=proc.exitstatus))
                EXP_ERRORS += 1

    ERRORS += EXP_ERRORS

    log.info("Test pg removal")
    RM_ERRORS = 0
    for remote, osdid in osds_with_pgs():
        for pg in pgs[osdid]:
            cmd = ((prefix + "--op remove --pgid {pg}").
                   format(pg=pg, id=osdid))
            proc = remote.run(args=cmd, check_status=False,
                              stdout=StringIO())
            proc.wait()
            if proc.exitstatus != 0:
                log.error("Removing failed for pg {pg} "
                          "on osd.{id} with {ret}".
                          format(pg=pg, id=osdid, ret=proc.exitstatus))
                RM_ERRORS += 1

    ERRORS += RM_ERRORS

    IMP_ERRORS = 0
    if EXP_ERRORS == 0 and RM_ERRORS == 0:
        log.info("Test pg import")
        for remote, osdid in osds_with_pgs():
            for pg in pgs[osdid]:
                fpath = os.path.join(DATADIR, "osd{id}.{pg}".
                                     format(id=osdid, pg=pg))

                cmd = ((prefix + "--op import --file {file}").
                       format(id=osdid, file=fpath))
                proc = remote.run(args=cmd, check_status=False,
                                  stdout=StringIO())
                proc.wait()
                if proc.exitstatus != 0:
                    log.error("Import failed from {file} with {ret}".
                              format(file=fpath, ret=proc.exitstatus))
                    IMP_ERRORS += 1
    else:
        log.warning("SKIPPING IMPORT TESTS DUE TO PREVIOUS FAILURES")

    ERRORS += IMP_ERRORS

    if EXP_ERRORS == 0 and RM_ERRORS == 0 and IMP_ERRORS == 0:
        log.info("Restarting OSDs....")
        # They are still look to be up because of setting nodown
        for osd in manager.get_osd_status()['up']:
            manager.revive_osd(osd)
        # Wait for health?
        time.sleep(5)
        # Let scrub after test runs verify consistency of all copies
        log.info("Verify replicated import data")
        objects = range(1, NUM_OBJECTS + 1)
        for i in objects:
            NAME = REP_NAME + "{num}".format(num=i)
            TESTNAME = os.path.join(DATADIR, "gettest")
            REFNAME = os.path.join(DATADIR, NAME)

            proc = rados(ctx, cli_remote,
                         ['-p', REP_POOL, 'get', NAME, TESTNAME], wait=False)

            ret = proc.wait()
            if ret != 0:
                log.error("After import, rados get failed with {ret}".
                          format(ret=proc.exitstatus))
                ERRORS += 1
                continue

            cmd = "diff -q {gettest} {ref}".format(gettest=TESTNAME,
                                                   ref=REFNAME)
            proc = cli_remote.run(args=cmd, check_status=False)
            proc.wait()
            if proc.exitstatus != 0:
                log.error("Data comparison failed for {obj}".format(obj=NAME))
                ERRORS += 1

    return ERRORS
def generate_iso(ctx, config):
    """
    Execute system commands to generate iso

    For every client the test repository is cloned, a cloud-init user-data
    file is assembled (setup template, one mkfs/mount step per eligible
    disk, ceph config copy, optional ``cloud_config_archive``, the test
    invocation and the teardown template) and written next to the
    meta-data file; ``genisoimage`` then packs them together with the ceph
    configuration, keyring and the test script.  This is a generator: after
    the ``yield`` the generated files and the clone are removed.

    :param ctx: Context
    :param config: dict mapping each client role to its configuration;
                   ``test`` and ``disks`` are required per client.
    """
    log.info('generating iso...')
    testdir = teuthology.get_testdir(ctx)

    # use ctx.config instead of config, because config has been
    # through teuthology.replace_all_with_clients()
    refspec = get_refspec_after_overrides(ctx.config, {})

    git_url = teuth_config.get_ceph_qa_suite_git_url()
    log.info('Pulling tests from %s ref %s', git_url, refspec)

    for client, client_config in config.items():
        assert 'test' in client_config, 'You must specify a test to run'
        test = client_config['test']

        (remote,) = ctx.cluster.only(client).remotes.keys()

        clone_dir = '{tdir}/qemu_clone.{role}'.format(tdir=testdir, role=client)
        remote.run(args=refspec.clone(git_url, clone_dir))

        src_dir = os.path.dirname(__file__)
        userdata_path = os.path.join(testdir, 'qemu', 'userdata.' + client)
        metadata_path = os.path.join(testdir, 'qemu', 'metadata.' + client)

        with open(os.path.join(src_dir, 'userdata_setup.yaml')) as f:
            setup_template = f.read()
        # configuring the commands to setup the nfs mount
        mnt_dir = "/export/{client}".format(client=client)
        test_setup = setup_template.format(mnt_dir=mnt_dir)

        with open(os.path.join(src_dir, 'userdata_teardown.yaml')) as f:
            test_teardown = f.read()

        # The user-data is collected piece by piece and joined at the end.
        sections = [test_setup]

        for disk in client_config['disks']:
            # only filesystem disks with a device letter and no image get
            # formatted and mounted inside the guest
            if disk['device_type'] == 'filesystem' and \
                    'device_letter' in disk and \
                    'image_url' not in disk:
                sections.append("""
- |
  #!/bin/bash
  mkdir /mnt/test_{dev_letter}
  mkfs -t xfs /dev/vd{dev_letter}
  mount -t xfs /dev/vd{dev_letter} /mnt/test_{dev_letter}
""".format(dev_letter=disk['device_letter']))

        sections.append("""
- |
  #!/bin/bash
  test -d /etc/ceph || mkdir /etc/ceph
  cp /mnt/cdrom/ceph.* /etc/ceph/
""")

        cloud_config_archive = client_config.get('cloud_config_archive', [])
        if cloud_config_archive:
            sections.append(yaml.safe_dump(cloud_config_archive,
                                           default_style='|',
                                           default_flow_style=False))

        # this may change later to pass the directories as args to the
        # script or something. xfstests needs that.
        sections.append("""
- |
  #!/bin/bash
  test -d /mnt/test_b && cd /mnt/test_b
  /mnt/cdrom/test.sh > /mnt/log/test.log 2>&1 && touch /mnt/log/success
""")
        sections.append(test_teardown)

        # the assembled text is itself a template for branch and sha1
        user_data = ''.join(sections).format(
            ceph_branch=ctx.config.get('branch'),
            ceph_sha1=ctx.config.get('sha1'))
        remote.write_file(userdata_path, user_data)

        with open(os.path.join(src_dir, 'metadata.yaml'), 'rb') as f:
            remote.write_file(metadata_path, f)

        test_file = '{tdir}/qemu/{client}.test.sh'.format(tdir=testdir, client=client)

        log.info('fetching test %s for %s', test, client)
        remote.run(
            args=[
                'cp', '--', os.path.join(clone_dir, test), test_file,
                run.Raw('&&'),
                'chmod', '755', test_file,
            ],
        )
        iso_path = '{tdir}/qemu/{client}.iso'.format(tdir=testdir, client=client)
        remote.run(
            args=[
                'genisoimage', '-quiet', '-input-charset', 'utf-8',
                '-volid', 'cidata', '-joliet', '-rock',
                '-o', iso_path,
                '-graft-points',
                'user-data={userdata}'.format(userdata=userdata_path),
                'meta-data={metadata}'.format(metadata=metadata_path),
                'ceph.conf=/etc/ceph/ceph.conf',
                'ceph.keyring=/etc/ceph/ceph.keyring',
                'test.sh={file}'.format(file=test_file),
            ],
        )
    try:
        yield
    finally:
        for client in config.keys():
            (remote,) = ctx.cluster.only(client).remotes.keys()
            leftovers = [
                '{tdir}/qemu/{client}.iso'.format(tdir=testdir, client=client),
                os.path.join(testdir, 'qemu', 'userdata.' + client),
                os.path.join(testdir, 'qemu', 'metadata.' + client),
                '{tdir}/qemu/{client}.test.sh'.format(tdir=testdir, client=client),
                '{tdir}/qemu_clone.{client}'.format(tdir=testdir, client=client),
            ]
            remote.run(args=['rm', '-rf'] + leftovers)
def run_playbook(self):
    """
    Install ansible on the installer node and run the ceph-ansible command.

    Prerequisite packages are installed with yum or apt depending on the
    installer's package type, the configured repository is cloned, a
    virtualenv with the requested ansible version is created in
    ``~/ceph-ansible`` and ``self.args`` is executed inside it.  Afterwards
    cluster health is awaited (unless ``wait-for-health`` is false), the
    rbd pool is created and the keyring permissions are fixed.
    """
    # setup ansible on first mon node
    installer = self.ceph_installer
    playbook_args = self.args

    if installer.os.package_type == 'rpm':
        # handle selinux init issues during purge-cluster
        # https://bugzilla.redhat.com/show_bug.cgi?id=1364703
        installer.run(
            args=['sudo', 'yum', 'remove', '-y', 'libselinux-python'])
        # install crypto/selinux packages for ansible
        installer.run(args=[
            'sudo', 'yum', 'install', '-y',
            'libffi-devel',
            'python-devel',
            'openssl-devel',
            'libselinux-python',
        ])
    else:
        # update ansible from ppa
        installer.run(args=[
            'sudo', 'add-apt-repository', run.Raw('ppa:ansible/ansible'),
        ])
        installer.run(args=['sudo', 'apt-get', 'update'])
        installer.run(args=[
            'sudo', 'apt-get', 'install', '-y',
            'ansible',
            'libssl-dev',
            'python-openssl',
            'libffi-dev',
            'python-dev',
        ])

    ansible_repo = self.config['repo']
    # fall back to the defaults when the option is missing or empty
    branch = self.config.get('branch') or 'master'
    requested_version = self.config.get('ansible-version')
    if requested_version:
        ansible_ver = 'ansible==' + requested_version
    else:
        ansible_ver = 'ansible==2.5'

    # start from a clean checkout; a missing directory is not an error
    installer.run(
        args=['rm', '-rf', run.Raw('~/ceph-ansible')],
        check_status=False
    )
    installer.run(args=[
        'mkdir',
        run.Raw('~/ceph-ansible'),
        run.Raw(';'),
        'git',
        'clone',
        run.Raw('-b %s' % branch),
        run.Raw(ansible_repo),
    ])
    self._copy_and_print_config()

    str_args = ' '.join(playbook_args)
    installer.run(args=[
        run.Raw('cd ~/ceph-ansible'),
        run.Raw(';'),
        'virtualenv',
        run.Raw('--system-site-packages'),
        'venv',
        run.Raw(';'),
        run.Raw('source venv/bin/activate'),
        run.Raw(';'),
        'pip',
        'install',
        '--upgrade',
        'pip',
        run.Raw(';'),
        'pip',
        'install',
        run.Raw('setuptools>=11.3'),
        run.Raw('notario>=0.0.13'),  # FIXME: use requirements.txt
        run.Raw(ansible_ver),
        run.Raw(';'),
        run.Raw(str_args)
    ])

    if self.config.get('wait-for-health', True):
        self.wait_for_ceph_health()

    # for the teuthology workunits to work we
    # need to fix the permission on keyring to be readable by them
    self._create_rbd_pool()
    self.fix_keyring_permission()