def task(ctx, config):
    """
    Test handling of lost objects.

    A pretty rigid cluster is brought up and tested by this task
    """
    POOL = 'unfound_pool'
    if config is None:
        config = {}
    assert isinstance(config, dict), \
        'lost_unfound task only accepts a dict for configuration'
    first_mon = teuthology.get_first_mon(ctx, config)
    (mon,) = ctx.cluster.only(first_mon).remotes.keys()

    manager = ceph_manager.CephManager(
        mon,
        ctx=ctx,
        logger=log.getChild('ceph_manager'),
        )

    while len(manager.get_osd_status()['up']) < 3:
        time.sleep(10)

    manager.wait_for_clean()

    manager.create_pool(POOL)

    # something that is always there
    dummyfile = '/etc/fstab'

    # take an osd out until the very end
    manager.kill_osd(2)
    manager.mark_down_osd(2)
    manager.mark_out_osd(2)

    # kludge to make sure they get a map
    rados(ctx, mon, ['-p', POOL, 'put', 'dummy', dummyfile])

    manager.flush_pg_stats([0, 1])
    manager.wait_for_recovery()

    # create old objects
    for f in range(1, 10):
        rados(ctx, mon, ['-p', POOL, 'put', 'existing_%d' % f, dummyfile])
        rados(ctx, mon, ['-p', POOL, 'put', 'existed_%d' % f, dummyfile])
        rados(ctx, mon, ['-p', POOL, 'rm', 'existed_%d' % f])

    # delay recovery, and make the pg log very long (to prevent backfill)
    manager.raw_cluster_cmd(
        'tell', 'osd.1',
        'injectargs',
        '--osd-recovery-delay-start 1000 --osd-min-pg-log-entries 100000000'
        )

    manager.kill_osd(0)
    manager.mark_down_osd(0)

    for f in range(1, 10):
        rados(ctx, mon, ['-p', POOL, 'put', 'new_%d' % f, dummyfile])
        rados(ctx, mon, ['-p', POOL, 'put', 'existed_%d' % f, dummyfile])
        rados(ctx, mon, ['-p', POOL, 'put', 'existing_%d' % f, dummyfile])

    # bring osd.0 back up, let it peer, but don't replicate the new
    # objects...
    log.info('osd.0 command_args is %s',
             ctx.daemons.get_daemon('osd', 0).command_args)
    ctx.daemons.get_daemon('osd', 0).command_kwargs['args'].extend(
        ['--osd-recovery-delay-start', '1000'])
    manager.revive_osd(0)
    manager.mark_in_osd(0)
    manager.wait_till_osd_is_up(0)

    manager.flush_pg_stats([1, 0])
    manager.wait_till_active()

    # take out osd.1 and the only copy of those objects.
    manager.kill_osd(1)
    manager.mark_down_osd(1)
    manager.mark_out_osd(1)
    manager.raw_cluster_cmd('osd', 'lost', '1', '--yes-i-really-mean-it')

    # bring up osd.2 so that things would otherwise, in theory, recover fully
    manager.revive_osd(2)
    manager.mark_in_osd(2)
    manager.wait_till_osd_is_up(2)

    manager.flush_pg_stats([0, 2])
    manager.wait_till_active()
    manager.flush_pg_stats([0, 2])

    # verify that there are unfound objects
    unfound = manager.get_num_unfound_objects()
    log.info("there are %d unfound objects" % unfound)
    assert unfound

    testdir = teuthology.get_testdir(ctx)
    procs = []
    if config.get('parallel_bench', True):
        procs.append(mon.run(
            args=[
                "/bin/sh", "-c",
                " ".join(['adjust-ulimits',
                          'ceph-coverage',
                          '{tdir}/archive/coverage',
                          'rados',
                          '--no-log-to-stderr',
                          '--name', 'client.admin',
                          '-b', str(4 << 10),
                          '-p', POOL,
                          '-t', '20',
                          'bench', '240', 'write',
                          ]).format(tdir=testdir),
            ],
            logger=log.getChild(
                'radosbench.{id}'.format(id='client.admin')),
            stdin=run.PIPE,
            wait=False))
    time.sleep(10)

    # mark stuff lost
    pgs = manager.get_pg_stats()
    for pg in pgs:
        if pg['stat_sum']['num_objects_unfound'] > 0:
            primary = 'osd.%d' % pg['acting'][0]

            # verify that i can list them direct from the osd
            log.info('listing missing/lost in %s state %s',
                     pg['pgid'], pg['state'])
            m = manager.list_pg_unfound(pg['pgid'])
            # log.info('%s' % m)
            assert m['num_unfound'] == pg['stat_sum']['num_objects_unfound']
            assert m['available_might_have_unfound'] == True
            assert m['might_have_unfound'][0]['osd'] == "1"
            assert m['might_have_unfound'][0]['status'] == "osd is down"
            num_unfound = 0
            for o in m['objects']:
                if len(o['locations']) == 0:
                    num_unfound += 1
            assert m['num_unfound'] == num_unfound

            log.info("reverting unfound in %s on %s", pg['pgid'], primary)
            manager.raw_cluster_cmd('pg', pg['pgid'],
                                    'mark_unfound_lost', 'revert')
        else:
            log.info("no unfound in %s", pg['pgid'])

    manager.raw_cluster_cmd('tell', 'osd.0', 'debug', 'kick_recovery_wq', '5')
    manager.raw_cluster_cmd('tell', 'osd.2', 'debug', 'kick_recovery_wq', '5')
    manager.flush_pg_stats([0, 2])
    manager.wait_for_recovery()

    # verify result
    for f in range(1, 10):
        err = rados(ctx, mon, ['-p', POOL, 'get', 'new_%d' % f, '-'])
        assert err
        err = rados(ctx, mon, ['-p', POOL, 'get', 'existed_%d' % f, '-'])
        assert err
        err = rados(ctx, mon, ['-p', POOL, 'get', 'existing_%d' % f, '-'])
        assert not err

    # see if osd.1 can cope
    manager.mark_in_osd(1)
    manager.revive_osd(1)
    manager.wait_till_osd_is_up(1)
    manager.wait_for_clean()
    run.wait(procs)
    manager.wait_for_clean()
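
# The tasks in this module lean on a small rados() helper (in the Ceph
# tree it lives under qa/tasks/util/rados.py). A minimal sketch of what
# it does, assuming the teuthology remote.run() API; the coverage
# wrapper details are illustrative rather than authoritative:
def rados(ctx, remote, cmd, wait=True, check_status=False):
    """Run a rados CLI command on the given remote; return the exit
    status when wait=True, otherwise the still-running proc."""
    testdir = teuthology.get_testdir(ctx)
    log.info("rados %s" % ' '.join(cmd))
    pre = [
        'adjust-ulimits',
        'ceph-coverage',
        '{tdir}/archive/coverage'.format(tdir=testdir),
        'rados',
        ]
    pre.extend(cmd)
    proc = remote.run(args=pre, check_status=check_status, wait=wait)
    if wait:
        return proc.exitstatus
    return proc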
def task(ctx, config):
    """
    Test handling of object location going down
    """
    if config is None:
        config = {}
    assert isinstance(config, dict), \
        'lost_unfound task only accepts a dict for configuration'
    first_mon = teuthology.get_first_mon(ctx, config)
    (mon,) = ctx.cluster.only(first_mon).remotes.keys()

    manager = ceph_manager.CephManager(
        mon,
        ctx=ctx,
        logger=log.getChild('ceph_manager'),
        )

    while len(manager.get_osd_status()['up']) < 3:
        time.sleep(10)

    manager.wait_for_clean()

    # something that is always there
    dummyfile = '/etc/fstab'

    # take 0, 1 out
    manager.mark_out_osd(0)
    manager.mark_out_osd(1)
    manager.wait_for_clean()

    # delay recovery, and make the pg log very long (to prevent backfill)
    for osd_id in range(4):
        manager.raw_cluster_cmd(
            'tell', 'osd.%d' % osd_id,
            'injectargs',
            '--osd-recovery-delay-start 10000 '
            '--osd-min-pg-log-entries 100000000'
            )

    # kludge to make sure they get a map
    rados(ctx, mon, ['-p', 'data', 'put', 'dummy', dummyfile])

    # create old objects
    for f in range(1, 10):
        rados(ctx, mon, ['-p', 'data', 'put', 'existing_%d' % f, dummyfile])

    manager.mark_out_osd(3)
    manager.wait_till_active()

    manager.mark_in_osd(0)
    manager.wait_till_active()

    manager.flush_pg_stats([2, 0])

    manager.mark_out_osd(2)
    manager.wait_till_active()

    # bring up 1
    manager.mark_in_osd(1)
    manager.wait_till_active()

    manager.flush_pg_stats([0, 1])
    log.info("Getting unfound objects")
    unfound = manager.get_num_unfound_objects()
    assert not unfound

    manager.kill_osd(2)
    manager.mark_down_osd(2)
    manager.kill_osd(3)
    manager.mark_down_osd(3)

    manager.flush_pg_stats([0, 1])
    log.info("Getting unfound objects")
    unfound = manager.get_num_unfound_objects()
    assert unfound
def task(ctx, config):
    """
    Test handling of lost objects on an ec pool.

    A pretty rigid cluster is brought up and tested by this task
    """
    if config is None:
        config = {}
    assert isinstance(config, dict), \
        'lost_unfound task only accepts a dict for configuration'
    first_mon = teuthology.get_first_mon(ctx, config)
    (mon,) = ctx.cluster.only(first_mon).remotes.keys()

    manager = ceph_manager.CephManager(
        mon,
        ctx=ctx,
        logger=log.getChild('ceph_manager'),
        )

    manager.wait_for_clean()

    profile = config.get('erasure_code_profile', {
        'k': '2',
        'm': '2',
        'crush-failure-domain': 'osd'
    })
    profile_name = profile.get('name', 'lost_unfound')
    manager.create_erasure_code_profile(profile_name, profile)
    pool = manager.create_pool_with_unique_name(
        erasure_code_profile_name=profile_name,
        min_size=2)

    # something that is always there, readable and never empty
    dummyfile = '/etc/group'

    # kludge to make sure they get a map
    rados(ctx, mon, ['-p', pool, 'put', 'dummy', dummyfile])

    manager.flush_pg_stats([0, 1])
    manager.wait_for_recovery()

    # create old objects
    for f in range(1, 10):
        rados(ctx, mon, ['-p', pool, 'put', 'existing_%d' % f, dummyfile])
        rados(ctx, mon, ['-p', pool, 'put', 'existed_%d' % f, dummyfile])
        rados(ctx, mon, ['-p', pool, 'rm', 'existed_%d' % f])

    # delay recovery, and make the pg log very long (to prevent backfill)
    manager.raw_cluster_cmd(
        'tell', 'osd.1',
        'injectargs',
        '--osd-recovery-delay-start 1000 --osd-min-pg-log-entries 100000000'
        )

    manager.kill_osd(0)
    manager.mark_down_osd(0)
    manager.kill_osd(3)
    manager.mark_down_osd(3)

    for f in range(1, 10):
        rados(ctx, mon, ['-p', pool, 'put', 'new_%d' % f, dummyfile])
        rados(ctx, mon, ['-p', pool, 'put', 'existed_%d' % f, dummyfile])
        rados(ctx, mon, ['-p', pool, 'put', 'existing_%d' % f, dummyfile])

    # take out osd.1 and a necessary shard of those objects.
    manager.kill_osd(1)
    manager.mark_down_osd(1)
    manager.raw_cluster_cmd('osd', 'lost', '1', '--yes-i-really-mean-it')
    manager.revive_osd(0)
    manager.wait_till_osd_is_up(0)
    manager.revive_osd(3)
    manager.wait_till_osd_is_up(3)

    manager.flush_pg_stats([0, 2, 3])
    manager.wait_till_active()
    manager.flush_pg_stats([0, 2, 3])

    # verify that there are unfound objects
    unfound = manager.get_num_unfound_objects()
    log.info("there are %d unfound objects" % unfound)
    assert unfound

    testdir = teuthology.get_testdir(ctx)
    procs = []
    if config.get('parallel_bench', True):
        procs.append(mon.run(
            args=[
                "/bin/sh", "-c",
                " ".join(['adjust-ulimits',
                          'ceph-coverage',
                          '{tdir}/archive/coverage',
                          'rados',
                          '--no-log-to-stderr',
                          '--name', 'client.admin',
                          '-b', str(4 << 10),
                          '-p', pool,
                          '-t', '20',
                          'bench', '240', 'write',
                          ]).format(tdir=testdir),
            ],
            logger=log.getChild(
                'radosbench.{id}'.format(id='client.admin')),
            stdin=run.PIPE,
            wait=False))
    time.sleep(10)

    # mark stuff lost
    pgs = manager.get_pg_stats()
    for pg in pgs:
        if pg['stat_sum']['num_objects_unfound'] > 0:
            # verify that i can list them direct from the osd
            log.info('listing missing/lost in %s state %s',
                     pg['pgid'], pg['state'])
            m = manager.list_pg_unfound(pg['pgid'])
            log.info('%s' % m)
            assert m['num_unfound'] == pg['stat_sum']['num_objects_unfound']

            log.info("deleting unfound in %s", pg['pgid'])
            manager.raw_cluster_cmd('pg', pg['pgid'],
                                    'mark_unfound_lost', 'delete')
        else:
            log.info("no unfound in %s", pg['pgid'])

    manager.raw_cluster_cmd('tell', 'osd.0', 'debug', 'kick_recovery_wq', '5')
    manager.raw_cluster_cmd('tell', 'osd.2', 'debug', 'kick_recovery_wq', '5')
    manager.raw_cluster_cmd('tell', 'osd.3', 'debug', 'kick_recovery_wq', '5')
    manager.flush_pg_stats([0, 2, 3])
    manager.wait_for_recovery()

    if not config.get('parallel_bench', True):
        time.sleep(20)

    # verify result
    for f in range(1, 10):
        err = rados(ctx, mon, ['-p', pool, 'get', 'new_%d' % f, '-'])
        assert err
        err = rados(ctx, mon, ['-p', pool, 'get', 'existed_%d' % f, '-'])
        assert err
        err = rados(ctx, mon, ['-p', pool, 'get', 'existing_%d' % f, '-'])
        assert err

    # see if osd.1 can cope
    manager.revive_osd(1)
    manager.wait_till_osd_is_up(1)
    manager.wait_for_clean()
    run.wait(procs)
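
# For reference, list_pg_unfound() wraps `ceph pg <pgid> list_unfound`,
# and the tasks above rely only on the fields shown here. A trimmed,
# illustrative (not verbatim) example of the structure it returns:
#
# {
#     "num_unfound": 9,
#     "available_might_have_unfound": True,
#     "might_have_unfound": [
#         {"osd": "1", "status": "osd is down"}
#     ],
#     "objects": [
#         {"oid": {"oid": "new_1"}, "locations": []}
#     ]
# }
#
# An object is counted as unfound when its "locations" list is empty,
# which is exactly what the revert loop above re-derives and asserts.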
def cod_setup(log, ctx, remote, NUM_OBJECTS, DATADIR, BASE_NAME,
              DATALINECOUNT, POOL, db, ec):
    ERRORS = 0
    log.info("Creating {objs} objects in pool".format(objs=NUM_OBJECTS))

    objects = range(1, NUM_OBJECTS + 1)
    for i in objects:
        NAME = BASE_NAME + "{num}".format(num=i)
        DDNAME = os.path.join(DATADIR, NAME)

        proc = rados(ctx, remote, ['-p', POOL, 'put', NAME, DDNAME],
                     wait=False)
        # proc = remote.run(args=['rados', '-p', POOL, 'put', NAME, DDNAME])
        ret = proc.wait()
        if ret != 0:
            log.critical("Rados put failed with status {ret}".format(
                ret=proc.exitstatus))
            sys.exit(1)

        db[NAME] = {}

        keys = range(i)
        db[NAME]["xattr"] = {}
        for k in keys:
            if k == 0:
                continue
            mykey = "key{i}-{k}".format(i=i, k=k)
            myval = "val{i}-{k}".format(i=i, k=k)
            proc = remote.run(args=['rados', '-p', POOL, 'setxattr',
                                    NAME, mykey, myval])
            ret = proc.wait()
            if ret != 0:
                log.error("setxattr failed with {ret}".format(ret=ret))
                ERRORS += 1
            db[NAME]["xattr"][mykey] = myval

        # Erasure coded pools don't support omap
        if ec:
            continue

        # Create omap header in all objects but REPobject1
        if i != 1:
            myhdr = "hdr{i}".format(i=i)
            proc = remote.run(args=['rados', '-p', POOL, 'setomapheader',
                                    NAME, myhdr])
            ret = proc.wait()
            if ret != 0:
                log.critical("setomapheader failed with {ret}".format(
                    ret=ret))
                ERRORS += 1
            db[NAME]["omapheader"] = myhdr

        db[NAME]["omap"] = {}
        for k in keys:
            if k == 0:
                continue
            mykey = "okey{i}-{k}".format(i=i, k=k)
            myval = "oval{i}-{k}".format(i=i, k=k)
            proc = remote.run(args=['rados', '-p', POOL, 'setomapval',
                                    NAME, mykey, myval])
            ret = proc.wait()
            if ret != 0:
                log.critical("setomapval failed with {ret}".format(ret=ret))
                ERRORS += 1
            db[NAME]["omap"][mykey] = myval

    return ERRORS
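
# cod_setup() above expects the per-object data files to exist already.
# In the full module they are produced by cod_setup_local_data() /
# cod_setup_remote_data(); a minimal local-side sketch, with the file
# contents chosen arbitrarily for illustration:
def cod_setup_local_data(log, ctx, NUM_OBJECTS, DATADIR, BASE_NAME,
                         DATALINECOUNT):
    for i in range(1, NUM_OBJECTS + 1):
        NAME = BASE_NAME + "{num}".format(num=i)
        LOCALNAME = os.path.join(DATADIR, NAME)
        # DATALINECOUNT identical lines make diff-based verification easy
        with open(LOCALNAME, "w") as fd:
            for _ in range(DATALINECOUNT):
                fd.write("This is the data for " + NAME + "\n")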
def task(ctx, config):
    """
    Test backfill reservation calculates "toofull" condition correctly.

    A pretty rigid cluster is brought up and tested by this task
    """
    if config is None:
        config = {}
    assert isinstance(config, dict), \
        'backfill_toofull task only accepts a dict for configuration'
    first_mon = teuthology.get_first_mon(ctx, config)
    (mon,) = ctx.cluster.only(first_mon).remotes.keys()

    manager = ceph_manager.CephManager(
        mon,
        ctx=ctx,
        logger=log.getChild('ceph_manager'),
        )

    profile = config.get('erasure_code_profile', {
        'k': '2',
        'm': '1',
        'crush-failure-domain': 'osd'
    })
    profile_name = profile.get('name', 'backfill_toofull')
    manager.create_erasure_code_profile(profile_name, profile)
    pool = manager.create_pool_with_unique_name(
        pg_num=1,
        erasure_code_profile_name=profile_name,
        min_size=2)
    manager.raw_cluster_cmd('osd', 'pool', 'set', pool,
                            'pg_autoscale_mode', 'off')

    manager.flush_pg_stats([0, 1, 2, 3])
    manager.wait_for_clean()

    pool_id = manager.get_pool_num(pool)
    pgid = '%d.0' % pool_id
    pgs = manager.get_pg_stats()
    acting = next((pg['acting'] for pg in pgs if pg['pgid'] == pgid), None)
    log.debug("acting=%s" % acting)
    assert acting
    primary = acting[0]
    target = acting[1]

    log.debug("write some data")
    rados(ctx, mon, ['-p', pool, 'bench', '120', 'write', '--no-cleanup'])
    df = manager.get_osd_df(target)
    log.debug("target osd df: %s" % df)

    total_kb = df['kb']
    used_kb = df['kb_used']

    log.debug("pause recovery")
    manager.raw_cluster_cmd('osd', 'set', 'noout')
    manager.raw_cluster_cmd('osd', 'set', 'nobackfill')
    manager.raw_cluster_cmd('osd', 'set', 'norecover')

    log.debug("stop target osd %s" % target)
    manager.kill_osd(target)
    manager.wait_till_active()

    pgs = manager.get_pg_stats()
    pg = next((pg for pg in pgs if pg['pgid'] == pgid), None)
    log.debug('pg=%s' % pg)
    assert pg

    log.debug("re-write data")
    rados(ctx, mon, ['-p', pool, 'cleanup'])
    time.sleep(10)
    rados(ctx, mon, ['-p', pool, 'bench', '60', 'write', '--no-cleanup'])

    df = manager.get_osd_df(primary)
    log.debug("primary osd df: %s" % df)

    primary_used_kb = df['kb_used']

    log.info("test backfill reservation rejected with toofull")

    # Set the backfillfull ratio below the new data size and expect the pg
    # to enter the backfill_toofull state.
    #
    # We also need to update the nearfull ratio to prevent
    # "full ratio(s) out of order".

    backfillfull = 0.9 * primary_used_kb / total_kb
    nearfull = backfillfull * 0.9

    log.debug("update nearfull ratio to %s and backfillfull ratio to %s" %
              (nearfull, backfillfull))
    manager.raw_cluster_cmd('osd', 'set-nearfull-ratio',
                            '{:.3f}'.format(nearfull + 0.001))
    manager.raw_cluster_cmd('osd', 'set-backfillfull-ratio',
                            '{:.3f}'.format(backfillfull + 0.001))

    log.debug("start target osd %s" % target)

    manager.revive_osd(target)
    manager.wait_for_active()
    manager.wait_till_osd_is_up(target)

    wait_for_pg_state(manager, pgid, 'backfill_toofull', target)

    log.info("test pg not enter backfill_toofull after restarting backfill")

    # We want to set the backfillfull ratio big enough for the target to
    # successfully backfill new data but smaller than the sum of old and new
    # data, so if the osd backfill reservation incorrectly calculates
    # "toofull" the test will detect this (fail).
    #
    # Note, we need to operate with "uncompressed" bytes because currently
    # osd backfill reservation does not take compression into account.
    #
    # We also need to update the nearfull ratio to prevent
    # "full ratio(s) out of order".

    pdf = manager.get_pool_df(pool)
    log.debug("pool %s df: %s" % (pool, pdf))
    assert pdf
    compress_ratio = 1.0 * pdf['compress_under_bytes'] / \
        pdf['compress_bytes_used'] \
        if pdf['compress_bytes_used'] > 0 else 1.0
    log.debug("compress_ratio: %s" % compress_ratio)

    backfillfull = (used_kb + primary_used_kb) * compress_ratio / total_kb
    assert backfillfull < 0.9

    nearfull_min = max(used_kb, primary_used_kb) * compress_ratio / total_kb
    assert nearfull_min < backfillfull
    delta = backfillfull - nearfull_min
    nearfull = nearfull_min + delta * 0.1
    backfillfull = nearfull_min + delta * 0.2

    log.debug("update nearfull ratio to %s and backfillfull ratio to %s" %
              (nearfull, backfillfull))
    manager.raw_cluster_cmd('osd', 'set-nearfull-ratio',
                            '{:.3f}'.format(nearfull + 0.001))
    manager.raw_cluster_cmd('osd', 'set-backfillfull-ratio',
                            '{:.3f}'.format(backfillfull + 0.001))

    wait_for_pg_state(manager, pgid, 'backfilling', target)

    pgs = manager.get_pg_stats()
    pg = next((pg for pg in pgs if pg['pgid'] == pgid), None)
    log.debug('pg=%s' % pg)
    assert pg

    log.debug("interrupt %s backfill" % target)
    manager.mark_down_osd(target)
    # after marking the target osd down it will automatically be
    # up soon again

    log.debug("resume recovery")
    manager.raw_cluster_cmd('osd', 'unset', 'noout')
    manager.raw_cluster_cmd('osd', 'unset', 'nobackfill')
    manager.raw_cluster_cmd('osd', 'unset', 'norecover')

    # wait for everything to peer, backfill and recover
    manager.wait_for_clean()

    pgs = manager.get_pg_stats()
    pg = next((pg for pg in pgs if pg['pgid'] == pgid), None)
    log.info('pg=%s' % pg)
    assert pg
    assert 'clean' in pg['state'].split('+')
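
# The backfill_toofull task above polls via wait_for_pg_state(), which
# is defined elsewhere in the module. A sketch of the polling loop it
# implies, assuming only the CephManager APIs already used above; the
# timeout bound and flushed OSD set are illustrative:
def wait_for_pg_state(manager, pgid, state, to_osd):
    log.debug("waiting for pg %s to reach state %s on osd %s" %
              (pgid, state, to_osd))
    for _ in range(300):
        time.sleep(5)
        manager.flush_pg_stats([0, 1, 2, 3])
        pgs = manager.get_pg_stats()
        pg = next((pg for pg in pgs if pg['pgid'] == pgid), None)
        assert pg
        status = pg['state'].split('+')
        # done once the target osd is in the up set and the pg reports
        # the requested state
        if to_osd in pg['up'] and state in status:
            return
    assert False, 'pg %s never reached %s on osd %s' % (pgid, state, to_osd)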
def task(ctx, config):
    """
    Test handling of divergent entries with prior_version
    prior to log_tail and a ceph-objectstore-tool export/import

    overrides:
      ceph:
        conf:
          osd:
            debug osd: 5

    Requires 3 osds on a single test node.
    """
    if config is None:
        config = {}
    assert isinstance(config, dict), \
        'divergent_priors task only accepts a dict for configuration'

    manager = ctx.managers['ceph']

    while len(manager.get_osd_status()['up']) < 3:
        time.sleep(10)
    manager.flush_pg_stats([0, 1, 2])
    manager.raw_cluster_cmd('osd', 'set', 'noout')
    manager.raw_cluster_cmd('osd', 'set', 'noin')
    manager.raw_cluster_cmd('osd', 'set', 'nodown')
    manager.wait_for_clean()

    # something that is always there
    dummyfile = '/etc/fstab'
    dummyfile2 = '/etc/resolv.conf'
    testdir = teuthology.get_testdir(ctx)

    # create 1 pg pool
    log.info('creating foo')
    manager.raw_cluster_cmd('osd', 'pool', 'create', 'foo', '1')

    osds = [0, 1, 2]
    for i in osds:
        manager.set_config(i, osd_min_pg_log_entries=10)
        manager.set_config(i, osd_max_pg_log_entries=10)
        manager.set_config(i, osd_pg_log_trim_min=5)

    # determine primary
    divergent = manager.get_pg_primary('foo', 0)
    log.info("primary and soon to be divergent is %d", divergent)
    non_divergent = list(osds)
    non_divergent.remove(divergent)

    log.info('writing initial objects')
    first_mon = teuthology.get_first_mon(ctx, config)
    (mon,) = ctx.cluster.only(first_mon).remotes.keys()
    # write 100 objects
    for i in range(100):
        rados(ctx, mon, ['-p', 'foo', 'put', 'existing_%d' % i, dummyfile])

    manager.wait_for_clean()

    # blackhole non_divergent
    log.info("blackholing osds %s", str(non_divergent))
    for i in non_divergent:
        manager.set_config(i, objectstore_blackhole=1)

    DIVERGENT_WRITE = 5
    DIVERGENT_REMOVE = 5
    # Write some soon to be divergent
    log.info('writing divergent objects')
    for i in range(DIVERGENT_WRITE):
        rados(ctx, mon, ['-p', 'foo', 'put', 'existing_%d' % i,
                         dummyfile2], wait=False)
    # Remove some soon to be divergent
    log.info('remove divergent objects')
    for i in range(DIVERGENT_REMOVE):
        rados(ctx, mon, ['-p', 'foo', 'rm',
                         'existing_%d' % (i + DIVERGENT_WRITE)], wait=False)
    time.sleep(10)
    mon.run(
        args=['killall', '-9', 'rados'],
        wait=True,
        check_status=False)

    # kill all the osds but leave divergent in
    log.info('killing all the osds')
    for i in osds:
        manager.kill_osd(i)
    for i in osds:
        manager.mark_down_osd(i)
    for i in non_divergent:
        manager.mark_out_osd(i)

    # bring up non-divergent
    log.info("bringing up non_divergent %s", str(non_divergent))
    for i in non_divergent:
        manager.revive_osd(i)
    for i in non_divergent:
        manager.mark_in_osd(i)

    # write 1 non-divergent object (ensure that old divergent one is divergent)
    objname = "existing_%d" % (DIVERGENT_WRITE + DIVERGENT_REMOVE)
    log.info('writing non-divergent object ' + objname)
    rados(ctx, mon, ['-p', 'foo', 'put', objname, dummyfile2])

    manager.wait_for_recovery()

    # ensure no recovery of up osds first
    log.info('delay recovery')
    for i in non_divergent:
        manager.wait_run_admin_socket(
            'osd', i, ['set_recovery_delay', '100000'])

    # bring in our divergent friend
    log.info("revive divergent %d", divergent)
    manager.raw_cluster_cmd('osd', 'set', 'noup')
    manager.revive_osd(divergent)
    log.info('delay recovery divergent')
    manager.wait_run_admin_socket(
        'osd', divergent, ['set_recovery_delay', '100000'])

    manager.raw_cluster_cmd('osd', 'unset', 'noup')
    while len(manager.get_osd_status()['up']) < 3:
        time.sleep(10)

    log.info('wait for peering')
    rados(ctx, mon, ['-p', 'foo', 'put', 'foo', dummyfile])

    # At this point the divergent_priors should have been detected

    log.info("killing divergent %d", divergent)
    manager.kill_osd(divergent)

    # Export a pg
    (exp_remote,) = ctx.\
        cluster.only('osd.{o}'.format(o=divergent)).remotes.keys()
    FSPATH = manager.get_filepath()
    JPATH = os.path.join(FSPATH, "journal")
    prefix = ("sudo adjust-ulimits ceph-objectstore-tool "
              "--data-path {fpath} --journal-path {jpath} "
              "--log-file="
              "/var/log/ceph/objectstore_tool.$$.log ".format(
                  fpath=FSPATH, jpath=JPATH))
    pid = os.getpid()
    expfile = os.path.join(testdir, "exp.{pid}.out".format(pid=pid))
    cmd = ((prefix + "--op export-remove --pgid 2.0 --file {file}").
           format(id=divergent, file=expfile))
    try:
        exp_remote.sh(cmd, wait=True)
    except CommandFailedError as e:
        assert e.exitstatus == 0

    cmd = ((prefix + "--op import --file {file}").
           format(id=divergent, file=expfile))
    try:
        exp_remote.sh(cmd, wait=True)
    except CommandFailedError as e:
        assert e.exitstatus == 0

    log.info("reviving divergent %d", divergent)
    manager.revive_osd(divergent)
    manager.wait_run_admin_socket('osd', divergent, ['dump_ops_in_flight'])
    time.sleep(20)

    log.info('allowing recovery')
    # Set osd_recovery_delay_start back to 0 and kick the queue
    for i in osds:
        manager.raw_cluster_cmd('tell', 'osd.%d' % i, 'debug',
                                'kick_recovery_wq', '0')

    log.info('reading divergent objects')
    for i in range(DIVERGENT_WRITE + DIVERGENT_REMOVE):
        exit_status = rados(ctx, mon,
                            ['-p', 'foo', 'get', 'existing_%d' % i,
                             '/tmp/existing'])
        assert exit_status == 0

    cmd = 'rm {file}'.format(file=expfile)
    exp_remote.run(args=cmd, wait=True)
    log.info("success")
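
# test_objectstore() below formats its ceph-objectstore-tool prefix from
# module-level FSPATH/JPATH templates that sit outside this excerpt.
# Plausible definitions are sketched here (the exact paths are an
# assumption; note the {id} placeholder that gets filled in per OSD):
FSPATH = "/var/lib/ceph/osd/ceph-{id}"
JPATH = "/var/lib/ceph/osd/ceph-{id}/journal"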
def test_objectstore(ctx, config, cli_remote, REP_POOL, REP_NAME, ec=False):
    manager = ctx.managers['ceph']

    osds = ctx.cluster.only(teuthology.is_type('osd'))

    TEUTHDIR = teuthology.get_testdir(ctx)
    DATADIR = os.path.join(TEUTHDIR, "ceph.data")
    DATALINECOUNT = 10000
    ERRORS = 0
    NUM_OBJECTS = config.get('objects', 10)
    log.info("objects: {num}".format(num=NUM_OBJECTS))

    pool_dump = manager.get_pool_dump(REP_POOL)
    REPID = pool_dump['pool']

    log.debug("repid={num}".format(num=REPID))

    db = {}

    LOCALDIR = tempfile.mkdtemp("cod")

    cod_setup_local_data(log, ctx, NUM_OBJECTS, LOCALDIR,
                         REP_NAME, DATALINECOUNT)
    allremote = []
    allremote.append(cli_remote)
    allremote += list(osds.remotes.keys())
    allremote = list(set(allremote))
    for remote in allremote:
        cod_setup_remote_data(log, ctx, remote, NUM_OBJECTS, DATADIR,
                              REP_NAME, DATALINECOUNT)

    ERRORS += cod_setup(log, ctx, cli_remote, NUM_OBJECTS, DATADIR,
                        REP_NAME, DATALINECOUNT, REP_POOL, db, ec)

    pgs = {}
    for stats in manager.get_pg_stats():
        if stats["pgid"].find(str(REPID) + ".") != 0:
            continue
        if pool_dump["type"] == ceph_manager.CephManager.REPLICATED_POOL:
            for osd in stats["acting"]:
                pgs.setdefault(osd, []).append(stats["pgid"])
        elif pool_dump["type"] == ceph_manager.CephManager.ERASURE_CODED_POOL:
            shard = 0
            for osd in stats["acting"]:
                pgs.setdefault(osd, []).append(
                    "{pgid}s{shard}".format(pgid=stats["pgid"], shard=shard))
                shard += 1
        else:
            raise Exception("{pool} has an unexpected type {type}".format(
                pool=REP_POOL, type=pool_dump["type"]))

    log.info(pgs)
    log.info(db)

    for osd in manager.get_osd_status()['up']:
        manager.kill_osd(osd)
    time.sleep(5)

    pgswithobjects = set()
    objsinpg = {}

    # Test --op list and generate json for all objects
    log.info("Test --op list by generating json for all objects")
    prefix = ("sudo ceph-objectstore-tool "
              "--data-path {fpath} "
              "--journal-path {jpath} ").format(fpath=FSPATH, jpath=JPATH)
    for remote in osds.remotes.keys():
        log.debug(remote)
        log.debug(osds.remotes[remote])
        for role in osds.remotes[remote]:
            if not role.startswith("osd."):
                continue
            osdid = int(role.split('.')[1])
            log.info("process osd.{id} on {remote}".format(
                id=osdid, remote=remote))
            cmd = (prefix + "--op list").format(id=osdid)
            try:
                lines = remote.sh(cmd, check_status=False).splitlines()
                for pgline in lines:
                    if not pgline:
                        continue
                    (pg, obj) = json.loads(pgline)
                    name = obj['oid']
                    if name in db:
                        pgswithobjects.add(pg)
                        objsinpg.setdefault(pg, []).append(name)
                        db[name].setdefault("pg2json",
                                            {})[pg] = json.dumps(obj)
            except CommandFailedError as e:
                log.error("Bad exit status {ret} from --op list request".
                          format(ret=e.exitstatus))
                ERRORS += 1

    log.info(db)
    log.info(pgswithobjects)
    log.info(objsinpg)

    if pool_dump["type"] == ceph_manager.CephManager.REPLICATED_POOL:
        # Test get-bytes
        log.info("Test get-bytes and set-bytes")
        for basename in db.keys():
            file = os.path.join(DATADIR, basename)
            GETNAME = os.path.join(DATADIR, "get")
            SETNAME = os.path.join(DATADIR, "set")

            for remote in osds.remotes.keys():
                for role in osds.remotes[remote]:
                    if not role.startswith("osd."):
                        continue
                    osdid = int(role.split('.')[1])
                    if osdid not in pgs:
                        continue

                    for pg, JSON in db[basename]["pg2json"].items():
                        if pg in pgs[osdid]:
                            cmd = ((prefix + "--pgid {pg}").
                                   format(id=osdid, pg=pg).split())
                            cmd.append(run.Raw("'{json}'".format(json=JSON)))
                            cmd += ("get-bytes {fname}".
                                    format(fname=GETNAME).split())
                            proc = remote.run(args=cmd, check_status=False)
                            if proc.exitstatus != 0:
                                remote.run(args="rm -f {getfile}".
                                           format(getfile=GETNAME).split())
                                log.error("Bad exit status {ret}".
                                          format(ret=proc.exitstatus))
                                ERRORS += 1
                                continue
                            cmd = ("diff -q {file} {getfile}".
                                   format(file=file, getfile=GETNAME))
                            proc = remote.run(args=cmd.split())
                            if proc.exitstatus != 0:
                                log.error("Data from get-bytes differ")
                                # log.debug("Got:")
                                # cat_file(logging.DEBUG, GETNAME)
                                # log.debug("Expected:")
                                # cat_file(logging.DEBUG, file)
                                ERRORS += 1
                            remote.run(args="rm -f {getfile}".
                                       format(getfile=GETNAME).split())

                            data = ("put-bytes going into {file}\n".
                                    format(file=file))
                            teuthology.write_file(remote, SETNAME, data)
                            cmd = ((prefix + "--pgid {pg}").
                                   format(id=osdid, pg=pg).split())
                            cmd.append(run.Raw("'{json}'".format(json=JSON)))
                            cmd += ("set-bytes {fname}".
                                    format(fname=SETNAME).split())
                            proc = remote.run(args=cmd, check_status=False)
                            proc.wait()
                            if proc.exitstatus != 0:
                                log.info("set-bytes failed for object {obj} "
                                         "in pg {pg} osd.{id} ret={ret}".
                                         format(obj=basename, pg=pg,
                                                id=osdid,
                                                ret=proc.exitstatus))
                                ERRORS += 1

                            cmd = ((prefix + "--pgid {pg}").
                                   format(id=osdid, pg=pg).split())
                            cmd.append(run.Raw("'{json}'".format(json=JSON)))
                            cmd += "get-bytes -".split()
                            try:
                                output = remote.sh(cmd, wait=True)
                                if data != output:
                                    log.error("Data inconsistent after "
                                              "set-bytes, got:")
                                    log.error(output)
                                    ERRORS += 1
                            except CommandFailedError as e:
                                log.error("get-bytes after "
                                          "set-bytes ret={ret}".
                                          format(ret=e.exitstatus))
                                ERRORS += 1

                            cmd = ((prefix + "--pgid {pg}").
                                   format(id=osdid, pg=pg).split())
                            cmd.append(run.Raw("'{json}'".format(json=JSON)))
                            cmd += ("set-bytes {fname}".
                                    format(fname=file).split())
                            proc = remote.run(args=cmd, check_status=False)
                            proc.wait()
                            if proc.exitstatus != 0:
                                log.info("set-bytes failed for object {obj} "
                                         "in pg {pg} osd.{id} ret={ret}".
                                         format(obj=basename, pg=pg,
                                                id=osdid,
                                                ret=proc.exitstatus))
                                ERRORS += 1

    log.info("Test list-attrs get-attr")
    for basename in db.keys():
        file = os.path.join(DATADIR, basename)
        GETNAME = os.path.join(DATADIR, "get")
        SETNAME = os.path.join(DATADIR, "set")

        for remote in osds.remotes.keys():
            for role in osds.remotes[remote]:
                if not role.startswith("osd."):
                    continue
                osdid = int(role.split('.')[1])
                if osdid not in pgs:
                    continue

                for pg, JSON in db[basename]["pg2json"].items():
                    if pg in pgs[osdid]:
                        cmd = ((prefix + "--pgid {pg}").
                               format(id=osdid, pg=pg).split())
                        cmd.append(run.Raw("'{json}'".format(json=JSON)))
                        cmd += ["list-attrs"]
                        try:
                            keys = remote.sh(cmd, wait=True,
                                             stderr=BytesIO()).split()
                        except CommandFailedError as e:
                            log.error("Bad exit status {ret}".
                                      format(ret=e.exitstatus))
                            ERRORS += 1
                            continue
                        values = dict(db[basename]["xattr"])

                        for key in keys:
                            if (key == "_" or
                                    key == "snapset" or
                                    key == "hinfo_key"):
                                continue
                            key = key.strip("_")
                            if key not in values:
                                log.error("The key {key} should be present".
                                          format(key=key))
                                ERRORS += 1
                                continue
                            exp = values.pop(key)
                            cmd = ((prefix + "--pgid {pg}").
                                   format(id=osdid, pg=pg).split())
                            cmd.append(run.Raw("'{json}'".format(json=JSON)))
                            cmd += ("get-attr {key}".
                                    format(key="_" + key).split())
                            try:
                                val = remote.sh(cmd, wait=True)
                            except CommandFailedError as e:
                                log.error("get-attr failed with {ret}".
                                          format(ret=e.exitstatus))
                                ERRORS += 1
                                continue
                            if exp != val:
                                log.error("For key {key} got value {got} "
                                          "instead of {expected}".
                                          format(key=key, got=val,
                                                 expected=exp))
                                ERRORS += 1
                        if "hinfo_key" in keys:
                            cmd_prefix = prefix.format(id=osdid)
                            cmd = """
      expected=$({prefix} --pgid {pg} '{json}' get-attr {key} | base64)
      echo placeholder | {prefix} --pgid {pg} '{json}' set-attr {key} -
      test $({prefix} --pgid {pg} '{json}' get-attr {key}) = placeholder
      echo $expected | base64 --decode | \
         {prefix} --pgid {pg} '{json}' set-attr {key} -
      test $({prefix} --pgid {pg} '{json}' get-attr {key} | base64) = $expected
                            """.format(prefix=cmd_prefix, pg=pg, json=JSON,
                                       key="hinfo_key")
                            log.debug(cmd)
                            proc = remote.run(
                                args=['bash', '-e', '-x', '-c', cmd],
                                check_status=False,
                                stdout=BytesIO(),
                                stderr=BytesIO())
                            proc.wait()
                            if proc.exitstatus != 0:
                                log.error("failed with " +
                                          str(proc.exitstatus))
                                log.error(" ".join([
                                    six.ensure_str(proc.stdout.getvalue()),
                                    six.ensure_str(proc.stderr.getvalue()),
                                ]))
                                ERRORS += 1

                        if len(values) != 0:
                            log.error("Not all keys found, remaining keys:")
                            log.error(values)

    log.info("Test pg info")
    for remote in osds.remotes.keys():
        for role in osds.remotes[remote]:
            if not role.startswith("osd."):
                continue
            osdid = int(role.split('.')[1])
            if osdid not in pgs:
                continue

            for pg in pgs[osdid]:
                cmd = ((prefix + "--op info --pgid {pg}").
                       format(id=osdid, pg=pg).split())
                try:
                    info = remote.sh(cmd, wait=True)
                except CommandFailedError as e:
                    log.error("Failure of --op info command with {ret}".
                              format(ret=e.exitstatus))
                    ERRORS += 1
                    continue
                if not str(pg) in info:
                    log.error("Bad data from info: {info}".format(info=info))
                    ERRORS += 1

    log.info("Test pg logging")
    for remote in osds.remotes.keys():
        for role in osds.remotes[remote]:
            if not role.startswith("osd."):
                continue
            osdid = int(role.split('.')[1])
            if osdid not in pgs:
                continue

            for pg in pgs[osdid]:
                cmd = ((prefix + "--op log --pgid {pg}").
                       format(id=osdid, pg=pg).split())
                try:
                    output = remote.sh(cmd, wait=True)
                except CommandFailedError as e:
                    log.error("Getting log failed for pg {pg} "
                              "from osd.{id} with {ret}".
                              format(pg=pg, id=osdid, ret=e.exitstatus))
                    ERRORS += 1
                    continue
                HASOBJ = pg in pgswithobjects
                MODOBJ = "modify" in output
                if HASOBJ != MODOBJ:
                    log.error("Bad log for pg {pg} from osd.{id}".
                              format(pg=pg, id=osdid))
                    MSG = (HASOBJ and [""] or ["NOT "])[0]
                    log.error("Log should {msg}have a modify entry".
                              format(msg=MSG))
                    ERRORS += 1

    log.info("Test pg export")
    EXP_ERRORS = 0
    for remote in osds.remotes.keys():
        for role in osds.remotes[remote]:
            if not role.startswith("osd."):
                continue
            osdid = int(role.split('.')[1])
            if osdid not in pgs:
                continue

            for pg in pgs[osdid]:
                fpath = os.path.join(DATADIR,
                                     "osd{id}.{pg}".format(id=osdid, pg=pg))

                cmd = ((prefix + "--op export --pgid {pg} --file {file}").
                       format(id=osdid, pg=pg, file=fpath))
                try:
                    remote.sh(cmd, wait=True)
                except CommandFailedError as e:
                    log.error("Exporting failed for pg {pg} "
                              "on osd.{id} with {ret}".
                              format(pg=pg, id=osdid, ret=e.exitstatus))
                    EXP_ERRORS += 1

    ERRORS += EXP_ERRORS

    log.info("Test pg removal")
    RM_ERRORS = 0
    for remote in osds.remotes.keys():
        for role in osds.remotes[remote]:
            if not role.startswith("osd."):
                continue
            osdid = int(role.split('.')[1])
            if osdid not in pgs:
                continue

            for pg in pgs[osdid]:
                cmd = ((prefix + "--force --op remove --pgid {pg}").
                       format(pg=pg, id=osdid))
                try:
                    remote.sh(cmd, wait=True)
                except CommandFailedError as e:
                    log.error("Removing failed for pg {pg} "
                              "on osd.{id} with {ret}".
                              format(pg=pg, id=osdid, ret=e.exitstatus))
                    RM_ERRORS += 1

    ERRORS += RM_ERRORS

    IMP_ERRORS = 0
    if EXP_ERRORS == 0 and RM_ERRORS == 0:
        log.info("Test pg import")
        for remote in osds.remotes.keys():
            for role in osds.remotes[remote]:
                if not role.startswith("osd."):
                    continue
                osdid = int(role.split('.')[1])
                if osdid not in pgs:
                    continue

                for pg in pgs[osdid]:
                    fpath = os.path.join(DATADIR,
                                         "osd{id}.{pg}".
                                         format(id=osdid, pg=pg))

                    cmd = ((prefix + "--op import --file {file}").
                           format(id=osdid, file=fpath))
                    try:
                        remote.sh(cmd, wait=True)
                    except CommandFailedError as e:
                        log.error("Import failed from {file} with {ret}".
                                  format(file=fpath, ret=e.exitstatus))
                        IMP_ERRORS += 1
    else:
        log.warning("SKIPPING IMPORT TESTS DUE TO PREVIOUS FAILURES")

    ERRORS += IMP_ERRORS

    if EXP_ERRORS == 0 and RM_ERRORS == 0 and IMP_ERRORS == 0:
        log.info("Restarting OSDs....")
        # They still look to be up because nodown is set
        for osd in manager.get_osd_status()['up']:
            manager.revive_osd(osd)
        # Wait for health?
        time.sleep(5)
        # Let scrub after test runs verify consistency of all copies
        log.info("Verify replicated import data")
        objects = range(1, NUM_OBJECTS + 1)
        for i in objects:
            NAME = REP_NAME + "{num}".format(num=i)
            TESTNAME = os.path.join(DATADIR, "gettest")
            REFNAME = os.path.join(DATADIR, NAME)

            proc = rados(ctx, cli_remote,
                         ['-p', REP_POOL, 'get', NAME, TESTNAME], wait=False)

            ret = proc.wait()
            if ret != 0:
                log.error("After import, rados get failed with {ret}".
                          format(ret=proc.exitstatus))
                ERRORS += 1
                continue

            cmd = "diff -q {gettest} {ref}".format(gettest=TESTNAME,
                                                   ref=REFNAME)
            proc = cli_remote.run(args=cmd, check_status=False)
            proc.wait()
            if proc.exitstatus != 0:
                log.error("Data comparison failed for {obj}".format(obj=NAME))
                ERRORS += 1

    return ERRORS
def task(ctx, config):
    """
    Test handling of divergent entries with prior_version
    prior to log_tail

    overrides:
      ceph:
        conf:
          osd:
            debug osd: 5

    Requires 3 osds on a single test node.
    """
    if config is None:
        config = {}
    assert isinstance(config, dict), \
        'divergent_priors task only accepts a dict for configuration'

    manager = ctx.managers['ceph']

    while len(manager.get_osd_status()['up']) < 3:
        time.sleep(10)
    manager.flush_pg_stats([0, 1, 2])
    manager.raw_cluster_cmd('osd', 'set', 'noout')
    manager.raw_cluster_cmd('osd', 'set', 'noin')
    manager.raw_cluster_cmd('osd', 'set', 'nodown')
    manager.wait_for_clean()

    # something that is always there
    dummyfile = '/etc/fstab'
    dummyfile2 = '/etc/resolv.conf'

    # create 1 pg pool
    log.info('creating foo')
    manager.raw_cluster_cmd('osd', 'pool', 'create', 'foo', '1')

    osds = [0, 1, 2]
    for i in osds:
        manager.set_config(i, osd_min_pg_log_entries=10)
        manager.set_config(i, osd_max_pg_log_entries=10)
        manager.set_config(i, osd_pg_log_trim_min=5)

    # determine primary
    divergent = manager.get_pg_primary('foo', 0)
    log.info("primary and soon to be divergent is %d", divergent)
    non_divergent = list(osds)
    non_divergent.remove(divergent)

    log.info('writing initial objects')
    first_mon = teuthology.get_first_mon(ctx, config)
    (mon,) = ctx.cluster.only(first_mon).remotes.keys()
    # write 100 objects
    for i in range(100):
        rados(ctx, mon, ['-p', 'foo', 'put', 'existing_%d' % i, dummyfile])

    manager.wait_for_clean()

    # blackhole non_divergent
    log.info("blackholing osds %s", str(non_divergent))
    for i in non_divergent:
        manager.set_config(i, objectstore_blackhole=1)

    DIVERGENT_WRITE = 5
    DIVERGENT_REMOVE = 5
    # Write some soon to be divergent
    log.info('writing divergent objects')
    for i in range(DIVERGENT_WRITE):
        rados(ctx, mon, ['-p', 'foo', 'put', 'existing_%d' % i,
                         dummyfile2], wait=False)
    # Remove some soon to be divergent
    log.info('remove divergent objects')
    for i in range(DIVERGENT_REMOVE):
        rados(ctx, mon, ['-p', 'foo', 'rm',
                         'existing_%d' % (i + DIVERGENT_WRITE)], wait=False)
    time.sleep(10)
    mon.run(
        args=['killall', '-9', 'rados'],
        wait=True,
        check_status=False)

    # kill all the osds but leave divergent in
    log.info('killing all the osds')
    for i in osds:
        manager.kill_osd(i)
    for i in osds:
        manager.mark_down_osd(i)
    for i in non_divergent:
        manager.mark_out_osd(i)

    # bring up non-divergent
    log.info("bringing up non_divergent %s", str(non_divergent))
    for i in non_divergent:
        manager.revive_osd(i)
    for i in non_divergent:
        manager.mark_in_osd(i)

    # write 1 non-divergent object (ensure that old divergent one is divergent)
    objname = "existing_%d" % (DIVERGENT_WRITE + DIVERGENT_REMOVE)
    log.info('writing non-divergent object ' + objname)
    rados(ctx, mon, ['-p', 'foo', 'put', objname, dummyfile2])

    manager.wait_for_recovery()

    # ensure no recovery of up osds first
    log.info('delay recovery')
    for i in non_divergent:
        manager.wait_run_admin_socket(
            'osd', i, ['set_recovery_delay', '100000'])

    # bring in our divergent friend
    log.info("revive divergent %d", divergent)
    manager.raw_cluster_cmd('osd', 'set', 'noup')
    manager.revive_osd(divergent)
    log.info('delay recovery divergent')
    manager.wait_run_admin_socket(
        'osd', divergent, ['set_recovery_delay', '100000'])

    manager.raw_cluster_cmd('osd', 'unset', 'noup')
    while len(manager.get_osd_status()['up']) < 3:
        time.sleep(10)

    log.info('wait for peering')
    rados(ctx, mon, ['-p', 'foo', 'put', 'foo', dummyfile])

    # At this point the divergent_priors should have been detected

    log.info("killing divergent %d", divergent)
    manager.kill_osd(divergent)
    log.info("reviving divergent %d", divergent)
    manager.revive_osd(divergent)

    time.sleep(20)

    log.info('allowing recovery')
    # Set osd_recovery_delay_start back to 0 and kick the queue
    for i in osds:
        manager.raw_cluster_cmd('tell', 'osd.%d' % i, 'debug',
                                'kick_recovery_wq', '0')

    log.info('reading divergent objects')
    for i in range(DIVERGENT_WRITE + DIVERGENT_REMOVE):
        exit_status = rados(ctx, mon,
                            ['-p', 'foo', 'get', 'existing_%d' % i,
                             '/tmp/existing'])
        assert exit_status == 0

    log.info("success")
def task(ctx, config):
    """
    Test handling of osd_failsafe_nearfull_ratio and osd_failsafe_full_ratio
    configuration settings

    For the test to pass, the log-ignorelist must be set up as follows:

      tasks:
      - chef:
      - install:
      - ceph:
          log-ignorelist: ['OSD near full', 'OSD full dropping all updates']
      - osd_failsafe_enospc:

    """
    if config is None:
        config = {}
    assert isinstance(config, dict), \
        'osd_failsafe_enospc task only accepts a dict for configuration'

    # Give 2 seconds for injectargs + osd_op_complaint_time (30) +
    # 2 * osd_heartbeat_interval (6) + 6 padding
    sleep_time = 50

    # something that is always there
    dummyfile = '/etc/fstab'
    dummyfile2 = '/etc/resolv.conf'

    manager = ctx.managers['ceph']

    # create 1 pg pool with 1 rep which can only be on osd.0
    osds = manager.get_osd_dump()
    for osd in osds:
        if osd['osd'] != 0:
            manager.mark_out_osd(osd['osd'])
    log.info('creating pool foo')
    manager.create_pool("foo")
    manager.raw_cluster_cmd('osd', 'pool', 'set', 'foo', 'size', '1')

    # State NONE -> NEAR
    log.info('1. Verify warning messages when exceeding nearfull_ratio')

    first_mon = teuthology.get_first_mon(ctx, config)
    (mon,) = ctx.cluster.only(first_mon).remotes.keys()

    proc = mon.run(
        args=['sudo', 'daemon-helper', 'kill', 'ceph', '-w'],
        stdin=run.PIPE,
        stdout=StringIO(),
        wait=False,
    )

    manager.raw_cluster_cmd('tell', 'osd.0', 'injectargs',
                            '--osd_failsafe_nearfull_ratio .00001')

    time.sleep(sleep_time)
    proc.stdin.close()  # causes daemon-helper to send SIGKILL to ceph -w
    proc.wait()

    lines = proc.stdout.getvalue().split('\n')

    count = sum(1 for line in lines if '[WRN] OSD near full' in line)
    assert count == 2, \
        'Incorrect number of warning messages expected 2 got %d' % count
    count = sum(1 for line in lines
                if '[ERR] OSD full dropping all updates' in line)
    assert count == 0, \
        'Incorrect number of error messages expected 0 got %d' % count

    # State NEAR -> FULL
    log.info('2. Verify error messages when exceeding full_ratio')

    proc = mon.run(
        args=['sudo', 'daemon-helper', 'kill', 'ceph', '-w'],
        stdin=run.PIPE,
        stdout=StringIO(),
        wait=False,
    )

    manager.raw_cluster_cmd('tell', 'osd.0', 'injectargs',
                            '--osd_failsafe_full_ratio .00001')

    time.sleep(sleep_time)
    proc.stdin.close()  # causes daemon-helper to send SIGKILL to ceph -w
    proc.wait()

    lines = proc.stdout.getvalue().split('\n')

    count = sum(1 for line in lines
                if '[ERR] OSD full dropping all updates' in line)
    assert count == 2, \
        'Incorrect number of error messages expected 2 got %d' % count

    log.info('3. Verify write failure when exceeding full_ratio')

    # Write data should fail
    ret = rados(ctx, mon, ['-p', 'foo', 'put', 'newfile1', dummyfile])
    assert ret != 0, \
        'Expected write failure but it succeeded with exit status 0'

    # Put back default
    manager.raw_cluster_cmd('tell', 'osd.0', 'injectargs',
                            '--osd_failsafe_full_ratio .97')
    time.sleep(10)

    # State FULL -> NEAR
    log.info('4. Verify write success when NOT exceeding full_ratio')

    # Write should succeed
    ret = rados(ctx, mon, ['-p', 'foo', 'put', 'newfile2', dummyfile2])
    assert ret == 0, \
        'Expected write to succeed, but got exit status %d' % ret

    log.info('5. Verify warning messages again when exceeding nearfull_ratio')

    proc = mon.run(
        args=['sudo', 'daemon-helper', 'kill', 'ceph', '-w'],
        stdin=run.PIPE,
        stdout=StringIO(),
        wait=False,
    )

    time.sleep(sleep_time)
    proc.stdin.close()  # causes daemon-helper to send SIGKILL to ceph -w
    proc.wait()

    lines = proc.stdout.getvalue().split('\n')

    count = sum(1 for line in lines if '[WRN] OSD near full' in line)
    assert count == 1 or count == 2, \
        'Incorrect number of warning messages expected 1 or 2 got %d' % count
    count = sum(1 for line in lines
                if '[ERR] OSD full dropping all updates' in line)
    assert count == 0, \
        'Incorrect number of error messages expected 0 got %d' % count

    manager.raw_cluster_cmd('tell', 'osd.0', 'injectargs',
                            '--osd_failsafe_nearfull_ratio .90')
    time.sleep(10)

    # State NONE -> FULL
    log.info('6. Verify error messages again when exceeding full_ratio')

    proc = mon.run(
        args=['sudo', 'daemon-helper', 'kill', 'ceph', '-w'],
        stdin=run.PIPE,
        stdout=StringIO(),
        wait=False,
    )

    manager.raw_cluster_cmd('tell', 'osd.0', 'injectargs',
                            '--osd_failsafe_full_ratio .00001')

    time.sleep(sleep_time)
    proc.stdin.close()  # causes daemon-helper to send SIGKILL to ceph -w
    proc.wait()

    lines = proc.stdout.getvalue().split('\n')

    count = sum(1 for line in lines if '[WRN] OSD near full' in line)
    assert count == 0, \
        'Incorrect number of warning messages expected 0 got %d' % count
    count = sum(1 for line in lines
                if '[ERR] OSD full dropping all updates' in line)
    assert count == 2, \
        'Incorrect number of error messages expected 2 got %d' % count

    # State FULL -> NONE
    log.info('7. Verify no messages after settings are back to default')

    manager.raw_cluster_cmd('tell', 'osd.0', 'injectargs',
                            '--osd_failsafe_full_ratio .97')
    time.sleep(10)

    proc = mon.run(
        args=['sudo', 'daemon-helper', 'kill', 'ceph', '-w'],
        stdin=run.PIPE,
        stdout=StringIO(),
        wait=False,
    )

    time.sleep(sleep_time)
    proc.stdin.close()  # causes daemon-helper to send SIGKILL to ceph -w
    proc.wait()

    lines = proc.stdout.getvalue().split('\n')

    count = sum(1 for line in lines if '[WRN] OSD near full' in line)
    assert count == 0, \
        'Incorrect number of warning messages expected 0 got %d' % count
    count = sum(1 for line in lines
                if '[ERR] OSD full dropping all updates' in line)
    assert count == 0, \
        'Incorrect number of error messages expected 0 got %d' % count

    log.info('Test Passed')

    # Bring all OSDs back in
    manager.remove_pool("foo")
    for osd in osds:
        if osd['osd'] != 0:
            manager.mark_in_osd(osd['osd'])
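
# The six count/assert blocks above share one pattern; if this task were
# being refactored, they could delegate to a tiny helper like this
# (hypothetical, not part of the original module):
def count_lines_matching(lines, needle):
    """Count 'ceph -w' output lines containing the given substring."""
    return sum(1 for line in lines if needle in line)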
def task(ctx, config):
    """
    Test peering.
    """
    if config is None:
        config = {}
    assert isinstance(config, dict), \
        'peer task only accepts a dict for configuration'
    first_mon = teuthology.get_first_mon(ctx, config)
    (mon,) = ctx.cluster.only(first_mon).remotes.keys()

    manager = ceph_manager.CephManager(
        mon,
        ctx=ctx,
        logger=log.getChild('ceph_manager'),
        )

    while len(manager.get_osd_status()['up']) < 3:
        time.sleep(10)
    manager.flush_pg_stats([0, 1, 2])
    manager.wait_for_clean()

    for i in range(3):
        manager.set_config(i, osd_recovery_delay_start=120)

    # take an osd down
    manager.kill_osd(2)
    manager.mark_down_osd(2)

    # kludge to make sure they get a map
    rados(ctx, mon, ['-p', 'data', 'get', 'dummy', '-'])

    manager.flush_pg_stats([0, 1])
    manager.wait_for_recovery()

    # kill another and revive 2, so that some pgs can't peer.
    manager.kill_osd(1)
    manager.mark_down_osd(1)
    manager.revive_osd(2)
    manager.wait_till_osd_is_up(2)

    manager.flush_pg_stats([0, 2])

    manager.wait_for_active_or_down()

    manager.flush_pg_stats([0, 2])

    # look for down pgs
    num_down_pgs = 0
    pgs = manager.get_pg_stats()
    for pg in pgs:
        out = manager.raw_cluster_cmd('pg', pg['pgid'], 'query')
        log.debug("out string %s", out)
        j = json.loads(out)
        log.info("pg is %s, query json is %s", pg, j)

        if pg['state'].count('down'):
            num_down_pgs += 1
            # verify that it is blocked on osd.1
            rs = j['recovery_state']
            assert len(rs) >= 2
            assert rs[0]['name'] == 'Started/Primary/Peering/Down'
            assert rs[1]['name'] == 'Started/Primary/Peering'
            assert rs[1]['blocked']
            assert rs[1]['down_osds_we_would_probe'] == [1]
            assert len(rs[1]['peering_blocked_by']) == 1
            assert rs[1]['peering_blocked_by'][0]['osd'] == 1

    assert num_down_pgs > 0

    # bring it all back
    manager.revive_osd(1)
    manager.wait_till_osd_is_up(1)
    manager.flush_pg_stats([0, 1, 2])
    manager.wait_for_clean()
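
# For reference, the assertions above depend only on this slice of the
# `ceph pg <pgid> query` output. A trimmed, illustrative (not verbatim)
# example of the recovery_state the test expects while osd.1 holds up
# peering:
#
# "recovery_state": [
#     {"name": "Started/Primary/Peering/Down"},
#     {"name": "Started/Primary/Peering",
#      "blocked": "peering is blocked due to down osds",
#      "down_osds_we_would_probe": [1],
#      "peering_blocked_by": [{"osd": 1}]}
# ]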
def task(ctx, config):
    """
    Test handling resolve stuck peering

    requires 3 osds on a single test node
    """
    if config is None:
        config = {}
    assert isinstance(config, dict), \
        'Resolve stuck peering only accepts a dict for config'

    manager = ctx.managers['ceph']

    while len(manager.get_osd_status()['up']) < 3:
        time.sleep(10)

    manager.wait_for_clean()

    dummyfile = '/etc/fstab'
    dummyfile1 = '/etc/resolv.conf'

    # create 1 PG pool
    pool = 'foo'
    log.info('creating pool foo')
    manager.raw_cluster_cmd('osd', 'pool', 'create', '%s' % pool, '1')

    # set min_size of the pool to 1 so that we can continue with I/O
    # when 2 osds are down
    manager.set_pool_property(pool, "min_size", 1)

    osds = [0, 1, 2]

    primary = manager.get_pg_primary('foo', 0)
    log.info("primary osd is %d", primary)

    others = list(osds)
    others.remove(primary)

    log.info('writing initial objects')
    first_mon = teuthology.get_first_mon(ctx, config)
    (mon,) = ctx.cluster.only(first_mon).remotes.keys()
    # create a few objects
    for i in range(100):
        rados(ctx, mon, ['-p', 'foo', 'put', 'existing_%d' % i, dummyfile])

    manager.wait_for_clean()

    # kill other osds except primary
    log.info('killing other osds except primary')
    for i in others:
        manager.kill_osd(i)
    for i in others:
        manager.mark_down_osd(i)

    for i in range(100):
        rados(ctx, mon, ['-p', 'foo', 'put', 'new_%d' % i, dummyfile1])

    # kill primary osd
    manager.kill_osd(primary)
    manager.mark_down_osd(primary)

    # revive other 2 osds
    for i in others:
        manager.revive_osd(i)

    # make sure that pg is down
    # Assuming pg number for single pg pool will start from 0
    pgnum = 0
    pgstr = manager.get_pgid(pool, pgnum)
    stats = manager.get_single_pg_stats(pgstr)
    print(stats['state'])

    timeout = 60
    start = time.time()

    while 'down' not in stats['state']:
        assert time.time() - start < timeout, \
            'failed to reach down state before timeout expired'
        stats = manager.get_single_pg_stats(pgstr)

    # mark primary as lost
    manager.raw_cluster_cmd('osd', 'lost', '%d' % primary,
                            '--yes-i-really-mean-it')

    # expect the pg status to be active+undersized+degraded
    # pg should recover and become active+clean within timeout
    stats = manager.get_single_pg_stats(pgstr)
    print(stats['state'])

    timeout = 10
    start = time.time()

    while manager.get_num_down():
        assert time.time() - start < timeout, \
            'failed to recover before timeout expired'

    manager.revive_osd(primary)
def task(ctx, config):
    """
    Test handling of objects with inconsistent hash info during backfill
    and deep-scrub.

    A pretty rigid cluster is brought up and tested by this task
    """
    if config is None:
        config = {}
    assert isinstance(config, dict), \
        'ec_inconsistent_hinfo task only accepts a dict for configuration'
    first_mon = teuthology.get_first_mon(ctx, config)
    (mon,) = ctx.cluster.only(first_mon).remotes.keys()

    manager = ceph_manager.CephManager(
        mon,
        ctx=ctx,
        logger=log.getChild('ceph_manager'),
        )

    profile = config.get('erasure_code_profile', {
        'k': '2',
        'm': '1',
        'crush-failure-domain': 'osd'
    })
    profile_name = profile.get('name', 'backfill_unfound')
    manager.create_erasure_code_profile(profile_name, profile)
    pool = manager.create_pool_with_unique_name(
        pg_num=1,
        erasure_code_profile_name=profile_name,
        min_size=2)
    manager.raw_cluster_cmd('osd', 'pool', 'set', pool,
                            'pg_autoscale_mode', 'off')

    manager.flush_pg_stats([0, 1, 2, 3])
    manager.wait_for_clean()

    pool_id = manager.get_pool_num(pool)
    pgid = '%d.0' % pool_id
    pgs = manager.get_pg_stats()
    acting = next((pg['acting'] for pg in pgs if pg['pgid'] == pgid), None)
    log.info("acting=%s" % acting)
    assert acting
    primary = acting[0]

    # something that is always there, readable and never empty
    dummyfile = '/etc/group'

    # kludge to make sure they get a map
    rados(ctx, mon, ['-p', pool, 'put', 'dummy', dummyfile])

    manager.flush_pg_stats([0, 1])
    manager.wait_for_recovery()

    log.debug("create test object")
    obj = 'test'
    rados(ctx, mon, ['-p', pool, 'put', obj, dummyfile])

    victim = acting[1]

    log.info("remove test object hash info from osd.%s shard and test "
             "deep-scrub and repair" % victim)

    manager.objectstore_tool(pool, options='', args='rm-attr hinfo_key',
                             object_name=obj, osd=victim)

    check_time_now = time.strftime('%s')
    manager.raw_cluster_cmd('pg', 'deep-scrub', pgid)
    wait_for_deep_scrub_complete(manager, pgid, check_time_now, True)

    check_time_now = time.strftime('%s')
    manager.raw_cluster_cmd('pg', 'repair', pgid)
    wait_for_deep_scrub_complete(manager, pgid, check_time_now, False)

    log.info("remove test object hash info from primary osd.%s shard and "
             "test backfill" % primary)

    log.debug("write some data")
    rados(ctx, mon, ['-p', pool, 'bench', '30', 'write', '-b', '4096',
                     '--no-cleanup'])

    manager.objectstore_tool(pool, options='', args='rm-attr hinfo_key',
                             object_name=obj, osd=primary)

    # mark the osd out to trigger a rebalance/backfill
    source = acting[1]
    target = [x for x in [0, 1, 2, 3] if x not in acting][0]
    manager.mark_out_osd(source)

    # wait for everything to peer, backfill and recover
    wait_for_backfilling_complete(manager, pgid, source, target)
    manager.wait_for_clean()

    manager.flush_pg_stats([0, 1, 2, 3])
    pgs = manager.get_pg_stats()
    pg = next((pg for pg in pgs if pg['pgid'] == pgid), None)
    log.debug('pg=%s' % pg)
    assert pg
    assert 'clean' in pg['state'].split('+')
    assert 'inconsistent' not in pg['state'].split('+')
    unfound = manager.get_num_unfound_objects()
    log.debug("there are %d unfound objects" % unfound)
    assert unfound == 0

    source, target = target, source
    log.info("remove test object hash info from non-primary osd.%s shard "
             "and test backfill" % source)

    manager.objectstore_tool(pool, options='', args='rm-attr hinfo_key',
                             object_name=obj, osd=source)

    # mark the osd in to trigger a rebalance/backfill
    manager.mark_in_osd(target)

    # wait for everything to peer, backfill and recover
    wait_for_backfilling_complete(manager, pgid, source, target)
    manager.wait_for_clean()

    manager.flush_pg_stats([0, 1, 2, 3])
    pgs = manager.get_pg_stats()
    pg = next((pg for pg in pgs if pg['pgid'] == pgid), None)
    log.debug('pg=%s' % pg)
    assert pg
    assert 'clean' in pg['state'].split('+')
    assert 'inconsistent' not in pg['state'].split('+')
    unfound = manager.get_num_unfound_objects()
    log.debug("there are %d unfound objects" % unfound)
    assert unfound == 0

    log.info("remove hash info from two shards and test backfill")

    source = acting[2]
    target = [x for x in [0, 1, 2, 3] if x not in acting][0]
    manager.objectstore_tool(pool, options='', args='rm-attr hinfo_key',
                             object_name=obj, osd=primary)
    manager.objectstore_tool(pool, options='', args='rm-attr hinfo_key',
                             object_name=obj, osd=source)

    # mark the osd out to trigger a rebalance/backfill
    manager.mark_out_osd(source)

    # wait for everything to peer, backfill and detect the unfound object
    wait_for_backfilling_complete(manager, pgid, source, target)

    # verify that there is an unfound object
    manager.flush_pg_stats([0, 1, 2, 3])
    pgs = manager.get_pg_stats()
    pg = next((pg for pg in pgs if pg['pgid'] == pgid), None)
    log.debug('pg=%s' % pg)
    assert pg
    assert 'backfill_unfound' in pg['state'].split('+')
    unfound = manager.get_num_unfound_objects()
    log.debug("there are %d unfound objects" % unfound)
    assert unfound == 1
    m = manager.list_pg_unfound(pgid)
    log.debug('list_pg_unfound=%s' % m)
    assert m['num_unfound'] == pg['stat_sum']['num_objects_unfound']

    # mark stuff lost
    pgs = manager.get_pg_stats()
    manager.raw_cluster_cmd('pg', pgid, 'mark_unfound_lost', 'delete')

    # wait for everything to peer and be happy...
    manager.flush_pg_stats([0, 1, 2, 3])
    manager.wait_for_recovery()
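
# The task above relies on wait_for_backfilling_complete() (and a
# similar wait_for_deep_scrub_complete()) defined elsewhere in the
# module. A minimal sketch of the backfill wait, assuming only the
# CephManager calls already used above; the polling interval and bound
# are illustrative:
def wait_for_backfilling_complete(manager, pgid, from_osd, to_osd):
    log.debug("waiting for pg %s to finish backfilling from osd.%s "
              "to osd.%s" % (pgid, from_osd, to_osd))
    for _ in range(300):
        time.sleep(5)
        manager.flush_pg_stats([0, 1, 2, 3])
        pgs = manager.get_pg_stats()
        pg = next((pg for pg in pgs if pg['pgid'] == pgid), None)
        assert pg
        status = pg['state'].split('+')
        # still moving data, keep polling
        if 'backfilling' in status or 'backfill_wait' in status:
            continue
        # done once the target holds the pg and the source has dropped
        # out of the up set
        if to_osd in pg['up'] and from_osd not in pg['up']:
            return
    assert False, 'pg %s never finished backfilling to osd.%s' % (
        pgid, to_osd)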