def cod_setup(log, ctx, remote, NUM_OBJECTS, DATADIR, REP_NAME, DATALINECOUNT,
              REP_POOL, db):
    ERRORS = 0
    log.info("Creating {objs} objects in replicated pool".format(objs=NUM_OBJECTS))
    nullfd = open(os.devnull, "w")

    objects = range(1, NUM_OBJECTS + 1)
    for i in objects:
        NAME = REP_NAME + "{num}".format(num=i)
        DDNAME = os.path.join(DATADIR, NAME)

        proc = rados(ctx, remote, ['-p', REP_POOL, 'put', NAME, DDNAME],
                     wait=False)
        # proc = remote.run(args=['rados', '-p', REP_POOL, 'put', NAME, DDNAME])
        ret = proc.wait()
        if ret != 0:
            log.critical("Rados put failed with status {ret}".format(ret=ret))
            sys.exit(1)

        db[NAME] = {}

        keys = range(i)
        db[NAME]["xattr"] = {}
        for k in keys:
            if k == 0:
                continue
            mykey = "key{i}-{k}".format(i=i, k=k)
            myval = "val{i}-{k}".format(i=i, k=k)
            proc = remote.run(args=['rados', '-p', REP_POOL, 'setxattr',
                                    NAME, mykey, myval])
            ret = proc.wait()
            if ret != 0:
                log.error("setxattr failed with {ret}".format(ret=ret))
                ERRORS += 1
            db[NAME]["xattr"][mykey] = myval

        # Create omap header in all objects but REPobject1
        if i != 1:
            myhdr = "hdr{i}".format(i=i)
            proc = remote.run(args=['rados', '-p', REP_POOL, 'setomapheader',
                                    NAME, myhdr])
            ret = proc.wait()
            if ret != 0:
                log.critical("setomapheader failed with {ret}".format(ret=ret))
                ERRORS += 1
            db[NAME]["omapheader"] = myhdr

        db[NAME]["omap"] = {}
        for k in keys:
            if k == 0:
                continue
            mykey = "okey{i}-{k}".format(i=i, k=k)
            myval = "oval{i}-{k}".format(i=i, k=k)
            proc = remote.run(args=['rados', '-p', REP_POOL, 'setomapval',
                                    NAME, mykey, myval])
            ret = proc.wait()
            if ret != 0:
                log.critical("setomapval failed with {ret}".format(ret=ret))
            db[NAME]["omap"][mykey] = myval

    nullfd.close()
    return ERRORS

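# --- Illustrative usage sketch (not part of the original module) ---
# A minimal example of how cod_setup() is typically driven from a task body.
# The remote, pool name and data directory below are assumptions made for
# this example only; the real callers appear in the tasks further down.
#
#   db = {}
#   errors = cod_setup(log, ctx, cli_remote, 10, "/tmp/cod.data",
#                      "REPobject", 10000, "rep_pool", db)
#   assert errors == 0, "cod_setup reported errors"
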
def task(ctx, config):
    """
    Test handling of divergent entries with prior_version
    prior to log_tail and a ceph-objectstore-tool export/import

    overrides:
      ceph:
        conf:
          osd:
            debug osd: 5

    Requires 3 osds on a single test node.
    """
    if config is None:
        config = {}
    assert isinstance(config, dict), \
        'divergent_priors task only accepts a dict for configuration'

    manager = ctx.managers['ceph']

    while len(manager.get_osd_status()['up']) < 3:
        time.sleep(10)
    manager.flush_pg_stats([0, 1, 2])
    manager.raw_cluster_cmd('osd', 'set', 'noout')
    manager.raw_cluster_cmd('osd', 'set', 'noin')
    manager.raw_cluster_cmd('osd', 'set', 'nodown')
    manager.wait_for_clean()

    # something that is always there
    dummyfile = '/etc/fstab'
    dummyfile2 = '/etc/resolv.conf'
    testdir = teuthology.get_testdir(ctx)

    # create 1 pg pool
    log.info('creating foo')
    manager.raw_cluster_cmd('osd', 'pool', 'create', 'foo', '1')

    osds = [0, 1, 2]
    for i in osds:
        manager.set_config(i, osd_min_pg_log_entries=10)
        manager.set_config(i, osd_max_pg_log_entries=10)
        manager.set_config(i, osd_pg_log_trim_min=5)

    # determine primary
    divergent = manager.get_pg_primary('foo', 0)
    log.info("primary and soon to be divergent is %d", divergent)
    non_divergent = list(osds)
    non_divergent.remove(divergent)

    log.info('writing initial objects')
    first_mon = teuthology.get_first_mon(ctx, config)
    (mon,) = ctx.cluster.only(first_mon).remotes.iterkeys()
    # write 100 objects
    for i in range(100):
        rados(ctx, mon, ['-p', 'foo', 'put', 'existing_%d' % i, dummyfile])

    manager.wait_for_clean()

    # blackhole non_divergent
    log.info("blackholing osds %s", str(non_divergent))
    for i in non_divergent:
        manager.set_config(i, objectstore_blackhole=1)

    DIVERGENT_WRITE = 5
    DIVERGENT_REMOVE = 5
    # Write some soon to be divergent
    log.info('writing divergent objects')
    for i in range(DIVERGENT_WRITE):
        rados(ctx, mon, ['-p', 'foo', 'put', 'existing_%d' % i, dummyfile2],
              wait=False)
    # Remove some soon to be divergent
    log.info('remove divergent objects')
    for i in range(DIVERGENT_REMOVE):
        rados(ctx, mon, ['-p', 'foo', 'rm',
                         'existing_%d' % (i + DIVERGENT_WRITE)], wait=False)
    time.sleep(10)
    mon.run(args=['killall', '-9', 'rados'], wait=True, check_status=False)

    # kill all the osds but leave divergent in
    log.info('killing all the osds')
    for i in osds:
        manager.kill_osd(i)
    for i in osds:
        manager.mark_down_osd(i)
    for i in non_divergent:
        manager.mark_out_osd(i)

    # bring up non-divergent
    log.info("bringing up non_divergent %s", str(non_divergent))
    for i in non_divergent:
        manager.revive_osd(i)
    for i in non_divergent:
        manager.mark_in_osd(i)

    # write 1 non-divergent object (ensure that old divergent one is divergent)
    objname = "existing_%d" % (DIVERGENT_WRITE + DIVERGENT_REMOVE)
    log.info('writing non-divergent object ' + objname)
    rados(ctx, mon, ['-p', 'foo', 'put', objname, dummyfile2])

    manager.wait_for_recovery()

    # ensure no recovery of up osds first
    log.info('delay recovery')
    for i in non_divergent:
        manager.wait_run_admin_socket('osd', i,
                                      ['set_recovery_delay', '100000'])

    # bring in our divergent friend
    log.info("revive divergent %d", divergent)
    manager.raw_cluster_cmd('osd', 'set', 'noup')
    manager.revive_osd(divergent)

    log.info('delay recovery divergent')
    manager.wait_run_admin_socket('osd', divergent,
                                  ['set_recovery_delay', '100000'])

    manager.raw_cluster_cmd('osd', 'unset', 'noup')
    while len(manager.get_osd_status()['up']) < 3:
        time.sleep(10)

    log.info('wait for peering')
    rados(ctx, mon, ['-p', 'foo', 'put', 'foo', dummyfile])

    # At this point the divergent_priors should have been detected

    log.info("killing divergent %d", divergent)
    manager.kill_osd(divergent)

    # Export a pg
    (exp_remote,) = ctx.\
        cluster.only('osd.{o}'.format(o=divergent)).remotes.iterkeys()
    FSPATH = manager.get_filepath()
    JPATH = os.path.join(FSPATH, "journal")
    prefix = ("sudo adjust-ulimits ceph-objectstore-tool "
              "--data-path {fpath} --journal-path {jpath} "
              "--log-file="
              "/var/log/ceph/objectstore_tool.$$.log ".format(
                  fpath=FSPATH, jpath=JPATH))
    pid = os.getpid()
    expfile = os.path.join(testdir, "exp.{pid}.out".format(pid=pid))
    cmd = ((prefix + "--op export --pgid 2.0 --file {file}").format(
        id=divergent, file=expfile))
    proc = exp_remote.run(args=cmd, wait=True,
                          check_status=False, stdout=StringIO())
    assert proc.exitstatus == 0

    cmd = ((prefix + "--op remove --pgid 2.0").format(
        id=divergent, file=expfile))
    proc = exp_remote.run(args=cmd, wait=True,
                          check_status=False, stdout=StringIO())
    assert proc.exitstatus == 0

    cmd = ((prefix + "--op import --file {file}").format(
        id=divergent, file=expfile))
    proc = exp_remote.run(args=cmd, wait=True,
                          check_status=False, stdout=StringIO())
    assert proc.exitstatus == 0

    log.info("reviving divergent %d", divergent)
    manager.revive_osd(divergent)
    manager.wait_run_admin_socket('osd', divergent, ['dump_ops_in_flight'])
    time.sleep(20)

    log.info('allowing recovery')
    # Set osd_recovery_delay_start back to 0 and kick the queue
    for i in osds:
        manager.raw_cluster_cmd('tell', 'osd.%d' % i, 'debug',
                                'kick_recovery_wq', ' 0')

    log.info('reading divergent objects')
    for i in range(DIVERGENT_WRITE + DIVERGENT_REMOVE):
        exit_status = rados(ctx, mon,
                            ['-p', 'foo', 'get', 'existing_%d' % i,
                             '/tmp/existing'])
        assert exit_status == 0

    cmd = 'rm {file}'.format(file=expfile)
    exp_remote.run(args=cmd, wait=True)
    log.info("success")

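# --- Illustrative sketch (not part of the original module) ---
# The ceph-objectstore-tool invocations the task above drives, shown as plain
# shell for reference. The paths, pgid and export file are examples only.
#
#   ceph-objectstore-tool --data-path $FSPATH --journal-path $JPATH \
#       --op export --pgid 2.0 --file /tmp/exp.out
#   ceph-objectstore-tool --data-path $FSPATH --journal-path $JPATH \
#       --op remove --pgid 2.0
#   ceph-objectstore-tool --data-path $FSPATH --journal-path $JPATH \
#       --op import --file /tmp/exp.out
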
def task(ctx, config):
    """
    Test peering.
    """
    if config is None:
        config = {}
    assert isinstance(config, dict), \
        'peer task only accepts a dict for configuration'
    first_mon = teuthology.get_first_mon(ctx, config)
    (mon,) = ctx.cluster.only(first_mon).remotes.iterkeys()

    manager = ceph_manager.CephManager(
        mon,
        ctx=ctx,
        logger=log.getChild('ceph_manager'),
        )

    while len(manager.get_osd_status()['up']) < 3:
        time.sleep(10)
    manager.raw_cluster_cmd('tell', 'osd.0', 'flush_pg_stats')
    manager.raw_cluster_cmd('tell', 'osd.1', 'flush_pg_stats')
    manager.raw_cluster_cmd('tell', 'osd.2', 'flush_pg_stats')
    manager.wait_for_clean()

    for i in range(3):
        manager.set_config(i, osd_recovery_delay_start=120)

    # take on osd down
    manager.kill_osd(2)
    manager.mark_down_osd(2)

    # kludge to make sure they get a map
    rados(ctx, mon, ['-p', 'data', 'get', 'dummy', '-'])

    manager.raw_cluster_cmd('tell', 'osd.0', 'flush_pg_stats')
    manager.raw_cluster_cmd('tell', 'osd.1', 'flush_pg_stats')
    manager.wait_for_recovery()

    # kill another and revive 2, so that some pgs can't peer.
    manager.kill_osd(1)
    manager.mark_down_osd(1)
    manager.revive_osd(2)
    manager.wait_till_osd_is_up(2)

    manager.raw_cluster_cmd('tell', 'osd.0', 'flush_pg_stats')
    manager.raw_cluster_cmd('tell', 'osd.2', 'flush_pg_stats')

    manager.wait_for_active_or_down()

    manager.raw_cluster_cmd('tell', 'osd.0', 'flush_pg_stats')
    manager.raw_cluster_cmd('tell', 'osd.2', 'flush_pg_stats')

    # look for down pgs
    num_down_pgs = 0
    pgs = manager.get_pg_stats()
    for pg in pgs:
        out = manager.raw_cluster_cmd('pg', pg['pgid'], 'query')
        log.debug("out string %s", out)
        j = json.loads(out)
        log.info("pg is %s, query json is %s", pg, j)

        if pg['state'].count('down'):
            num_down_pgs += 1
            # verify that it is blocked on osd.1
            rs = j['recovery_state']
            assert len(rs) > 0
            assert rs[0]['name'] == 'Started/Primary/Peering/GetInfo'
            assert rs[1]['name'] == 'Started/Primary/Peering'
            assert rs[1]['blocked']
            assert rs[1]['down_osds_we_would_probe'] == [1]
            assert len(rs[1]['peering_blocked_by']) == 1
            assert rs[1]['peering_blocked_by'][0]['osd'] == 1

    assert num_down_pgs > 0

    # bring it all back
    manager.revive_osd(1)
    manager.wait_till_osd_is_up(1)
    manager.raw_cluster_cmd('tell', 'osd.0', 'flush_pg_stats')
    manager.raw_cluster_cmd('tell', 'osd.1', 'flush_pg_stats')
    manager.raw_cluster_cmd('tell', 'osd.2', 'flush_pg_stats')
    manager.wait_for_clean()

def task(ctx, config):
    """
    Test handling of lost objects on an ec pool.

    A pretty rigid cluster is brought up and tested by this task
    """
    if config is None:
        config = {}
    assert isinstance(config, dict), \
        'lost_unfound task only accepts a dict for configuration'
    first_mon = teuthology.get_first_mon(ctx, config)
    (mon,) = ctx.cluster.only(first_mon).remotes.iterkeys()

    manager = ceph_manager.CephManager(
        mon,
        ctx=ctx,
        logger=log.getChild('ceph_manager'),
        )

    manager.raw_cluster_cmd('tell', 'osd.0', 'flush_pg_stats')
    manager.raw_cluster_cmd('tell', 'osd.1', 'flush_pg_stats')
    manager.raw_cluster_cmd('tell', 'osd.2', 'flush_pg_stats')
    manager.raw_cluster_cmd('tell', 'osd.3', 'flush_pg_stats')
    manager.wait_for_clean()

    profile = config.get('erasure_code_profile', {
        'k': '2',
        'm': '2',
        'ruleset-failure-domain': 'osd'
    })
    profile_name = profile.get('name', 'lost_unfound')
    manager.create_erasure_code_profile(profile_name, profile)
    pool = manager.create_pool_with_unique_name(
        erasure_code_profile_name=profile_name)

    # something that is always there, readable and never empty
    dummyfile = '/etc/group'

    # kludge to make sure they get a map
    rados(ctx, mon, ['-p', pool, 'put', 'dummy', dummyfile])

    manager.raw_cluster_cmd('tell', 'osd.0', 'flush_pg_stats')
    manager.raw_cluster_cmd('tell', 'osd.1', 'flush_pg_stats')
    manager.wait_for_recovery()

    # create old objects
    for f in range(1, 10):
        rados(ctx, mon, ['-p', pool, 'put', 'existing_%d' % f, dummyfile])
        rados(ctx, mon, ['-p', pool, 'put', 'existed_%d' % f, dummyfile])
        rados(ctx, mon, ['-p', pool, 'rm', 'existed_%d' % f])

    # delay recovery, and make the pg log very long (to prevent backfill)
    manager.raw_cluster_cmd(
        'tell', 'osd.1',
        'injectargs',
        '--osd-recovery-delay-start 1000 --osd-min-pg-log-entries 100000000')

    manager.kill_osd(0)
    manager.mark_down_osd(0)
    manager.kill_osd(3)
    manager.mark_down_osd(3)

    for f in range(1, 10):
        rados(ctx, mon, ['-p', pool, 'put', 'new_%d' % f, dummyfile])
        rados(ctx, mon, ['-p', pool, 'put', 'existed_%d' % f, dummyfile])
        rados(ctx, mon, ['-p', pool, 'put', 'existing_%d' % f, dummyfile])

    # take out osd.1 and a necessary shard of those objects.
    manager.kill_osd(1)
    manager.mark_down_osd(1)
    manager.raw_cluster_cmd('osd', 'lost', '1', '--yes-i-really-mean-it')
    manager.revive_osd(0)
    manager.wait_till_osd_is_up(0)
    manager.revive_osd(3)
    manager.wait_till_osd_is_up(3)

    manager.raw_cluster_cmd('tell', 'osd.0', 'flush_pg_stats')
    manager.raw_cluster_cmd('tell', 'osd.2', 'flush_pg_stats')
    manager.raw_cluster_cmd('tell', 'osd.3', 'flush_pg_stats')
    manager.wait_till_active()
    manager.raw_cluster_cmd('tell', 'osd.0', 'flush_pg_stats')
    manager.raw_cluster_cmd('tell', 'osd.2', 'flush_pg_stats')
    manager.raw_cluster_cmd('tell', 'osd.3', 'flush_pg_stats')

    # verify that there are unfound objects
    unfound = manager.get_num_unfound_objects()
    log.info("there are %d unfound objects" % unfound)
    assert unfound

    # mark stuff lost
    pgs = manager.get_pg_stats()
    for pg in pgs:
        if pg['stat_sum']['num_objects_unfound'] > 0:
            # verify that i can list them direct from the osd
            log.info('listing missing/lost in %s state %s',
                     pg['pgid'], pg['state'])
            m = manager.list_pg_missing(pg['pgid'])
            log.info('%s' % m)
            assert m['num_unfound'] == pg['stat_sum']['num_objects_unfound']

            log.info("reverting unfound in %s", pg['pgid'])
            manager.raw_cluster_cmd('pg', pg['pgid'],
                                    'mark_unfound_lost', 'delete')
        else:
            log.info("no unfound in %s", pg['pgid'])

    manager.raw_cluster_cmd('tell', 'osd.0', 'debug', 'kick_recovery_wq', '5')
    manager.raw_cluster_cmd('tell', 'osd.2', 'debug', 'kick_recovery_wq', '5')
    manager.raw_cluster_cmd('tell', 'osd.3', 'debug', 'kick_recovery_wq', '5')
    manager.raw_cluster_cmd('tell', 'osd.0', 'flush_pg_stats')
    manager.raw_cluster_cmd('tell', 'osd.2', 'flush_pg_stats')
    manager.raw_cluster_cmd('tell', 'osd.3', 'flush_pg_stats')
    manager.wait_for_recovery()

    # verify result
    for f in range(1, 10):
        err = rados(ctx, mon, ['-p', pool, 'get', 'new_%d' % f, '-'])
        assert err
        err = rados(ctx, mon, ['-p', pool, 'get', 'existed_%d' % f, '-'])
        assert err
        err = rados(ctx, mon, ['-p', pool, 'get', 'existing_%d' % f, '-'])
        assert err

    # see if osd.1 can cope
    manager.revive_osd(1)
    manager.wait_till_osd_is_up(1)
    manager.wait_for_clean()

def test_objectstore(ctx, config, cli_remote, REP_POOL, REP_NAME, ec=False):
    manager = ctx.managers['ceph']

    osds = ctx.cluster.only(teuthology.is_type('osd'))

    TEUTHDIR = teuthology.get_testdir(ctx)
    DATADIR = os.path.join(TEUTHDIR, "ceph.data")
    DATALINECOUNT = 10000
    ERRORS = 0
    NUM_OBJECTS = config.get('objects', 10)
    log.info("objects: {num}".format(num=NUM_OBJECTS))

    pool_dump = manager.get_pool_dump(REP_POOL)
    REPID = pool_dump['pool']

    log.debug("repid={num}".format(num=REPID))

    db = {}

    LOCALDIR = tempfile.mkdtemp("cod")

    cod_setup_local_data(log, ctx, NUM_OBJECTS, LOCALDIR,
                         REP_NAME, DATALINECOUNT)
    allremote = []
    allremote.append(cli_remote)
    allremote += osds.remotes.keys()
    allremote = list(set(allremote))
    for remote in allremote:
        cod_setup_remote_data(log, ctx, remote, NUM_OBJECTS, DATADIR,
                              REP_NAME, DATALINECOUNT)

    ERRORS += cod_setup(log, ctx, cli_remote, NUM_OBJECTS, DATADIR,
                        REP_NAME, DATALINECOUNT, REP_POOL, db, ec)

    pgs = {}
    for stats in manager.get_pg_stats():
        if stats["pgid"].find(str(REPID) + ".") != 0:
            continue
        if pool_dump["type"] == ceph_manager.PoolType.REPLICATED:
            for osd in stats["acting"]:
                pgs.setdefault(osd, []).append(stats["pgid"])
        elif pool_dump["type"] == ceph_manager.PoolType.ERASURE_CODED:
            shard = 0
            for osd in stats["acting"]:
                pgs.setdefault(osd, []).append("{pgid}s{shard}".format(
                    pgid=stats["pgid"], shard=shard))
                shard += 1
        else:
            raise Exception("{pool} has an unexpected type {type}".format(
                pool=REP_POOL, type=pool_dump["type"]))

    log.info(pgs)
    log.info(db)

    for osd in manager.get_osd_status()['up']:
        manager.kill_osd(osd)
    time.sleep(5)

    pgswithobjects = set()
    objsinpg = {}

    # Test --op list and generate json for all objects
    log.info("Test --op list by generating json for all objects")
    prefix = ("sudo ceph-objectstore-tool "
              "--data-path {fpath} "
              "--journal-path {jpath} ").format(fpath=FSPATH, jpath=JPATH)
    for remote in osds.remotes.keys():
        log.debug(remote)
        log.debug(osds.remotes[remote])
        for role in osds.remotes[remote]:
            if string.find(role, "osd.") != 0:
                continue
            osdid = int(role.split('.')[1])
            log.info("process osd.{id} on {remote}".format(
                id=osdid, remote=remote))
            cmd = (prefix + "--op list").format(id=osdid)
            proc = remote.run(args=cmd.split(), check_status=False,
                              stdout=StringIO())
            if proc.exitstatus != 0:
                log.error("Bad exit status {ret} from --op list request".format(
                    ret=proc.exitstatus))
                ERRORS += 1
            else:
                for pgline in proc.stdout.getvalue().splitlines():
                    if not pgline:
                        continue
                    (pg, obj) = json.loads(pgline)
                    name = obj['oid']
                    if name in db:
                        pgswithobjects.add(pg)
                        objsinpg.setdefault(pg, []).append(name)
                        db[name].setdefault("pg2json", {})[pg] = json.dumps(obj)

    log.info(db)
    log.info(pgswithobjects)
    log.info(objsinpg)

    if pool_dump["type"] == ceph_manager.PoolType.REPLICATED:
        # Test get-bytes
        log.info("Test get-bytes and set-bytes")
        for basename in db.keys():
            file = os.path.join(DATADIR, basename)
            GETNAME = os.path.join(DATADIR, "get")
            SETNAME = os.path.join(DATADIR, "set")

            for remote in osds.remotes.keys():
                for role in osds.remotes[remote]:
                    if string.find(role, "osd.") != 0:
                        continue
                    osdid = int(role.split('.')[1])
                    if osdid not in pgs:
                        continue

                    for pg, JSON in db[basename]["pg2json"].iteritems():
                        if pg in pgs[osdid]:
                            cmd = ((prefix + "--pgid {pg}").format(
                                id=osdid, pg=pg).split())
                            cmd.append(run.Raw("'{json}'".format(json=JSON)))
                            cmd += ("get-bytes {fname}".format(
                                fname=GETNAME).split())
                            proc = remote.run(args=cmd, check_status=False)
                            if proc.exitstatus != 0:
                                remote.run(args="rm -f {getfile}".format(
                                    getfile=GETNAME).split())
                                log.error("Bad exit status {ret}".format(
                                    ret=proc.exitstatus))
                                ERRORS += 1
                                continue
                            cmd = ("diff -q {file} {getfile}".format(
                                file=file, getfile=GETNAME))
                            proc = remote.run(args=cmd.split())
                            if proc.exitstatus != 0:
                                log.error("Data from get-bytes differ")
                                # log.debug("Got:")
                                # cat_file(logging.DEBUG, GETNAME)
                                # log.debug("Expected:")
                                # cat_file(logging.DEBUG, file)
                                ERRORS += 1
                            remote.run(args="rm -f {getfile}".format(
                                getfile=GETNAME).split())

                            data = ("put-bytes going into {file}\n".format(
                                file=file))
                            teuthology.write_file(remote, SETNAME, data)
                            cmd = ((prefix + "--pgid {pg}").format(
                                id=osdid, pg=pg).split())
                            cmd.append(run.Raw("'{json}'".format(json=JSON)))
                            cmd += ("set-bytes {fname}".format(
                                fname=SETNAME).split())
                            proc = remote.run(args=cmd, check_status=False)
                            proc.wait()
                            if proc.exitstatus != 0:
                                log.info(
                                    "set-bytes failed for object {obj} "
                                    "in pg {pg} osd.{id} ret={ret}".format(
                                        obj=basename, pg=pg, id=osdid,
                                        ret=proc.exitstatus))
                                ERRORS += 1

                            cmd = ((prefix + "--pgid {pg}").format(
                                id=osdid, pg=pg).split())
                            cmd.append(run.Raw("'{json}'".format(json=JSON)))
                            cmd += "get-bytes -".split()
                            proc = remote.run(args=cmd, check_status=False,
                                              stdout=StringIO())
                            proc.wait()
                            if proc.exitstatus != 0:
                                log.error("get-bytes after "
                                          "set-bytes ret={ret}".format(
                                              ret=proc.exitstatus))
                                ERRORS += 1
                            else:
                                if data != proc.stdout.getvalue():
                                    log.error("Data inconsistent after "
                                              "set-bytes, got:")
                                    log.error(proc.stdout.getvalue())
                                    ERRORS += 1

                            cmd = ((prefix + "--pgid {pg}").format(
                                id=osdid, pg=pg).split())
                            cmd.append(run.Raw("'{json}'".format(json=JSON)))
                            cmd += ("set-bytes {fname}".format(
                                fname=file).split())
                            proc = remote.run(args=cmd, check_status=False)
                            proc.wait()
                            if proc.exitstatus != 0:
                                log.info(
                                    "set-bytes failed for object {obj} "
                                    "in pg {pg} osd.{id} ret={ret}".format(
                                        obj=basename, pg=pg, id=osdid,
                                        ret=proc.exitstatus))
                                ERRORS += 1

    log.info("Test list-attrs get-attr")
    for basename in db.keys():
        file = os.path.join(DATADIR, basename)
        GETNAME = os.path.join(DATADIR, "get")
        SETNAME = os.path.join(DATADIR, "set")

        for remote in osds.remotes.keys():
            for role in osds.remotes[remote]:
                if string.find(role, "osd.") != 0:
                    continue
                osdid = int(role.split('.')[1])
                if osdid not in pgs:
                    continue

                for pg, JSON in db[basename]["pg2json"].iteritems():
                    if pg in pgs[osdid]:
                        cmd = ((prefix + "--pgid {pg}").format(
                            id=osdid, pg=pg).split())
                        cmd.append(run.Raw("'{json}'".format(json=JSON)))
                        cmd += ["list-attrs"]
                        proc = remote.run(args=cmd, check_status=False,
                                          stdout=StringIO(), stderr=StringIO())
                        proc.wait()
                        if proc.exitstatus != 0:
                            log.error("Bad exit status {ret}".format(
                                ret=proc.exitstatus))
                            ERRORS += 1
                            continue
                        keys = proc.stdout.getvalue().split()
                        values = dict(db[basename]["xattr"])

                        for key in keys:
                            if (key == "_" or
                                    key == "snapset" or
                                    key == "hinfo_key"):
                                continue
                            key = key.strip("_")
                            if key not in values:
                                log.error(
                                    "The key {key} should be present".format(
                                        key=key))
                                ERRORS += 1
                                continue
                            exp = values.pop(key)
                            cmd = ((prefix + "--pgid {pg}").format(
                                id=osdid, pg=pg).split())
                            cmd.append(run.Raw("'{json}'".format(json=JSON)))
                            cmd += ("get-attr {key}".format(
                                key="_" + key).split())
                            proc = remote.run(args=cmd, check_status=False,
                                              stdout=StringIO())
                            proc.wait()
                            if proc.exitstatus != 0:
                                log.error("get-attr failed with {ret}".format(
                                    ret=proc.exitstatus))
                                ERRORS += 1
                                continue
                            val = proc.stdout.getvalue()
                            if exp != val:
                                log.error("For key {key} got value {got} "
                                          "instead of {expected}".format(
                                              key=key, got=val, expected=exp))
                                ERRORS += 1
                        if "hinfo_key" in keys:
                            cmd_prefix = prefix.format(id=osdid)
                            cmd = """
expected=$({prefix} --pgid {pg} '{json}' get-attr {key} | base64)
echo placeholder | {prefix} --pgid {pg} '{json}' set-attr {key} -
test $({prefix} --pgid {pg} '{json}' get-attr {key}) = placeholder
echo $expected | base64 --decode | \
    {prefix} --pgid {pg} '{json}' set-attr {key} -
test $({prefix} --pgid {pg} '{json}' get-attr {key} | base64) = $expected
                            """.format(prefix=cmd_prefix, pg=pg, json=JSON,
                                       key="hinfo_key")
                            log.debug(cmd)
                            proc = remote.run(
                                args=['bash', '-e', '-x', '-c', cmd],
                                check_status=False,
                                stdout=StringIO(),
                                stderr=StringIO())
                            proc.wait()
                            if proc.exitstatus != 0:
                                log.error("failed with " +
                                          str(proc.exitstatus))
                                log.error(proc.stdout.getvalue() + " " +
                                          proc.stderr.getvalue())
                                ERRORS += 1

                        if len(values) != 0:
                            log.error("Not all keys found, remaining keys:")
                            log.error(values)

    log.info("Test pg info")
    for remote in osds.remotes.keys():
        for role in osds.remotes[remote]:
            if string.find(role, "osd.") != 0:
                continue
            osdid = int(role.split('.')[1])
            if osdid not in pgs:
                continue

            for pg in pgs[osdid]:
                cmd = ((prefix + "--op info --pgid {pg}").format(
                    id=osdid, pg=pg).split())
                proc = remote.run(args=cmd, check_status=False,
                                  stdout=StringIO())
                proc.wait()
                if proc.exitstatus != 0:
                    log.error("Failure of --op info command with {ret}".format(
                        ret=proc.exitstatus))
                    ERRORS += 1
                    continue
                info = proc.stdout.getvalue()
                if not str(pg) in info:
                    log.error("Bad data from info: {info}".format(info=info))
                    ERRORS += 1

    log.info("Test pg logging")
    for remote in osds.remotes.keys():
        for role in osds.remotes[remote]:
            if string.find(role, "osd.") != 0:
                continue
            osdid = int(role.split('.')[1])
            if osdid not in pgs:
                continue

            for pg in pgs[osdid]:
                cmd = ((prefix + "--op log --pgid {pg}").format(
                    id=osdid, pg=pg).split())
                proc = remote.run(args=cmd, check_status=False,
                                  stdout=StringIO())
                proc.wait()
                if proc.exitstatus != 0:
                    log.error("Getting log failed for pg {pg} "
                              "from osd.{id} with {ret}".format(
                                  pg=pg, id=osdid, ret=proc.exitstatus))
                    ERRORS += 1
                    continue
                HASOBJ = pg in pgswithobjects
                MODOBJ = "modify" in proc.stdout.getvalue()
                if HASOBJ != MODOBJ:
                    log.error("Bad log for pg {pg} from osd.{id}".format(
                        pg=pg, id=osdid))
                    MSG = (HASOBJ and [""] or ["NOT "])[0]
                    log.error(
                        "Log should {msg}have a modify entry".format(msg=MSG))
                    ERRORS += 1

    log.info("Test pg export")
    EXP_ERRORS = 0
    for remote in osds.remotes.keys():
        for role in osds.remotes[remote]:
            if string.find(role, "osd.") != 0:
                continue
            osdid = int(role.split('.')[1])
            if osdid not in pgs:
                continue

            for pg in pgs[osdid]:
                fpath = os.path.join(DATADIR,
                                     "osd{id}.{pg}".format(id=osdid, pg=pg))

                cmd = ((prefix + "--op export --pgid {pg} --file {file}").format(
                    id=osdid, pg=pg, file=fpath))
                proc = remote.run(args=cmd, check_status=False,
                                  stdout=StringIO())
                proc.wait()
                if proc.exitstatus != 0:
                    log.error("Exporting failed for pg {pg} "
                              "on osd.{id} with {ret}".format(
                                  pg=pg, id=osdid, ret=proc.exitstatus))
                    EXP_ERRORS += 1

    ERRORS += EXP_ERRORS

    log.info("Test pg removal")
    RM_ERRORS = 0
    for remote in osds.remotes.keys():
        for role in osds.remotes[remote]:
            if string.find(role, "osd.") != 0:
                continue
            osdid = int(role.split('.')[1])
            if osdid not in pgs:
                continue

            for pg in pgs[osdid]:
                cmd = ((prefix + "--force --op remove --pgid {pg}").format(
                    pg=pg, id=osdid))
                proc = remote.run(args=cmd, check_status=False,
                                  stdout=StringIO())
                proc.wait()
                if proc.exitstatus != 0:
                    log.error("Removing failed for pg {pg} "
                              "on osd.{id} with {ret}".format(
                                  pg=pg, id=osdid, ret=proc.exitstatus))
                    RM_ERRORS += 1

    ERRORS += RM_ERRORS

    IMP_ERRORS = 0
    if EXP_ERRORS == 0 and RM_ERRORS == 0:
        log.info("Test pg import")
        for remote in osds.remotes.keys():
            for role in osds.remotes[remote]:
                if string.find(role, "osd.") != 0:
                    continue
                osdid = int(role.split('.')[1])
                if osdid not in pgs:
                    continue

                for pg in pgs[osdid]:
                    fpath = os.path.join(
                        DATADIR, "osd{id}.{pg}".format(id=osdid, pg=pg))

                    cmd = ((prefix + "--op import --file {file}").format(
                        id=osdid, file=fpath))
                    proc = remote.run(args=cmd, check_status=False,
                                      stdout=StringIO())
                    proc.wait()
                    if proc.exitstatus != 0:
                        log.error(
                            "Import failed from {file} with {ret}".format(
                                file=fpath, ret=proc.exitstatus))
                        IMP_ERRORS += 1
    else:
        log.warning("SKIPPING IMPORT TESTS DUE TO PREVIOUS FAILURES")

    ERRORS += IMP_ERRORS

    if EXP_ERRORS == 0 and RM_ERRORS == 0 and IMP_ERRORS == 0:
        log.info("Restarting OSDs....")
        # They still look to be up because of the nodown flag set earlier
        for osd in manager.get_osd_status()['up']:
            manager.revive_osd(osd)
        # Wait for health?
        time.sleep(5)
        # Let scrub after test runs verify consistency of all copies
        log.info("Verify replicated import data")
        objects = range(1, NUM_OBJECTS + 1)
        for i in objects:
            NAME = REP_NAME + "{num}".format(num=i)
            TESTNAME = os.path.join(DATADIR, "gettest")
            REFNAME = os.path.join(DATADIR, NAME)

            proc = rados(ctx, cli_remote,
                         ['-p', REP_POOL, 'get', NAME, TESTNAME], wait=False)

            ret = proc.wait()
            if ret != 0:
                log.error("After import, rados get failed with {ret}".format(
                    ret=proc.exitstatus))
                ERRORS += 1
                continue

            cmd = "diff -q {gettest} {ref}".format(gettest=TESTNAME,
                                                   ref=REFNAME)
            proc = cli_remote.run(args=cmd, check_status=False)
            proc.wait()
            if proc.exitstatus != 0:
                log.error("Data comparison failed for {obj}".format(obj=NAME))
                ERRORS += 1

    return ERRORS

def task(ctx, config):
    """
    Test handling resolve stuck peering

    requires 3 osds on a single test node
    """
    if config is None:
        config = {}
    assert isinstance(config, dict), \
        'Resolve stuck peering only accepts a dict for config'

    manager = ctx.managers['ceph']

    while len(manager.get_osd_status()['up']) < 3:
        time.sleep(10)

    manager.wait_for_clean()

    dummyfile = '/etc/fstab'
    dummyfile1 = '/etc/resolv.conf'

    # create 1 PG pool
    pool = 'foo'
    log.info('creating pool foo')
    manager.raw_cluster_cmd('osd', 'pool', 'create', '%s' % pool, '1')

    # set min_size of the pool to 1
    # so that we can continue with I/O
    # when 2 osds are down
    manager.set_pool_property(pool, "min_size", 1)

    osds = [0, 1, 2]

    primary = manager.get_pg_primary('foo', 0)
    log.info("primary osd is %d", primary)

    others = list(osds)
    others.remove(primary)

    log.info('writing initial objects')
    first_mon = teuthology.get_first_mon(ctx, config)
    (mon,) = ctx.cluster.only(first_mon).remotes.keys()
    # create few objects
    for i in range(100):
        rados(ctx, mon, ['-p', 'foo', 'put', 'existing_%d' % i, dummyfile])

    manager.wait_for_clean()

    # kill other osds except primary
    log.info('killing other osds except primary')
    for i in others:
        manager.kill_osd(i)
    for i in others:
        manager.mark_down_osd(i)

    for i in range(100):
        rados(ctx, mon, ['-p', 'foo', 'put', 'new_%d' % i, dummyfile1])

    # kill primary osd
    manager.kill_osd(primary)
    manager.mark_down_osd(primary)

    # revive other 2 osds
    for i in others:
        manager.revive_osd(i)

    # make sure that pg is down
    # Assuming pg number for single pg pool will start from 0
    pgnum = 0
    pgstr = manager.get_pgid(pool, pgnum)
    stats = manager.get_single_pg_stats(pgstr)
    print(stats['state'])

    timeout = 60
    start = time.time()

    while 'down' not in stats['state']:
        assert time.time() - start < timeout, \
            'failed to reach down state before timeout expired'
        stats = manager.get_single_pg_stats(pgstr)

    # mark primary as lost
    manager.raw_cluster_cmd('osd', 'lost', '%d' % primary,
                            '--yes-i-really-mean-it')

    # expect the pg status to be active+undersized+degraded
    # pg should recover and become active+clean within timeout
    stats = manager.get_single_pg_stats(pgstr)
    print(stats['state'])

    timeout = 10
    start = time.time()

    while manager.get_num_down():
        assert time.time() - start < timeout, \
            'failed to recover before timeout expired'

    manager.revive_osd(primary)

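# --- Illustrative sketch (not part of the original module) ---
# How a teuthology job YAML might reference the task above; the task name is
# assumed to match this module's name and is shown only as an example.
#
#   tasks:
#   - install:
#   - ceph:
#   - resolve_stuck_peering:
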
def task(ctx, config):
    """
    Test handling of object location going down
    """
    if config is None:
        config = {}
    assert isinstance(config, dict), \
        'lost_unfound task only accepts a dict for configuration'
    first_mon = teuthology.get_first_mon(ctx, config)
    (mon,) = ctx.cluster.only(first_mon).remotes.keys()

    manager = ceph_manager.CephManager(
        mon,
        ctx=ctx,
        logger=log.getChild('ceph_manager'),
        )

    while len(manager.get_osd_status()['up']) < 3:
        time.sleep(10)
    manager.wait_for_clean()

    # something that is always there
    dummyfile = '/etc/fstab'

    # take 0, 1 out
    manager.mark_out_osd(0)
    manager.mark_out_osd(1)
    manager.wait_for_clean()

    # delay recovery, and make the pg log very long (to prevent backfill)
    manager.raw_cluster_cmd(
        'tell', 'osd.0',
        'injectargs',
        '--osd-recovery-delay-start 10000 --osd-min-pg-log-entries 100000000')
    # delay recovery, and make the pg log very long (to prevent backfill)
    manager.raw_cluster_cmd(
        'tell', 'osd.1',
        'injectargs',
        '--osd-recovery-delay-start 10000 --osd-min-pg-log-entries 100000000')
    # delay recovery, and make the pg log very long (to prevent backfill)
    manager.raw_cluster_cmd(
        'tell', 'osd.2',
        'injectargs',
        '--osd-recovery-delay-start 10000 --osd-min-pg-log-entries 100000000')
    # delay recovery, and make the pg log very long (to prevent backfill)
    manager.raw_cluster_cmd(
        'tell', 'osd.3',
        'injectargs',
        '--osd-recovery-delay-start 10000 --osd-min-pg-log-entries 100000000')

    # kludge to make sure they get a map
    rados(ctx, mon, ['-p', 'data', 'put', 'dummy', dummyfile])

    # create old objects
    for f in range(1, 10):
        rados(ctx, mon, ['-p', 'data', 'put', 'existing_%d' % f, dummyfile])

    manager.mark_out_osd(3)
    manager.wait_till_active()

    manager.mark_in_osd(0)
    manager.wait_till_active()

    manager.flush_pg_stats([2, 0])

    manager.mark_out_osd(2)
    manager.wait_till_active()

    # bring up 1
    manager.mark_in_osd(1)
    manager.wait_till_active()

    manager.flush_pg_stats([0, 1])
    log.info("Getting unfound objects")
    unfound = manager.get_num_unfound_objects()
    assert not unfound

    manager.kill_osd(2)
    manager.mark_down_osd(2)
    manager.kill_osd(3)
    manager.mark_down_osd(3)

    manager.flush_pg_stats([0, 1])
    log.info("Getting unfound objects")
    unfound = manager.get_num_unfound_objects()
    assert unfound

def task(ctx, config):
    """
    Run ceph_objectstore_tool test

    The config should be as follows::

        ceph_objectstore_tool:
          objects: <number of objects>
    """

    if config is None:
        config = {}
    assert isinstance(config, dict), \
        'ceph_objectstore_tool task only accepts a dict for configuration'
    TEUTHDIR = teuthology.get_testdir(ctx)

    # clients = config['clients']
    # assert len(clients) > 0,
    #    'ceph_objectstore_tool task needs at least 1 client'

    REP_POOL = "rep_pool"
    REP_NAME = "REPobject"
    # EC_POOL = "ec_pool"
    # EC_NAME = "ECobject"
    NUM_OBJECTS = config.get('objects', 10)
    ERRORS = 0
    DATADIR = os.path.join(TEUTHDIR, "data")
    # Put a test dir below the data dir
    # TESTDIR = os.path.join(DATADIR, "test")
    DATALINECOUNT = 10000
    # PROFNAME = "testecprofile"

    log.info('Beginning ceph_objectstore_tool...')
    log.info("objects: {num}".format(num=NUM_OBJECTS))

    log.debug(config)
    log.debug(ctx)
    clients = ctx.cluster.only(teuthology.is_type('client'))
    assert len(clients.remotes) > 0, 'Must specify at least 1 client'
    (cli_remote, _) = clients.remotes.popitem()
    log.debug(cli_remote)

    # clients = dict(teuthology.get_clients(ctx=ctx, roles=config.keys()))
    # client = clients.popitem()
    # log.info(client)
    osds = ctx.cluster.only(teuthology.is_type('osd'))
    log.info("OSDS")
    log.info(osds)
    log.info(osds.remotes)

    first_mon = teuthology.get_first_mon(ctx, config)
    (mon,) = ctx.cluster.only(first_mon).remotes.iterkeys()
    manager = ceph_manager.CephManager(
        mon,
        ctx=ctx,
        config=config,
        logger=log.getChild('ceph_manager'),
        )
    ctx.manager = manager

    # ctx.manager.raw_cluster_cmd('osd', 'pool', 'create', REP_POOL,
    #                             '12', '12', 'replicated')
    create_replicated_pool(cli_remote, REP_POOL, 12)
    REPID = get_pool_id(ctx, REP_POOL)

    log.debug("repid={num}".format(num=REPID))

    while len(manager.get_osd_status()['up']) != \
            len(manager.get_osd_status()['raw']):
        time.sleep(10)
    while len(manager.get_osd_status()['in']) != \
            len(manager.get_osd_status()['up']):
        time.sleep(10)
    manager.raw_cluster_cmd('osd', 'set', 'noout')
    manager.raw_cluster_cmd('osd', 'set', 'nodown')

    db = {}

    LOCALDIR = tempfile.mkdtemp("cod")

    cod_setup_local_data(log, ctx, NUM_OBJECTS, LOCALDIR,
                         REP_NAME, DATALINECOUNT)
    allremote = []
    allremote.append(cli_remote)
    allremote += osds.remotes.keys()
    allremote = list(set(allremote))
    for remote in allremote:
        cod_setup_remote_data(log, ctx, remote, NUM_OBJECTS, DATADIR,
                              REP_NAME, DATALINECOUNT)

    ERRORS += cod_setup(log, ctx, cli_remote, NUM_OBJECTS, DATADIR,
                        REP_NAME, DATALINECOUNT, REP_POOL, db)

    pgs = {}
    jsontext = manager.raw_cluster_cmd('pg', 'dump_json')
    pgdump = json.loads(jsontext)
    PGS = [str(p["pgid"]) for p in pgdump["pg_stats"]
           if p["pgid"].find(str(REPID) + ".") == 0]
    for stats in pgdump["pg_stats"]:
        if stats["pgid"] in PGS:
            for osd in stats["acting"]:
                if osd not in pgs:
                    pgs[osd] = []
                pgs[osd].append(stats["pgid"])

    log.info(pgs)
    log.info(db)

    for osd in manager.get_osd_status()['up']:
        manager.kill_osd(osd)
    time.sleep(5)

    pgswithobjects = set()
    objsinpg = {}

    # Test --op list and generate json for all objects
    log.info("Test --op list by generating json for all objects")
    prefix = ("sudo ceph_objectstore_tool --data-path {fpath} "
              "--journal-path {jpath} ").format(fpath=FSPATH, jpath=JPATH)
    for remote in osds.remotes.iterkeys():
        log.debug(remote)
        log.debug(osds.remotes[remote])
        for role in osds.remotes[remote]:
            if string.find(role, "osd.") != 0:
                continue
            osdid = int(role.split('.')[1])
            log.info("process osd.{id} on {remote}".format(
                id=osdid, remote=remote))
            for pg in pgs[osdid]:
                cmd = (prefix + "--op list --pgid {pg}").format(id=osdid, pg=pg)
                proc = remote.run(args=cmd.split(), check_status=False,
                                  stdout=StringIO())
                # proc.wait()
                if proc.exitstatus != 0:
                    log.error("Bad exit status {ret} from "
                              "--op list request".format(ret=proc.exitstatus))
                    ERRORS += 1
                else:
                    data = proc.stdout.getvalue()
                    if len(data):
                        # This pg has some objects in it
                        pgswithobjects.add(pg)
                        pglines = data.split('\n')
                        # All copies of a pg are the same so we can overwrite
                        objsinpg[pg] = []
                        while(len(pglines)):
                            # Drop any blank lines
                            if (len(pglines[-1]) == 0):
                                pglines.pop()
                                continue
                            objjson = pglines.pop()
                            name = json.loads(objjson)['oid']
                            objsinpg[pg].append(name)
                            db[name]["pgid"] = pg
                            db[name]["json"] = objjson

    log.info(db)
    log.info(pgswithobjects)
    log.info(objsinpg)

    # Test get-bytes
    log.info("Test get-bytes and set-bytes")
    for basename in db.keys():
        file = os.path.join(DATADIR, basename)
        JSON = db[basename]["json"]
        GETNAME = os.path.join(DATADIR, "get")
        SETNAME = os.path.join(DATADIR, "set")

        for remote in osds.remotes.iterkeys():
            for role in osds.remotes[remote]:
                if string.find(role, "osd.") != 0:
                    continue
                osdid = int(role.split('.')[1])

                pg = db[basename]['pgid']
                if pg in pgs[osdid]:
                    cmd = (prefix + "--pgid {pg}").format(id=osdid, pg=pg).split()
                    cmd.append(run.Raw("'{json}'".format(json=JSON)))
                    cmd += "get-bytes {fname}".format(fname=GETNAME).split()
                    proc = remote.run(args=cmd, check_status=False)
                    if proc.exitstatus != 0:
                        remote.run(args="rm -f {getfile}".format(
                            getfile=GETNAME).split())
                        log.error("Bad exit status {ret}".format(
                            ret=proc.exitstatus))
                        ERRORS += 1
                        continue
                    cmd = "diff -q {file} {getfile}".format(file=file,
                                                            getfile=GETNAME)
                    proc = remote.run(args=cmd.split())
                    if proc.exitstatus != 0:
                        log.error("Data from get-bytes differ")
                        # log.debug("Got:")
                        # cat_file(logging.DEBUG, GETNAME)
                        # log.debug("Expected:")
                        # cat_file(logging.DEBUG, file)
                        ERRORS += 1
                    remote.run(args="rm -f {getfile}".format(
                        getfile=GETNAME).split())

                    data = "put-bytes going into {file}\n".format(file=file)
                    teuthology.write_file(remote, SETNAME, data)
                    cmd = (prefix + "--pgid {pg}").format(id=osdid, pg=pg).split()
                    cmd.append(run.Raw("'{json}'".format(json=JSON)))
                    cmd += "set-bytes {fname}".format(fname=SETNAME).split()
                    proc = remote.run(args=cmd, check_status=False)
                    proc.wait()
                    if proc.exitstatus != 0:
                        log.info("set-bytes failed for object {obj} "
                                 "in pg {pg} osd.{id} ret={ret}".format(
                                     obj=basename, pg=pg, id=osdid,
                                     ret=proc.exitstatus))
                        ERRORS += 1

                    cmd = (prefix + "--pgid {pg}").format(id=osdid, pg=pg).split()
                    cmd.append(run.Raw("'{json}'".format(json=JSON)))
                    cmd += "get-bytes -".split()
                    proc = remote.run(args=cmd, check_status=False,
                                      stdout=StringIO())
                    proc.wait()
                    if proc.exitstatus != 0:
                        log.error("get-bytes after set-bytes ret={ret}".format(
                            ret=proc.exitstatus))
                        ERRORS += 1
                    else:
                        if data != proc.stdout.getvalue():
                            log.error("Data inconsistent after set-bytes, got:")
                            log.error(proc.stdout.getvalue())
                            ERRORS += 1

                    cmd = (prefix + "--pgid {pg}").format(id=osdid, pg=pg).split()
                    cmd.append(run.Raw("'{json}'".format(json=JSON)))
                    cmd += "set-bytes {fname}".format(fname=file).split()
                    proc = remote.run(args=cmd, check_status=False)
                    proc.wait()
                    if proc.exitstatus != 0:
                        log.info("set-bytes failed for object {obj} "
                                 "in pg {pg} osd.{id} ret={ret}".format(
                                     obj=basename, pg=pg, id=osdid,
                                     ret=proc.exitstatus))
                        ERRORS += 1

    log.info("Test list-attrs get-attr")
    for basename in db.keys():
        file = os.path.join(DATADIR, basename)
        JSON = db[basename]["json"]
        GETNAME = os.path.join(DATADIR, "get")
        SETNAME = os.path.join(DATADIR, "set")

        for remote in osds.remotes.iterkeys():
            for role in osds.remotes[remote]:
                if string.find(role, "osd.") != 0:
                    continue
                osdid = int(role.split('.')[1])

                pg = db[basename]['pgid']
                if pg in pgs[osdid]:
                    cmd = (prefix + "--pgid {pg}").format(id=osdid, pg=pg).split()
                    cmd.append(run.Raw("'{json}'".format(json=JSON)))
                    cmd += ["list-attrs"]
                    proc = remote.run(args=cmd, check_status=False,
                                      stdout=StringIO(), stderr=StringIO())
                    proc.wait()
                    if proc.exitstatus != 0:
                        log.error("Bad exit status {ret}".format(
                            ret=proc.exitstatus))
                        ERRORS += 1
                        continue
                    keys = proc.stdout.getvalue().split()
                    values = dict(db[basename]["xattr"])

                    for key in keys:
                        if key == "_" or key == "snapset":
                            continue
                        key = key.strip("_")
                        if key not in values:
                            log.error("The key {key} should be present".format(
                                key=key))
                            ERRORS += 1
                            continue
                        exp = values.pop(key)
                        cmd = (prefix + "--pgid {pg}").format(
                            id=osdid, pg=pg).split()
                        cmd.append(run.Raw("'{json}'".format(json=JSON)))
                        cmd += "get-attr {key}".format(key="_" + key).split()
                        proc = remote.run(args=cmd, check_status=False,
                                          stdout=StringIO())
                        proc.wait()
                        if proc.exitstatus != 0:
                            log.error("get-attr failed with {ret}".format(
                                ret=proc.exitstatus))
                            ERRORS += 1
                            continue
                        val = proc.stdout.getvalue()
                        if exp != val:
                            log.error("For key {key} got value {got} "
                                      "instead of {expected}".format(
                                          key=key, got=val, expected=exp))
                            ERRORS += 1
                    if len(values) != 0:
                        log.error("Not all keys found, remaining keys:")
                        log.error(values)

    log.info("Test pg info")
    for remote in osds.remotes.iterkeys():
        for role in osds.remotes[remote]:
            if string.find(role, "osd.") != 0:
                continue
            osdid = int(role.split('.')[1])

            for pg in pgs[osdid]:
                cmd = (prefix + "--op info --pgid {pg}").format(
                    id=osdid, pg=pg).split()
                proc = remote.run(args=cmd, check_status=False,
                                  stdout=StringIO())
                proc.wait()
                if proc.exitstatus != 0:
                    log.error("Failure of --op info command with {ret}".format(
                        ret=proc.exitstatus))
                    ERRORS += 1
                    continue
                info = proc.stdout.getvalue()
                if not str(pg) in info:
                    log.error("Bad data from info: {info}".format(info=info))
                    ERRORS += 1

    log.info("Test pg logging")
    for remote in osds.remotes.iterkeys():
        for role in osds.remotes[remote]:
            if string.find(role, "osd.") != 0:
                continue
            osdid = int(role.split('.')[1])

            for pg in pgs[osdid]:
                cmd = (prefix + "--op log --pgid {pg}").format(
                    id=osdid, pg=pg).split()
                proc = remote.run(args=cmd, check_status=False,
                                  stdout=StringIO())
                proc.wait()
                if proc.exitstatus != 0:
                    log.error("Getting log failed for pg {pg} "
                              "from osd.{id} with {ret}".format(
                                  pg=pg, id=osdid, ret=proc.exitstatus))
                    ERRORS += 1
                    continue
                HASOBJ = pg in pgswithobjects
                MODOBJ = "modify" in proc.stdout.getvalue()
                if HASOBJ != MODOBJ:
                    log.error("Bad log for pg {pg} from osd.{id}".format(
                        pg=pg, id=osdid))
                    MSG = (HASOBJ and [""] or ["NOT "])[0]
                    log.error("Log should {msg}have a modify entry".format(
                        msg=MSG))
                    ERRORS += 1

    log.info("Test pg export")
    EXP_ERRORS = 0
    for remote in osds.remotes.iterkeys():
        for role in osds.remotes[remote]:
            if string.find(role, "osd.") != 0:
                continue
            osdid = int(role.split('.')[1])

            for pg in pgs[osdid]:
                fpath = os.path.join(DATADIR,
                                     "osd{id}.{pg}".format(id=osdid, pg=pg))
                cmd = (prefix + "--op export --pgid {pg} --file {file}").format(
                    id=osdid, pg=pg, file=fpath)
                proc = remote.run(args=cmd, check_status=False,
                                  stdout=StringIO())
                proc.wait()
                if proc.exitstatus != 0:
                    log.error("Exporting failed for pg {pg} "
                              "on osd.{id} with {ret}".format(
                                  pg=pg, id=osdid, ret=proc.exitstatus))
                    EXP_ERRORS += 1

    ERRORS += EXP_ERRORS

    log.info("Test pg removal")
    RM_ERRORS = 0
    for remote in osds.remotes.iterkeys():
        for role in osds.remotes[remote]:
            if string.find(role, "osd.") != 0:
                continue
            osdid = int(role.split('.')[1])

            for pg in pgs[osdid]:
                cmd = (prefix + "--op remove --pgid {pg}").format(
                    pg=pg, id=osdid)
                proc = remote.run(args=cmd, check_status=False,
                                  stdout=StringIO())
                proc.wait()
                if proc.exitstatus != 0:
                    log.error("Removing failed for pg {pg} "
                              "on osd.{id} with {ret}".format(
                                  pg=pg, id=osdid, ret=proc.exitstatus))
                    RM_ERRORS += 1

    ERRORS += RM_ERRORS

    IMP_ERRORS = 0
    if EXP_ERRORS == 0 and RM_ERRORS == 0:
        log.info("Test pg import")
        for remote in osds.remotes.iterkeys():
            for role in osds.remotes[remote]:
                if string.find(role, "osd.") != 0:
                    continue
                osdid = int(role.split('.')[1])

                for pg in pgs[osdid]:
                    fpath = os.path.join(DATADIR, "osd{id}.{pg}".format(
                        id=osdid, pg=pg))
                    cmd = (prefix + "--op import --file {file}").format(
                        id=osdid, file=fpath)
                    proc = remote.run(args=cmd, check_status=False,
                                      stdout=StringIO())
                    proc.wait()
                    if proc.exitstatus != 0:
                        log.error("Import failed from {file} with {ret}".format(
                            file=fpath, ret=proc.exitstatus))
                        IMP_ERRORS += 1
    else:
        log.warning("SKIPPING IMPORT TESTS DUE TO PREVIOUS FAILURES")

    ERRORS += IMP_ERRORS

    if EXP_ERRORS == 0 and RM_ERRORS == 0 and IMP_ERRORS == 0:
        log.info("Restarting OSDs....")
        # They still look to be up because of the nodown flag set earlier
        for osd in manager.get_osd_status()['up']:
            manager.revive_osd(osd)
        # Wait for health?
        time.sleep(5)
        # Let scrub after test runs verify consistency of all copies
        log.info("Verify replicated import data")
        objects = range(1, NUM_OBJECTS + 1)
        for i in objects:
            NAME = REP_NAME + "{num}".format(num=i)
            TESTNAME = os.path.join(DATADIR, "gettest")
            REFNAME = os.path.join(DATADIR, NAME)

            proc = rados(ctx, cli_remote,
                         ['-p', REP_POOL, 'get', NAME, TESTNAME], wait=False)

            ret = proc.wait()
            if ret != 0:
                log.error("After import, rados get failed with {ret}".format(
                    ret=ret))
                ERRORS += 1
                continue

            cmd = "diff -q {gettest} {ref}".format(gettest=TESTNAME,
                                                   ref=REFNAME)
            proc = cli_remote.run(args=cmd, check_status=False)
            proc.wait()
            if proc.exitstatus != 0:
                log.error("Data comparison failed for {obj}".format(obj=NAME))
                ERRORS += 1

    if ERRORS == 0:
        log.info("TEST PASSED")
    else:
        log.error("TEST FAILED WITH {errcount} ERRORS".format(errcount=ERRORS))

    try:
        yield
    finally:
        log.info('Ending ceph_objectstore_tool')

def task(ctx, config):
    """
    Test handling of divergent entries with prior_version
    prior to log_tail

    config: none

    Requires 3 osds.
    """
    if config is None:
        config = {}
    assert isinstance(config, dict), \
        'divergent_priors task only accepts a dict for configuration'

    while len(ctx.manager.get_osd_status()['up']) < 3:
        time.sleep(10)
    ctx.manager.raw_cluster_cmd('tell', 'osd.0', 'flush_pg_stats')
    ctx.manager.raw_cluster_cmd('tell', 'osd.1', 'flush_pg_stats')
    ctx.manager.raw_cluster_cmd('tell', 'osd.2', 'flush_pg_stats')
    ctx.manager.raw_cluster_cmd('osd', 'set', 'noout')
    ctx.manager.raw_cluster_cmd('osd', 'set', 'noin')
    ctx.manager.raw_cluster_cmd('osd', 'set', 'nodown')
    ctx.manager.wait_for_clean()

    # something that is always there
    dummyfile = '/etc/fstab'
    dummyfile2 = '/etc/resolv.conf'

    # create 1 pg pool
    log.info('creating foo')
    ctx.manager.raw_cluster_cmd('osd', 'pool', 'create', 'foo', '1')

    osds = [0, 1, 2]
    for i in osds:
        ctx.manager.set_config(i, osd_min_pg_log_entries=1)

    # determine primary
    divergent = ctx.manager.get_pg_primary('foo', 0)
    log.info("primary and soon to be divergent is %d", divergent)
    non_divergent = [0, 1, 2]
    non_divergent.remove(divergent)

    log.info('writing initial objects')
    # mon lookup was missing here; restored to mirror the sibling
    # divergent_priors tasks in this module set
    first_mon = teuthology.get_first_mon(ctx, config)
    (mon,) = ctx.cluster.only(first_mon).remotes.iterkeys()
    # write 1000 objects
    for i in range(1000):
        rados(ctx, mon, ['-p', 'foo', 'put', 'existing_%d' % i, dummyfile])

    ctx.manager.wait_for_clean()

    # blackhole non_divergent
    log.info("blackholing osds %s", str(non_divergent))
    for i in non_divergent:
        ctx.manager.set_config(i, filestore_blackhole='')

    # write 1 (divergent) object
    log.info('writing divergent object existing_0')
    rados(ctx, mon, ['-p', 'foo', 'put', 'existing_0', dummyfile2], wait=False)
    time.sleep(10)
    mon.run(args=['killall', '-9', 'rados'], wait=True, check_status=False)

    # kill all the osds
    log.info('killing all the osds')
    for i in osds:
        ctx.manager.kill_osd(i)
    for i in osds:
        ctx.manager.mark_down_osd(i)
    for i in osds:
        ctx.manager.mark_out_osd(i)

    # bring up non-divergent
    log.info("bringing up non_divergent %s", str(non_divergent))
    for i in non_divergent:
        ctx.manager.revive_osd(i)
    for i in non_divergent:
        ctx.manager.mark_in_osd(i)

    log.info('making log long to prevent backfill')
    for i in non_divergent:
        ctx.manager.set_config(i, osd_min_pg_log_entries=100000)

    # write 1 non-divergent object (ensure that old divergent one is divergent)
    log.info('writing non-divergent object existing_1')
    rados(ctx, mon, ['-p', 'foo', 'put', 'existing_1', dummyfile2])

    ctx.manager.wait_for_recovery()

    # ensure no recovery
    log.info('delay recovery')
    for i in non_divergent:
        ctx.manager.set_config(i, osd_recovery_delay_start=100000)

    # bring in our divergent friend
    log.info("revive divergent %d", divergent)
    ctx.manager.revive_osd(divergent)

    while len(ctx.manager.get_osd_status()['up']) < 3:
        time.sleep(10)

    log.info('delay recovery divergent')
    ctx.manager.set_config(divergent, osd_recovery_delay_start=100000)
    log.info('mark divergent in')
    ctx.manager.mark_in_osd(divergent)

    log.info('wait for peering')
    rados(ctx, mon, ['-p', 'foo', 'put', 'foo', dummyfile])

    log.info("killing divergent %d", divergent)
    ctx.manager.kill_osd(divergent)
    log.info("reviving divergent %d", divergent)
    ctx.manager.revive_osd(divergent)

    log.info('allowing recovery')
    for i in non_divergent:
        ctx.manager.set_config(i, osd_recovery_delay_start=0)

    log.info('reading existing_0')
    exit_status = rados(ctx, mon,
                        ['-p', 'foo', 'get', 'existing_0',
                         '-o', '/tmp/existing'])
    assert exit_status == 0
    log.info("success")

def task(ctx, config):
    """
    Test handling of lost objects.

    A pretty rigid cluster is brought up and tested by this task
    """
    POOL = "unfound_pool"
    if config is None:
        config = {}
    assert isinstance(config, dict), \
        "lost_unfound task only accepts a dict for configuration"
    first_mon = teuthology.get_first_mon(ctx, config)
    (mon,) = ctx.cluster.only(first_mon).remotes.iterkeys()

    manager = ceph_manager.CephManager(
        mon,
        ctx=ctx,
        logger=log.getChild("ceph_manager"),
        )

    while len(manager.get_osd_status()["up"]) < 3:
        time.sleep(10)
    manager.raw_cluster_cmd("tell", "osd.0", "flush_pg_stats")
    manager.raw_cluster_cmd("tell", "osd.1", "flush_pg_stats")
    manager.raw_cluster_cmd("tell", "osd.2", "flush_pg_stats")
    manager.wait_for_clean()

    manager.create_pool(POOL)

    # something that is always there
    dummyfile = "/etc/fstab"

    # take an osd out until the very end
    manager.kill_osd(2)
    manager.mark_down_osd(2)
    manager.mark_out_osd(2)

    # kludge to make sure they get a map
    rados(ctx, mon, ["-p", POOL, "put", "dummy", dummyfile])

    manager.raw_cluster_cmd("tell", "osd.0", "flush_pg_stats")
    manager.raw_cluster_cmd("tell", "osd.1", "flush_pg_stats")
    manager.wait_for_recovery()

    # create old objects
    for f in range(1, 10):
        rados(ctx, mon, ["-p", POOL, "put", "existing_%d" % f, dummyfile])
        rados(ctx, mon, ["-p", POOL, "put", "existed_%d" % f, dummyfile])
        rados(ctx, mon, ["-p", POOL, "rm", "existed_%d" % f])

    # delay recovery, and make the pg log very long (to prevent backfill)
    manager.raw_cluster_cmd(
        "tell", "osd.1",
        "injectargs",
        "--osd-recovery-delay-start 1000 --osd-min-pg-log-entries 100000000")

    manager.kill_osd(0)
    manager.mark_down_osd(0)

    for f in range(1, 10):
        rados(ctx, mon, ["-p", POOL, "put", "new_%d" % f, dummyfile])
        rados(ctx, mon, ["-p", POOL, "put", "existed_%d" % f, dummyfile])
        rados(ctx, mon, ["-p", POOL, "put", "existing_%d" % f, dummyfile])

    # bring osd.0 back up, let it peer, but don't replicate the new
    # objects...
    log.info("osd.0 command_args is %s" % "foo")
    log.info(ctx.daemons.get_daemon("osd", 0).command_args)
    ctx.daemons.get_daemon("osd", 0).command_kwargs["args"].extend(
        ["--osd-recovery-delay-start", "1000"])
    manager.revive_osd(0)
    manager.mark_in_osd(0)
    manager.wait_till_osd_is_up(0)

    manager.raw_cluster_cmd("tell", "osd.1", "flush_pg_stats")
    manager.raw_cluster_cmd("tell", "osd.0", "flush_pg_stats")
    manager.wait_till_active()

    # take out osd.1 and the only copy of those objects.
    manager.kill_osd(1)
    manager.mark_down_osd(1)
    manager.mark_out_osd(1)
    manager.raw_cluster_cmd("osd", "lost", "1", "--yes-i-really-mean-it")

    # bring up osd.2 so that things would otherwise, in theory, recovery fully
    manager.revive_osd(2)
    manager.mark_in_osd(2)
    manager.wait_till_osd_is_up(2)

    manager.raw_cluster_cmd("tell", "osd.0", "flush_pg_stats")
    manager.raw_cluster_cmd("tell", "osd.2", "flush_pg_stats")
    manager.wait_till_active()
    manager.raw_cluster_cmd("tell", "osd.0", "flush_pg_stats")
    manager.raw_cluster_cmd("tell", "osd.2", "flush_pg_stats")

    # verify that there are unfound objects
    unfound = manager.get_num_unfound_objects()
    log.info("there are %d unfound objects" % unfound)
    assert unfound

    testdir = teuthology.get_testdir(ctx)
    procs = []
    if config.get("parallel_bench", True):
        procs.append(
            mon.run(
                args=[
                    "/bin/sh", "-c",
                    " ".join([
                        "adjust-ulimits",
                        "ceph-coverage",
                        "{tdir}/archive/coverage",
                        "rados",
                        "--no-log-to-stderr",
                        "--name", "client.admin",
                        "-b", str(4 << 10),
                        "-p", POOL,
                        "-t", "20",
                        "bench", "240", "write",
                    ]).format(tdir=testdir),
                ],
                logger=log.getChild(
                    "radosbench.{id}".format(id="client.admin")),
                stdin=run.PIPE,
                wait=False,
            )
        )
    time.sleep(10)

    # mark stuff lost
    pgs = manager.get_pg_stats()
    for pg in pgs:
        if pg["stat_sum"]["num_objects_unfound"] > 0:
            primary = "osd.%d" % pg["acting"][0]

            # verify that i can list them direct from the osd
            log.info("listing missing/lost in %s state %s",
                     pg["pgid"], pg["state"])
            m = manager.list_pg_missing(pg["pgid"])
            # log.info('%s' % m)
            assert m["num_unfound"] == pg["stat_sum"]["num_objects_unfound"]
            num_unfound = 0
            for o in m["objects"]:
                if len(o["locations"]) == 0:
                    num_unfound += 1
            assert m["num_unfound"] == num_unfound

            log.info("reverting unfound in %s on %s", pg["pgid"], primary)
            manager.raw_cluster_cmd("pg", pg["pgid"],
                                    "mark_unfound_lost", "revert")
        else:
            log.info("no unfound in %s", pg["pgid"])

    manager.raw_cluster_cmd("tell", "osd.0", "debug", "kick_recovery_wq", "5")
    manager.raw_cluster_cmd("tell", "osd.2", "debug", "kick_recovery_wq", "5")
    manager.raw_cluster_cmd("tell", "osd.0", "flush_pg_stats")
    manager.raw_cluster_cmd("tell", "osd.2", "flush_pg_stats")
    manager.wait_for_recovery()

    # verify result
    for f in range(1, 10):
        err = rados(ctx, mon, ["-p", POOL, "get", "new_%d" % f, "-"])
        assert err
        err = rados(ctx, mon, ["-p", POOL, "get", "existed_%d" % f, "-"])
        assert err
        err = rados(ctx, mon, ["-p", POOL, "get", "existing_%d" % f, "-"])
        assert not err

    # see if osd.1 can cope
    manager.revive_osd(1)
    manager.mark_in_osd(1)
    manager.wait_till_osd_is_up(1)
    manager.wait_for_clean()
    run.wait(procs)

def task(ctx, config): """ Test handling of divergent entries with prior_version prior to log_tail and a ceph-objectstore-tool export/import overrides: ceph: conf: osd: debug osd: 5 Requires 3 osds on a single test node. """ if config is None: config = {} assert isinstance(config, dict), "divergent_priors task only accepts a dict for configuration" while len(ctx.manager.get_osd_status()["up"]) < 3: time.sleep(10) ctx.manager.raw_cluster_cmd("tell", "osd.0", "flush_pg_stats") ctx.manager.raw_cluster_cmd("tell", "osd.1", "flush_pg_stats") ctx.manager.raw_cluster_cmd("tell", "osd.2", "flush_pg_stats") ctx.manager.raw_cluster_cmd("osd", "set", "noout") ctx.manager.raw_cluster_cmd("osd", "set", "noin") ctx.manager.raw_cluster_cmd("osd", "set", "nodown") ctx.manager.wait_for_clean() # something that is always there dummyfile = "/etc/fstab" dummyfile2 = "/etc/resolv.conf" testdir = teuthology.get_testdir(ctx) # create 1 pg pool log.info("creating foo") ctx.manager.raw_cluster_cmd("osd", "pool", "create", "foo", "1") osds = [0, 1, 2] for i in osds: ctx.manager.set_config(i, osd_min_pg_log_entries=10) ctx.manager.set_config(i, osd_max_pg_log_entries=10) ctx.manager.set_config(i, osd_pg_log_trim_min=5) # determine primary divergent = ctx.manager.get_pg_primary("foo", 0) log.info("primary and soon to be divergent is %d", divergent) non_divergent = list(osds) non_divergent.remove(divergent) log.info("writing initial objects") first_mon = teuthology.get_first_mon(ctx, config) (mon,) = ctx.cluster.only(first_mon).remotes.iterkeys() # write 100 objects for i in range(100): rados(ctx, mon, ["-p", "foo", "put", "existing_%d" % i, dummyfile]) ctx.manager.wait_for_clean() # blackhole non_divergent log.info("blackholing osds %s", str(non_divergent)) for i in non_divergent: ctx.manager.set_config(i, filestore_blackhole=1) DIVERGENT_WRITE = 5 DIVERGENT_REMOVE = 5 # Write some soon to be divergent log.info("writing divergent objects") for i in range(DIVERGENT_WRITE): rados(ctx, mon, ["-p", "foo", "put", "existing_%d" % i, dummyfile2], wait=False) # Remove some soon to be divergent log.info("remove divergent objects") for i in range(DIVERGENT_REMOVE): rados(ctx, mon, ["-p", "foo", "rm", "existing_%d" % (i + DIVERGENT_WRITE)], wait=False) time.sleep(10) mon.run(args=["killall", "-9", "rados"], wait=True, check_status=False) # kill all the osds but leave divergent in log.info("killing all the osds") for i in osds: ctx.manager.kill_osd(i) for i in osds: ctx.manager.mark_down_osd(i) for i in non_divergent: ctx.manager.mark_out_osd(i) # bring up non-divergent log.info("bringing up non_divergent %s", str(non_divergent)) for i in non_divergent: ctx.manager.revive_osd(i) for i in non_divergent: ctx.manager.mark_in_osd(i) # write 1 non-divergent object (ensure that old divergent one is divergent) objname = "existing_%d" % (DIVERGENT_WRITE + DIVERGENT_REMOVE) log.info("writing non-divergent object " + objname) rados(ctx, mon, ["-p", "foo", "put", objname, dummyfile2]) ctx.manager.wait_for_recovery() # ensure no recovery of up osds first log.info("delay recovery") for i in non_divergent: ctx.manager.wait_run_admin_socket("osd", i, ["set_recovery_delay", "100000"]) # bring in our divergent friend log.info("revive divergent %d", divergent) ctx.manager.raw_cluster_cmd("osd", "set", "noup") ctx.manager.revive_osd(divergent) log.info("delay recovery divergent") ctx.manager.wait_run_admin_socket("osd", divergent, ["set_recovery_delay", "100000"]) ctx.manager.raw_cluster_cmd("osd", "unset", "noup") while 
len(ctx.manager.get_osd_status()["up"]) < 3: time.sleep(10) log.info("wait for peering") rados(ctx, mon, ["-p", "foo", "put", "foo", dummyfile]) # At this point the divergent_priors should have been detected log.info("killing divergent %d", divergent) ctx.manager.kill_osd(divergent) # Export a pg (exp_remote,) = ctx.cluster.only("osd.{o}".format(o=divergent)).remotes.iterkeys() FSPATH = ctx.manager.get_filepath() JPATH = os.path.join(FSPATH, "journal") prefix = ( "sudo adjust-ulimits ceph-objectstore-tool " "--data-path {fpath} --journal-path {jpath} " "--log-file=" "/var/log/ceph/objectstore_tool.$$.log ".format(fpath=FSPATH, jpath=JPATH) ) pid = os.getpid() expfile = os.path.join(testdir, "exp.{pid}.out".format(pid=pid)) cmd = (prefix + "--op export --pgid 1.0 --file {file}").format(id=divergent, file=expfile) proc = exp_remote.run(args=cmd, wait=True, check_status=False, stdout=StringIO()) assert proc.exitstatus == 0 cmd = (prefix + "--op remove --pgid 1.0").format(id=divergent, file=expfile) proc = exp_remote.run(args=cmd, wait=True, check_status=False, stdout=StringIO()) assert proc.exitstatus == 0 cmd = (prefix + "--op import --file {file}").format(id=divergent, file=expfile) proc = exp_remote.run(args=cmd, wait=True, check_status=False, stdout=StringIO()) assert proc.exitstatus == 0 log.info("reviving divergent %d", divergent) ctx.manager.revive_osd(divergent) ctx.manager.wait_run_admin_socket("osd", divergent, ["dump_ops_in_flight"]) time.sleep(20) log.info("allowing recovery") # Set osd_recovery_delay_start back to 0 and kick the queue for i in osds: ctx.manager.raw_cluster_cmd("tell", "osd.%d" % i, "debug", "kick_recovery_wq", " 0") log.info("reading divergent objects") for i in range(DIVERGENT_WRITE + DIVERGENT_REMOVE): exit_status = rados(ctx, mon, ["-p", "foo", "get", "existing_%d" % i, "/tmp/existing"]) assert exit_status is 0 (remote,) = ctx.cluster.only("osd.{o}".format(o=divergent)).remotes.iterkeys() msg = "dirty_divergent_priors: true, divergent_priors: %d" % (DIVERGENT_WRITE + DIVERGENT_REMOVE) cmd = 'grep "{msg}" /var/log/ceph/ceph-osd.{osd}.log'.format(msg=msg, osd=divergent) proc = remote.run(args=cmd, wait=True, check_status=False) assert proc.exitstatus == 0 cmd = "rm {file}".format(file=expfile) remote.run(args=cmd, wait=True) log.info("success")
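# Sketch of how the ceph-objectstore-tool invocations in the task above are
# assembled: a common prefix carrying --data-path/--journal-path/--log-file,
# plus one of the export / remove / import operations on the single PG 1.0.
# This is plain string assembly mirroring the task's prefix; the paths in the
# example are hypothetical, and this is not a helper shipped with teuthology.
def cot_commands(fpath, jpath, pgid, expfile):
    prefix = ("sudo adjust-ulimits ceph-objectstore-tool "
              "--data-path {fpath} --journal-path {jpath} "
              "--log-file=/var/log/ceph/objectstore_tool.$$.log ").format(
                  fpath=fpath, jpath=jpath)
    return {
        'export': prefix + "--op export --pgid {pg} --file {file}".format(
            pg=pgid, file=expfile),
        'remove': prefix + "--op remove --pgid {pg}".format(pg=pgid),
        'import': prefix + "--op import --file {file}".format(file=expfile),
    }

_cmds = cot_commands('/var/lib/ceph/osd/ceph-0', '/var/lib/ceph/osd/ceph-0/journal',
                     '1.0', '/tmp/exp.1234.out')
assert '--op export' in _cmds['export'] and '--file /tmp/exp.1234.out' in _cmds['export']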
def task(ctx, config): """ Test handling of divergent entries with prior_version prior to log_tail overrides: ceph: conf: osd: debug osd: 5 Requires 3 osds on a single test node. """ if config is None: config = {} assert isinstance(config, dict), "divergent_priors task only accepts a dict for configuration" manager = ctx.managers["ceph"] while len(manager.get_osd_status()["up"]) < 3: time.sleep(10) manager.raw_cluster_cmd("tell", "osd.0", "flush_pg_stats") manager.raw_cluster_cmd("tell", "osd.1", "flush_pg_stats") manager.raw_cluster_cmd("tell", "osd.2", "flush_pg_stats") manager.raw_cluster_cmd("osd", "set", "noout") manager.raw_cluster_cmd("osd", "set", "noin") manager.raw_cluster_cmd("osd", "set", "nodown") manager.wait_for_clean() # something that is always there dummyfile = "/etc/fstab" dummyfile2 = "/etc/resolv.conf" # create 1 pg pool log.info("creating foo") manager.raw_cluster_cmd("osd", "pool", "create", "foo", "1") osds = [0, 1, 2] for i in osds: manager.set_config(i, osd_min_pg_log_entries=10) manager.set_config(i, osd_max_pg_log_entries=10) manager.set_config(i, osd_pg_log_trim_min=5) # determine primary divergent = manager.get_pg_primary("foo", 0) log.info("primary and soon to be divergent is %d", divergent) non_divergent = list(osds) non_divergent.remove(divergent) log.info("writing initial objects") first_mon = teuthology.get_first_mon(ctx, config) (mon,) = ctx.cluster.only(first_mon).remotes.iterkeys() # write 100 objects for i in range(100): rados(ctx, mon, ["-p", "foo", "put", "existing_%d" % i, dummyfile]) manager.wait_for_clean() # blackhole non_divergent log.info("blackholing osds %s", str(non_divergent)) for i in non_divergent: manager.set_config(i, filestore_blackhole=1) DIVERGENT_WRITE = 5 DIVERGENT_REMOVE = 5 # Write some soon to be divergent log.info("writing divergent objects") for i in range(DIVERGENT_WRITE): rados(ctx, mon, ["-p", "foo", "put", "existing_%d" % i, dummyfile2], wait=False) # Remove some soon to be divergent log.info("remove divergent objects") for i in range(DIVERGENT_REMOVE): rados(ctx, mon, ["-p", "foo", "rm", "existing_%d" % (i + DIVERGENT_WRITE)], wait=False) time.sleep(10) mon.run(args=["killall", "-9", "rados"], wait=True, check_status=False) # kill all the osds but leave divergent in log.info("killing all the osds") for i in osds: manager.kill_osd(i) for i in osds: manager.mark_down_osd(i) for i in non_divergent: manager.mark_out_osd(i) # bring up non-divergent log.info("bringing up non_divergent %s", str(non_divergent)) for i in non_divergent: manager.revive_osd(i) for i in non_divergent: manager.mark_in_osd(i) # write 1 non-divergent object (ensure that old divergent one is divergent) objname = "existing_%d" % (DIVERGENT_WRITE + DIVERGENT_REMOVE) log.info("writing non-divergent object " + objname) rados(ctx, mon, ["-p", "foo", "put", objname, dummyfile2]) manager.wait_for_recovery() # ensure no recovery of up osds first log.info("delay recovery") for i in non_divergent: manager.wait_run_admin_socket("osd", i, ["set_recovery_delay", "100000"]) # bring in our divergent friend log.info("revive divergent %d", divergent) manager.raw_cluster_cmd("osd", "set", "noup") manager.revive_osd(divergent) log.info("delay recovery divergent") manager.wait_run_admin_socket("osd", divergent, ["set_recovery_delay", "100000"]) manager.raw_cluster_cmd("osd", "unset", "noup") while len(manager.get_osd_status()["up"]) < 3: time.sleep(10) log.info("wait for peering") rados(ctx, mon, ["-p", "foo", "put", "foo", dummyfile]) # At this point the divergent_priors 
should have been detected log.info("killing divergent %d", divergent) manager.kill_osd(divergent) log.info("reviving divergent %d", divergent) manager.revive_osd(divergent) time.sleep(20) log.info("allowing recovery") # Set osd_recovery_delay_start back to 0 and kick the queue for i in osds: manager.raw_cluster_cmd("tell", "osd.%d" % i, "debug", "kick_recovery_wq", " 0") log.info("reading divergent objects") for i in range(DIVERGENT_WRITE + DIVERGENT_REMOVE): exit_status = rados(ctx, mon, ["-p", "foo", "get", "existing_%d" % i, "/tmp/existing"]) assert exit_status == 0 (remote,) = ctx.cluster.only("osd.{o}".format(o=divergent)).remotes.iterkeys() msg = "dirty_divergent_priors: true, divergent_priors: %d" % (DIVERGENT_WRITE + DIVERGENT_REMOVE) cmd = 'grep "{msg}" /var/log/ceph/ceph-osd.{osd}.log'.format(msg=msg, osd=divergent) proc = remote.run(args=cmd, wait=True, check_status=False) assert proc.exitstatus == 0 log.info("success")
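# Sketch of the final log check above: the test expects the divergent OSD to
# record a divergent_priors count equal to DIVERGENT_WRITE + DIVERGENT_REMOVE,
# and greps the OSD log for that exact line. Pure string assembly; the message
# format and log path simply mirror what the task itself builds.
def divergent_priors_grep(divergent_osd, writes, removes):
    msg = "dirty_divergent_priors: true, divergent_priors: %d" % (writes + removes)
    return 'grep "{msg}" /var/log/ceph/ceph-osd.{osd}.log'.format(msg=msg, osd=divergent_osd)

assert divergent_priors_grep(2, 5, 5) == (
    'grep "dirty_divergent_priors: true, divergent_priors: 10" '
    '/var/log/ceph/ceph-osd.2.log')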
def configure_regions_and_zones(ctx, config, regions, role_endpoints, realm): """ Configure regions and zones from rados and rgw. """ if not regions: log.debug( 'In rgw.configure_regions_and_zones() and regions is None. ' 'Bailing') yield return if not realm: log.debug( 'In rgw.configure_regions_and_zones() and realm is None. ' 'Bailing') yield return log.info('Configuring regions and zones...') log.debug('config is %r', config) log.debug('regions are %r', regions) log.debug('role_endpoints = %r', role_endpoints) log.debug('realm is %r', realm) # extract the zone info role_zones = dict([(client, extract_zone_info(ctx, client, c_config)) for client, c_config in config.iteritems()]) log.debug('roles_zones = %r', role_zones) # extract the user info and append it to the payload tuple for the given # client for client, c_config in config.iteritems(): if not c_config: user_info = None else: user_info = extract_user_info(c_config) (region, zone, zone_info) = role_zones[client] role_zones[client] = (region, zone, zone_info, user_info) region_info = dict([ (region_name, extract_region_info(region_name, r_config)) for region_name, r_config in regions.iteritems()]) fill_in_endpoints(region_info, role_zones, role_endpoints) # clear out the old defaults first_mon = teuthology.get_first_mon(ctx, config) (mon,) = ctx.cluster.only(first_mon).remotes.iterkeys() # removing these objects from .rgw.root and the per-zone root pools # may or may not matter rados(ctx, mon, cmd=['-p', '.rgw.root', 'rm', 'region_info.default']) rados(ctx, mon, cmd=['-p', '.rgw.root', 'rm', 'zone_info.default']) # read master zonegroup and master_zone for zonegroup, zg_info in region_info.iteritems(): if zg_info['is_master']: master_zonegroup = zonegroup master_zone = zg_info['master_zone'] break for client in config.iterkeys(): (zonegroup, zone, zone_info, user_info) = role_zones[client] if zonegroup == master_zonegroup and zone == master_zone: master_client = client break log.debug('master zonegroup =%r', master_zonegroup) log.debug('master zone = %r', master_zone) log.debug('master client = %r', master_client) log.debug('config %r ', config) (ret, out)=rgwadmin(ctx, master_client, cmd=['realm', 'create', '--rgw-realm', realm, '--default']) log.debug('realm create ret %r exists %r', -ret, errno.EEXIST) assert ret == 0 or ret != -errno.EEXIST if ret is -errno.EEXIST: log.debug('realm %r exists', realm) for client in config.iterkeys(): for role, (zonegroup, zone, zone_info, user_info) in role_zones.iteritems(): rados(ctx, mon, cmd=['-p', zone_info['domain_root'], 'rm', 'region_info.default']) rados(ctx, mon, cmd=['-p', zone_info['domain_root'], 'rm', 'zone_info.default']) (remote,) = ctx.cluster.only(role).remotes.keys() for pool_info in zone_info['placement_pools']: remote.run(args=['sudo', 'ceph', 'osd', 'pool', 'create', pool_info['val']['index_pool'], '64', '64']) if ctx.rgw.ec_data_pool: create_ec_pool(remote, pool_info['val']['data_pool'], zone, 64, ctx.rgw.erasure_code_profile) else: create_replicated_pool( remote, pool_info['val']['data_pool'], 64) zone_json = json.dumps(dict(zone_info.items() + user_info.items())) log.debug('zone info is: %r', zone_json) rgwadmin(ctx, client, cmd=['zone', 'set', '--rgw-zonegroup', zonegroup, '--rgw-zone', zone], stdin=StringIO(zone_json), check_status=True) for region, info in region_info.iteritems(): region_json = json.dumps(info) log.debug('region info is: %s', region_json) rgwadmin(ctx, client, cmd=['zonegroup', 'set'], stdin=StringIO(region_json), check_status=True) if 
info['is_master']: rgwadmin(ctx, client, cmd=['zonegroup', 'default', '--rgw-zonegroup', master_zonegroup], check_status=True) (zonegroup, zone, zone_info, user_info) = role_zones[client] rgwadmin(ctx, client, cmd=['zone', 'default', zone], check_status=True) rgwadmin(ctx, master_client, cmd=['-n', master_client, 'period', 'update', '--commit'], check_status=True) yield
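# The 'zone set' payload above is built by merging the zone dict with the user
# dict before dumping it to JSON. dict(a.items() + b.items()) is a Python 2
# idiom (items() returns lists there); a version-agnostic sketch of the same
# merge, with later (user) keys winning as in the original ordering:
import json

def zone_payload(zone_info, user_info):
    merged = dict(zone_info)        # user keys override zone keys, as in dict(a + b)
    merged.update(user_info or {})  # user_info may be None when no user section exists
    return json.dumps(merged)

_payload = zone_payload({'domain_root': '.rgw.root'}, {'user': 'foo'})
assert json.loads(_payload) == {'domain_root': '.rgw.root', 'user': 'foo'}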
def task(ctx, config): """ Test handling of lost objects. A pretty rigid cluster is brought up and tested by this task """ POOL = 'unfound_pool' if config is None: config = {} assert isinstance(config, dict), \ 'lost_unfound task only accepts a dict for configuration' first_mon = teuthology.get_first_mon(ctx, config) (mon,) = ctx.cluster.only(first_mon).remotes.iterkeys() manager = ceph_manager.CephManager( mon, ctx=ctx, logger=log.getChild('ceph_manager'), ) while len(manager.get_osd_status()['up']) < 3: time.sleep(10) manager.wait_for_clean() manager.create_pool(POOL) # something that is always there dummyfile = '/etc/fstab' # take an osd out until the very end manager.kill_osd(2) manager.mark_down_osd(2) manager.mark_out_osd(2) # kludge to make sure they get a map rados(ctx, mon, ['-p', POOL, 'put', 'dummy', dummyfile]) manager.flush_pg_stats([0, 1]) manager.wait_for_recovery() # create old objects for f in range(1, 10): rados(ctx, mon, ['-p', POOL, 'put', 'existing_%d' % f, dummyfile]) rados(ctx, mon, ['-p', POOL, 'put', 'existed_%d' % f, dummyfile]) rados(ctx, mon, ['-p', POOL, 'rm', 'existed_%d' % f]) # delay recovery, and make the pg log very long (to prevent backfill) manager.raw_cluster_cmd( 'tell', 'osd.1', 'injectargs', '--osd-recovery-delay-start 1000 --osd-min-pg-log-entries 100000000' ) manager.kill_osd(0) manager.mark_down_osd(0) for f in range(1, 10): rados(ctx, mon, ['-p', POOL, 'put', 'new_%d' % f, dummyfile]) rados(ctx, mon, ['-p', POOL, 'put', 'existed_%d' % f, dummyfile]) rados(ctx, mon, ['-p', POOL, 'put', 'existing_%d' % f, dummyfile]) # bring osd.0 back up, let it peer, but don't replicate the new # objects... log.info('osd.0 command_args is %s' % 'foo') log.info(ctx.daemons.get_daemon('osd', 0).command_args) ctx.daemons.get_daemon('osd', 0).command_kwargs['args'].extend([ '--osd-recovery-delay-start', '1000' ]) manager.revive_osd(0) manager.mark_in_osd(0) manager.wait_till_osd_is_up(0) manager.flush_pg_stats([1, 0]) manager.wait_till_active() # take out osd.1 and the only copy of those objects.
manager.kill_osd(1) manager.mark_down_osd(1) manager.mark_out_osd(1) manager.raw_cluster_cmd('osd', 'lost', '1', '--yes-i-really-mean-it') # bring up osd.2 so that things would otherwise, in theory, recovery fully manager.revive_osd(2) manager.mark_in_osd(2) manager.wait_till_osd_is_up(2) manager.flush_pg_stats([0, 2]) manager.wait_till_active() manager.flush_pg_stats([0, 2]) # verify that there are unfound objects unfound = manager.get_num_unfound_objects() log.info("there are %d unfound objects" % unfound) assert unfound testdir = teuthology.get_testdir(ctx) procs = [] if config.get('parallel_bench', True): procs.append(mon.run( args=[ "/bin/sh", "-c", " ".join(['adjust-ulimits', 'ceph-coverage', '{tdir}/archive/coverage', 'rados', '--no-log-to-stderr', '--name', 'client.admin', '-b', str(4<<10), '-p' , POOL, '-t', '20', 'bench', '240', 'write', ]).format(tdir=testdir), ], logger=log.getChild('radosbench.{id}'.format(id='client.admin')), stdin=run.PIPE, wait=False )) time.sleep(10) # mark stuff lost pgs = manager.get_pg_stats() for pg in pgs: if pg['stat_sum']['num_objects_unfound'] > 0: primary = 'osd.%d' % pg['acting'][0] # verify that i can list them direct from the osd log.info('listing missing/lost in %s state %s', pg['pgid'], pg['state']); m = manager.list_pg_missing(pg['pgid']) #log.info('%s' % m) assert m['num_unfound'] == pg['stat_sum']['num_objects_unfound'] num_unfound=0 for o in m['objects']: if len(o['locations']) == 0: num_unfound += 1 assert m['num_unfound'] == num_unfound log.info("reverting unfound in %s on %s", pg['pgid'], primary) manager.raw_cluster_cmd('pg', pg['pgid'], 'mark_unfound_lost', 'revert') else: log.info("no unfound in %s", pg['pgid']) manager.raw_cluster_cmd('tell', 'osd.0', 'debug', 'kick_recovery_wq', '5') manager.raw_cluster_cmd('tell', 'osd.2', 'debug', 'kick_recovery_wq', '5') manager.flush_pg_stats([0, 2]) manager.wait_for_recovery() # verify result for f in range(1, 10): err = rados(ctx, mon, ['-p', POOL, 'get', 'new_%d' % f, '-']) assert err err = rados(ctx, mon, ['-p', POOL, 'get', 'existed_%d' % f, '-']) assert err err = rados(ctx, mon, ['-p', POOL, 'get', 'existing_%d' % f, '-']) assert not err # see if osd.1 can cope manager.mark_in_osd(1) manager.revive_osd(1) manager.wait_till_osd_is_up(1) manager.wait_for_clean() run.wait(procs)
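# Sketch of the consistency check the loop above performs on the primary's
# missing/lost listing: the reported num_unfound must match both the PG stats
# and the number of listed objects that have no remaining locations. This is a
# pure-dict restatement of those asserts, with illustrative data.
def check_missing_listing(pg_stat, missing):
    assert missing['num_unfound'] == pg_stat['stat_sum']['num_objects_unfound']
    no_loc = sum(1 for o in missing['objects'] if len(o['locations']) == 0)
    assert missing['num_unfound'] == no_loc
    return no_loc

_pg = {'pgid': '2.0', 'stat_sum': {'num_objects_unfound': 2}}
_missing = {'num_unfound': 2,
            'objects': [{'oid': 'new_1', 'locations': []},
                        {'oid': 'new_2', 'locations': []},
                        {'oid': 'existing_3', 'locations': ['osd.0']}]}
assert check_missing_listing(_pg, _missing) == 2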
def task(ctx, config): """ Test handling of lost objects on an ec pool. A pretty rigid cluster is brought up and tested by this task """ if config is None: config = {} assert isinstance(config, dict), \ 'lost_unfound task only accepts a dict for configuration' first_mon = teuthology.get_first_mon(ctx, config) (mon,) = ctx.cluster.only(first_mon).remotes.iterkeys() manager = ceph_manager.CephManager( mon, ctx=ctx, logger=log.getChild('ceph_manager'), ) manager.raw_cluster_cmd('tell', 'osd.0', 'flush_pg_stats') manager.raw_cluster_cmd('tell', 'osd.1', 'flush_pg_stats') manager.raw_cluster_cmd('tell', 'osd.2', 'flush_pg_stats') manager.raw_cluster_cmd('tell', 'osd.3', 'flush_pg_stats') manager.wait_for_clean() profile = config.get('erasure_code_profile', { 'k': '2', 'm': '2', 'ruleset-failure-domain': 'osd' }) profile_name = profile.get('name', 'lost_unfound') manager.create_erasure_code_profile(profile_name, profile) pool = manager.create_pool_with_unique_name(erasure_code_profile_name=profile_name) # something that is always there dummyfile = '/etc/fstab' # kludge to make sure they get a map rados(ctx, mon, ['-p', pool, 'put', 'dummy', dummyfile]) manager.raw_cluster_cmd('tell', 'osd.0', 'flush_pg_stats') manager.raw_cluster_cmd('tell', 'osd.1', 'flush_pg_stats') manager.wait_for_recovery() # create old objects for f in range(1, 10): rados(ctx, mon, ['-p', pool, 'put', 'existing_%d' % f, dummyfile]) rados(ctx, mon, ['-p', pool, 'put', 'existed_%d' % f, dummyfile]) rados(ctx, mon, ['-p', pool, 'rm', 'existed_%d' % f]) # delay recovery, and make the pg log very long (to prevent backfill) manager.raw_cluster_cmd( 'tell', 'osd.1', 'injectargs', '--osd-recovery-delay-start 1000 --osd-min-pg-log-entries 100000000' ) manager.kill_osd(0) manager.mark_down_osd(0) manager.kill_osd(3) manager.mark_down_osd(3) for f in range(1, 10): rados(ctx, mon, ['-p', pool, 'put', 'new_%d' % f, dummyfile]) rados(ctx, mon, ['-p', pool, 'put', 'existed_%d' % f, dummyfile]) rados(ctx, mon, ['-p', pool, 'put', 'existing_%d' % f, dummyfile]) # take out osd.1 and a necessary shard of those objects.
manager.kill_osd(1) manager.mark_down_osd(1) manager.raw_cluster_cmd('osd', 'lost', '1', '--yes-i-really-mean-it') manager.revive_osd(0) manager.wait_till_osd_is_up(0) manager.revive_osd(3) manager.wait_till_osd_is_up(3) manager.raw_cluster_cmd('tell', 'osd.0', 'flush_pg_stats') manager.raw_cluster_cmd('tell', 'osd.2', 'flush_pg_stats') manager.raw_cluster_cmd('tell', 'osd.3', 'flush_pg_stats') manager.wait_till_active() manager.raw_cluster_cmd('tell', 'osd.0', 'flush_pg_stats') manager.raw_cluster_cmd('tell', 'osd.2', 'flush_pg_stats') manager.raw_cluster_cmd('tell', 'osd.3', 'flush_pg_stats') # verify that there are unfound objects unfound = manager.get_num_unfound_objects() log.info("there are %d unfound objects" % unfound) assert unfound # mark stuff lost pgs = manager.get_pg_stats() for pg in pgs: if pg['stat_sum']['num_objects_unfound'] > 0: # verify that i can list them direct from the osd log.info('listing missing/lost in %s state %s', pg['pgid'], pg['state']); m = manager.list_pg_missing(pg['pgid']) log.info('%s' % m) assert m['num_unfound'] == pg['stat_sum']['num_objects_unfound'] log.info("reverting unfound in %s", pg['pgid']) manager.raw_cluster_cmd('pg', pg['pgid'], 'mark_unfound_lost', 'delete') else: log.info("no unfound in %s", pg['pgid']) manager.raw_cluster_cmd('tell', 'osd.0', 'debug', 'kick_recovery_wq', '5') manager.raw_cluster_cmd('tell', 'osd.2', 'debug', 'kick_recovery_wq', '5') manager.raw_cluster_cmd('tell', 'osd.3', 'debug', 'kick_recovery_wq', '5') manager.raw_cluster_cmd('tell', 'osd.0', 'flush_pg_stats') manager.raw_cluster_cmd('tell', 'osd.2', 'flush_pg_stats') manager.raw_cluster_cmd('tell', 'osd.3', 'flush_pg_stats') manager.wait_for_recovery() # verify result for f in range(1, 10): err = rados(ctx, mon, ['-p', pool, 'get', 'new_%d' % f, '-']) assert err err = rados(ctx, mon, ['-p', pool, 'get', 'existed_%d' % f, '-']) assert err err = rados(ctx, mon, ['-p', pool, 'get', 'existing_%d' % f, '-']) assert err # see if osd.1 can cope manager.revive_osd(1) manager.wait_till_osd_is_up(1) manager.wait_for_clean()
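# Sketch of how the erasure-code profile above is resolved from the task
# config: config.get() swaps in the caller's whole profile dict when one is
# supplied, otherwise the k=2/m=2, failure-domain=osd default is used, and the
# profile name falls back to 'lost_unfound'.
def resolve_ec_profile(config):
    profile = config.get('erasure_code_profile',
                         {'k': '2', 'm': '2', 'ruleset-failure-domain': 'osd'})
    return profile.get('name', 'lost_unfound'), profile

_name, _profile = resolve_ec_profile({})
assert _name == 'lost_unfound' and _profile['k'] == '2'
_name, _ = resolve_ec_profile({'erasure_code_profile': {'name': 'myprof', 'k': '3', 'm': '1'}})
assert _name == 'myprof'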
def test_objectstore(ctx, config, cli_remote, REP_POOL, REP_NAME, ec=False): manager = ctx.managers['ceph'] osds = ctx.cluster.only(teuthology.is_type('osd')) TEUTHDIR = teuthology.get_testdir(ctx) DATADIR = os.path.join(TEUTHDIR, "ceph.data") DATALINECOUNT = 10000 ERRORS = 0 NUM_OBJECTS = config.get('objects', 10) log.info("objects: {num}".format(num=NUM_OBJECTS)) pool_dump = manager.get_pool_dump(REP_POOL) REPID = pool_dump['pool'] log.debug("repid={num}".format(num=REPID)) db = {} LOCALDIR = tempfile.mkdtemp("cod") cod_setup_local_data(log, ctx, NUM_OBJECTS, LOCALDIR, REP_NAME, DATALINECOUNT) allremote = [] allremote.append(cli_remote) allremote += osds.remotes.keys() allremote = list(set(allremote)) for remote in allremote: cod_setup_remote_data(log, ctx, remote, NUM_OBJECTS, DATADIR, REP_NAME, DATALINECOUNT) ERRORS += cod_setup(log, ctx, cli_remote, NUM_OBJECTS, DATADIR, REP_NAME, DATALINECOUNT, REP_POOL, db, ec) pgs = {} for stats in manager.get_pg_stats(): if stats["pgid"].find(str(REPID) + ".") != 0: continue if pool_dump["type"] == ceph_manager.CephManager.REPLICATED_POOL: for osd in stats["acting"]: pgs.setdefault(osd, []).append(stats["pgid"]) elif pool_dump["type"] == ceph_manager.CephManager.ERASURE_CODED_POOL: shard = 0 for osd in stats["acting"]: pgs.setdefault(osd, []).append("{pgid}s{shard}". format(pgid=stats["pgid"], shard=shard)) shard += 1 else: raise Exception("{pool} has an unexpected type {type}". format(pool=REP_POOL, type=pool_dump["type"])) log.info(pgs) log.info(db) for osd in manager.get_osd_status()['up']: manager.kill_osd(osd) time.sleep(5) pgswithobjects = set() objsinpg = {} # Test --op list and generate json for all objects log.info("Test --op list by generating json for all objects") prefix = ("sudo ceph-objectstore-tool " "--data-path {fpath} " "--journal-path {jpath} ").format(fpath=FSPATH, jpath=JPATH) for remote in osds.remotes.iterkeys(): log.debug(remote) log.debug(osds.remotes[remote]) for role in osds.remotes[remote]: if string.find(role, "osd.") != 0: continue osdid = int(role.split('.')[1]) log.info("process osd.{id} on {remote}". format(id=osdid, remote=remote)) cmd = (prefix + "--op list").format(id=osdid) proc = remote.run(args=cmd.split(), check_status=False, stdout=StringIO()) if proc.exitstatus != 0: log.error("Bad exit status {ret} from --op list request". format(ret=proc.exitstatus)) ERRORS += 1 else: for pgline in proc.stdout.getvalue().splitlines(): if not pgline: continue (pg, obj) = json.loads(pgline) name = obj['oid'] if name in db: pgswithobjects.add(pg) objsinpg.setdefault(pg, []).append(name) db[name].setdefault("pg2json", {})[pg] = json.dumps(obj) log.info(db) log.info(pgswithobjects) log.info(objsinpg) if pool_dump["type"] == ceph_manager.CephManager.REPLICATED_POOL: # Test get-bytes log.info("Test get-bytes and set-bytes") for basename in db.keys(): file = os.path.join(DATADIR, basename) GETNAME = os.path.join(DATADIR, "get") SETNAME = os.path.join(DATADIR, "set") for remote in osds.remotes.iterkeys(): for role in osds.remotes[remote]: if string.find(role, "osd.") != 0: continue osdid = int(role.split('.')[1]) if osdid not in pgs: continue for pg, JSON in db[basename]["pg2json"].iteritems(): if pg in pgs[osdid]: cmd = ((prefix + "--pgid {pg}"). format(id=osdid, pg=pg).split()) cmd.append(run.Raw("'{json}'".format(json=JSON))) cmd += ("get-bytes {fname}". format(fname=GETNAME).split()) proc = remote.run(args=cmd, check_status=False) if proc.exitstatus != 0: remote.run(args="rm -f {getfile}". 
format(getfile=GETNAME).split()) log.error("Bad exit status {ret}". format(ret=proc.exitstatus)) ERRORS += 1 continue cmd = ("diff -q {file} {getfile}". format(file=file, getfile=GETNAME)) proc = remote.run(args=cmd.split()) if proc.exitstatus != 0: log.error("Data from get-bytes differ") # log.debug("Got:") # cat_file(logging.DEBUG, GETNAME) # log.debug("Expected:") # cat_file(logging.DEBUG, file) ERRORS += 1 remote.run(args="rm -f {getfile}". format(getfile=GETNAME).split()) data = ("put-bytes going into {file}\n". format(file=file)) teuthology.write_file(remote, SETNAME, data) cmd = ((prefix + "--pgid {pg}"). format(id=osdid, pg=pg).split()) cmd.append(run.Raw("'{json}'".format(json=JSON))) cmd += ("set-bytes {fname}". format(fname=SETNAME).split()) proc = remote.run(args=cmd, check_status=False) proc.wait() if proc.exitstatus != 0: log.info("set-bytes failed for object {obj} " "in pg {pg} osd.{id} ret={ret}". format(obj=basename, pg=pg, id=osdid, ret=proc.exitstatus)) ERRORS += 1 cmd = ((prefix + "--pgid {pg}"). format(id=osdid, pg=pg).split()) cmd.append(run.Raw("'{json}'".format(json=JSON))) cmd += "get-bytes -".split() proc = remote.run(args=cmd, check_status=False, stdout=StringIO()) proc.wait() if proc.exitstatus != 0: log.error("get-bytes after " "set-bytes ret={ret}". format(ret=proc.exitstatus)) ERRORS += 1 else: if data != proc.stdout.getvalue(): log.error("Data inconsistent after " "set-bytes, got:") log.error(proc.stdout.getvalue()) ERRORS += 1 cmd = ((prefix + "--pgid {pg}"). format(id=osdid, pg=pg).split()) cmd.append(run.Raw("'{json}'".format(json=JSON))) cmd += ("set-bytes {fname}". format(fname=file).split()) proc = remote.run(args=cmd, check_status=False) proc.wait() if proc.exitstatus != 0: log.info("set-bytes failed for object {obj} " "in pg {pg} osd.{id} ret={ret}". format(obj=basename, pg=pg, id=osdid, ret=proc.exitstatus)) ERRORS += 1 log.info("Test list-attrs get-attr") for basename in db.keys(): file = os.path.join(DATADIR, basename) GETNAME = os.path.join(DATADIR, "get") SETNAME = os.path.join(DATADIR, "set") for remote in osds.remotes.iterkeys(): for role in osds.remotes[remote]: if string.find(role, "osd.") != 0: continue osdid = int(role.split('.')[1]) if osdid not in pgs: continue for pg, JSON in db[basename]["pg2json"].iteritems(): if pg in pgs[osdid]: cmd = ((prefix + "--pgid {pg}"). format(id=osdid, pg=pg).split()) cmd.append(run.Raw("'{json}'".format(json=JSON))) cmd += ["list-attrs"] proc = remote.run(args=cmd, check_status=False, stdout=StringIO(), stderr=StringIO()) proc.wait() if proc.exitstatus != 0: log.error("Bad exit status {ret}". format(ret=proc.exitstatus)) ERRORS += 1 continue keys = proc.stdout.getvalue().split() values = dict(db[basename]["xattr"]) for key in keys: if (key == "_" or key == "snapset" or key == "hinfo_key"): continue key = key.strip("_") if key not in values: log.error("The key {key} should be present". format(key=key)) ERRORS += 1 continue exp = values.pop(key) cmd = ((prefix + "--pgid {pg}"). format(id=osdid, pg=pg).split()) cmd.append(run.Raw("'{json}'".format(json=JSON))) cmd += ("get-attr {key}". format(key="_" + key).split()) proc = remote.run(args=cmd, check_status=False, stdout=StringIO()) proc.wait() if proc.exitstatus != 0: log.error("get-attr failed with {ret}". format(ret=proc.exitstatus)) ERRORS += 1 continue val = proc.stdout.getvalue() if exp != val: log.error("For key {key} got value {got} " "instead of {expected}". 
format(key=key, got=val, expected=exp)) ERRORS += 1 if "hinfo_key" in keys: cmd_prefix = prefix.format(id=osdid) cmd = """ expected=$({prefix} --pgid {pg} '{json}' get-attr {key} | base64) echo placeholder | {prefix} --pgid {pg} '{json}' set-attr {key} - test $({prefix} --pgid {pg} '{json}' get-attr {key}) = placeholder echo $expected | base64 --decode | \ {prefix} --pgid {pg} '{json}' set-attr {key} - test $({prefix} --pgid {pg} '{json}' get-attr {key} | base64) = $expected """.format(prefix=cmd_prefix, pg=pg, json=JSON, key="hinfo_key") log.debug(cmd) proc = remote.run(args=['bash', '-e', '-x', '-c', cmd], check_status=False, stdout=StringIO(), stderr=StringIO()) proc.wait() if proc.exitstatus != 0: log.error("failed with " + str(proc.exitstatus)) log.error(proc.stdout.getvalue() + " " + proc.stderr.getvalue()) ERRORS += 1 if len(values) != 0: log.error("Not all keys found, remaining keys:") log.error(values) log.info("Test pg info") for remote in osds.remotes.iterkeys(): for role in osds.remotes[remote]: if string.find(role, "osd.") != 0: continue osdid = int(role.split('.')[1]) if osdid not in pgs: continue for pg in pgs[osdid]: cmd = ((prefix + "--op info --pgid {pg}"). format(id=osdid, pg=pg).split()) proc = remote.run(args=cmd, check_status=False, stdout=StringIO()) proc.wait() if proc.exitstatus != 0: log.error("Failure of --op info command with {ret}". format(proc.exitstatus)) ERRORS += 1 continue info = proc.stdout.getvalue() if not str(pg) in info: log.error("Bad data from info: {info}".format(info=info)) ERRORS += 1 log.info("Test pg logging") for remote in osds.remotes.iterkeys(): for role in osds.remotes[remote]: if string.find(role, "osd.") != 0: continue osdid = int(role.split('.')[1]) if osdid not in pgs: continue for pg in pgs[osdid]: cmd = ((prefix + "--op log --pgid {pg}"). format(id=osdid, pg=pg).split()) proc = remote.run(args=cmd, check_status=False, stdout=StringIO()) proc.wait() if proc.exitstatus != 0: log.error("Getting log failed for pg {pg} " "from osd.{id} with {ret}". format(pg=pg, id=osdid, ret=proc.exitstatus)) ERRORS += 1 continue HASOBJ = pg in pgswithobjects MODOBJ = "modify" in proc.stdout.getvalue() if HASOBJ != MODOBJ: log.error("Bad log for pg {pg} from osd.{id}". format(pg=pg, id=osdid)) MSG = (HASOBJ and [""] or ["NOT "])[0] log.error("Log should {msg}have a modify entry". format(msg=MSG)) ERRORS += 1 log.info("Test pg export") EXP_ERRORS = 0 for remote in osds.remotes.iterkeys(): for role in osds.remotes[remote]: if string.find(role, "osd.") != 0: continue osdid = int(role.split('.')[1]) if osdid not in pgs: continue for pg in pgs[osdid]: fpath = os.path.join(DATADIR, "osd{id}.{pg}". format(id=osdid, pg=pg)) cmd = ((prefix + "--op export --pgid {pg} --file {file}"). format(id=osdid, pg=pg, file=fpath)) proc = remote.run(args=cmd, check_status=False, stdout=StringIO()) proc.wait() if proc.exitstatus != 0: log.error("Exporting failed for pg {pg} " "on osd.{id} with {ret}". format(pg=pg, id=osdid, ret=proc.exitstatus)) EXP_ERRORS += 1 ERRORS += EXP_ERRORS log.info("Test pg removal") RM_ERRORS = 0 for remote in osds.remotes.iterkeys(): for role in osds.remotes[remote]: if string.find(role, "osd.") != 0: continue osdid = int(role.split('.')[1]) if osdid not in pgs: continue for pg in pgs[osdid]: cmd = ((prefix + "--force --op remove --pgid {pg}"). format(pg=pg, id=osdid)) proc = remote.run(args=cmd, check_status=False, stdout=StringIO()) proc.wait() if proc.exitstatus != 0: log.error("Removing failed for pg {pg} " "on osd.{id} with {ret}". 
format(pg=pg, id=osdid, ret=proc.exitstatus)) RM_ERRORS += 1 ERRORS += RM_ERRORS IMP_ERRORS = 0 if EXP_ERRORS == 0 and RM_ERRORS == 0: log.info("Test pg import") for remote in osds.remotes.iterkeys(): for role in osds.remotes[remote]: if string.find(role, "osd.") != 0: continue osdid = int(role.split('.')[1]) if osdid not in pgs: continue for pg in pgs[osdid]: fpath = os.path.join(DATADIR, "osd{id}.{pg}". format(id=osdid, pg=pg)) cmd = ((prefix + "--op import --file {file}"). format(id=osdid, file=fpath)) proc = remote.run(args=cmd, check_status=False, stdout=StringIO()) proc.wait() if proc.exitstatus != 0: log.error("Import failed from {file} with {ret}". format(file=fpath, ret=proc.exitstatus)) IMP_ERRORS += 1 else: log.warning("SKIPPING IMPORT TESTS DUE TO PREVIOUS FAILURES") ERRORS += IMP_ERRORS if EXP_ERRORS == 0 and RM_ERRORS == 0 and IMP_ERRORS == 0: log.info("Restarting OSDs....") # They still look to be up because of setting nodown for osd in manager.get_osd_status()['up']: manager.revive_osd(osd) # Wait for health? time.sleep(5) # Let scrub after test runs verify consistency of all copies log.info("Verify replicated import data") objects = range(1, NUM_OBJECTS + 1) for i in objects: NAME = REP_NAME + "{num}".format(num=i) TESTNAME = os.path.join(DATADIR, "gettest") REFNAME = os.path.join(DATADIR, NAME) proc = rados(ctx, cli_remote, ['-p', REP_POOL, 'get', NAME, TESTNAME], wait=False) ret = proc.wait() if ret != 0: log.error("After import, rados get failed with {ret}". format(ret=proc.exitstatus)) ERRORS += 1 continue cmd = "diff -q {gettest} {ref}".format(gettest=TESTNAME, ref=REFNAME) proc = cli_remote.run(args=cmd, check_status=False) proc.wait() if proc.exitstatus != 0: log.error("Data comparison failed for {obj}".format(obj=NAME)) ERRORS += 1 return ERRORS
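# Sketch of how the '--op list' output above is consumed: each non-empty line
# is a JSON pair [pgid, object-descriptor], and the test builds a pg -> object
# names map plus a per-object record of the raw JSON it later feeds back to
# the tool (the db[name]["pg2json"] entries). The descriptor fields shown in
# the sample line are illustrative; only 'oid' is relied on here.
import json

def parse_op_list(lines):
    objs_in_pg, obj_json = {}, {}
    for line in lines:
        if not line:
            continue
        pg, obj = json.loads(line)
        objs_in_pg.setdefault(pg, []).append(obj['oid'])
        obj_json.setdefault(obj['oid'], {})[pg] = json.dumps(obj)
    return objs_in_pg, obj_json

_lines = ['["1.0", {"oid": "REPobject1", "snapid": -2}]', '']
_by_pg, _by_obj = parse_op_list(_lines)
assert _by_pg == {'1.0': ['REPobject1']}
assert '1.0' in _by_obj['REPobject1']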
def task(ctx, config): """ Test handling of divergent entries with prior_version prior to log_tail overrides: ceph: conf: osd: debug osd: 5 Requires 3 osds on a single test node. """ if config is None: config = {} assert isinstance(config, dict), \ 'divergent_priors task only accepts a dict for configuration' manager = ctx.managers['ceph'] while len(manager.get_osd_status()['up']) < 3: time.sleep(10) manager.raw_cluster_cmd('tell', 'osd.0', 'flush_pg_stats') manager.raw_cluster_cmd('tell', 'osd.1', 'flush_pg_stats') manager.raw_cluster_cmd('tell', 'osd.2', 'flush_pg_stats') manager.raw_cluster_cmd('osd', 'set', 'noout') manager.raw_cluster_cmd('osd', 'set', 'noin') manager.raw_cluster_cmd('osd', 'set', 'nodown') manager.wait_for_clean() # something that is always there dummyfile = '/etc/fstab' dummyfile2 = '/etc/resolv.conf' # create 1 pg pool log.info('creating foo') manager.raw_cluster_cmd('osd', 'pool', 'create', 'foo', '1') osds = [0, 1, 2] for i in osds: manager.set_config(i, osd_min_pg_log_entries=10) manager.set_config(i, osd_max_pg_log_entries=10) manager.set_config(i, osd_pg_log_trim_min=5) # determine primary divergent = manager.get_pg_primary('foo', 0) log.info("primary and soon to be divergent is %d", divergent) non_divergent = list(osds) non_divergent.remove(divergent) log.info('writing initial objects') first_mon = teuthology.get_first_mon(ctx, config) (mon,) = ctx.cluster.only(first_mon).remotes.iterkeys() # write 100 objects for i in range(100): rados(ctx, mon, ['-p', 'foo', 'put', 'existing_%d' % i, dummyfile]) manager.wait_for_clean() # blackhole non_divergent log.info("blackholing osds %s", str(non_divergent)) for i in non_divergent: manager.set_config(i, filestore_blackhole=1) DIVERGENT_WRITE = 5 DIVERGENT_REMOVE = 5 # Write some soon to be divergent log.info('writing divergent objects') for i in range(DIVERGENT_WRITE): rados(ctx, mon, ['-p', 'foo', 'put', 'existing_%d' % i, dummyfile2], wait=False) # Remove some soon to be divergent log.info('remove divergent objects') for i in range(DIVERGENT_REMOVE): rados(ctx, mon, ['-p', 'foo', 'rm', 'existing_%d' % (i + DIVERGENT_WRITE)], wait=False) time.sleep(10) mon.run( args=['killall', '-9', 'rados'], wait=True, check_status=False) # kill all the osds but leave divergent in log.info('killing all the osds') for i in osds: manager.kill_osd(i) for i in osds: manager.mark_down_osd(i) for i in non_divergent: manager.mark_out_osd(i) # bring up non-divergent log.info("bringing up non_divergent %s", str(non_divergent)) for i in non_divergent: manager.revive_osd(i) for i in non_divergent: manager.mark_in_osd(i) # write 1 non-divergent object (ensure that old divergent one is divergent) objname = "existing_%d" % (DIVERGENT_WRITE + DIVERGENT_REMOVE) log.info('writing non-divergent object ' + objname) rados(ctx, mon, ['-p', 'foo', 'put', objname, dummyfile2]) manager.wait_for_recovery() # ensure no recovery of up osds first log.info('delay recovery') for i in non_divergent: manager.wait_run_admin_socket( 'osd', i, ['set_recovery_delay', '100000']) # bring in our divergent friend log.info("revive divergent %d", divergent) manager.raw_cluster_cmd('osd', 'set', 'noup') manager.revive_osd(divergent) log.info('delay recovery divergent') manager.wait_run_admin_socket( 'osd', divergent, ['set_recovery_delay', '100000']) manager.raw_cluster_cmd('osd', 'unset', 'noup') while len(manager.get_osd_status()['up']) < 3: time.sleep(10) log.info('wait for peering') rados(ctx, mon, ['-p', 'foo', 'put', 'foo', dummyfile]) # At this point the 
divergent_priors should have been detected log.info("killing divergent %d", divergent) manager.kill_osd(divergent) log.info("reviving divergent %d", divergent) manager.revive_osd(divergent) time.sleep(20) log.info('allowing recovery') # Set osd_recovery_delay_start back to 0 and kick the queue for i in osds: manager.raw_cluster_cmd('tell', 'osd.%d' % i, 'debug', 'kick_recovery_wq', ' 0') log.info('reading divergent objects') for i in range(DIVERGENT_WRITE + DIVERGENT_REMOVE): exit_status = rados(ctx, mon, ['-p', 'foo', 'get', 'existing_%d' % i, '/tmp/existing']) assert exit_status == 0 log.info("success")
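# Sketch of the divergent workload the task above generates: while the other
# replicas are blackholed, the primary alone sees DIVERGENT_WRITE overwrites
# followed by DIVERGENT_REMOVE deletes of the next object names. This helper
# only lays out the rados argument lists that loop issues; the pool and source
# file names mirror the task's own.
def divergent_ops(pool, srcfile, writes=5, removes=5):
    ops = [['-p', pool, 'put', 'existing_%d' % i, srcfile] for i in range(writes)]
    ops += [['-p', pool, 'rm', 'existing_%d' % (i + writes)] for i in range(removes)]
    return ops

_ops = divergent_ops('foo', '/etc/resolv.conf')
assert len(_ops) == 10
assert _ops[0][2] == 'put' and _ops[5][2] == 'rm' and _ops[5][3] == 'existing_5'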
def cod_setup(log, ctx, remote, NUM_OBJECTS, DATADIR, BASE_NAME, DATALINECOUNT, POOL, db, ec): ERRORS = 0 log.info("Creating {objs} objects in pool".format(objs=NUM_OBJECTS)) objects = range(1, NUM_OBJECTS + 1) for i in objects: NAME = BASE_NAME + "{num}".format(num=i) DDNAME = os.path.join(DATADIR, NAME) proc = rados(ctx, remote, ['-p', POOL, 'put', NAME, DDNAME], wait=False) # proc = remote.run(args=['rados', '-p', POOL, 'put', NAME, DDNAME]) ret = proc.wait() if ret != 0: log.critical("Rados put failed with status {ret}".format( ret=proc.exitstatus)) sys.exit(1) db[NAME] = {} keys = range(i) db[NAME]["xattr"] = {} for k in keys: if k == 0: continue mykey = "key{i}-{k}".format(i=i, k=k) myval = "val{i}-{k}".format(i=i, k=k) proc = remote.run( args=['rados', '-p', POOL, 'setxattr', NAME, mykey, myval]) ret = proc.wait() if ret != 0: log.error("setxattr failed with {ret}".format(ret=ret)) ERRORS += 1 db[NAME]["xattr"][mykey] = myval # Erasure coded pools don't support omap if ec: continue # Create omap header in all objects but REPobject1 if i != 1: myhdr = "hdr{i}".format(i=i) proc = remote.run( args=['rados', '-p', POOL, 'setomapheader', NAME, myhdr]) ret = proc.wait() if ret != 0: log.critical("setomapheader failed with {ret}".format(ret=ret)) ERRORS += 1 db[NAME]["omapheader"] = myhdr db[NAME]["omap"] = {} for k in keys: if k == 0: continue mykey = "okey{i}-{k}".format(i=i, k=k) myval = "oval{i}-{k}".format(i=i, k=k) proc = remote.run( args=['rados', '-p', POOL, 'setomapval', NAME, mykey, myval]) ret = proc.wait() if ret != 0: log.critical("setomapval failed with {ret}".format(ret=ret)) db[NAME]["omap"][mykey] = myval return ERRORS
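# Sketch of the verification record cod_setup() builds per object, as far as
# the flattened code above lets one infer the block boundaries: object i
# carries xattrs key{i}-{k}/val{i}-{k} for k in 1..i-1; on replicated pools it
# also gets an omap header hdr{i} (except object 1) and matching okey/oval
# pairs, while EC pools skip omap entirely. This mirrors the bookkeeping only,
# not the rados calls.
def expected_db_entry(i, ec=False):
    entry = {'xattr': {'key%d-%d' % (i, k): 'val%d-%d' % (i, k) for k in range(1, i)}}
    if not ec:
        if i != 1:
            entry['omapheader'] = 'hdr%d' % i
        entry['omap'] = {'okey%d-%d' % (i, k): 'oval%d-%d' % (i, k) for k in range(1, i)}
    return entry

assert expected_db_entry(1) == {'xattr': {}, 'omap': {}}
assert expected_db_entry(3)['omapheader'] == 'hdr3'
assert expected_db_entry(3, ec=True) == {'xattr': {'key3-1': 'val3-1', 'key3-2': 'val3-2'}}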
def task(ctx, config): """ Test handling of osd_failsafe_nearfull_ratio and osd_failsafe_full_ratio configuration settings In order for test to pass must use log-whitelist as follows tasks: - chef: - install: - ceph: log-whitelist: ['OSD near full', 'OSD full dropping all updates'] - osd_failsafe_enospc: """ if config is None: config = {} assert isinstance(config, dict), \ 'osd_failsafe_enospc task only accepts a dict for configuration' # Give 2 seconds for injectargs + osd_op_complaint_time (30) + 2 * osd_heartbeat_interval (6) + 6 padding sleep_time = 50 # something that is always there dummyfile = '/etc/fstab' dummyfile2 = '/etc/resolv.conf' manager = ctx.managers['ceph'] # create 1 pg pool with 1 rep which can only be on osd.0 osds = manager.get_osd_dump() for osd in osds: if osd['osd'] != 0: manager.mark_out_osd(osd['osd']) log.info('creating pool foo') manager.create_pool("foo") manager.raw_cluster_cmd('osd', 'pool', 'set', 'foo', 'size', '1') # State NONE -> NEAR log.info('1. Verify warning messages when exceeding nearfull_ratio') first_mon = teuthology.get_first_mon(ctx, config) (mon,) = ctx.cluster.only(first_mon).remotes.iterkeys() proc = mon.run( args=[ 'sudo', 'daemon-helper', 'kill', 'ceph', '-w' ], stdin=run.PIPE, stdout=StringIO(), wait=False, ) manager.raw_cluster_cmd('tell', 'osd.0', 'injectargs', '--osd_failsafe_nearfull_ratio .00001') time.sleep(sleep_time) proc.stdin.close() # causes daemon-helper send SIGKILL to ceph -w proc.wait() lines = proc.stdout.getvalue().split('\n') count = len(filter(lambda line: '[WRN] OSD near full' in line, lines)) assert count == 2, 'Incorrect number of warning messages expected 2 got %d' % count count = len(filter(lambda line: '[ERR] OSD full dropping all updates' in line, lines)) assert count == 0, 'Incorrect number of error messages expected 0 got %d' % count # State NEAR -> FULL log.info('2. Verify error messages when exceeding full_ratio') proc = mon.run( args=[ 'sudo', 'daemon-helper', 'kill', 'ceph', '-w' ], stdin=run.PIPE, stdout=StringIO(), wait=False, ) manager.raw_cluster_cmd('tell', 'osd.0', 'injectargs', '--osd_failsafe_full_ratio .00001') time.sleep(sleep_time) proc.stdin.close() # causes daemon-helper send SIGKILL to ceph -w proc.wait() lines = proc.stdout.getvalue().split('\n') count = len(filter(lambda line: '[ERR] OSD full dropping all updates' in line, lines)) assert count == 2, 'Incorrect number of error messages expected 2 got %d' % count log.info('3. Verify write failure when exceeding full_ratio') # Write data should fail ret = rados(ctx, mon, ['-p', 'foo', 'put', 'newfile1', dummyfile]) assert ret != 0, 'Expected write failure but it succeeded with exit status 0' # Put back default manager.raw_cluster_cmd('tell', 'osd.0', 'injectargs', '--osd_failsafe_full_ratio .97') time.sleep(10) # State FULL -> NEAR log.info('4. Verify write success when NOT exceeding full_ratio') # Write should succeed ret = rados(ctx, mon, ['-p', 'foo', 'put', 'newfile2', dummyfile2]) assert ret == 0, 'Expected write to succeed, but got exit status %d' % ret log.info('5. 
Verify warning messages again when exceeding nearfull_ratio') proc = mon.run( args=[ 'sudo', 'daemon-helper', 'kill', 'ceph', '-w' ], stdin=run.PIPE, stdout=StringIO(), wait=False, ) time.sleep(sleep_time) proc.stdin.close() # causes daemon-helper send SIGKILL to ceph -w proc.wait() lines = proc.stdout.getvalue().split('\n') count = len(filter(lambda line: '[WRN] OSD near full' in line, lines)) assert count == 1 or count == 2, 'Incorrect number of warning messages expected 1 or 2 got %d' % count count = len(filter(lambda line: '[ERR] OSD full dropping all updates' in line, lines)) assert count == 0, 'Incorrect number of error messages expected 0 got %d' % count manager.raw_cluster_cmd('tell', 'osd.0', 'injectargs', '--osd_failsafe_nearfull_ratio .90') time.sleep(10) # State NONE -> FULL log.info('6. Verify error messages again when exceeding full_ratio') proc = mon.run( args=[ 'sudo', 'daemon-helper', 'kill', 'ceph', '-w' ], stdin=run.PIPE, stdout=StringIO(), wait=False, ) manager.raw_cluster_cmd('tell', 'osd.0', 'injectargs', '--osd_failsafe_full_ratio .00001') time.sleep(sleep_time) proc.stdin.close() # causes daemon-helper send SIGKILL to ceph -w proc.wait() lines = proc.stdout.getvalue().split('\n') count = len(filter(lambda line: '[WRN] OSD near full' in line, lines)) assert count == 0, 'Incorrect number of warning messages expected 0 got %d' % count count = len(filter(lambda line: '[ERR] OSD full dropping all updates' in line, lines)) assert count == 2, 'Incorrect number of error messages expected 2 got %d' % count # State FULL -> NONE log.info('7. Verify no messages settings back to default') manager.raw_cluster_cmd('tell', 'osd.0', 'injectargs', '--osd_failsafe_full_ratio .97') time.sleep(10) proc = mon.run( args=[ 'sudo', 'daemon-helper', 'kill', 'ceph', '-w' ], stdin=run.PIPE, stdout=StringIO(), wait=False, ) time.sleep(sleep_time) proc.stdin.close() # causes daemon-helper send SIGKILL to ceph -w proc.wait() lines = proc.stdout.getvalue().split('\n') count = len(filter(lambda line: '[WRN] OSD near full' in line, lines)) assert count == 0, 'Incorrect number of warning messages expected 0 got %d' % count count = len(filter(lambda line: '[ERR] OSD full dropping all updates' in line, lines)) assert count == 0, 'Incorrect number of error messages expected 0 got %d' % count log.info('Test Passed') # Bring all OSDs back in manager.remove_pool("foo") for osd in osds: if osd['osd'] != 0: manager.mark_in_osd(osd['osd'])
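# Sketch of the message counting the task above performs on the captured
# `ceph -w` output: split into lines and count how many contain the near-full
# warning or the full error. The original's len(filter(...)) relies on
# Python 2's list-returning filter; this is a version-agnostic equivalent of
# the same count, with made-up sample log lines.
def count_matches(output, needle):
    return sum(1 for line in output.split('\n') if needle in line)

_sample = ("2016-01-01 00:00:00.000000 osd.0 [WRN] OSD near full (97%)\n"
           "2016-01-01 00:00:01.000000 osd.0 [WRN] OSD near full (97%)\n"
           "2016-01-01 00:00:02.000000 mon.0 [INF] pgmap v100\n")
assert count_matches(_sample, '[WRN] OSD near full') == 2
assert count_matches(_sample, '[ERR] OSD full dropping all updates') == 0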
def task(ctx, config): """ Test handling of divergent entries with prior_version prior to log_tail overrides: ceph: conf: osd: debug osd: 5 Requires 3 osds on a single test node. """ if config is None: config = {} assert isinstance(config, dict), \ 'divergent_priors task only accepts a dict for configuration' manager = ctx.managers['ceph'] while len(manager.get_osd_status()['up']) < 3: time.sleep(10) manager.raw_cluster_cmd('tell', 'osd.0', 'flush_pg_stats') manager.raw_cluster_cmd('tell', 'osd.1', 'flush_pg_stats') manager.raw_cluster_cmd('tell', 'osd.2', 'flush_pg_stats') manager.raw_cluster_cmd('osd', 'set', 'noout') manager.raw_cluster_cmd('osd', 'set', 'noin') manager.raw_cluster_cmd('osd', 'set', 'nodown') manager.wait_for_clean() # something that is always there dummyfile = '/etc/fstab' dummyfile2 = '/etc/resolv.conf' # create 1 pg pool log.info('creating foo') manager.raw_cluster_cmd('osd', 'pool', 'create', 'foo', '1') osds = [0, 1, 2] for i in osds: manager.set_config(i, osd_min_pg_log_entries=10) manager.set_config(i, osd_max_pg_log_entries=10) manager.set_config(i, osd_pg_log_trim_min=5) # determine primary divergent = manager.get_pg_primary('foo', 0) log.info("primary and soon to be divergent is %d", divergent) non_divergent = list(osds) non_divergent.remove(divergent) log.info('writing initial objects') first_mon = teuthology.get_first_mon(ctx, config) (mon, ) = ctx.cluster.only(first_mon).remotes.iterkeys() # write 100 objects for i in range(100): rados(ctx, mon, ['-p', 'foo', 'put', 'existing_%d' % i, dummyfile]) manager.wait_for_clean() # blackhole non_divergent log.info("blackholing osds %s", str(non_divergent)) for i in non_divergent: manager.set_config(i, objectstore_blackhole=1) DIVERGENT_WRITE = 5 DIVERGENT_REMOVE = 5 # Write some soon to be divergent log.info('writing divergent objects') for i in range(DIVERGENT_WRITE): rados(ctx, mon, ['-p', 'foo', 'put', 'existing_%d' % i, dummyfile2], wait=False) # Remove some soon to be divergent log.info('remove divergent objects') for i in range(DIVERGENT_REMOVE): rados(ctx, mon, ['-p', 'foo', 'rm', 'existing_%d' % (i + DIVERGENT_WRITE)], wait=False) time.sleep(10) mon.run(args=['killall', '-9', 'rados'], wait=True, check_status=False) # kill all the osds but leave divergent in log.info('killing all the osds') for i in osds: manager.kill_osd(i) for i in osds: manager.mark_down_osd(i) for i in non_divergent: manager.mark_out_osd(i) # bring up non-divergent log.info("bringing up non_divergent %s", str(non_divergent)) for i in non_divergent: manager.revive_osd(i) for i in non_divergent: manager.mark_in_osd(i) # write 1 non-divergent object (ensure that old divergent one is divergent) objname = "existing_%d" % (DIVERGENT_WRITE + DIVERGENT_REMOVE) log.info('writing non-divergent object ' + objname) rados(ctx, mon, ['-p', 'foo', 'put', objname, dummyfile2]) manager.wait_for_recovery() # ensure no recovery of up osds first log.info('delay recovery') for i in non_divergent: manager.wait_run_admin_socket('osd', i, ['set_recovery_delay', '100000']) # bring in our divergent friend log.info("revive divergent %d", divergent) manager.raw_cluster_cmd('osd', 'set', 'noup') manager.revive_osd(divergent) log.info('delay recovery divergent') manager.wait_run_admin_socket('osd', divergent, ['set_recovery_delay', '100000']) manager.raw_cluster_cmd('osd', 'unset', 'noup') while len(manager.get_osd_status()['up']) < 3: time.sleep(10) log.info('wait for peering') rados(ctx, mon, ['-p', 'foo', 'put', 'foo', dummyfile]) # At this point the 
divergent_priors should have been detected log.info("killing divergent %d", divergent) manager.kill_osd(divergent) log.info("reviving divergent %d", divergent) manager.revive_osd(divergent) time.sleep(20) log.info('allowing recovery') # Set osd_recovery_delay_start back to 0 and kick the queue for i in osds: manager.raw_cluster_cmd('tell', 'osd.%d' % i, 'debug', 'kick_recovery_wq', ' 0') log.info('reading divergent objects') for i in range(DIVERGENT_WRITE + DIVERGENT_REMOVE): exit_status = rados( ctx, mon, ['-p', 'foo', 'get', 'existing_%d' % i, '/tmp/existing']) assert exit_status == 0 log.info("success")
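# Sketch of the "wait until enough OSDs are up" polling these tasks repeat
# (while len(manager.get_osd_status()['up']) < 3: time.sleep(10)). The status
# callable is injected so the helper can be exercised without a cluster, and
# the timeout is an addition for illustration, not something the tasks above
# enforce.
import time

def wait_for_up_osds(get_status, want, interval=10, timeout=600):
    waited = 0
    while len(get_status()['up']) < want:
        if waited >= timeout:
            raise RuntimeError('only %d of %d OSDs came up' %
                               (len(get_status()['up']), want))
        time.sleep(interval)
        waited += interval

# Example with a canned status source (already satisfied, so no sleeping).
wait_for_up_osds(lambda: {'up': [0, 1, 2]}, want=3)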
def configure_regions_and_zones(ctx, config, regions, role_endpoints): """ Configure regions and zones from rados and rgw. """ if not regions: log.debug( 'In rgw.configure_regions_and_zones() and regions is None. ' 'Bailing') yield return log.info('Configuring regions and zones...') log.debug('config is %r', config) log.debug('regions are %r', regions) log.debug('role_endpoints = %r', role_endpoints) # extract the zone info role_zones = dict([(client, extract_zone_info(ctx, client, c_config)) for client, c_config in config.iteritems()]) log.debug('roles_zones = %r', role_zones) # extract the user info and append it to the payload tuple for the given # client for client, c_config in config.iteritems(): if not c_config: user_info = None else: user_info = extract_user_info(c_config) (region, zone, zone_info) = role_zones[client] role_zones[client] = (region, zone, zone_info, user_info) region_info = dict([ (region_name, extract_region_info(region_name, r_config)) for region_name, r_config in regions.iteritems()]) fill_in_endpoints(region_info, role_zones, role_endpoints) # clear out the old defaults first_mon = teuthology.get_first_mon(ctx, config) (mon,) = ctx.cluster.only(first_mon).remotes.iterkeys() # removing these objects from .rgw.root and the per-zone root pools # may or may not matter rados(ctx, mon, cmd=['-p', '.rgw.root', 'rm', 'region_info.default']) rados(ctx, mon, cmd=['-p', '.rgw.root', 'rm', 'zone_info.default']) for client in config.iterkeys(): for role, (_, zone, zone_info, user_info) in role_zones.iteritems(): rados(ctx, mon, cmd=['-p', zone_info['domain_root'], 'rm', 'region_info.default']) rados(ctx, mon, cmd=['-p', zone_info['domain_root'], 'rm', 'zone_info.default']) (remote,) = ctx.cluster.only(role).remotes.keys() for pool_info in zone_info['placement_pools']: remote.run(args=['ceph', 'osd', 'pool', 'create', pool_info['val']['index_pool'], '64', '64']) if ctx.rgw.ec_data_pool: create_ec_pool(remote, pool_info['val']['data_pool'], zone, 64, ctx.rgw.erasure_code_profile) else: create_replicated_pool( remote, pool_info['val']['data_pool'], 64) rgwadmin(ctx, client, cmd=['-n', client, 'zone', 'set', '--rgw-zone', zone], stdin=StringIO(json.dumps(dict( zone_info.items() + user_info.items()))), check_status=True) for region, info in region_info.iteritems(): region_json = json.dumps(info) log.debug('region info is: %s', region_json) rgwadmin(ctx, client, cmd=['-n', client, 'region', 'set'], stdin=StringIO(region_json), check_status=True) if info['is_master']: rgwadmin(ctx, client, cmd=['-n', client, 'region', 'default', '--rgw-region', region], check_status=True) rgwadmin(ctx, client, cmd=['-n', client, 'regionmap', 'update']) yield
def task(ctx, config): """ Test handling of osd_failsafe_nearfull_ratio and osd_failsafe_full_ratio configuration settings In order for test to pass must use log-whitelist as follows tasks: - chef: - install: - ceph: log-whitelist: ['OSD near full', 'OSD full dropping all updates'] - osd_failsafe_enospc: """ if config is None: config = {} assert isinstance(config, dict), \ 'osd_failsafe_enospc task only accepts a dict for configuration' # Give 2 seconds for injectargs + osd_op_complaint_time (30) + 2 * osd_heartbeat_interval (6) + 6 padding sleep_time = 50 # something that is always there dummyfile = '/etc/fstab' dummyfile2 = '/etc/resolv.conf' # create 1 pg pool with 1 rep which can only be on osd.0 osds = ctx.manager.get_osd_dump() for osd in osds: if osd['osd'] != 0: ctx.manager.mark_out_osd(osd['osd']) log.info('creating pool foo') ctx.manager.create_pool("foo") ctx.manager.raw_cluster_cmd('osd', 'pool', 'set', 'foo', 'size', '1') # State NONE -> NEAR log.info('1. Verify warning messages when exceeding nearfull_ratio') first_mon = teuthology.get_first_mon(ctx, config) (mon,) = ctx.cluster.only(first_mon).remotes.iterkeys() proc = mon.run( args=[ 'daemon-helper', 'kill', 'ceph', '-w' ], stdin=run.PIPE, stdout=StringIO(), wait=False, ) ctx.manager.raw_cluster_cmd('tell', 'osd.0', 'injectargs', '--osd_failsafe_nearfull_ratio .00001') time.sleep(sleep_time) proc.stdin.close() # causes daemon-helper send SIGKILL to ceph -w proc.wait() lines = proc.stdout.getvalue().split('\n') count = len(filter(lambda line: '[WRN] OSD near full' in line, lines)) assert count == 2, 'Incorrect number of warning messages expected 2 got %d' % count count = len(filter(lambda line: '[ERR] OSD full dropping all updates' in line, lines)) assert count == 0, 'Incorrect number of error messages expected 0 got %d' % count # State NEAR -> FULL log.info('2. Verify error messages when exceeding full_ratio') proc = mon.run( args=[ 'daemon-helper', 'kill', 'ceph', '-w' ], stdin=run.PIPE, stdout=StringIO(), wait=False, ) ctx.manager.raw_cluster_cmd('tell', 'osd.0', 'injectargs', '--osd_failsafe_full_ratio .00001') time.sleep(sleep_time) proc.stdin.close() # causes daemon-helper send SIGKILL to ceph -w proc.wait() lines = proc.stdout.getvalue().split('\n') count = len(filter(lambda line: '[ERR] OSD full dropping all updates' in line, lines)) assert count == 2, 'Incorrect number of error messages expected 2 got %d' % count log.info('3. Verify write failure when exceeding full_ratio') # Write data should fail ret = rados(ctx, mon, ['-p', 'foo', 'put', 'newfile1', dummyfile]) assert ret != 0, 'Expected write failure but it succeeded with exit status 0' # Put back default ctx.manager.raw_cluster_cmd('tell', 'osd.0', 'injectargs', '--osd_failsafe_full_ratio .97') time.sleep(10) # State FULL -> NEAR log.info('4. Verify write success when NOT exceeding full_ratio') # Write should succeed ret = rados(ctx, mon, ['-p', 'foo', 'put', 'newfile2', dummyfile2]) assert ret == 0, 'Expected write to succeed, but got exit status %d' % ret log.info('5. 
Verify warning messages again when exceeding nearfull_ratio') proc = mon.run( args=[ 'daemon-helper', 'kill', 'ceph', '-w' ], stdin=run.PIPE, stdout=StringIO(), wait=False, ) time.sleep(sleep_time) proc.stdin.close() # causes daemon-helper send SIGKILL to ceph -w proc.wait() lines = proc.stdout.getvalue().split('\n') count = len(filter(lambda line: '[WRN] OSD near full' in line, lines)) assert count == 1 or count == 2, 'Incorrect number of warning messages expected 1 or 2 got %d' % count count = len(filter(lambda line: '[ERR] OSD full dropping all updates' in line, lines)) assert count == 0, 'Incorrect number of error messages expected 0 got %d' % count ctx.manager.raw_cluster_cmd('tell', 'osd.0', 'injectargs', '--osd_failsafe_nearfull_ratio .90') time.sleep(10) # State NONE -> FULL log.info('6. Verify error messages again when exceeding full_ratio') proc = mon.run( args=[ 'daemon-helper', 'kill', 'ceph', '-w' ], stdin=run.PIPE, stdout=StringIO(), wait=False, ) ctx.manager.raw_cluster_cmd('tell', 'osd.0', 'injectargs', '--osd_failsafe_full_ratio .00001') time.sleep(sleep_time) proc.stdin.close() # causes daemon-helper send SIGKILL to ceph -w proc.wait() lines = proc.stdout.getvalue().split('\n') count = len(filter(lambda line: '[WRN] OSD near full' in line, lines)) assert count == 0, 'Incorrect number of warning messages expected 0 got %d' % count count = len(filter(lambda line: '[ERR] OSD full dropping all updates' in line, lines)) assert count == 2, 'Incorrect number of error messages expected 2 got %d' % count # State FULL -> NONE log.info('7. Verify no messages settings back to default') ctx.manager.raw_cluster_cmd('tell', 'osd.0', 'injectargs', '--osd_failsafe_full_ratio .97') time.sleep(10) proc = mon.run( args=[ 'daemon-helper', 'kill', 'ceph', '-w' ], stdin=run.PIPE, stdout=StringIO(), wait=False, ) time.sleep(sleep_time) proc.stdin.close() # causes daemon-helper send SIGKILL to ceph -w proc.wait() lines = proc.stdout.getvalue().split('\n') count = len(filter(lambda line: '[WRN] OSD near full' in line, lines)) assert count == 0, 'Incorrect number of warning messages expected 0 got %d' % count count = len(filter(lambda line: '[ERR] OSD full dropping all updates' in line, lines)) assert count == 0, 'Incorrect number of error messages expected 0 got %d' % count log.info('Test Passed') # Bring all OSDs back in ctx.manager.remove_pool("foo") for osd in osds: if osd['osd'] != 0: ctx.manager.mark_in_osd(osd['osd'])
def task(ctx, config): """ Test handling of divergent entries with prior_version prior to log_tail config: none Requires 3 osds. """ if config is None: config = {} assert isinstance(config, dict), \ 'divergent_priors task only accepts a dict for configuration' while len(ctx.manager.get_osd_status()['up']) < 3: time.sleep(10) ctx.manager.raw_cluster_cmd('tell', 'osd.0', 'flush_pg_stats') ctx.manager.raw_cluster_cmd('tell', 'osd.1', 'flush_pg_stats') ctx.manager.raw_cluster_cmd('tell', 'osd.2', 'flush_pg_stats') ctx.manager.raw_cluster_cmd('osd', 'set', 'noout') ctx.manager.raw_cluster_cmd('osd', 'set', 'noin') ctx.manager.raw_cluster_cmd('osd', 'set', 'nodown') ctx.manager.wait_for_clean() # something that is always there dummyfile = '/etc/fstab' dummyfile2 = '/etc/resolv.conf' # create 1 pg pool log.info('creating foo') ctx.manager.raw_cluster_cmd('osd', 'pool', 'create', 'foo', '1') osds = [0, 1, 2] for i in osds: ctx.manager.set_config(i, osd_min_pg_log_entries=1) # determine primary divergent = ctx.manager.get_pg_primary('foo', 0) log.info("primary and soon to be divergent is %d", divergent) non_divergent = [0,1,2] non_divergent.remove(divergent) log.info('writing initial objects') # write 1000 objects for i in range(1000): rados(ctx, mon, ['-p', 'foo', 'put', 'existing_%d' % i, dummyfile]) ctx.manager.wait_for_clean() # blackhole non_divergent log.info("blackholing osds %s", str(non_divergent)) for i in non_divergent: ctx.manager.set_config(i, filestore_blackhole='') # write 1 (divergent) object log.info('writing divergent object existing_0') rados( ctx, mon, ['-p', 'foo', 'put', 'existing_0', dummyfile2], wait=False) time.sleep(10) mon.run( args=['killall', '-9', 'rados'], wait=True, check_status=False) # kill all the osds log.info('killing all the osds') for i in osds: ctx.manager.kill_osd(i) for i in osds: ctx.manager.mark_down_osd(i) for i in osds: ctx.manager.mark_out_osd(i) # bring up non-divergent log.info("bringing up non_divergent %s", str(non_divergent)) for i in non_divergent: ctx.manager.revive_osd(i) for i in non_divergent: ctx.manager.mark_in_osd(i) log.info('making log long to prevent backfill') for i in non_divergent: ctx.manager.set_config(i, osd_min_pg_log_entries=100000) # write 1 non-divergent object (ensure that old divergent one is divergent) log.info('writing non-divergent object existing_1') rados(ctx, mon, ['-p', 'foo', 'put', 'existing_1', dummyfile2]) ctx.manager.wait_for_recovery() # ensure no recovery log.info('delay recovery') for i in non_divergent: ctx.manager.set_config(i, osd_recovery_delay_start=100000) # bring in our divergent friend log.info("revive divergent %d", divergent) ctx.manager.revive_osd(divergent) while len(ctx.manager.get_osd_status()['up']) < 3: time.sleep(10) log.info('delay recovery divergent') ctx.manager.set_config(divergent, osd_recovery_delay_start=100000) log.info('mark divergent in') ctx.manager.mark_in_osd(divergent) log.info('wait for peering') rados(ctx, mon, ['-p', 'foo', 'put', 'foo', dummyfile]) log.info("killing divergent %d", divergent) ctx.manager.kill_osd(divergent) log.info("reviving divergent %d", divergent) ctx.manager.revive_osd(divergent) log.info('allowing recovery') for i in non_divergent: ctx.manager.set_config(i, osd_recovery_delay_start=0) log.info('reading existing_0') exit_status = rados(ctx, mon, ['-p', 'foo', 'get', 'existing_0', '-o', '/tmp/existing']) assert exit_status is 0 log.info("success")
def task(ctx, config): """ Test handling of lost objects on an ec pool. A pretty rigid cluster is brought up andtested by this task """ if config is None: config = {} assert isinstance(config, dict), \ 'lost_unfound task only accepts a dict for configuration' first_mon = teuthology.get_first_mon(ctx, config) (mon, ) = ctx.cluster.only(first_mon).remotes.keys() manager = ceph_manager.CephManager( mon, ctx=ctx, logger=log.getChild('ceph_manager'), ) manager.wait_for_clean() profile = config.get('erasure_code_profile', { 'k': '2', 'm': '2', 'crush-failure-domain': 'osd' }) profile_name = profile.get('name', 'lost_unfound') manager.create_erasure_code_profile(profile_name, profile) pool = manager.create_pool_with_unique_name( erasure_code_profile_name=profile_name, min_size=2) # something that is always there, readable and never empty dummyfile = '/etc/group' # kludge to make sure they get a map rados(ctx, mon, ['-p', pool, 'put', 'dummy', dummyfile]) manager.flush_pg_stats([0, 1]) manager.wait_for_recovery() # create old objects for f in range(1, 10): rados(ctx, mon, ['-p', pool, 'put', 'existing_%d' % f, dummyfile]) rados(ctx, mon, ['-p', pool, 'put', 'existed_%d' % f, dummyfile]) rados(ctx, mon, ['-p', pool, 'rm', 'existed_%d' % f]) # delay recovery, and make the pg log very long (to prevent backfill) manager.raw_cluster_cmd( 'tell', 'osd.1', 'injectargs', '--osd-recovery-delay-start 1000 --osd-min-pg-log-entries 100000000') manager.kill_osd(0) manager.mark_down_osd(0) manager.kill_osd(3) manager.mark_down_osd(3) for f in range(1, 10): rados(ctx, mon, ['-p', pool, 'put', 'new_%d' % f, dummyfile]) rados(ctx, mon, ['-p', pool, 'put', 'existed_%d' % f, dummyfile]) rados(ctx, mon, ['-p', pool, 'put', 'existing_%d' % f, dummyfile]) # take out osd.1 and a necessary shard of those objects. 
manager.kill_osd(1) manager.mark_down_osd(1) manager.raw_cluster_cmd('osd', 'lost', '1', '--yes-i-really-mean-it') manager.revive_osd(0) manager.wait_till_osd_is_up(0) manager.revive_osd(3) manager.wait_till_osd_is_up(3) manager.flush_pg_stats([0, 2, 3]) manager.wait_till_active() manager.flush_pg_stats([0, 2, 3]) # verify that there are unfound objects unfound = manager.get_num_unfound_objects() log.info("there are %d unfound objects" % unfound) assert unfound testdir = teuthology.get_testdir(ctx) procs = [] if config.get('parallel_bench', True): procs.append( mon.run(args=[ "/bin/sh", "-c", " ".join([ 'adjust-ulimits', 'ceph-coverage', '{tdir}/archive/coverage', 'rados', '--no-log-to-stderr', '--name', 'client.admin', '-b', str(4 << 10), '-p', pool, '-t', '20', 'bench', '240', 'write', ]).format(tdir=testdir), ], logger=log.getChild( 'radosbench.{id}'.format(id='client.admin')), stdin=run.PIPE, wait=False)) time.sleep(10) # mark stuff lost pgs = manager.get_pg_stats() for pg in pgs: if pg['stat_sum']['num_objects_unfound'] > 0: # verify that i can list them direct from the osd log.info('listing missing/lost in %s state %s', pg['pgid'], pg['state']) m = manager.list_pg_unfound(pg['pgid']) log.info('%s' % m) assert m['num_unfound'] == pg['stat_sum']['num_objects_unfound'] log.info("reverting unfound in %s", pg['pgid']) manager.raw_cluster_cmd('pg', pg['pgid'], 'mark_unfound_lost', 'delete') else: log.info("no unfound in %s", pg['pgid']) manager.raw_cluster_cmd('tell', 'osd.0', 'debug', 'kick_recovery_wq', '5') manager.raw_cluster_cmd('tell', 'osd.2', 'debug', 'kick_recovery_wq', '5') manager.raw_cluster_cmd('tell', 'osd.3', 'debug', 'kick_recovery_wq', '5') manager.flush_pg_stats([0, 2, 3]) manager.wait_for_recovery() if not config.get('parallel_bench', True): time.sleep(20) # verify result for f in range(1, 10): err = rados(ctx, mon, ['-p', pool, 'get', 'new_%d' % f, '-']) assert err err = rados(ctx, mon, ['-p', pool, 'get', 'existed_%d' % f, '-']) assert err err = rados(ctx, mon, ['-p', pool, 'get', 'existing_%d' % f, '-']) assert err # see if osd.1 can cope manager.revive_osd(1) manager.wait_till_osd_is_up(1) manager.wait_for_clean() run.wait(procs)
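# The mark-unfound-lost loop above filters manager.get_pg_stats() by hand. A
# small sketch of that selection step on its own (hypothetical helper name; it
# assumes only the pg-stat fields already used above: 'pgid' and
# 'stat_sum'/'num_objects_unfound'):
def _pgs_with_unfound(manager):
    """Return [(pgid, num_unfound), ...] for PGs that report unfound objects."""
    return [(pg['pgid'], pg['stat_sum']['num_objects_unfound'])
            for pg in manager.get_pg_stats()
            if pg['stat_sum']['num_objects_unfound'] > 0]
# e.g. for pgid, n in _pgs_with_unfound(manager):
#          manager.raw_cluster_cmd('pg', pgid, 'mark_unfound_lost', 'delete')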
def task(ctx, config): """ Test handling of lost objects. A pretty rigid cluseter is brought up andtested by this task """ POOL = 'unfounddel_pool' if config is None: config = {} assert isinstance(config, dict), \ 'lost_unfound task only accepts a dict for configuration' first_mon = teuthology.get_first_mon(ctx, config) (mon, ) = ctx.cluster.only(first_mon).remotes.iterkeys() manager = ceph_manager.CephManager( mon, ctx=ctx, logger=log.getChild('ceph_manager'), ) while len(manager.get_osd_status()['up']) < 3: time.sleep(10) manager.flush_pg_stats([0, 1, 2]) manager.wait_for_clean() manager.create_pool(POOL) # something that is always there dummyfile = '/etc/fstab' # take an osd out until the very end manager.kill_osd(2) manager.mark_down_osd(2) manager.mark_out_osd(2) # kludge to make sure they get a map rados(ctx, mon, ['-p', POOL, 'put', 'dummy', dummyfile]) manager.flush_pg_stats([0, 1]) manager.wait_for_recovery() # create old objects for f in range(1, 10): rados(ctx, mon, ['-p', POOL, 'put', 'existing_%d' % f, dummyfile]) rados(ctx, mon, ['-p', POOL, 'put', 'existed_%d' % f, dummyfile]) rados(ctx, mon, ['-p', POOL, 'rm', 'existed_%d' % f]) # delay recovery, and make the pg log very long (to prevent backfill) manager.raw_cluster_cmd( 'tell', 'osd.1', 'injectargs', '--osd-recovery-delay-start 1000 --osd-min-pg-log-entries 100000000') manager.kill_osd(0) manager.mark_down_osd(0) for f in range(1, 10): rados(ctx, mon, ['-p', POOL, 'put', 'new_%d' % f, dummyfile]) rados(ctx, mon, ['-p', POOL, 'put', 'existed_%d' % f, dummyfile]) rados(ctx, mon, ['-p', POOL, 'put', 'existing_%d' % f, dummyfile]) # bring osd.0 back up, let it peer, but don't replicate the new # objects... log.info('osd.0 command_args is %s' % 'foo') log.info(ctx.daemons.get_daemon('osd', 0).command_args) ctx.daemons.get_daemon('osd', 0).command_kwargs['args'].extend( ['--osd-recovery-delay-start', '1000']) manager.revive_osd(0) manager.mark_in_osd(0) manager.wait_till_osd_is_up(0) manager.flush_pg_stats([0, 1]) manager.wait_till_active() # take out osd.1 and the only copy of those objects. 
manager.kill_osd(1) manager.mark_down_osd(1) manager.mark_out_osd(1) manager.raw_cluster_cmd('osd', 'lost', '1', '--yes-i-really-mean-it') # bring up osd.2 so that things would otherwise, in theory, recovery fully manager.revive_osd(2) manager.mark_in_osd(2) manager.wait_till_osd_is_up(2) manager.flush_pg_stats([0, 2]) manager.wait_till_active() manager.flush_pg_stats([0, 2]) # verify that there are unfound objects unfound = manager.get_num_unfound_objects() log.info("there are %d unfound objects" % unfound) assert unfound testdir = teuthology.get_testdir(ctx) procs = [] if config.get('parallel_bench', True): procs.append( mon.run(args=[ "/bin/sh", "-c", " ".join([ 'adjust-ulimits', 'ceph-coverage', '{tdir}/archive/coverage', 'rados', '--no-log-to-stderr', '--name', 'client.admin', '-b', str(4 << 10), '-p', POOL, '-t', '20', 'bench', '240', 'write', ]).format(tdir=testdir), ], logger=log.getChild( 'radosbench.{id}'.format(id='client.admin')), stdin=run.PIPE, wait=False)) time.sleep(10) # mark stuff lost pgs = manager.get_pg_stats() for pg in pgs: if pg['stat_sum']['num_objects_unfound'] > 0: primary = 'osd.%d' % pg['acting'][0] # verify that i can list them direct from the osd log.info('listing missing/lost in %s state %s', pg['pgid'], pg['state']) m = manager.list_pg_missing(pg['pgid']) #log.info('%s' % m) assert m['num_unfound'] == pg['stat_sum']['num_objects_unfound'] num_unfound = 0 for o in m['objects']: if len(o['locations']) == 0: num_unfound += 1 assert m['num_unfound'] == num_unfound log.info("reverting unfound in %s on %s", pg['pgid'], primary) manager.raw_cluster_cmd('pg', pg['pgid'], 'mark_unfound_lost', 'delete') else: log.info("no unfound in %s", pg['pgid']) manager.raw_cluster_cmd('tell', 'osd.0', 'debug', 'kick_recovery_wq', '5') manager.raw_cluster_cmd('tell', 'osd.2', 'debug', 'kick_recovery_wq', '5') manager.flush_pg_stats([0, 2]) manager.wait_for_recovery() # verify result for f in range(1, 10): err = rados(ctx, mon, ['-p', POOL, 'get', 'new_%d' % f, '-']) assert err err = rados(ctx, mon, ['-p', POOL, 'get', 'existed_%d' % f, '-']) assert err err = rados(ctx, mon, ['-p', POOL, 'get', 'existing_%d' % f, '-']) assert err # see if osd.1 can cope manager.revive_osd(1) manager.mark_in_osd(1) manager.wait_till_osd_is_up(1) manager.wait_for_clean() run.wait(procs)
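# The task above cross-checks list_pg_missing() output: 'num_unfound' should
# equal the number of objects with no known locations. A sketch of that check
# as a standalone assertion (hypothetical helper name; 'objects', 'locations'
# and 'num_unfound' are exactly the fields the task already reads):
def _assert_unfound_consistent(missing):
    """missing is the dict returned by manager.list_pg_missing(pgid)."""
    no_location = sum(1 for o in missing['objects'] if len(o['locations']) == 0)
    assert missing['num_unfound'] == no_location, \
        'num_unfound %d != objects without locations %d' % (
            missing['num_unfound'], no_location)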
def task(ctx, config): """ Test handling of object location going down """ if config is None: config = {} assert isinstance(config, dict), \ 'lost_unfound task only accepts a dict for configuration' first_mon = teuthology.get_first_mon(ctx, config) (mon,) = ctx.cluster.only(first_mon).remotes.iterkeys() manager = ceph_manager.CephManager( mon, ctx=ctx, logger=log.getChild('ceph_manager'), ) while len(manager.get_osd_status()['up']) < 3: time.sleep(10) manager.wait_for_clean() # something that is always there dummyfile = '/etc/fstab' # take 0, 1 out manager.mark_out_osd(0) manager.mark_out_osd(1) manager.wait_for_clean() # delay recovery, and make the pg log very long (to prevent backfill) manager.raw_cluster_cmd( 'tell', 'osd.0', 'injectargs', '--osd-recovery-delay-start 10000 --osd-min-pg-log-entries 100000000' ) # delay recovery, and make the pg log very long (to prevent backfill) manager.raw_cluster_cmd( 'tell', 'osd.1', 'injectargs', '--osd-recovery-delay-start 10000 --osd-min-pg-log-entries 100000000' ) # delay recovery, and make the pg log very long (to prevent backfill) manager.raw_cluster_cmd( 'tell', 'osd.2', 'injectargs', '--osd-recovery-delay-start 10000 --osd-min-pg-log-entries 100000000' ) # delay recovery, and make the pg log very long (to prevent backfill) manager.raw_cluster_cmd( 'tell', 'osd.3', 'injectargs', '--osd-recovery-delay-start 10000 --osd-min-pg-log-entries 100000000' ) # kludge to make sure they get a map rados(ctx, mon, ['-p', 'data', 'put', 'dummy', dummyfile]) # create old objects for f in range(1, 10): rados(ctx, mon, ['-p', 'data', 'put', 'existing_%d' % f, dummyfile]) manager.mark_out_osd(3) manager.wait_till_active() manager.mark_in_osd(0) manager.wait_till_active() manager.raw_cluster_cmd('tell', 'osd.2', 'flush_pg_stats') manager.raw_cluster_cmd('tell', 'osd.0', 'flush_pg_stats') manager.mark_out_osd(2) manager.wait_till_active() # bring up 1 manager.mark_in_osd(1) manager.wait_till_active() manager.raw_cluster_cmd('tell', 'osd.1', 'flush_pg_stats') manager.raw_cluster_cmd('tell', 'osd.0', 'flush_pg_stats') log.info("Getting unfound objects") unfound = manager.get_num_unfound_objects() assert not unfound manager.kill_osd(2) manager.mark_down_osd(2) manager.kill_osd(3) manager.mark_down_osd(3) manager.raw_cluster_cmd('tell', 'osd.1', 'flush_pg_stats') manager.raw_cluster_cmd('tell', 'osd.0', 'flush_pg_stats') log.info("Getting unfound objects") unfound = manager.get_num_unfound_objects() assert unfound
def task(ctx, config): """ Test handling of lost objects on an ec pool. A pretty rigid cluster is brought up andtested by this task """ if config is None: config = {} assert isinstance(config, dict), \ 'lost_unfound task only accepts a dict for configuration' first_mon = teuthology.get_first_mon(ctx, config) (mon,) = ctx.cluster.only(first_mon).remotes.iterkeys() manager = ceph_manager.CephManager( mon, ctx=ctx, logger=log.getChild('ceph_manager'), ) manager.wait_for_clean() profile = config.get('erasure_code_profile', { 'k': '2', 'm': '2', 'ruleset-failure-domain': 'osd' }) profile_name = profile.get('name', 'lost_unfound') manager.create_erasure_code_profile(profile_name, profile) pool = manager.create_pool_with_unique_name(erasure_code_profile_name=profile_name) # something that is always there, readable and never empty dummyfile = '/etc/group' # kludge to make sure they get a map rados(ctx, mon, ['-p', pool, 'put', 'dummy', dummyfile]) manager.raw_cluster_cmd('tell', 'osd.0', 'flush_pg_stats') manager.raw_cluster_cmd('tell', 'osd.1', 'flush_pg_stats') manager.wait_for_recovery() # create old objects for f in range(1, 10): rados(ctx, mon, ['-p', pool, 'put', 'existing_%d' % f, dummyfile]) rados(ctx, mon, ['-p', pool, 'put', 'existed_%d' % f, dummyfile]) rados(ctx, mon, ['-p', pool, 'rm', 'existed_%d' % f]) # delay recovery, and make the pg log very long (to prevent backfill) manager.raw_cluster_cmd( 'tell', 'osd.1', 'injectargs', '--osd-recovery-delay-start 1000 --osd-min-pg-log-entries 100000000' ) manager.kill_osd(0) manager.mark_down_osd(0) manager.kill_osd(3) manager.mark_down_osd(3) for f in range(1, 10): rados(ctx, mon, ['-p', pool, 'put', 'new_%d' % f, dummyfile]) rados(ctx, mon, ['-p', pool, 'put', 'existed_%d' % f, dummyfile]) rados(ctx, mon, ['-p', pool, 'put', 'existing_%d' % f, dummyfile]) # take out osd.1 and a necessary shard of those objects. 
manager.kill_osd(1) manager.mark_down_osd(1) manager.raw_cluster_cmd('osd', 'lost', '1', '--yes-i-really-mean-it') manager.revive_osd(0) manager.wait_till_osd_is_up(0) manager.revive_osd(3) manager.wait_till_osd_is_up(3) manager.raw_cluster_cmd('tell', 'osd.0', 'flush_pg_stats') manager.raw_cluster_cmd('tell', 'osd.2', 'flush_pg_stats') manager.raw_cluster_cmd('tell', 'osd.3', 'flush_pg_stats') manager.wait_till_active() manager.raw_cluster_cmd('tell', 'osd.0', 'flush_pg_stats') manager.raw_cluster_cmd('tell', 'osd.2', 'flush_pg_stats') manager.raw_cluster_cmd('tell', 'osd.3', 'flush_pg_stats') # verify that there are unfound objects unfound = manager.get_num_unfound_objects() log.info("there are %d unfound objects" % unfound) assert unfound testdir = teuthology.get_testdir(ctx) procs = [] if config.get('parallel_bench', True): procs.append(mon.run( args=[ "/bin/sh", "-c", " ".join(['adjust-ulimits', 'ceph-coverage', '{tdir}/archive/coverage', 'rados', '--no-log-to-stderr', '--name', 'client.admin', '-b', str(4<<10), '-p' , pool, '-t', '20', 'bench', '240', 'write', ]).format(tdir=testdir), ], logger=log.getChild('radosbench.{id}'.format(id='client.admin')), stdin=run.PIPE, wait=False )) time.sleep(10) # mark stuff lost pgs = manager.get_pg_stats() for pg in pgs: if pg['stat_sum']['num_objects_unfound'] > 0: # verify that i can list them direct from the osd log.info('listing missing/lost in %s state %s', pg['pgid'], pg['state']); m = manager.list_pg_missing(pg['pgid']) log.info('%s' % m) assert m['num_unfound'] == pg['stat_sum']['num_objects_unfound'] log.info("reverting unfound in %s", pg['pgid']) manager.raw_cluster_cmd('pg', pg['pgid'], 'mark_unfound_lost', 'delete') else: log.info("no unfound in %s", pg['pgid']) manager.raw_cluster_cmd('tell', 'osd.0', 'debug', 'kick_recovery_wq', '5') manager.raw_cluster_cmd('tell', 'osd.2', 'debug', 'kick_recovery_wq', '5') manager.raw_cluster_cmd('tell', 'osd.3', 'debug', 'kick_recovery_wq', '5') manager.raw_cluster_cmd('tell', 'osd.0', 'flush_pg_stats') manager.raw_cluster_cmd('tell', 'osd.2', 'flush_pg_stats') manager.raw_cluster_cmd('tell', 'osd.3', 'flush_pg_stats') manager.wait_for_recovery() if not config.get('parallel_bench', True): time.sleep(20) # verify result for f in range(1, 10): err = rados(ctx, mon, ['-p', pool, 'get', 'new_%d' % f, '-']) assert err err = rados(ctx, mon, ['-p', pool, 'get', 'existed_%d' % f, '-']) assert err err = rados(ctx, mon, ['-p', pool, 'get', 'existing_%d' % f, '-']) assert err # see if osd.1 can cope manager.revive_osd(1) manager.wait_till_osd_is_up(1) manager.wait_for_clean() run.wait(procs)
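# Both lost_unfound erasure-code variants above start the same background
# 'rados bench' writer. A sketch of that launcher as a helper (hypothetical
# name; the command line is the one the tasks already build):
def _start_background_bench(mon, testdir, pool, seconds=240):
    """Start a background rados bench writer on the mon remote."""
    return mon.run(
        args=[
            '/bin/sh', '-c',
            ' '.join(['adjust-ulimits', 'ceph-coverage',
                      '{tdir}/archive/coverage', 'rados', '--no-log-to-stderr',
                      '--name', 'client.admin', '-b', str(4 << 10),
                      '-p', pool, '-t', '20',
                      'bench', str(seconds), 'write']).format(tdir=testdir),
        ],
        logger=log.getChild('radosbench.{id}'.format(id='client.admin')),
        stdin=run.PIPE,
        wait=False)
# The caller is expected to run.wait() on the returned proc once recovery and
# the result checks are done, as the tasks above do with their 'procs' list.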
def task(ctx, config): """ Test handling of divergent entries during export / import to regression test tracker #11184 overrides: ceph: conf: osd: debug osd: 5 Requires 3 osds on a single test node. """ if config is None: config = {} assert isinstance(config, dict), \ 'divergent_priors task only accepts a dict for configuration' manager = ctx.managers['ceph'] while len(manager.get_osd_status()['up']) < 3: time.sleep(10) osds = [0, 1, 2] manager.flush_pg_stats(osds) manager.raw_cluster_cmd('osd', 'set', 'noout') manager.raw_cluster_cmd('osd', 'set', 'noin') manager.raw_cluster_cmd('osd', 'set', 'nodown') manager.wait_for_clean() # something that is always there dummyfile = '/etc/fstab' dummyfile2 = '/etc/resolv.conf' testdir = teuthology.get_testdir(ctx) # create 1 pg pool log.info('creating foo') manager.raw_cluster_cmd('osd', 'pool', 'create', 'foo', '1') # Remove extra pool to simlify log output manager.raw_cluster_cmd('osd', 'pool', 'delete', 'rbd', 'rbd', '--yes-i-really-really-mean-it') for i in osds: manager.set_config(i, osd_min_pg_log_entries=10) manager.set_config(i, osd_max_pg_log_entries=10) manager.set_config(i, osd_pg_log_trim_min=5) # determine primary divergent = manager.get_pg_primary('foo', 0) log.info("primary and soon to be divergent is %d", divergent) non_divergent = list(osds) non_divergent.remove(divergent) log.info('writing initial objects') first_mon = teuthology.get_first_mon(ctx, config) (mon,) = ctx.cluster.only(first_mon).remotes.iterkeys() # write 100 objects for i in range(100): rados(ctx, mon, ['-p', 'foo', 'put', 'existing_%d' % i, dummyfile]) manager.wait_for_clean() # blackhole non_divergent log.info("blackholing osds %s", str(non_divergent)) for i in non_divergent: manager.set_config(i, objectstore_blackhole=1) DIVERGENT_WRITE = 5 DIVERGENT_REMOVE = 5 # Write some soon to be divergent log.info('writing divergent objects') for i in range(DIVERGENT_WRITE): rados(ctx, mon, ['-p', 'foo', 'put', 'existing_%d' % i, dummyfile2], wait=False) # Remove some soon to be divergent log.info('remove divergent objects') for i in range(DIVERGENT_REMOVE): rados(ctx, mon, ['-p', 'foo', 'rm', 'existing_%d' % (i + DIVERGENT_WRITE)], wait=False) time.sleep(10) mon.run( args=['killall', '-9', 'rados'], wait=True, check_status=False) # kill all the osds but leave divergent in log.info('killing all the osds') for i in osds: manager.kill_osd(i) for i in osds: manager.mark_down_osd(i) for i in non_divergent: manager.mark_out_osd(i) # bring up non-divergent log.info("bringing up non_divergent %s", str(non_divergent)) for i in non_divergent: manager.revive_osd(i) for i in non_divergent: manager.mark_in_osd(i) # write 1 non-divergent object (ensure that old divergent one is divergent) objname = "existing_%d" % (DIVERGENT_WRITE + DIVERGENT_REMOVE) log.info('writing non-divergent object ' + objname) rados(ctx, mon, ['-p', 'foo', 'put', objname, dummyfile2]) manager.wait_for_recovery() # ensure no recovery of up osds first log.info('delay recovery') for i in non_divergent: manager.wait_run_admin_socket( 'osd', i, ['set_recovery_delay', '100000']) # bring in our divergent friend log.info("revive divergent %d", divergent) manager.raw_cluster_cmd('osd', 'set', 'noup') manager.revive_osd(divergent) log.info('delay recovery divergent') manager.wait_run_admin_socket( 'osd', divergent, ['set_recovery_delay', '100000']) manager.raw_cluster_cmd('osd', 'unset', 'noup') while len(manager.get_osd_status()['up']) < 3: time.sleep(10) log.info('wait for peering') rados(ctx, mon, ['-p', 'foo', 'put', 
'foo', dummyfile]) # At this point the divergent_priors should have been detected log.info("killing divergent %d", divergent) manager.kill_osd(divergent) # Split pgs for pool foo manager.raw_cluster_cmd('osd', 'pool', 'set', 'foo', 'pg_num', '2') time.sleep(5) manager.raw_cluster_cmd('pg', 'dump') # Export a pg (exp_remote,) = ctx.\ cluster.only('osd.{o}'.format(o=divergent)).remotes.iterkeys() FSPATH = manager.get_filepath() JPATH = os.path.join(FSPATH, "journal") prefix = ("sudo adjust-ulimits ceph-objectstore-tool " "--data-path {fpath} --journal-path {jpath} " "--log-file=" "/var/log/ceph/objectstore_tool.$$.log ". format(fpath=FSPATH, jpath=JPATH)) pid = os.getpid() expfile = os.path.join(testdir, "exp.{pid}.out".format(pid=pid)) cmd = ((prefix + "--op export --pgid 2.0 --file {file}"). format(id=divergent, file=expfile)) proc = exp_remote.run(args=cmd, wait=True, check_status=False, stdout=StringIO()) assert proc.exitstatus == 0 # Remove the same pg that was exported cmd = ((prefix + "--op remove --pgid 2.0"). format(id=divergent)) proc = exp_remote.run(args=cmd, wait=True, check_status=False, stdout=StringIO()) assert proc.exitstatus == 0 # Kill one of non-divergent OSDs log.info('killing osd.%d' % non_divergent[0]) manager.kill_osd(non_divergent[0]) manager.mark_down_osd(non_divergent[0]) # manager.mark_out_osd(non_divergent[0]) # An empty collection for pg 2.0 needs to be cleaned up cmd = ((prefix + "--op remove --pgid 2.0"). format(id=non_divergent[0])) proc = exp_remote.run(args=cmd, wait=True, check_status=False, stdout=StringIO()) assert proc.exitstatus == 0 cmd = ((prefix + "--op import --file {file}"). format(id=non_divergent[0], file=expfile)) proc = exp_remote.run(args=cmd, wait=True, check_status=False, stdout=StringIO()) assert proc.exitstatus == 0 # bring in our divergent friend and other node log.info("revive divergent %d", divergent) manager.revive_osd(divergent) manager.mark_in_osd(divergent) log.info("revive %d", non_divergent[0]) manager.revive_osd(non_divergent[0]) while len(manager.get_osd_status()['up']) < 3: time.sleep(10) log.info('delay recovery divergent') manager.set_config(divergent, osd_recovery_delay_start=100000) log.info('mark divergent in') manager.mark_in_osd(divergent) log.info('wait for peering') rados(ctx, mon, ['-p', 'foo', 'put', 'foo', dummyfile]) log.info("killing divergent %d", divergent) manager.kill_osd(divergent) log.info("reviving divergent %d", divergent) manager.revive_osd(divergent) time.sleep(3) log.info('allowing recovery') # Set osd_recovery_delay_start back to 0 and kick the queue for i in osds: manager.raw_cluster_cmd('tell', 'osd.%d' % i, 'debug', 'kick_recovery_wq', '0') log.info('reading divergent objects') for i in range(DIVERGENT_WRITE + DIVERGENT_REMOVE): exit_status = rados(ctx, mon, ['-p', 'foo', 'get', 'existing_%d' % i, '/tmp/existing']) assert exit_status == 0 (remote,) = ctx.\ cluster.only('osd.{o}'.format(o=divergent)).remotes.iterkeys() cmd = 'rm {file}'.format(file=expfile) remote.run(args=cmd, wait=True) log.info("success")
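# The export / remove / import steps above repeat the same ceph-objectstore-tool
# invocation with a shared prefix. A sketch of one wrapper for that pattern
# (hypothetical helper name; it assumes the same remote.run()/StringIO usage and
# the FSPATH/JPATH values computed above):
def _objectstore_tool(remote, fspath, jpath, op_args):
    """Run ceph-objectstore-tool with the task's standard prefix; assert success."""
    prefix = ("sudo adjust-ulimits ceph-objectstore-tool "
              "--data-path {fpath} --journal-path {jpath} "
              "--log-file=/var/log/ceph/objectstore_tool.$$.log ".format(
                  fpath=fspath, jpath=jpath))
    proc = remote.run(args=prefix + op_args, wait=True,
                      check_status=False, stdout=StringIO())
    assert proc.exitstatus == 0
    return proc
# e.g. _objectstore_tool(exp_remote, FSPATH, JPATH,
#                        "--op export --pgid 2.0 --file {file}".format(file=expfile))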
def configure_regions_and_zones(ctx, config, regions, role_endpoints, realm): """ Configure regions and zones from rados and rgw. """ if not regions: log.debug( 'In rgw.configure_regions_and_zones() and regions is None. ' 'Bailing') configure_compression_in_default_zone(ctx, config) yield return if not realm: log.debug( 'In rgw.configure_regions_and_zones() and realm is None. ' 'Bailing') configure_compression_in_default_zone(ctx, config) yield return log.info('Configuring regions and zones...') log.debug('config is %r', config) log.debug('regions are %r', regions) log.debug('role_endpoints = %r', role_endpoints) log.debug('realm is %r', realm) # extract the zone info role_zones = dict([(client, extract_zone_info(ctx, client, c_config)) for client, c_config in config.iteritems()]) log.debug('roles_zones = %r', role_zones) # extract the user info and append it to the payload tuple for the given # client for client, c_config in config.iteritems(): if not c_config: user_info = None else: user_info = extract_user_info(c_config) (region, zone, zone_info) = role_zones[client] role_zones[client] = (region, zone, zone_info, user_info) region_info = dict([ (region_name, extract_region_info(region_name, r_config)) for region_name, r_config in regions.iteritems()]) fill_in_endpoints(region_info, role_zones, role_endpoints) # clear out the old defaults cluster_name, daemon_type, client_id = teuthology.split_role(client) first_mon = teuthology.get_first_mon(ctx, config, cluster_name) (mon,) = ctx.cluster.only(first_mon).remotes.iterkeys() # removing these objects from .rgw.root and the per-zone root pools # may or may not matter rados(ctx, mon, cmd=['-p', '.rgw.root', 'rm', 'region_info.default', '--cluster', cluster_name]) rados(ctx, mon, cmd=['-p', '.rgw.root', 'rm', 'zone_info.default', '--cluster', cluster_name]) # read master zonegroup and master_zone for zonegroup, zg_info in region_info.iteritems(): if zg_info['is_master']: master_zonegroup = zonegroup master_zone = zg_info['master_zone'] break for client in config.iterkeys(): (zonegroup, zone, zone_info, user_info) = role_zones[client] if zonegroup == master_zonegroup and zone == master_zone: master_client = client break log.debug('master zonegroup = %r', master_zonegroup) log.debug('master zone = %r', master_zone) log.debug('master client = %r', master_client) log.debug('config %r ', config) (ret, out) = rgwadmin(ctx, client, cmd=['realm', 'create', '--rgw-realm', realm, '--default']) log.debug('realm create ret %r exists %r', -ret, errno.EEXIST) assert ret == 0 or ret == -errno.EEXIST if ret == -errno.EEXIST: log.debug('realm %r exists', realm) for client in config.iterkeys(): for role, (zonegroup, zone, zone_info, user_info) in role_zones.iteritems(): rados(ctx, mon, cmd=['-p', zone_info['domain_root'], 'rm', 'region_info.default', '--cluster', cluster_name]) rados(ctx, mon, cmd=['-p', zone_info['domain_root'], 'rm', 'zone_info.default', '--cluster', cluster_name]) (remote,) = ctx.cluster.only(role).remotes.keys() for pool_info in zone_info['placement_pools']: remote.run(args=['sudo', 'ceph', 'osd', 'pool', 'create', pool_info['val']['index_pool'], '64', '64', '--cluster', cluster_name]) if ctx.rgw.ec_data_pool: create_ec_pool(remote, pool_info['val']['data_pool'], zone, 64, ctx.rgw.erasure_code_profile, cluster_name) else: create_replicated_pool( remote, pool_info['val']['data_pool'], 64, cluster_name) zone_json = json.dumps(dict(zone_info.items() + user_info.items())) log.debug('zone info is: %r', zone_json) rgwadmin(ctx, client, cmd=['zone', 
'set', '--rgw-zonegroup', zonegroup, '--rgw-zone', zone], stdin=StringIO(zone_json), check_status=True) for region, info in region_info.iteritems(): region_json = json.dumps(info) log.debug('region info is: %s', region_json) rgwadmin(ctx, client, cmd=['zonegroup', 'set'], stdin=StringIO(region_json), check_status=True) if info['is_master']: rgwadmin(ctx, client, cmd=['zonegroup', 'default', '--rgw-zonegroup', master_zonegroup], check_status=True) (zonegroup, zone, zone_info, user_info) = role_zones[client] rgwadmin(ctx, client, cmd=['zone', 'default', '--rgw-zone', zone], check_status=True) #this used to take master_client, need to edit that accordingly rgwadmin(ctx, client, cmd=['period', 'update', '--commit'], check_status=True) yield
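# configure_regions_and_zones() above treats 'realm create' as successful when
# radosgw-admin returns 0 or -EEXIST (the realm is already present). A sketch of
# that handling in isolation (hypothetical helper name; rgwadmin(), errno and
# log are the ones the function already uses):
def _create_realm_if_missing(ctx, client, realm):
    """Create the realm, tolerating the already-exists case."""
    (ret, out) = rgwadmin(ctx, client,
                          cmd=['realm', 'create', '--rgw-realm', realm, '--default'])
    assert ret == 0 or ret == -errno.EEXIST, 'realm create failed: %r' % ret
    if ret == -errno.EEXIST:
        log.debug('realm %r exists', realm)
    return out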