Example #1
def task(ctx, config):
    """
    Test handling of lost objects.

    A pretty rigid cluster is brought up and tested by this task
    """
    POOL = 'unfound_pool'
    if config is None:
        config = {}
    assert isinstance(config, dict), \
        'lost_unfound task only accepts a dict for configuration'
    first_mon = teuthology.get_first_mon(ctx, config)
    (mon, ) = ctx.cluster.only(first_mon).remotes.keys()

    manager = ceph_manager.CephManager(
        mon,
        ctx=ctx,
        logger=log.getChild('ceph_manager'),
    )

    while len(manager.get_osd_status()['up']) < 3:
        time.sleep(10)

    manager.wait_for_clean()

    manager.create_pool(POOL)

    # something that is always there
    dummyfile = '/etc/fstab'

    # take an osd out until the very end
    manager.kill_osd(2)
    manager.mark_down_osd(2)
    manager.mark_out_osd(2)

    # kludge to make sure they get a map
    rados(ctx, mon, ['-p', POOL, 'put', 'dummy', dummyfile])

    manager.flush_pg_stats([0, 1])
    manager.wait_for_recovery()

    # create old objects
    for f in range(1, 10):
        rados(ctx, mon, ['-p', POOL, 'put', 'existing_%d' % f, dummyfile])
        rados(ctx, mon, ['-p', POOL, 'put', 'existed_%d' % f, dummyfile])
        rados(ctx, mon, ['-p', POOL, 'rm', 'existed_%d' % f])

    # delay recovery, and make the pg log very long (to prevent backfill)
    manager.raw_cluster_cmd(
        'tell', 'osd.1', 'injectargs',
        '--osd-recovery-delay-start 1000 --osd-min-pg-log-entries 100000000')

    manager.kill_osd(0)
    manager.mark_down_osd(0)

    for f in range(1, 10):
        rados(ctx, mon, ['-p', POOL, 'put', 'new_%d' % f, dummyfile])
        rados(ctx, mon, ['-p', POOL, 'put', 'existed_%d' % f, dummyfile])
        rados(ctx, mon, ['-p', POOL, 'put', 'existing_%d' % f, dummyfile])

    # bring osd.0 back up, let it peer, but don't replicate the new
    # objects...
    log.info('osd.0 command_args is %s' % 'foo')
    log.info(ctx.daemons.get_daemon('osd', 0).command_args)
    ctx.daemons.get_daemon('osd', 0).command_kwargs['args'].extend(
        ['--osd-recovery-delay-start', '1000'])
    manager.revive_osd(0)
    manager.mark_in_osd(0)
    manager.wait_till_osd_is_up(0)

    manager.flush_pg_stats([1, 0])
    manager.wait_till_active()

    # take out osd.1 and the only copy of those objects.
    manager.kill_osd(1)
    manager.mark_down_osd(1)
    manager.mark_out_osd(1)
    manager.raw_cluster_cmd('osd', 'lost', '1', '--yes-i-really-mean-it')

    # bring up osd.2 so that, in theory, things could otherwise recover fully
    manager.revive_osd(2)
    manager.mark_in_osd(2)
    manager.wait_till_osd_is_up(2)

    manager.flush_pg_stats([0, 2])
    manager.wait_till_active()
    manager.flush_pg_stats([0, 2])

    # verify that there are unfound objects
    unfound = manager.get_num_unfound_objects()
    log.info("there are %d unfound objects" % unfound)
    assert unfound

    testdir = teuthology.get_testdir(ctx)
    procs = []
    if config.get('parallel_bench', True):
        procs.append(
            mon.run(args=[
                "/bin/sh",
                "-c",
                " ".join([
                    'adjust-ulimits',
                    'ceph-coverage',
                    '{tdir}/archive/coverage',
                    'rados',
                    '--no-log-to-stderr',
                    '--name',
                    'client.admin',
                    '-b',
                    str(4 << 10),
                    '-p',
                    POOL,
                    '-t',
                    '20',
                    'bench',
                    '240',
                    'write',
                ]).format(tdir=testdir),
            ],
                    logger=log.getChild(
                        'radosbench.{id}'.format(id='client.admin')),
                    stdin=run.PIPE,
                    wait=False))
    time.sleep(10)

    # mark stuff lost
    pgs = manager.get_pg_stats()
    for pg in pgs:
        if pg['stat_sum']['num_objects_unfound'] > 0:
            primary = 'osd.%d' % pg['acting'][0]

            # verify that i can list them direct from the osd
            log.info('listing missing/lost in %s state %s', pg['pgid'],
                     pg['state'])
            m = manager.list_pg_unfound(pg['pgid'])
            #log.info('%s' % m)
            assert m['num_unfound'] == pg['stat_sum']['num_objects_unfound']
            assert m['available_might_have_unfound'] == True
            assert m['might_have_unfound'][0]['osd'] == "1"
            assert m['might_have_unfound'][0]['status'] == "osd is down"
            num_unfound = 0
            for o in m['objects']:
                if len(o['locations']) == 0:
                    num_unfound += 1
            assert m['num_unfound'] == num_unfound

            log.info("reverting unfound in %s on %s", pg['pgid'], primary)
            manager.raw_cluster_cmd('pg', pg['pgid'], 'mark_unfound_lost',
                                    'revert')
        else:
            log.info("no unfound in %s", pg['pgid'])

    manager.raw_cluster_cmd('tell', 'osd.0', 'debug', 'kick_recovery_wq', '5')
    manager.raw_cluster_cmd('tell', 'osd.2', 'debug', 'kick_recovery_wq', '5')
    manager.flush_pg_stats([0, 2])
    manager.wait_for_recovery()

    # verify result
    for f in range(1, 10):
        err = rados(ctx, mon, ['-p', POOL, 'get', 'new_%d' % f, '-'])
        assert err
        err = rados(ctx, mon, ['-p', POOL, 'get', 'existed_%d' % f, '-'])
        assert err
        err = rados(ctx, mon, ['-p', POOL, 'get', 'existing_%d' % f, '-'])
        assert not err

    # see if osd.1 can cope
    manager.mark_in_osd(1)
    manager.revive_osd(1)
    manager.wait_till_osd_is_up(1)
    manager.wait_for_clean()
    run.wait(procs)
    manager.wait_for_clean()
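
All of these tasks drive the `rados` CLI through a shared rados() helper that the
excerpts do not show. A minimal sketch of it, with the signature inferred from the
call sites above (the wrapper commands and return-value semantics are assumptions):

# Minimal sketch of the assumed rados() helper; callers treat the return
# value as an exit status when wait=True and as a running process otherwise.
def rados(ctx, remote, cmd, wait=True, check_status=False):
    testdir = teuthology.get_testdir(ctx)
    log.info("rados %s" % ' '.join(cmd))
    pre = [
        'adjust-ulimits',
        'ceph-coverage',
        '{tdir}/archive/coverage'.format(tdir=testdir),
        'rados',
    ]
    pre.extend(cmd)
    proc = remote.run(args=pre, check_status=check_status, wait=wait)
    if wait:
        return proc.exitstatus
    return proc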
Example #2
def task(ctx, config):
    """
    Test handling of object location going down
    """
    if config is None:
        config = {}
    assert isinstance(config, dict), \
        'lost_unfound task only accepts a dict for configuration'
    first_mon = teuthology.get_first_mon(ctx, config)
    (mon,) = ctx.cluster.only(first_mon).remotes.keys()

    manager = ceph_manager.CephManager(
        mon,
        ctx=ctx,
        logger=log.getChild('ceph_manager'),
        )

    while len(manager.get_osd_status()['up']) < 3:
        time.sleep(10)
    manager.wait_for_clean()

    # something that is always there
    dummyfile = '/etc/fstab'

    # take 0, 1 out
    manager.mark_out_osd(0)
    manager.mark_out_osd(1)
    manager.wait_for_clean()

    # delay recovery, and make the pg log very long (to prevent backfill)
    for osd_id in range(4):
        manager.raw_cluster_cmd(
            'tell', 'osd.%d' % osd_id, 'injectargs',
            '--osd-recovery-delay-start 10000 --osd-min-pg-log-entries 100000000')

    # kludge to make sure they get a map
    rados(ctx, mon, ['-p', 'data', 'put', 'dummy', dummyfile])

    # create old objects
    for f in range(1, 10):
        rados(ctx, mon, ['-p', 'data', 'put', 'existing_%d' % f, dummyfile])

    manager.mark_out_osd(3)
    manager.wait_till_active()

    manager.mark_in_osd(0)
    manager.wait_till_active()

    manager.flush_pg_stats([2, 0])

    manager.mark_out_osd(2)
    manager.wait_till_active()

    # bring up 1
    manager.mark_in_osd(1)
    manager.wait_till_active()

    manager.flush_pg_stats([0, 1])
    log.info("Getting unfound objects")
    unfound = manager.get_num_unfound_objects()
    assert not unfound

    manager.kill_osd(2)
    manager.mark_down_osd(2)
    manager.kill_osd(3)
    manager.mark_down_osd(3)

    manager.flush_pg_stats([0, 1])
    log.info("Getting unfound objects")
    unfound = manager.get_num_unfound_objects()
    assert unfound
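
Example 2's pass/fail condition hinges on manager.get_num_unfound_objects(). A
plausible sketch of such a helper, assuming the cluster status JSON exposes an
unfound counter under its pgmap section (the key names are assumptions):

# Hypothetical sketch: read the unfound-object count out of 'ceph status';
# the 'pgmap'/'unfound_objects' keys are assumptions about the JSON layout.
def get_num_unfound_objects(self):
    status = self.raw_cluster_status()
    self.log(status)
    return status['pgmap'].get('unfound_objects', 0)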
Example #3
def task(ctx, config):
    """
    Test handling of lost objects on an ec pool.

    A pretty rigid cluster is brought up and tested by this task
    """
    if config is None:
        config = {}
    assert isinstance(config, dict), \
        'lost_unfound task only accepts a dict for configuration'
    first_mon = teuthology.get_first_mon(ctx, config)
    (mon,) = ctx.cluster.only(first_mon).remotes.keys()

    manager = ceph_manager.CephManager(
        mon,
        ctx=ctx,
        logger=log.getChild('ceph_manager'),
        )

    manager.wait_for_clean()

    profile = config.get('erasure_code_profile', {
        'k': '2',
        'm': '2',
        'crush-failure-domain': 'osd'
    })
    profile_name = profile.get('name', 'lost_unfound')
    manager.create_erasure_code_profile(profile_name, profile)
    pool = manager.create_pool_with_unique_name(
        erasure_code_profile_name=profile_name,
        min_size=2)

    # something that is always there, readable and never empty
    dummyfile = '/etc/group'

    # kludge to make sure they get a map
    rados(ctx, mon, ['-p', pool, 'put', 'dummy', dummyfile])

    manager.flush_pg_stats([0, 1])
    manager.wait_for_recovery()

    # create old objects
    for f in range(1, 10):
        rados(ctx, mon, ['-p', pool, 'put', 'existing_%d' % f, dummyfile])
        rados(ctx, mon, ['-p', pool, 'put', 'existed_%d' % f, dummyfile])
        rados(ctx, mon, ['-p', pool, 'rm', 'existed_%d' % f])

    # delay recovery, and make the pg log very long (to prevent backfill)
    manager.raw_cluster_cmd(
            'tell', 'osd.1',
            'injectargs',
            '--osd-recovery-delay-start 1000 --osd-min-pg-log-entries 100000000'
            )

    manager.kill_osd(0)
    manager.mark_down_osd(0)
    manager.kill_osd(3)
    manager.mark_down_osd(3)
    
    for f in range(1, 10):
        rados(ctx, mon, ['-p', pool, 'put', 'new_%d' % f, dummyfile])
        rados(ctx, mon, ['-p', pool, 'put', 'existed_%d' % f, dummyfile])
        rados(ctx, mon, ['-p', pool, 'put', 'existing_%d' % f, dummyfile])

    # take out osd.1 and a necessary shard of those objects.
    manager.kill_osd(1)
    manager.mark_down_osd(1)
    manager.raw_cluster_cmd('osd', 'lost', '1', '--yes-i-really-mean-it')
    manager.revive_osd(0)
    manager.wait_till_osd_is_up(0)
    manager.revive_osd(3)
    manager.wait_till_osd_is_up(3)

    manager.flush_pg_stats([0, 2, 3])
    manager.wait_till_active()
    manager.flush_pg_stats([0, 2, 3])

    # verify that there are unfound objects
    unfound = manager.get_num_unfound_objects()
    log.info("there are %d unfound objects" % unfound)
    assert unfound

    testdir = teuthology.get_testdir(ctx)
    procs = []
    if config.get('parallel_bench', True):
        procs.append(mon.run(
            args=[
                "/bin/sh", "-c",
                " ".join(['adjust-ulimits',
                          'ceph-coverage',
                          '{tdir}/archive/coverage',
                          'rados',
                          '--no-log-to-stderr',
                          '--name', 'client.admin',
                          '-b', str(4 << 10),
                          '-p', pool,
                          '-t', '20',
                          'bench', '240', 'write',
                      ]).format(tdir=testdir),
            ],
            logger=log.getChild('radosbench.{id}'.format(id='client.admin')),
            stdin=run.PIPE,
            wait=False
        ))
    time.sleep(10)

    # mark stuff lost
    pgs = manager.get_pg_stats()
    for pg in pgs:
        if pg['stat_sum']['num_objects_unfound'] > 0:
            # verify that i can list them direct from the osd
            log.info('listing missing/lost in %s state %s', pg['pgid'],
                     pg['state'])
            m = manager.list_pg_unfound(pg['pgid'])
            log.info('%s' % m)
            assert m['num_unfound'] == pg['stat_sum']['num_objects_unfound']

            log.info("reverting unfound in %s", pg['pgid'])
            manager.raw_cluster_cmd('pg', pg['pgid'],
                                    'mark_unfound_lost', 'delete')
        else:
            log.info("no unfound in %s", pg['pgid'])

    manager.raw_cluster_cmd('tell', 'osd.0', 'debug', 'kick_recovery_wq', '5')
    manager.raw_cluster_cmd('tell', 'osd.2', 'debug', 'kick_recovery_wq', '5')
    manager.raw_cluster_cmd('tell', 'osd.3', 'debug', 'kick_recovery_wq', '5')
    manager.flush_pg_stats([0, 2, 3])
    manager.wait_for_recovery()

    if not config.get('parallel_bench', True):
        time.sleep(20)

    # verify result
    for f in range(1, 10):
        err = rados(ctx, mon, ['-p', pool, 'get', 'new_%d' % f, '-'])
        assert err
        err = rados(ctx, mon, ['-p', pool, 'get', 'existed_%d' % f, '-'])
        assert err
        err = rados(ctx, mon, ['-p', pool, 'get', 'existing_%d' % f, '-'])
        assert err

    # see if osd.1 can cope
    manager.revive_osd(1)
    manager.wait_till_osd_is_up(1)
    manager.wait_for_clean()
    run.wait(procs)
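
Examples 3 and 5 build an erasure-coded pool from a profile dict. A sketch of what
manager.create_erasure_code_profile() plausibly does with that dict (the exact
CephManager internals are an assumption):

# Sketch (assumed internals): serialize the profile dict into
# 'ceph osd erasure-code-profile set <name> k=2 m=2 ...' arguments.
def create_erasure_code_profile(self, profile_name, profile):
    args = ['osd', 'erasure-code-profile', 'set', profile_name]
    args += ['%s=%s' % (k, v) for k, v in profile.items()]
    self.raw_cluster_cmd(*args)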
Example #4
def cod_setup(log, ctx, remote, NUM_OBJECTS, DATADIR, BASE_NAME, DATALINECOUNT,
              POOL, db, ec):
    ERRORS = 0
    log.info("Creating {objs} objects in pool".format(objs=NUM_OBJECTS))

    objects = range(1, NUM_OBJECTS + 1)
    for i in objects:
        NAME = BASE_NAME + "{num}".format(num=i)
        DDNAME = os.path.join(DATADIR, NAME)

        proc = rados(ctx,
                     remote, ['-p', POOL, 'put', NAME, DDNAME],
                     wait=False)
        # proc = remote.run(args=['rados', '-p', POOL, 'put', NAME, DDNAME])
        ret = proc.wait()
        if ret != 0:
            log.critical("Rados put failed with status {ret}".format(
                ret=proc.exitstatus))
            sys.exit(1)

        db[NAME] = {}

        keys = range(i)
        db[NAME]["xattr"] = {}
        for k in keys:
            if k == 0:
                continue
            mykey = "key{i}-{k}".format(i=i, k=k)
            myval = "val{i}-{k}".format(i=i, k=k)
            proc = remote.run(
                args=['rados', '-p', POOL, 'setxattr', NAME, mykey, myval])
            ret = proc.wait()
            if ret != 0:
                log.error("setxattr failed with {ret}".format(ret=ret))
                ERRORS += 1
            db[NAME]["xattr"][mykey] = myval

        # Erasure coded pools don't support omap
        if ec:
            continue

        # Create omap header in all objects but REPobject1
        if i != 1:
            myhdr = "hdr{i}".format(i=i)
            proc = remote.run(
                args=['rados', '-p', POOL, 'setomapheader', NAME, myhdr])
            ret = proc.wait()
            if ret != 0:
                log.critical("setomapheader failed with {ret}".format(ret=ret))
                ERRORS += 1
            db[NAME]["omapheader"] = myhdr

        db[NAME]["omap"] = {}
        for k in keys:
            if k == 0:
                continue
            mykey = "okey{i}-{k}".format(i=i, k=k)
            myval = "oval{i}-{k}".format(i=i, k=k)
            proc = remote.run(
                args=['rados', '-p', POOL, 'setomapval', NAME, mykey, myval])
            ret = proc.wait()
            if ret != 0:
                log.critical("setomapval failed with {ret}".format(ret=ret))
                ERRORS += 1
            db[NAME]["omap"][mykey] = myval

    return ERRORS
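
For reference, the shape of the db bookkeeping dict that cod_setup builds, which
Example 7 later checks against live cluster state (values are illustrative, using
a BASE_NAME of "REPobject"):

# Illustrative db layout after cod_setup (made-up values):
# db = {
#     'REPobject1': {'xattr': {}, 'omap': {}},   # i == 1: no keys, no omap header
#     'REPobject2': {
#         'xattr': {'key2-1': 'val2-1'},
#         'omapheader': 'hdr2',
#         'omap': {'okey2-1': 'oval2-1'},
#     },
#     # ... one entry per object; ec pools get 'xattr' only
# }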
Example #5
def task(ctx, config):
    """
    Test backfill reservation calculates "toofull" condition correctly.

    A pretty rigid cluster is brought up and tested by this task
    """
    if config is None:
        config = {}
    assert isinstance(config, dict), \
        'backfill_toofull task only accepts a dict for configuration'
    first_mon = teuthology.get_first_mon(ctx, config)
    (mon, ) = ctx.cluster.only(first_mon).remotes.keys()

    manager = ceph_manager.CephManager(
        mon,
        ctx=ctx,
        logger=log.getChild('ceph_manager'),
    )

    profile = config.get('erasure_code_profile', {
        'k': '2',
        'm': '1',
        'crush-failure-domain': 'osd'
    })
    profile_name = profile.get('name', 'backfill_toofull')
    manager.create_erasure_code_profile(profile_name, profile)
    pool = manager.create_pool_with_unique_name(
        pg_num=1, erasure_code_profile_name=profile_name, min_size=2)
    manager.raw_cluster_cmd('osd', 'pool', 'set', pool, 'pg_autoscale_mode',
                            'off')

    manager.flush_pg_stats([0, 1, 2, 3])
    manager.wait_for_clean()

    pool_id = manager.get_pool_num(pool)
    pgid = '%d.0' % pool_id
    pgs = manager.get_pg_stats()
    acting = next((pg['acting'] for pg in pgs if pg['pgid'] == pgid), None)
    log.debug("acting=%s" % acting)
    assert acting
    primary = acting[0]
    target = acting[1]

    log.debug("write some data")
    rados(ctx, mon, ['-p', pool, 'bench', '120', 'write', '--no-cleanup'])
    df = manager.get_osd_df(target)
    log.debug("target osd df: %s" % df)

    total_kb = df['kb']
    used_kb = df['kb_used']

    log.debug("pause recovery")
    manager.raw_cluster_cmd('osd', 'set', 'noout')
    manager.raw_cluster_cmd('osd', 'set', 'nobackfill')
    manager.raw_cluster_cmd('osd', 'set', 'norecover')

    log.debug("stop tartget osd %s" % target)
    manager.kill_osd(target)
    manager.wait_till_active()

    pgs = manager.get_pg_stats()
    pg = next((pg for pg in pgs if pg['pgid'] == pgid), None)
    log.debug('pg=%s' % pg)
    assert pg

    log.debug("re-write data")
    rados(ctx, mon, ['-p', pool, 'cleanup'])
    time.sleep(10)
    rados(ctx, mon, ['-p', pool, 'bench', '60', 'write', '--no-cleanup'])

    df = manager.get_osd_df(primary)
    log.debug("primary osd df: %s" % df)

    primary_used_kb = df['kb_used']

    log.info("test backfill reservation rejected with toofull")

    # We set the backfillfull ratio below the new data size and expect the
    # pg to enter the backfill_toofull state.
    #
    # We also need to update the nearfull ratio to prevent "full ratio(s)
    # out of order".

    backfillfull = 0.9 * primary_used_kb / total_kb
    nearfull = backfillfull * 0.9

    log.debug("update nearfull ratio to %s and backfillfull ratio to %s" %
              (nearfull, backfillfull))
    manager.raw_cluster_cmd('osd', 'set-nearfull-ratio',
                            '{:.3f}'.format(nearfull + 0.001))
    manager.raw_cluster_cmd('osd', 'set-backfillfull-ratio',
                            '{:.3f}'.format(backfillfull + 0.001))

    log.debug("start tartget osd %s" % target)

    manager.revive_osd(target)
    manager.wait_for_active()
    manager.wait_till_osd_is_up(target)

    wait_for_pg_state(manager, pgid, 'backfill_toofull', target)

    log.info("test pg not enter backfill_toofull after restarting backfill")

    # We want to set the backfillfull ratio big enough for the target to
    # successfully backfill the new data, but smaller than the sum of the
    # old and new data, so that if the osd backfill reservation incorrectly
    # calculates "toofull" the test will detect it (and fail).
    #
    # Note, we need to operate with "uncompressed" bytes because currently
    # osd backfill reservation does not take compression into account.
    #
    # We also need to update nearfull ratio to prevent "full ratio(s) out of order".

    pdf = manager.get_pool_df(pool)
    log.debug("pool %s df: %s" % (pool, pdf))
    assert pdf
    compress_ratio = 1.0 * pdf['compress_under_bytes'] / pdf['compress_bytes_used'] \
        if pdf['compress_bytes_used'] > 0 else 1.0
    log.debug("compress_ratio: %s" % compress_ratio)

    backfillfull = (used_kb + primary_used_kb) * compress_ratio / total_kb
    assert backfillfull < 0.9
    nearfull_min = max(used_kb, primary_used_kb) * compress_ratio / total_kb
    assert nearfull_min < backfillfull
    delta = backfillfull - nearfull_min
    nearfull = nearfull_min + delta * 0.1
    backfillfull = nearfull_min + delta * 0.2
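
    # For concreteness (made-up numbers, not from a real run): with
    # total_kb=10000000, used_kb=1500000, primary_used_kb=1200000 and
    # compress_ratio=1.0, the old+new ratio is 0.27 and nearfull_min is
    # 0.15, so delta=0.12, nearfull=0.162 and backfillfull=0.174: above
    # what either data set needs alone, below what both need together.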

    log.debug("update nearfull ratio to %s and backfillfull ratio to %s" %
              (nearfull, backfillfull))
    manager.raw_cluster_cmd('osd', 'set-nearfull-ratio',
                            '{:.3f}'.format(nearfull + 0.001))
    manager.raw_cluster_cmd('osd', 'set-backfillfull-ratio',
                            '{:.3f}'.format(backfillfull + 0.001))

    wait_for_pg_state(manager, pgid, 'backfilling', target)

    pgs = manager.get_pg_stats()
    pg = next((pg for pg in pgs if pg['pgid'] == pgid), None)
    log.debug('pg=%s' % pg)
    assert pg

    log.debug("interrupt %s backfill" % target)
    manager.mark_down_osd(target)
    # after marking the target osd down it will automatically be
    # up soon again

    log.debug("resume recovery")
    manager.raw_cluster_cmd('osd', 'unset', 'noout')
    manager.raw_cluster_cmd('osd', 'unset', 'nobackfill')
    manager.raw_cluster_cmd('osd', 'unset', 'norecover')

    # wait for everything to peer, backfill and recover
    manager.wait_for_clean()

    pgs = manager.get_pg_stats()
    pg = next((pg for pg in pgs if pg['pgid'] == pgid), None)
    log.info('pg=%s' % pg)
    assert pg
    assert 'clean' in pg['state'].split('+')
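
Example 5 gates its phases on wait_for_pg_state(), which the excerpt does not
define. A minimal sketch of what it needs to do, assuming the same pg-stats
polling used elsewhere in the task (the loop bounds and the flushed osd set are
assumptions):

# Sketch of the assumed wait_for_pg_state(): poll pg stats until the pg
# reports the wanted state with the given osd in its acting set, or give up.
def wait_for_pg_state(manager, pgid, state, to_osd):
    log.debug("waiting for pg %s state is %s" % (pgid, state))
    for i in range(300):
        time.sleep(5)
        manager.flush_pg_stats([0, 1, 2, 3])
        pgs = manager.get_pg_stats()
        pg = next((pg for pg in pgs if pg['pgid'] == pgid), None)
        log.info('pg=%s' % pg)
        assert pg
        status = pg['state'].split('+')
        if to_osd in pg['acting'] and state in status:
            break
    assert to_osd in pg['acting'], \
        'failed to wait for osd %s to be in acting %s' % (to_osd, pg['acting'])
    assert state in status, \
        'failed to wait for pg %s to reach state %s' % (pgid, state)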
Example #6
def task(ctx, config):
    """
    Test handling of divergent entries with prior_version
    prior to log_tail and a ceph-objectstore-tool export/import

    overrides:
      ceph:
        conf:
          osd:
            debug osd: 5

    Requires 3 osds on a single test node.
    """
    if config is None:
        config = {}
    assert isinstance(config, dict), \
        'divergent_priors task only accepts a dict for configuration'

    manager = ctx.managers['ceph']

    while len(manager.get_osd_status()['up']) < 3:
        time.sleep(10)
    manager.flush_pg_stats([0, 1, 2])
    manager.raw_cluster_cmd('osd', 'set', 'noout')
    manager.raw_cluster_cmd('osd', 'set', 'noin')
    manager.raw_cluster_cmd('osd', 'set', 'nodown')
    manager.wait_for_clean()

    # something that is always there
    dummyfile = '/etc/fstab'
    dummyfile2 = '/etc/resolv.conf'
    testdir = teuthology.get_testdir(ctx)

    # create 1 pg pool
    log.info('creating foo')
    manager.raw_cluster_cmd('osd', 'pool', 'create', 'foo', '1')

    osds = [0, 1, 2]
    for i in osds:
        manager.set_config(i, osd_min_pg_log_entries=10)
        manager.set_config(i, osd_max_pg_log_entries=10)
        manager.set_config(i, osd_pg_log_trim_min=5)

    # determine primary
    divergent = manager.get_pg_primary('foo', 0)
    log.info("primary and soon to be divergent is %d", divergent)
    non_divergent = list(osds)
    non_divergent.remove(divergent)

    log.info('writing initial objects')
    first_mon = teuthology.get_first_mon(ctx, config)
    (mon, ) = ctx.cluster.only(first_mon).remotes.keys()
    # write 100 objects
    for i in range(100):
        rados(ctx, mon, ['-p', 'foo', 'put', 'existing_%d' % i, dummyfile])

    manager.wait_for_clean()

    # blackhole non_divergent
    log.info("blackholing osds %s", str(non_divergent))
    for i in non_divergent:
        manager.set_config(i, objectstore_blackhole=1)

    DIVERGENT_WRITE = 5
    DIVERGENT_REMOVE = 5
    # Write some soon to be divergent
    log.info('writing divergent objects')
    for i in range(DIVERGENT_WRITE):
        rados(ctx,
              mon, ['-p', 'foo', 'put',
                    'existing_%d' % i, dummyfile2],
              wait=False)
    # Remove some soon to be divergent
    log.info('remove divergent objects')
    for i in range(DIVERGENT_REMOVE):
        rados(ctx,
              mon, ['-p', 'foo', 'rm',
                    'existing_%d' % (i + DIVERGENT_WRITE)],
              wait=False)
    time.sleep(10)
    mon.run(args=['killall', '-9', 'rados'], wait=True, check_status=False)

    # kill all the osds but leave divergent in
    log.info('killing all the osds')
    for i in osds:
        manager.kill_osd(i)
    for i in osds:
        manager.mark_down_osd(i)
    for i in non_divergent:
        manager.mark_out_osd(i)

    # bring up non-divergent
    log.info("bringing up non_divergent %s", str(non_divergent))
    for i in non_divergent:
        manager.revive_osd(i)
    for i in non_divergent:
        manager.mark_in_osd(i)

    # write 1 non-divergent object (ensure that old divergent one is divergent)
    objname = "existing_%d" % (DIVERGENT_WRITE + DIVERGENT_REMOVE)
    log.info('writing non-divergent object ' + objname)
    rados(ctx, mon, ['-p', 'foo', 'put', objname, dummyfile2])

    manager.wait_for_recovery()

    # ensure no recovery of up osds first
    log.info('delay recovery')
    for i in non_divergent:
        manager.wait_run_admin_socket('osd', i,
                                      ['set_recovery_delay', '100000'])

    # bring in our divergent friend
    log.info("revive divergent %d", divergent)
    manager.raw_cluster_cmd('osd', 'set', 'noup')
    manager.revive_osd(divergent)

    log.info('delay recovery divergent')
    manager.wait_run_admin_socket('osd', divergent,
                                  ['set_recovery_delay', '100000'])

    manager.raw_cluster_cmd('osd', 'unset', 'noup')
    while len(manager.get_osd_status()['up']) < 3:
        time.sleep(10)

    log.info('wait for peering')
    rados(ctx, mon, ['-p', 'foo', 'put', 'foo', dummyfile])

    # At this point the divergent_priors should have been detected

    log.info("killing divergent %d", divergent)
    manager.kill_osd(divergent)

    # Export a pg
    (exp_remote,) = ctx.\
        cluster.only('osd.{o}'.format(o=divergent)).remotes.keys()
    FSPATH = manager.get_filepath()
    JPATH = os.path.join(FSPATH, "journal")
    prefix = ("sudo adjust-ulimits ceph-objectstore-tool "
              "--data-path {fpath} --journal-path {jpath} "
              "--log-file="
              "/var/log/ceph/objectstore_tool.$$.log ".format(fpath=FSPATH,
                                                              jpath=JPATH))
    pid = os.getpid()
    expfile = os.path.join(testdir, "exp.{pid}.out".format(pid=pid))
    cmd = ((prefix + "--op export-remove --pgid 2.0 --file {file}").format(
        id=divergent, file=expfile))
    try:
        exp_remote.sh(cmd, wait=True)
    except CommandFailedError as e:
        assert e.exitstatus == 0

    cmd = ((prefix + "--op import --file {file}").format(id=divergent,
                                                         file=expfile))
    try:
        exp_remote.sh(cmd, wait=True)
    except CommandFailedError as e:
        assert e.exitstatus == 0

    log.info("reviving divergent %d", divergent)
    manager.revive_osd(divergent)
    manager.wait_run_admin_socket('osd', divergent, ['dump_ops_in_flight'])
    time.sleep(20)

    log.info('allowing recovery')
    # Set osd_recovery_delay_start back to 0 and kick the queue
    for i in osds:
        manager.raw_cluster_cmd('tell', 'osd.%d' % i, 'debug',
                                'kick_recovery_wq', '0')

    log.info('reading divergent objects')
    for i in range(DIVERGENT_WRITE + DIVERGENT_REMOVE):
        exit_status = rados(
            ctx, mon, ['-p', 'foo', 'get',
                       'existing_%d' % i, '/tmp/existing'])
        assert exit_status == 0

    cmd = 'rm {file}'.format(file=expfile)
    exp_remote.run(args=cmd, wait=True)
    log.info("success")
Example #7
def test_objectstore(ctx, config, cli_remote, REP_POOL, REP_NAME, ec=False):
    manager = ctx.managers['ceph']

    osds = ctx.cluster.only(teuthology.is_type('osd'))

    TEUTHDIR = teuthology.get_testdir(ctx)
    DATADIR = os.path.join(TEUTHDIR, "ceph.data")
    DATALINECOUNT = 10000
    ERRORS = 0
    NUM_OBJECTS = config.get('objects', 10)
    log.info("objects: {num}".format(num=NUM_OBJECTS))

    pool_dump = manager.get_pool_dump(REP_POOL)
    REPID = pool_dump['pool']

    log.debug("repid={num}".format(num=REPID))

    db = {}

    LOCALDIR = tempfile.mkdtemp("cod")

    cod_setup_local_data(log, ctx, NUM_OBJECTS, LOCALDIR, REP_NAME,
                         DATALINECOUNT)
    allremote = []
    allremote.append(cli_remote)
    allremote += list(osds.remotes.keys())
    allremote = list(set(allremote))
    for remote in allremote:
        cod_setup_remote_data(log, ctx, remote, NUM_OBJECTS, DATADIR, REP_NAME,
                              DATALINECOUNT)

    ERRORS += cod_setup(log, ctx, cli_remote, NUM_OBJECTS, DATADIR, REP_NAME,
                        DATALINECOUNT, REP_POOL, db, ec)

    pgs = {}
    for stats in manager.get_pg_stats():
        if stats["pgid"].find(str(REPID) + ".") != 0:
            continue
        if pool_dump["type"] == ceph_manager.CephManager.REPLICATED_POOL:
            for osd in stats["acting"]:
                pgs.setdefault(osd, []).append(stats["pgid"])
        elif pool_dump["type"] == ceph_manager.CephManager.ERASURE_CODED_POOL:
            shard = 0
            for osd in stats["acting"]:
                pgs.setdefault(osd, []).append("{pgid}s{shard}".format(
                    pgid=stats["pgid"], shard=shard))
                shard += 1
        else:
            raise Exception("{pool} has an unexpected type {type}".format(
                pool=REP_POOL, type=pool_dump["type"]))

    log.info(pgs)
    log.info(db)

    for osd in manager.get_osd_status()['up']:
        manager.kill_osd(osd)
    time.sleep(5)

    pgswithobjects = set()
    objsinpg = {}

    # Test --op list and generate json for all objects
    log.info("Test --op list by generating json for all objects")
    prefix = ("sudo ceph-objectstore-tool "
              "--data-path {fpath} "
              "--journal-path {jpath} ").format(fpath=FSPATH, jpath=JPATH)
    for remote in osds.remotes.keys():
        log.debug(remote)
        log.debug(osds.remotes[remote])
        for role in osds.remotes[remote]:
            if not role.startswith("osd."):
                continue
            osdid = int(role.split('.')[1])
            log.info("process osd.{id} on {remote}".format(id=osdid,
                                                           remote=remote))
            cmd = (prefix + "--op list").format(id=osdid)
            try:
                lines = remote.sh(cmd, check_status=False).splitlines()
                for pgline in lines:
                    if not pgline:
                        continue
                    (pg, obj) = json.loads(pgline)
                    name = obj['oid']
                    if name in db:
                        pgswithobjects.add(pg)
                        objsinpg.setdefault(pg, []).append(name)
                        db[name].setdefault("pg2json",
                                            {})[pg] = json.dumps(obj)
            except CommandFailedError as e:
                log.error(
                    "Bad exit status {ret} from --op list request".format(
                        ret=e.exitstatus))
                ERRORS += 1

    log.info(db)
    log.info(pgswithobjects)
    log.info(objsinpg)

    if pool_dump["type"] == ceph_manager.CephManager.REPLICATED_POOL:
        # Test get-bytes
        log.info("Test get-bytes and set-bytes")
        for basename in db.keys():
            file = os.path.join(DATADIR, basename)
            GETNAME = os.path.join(DATADIR, "get")
            SETNAME = os.path.join(DATADIR, "set")

            for remote in osds.remotes.keys():
                for role in osds.remotes[remote]:
                    if not role.startswith("osd."):
                        continue
                    osdid = int(role.split('.')[1])
                    if osdid not in pgs:
                        continue

                    for pg, JSON in db[basename]["pg2json"].items():
                        if pg in pgs[osdid]:
                            cmd = ((prefix + "--pgid {pg}").format(
                                id=osdid, pg=pg).split())
                            cmd.append(run.Raw("'{json}'".format(json=JSON)))
                            cmd += ("get-bytes {fname}".format(
                                fname=GETNAME).split())
                            proc = remote.run(args=cmd, check_status=False)
                            if proc.exitstatus != 0:
                                remote.run(args="rm -f {getfile}".format(
                                    getfile=GETNAME).split())
                                log.error("Bad exit status {ret}".format(
                                    ret=proc.exitstatus))
                                ERRORS += 1
                                continue
                            cmd = ("diff -q {file} {getfile}".format(
                                file=file, getfile=GETNAME))
                            proc = remote.run(args=cmd.split())
                            if proc.exitstatus != 0:
                                log.error("Data from get-bytes differ")
                                # log.debug("Got:")
                                # cat_file(logging.DEBUG, GETNAME)
                                # log.debug("Expected:")
                                # cat_file(logging.DEBUG, file)
                                ERRORS += 1
                            remote.run(args="rm -f {getfile}".format(
                                getfile=GETNAME).split())

                            data = ("put-bytes going into {file}\n".format(
                                file=file))
                            teuthology.write_file(remote, SETNAME, data)
                            cmd = ((prefix + "--pgid {pg}").format(
                                id=osdid, pg=pg).split())
                            cmd.append(run.Raw("'{json}'".format(json=JSON)))
                            cmd += ("set-bytes {fname}".format(
                                fname=SETNAME).split())
                            proc = remote.run(args=cmd, check_status=False)
                            proc.wait()
                            if proc.exitstatus != 0:
                                log.info(
                                    "set-bytes failed for object {obj} "
                                    "in pg {pg} osd.{id} ret={ret}".format(
                                        obj=basename,
                                        pg=pg,
                                        id=osdid,
                                        ret=proc.exitstatus))
                                ERRORS += 1

                            cmd = ((prefix + "--pgid {pg}").format(
                                id=osdid, pg=pg).split())
                            cmd.append(run.Raw("'{json}'".format(json=JSON)))
                            cmd += "get-bytes -".split()
                            try:
                                output = remote.sh(cmd, wait=True)
                                if data != output:
                                    log.error("Data inconsistent after "
                                              "set-bytes, got:")
                                    log.error(output)
                                    ERRORS += 1
                            except CommandFailedError as e:
                                log.error("get-bytes after "
                                          "set-bytes ret={ret}".format(
                                              ret=e.exitstatus))
                                ERRORS += 1

                            cmd = ((prefix + "--pgid {pg}").format(
                                id=osdid, pg=pg).split())
                            cmd.append(run.Raw("'{json}'".format(json=JSON)))
                            cmd += ("set-bytes {fname}".format(
                                fname=file).split())
                            proc = remote.run(args=cmd, check_status=False)
                            proc.wait()
                            if proc.exitstatus != 0:
                                log.info(
                                    "set-bytes failed for object {obj} "
                                    "in pg {pg} osd.{id} ret={ret}".format(
                                        obj=basename,
                                        pg=pg,
                                        id=osdid,
                                        ret=proc.exitstatus))
                                ERRORS += 1

    log.info("Test list-attrs get-attr")
    for basename in db.keys():
        file = os.path.join(DATADIR, basename)
        GETNAME = os.path.join(DATADIR, "get")
        SETNAME = os.path.join(DATADIR, "set")

        for remote in osds.remotes.keys():
            for role in osds.remotes[remote]:
                if not role.startswith("osd."):
                    continue
                osdid = int(role.split('.')[1])
                if osdid not in pgs:
                    continue

                for pg, JSON in db[basename]["pg2json"].items():
                    if pg in pgs[osdid]:
                        cmd = ((prefix + "--pgid {pg}").format(id=osdid,
                                                               pg=pg).split())
                        cmd.append(run.Raw("'{json}'".format(json=JSON)))
                        cmd += ["list-attrs"]
                        try:
                            keys = remote.sh(cmd, wait=True,
                                             stderr=BytesIO()).split()
                        except CommandFailedError as e:
                            log.error("Bad exit status {ret}".format(
                                ret=e.exitstatus))
                            ERRORS += 1
                            continue
                        values = dict(db[basename]["xattr"])

                        for key in keys:
                            if (key == "_" or key == "snapset"
                                    or key == "hinfo_key"):
                                continue
                            key = key.strip("_")
                            if key not in values:
                                log.error(
                                    "The key {key} should be present".format(
                                        key=key))
                                ERRORS += 1
                                continue
                            exp = values.pop(key)
                            cmd = ((prefix + "--pgid {pg}").format(
                                id=osdid, pg=pg).split())
                            cmd.append(run.Raw("'{json}'".format(json=JSON)))
                            cmd += ("get-attr {key}".format(key="_" +
                                                            key).split())
                            try:
                                val = remote.sh(cmd, wait=True)
                            except CommandFailedError as e:
                                log.error("get-attr failed with {ret}".format(
                                    ret=e.exitstatus))
                                ERRORS += 1
                                continue
                            if exp != val:
                                log.error("For key {key} got value {got} "
                                          "instead of {expected}".format(
                                              key=key, got=val, expected=exp))
                                ERRORS += 1
                        if "hinfo_key" in keys:
                            cmd_prefix = prefix.format(id=osdid)
                            cmd = """
      expected=$({prefix} --pgid {pg} '{json}' get-attr {key} | base64)
      echo placeholder | {prefix} --pgid {pg} '{json}' set-attr {key} -
      test $({prefix} --pgid {pg} '{json}' get-attr {key}) = placeholder
      echo $expected | base64 --decode | \
         {prefix} --pgid {pg} '{json}' set-attr {key} -
      test $({prefix} --pgid {pg} '{json}' get-attr {key} | base64) = $expected
                            """.format(prefix=cmd_prefix,
                                       pg=pg,
                                       json=JSON,
                                       key="hinfo_key")
                            log.debug(cmd)
                            proc = remote.run(
                                args=['bash', '-e', '-x', '-c', cmd],
                                check_status=False,
                                stdout=BytesIO(),
                                stderr=BytesIO())
                            proc.wait()
                            if proc.exitstatus != 0:
                                log.error("failed with " +
                                          str(proc.exitstatus))
                                log.error(" ".join([
                                    six.ensure_str(proc.stdout.getvalue()),
                                    six.ensure_str(proc.stderr.getvalue()),
                                ]))
                                ERRORS += 1

                        if len(values) != 0:
                            log.error("Not all keys found, remaining keys:")
                            log.error(values)

    log.info("Test pg info")
    for remote in osds.remotes.keys():
        for role in osds.remotes[remote]:
            if not role.startswith("osd."):
                continue
            osdid = int(role.split('.')[1])
            if osdid not in pgs:
                continue

            for pg in pgs[osdid]:
                cmd = ((prefix + "--op info --pgid {pg}").format(
                    id=osdid, pg=pg).split())
                try:
                    info = remote.sh(cmd, wait=True)
                except CommandFailedError as e:
                    log.error("Failure of --op info command with {ret}".format(
                        e.exitstatus))
                    ERRORS += 1
                    continue
                if not str(pg) in info:
                    log.error("Bad data from info: {info}".format(info=info))
                    ERRORS += 1

    log.info("Test pg logging")
    for remote in osds.remotes.keys():
        for role in osds.remotes[remote]:
            if not role.startswith("osd."):
                continue
            osdid = int(role.split('.')[1])
            if osdid not in pgs:
                continue

            for pg in pgs[osdid]:
                cmd = ((prefix + "--op log --pgid {pg}").format(id=osdid,
                                                                pg=pg).split())
                try:
                    output = remote.sh(cmd, wait=True)
                except CommandFailedError as e:
                    log.error("Getting log failed for pg {pg} "
                              "from osd.{id} with {ret}".format(
                                  pg=pg, id=osdid, ret=e.exitstatus))
                    ERRORS += 1
                    continue
                HASOBJ = pg in pgswithobjects
                MODOBJ = "modify" in output
                if HASOBJ != MODOBJ:
                    log.error("Bad log for pg {pg} from osd.{id}".format(
                        pg=pg, id=osdid))
                    MSG = "" if HASOBJ else "NOT "
                    log.error(
                        "Log should {msg}have a modify entry".format(msg=MSG))
                    ERRORS += 1

    log.info("Test pg export")
    EXP_ERRORS = 0
    for remote in osds.remotes.keys():
        for role in osds.remotes[remote]:
            if not role.startswith("osd."):
                continue
            osdid = int(role.split('.')[1])
            if osdid not in pgs:
                continue

            for pg in pgs[osdid]:
                fpath = os.path.join(DATADIR, "osd{id}.{pg}".format(id=osdid,
                                                                    pg=pg))

                cmd = ((prefix +
                        "--op export --pgid {pg} --file {file}").format(
                            id=osdid, pg=pg, file=fpath))
                try:
                    remote.sh(cmd, wait=True)
                except CommandFailedError as e:
                    log.error("Exporting failed for pg {pg} "
                              "on osd.{id} with {ret}".format(
                                  pg=pg, id=osdid, ret=e.exitstatus))
                    EXP_ERRORS += 1

    ERRORS += EXP_ERRORS

    log.info("Test pg removal")
    RM_ERRORS = 0
    for remote in osds.remotes.keys():
        for role in osds.remotes[remote]:
            if not role.startswith("osd."):
                continue
            osdid = int(role.split('.')[1])
            if osdid not in pgs:
                continue

            for pg in pgs[osdid]:
                cmd = ((prefix + "--force --op remove --pgid {pg}").format(
                    pg=pg, id=osdid))
                try:
                    remote.sh(cmd, wait=True)
                except CommandFailedError as e:
                    log.error("Removing failed for pg {pg} "
                              "on osd.{id} with {ret}".format(
                                  pg=pg, id=osdid, ret=e.exitstatus))
                    RM_ERRORS += 1

    ERRORS += RM_ERRORS

    IMP_ERRORS = 0
    if EXP_ERRORS == 0 and RM_ERRORS == 0:
        log.info("Test pg import")

        for remote in osds.remotes.keys():
            for role in osds.remotes[remote]:
                if not role.startswith("osd."):
                    continue
                osdid = int(role.split('.')[1])
                if osdid not in pgs:
                    continue

                for pg in pgs[osdid]:
                    fpath = os.path.join(
                        DATADIR, "osd{id}.{pg}".format(id=osdid, pg=pg))

                    cmd = ((prefix + "--op import --file {file}").format(
                        id=osdid, file=fpath))
                    try:
                        remote.sh(cmd, wait=True)
                    except CommandFailedError as e:
                        log.error(
                            "Import failed from {file} with {ret}".format(
                                file=fpath, ret=e.exitstatus))
                        IMP_ERRORS += 1
    else:
        log.warning("SKIPPING IMPORT TESTS DUE TO PREVIOUS FAILURES")

    ERRORS += IMP_ERRORS

    if EXP_ERRORS == 0 and RM_ERRORS == 0 and IMP_ERRORS == 0:
        log.info("Restarting OSDs....")
        # They still look to be up because nodown is set
        for osd in manager.get_osd_status()['up']:
            manager.revive_osd(osd)
        # Wait for health?
        time.sleep(5)
        # Let scrub after test runs verify consistency of all copies
        log.info("Verify replicated import data")
        objects = range(1, NUM_OBJECTS + 1)
        for i in objects:
            NAME = REP_NAME + "{num}".format(num=i)
            TESTNAME = os.path.join(DATADIR, "gettest")
            REFNAME = os.path.join(DATADIR, NAME)

            proc = rados(ctx,
                         cli_remote, ['-p', REP_POOL, 'get', NAME, TESTNAME],
                         wait=False)

            ret = proc.wait()
            if ret != 0:
                log.error("After import, rados get failed with {ret}".format(
                    ret=proc.exitstatus))
                ERRORS += 1
                continue

            cmd = "diff -q {gettest} {ref}".format(gettest=TESTNAME,
                                                   ref=REFNAME)
            proc = cli_remote.run(args=cmd, check_status=False)
            proc.wait()
            if proc.exitstatus != 0:
                log.error("Data comparison failed for {obj}".format(obj=NAME))
                ERRORS += 1

    return ERRORS
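
Example 7 also depends on data-seeding helpers that the excerpt does not show. A
hypothetical sketch of the local one, with the name and arguments taken from the
call site (the exact file contents are an assumption; all that matters for the
later 'diff -q' checks is that local and remote copies match):

# Hypothetical sketch of cod_setup_local_data: write one reference file per
# object so the data comparisons above have a local copy to diff against.
def cod_setup_local_data(log, ctx, NUM_OBJECTS, LOCALDIR, BASE_NAME,
                         DATALINECOUNT):
    for i in range(1, NUM_OBJECTS + 1):
        name = BASE_NAME + "{num}".format(num=i)
        path = os.path.join(LOCALDIR, name)
        with open(path, "w") as f:
            for line in range(DATALINECOUNT):
                f.write("{name} line {line}\n".format(name=name, line=line))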
Example #8
def task(ctx, config):
    """
    Test handling of divergent entries with prior_version
    prior to log_tail

    overrides:
      ceph:
        conf:
          osd:
            debug osd: 5

    Requires 3 osds on a single test node.
    """
    if config is None:
        config = {}
    assert isinstance(config, dict), \
        'divergent_priors task only accepts a dict for configuration'

    manager = ctx.managers['ceph']

    while len(manager.get_osd_status()['up']) < 3:
        time.sleep(10)
    manager.flush_pg_stats([0, 1, 2])
    manager.raw_cluster_cmd('osd', 'set', 'noout')
    manager.raw_cluster_cmd('osd', 'set', 'noin')
    manager.raw_cluster_cmd('osd', 'set', 'nodown')
    manager.wait_for_clean()

    # something that is always there
    dummyfile = '/etc/fstab'
    dummyfile2 = '/etc/resolv.conf'

    # create 1 pg pool
    log.info('creating foo')
    manager.raw_cluster_cmd('osd', 'pool', 'create', 'foo', '1')

    osds = [0, 1, 2]
    for i in osds:
        manager.set_config(i, osd_min_pg_log_entries=10)
        manager.set_config(i, osd_max_pg_log_entries=10)
        manager.set_config(i, osd_pg_log_trim_min=5)

    # determine primary
    divergent = manager.get_pg_primary('foo', 0)
    log.info("primary and soon to be divergent is %d", divergent)
    non_divergent = list(osds)
    non_divergent.remove(divergent)

    log.info('writing initial objects')
    first_mon = teuthology.get_first_mon(ctx, config)
    (mon,) = ctx.cluster.only(first_mon).remotes.keys()
    # write 100 objects
    for i in range(100):
        rados(ctx, mon, ['-p', 'foo', 'put', 'existing_%d' % i, dummyfile])

    manager.wait_for_clean()

    # blackhole non_divergent
    log.info("blackholing osds %s", str(non_divergent))
    for i in non_divergent:
        manager.set_config(i, objectstore_blackhole=1)

    DIVERGENT_WRITE = 5
    DIVERGENT_REMOVE = 5
    # Write some soon to be divergent
    log.info('writing divergent objects')
    for i in range(DIVERGENT_WRITE):
        rados(ctx, mon, ['-p', 'foo', 'put', 'existing_%d' % i,
                         dummyfile2], wait=False)
    # Remove some soon to be divergent
    log.info('remove divergent objects')
    for i in range(DIVERGENT_REMOVE):
        rados(ctx, mon, ['-p', 'foo', 'rm',
                         'existing_%d' % (i + DIVERGENT_WRITE)], wait=False)
    time.sleep(10)
    mon.run(
        args=['killall', '-9', 'rados'],
        wait=True,
        check_status=False)

    # kill all the osds but leave divergent in
    log.info('killing all the osds')
    for i in osds:
        manager.kill_osd(i)
    for i in osds:
        manager.mark_down_osd(i)
    for i in non_divergent:
        manager.mark_out_osd(i)

    # bring up non-divergent
    log.info("bringing up non_divergent %s", str(non_divergent))
    for i in non_divergent:
        manager.revive_osd(i)
    for i in non_divergent:
        manager.mark_in_osd(i)

    # write 1 non-divergent object (ensure that old divergent one is divergent)
    objname = "existing_%d" % (DIVERGENT_WRITE + DIVERGENT_REMOVE)
    log.info('writing non-divergent object ' + objname)
    rados(ctx, mon, ['-p', 'foo', 'put', objname, dummyfile2])

    manager.wait_for_recovery()

    # ensure no recovery of up osds first
    log.info('delay recovery')
    for i in non_divergent:
        manager.wait_run_admin_socket(
            'osd', i, ['set_recovery_delay', '100000'])

    # bring in our divergent friend
    log.info("revive divergent %d", divergent)
    manager.raw_cluster_cmd('osd', 'set', 'noup')
    manager.revive_osd(divergent)

    log.info('delay recovery divergent')
    manager.wait_run_admin_socket(
        'osd', divergent, ['set_recovery_delay', '100000'])

    manager.raw_cluster_cmd('osd', 'unset', 'noup')
    while len(manager.get_osd_status()['up']) < 3:
        time.sleep(10)

    log.info('wait for peering')
    rados(ctx, mon, ['-p', 'foo', 'put', 'foo', dummyfile])

    # At this point the divergent_priors should have been detected

    log.info("killing divergent %d", divergent)
    manager.kill_osd(divergent)
    log.info("reviving divergent %d", divergent)
    manager.revive_osd(divergent)

    time.sleep(20)

    log.info('allowing recovery')
    # Set osd_recovery_delay_start back to 0 and kick the queue
    for i in osds:
        manager.raw_cluster_cmd('tell', 'osd.%d' % i, 'debug',
                                'kick_recovery_wq', '0')

    log.info('reading divergent objects')
    for i in range(DIVERGENT_WRITE + DIVERGENT_REMOVE):
        exit_status = rados(ctx, mon, ['-p', 'foo', 'get', 'existing_%d' % i,
                                       '/tmp/existing'])
        assert exit_status == 0

    log.info("success")
Example #9
def task(ctx, config):
    """
    Test handling of osd_failsafe_nearfull_ratio and osd_failsafe_full_ratio
    configuration settings

    In order for the test to pass, log-ignorelist must be used as follows

        tasks:
            - chef:
            - install:
            - ceph:
                log-ignorelist: ['OSD near full', 'OSD full dropping all updates']
            - osd_failsafe_enospc:

    """
    if config is None:
        config = {}
    assert isinstance(config, dict), \
        'osd_failsafe_enospc task only accepts a dict for configuration'

    # Give 2 seconds for injectargs + osd_op_complaint_time (30) +
    # 2 * osd_heartbeat_interval (6) + 6 padding = 50 seconds
    sleep_time = 50

    # something that is always there
    dummyfile = '/etc/fstab'
    dummyfile2 = '/etc/resolv.conf'

    manager = ctx.managers['ceph']

    # create 1 pg pool with 1 rep which can only be on osd.0
    osds = manager.get_osd_dump()
    for osd in osds:
        if osd['osd'] != 0:
            manager.mark_out_osd(osd['osd'])

    log.info('creating pool foo')
    manager.create_pool("foo")
    manager.raw_cluster_cmd('osd', 'pool', 'set', 'foo', 'size', '1')

    # State NONE -> NEAR
    log.info('1. Verify warning messages when exceeding nearfull_ratio')

    first_mon = teuthology.get_first_mon(ctx, config)
    (mon, ) = ctx.cluster.only(first_mon).remotes.keys()

    proc = mon.run(
        args=['sudo', 'daemon-helper', 'kill', 'ceph', '-w'],
        stdin=run.PIPE,
        stdout=StringIO(),
        wait=False,
    )

    manager.raw_cluster_cmd('tell', 'osd.0', 'injectargs',
                            '--osd_failsafe_nearfull_ratio .00001')

    time.sleep(sleep_time)
    proc.stdin.close()  # causes daemon-helper to send SIGKILL to ceph -w
    proc.wait()

    lines = proc.stdout.getvalue().split('\n')

    # filter() returns an iterator on Python 3, so count with a list
    # comprehension instead of len(filter(...))
    count = len([line for line in lines if '[WRN] OSD near full' in line])
    assert count == 2, 'Incorrect number of warning messages expected 2 got %d' % count
    count = len([line for line in lines
                 if '[ERR] OSD full dropping all updates' in line])
    assert count == 0, 'Incorrect number of error messages expected 0 got %d' % count

    # State NEAR -> FULL
    log.info('2. Verify error messages when exceeding full_ratio')

    proc = mon.run(
        args=['sudo', 'daemon-helper', 'kill', 'ceph', '-w'],
        stdin=run.PIPE,
        stdout=StringIO(),
        wait=False,
    )

    manager.raw_cluster_cmd('tell', 'osd.0', 'injectargs',
                            '--osd_failsafe_full_ratio .00001')

    time.sleep(sleep_time)
    proc.stdin.close()  # causes daemon-helper to send SIGKILL to ceph -w
    proc.wait()

    lines = proc.stdout.getvalue().split('\n')

    count = len([line for line in lines
                 if '[ERR] OSD full dropping all updates' in line])
    assert count == 2, 'Incorrect number of error messages expected 2 got %d' % count

    log.info('3. Verify write failure when exceeding full_ratio')

    # Write data should fail
    ret = rados(ctx, mon, ['-p', 'foo', 'put', 'newfile1', dummyfile])
    assert ret != 0, 'Expected write failure but it succeeded with exit status 0'

    # Put back default
    manager.raw_cluster_cmd('tell', 'osd.0', 'injectargs',
                            '--osd_failsafe_full_ratio .97')
    time.sleep(10)

    # State FULL -> NEAR
    log.info('4. Verify write success when NOT exceeding full_ratio')

    # Write should succeed
    ret = rados(ctx, mon, ['-p', 'foo', 'put', 'newfile2', dummyfile2])
    assert ret == 0, 'Expected write to succeed, but got exit status %d' % ret

    log.info('5. Verify warning messages again when exceeding nearfull_ratio')

    proc = mon.run(
        args=['sudo', 'daemon-helper', 'kill', 'ceph', '-w'],
        stdin=run.PIPE,
        stdout=StringIO(),
        wait=False,
    )

    time.sleep(sleep_time)
    proc.stdin.close()  # causes daemon-helper to send SIGKILL to ceph -w
    proc.wait()

    lines = proc.stdout.getvalue().split('\n')

    count = len([line for line in lines if '[WRN] OSD near full' in line])
    assert count in (1, 2), 'Incorrect number of warning messages: expected 1 or 2, got %d' % count
    count = len([line for line in lines
                 if '[ERR] OSD full dropping all updates' in line])
    assert count == 0, 'Incorrect number of error messages: expected 0, got %d' % count

    manager.raw_cluster_cmd('tell', 'osd.0', 'injectargs',
                            '--osd_failsafe_nearfull_ratio .90')
    time.sleep(10)

    # State NONE -> FULL
    log.info('6. Verify error messages again when exceeding full_ratio')

    proc = mon.run(
        args=['sudo', 'daemon-helper', 'kill', 'ceph', '-w'],
        stdin=run.PIPE,
        stdout=StringIO(),
        wait=False,
    )

    manager.raw_cluster_cmd('tell', 'osd.0', 'injectargs',
                            '--osd_failsafe_full_ratio .00001')

    time.sleep(sleep_time)
    proc.stdin.close()  # causes daemon-helper to send SIGKILL to ceph -w
    proc.wait()

    lines = proc.stdout.getvalue().split('\n')

    count = len([line for line in lines if '[WRN] OSD near full' in line])
    assert count == 0, 'Incorrect number of warning messages: expected 0, got %d' % count
    count = len([line for line in lines
                 if '[ERR] OSD full dropping all updates' in line])
    assert count == 2, 'Incorrect number of error messages: expected 2, got %d' % count

    # State FULL -> NONE
    log.info('7. Verify no messages after setting ratios back to default')

    manager.raw_cluster_cmd('tell', 'osd.0', 'injectargs',
                            '--osd_failsafe_full_ratio .97')
    time.sleep(10)

    proc = mon.run(
        args=['sudo', 'daemon-helper', 'kill', 'ceph', '-w'],
        stdin=run.PIPE,
        stdout=StringIO(),
        wait=False,
    )

    time.sleep(sleep_time)
    proc.stdin.close()  # causes daemon-helper to send SIGKILL to ceph -w
    proc.wait()

    lines = proc.stdout.getvalue().split('\n')

    count = len([line for line in lines if '[WRN] OSD near full' in line])
    assert count == 0, 'Incorrect number of warning messages: expected 0, got %d' % count
    count = len([line for line in lines
                 if '[ERR] OSD full dropping all updates' in line])
    assert count == 0, 'Incorrect number of error messages: expected 0, got %d' % count

    log.info('Test Passed')

    # Bring all OSDs back in
    manager.remove_pool("foo")
    for osd in osds:
        if osd['osd'] != 0:
            manager.mark_in_osd(osd['osd'])
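
A note on the repeated checks in this task: each numbered step counts matching lines of the captured 'ceph -w' output with the same inline expression. A small helper along these lines (a hypothetical refactoring, not part of the original task) would remove the duplication:

def count_matching_lines(raw_output, needle):
    # Hypothetical helper: count the lines of captured 'ceph -w' output that
    # contain the given substring; equivalent to the inline checks above.
    return len([line for line in raw_output.split('\n') if needle in line])

For example, count_matching_lines(proc.stdout.getvalue(), '[WRN] OSD near full') reproduces the warning count of step 1.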
Example no. 10
def task(ctx, config):
    """
    Test peering.
    """
    if config is None:
        config = {}
    assert isinstance(config, dict), \
        'peer task only accepts a dict for configuration'
    first_mon = teuthology.get_first_mon(ctx, config)
    (mon, ) = ctx.cluster.only(first_mon).remotes.keys()

    manager = ceph_manager.CephManager(
        mon,
        ctx=ctx,
        logger=log.getChild('ceph_manager'),
    )

    while len(manager.get_osd_status()['up']) < 3:
        time.sleep(10)
    manager.flush_pg_stats([0, 1, 2])
    manager.wait_for_clean()

    for i in range(3):
        manager.set_config(i, osd_recovery_delay_start=120)
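    # with recovery delayed, pgs stay un-recovered long enough for the
    # down/peering checks below to observe them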

    # take one osd down
    manager.kill_osd(2)
    manager.mark_down_osd(2)

    # kludge to make sure they get a map
    rados(ctx, mon, ['-p', 'data', 'get', 'dummy', '-'])

    manager.flush_pg_stats([0, 1])
    manager.wait_for_recovery()

    # kill another and revive 2, so that some pgs can't peer.
    manager.kill_osd(1)
    manager.mark_down_osd(1)
    manager.revive_osd(2)
    manager.wait_till_osd_is_up(2)

    manager.flush_pg_stats([0, 2])

    manager.wait_for_active_or_down()

    manager.flush_pg_stats([0, 2])

    # look for down pgs
    num_down_pgs = 0
    pgs = manager.get_pg_stats()
    for pg in pgs:
        out = manager.raw_cluster_cmd('pg', pg['pgid'], 'query')
        log.debug("out string %s", out)
        j = json.loads(out)
        log.info("pg is %s, query json is %s", pg, j)

        if pg['state'].count('down'):
            num_down_pgs += 1
            # verify that it is blocked on osd.1
            rs = j['recovery_state']
            assert len(rs) >= 2
            assert rs[0]['name'] == 'Started/Primary/Peering/Down'
            assert rs[1]['name'] == 'Started/Primary/Peering'
            assert rs[1]['blocked']
            assert rs[1]['down_osds_we_would_probe'] == [1]
            assert len(rs[1]['peering_blocked_by']) == 1
            assert rs[1]['peering_blocked_by'][0]['osd'] == 1

    assert num_down_pgs > 0

    # bring it all back
    manager.revive_osd(1)
    manager.wait_till_osd_is_up(1)
    manager.flush_pg_stats([0, 1, 2])
    manager.wait_for_clean()
Example no. 11
def task(ctx, config):
    """
    Test resolving stuck peering.

    Requires 3 osds on a single test node.
    """
    if config is None:
        config = {}
    assert isinstance(config, dict), \
        'Resolve stuck peering only accepts a dict for config'

    manager = ctx.managers['ceph']

    while len(manager.get_osd_status()['up']) < 3:
        time.sleep(10)

    manager.wait_for_clean()

    dummyfile = '/etc/fstab'
    dummyfile1 = '/etc/resolv.conf'

    # create a 1-PG pool
    pool = 'foo'
    log.info('creating pool foo')
    manager.raw_cluster_cmd('osd', 'pool', 'create', '%s' % pool, '1')

    # set min_size of the pool to 1 so that we can
    # continue with I/O when 2 osds are down
    manager.set_pool_property(pool, "min_size", 1)

    osds = [0, 1, 2]

    primary = manager.get_pg_primary('foo', 0)
    log.info("primary osd is %d", primary)

    others = list(osds)
    others.remove(primary)

    log.info('writing initial objects')
    first_mon = teuthology.get_first_mon(ctx, config)
    (mon,) = ctx.cluster.only(first_mon).remotes.keys()
    # create a few objects
    for i in range(100):
        rados(ctx, mon, ['-p', 'foo', 'put', 'existing_%d' % i, dummyfile])

    manager.wait_for_clean()

    # kill all osds except the primary
    log.info('killing other osds except primary')
    for i in others:
        manager.kill_osd(i)
    for i in others:
        manager.mark_down_osd(i)

    for i in range(100):
        rados(ctx, mon, ['-p', 'foo', 'put', 'new_%d' % i, dummyfile1])

    # kill the primary osd
    manager.kill_osd(primary)
    manager.mark_down_osd(primary)

    # revive the other 2 osds
    for i in others:
        manager.revive_osd(i)

    # make sure that the pg is down
    # assuming the pg number for a single-pg pool starts from 0
    pgnum = 0
    pgstr = manager.get_pgid(pool, pgnum)
    stats = manager.get_single_pg_stats(pgstr)
    print(stats['state'])

    timeout = 60
    start = time.time()

    while 'down' not in stats['state']:
        assert time.time() - start < timeout, \
            'failed to reach down state before timeout expired'
        time.sleep(2)  # avoid busy-polling the mon while waiting
        stats = manager.get_single_pg_stats(pgstr)

    # mark the primary as lost
    manager.raw_cluster_cmd('osd', 'lost', '%d' % primary,
                            '--yes-i-really-mean-it')

    # expect the pg status to be active+undersized+degraded;
    # the pg should recover and become active+clean within the timeout
    stats = manager.get_single_pg_stats(pgstr)
    print(stats['state'])

    timeout = 10
    start = time.time()

    while manager.get_num_down():
        assert time.time() - start < timeout, \
            'failed to recover before timeout expired'
        time.sleep(1)  # avoid busy-polling while waiting for recovery

    manager.revive_osd(primary)
Example no. 12
def task(ctx, config):
    """
    Test handling of objects with inconsistent hash info during backfill and deep-scrub.

    A pretty rigid cluster is brought up and tested by this task
    """
    if config is None:
        config = {}
    assert isinstance(config, dict), \
        'ec_inconsistent_hinfo task only accepts a dict for configuration'
    first_mon = teuthology.get_first_mon(ctx, config)
    (mon,) = ctx.cluster.only(first_mon).remotes.keys()

    manager = ceph_manager.CephManager(
        mon,
        ctx=ctx,
        logger=log.getChild('ceph_manager'),
        )

    profile = config.get('erasure_code_profile', {
        'k': '2',
        'm': '1',
        'crush-failure-domain': 'osd'
    })
    profile_name = profile.get('name', 'backfill_unfound')
    manager.create_erasure_code_profile(profile_name, profile)
    pool = manager.create_pool_with_unique_name(
        pg_num=1,
        erasure_code_profile_name=profile_name,
        min_size=2)
    manager.raw_cluster_cmd('osd', 'pool', 'set', pool,
                            'pg_autoscale_mode', 'off')
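    # disabling the autoscaler keeps the pool at a single pg, so the acting
    # set observed below stays stable for the whole test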

    manager.flush_pg_stats([0, 1, 2, 3])
    manager.wait_for_clean()

    pool_id = manager.get_pool_num(pool)
    pgid = '%d.0' % pool_id
    pgs = manager.get_pg_stats()
    acting = next((pg['acting'] for pg in pgs if pg['pgid'] == pgid), None)
    log.info("acting=%s" % acting)
    assert acting
    primary = acting[0]

    # something that is always there, readable and never empty
    dummyfile = '/etc/group'

    # kludge to make sure they get a map
    rados(ctx, mon, ['-p', pool, 'put', 'dummy', dummyfile])

    manager.flush_pg_stats([0, 1])
    manager.wait_for_recovery()

    log.debug("create test object")
    obj = 'test'
    rados(ctx, mon, ['-p', pool, 'put', obj, dummyfile])

    victim = acting[1]

    log.info("remove test object hash info from osd.%s shard and test deep-scrub and repair"
             % victim)

    manager.objectstore_tool(pool, options='', args='rm-attr hinfo_key',
                             object_name=obj, osd=victim)
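    # record the current time as epoch seconds (glibc strftime '%s') as a
    # lower bound for deciding that the deep-scrub triggered below has run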
    check_time_now = time.strftime('%s')
    manager.raw_cluster_cmd('pg', 'deep-scrub', pgid)
    wait_for_deep_scrub_complete(manager, pgid, check_time_now, True)

    check_time_now = time.strftime('%s')
    manager.raw_cluster_cmd('pg', 'repair', pgid)
    wait_for_deep_scrub_complete(manager, pgid, check_time_now, False)

    log.info("remove test object hash info from primary osd.%s shard and test backfill"
             % primary)

    log.debug("write some data")
    rados(ctx, mon, ['-p', pool, 'bench', '30', 'write', '-b', '4096',
                     '--no-cleanup'])

    manager.objectstore_tool(pool, options='', args='rm-attr hinfo_key',
                             object_name=obj, osd=primary)

    # mark the osd out to trigger a rebalance/backfill
    source = acting[1]
    target = [x for x in [0, 1, 2, 3] if x not in acting][0]
    manager.mark_out_osd(source)

    # wait for everything to peer, backfill and recover
    wait_for_backfilling_complete(manager, pgid, source, target)
    manager.wait_for_clean()

    manager.flush_pg_stats([0, 1, 2, 3])
    pgs = manager.get_pg_stats()
    pg = next((pg for pg in pgs if pg['pgid'] == pgid), None)
    log.debug('pg=%s' % pg)
    assert pg
    assert 'clean' in pg['state'].split('+')
    assert 'inconsistent' not in pg['state'].split('+')
    unfound = manager.get_num_unfound_objects()
    log.debug("there are %d unfound objects" % unfound)
    assert unfound == 0

    source, target = target, source
    log.info("remove test object hash info from non-primary osd.%s shard and test backfill"
             % source)

    manager.objectstore_tool(pool, options='', args='rm-attr hinfo_key',
                             object_name=obj, osd=source)

    # mark the osd in to trigger a rebalance/backfill
    manager.mark_in_osd(target)

    # wait for everything to peer, backfill and recover
    wait_for_backfilling_complete(manager, pgid, source, target)
    manager.wait_for_clean()

    manager.flush_pg_stats([0, 1, 2, 3])
    pgs = manager.get_pg_stats()
    pg = next((pg for pg in pgs if pg['pgid'] == pgid), None)
    log.debug('pg=%s' % pg)
    assert pg
    assert 'clean' in pg['state'].split('+')
    assert 'inconsistent' not in pg['state'].split('+')
    unfound = manager.get_num_unfound_objects()
    log.debug("there are %d unfound objects" % unfound)
    assert unfound == 0

    log.info("remove hash info from two shards and test backfill")

    source = acting[2]
    target = [x for x in [0, 1, 2, 3] if x not in acting][0]
    manager.objectstore_tool(pool, options='', args='rm-attr hinfo_key',
                             object_name=obj, osd=primary)
    manager.objectstore_tool(pool, options='', args='rm-attr hinfo_key',
                             object_name=obj, osd=source)

    # mark the osd out to trigger a rebalance/backfill
    manager.mark_out_osd(source)

    # wait for everything to peer, backfill and detect unfound object
    wait_for_backfilling_complete(manager, pgid, source, target)

    # verify that there is unfound object
    manager.flush_pg_stats([0, 1, 2, 3])
    pgs = manager.get_pg_stats()
    pg = next((pg for pg in pgs if pg['pgid'] == pgid), None)
    log.debug('pg=%s' % pg)
    assert pg
    assert 'backfill_unfound' in pg['state'].split('+')
    unfound = manager.get_num_unfound_objects()
    log.debug("there are %d unfound objects" % unfound)
    assert unfound == 1
    m = manager.list_pg_unfound(pgid)
    log.debug('list_pg_unfound=%s' % m)
    assert m['num_unfound'] == pg['stat_sum']['num_objects_unfound']

    # mark stuff lost
    pgs = manager.get_pg_stats()
    manager.raw_cluster_cmd('pg', pgid, 'mark_unfound_lost', 'delete')

    # wait for everything to peer and be happy...
    manager.flush_pg_stats([0, 1, 2, 3])
    manager.wait_for_recovery()