Example 1
def task(ctx, config):
    if config is None:
        config = {}
    assert isinstance(config, dict), \
        'mon_clock_skew_check task only accepts a dict for configuration'
    interval = float(config.get('interval', 30.0))
    expect_skew = config.get('expect-skew', False)

    log.info('Beginning mon_clock_skew_check...')
    first_mon = teuthology.get_first_mon(ctx, config)
    (mon, ) = ctx.cluster.only(first_mon).remotes.keys()
    manager = ceph_manager.CephManager(
        mon,
        ctx=ctx,
        logger=log.getChild('ceph_manager'),
    )

    quorum_size = len(teuthology.get_mon_names(ctx))
    manager.wait_for_mon_quorum_size(quorum_size)

    # wait a bit
    log.info('sleeping for {s} seconds'.format(s=interval))
    time.sleep(interval)

    health = manager.get_mon_health(True)
    log.info('got health %s' % health)
    if expect_skew:
        if 'MON_CLOCK_SKEW' not in health['checks']:
            raise RuntimeError('expected MON_CLOCK_SKEW but got none')
    else:
        if 'MON_CLOCK_SKEW' in health['checks']:
            raise RuntimeError('got MON_CLOCK_SKEW but expected none')
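
The task above consults only two optional keys in its config dict. A minimal sketch of a matching configuration, with illustrative values that are not taken from the original source:

# Hypothetical config for the clock-skew check above (values are examples only):
# 'interval' is how long the task sleeps before polling health, and
# 'expect-skew' decides whether MON_CLOCK_SKEW must or must not be reported.
example_config = {
    'interval': 60.0,
    'expect-skew': True,
}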
Example 2
    def __init__(self, ctx, config):
        self._ctx = ctx
        self._config = config

        mds_list = list(misc.all_roles_of_type(ctx.cluster, 'mds'))
        if len(mds_list) != 1:
            # Require exactly one MDS, the code path for creation failure when
            # a standby is available is different
            raise RuntimeError("This task requires exactly one MDS")

        self.mds_id = mds_list[0]

        (mds_remote, ) = ctx.cluster.only(
            'mds.{_id}'.format(_id=self.mds_id)).remotes.iterkeys()
        manager = ceph_manager.CephManager(
            mds_remote,
            ctx=ctx,
            logger=log.getChild('ceph_manager'),
        )
        self.mds_manager = manager

        client_list = list(misc.all_roles_of_type(self._ctx.cluster, 'client'))
        self.client_id = client_list[0]
        self.client_remote = list(
            misc.get_clients(ctx=ctx,
                             roles=["client.{0}".format(self.client_id)
                                    ]))[0][1]

        self.test_files = ['a', 'b', 'c']
Example 3
def task(ctx, config):
    """
    Stress test the monitor by thrashing them while another task/workunit
    is running.

    Please refer to MonitorThrasher class for further information on the
    available options.
    """
    if config is None:
        config = {}
    assert isinstance(config, dict), \
        'mon_thrash task only accepts a dict for configuration'
    assert len(_get_mons(ctx)) > 2, \
        'mon_thrash task requires at least 3 monitors'
    log.info('Beginning mon_thrash...')
    first_mon = teuthology.get_first_mon(ctx, config)
    (mon,) = ctx.cluster.only(first_mon).remotes.iterkeys()
    manager = ceph_manager.CephManager(
        mon,
        ctx=ctx,
        logger=log.getChild('ceph_manager'),
        )
    thrash_proc = MonitorThrasher(ctx,
        manager, config,
        logger=log.getChild('mon_thrasher'))
    try:
        log.debug('Yielding')
        yield
    finally:
        log.info('joining mon_thrasher')
        thrash_proc.do_join()
        mons = _get_mons(ctx)
        manager.wait_for_mon_quorum_size(len(mons))
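
The task is a context manager: it starts the thrasher, yields so that the nested tasks or workunits can run, and always joins the thrasher in the finally block. A minimal sketch of that start/yield/join shape, using a stand-in class rather than the real MonitorThrasher:

# Sketch only: DummyThrasher is a placeholder, not part of teuthology.
import contextlib
import threading
import time

class DummyThrasher:
    def __init__(self):
        self._stop = threading.Event()
        self._thread = threading.Thread(target=self._loop)
        self._thread.start()

    def _loop(self):
        while not self._stop.is_set():
            time.sleep(0.1)  # the real thrasher would kill/revive a monitor here

    def do_join(self):
        self._stop.set()
        self._thread.join()

@contextlib.contextmanager
def thrash_while_running():
    thrash_proc = DummyThrasher()
    try:
        yield  # nested tasks/workunits run while thrashing continues
    finally:
        thrash_proc.do_join()  # stop thrashing even if the nested work failed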
Example 4
def task(ctx, config):
    """
    Test monitor recovery from OSD
    """
    if config is None:
        config = {}
    assert isinstance(config, dict), \
        'task only accepts a dict for configuration'

    first_mon = teuthology.get_first_mon(ctx, config)
    (mon, ) = ctx.cluster.only(first_mon).remotes.iterkeys()
    manager = ceph_manager.CephManager(mon,
                                       ctx=ctx,
                                       logger=log.getChild('ceph_manager'))

    mons = ctx.cluster.only(teuthology.is_type('mon'))
    # note down the first cluster_name and mon_id
    # we will recover it later on
    cluster_name, _, mon_id = teuthology.split_role(first_mon)
    _nuke_mons(manager, mons, mon_id)
    default_keyring = '/etc/ceph/{cluster}.keyring'.format(
        cluster=cluster_name)
    keyring_path = config.get('keyring_path', default_keyring)
    _rebuild_db(ctx, manager, cluster_name, mon, mon_id, keyring_path)
    _revive_mons(manager, mons, mon_id, keyring_path)
    _revive_mgrs(ctx, manager)
    _revive_osds(ctx, manager)
Example 5
def task(ctx, config):
  """
  Use class ClockSkewCheck to check for clock skews on the monitors.
  This task will spawn a thread running ClockSkewCheck's do_check().

  All the configuration will be directly handled by ClockSkewCheck,
  so please refer to the class documentation for further information.
  """
  if config is None:
    config = {}
  assert isinstance(config, dict), \
      'mon_clock_skew_check task only accepts a dict for configuration'
  log.info('Beginning mon_clock_skew_check...')
  first_mon = teuthology.get_first_mon(ctx, config)
  (mon,) = ctx.cluster.only(first_mon).remotes.iterkeys()
  manager = ceph_manager.CephManager(
      mon,
      ctx=ctx,
      logger=log.getChild('ceph_manager'),
      )

  skew_check = ClockSkewCheck(ctx,
      manager, config,
      logger=log.getChild('mon_clock_skew_check'))
  skew_check_thread = gevent.spawn(skew_check.do_check)
  try:
    yield
  finally:
    log.info('joining mon_clock_skew_check')
    skew_check.finish()
    skew_check_thread.get()
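
Here the long-running check is pushed into a gevent greenlet: do_check() loops until finish() is called, and thread.get() re-raises any exception the greenlet hit. A reduced sketch of that shape with a stand-in checker class:

# Sketch only: DummyCheck is a placeholder for ClockSkewCheck.
import gevent

class DummyCheck:
    def __init__(self):
        self._finished = False

    def do_check(self):
        while not self._finished:
            gevent.sleep(0.1)  # the real check polls monitor clock skew here

    def finish(self):
        self._finished = True

check = DummyCheck()
check_thread = gevent.spawn(check.do_check)
try:
    pass  # nested tasks would run here
finally:
    check.finish()
    check_thread.get()  # re-raises anything do_check() raised in the greenlet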
Example 6
def task(ctx, config):
    """
    Test [deep] repair in several situations:
      Repair [Truncate, Data EIO, MData EIO] on [Primary|Replica]

    The config should be as follows:

      Must include the log-whitelist below
      Must enable filestore_debug_inject_read_err config

    example:

    tasks:
    - chef:
    - install:
    - ceph:
        log-whitelist:
          - 'candidate had a stat error'
          - 'candidate had a read error'
          - 'deep-scrub 0 missing, 1 inconsistent objects'
          - 'deep-scrub 0 missing, 4 inconsistent objects'
          - 'deep-scrub 1 errors'
          - 'deep-scrub 4 errors'
          - '!= known omap_digest'
          - 'repair 0 missing, 1 inconsistent objects'
          - 'repair 0 missing, 4 inconsistent objects'
          - 'repair 1 errors, 1 fixed'
          - 'repair 4 errors, 4 fixed'
          - 'scrub 0 missing, 1 inconsistent'
          - 'scrub 1 errors'
          - 'size 1 != known size'
        conf:
          osd:
            filestore debug inject read err: true
    - repair_test:

    """
    if config is None:
        config = {}
    assert isinstance(config, dict), \
        'repair_test task only accepts a dict for config'

    if not hasattr(ctx, 'manager'):
        first_mon = teuthology.get_first_mon(ctx, config)
        (mon, ) = ctx.cluster.only(first_mon).remotes.iterkeys()
        ctx.manager = ceph_manager.CephManager(
            mon, ctx=ctx, logger=log.getChild('ceph_manager'))

    ctx.manager.wait_for_all_up()

    repair_test_1(ctx, mdataerr, choose_primary, "scrub")
    repair_test_1(ctx, mdataerr, choose_replica, "scrub")
    repair_test_1(ctx, dataerr, choose_primary, "deep-scrub")
    repair_test_1(ctx, dataerr, choose_replica, "deep-scrub")
    repair_test_1(ctx, trunc, choose_primary, "scrub")
    repair_test_1(ctx, trunc, choose_replica, "scrub")
    repair_test_2(ctx, config, choose_primary)
    repair_test_2(ctx, config, choose_replica)

    repair_test_erasure_code(ctx, hinfoerr, 'primary', "deep-scrub")
Example 7
def task(ctx, config):
    """
    Die if {testdir}/err exists or if an OSD dumps core
    """
    if config is None:
        config = {}

    first_mon = teuthology.get_first_mon(ctx, config)
    (mon,) = ctx.cluster.only(first_mon).remotes.iterkeys()

    num_osds = teuthology.num_instances_of_type(ctx.cluster, 'osd')
    log.info('num_osds is %s' % num_osds)

    manager = ceph_manager.CephManager(
        mon,
        ctx=ctx,
        logger=log.getChild('ceph_manager'),
        )

    while len(manager.get_osd_status()['up']) < num_osds:
        time.sleep(10)

    testdir = teuthology.get_testdir(ctx)

    while True:
        for i in range(num_osds):
            (osd_remote,) = ctx.cluster.only('osd.%d' % i).remotes.iterkeys()
            p = osd_remote.run(
                args = [ 'test', '-e', '{tdir}/err'.format(tdir=testdir) ],
                wait=True,
                check_status=False,
            )
            exit_status = p.exitstatus

            if exit_status == 0:
                log.info("osd %d has an error" % i)
                raise Exception("osd %d error" % i)

            log_path = '/var/log/ceph/osd.%d.log' % (i)

            p = osd_remote.run(
                args = [
                         'tail', '-1', log_path,
                         run.Raw('|'),
                         'grep', '-q', 'end dump'
                       ],
                wait=True,
                check_status=False,
            )
            exit_status = p.exitstatus

            if exit_status == 0:
                log.info("osd %d dumped core" % i)
                raise Exception("osd %d dumped core" % i)

        time.sleep(5)
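
Both probes in the loop above follow the same pattern: run a command on the OSD's remote with check_status=False and inspect the exit status by hand. A hedged sketch of a helper capturing that pattern (the helper name is ours, not from the source):

def remote_probe_succeeds(remote, args):
    """Run a command that is allowed to fail; return True if it exited 0."""
    proc = remote.run(args=args, wait=True, check_status=False)
    return proc.exitstatus == 0

# With it, the two checks above would read roughly:
#   if remote_probe_succeeds(osd_remote,
#           ['test', '-e', '{tdir}/err'.format(tdir=testdir)]):
#       raise Exception("osd %d error" % i)
#   if remote_probe_succeeds(osd_remote,
#           ['tail', '-1', log_path, run.Raw('|'), 'grep', '-q', 'end dump']):
#       raise Exception("osd %d dumped core" % i)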
Example 8
def task(ctx, config):
    """
    Benchmark the recovery system.

    Generates objects with smalliobench, runs it normally to get a
    baseline performance measurement, then marks an OSD out and reruns
    to measure performance during recovery.

    The config should be as follows:

    recovery_bench:
        duration: <seconds for each measurement run>
        num_objects: <number of objects>
        io_size: <io size in bytes>

    example:

    tasks:
    - ceph:
    - recovery_bench:
        duration: 60
        num_objects: 500
        io_size: 4096
    """
    if config is None:
        config = {}
    assert isinstance(config, dict), \
        'recovery_bench task only accepts a dict for configuration'

    log.info('Beginning recovery bench...')

    first_mon = teuthology.get_first_mon(ctx, config)
    (mon,) = ctx.cluster.only(first_mon).remotes.iterkeys()

    manager = ceph_manager.CephManager(
        mon,
        ctx=ctx,
        logger=log.getChild('ceph_manager'),
        )

    num_osds = teuthology.num_instances_of_type(ctx.cluster, 'osd')
    while len(manager.get_osd_status()['up']) < num_osds:
        manager.sleep(10)

    bench_proc = RecoveryBencher(
        manager,
        config,
        )
    try:
        yield
    finally:
        log.info('joining recovery bencher')
        bench_proc.do_join()
Example 9
def task(ctx, config):
    """
    Test [deep] repair in several situations:
      Repair [Truncate, Data EIO, MData EIO] on [Primary|Replica]

    The config should be as follows:

      Must include the log-whitelist below
      Must enable filestore_debug_inject_read_err config

    example:

    tasks:
    - chef:
    - install:
    - ceph:
        log-whitelist: ['candidate had a read error', 'deep-scrub 0 missing, 1 inconsistent objects', 'deep-scrub 0 missing, 4 inconsistent objects', 'deep-scrub 1 errors', 'deep-scrub 4 errors', '!= known omap_digest', 'repair 0 missing, 1 inconsistent objects', 'repair 0 missing, 4 inconsistent objects', 'repair 1 errors, 1 fixed', 'repair 4 errors, 4 fixed', 'scrub 0 missing, 1 inconsistent', 'scrub 1 errors', 'size 1 != known size']
        conf:
          osd:
            filestore debug inject read err: true
    - repair_test:

    """
    if config is None:
        config = {}
    assert isinstance(config, dict), \
        'repair_test task only accepts a dict for config'

    if not hasattr(ctx, 'manager'):
        first_mon = teuthology.get_first_mon(ctx, config)
        (mon, ) = ctx.cluster.only(first_mon).remotes.iterkeys()
        ctx.manager = ceph_manager.CephManager(
            mon, ctx=ctx, logger=log.getChild('ceph_manager'))

    tests = [
        gen_repair_test_1(mdataerr(ctx), choose_primary(ctx), "scrub"),
        gen_repair_test_1(mdataerr(ctx), choose_replica(ctx), "scrub"),
        gen_repair_test_1(dataerr(ctx), choose_primary(ctx), "deep-scrub"),
        gen_repair_test_1(dataerr(ctx), choose_replica(ctx), "deep-scrub"),
        gen_repair_test_1(trunc(ctx), choose_primary(ctx), "scrub"),
        gen_repair_test_1(trunc(ctx), choose_replica(ctx), "scrub"),
        gen_repair_test_2(choose_primary(ctx)),
        gen_repair_test_2(choose_replica(ctx))
    ]

    for test in tests:
        run_test(ctx, config, test)
Example 10
def task(ctx, config):
    """
    Run scrub periodically. Randomly chooses an OSD to scrub.

    The config should be as follows:

    scrub:
        frequency: <seconds between scrubs>
        deep: <bool for deepness>

    example:

    tasks:
    - ceph:
    - scrub:
        frequency: 30
        deep: 0
    """
    if config is None:
        config = {}
    assert isinstance(config, dict), \
        'scrub task only accepts a dict for configuration'

    log.info('Beginning scrub...')

    first_mon = teuthology.get_first_mon(ctx, config)
    (mon, ) = ctx.cluster.only(first_mon).remotes.iterkeys()

    manager = ceph_manager.CephManager(
        mon,
        ctx=ctx,
        logger=log.getChild('ceph_manager'),
    )

    num_osds = teuthology.num_instances_of_type(ctx.cluster, 'osd')
    while len(manager.get_osd_status()['up']) < num_osds:
        time.sleep(10)

    scrub_proc = Scrubber(
        manager,
        config,
    )
    try:
        yield
    finally:
        log.info('joining scrub')
        scrub_proc.do_join()
Example 11
def setup(ctx, config):
    first_mon = teuthology.get_first_mon(ctx, config)
    (mon,) = ctx.cluster.only(first_mon).remotes.iterkeys()
    ctx.manager = ceph_manager.CephManager(
        mon,
        ctx=ctx,
        logger=log.getChild('ceph_manager'),
        )
    ctx.manager.clear_pools()
    ctx.manager.create_pool(POOLNAME, config.num_pgs)
    log.info("populating pool")
    ctx.manager.rados_write_objects(
        POOLNAME,
        config.num_objects,
        config.object_size,
        config.creation_time_limit,
        config.create_threads)
    log.info("done populating pool")
Example 12
def task(ctx, config):
    """
    Test handling of osd_failsafe_nearfull_ratio and osd_failsafe_full_ratio
    configuration settings

    In order for the test to pass, the log-whitelist must be set as follows

        tasks:
            - chef:
            - install:
            - ceph:
                log-whitelist: ['OSD near full', 'OSD full dropping all updates']
            - osd_failsafe_enospc:

    """
    if config is None:
        config = {}
    assert isinstance(config, dict), \
        'osd_failsafe_enospc task only accepts a dict for configuration'
    first_mon = teuthology.get_first_mon(ctx, config)
    (mon,) = ctx.cluster.only(first_mon).remotes.iterkeys()

    manager = ceph_manager.CephManager(
        mon,
        ctx=ctx,
        logger=log.getChild('ceph_manager'),
        )
    ctx.manager = manager

    # Give 2 seconds for injectargs + osd_op_complaint_time (30) + 2 * osd_heartbeat_interval (6) + 6 padding
    sleep_time = 50

    # something that is always there
    dummyfile = '/etc/fstab'
    dummyfile2 = '/etc/resolv.conf'

    # create 1 pg pool with 1 rep which can only be on osd.0
    osds = manager.get_osd_dump()
    for osd in osds:
        if osd['osd'] != 0:
            manager.mark_out_osd(osd['osd'])

    log.info('creating pool foo')
    manager.create_pool("foo")
    manager.raw_cluster_cmd('osd', 'pool', 'set', 'foo', 'size', '1')

    # State NONE -> NEAR
    log.info('1. Verify warning messages when exceeding nearfull_ratio')

    proc = mon.run(
             args=[
                'daemon-helper',
                'kill',
                'ceph', '-w'
             ],
             stdin=run.PIPE,
             stdout=StringIO(),
             wait=False,
        )

    manager.raw_cluster_cmd('tell', 'osd.0', 'injectargs', '--osd_failsafe_nearfull_ratio .00001')

    time.sleep(sleep_time)
    proc.stdin.close() # causes daemon-helper to send SIGKILL to ceph -w
    proc.wait()

    lines = proc.stdout.getvalue().split('\n')

    count = len(filter(lambda line: '[WRN] OSD near full' in line, lines))
    assert count == 2, 'Incorrect number of warning messages expected 2 got %d' % count
    count = len(filter(lambda line: '[ERR] OSD full dropping all updates' in line, lines))
    assert count == 0, 'Incorrect number of error messages expected 0 got %d' % count

    # State NEAR -> FULL
    log.info('2. Verify error messages when exceeding full_ratio')

    proc = mon.run(
             args=[
                'daemon-helper',
                'kill',
                'ceph', '-w'
             ],
             stdin=run.PIPE,
             stdout=StringIO(),
             wait=False,
        )

    manager.raw_cluster_cmd('tell', 'osd.0', 'injectargs', '--osd_failsafe_full_ratio .00001')

    time.sleep(sleep_time)
    proc.stdin.close() # causes daemon-helper to send SIGKILL to ceph -w
    proc.wait()

    lines = proc.stdout.getvalue().split('\n')

    count = len(filter(lambda line: '[ERR] OSD full dropping all updates' in line, lines))
    assert count == 2, 'Incorrect number of error messages expected 2 got %d' % count

    log.info('3. Verify write failure when exceeding full_ratio')

    # Write data should fail
    ret = rados(ctx, mon, ['-p', 'foo', 'put', 'newfile1', dummyfile])
    assert ret != 0, 'Expected write failure but it succeeded with exit status 0'

    # Put back default
    manager.raw_cluster_cmd('tell', 'osd.0', 'injectargs', '--osd_failsafe_full_ratio .97')
    time.sleep(10)

    # State FULL -> NEAR
    log.info('4. Verify write success when NOT exceeding full_ratio')

    # Write should succeed
    ret = rados(ctx, mon, ['-p', 'foo', 'put', 'newfile2', dummyfile2])
    assert ret == 0, 'Expected write to succeed, but got exit status %d' % ret

    log.info('5. Verify warning messages again when exceeding nearfull_ratio')

    proc = mon.run(
             args=[
                'daemon-helper',
                'kill',
                'ceph', '-w'
             ],
             stdin=run.PIPE,
             stdout=StringIO(),
             wait=False,
        )

    time.sleep(sleep_time)
    proc.stdin.close() # causes daemon-helper to send SIGKILL to ceph -w
    proc.wait()

    lines = proc.stdout.getvalue().split('\n')

    count = len(filter(lambda line: '[WRN] OSD near full' in line, lines))
    assert count == 1 or count == 2, 'Incorrect number of warning messages expected 1 or 2 got %d' % count
    count = len(filter(lambda line: '[ERR] OSD full dropping all updates' in line, lines))
    assert count == 0, 'Incorrect number of error messages expected 0 got %d' % count

    manager.raw_cluster_cmd('tell', 'osd.0', 'injectargs', '--osd_failsafe_nearfull_ratio .90')
    time.sleep(10)

    # State NONE -> FULL
    log.info('6. Verify error messages again when exceeding full_ratio')

    proc = mon.run(
             args=[
                'daemon-helper',
                'kill',
                'ceph', '-w'
             ],
             stdin=run.PIPE,
             stdout=StringIO(),
             wait=False,
        )

    manager.raw_cluster_cmd('tell', 'osd.0', 'injectargs', '--osd_failsafe_full_ratio .00001')

    time.sleep(sleep_time)
    proc.stdin.close() # causes daemon-helper to send SIGKILL to ceph -w
    proc.wait()

    lines = proc.stdout.getvalue().split('\n')

    count = len(filter(lambda line: '[WRN] OSD near full' in line, lines))
    assert count == 0, 'Incorrect number of warning messages expected 0 got %d' % count
    count = len(filter(lambda line: '[ERR] OSD full dropping all updates' in line, lines))
    assert count == 2, 'Incorrect number of error messages expected 2 got %d' % count

    # State FULL -> NONE
    log.info('7. Verify no messages when settings are back to default')

    manager.raw_cluster_cmd('tell', 'osd.0', 'injectargs', '--osd_failsafe_full_ratio .97')
    time.sleep(10)

    proc = mon.run(
             args=[
                'daemon-helper',
                'kill',
                'ceph', '-w'
             ],
             stdin=run.PIPE,
             stdout=StringIO(),
             wait=False,
        )

    time.sleep(sleep_time)
    proc.stdin.close() # causes daemon-helper to send SIGKILL to ceph -w
    proc.wait()

    lines = proc.stdout.getvalue().split('\n')

    count = len(filter(lambda line: '[WRN] OSD near full' in line, lines))
    assert count == 0, 'Incorrect number of warning messages expected 0 got %d' % count
    count = len(filter(lambda line: '[ERR] OSD full dropping all updates' in line, lines))
    assert count == 0, 'Incorrect number of error messages expected 0 got %d' % count

    log.info('Test Passed')

    # Bring all OSDs back in
    manager.remove_pool("foo")
    for osd in osds:
        if osd['osd'] != 0:
            manager.mark_in_osd(osd['osd'])
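
Steps 1, 2, 5, 6 and 7 above repeat the same capture-and-count pattern: start 'ceph -w' under daemon-helper, sleep, close stdin to kill it, then count matching log lines. A hedged sketch of how that pattern could be factored out (the helper names are ours, not from the source):

import time
from StringIO import StringIO          # the surrounding code is Python 2
from teuthology.orchestra import run

def watch_cluster_log(mon, seconds):
    """Capture 'ceph -w' output for a while and return it split into lines."""
    proc = mon.run(
        args=['daemon-helper', 'kill', 'ceph', '-w'],
        stdin=run.PIPE,
        stdout=StringIO(),
        wait=False,
    )
    time.sleep(seconds)
    proc.stdin.close()   # daemon-helper then sends SIGKILL to ceph -w
    proc.wait()
    return proc.stdout.getvalue().split('\n')

def count_matching(lines, needle):
    return len([line for line in lines if needle in line])

# e.g. step 1 would reduce to:
#   lines = watch_cluster_log(mon, sleep_time)
#   assert count_matching(lines, '[WRN] OSD near full') == 2
#   assert count_matching(lines, '[ERR] OSD full dropping all updates') == 0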
Example 13
def task(ctx, config):
    """
    Test peering.
    """
    if config is None:
        config = {}
    assert isinstance(config, dict), \
        'peer task only accepts a dict for configuration'
    first_mon = teuthology.get_first_mon(ctx, config)
    (mon, ) = ctx.cluster.only(first_mon).remotes.iterkeys()

    manager = ceph_manager.CephManager(
        mon,
        ctx=ctx,
        logger=log.getChild('ceph_manager'),
    )

    while len(manager.get_osd_status()['up']) < 3:
        manager.sleep(10)
    manager.raw_cluster_cmd('tell', 'osd.0', 'flush_pg_stats')
    manager.raw_cluster_cmd('tell', 'osd.1', 'flush_pg_stats')
    manager.raw_cluster_cmd('tell', 'osd.2', 'flush_pg_stats')
    manager.wait_for_clean()

    for i in range(3):
        manager.set_config(i, osd_recovery_delay_start=120)

    # take one osd down
    manager.kill_osd(2)
    manager.mark_down_osd(2)

    # kludge to make sure they get a map
    rados(ctx, mon, ['-p', 'data', 'get', 'dummy', '-'])

    manager.raw_cluster_cmd('tell', 'osd.0', 'flush_pg_stats')
    manager.raw_cluster_cmd('tell', 'osd.1', 'flush_pg_stats')
    manager.wait_for_recovery()

    # kill another and revive 2, so that some pgs can't peer.
    manager.kill_osd(1)
    manager.mark_down_osd(1)
    manager.revive_osd(2)
    manager.wait_till_osd_is_up(2)

    manager.raw_cluster_cmd('tell', 'osd.0', 'flush_pg_stats')
    manager.raw_cluster_cmd('tell', 'osd.2', 'flush_pg_stats')

    manager.wait_for_active_or_down()

    manager.raw_cluster_cmd('tell', 'osd.0', 'flush_pg_stats')
    manager.raw_cluster_cmd('tell', 'osd.2', 'flush_pg_stats')

    # look for down pgs
    num_down_pgs = 0
    pgs = manager.get_pg_stats()
    for pg in pgs:
        out = manager.raw_cluster_cmd('pg', pg['pgid'], 'query')
        log.debug("out string %s", out)
        j = json.loads(out)
        log.info("pg is %s, query json is %s", pg, j)

        if pg['state'].count('down'):
            num_down_pgs += 1
            # verify that it is blocked on osd.1
            rs = j['recovery_state']
            assert len(rs) > 0
            assert rs[0]['name'] == 'Started/Primary/Peering/GetInfo'
            assert rs[1]['name'] == 'Started/Primary/Peering'
            assert rs[1]['blocked']
            assert rs[1]['down_osds_we_would_probe'] == [1]
            assert len(rs[1]['peering_blocked_by']) == 1
            assert rs[1]['peering_blocked_by'][0]['osd'] == 1

    assert num_down_pgs > 0

    # bring it all back
    manager.revive_osd(1)
    manager.wait_till_osd_is_up(1)
    manager.raw_cluster_cmd('tell', 'osd.0', 'flush_pg_stats')
    manager.raw_cluster_cmd('tell', 'osd.1', 'flush_pg_stats')
    manager.raw_cluster_cmd('tell', 'osd.2', 'flush_pg_stats')
    manager.wait_for_clean()
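
For reference, this is the shape of the 'recovery_state' entries that the assertions above expect from 'ceph pg <pgid> query' for a PG that is down and blocked on osd.1. Only the asserted fields are shown; everything else in the real query output is omitted:

# Reconstructed from the assertions above; not a verbatim 'pg query' dump.
expected_recovery_state = [
    {'name': 'Started/Primary/Peering/GetInfo'},
    {
        'name': 'Started/Primary/Peering',
        'blocked': True,                    # the task only checks truthiness
        'down_osds_we_would_probe': [1],
        'peering_blocked_by': [{'osd': 1}],
    },
]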
Example 14
def task(ctx, config):
    """
    Test handling of lost objects.

    A pretty rigid cluster is brought up and tested by this task
    """
    POOL = 'unfounddel_pool'
    if config is None:
        config = {}
    assert isinstance(config, dict), \
        'lost_unfound task only accepts a dict for configuration'
    first_mon = teuthology.get_first_mon(ctx, config)
    (mon, ) = ctx.cluster.only(first_mon).remotes.keys()

    manager = ceph_manager.CephManager(
        mon,
        ctx=ctx,
        logger=log.getChild('ceph_manager'),
    )

    while len(manager.get_osd_status()['up']) < 3:
        time.sleep(10)
    manager.flush_pg_stats([0, 1, 2])
    manager.wait_for_clean()

    manager.create_pool(POOL)

    # something that is always there
    dummyfile = '/etc/fstab'

    # take an osd out until the very end
    manager.kill_osd(2)
    manager.mark_down_osd(2)
    manager.mark_out_osd(2)

    # kludge to make sure they get a map
    rados(ctx, mon, ['-p', POOL, 'put', 'dummy', dummyfile])

    manager.flush_pg_stats([0, 1])
    manager.wait_for_recovery()

    # create old objects
    for f in range(1, 10):
        rados(ctx, mon, ['-p', POOL, 'put', 'existing_%d' % f, dummyfile])
        rados(ctx, mon, ['-p', POOL, 'put', 'existed_%d' % f, dummyfile])
        rados(ctx, mon, ['-p', POOL, 'rm', 'existed_%d' % f])

    # delay recovery, and make the pg log very long (to prevent backfill)
    manager.raw_cluster_cmd(
        'tell', 'osd.1', 'injectargs',
        '--osd-recovery-delay-start 1000 --osd-min-pg-log-entries 100000000')

    manager.kill_osd(0)
    manager.mark_down_osd(0)

    for f in range(1, 10):
        rados(ctx, mon, ['-p', POOL, 'put', 'new_%d' % f, dummyfile])
        rados(ctx, mon, ['-p', POOL, 'put', 'existed_%d' % f, dummyfile])
        rados(ctx, mon, ['-p', POOL, 'put', 'existing_%d' % f, dummyfile])

    # bring osd.0 back up, let it peer, but don't replicate the new
    # objects...
    log.info('osd.0 command_args is %s' % 'foo')
    log.info(ctx.daemons.get_daemon('osd', 0).command_args)
    ctx.daemons.get_daemon('osd', 0).command_kwargs['args'].extend(
        ['--osd-recovery-delay-start', '1000'])
    manager.revive_osd(0)
    manager.mark_in_osd(0)
    manager.wait_till_osd_is_up(0)

    manager.flush_pg_stats([0, 1])
    manager.wait_till_active()

    # take out osd.1 and the only copy of those objects.
    manager.kill_osd(1)
    manager.mark_down_osd(1)
    manager.mark_out_osd(1)
    manager.raw_cluster_cmd('osd', 'lost', '1', '--yes-i-really-mean-it')

    # bring up osd.2 so that things would otherwise, in theory, recover fully
    manager.revive_osd(2)
    manager.mark_in_osd(2)
    manager.wait_till_osd_is_up(2)

    manager.flush_pg_stats([0, 2])
    manager.wait_till_active()
    manager.flush_pg_stats([0, 2])

    # verify that there are unfound objects
    unfound = manager.get_num_unfound_objects()
    log.info("there are %d unfound objects" % unfound)
    assert unfound

    testdir = teuthology.get_testdir(ctx)
    procs = []
    if config.get('parallel_bench', True):
        procs.append(
            mon.run(args=[
                "/bin/sh",
                "-c",
                " ".join([
                    'adjust-ulimits',
                    'ceph-coverage',
                    '{tdir}/archive/coverage',
                    'rados',
                    '--no-log-to-stderr',
                    '--name',
                    'client.admin',
                    '-b',
                    str(4 << 10),
                    '-p',
                    POOL,
                    '-t',
                    '20',
                    'bench',
                    '240',
                    'write',
                ]).format(tdir=testdir),
            ],
                    logger=log.getChild(
                        'radosbench.{id}'.format(id='client.admin')),
                    stdin=run.PIPE,
                    wait=False))
    time.sleep(10)

    # mark stuff lost
    pgs = manager.get_pg_stats()
    for pg in pgs:
        if pg['stat_sum']['num_objects_unfound'] > 0:
            primary = 'osd.%d' % pg['acting'][0]

            # verify that i can list them direct from the osd
            log.info('listing missing/lost in %s state %s', pg['pgid'],
                     pg['state'])
            m = manager.list_pg_unfound(pg['pgid'])
            #log.info('%s' % m)
            assert m['num_unfound'] == pg['stat_sum']['num_objects_unfound']
            num_unfound = 0
            for o in m['objects']:
                if len(o['locations']) == 0:
                    num_unfound += 1
            assert m['num_unfound'] == num_unfound

            log.info("reverting unfound in %s on %s", pg['pgid'], primary)
            manager.raw_cluster_cmd('pg', pg['pgid'], 'mark_unfound_lost',
                                    'delete')
        else:
            log.info("no unfound in %s", pg['pgid'])

    manager.raw_cluster_cmd('tell', 'osd.0', 'debug', 'kick_recovery_wq', '5')
    manager.raw_cluster_cmd('tell', 'osd.2', 'debug', 'kick_recovery_wq', '5')
    manager.flush_pg_stats([0, 2])
    manager.wait_for_recovery()

    # verify result
    for f in range(1, 10):
        err = rados(ctx, mon, ['-p', POOL, 'get', 'new_%d' % f, '-'])
        assert err
        err = rados(ctx, mon, ['-p', POOL, 'get', 'existed_%d' % f, '-'])
        assert err
        err = rados(ctx, mon, ['-p', POOL, 'get', 'existing_%d' % f, '-'])
        assert err

    # see if osd.1 can cope
    manager.mark_in_osd(1)
    manager.revive_osd(1)
    manager.wait_till_osd_is_up(1)
    manager.wait_for_clean()
    run.wait(procs)
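
The unfound-object accounting above relies on just two fields of list_pg_unfound(): 'num_unfound' and each object's 'locations' list. An illustrative structure (real entries carry more fields, such as object ids) showing the invariant the task asserts:

# Illustrative only; reconstructed from the fields the task reads.
example_listing = {
    'num_unfound': 2,
    'objects': [
        {'locations': []},   # no surviving copy anywhere -> counted as unfound
        {'locations': []},
    ],
}
num_unfound = sum(1 for o in example_listing['objects']
                  if len(o['locations']) == 0)
assert num_unfound == example_listing['num_unfound']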
Example 15
def task(ctx, config):
    """
    Test monitor recovery from OSD
    """
    if config is None:
        config = {}
    assert isinstance(config, dict), \
        'task only accepts a dict for configuration'

    first_mon = teuthology.get_first_mon(ctx, config)
    (mon,) = ctx.cluster.only(first_mon).remotes.iterkeys()

    manager = ceph_manager.CephManager(
        mon,
        ctx=ctx,
        logger=log.getChild('ceph_manager'))

    mons = ctx.cluster.only(teuthology.is_type('mon'))
    assert mons
    # note down the first cluster_name and mon_id
    # we will recover it later on
    cluster_name = None
    mon_id = None
    for remote, roles in mons.remotes.iteritems():
        is_mon = teuthology.is_type('mon')
        for role in roles:
            if not is_mon(role):
                continue
            cluster, _, m = teuthology.split_role(role)
            if cluster_name is None:
                cluster_name = cluster
                mon_id = m
            assert cluster_name == cluster
            log.info('killing {cluster}:mon.{mon}'.format(
                cluster=cluster,
                mon=m))
            manager.kill_mon(m)
            mon_data = os.path.join('/var/lib/ceph/mon/',
                                    '{0}-{1}'.format(cluster_name, m))
            if m == mon_id:
                # so we only need to recreate the store.db for the first
                # mon; this is easier than running mkfs on it and then
                # replacing its store.db with the recovered one
                store_dir = os.path.join(mon_data, 'store.db')
                remote.run(args=['sudo', 'rm', '-r', store_dir])
            else:
                remote.run(args=['sudo', 'rm', '-r', mon_data])

    local_mstore = tempfile.mkdtemp()

    # collect the maps from all OSDs
    osds = ctx.cluster.only(teuthology.is_type('osd'))
    assert osds
    for osd, roles in osds.remotes.iteritems():
        is_osd = teuthology.is_type('osd')
        for role in roles:
            if not is_osd(role):
                continue
            cluster, _, osd_id = teuthology.split_role(role)
            assert cluster_name == cluster
            log.info('collecting maps from {cluster}:osd.{osd}'.format(
                cluster=cluster,
                osd=osd_id))
            # push leveldb to OSD
            osd_mstore = os.path.join(teuthology.get_testdir(ctx), 'mon-store')
            osd.run(args=['sudo', 'mkdir', '-m', 'o+x', '-p', osd_mstore])

            push_directory(local_mstore, osd, osd_mstore)
            log.info('rm -rf {0}'.format(local_mstore))
            shutil.rmtree(local_mstore)
            # update leveldb with OSD data
            options = '--op update-mon-db --mon-store-path {0}'
            log.info('cot {0}'.format(osd_mstore))
            manager.objectstore_tool(pool=None,
                                     options=options.format(osd_mstore),
                                     args='',
                                     osd=osd_id,
                                     do_revive=False)
            # pull the updated mon db
            log.info('pull dir {0} -> {1}'.format(osd_mstore, local_mstore))
            local_mstore = tempfile.mkdtemp()
            teuthology.pull_directory(osd, osd_mstore, local_mstore)
            log.info('rm -rf osd:{0}'.format(osd_mstore))
            osd.run(args=['sudo', 'rm', '-fr', osd_mstore])

    # recover the first_mon with re-built mon db
    # pull from recovered leveldb from client
    mon_store_dir = os.path.join('/var/lib/ceph/mon',
                                 '{0}-{1}'.format(cluster_name, mon_id))
    push_directory(local_mstore, mon, mon_store_dir)
    mon.run(args=['sudo', 'chown', '-R', 'ceph:ceph', mon_store_dir])
    shutil.rmtree(local_mstore)
    default_keyring = '/etc/ceph/{cluster}.keyring'.format(
        cluster=cluster_name)
    keyring_path = config.get('keyring_path', default_keyring)
    # fill up the caps in the keyring file
    mon.run(args=['sudo',
                  'ceph-authtool', keyring_path,
                  '-n', 'mon.',
                  '--cap', 'mon', 'allow *'])
    mon.run(args=['sudo',
                  'ceph-authtool', keyring_path,
                  '-n', 'client.admin',
                  '--cap', 'mon', 'allow *',
                  '--cap', 'osd', 'allow *',
                  '--cap', 'mds', 'allow *'])
    mon.run(args=['sudo', '-u', 'ceph',
                  'ceph-monstore-tool', mon_store_dir,
                  'rebuild', '--', '--keyring',
                  keyring_path])

    # revive monitors
    # the initial monmap is in the ceph.conf, so we are good.
    n_mons = 0
    for remote, roles in mons.remotes.iteritems():
        is_mon = teuthology.is_type('mon')
        for role in roles:
            if not is_mon(role):
                continue
            cluster, _, m = teuthology.split_role(role)
            assert cluster_name == cluster
            if mon_id != m:
                log.info('running mkfs on {cluster}:mon.{mon}'.format(
                    cluster=cluster,
                    mon=m))
                remote.run(
                    args=[
                        'sudo',
                        'ceph-mon',
                        '--cluster', cluster,
                        '--mkfs',
                        '-i', m,
                        '--keyring', keyring_path])
            manager.revive_mon(m)
            n_mons += 1

    manager.wait_for_mon_quorum_size(n_mons, timeout=30)
    for osd, roles in osds.remotes.iteritems():
        is_osd = teuthology.is_type('osd')
        for role in roles:
            if not is_osd(role):
                continue
            _, _, osd_id = teuthology.split_role(role)
            log.info('reviving osd.{0}'.format(osd_id))
            manager.revive_osd(osd_id)
Example 16
def task(ctx, config):
    """
    Stress test the mds by thrashing while another task/workunit
    is running.

    Please refer to MDSThrasher class for further information on the
    available options.
    """

    mds_cluster = MDSCluster(ctx)

    if config is None:
        config = {}
    assert isinstance(config, dict), \
        'mds_thrash task only accepts a dict for configuration'
    mdslist = list(teuthology.all_roles_of_type(ctx.cluster, 'mds'))
    assert len(mdslist) > 1, \
        'mds_thrash task requires at least 2 metadata servers'

    # choose random seed
    if 'seed' in config:
        seed = int(config['seed'])
    else:
        seed = int(time.time())
    log.info('mds thrasher using random seed: {seed}'.format(seed=seed))
    random.seed(seed)

    (first,) = ctx.cluster.only('mds.{_id}'.format(_id=mdslist[0])).remotes.keys()
    manager = ceph_manager.CephManager(
        first, ctx=ctx, logger=log.getChild('ceph_manager'),
    )

    # make sure everyone is in active, standby, or standby-replay
    log.info('Wait for all MDSs to reach steady state...')
    status = mds_cluster.status()
    while True:
        steady = True
        for info in status.get_all():
            state = info['state']
            if state not in ('up:active', 'up:standby', 'up:standby-replay'):
                steady = False
                break
        if steady:
            break
        sleep(2)
        status = mds_cluster.status()
    log.info('Ready to start thrashing')

    manager.wait_for_clean()
    assert manager.is_clean()

    if 'cluster' not in config:
        config['cluster'] = 'ceph'

    for fs in status.get_filesystems():
        thrasher = MDSThrasher(ctx, manager, config, Filesystem(ctx, fs['id']), fs['mdsmap']['max_mds'])
        thrasher.start()
        ctx.ceph[config['cluster']].thrashers.append(thrasher)

    try:
        log.debug('Yielding')
        yield
    finally:
        log.info('joining mds_thrasher')
        thrasher.stop()
        if thrasher.exception is not None:
            raise RuntimeError('error during thrashing')
        thrasher.join()
        log.info('done joining')
Example 17
def task(ctx, config):
    """
    Test backfill
    """
    if config is None:
        config = {}
    assert isinstance(config, dict), \
        'thrashosds task only accepts a dict for configuration'
    first_mon = teuthology.get_first_mon(ctx, config)
    (mon, ) = ctx.cluster.only(first_mon).remotes.iterkeys()

    num_osds = teuthology.num_instances_of_type(ctx.cluster, 'osd')
    log.info('num_osds is %s' % num_osds)
    assert num_osds == 3

    manager = ceph_manager.CephManager(
        mon,
        ctx=ctx,
        logger=log.getChild('ceph_manager'),
    )

    while len(manager.get_osd_status()['up']) < 3:
        manager.sleep(10)
    manager.raw_cluster_cmd('tell', 'osd.0', 'flush_pg_stats')
    manager.raw_cluster_cmd('tell', 'osd.1', 'flush_pg_stats')
    manager.raw_cluster_cmd('tell', 'osd.2', 'flush_pg_stats')
    manager.wait_for_clean()

    # write some data
    p = rados_start(
        ctx, mon,
        ['-p', 'rbd', 'bench', '15', 'write', '-b', '4096', '--no-cleanup'])
    err = p.wait()
    log.info('err is %d' % err)

    # mark osd.0 out to trigger a rebalance/backfill
    manager.mark_out_osd(0)

    # also mark it down so it won't be included in pg_temps
    manager.kill_osd(0)
    manager.mark_down_osd(0)

    # wait for everything to peer and be happy...
    manager.raw_cluster_cmd('tell', 'osd.1', 'flush_pg_stats')
    manager.raw_cluster_cmd('tell', 'osd.2', 'flush_pg_stats')
    manager.wait_for_recovery()

    # write some new data
    p = rados_start(
        ctx, mon,
        ['-p', 'rbd', 'bench', '30', 'write', '-b', '4096', '--no-cleanup'])

    time.sleep(15)

    # blackhole + restart osd.1
    # this triggers a divergent backfill target
    manager.blackhole_kill_osd(1)
    time.sleep(2)
    manager.revive_osd(1)

    # wait for our writes to complete + succeed
    err = p.wait()
    log.info('err is %d' % err)

    # cluster must recover
    manager.raw_cluster_cmd('tell', 'osd.1', 'flush_pg_stats')
    manager.raw_cluster_cmd('tell', 'osd.2', 'flush_pg_stats')
    manager.wait_for_recovery()

    # re-add osd.0
    manager.revive_osd(0)
    manager.raw_cluster_cmd('tell', 'osd.1', 'flush_pg_stats')
    manager.raw_cluster_cmd('tell', 'osd.2', 'flush_pg_stats')
    manager.wait_for_clean()
Example 18
def task(ctx, config):
    """
    Go through filesystem creation with a synthetic failure in an MDS
    in its 'up:creating' state, to exercise the retry behaviour.
    """
    # Grab handles to the teuthology objects of interest
    mdslist = list(misc.all_roles_of_type(ctx.cluster, 'mds'))
    if len(mdslist) != 1:
        # Require exactly one MDS, the code path for creation failure when
        # a standby is available is different
        raise RuntimeError("This task requires exactly one MDS")

    mds_id = mdslist[0]
    (mds_remote,) = ctx.cluster.only('mds.{_id}'.format(_id=mds_id)).remotes.iterkeys()
    manager = ceph_manager.CephManager(
        mds_remote, ctx=ctx, logger=log.getChild('ceph_manager'),
    )

    # Stop MDS
    manager.raw_cluster_cmd('mds', 'set', "max_mds", "0")
    mds = ctx.daemons.get_daemon('mds', mds_id)
    mds.stop()
    manager.raw_cluster_cmd('mds', 'fail', mds_id)

    # Reset the filesystem so that next start will go into CREATING
    manager.raw_cluster_cmd('fs', 'rm', "default", "--yes-i-really-mean-it")
    manager.raw_cluster_cmd('fs', 'new', "default", "metadata", "data")

    # Start the MDS with mds_kill_create_at set, it will crash during creation
    mds.restart_with_args(["--mds_kill_create_at=1"])
    try:
        mds.wait_for_exit()
    except CommandFailedError as e:
        if e.exitstatus == 1:
            log.info("MDS creation killed as expected")
        else:
            log.error("Unexpected status code %s" % e.exitstatus)
            raise

    # Since I have intentionally caused a crash, I will clean up the resulting core
    # file to avoid task.internal.coredump seeing it as a failure.
    log.info("Removing core file from synthetic MDS failure")
    mds_remote.run(args=['rm', '-f', Raw("{archive}/coredump/*.core".format(archive=misc.get_archive_dir(ctx)))])

    # It should have left the MDS map state still in CREATING
    status = manager.get_mds_status(mds_id)
    assert status['state'] == 'up:creating'

    # Start the MDS again without the kill flag set, it should proceed with creation successfully
    mds.restart()

    # Wait for state ACTIVE
    t = 0
    create_timeout = 120
    while True:
        status = manager.get_mds_status(mds_id)
        if status['state'] == 'up:active':
            log.info("MDS creation completed successfully")
            break
        elif status['state'] == 'up:creating':
            log.info("MDS still in creating state")
            if t > create_timeout:
                log.error("Creating did not complete within %ss" % create_timeout)
                raise RuntimeError("Creating did not complete within %ss" % create_timeout)
            t += 1
            time.sleep(1)
        else:
            log.error("Unexpected MDS state: %s" % status['state'])
            assert(status['state'] in ['up:active', 'up:creating'])

    # The system should be back up in a happy healthy state, go ahead and run any further tasks
    # inside this context.
    yield
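
The wait loop above polls get_mds_status() once a second until the daemon reaches up:active, bailing out after create_timeout seconds. A simplified hedged sketch of that polling shape as a helper (the helper name is ours; the original additionally distinguishes up:creating from unexpected states):

import time

def wait_for_mds_state(manager, mds_id, wanted, timeout=120):
    """Poll the MDS state once a second until it matches 'wanted' or time out."""
    for _ in range(timeout):
        state = manager.get_mds_status(mds_id)['state']
        if state == wanted:
            return
        time.sleep(1)
    raise RuntimeError("MDS %s did not reach %s within %ss"
                       % (mds_id, wanted, timeout))

# e.g.: wait_for_mds_state(manager, mds_id, 'up:active', create_timeout)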
Example 19
def task(ctx, config):
    """
    Test handling of object location going down
    """
    if config is None:
        config = {}
    assert isinstance(config, dict), \
        'lost_unfound task only accepts a dict for configuration'
    first_mon = teuthology.get_first_mon(ctx, config)
    (mon,) = ctx.cluster.only(first_mon).remotes.keys()

    manager = ceph_manager.CephManager(
        mon,
        ctx=ctx,
        logger=log.getChild('ceph_manager'),
        )

    while len(manager.get_osd_status()['up']) < 3:
        time.sleep(10)
    manager.wait_for_clean()

    # something that is always there
    dummyfile = '/etc/fstab'

    # take 0, 1 out
    manager.mark_out_osd(0)
    manager.mark_out_osd(1)
    manager.wait_for_clean()

    # delay recovery, and make the pg log very long (to prevent backfill)
    manager.raw_cluster_cmd(
            'tell', 'osd.0',
            'injectargs',
            '--osd-recovery-delay-start 10000 --osd-min-pg-log-entries 100000000'
            )
    # delay recovery, and make the pg log very long (to prevent backfill)
    manager.raw_cluster_cmd(
            'tell', 'osd.1',
            'injectargs',
            '--osd-recovery-delay-start 10000 --osd-min-pg-log-entries 100000000'
            )
    # delay recovery, and make the pg log very long (to prevent backfill)
    manager.raw_cluster_cmd(
            'tell', 'osd.2',
            'injectargs',
            '--osd-recovery-delay-start 10000 --osd-min-pg-log-entries 100000000'
            )
    # delay recovery, and make the pg log very long (to prevent backfill)
    manager.raw_cluster_cmd(
            'tell', 'osd.3',
            'injectargs',
            '--osd-recovery-delay-start 10000 --osd-min-pg-log-entries 100000000'
            )

    # kludge to make sure they get a map
    rados(ctx, mon, ['-p', 'data', 'put', 'dummy', dummyfile])

    # create old objects
    for f in range(1, 10):
        rados(ctx, mon, ['-p', 'data', 'put', 'existing_%d' % f, dummyfile])

    manager.mark_out_osd(3)
    manager.wait_till_active()

    manager.mark_in_osd(0)
    manager.wait_till_active()

    manager.flush_pg_stats([2, 0])

    manager.mark_out_osd(2)
    manager.wait_till_active()

    # bring up 1
    manager.mark_in_osd(1)
    manager.wait_till_active()

    manager.flush_pg_stats([0, 1])
    log.info("Getting unfound objects")
    unfound = manager.get_num_unfound_objects()
    assert not unfound

    manager.kill_osd(2)
    manager.mark_down_osd(2)
    manager.kill_osd(3)
    manager.mark_down_osd(3)

    manager.flush_pg_stats([0, 1])
    log.info("Getting unfound objects")
    unfound = manager.get_num_unfound_objects()
    assert unfound
Example 20
def task(ctx, config):
    """
    Test [deep] scrub

    tasks:
    - chef:
    - install:
    - ceph:
        log-whitelist:
        - '!= known digest'
        - '!= known omap_digest'
        - deep-scrub 0 missing, 1 inconsistent objects
        - deep-scrub 1 errors
        - repair 0 missing, 1 inconsistent objects
        - repair 1 errors, 1 fixed
    - scrub_test:
    """
    if config is None:
        config = {}
    assert isinstance(config, dict), \
        'scrub_test task only accepts a dict for configuration'
    first_mon = teuthology.get_first_mon(ctx, config)
    (mon,) = ctx.cluster.only(first_mon).remotes.iterkeys()

    num_osds = teuthology.num_instances_of_type(ctx.cluster, 'osd')
    log.info('num_osds is %s' % num_osds)

    manager = ceph_manager.CephManager(
        mon,
        ctx=ctx,
        logger=log.getChild('ceph_manager'),
        )

    while len(manager.get_osd_status()['up']) < num_osds:
        time.sleep(10)

    for i in range(num_osds):
        manager.raw_cluster_cmd('tell', 'osd.%d' % i, 'flush_pg_stats')
    manager.wait_for_clean()

    # write some data
    p = manager.do_rados(mon, ['-p', 'rbd', 'bench', '--no-cleanup', '1',
                               'write', '-b', '4096'])
    log.info('err is %d' % p.exitstatus)

    # wait for some PG to have data that we can mess with
    pg, acting = wait_for_victim_pg(manager)
    osd = acting[0]

    osd_remote, obj_path, obj_name = find_victim_object(ctx, pg, osd)
    manager.do_rados(mon, ['-p', 'rbd', 'setomapval', obj_name, 'key', 'val'])
    log.info('err is %d' % p.exitstatus)
    manager.do_rados(mon, ['-p', 'rbd', 'setomapheader', obj_name, 'hdr'])
    log.info('err is %d' % p.exitstatus)

    log.info('messing with PG %s on osd %d' % (pg, osd))
    test_repair_corrupted_obj(ctx, manager, pg, osd_remote, obj_path)
    test_repair_bad_omap(ctx, manager, pg, osd, obj_name)
    test_list_inconsistent_obj(ctx, manager, osd_remote, pg, acting, osd,
                               obj_name, obj_path)
    log.info('test successful!')
Example 21
def task(ctx, config):
    """
    Test handling of lost objects on an ec pool.

    A pretty rigid cluster is brought up and tested by this task
    """
    if config is None:
        config = {}
    assert isinstance(config, dict), \
        'lost_unfound task only accepts a dict for configuration'
    first_mon = teuthology.get_first_mon(ctx, config)
    (mon, ) = ctx.cluster.only(first_mon).remotes.iterkeys()

    manager = ceph_manager.CephManager(
        mon,
        ctx=ctx,
        logger=log.getChild('ceph_manager'),
    )

    manager.raw_cluster_cmd('tell', 'osd.0', 'flush_pg_stats')
    manager.raw_cluster_cmd('tell', 'osd.1', 'flush_pg_stats')
    manager.raw_cluster_cmd('tell', 'osd.2', 'flush_pg_stats')
    manager.raw_cluster_cmd('tell', 'osd.3', 'flush_pg_stats')
    manager.wait_for_clean()

    profile = config.get('erasure_code_profile', {
        'k': '2',
        'm': '2',
        'ruleset-failure-domain': 'osd'
    })
    profile_name = profile.get('name', 'lost_unfound')
    manager.create_erasure_code_profile(profile_name, profile)
    pool = manager.create_pool_with_unique_name(
        erasure_code_profile_name=profile_name)

    # something that is always there, readable and never empty
    dummyfile = '/etc/group'

    # kludge to make sure they get a map
    rados(ctx, mon, ['-p', pool, 'put', 'dummy', dummyfile])

    manager.raw_cluster_cmd('tell', 'osd.0', 'flush_pg_stats')
    manager.raw_cluster_cmd('tell', 'osd.1', 'flush_pg_stats')
    manager.wait_for_recovery()

    # create old objects
    for f in range(1, 10):
        rados(ctx, mon, ['-p', pool, 'put', 'existing_%d' % f, dummyfile])
        rados(ctx, mon, ['-p', pool, 'put', 'existed_%d' % f, dummyfile])
        rados(ctx, mon, ['-p', pool, 'rm', 'existed_%d' % f])

    # delay recovery, and make the pg log very long (to prevent backfill)
    manager.raw_cluster_cmd(
        'tell', 'osd.1', 'injectargs',
        '--osd-recovery-delay-start 1000 --osd-min-pg-log-entries 100000000')

    manager.kill_osd(0)
    manager.mark_down_osd(0)
    manager.kill_osd(3)
    manager.mark_down_osd(3)

    for f in range(1, 10):
        rados(ctx, mon, ['-p', pool, 'put', 'new_%d' % f, dummyfile])
        rados(ctx, mon, ['-p', pool, 'put', 'existed_%d' % f, dummyfile])
        rados(ctx, mon, ['-p', pool, 'put', 'existing_%d' % f, dummyfile])

    # take out osd.1 and a necessary shard of those objects.
    manager.kill_osd(1)
    manager.mark_down_osd(1)
    manager.raw_cluster_cmd('osd', 'lost', '1', '--yes-i-really-mean-it')
    manager.revive_osd(0)
    manager.wait_till_osd_is_up(0)
    manager.revive_osd(3)
    manager.wait_till_osd_is_up(3)

    manager.raw_cluster_cmd('tell', 'osd.0', 'flush_pg_stats')
    manager.raw_cluster_cmd('tell', 'osd.2', 'flush_pg_stats')
    manager.raw_cluster_cmd('tell', 'osd.3', 'flush_pg_stats')
    manager.wait_till_active()
    manager.raw_cluster_cmd('tell', 'osd.0', 'flush_pg_stats')
    manager.raw_cluster_cmd('tell', 'osd.2', 'flush_pg_stats')
    manager.raw_cluster_cmd('tell', 'osd.3', 'flush_pg_stats')

    # verify that there are unfound objects
    unfound = manager.get_num_unfound_objects()
    log.info("there are %d unfound objects" % unfound)
    assert unfound

    # mark stuff lost
    pgs = manager.get_pg_stats()
    for pg in pgs:
        if pg['stat_sum']['num_objects_unfound'] > 0:
            # verify that i can list them direct from the osd
            log.info('listing missing/lost in %s state %s', pg['pgid'],
                     pg['state'])
            m = manager.list_pg_missing(pg['pgid'])
            log.info('%s' % m)
            assert m['num_unfound'] == pg['stat_sum']['num_objects_unfound']

            log.info("reverting unfound in %s", pg['pgid'])
            manager.raw_cluster_cmd('pg', pg['pgid'], 'mark_unfound_lost',
                                    'delete')
        else:
            log.info("no unfound in %s", pg['pgid'])

    manager.raw_cluster_cmd('tell', 'osd.0', 'debug', 'kick_recovery_wq', '5')
    manager.raw_cluster_cmd('tell', 'osd.2', 'debug', 'kick_recovery_wq', '5')
    manager.raw_cluster_cmd('tell', 'osd.3', 'debug', 'kick_recovery_wq', '5')
    manager.raw_cluster_cmd('tell', 'osd.0', 'flush_pg_stats')
    manager.raw_cluster_cmd('tell', 'osd.2', 'flush_pg_stats')
    manager.raw_cluster_cmd('tell', 'osd.3', 'flush_pg_stats')
    manager.wait_for_recovery()

    # verify result
    for f in range(1, 10):
        err = rados(ctx, mon, ['-p', pool, 'get', 'new_%d' % f, '-'])
        assert err
        err = rados(ctx, mon, ['-p', pool, 'get', 'existed_%d' % f, '-'])
        assert err
        err = rados(ctx, mon, ['-p', pool, 'get', 'existing_%d' % f, '-'])
        assert err

    # see if osd.1 can cope
    manager.revive_osd(1)
    manager.wait_till_osd_is_up(1)
    manager.wait_for_clean()
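
The only configuration this variant consults is an optional erasure-code profile; if none is given it falls back to the k=2, m=2 defaults shown above and names the profile 'lost_unfound'. A hedged override example (values are illustrative):

# Hypothetical config overriding the defaults used by the task above.
example_config = {
    'erasure_code_profile': {
        'name': 'lost_unfound_k2m2',
        'k': '2',
        'm': '2',
        'ruleset-failure-domain': 'osd',
    },
}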
Example 22
def task(ctx, config):
    """
    Test [deep] scrub

    tasks:
    - chef:
    - install:
    - ceph:
        log-whitelist:
        - '!= known digest'
        - '!= known omap_digest'
        - deep-scrub 0 missing, 1 inconsistent objects
        - deep-scrub 1 errors
        - repair 0 missing, 1 inconsistent objects
        - repair 1 errors, 1 fixed
    - scrub_test:
    """
    if config is None:
        config = {}
    assert isinstance(config, dict), \
        'scrub_test task only accepts a dict for configuration'
    first_mon = teuthology.get_first_mon(ctx, config)
    (mon, ) = ctx.cluster.only(first_mon).remotes.iterkeys()

    num_osds = teuthology.num_instances_of_type(ctx.cluster, 'osd')
    log.info('num_osds is %s' % num_osds)

    manager = ceph_manager.CephManager(
        mon,
        ctx=ctx,
        logger=log.getChild('ceph_manager'),
    )

    while len(manager.get_osd_status()['up']) < num_osds:
        time.sleep(10)

    for i in range(num_osds):
        manager.raw_cluster_cmd('tell', 'osd.%d' % i, 'flush_pg_stats')
    manager.wait_for_clean()

    # write some data
    p = manager.do_rados(
        mon,
        ['-p', 'rbd', 'bench', '--no-cleanup', '1', 'write', '-b', '4096'])
    err = p.exitstatus
    log.info('err is %d' % err)

    # wait for some PG to have data that we can mess with
    victim = None
    osd = None
    while victim is None:
        stats = manager.get_pg_stats()
        for pg in stats:
            size = pg['stat_sum']['num_bytes']
            if size > 0:
                victim = pg['pgid']
                osd = pg['acting'][0]
                break

        if victim is None:
            time.sleep(3)

    log.info('messing with PG %s on osd %d' % (victim, osd))

    (osd_remote, ) = ctx.cluster.only('osd.%d' % osd).remotes.iterkeys()
    data_path = os.path.join('/var/lib/ceph/osd', 'ceph-{id}'.format(id=osd),
                             'current', '{pg}_head'.format(pg=victim))

    # fuzz time
    ls_fp = StringIO()
    osd_remote.run(
        args=['sudo', 'ls', data_path],
        stdout=ls_fp,
    )
    ls_out = ls_fp.getvalue()
    ls_fp.close()

    # find an object file we can mess with
    osdfilename = None
    for line in ls_out.split('\n'):
        if 'object' in line:
            osdfilename = line
            break
    assert osdfilename is not None

    # Get actual object name from osd stored filename
    tmp = osdfilename.split('__')
    objname = tmp[0]
    objname = objname.replace('\u', '_')
    log.info('fuzzing %s' % objname)

    # put a single \0 at the beginning of the file
    osd_remote.run(args=[
        'sudo', 'dd', 'if=/dev/zero',
        'of=%s' %
        os.path.join(data_path, osdfilename), 'bs=1', 'count=1', 'conv=notrunc'
    ])

    # scrub, verify inconsistent
    manager.raw_cluster_cmd('pg', 'deep-scrub', victim)
    # Give deep-scrub a chance to start
    time.sleep(60)

    while True:
        stats = manager.get_single_pg_stats(victim)
        state = stats['state']

        # wait for the scrub to finish
        if 'scrubbing' in state:
            time.sleep(3)
            continue

        inconsistent = stats['state'].find('+inconsistent') != -1
        assert inconsistent
        break

    # repair, verify no longer inconsistent
    manager.raw_cluster_cmd('pg', 'repair', victim)
    # Give repair a chance to start
    time.sleep(60)

    while True:
        stats = manager.get_single_pg_stats(victim)
        state = stats['state']

        # wait for the scrub to finish
        if 'scrubbing' in state:
            time.sleep(3)
            continue

        inconsistent = stats['state'].find('+inconsistent') != -1
        assert not inconsistent
        break

    # Test deep-scrub with various omap modifications
    manager.do_rados(mon, ['-p', 'rbd', 'setomapval', objname, 'key', 'val'])
    manager.do_rados(mon, ['-p', 'rbd', 'setomapheader', objname, 'hdr'])

    # Modify omap on specific osd
    log.info('fuzzing omap of %s' % objname)
    manager.osd_admin_socket(osd, ['rmomapkey', 'rbd', objname, 'key'])
    manager.osd_admin_socket(
        osd, ['setomapval', 'rbd', objname, 'badkey', 'badval'])
    manager.osd_admin_socket(osd, ['setomapheader', 'rbd', objname, 'badhdr'])

    # scrub, verify inconsistent
    manager.raw_cluster_cmd('pg', 'deep-scrub', victim)
    # Give deep-scrub a chance to start
    time.sleep(60)

    while True:
        stats = manager.get_single_pg_stats(victim)
        state = stats['state']

        # wait for the scrub to finish
        if 'scrubbing' in state:
            time.sleep(3)
            continue

        inconsistent = stats['state'].find('+inconsistent') != -1
        assert inconsistent
        break

    # repair, verify no longer inconsistent
    manager.raw_cluster_cmd('pg', 'repair', victim)
    # Give repair a chance to start
    time.sleep(60)

    while True:
        stats = manager.get_single_pg_stats(victim)
        state = stats['state']

        # wait for the scrub to finish
        if 'scrubbing' in state:
            time.sleep(3)
            continue

        inconsistent = stats['state'].find('+inconsistent') != -1
        assert not inconsistent
        break

    log.info('test successful!')
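
The scrub/repair verification loop above is repeated four times verbatim, differing only in whether the PG is expected to end up inconsistent. A minimal refactoring sketch, assuming the same CephManager calls and poll interval as the loops above (the helper name and the timeout cap are illustrative, not part of the original task):

def wait_for_scrub_result(manager, victim, expect_inconsistent, timeout=1800):
    """Illustrative helper only: poll the PG until scrubbing finishes, then
    assert on the '+inconsistent' flag in its state string."""
    start = time.time()
    while True:
        state = manager.get_single_pg_stats(victim)['state']
        if 'scrubbing' in state:
            assert time.time() - start < timeout, 'scrub did not finish'
            time.sleep(3)
            continue
        inconsistent = '+inconsistent' in state
        assert inconsistent == expect_inconsistent, \
            'pg %s state %s, expected inconsistent=%r' % (
                victim, state, expect_inconsistent)
        return

With such a helper, each step above would collapse to a raw_cluster_cmd('pg', 'deep-scrub', victim) or 'repair' call followed by wait_for_scrub_result(manager, victim, True) or (…, False).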
Esempio n. 23
0
def task(ctx, config):
    """
    Stress test the mds by thrashing while another task/workunit
    is running.

    Please refer to MDSThrasher class for further information on the
    available options.
    """

    mds_cluster = MDSCluster(ctx)

    if config is None:
        config = {}
    assert isinstance(config, dict), \
        'mds_thrash task only accepts a dict for configuration'
    mdslist = list(teuthology.all_roles_of_type(ctx.cluster, 'mds'))
    assert len(mdslist) > 1, \
        'mds_thrash task requires at least 2 metadata servers'

    # choose random seed
    if 'seed' in config:
        seed = int(config['seed'])
    else:
        seed = int(time.time())
    log.info('mds thrasher using random seed: {seed}'.format(seed=seed))
    random.seed(seed)

    max_thrashers = config.get('max_thrash', 1)
    thrashers = {}

    (first, ) = ctx.cluster.only(
        'mds.{_id}'.format(_id=mdslist[0])).remotes.keys()
    manager = ceph_manager.CephManager(
        first,
        ctx=ctx,
        logger=log.getChild('ceph_manager'),
    )

    # make sure everyone is in active, standby, or standby-replay
    log.info('Wait for all MDSs to reach steady state...')
    statuses = None
    statuses_by_rank = None
    while True:
        statuses = {m: mds_cluster.get_mds_info(m) for m in mdslist}
        statuses_by_rank = {}
        for _, s in statuses.items():
            if isinstance(s, dict):
                statuses_by_rank[s['rank']] = s

        ready = [
            (name, s) for (name, s) in statuses.items()
            if s is not None and s['state'] in ('up:active', 'up:standby',
                                                'up:standby-replay')
        ]
        if len(ready) == len(statuses):
            break
        time.sleep(2)
    log.info('Ready to start thrashing')

    # setup failure groups
    failure_groups = {}
    actives = {
        s['name']: s
        for (_, s) in statuses.items() if s['state'] == 'up:active'
    }
    log.info('Actives is: {d}'.format(d=actives))
    log.info('Statuses is: {d}'.format(d=statuses_by_rank))
    for active in actives:
        for (r, s) in statuses.items():
            if s['standby_for_name'] == active:
                if active not in failure_groups:
                    failure_groups[active] = []
                log.info('Assigning mds rank {r} to failure group {g}'.format(
                    r=r, g=active))
                failure_groups[active].append(r)

    manager.wait_for_clean()
    for (active, standbys) in failure_groups.items():
        weight = 1.0
        if 'thrash_weights' in config:
            weight = float(config['thrash_weights'].get(
                'mds.{_id}'.format(_id=active), '0.0'))

        failure_group = [active]
        failure_group.extend(standbys)

        thrasher = MDSThrasher(
            ctx,
            manager,
            mds_cluster,
            config,
            logger=log.getChild(
                'mds_thrasher.failure_group.[{a}, {sbs}]'.format(
                    a=active, sbs=', '.join(standbys))),
            failure_group=failure_group,
            weight=weight)
        thrasher.start()
        thrashers[active] = thrasher

        # if thrash_weights isn't specified and we've reached max_thrash,
        # we're done
        if 'thrash_weights' not in config and len(thrashers) == max_thrashers:
            break

    try:
        log.debug('Yielding')
        yield
    finally:
        log.info('joining mds_thrashers')
        for t in thrashers:
            log.info('join thrasher for failure group led by mds.{a}'.format(
                a=t))
            thrashers[t].stop()
            thrashers[t].get()  # Raise any exception from _run()
            thrashers[t].join()
        log.info('done joining')
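
The only configuration keys the task itself reads are 'seed', 'max_thrash' and 'thrash_weights'; everything else is passed through to MDSThrasher. A hypothetical config dict, with made-up MDS names and values, consistent with the code above:

# Hypothetical configuration; the mds names and numeric values are illustrative.
example_config = {
    'seed': 31337,           # pin the random seed for a reproducible run
    'max_thrash': 1,         # thrash at most one failure group...
    'thrash_weights': {      # ...unless weights are given, in which case every
        'mds.a': '0.5',      # active MDS gets its own thrasher (see the break
        'mds.b': '0.0',      # condition at the end of the loop above)
    },
}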
Esempio n. 24
0
def test_incomplete_pgs(ctx, config):
    """
    Test handling of incomplete pgs.  Requires 4 osds.
    """
    testdir = teuthology.get_testdir(ctx)
    if config is None:
        config = {}
    assert isinstance(config, dict), \
        'task only accepts a dict for configuration'
    first_mon = teuthology.get_first_mon(ctx, config)
    (mon, ) = ctx.cluster.only(first_mon).remotes.keys()

    num_osds = teuthology.num_instances_of_type(ctx.cluster, 'osd')
    log.info('num_osds is %s' % num_osds)
    assert num_osds == 4

    manager = ceph_manager.CephManager(
        mon,
        ctx=ctx,
        logger=log.getChild('ceph_manager'),
    )

    while len(manager.get_osd_status()['up']) < 4:
        time.sleep(10)

    manager.raw_cluster_cmd('tell', 'osd.0', 'flush_pg_stats')
    manager.raw_cluster_cmd('tell', 'osd.1', 'flush_pg_stats')
    manager.raw_cluster_cmd('tell', 'osd.2', 'flush_pg_stats')
    manager.raw_cluster_cmd('tell', 'osd.3', 'flush_pg_stats')
    manager.wait_for_clean()

    log.info('Testing incomplete pgs...')

    for i in range(4):
        manager.set_config(i, osd_recovery_delay_start=1000)

    # move data off of osd.0, osd.1
    manager.raw_cluster_cmd('osd', 'out', '0', '1')
    manager.raw_cluster_cmd('tell', 'osd.0', 'flush_pg_stats')
    manager.raw_cluster_cmd('tell', 'osd.1', 'flush_pg_stats')
    manager.raw_cluster_cmd('tell', 'osd.2', 'flush_pg_stats')
    manager.raw_cluster_cmd('tell', 'osd.3', 'flush_pg_stats')
    manager.wait_for_clean()

    # lots of objects in rbd (no pg log, will backfill)
    p = rados_start(
        testdir, mon,
        ['-p', 'rbd', 'bench', '20', 'write', '-b', '1', '--no-cleanup'])
    p.wait()

    # few objects in rbd pool (with pg log, normal recovery)
    for f in range(1, 20):
        p = rados_start(testdir, mon,
                        ['-p', 'rbd', 'put',
                         'foo.%d' % f, '/etc/passwd'])
        p.wait()

    # move it back
    manager.raw_cluster_cmd('osd', 'in', '0', '1')
    manager.raw_cluster_cmd('osd', 'out', '2', '3')
    manager.raw_cluster_cmd('tell', 'osd.0', 'flush_pg_stats')
    manager.raw_cluster_cmd('tell', 'osd.1', 'flush_pg_stats')
    manager.raw_cluster_cmd('tell', 'osd.2', 'flush_pg_stats')
    manager.raw_cluster_cmd('tell', 'osd.3', 'flush_pg_stats')
    manager.wait_for_active()

    assert not manager.is_clean()
    assert not manager.is_recovered()

    # kill 2 + 3
    log.info('stopping 2,3')
    manager.kill_osd(2)
    manager.kill_osd(3)
    log.info('...')
    manager.raw_cluster_cmd('osd', 'down', '2', '3')
    manager.raw_cluster_cmd('tell', 'osd.0', 'flush_pg_stats')
    manager.raw_cluster_cmd('tell', 'osd.1', 'flush_pg_stats')
    manager.wait_for_active_or_down()

    assert manager.get_num_down() > 0

    # revive 2 + 3
    manager.revive_osd(2)
    manager.revive_osd(3)
    while len(manager.get_osd_status()['up']) < 4:
        log.info('waiting a bit...')
        time.sleep(2)
    log.info('all are up!')

    for i in range(4):
        manager.kick_recovery_wq(i)

    # cluster must recover
    manager.wait_for_clean()
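
rados_start() is defined elsewhere in this suite and not shown here. Judging from its call sites (a testdir, the mon remote and a rados argument list, returning a process that is wait()ed later), it presumably wraps remote.run around the rados CLI, roughly like the sketch below; the adjust-ulimits/ceph-coverage wrapping mirrors the explicit invocation in the erasure-coded lost_unfound example further down.

def rados_start(testdir, remote, cmd):
    # Sketch only, not the verbatim helper: start 'rados <cmd>' on the remote
    # and return the running process without waiting for it.
    log.info('rados %s' % ' '.join(cmd))
    args = [
        'adjust-ulimits',
        'ceph-coverage',
        '{tdir}/archive/coverage'.format(tdir=testdir),
        'rados',
    ]
    args.extend(cmd)
    return remote.run(args=args, wait=False)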
Esempio n. 25
0
def task(ctx, config):
    """
    Test [deep] scrub

    tasks:
    - chef:
    - install:
    - ceph:
        log-whitelist:
        - '!= data_digest'
        - '!= omap_digest'
        - '!= size'
        - deep-scrub 0 missing, 1 inconsistent objects
        - deep-scrub [0-9]+ errors
        - repair 0 missing, 1 inconsistent objects
        - repair [0-9]+ errors, [0-9]+ fixed
        - shard [0-9]+ .* : missing
        - deep-scrub 1 missing, 1 inconsistent objects
        - does not match object info size
        - attr name mistmatch
        - deep-scrub 1 missing, 0 inconsistent objects
        - failed to pick suitable auth object
        - candidate size [0-9]+ info size [0-9]+ mismatch
      conf:
        osd:
          osd deep scrub update digest min age: 0
    - scrub_test:
    """
    if config is None:
        config = {}
    assert isinstance(config, dict), \
        'scrub_test task only accepts a dict for configuration'
    first_mon = teuthology.get_first_mon(ctx, config)
    (mon,) = ctx.cluster.only(first_mon).remotes.keys()

    num_osds = teuthology.num_instances_of_type(ctx.cluster, 'osd')
    log.info('num_osds is %s' % num_osds)

    manager = ceph_manager.CephManager(
        mon,
        ctx=ctx,
        logger=log.getChild('ceph_manager'),
        )

    while len(manager.get_osd_status()['up']) < num_osds:
        time.sleep(10)

    for i in range(num_osds):
        manager.raw_cluster_cmd('tell', 'osd.%d' % i, 'injectargs',
                                '--', '--osd-objectstore-fuse')
    manager.flush_pg_stats(range(num_osds))
    manager.wait_for_clean()

    # write some data
    p = manager.do_rados(mon, ['-p', 'rbd', 'bench', '--no-cleanup', '1',
                               'write', '-b', '4096'])
    log.info('err is %d' % p.exitstatus)

    # wait for some PG to have data that we can mess with
    pg, acting = wait_for_victim_pg(manager)
    osd = acting[0]

    osd_remote, obj_path, obj_name = find_victim_object(ctx, pg, osd)
    manager.do_rados(mon, ['-p', 'rbd', 'setomapval', obj_name, 'key', 'val'])
    log.info('err is %d' % p.exitstatus)
    manager.do_rados(mon, ['-p', 'rbd', 'setomapheader', obj_name, 'hdr'])
    log.info('err is %d' % p.exitstatus)

    # Update missing digests, requires "osd deep scrub update digest min age: 0"
    pgnum = get_pgnum(pg)
    manager.do_pg_scrub('rbd', pgnum, 'deep-scrub')

    log.info('messing with PG %s on osd %d' % (pg, osd))
    test_repair_corrupted_obj(ctx, manager, pg, osd_remote, obj_path, 'rbd')
    test_repair_bad_omap(ctx, manager, pg, osd, obj_name)
    test_list_inconsistent_obj(ctx, manager, osd_remote, pg, acting, osd,
                               obj_name, obj_path)
    log.info('test successful!')

    # shut down fuse mount
    for i in range(num_osds):
        manager.raw_cluster_cmd('tell', 'osd.%d' % i, 'injectargs',
                                '--', '--no-osd-objectstore-fuse')
    time.sleep(5)
    log.info('done')
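
wait_for_victim_pg() and find_victim_object() are helpers from the same module that are not included in these snippets. Based on the equivalent inline loop in the earlier fuzzing example and on how its return values are used above, wait_for_victim_pg presumably looks something like:

def wait_for_victim_pg(manager):
    # Sketch inferred from the call site above: wait until some PG holds data
    # and return its pgid together with its acting set.
    while True:
        for pg in manager.get_pg_stats():
            if pg['stat_sum']['num_bytes'] > 0:
                return pg['pgid'], pg['acting']
        time.sleep(3)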
Esempio n. 26
0
def task(ctx, config):
    """
    Test handling of lost objects on an ec pool.

    A pretty rigid cluster is brought up and tested by this task
    """
    if config is None:
        config = {}
    assert isinstance(config, dict), \
        'lost_unfound task only accepts a dict for configuration'
    first_mon = teuthology.get_first_mon(ctx, config)
    (mon, ) = ctx.cluster.only(first_mon).remotes.keys()

    manager = ceph_manager.CephManager(
        mon,
        ctx=ctx,
        logger=log.getChild('ceph_manager'),
    )

    manager.wait_for_clean()

    profile = config.get('erasure_code_profile', {
        'k': '2',
        'm': '2',
        'crush-failure-domain': 'osd'
    })
    profile_name = profile.get('name', 'lost_unfound')
    manager.create_erasure_code_profile(profile_name, profile)
    pool = manager.create_pool_with_unique_name(
        erasure_code_profile_name=profile_name, min_size=2)

    # something that is always there, readable and never empty
    dummyfile = '/etc/group'

    # kludge to make sure they get a map
    rados(ctx, mon, ['-p', pool, 'put', 'dummy', dummyfile])

    manager.flush_pg_stats([0, 1])
    manager.wait_for_recovery()

    # create old objects
    for f in range(1, 10):
        rados(ctx, mon, ['-p', pool, 'put', 'existing_%d' % f, dummyfile])
        rados(ctx, mon, ['-p', pool, 'put', 'existed_%d' % f, dummyfile])
        rados(ctx, mon, ['-p', pool, 'rm', 'existed_%d' % f])

    # delay recovery, and make the pg log very long (to prevent backfill)
    manager.raw_cluster_cmd(
        'tell', 'osd.1', 'injectargs',
        '--osd-recovery-delay-start 1000 --osd-min-pg-log-entries 100000000')

    manager.kill_osd(0)
    manager.mark_down_osd(0)
    manager.kill_osd(3)
    manager.mark_down_osd(3)

    for f in range(1, 10):
        rados(ctx, mon, ['-p', pool, 'put', 'new_%d' % f, dummyfile])
        rados(ctx, mon, ['-p', pool, 'put', 'existed_%d' % f, dummyfile])
        rados(ctx, mon, ['-p', pool, 'put', 'existing_%d' % f, dummyfile])

    # take out osd.1 and a necessary shard of those objects.
    manager.kill_osd(1)
    manager.mark_down_osd(1)
    manager.raw_cluster_cmd('osd', 'lost', '1', '--yes-i-really-mean-it')
    manager.revive_osd(0)
    manager.wait_till_osd_is_up(0)
    manager.revive_osd(3)
    manager.wait_till_osd_is_up(3)

    manager.flush_pg_stats([0, 2, 3])
    manager.wait_till_active()
    manager.flush_pg_stats([0, 2, 3])

    # verify that there are unfound objects
    unfound = manager.get_num_unfound_objects()
    log.info("there are %d unfound objects" % unfound)
    assert unfound

    testdir = teuthology.get_testdir(ctx)
    procs = []
    if config.get('parallel_bench', True):
        procs.append(
            mon.run(args=[
                "/bin/sh",
                "-c",
                " ".join([
                    'adjust-ulimits',
                    'ceph-coverage',
                    '{tdir}/archive/coverage',
                    'rados',
                    '--no-log-to-stderr',
                    '--name',
                    'client.admin',
                    '-b',
                    str(4 << 10),
                    '-p',
                    pool,
                    '-t',
                    '20',
                    'bench',
                    '240',
                    'write',
                ]).format(tdir=testdir),
            ],
                    logger=log.getChild(
                        'radosbench.{id}'.format(id='client.admin')),
                    stdin=run.PIPE,
                    wait=False))
    time.sleep(10)

    # mark stuff lost
    pgs = manager.get_pg_stats()
    for pg in pgs:
        if pg['stat_sum']['num_objects_unfound'] > 0:
            # verify that i can list them direct from the osd
            log.info('listing missing/lost in %s state %s', pg['pgid'],
                     pg['state'])
            m = manager.list_pg_unfound(pg['pgid'])
            log.info('%s' % m)
            assert m['num_unfound'] == pg['stat_sum']['num_objects_unfound']

            log.info("reverting unfound in %s", pg['pgid'])
            manager.raw_cluster_cmd('pg', pg['pgid'], 'mark_unfound_lost',
                                    'delete')
        else:
            log.info("no unfound in %s", pg['pgid'])

    manager.raw_cluster_cmd('tell', 'osd.0', 'debug', 'kick_recovery_wq', '5')
    manager.raw_cluster_cmd('tell', 'osd.2', 'debug', 'kick_recovery_wq', '5')
    manager.raw_cluster_cmd('tell', 'osd.3', 'debug', 'kick_recovery_wq', '5')
    manager.flush_pg_stats([0, 2, 3])
    manager.wait_for_recovery()

    if not config.get('parallel_bench', True):
        time.sleep(20)

    # verify result
    for f in range(1, 10):
        err = rados(ctx, mon, ['-p', pool, 'get', 'new_%d' % f, '-'])
        assert err
        err = rados(ctx, mon, ['-p', pool, 'get', 'existed_%d' % f, '-'])
        assert err
        err = rados(ctx, mon, ['-p', pool, 'get', 'existing_%d' % f, '-'])
        assert err

    # see if osd.1 can cope
    manager.revive_osd(1)
    manager.wait_till_osd_is_up(1)
    manager.wait_for_clean()
    run.wait(procs)
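
create_erasure_code_profile() and create_pool_with_unique_name() are CephManager conveniences; at the ceph CLI level they roughly correspond to the commands below. The pool name and PG counts are placeholders chosen for illustration, not what the helpers actually pick.

# Rough CLI-level equivalent of the two pool-setup helpers above.
manager.raw_cluster_cmd('osd', 'erasure-code-profile', 'set', 'lost_unfound',
                        'k=2', 'm=2', 'crush-failure-domain=osd')
manager.raw_cluster_cmd('osd', 'pool', 'create', 'unique-pool-1', '16', '16',
                        'erasure', 'lost_unfound')
manager.raw_cluster_cmd('osd', 'pool', 'set', 'unique-pool-1', 'min_size', '2')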
Esempio n. 27
0
def task(ctx, config):
    """
    Test handling of divergent entries with prior_version
    prior to log_tail

    config: none

    Requires 3 osds.
    """
    if config is None:
        config = {}
    assert isinstance(config, dict), \
        'divergent_priors task only accepts a dict for configuration'
    first_mon = teuthology.get_first_mon(ctx, config)
    (mon, ) = ctx.cluster.only(first_mon).remotes.keys()

    manager = ceph_manager.CephManager(
        mon,
        ctx=ctx,
        logger=log.getChild('ceph_manager'),
    )
    ctx.manager = manager

    while len(manager.get_osd_status()['up']) < 3:
        time.sleep(10)
    manager.raw_cluster_cmd('tell', 'osd.0', 'flush_pg_stats')
    manager.raw_cluster_cmd('tell', 'osd.1', 'flush_pg_stats')
    manager.raw_cluster_cmd('tell', 'osd.2', 'flush_pg_stats')
    manager.raw_cluster_cmd('osd', 'set', 'noout')
    manager.raw_cluster_cmd('osd', 'set', 'noin')
    manager.raw_cluster_cmd('osd', 'set', 'nodown')
    manager.wait_for_clean()

    # something that is always there
    dummyfile = '/etc/fstab'
    dummyfile2 = '/etc/resolv.conf'

    # create 1 pg pool
    log.info('creating foo')
    manager.raw_cluster_cmd('osd', 'pool', 'create', 'foo', '1')

    osds = [0, 1, 2]
    for i in osds:
        manager.set_config(i, osd_min_pg_log_entries=1)

    # determine primary
    divergent = manager.get_pg_primary('foo', 0)
    log.info("primary and soon to be divergent is %d", divergent)
    non_divergent = [0, 1, 2]
    non_divergent.remove(divergent)

    log.info('writing initial objects')
    # write 1000 objects
    for i in range(1000):
        rados(ctx, mon, ['-p', 'foo', 'put', 'existing_%d' % i, dummyfile])

    manager.wait_for_clean()

    # blackhole non_divergent
    log.info("blackholing osds %s", str(non_divergent))
    for i in non_divergent:
        manager.set_config(i, filestore_blackhole='')

    # write 1 (divergent) object
    log.info('writing divergent object existing_0')
    rados(ctx, mon, ['-p', 'foo', 'put', 'existing_0', dummyfile2], wait=False)
    time.sleep(10)
    mon.run(args=['killall', '-9', 'rados'], wait=True, check_status=False)

    # kill all the osds
    log.info('killing all the osds')
    for i in osds:
        manager.kill_osd(i)
    for i in osds:
        manager.mark_down_osd(i)
    for i in osds:
        manager.mark_out_osd(i)

    # bring up non-divergent
    log.info("bringing up non_divergent %s", str(non_divergent))
    for i in non_divergent:
        manager.revive_osd(i)
    for i in non_divergent:
        manager.mark_in_osd(i)

    log.info('making log long to prevent backfill')
    for i in non_divergent:
        manager.set_config(i, osd_min_pg_log_entries=100000)

    # write 1 non-divergent object (ensure that old divergent one is divergent)
    log.info('writing non-divergent object existing_1')
    rados(ctx, mon, ['-p', 'foo', 'put', 'existing_1', dummyfile2])

    manager.wait_for_recovery()

    # ensure no recovery
    log.info('delay recovery')
    for i in non_divergent:
        manager.set_config(i, osd_recovery_delay_start=100000)

    # bring in our divergent friend
    log.info("revive divergent %d", divergent)
    manager.revive_osd(divergent)

    while len(manager.get_osd_status()['up']) < 3:
        time.sleep(10)

    log.info('delay recovery divergent')
    manager.set_config(divergent, osd_recovery_delay_start=100000)
    log.info('mark divergent in')
    manager.mark_in_osd(divergent)

    log.info('wait for peering')
    rados(ctx, mon, ['-p', 'foo', 'put', 'foo', dummyfile])

    log.info("killing divergent %d", divergent)
    manager.kill_osd(divergent)
    log.info("reviving divergent %d", divergent)
    manager.revive_osd(divergent)

    log.info('allowing recovery')
    for i in non_divergent:
        manager.set_config(i, osd_recovery_delay_start=0)

    log.info('reading existing_0')
    exit_status = rados(
        ctx, mon, ['-p', 'foo', 'get', 'existing_0', '-o', '/tmp/existing'])
    assert exit_status == 0
    log.info("success")
Esempio n. 28
0
def task(ctx, config):
    """
    Test the dump_stuck command.

    :param ctx: Context
    :param config: Configuration
    """
    assert config is None, \
        'dump_stuck requires no configuration'
    assert teuthology.num_instances_of_type(ctx.cluster, 'osd') == 2, \
        'dump_stuck requires exactly 2 osds'

    timeout = 60
    first_mon = teuthology.get_first_mon(ctx, config)
    (mon, ) = ctx.cluster.only(first_mon).remotes.keys()

    manager = ceph_manager.CephManager(
        mon,
        ctx=ctx,
        logger=log.getChild('ceph_manager'),
    )

    manager.flush_pg_stats([0, 1])
    manager.wait_for_clean(timeout)

    manager.raw_cluster_cmd(
        'tell',
        'mon.0',
        'injectargs',
        '--',
        #                            '--mon-osd-report-timeout 90',
        '--mon-pg-stuck-threshold 10')

    # all active+clean
    check_stuck(
        manager,
        num_inactive=0,
        num_unclean=0,
        num_stale=0,
    )
    num_pgs = manager.get_num_pgs()

    manager.mark_out_osd(0)
    time.sleep(timeout)
    manager.flush_pg_stats([1])
    manager.wait_for_recovery(timeout)

    # all active+clean+remapped
    check_stuck(
        manager,
        num_inactive=0,
        num_unclean=0,
        num_stale=0,
    )

    manager.mark_in_osd(0)
    manager.flush_pg_stats([0, 1])
    manager.wait_for_clean(timeout)

    # all active+clean
    check_stuck(
        manager,
        num_inactive=0,
        num_unclean=0,
        num_stale=0,
    )

    log.info('stopping first osd')
    manager.kill_osd(0)
    manager.mark_down_osd(0)

    log.info('waiting for all to be unclean')
    starttime = time.time()
    done = False
    while not done:
        try:
            check_stuck(
                manager,
                num_inactive=0,
                num_unclean=num_pgs,
                num_stale=0,
            )
            done = True
        except AssertionError:
            # wait up to 15 minutes to become stale
            if time.time() - starttime > 900:
                raise

    log.info('stopping second osd')
    manager.kill_osd(1)
    manager.mark_down_osd(1)

    log.info('waiting for all to be stale')
    starttime = time.time()
    done = False
    while not done:
        try:
            check_stuck(
                manager,
                num_inactive=0,
                num_unclean=num_pgs,
                num_stale=num_pgs,
            )
            done = True
        except AssertionError:
            # wait up to 15 minutes to become stale
            if time.time() - starttime > 900:
                raise

    log.info('reviving')
    for id_ in teuthology.all_roles_of_type(ctx.cluster, 'osd'):
        manager.revive_osd(id_)
        manager.mark_in_osd(id_)
    while True:
        try:
            manager.flush_pg_stats([0, 1])
            break
        except Exception:
            log.exception('osds must not have started yet, waiting...')
            time.sleep(1)
    manager.wait_for_clean(timeout)

    check_stuck(
        manager,
        num_inactive=0,
        num_unclean=0,
        num_stale=0,
    )
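
check_stuck() is defined alongside this task but not shown. The version sketched below approximates it from get_pg_stats(), which the other examples in this collection already use, and ignores the stuck-time threshold the real helper presumably passes to the monitor:

def check_stuck(manager, num_inactive, num_unclean, num_stale):
    # Approximation only: classify PGs by their reported state string.
    stats = manager.get_pg_stats()
    inactive = [pg for pg in stats if 'active' not in pg['state']]
    unclean = [pg for pg in stats if 'clean' not in pg['state']]
    stale = [pg for pg in stats if 'stale' in pg['state']]
    assert len(inactive) == num_inactive, 'stuck inactive: %s' % inactive
    assert len(unclean) == num_unclean, 'stuck unclean: %s' % unclean
    assert len(stale) == num_stale, 'stuck stale: %s' % stale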
Esempio n. 29
0
def task(ctx, config):
    """
    Test handling of lost objects.

    A pretty rigid cluster is brought up and tested by this task
    """
    if config is None:
        config = {}
    assert isinstance(config, dict), \
        'lost_unfound task only accepts a dict for configuration'
    first_mon = teuthology.get_first_mon(ctx, config)
    (mon, ) = ctx.cluster.only(first_mon).remotes.keys()

    manager = ceph_manager.CephManager(
        mon,
        ctx=ctx,
        logger=log.getChild('ceph_manager'),
    )

    while len(manager.get_osd_status()['up']) < 3:
        time.sleep(10)
    manager.raw_cluster_cmd('tell', 'osd.0', 'flush_pg_stats')
    manager.raw_cluster_cmd('tell', 'osd.1', 'flush_pg_stats')
    manager.raw_cluster_cmd('tell', 'osd.2', 'flush_pg_stats')
    manager.wait_for_clean()

    # something that is always there
    dummyfile = '/etc/fstab'

    # take an osd out until the very end
    manager.kill_osd(2)
    manager.mark_down_osd(2)
    manager.mark_out_osd(2)

    # kludge to make sure they get a map
    rados(ctx, mon, ['-p', 'data', 'put', 'dummy', dummyfile])

    manager.raw_cluster_cmd('tell', 'osd.0', 'flush_pg_stats')
    manager.raw_cluster_cmd('tell', 'osd.1', 'flush_pg_stats')
    manager.wait_for_recovery()

    # create old objects
    for f in range(1, 10):
        rados(ctx, mon, ['-p', 'data', 'put', 'existing_%d' % f, dummyfile])
        rados(ctx, mon, ['-p', 'data', 'put', 'existed_%d' % f, dummyfile])
        rados(ctx, mon, ['-p', 'data', 'rm', 'existed_%d' % f])

    # delay recovery, and make the pg log very long (to prevent backfill)
    manager.raw_cluster_cmd(
        'tell', 'osd.1', 'injectargs',
        '--osd-recovery-delay-start 1000 --osd-min-pg-log-entries 100000000')

    manager.kill_osd(0)
    manager.mark_down_osd(0)

    for f in range(1, 10):
        rados(ctx, mon, ['-p', 'data', 'put', 'new_%d' % f, dummyfile])
        rados(ctx, mon, ['-p', 'data', 'put', 'existed_%d' % f, dummyfile])
        rados(ctx, mon, ['-p', 'data', 'put', 'existing_%d' % f, dummyfile])

    # bring osd.0 back up, let it peer, but don't replicate the new
    # objects...
    log.info('osd.0 command_args is %s',
             ctx.daemons.get_daemon('osd', 0).command_args)
    ctx.daemons.get_daemon('osd', 0).command_kwargs['args'].extend(
        ['--osd-recovery-delay-start', '1000'])
    manager.revive_osd(0)
    manager.mark_in_osd(0)
    manager.wait_till_osd_is_up(0)

    manager.raw_cluster_cmd('tell', 'osd.1', 'flush_pg_stats')
    manager.raw_cluster_cmd('tell', 'osd.0', 'flush_pg_stats')
    manager.wait_till_active()

    # take out osd.1 and the only copy of those objects.
    manager.kill_osd(1)
    manager.mark_down_osd(1)
    manager.mark_out_osd(1)
    manager.raw_cluster_cmd('osd', 'lost', '1', '--yes-i-really-mean-it')

    # bring up osd.2 so that things would otherwise, in theory, recover fully
    manager.revive_osd(2)
    manager.mark_in_osd(2)
    manager.wait_till_osd_is_up(2)

    manager.raw_cluster_cmd('tell', 'osd.0', 'flush_pg_stats')
    manager.raw_cluster_cmd('tell', 'osd.2', 'flush_pg_stats')
    manager.wait_till_active()
    manager.raw_cluster_cmd('tell', 'osd.0', 'flush_pg_stats')
    manager.raw_cluster_cmd('tell', 'osd.2', 'flush_pg_stats')

    # verify that there are unfound objects
    unfound = manager.get_num_unfound_objects()
    log.info("there are %d unfound objects" % unfound)
    assert unfound

    # mark stuff lost
    pgs = manager.get_pg_stats()
    for pg in pgs:
        if pg['stat_sum']['num_objects_unfound'] > 0:
            primary = 'osd.%d' % pg['acting'][0]

            # verify that i can list them direct from the osd
            log.info('listing missing/lost in %s state %s', pg['pgid'],
                     pg['state'])
            m = manager.list_pg_missing(pg['pgid'])
            #log.info('%s' % m)
            assert m['num_unfound'] == pg['stat_sum']['num_objects_unfound']
            num_unfound = 0
            for o in m['objects']:
                if len(o['locations']) == 0:
                    num_unfound += 1
            assert m['num_unfound'] == num_unfound

            log.info("reverting unfound in %s on %s", pg['pgid'], primary)
            manager.raw_cluster_cmd('pg', pg['pgid'], 'mark_unfound_lost',
                                    'delete')
        else:
            log.info("no unfound in %s", pg['pgid'])

    manager.raw_cluster_cmd('tell', 'osd.0', 'debug', 'kick_recovery_wq', '5')
    manager.raw_cluster_cmd('tell', 'osd.2', 'debug', 'kick_recovery_wq', '5')
    manager.raw_cluster_cmd('tell', 'osd.0', 'flush_pg_stats')
    manager.raw_cluster_cmd('tell', 'osd.2', 'flush_pg_stats')
    manager.wait_for_recovery()

    # verify result
    for f in range(1, 10):
        err = rados(ctx, mon, ['-p', 'data', 'get', 'new_%d' % f, '-'])
        assert err
        err = rados(ctx, mon, ['-p', 'data', 'get', 'existed_%d' % f, '-'])
        assert err
        err = rados(ctx, mon, ['-p', 'data', 'get', 'existing_%d' % f, '-'])
        assert err

    # see if osd.1 can cope
    manager.revive_osd(1)
    manager.mark_in_osd(1)
    manager.wait_till_osd_is_up(1)
    manager.wait_for_clean()
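
The unfound-handling loop above relies on just two parts of list_pg_missing()'s output: the num_unfound counter and each object's locations list. A hypothetical, heavily abbreviated return value consistent with those accesses (field values are made up for illustration):

# Hypothetical, abbreviated shape of manager.list_pg_missing(pgid); only the
# fields read by the loop above are shown.
example_missing = {
    'num_unfound': 2,
    'objects': [
        {'oid': 'existing_3', 'locations': []},       # no known copy: unfound
        {'oid': 'existing_7', 'locations': ['1']},    # a copy is known on osd.1
    ],
}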
Esempio n. 30
0
def task(ctx, config):
    """
    Test (non-backfill) recovery
    """
    if config is None:
        config = {}
    assert isinstance(config, dict), \
        'task only accepts a dict for configuration'
    testdir = teuthology.get_testdir(ctx)
    first_mon = teuthology.get_first_mon(ctx, config)
    (mon, ) = ctx.cluster.only(first_mon).remotes.keys()

    num_osds = teuthology.num_instances_of_type(ctx.cluster, 'osd')
    log.info('num_osds is %s' % num_osds)
    assert num_osds == 3

    manager = ceph_manager.CephManager(
        mon,
        ctx=ctx,
        logger=log.getChild('ceph_manager'),
    )

    while len(manager.get_osd_status()['up']) < 3:
        time.sleep(10)
    manager.raw_cluster_cmd('tell', 'osd.0', 'flush_pg_stats')
    manager.raw_cluster_cmd('tell', 'osd.1', 'flush_pg_stats')
    manager.raw_cluster_cmd('tell', 'osd.2', 'flush_pg_stats')
    manager.wait_for_clean()

    # test some osdmap flags
    manager.raw_cluster_cmd('osd', 'set', 'noin')
    manager.raw_cluster_cmd('osd', 'set', 'noout')
    manager.raw_cluster_cmd('osd', 'set', 'noup')
    manager.raw_cluster_cmd('osd', 'set', 'nodown')
    manager.raw_cluster_cmd('osd', 'unset', 'noin')
    manager.raw_cluster_cmd('osd', 'unset', 'noout')
    manager.raw_cluster_cmd('osd', 'unset', 'noup')
    manager.raw_cluster_cmd('osd', 'unset', 'nodown')

    # write some new data
    p = rados_start(
        testdir, mon,
        ['-p', 'rbd', 'bench', '20', 'write', '-b', '4096', '--no-cleanup'])

    time.sleep(15)

    # trigger a divergent target:
    #  blackhole + restart osd.1 (shorter log)
    manager.blackhole_kill_osd(1)
    #  kill osd.2 (longer log... we'll make it divergent below)
    manager.kill_osd(2)
    time.sleep(2)
    manager.revive_osd(1)

    # wait for our writes to complete + succeed
    err = p.wait()
    log.info('err is %d' % err)

    # cluster must repeer
    manager.raw_cluster_cmd('tell', 'osd.0', 'flush_pg_stats')
    manager.raw_cluster_cmd('tell', 'osd.1', 'flush_pg_stats')
    manager.wait_for_active_or_down()

    # write some more (make sure osd.2 really is divergent)
    p = rados_start(testdir, mon,
                    ['-p', 'rbd', 'bench', '15', 'write', '-b', '4096'])
    p.wait()

    # revive divergent osd
    manager.revive_osd(2)

    while len(manager.get_osd_status()['up']) < 3:
        log.info('waiting a bit...')
        time.sleep(2)
    log.info('3 are up!')

    # cluster must recover
    manager.raw_cluster_cmd('tell', 'osd.0', 'flush_pg_stats')
    manager.raw_cluster_cmd('tell', 'osd.1', 'flush_pg_stats')
    manager.raw_cluster_cmd('tell', 'osd.2', 'flush_pg_stats')
    manager.wait_for_clean()
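
This task, like the earlier recovery and lost_unfound ones, issues the per-OSD flush_pg_stats tell by hand; the manager.flush_pg_stats([...]) calls in the scrub and erasure-coded examples wrap the same operation. A minimal equivalent is sketched below; the real CephManager method may additionally wait until the monitor has registered the new stat sequence numbers.

def flush_pg_stats(manager, osd_ids):
    # Minimal sketch: ask each listed OSD to push its PG stats to the mon.
    for i in osd_ids:
        manager.raw_cluster_cmd('tell', 'osd.%d' % i, 'flush_pg_stats')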