Code example #1
File: test_strays.py Project: LargerPanda/ceph-1
    def test_replicated_delete_speed(self):
        """
        That deletions of replicated metadata are not pathologically slow
        """
        rank_0_id, rank_1_id = self._setup_two_ranks()

        self.set_conf("mds.{0}".format(rank_1_id), 'mds_max_purge_files', "0")
        self.mds_cluster.mds_fail_restart(rank_1_id)
        self.fs.wait_for_daemons()

        file_count = 10

        self.mount_a.create_n_files("delete_me/file", file_count)

        self._force_migrate(rank_1_id, "delete_me",
                            self.mount_a.path_to_ino("delete_me/file_0"))

        begin = datetime.datetime.now()
        self.mount_a.run_shell(["rm", "-rf", Raw("delete_me/*")])
        end = datetime.datetime.now()

        # What we're really checking here is that we are completing client
        # operations immediately rather than delaying until the next tick.
        tick_period = float(
            self.fs.get_config("mds_tick_interval", service_type="mds"))

        duration = (end - begin).total_seconds()
        self.assertLess(duration, (file_count * tick_period) * 0.25)
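To make the bound concrete, here is a worked instance assuming the default 5-second mds_tick_interval (an assumption for illustration; the test reads the live value instead):

    tick_period = 5.0   # assumed default of mds_tick_interval, in seconds
    file_count = 10
    threshold = file_count * tick_period * 0.25   # 12.5 seconds
    # If every unlink stalled until the next MDS tick, the rm would take on
    # the order of file_count * tick_period = 50 seconds and fail the check.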
Code example #2
File: test_strays.py Project: zhmtju/ceph
    def test_purge_on_shutdown(self):
        """
        That when an MDS rank is shut down, its purge queue is
        drained in the process.
        """
        rank_0_id, rank_1_id = self._setup_two_ranks()

        self.set_conf("mds.{0}".format(rank_1_id), 'mds_max_purge_files', "0")
        self.mds_cluster.mds_fail_restart(rank_1_id)
        self.fs.wait_for_daemons()

        file_count = 5

        self.mount_a.create_n_files("delete_me/file", file_count)

        self._force_migrate(rank_0_id, rank_1_id, "/delete_me",
                            self.mount_a.path_to_ino("delete_me/file_0"))

        self.mount_a.run_shell(["rm", "-rf", Raw("delete_me/*")])
        self.mount_a.umount_wait()

        # See all the strays go into purge queue
        self._wait_for_counter("mds_cache",
                               "strays_created",
                               file_count,
                               mds_id=rank_1_id)
        self._wait_for_counter("mds_cache",
                               "strays_enqueued",
                               file_count,
                               mds_id=rank_1_id)
        self.assertEqual(
            self.get_stat("mds_cache", "num_strays", mds_id=rank_1_id), 0)

        # See nothing get purged from the purge queue (yet)
        time.sleep(10)
        self.assertEqual(
            self.get_stat("purge_queue", "pq_executed", mds_id=rank_1_id), 0)

        # Shut down rank 1
        self.fs.set_max_mds(1)
        self.fs.deactivate(1)

        # It shouldn't proceed past stopping because it's still not allowed
        # to purge
        time.sleep(10)
        self.assertEqual(
            self.get_stat("purge_queue", "pq_executed", mds_id=rank_1_id), 0)
        self.assertFalse(self._is_stopped(1))

        # Permit the daemon to start purging again
        self.fs.mon_manager.raw_cluster_cmd('tell',
                                            'mds.{0}'.format(rank_1_id),
                                            'injectargs',
                                            "--mds_max_purge_files 100")

        # It should now proceed through shutdown
        self.wait_until_true(lambda: self._is_stopped(1), timeout=60)

        # ...and in the process purge all that data
        self.await_data_pool_empty()
Code example #3
    def conduct_neg_test_for_write_caps(self, filepaths, mounts):
        cmdargs = ['echo', 'some random data', Raw('|'), 'sudo', 'tee']

        for mount in mounts:
            for path in filepaths:
                if path.find(mount.hostfs_mntpt) != -1:
                    cmdargs.append(path)
                    mount.negtestcmd(args=cmdargs, retval=1,
                                     errmsg='permission denied')
                    # Remove the path again so it does not accumulate into
                    # the command line of the next iteration.
                    cmdargs.pop(-1)
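These snippets lean on teuthology's Raw wrapper because the remote runner shell-quotes every plain argument: a bare '|' would reach the remote shell as quoted data rather than an actual pipe, while Raw marks an argument as already quoted. A minimal sketch of the difference, assuming the quote() helper from teuthology.orchestra.run:

    from teuthology.orchestra.run import Raw, quote

    # quote() shell-escapes plain strings but passes Raw() through verbatim.
    print(quote(['echo', 'some random data', '|', 'sudo', 'tee', '/tmp/f']))
    # -> echo 'some random data' '|' sudo tee /tmp/f   (no pipeline; '|' is data)
    print(quote(['echo', 'some random data', Raw('|'), 'sudo', 'tee', '/tmp/f']))
    # -> echo 'some random data' | sudo tee /tmp/f     (a real pipeline)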
Code example #4
    def _install_deb_repo(self):
        self.remote.run(
            args=[
                'echo', 'deb', self.base_url, self.codename, 'main',
                Raw('|'),
                'sudo', 'tee',
                '/etc/apt/sources.list.d/{proj}.list'.format(
                    proj=self.project),
            ],
            stdout=StringIO(),
        )
Code example #5
    def test_get_pmlogextract_cmd(self):
        obj = self.klass(
            hosts=['host1'],
            time_from='now-3h',
            time_until='now-1h',
        )
        expected = [
            'pmlogextract',
            '-S', 'now-3h',
            '-T', 'now-1h',
            Raw('/var/log/pcp/pmlogger/host1/*.0'),
        ]
        assert obj.get_pmlogextract_cmd('host1') == expected
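Raw serves a different purpose here: left unquoted, the *.0 glob is expanded by the remote shell, whereas a quoted path would reach pmlogextract as a literal pattern. A sketch of the contrast, again assuming quote() from teuthology.orchestra.run:

    from teuthology.orchestra.run import Raw, quote

    print(quote(['ls', '/var/log/pcp/pmlogger/host1/*.0']))
    # -> ls '/var/log/pcp/pmlogger/host1/*.0'   (glob stays literal)
    print(quote(['ls', Raw('/var/log/pcp/pmlogger/host1/*.0')]))
    # -> ls /var/log/pcp/pmlogger/host1/*.0     (remote shell expands it)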
Code example #6
File: test_admin.py Project: wwjiang007/ceph
    def test_single_path_rootsquash(self):
        filedata, filename = 'some data on fs 1', 'file_on_fs1'
        filepath = os_path_join(self.mount_a.hostfs_mntpt, filename)
        self.mount_a.write_file(filepath, filedata)

        keyring = self.fs.authorize(self.client_id, ('/', 'rw', 'root_squash'))
        keyring_path = self.mount_a.client_remote.mktemp(data=keyring)
        self.mount_a.remount(client_id=self.client_id,
                             client_keyring_path=keyring_path,
                             cephfs_mntpt='/')

        if filepath.find(self.mount_a.hostfs_mntpt) != -1:
            # can read, but not write as root
            contents = self.mount_a.read_file(filepath)
            self.assertEqual(filedata, contents)
            cmdargs = ['echo', 'some random data', Raw('|'), 'sudo', 'tee', filepath]
            self.mount_a.negtestcmd(args=cmdargs, retval=1, errmsg='permission denied')
Code example #7
File: test_snapshots.py Project: varshar16/ceph-ci
    def test_kill_mdstable(self):
        """
        check snaptable transactions
        """
        if not isinstance(self.mount_a, FuseMount):
            self.skipTest("Require FUSE client to forcibly kill mount")

        self.fs.set_allow_new_snaps(True)
        self.fs.set_max_mds(2)
        status = self.fs.wait_for_daemons()

        grace = float(
            self.fs.get_config("mds_beacon_grace", service_type="mon"))

        # setup subtrees
        self.mount_a.run_shell(["mkdir", "-p", "d1/dir"])
        self.mount_a.setfattr("d1", "ceph.dir.pin", "1")
        self.wait_until_true(
            lambda: self._check_subtree(1, '/d1', status=status), timeout=30)

        last_created = self._get_last_created_snap(rank=0, status=status)

        # mds_kill_mdstable_at:
        #  1: MDSTableServer::handle_prepare
        #  2: MDSTableServer::_prepare_logged
        #  5: MDSTableServer::handle_commit
        #  6: MDSTableServer::_commit_logged
        for i in [1, 2, 5, 6]:
            log.info("testing snapserver mds_kill_mdstable_at={0}".format(i))

            status = self.fs.status()
            rank0 = self.fs.get_rank(rank=0, status=status)
            self.fs.rank_freeze(True, rank=0)
            self.fs.rank_asok(
                ['config', 'set', "mds_kill_mdstable_at", "{0}".format(i)],
                rank=0,
                status=status)
            proc = self.mount_a.run_shell(
                ["mkdir", "d1/dir/.snap/s1{0}".format(i)], wait=False)
            self.wait_until_true(
                lambda: "laggy_since" in self.fs.get_rank(rank=0),
                timeout=grace * 2)
            self.delete_mds_coredump(rank0['name'])

            self.fs.rank_fail(rank=0)
            self.fs.mds_restart(rank0['name'])
            self.wait_for_daemon_start([rank0['name']])
            status = self.fs.wait_for_daemons()

            proc.wait()
            last_created += 1
            self.wait_until_true(
                lambda: self._get_last_created_snap(rank=0) == last_created,
                timeout=30)

        self.set_conf("mds", "mds_reconnect_timeout", "5")

        self.mount_a.run_shell(["rmdir", Raw("d1/dir/.snap/*")])

        # set mds_kill_mdstable_at, also kill snapclient
        for i in [2, 5, 6]:
            log.info(
                "testing snapserver mds_kill_mdstable_at={0}, also kill snapclient"
                .format(i))
            status = self.fs.status()
            last_created = self._get_last_created_snap(rank=0, status=status)

            rank0 = self.fs.get_rank(rank=0, status=status)
            rank1 = self.fs.get_rank(rank=1, status=status)
            self.fs.rank_freeze(True, rank=0)  # prevent failover...
            self.fs.rank_freeze(True, rank=1)  # prevent failover...
            self.fs.rank_asok(
                ['config', 'set', "mds_kill_mdstable_at", "{0}".format(i)],
                rank=0,
                status=status)
            proc = self.mount_a.run_shell(
                ["mkdir", "d1/dir/.snap/s2{0}".format(i)], wait=False)
            self.wait_until_true(
                lambda: "laggy_since" in self.fs.get_rank(rank=0),
                timeout=grace * 2)
            self.delete_mds_coredump(rank0['name'])

            self.fs.rank_signal(signal.SIGKILL, rank=1)

            self.mount_a.kill()
            self.mount_a.kill_cleanup()

            self.fs.rank_fail(rank=0)
            self.fs.mds_restart(rank0['name'])
            self.wait_for_daemon_start([rank0['name']])

            self.fs.wait_for_state('up:resolve',
                                   rank=0,
                                   timeout=MDS_RESTART_GRACE)
            if i in [2, 5]:
                self.assertEqual(len(self._get_pending_snap_update(rank=0)), 1)
            elif i == 6:
                self.assertEqual(len(self._get_pending_snap_update(rank=0)), 0)
                self.assertGreater(self._get_last_created_snap(rank=0),
                                   last_created)

            self.fs.rank_fail(rank=1)
            self.fs.mds_restart(rank1['name'])
            self.wait_for_daemon_start([rank1['name']])
            self.fs.wait_for_state('up:active',
                                   rank=0,
                                   timeout=MDS_RESTART_GRACE)

            if i in [2, 5]:
                self.wait_until_true(
                    lambda: len(self._get_pending_snap_update(rank=0)) == 0,
                    timeout=30)
                if i == 2:
                    self.assertEqual(self._get_last_created_snap(rank=0),
                                     last_created)
                else:
                    self.assertGreater(self._get_last_created_snap(rank=0),
                                       last_created)

            self.mount_a.mount()
            self.mount_a.wait_until_mounted()

        self.mount_a.run_shell(["rmdir", Raw("d1/dir/.snap/*")])

        # mds_kill_mdstable_at:
        #  3: MDSTableClient::handle_request (got agree)
        #  4: MDSTableClient::commit
        #  7: MDSTableClient::handle_request (got ack)
        for i in [3, 4, 7]:
            log.info("testing snapclient mds_kill_mdstable_at={0}".format(i))
            last_created = self._get_last_created_snap(rank=0)

            status = self.fs.status()
            rank1 = self.fs.get_rank(rank=1, status=status)
            self.fs.rank_freeze(True, rank=1)  # prevent failover...
            self.fs.rank_asok(
                ['config', 'set', "mds_kill_mdstable_at", "{0}".format(i)],
                rank=1,
                status=status)
            proc = self.mount_a.run_shell(
                ["mkdir", "d1/dir/.snap/s3{0}".format(i)], wait=False)
            self.wait_until_true(
                lambda: "laggy_since" in self.fs.get_rank(rank=1),
                timeout=grace * 2)
            self.delete_mds_coredump(rank1['name'])

            self.mount_a.kill()
            self.mount_a.kill_cleanup()

            if i in [3, 4]:
                self.assertEqual(len(self._get_pending_snap_update(rank=0)), 1)
            elif i == 7:
                self.assertEqual(len(self._get_pending_snap_update(rank=0)), 0)
                self.assertGreater(self._get_last_created_snap(rank=0),
                                   last_created)

            self.fs.rank_fail(rank=1)
            self.fs.mds_restart(rank1['name'])
            self.wait_for_daemon_start([rank1['name']])
            status = self.fs.wait_for_daemons(timeout=MDS_RESTART_GRACE)

            if i in [3, 4]:
                self.wait_until_true(
                    lambda: len(self._get_pending_snap_update(rank=0)) == 0,
                    timeout=30)
                if i == 3:
                    self.assertEqual(self._get_last_created_snap(rank=0),
                                     last_created)
                else:
                    self.assertGreater(self._get_last_created_snap(rank=0),
                                       last_created)

            self.mount_a.mount()
            self.mount_a.wait_until_mounted()

        self.mount_a.run_shell(["rmdir", Raw("d1/dir/.snap/*")])

        # mds_kill_mdstable_at:
        #  3: MDSTableClient::handle_request (got agree)
        #  8: MDSTableServer::handle_rollback
        log.info(
            "testing snapclient mds_kill_mdstable_at=3, snapserver mds_kill_mdstable_at=8"
        )
        last_created = self._get_last_created_snap(rank=0)

        status = self.fs.status()
        rank0 = self.fs.get_rank(rank=0, status=status)
        rank1 = self.fs.get_rank(rank=1, status=status)
        self.fs.rank_freeze(True, rank=0)
        self.fs.rank_freeze(True, rank=1)
        self.fs.rank_asok(
            ['config', 'set', "mds_kill_mdstable_at", "8"],
            rank=0,
            status=status)
        self.fs.rank_asok(
            ['config', 'set', "mds_kill_mdstable_at", "3"],
            rank=1,
            status=status)
        proc = self.mount_a.run_shell(["mkdir", "d1/dir/.snap/s4"],
                                      wait=False)
        self.wait_until_true(lambda: "laggy_since" in self.fs.get_rank(rank=1),
                             timeout=grace * 2)
        self.delete_mds_coredump(rank1['name'])

        self.mount_a.kill()
        self.mount_a.kill_cleanup()

        self.assertEqual(len(self._get_pending_snap_update(rank=0)), 1)

        self.fs.rank_fail(rank=1)
        self.fs.mds_restart(rank1['name'])
        self.wait_for_daemon_start([rank1['name']])

        # rollback triggers assertion
        self.wait_until_true(lambda: "laggy_since" in self.fs.get_rank(rank=0),
                             timeout=grace * 2)
        self.delete_mds_coredump(rank0['name'])
        self.fs.rank_fail(rank=0)
        self.fs.mds_restart(rank0['name'])
        self.wait_for_daemon_start([rank0['name']])
        self.fs.wait_for_state('up:active', rank=0, timeout=MDS_RESTART_GRACE)

        # mds.1 should re-send rollback message
        self.wait_until_true(
            lambda: len(self._get_pending_snap_update(rank=0)) == 0,
            timeout=30)
        self.assertEqual(self._get_last_created_snap(rank=0), last_created)

        self.mount_a.mount()
        self.mount_a.wait_until_mounted()
Code example #8
File: test_snapshots.py Project: varshar16/ceph-ci
    def test_snapclient_cache(self):
        """
        check if snapclient cache gets synced properly
        """
        self.fs.set_allow_new_snaps(True)
        self.fs.set_max_mds(3)
        status = self.fs.wait_for_daemons()

        grace = float(
            self.fs.get_config("mds_beacon_grace", service_type="mon"))

        self.mount_a.run_shell(["mkdir", "-p", "d0/d1/dir"])
        self.mount_a.run_shell(["mkdir", "-p", "d0/d2/dir"])
        self.mount_a.setfattr("d0", "ceph.dir.pin", "0")
        self.mount_a.setfattr("d0/d1", "ceph.dir.pin", "1")
        self.mount_a.setfattr("d0/d2", "ceph.dir.pin", "2")
        self.wait_until_true(
            lambda: self._check_subtree(2, '/d0/d2', status=status),
            timeout=30)
        self.wait_until_true(
            lambda: self._check_subtree(1, '/d0/d1', status=status), timeout=5)
        self.wait_until_true(
            lambda: self._check_subtree(0, '/d0', status=status), timeout=5)

        def _check_snapclient_cache(snaps_dump, cache_dump=None, rank=0):
            if cache_dump is None:
                cache_dump = self._get_snapclient_dump(rank=rank)
            for key, value in cache_dump.items():
                if value != snaps_dump[key]:
                    return False
            return True

        # sync after mksnap
        last_created = self._get_last_created_snap(rank=0)
        self.mount_a.run_shell(
            ["mkdir", "d0/d1/dir/.snap/s1", "d0/d1/dir/.snap/s2"])
        self.wait_until_true(
            lambda: len(self._get_pending_snap_update(rank=0)) == 0,
            timeout=30)
        self.assertGreater(self._get_last_created_snap(rank=0), last_created)

        snaps_dump = self._get_snapserver_dump(rank=0)
        self.assertTrue(_check_snapclient_cache(snaps_dump, rank=0))
        self.assertTrue(_check_snapclient_cache(snaps_dump, rank=1))
        self.assertTrue(_check_snapclient_cache(snaps_dump, rank=2))

        # sync after rmsnap
        last_destroyed = self._get_last_destroyed_snap(rank=0)
        self.mount_a.run_shell(["rmdir", "d0/d1/dir/.snap/s1"])
        self.wait_until_true(
            lambda: len(self._get_pending_snap_destroy(rank=0)) == 0,
            timeout=30)
        self.assertGreater(self._get_last_destroyed_snap(rank=0),
                           last_destroyed)

        snaps_dump = self._get_snapserver_dump(rank=0)
        self.assertTrue(_check_snapclient_cache(snaps_dump, rank=0))
        self.assertTrue(_check_snapclient_cache(snaps_dump, rank=1))
        self.assertTrue(_check_snapclient_cache(snaps_dump, rank=2))

        # sync during MDS recovery
        self.fs.rank_fail(rank=2)
        status = self.fs.wait_for_daemons(timeout=MDS_RESTART_GRACE)
        self.assertTrue(_check_snapclient_cache(snaps_dump, rank=2))

        self.fs.rank_fail(rank=0)
        self.fs.rank_fail(rank=1)
        status = self.fs.wait_for_daemons()
        self.fs.wait_for_state('up:active', rank=0, timeout=MDS_RESTART_GRACE)
        self.assertTrue(_check_snapclient_cache(snaps_dump, rank=0))
        self.assertTrue(_check_snapclient_cache(snaps_dump, rank=1))
        self.assertTrue(_check_snapclient_cache(snaps_dump, rank=2))

        # kill at MDSTableClient::handle_notify_prep
        status = self.fs.status()
        rank2 = self.fs.get_rank(rank=2, status=status)
        self.fs.rank_freeze(True, rank=2)
        self.fs.rank_asok(['config', 'set', "mds_kill_mdstable_at", "9"],
                          rank=2,
                          status=status)
        proc = self.mount_a.run_shell(["mkdir", "d0/d1/dir/.snap/s3"],
                                      wait=False)
        self.wait_until_true(lambda: "laggy_since" in self.fs.get_rank(rank=2),
                             timeout=grace * 2)
        self.delete_mds_coredump(rank2['name'])

        # mksnap should wait for notify ack from mds.2
        self.assertFalse(proc.finished)

        # mksnap should proceed after mds.2 fails
        self.fs.rank_fail(rank=2)
        self.wait_until_true(lambda: proc.finished, timeout=30)

        self.fs.mds_restart(rank2['name'])
        self.wait_for_daemon_start([rank2['name']])
        status = self.fs.wait_for_daemons(timeout=MDS_RESTART_GRACE)

        self.mount_a.run_shell(["rmdir", Raw("d0/d1/dir/.snap/*")])

        # kill at MDSTableClient::commit
        # the recovering MDS should sync every rank's snapclient cache when it enters the resolve stage
        self.set_conf("mds", "mds_reconnect_timeout", "5")
        for i in range(1, 4):
            status = self.fs.status()
            rank2 = self.fs.get_rank(rank=2, status=status)
            self.fs.rank_freeze(True, rank=2)
            self.fs.rank_asok(['config', 'set', "mds_kill_mdstable_at", "4"],
                              rank=2,
                              status=status)
            last_created = self._get_last_created_snap(rank=0)
            proc = self.mount_a.run_shell(
                ["mkdir", "d0/d2/dir/.snap/s{0}".format(i)], wait=False)
            self.wait_until_true(
                lambda: "laggy_since" in self.fs.get_rank(rank=2),
                timeout=grace * 2)
            self.delete_mds_coredump(rank2['name'])

            self.mount_a.kill()
            self.mount_a.kill_cleanup()

            self.assertEqual(len(self._get_pending_snap_update(rank=0)), 1)

            if i in [2, 4]:
                self.fs.rank_fail(rank=0)
            if i in [3, 4]:
                self.fs.rank_fail(rank=1)

            self.fs.rank_fail(rank=2)
            self.fs.mds_restart(rank2['name'])
            self.wait_for_daemon_start([rank2['name']])
            status = self.fs.wait_for_daemons(timeout=MDS_RESTART_GRACE)

            rank0_cache = self._get_snapclient_dump(rank=0)
            rank1_cache = self._get_snapclient_dump(rank=1)
            rank2_cache = self._get_snapclient_dump(rank=2)

            self.assertGreater(int(rank0_cache["last_created"]), last_created)
            self.assertEqual(rank0_cache, rank1_cache)
            self.assertEqual(rank0_cache, rank2_cache)

            self.wait_until_true(
                lambda: len(self._get_pending_snap_update(rank=0)) == 0,
                timeout=30)

            snaps_dump = self._get_snapserver_dump(rank=0)
            self.assertEqual(snaps_dump["last_created"],
                             rank0_cache["last_created"])
            self.assertTrue(
                _check_snapclient_cache(snaps_dump, cache_dump=rank0_cache))

            self.mount_a.mount()
            self.mount_a.wait_until_mounted()

        self.mount_a.run_shell(["rmdir", Raw("d0/d2/dir/.snap/*")])
Code example #9
import contextlib
import logging

# Imports reconstructed so the excerpt stands alone; exact module paths
# can differ between ceph/teuthology versions.
from teuthology import misc
from teuthology.orchestra.run import CommandFailedError, Raw
from tasks import ceph_manager
from tasks.cephfs.filesystem import Filesystem

log = logging.getLogger(__name__)


@contextlib.contextmanager
def task(ctx, config):
    """
    Go through filesystem creation with a synthetic failure in an MDS
    in its 'up:creating' state, to exercise the retry behaviour.
    """
    # Grab handles to the teuthology objects of interest
    mdslist = list(misc.all_roles_of_type(ctx.cluster, 'mds'))
    if len(mdslist) != 1:
        # Require exactly one MDS, the code path for creation failure when
        # a standby is available is different
        raise RuntimeError("This task requires exactly one MDS")

    mds_id = mdslist[0]
    (mds_remote, ) = ctx.cluster.only(
        'mds.{_id}'.format(_id=mds_id)).remotes.keys()
    manager = ceph_manager.CephManager(
        mds_remote,
        ctx=ctx,
        logger=log.getChild('ceph_manager'),
    )

    # Handles for the filesystem and the MDS daemon under test (the original
    # snippet referenced an undefined `self` and `mds`; these bindings are
    # the likely intent).
    fs = Filesystem(ctx)
    mds = ctx.daemons.get_daemon('mds', mds_id)

    # Stop MDS
    fs.set_max_mds(0)
    fs.mds_stop(mds_id)
    fs.mds_fail(mds_id)

    # Reset the filesystem so that next start will go into CREATING
    manager.raw_cluster_cmd('fs', 'rm', "default", "--yes-i-really-mean-it")
    manager.raw_cluster_cmd('fs', 'new', "default", "metadata", "data")

    # Start the MDS with mds_kill_create_at set; it will crash during creation
    mds.restart_with_args(["--mds_kill_create_at=1"])
    try:
        mds.wait_for_exit()
    except CommandFailedError as e:
        if e.exitstatus == 1:
            log.info("MDS creation killed as expected")
        else:
            log.error("Unexpected status code %s" % e.exitstatus)
            raise

    # Since I have intentionally caused a crash, I will clean up the resulting core
    # file to avoid task.internal.coredump seeing it as a failure.
    log.info("Removing core file from synthetic MDS failure")
    mds_remote.run(args=[
        'rm', '-f',
        Raw("{archive}/coredump/*.core".format(
            archive=misc.get_archive_dir(ctx)))
    ])

    # It should have left the MDS map state still in CREATING
    status = fs.status().get_mds(mds_id)
    assert status['state'] == 'up:creating'

    # Start the MDS again without the kill flag set; it should complete creation successfully
    mds.restart()

    # Wait for state ACTIVE
    fs.wait_for_state("up:active", timeout=120, mds_id=mds_id)

    # The system should be back up in a happy healthy state, go ahead and run any further tasks
    # inside this context.
    yield
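For context, teuthology runs a task like this as a context manager: everything before the yield is setup, the job's nested tasks execute at the yield point, and code after it (none here) would run as the job unwinds. A skeleton of the pattern, with hypothetical helper names:

    import contextlib

    @contextlib.contextmanager
    def task(ctx, config):
        set_up(ctx)         # hypothetical setup; runs when the task starts
        try:
            yield           # nested tasks from the teuthology job run here
        finally:
            tear_down(ctx)  # hypothetical cleanup; runs as the job unwinds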
Code example #10
File: mount.py Project: zhuanjiaozhou/ceph
    def run_shell_payload(self, payload, **kwargs):
        return self.run_shell(["bash", "-c", Raw(f"'{payload}'")], **kwargs)
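A hypothetical call, with mount_a standing in for any such mount object: the payload is spliced between single quotes and handed to bash -c through Raw, so teuthology does not re-quote it and its metacharacters survive intact.

    # Runs the whole payload as one bash command on the remote mount.
    mount_a.run_shell_payload("mkdir -p d0/d1 && echo data > d0/d1/f")

One caveat worth noting: because the payload is spliced between single quotes, a payload that itself contains a single quote will break the command line.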