def test_journal_smoke(self):
    """Exercise cephfs-journal-tool's smoke workunit with the MDS offline,
    bracketed by trivial-sync workunits to confirm the filesystem works
    before and after."""
    # Baseline client activity before taking the MDS down.
    workunit(self.ctx, {
        'clients': {
            "client.{0}".format(self.mount_a.client_id): [
                "fs/misc/trivial_sync.sh"],
        },
        "timeout": "1h"
    })

    for m in self.mounts:
        m.umount_wait()

    self.fs.mds_stop()
    self.fs.mds_fail()

    # Journal tool smoke test: runs while the MDS is stopped/failed.
    workunit(self.ctx, {
        'clients': {
            "client.{0}".format(self.mount_a.client_id): [
                "suites/cephfs_journal_tool_smoke.sh"],
        },
        "timeout": "1h"
    })

    self.fs.mds_restart()
    self.fs.wait_for_daemons()
    self.mount_a.mount()

    # Trivial sync on mount_a: confirm the filesystem is still usable.
    workunit(self.ctx, {
        'clients': {
            "client.{0}".format(self.mount_a.client_id): [
                "fs/misc/trivial_sync.sh"],
        },
        "timeout": "1h"
    })
def test_journal_smoke(self):
    """Run the cephfs-journal-tool smoke workunit against a stopped MDS,
    with trivial-sync workunits before and after as sanity checks."""
    trivial_sync_spec = {
        'clients': {
            "client.{0}".format(self.mount_a.client_id): [
                "fs/misc/trivial_sync.sh"],
        },
        "timeout": "1h"
    }

    # Generate some baseline client activity.
    workunit(self.ctx, trivial_sync_spec)

    for client_mount in self.mounts:
        client_mount.umount_wait()

    self.fs.mds_stop()
    self.fs.mds_fail()

    # Journal tool smoke test: executed while no MDS is active.
    workunit(self.ctx, {
        'clients': {
            "client.{0}".format(self.mount_a.client_id): [
                "suites/cephfs_journal_tool_smoke.sh"],
        },
        "timeout": "1h"
    })

    self.fs.mds_restart()
    self.fs.wait_for_daemons()
    self.mount_a.mount()

    # Trivial sync on mount_a: verify the filesystem still works.
    workunit(self.ctx, trivial_sync_spec)
def test_journal_migration(self):
    """Create a filesystem with the legacy journal format, populate it,
    switch the MDS to the resilient format, and verify the journal was
    rewritten, is readable by cephfs-journal-tool, and that client data
    survived the migration.

    Raises RuntimeError if the journal version was not upgraded, if the
    journal-tool inspection does not report OK, or if suspiciously few
    journal events are present after the fsstress workload.
    """
    old_journal_version = JOURNAL_FORMAT_LEGACY
    new_journal_version = JOURNAL_FORMAT_RESILIENT

    self.fs.set_ceph_conf('mds', 'mds journal format', old_journal_version)

    # Create a filesystem using the older journal format.
    self.mount_a.umount_wait()
    self.fs.mds_stop()
    self.fs.reset()
    self.fs.mds_restart()
    self.fs.wait_for_daemons()

    # Do some client work so that the log is populated with something.
    with self.mount_a.mounted():
        self.mount_a.create_files()
        self.mount_a.check_files()  # sanity, this should always pass

        # Run a more substantial workunit so that the length of the log to be
        # converted is going to span at least a few segments
        workunit(self.ctx, {
            'clients': {
                "client.{0}".format(self.mount_a.client_id): ["suites/fsstress.sh"],
            },
            "timeout": "3h"
        })

    # Modify the ceph.conf to ask the MDS to use the new journal format.
    self.fs.set_ceph_conf('mds', 'mds journal format', new_journal_version)

    # Restart the MDS; this ensures that all daemons come up into a
    # valid state.  (The original called wait_for_daemons() twice
    # back-to-back; once is sufficient.)
    self.fs.mds_fail_restart()
    self.fs.wait_for_daemons()

    # Check that files created in the initial client workload are still visible
    # in a client mount.
    with self.mount_a.mounted():
        self.mount_a.check_files()

    # Verify that the journal really has been rewritten.
    journal_version = self.fs.get_journal_version()
    if journal_version != new_journal_version:
        # BUG FIX: was journal_version() — calling an int raised
        # TypeError instead of producing the intended error message.
        raise RuntimeError("Journal was not upgraded, version should be {0} but is {1}".format(
            new_journal_version, journal_version
        ))

    # Verify that cephfs-journal-tool can now read the rewritten journal
    inspect_out = self.fs.journal_tool(["journal", "inspect"])
    if not inspect_out.endswith(": OK"):
        raise RuntimeError("Unexpected journal-tool result: '{0}'".format(
            inspect_out
        ))

    self.fs.journal_tool(["event", "get", "json", "--path", "/tmp/journal.json"])
    p = self.fs.tool_remote.run(
        args=[
            "python",
            "-c",
            "import json; print len(json.load(open('/tmp/journal.json')))"
        ],
        stdout=StringIO())
    event_count = int(p.stdout.getvalue().strip())
    if event_count < 1000:
        # Approximate value of "lots", expected from having run fsstress
        raise RuntimeError("Unexpectedly few journal events: {0}".format(event_count))

    # Do some client work so that the log is populated with something.
    with self.mount_a.mounted():
        workunit(self.ctx, {
            'clients': {
                "client.{0}".format(self.mount_a.client_id): ["fs/misc/trivial_sync.sh"],
            },
            "timeout": "3h"
        })
def test_journal_migration(self):
    """Migrate a legacy-format CephFS journal to the resilient format
    while a standby-replay daemon is present (regression coverage for
    bug #8811), then verify the rewritten journal and client data.

    Raises RuntimeError if the journal version was not upgraded, if the
    journal-tool inspection does not report OK, or if suspiciously few
    journal events are present after the fsstress workload.
    """
    old_journal_version = JOURNAL_FORMAT_LEGACY
    new_journal_version = JOURNAL_FORMAT_RESILIENT

    self.mount_a.umount_wait()
    self.fs.mds_stop()

    # Create a filesystem using the older journal format.
    self.fs.set_ceph_conf('mds', 'mds journal format', old_journal_version)
    self.fs.mds_restart()
    self.fs.recreate()

    # Enable standby replay, to cover the bug case #8811 where
    # a standby replay might mistakenly end up trying to rewrite
    # the journal at the same time as an active daemon.
    self.fs.set_allow_standby_replay(True)
    status = self.fs.wait_for_daemons()
    self.assertTrue(self.fs.get_replay(status=status) is not None)

    # Do some client work so that the log is populated with something.
    with self.mount_a.mounted():
        self.mount_a.create_files()
        self.mount_a.check_files()  # sanity, this should always pass

        # Run a more substantial workunit so that the length of the log to be
        # converted is going to span at least a few segments
        workunit(self.ctx, {
            'clients': {
                "client.{0}".format(self.mount_a.client_id): ["suites/fsstress.sh"],
            },
            "timeout": "3h"
        })

    # Modify the ceph.conf to ask the MDS to use the new journal format.
    self.fs.set_ceph_conf('mds', 'mds journal format', new_journal_version)

    # Restart the MDS.
    self.fs.mds_fail_restart()

    # This ensures that all daemons come up into a valid state
    status = self.fs.wait_for_daemons()

    # Check that files created in the initial client workload are still visible
    # in a client mount.
    with self.mount_a.mounted():
        self.mount_a.check_files()

    # Verify that the journal really has been rewritten.
    journal_version = self.fs.get_journal_version()
    if journal_version != new_journal_version:
        # BUG FIX: was journal_version() — calling an int raised
        # TypeError instead of producing the intended error message.
        raise RuntimeError("Journal was not upgraded, version should be {0} but is {1}".format(
            new_journal_version, journal_version
        ))

    # Verify that cephfs-journal-tool can now read the rewritten journal
    inspect_out = self.fs.journal_tool(["journal", "inspect"], 0)
    if not inspect_out.endswith(": OK"):
        raise RuntimeError("Unexpected journal-tool result: '{0}'".format(
            inspect_out
        ))

    self.fs.journal_tool(["event", "get", "json", "--path", "/tmp/journal.json"], 0)
    p = self.fs.tool_remote.run(
        args=[
            "python",
            "-c",
            "import json; print len(json.load(open('/tmp/journal.json')))"
        ],
        stdout=StringIO())
    event_count = int(p.stdout.getvalue().strip())
    if event_count < 1000:
        # Approximate value of "lots", expected from having run fsstress
        raise RuntimeError("Unexpectedly few journal events: {0}".format(event_count))

    # Do some client work to check that writing the log is still working
    with self.mount_a.mounted():
        workunit(self.ctx, {
            'clients': {
                "client.{0}".format(self.mount_a.client_id): ["fs/misc/trivial_sync.sh"],
            },
            "timeout": "3h"
        })

    # Check that both an active and a standby replay are still up
    status = self.fs.status()
    self.assertEqual(len(list(self.fs.get_replays(status=status))), 1)
    self.assertEqual(len(list(self.fs.get_ranks(status=status))), 1)
def test_journal_migration(self):
    """Migrate a legacy-format CephFS journal to the resilient format
    using two named daemons — one active, one standby-replay (regression
    coverage for bug #8811) — then verify the rewritten journal, client
    data, and that both daemons are still running.

    Raises RuntimeError if the journal version was not upgraded, if the
    journal-tool inspection does not report OK, or if suspiciously few
    journal events are present after the fsstress workload.
    """
    old_journal_version = JOURNAL_FORMAT_LEGACY
    new_journal_version = JOURNAL_FORMAT_RESILIENT

    # Pick out two daemons to use
    mds_a, mds_b = sorted(self.mds_cluster.mds_ids[0:2])

    self.mount_a.umount_wait()
    self.fs.mds_stop()

    # Enable standby replay, to cover the bug case #8811 where
    # a standby replay might mistakenly end up trying to rewrite
    # the journal at the same time as an active daemon.
    self.fs.set_ceph_conf('mds', 'mds standby replay', "true")
    self.fs.set_ceph_conf('mds', 'mds standby for rank', "0")

    # Create a filesystem using the older journal format.
    self.fs.set_ceph_conf('mds', 'mds journal format', old_journal_version)
    self.fs.recreate()
    self.fs.mds_restart(mds_id=mds_a)
    self.fs.wait_for_daemons()
    self.assertEqual(self.fs.get_active_names(), [mds_a])

    def replay_names():
        # Names of daemons currently in standby-replay for this fs.
        return [s['name']
                for s in self.fs.status().get_replays(fscid=self.fs.id)]

    # Start the standby and wait for it to come up
    self.fs.mds_restart(mds_id=mds_b)
    self.wait_until_equal(
        replay_names,
        [mds_b],
        timeout=30)

    # Do some client work so that the log is populated with something.
    with self.mount_a.mounted():
        self.mount_a.create_files()
        self.mount_a.check_files()  # sanity, this should always pass

        # Run a more substantial workunit so that the length of the log to be
        # converted is going to span at least a few segments
        workunit(self.ctx, {
            'clients': {
                "client.{0}".format(self.mount_a.client_id): ["suites/fsstress.sh"],
            },
            "timeout": "3h"
        })

    # Modify the ceph.conf to ask the MDS to use the new journal format.
    self.fs.set_ceph_conf('mds', 'mds journal format', new_journal_version)

    # Restart the MDS.
    self.fs.mds_fail_restart(mds_id=mds_a)
    self.fs.mds_fail_restart(mds_id=mds_b)

    # This ensures that all daemons come up into a valid state
    self.fs.wait_for_daemons()

    # Check that files created in the initial client workload are still visible
    # in a client mount.
    with self.mount_a.mounted():
        self.mount_a.check_files()

    # Verify that the journal really has been rewritten.
    journal_version = self.fs.get_journal_version()
    if journal_version != new_journal_version:
        # BUG FIX: was journal_version() — calling an int raised
        # TypeError instead of producing the intended error message.
        raise RuntimeError("Journal was not upgraded, version should be {0} but is {1}".format(
            new_journal_version, journal_version
        ))

    # Verify that cephfs-journal-tool can now read the rewritten journal
    inspect_out = self.fs.journal_tool(["journal", "inspect"], 0)
    if not inspect_out.endswith(": OK"):
        raise RuntimeError("Unexpected journal-tool result: '{0}'".format(
            inspect_out
        ))

    self.fs.journal_tool(["event", "get", "json", "--path", "/tmp/journal.json"], 0)
    p = self.fs.tool_remote.run(
        args=[
            "python",
            "-c",
            "import json; print len(json.load(open('/tmp/journal.json')))"
        ],
        stdout=StringIO())
    event_count = int(p.stdout.getvalue().strip())
    if event_count < 1000:
        # Approximate value of "lots", expected from having run fsstress
        raise RuntimeError("Unexpectedly few journal events: {0}".format(event_count))

    # Do some client work to check that writing the log is still working
    with self.mount_a.mounted():
        workunit(self.ctx, {
            'clients': {
                "client.{0}".format(self.mount_a.client_id): ["fs/misc/trivial_sync.sh"],
            },
            "timeout": "3h"
        })

    # Check that both an active and a standby replay are still up
    self.assertEqual(len(replay_names()), 1)
    self.assertEqual(len(self.fs.get_active_names()), 1)
    self.assertTrue(self.mds_cluster.mds_daemons[mds_a].running())
    self.assertTrue(self.mds_cluster.mds_daemons[mds_b].running())
def task(ctx, config):
    """
    Given a Ceph cluster has already been set up, exercise the migration
    of the CephFS journal from an older format to the latest format.  On
    successful completion the filesystem will be running with a journal
    in the new format.

    Optionally specify which client to use like this:

    - mds-journal_migration:
        client: client.0

    :param ctx: teuthology run context; must carry 'ceph' and 'mounts'
        attributes from the enclosing ceph and kclient/ceph_fuse tasks.
    :param config: optional dict; may contain a 'client' role name.
    :raises RuntimeError: on missing prerequisites, missing client,
        failed journal upgrade, journal-tool failure, or too few
        journal events.

    Note: this is a generator-style task (it yields after setup).
    """
    if not hasattr(ctx, 'ceph'):
        raise RuntimeError("This task must be nested in 'ceph' task")

    if not hasattr(ctx, 'mounts'):
        raise RuntimeError("This task must be nested inside 'kclient' or 'ceph_fuse' task")

    # Determine which client we will use
    if config and 'client' in config:
        # Use client specified in config
        client_role = config['client']
        client_list = list(misc.get_clients(ctx, [client_role]))
        try:
            client_id = client_list[0][0]
        except IndexError:
            raise RuntimeError("Client role '{0}' not found".format(client_role))
    else:
        # Pick one arbitrary client to use
        client_list = list(misc.all_roles_of_type(ctx.cluster, 'client'))
        try:
            client_id = client_list[0]
        except IndexError:
            raise RuntimeError("This task requires at least one client")

    fs = Filesystem(ctx)
    ctx.fs = fs
    old_journal_version = JOURNAL_FORMAT_LEGACY
    new_journal_version = JOURNAL_FORMAT_RESILIENT

    fs.set_ceph_conf('mds', 'mds journal format', old_journal_version)

    # Create a filesystem using the older journal format.
    for mount in ctx.mounts.values():
        mount.umount_wait()
    fs.mds_stop()
    fs.reset()
    fs.mds_restart()

    # Do some client work so that the log is populated with something.
    mount = ctx.mounts[client_id]
    with mount.mounted():
        mount.create_files()
        mount.check_files()  # sanity, this should always pass

        # Run a more substantial workunit so that the length of the log to be
        # converted is going to span at least a few segments
        workunit(ctx, {
            'clients': {
                "client.{0}".format(client_id): ["suites/fsstress.sh"],
            },
            "timeout": "3h"
        })

    # Modify the ceph.conf to ask the MDS to use the new journal format.
    fs.set_ceph_conf('mds', 'mds journal format', new_journal_version)

    # Restart the MDS; this ensures that all daemons come up into a
    # valid state.  (The original called wait_for_daemons() twice
    # back-to-back; once is sufficient.)
    fs.mds_fail_restart()
    fs.wait_for_daemons()

    # Check that files created in the initial client workload are still visible
    # in a client mount.
    with mount.mounted():
        mount.check_files()

    # Verify that the journal really has been rewritten.
    journal_version = fs.get_journal_version()
    if journal_version != new_journal_version:
        # BUG FIX: was journal_version() — calling an int raised
        # TypeError instead of producing the intended error message.
        raise RuntimeError("Journal was not upgraded, version should be {0} but is {1}".format(
            new_journal_version, journal_version
        ))

    # Verify that cephfs-journal-tool can now read the rewritten journal
    proc = mount.client_remote.run(
        args=["cephfs-journal-tool", "journal", "inspect"],
        stdout=StringIO())
    if not proc.stdout.getvalue().strip().endswith(": OK"):
        raise RuntimeError("Unexpected journal-tool result: '{0}'".format(
            proc.stdout.getvalue()
        ))

    mount.client_remote.run(
        args=["sudo", "cephfs-journal-tool", "event", "get", "json",
              "--path", "/tmp/journal.json"])
    proc = mount.client_remote.run(
        args=[
            "python",
            "-c",
            "import json; print len(json.load(open('/tmp/journal.json')))"
        ],
        stdout=StringIO())
    event_count = int(proc.stdout.getvalue().strip())
    if event_count < 1000:
        # Approximate value of "lots", expected from having run fsstress
        raise RuntimeError("Unexpectedly few journal events: {0}".format(event_count))

    # Leave all MDSs and clients running for any child tasks
    for mount in ctx.mounts.values():
        mount.mount()
        mount.wait_until_mounted()

    yield