def test_invalid_memory_read_after_freed(self):
    """
    Test case:
    1. Create a volume and start it.
    2. Mount the volume using FUSE.
    3. Create multiple level of dirs and files inside every dir.
    4. Rename files such that linkto files are created.
    5. From the mount point do an rm -rf * and check if all files
       are delete or not from mount point as well as backend bricks.
    """
    # Fetch timestamp to check for core files generated during the test
    ret, test_timestamp, _ = g.run(self.mnode, "date +%s")
    self.assertEqual(ret, 0, "date command failed")
    test_timestamp = test_timestamp.strip()

    # Create multiple level of dirs and files inside every dir
    # (no `cd ..`, so each dir$i nests inside the previous one)
    cmd = ("cd %s; for i in {1..100}; do mkdir dir$i; cd dir$i; "
           "for i in {1..200}; do dd if=/dev/urandom of=file$i bs=1K"
           " count=1; done; done" % self.mounts[0].mountpoint)
    ret, _, _ = g.run(self.first_client, cmd)
    self.assertFalse(ret, "Failed to create dirs and files")

    # Rename files such that linkto files are created
    cmd = ("cd %s; for i in {1..100}; do cd dir$i; for i in {1..200}; do "
           "mv file$i ntfile$i; done; done" % self.mounts[0].mountpoint)
    ret, _, _ = g.run(self.first_client, cmd)
    self.assertFalse(ret, "Failed to rename files")
    g.log.info("Files created and renamed successfully")

    # From the mount point do an rm -rf * and check if all files
    # are deleted or not from mount point as well as backend bricks.
    ret, _, _ = g.run(self.first_client,
                      "rm -rf {}/*".format(self.mounts[0].mountpoint))
    # Typo fix: message previously read "rn -rf"
    self.assertFalse(ret, "rm -rf * failed on mount point")

    ret = get_dir_contents(self.first_client,
                           "{}/".format(self.mounts[0].mountpoint))
    self.assertEqual(ret, [], "Unexpected: Files and directories still "
                     "seen from mount point")

    for brick in get_all_bricks(self.mnode, self.volname):
        node, brick_path = brick.split(":")
        ret = get_dir_contents(node, "{}/".format(brick_path))
        self.assertEqual(ret, [], "Unexpected: Files and dirs still seen "
                         "on brick %s on node %s" % (brick_path, node))
    g.log.info("rm -rf * on mount point successful")

    # Check for core file on servers and clients
    servers = self.servers + [self.first_client]
    ret = is_core_file_created(servers, test_timestamp)
    self.assertTrue(ret, "Core files found on servers used for test")
    g.log.info("No cores found on all participating servers")
def check_mount_point_and_bricks_for_xattr(self, list_of_all_files):
    """Assert the custom 'user.foo' xattr is gone everywhere.

    Verifies the attribute is no longer visible either through any
    mount point (for every path in *list_of_all_files*) or directly
    on the backend bricks (for ``self.list_of_files``).
    """
    # The xattr must not be visible through any of the mounts
    for mount_obj in self.mounts:
        for path in list_of_all_files:
            self.assertIsNone(
                get_fattr(mount_obj.client_system, path, 'user.foo'),
                "Custom attribute visible at mount "
                "point even after deletion")

    # The xattr must not be visible directly on the bricks either
    for brick in get_all_bricks(self.mnode, self.volname):
        node, brick_path = brick.split(':')
        present = get_dir_contents(node, brick_path)
        for fname in self.list_of_files:
            basename = fname.split('/')[3]
            if basename not in present:
                # File hashed to a different brick; nothing to check here
                continue
            self.assertIsNone(
                get_fattr(node, "{}/{}".format(brick_path, basename),
                          'user.foo'),
                "Custom attribute visible on "
                "brick even after deletion")
    g.log.info("Custom xattr for file is not visible on "
               "mount point and bricks")
def check_custom_xattr_visible(self, xattr_val):
    """Assert the custom 'user.foo' xattr equals *xattr_val*.

    The value is validated both through every mount point (for
    ``self.files_and_soft_links``) and directly on the backend bricks
    (for ``self.list_of_files``).
    """
    # Validate the value through each mount
    for mount_obj in self.mounts:
        for path in self.files_and_soft_links:
            value = get_fattr(mount_obj.client_system, path, 'user.foo')
            self.assertEqual(value, xattr_val,
                             "Custom xattr not found from mount.")
    g.log.info("Custom xattr found on mount point.")

    # Validate the value directly on each brick
    for brick in get_all_bricks(self.mnode, self.volname):
        node, brick_path = brick.split(':')
        on_brick = get_dir_contents(node, brick_path)
        for fname in self.list_of_files:
            basename = fname.split('/')[3]
            if basename not in on_brick:
                # File hashed to a different brick; skip it here
                continue
            value = get_fattr(node, "{}/{}".format(brick_path, basename),
                              'user.foo')
            self.assertEqual(value, xattr_val,
                             "Custom xattr not visible on bricks")
    g.log.info("Custom xattr found on bricks.")
def _check_custom_xattr_trusted_foo(self, xattr_val, visible=True):
    """Check the custom 'trusted.foo' xattr on mount point and bricks.

    When *visible* is True the xattr must equal *xattr_val*; otherwise
    it must be absent everywhere.
    """
    # Validate via the mount point on the first client
    for path in self.list_of_device_files:
        value = get_fattr(self.clients[0], path, 'trusted.foo',
                          encode='text')
        if visible:
            self.assertEqual(value, xattr_val,
                             "Custom xattr not found from mount.")
        else:
            self.assertIsNone(
                value, "Custom attribute visible at mount "
                "point even after deletion")

    # Validate directly on every brick that actually holds the file
    for brick in get_all_bricks(self.mnode, self.volname):
        node, brick_path = brick.split(':')
        on_brick = get_dir_contents(node, brick_path)
        for fname in self.file_names:
            if fname not in on_brick:
                continue
            value = get_fattr(node, "{}/{}".format(brick_path, fname),
                              'trusted.foo', encode='text')
            if visible:
                self.assertEqual(value, xattr_val,
                                 "Custom xattr not visible on bricks")
            else:
                self.assertIsNone(
                    value, "Custom attribute visible on "
                    "brick even after deletion")
def setUp(self):
    """setUp: clean leftover brick dirs, then create and mount the volume.

    Raises:
        ExecutionError: if brick cleanup or volume setup/mount fails.
    """
    # Calling GlusterBaseClass setUp FIRST so base-class state is
    # initialized before we use it (original called it last, which
    # contradicts its own comment and the usual glusto convention).
    self.get_super_method(self, 'setUp')()

    bricks = get_servers_bricks_dict(self.servers,
                                     self.all_servers_info)

    # Checking brick dir and cleaning it (leftovers from deleted volumes)
    for server in self.servers:
        for brick in bricks[server]:
            if get_dir_contents(server, brick):
                cmd = "rm -rf " + brick + "/*"
                ret, _, _ = g.run(server, cmd)
                if ret:
                    raise ExecutionError("Failed to delete the brick "
                                         "dirs of deleted volume.")

    # Creating Volume
    ret = self.setup_volume_and_mount_volume(self.mounts)
    if not ret:
        raise ExecutionError("Volume creation or mount failed: %s"
                             % self.volname)
    # Typo fix: "Volme" -> "Volume"
    g.log.info("Volume created and mounted successfully : %s",
               self.volname)
def test_mount_remove_client_logs_dir_remount(self):
    # pylint: disable=too-many-statements
    """
    1. Create all types of volumes and start them.
    2. Mount all volumes on clients.
    3. Delete /var/log/glusterfs folder on client.
    4. Run IO on all the mount points.
    5. Unmount and remount all volumes.
    6. Check if logs are regenerated or not.
    """
    # Mounting the volume.
    ret, _, _ = mount_volume(self.volname, mtype=self.mount_type,
                             mpoint=self.mounts[0].mountpoint,
                             mserver=self.mnode,
                             mclient=self.mounts[0].client_system)
    self.assertEqual(ret, 0, ("Volume %s is not mounted.") % self.volname)
    g.log.info("Volume mounted successfully : %s", self.volname)

    # Moving /var/log/glusterfs to /root on the client so the log dir
    # disappears (comment previously claimed "Removing"; the command
    # is a move so the logs can be restored later).
    cmd = 'mv /var/log/glusterfs /root/'
    ret, _, _ = g.run(self.mounts[0].client_system, cmd)
    self.assertEqual(ret, 0, "Unable to remove /var/log/glusterfs dir.")
    g.log.info("Successfully removed /var/log/glusterfs on client: %s",
               self.mounts[0])

    # Running IO on the mount point.
    # Creating a dir on the mount point.
    ret = mkdir(self.mounts[0].client_system,
                self.mounts[0].mountpoint + "/dir")
    self.assertTrue(ret, "Failed to create dir.")
    g.log.info("dir created successfully for %s", self.mounts[0])

    # Creating a file on the mount point.
    cmd = ('touch %s/file' % self.mounts[0].mountpoint)
    ret, _, _ = g.run(self.mounts[0].client_system, cmd)
    self.assertEqual(ret, 0, "Failed to create file.")
    g.log.info("file created successfully for %s", self.mounts[0])

    # Unmounting and remounting volume.
    ret, _, _ = umount_volume(mclient=self.mounts[0].client_system,
                              mpoint=self.mounts[0].mountpoint)
    self.assertEqual(ret, 0,
                     ("Volume %s is not unmounted.") % self.volname)
    g.log.info("Volume unmounted successfully : %s", self.volname)

    ret, _, _ = mount_volume(self.volname, mtype=self.mount_type,
                             mpoint=self.mounts[0].mountpoint,
                             mserver=self.mnode,
                             mclient=self.mounts[0].client_system)
    self.assertEqual(ret, 0, ("Volume %s is not mounted.") % self.volname)
    g.log.info("Volume mounted successfully : %s", self.volname)

    # Checking if logs are regenerated or not.
    ret = get_dir_contents(self.mounts[0].client_system,
                           '/var/log/glusterfs/')
    self.assertIsNotNone(ret, 'Log files were not regenerated.')
    # Typo fix: "regenearted" -> "regenerated"
    g.log.info("Log files were properly regenerated.")
def _perfrom_lookups_on_mount_point(self, node, mountpoint):
    """Perform a lookup on the mount point and expect exactly 8 dirs.

    Args:
        node (str): Client host to run the lookup from.
        mountpoint (str): Mount point path to list.
    """
    ret = get_dir_contents(node, mountpoint)
    # Robustness fix: guard against a failed listing (None) before len()
    self.assertIsNotNone(ret, "Failed to list contents of %s on %s"
                         % (mountpoint, node))
    # Bug fix: the message placeholders were (node, mountpoint), i.e.
    # swapped relative to the wording; now they match.
    self.assertEqual(
        len(ret), 8,
        "8 dirs not present on mount point %s on node %s"
        % (mountpoint, node))
    g.log.info("Lookup successful on node %s and mount point %s",
               node, mountpoint)
def test_nuke_happy_path(self):
    """
    Test case:
    1. Create a distributed volume, start and mount it
    2. Create 1000 dirs and 1000 files under a directory say 'dir1'
    3. Set xattr glusterfs.dht.nuke to "test" for dir1
    4. Validate dir-1 is not seen from mount point
    5. Validate if the entry is moved to '/brickpath/.glusterfs/landfill'
       and deleted eventually.
    """
    # Create 1000 dirs and 1000 files under a directory say 'dir1'
    self.dir_1_path = "{}/dir1/".format(self.mounts[0].mountpoint)
    ret = mkdir(self.first_client, self.dir_1_path)
    self.assertTrue(ret, "Failed to create dir1 on mount point")
    cmd = ("cd {};for i in `seq 1 1000`;do mkdir dir$i;touch file$i;done"
           .format(self.dir_1_path))
    ret, _, _ = g.run(self.first_client, cmd)
    self.assertFalse(ret, "I/O failed at dir1 on mount point")

    # Set xattr glusterfs.dht.nuke to "test" for dir1
    ret = set_fattr(self.first_client, self.dir_1_path,
                    'glusterfs.dht.nuke', 'test')
    self.assertTrue(ret, "Failed to set xattr glusterfs.dht.nuke")

    # Validate dir-1 is not seen from mount point
    ret = get_dir_contents(self.first_client, self.mounts[0].mountpoint)
    self.assertEqual(
        [], ret, "UNEXPECTED: Mount point has files ideally it should "
        "be empty.")

    # Validate if the entry is moved to '/brickpath/.glusterfs/landfill'
    # and deleted eventually
    for brick_path in get_all_bricks(self.mnode, self.volname):
        node, path = brick_path.split(":")
        path = "{}/.glusterfs/landfill/*/".format(path)
        ret = get_dir_contents(node, path)
        # In case the landfill is already cleaned before checking,
        # stop execution of the loop (typo fix: was "landfile").
        if ret is None:
            g.log.info("Bricks have been already cleaned up.")
            break
        # NOTE(review): given the None-break above this assertion can
        # never fail; kept for parity with the original flow.
        self.assertIsNotNone(ret, "Files not present in "
                             "/.glusterfs/landfill dir")
    # Typo fix: "Successully" -> "Successfully"
    g.log.info("Successfully nuked dir1.")
def tearDown(self):
    """tearDown: unmount, clean all volumes, reform the cluster and wipe
    the brick directories.

    Raises:
        ExecutionError: on any cleanup failure.
    """
    # UnMount Volume
    g.log.info("Starting to Unmount Volume %s", self.volname)
    # Bug fix: umount_volume returns a (ret, out, err) tuple (see its
    # use elsewhere in this file), which is ALWAYS truthy — the old
    # `self.assertTrue(umount_volume(...))` could never fail. Unpack
    # and check the return code; also raise ExecutionError instead of
    # using test assertions inside tearDown.
    ret, _, _ = umount_volume(self.mounts[0].client_system,
                              self.mounts[0].mountpoint,
                              mtype=self.mount_type)
    if ret:
        raise ExecutionError("Failed to Unmount Volume %s"
                             % self.volname)
    g.log.info("Successfully Unmounted Volume %s", self.volname)

    # Clean up all volumes and peer probe to form cluster
    vol_list = get_volume_list(self.mnode)
    if vol_list is not None:
        for volume in vol_list:
            ret = cleanup_volume(self.mnode, volume)
            if not ret:
                raise ExecutionError("Failed to cleanup volume")
            g.log.info("Volume deleted successfully : %s", volume)

    # Peer probe detached servers
    pool = nodes_from_pool_list(self.mnode)
    for node in pool:
        peer_detach(self.mnode, node)
    ret = peer_probe_servers(self.mnode, self.servers)
    if not ret:
        raise ExecutionError("Failed to probe detached "
                             "servers %s" % self.servers)
    g.log.info("Peer probe success for detached "
               "servers %s", self.servers)

    bricks = get_servers_bricks_dict(self.servers,
                                     self.all_servers_info)

    # Checking brick dir and cleaning it.
    for server in self.servers:
        for brick in bricks[server]:
            if get_dir_contents(server, brick):
                cmd = "rm -rf " + brick + "/*"
                ret, _, _ = g.run(server, cmd)
                if ret:
                    raise ExecutionError("Failed to delete the brick "
                                         "dirs of deleted volume.")

    self.get_super_method(self, 'tearDown')()
def test_metadata_self_heal_client_side_heal(self):
    """
    Testcase steps:
    1.Turn off the options self heal daemon
    2.Create IO
    3.Calculate arequal of the bricks and mount point
    4.Bring down "brick1" process
    5.Change the permissions of the directories and files
    6.Change the ownership of the directories and files
    7.Change the group of the directories and files
    8.Bring back the brick "brick1" process
    9.Execute "find . | xargs stat" from the mount point to trigger heal
    10.Verify the changes in permissions are not self healed on brick1
    11.Verify the changes in permissions on all bricks but brick1
    12.Verify the changes in ownership are not self healed on brick1
    13.Verify the changes in ownership on all the bricks but brick1
    14.Verify the changes in group are not successfully self-healed
       on brick1
    15.Verify the changes in group on all the bricks but brick1
    16.Turn on the option metadata-self-heal
    17.Execute "find . | xargs md5sum" from the mount point to trgger heal
    18.Wait for heal to complete
    19.Verify the changes in permissions are self-healed on brick1
    20.Verify the changes in ownership are successfully self-healed
       on brick1
    21.Verify the changes in group are successfully self-healed on brick1
    22.Calculate arequal check on all the bricks and mount point
    """
    # Setting options: disable the self-heal daemon so only client-side
    # (access-triggered) heal can run
    ret = set_volume_options(self.mnode, self.volname,
                             {"self-heal-daemon": "off"})
    self.assertTrue(ret, 'Failed to set options self-heal-daemon '
                    'and metadata-self-heal to OFF')
    g.log.info("Options are set successfully")

    # Creating files on client side: 100 dirs, 5 files each, under the
    # heal test folder on every mount
    self.test_meta_data_self_heal_folder = 'test_meta_data_self_heal'
    for mount_object in self.mounts:
        command = ("cd {0}/ ; mkdir {1} ; cd {1}/ ;"
                   "for i in `seq 1 100` ; "
                   "do mkdir dir.$i ; "
                   "for j in `seq 1 5` ; "
                   "do dd if=/dev/urandom of=dir.$i/file.$j "
                   "bs=1K count=$j ; done ; done ;".format
                   (mount_object.mountpoint,
                    self.test_meta_data_self_heal_folder))
        proc = g.run_async(mount_object.client_system, command,
                           user=mount_object.user)
        self.all_mounts_procs.append(proc)

    # Validate IO
    self.validate_io_on_clients()

    # Calculate and check arequal of the bricks and mount point
    self.check_arequal_from_mount_point_and_bricks()

    # Select bricks to bring offline from a replica set: the first brick
    # of each subvol goes offline, the rest stay online
    subvols_dict = get_subvols(self.mnode, self.volname)
    subvols = subvols_dict['volume_subvols']
    bricks_to_bring_offline = []
    bricks_to_be_online = []
    for subvol in subvols:
        bricks_to_bring_offline.append(subvol[0])
        for brick in subvol[1:]:
            bricks_to_be_online.append(brick)

    # Bring bricks offline
    ret = bring_bricks_offline(self.volname, bricks_to_bring_offline)
    self.assertTrue(ret, 'Failed to bring bricks %s offline'
                    % bricks_to_bring_offline)

    ret = are_bricks_offline(self.mnode, self.volname,
                             bricks_to_bring_offline)
    self.assertTrue(ret, 'Bricks %s are not offline'
                    % bricks_to_bring_offline)
    g.log.info('Bringing bricks %s offline is successful',
               bricks_to_bring_offline)

    # Change the permissions of the directories and files:
    # dirs -> 555, files in dirs 1..50 -> 666, dirs 51..100 -> 444
    self.all_mounts_procs = []
    for mount_obj in self.mounts:
        command = ('cd {}/{}; '
                   'for i in `seq 1 100` ; '
                   'do chmod 555 dir.$i ; done ; '
                   'for i in `seq 1 50` ; '
                   'do for j in `seq 1 5` ; '
                   'do chmod 666 dir.$i/file.$j ; done ; done ; '
                   'for i in `seq 51 100` ; '
                   'do for j in `seq 1 5` ; '
                   'do chmod 444 dir.$i/file.$j ; done ; done ;'
                   .format(mount_obj.mountpoint,
                           self.test_meta_data_self_heal_folder))
        proc = g.run_async(mount_obj.client_system, command,
                           user=mount_obj.user)
        self.all_mounts_procs.append(proc)
    self.io_validation_complete = False

    # Validate IO
    self.validate_io_on_clients()

    # Change the ownership of the directories and files:
    # dirs 1..35 -> qa_func, 36..70 -> qa_system, 71..100 -> qa_perf
    self.all_mounts_procs = []
    for mount_obj in self.mounts:
        command = ('cd {}/{} ; '
                   'for i in `seq 1 35` ; '
                   'do chown -R qa_func dir.$i ; done ; '
                   'for i in `seq 36 70` ; '
                   'do chown -R qa_system dir.$i ; done ; '
                   'for i in `seq 71 100` ; '
                   'do chown -R qa_perf dir.$i ; done ;'
                   .format(mount_obj.mountpoint,
                           self.test_meta_data_self_heal_folder))
        proc = g.run_async(mount_obj.client_system, command,
                           user=mount_obj.user)
        self.all_mounts_procs.append(proc)
    self.io_validation_complete = False

    # Validate IO
    self.validate_io_on_clients()

    # Change the group of the directories and files to qa_all
    self.all_mounts_procs = []
    for mount_obj in self.mounts:
        command = ('cd {}/{}; '
                   'for i in `seq 1 100` ; '
                   'do chgrp -R qa_all dir.$i ; done ;'
                   .format(mount_obj.mountpoint,
                           self.test_meta_data_self_heal_folder))
        proc = g.run_async(mount_obj.client_system, command,
                           user=mount_obj.user)
        self.all_mounts_procs.append(proc)
    self.io_validation_complete = False

    # Validate IO
    self.validate_io_on_clients()

    # Bring brick online
    ret = bring_bricks_online(self.mnode, self.volname,
                              bricks_to_bring_offline)
    self.assertTrue(ret, 'Failed to bring bricks %s online'
                    % bricks_to_bring_offline)
    g.log.info('Bringing bricks %s online is successful',
               bricks_to_bring_offline)

    # Trigger heal from mount point (stat-based lookup)
    self.trigger_heal_from_mount_point()

    # Verify the changes are NOT self healed on brick1 for each subvol:
    # with metadata-self-heal still off, the formerly-offline brick must
    # keep the original metadata (dirs 755 root:root, files 644 root:root)
    for brick in bricks_to_bring_offline:
        node, brick_path = brick.split(':')
        dir_list = get_dir_contents(node, "{}/{}".format(
            brick_path, self.test_meta_data_self_heal_folder))
        self.assertIsNotNone(dir_list, "Dir list from "
                             "brick is empty")
        g.log.info("Successfully got dir list from bick")

        # Verify changes for dirs
        for folder in dir_list:
            ret = get_file_stat(node, "{}/{}/{}".format(
                brick_path, self.test_meta_data_self_heal_folder,
                folder))
            self.assertEqual('755', ret['access'],
                             "Permissions mismatch on node {}"
                             .format(node))
            self.assertEqual('root', ret['username'],
                             "User id mismatch on node {}"
                             .format(node))
            self.assertEqual('root', ret['groupname'],
                             "Group id mismatch on node {}"
                             .format(node))

            # Get list of files for each dir
            file_list = get_dir_contents(node, "{}/{}/{}".format(
                brick_path, self.test_meta_data_self_heal_folder,
                folder))
            self.assertIsNotNone(file_list, "File list from "
                                 "brick is empty.")
            g.log.info("Successfully got file list from bick.")

            if file_list:
                for file_name in file_list:
                    ret = get_file_stat(node, "{}/{}/{}/{}".format(
                        brick_path, self.test_meta_data_self_heal_folder,
                        folder, file_name))
                    self.assertEqual('644', ret['access'],
                                     "Permissions mismatch on node"
                                     " {} for file {}".format(node,
                                                              file_name))
                    self.assertEqual('root', ret['username'],
                                     "User id mismatch on node"
                                     " {} for file {}".format(node,
                                                              file_name))
                    self.assertEqual('root', ret['groupname'],
                                     "Group id mismatch on node"
                                     " {} for file {}".format(node,
                                                              file_name))

    # Verify the changes are self healed on all bricks except brick1
    # for each subvol
    self.check_permssions_on_bricks(bricks_to_be_online)

    # Setting options: now enable client-side metadata self-heal
    ret = set_volume_options(self.mnode, self.volname,
                             {"metadata-self-heal": "on"})
    self.assertTrue(ret, 'Failed to set options to ON.')
    g.log.info("Options are set successfully")

    # Trigger heal from mount point
    self.trigger_heal_from_mount_point()

    # Monitor heal completion
    ret = monitor_heal_completion(self.mnode, self.volname)
    self.assertTrue(ret, 'Heal has not yet completed')

    # Check if heal is completed
    ret = is_heal_complete(self.mnode, self.volname)
    self.assertTrue(ret, 'Heal is not complete')
    g.log.info('Heal is completed successfully')

    # Check for split-brain
    ret = is_volume_in_split_brain(self.mnode, self.volname)
    self.assertFalse(ret, 'Volume is in split-brain state')
    g.log.info('Volume is not in split-brain state')

    # Verify the changes are self healed on brick1 for each subvol
    self.check_permssions_on_bricks(bricks_to_bring_offline)

    # Calculate and check arequal of the bricks and mount point
    self.check_arequal_from_mount_point_and_bricks()
def check_permssions_on_bricks(self, bricks_list):
    """Check permissions, ownership and group on a given set of bricks.

    Expected post-heal state (set up by the metadata self-heal test):
      * every dir: mode 555, gid 1003; every file: gid 1003
      * files in dirs 1..50: mode 666; in dirs 51..100: mode 444
      * dirs (and their files) 1..35: uid 1000; 36..70: uid 1001;
        71..100: uid 1002

    The three near-identical uid loops and two mode loops of the
    original are collapsed into range tables; the checks performed are
    identical.
    """
    def _dir_path(brick_path, idx):
        # Absolute path of dir.<idx> inside the heal test folder
        return "{}/{}/dir.{}".format(
            brick_path, self.test_meta_data_self_heal_folder, str(idx))

    def _files_in(node, dirpath):
        # Fetch dir contents and assert the listing itself worked
        file_list = get_dir_contents(node, dirpath)
        self.assertIsNotNone(file_list, "File list from "
                             "brick is empty.")
        g.log.info("Successfully got file list from bick.")
        return file_list

    for brick in bricks_list:
        node, brick_path = brick.split(':')
        dir_list = get_dir_contents(node, "{}/{}".format(
            brick_path, self.test_meta_data_self_heal_folder))
        self.assertIsNotNone(dir_list, "Dir list from "
                             "brick is empty")
        g.log.info("Successfully got dir list from bick")

        # Verify mode and group for every dir, and group for its files
        for folder in dir_list:
            folder_path = "{}/{}/{}".format(
                brick_path, self.test_meta_data_self_heal_folder, folder)
            ret = get_file_stat(node, folder_path)
            self.assertEqual('555', ret['access'],
                             "Permissions mismatch on node {}"
                             .format(node))
            self.assertEqual('1003', ret['gid'],
                             "Group mismatch on node {}".format(node))

            for file_name in _files_in(node, folder_path):
                ret = get_file_stat(node, "{}/{}".format(folder_path,
                                                         file_name))
                self.assertEqual('1003', ret['gid'],
                                 "Group mismatch on node {}"
                                 .format(node))

        # Verify file permissions per dir range
        for start, end, mode in ((1, 51, '666'), (51, 101, '444')):
            for i in range(start, end):
                dirpath = _dir_path(brick_path, i)
                for file_name in _files_in(node, dirpath):
                    ret = get_file_stat(node, "{}/{}".format(dirpath,
                                                             file_name))
                    self.assertEqual(mode, ret['access'],
                                     "Permissions mismatch on node {}"
                                     .format(node))

        # Verify ownership (uid) per dir range, for dirs and their files
        for start, end, uid in ((1, 36, '1000'), (36, 71, '1001'),
                                (71, 101, '1002')):
            for i in range(start, end):
                dirpath = _dir_path(brick_path, i)
                ret = get_file_stat(node, dirpath)
                self.assertEqual(uid, ret['uid'],
                                 "User id mismatch on node {}"
                                 .format(node))
                for file_name in _files_in(node, dirpath):
                    ret = get_file_stat(node, "{}/{}".format(dirpath,
                                                             file_name))
                    self.assertEqual(uid, ret['uid'],
                                     "User id mismatch on node {}"
                                     .format(node))
def test_healing_of_custom_xattrs_on_newly_added_bricks(self):
    """
    Description: Tests to check that the custom xattrs are healed on the
                 dirs when new bricks are added
    Steps :
    1) Create a volume.
    2) Mount the volume using FUSE.
    3) Create 100 directories on the mount point.
    4) Set the xattr on the directories.
    5) Add bricks to the volume and trigger rebalance.
    6) Check if all the bricks have healed.
    7) After rebalance completes, check the xattr for dirs on the
       newly added bricks.
    """
    # pylint: disable=too-many-function-args
    # Creating 100 directories on volume root
    # (comment fix: previously claimed 1000 while creating 100)
    m_point = self.mounts[0].mountpoint
    command = 'mkdir -p ' + m_point + '/dir{1..100}'
    ret, _, _ = g.run(self.mounts[0].client_system, command)
    # Bug fix: messages were ("...%s", x) tuples which unittest never
    # interpolates; use real %-formatting throughout this test.
    self.assertEqual(ret, 0, "Directory creation failed on %s" % m_point)
    g.log.info("Directories created successfully.")

    # Lookup on the mount point
    command = 'ls ' + m_point + '/'
    ret, _, _ = g.run(self.mounts[0].client_system, command)
    self.assertEqual(ret, 0, "ls failed on parent directory")
    g.log.info("ls on parent directory: successful")

    # Setting up the custom xattr for all the directories on mount point
    # (redundant re-assignment of m_point removed)
    command = 'setfattr -n user.foo -v "foobar" ' + m_point + '/dir*'
    ret, _, _ = g.run(self.mounts[0].client_system, command)
    self.assertEqual(ret, 0, "Failed to set the xattr on the"
                     " directories")
    g.log.info("Successfully set custom xattr on the directories")

    # Checking the layout of the directories on the back-end
    flag = validate_files_in_dir(self.clients[0], m_point,
                                 test_type=k.TEST_LAYOUT_IS_COMPLETE)
    self.assertTrue(flag, "Layout has some holes or overlaps")
    g.log.info("Layout is completely set")

    # Creating a list of directories on the mount point
    list_of_all_dirs = get_dir_contents(self.mounts[0].client_system,
                                        m_point)
    self.assertIsNotNone(list_of_all_dirs, "Creation of directory"
                         " list failed.")
    g.log.info("Creation of directory list is successful.")

    # Checking the custom xattr on backend bricks for the directories
    self.check_xattr(list_of_all_dirs)

    # Expanding volume by adding bricks to the volume
    ret = expand_volume(self.mnode, self.volname, self.servers,
                        self.all_servers_info)
    self.assertTrue(ret, "Volume %s: Expand failed" % self.volname)
    g.log.info("Volume %s: Expand success", self.volname)

    # Start Rebalance
    ret, _, _ = rebalance_start(self.mnode, self.volname)
    self.assertEqual(ret, 0, "Volume %s: Failed to start rebalance"
                     % self.volname)
    g.log.info("Volume %s: Rebalance start success", self.volname)

    # Wait for rebalance to complete
    ret = wait_for_rebalance_to_complete(self.mnode, self.volname)
    self.assertTrue(ret, "Volume %s: Rebalance failed to complete"
                    % self.volname)
    g.log.info("Volume %s: Rebalance is completed", self.volname)

    # Lookup on the mount point
    command = 'ls -laR ' + m_point + '/'
    ret, _, _ = g.run(self.mounts[0].client_system, command)
    self.assertEqual(ret, 0, "ls failed on parent directory")
    g.log.info("ls on parent directory: successful")

    # Check if all the bricks are healed
    ret = monitor_heal_completion(self.mnode, self.volname,
                                  timeout_period=900)
    self.assertTrue(ret, "Heal is not complete for all bricks")
    g.log.info("Healing is complete for all the bricks")

    # Checking the custom xattrs for all the directories on
    # back end bricks after rebalance is complete
    self.check_xattr(list_of_all_dirs)
def test_one_brick_full_add_brick_rebalance(self):
    """
    Test case:
    1. Create a pure distribute volume with 3 bricks.
    2. Start it and mount it on client.
    3. Fill one disk of the volume till it's full
    4. Add brick to volume, start rebalance and wait for it to complete.
    5. Check arequal checksum before and after add brick should be same.
    6. Check if link files are present on bricks or not.
    """
    # Fill few bricks till it is full
    bricks = get_all_bricks(self.mnode, self.volname)

    # Calculate the usable size and fill till it reaches
    # min free limit
    usable_size = get_usable_size_per_disk(bricks[0])
    subvols = get_subvols(self.mnode, self.volname)['volume_subvols']
    fname = "abc"

    # Create directories in hierarchy
    dirp = "/dir1/dir2/"
    path = "{}{}".format(self.mounts[0].mountpoint, dirp)
    ret = mkdir(self.mounts[0].client_system, path, parents=True)
    self.assertTrue(ret, "Failed to create dir hierarchy")

    for _ in range(0, usable_size):
        # Create files inside directories; keep drawing random names
        # until one hashes to the first subvol, so only that brick
        # fills up.
        while (subvols[find_hashed_subvol(subvols, dirp, fname)[1]][0]
               != subvols[0][0]):
            fname = self._get_random_string()
        ret, _, _ = g.run(self.mounts[0].client_system,
                          "fallocate -l 1G {}{}".format(path, fname))
        self.assertFalse(ret, "Failed to fill disk to min free limit")
        fname = self._get_random_string()
    g.log.info("Disk filled up to min free limit")

    # Collect arequal checksum before ops
    arequal_checksum_before = collect_mounts_arequal(self.mounts[0])

    # Add brick to volume
    ret = expand_volume(self.mnode, self.volname, self.servers,
                        self.all_servers_info)
    self.assertTrue(ret, "Failed to add brick on volume %s"
                    % self.volname)

    # Trigger rebalance and wait for it to complete
    ret, _, _ = rebalance_start(self.mnode, self.volname, force=True)
    self.assertEqual(ret, 0, "Failed to start rebalance on the volume %s"
                     % self.volname)

    # Wait for rebalance to complete
    ret = wait_for_rebalance_to_complete(self.mnode, self.volname,
                                         timeout=1800)
    self.assertTrue(ret, "Rebalance is not yet complete on the volume "
                    "%s" % self.volname)
    g.log.info("Rebalance successfully completed")

    # Check for data loss by comparing arequal before and after ops
    arequal_checksum_after = collect_mounts_arequal(self.mounts[0])
    # Typo fix in assertion message: "MATCHNG" -> "MATCHING"
    self.assertEqual(arequal_checksum_before, arequal_checksum_after,
                     "arequal checksum is NOT MATCHING")
    g.log.info("arequal checksum is SAME")

    # Check if linkto files exist or not as rebalance is already
    # completed we shouldn't be seeing any linkto files
    for brick in bricks:
        node, path = brick.split(":")
        path += dirp
        list_of_files = get_dir_contents(node, path)
        self.assertIsNotNone(list_of_files, "Unable to get files")
        for filename in list_of_files:
            ret = get_dht_linkto_xattr(node, "{}{}".format(path,
                                                           filename))
            self.assertIsNone(ret, "Unable to fetch dht linkto xattr")
def test_memory_leak_with_rm(self):
    """
    Test case:
    1. Create a volume, start it and mount it.
    2. Create 10,000 files each of size 200K
    3. Delete the files created at step 2
    4. Check if the files are deleted from backend
    5. Check if there are any memory leaks and OOM killers.
    """
    # Start monitoring resource usage on servers and clients
    monitor_proc_dict = self.start_memory_and_cpu_usage_logging(
        self.test_id, count=30)
    self.assertIsNotNone(
        monitor_proc_dict, "Failed to start monitoring on servers and "
        "clients")

    # Create files on mount point and delete them in the same command
    cmd = ('cd %s;for i in {1..10000};'
           'do dd if=/dev/urandom bs=200K count=1 of=file$i;done;'
           'rm -rf %s/file*'
           % (self.mounts[0].mountpoint, self.mounts[0].mountpoint))
    ret, _, _ = g.run(self.mounts[0].client_system, cmd)
    self.assertEqual(ret, 0, "Failed to create and delete files on"
                     " mountpoint")
    g.log.info("Successfully created and removed files on mountpoint")

    # Remove anything left on the mount point (files were already
    # deleted above; this sweeps leftovers) and check that everything
    # is gone from the mount point as well as the backend bricks.
    ret, _, _ = g.run(self.clients[0],
                      "rm -rf {}/*".format(self.mounts[0].mountpoint))
    self.assertFalse(ret, "rm -rf * failed on mount point")

    ret = get_dir_contents(self.clients[0],
                           "{}/".format(self.mounts[0].mountpoint))
    self.assertEqual(ret, [], "Unexpected: Files and directories still "
                     "seen from mount point")

    for brick in get_all_bricks(self.mnode, self.volname):
        node, brick_path = brick.split(":")
        ret = get_dir_contents(node, "{}/".format(brick_path))
        self.assertEqual(ret, [], "Unexpected: Files and dirs still seen "
                         "on brick %s on node %s" % (brick_path, node))
    g.log.info("rm -rf * on mount point successful")

    # Wait for monitoring processes to complete
    ret = wait_for_logging_processes_to_stop(monitor_proc_dict,
                                             cluster=True)
    self.assertTrue(ret, "ERROR: Failed to stop monitoring processes")

    # Check if there are any memory leaks and OOM killers
    ret = self.check_for_memory_leaks_and_oom_kills_on_servers(
        self.test_id)
    self.assertFalse(ret,
                     "Memory leak and OOM kills check failed on servers")

    ret = self.check_for_memory_leaks_and_oom_kills_on_clients(
        self.test_id)
    self.assertFalse(ret,
                     "Memory leak and OOM kills check failed on clients")
    # Typo fix: "serves" -> "servers"
    g.log.info("No memory leaks or OOM kills found on servers and"
               " clients")
def validate_xattr_values(self, dirname, ctime=True):
    """Validate existence and consistency of the trusted.glusterfs.mdata
    xattr value across each replica set.

    Args:
        dirname (str): parent directory name

    Kwargs:
        ctime(bool): ctime feature enablement; when True the xattr must
            be present and identical across the replica set, when False
            it must be absent on every brick.
    """
    # pylint: disable=too-many-branches
    # Fetch all replica sets(subvols) in the volume
    ret = get_subvols(self.mnode, self.volname)
    # Iterating through each subvol(replicaset)
    for subvol in ret['volume_subvols']:
        brick_host_list = {}  # Dict for storing host,brickpath pairs
        for each in subvol:  # Fetching each replica in replica set
            # Splitting to brick,hostname pairs
            host, brick_path = each.split(':')
            brick_host_list[host] = brick_path
        # Fetch complete parent directory path (uses the last brick of
        # the subvol; all replicas hold the same namespace)
        directory = brick_path + '/' + dirname
        # Fetching all entries recursively in a replicaset
        entry_list = get_dir_contents(host, directory, recursive=True)
        for each in entry_list:
            xattr_value = []  # list to store xattr value per brick
            # Fetch the xattr of this entry from every brick in the set
            for host, brickpath in brick_host_list.items():
                # Remove the prefix brick_path from entry-name
                each = sub(brick_path, '', each)
                # Adding the right brickpath name for fetching xattrval
                brick_entry_path = brickpath + each
                ret = get_extended_attributes_info(host,
                                                   [brick_entry_path],
                                                   encoding='hex',
                                                   attr_name='trusted'
                                                   '.glusterfs.'
                                                   'mdata')
                if ret:
                    ret = ret[brick_entry_path]['trusted.glusterfs.mdata']
                    g.log.info("mdata xattr value of %s is %s",
                               brick_entry_path, ret)
                else:
                    # Normalize "xattr absent" to None so the
                    # assertIsNotNone/assertIsNone checks below behave
                    # correctly even if the helper returns a falsy
                    # non-None value (e.g. an empty dict).
                    ret = None
                if ctime:
                    self.assertIsNotNone(
                        ret, "glusterfs.mdata not set on"
                        " {}".format(brick_entry_path))
                    g.log.info(
                        "mdata xattr %s is set on the back-end"
                        " bricks", ret)
                else:
                    self.assertIsNone(
                        ret, "trusted.glusterfs.mdata seen "
                        " on {}".format(brick_entry_path))
                    g.log.info(
                        "mdata xattr %s is not set on the back-end"
                        " bricks", ret)
                xattr_value.append(ret)
            voltype = get_volume_type_info(self.mnode, self.volname)
            if voltype['volume_type_info']['arbiterCount'] == '0':
                # Pure replica: the value must be identical on every
                # brick of the set.
                ret = bool(
                    xattr_value.count(xattr_value[0]) == len(xattr_value))
            elif voltype['volume_type_info']['arbiterCount'] == '1':
                # Arbiter volume: at least two bricks must agree.
                # Fixed precedence bug: the original expression
                # (count(x) or count(y) > 1) was always truthy because
                # count(x) >= 1 for the element itself.
                ret = bool(xattr_value.count(xattr_value[0]) > 1 or
                           xattr_value.count(xattr_value[1]) > 1)
            else:
                # Fail explicitly instead of silently reusing the stale
                # 'ret' value left over from the xattr-fetch loop.
                g.log.error("Arbiter value is neither 0 nor 1")
                self.fail("Arbiter value is neither 0 nor 1")
            if ctime:
                self.assertTrue(
                    ret, 'trusted.glusterfs.mdata' +
                    ' value not same across bricks for '
                    'entry ' + each)
            else:
                self.assertTrue(
                    ret, 'trusted.glusterfs.mdata' +
                    ' seems to be set on some bricks for ' + each)
def test_delete_dir_with_self_pointing_linkto_files(self):
    """
    Test case:
    1. Create a pure distribute volume with 2 bricks, start and mount it.
    2. Create dir dir0/dir1/dir2 inside which create 1000 files and
       rename all the files.
    3. Start remove-brick operation on the volume.
    4. Check remove-brick status till status is completed.
    5. When remove-brick status is completed stop it.
    6. Go to brick used for remove brick and perform lookup on the files.
    7. Change the linkto xattr value for every file in brick used for
       remove brick to point to itself.
    8. Perform rm -rf * from mount point.
    """
    # Create dir /dir0/dir1/dir2
    self.dir_path = "{}/dir0/dir1/dir2/".format(self.mounts[0].mountpoint)
    ret = mkdir(self.first_client, self.dir_path, parents=True)
    self.assertTrue(ret, "Failed to create /dir0/dir1/dir2/ dir")

    # Create 1000 files inside /dir0/dir1/dir2
    ret, _, _ = g.run(
        self.first_client, 'cd %s;for i in {1..1000}; do echo "Test file" '
        '> tfile-$i; done' % self.dir_path)
    self.assertFalse(ret,
                     "Failed to create 1000 files inside /dir0/dir1/dir2")

    # Rename 1000 files present inside /dir0/dir1/dir2
    # (renames force DHT to create linkto files on the other subvol)
    ret, _, _ = g.run(
        self.first_client, "cd %s;for i in {1..1000};do mv tfile-$i "
        "ntfile-$i;done" % self.dir_path)
    self.assertFalse(ret,
                     "Failed to rename 1000 files inside /dir0/dir1/dir2")
    g.log.info("I/O successful on mount point.")

    # Start remove-brick operation on the volume
    brick = form_bricks_list_to_remove_brick(self.mnode, self.volname,
                                             subvol_num=1)
    self.assertIsNotNone(brick, "Brick_list is empty")
    ret, _, _ = remove_brick(self.mnode, self.volname, brick, 'start')
    # Fixed message typo: "remov-brick" -> "remove-brick"
    self.assertFalse(ret, "Failed to start remove-brick on volume")

    # Check remove-brick status till status is completed
    ret = wait_for_remove_brick_to_complete(self.mnode, self.volname,
                                            brick)
    self.assertTrue(ret, "Remove-brick didn't complete on volume")

    # When remove-brick status is completed stop it
    ret, _, _ = remove_brick(self.mnode, self.volname, brick, 'stop')
    # Fixed copy-paste message: this is the stop step, not start
    self.assertFalse(ret, "Failed to stop remove-brick on volume")
    g.log.info("Successfully started and stopped remove-brick")

    # Go to brick used for remove brick and perform lookup on the files
    node, path = brick[0].split(":")
    path = "{}/dir0/dir1/dir2/".format(path)
    ret, _, _ = g.run(node, 'ls {}*'.format(path))
    self.assertFalse(ret, "Failed to do lookup on %s" % brick[0])

    # Change the linkto xattr value for every file in brick used for
    # remove brick to point to itself
    ret = get_dir_contents(node, path)
    self.assertIsNotNone(ret,
                         "Unable to get files present in dir0/dir1/dir2")

    ret = get_dht_linkto_xattr(node, "{}{}".format(path, ret[0]))
    self.assertIsNotNone(ret, "Unable to fetch dht linkto xattr")

    # Change trusted.glusterfs.dht.linkto from dist-client-0 to
    # dist-client-1 or visa versa according to initial value, so the
    # linkto file points back at its own subvol
    dht_linkto_xattr = ret.split("-")
    if int(dht_linkto_xattr[2]):
        dht_linkto_xattr[2] = "0"
    else:
        dht_linkto_xattr[2] = "1"
    linkto_value = "-".join(dht_linkto_xattr)

    # Set xattr trusted.glusterfs.dht.linkto on all the linkto files
    ret = set_fattr(node, '{}*'.format(path),
                    'trusted.glusterfs.dht.linkto', linkto_value)
    self.assertTrue(ret, "Failed to change linkto file to point to itself")

    # Perform rm -rf * from mount point
    ret, _, _ = g.run(self.first_client,
                      "rm -rf {}/*".format(self.mounts[0].mountpoint))
    self.assertFalse(ret, "Failed to run rm -rf * on mount point")
    g.log.info("rm -rf * successful on mount point")
def test_sosreport_behavior_for_glusterfs(self):
    '''
    Test Steps:
    1) Download sos package if not installed
    2) Fetch Sos version for reference
    3) Note down all files in below locations before taking sosreport:
        a) /var/run/gluster
        b) /run/gluster
        c) /var/lib/glusterd
        d) /var/log/glusterfs
    4) Take the sosreport
    5) Again note down the list of all gluster file in locations
       mentioned in step#3. The list of files in this step should match
       step#3
    6) untar the sosreport to see if gluster files are packaged
    '''
    # Fetching sosreport version for information
    ret, version, _ = g.run(self.servers[1], 'rpm -qa|grep sos')
    if version[4:9] in ('3.8-6', '3.8-7', '3.8-8'):
        raise SkipTest("Skipping testcase as bug is fixed in "
                       "sosreport version 3.8.9")
    g.log.info("sos version is %s", version)

    # Noting down list of entries in gluster directories before sos
    gluster_contents_before_sos = []
    gluster_dirs = ('/var/run/gluster*', '/run/gluster*',
                    '/var/lib/glusterd', '/var/log/glusterfs')
    for gdir in gluster_dirs:
        ret = get_dir_contents(self.servers[1], gdir, recursive=True)
        gluster_contents_before_sos.append(ret)

    # Check for any existing sosreport
    var_tmp_dircontents_before_sos = get_dir_contents(
        self.servers[1], '/var/tmp/')

    # Collect sosreport
    ret, _, err = g.run(self.servers[1],
                        'sosreport --batch --name=$HOSTNAME')
    self.assertEqual(ret, 0,
                     "failed to fetch sosreport due to {}".format(err))

    # Checking /var/tmp contents
    var_tmp_dircontents_after_sos = get_dir_contents(
        self.servers[1], '/var/tmp/')

    # Recheck if all gluster files still exist
    gluster_contents_after_sos = []
    for gdir in gluster_dirs:
        ret = get_dir_contents(self.servers[1], gdir, recursive=True)
        gluster_contents_after_sos.append(ret)

    # Compare glusterfiles before and after taking sosreport
    # There should be no difference in contents
    # Ignoring /var/log/glusterfs ie last element of the list, to avoid
    # false negatives as sosreport triggers heal which creates new logs
    # and obvious difference in list of entries post sos
    self.assertTrue((gluster_contents_before_sos[:-1] ==
                     gluster_contents_after_sos[:-1]),
                    "Gluster files not matching before and after "
                    " sosreport generation {} and {}".format(
                        gluster_contents_before_sos,
                        gluster_contents_after_sos))

    # Untar sosreport to check if gluster files are captured
    # The newly-created tarball is whatever appeared in /var/tmp
    sosfile = list(set(var_tmp_dircontents_after_sos) -
                   set(var_tmp_dircontents_before_sos))
    sosfile.sort()
    untar_sosfile_cmd = ('tar -xvf /var/tmp/' + sosfile[0] +
                         ' -C /var/tmp/')
    ret, _, err = g.run(self.servers[1], untar_sosfile_cmd)
    self.assertEqual(ret, 0, "Untar failed due to {}".format(err))

    dirchecks = ('/var/lib/glusterd', '/var/log/glusterfs')
    # Post-sos contents of the two dirs above (indexes 2 and 3 of
    # gluster_dirs respectively)
    olddirs = [
        gluster_contents_after_sos[2], gluster_contents_after_sos[3]
    ]
    ret = {}
    # Untarred tree lives under the tarball name minus its '.tar.xz'
    # suffix (7 chars); loop-invariant, so computed once
    untar_dirpath = '/var/tmp/' + sosfile[0][0:-7]
    for after, before in zip(dirchecks, olddirs):
        untardir = untar_dirpath + after
        # Fixed: real data was bound to '_', which conventionally marks
        # a throwaway value
        untar_contents = get_dir_contents(self.servers[1], untardir,
                                          recursive=True)
        # Strip the untar prefix so entries are comparable with the
        # live-system listings
        ret[after] = list(x.split(untar_dirpath, 1)[-1]
                          for x in untar_contents)
        if after == dirchecks[0]:
            # /var/lib/glusterd must be captured in full: every live
            # entry has to appear in the sosreport.
            # (Fixed: branch previously keyed off list-content equality
            # with gluster_contents_after_sos[2], which breaks if the
            # two directories happen to have identical listings.)
            difference = set(before) - set(ret[after])
            self.assertEqual(
                len(difference), 0,
                'gluster sosreport may be missing as they '
                'dont match with actual contents')
        else:
            # Need this logic for var/log/glusterfs entries as
            # rotated(.gz) logs are not collected by sos
            self.assertTrue(
                all(entry in before for entry in ret[after]),
                'var-log-glusterfs entries in sosreport may be'
                ' missing as they dont match with actual '
                'contents')