def bricks_online_and_volume_reset(cls):
    """
    Reset the volume if any bricks are offline.

    Force-starts the volume when offline bricks are found, waits for
    all bricks to come online, then resets any volume options that
    were set during the test.

    Raises:
        ExecutionError: if the volume cannot be force started, the
            bricks do not come online, or the volume reset fails.
    """
    bricks_offline = get_offline_bricks_list(cls.mnode, cls.volname)
    if bricks_offline is not None:
        # BUG FIX: volume_start() returns (ret, out, err); the original
        # tested the tuple's truthiness, which is always True, so the
        # failure branch could never trigger.  Check the return code.
        ret, _, _ = volume_start(cls.mnode, cls.volname, force=True)
        if ret:
            # BUG FIX: added the missing space ("volume%s" -> "volume %s")
            raise ExecutionError("Failed to force start volume %s"
                                 % cls.volname)
        ret = wait_for_bricks_to_be_online(cls.mnode, cls.volname)
        if not ret:
            # BUG FIX: added the missing space ("onlinefor" -> "online for")
            raise ExecutionError("Failed to bring bricks online "
                                 "for volume %s" % cls.volname)

    # Reset volume options unconditionally so later tests start clean.
    ret, _, _ = volume_reset(cls.mnode, cls.volname, force=True)
    if ret:
        raise ExecutionError("Failed to reset volume %s" % cls.volname)
    g.log.info("Successful in volume reset %s", cls.volname)
def test_snap_self_heal(self):
    """
    Steps:
    1. create a volume
    2. mount volume
    3. create snapshot of that volume
    4. Activate snapshot
    5. Clone snapshot and Mount
    6. Perform I/O
    7. Bring Down Few bricks from volume without
       affecting the volume or cluster.
    8. Perform I/O
    9. Bring back down bricks to online
    10. Validate heal is complete with areequal
    """
    # pylint: disable=too-many-statements, too-many-locals
    # Creating snapshot:
    g.log.info("Starting to Create snapshot")
    ret, _, _ = snap_create(self.mnode, self.volname, self.snap)
    self.assertEqual(ret, 0, ("Failed to create snapshot for volume %s"
                              % self.volname))
    g.log.info("Snapshot %s created successfully for volume %s",
               self.snap, self.volname)

    # Activating snapshot
    g.log.info("Starting to Activate Snapshot")
    ret, _, _ = snap_activate(self.mnode, self.snap)
    self.assertEqual(ret, 0, ("Failed to Activate snapshot %s"
                              % self.snap))
    g.log.info("Snapshot %s activated successfully", self.snap)

    # snapshot list
    ret, _, _ = snap_list(self.mnode)
    self.assertEqual(ret, 0, ("Failed to list all the snapshot"))
    g.log.info("Snapshot list command was successful")

    # Creating a Clone volume from snapshot:
    g.log.info("Starting to Clone volume from Snapshot")
    ret, _, _ = snap_clone(self.mnode, self.snap, self.clone)
    self.assertEqual(ret, 0, ("Failed to clone %s from snapshot %s"
                              % (self.clone, self.snap)))
    g.log.info("%s created successfully", self.clone)

    # start clone volumes
    g.log.info("start to created clone volumes")
    ret, _, _ = volume_start(self.mnode, self.clone)
    self.assertEqual(ret, 0, "Failed to start clone %s" % self.clone)
    g.log.info("clone volume %s started successfully", self.clone)

    # Mounting a clone volume
    g.log.info("Mounting a clone volume")
    ret, _, _ = mount_volume(self.clone, self.mount_type, self.mount1,
                             self.mnode, self.clients[0])
    self.assertEqual(ret, 0,
                     "Failed to mount clone Volume %s" % self.clone)
    g.log.info("Clone volume %s mounted Successfully", self.clone)

    # Checking cloned volume mounted or not
    ret = is_mounted(self.clone, self.mount1, self.mnode,
                     self.clients[0], self.mount_type)
    self.assertTrue(
        ret,
        "Failed to mount clone volume on mount point: %s" % self.mount1)
    g.log.info("clone Volume %s mounted on %s", self.clone, self.mount1)

    # write files on all mounts
    g.log.info("Starting IO on all mounts...")
    g.log.info("mounts: %s", self.mount1)
    cmd = ("python %s create_files "
           "-f 10 --base-file-name file %s" % (self.script_upload_path,
                                               self.mount1))
    # BUG FIX: g.run() is synchronous and returns (ret, out, err); the
    # original appended the finished tuple to a proc list that was never
    # validated.  Check the return code directly instead.
    ret, _, _ = g.run(self.clients[0], cmd)
    self.assertEqual(ret, 0, "I/O failed on %s" % self.mount1)
    g.log.info("Successful in creating I/O on mounts")

    # get the bricks from the clone volume
    g.log.info("Fetching bricks for the volume : %s", self.clone)
    bricks_list = get_all_bricks(self.mnode, self.clone)
    g.log.info("Brick List : %s", bricks_list)

    # Select bricks to bring offline
    # BUG FIX: the bricks are brought offline on the clone volume, so
    # select them from the clone, not from the parent volume.
    g.log.info("Starting to bring bricks to offline")
    bricks_to_bring_offline_dict = (select_bricks_to_bring_offline(
        self.mnode, self.clone))
    # BUG FIX: materialise the filter; under Python 3 a bare filter()
    # iterator would be consumed by the first membership test below.
    bricks_to_bring_offline = list(filter(
        None, (bricks_to_bring_offline_dict['hot_tier_bricks'] +
               bricks_to_bring_offline_dict['cold_tier_bricks'] +
               bricks_to_bring_offline_dict['volume_bricks'])))
    g.log.info("Brick to bring offline: %s ", bricks_to_bring_offline)
    ret = bring_bricks_offline(self.clone, bricks_to_bring_offline)
    self.assertTrue(ret, "Failed to bring the bricks offline")
    g.log.info("Successful in bringing bricks: %s offline",
               bricks_to_bring_offline)

    # Offline Bricks list
    offline_bricks = get_offline_bricks_list(self.mnode, self.clone)
    self.assertIsNotNone(
        offline_bricks, "Failed to get offline bricklist "
        "for volume %s" % self.clone)
    for brick in offline_bricks:
        self.assertIn(brick, bricks_to_bring_offline,
                      "Failed to validate Bricks offline")
    g.log.info("Bricks Offline: %s", offline_bricks)

    # Online Bricks list
    online_bricks = get_online_bricks_list(self.mnode, self.clone)
    self.assertIsNotNone(
        online_bricks, "Failed to get online bricks"
        " for volume %s" % self.clone)
    g.log.info("Bricks Online: %s", online_bricks)

    # write files on mountpoint while bricks are down
    g.log.info("Starting IO on all mounts...")
    g.log.info("mounts: %s", self.mount1)
    cmd = ("python %s create_files "
           "-f 10 --base-file-name file %s" % (self.script_upload_path,
                                               self.mount1))
    ret, _, _ = g.run(self.clients[0], cmd)
    self.assertEqual(ret, 0, "I/O failed on %s" % self.mount1)
    g.log.info("Successful in creating I/O on mounts")

    # Bring all bricks online
    g.log.info("bring all bricks online")
    ret = bring_bricks_online(self.mnode, self.clone,
                              bricks_to_bring_offline)
    self.assertTrue(ret, "Failed to bring bricks online")
    g.log.info("Successful in bringing all bricks online")

    # Validate Bricks are online
    g.log.info("Validating all bricks are online")
    ret = are_bricks_online(self.mnode, self.clone, bricks_list)
    self.assertTrue(ret, "Failed to bring all the bricks online")
    g.log.info("bricks online: %s", bricks_list)

    # Wait for volume processes to be online
    g.log.info("Wait for volume processes to be online")
    ret = wait_for_volume_process_to_be_online(self.mnode, self.clone)
    self.assertTrue(ret, ("Failed to wait for volume %s processes to "
                          "be online" % self.clone))
    g.log.info("Successful in waiting for volume %s processes to be "
               "online", self.clone)

    # Verify volume's all process are online
    g.log.info("Verifying volume's all process are online")
    ret = verify_all_process_of_volume_are_online(self.mnode, self.clone)
    self.assertTrue(
        ret, ("Volume %s : All process are not online" % self.clone))
    g.log.info("Volume %s : All process are online", self.clone)

    # wait for the heal process to complete
    # BUG FIX: heal happens on the clone volume whose bricks were
    # cycled, not on the parent volume.
    g.log.info("waiting for heal process to complete")
    ret = monitor_heal_completion(self.mnode, self.clone)
    self.assertTrue(ret, "Failed to complete the heal process")
    g.log.info("Successfully completed heal process")

    # Check areequal
    # get the subvolumes
    g.log.info("Starting to get sub-volumes for volume %s", self.clone)
    subvols = get_subvols(self.mnode, self.clone)
    num_subvols = len(subvols['volume_subvols'])
    g.log.info("Number of subvolumes in volume %s:", num_subvols)

    # Get arequals and compare
    g.log.info("Starting to Compare areequals")
    for i in range(0, num_subvols):
        # Get arequal for first brick of the subvol
        subvol_brick_list = subvols['volume_subvols'][i]
        node, brick_path = subvol_brick_list[0].split(':')
        command = ('arequal-checksum -p %s '
                   '-i .glusterfs -i .landfill -i .trashcan'
                   % brick_path)
        ret, arequal, _ = g.run(node, command)
        # BUG FIX: the original never checked the exit status of the
        # first brick's arequal run.
        self.assertFalse(ret, 'Failed to get arequal on brick %s'
                         % subvol_brick_list[0])
        first_brick_total = arequal.splitlines()[-1].split(':')[-1]

        # Get arequal for every brick and compare with first brick
        for brick in subvol_brick_list:
            node, brick_path = brick.split(':')
            command = ('arequal-checksum -p %s '
                       '-i .glusterfs -i .landfill -i .trashcan'
                       % brick_path)
            ret, brick_arequal, _ = g.run(node, command)
            self.assertFalse(ret, 'Failed to get arequal on brick %s'
                             % brick)
            g.log.info('Getting arequal for %s is successful', brick)
            brick_total = brick_arequal.splitlines()[-1].split(':')[-1]
            self.assertEqual(
                first_brick_total, brick_total,
                'Arequals for subvol and %s are not equal' % brick)
            g.log.info('Arequals for subvol and %s are equal', brick)
    g.log.info('All arequals are equal for distributed-replicated')
def test_create_snap_bricks(self):
    """
    1. get brick list
    2. check all bricks are online
    3. Selecting one brick randomly to bring it offline
    4. get brick list
    5. check all bricks are online
    6. Offline Bricks list
    7. Online Bricks list
    8. Create snapshot of volume
    9. snapshot create should fail
    """
    # get the bricks from the volume
    g.log.info("Fetching bricks for the volume : %s", self.volname)
    bricks_list = get_all_bricks(self.mnode, self.volname)
    g.log.info("Brick List : %s", bricks_list)

    # check all bricks are online
    g.log.info("Verifying all bricks are online or not.....")
    ret = are_bricks_online(self.mnode, self.volname, bricks_list)
    self.assertTrue(ret, "Not all bricks are online")
    g.log.info("All bricks are online.")

    # Selecting one brick randomly to bring it offline
    g.log.info("Selecting one brick randomly to bring it offline")
    brick_to_bring_offline = random.choice(bricks_list)
    g.log.info("Brick to bring offline:%s ", brick_to_bring_offline)
    ret = bring_bricks_offline(self.volname, brick_to_bring_offline,
                               None)
    self.assertTrue(ret, "Failed to bring the bricks offline")
    g.log.info("Randomly Selected brick: %s", brick_to_bring_offline)

    # get brick list
    g.log.info("Fetching bricks for the volume : %s", self.volname)
    bricks_list = get_all_bricks(self.mnode, self.volname)
    g.log.info("Brick List : %s", bricks_list)

    # one brick is expected to be offline now
    g.log.info("Verifying all bricks are online or not.....")
    ret = are_bricks_online(self.mnode, self.volname, bricks_list)
    # BUG FIX: the failure message now matches the expectation; the
    # original said "Not all bricks are online" for an assertFalse.
    self.assertFalse(ret, "Unexpected: all bricks are online even "
                     "after bringing one offline")
    g.log.info("Expected: one brick is offline.")

    # get the bricks for the volume
    g.log.info("Fetching bricks for the volume : %s", self.volname)
    bricks_list = get_all_bricks(self.mnode, self.volname)
    g.log.info("Brick List : %s", bricks_list)

    # Offline Bricks list
    offbricks = get_offline_bricks_list(self.mnode, self.volname)
    g.log.info("Bricks Offline: %s", offbricks)

    # Online Bricks list
    onbricks = get_online_bricks_list(self.mnode, self.volname)
    g.log.info("Bricks Online: %s", onbricks)

    # Create snapshot of volume - must fail while a brick is offline
    # BUG FIX: snap_create() returns (ret, out, err); asserting on the
    # tuple is always truthy, so the original could never detect the
    # outcome.  Per step 9 of the docstring the creation must fail.
    ret, _, _ = snap_create(self.mnode, self.volname, "snap1", False,
                            "Description with $p3c1al characters!")
    self.assertNotEqual(
        ret, 0, "Unexpected: snapshot snap1 of volume %s created "
        "while a brick was offline" % self.volname)
    g.log.info("Expected: failed to create snapshot snap1 of volume %s",
               self.volname)

    # Volume status
    ret = get_volume_info(self.mnode, self.volname)
    self.assertTrue(ret, ("Failed to perform gluster volume "
                          "info on volume %s" % self.volname))
    g.log.info("Gluster volume info on volume %s is successful",
               self.volname)

    # snapshot list
    # BUG FIX: snap_list() also returns (ret, out, err); check the
    # return code instead of the always-truthy tuple.
    ret, _, _ = snap_list(self.mnode)
    self.assertEqual(
        ret, 0, "Failed to list snapshot of volume %s" % self.volname)
    g.log.info("Snapshot list command for volume %s was successful",
               self.volname)
def test_ec_all_healtypes(self):
    """
    Test steps:
    - Create directory dir1
    - Create files inside dir1
    - Rename all file inside dir1
    - Create softlink and hardlink of files in mountpoint
    - Create tiny, small, medium nd large file
    - Get arequal of dir1
    - Create directory dir2
    - Creating files on dir2
    - Bring down other bricks to max redundancy
    - Create directory dir3
    - Start pumping IO to dir3
    - Validating IO's on dir2 and waiting to complete
    - Bring bricks online
    - Wait for bricks to come online
    - Check if bricks are online
    - Monitor heal completion
    - Get arequal of dir1
    - Compare arequal of dir1
    """
    # pylint: disable=too-many-branches,too-many-statements,too-many-locals
    # Get the bricks from the volume
    bricks_list = get_all_bricks(self.mnode, self.volname)
    g.log.info("Brick List : %s", bricks_list)

    mountpoint = self.mounts[0].mountpoint
    client = self.mounts[0].client_system

    # Creating dir1
    ret = mkdir(client, "%s/dir1" % mountpoint)
    self.assertTrue(ret, "Failed to create dir1")
    g.log.info("Directory dir1 on %s created successfully",
               self.mounts[0])

    # Create files inside dir1
    cmd = ('touch %s/dir1/file{1..5};' % mountpoint)
    ret, _, _ = g.run(client, cmd)
    self.assertFalse(ret, "File creation failed")
    g.log.info("File created successfull")

    # Rename all files inside dir1
    cmd = ('cd %s/dir1/; '
           'for FILENAME in *;'
           'do mv $FILENAME Unix_$FILENAME; cd ~;'
           'done;' % mountpoint)
    ret, _, _ = g.run(client, cmd)
    self.assertEqual(ret, 0, "Failed to rename files on client")
    g.log.info("Successfully renamed files on client")

    # Create softlink and hardlink of files in mountpoint
    cmd = ('cd %s/dir1/; '
           'for FILENAME in *; '
           'do ln -s $FILENAME softlink_$FILENAME; cd ~;'
           'done;' % mountpoint)
    ret, _, _ = g.run(client, cmd)
    self.assertFalse(ret, "Creating Softlinks have failed")
    g.log.info("Softlink of files have been changed successfully")

    cmd = ('cd %s/dir1/; '
           'for FILENAME in *; '
           'do ln $FILENAME hardlink_$FILENAME; cd ~;'
           'done;' % mountpoint)
    ret, _, _ = g.run(client, cmd)
    self.assertFalse(ret, "Creating Hardlinks have failed")
    g.log.info("Hardlink of files have been changed successfully")

    # Create tiny, small, medium and large file
    # at mountpoint. Offset to differ filenames
    # at diff clients.
    offset = 1
    for mount_obj in self.mounts:
        # BUG FIX: the fallocate commands carried no path, so the files
        # were created in the remote shell's cwd instead of on the
        # mounted volume; cd to the mountpoint first.
        cmd = ('cd %s; fallocate -l 100 tiny_file%s.txt'
               % (mount_obj.mountpoint, offset))
        ret, _, _ = g.run(mount_obj.client_system, cmd)
        self.assertFalse(ret, "Fallocate for tiny files failed")
        g.log.info("Fallocate for tiny files successfully")

        cmd = ('cd %s; fallocate -l 20M small_file%s.txt'
               % (mount_obj.mountpoint, offset))
        ret, _, _ = g.run(mount_obj.client_system, cmd)
        self.assertFalse(ret, "Fallocate for small files failed")
        g.log.info("Fallocate for small files successfully")

        cmd = ('cd %s; fallocate -l 200M medium_file%s.txt'
               % (mount_obj.mountpoint, offset))
        ret, _, _ = g.run(mount_obj.client_system, cmd)
        self.assertFalse(ret, "Fallocate for medium files failed")
        g.log.info("Fallocate for medium files successfully")

        cmd = ('cd %s; fallocate -l 1G large_file%s.txt'
               % (mount_obj.mountpoint, offset))
        ret, _, _ = g.run(mount_obj.client_system, cmd)
        self.assertFalse(ret, "Fallocate for large files failed")
        g.log.info("Fallocate for large files successfully")
        offset += 1

    # Get arequal of dir1
    ret, result_before_brick_down = (
        collect_mounts_arequal(self.mounts[0], path='dir1/'))
    self.assertTrue(ret, 'Failed to get arequal')
    g.log.info('Getting arequal of dir1 is successful')

    # Creating dir2
    ret = mkdir(self.mounts[0].client_system, "%s/dir2" % mountpoint)
    self.assertTrue(ret, "Failed to create dir2")
    g.log.info("Directory dir2 on %s created successfully",
               self.mounts[0])

    # Creating files on dir2
    # Write IO
    all_mounts_procs, count = [], 1
    for mount_obj in self.mounts:
        cmd = ("/usr/bin/env python %s create_deep_dirs_with_files "
               "--dirname-start-num %d --dir-depth 2 "
               "--dir-length 10 --max-num-of-dirs 5 "
               "--num-of-files 5 %s/dir2" % (
                   self.script_upload_path, count,
                   mount_obj.mountpoint))
        proc = g.run_async(mount_obj.client_system, cmd,
                           user=mount_obj.user)
        all_mounts_procs.append(proc)
        count = count + 10

    # Bring down other bricks to max redundancy
    # Bringing bricks offline
    bricks_to_offline = sample(bricks_list, 2)
    ret = bring_bricks_offline(self.volname, bricks_to_offline)
    self.assertTrue(ret, 'Bricks not offline')
    g.log.info('Bricks are offline successfully')

    # Creating dir3
    # BUG FIX: the failure/success messages referred to dir2.
    ret = mkdir(self.mounts[0].client_system, "%s/dir3" % mountpoint)
    self.assertTrue(ret, "Failed to create dir3")
    g.log.info("Directory dir3 on %s created successfully",
               self.mounts[0])

    # Start pumping IO to dir3
    cmd = ("cd %s/dir3; for i in `seq 1 100` ;"
           "do dd if=/dev/urandom of=file$i bs=1M "
           "count=5;done" % mountpoint)
    ret, _, err = g.run(self.mounts[0].client_system, cmd)
    self.assertEqual(ret, 0, err)
    g.log.info('Finished writing on files while a brick is DOWN')

    appendcmd = ("cd %s/dir3; for i in `seq 1 100` ;"
                 "do dd if=/dev/urandom of=file$i bs=1M "
                 "count=1 oflag=append conv=notrunc;done" % mountpoint)

    readcmd = ("cd %s/dir3; for i in `seq 1 100` ;"
               "do dd if=file$i of=/dev/null bs=1M "
               "count=5;done" % mountpoint)

    ret, _, err = g.run(self.mounts[0].client_system, appendcmd)
    self.assertEqual(ret, 0, err)
    g.log.info('Finished append on files after redundant bricks offline')

    ret, _, err = g.run(self.mounts[0].client_system, readcmd)
    self.assertEqual(ret, 0, err)
    g.log.info('Finished read on files after redundant bricks offline')

    # Validating IO's on dir2 and waiting to complete
    ret = validate_io_procs(all_mounts_procs, self.mounts)
    self.assertTrue(ret, "IO failed on some of the clients")
    g.log.info("Successfully validated all IO's")

    # Bring bricks online
    ret = bring_bricks_online(self.mnode, self.volname,
                              bricks_to_offline)
    self.assertTrue(ret, 'Bricks not brought online')
    g.log.info('Bricks are online successfully')

    # Wait for brick to come online
    ret = wait_for_bricks_to_be_online(self.mnode, self.volname)
    self.assertTrue(ret, "Bricks are not online")
    g.log.info("EXPECTED : Bricks are online")

    # Check if bricks are online
    ret = get_offline_bricks_list(self.mnode, self.volname)
    self.assertListEqual(ret, [], 'All bricks are not online')
    g.log.info('All bricks are online')

    # Monitor heal completion
    ret = monitor_heal_completion(self.mnode, self.volname)
    self.assertTrue(ret, 'Heal has not yet completed')
    g.log.info('Heal has completed successfully')

    # Get arequal of dir1
    ret, result_after_brick_up = (
        collect_mounts_arequal(self.mounts[0], path='dir1/'))
    self.assertTrue(ret, 'Failed to get arequal')
    g.log.info('Getting arequal of dir1 is successful')

    # Comparing arequals of dir1
    self.assertEqual(result_before_brick_down, result_after_brick_up,
                     'Arequals are not equals before and after '
                     'bringing down redundant bricks')
    g.log.info('Arequals are equals before before and after '
               'bringing down redundant bricks')
def test_ec_data_integrity(self):
    """
    Test steps:
    - Create directory dir1
    - Create 5 dir and 5 files in each dir in directory 1
    - Rename all file inside dir1
    - Truncate at any dir in mountpoint inside dir1
    - Create softlink and hardlink of files in mountpoint
    - chmod, chown, chgrp inside dir1
    - Create tiny, small, medium nd large file
    - Creating files on client side for dir1
    - Validating IO's and waiting to complete
    - Get arequal of dir1
    - Bring redundant bricks offline
    - Get arequal of dir1 after 1st set of bricks down
    - Bring redundant bricks offline
    - Get arequal of dir1 after 2nd set of bricks down
    """
    # pylint: disable=too-many-branches,too-many-statements,too-many-locals

    # Creating dir1
    ret = mkdir(self.mounts[0].client_system, "%s/dir1"
                % self.mounts[0].mountpoint)
    self.assertTrue(ret, "Failed to create dir1")
    g.log.info("Directory dir1 on %s created successfully",
               self.mounts[0])

    # Create 5 dir and 5 files in each dir at mountpoint on dir1
    start, end = 1, 5
    for mount_obj in self.mounts:
        # Number of dir and files to be created.
        dir_range = ("%s..%s" % (str(start), str(end)))
        file_range = ("%s..%s" % (str(start), str(end)))

        # Create dir 1-5 at mountpoint.
        ret = mkdir(mount_obj.client_system, "%s/dir1/dir{%s}"
                    % (mount_obj.mountpoint, dir_range))
        self.assertTrue(ret, "Failed to create directory")
        g.log.info("Directory created successfully")

        # Create files inside each dir.
        cmd = ('touch %s/dir1/dir{%s}/file{%s};'
               % (mount_obj.mountpoint, dir_range, file_range))
        ret, _, _ = g.run(mount_obj.client_system, cmd)
        self.assertFalse(ret, "File creation failed")
        g.log.info("File created successfull")

        # Increment counter so that at next client dir and files are
        # made with diff offset. Like at next client dir will be named
        # dir6, dir7...dir10. Same with files.
        start += 5
        end += 5

    # Rename all files inside dir1 at mountpoint on dir1
    cmd = ('cd %s/dir1/dir1/; '
           'for FILENAME in *;'
           'do mv $FILENAME Unix_$FILENAME; cd ~;'
           'done;' % self.mounts[0].mountpoint)
    ret, _, _ = g.run(self.mounts[0].client_system, cmd)
    self.assertEqual(ret, 0, "Failed to rename file on client")
    g.log.info("Successfully renamed file on client")

    # Truncate at any dir in mountpoint inside dir1
    # start is an offset to be added to dirname to act on
    # diff files at diff clients.
    start = 1
    for mount_obj in self.mounts:
        cmd = ('cd %s/dir1/dir%s/; '
               'for FILENAME in *;'
               'do echo > $FILENAME; cd ~;'
               'done;' % (mount_obj.mountpoint, str(start)))
        ret, _, _ = g.run(mount_obj.client_system, cmd)
        self.assertFalse(ret, "Truncate failed")
        g.log.info("Truncate of files successfull")

    # Create softlink and hardlink of files in mountpoint
    start = 1
    for mount_obj in self.mounts:
        for link_type, ln_mode in (('softlink', 'ln -s'),
                                   ('hardlink', 'ln')):
            cmd = ('cd %s/dir1/dir%s; '
                   'for FILENAME in *; '
                   'do %s $FILENAME %s_$FILENAME; cd ~;'
                   'done;'
                   % (mount_obj.mountpoint, str(start), ln_mode,
                      link_type))
            ret, _, _ = g.run(mount_obj.client_system, cmd)
            self.assertFalse(ret, "Creating %s have failed" % link_type)
            g.log.info("%s of files created successfully", link_type)
        start += 5

    # chmod, chown, chgrp inside dir1
    # start and end used as offset to access diff files
    # at diff clients.
    start, end = 2, 5
    for mount_obj in self.mounts:
        dir_file_range = '%s..%s' % (str(start), str(end))
        cmd = ('chmod 777 %s/dir1/dir{%s}/file{%s}'
               % (mount_obj.mountpoint, dir_file_range, dir_file_range))
        ret, _, _ = g.run(mount_obj.client_system, cmd)
        self.assertFalse(ret, "Changing mode of files has failed")
        g.log.info("Mode of files have been changed successfully")

        cmd = ('chown root %s/dir1/dir{%s}/file{%s}'
               % (mount_obj.mountpoint, dir_file_range, dir_file_range))
        ret, _, _ = g.run(mount_obj.client_system, cmd)
        self.assertFalse(ret, "Changing owner of files has failed")
        g.log.info("Owner of files have been changed successfully")

        cmd = ('chgrp root %s/dir1/dir{%s}/file{%s}'
               % (mount_obj.mountpoint, dir_file_range, dir_file_range))
        ret, _, _ = g.run(mount_obj.client_system, cmd)
        self.assertFalse(ret, "Changing group of files has failed")
        g.log.info("Group of files have been changed successfully")
        start += 5
        end += 5

    # Create tiny, small, medium and large file
    # at mountpoint. Offset to differ filenames
    # at diff clients.
    offset = 1
    for mount_obj in self.mounts:
        for size, filename in (('100', 'tiny_file'),
                               ('20M', 'small_file'),
                               ('200M', 'medium_file'),
                               ('1G', 'large_file')):
            # BUG FIX: fallocate carried no path, so the files were
            # created in the remote shell's cwd instead of on the
            # mounted volume; cd to the mountpoint first.
            cmd = 'cd {}; fallocate -l {} {}{}.txt'.format(
                mount_obj.mountpoint, size, filename, offset)
            ret, _, _ = g.run(mount_obj.client_system, cmd)
            self.assertFalse(ret, "Fallocate for files failed")
            g.log.info("Fallocate for files successfully")
        offset += 1

    # Creating files on client side for dir1
    # Write IO
    all_mounts_procs, count = [], 1
    for mount_obj in self.mounts:
        cmd = ("/usr/bin/env python %s create_deep_dirs_with_files "
               "--dirname-start-num %d --dir-depth 2 "
               "--dir-length 10 --max-num-of-dirs 5 "
               "--num-of-files 5 %s/dir1" % (
                   self.script_upload_path, count,
                   mount_obj.mountpoint))
        proc = g.run_async(mount_obj.client_system, cmd,
                           user=mount_obj.user)
        all_mounts_procs.append(proc)
        count += 10

    # Validating IO's and waiting to complete
    ret = validate_io_procs(all_mounts_procs, self.mounts)
    self.assertTrue(ret, "IO failed on some of the clients")
    g.log.info("Successfully validated all IO's")

    # Get arequal of dir1
    ret, result_before_bricks_down = (
        collect_mounts_arequal(self.mounts[0], path='dir1/'))
    self.assertTrue(ret, 'Failed to get arequal')
    g.log.info('Getting arequal of dir1 is successful')

    # Bring redundant bricks offline
    brickset_to_offline = self._bring_redundant_bricks_offline(
        self.mnode, self.volname)

    # Get arequal of dir1 after 1st set of bricks down
    ret, result_after_1st_brickset_down = (
        collect_mounts_arequal(self.mounts[0], path='dir1/'))
    self.assertTrue(ret, 'Failed to get arequal')
    g.log.info('Getting arequal of dir1 is successful')

    # Bring bricks online
    ret = bring_bricks_online(self.mnode, self.volname,
                              brickset_to_offline)
    self.assertTrue(ret, 'Bricks not brought online')
    g.log.info('Bricks are online successfully')

    # Wait for brick to come online
    ret = wait_for_bricks_to_be_online(self.mnode, self.volname)
    self.assertTrue(ret, "Bricks are not online")
    g.log.info("EXPECTED : Bricks are online")

    # Check if bricks are online
    ret = get_offline_bricks_list(self.mnode, self.volname)
    self.assertListEqual(ret, [], 'All bricks are not online')
    g.log.info('All bricks are online')

    # Bring redundant bricks offline
    brickset_to_offline = self._bring_redundant_bricks_offline(
        self.mnode, self.volname)

    # Get arequal of dir1 after 2nd set of bricks down
    ret, result_after_2nd_brickset_down = (
        collect_mounts_arequal(self.mounts[0], path='dir1/'))
    self.assertTrue(ret, 'Failed to get arequal')
    g.log.info('Getting arequal of dir1 is successful')

    # Comparing arequals
    self.assertEqual(result_before_bricks_down,
                     result_after_1st_brickset_down,
                     'Arequals are not equals before brickset '
                     'down and after 1st brickset down')
    g.log.info('Arequals are equals before brickset down '
               'and after brickset down')

    self.assertEqual(result_after_2nd_brickset_down,
                     result_after_1st_brickset_down,
                     'Arequals are not equals before 2nd set '
                     'brick down and after 1st set brick down')
    g.log.info('Arequals are equals for 2nd brickset down '
               'and 1st brickset down')
def test_ec_quorumcount_5(self):
    """
    Test Steps:
    - Write IO's when all bricks are online
    - Get subvol from which bricks to be brought down
    - Set volume disperse quorum count to 5
    - Start writing and reading IO's
    - Bring a brick down,say b1
    - Validate write and read is successful
    - Bring a brick down,say b2
    - Validate write has failed and read is successful
    - Start IO's again while quorum is not met on volume
      write should fail and read should pass
    - Add-brick and log
    - Start Rebalance
    - Wait for rebalance,which should fail as quorum is not met
    - Bring brick online
    - Wait for brick to come online
    - Check if bricks are online
    - Start IO's again when all bricks are online
    - IO's should complete successfully
    - Start IO's again and reset volume
    - Bring down other bricks to max redundancy
    - Validating IO's and waiting to complete
    """
    # pylint: disable=too-many-branches,too-many-statements,too-many-locals
    mountpoint = self.mounts[0].mountpoint
    client1 = self.mounts[0].client_system
    client2 = self.mounts[1].client_system

    # Write IO's when all bricks are online
    writecmd = ("cd %s; for i in `seq 1 100` ;"
                "do dd if=/dev/urandom of=file$i bs=1M "
                "count=5;done" % mountpoint)

    # IO's should complete successfully
    ret, _, err = g.run(client1, writecmd)
    self.assertEqual(ret, 0, err)
    g.log.info('Finished writes on files sucessfully')

    # Select a subvol from which bricks to be brought down
    sub_vols = get_subvols(self.mnode, self.volname)
    bricks_list1 = list(choice(sub_vols['volume_subvols']))
    brick_1, brick_2 = sample(bricks_list1, 2)

    # Set volume disperse quorum count to 5
    ret = set_volume_options(self.mnode, self.volname,
                             {"disperse.quorum-count": "5"})
    self.assertTrue(ret, 'Failed to set volume {}'
                    ' options'.format(self.volname))
    g.log.info('Successfully set disperse quorum on %s', self.volname)

    # Start writing and reading IO's
    procwrite, procread, count = [], [], 1
    for mount_obj in self.mounts:
        writecmd = ("/usr/bin/env python %s create_deep_dirs_with_files "
                    "--dirname-start-num %d --dir-depth 5 "
                    "--dir-length 10 --max-num-of-dirs 2 "
                    "--num-of-files 15 %s" % (
                        self.script_upload_path, count,
                        mount_obj.mountpoint))
        proc = g.run_async(mount_obj.client_system, writecmd,
                           user=mount_obj.user)
        procwrite.append(proc)
        count += 10

    self.generate_read_cmd(mountpoint, '1', '10')
    ret = g.run_async(client2, self.readcmd)
    procread.append(ret)

    # Brick 1st brick down
    ret = bring_bricks_offline(self.volname, brick_1)
    self.assertTrue(ret, 'Brick {} is not offline'.format(brick_1))
    g.log.info('Brick %s is offline successfully', brick_1)

    writecmd = ("cd %s; for i in `seq 101 110` ;"
                "do dd if=/dev/urandom of=file$i bs=1M "
                "count=5;done" % mountpoint)

    # IO's should complete successfully
    ret, _, err = g.run(client1, writecmd)
    self.assertEqual(ret, 0, err)
    g.log.info('Finished writes on files sucessfully')

    self.generate_read_cmd(mountpoint, '101', '110')
    ret, _, err = g.run(client1, self.readcmd)
    self.assertEqual(ret, 0, err)
    g.log.info('Finished reads on files sucessfully')

    # Brick 2nd brick down
    ret = bring_bricks_offline(self.volname, brick_2)
    self.assertTrue(ret, 'Brick {} is not offline'.format(brick_2))
    g.log.info('Brick %s is offline successfully', brick_2)

    # Validate write has failed and read is successful
    ret = validate_io_procs(procwrite, self.mounts)
    self.assertFalse(ret, 'Write successful even after disperse quorum is '
                     'not met')
    g.log.info('EXPECTED - Writes failed as disperse quroum is not met')

    # BUG FIX: validate_io_procs() expects a list of mounts; the
    # original passed a single mount object.
    ret = validate_io_procs(procread, [self.mounts[1]])
    self.assertTrue(ret, 'Read operation failed on the client')
    g.log.info('Reads on files successful')

    # Start IO's again while quorum is not met on volume
    procwrite = []
    writecmd = ("/usr/bin/env python %s create_deep_dirs_with_files "
                "--dirname-start-num 20 --dir-depth 1 "
                "--dir-length 10 --max-num-of-dirs 1 "
                "--num-of-files 10 %s" % (self.script_upload_path,
                                          mountpoint))
    proc = g.run_async(client1, writecmd)
    procwrite.append(proc)
    # BUG FIX: wrap the single mount in a list (see above).
    ret = validate_io_procs(procwrite, [self.mounts[0]])
    self.assertFalse(ret, 'Write successful even after disperse quorum is '
                     'not met')
    g.log.info('EXPECTED - Writes failed as disperse quroum is not met')

    self.generate_read_cmd(mountpoint, '1', '100')
    ret, _, err = g.run(client2, self.readcmd)
    self.assertEqual(ret, 0, err)
    g.log.info('Reads on files successful')

    # Add brick
    ret = expand_volume(self.mnode, self.volname, self.servers,
                        self.all_servers_info, force=True)
    self.assertTrue(ret, ("Failed to expand the volume {}".format(
        self.volname)))
    g.log.info("Expanding volume %s is successful", self.volname)

    # Log Volume Info and Status after expanding the volume
    ret = log_volume_info_and_status(self.mnode, self.volname)
    self.assertTrue(ret, ("Logging volume info and status failed on "
                          "volume {}".format(self.volname)))
    g.log.info("Successful in logging volume info and status of volume %s",
               self.volname)

    # Start Rebalance
    ret, _, _ = rebalance_start(self.mnode, self.volname)
    self.assertEqual(ret, 0, ('Rebalance failed on the volume'
                              ' {}'.format(self.volname)))
    g.log.info('Rebalance has started on volume %s', self.volname)

    # Wait for rebalance to complete
    # Which should also fail as quorum is not met
    ret = wait_for_rebalance_to_complete(self.mnode, self.volname,
                                         timeout=600)
    self.assertFalse(ret, "Rebalance passed though disperse quorum "
                     "is not met on volume")
    g.log.info("Expected: Rebalance failed on the volume %s,disperse"
               " quorum is not met", self.volname)

    # Bring brick online
    brick_list = brick_1, brick_2
    ret = bring_bricks_online(self.mnode, self.volname, brick_list)
    self.assertTrue(ret, 'Brick not brought online')
    g.log.info('Brick brought online successfully')

    # Wait for brick to come online
    ret = wait_for_bricks_to_be_online(self.mnode, self.volname)
    self.assertTrue(ret, 'Bricks are not online')
    g.log.info('EXPECTED : Bricks are online')

    # Check if bricks are online
    ret = get_offline_bricks_list(self.mnode, self.volname)
    self.assertListEqual(ret, [], 'All bricks are not online')
    g.log.info('All bricks are online')

    # Start IO's again when all bricks are online
    writecmd = ("cd %s; for i in `seq 101 200` ;"
                "do dd if=/dev/urandom of=file$i bs=1M "
                "count=5;done" % mountpoint)
    self.generate_read_cmd(mountpoint, '101', '120')

    # IO's should complete successfully
    # BUG FIX: '%' alone is not a valid logging placeholder and raised
    # a formatting error; use '%s' so the client name is logged.
    ret, _, err = g.run(client1, writecmd)
    self.assertEqual(ret, 0, err)
    g.log.info('Writes on client %s successful', client1)

    ret, _, err = g.run(client2, self.readcmd)
    self.assertEqual(ret, 0, err)
    g.log.info('Read on client %s successful', client2)

    # Start IO's again
    all_mounts_procs, count = [], 30
    for mount_obj in self.mounts:
        cmd = ("/usr/bin/env python %s create_deep_dirs_with_files "
               "--dirname-start-num %d --dir-depth 2 "
               "--dir-length 10 --max-num-of-dirs 5 "
               "--num-of-files 5 %s" % (
                   self.script_upload_path, count,
                   mount_obj.mountpoint))
        proc = g.run_async(mount_obj.client_system, cmd,
                           user=mount_obj.user)
        all_mounts_procs.append(proc)
        count += 10

    # Reset volume
    ret, _, err = volume_reset(self.mnode, self.volname)
    self.assertEqual(ret, 0, err)
    g.log.info('Reset of volume %s successful', self.volname)

    # Bring down other bricks to max redundancy
    # Bringing bricks offline
    bricks_to_offline = sample(bricks_list1, 2)
    ret = bring_bricks_offline(self.volname, bricks_to_offline)
    self.assertTrue(ret, 'Redundant bricks not offline')
    g.log.info('Redundant bricks are offline successfully')

    # Validating IO's and waiting to complete
    ret = validate_io_procs(all_mounts_procs, self.mounts)
    self.assertTrue(ret, 'IO failed on some of the clients')
    g.log.info("Successfully validated all IO's")
def test_brickreset_ec_volume(self): # pylint: disable=too-many-branches,too-many-statements,too-many-locals """ - Start resource consumption tool - Create IO on dir2 of volume mountpoint - Reset brick start - Check if brick is offline - Reset brick with destination same as source with force running IO's - Validating IO's and waiting for it to complete on dir2 - Remove dir2 - Create 5 directory and 5 files in dir of mountpoint - Rename all files inside dir1 at mountpoint - Create softlink and hardlink of files in dir1 of mountpoint - Delete op for deleting all file in one of the dirs inside dir1 - Change chmod, chown, chgrp - Create tiny, small, medium and large file - Create IO's - Validating IO's and waiting for it to complete - Calculate arequal before kiiling brick - Get brick from Volume - Reset brick - Check if brick is offline - Reset brick by giving a different source and dst node - Reset brick by giving dst and source same without force - Obtain hostname - Reset brick with dst-source same force using hostname - Successful - Monitor heal completion - Bring down other bricks to max redundancy - Get arequal after bringing down bricks - Bring bricks online - Reset brick by giving a same source and dst brick - Kill brick manually - Check if brick is offline - Reset brick by giving a same source and dst brick - Wait for brick to come online - Bring down other bricks to max redundancy - Get arequal after bringing down bricks - Bring bricks online - Remove brick from backend - Check if brick is offline - Reset brick by giving dst and source same without force - Successful - Monitor heal completion - Compare the arequal's calculated """ # Starting resource consumption using top log_file_mem_monitor = getcwd() + '/mem_usage.log' cmd = 'for i in {1..100};do top -n 1 -b|egrep \ "RES|gluster" & free -h 2>&1 >> ' + \ log_file_mem_monitor + ' ;sleep 10;done' g.log.info(cmd) for mount_obj in self.mounts: g.run_async(mount_obj.client_system, cmd) bricks_list = [] # Get the 
bricks from the volume g.log.info("Fetching bricks for the volume : %s", self.volname) bricks_list = get_all_bricks(self.mnode, self.volname) g.log.info("Brick List : %s", bricks_list) # Creating directory2 cmd = ('mkdir %s/dir2' % self.mounts[0].mountpoint) ret, _, _ = g.run(self.mounts[0].client_system, cmd) self.assertEqual(ret, 0, "Failed to create directory2") g.log.info("Directory 2 on %s created successfully", self.mounts[0]) # Creating files on client side for dir2 for mount_obj in self.mounts: g.log.info("Generating data for %s:%s", mount_obj.client_system, mount_obj.mountpoint) # Create dirs with file g.log.info('Creating dirs with file...') command = ("/usr/bin/env python %s create_deep_dirs_with_files " "-d 2 -l 2 -n 2 -f 20 %s/dir2" % (self.script_upload_path, mount_obj.mountpoint)) proc = g.run_async(mount_obj.client_system, command, user=mount_obj.user) self.all_mounts_procs.append(proc) self.io_validation_complete = False # Reset a brick g.log.info('Reset of brick using start') brick_reset = choice(bricks_list) ret, _, _ = reset_brick(self.mnode, self.volname, brick_reset, "start") # Check if the brick is offline g.log.info("Check the brick status if it is offline") offline_bricks = get_offline_bricks_list(self.mnode, self.volname) self.assertEqual(offline_bricks[0], brick_reset, "Brick not offline") g.log.info("Expected : Brick is offline") # Reset brick with dest same as source with force while running IO's g.log.info('Reset of brick with same src and dst brick') ret, _, _ = reset_brick(self.mnode, self.volname, brick_reset, "commit", brick_reset, force="true") self.assertEqual(ret, 0, "Not Expected: Reset brick failed") g.log.info("Expected : Reset brick is successful") # Validating IO's and waiting to complete self.assertTrue(validate_io_procs(self.all_mounts_procs, self.mounts), "IO failed on some of the clients") self.io_validation_complete = True # List all files and dirs created g.log.info("List all files and directories:") ret = 
list_all_files_and_dirs_mounts(self.mounts) self.assertTrue(ret, "Failed to list all files and dirs") g.log.info("Listing all files and directories is successful") # Deleting dir2 cmd = ('rm -rf %s/dir2' % self.mounts[0].mountpoint) ret, _, _ = g.run(self.mounts[0].client_system, cmd) self.assertEqual(ret, 0, "Failed to delete directory2") g.log.info("Directory 2 deleted successfully for %s", self.mounts[0]) del self.all_mounts_procs[:] # Creating dir1 cmd = ('mkdir %s/dir1' % self.mounts[0].mountpoint) ret, _, _ = g.run(self.mounts[0].client_system, cmd) self.assertEqual(ret, 0, "Failed to create directory1") g.log.info("Directory 1 created successfully for %s", self.mounts[0]) # Create 5 dir and 5 files in each dir at mountpoint on dir1 start, end = 1, 5 for mount_obj in self.mounts: # Number of dir and files to be created. dir_range = str(start) + ".." + str(end) file_range = str(start) + ".." + str(end) # Create dir 1-5 at mountpoint. cmd = ('mkdir %s/dir1/dir{%s};' % (mount_obj.mountpoint, dir_range)) g.run(mount_obj.client_system, cmd) # Create files inside each dir. cmd = ('touch %s/dir1/dir{%s}/file{%s};' % (mount_obj.mountpoint, dir_range, file_range)) g.run(mount_obj.client_system, cmd) # Increment counter so that at next client dir and files are made # with diff offset. Like at next client dir will be named # dir6, dir7...dir10. Same with files. start += 5 end += 5 # Rename all files inside dir1 at mountpoint on dir1 clients = [] for mount_obj in self.mounts: clients.append(mount_obj.client_system) cmd = ('cd %s/dir1/dir1/; ' 'for FILENAME in *;' 'do mv $FILENAME Unix_$FILENAME; ' 'done;' % mount_obj.mountpoint) g.run_parallel(clients, cmd) # Truncate at any dir in mountpoint inside dir1 # start is an offset to be added to dirname to act on # diff files at diff clients. 
start = 1 for mount_obj in self.mounts: cmd = ('cd %s/dir1/dir%s/; ' 'for FILENAME in *;' 'do echo > $FILENAME; ' 'done;' % (mount_obj.mountpoint, str(start))) g.run(mount_obj.client_system, cmd) # Create softlink and hardlink of files in mountpoint. Start is an # offset to be added to dirname to act on diff files at diff clients. start = 1 for mount_obj in self.mounts: cmd = ('cd %s/dir1/dir%s; ' 'for FILENAME in *; ' 'do ln -s $FILENAME softlink_$FILENAME; ' 'done;' % (mount_obj.mountpoint, str(start))) g.run(mount_obj.client_system, cmd) cmd = ('cd %s/dir1/dir%s; ' 'for FILENAME in *; ' 'do ln $FILENAME hardlink_$FILENAME; ' 'done;' % (mount_obj.mountpoint, str(start + 1))) g.run(mount_obj.client_system, cmd) start += 5 # Delete op for deleting all file in one of the dirs. start is being # used as offset like in previous testcase in dir1 start = 1 for mount_obj in self.mounts: cmd = ('cd %s/dir1/dir%s; ' 'for FILENAME in *; ' 'do rm -f $FILENAME; ' 'done;' % (mount_obj.mountpoint, str(start))) g.run(mount_obj.client_system, cmd) start += 5 # chmod, chown, chgrp inside dir1 # start and end used as offset to access diff files # at diff clients. start, end = 2, 5 for mount_obj in self.mounts: dir_file_range = '%s..%s' % (str(start), str(end)) cmd = ('chmod 777 %s/dir1/dir{%s}/file{%s}' % (mount_obj.mountpoint, dir_file_range, dir_file_range)) g.run(mount_obj.client_system, cmd) cmd = ('chown root %s/dir1/dir{%s}/file{%s}' % (mount_obj.mountpoint, dir_file_range, dir_file_range)) g.run(mount_obj.client_system, cmd) cmd = ('chgrp root %s/dir1/dir{%s}/file{%s}' % (mount_obj.mountpoint, dir_file_range, dir_file_range)) g.run(mount_obj.client_system, cmd) start += 5 end += 5 # Create tiny, small, medium nd large file # at mountpoint. Offset to differ filenames # at diff clients. 
offset = 1 for mount_obj in self.mounts: cmd = 'fallocate -l 100 tiny_file%s.txt' % str(offset) g.run(mount_obj.client_system, cmd) cmd = 'fallocate -l 20M small_file%s.txt' % str(offset) g.run(mount_obj.client_system, cmd) cmd = 'fallocate -l 200M medium_file%s.txt' % str(offset) g.run(mount_obj.client_system, cmd) cmd = 'fallocate -l 1G large_file%s.txt' % str(offset) g.run(mount_obj.client_system, cmd) offset += 1 # Creating files on client side for dir1 for mount_obj in self.mounts: g.log.info("Generating data for %s:%s", mount_obj.client_system, mount_obj.mountpoint) # Create dirs with file g.log.info('Creating dirs with file...') command = ("/usr/bin/env python %s create_deep_dirs_with_files " "-d 2 -l 2 -n 2 -f 20 %s/dir1" % (self.script_upload_path, mount_obj.mountpoint)) proc = g.run_async(mount_obj.client_system, command, user=mount_obj.user) self.all_mounts_procs.append(proc) self.io_validation_complete = False # Validating IO's and waiting to complete self.assertTrue(validate_io_procs(self.all_mounts_procs, self.mounts), "IO failed on some of the clients") self.io_validation_complete = True # List all files and dirs created g.log.info("List all files and directories:") ret = list_all_files_and_dirs_mounts(self.mounts) self.assertTrue(ret, "Failed to list all files and dirs") g.log.info("Listing all files and directories is successful") # Get areequal before killing the brick g.log.info('Getting areequal before killing of brick...') ret, result_before_killing_brick = (collect_mounts_arequal( self.mounts[0])) self.assertTrue(ret, 'Failed to get arequal') g.log.info('Getting areequal before killing of brick ' 'is successful') # Reset a brick g.log.info('Reset of brick using start') ret, _, _ = reset_brick(self.mnode, self.volname, bricks_list[0], "start") # Check if the brick is offline g.log.info("Check the brick status if it is offline") ret = are_bricks_offline(self.mnode, self.volname, [bricks_list[0]]) self.assertTrue(ret, "Brick is not offline") 
g.log.info("Expected : Brick is offline") # Reset brick by giving a different source and dst brick g.log.info('Reset of brick by giving different src and dst brick') ret, _, _ = reset_brick(self.mnode, self.volname, bricks_list[0], "commit", bricks_list[1]) self.assertNotEqual(ret, 0, "Not Expected: Reset brick is successfull") g.log.info("Expected : Source and Destination brick must be same for" " reset") # Reset brick with destination same as source g.log.info('Reset of brick with same src and dst brick') ret, _, _ = reset_brick(self.mnode, self.volname, bricks_list[0], "commit", bricks_list[0]) self.assertNotEqual(ret, 0, "Not Expected : Reset brick is successful") g.log.info("Expected : Reset brick failed,Vol id is same use force") # Obtain hostname of node ret, hostname_node1, _ = g.run(self.mnode, "hostname") self.assertEqual(ret, 0, ("Failed to obtain hostname of node %s", self.mnode)) g.log.info("Obtained hostname of client. IP- %s, hostname- %s", self.mnode, hostname_node1.strip()) # Reset brick with destination same as source with force using hostname g.log.info('Reset of brick with same src and dst brick') ret, _, _ = reset_brick(hostname_node1.strip(), self.volname, bricks_list[0], "commit", bricks_list[0], force="true") self.assertEqual(ret, 0, "Not Expected: Reset brick failed") g.log.info("Expected : Reset brick is successful") # Wait for brick to come online g.log.info("Waiting for brick to come online") ret = wait_for_bricks_to_be_online(self.mnode, self.volname) self.assertTrue(ret, "Bricks are not online") g.log.info("Expected : Bricks are online") # Monitor heal completion ret = monitor_heal_completion(self.mnode, self.volname) self.assertTrue(ret, 'Heal has not yet completed') g.log.info('Heal has completed successfully') # Check if bricks are online all_bricks = get_all_bricks(self.mnode, self.volname) ret = are_bricks_online(self.mnode, self.volname, all_bricks) self.assertTrue(ret, 'All bricks are not online') g.log.info('All bricks are 
online') # Bring down other bricks to max redundancy # Get List of bricks to bring offline # Bringing bricks offline ret = bring_bricks_offline(self.volname, bricks_list[1:3]) self.assertTrue(ret, 'Bricks not offline') g.log.info('Bricks are offline successfully') sleep(2) # Check if 4 bricks are online all_bricks = [] all_bricks = [ bricks_list[0], bricks_list[3], bricks_list[4], bricks_list[5] ] ret = are_bricks_online(self.mnode, self.volname, all_bricks) self.assertTrue(ret, 'All bricks are not online') g.log.info('All bricks are online') # Check mount point cmd = 'ls -lrt /mnt' ret, _, _ = g.run(self.mounts[0].client_system, cmd) g.log.info("Client mount point details ") # Get arequal after bringing down bricks g.log.info('Getting arequal after bringing down bricks...') ret, result_offline_redundant_brick1 = (collect_mounts_arequal( self.mounts[0])) self.assertTrue(ret, 'Failed to get arequal') g.log.info('Getting arequal before getting bricks offline ' 'is successful') # Bring bricks online list_of_bricks_to_bring_online = bricks_list[1:3] ret = bring_bricks_online(self.mnode, self.volname, list_of_bricks_to_bring_online) self.assertTrue(ret, 'Bricks not brought online') g.log.info('Bricks are online successfully') # Wait for brick to come online g.log.info("Waiting for brick to come online") ret = wait_for_bricks_to_be_online(self.mnode, self.volname) self.assertTrue(ret, "Bricks are not online") g.log.info("Expected : Bricks are online") # Check if bricks are online all_bricks = get_all_bricks(self.mnode, self.volname) ret = are_bricks_online(self.mnode, self.volname, all_bricks) self.assertTrue(ret, 'All bricks are not online') g.log.info('All bricks are online') # Reset brick without bringing down brick g.log.info('Reset of brick by giving different src and dst brick') ret, _, _ = reset_brick(self.mnode, self.volname, bricks_list[1], "commit", bricks_list[1]) self.assertNotEqual(ret, 0, "Not Expected: Reset brick passed") g.log.info("Expected : Brick 
reset failed as source brick must be" " stopped") # Kill the brick manually ret = bring_bricks_offline(self.volname, [bricks_list[1]]) self.assertTrue(ret, 'Brick not offline') g.log.info('Brick is offline successfully') # Check if the brick is offline g.log.info("Check the brick status if it is offline") ret = are_bricks_offline(self.mnode, self.volname, [bricks_list[1]]) self.assertTrue(ret, "Brick is not offline") g.log.info("Expected : Brick is offline") # Reset brick with dest same as source after killing brick manually g.log.info('Reset of brick by giving different src and dst brick') ret, _, _ = reset_brick(self.mnode, self.volname, bricks_list[1], "commit", bricks_list[1], force="true") self.assertEqual(ret, 0, "Not Expected: Reset brick failed") g.log.info("Expected : Reset brick is successful") # Wait for brick to come online g.log.info("Waiting for brick to come online") ret = wait_for_bricks_to_be_online(self.mnode, self.volname) self.assertTrue(ret, "Bricks are not online") g.log.info("Expected : Bricks are online") # Check if bricks are online all_bricks = get_all_bricks(self.mnode, self.volname) ret = are_bricks_online(self.mnode, self.volname, all_bricks) self.assertTrue(ret, 'All bricks are not online') g.log.info('All bricks are online') # Bring down other bricks to max redundancy # Bringing bricks offline ret = bring_bricks_offline(self.volname, bricks_list[2:4]) self.assertTrue(ret, 'Bricks not offline') g.log.info('Bricks are offline successfully') # Check mount point cmd = 'ls -lrt /mnt' ret, _, _ = g.run(self.mounts[0].client_system, cmd) g.log.info("Client mount point details") # Get arequal after bringing down bricks g.log.info('Getting arequal after bringing down redundant bricks...') ret, result_offline_redundant_brick2 = (collect_mounts_arequal( self.mounts[0])) self.assertTrue(ret, 'Failed to get arequal') g.log.info('Getting arequal before getting bricks offline ' 'is successful') # Bring bricks online list_of_bricks_to_bring_online = 
bricks_list[2:4] ret = bring_bricks_online(self.mnode, self.volname, list_of_bricks_to_bring_online) self.assertTrue(ret, 'Bricks not brought online') g.log.info('Bricks are online successfully') # Removing brick from backend brick = bricks_list[0].strip().split(":") cmd = "rm -rf %s" % brick[1] ret, _, _ = g.run(self.mnode, cmd) self.assertEqual(ret, 0, "Failed to delete brick %s" % bricks_list[0]) g.log.info("Removed brick %s sucessfully", bricks_list[0]) # Check if the brick is offline count = 0 while count <= 20: g.log.info("Check the brick status if it is offline") ret = are_bricks_offline(self.mnode, self.volname, [bricks_list[0]]) if ret: break sleep(2) count = +1 self.assertTrue(ret, "Brick is not offline") g.log.info("Expected : Brick is offline") # Reset brick with destination same as source g.log.info('Reset of brick with same src and dst brick') ret, _, _ = reset_brick(hostname_node1.strip(), self.volname, bricks_list[0], "commit", bricks_list[0]) self.assertEqual(ret, 0, "Not Expected: Reset brick failed") g.log.info("Expected : Reset brick is successful") # Monitor heal completion ret = monitor_heal_completion(self.mnode, self.volname) self.assertTrue(ret, 'Heal has not yet completed') g.log.info('Heal has completed successfully') # Comparing arequals self.assertEqual( result_before_killing_brick, result_offline_redundant_brick1, 'Arequals are not equals before killing brick' 'processes and after offlining redundant bricks') g.log.info('Arequals are equals before killing brick' 'processes and after offlining redundant bricks') # Comparing arequals self.assertEqual( result_offline_redundant_brick2, result_offline_redundant_brick1, 'Arequals are not equals for offlining redundant' ' bricks') g.log.info('Arequals are equals for offlining redundant bricks') # Deleting dir1 cmd = ('rm -rf %s/dir1' % self.mounts[0].mountpoint) ret, _, _ = g.run(self.mounts[0].client_system, cmd) self.assertEqual(ret, 0, "Failed to delete directory1") g.log.info("Directory 1 
deleted successfully for %s", self.mounts[0])
def test_fops_ec_brickdown(self): # pylint: disable=too-many-branches,too-many-statements,too-many-locals """ - 1.Start resource consumption tool - 2.Create directory dir1 - 3.Create 5 dir and 5 files in each dir in directory 1 - 4.Rename all file inside dir1 - 5.Truncate at any dir in mountpoint inside dir1 - 6.Create softlink and hardlink of files in mountpoint - 7.chmod, chown, chgrp inside dir1 - 8.Create tiny, small, medium nd large file - 9.Creating files on client side for dir1 - 10.Brick redundant bricks down - 11.Validating IO's and waiting to complete - 12.Creating dir2 - 13.Creating files on client side for dir2 - 14.Bring bricks online - 15.Wait for brick to come online - 16.Check if bricks are online - 17.Monitor heal completion - 18.Validating IO's and waiting to complete """ # Starting resource consumption using top log_file_mem_monitor = '/var/log/glusterfs/mem_usage.log' cmd = ('for i in {1..100};do top -n 1 -b|egrep \ "RES|gluster" & free -h 2>&1 >> %s ; \ sleep 10;done' % (log_file_mem_monitor)) g.log.info(cmd) for server in self.servers: g.run_async(server, cmd) bricks_list = [] # get the bricks from the volume g.log.info("Fetching bricks for the volume : %s", self.volname) bricks_list = get_all_bricks(self.mnode, self.volname) self.assertIsNotNone(bricks_list, "Brick list is empty") g.log.info("Brick List : %s", bricks_list) # Creating dir1 cmd = ('mkdir %s/dir1' % self.mounts[0].mountpoint) ret, _, _ = g.run(self.mounts[0].client_system, cmd) self.assertEqual(ret, 0, "Failed to create dir1") g.log.info("dir1 created successfully for %s", self.mounts[0]) # Create 5 dir and 5 files in each dir at mountpoint on dir1 start, end = 1, 5 for mount_obj in self.mounts: # Number of dir and files to be created. dir_range = ("%s..%s" % (str(start), str(end))) file_range = ("%s..%s" % (str(start), str(end))) # Create dir 1-5 at mountpoint. 
cmd = ('mkdir %s/dir1/dir{%s};' % (mount_obj.mountpoint, dir_range)) ret, _, _ = g.run(mount_obj.client_system, cmd) self.assertFalse(ret, "Directory creation failed") g.log.info("Directory created successfull") # Create files inside each dir. cmd = ('touch %s/dir1/dir{%s}/file{%s};' % (mount_obj.mountpoint, dir_range, file_range)) ret, _, _ = g.run(mount_obj.client_system, cmd) self.assertFalse(ret, "File creation failed") g.log.info("File created successfull") # Increment counter so that at next client dir and files are made # with diff offset. Like at next client dir will be named # dir6, dir7...dir10. Same with files. start += 5 end += 5 # Rename all files inside dir1 at mountpoint on dir1 cmd = ('cd %s/dir1/dir1/; ' 'for FILENAME in *;' 'do mv $FILENAME Unix_$FILENAME; ' 'done;' % self.mounts[0].mountpoint) ret, _, _ = g.run(self.mounts[0].client_system, cmd) self.assertEqual(ret, 0, "Failed to rename file on" "client") g.log.info("Successfully renamed file on client") # Truncate at any dir in mountpoint inside dir1 # start is an offset to be added to dirname to act on # diff files at diff clients. start = 1 for mount_obj in self.mounts: cmd = ('cd %s/dir1/dir%s/; ' 'for FILENAME in *;' 'do echo > $FILENAME; ' 'done;' % (mount_obj.mountpoint, str(start))) ret, _, _ = g.run(mount_obj.client_system, cmd) self.assertFalse(ret, "Truncate failed") g.log.info("Truncate of files successfull") # Create softlink and hardlink of files in mountpoint. Start is an # offset to be added to dirname to act on diff files at diff clients. 
start = 1 for mount_obj in self.mounts: cmd = ('cd %s/dir1/dir%s; ' 'for FILENAME in *; ' 'do ln -s $FILENAME softlink_$FILENAME; ' 'done;' % (mount_obj.mountpoint, str(start))) ret, _, _ = g.run(mount_obj.client_system, cmd) self.assertFalse(ret, "Creating Softlinks have failed") g.log.info("Softlink of files have been changed successfully") cmd = ('cd %s/dir1/dir%s; ' 'for FILENAME in *; ' 'do ln $FILENAME hardlink_$FILENAME; ' 'done;' % (mount_obj.mountpoint, str(start + 1))) ret, _, _ = g.run(mount_obj.client_system, cmd) self.assertFalse(ret, "Creating Hardlinks have failed") g.log.info("Hardlink of files have been changed successfully") start += 5 # chmod, chown, chgrp inside dir1 # start and end used as offset to access diff files # at diff clients. start, end = 2, 5 for mount_obj in self.mounts: dir_file_range = '%s..%s' % (str(start), str(end)) cmd = ('chmod 777 %s/dir1/dir{%s}/file{%s}' % (mount_obj.mountpoint, dir_file_range, dir_file_range)) ret, _, _ = g.run(mount_obj.client_system, cmd) self.assertFalse(ret, "Changing mode of files has failed") g.log.info("Mode of files have been changed successfully") cmd = ('chown root %s/dir1/dir{%s}/file{%s}' % (mount_obj.mountpoint, dir_file_range, dir_file_range)) ret, _, _ = g.run(mount_obj.client_system, cmd) self.assertFalse(ret, "Changing owner of files has failed") g.log.info("Owner of files have been changed successfully") cmd = ('chgrp root %s/dir1/dir{%s}/file{%s}' % (mount_obj.mountpoint, dir_file_range, dir_file_range)) ret, _, _ = g.run(mount_obj.client_system, cmd) self.assertFalse(ret, "Changing group of files has failed") g.log.info("Group of files have been changed successfully") start += 5 end += 5 # Create tiny, small, medium nd large file # at mountpoint. Offset to differ filenames # at diff clients. 
offset = 1 for mount_obj in self.mounts: cmd = 'fallocate -l 100 tiny_file%s.txt' % str(offset) ret, _, _ = g.run(mount_obj.client_system, cmd) self.assertFalse(ret, "Fallocate for tiny files failed") g.log.info("Fallocate for tiny files successfully") cmd = 'fallocate -l 20M small_file%s.txt' % str(offset) ret, _, _ = g.run(mount_obj.client_system, cmd) self.assertFalse(ret, "Fallocate for small files failed") g.log.info("Fallocate for small files successfully") cmd = 'fallocate -l 200M medium_file%s.txt' % str(offset) ret, _, _ = g.run(mount_obj.client_system, cmd) self.assertFalse(ret, "Fallocate for medium files failed") g.log.info("Fallocate for medium files successfully") cmd = 'fallocate -l 1G large_file%s.txt' % str(offset) ret, _, _ = g.run(mount_obj.client_system, cmd) self.assertFalse(ret, "Fallocate for large files failed") g.log.info("Fallocate for large files successfully") offset += 1 # Creating files on client side for dir1 # Write IO all_mounts_procs = [] count = 1 for mount_obj in self.mounts: g.log.info("Starting IO on %s:%s", mount_obj.client_system, mount_obj.mountpoint) cmd = ("/usr/bin/env python %s create_deep_dirs_with_files " "--dirname-start-num %d " "--dir-depth 2 " "--dir-length 10 " "--max-num-of-dirs 5 " "--num-of-files 5 %s/dir1" % (self.script_upload_path, count, mount_obj.mountpoint)) proc = g.run_async(mount_obj.client_system, cmd, user=mount_obj.user) all_mounts_procs.append(proc) count = count + 10 # Bring down other bricks to max redundancy # Bringing bricks offline ret = bring_bricks_offline(self.volname, bricks_list[2:4]) self.assertTrue(ret, 'Bricks not offline') g.log.info('Bricks are offline successfully') # Validating IO's and waiting to complete g.log.info("Validating IO's") ret = validate_io_procs(all_mounts_procs, self.mounts) self.assertTrue(ret, "IO failed on some of the clients") g.log.info("Successfully validated all io's") # Creating dir2 cmd = ('mkdir %s/dir2' % self.mounts[0].mountpoint) ret, _, _ = 
g.run(self.mounts[0].client_system, cmd) self.assertEqual(ret, 0, "Failed to create dir2 ") g.log.info("dir2 created successfully for %s", self.mounts[0]) # Creating files on client side for dir2 # Write IO all_mounts_procs = [] count = 1 for mount_obj in self.mounts: g.log.info("Starting IO on %s:%s", mount_obj.client_system, mount_obj.mountpoint) cmd = ("/usr/bin/env python %s create_deep_dirs_with_files " "--dirname-start-num %d " "--dir-depth 2 " "--dir-length 10 " "--max-num-of-dirs 5 " "--num-of-files 5 %s/dir2" % (self.script_upload_path, count, mount_obj.mountpoint)) proc = g.run_async(mount_obj.client_system, cmd, user=mount_obj.user) all_mounts_procs.append(proc) count = count + 10 # Bring bricks online list_of_bricks_to_bring_online = bricks_list[2:4] ret = bring_bricks_online(self.mnode, self.volname, list_of_bricks_to_bring_online) self.assertTrue(ret, 'Bricks not brought online') g.log.info('Bricks are online successfully') # Wait for brick to come online g.log.info("Waiting for brick to come online") ret = wait_for_bricks_to_be_online(self.mnode, self.volname) self.assertTrue(ret, "Bricks are not online") g.log.info("EXPECTED : Bricks are online") # Check if bricks are online ret = get_offline_bricks_list(self.mnode, self.volname) self.assertListEqual(ret, [], 'All bricks are not online') g.log.info('All bricks are online') # Monitor heal completion ret = monitor_heal_completion(self.mnode, self.volname) self.assertTrue(ret, 'Heal has not yet completed') g.log.info('Heal has completed successfully') # Validating IO's and waiting to complete g.log.info("Validating IO's") ret = validate_io_procs(all_mounts_procs, self.mounts) self.assertTrue(ret, "IO failed on some of the clients") g.log.info("Successfully validated all io's") # Check file exist for memory log g.log.info("Validating log exists") ret = file_exists(self.mnode, '/var/log/glusterfs/mem_usage.log') self.assertTrue(ret, "Memory log file does not exist") g.log.info("Memory log file exists")