def toggle_bricks_and_perform_io(self, file_list, brick_list):
    """
    Kills bricks, does I/O and brings the bricks back up.
    """
    # Bring down bricks.
    g.log.info("Going to bring down the brick process for %s", brick_list)
    ret = bring_bricks_offline(self.volname, brick_list)
    self.assertTrue(ret, ("Failed to bring down the bricks. Please "
                          "check the log file for more details."))
    g.log.info("Brought down the brick process "
               "for %s successfully", brick_list)
    ret = are_bricks_offline(self.mnode, self.volname, brick_list)
    self.assertTrue(ret, 'Bricks %s are not offline' % brick_list)

    # Perform I/O
    for filename in file_list:
        fpath = (self.mounts[0].mountpoint + "/test_gfid_split_brain/" +
                 filename)
        cmd = ("dd if=/dev/urandom of=%s bs=1024 count=1" % fpath)
        ret, _, _ = g.run(self.clients[0], cmd)
        self.assertEqual(ret, 0, "Creating %s failed" % fpath)

    # Bring up bricks
    ret = bring_bricks_online(self.mnode, self.volname, brick_list)
    self.assertTrue(ret, 'Failed to bring brick %s online' % brick_list)
    g.log.info('Bringing brick %s online is successful', brick_list)

    # Waiting for bricks to come online
    g.log.info("Waiting for brick process to come online")
    timeout = 30
    ret = wait_for_bricks_to_be_online(self.mnode, self.volname, timeout)
    self.assertTrue(ret, "Bricks didn't come online within %d seconds"
                    % timeout)
    g.log.info("Bricks are online")
def wait_for_volume_process_to_be_online(mnode, volname, timeout=300):
    """Waits for the volume's processes to be online until timeout

    Args:
        mnode (str): Node on which commands will be executed.
        volname (str): Name of the volume.

    Kwargs:
        timeout (int): timeout value in seconds to wait for all volume
            processes to be online.

    Returns:
        True if the volume's processes are online within timeout,
        False otherwise
    """
    # Adding import here to avoid cyclic imports
    from glustolibs.gluster.brick_libs import wait_for_bricks_to_be_online

    # Wait for bricks to be online
    bricks_online_status = wait_for_bricks_to_be_online(mnode, volname,
                                                        timeout)
    if not bricks_online_status:
        g.log.error("Failed to wait for the volume '%s' processes "
                    "to be online", volname)
        return False

    # TODO: Wait for self-heal daemons to be online
    # TODO: Add any other process checks here

    g.log.info("Volume '%s' processes are all online", volname)
    return True
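# A minimal sketch of how the self-heal-daemon TODO above might be filled in,
# assuming wait_for_self_heal_daemons_to_be_online() (already used by the
# tests below with the same (mnode, volname, timeout) signature) can be
# imported here, e.g. from glustolibs.gluster.heal_libs, without introducing
# a cyclic import:
#
#     from glustolibs.gluster.heal_libs import (
#         wait_for_self_heal_daemons_to_be_online)
#     if not wait_for_self_heal_daemons_to_be_online(mnode, volname, timeout):
#         g.log.error("Self-heal daemons of volume '%s' are not online",
#                     volname)
#         return False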
def tearDown(self):

    # Unmount Volume
    g.log.info("Starting to Unmount Volume %s", self.volname)
    ret = umount_volume(self.mounts[0].client_system,
                        self.mounts[0].mountpoint, mtype=self.mount_type)
    self.assertTrue(ret, ("Failed to Unmount Volume %s" % self.volname))
    g.log.info("Successfully Unmounted Volume %s", self.volname)

    # Clean up all volumes and peer probe to form cluster
    vol_list = get_volume_list(self.mnode)
    if vol_list is not None:
        for volume in vol_list:
            # Check all bricks are online
            ret = wait_for_bricks_to_be_online(self.mnode, volume)
            if not ret:
                raise ExecutionError("Failed to bring bricks online "
                                     "for volume %s" % volume)
            ret = cleanup_volume(self.mnode, volume)
            if not ret:
                raise ExecutionError("Failed to cleanup volume")
            g.log.info("Volume deleted successfully : %s", volume)

    # Peer probe detached servers
    pool = nodes_from_pool_list(self.mnode)
    for node in pool:
        peer_detach(self.mnode, node)
    ret = peer_probe_servers(self.mnode, self.servers)
    if not ret:
        raise ExecutionError("Failed to probe peer "
                             "servers %s" % self.servers)
    g.log.info("Peer probe success for detached "
               "servers %s", self.servers)

    self.get_super_method(self, 'tearDown')()
def _bring_bricks_online(self):
    """
    Bring bricks online and monitor heal completion
    """
    # Bring bricks online
    ret = bring_bricks_online(
        self.mnode, self.volname, self.bricks_to_bring_offline,
        bring_bricks_online_methods=['volume_start_force'])
    self.assertTrue(ret, 'Failed to bring bricks online')

    # Wait for volume processes to be online
    ret = wait_for_bricks_to_be_online(self.mnode, self.volname)
    self.assertTrue(ret, ("Failed to wait for volume {} processes to "
                          "be online".format(self.volname)))
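# The docstring above also mentions monitoring heal completion; a minimal
# sketch of that follow-up step, assuming monitor_heal_completion() (used by
# other tests in this collection with the same arguments) is imported in the
# same module:
#
#     ret = monitor_heal_completion(self.mnode, self.volname)
#     self.assertTrue(ret, 'Heal has not yet completed on volume %s'
#                     % self.volname)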
def bricks_online_and_volume_reset(cls):
    """
    Reset the volume if any bricks are offline.
    Waits for all bricks to be online and resets
    volume options set
    """
    bricks_offline = get_offline_bricks_list(cls.mnode, cls.volname)
    if bricks_offline is not None:
        ret = volume_start(cls.mnode, cls.volname, force=True)
        if not ret:
            raise ExecutionError("Failed to force start volume "
                                 "%s" % cls.volname)
    ret = wait_for_bricks_to_be_online(cls.mnode, cls.volname)
    if not ret:
        raise ExecutionError("Failed to bring bricks online "
                             "for volume %s" % cls.volname)

    ret, _, _ = volume_reset(cls.mnode, cls.volname, force=True)
    if ret:
        raise ExecutionError("Failed to reset volume %s" % cls.volname)
    g.log.info("Successful in volume reset %s", cls.volname)
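# A hypothetical usage sketch (not part of the original helper): calling
# bricks_online_and_volume_reset() from a test class's tearDownClass so that
# offline bricks and leftover volume options are cleaned up before the parent
# teardown runs. The super-call style mirrors the tearDown shown earlier.
#
#     @classmethod
#     def tearDownClass(cls):
#         cls.bricks_online_and_volume_reset()
#         cls.get_super_method(cls, 'tearDownClass')()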
def test_volume_create(self):

    # Create and start a volume
    self.volume['name'] = "first_volume"
    self.volname = "first_volume"
    ret = setup_volume(self.mnode, self.all_servers_info, self.volume)
    self.assertTrue(ret, "Failed to create and start volume")

    # Bring a brick down; volume start force should bring it back online
    g.log.info("Get all the bricks of the volume")
    bricks_list = get_all_bricks(self.mnode, self.volname)
    self.assertIsNotNone(bricks_list, "Failed to get the brick list")
    g.log.info("Successfully got the list of bricks of volume")

    ret = bring_bricks_offline(self.volname, bricks_list[0:2])
    self.assertTrue(ret, "Failed to bring down the bricks")
    g.log.info("Successfully brought the bricks down")

    ret, _, _ = volume_start(self.mnode, self.volname, force=True)
    self.assertEqual(ret, 0, "Failed to start the volume")
    g.log.info("Volume start with force is success")

    ret = wait_for_bricks_to_be_online(self.mnode, self.volname)
    self.assertTrue(ret, "Failed to bring the bricks online")
    g.log.info("Volume start with force successfully brought all the "
               "bricks online")

    # Create volume with previously used bricks and different volume name
    self.volname = "second_volume"
    ret, _, _ = volume_create(self.mnode, self.volname, bricks_list)
    self.assertNotEqual(ret, 0, "Expected: It should fail to create a "
                        "volume with previously used bricks. Actual: "
                        "Successfully created the volume with previously "
                        "used bricks")
    g.log.info("Failed to create the volume with previously used bricks")

    # Create a volume with an already existing volume name
    self.volume['name'] = "first_volume"
    ret = setup_volume(self.mnode, self.all_servers_info, self.volume)
    self.assertTrue(ret, "Expected: It should fail to create a volume "
                    "with already existing volume name. Actual: "
                    "Successfully created the volume with "
                    "already existing volname")
    g.log.info("Failed to create the volume with already existing volname")

    # Creating a volume with a non existing brick path should fail
    self.volname = "second_volume"
    bricks_list = form_bricks_list(self.mnode, self.volname,
                                   len(self.servers), self.servers,
                                   self.all_servers_info)
    nonexisting_brick_index = random.randint(0, len(bricks_list) - 1)
    non_existing_brick = bricks_list[nonexisting_brick_index].split(":")[0]
    non_existing_path = ":/brick/non_existing_path"
    non_existing_brick = non_existing_brick + non_existing_path
    bricks_list[nonexisting_brick_index] = non_existing_brick

    ret, _, _ = volume_create(self.mnode, self.volname, bricks_list)
    self.assertNotEqual(ret, 0, "Expected: Creating a volume with non "
                        "existing brick path should fail. Actual: "
                        "Successfully created the volume with "
                        "non existing brick path")
    g.log.info("Failed to create the volume with non existing brick path")

    # Cleanup the volume and peer detach all servers. Form two clusters, try
    # to create a volume with bricks whose nodes are in different clusters

    # Cleanup volumes
    vol_list = get_volume_list(self.mnode)
    self.assertIsNotNone(vol_list, "Failed to get the volume list")
    for volume in vol_list:
        ret = cleanup_volume(self.mnode, volume)
        self.assertTrue(ret, "Unable to delete volume %s" % volume)

    # Peer detach all servers
    ret = peer_detach_servers(self.mnode, self.servers)
    self.assertTrue(ret, "Peer detach of all servers failed")
    g.log.info("Peer detach of all the servers succeeded")

    # Form cluster 1
    ret, _, _ = peer_probe(self.servers[0], self.servers[1])
    self.assertEqual(ret, 0, "Peer probe from %s to %s failed"
                     % (self.servers[0], self.servers[1]))
    g.log.info("Peer probe from %s to %s succeeded",
               self.servers[0], self.servers[1])

    # Form cluster 2
    ret, _, _ = peer_probe(self.servers[2], self.servers[3])
    self.assertEqual(ret, 0, "Peer probe from %s to %s failed"
                     % (self.servers[2], self.servers[3]))
    g.log.info("Peer probe from %s to %s succeeded",
               self.servers[2], self.servers[3])

    # Creating a volume with bricks which are part of another
    # cluster should fail
    ret = setup_volume(self.mnode, self.all_servers_info, self.volume)
    self.assertFalse(ret, "Expected: Creating a volume with bricks "
                     "which are part of another cluster should fail. "
                     "Actual: Successfully created the volume with "
                     "bricks which are part of another cluster")
    g.log.info("Failed to create the volume with bricks which are "
               "part of another cluster")

    # Form a cluster, bring a node down. Try to create a volume when one of
    # the brick nodes is down
    ret, _, _ = peer_detach(self.servers[2], self.servers[3])
    self.assertEqual(ret, 0, "Peer detach failed")
    g.log.info("Peer detach succeeded")

    ret = peer_probe_servers(self.mnode, self.servers)
    self.assertTrue(ret, "Peer probe failed")
    g.log.info("Peer probe to all the servers succeeded")

    random_server = self.servers[random.randint(1, len(self.servers) - 1)]
    ret = stop_glusterd(random_server)
    self.assertTrue(ret, "Failed to stop glusterd on %s" % random_server)

    self.volume['name'] = "third_volume"
    ret = setup_volume(self.mnode, self.all_servers_info, self.volume)
    self.assertFalse(ret, "Expected: It should fail to create a volume "
                     "when one of the nodes is down. Actual: Successfully "
                     "created the volume with brick whose node is down")
    g.log.info("Failed to create the volume with brick whose node is down")
def test_self_heal_algorithm_full_daemon_off(self):
    """
    Description:
    Checking healing when algorithm is set to "full" and
    self-heal daemon is "off".
    """
    # pylint: disable=too-many-statements

    # Setting volume options for self heal & algorithm
    options = {"metadata-self-heal": "disable",
               "entry-self-heal": "disable",
               "data-self-heal": "disable",
               "data-self-heal-algorithm": "full",
               "self-heal-daemon": "off"}
    ret = set_volume_options(self.mnode, self.volname, options)
    self.assertTrue(ret, "Failed to set the volume options %s" % options)
    g.log.info("Volume set options success")

    # Select bricks to bring down
    bricks_to_bring_offline_dict = (select_bricks_to_bring_offline(
        self.mnode, self.volname))
    bricks_to_bring_offline = bricks_to_bring_offline_dict['volume_bricks']

    g.log.info("Bringing bricks: %s offline", bricks_to_bring_offline)
    ret = bring_bricks_offline(self.volname, bricks_to_bring_offline)
    self.assertTrue(ret, "Failed to bring bricks: %s offline"
                    % bricks_to_bring_offline)
    g.log.info("Successful in bringing bricks: %s offline",
               bricks_to_bring_offline)

    # Validate if bricks are offline
    g.log.info("Validating if bricks: %s are offline",
               bricks_to_bring_offline)
    ret = are_bricks_offline(self.mnode, self.volname,
                             bricks_to_bring_offline)
    self.assertTrue(ret, "Not all the bricks in list: %s are offline"
                    % bricks_to_bring_offline)
    g.log.info("Successfully validated that bricks %s are all offline",
               bricks_to_bring_offline)

    # IO on the mount point
    all_mounts_procs = []
    g.log.info("Creating files on %s:%s", self.mounts[0].client_system,
               self.mounts[0].mountpoint)
    cmd = ("cd %s ;for i in `seq 1 100` ;"
           "do dd if=/dev/urandom of=file$i bs=1M "
           "count=1;done" % self.mounts[0].mountpoint)
    proc = g.run_async(self.mounts[0].client_system, cmd,
                       user=self.mounts[0].user)
    all_mounts_procs.append(proc)

    # Validate IO
    self.assertTrue(
        validate_io_procs(all_mounts_procs, self.mounts),
        "IO failed on some of the clients")

    # Collecting arequal before bringing the bricks up
    g.log.info("Collecting arequal before bringing the bricks online")
    result_before = collect_mounts_arequal(self.mounts)

    # Turning self heal daemon ON
    optionstwo = {"self-heal-daemon": "on"}
    ret = set_volume_options(self.mnode, self.volname, optionstwo)
    self.assertTrue(ret, "Failed to turn self-heal ON")
    g.log.info("Volume set options %s: success", optionstwo)

    # Bring bricks online
    g.log.info("Bring bricks: %s online", bricks_to_bring_offline)
    ret = bring_bricks_online(self.mnode, self.volname,
                              bricks_to_bring_offline)
    self.assertTrue(ret, "Failed to bring bricks: %s online"
                    % bricks_to_bring_offline)
    g.log.info("Successfully brought all bricks: %s online",
               bricks_to_bring_offline)

    # Waiting for bricks to come online
    g.log.info("Waiting for brick process to come online")
    ret = wait_for_bricks_to_be_online(self.mnode, self.volname,
                                       timeout=30)
    self.assertTrue(ret, "Bricks didn't come online within the timeout")
    g.log.info("Bricks are online")

    # Verifying all bricks online
    g.log.info("Verifying volume's all process are online")
    ret = verify_all_process_of_volume_are_online(self.mnode, self.volname)
    self.assertTrue(ret, "Volume %s : All process are not online"
                    % self.volname)
    g.log.info("Volume %s : All process are online", self.volname)

    # Wait for self heal processes to come online
    g.log.info("Wait for self heal process to come online")
    ret = wait_for_self_heal_daemons_to_be_online(self.mnode, self.volname,
                                                  timeout=300)
    self.assertTrue(ret, "Self-heal processes are not online")
    g.log.info("All self heal processes are online")

    # Wait for self-heal to complete
    g.log.info("Wait for self-heal to complete")
    ret = monitor_heal_completion(self.mnode, self.volname)
    self.assertTrue(ret, "Self heal didn't complete even after waiting "
                    "for 20 minutes. 20 minutes is too much a time for "
                    "current test workload")
    g.log.info("Self-heal is successful after bringing bricks online")

    # Arequal after healing
    g.log.info("Collecting arequal after healing")
    result_after = collect_mounts_arequal(self.mounts)

    # Comparing the results
    g.log.info("Comparing both the results")
    self.assertEqual(result_before, result_after, "Arequals are not equal")
def test_ec_replace_brick(self): """ - Start resource consumption tool - Create directory dir1 - Create 5 directory and 5 files in dir of mountpoint - Rename all files inside dir1 at mountpoint - Create softlink and hardlink of files in dir1 of mountpoint - Delete op for deleting all file in one of the dirs inside dir1 - Change chmod, chown, chgrp - Create tiny, small, medium and large file - Get arequal before replacing brick - Replace brick - Get arequal after replacing brick - Compare Arequal's - Create IO's - Replace brick while IO's are going on - Validating IO's and waiting for it to complete """ # pylint: disable=too-many-branches,too-many-statements,too-many-locals # Starting resource consumption using top log_file_mem_monitor = '/var/log/glusterfs/mem_usage.log' cmd = ("for i in {1..20};do top -n 1 -b|egrep " "'RES|gluster' & free -h 2>&1 >> %s ;" "sleep 10;done" % (log_file_mem_monitor)) g.log.info(cmd) cmd_list_procs = [] for server in self.servers: proc = g.run_async(server, cmd) cmd_list_procs.append(proc) # Creating dir1 ret = mkdir(self.mounts[0].client_system, "%s/dir1" % self.mounts[0].mountpoint) self.assertTrue(ret, "Failed to create dir1") g.log.info("Directory dir1 on %s created successfully", self.mounts[0]) # Create 5 dir and 5 files in each dir at mountpoint on dir1 start, end = 1, 5 for mount_obj in self.mounts: # Number of dir and files to be created. dir_range = ("%s..%s" % (str(start), str(end))) file_range = ("%s..%s" % (str(start), str(end))) # Create dir 1-5 at mountpoint. ret = mkdir(mount_obj.client_system, "%s/dir1/dir{%s}" % (mount_obj.mountpoint, dir_range)) self.assertTrue(ret, "Failed to create directory") g.log.info("Directory created successfully") # Create files inside each dir. cmd = ('touch %s/dir1/dir{%s}/file{%s};' % (mount_obj.mountpoint, dir_range, file_range)) ret, _, _ = g.run(mount_obj.client_system, cmd) self.assertFalse(ret, "File creation failed") g.log.info("File created successfull") # Increment counter so that at next client dir and files are made # with diff offset. Like at next client dir will be named # dir6, dir7...dir10. Same with files. start += 5 end += 5 # Rename all files inside dir1 at mountpoint on dir1 cmd = ('cd %s/dir1/dir1/; ' 'for FILENAME in *;' 'do mv $FILENAME Unix_$FILENAME; cd ~;' 'done;' % self.mounts[0].mountpoint) ret, _, _ = g.run(self.mounts[0].client_system, cmd) self.assertEqual(ret, 0, "Failed to rename file on " "client") g.log.info("Successfully renamed file on client") # Truncate at any dir in mountpoint inside dir1 # start is an offset to be added to dirname to act on # diff files at diff clients. start = 1 for mount_obj in self.mounts: cmd = ('cd %s/dir1/dir%s/; ' 'for FILENAME in *;' 'do echo > $FILENAME; cd ~;' 'done;' % (mount_obj.mountpoint, str(start))) ret, _, _ = g.run(mount_obj.client_system, cmd) self.assertFalse(ret, "Truncate failed") g.log.info("Truncate of files successfull") # Create softlink and hardlink of files in mountpoint. Start is an # offset to be added to dirname to act on diff files at diff clients. 
start = 1 for mount_obj in self.mounts: cmd = ('cd %s/dir1/dir%s; ' 'for FILENAME in *; ' 'do ln -s $FILENAME softlink_$FILENAME; cd ~;' 'done;' % (mount_obj.mountpoint, str(start))) ret, _, _ = g.run(mount_obj.client_system, cmd) self.assertFalse(ret, "Creating Softlinks have failed") g.log.info("Softlink of files have been changed successfully") cmd = ('cd %s/dir1/dir%s; ' 'for FILENAME in *; ' 'do ln $FILENAME hardlink_$FILENAME; cd ~;' 'done;' % (mount_obj.mountpoint, str(start + 1))) ret, _, _ = g.run(mount_obj.client_system, cmd) self.assertFalse(ret, "Creating Hardlinks have failed") g.log.info("Hardlink of files have been changed successfully") start += 5 # chmod, chown, chgrp inside dir1 # start and end used as offset to access diff files # at diff clients. start, end = 2, 5 for mount_obj in self.mounts: dir_file_range = '%s..%s' % (str(start), str(end)) cmd = ('chmod 777 %s/dir1/dir{%s}/file{%s}' % (mount_obj.mountpoint, dir_file_range, dir_file_range)) ret, _, _ = g.run(mount_obj.client_system, cmd) self.assertFalse(ret, "Changing mode of files has failed") g.log.info("Mode of files have been changed successfully") cmd = ('chown root %s/dir1/dir{%s}/file{%s}' % (mount_obj.mountpoint, dir_file_range, dir_file_range)) ret, _, _ = g.run(mount_obj.client_system, cmd) self.assertFalse(ret, "Changing owner of files has failed") g.log.info("Owner of files have been changed successfully") cmd = ('chgrp root %s/dir1/dir{%s}/file{%s}' % (mount_obj.mountpoint, dir_file_range, dir_file_range)) ret, _, _ = g.run(mount_obj.client_system, cmd) self.assertFalse(ret, "Changing group of files has failed") g.log.info("Group of files have been changed successfully") start += 5 end += 5 # Create tiny, small, medium and large file # at mountpoint. Offset to differ filenames # at diff clients. 
offset = 1 for mount_obj in self.mounts: cmd = 'fallocate -l 100 tiny_file%s.txt' % str(offset) ret, _, _ = g.run(mount_obj.client_system, cmd) self.assertFalse(ret, "Fallocate for tiny files failed") g.log.info("Fallocate for tiny files successfully") cmd = 'fallocate -l 20M small_file%s.txt' % str(offset) ret, _, _ = g.run(mount_obj.client_system, cmd) self.assertFalse(ret, "Fallocate for small files failed") g.log.info("Fallocate for small files successfully") cmd = 'fallocate -l 200M medium_file%s.txt' % str(offset) ret, _, _ = g.run(mount_obj.client_system, cmd) self.assertFalse(ret, "Fallocate for medium files failed") g.log.info("Fallocate for medium files successfully") cmd = 'fallocate -l 1G large_file%s.txt' % str(offset) ret, _, _ = g.run(mount_obj.client_system, cmd) self.assertFalse(ret, "Fallocate for large files failed") g.log.info("Fallocate for large files successfully") offset += 1 # Get arequal before replacing brick ret, result_before_replacing_brick = (collect_mounts_arequal( self.mounts[0])) self.assertTrue(ret, 'Failed to get arequal') g.log.info('Getting arequal before replacing of brick ' 'is successful') # Replacing a brick of random choice ret = replace_brick_from_volume(self.mnode, self.volname, self.servers, self.all_servers_info) self.assertTrue(ret, "Unexpected:Replace brick is not successful") g.log.info("Expected : Replace brick is successful") # Wait for brick to come online ret = wait_for_bricks_to_be_online(self.mnode, self.volname) self.assertTrue(ret, "Unexpected:Bricks are not online") g.log.info("Expected : Bricks are online") # Monitor heal completion ret = monitor_heal_completion(self.mnode, self.volname) self.assertTrue(ret, 'Unexpected:Heal has not yet completed') g.log.info('Heal has completed successfully') # Check if bricks are online all_bricks = get_all_bricks(self.mnode, self.volname) ret = are_bricks_online(self.mnode, self.volname, all_bricks) self.assertTrue(ret, 'Unexpected:All bricks are not online') g.log.info('All bricks are online') # Get areequal after replacing brick ret, result_after_replacing_brick = (collect_mounts_arequal( self.mounts[0])) self.assertTrue(ret, 'Failed to get arequal') g.log.info('Getting areequal after replacing of brick ' 'is successful') # Comparing arequals self.assertEqual( result_before_replacing_brick, result_after_replacing_brick, 'Arequals are not equals before replacing ' 'brick and after replacing brick') g.log.info('Arequals are equals before replacing brick ' 'and after replacing brick') # Creating files on client side for dir1 # Write IO all_mounts_procs, count = [], 1 for mount_obj in self.mounts: g.log.info("Starting IO on %s:%s", mount_obj.client_system, mount_obj.mountpoint) cmd = ("/usr/bin/env python %s create_deep_dirs_with_files " "--dirname-start-num %d " "--dir-depth 2 " "--dir-length 10 " "--max-num-of-dirs 5 " "--num-of-files 5 %s/dir1" % (self.script_upload_path1, count, mount_obj.mountpoint)) proc = g.run_async(mount_obj.client_system, cmd, user=mount_obj.user) all_mounts_procs.append(proc) count += 10 # Replacing a brick while IO's are going on ret = replace_brick_from_volume(self.mnode, self.volname, self.servers, self.all_servers_info) self.assertTrue(ret, "Unexpected:Replace brick is not successful") g.log.info("Expected : Replace brick is successful") # Wait for brick to come online ret = wait_for_bricks_to_be_online(self.mnode, self.volname) self.assertTrue(ret, "Unexpected:Bricks are not online") g.log.info("Expected : Bricks are online") # Validating IO's and waiting to 
complete ret = validate_io_procs(all_mounts_procs, self.mounts) self.assertTrue(ret, "IO failed on some of the clients") g.log.info("Successfully validated all io's") # Create 2 directories and start IO's which opens FD ret = mkdir(self.mounts[0].client_system, "%s/count{1..2}" % self.mounts[0].mountpoint) self.assertTrue(ret, "Failed to create directories") g.log.info("Directories created on %s successfully", self.mounts[0]) all_fd_procs, count = [], 1 for mount_obj in self.mounts: cmd = ("cd %s ;/usr/bin/env python %s -n 10 -t 120 " "-d 5 -c 16 --dir count%s" % (mount_obj.mountpoint, self.script_upload_path2, count)) proc = g.run_async(mount_obj.client_system, cmd, user=mount_obj.user) all_fd_procs.append(proc) count += 1 # Replacing a brick while open FD IO's are going on ret = replace_brick_from_volume(self.mnode, self.volname, self.servers, self.all_servers_info) self.assertTrue(ret, "Unexpected:Replace brick is not successful") g.log.info("Expected : Replace brick is successful") # Wait for brick to come online ret = wait_for_bricks_to_be_online(self.mnode, self.volname) self.assertTrue(ret, "Unexpected:Bricks are not online") g.log.info("Expected : Bricks are online") # Validating IO's and waiting to complete ret = validate_io_procs(all_fd_procs, self.mounts) self.assertTrue(ret, "IO failed on some of the clients") g.log.info("Successfully validated all io's") # Close connection and check file exist for memory log ret = file_exists(self.mnode, '/var/log/glusterfs/mem_usage.log') self.assertTrue(ret, "Unexpected:Memory log file does " "not exist") g.log.info("Memory log file exists") for proc in cmd_list_procs: ret, _, _ = proc.async_communicate() self.assertEqual(ret, 0, "Memory logging failed") g.log.info("Memory logging is successful")
def test_ec_all_healtypes(self):
    """
    Test steps:
    - Create directory dir1
    - Create files inside dir1
    - Rename all files inside dir1
    - Create softlink and hardlink of files in mountpoint
    - Create tiny, small, medium and large file
    - Get arequal of dir1
    - Create directory dir2
    - Creating files on dir2
    - Bring down other bricks to max redundancy
    - Create directory dir3
    - Start pumping IO to dir3
    - Validating IO's on dir2 and waiting to complete
    - Bring bricks online
    - Wait for bricks to come online
    - Check if bricks are online
    - Monitor heal completion
    - Get arequal of dir1
    - Compare arequal of dir1
    """
    # pylint: disable=too-many-branches,too-many-statements,too-many-locals

    # Get the bricks from the volume
    bricks_list = get_all_bricks(self.mnode, self.volname)
    g.log.info("Brick List : %s", bricks_list)

    mountpoint = self.mounts[0].mountpoint
    client = self.mounts[0].client_system

    # Creating dir1
    ret = mkdir(client, "%s/dir1" % mountpoint)
    self.assertTrue(ret, "Failed to create dir1")
    g.log.info("Directory dir1 on %s created successfully",
               self.mounts[0])

    # Create files inside dir1
    cmd = ('touch %s/dir1/file{1..5};' % mountpoint)
    ret, _, _ = g.run(client, cmd)
    self.assertFalse(ret, "File creation failed")
    g.log.info("Files created successfully")

    # Rename all files inside dir1
    cmd = ('cd %s/dir1/; '
           'for FILENAME in *;'
           'do mv $FILENAME Unix_$FILENAME; cd ~;'
           'done;' % mountpoint)
    ret, _, _ = g.run(client, cmd)
    self.assertEqual(ret, 0, "Failed to rename files on client")
    g.log.info("Successfully renamed files on client")

    # Create softlink and hardlink of files in mountpoint
    cmd = ('cd %s/dir1/; '
           'for FILENAME in *; '
           'do ln -s $FILENAME softlink_$FILENAME; cd ~;'
           'done;' % mountpoint)
    ret, _, _ = g.run(client, cmd)
    self.assertFalse(ret, "Creating softlinks failed")
    g.log.info("Softlinks of files created successfully")

    cmd = ('cd %s/dir1/; '
           'for FILENAME in *; '
           'do ln $FILENAME hardlink_$FILENAME; cd ~;'
           'done;' % mountpoint)
    ret, _, _ = g.run(client, cmd)
    self.assertFalse(ret, "Creating hardlinks failed")
    g.log.info("Hardlinks of files created successfully")

    # Create tiny, small, medium and large files
    # at mountpoint. Offset to differ filenames
    # at diff clients.
    offset = 1
    for mount_obj in self.mounts:
        cmd = 'fallocate -l 100 tiny_file%s.txt' % str(offset)
        ret, _, _ = g.run(mount_obj.client_system, cmd)
        self.assertFalse(ret, "Fallocate for tiny files failed")
        g.log.info("Fallocate for tiny files successful")

        cmd = 'fallocate -l 20M small_file%s.txt' % str(offset)
        ret, _, _ = g.run(mount_obj.client_system, cmd)
        self.assertFalse(ret, "Fallocate for small files failed")
        g.log.info("Fallocate for small files successful")

        cmd = 'fallocate -l 200M medium_file%s.txt' % str(offset)
        ret, _, _ = g.run(mount_obj.client_system, cmd)
        self.assertFalse(ret, "Fallocate for medium files failed")
        g.log.info("Fallocate for medium files successful")

        cmd = 'fallocate -l 1G large_file%s.txt' % str(offset)
        ret, _, _ = g.run(mount_obj.client_system, cmd)
        self.assertFalse(ret, "Fallocate for large files failed")
        g.log.info("Fallocate for large files successful")
        offset += 1

    # Get arequal of dir1
    ret, result_before_brick_down = (
        collect_mounts_arequal(self.mounts[0], path='dir1/'))
    self.assertTrue(ret, 'Failed to get arequal')
    g.log.info('Getting arequal of dir1 is successful')

    # Creating dir2
    ret = mkdir(self.mounts[0].client_system, "%s/dir2" % mountpoint)
    self.assertTrue(ret, "Failed to create dir2")
    g.log.info("Directory dir2 on %s created successfully",
               self.mounts[0])

    # Creating files on dir2
    # Write IO
    all_mounts_procs, count = [], 1
    for mount_obj in self.mounts:
        cmd = ("/usr/bin/env python %s create_deep_dirs_with_files "
               "--dirname-start-num %d --dir-depth 2 "
               "--dir-length 10 --max-num-of-dirs 5 "
               "--num-of-files 5 %s/dir2" % (
                   self.script_upload_path, count,
                   mount_obj.mountpoint))
        proc = g.run_async(mount_obj.client_system, cmd,
                           user=mount_obj.user)
        all_mounts_procs.append(proc)
        count = count + 10

    # Bring down other bricks to max redundancy
    # Bringing bricks offline
    bricks_to_offline = sample(bricks_list, 2)
    ret = bring_bricks_offline(self.volname, bricks_to_offline)
    self.assertTrue(ret, 'Bricks not offline')
    g.log.info('Bricks are offline successfully')

    # Creating dir3
    ret = mkdir(self.mounts[0].client_system, "%s/dir3" % mountpoint)
    self.assertTrue(ret, "Failed to create dir3")
    g.log.info("Directory dir3 on %s created successfully",
               self.mounts[0])

    # Start pumping IO to dir3
    cmd = ("cd %s/dir3; for i in `seq 1 100` ;"
           "do dd if=/dev/urandom of=file$i bs=1M "
           "count=5;done" % mountpoint)
    ret, _, err = g.run(self.mounts[0].client_system, cmd)
    self.assertEqual(ret, 0, err)
    g.log.info('Finished writing on files while a brick is DOWN')

    appendcmd = ("cd %s/dir3; for i in `seq 1 100` ;"
                 "do dd if=/dev/urandom of=file$i bs=1M "
                 "count=1 oflag=append conv=notrunc;done" % mountpoint)

    readcmd = ("cd %s/dir3; for i in `seq 1 100` ;"
               "do dd if=file$i of=/dev/null bs=1M "
               "count=5;done" % mountpoint)

    ret, _, err = g.run(self.mounts[0].client_system, appendcmd)
    self.assertEqual(ret, 0, err)
    g.log.info('Finished append on files after redundant bricks offline')

    ret, _, err = g.run(self.mounts[0].client_system, readcmd)
    self.assertEqual(ret, 0, err)
    g.log.info('Finished read on files after redundant bricks offline')

    # Validating IO's on dir2 and waiting to complete
    ret = validate_io_procs(all_mounts_procs, self.mounts)
    self.assertTrue(ret, "IO failed on some of the clients")
    g.log.info("Successfully validated all IO's")

    # Bring bricks online
    ret = bring_bricks_online(self.mnode, self.volname, bricks_to_offline)
    self.assertTrue(ret, 'Bricks not brought online')
    g.log.info('Bricks are online successfully')

    # Wait for bricks to come online
    ret = wait_for_bricks_to_be_online(self.mnode, self.volname)
    self.assertTrue(ret, "Bricks are not online")
    g.log.info("EXPECTED : Bricks are online")

    # Check if bricks are online
    ret = get_offline_bricks_list(self.mnode, self.volname)
    self.assertListEqual(ret, [], 'All bricks are not online')
    g.log.info('All bricks are online')

    # Monitor heal completion
    ret = monitor_heal_completion(self.mnode, self.volname)
    self.assertTrue(ret, 'Heal has not yet completed')
    g.log.info('Heal has completed successfully')

    # Get arequal of dir1
    ret, result_after_brick_up = (
        collect_mounts_arequal(self.mounts[0], path='dir1/'))
    self.assertTrue(ret, 'Failed to get arequal')
    g.log.info('Getting arequal of dir1 is successful')

    # Comparing arequals of dir1
    self.assertEqual(result_before_brick_down, result_after_brick_up,
                     'Arequals are not equal before and after '
                     'bringing down redundant bricks')
    g.log.info('Arequals are equal before and after '
               'bringing down redundant bricks')
def test_glusterd_quorum_validation(self):
    """
    -> Create two volumes and start them, stop the second volume
    -> Set the server quorum type and set the quorum ratio to 90
    -> Stop glusterd on one of the nodes, so the quorum won't be met
    -> Peer probing a new node should fail
    -> Volume stop will fail
    -> Volume delete will fail
    -> Volume reset will fail
    -> Start glusterd on the node where it was stopped
    -> Volume stop, start, delete will succeed once quorum is met
    """
    # pylint: disable=too-many-statements, too-many-branches

    # Peer probe first 3 servers
    servers_info_from_three_nodes = {}
    for server in self.servers[0:3]:
        servers_info_from_three_nodes[server] = self.all_servers_info[
            server]
        # Peer probe the first 3 servers
        ret, _, _ = peer_probe(self.mnode, server)
        self.assertEqual(ret, 0, "Peer probe failed to one of the servers")
    g.log.info("Peer probe to first 3 nodes succeeded")

    self.volume['servers'] = self.servers[0:3]
    # Create a volume using the first 3 nodes
    ret = setup_volume(self.mnode, servers_info_from_three_nodes,
                       self.volume, force=True)
    self.assertTrue(ret, "Failed to create and start volume")
    g.log.info("Volume created and started successfully")

    # Creating another volume and stopping it
    second_volume = "second_volume"
    self.volume['name'] = second_volume
    ret = setup_volume(self.mnode, servers_info_from_three_nodes,
                       self.volume, force=True)
    self.assertTrue(ret, "Failed to create and start volume")
    g.log.info("Volume created and started successfully")

    # Stopping the second volume
    g.log.info("Stopping the second volume %s", second_volume)
    ret, _, _ = volume_stop(self.mnode, second_volume)
    self.assertEqual(ret, 0, "Failed to stop the volume")
    g.log.info("Successfully stopped second volume %s", second_volume)

    # Setting the server-quorum-type as server
    self.options = {"cluster.server-quorum-type": "server"}
    vol_list = get_volume_list(self.mnode)
    self.assertIsNotNone(vol_list, "Failed to get the volume list")
    g.log.info("Fetched the volume list")
    for volume in vol_list:
        g.log.info("Setting the server-quorum-type as server"
                   " on volume %s", volume)
        ret = set_volume_options(self.mnode, volume, self.options)
        self.assertTrue(ret, "Failed to set the quorum type as server"
                        " on volume %s" % volume)
    g.log.info("Server quorum type is set as server")

    # Setting the server quorum ratio to 90
    self.quorum_percent = {'cluster.server-quorum-ratio': '90%'}
    ret = set_volume_options(self.mnode, 'all', self.quorum_percent)
    self.assertTrue(ret, "Failed to set the server quorum ratio "
                    "to 90 on servers")
    g.log.info("Successfully set server quorum ratio to 90% on servers")

    # Stop glusterd on one of the nodes
    ret = stop_glusterd(self.servers[2])
    self.assertTrue(ret, "Failed to stop glusterd on "
                    "node %s" % self.servers[2])
    g.log.info("Glusterd stop on the node %s succeeded", self.servers[2])

    # Check glusterd is stopped
    ret = is_glusterd_running(self.servers[2])
    self.assertEqual(ret, 1, "Unexpected: Glusterd is running on node")
    g.log.info("Expected: Glusterd stopped on node %s", self.servers[2])

    # Adding a new peer will fail as quorum is not met
    ret, _, _ = peer_probe(self.mnode, self.servers[3])
    self.assertNotEqual(ret, 0, "Unexpected: Succeeded to peer probe "
                        "new node %s when quorum is not met"
                        % self.servers[3])
    g.log.info("Failed to peer probe new node as expected"
               " when quorum not met")

    # Starting an already stopped volume should fail as quorum is not met
    ret, _, _ = volume_start(self.mnode, second_volume)
    self.assertNotEqual(ret, 0, "Unexpected: Successfully started "
                        "volume even when quorum is not met.")
    g.log.info("Volume start %s failed as expected when quorum "
               "is not met", second_volume)

    # Stopping a volume should fail. Stopping the first volume.
    ret, _, _ = volume_stop(self.mnode, self.volname)
    self.assertEqual(ret, 1, "Unexpected: Successfully stopped"
                     " volume even when quorum is not met")
    g.log.info("Volume stop %s failed as expected when quorum "
               "is not met", self.volname)

    # Stopping a volume with force option should fail
    ret, _, _ = volume_stop(self.mnode, self.volname, force=True)
    self.assertNotEqual(ret, 0, "Unexpected: Successfully "
                        "stopped volume with force. Expected: "
                        "Volume stop should fail when quorum is not met")
    g.log.info("Volume stop failed as expected when quorum is not met")

    # Deleting a volume should fail. Deleting the second volume.
    ret = volume_delete(self.mnode, second_volume)
    self.assertFalse(ret, "Unexpected: Volume delete was "
                     "successful even when quorum is not met")
    g.log.info("Volume delete failed as expected when quorum is not met")

    # Volume reset should fail when quorum is not met
    ret, _, _ = volume_reset(self.mnode, self.volname)
    self.assertNotEqual(ret, 0, "Unexpected: Volume reset was "
                        "successful even when quorum is not met")
    g.log.info("Volume reset failed as expected when quorum is not met")

    # Volume reset should fail even with force when quorum is not met
    ret, _, _ = volume_reset(self.mnode, self.volname, force=True)
    self.assertNotEqual(ret, 0, "Unexpected: Volume reset was "
                        "successful with force even "
                        "when quorum is not met")
    g.log.info("Volume reset failed as expected when quorum is not met")

    # Start glusterd on the node where glusterd is stopped
    ret = start_glusterd(self.servers[2])
    self.assertTrue(ret, "Failed to start glusterd on one node")
    g.log.info("Started glusterd on server %s successfully",
               self.servers[2])

    ret = is_glusterd_running(self.servers[2])
    self.assertEqual(ret, 0, "glusterd is not running on "
                     "node %s" % self.servers[2])
    g.log.info("glusterd is running on node %s", self.servers[2])

    # Check peer status whether all peers are in connected state; none of
    # the nodes should be in peer rejected state
    halt, counter, _rc = 30, 0, False
    g.log.info("Wait for some seconds; right after glusterd start it "
               "creates two daemon processes and needs a few seconds "
               "(like 3-5) to initialize")
    while counter < halt:
        ret = is_peer_connected(self.mnode, self.servers[0:3])
        if not ret:
            g.log.info("Peers are not in connected state,"
                       " retry after 2 seconds .......")
            sleep(2)
            counter = counter + 2
        else:
            _rc = True
            g.log.info("Peers are in connected state in the cluster")
            break
    self.assertTrue(_rc, "Peers are not in connected state after "
                    "bringing glusterd back online on the nodes "
                    "on which it had previously been stopped")

    # Check all bricks are online or wait for the bricks to be online
    ret = wait_for_bricks_to_be_online(self.mnode, self.volname)
    self.assertTrue(ret, "All bricks are not online")
    g.log.info("All bricks of the volume %s are online", self.volname)

    # Once quorum is met we should be able to cleanup the volume
    ret = volume_delete(self.mnode, second_volume)
    self.assertTrue(ret, "Volume delete failed even when quorum is met")
    g.log.info("Volume delete succeeded without any issues")

    # Volume stop should succeed
    ret, _, _ = volume_stop(self.mnode, self.volname)
    self.assertEqual(ret, 0, "Volume stop failed")
    g.log.info("Succeeded in stopping the volume as expected")

    # Volume reset should succeed
    ret, _, _ = volume_reset(self.mnode, self.volname)
    self.assertEqual(ret, 0, "Volume reset failed")
    g.log.info("Volume reset succeeded as expected when quorum is met")

    # Peer probe new node should succeed
    ret, _, _ = peer_probe(self.mnode, self.servers[3])
    self.assertEqual(ret, 0, "Failed to peer probe new node even when "
                     "quorum is met")
    g.log.info("Succeeded to peer probe new node when quorum is met")

    # Check peer status whether all peers are in connected state; none of
    # the nodes should be in peer rejected state
    halt, counter, _rc = 30, 0, False
    g.log.info("Wait for some seconds, right after peer probe")
    while counter < halt:
        ret = is_peer_connected(self.mnode, self.servers[0:3])
        if not ret:
            g.log.info("Peers are not in connected state,"
                       " retry after 2 seconds .......")
            sleep(2)
            counter = counter + 2
        else:
            _rc = True
            g.log.info("Peers are in connected state in the cluster")
            break
    self.assertTrue(_rc, "Peers are not in connected state")
def test_ec_data_integrity(self): """ Test steps: - Create directory dir1 - Create 5 dir and 5 files in each dir in directory 1 - Rename all file inside dir1 - Truncate at any dir in mountpoint inside dir1 - Create softlink and hardlink of files in mountpoint - chmod, chown, chgrp inside dir1 - Create tiny, small, medium nd large file - Creating files on client side for dir1 - Validating IO's and waiting to complete - Get arequal of dir1 - Bring redundant bricks offline - Get arequal of dir1 after 1st set of bricks down - Bring redundant bricks offline - Get arequal of dir1 after 2nd set of bricks down """ # pylint: disable=too-many-branches,too-many-statements,too-many-locals brickset_to_offline = [] # Creating dir1 ret = mkdir(self.mounts[0].client_system, "%s/dir1" % self.mounts[0].mountpoint) self.assertTrue(ret, "Failed to create dir1") g.log.info("Directory dir1 on %s created successfully", self.mounts[0]) # Create 5 dir and 5 files in each dir at mountpoint on dir1 start, end = 1, 5 for mount_obj in self.mounts: # Number of dir and files to be created. dir_range = ("%s..%s" % (str(start), str(end))) file_range = ("%s..%s" % (str(start), str(end))) # Create dir 1-5 at mountpoint. ret = mkdir(mount_obj.client_system, "%s/dir1/dir{%s}" % (mount_obj.mountpoint, dir_range)) self.assertTrue(ret, "Failed to create directory") g.log.info("Directory created successfully") # Create files inside each dir. cmd = ('touch %s/dir1/dir{%s}/file{%s};' % (mount_obj.mountpoint, dir_range, file_range)) ret, _, _ = g.run(mount_obj.client_system, cmd) self.assertFalse(ret, "File creation failed") g.log.info("File created successfull") # Increment counter so that at next client dir and files are made # with diff offset. Like at next client dir will be named # dir6, dir7...dir10. Same with files. start += 5 end += 5 # Rename all files inside dir1 at mountpoint on dir1 cmd = ('cd %s/dir1/dir1/; ' 'for FILENAME in *;' 'do mv $FILENAME Unix_$FILENAME; cd ~;' 'done;' % self.mounts[0].mountpoint) ret, _, _ = g.run(self.mounts[0].client_system, cmd) self.assertEqual(ret, 0, "Failed to rename file on " "client") g.log.info("Successfully renamed file on client") # Truncate at any dir in mountpoint inside dir1 # start is an offset to be added to dirname to act on # diff files at diff clients. start = 1 for mount_obj in self.mounts: cmd = ('cd %s/dir1/dir%s/; ' 'for FILENAME in *;' 'do echo > $FILENAME; cd ~;' 'done;' % (mount_obj.mountpoint, str(start))) ret, _, _ = g.run(mount_obj.client_system, cmd) self.assertFalse(ret, "Truncate failed") g.log.info("Truncate of files successfull") # Create softlink and hardlink of files in mountpoint start = 1 for mount_obj in self.mounts: for link_type, ln_mode in (('softlink', 'ln -s'), ('hardlink', 'ln')): cmd = ('cd %s/dir1/dir%s; ' 'for FILENAME in *; ' 'do %s $FILENAME %s_$FILENAME; cd ~;' 'done;' % (mount_obj.mountpoint, str(start), ln_mode, link_type)) ret, _, _ = g.run(mount_obj.client_system, cmd) self.assertFalse(ret, "Creating %s have failed" % link_type) g.log.info("%s of files created successfully", link_type) start += 5 # chmod, chown, chgrp inside dir1 # start and end used as offset to access diff files # at diff clients. 
start, end = 2, 5 for mount_obj in self.mounts: dir_file_range = '%s..%s' % (str(start), str(end)) cmd = ('chmod 777 %s/dir1/dir{%s}/file{%s}' % (mount_obj.mountpoint, dir_file_range, dir_file_range)) ret, _, _ = g.run(mount_obj.client_system, cmd) self.assertFalse(ret, "Changing mode of files has failed") g.log.info("Mode of files have been changed successfully") cmd = ('chown root %s/dir1/dir{%s}/file{%s}' % (mount_obj.mountpoint, dir_file_range, dir_file_range)) ret, _, _ = g.run(mount_obj.client_system, cmd) self.assertFalse(ret, "Changing owner of files has failed") g.log.info("Owner of files have been changed successfully") cmd = ('chgrp root %s/dir1/dir{%s}/file{%s}' % (mount_obj.mountpoint, dir_file_range, dir_file_range)) ret, _, _ = g.run(mount_obj.client_system, cmd) self.assertFalse(ret, "Changing group of files has failed") g.log.info("Group of files have been changed successfully") start += 5 end += 5 # Create tiny, small, medium and large file # at mountpoint. Offset to differ filenames # at diff clients. offset = 1 for mount_obj in self.mounts: for size, filename in (('100', 'tiny_file'), ('20M', 'small_file'), ('200M', 'medium_file'), ('1G', 'large_file')): cmd = 'fallocate -l {} {}{}.txt'.format(size, filename, offset) ret, _, _ = g.run(mount_obj.client_system, cmd) self.assertFalse(ret, "Fallocate for files failed") g.log.info("Fallocate for files successfully") offset += 1 # Creating files on client side for dir1 # Write IO all_mounts_procs, count = [], 1 for mount_obj in self.mounts: cmd = ("/usr/bin/env python %s create_deep_dirs_with_files " "--dirname-start-num %d --dir-depth 2 " "--dir-length 10 --max-num-of-dirs 5 " "--num-of-files 5 %s/dir1" % ( self.script_upload_path, count, mount_obj.mountpoint)) proc = g.run_async(mount_obj.client_system, cmd, user=mount_obj.user) all_mounts_procs.append(proc) count += 10 # Validating IO's and waiting to complete ret = validate_io_procs(all_mounts_procs, self.mounts) self.assertTrue(ret, "IO failed on some of the clients") g.log.info("Successfully validated all IO's") # Get arequal of dir1 ret, result_before_bricks_down = ( collect_mounts_arequal(self.mounts[0], path='dir1/')) self.assertTrue(ret, 'Failed to get arequal') g.log.info('Getting arequal of dir1 ' 'is successful') # Bring redundant bricks offline brickset_to_offline = self._bring_redundant_bricks_offline( self.mnode, self.volname) # Get arequal of dir1 after 1st set of bricks down ret, result_after_1st_brickset_down = ( collect_mounts_arequal(self.mounts[0], path='dir1/')) self.assertTrue(ret, 'Failed to get arequal') g.log.info('Getting arequal of dir1 ' 'is successful') # Bring bricks online ret = bring_bricks_online(self.mnode, self.volname, brickset_to_offline) self.assertTrue(ret, 'Bricks not brought online') g.log.info('Bricks are online successfully') # Wait for brick to come online ret = wait_for_bricks_to_be_online(self.mnode, self.volname) self.assertTrue(ret, "Bricks are not online") g.log.info("EXPECTED : Bricks are online") # Check if bricks are online ret = get_offline_bricks_list(self.mnode, self.volname) self.assertListEqual(ret, [], 'All bricks are not online') g.log.info('All bricks are online') # Bring redundant bricks offline brickset_to_offline = self._bring_redundant_bricks_offline( self.mnode, self.volname) # Get arequal of dir1 after 2nd set of bricks down ret, result_after_2nd_brickset_down = ( collect_mounts_arequal(self.mounts[0], path='dir1/')) self.assertTrue(ret, 'Failed to get arequal') g.log.info('Getting arequal of dir1 ' 'is 
successful') # Comparing arequals self.assertEqual(result_before_bricks_down, result_after_1st_brickset_down, 'Arequals are not equals before brickset ' 'down and after 1st brickset down') g.log.info('Arequals are equals before brickset down ' 'and after brickset down') self.assertEqual(result_after_2nd_brickset_down, result_after_1st_brickset_down, 'Arequals are not equals before 2nd set ' 'brick down and after 1st set brick down') g.log.info('Arequals are equals for 2nd brickset down ' 'and 1st brickset down')
def test_ec_quorumcount_5(self):
    """
    Test Steps:
    - Write IO's when all bricks are online
    - Get subvol from which bricks are to be brought down
    - Set volume disperse quorum count to 5
    - Start writing and reading IO's
    - Bring a brick down, say b1
    - Validate write and read are successful
    - Bring a brick down, say b2
    - Validate write has failed and read is successful
    - Start IO's again while quorum is not met on volume;
      write should fail and read should pass
    - Add-brick and log
    - Start Rebalance
    - Wait for rebalance, which should fail as quorum is not met
    - Bring bricks online
    - Wait for bricks to come online
    - Check if bricks are online
    - Start IO's again when all bricks are online
    - IO's should complete successfully
    - Start IO's again and reset volume
    - Bring down other bricks to max redundancy
    - Validating IO's and waiting to complete
    """
    # pylint: disable=too-many-branches,too-many-statements,too-many-locals
    mountpoint = self.mounts[0].mountpoint
    client1 = self.mounts[0].client_system
    client2 = self.mounts[1].client_system

    # Write IO's when all bricks are online
    writecmd = ("cd %s; for i in `seq 1 100` ;"
                "do dd if=/dev/urandom of=file$i bs=1M "
                "count=5;done" % mountpoint)

    # IO's should complete successfully
    ret, _, err = g.run(client1, writecmd)
    self.assertEqual(ret, 0, err)
    g.log.info('Finished writes on files successfully')

    # Select a subvol from which bricks are to be brought down
    sub_vols = get_subvols(self.mnode, self.volname)
    bricks_list1 = list(choice(sub_vols['volume_subvols']))
    brick_1, brick_2 = sample(bricks_list1, 2)

    # Set volume disperse quorum count to 5
    ret = set_volume_options(self.mnode, self.volname,
                             {"disperse.quorum-count": "5"})
    self.assertTrue(ret, 'Failed to set volume {}'
                    ' options'.format(self.volname))
    g.log.info('Successfully set disperse quorum on %s', self.volname)

    # Start writing and reading IO's
    procwrite, procread, count = [], [], 1
    for mount_obj in self.mounts:
        writecmd = ("/usr/bin/env python %s create_deep_dirs_with_files "
                    "--dirname-start-num %d --dir-depth 5 "
                    "--dir-length 10 --max-num-of-dirs 2 "
                    "--num-of-files 15 %s" % (
                        self.script_upload_path, count,
                        mount_obj.mountpoint))
        proc = g.run_async(mount_obj.client_system, writecmd,
                           user=mount_obj.user)
        procwrite.append(proc)
        count += 10

    self.generate_read_cmd(mountpoint, '1', '10')
    ret = g.run_async(client2, self.readcmd)
    procread.append(ret)

    # Bring the 1st brick down
    ret = bring_bricks_offline(self.volname, brick_1)
    self.assertTrue(ret, 'Brick {} is not offline'.format(brick_1))
    g.log.info('Brick %s is offline successfully', brick_1)

    writecmd = ("cd %s; for i in `seq 101 110` ;"
                "do dd if=/dev/urandom of=file$i bs=1M "
                "count=5;done" % mountpoint)

    # IO's should complete successfully
    ret, _, err = g.run(client1, writecmd)
    self.assertEqual(ret, 0, err)
    g.log.info('Finished writes on files successfully')

    self.generate_read_cmd(mountpoint, '101', '110')
    ret, _, err = g.run(client1, self.readcmd)
    self.assertEqual(ret, 0, err)
    g.log.info('Finished reads on files successfully')

    # Bring the 2nd brick down
    ret = bring_bricks_offline(self.volname, brick_2)
    self.assertTrue(ret, 'Brick {} is not offline'.format(brick_2))
    g.log.info('Brick %s is offline successfully', brick_2)

    # Validate write has failed and read is successful
    ret = validate_io_procs(procwrite, self.mounts)
    self.assertFalse(ret, 'Write successful even after disperse quorum '
                     'is not met')
    g.log.info('EXPECTED - Writes failed as disperse quorum is not met')

    ret = validate_io_procs(procread, self.mounts[1])
    self.assertTrue(ret, 'Read operation failed on the client')
    g.log.info('Reads on files successful')

    # Start IO's again while quorum is not met on volume
    procwrite = []
    writecmd = ("/usr/bin/env python %s create_deep_dirs_with_files "
                "--dirname-start-num 20 --dir-depth 1 "
                "--dir-length 10 --max-num-of-dirs 1 "
                "--num-of-files 10 %s" % (self.script_upload_path,
                                          mountpoint))
    proc = g.run_async(client1, writecmd)
    procwrite.append(proc)
    ret = validate_io_procs(procwrite, self.mounts[0])
    self.assertFalse(ret, 'Write successful even after disperse quorum '
                     'is not met')
    g.log.info('EXPECTED - Writes failed as disperse quorum is not met')

    self.generate_read_cmd(mountpoint, '1', '100')
    ret, _, err = g.run(client2, self.readcmd)
    self.assertEqual(ret, 0, err)
    g.log.info('Reads on files successful')

    # Add brick
    ret = expand_volume(self.mnode, self.volname, self.servers,
                        self.all_servers_info, force=True)
    self.assertTrue(ret, ("Failed to expand the volume {}".format(
        self.volname)))
    g.log.info("Expanding volume %s is successful", self.volname)

    # Log Volume Info and Status after expanding the volume
    ret = log_volume_info_and_status(self.mnode, self.volname)
    self.assertTrue(ret, ("Logging volume info and status failed on "
                          "volume {}".format(self.volname)))
    g.log.info("Successful in logging volume info and status of volume %s",
               self.volname)

    # Start Rebalance
    ret, _, _ = rebalance_start(self.mnode, self.volname)
    self.assertEqual(ret, 0, ('Rebalance failed on the volume'
                              ' {}'.format(self.volname)))
    g.log.info('Rebalance has started on volume %s', self.volname)

    # Wait for rebalance to complete
    # Which should also fail as quorum is not met
    ret = wait_for_rebalance_to_complete(self.mnode, self.volname,
                                         timeout=600)
    self.assertFalse(ret, "Rebalance passed though disperse quorum "
                     "is not met on volume")
    g.log.info("Expected: Rebalance failed on the volume %s, disperse"
               " quorum is not met", self.volname)

    # Bring bricks online
    brick_list = brick_1, brick_2
    ret = bring_bricks_online(self.mnode, self.volname, brick_list)
    self.assertTrue(ret, 'Bricks not brought online')
    g.log.info('Bricks brought online successfully')

    # Wait for bricks to come online
    ret = wait_for_bricks_to_be_online(self.mnode, self.volname)
    self.assertTrue(ret, 'Bricks are not online')
    g.log.info('EXPECTED : Bricks are online')

    # Check if bricks are online
    ret = get_offline_bricks_list(self.mnode, self.volname)
    self.assertListEqual(ret, [], 'All bricks are not online')
    g.log.info('All bricks are online')

    # Start IO's again when all bricks are online
    writecmd = ("cd %s; for i in `seq 101 200` ;"
                "do dd if=/dev/urandom of=file$i bs=1M "
                "count=5;done" % mountpoint)
    self.generate_read_cmd(mountpoint, '101', '120')

    # IO's should complete successfully
    ret, _, err = g.run(client1, writecmd)
    self.assertEqual(ret, 0, err)
    g.log.info('Writes on client %s successful', client1)

    ret, _, err = g.run(client2, self.readcmd)
    self.assertEqual(ret, 0, err)
    g.log.info('Reads on client %s successful', client2)

    # Start IO's again
    all_mounts_procs, count = [], 30
    for mount_obj in self.mounts:
        cmd = ("/usr/bin/env python %s create_deep_dirs_with_files "
               "--dirname-start-num %d --dir-depth 2 "
               "--dir-length 10 --max-num-of-dirs 5 "
               "--num-of-files 5 %s" % (
                   self.script_upload_path, count,
                   mount_obj.mountpoint))
        proc = g.run_async(mount_obj.client_system, cmd,
                           user=mount_obj.user)
        all_mounts_procs.append(proc)
        count += 10

    # Reset volume
    ret, _, err = volume_reset(self.mnode, self.volname)
    self.assertEqual(ret, 0, err)
    g.log.info('Reset of volume %s successful', self.volname)

    # Bring down other bricks to max redundancy
    # Bringing bricks offline
    bricks_to_offline = sample(bricks_list1, 2)
    ret = bring_bricks_offline(self.volname, bricks_to_offline)
    self.assertTrue(ret, 'Redundant bricks not offline')
    g.log.info('Redundant bricks are offline successfully')

    # Validating IO's and waiting to complete
    ret = validate_io_procs(all_mounts_procs, self.mounts)
    self.assertTrue(ret, 'IO failed on some of the clients')
    g.log.info("Successfully validated all IO's")
def test_brickreset_ec_volume(self): # pylint: disable=too-many-branches,too-many-statements,too-many-locals """ - Start resource consumption tool - Create IO on dir2 of volume mountpoint - Reset brick start - Check if brick is offline - Reset brick with destination same as source with force running IO's - Validating IO's and waiting for it to complete on dir2 - Remove dir2 - Create 5 directory and 5 files in dir of mountpoint - Rename all files inside dir1 at mountpoint - Create softlink and hardlink of files in dir1 of mountpoint - Delete op for deleting all file in one of the dirs inside dir1 - Change chmod, chown, chgrp - Create tiny, small, medium and large file - Create IO's - Validating IO's and waiting for it to complete - Calculate arequal before kiiling brick - Get brick from Volume - Reset brick - Check if brick is offline - Reset brick by giving a different source and dst node - Reset brick by giving dst and source same without force - Obtain hostname - Reset brick with dst-source same force using hostname - Successful - Monitor heal completion - Bring down other bricks to max redundancy - Get arequal after bringing down bricks - Bring bricks online - Reset brick by giving a same source and dst brick - Kill brick manually - Check if brick is offline - Reset brick by giving a same source and dst brick - Wait for brick to come online - Bring down other bricks to max redundancy - Get arequal after bringing down bricks - Bring bricks online - Remove brick from backend - Check if brick is offline - Reset brick by giving dst and source same without force - Successful - Monitor heal completion - Compare the arequal's calculated """ # Starting resource consumption using top log_file_mem_monitor = getcwd() + '/mem_usage.log' cmd = 'for i in {1..100};do top -n 1 -b|egrep \ "RES|gluster" & free -h 2>&1 >> ' + \ log_file_mem_monitor + ' ;sleep 10;done' g.log.info(cmd) for mount_obj in self.mounts: g.run_async(mount_obj.client_system, cmd) bricks_list = [] # Get the bricks from the volume g.log.info("Fetching bricks for the volume : %s", self.volname) bricks_list = get_all_bricks(self.mnode, self.volname) g.log.info("Brick List : %s", bricks_list) # Creating directory2 cmd = ('mkdir %s/dir2' % self.mounts[0].mountpoint) ret, _, _ = g.run(self.mounts[0].client_system, cmd) self.assertEqual(ret, 0, "Failed to create directory2") g.log.info("Directory 2 on %s created successfully", self.mounts[0]) # Creating files on client side for dir2 for mount_obj in self.mounts: g.log.info("Generating data for %s:%s", mount_obj.client_system, mount_obj.mountpoint) # Create dirs with file g.log.info('Creating dirs with file...') command = ("/usr/bin/env python %s create_deep_dirs_with_files " "-d 2 -l 2 -n 2 -f 20 %s/dir2" % (self.script_upload_path, mount_obj.mountpoint)) proc = g.run_async(mount_obj.client_system, command, user=mount_obj.user) self.all_mounts_procs.append(proc) self.io_validation_complete = False # Reset a brick g.log.info('Reset of brick using start') brick_reset = choice(bricks_list) ret, _, _ = reset_brick(self.mnode, self.volname, brick_reset, "start") # Check if the brick is offline g.log.info("Check the brick status if it is offline") offline_bricks = get_offline_bricks_list(self.mnode, self.volname) self.assertEqual(offline_bricks[0], brick_reset, "Brick not offline") g.log.info("Expected : Brick is offline") # Reset brick with dest same as source with force while running IO's g.log.info('Reset of brick with same src and dst brick') ret, _, _ = reset_brick(self.mnode, 
self.volname, brick_reset, "commit", brick_reset, force="true") self.assertEqual(ret, 0, "Not Expected: Reset brick failed") g.log.info("Expected : Reset brick is successful") # Validating IO's and waiting to complete self.assertTrue(validate_io_procs(self.all_mounts_procs, self.mounts), "IO failed on some of the clients") self.io_validation_complete = True # List all files and dirs created g.log.info("List all files and directories:") ret = list_all_files_and_dirs_mounts(self.mounts) self.assertTrue(ret, "Failed to list all files and dirs") g.log.info("Listing all files and directories is successful") # Deleting dir2 cmd = ('rm -rf %s/dir2' % self.mounts[0].mountpoint) ret, _, _ = g.run(self.mounts[0].client_system, cmd) self.assertEqual(ret, 0, "Failed to delete directory2") g.log.info("Directory 2 deleted successfully for %s", self.mounts[0]) del self.all_mounts_procs[:] # Creating dir1 cmd = ('mkdir %s/dir1' % self.mounts[0].mountpoint) ret, _, _ = g.run(self.mounts[0].client_system, cmd) self.assertEqual(ret, 0, "Failed to create directory1") g.log.info("Directory 1 created successfully for %s", self.mounts[0]) # Create 5 dir and 5 files in each dir at mountpoint on dir1 start, end = 1, 5 for mount_obj in self.mounts: # Number of dir and files to be created. dir_range = str(start) + ".." + str(end) file_range = str(start) + ".." + str(end) # Create dir 1-5 at mountpoint. cmd = ('mkdir %s/dir1/dir{%s};' % (mount_obj.mountpoint, dir_range)) g.run(mount_obj.client_system, cmd) # Create files inside each dir. cmd = ('touch %s/dir1/dir{%s}/file{%s};' % (mount_obj.mountpoint, dir_range, file_range)) g.run(mount_obj.client_system, cmd) # Increment counter so that at next client dir and files are made # with diff offset. Like at next client dir will be named # dir6, dir7...dir10. Same with files. start += 5 end += 5 # Rename all files inside dir1 at mountpoint on dir1 clients = [] for mount_obj in self.mounts: clients.append(mount_obj.client_system) cmd = ('cd %s/dir1/dir1/; ' 'for FILENAME in *;' 'do mv $FILENAME Unix_$FILENAME; ' 'done;' % mount_obj.mountpoint) g.run_parallel(clients, cmd) # Truncate at any dir in mountpoint inside dir1 # start is an offset to be added to dirname to act on # diff files at diff clients. start = 1 for mount_obj in self.mounts: cmd = ('cd %s/dir1/dir%s/; ' 'for FILENAME in *;' 'do echo > $FILENAME; ' 'done;' % (mount_obj.mountpoint, str(start))) g.run(mount_obj.client_system, cmd) # Create softlink and hardlink of files in mountpoint. Start is an # offset to be added to dirname to act on diff files at diff clients. start = 1 for mount_obj in self.mounts: cmd = ('cd %s/dir1/dir%s; ' 'for FILENAME in *; ' 'do ln -s $FILENAME softlink_$FILENAME; ' 'done;' % (mount_obj.mountpoint, str(start))) g.run(mount_obj.client_system, cmd) cmd = ('cd %s/dir1/dir%s; ' 'for FILENAME in *; ' 'do ln $FILENAME hardlink_$FILENAME; ' 'done;' % (mount_obj.mountpoint, str(start + 1))) g.run(mount_obj.client_system, cmd) start += 5 # Delete op for deleting all file in one of the dirs. start is being # used as offset like in previous testcase in dir1 start = 1 for mount_obj in self.mounts: cmd = ('cd %s/dir1/dir%s; ' 'for FILENAME in *; ' 'do rm -f $FILENAME; ' 'done;' % (mount_obj.mountpoint, str(start))) g.run(mount_obj.client_system, cmd) start += 5 # chmod, chown, chgrp inside dir1 # start and end used as offset to access diff files # at diff clients. 
start, end = 2, 5 for mount_obj in self.mounts: dir_file_range = '%s..%s' % (str(start), str(end)) cmd = ('chmod 777 %s/dir1/dir{%s}/file{%s}' % (mount_obj.mountpoint, dir_file_range, dir_file_range)) g.run(mount_obj.client_system, cmd) cmd = ('chown root %s/dir1/dir{%s}/file{%s}' % (mount_obj.mountpoint, dir_file_range, dir_file_range)) g.run(mount_obj.client_system, cmd) cmd = ('chgrp root %s/dir1/dir{%s}/file{%s}' % (mount_obj.mountpoint, dir_file_range, dir_file_range)) g.run(mount_obj.client_system, cmd) start += 5 end += 5 # Create tiny, small, medium nd large file # at mountpoint. Offset to differ filenames # at diff clients. offset = 1 for mount_obj in self.mounts: cmd = 'fallocate -l 100 tiny_file%s.txt' % str(offset) g.run(mount_obj.client_system, cmd) cmd = 'fallocate -l 20M small_file%s.txt' % str(offset) g.run(mount_obj.client_system, cmd) cmd = 'fallocate -l 200M medium_file%s.txt' % str(offset) g.run(mount_obj.client_system, cmd) cmd = 'fallocate -l 1G large_file%s.txt' % str(offset) g.run(mount_obj.client_system, cmd) offset += 1 # Creating files on client side for dir1 for mount_obj in self.mounts: g.log.info("Generating data for %s:%s", mount_obj.client_system, mount_obj.mountpoint) # Create dirs with file g.log.info('Creating dirs with file...') command = ("/usr/bin/env python %s create_deep_dirs_with_files " "-d 2 -l 2 -n 2 -f 20 %s/dir1" % (self.script_upload_path, mount_obj.mountpoint)) proc = g.run_async(mount_obj.client_system, command, user=mount_obj.user) self.all_mounts_procs.append(proc) self.io_validation_complete = False # Validating IO's and waiting to complete self.assertTrue(validate_io_procs(self.all_mounts_procs, self.mounts), "IO failed on some of the clients") self.io_validation_complete = True # List all files and dirs created g.log.info("List all files and directories:") ret = list_all_files_and_dirs_mounts(self.mounts) self.assertTrue(ret, "Failed to list all files and dirs") g.log.info("Listing all files and directories is successful") # Get areequal before killing the brick g.log.info('Getting areequal before killing of brick...') ret, result_before_killing_brick = (collect_mounts_arequal( self.mounts[0])) self.assertTrue(ret, 'Failed to get arequal') g.log.info('Getting areequal before killing of brick ' 'is successful') # Reset a brick g.log.info('Reset of brick using start') ret, _, _ = reset_brick(self.mnode, self.volname, bricks_list[0], "start") # Check if the brick is offline g.log.info("Check the brick status if it is offline") ret = are_bricks_offline(self.mnode, self.volname, [bricks_list[0]]) self.assertTrue(ret, "Brick is not offline") g.log.info("Expected : Brick is offline") # Reset brick by giving a different source and dst brick g.log.info('Reset of brick by giving different src and dst brick') ret, _, _ = reset_brick(self.mnode, self.volname, bricks_list[0], "commit", bricks_list[1]) self.assertNotEqual(ret, 0, "Not Expected: Reset brick is successfull") g.log.info("Expected : Source and Destination brick must be same for" " reset") # Reset brick with destination same as source g.log.info('Reset of brick with same src and dst brick') ret, _, _ = reset_brick(self.mnode, self.volname, bricks_list[0], "commit", bricks_list[0]) self.assertNotEqual(ret, 0, "Not Expected : Reset brick is successful") g.log.info("Expected : Reset brick failed,Vol id is same use force") # Obtain hostname of node ret, hostname_node1, _ = g.run(self.mnode, "hostname") self.assertEqual(ret, 0, ("Failed to obtain hostname of node %s", self.mnode)) 
g.log.info("Obtained hostname of client. IP- %s, hostname- %s", self.mnode, hostname_node1.strip()) # Reset brick with destination same as source with force using hostname g.log.info('Reset of brick with same src and dst brick') ret, _, _ = reset_brick(hostname_node1.strip(), self.volname, bricks_list[0], "commit", bricks_list[0], force="true") self.assertEqual(ret, 0, "Not Expected: Reset brick failed") g.log.info("Expected : Reset brick is successful") # Wait for brick to come online g.log.info("Waiting for brick to come online") ret = wait_for_bricks_to_be_online(self.mnode, self.volname) self.assertTrue(ret, "Bricks are not online") g.log.info("Expected : Bricks are online") # Monitor heal completion ret = monitor_heal_completion(self.mnode, self.volname) self.assertTrue(ret, 'Heal has not yet completed') g.log.info('Heal has completed successfully') # Check if bricks are online all_bricks = get_all_bricks(self.mnode, self.volname) ret = are_bricks_online(self.mnode, self.volname, all_bricks) self.assertTrue(ret, 'All bricks are not online') g.log.info('All bricks are online') # Bring down other bricks to max redundancy # Get List of bricks to bring offline # Bringing bricks offline ret = bring_bricks_offline(self.volname, bricks_list[1:3]) self.assertTrue(ret, 'Bricks not offline') g.log.info('Bricks are offline successfully') sleep(2) # Check if 4 bricks are online all_bricks = [] all_bricks = [ bricks_list[0], bricks_list[3], bricks_list[4], bricks_list[5] ] ret = are_bricks_online(self.mnode, self.volname, all_bricks) self.assertTrue(ret, 'All bricks are not online') g.log.info('All bricks are online') # Check mount point cmd = 'ls -lrt /mnt' ret, _, _ = g.run(self.mounts[0].client_system, cmd) g.log.info("Client mount point details ") # Get arequal after bringing down bricks g.log.info('Getting arequal after bringing down bricks...') ret, result_offline_redundant_brick1 = (collect_mounts_arequal( self.mounts[0])) self.assertTrue(ret, 'Failed to get arequal') g.log.info('Getting arequal before getting bricks offline ' 'is successful') # Bring bricks online list_of_bricks_to_bring_online = bricks_list[1:3] ret = bring_bricks_online(self.mnode, self.volname, list_of_bricks_to_bring_online) self.assertTrue(ret, 'Bricks not brought online') g.log.info('Bricks are online successfully') # Wait for brick to come online g.log.info("Waiting for brick to come online") ret = wait_for_bricks_to_be_online(self.mnode, self.volname) self.assertTrue(ret, "Bricks are not online") g.log.info("Expected : Bricks are online") # Check if bricks are online all_bricks = get_all_bricks(self.mnode, self.volname) ret = are_bricks_online(self.mnode, self.volname, all_bricks) self.assertTrue(ret, 'All bricks are not online') g.log.info('All bricks are online') # Reset brick without bringing down brick g.log.info('Reset of brick by giving different src and dst brick') ret, _, _ = reset_brick(self.mnode, self.volname, bricks_list[1], "commit", bricks_list[1]) self.assertNotEqual(ret, 0, "Not Expected: Reset brick passed") g.log.info("Expected : Brick reset failed as source brick must be" " stopped") # Kill the brick manually ret = bring_bricks_offline(self.volname, [bricks_list[1]]) self.assertTrue(ret, 'Brick not offline') g.log.info('Brick is offline successfully') # Check if the brick is offline g.log.info("Check the brick status if it is offline") ret = are_bricks_offline(self.mnode, self.volname, [bricks_list[1]]) self.assertTrue(ret, "Brick is not offline") g.log.info("Expected : Brick is offline") # Reset 
brick with dest same as source after killing brick manually
        g.log.info('Reset of brick with same src and dst brick')
        ret, _, _ = reset_brick(self.mnode, self.volname, bricks_list[1],
                                "commit", bricks_list[1], force="true")
        self.assertEqual(ret, 0, "Not Expected: Reset brick failed")
        g.log.info("Expected : Reset brick is successful")

        # Wait for brick to come online
        g.log.info("Waiting for brick to come online")
        ret = wait_for_bricks_to_be_online(self.mnode, self.volname)
        self.assertTrue(ret, "Bricks are not online")
        g.log.info("Expected : Bricks are online")

        # Check if bricks are online
        all_bricks = get_all_bricks(self.mnode, self.volname)
        ret = are_bricks_online(self.mnode, self.volname, all_bricks)
        self.assertTrue(ret, 'All bricks are not online')
        g.log.info('All bricks are online')

        # Bring down other bricks to max redundancy
        # Bringing bricks offline
        ret = bring_bricks_offline(self.volname, bricks_list[2:4])
        self.assertTrue(ret, 'Bricks not offline')
        g.log.info('Bricks are offline successfully')

        # Check mount point
        cmd = 'ls -lrt /mnt'
        ret, _, _ = g.run(self.mounts[0].client_system, cmd)
        g.log.info("Client mount point details")

        # Get arequal after bringing down bricks
        g.log.info('Getting arequal after bringing down redundant bricks...')
        ret, result_offline_redundant_brick2 = (collect_mounts_arequal(
            self.mounts[0]))
        self.assertTrue(ret, 'Failed to get arequal')
        g.log.info('Getting arequal after bringing bricks offline '
                   'is successful')

        # Bring bricks online
        list_of_bricks_to_bring_online = bricks_list[2:4]
        ret = bring_bricks_online(self.mnode, self.volname,
                                  list_of_bricks_to_bring_online)
        self.assertTrue(ret, 'Bricks not brought online')
        g.log.info('Bricks are online successfully')

        # Removing brick from backend
        brick = bricks_list[0].strip().split(":")
        cmd = "rm -rf %s" % brick[1]
        ret, _, _ = g.run(self.mnode, cmd)
        self.assertEqual(ret, 0, "Failed to delete brick %s"
                         % bricks_list[0])
        g.log.info("Removed brick %s successfully", bricks_list[0])

        # Check if the brick is offline
        count = 0
        while count <= 20:
            g.log.info("Check the brick status if it is offline")
            ret = are_bricks_offline(self.mnode, self.volname,
                                     [bricks_list[0]])
            if ret:
                break
            sleep(2)
            count += 1
        self.assertTrue(ret, "Brick is not offline")
        g.log.info("Expected : Brick is offline")

        # Reset brick with destination same as source
        g.log.info('Reset of brick with same src and dst brick')
        ret, _, _ = reset_brick(hostname_node1.strip(), self.volname,
                                bricks_list[0], "commit", bricks_list[0])
        self.assertEqual(ret, 0, "Not Expected: Reset brick failed")
        g.log.info("Expected : Reset brick is successful")

        # Monitor heal completion
        ret = monitor_heal_completion(self.mnode, self.volname)
        self.assertTrue(ret, 'Heal has not yet completed')
        g.log.info('Heal has completed successfully')

        # Comparing arequals
        self.assertEqual(
            result_before_killing_brick, result_offline_redundant_brick1,
            'Arequals are not equal before killing brick '
            'processes and after offlining redundant bricks')
        g.log.info('Arequals are equal before killing brick '
                   'processes and after offlining redundant bricks')

        # Comparing arequals
        self.assertEqual(
            result_offline_redundant_brick2, result_offline_redundant_brick1,
            'Arequals are not equal for offlining redundant bricks')
        g.log.info('Arequals are equal for offlining redundant bricks')

        # Deleting dir1
        cmd = ('rm -rf %s/dir1' % self.mounts[0].mountpoint)
        ret, _, _ = g.run(self.mounts[0].client_system, cmd)
        self.assertEqual(ret, 0, "Failed to delete directory1")
        g.log.info("Directory 1 deleted successfully for %s", self.mounts[0])
def test_fops_ec_brickdown(self): # pylint: disable=too-many-branches,too-many-statements,too-many-locals """ - 1.Start resource consumption tool - 2.Create directory dir1 - 3.Create 5 dir and 5 files in each dir in directory 1 - 4.Rename all file inside dir1 - 5.Truncate at any dir in mountpoint inside dir1 - 6.Create softlink and hardlink of files in mountpoint - 7.chmod, chown, chgrp inside dir1 - 8.Create tiny, small, medium nd large file - 9.Creating files on client side for dir1 - 10.Brick redundant bricks down - 11.Validating IO's and waiting to complete - 12.Creating dir2 - 13.Creating files on client side for dir2 - 14.Bring bricks online - 15.Wait for brick to come online - 16.Check if bricks are online - 17.Monitor heal completion - 18.Validating IO's and waiting to complete """ # Starting resource consumption using top log_file_mem_monitor = '/var/log/glusterfs/mem_usage.log' cmd = ('for i in {1..100};do top -n 1 -b|egrep \ "RES|gluster" & free -h 2>&1 >> %s ; \ sleep 10;done' % (log_file_mem_monitor)) g.log.info(cmd) for server in self.servers: g.run_async(server, cmd) bricks_list = [] # get the bricks from the volume g.log.info("Fetching bricks for the volume : %s", self.volname) bricks_list = get_all_bricks(self.mnode, self.volname) self.assertIsNotNone(bricks_list, "Brick list is empty") g.log.info("Brick List : %s", bricks_list) # Creating dir1 cmd = ('mkdir %s/dir1' % self.mounts[0].mountpoint) ret, _, _ = g.run(self.mounts[0].client_system, cmd) self.assertEqual(ret, 0, "Failed to create dir1") g.log.info("dir1 created successfully for %s", self.mounts[0]) # Create 5 dir and 5 files in each dir at mountpoint on dir1 start, end = 1, 5 for mount_obj in self.mounts: # Number of dir and files to be created. dir_range = ("%s..%s" % (str(start), str(end))) file_range = ("%s..%s" % (str(start), str(end))) # Create dir 1-5 at mountpoint. cmd = ('mkdir %s/dir1/dir{%s};' % (mount_obj.mountpoint, dir_range)) ret, _, _ = g.run(mount_obj.client_system, cmd) self.assertFalse(ret, "Directory creation failed") g.log.info("Directory created successfull") # Create files inside each dir. cmd = ('touch %s/dir1/dir{%s}/file{%s};' % (mount_obj.mountpoint, dir_range, file_range)) ret, _, _ = g.run(mount_obj.client_system, cmd) self.assertFalse(ret, "File creation failed") g.log.info("File created successfull") # Increment counter so that at next client dir and files are made # with diff offset. Like at next client dir will be named # dir6, dir7...dir10. Same with files. start += 5 end += 5 # Rename all files inside dir1 at mountpoint on dir1 cmd = ('cd %s/dir1/dir1/; ' 'for FILENAME in *;' 'do mv $FILENAME Unix_$FILENAME; ' 'done;' % self.mounts[0].mountpoint) ret, _, _ = g.run(self.mounts[0].client_system, cmd) self.assertEqual(ret, 0, "Failed to rename file on" "client") g.log.info("Successfully renamed file on client") # Truncate at any dir in mountpoint inside dir1 # start is an offset to be added to dirname to act on # diff files at diff clients. start = 1 for mount_obj in self.mounts: cmd = ('cd %s/dir1/dir%s/; ' 'for FILENAME in *;' 'do echo > $FILENAME; ' 'done;' % (mount_obj.mountpoint, str(start))) ret, _, _ = g.run(mount_obj.client_system, cmd) self.assertFalse(ret, "Truncate failed") g.log.info("Truncate of files successfull") # Create softlink and hardlink of files in mountpoint. Start is an # offset to be added to dirname to act on diff files at diff clients. 
start = 1 for mount_obj in self.mounts: cmd = ('cd %s/dir1/dir%s; ' 'for FILENAME in *; ' 'do ln -s $FILENAME softlink_$FILENAME; ' 'done;' % (mount_obj.mountpoint, str(start))) ret, _, _ = g.run(mount_obj.client_system, cmd) self.assertFalse(ret, "Creating Softlinks have failed") g.log.info("Softlink of files have been changed successfully") cmd = ('cd %s/dir1/dir%s; ' 'for FILENAME in *; ' 'do ln $FILENAME hardlink_$FILENAME; ' 'done;' % (mount_obj.mountpoint, str(start + 1))) ret, _, _ = g.run(mount_obj.client_system, cmd) self.assertFalse(ret, "Creating Hardlinks have failed") g.log.info("Hardlink of files have been changed successfully") start += 5 # chmod, chown, chgrp inside dir1 # start and end used as offset to access diff files # at diff clients. start, end = 2, 5 for mount_obj in self.mounts: dir_file_range = '%s..%s' % (str(start), str(end)) cmd = ('chmod 777 %s/dir1/dir{%s}/file{%s}' % (mount_obj.mountpoint, dir_file_range, dir_file_range)) ret, _, _ = g.run(mount_obj.client_system, cmd) self.assertFalse(ret, "Changing mode of files has failed") g.log.info("Mode of files have been changed successfully") cmd = ('chown root %s/dir1/dir{%s}/file{%s}' % (mount_obj.mountpoint, dir_file_range, dir_file_range)) ret, _, _ = g.run(mount_obj.client_system, cmd) self.assertFalse(ret, "Changing owner of files has failed") g.log.info("Owner of files have been changed successfully") cmd = ('chgrp root %s/dir1/dir{%s}/file{%s}' % (mount_obj.mountpoint, dir_file_range, dir_file_range)) ret, _, _ = g.run(mount_obj.client_system, cmd) self.assertFalse(ret, "Changing group of files has failed") g.log.info("Group of files have been changed successfully") start += 5 end += 5 # Create tiny, small, medium nd large file # at mountpoint. Offset to differ filenames # at diff clients. 
offset = 1 for mount_obj in self.mounts: cmd = 'fallocate -l 100 tiny_file%s.txt' % str(offset) ret, _, _ = g.run(mount_obj.client_system, cmd) self.assertFalse(ret, "Fallocate for tiny files failed") g.log.info("Fallocate for tiny files successfully") cmd = 'fallocate -l 20M small_file%s.txt' % str(offset) ret, _, _ = g.run(mount_obj.client_system, cmd) self.assertFalse(ret, "Fallocate for small files failed") g.log.info("Fallocate for small files successfully") cmd = 'fallocate -l 200M medium_file%s.txt' % str(offset) ret, _, _ = g.run(mount_obj.client_system, cmd) self.assertFalse(ret, "Fallocate for medium files failed") g.log.info("Fallocate for medium files successfully") cmd = 'fallocate -l 1G large_file%s.txt' % str(offset) ret, _, _ = g.run(mount_obj.client_system, cmd) self.assertFalse(ret, "Fallocate for large files failed") g.log.info("Fallocate for large files successfully") offset += 1 # Creating files on client side for dir1 # Write IO all_mounts_procs = [] count = 1 for mount_obj in self.mounts: g.log.info("Starting IO on %s:%s", mount_obj.client_system, mount_obj.mountpoint) cmd = ("/usr/bin/env python %s create_deep_dirs_with_files " "--dirname-start-num %d " "--dir-depth 2 " "--dir-length 10 " "--max-num-of-dirs 5 " "--num-of-files 5 %s/dir1" % (self.script_upload_path, count, mount_obj.mountpoint)) proc = g.run_async(mount_obj.client_system, cmd, user=mount_obj.user) all_mounts_procs.append(proc) count = count + 10 # Bring down other bricks to max redundancy # Bringing bricks offline ret = bring_bricks_offline(self.volname, bricks_list[2:4]) self.assertTrue(ret, 'Bricks not offline') g.log.info('Bricks are offline successfully') # Validating IO's and waiting to complete g.log.info("Validating IO's") ret = validate_io_procs(all_mounts_procs, self.mounts) self.assertTrue(ret, "IO failed on some of the clients") g.log.info("Successfully validated all io's") # Creating dir2 cmd = ('mkdir %s/dir2' % self.mounts[0].mountpoint) ret, _, _ = g.run(self.mounts[0].client_system, cmd) self.assertEqual(ret, 0, "Failed to create dir2 ") g.log.info("dir2 created successfully for %s", self.mounts[0]) # Creating files on client side for dir2 # Write IO all_mounts_procs = [] count = 1 for mount_obj in self.mounts: g.log.info("Starting IO on %s:%s", mount_obj.client_system, mount_obj.mountpoint) cmd = ("/usr/bin/env python %s create_deep_dirs_with_files " "--dirname-start-num %d " "--dir-depth 2 " "--dir-length 10 " "--max-num-of-dirs 5 " "--num-of-files 5 %s/dir2" % (self.script_upload_path, count, mount_obj.mountpoint)) proc = g.run_async(mount_obj.client_system, cmd, user=mount_obj.user) all_mounts_procs.append(proc) count = count + 10 # Bring bricks online list_of_bricks_to_bring_online = bricks_list[2:4] ret = bring_bricks_online(self.mnode, self.volname, list_of_bricks_to_bring_online) self.assertTrue(ret, 'Bricks not brought online') g.log.info('Bricks are online successfully') # Wait for brick to come online g.log.info("Waiting for brick to come online") ret = wait_for_bricks_to_be_online(self.mnode, self.volname) self.assertTrue(ret, "Bricks are not online") g.log.info("EXPECTED : Bricks are online") # Check if bricks are online ret = get_offline_bricks_list(self.mnode, self.volname) self.assertListEqual(ret, [], 'All bricks are not online') g.log.info('All bricks are online') # Monitor heal completion ret = monitor_heal_completion(self.mnode, self.volname) self.assertTrue(ret, 'Heal has not yet completed') g.log.info('Heal has completed successfully') # Validating IO's and 
waiting to complete
        g.log.info("Validating IO's")
        ret = validate_io_procs(all_mounts_procs, self.mounts)
        self.assertTrue(ret, "IO failed on some of the clients")
        g.log.info("Successfully validated all IO's")

        # Check that the memory log file exists
        g.log.info("Validating that the memory log exists")
        ret = file_exists(self.mnode, '/var/log/glusterfs/mem_usage.log')
        self.assertTrue(ret, "Memory log file does not exist")
        g.log.info("Memory log file exists")
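# Both tests in this section start the same top/free sampling loop by
# hand-building a shell one-liner. The sketch below wraps that resource
# monitor in a helper; it assumes the g.run_async() helper used throughout
# this document, redirects both samples to the log file (unlike the original
# one-liner, which only redirects `free -h`), and the helper name, default
# log path and iteration counts are illustrative.
from glusto.core import Glusto as g


def start_memory_monitor(nodes, log_file='/var/log/glusterfs/mem_usage.log',
                         iterations=100, interval=10):
    """Start a background top/free sampling loop on every node in `nodes`.

    Each iteration appends gluster-related `top` output and `free -h` output
    to `log_file`; the loop runs `iterations` times, sleeping `interval`
    seconds between samples. Returns the list of async proc handles.
    """
    cmd = ('for i in {1..%d}; do '
           'top -n 1 -b | egrep "RES|gluster" >> %s; '
           'free -h >> %s; '
           'sleep %d; '
           'done' % (iterations, log_file, log_file, interval))
    g.log.info("Starting resource monitor: %s", cmd)
    return [g.run_async(node, cmd) for node in nodes]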