def test_rebalance_start_when_glusterd_down(self): # Expanding volume by adding bricks to the volume g.log.info("Start adding bricks to volume") ret = expand_volume(self.mnode, self.volname, self.servers, self.all_servers_info) self.assertTrue(ret, ("Volume %s: Expand failed", self.volname)) g.log.info("Volume %s: Expand success", self.volname) # Form a new list of servers without mnode in it to prevent mnode # from glusterd failure nodes = self.servers[:] nodes.remove(self.mnode) # Stop glusterd on a server self.random_server = random.choice(nodes) g.log.info("Stop glusterd on server %s", self.random_server) ret = stop_glusterd(self.random_server) self.assertTrue(ret, ("Server %s: Failed to stop glusterd", self.random_server)) g.log.info("Server %s: Stopped glusterd", self.random_server) # Start Rebalance g.log.info("Starting rebalance on the volume") ret, _, _ = rebalance_start(self.mnode, self.volname) self.assertEqual(ret, 0, ("Volume %s: Failed to start rebalance", self.volname)) g.log.info("Volume %s: Rebalance start success", self.volname) # Wait for rebalance to complete g.log.info("Waiting for rebalance to complete") ret = wait_for_rebalance_to_complete(self.mnode, self.volname) self.assertFalse(ret, ("Volume %s: Rebalance is completed", self.volname)) g.log.info("Rebalance failed on one or more nodes. Check rebalance " "status for more details")
def _add_brick_rebalance(self):
    """Create files, perform add-brick and wait for rebalance to complete"""
    # Create files on mount point using dd command
    cmd = ('cd %s;for i in {1..100000};'
           'do dd if=/dev/urandom bs=1024 count=1 of=file$i;done;'
           % (self.mounts[0].mountpoint))
    ret, _, _ = g.run(self.mounts[0].client_system, cmd)
    self.assertEqual(ret, 0, "Failed to create files on mount point")
    g.log.info("Successfully created files on mount point")

    # Add brick to volume
    ret = expand_volume(self.mnode, self.volname, self.servers,
                        self.all_servers_info)
    self.assertTrue(ret, "Failed to add brick on volume %s" % self.volname)

    # Trigger rebalance and wait for it to complete
    ret, _, _ = rebalance_start(self.mnode, self.volname, force=True)
    self.assertEqual(ret, 0, "Failed to start rebalance on the volume %s"
                     % self.volname)

    # Wait for rebalance to complete
    ret = wait_for_rebalance_to_complete(self.mnode, self.volname,
                                         timeout=1200)
    self.assertTrue(ret, "Rebalance is not yet complete on the volume "
                         "%s" % self.volname)
    g.log.info("Rebalance successfully completed")
def test_add_brick_rebalance_files_with_holes(self): """ Test case: 1. Create a volume, start it and mount it using fuse. 2. On the volume root, create files with holes. 3. After the file creation is complete, add bricks to the volume. 4. Trigger rebalance on the volume. 5. Wait for rebalance to complete. """ # On the volume root, create files with holes cmd = ("cd %s;for i in {1..5000}; do dd if=/dev/urandom" " of=file_with_holes$i bs=1M count=1 seek=100M; done" % self.mounts[0].mountpoint) ret, _, _ = g.run(self.first_client, cmd) self.assertFalse(ret, "Failed to create files with holes") # After the file creation is complete, add bricks to the volume ret = expand_volume(self.mnode, self.volname, self.servers, self.all_servers_info) self.assertTrue(ret, "Failed to add brick on volume %s" % self.volname) # Trigger rebalance on the volume ret, _, _ = rebalance_start(self.mnode, self.volname) self.assertEqual( ret, 0, "Failed to start rebalance on the volume %s" % self.volname) # Wait for rebalance to complete ret = wait_for_rebalance_to_complete(self.mnode, self.volname, timeout=9000) self.assertTrue( ret, "Rebalance is not yet complete on the volume " "%s" % self.volname) g.log.info("Rebalance successfully completed")
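# Illustrative sketch (not part of the original suite): a spot check that the
# dd commands above really left holes in the files. It compares the apparent
# size against the allocated blocks via `stat`; the helper name is an
# assumption, and it reuses only g.run and the unittest asserts already used
# in these tests.
def _assert_files_are_sparse(self, count=5):
    """Spot-check a few files: allocated bytes should be far below size."""
    for i in range(1, count + 1):
        path = '%s/file_with_holes%d' % (self.mounts[0].mountpoint, i)
        ret, out, _ = g.run(self.first_client, "stat -c '%s %b %B' " + path)
        self.assertEqual(ret, 0, "stat failed on %s" % path)
        size, blocks, block_size = (int(x) for x in out.split())
        self.assertLess(blocks * block_size, size,
                        "%s does not appear to be sparse" % path)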
def test_induce_holes_thenfixlayout(self):
    # pylint: disable=too-many-statements
    m_point = self.mounts[0].mountpoint
    command = 'mkdir -p ' + m_point + '/testdir'
    ret, _, _ = g.run(self.clients[0], command)
    self.assertEqual(ret, 0, "mkdir failed")
    g.log.info("mkdir is successful")

    # DHT Layout validation
    g.log.debug("Verifying hash layout values %s:%s",
                self.clients[0], self.mounts[0].mountpoint)
    ret = validate_files_in_dir(self.clients[0], self.mounts[0].mountpoint,
                                test_type=LAYOUT_IS_COMPLETE,
                                file_type=FILETYPE_DIRS)
    self.assertTrue(ret, "LAYOUT_IS_COMPLETE: FAILED")
    g.log.info("LAYOUT_IS_COMPLETE: PASS")

    # Log Volume Info and Status before shrinking the volume.
    g.log.info("Logging volume info and Status before shrinking volume")
    log_volume_info_and_status(self.mnode, self.volname)

    # Shrinking volume by removing bricks
    g.log.info("Start removing bricks from volume")
    ret, _, _ = remove_brick(self.mnode, self.volname,
                             self.remove_brick_list, "force")
    self.assertFalse(ret, "Remove-brick with force: FAIL")
    g.log.info("Remove-brick with force: PASS")

    # Check the layout
    ret = is_layout_complete(self.mnode, self.volname, dirpath='/testdir')
    self.assertFalse(ret, "Volume %s: Layout is complete" % self.volname)
    g.log.info("Volume %s: Layout has some holes", self.volname)

    # Start Rebalance fix-layout
    g.log.info("Volume %s: Start fix-layout", self.volname)
    ret, _, _ = rebalance_start(self.mnode, self.volname, fix_layout=True)
    self.assertEqual(ret, 0, ("Volume %s: fix-layout start failed",
                              self.volname))
    g.log.info("Volume %s: fix-layout start success", self.volname)

    # Wait for fix-layout to complete
    g.log.info("Waiting for fix-layout to complete")
    ret = wait_for_fix_layout_to_complete(self.mnode, self.volname)
    self.assertTrue(ret, ("Volume %s: Fix-layout is either failed or "
                          "in-progress", self.volname))
    g.log.info("Volume %s: Fix-layout completed successfully",
               self.volname)

    # DHT Layout validation
    g.log.debug("Verifying hash layout values %s:%s",
                self.clients[0], self.mounts[0].mountpoint)
    ret = validate_files_in_dir(self.clients[0], self.mounts[0].mountpoint,
                                test_type=LAYOUT_IS_COMPLETE,
                                file_type=FILETYPE_DIRS)
    self.assertTrue(ret, "LAYOUT_IS_COMPLETE: FAILED")
    g.log.info("LAYOUT_IS_COMPLETE: PASS")
def test_rebalance_while_remove_brick_in_progress(self): """ - Create directories and files on the mount point. - now remove one of the brick from the volume gluster volume remove-brick <vol> <brick> start - immediately start rebalance on the same volume gluster volume rebalance <vol> start """ # pylint: disable=too-many-statements # DHT Layout validation for mount in self.mounts: g.log.debug('Check DHT values %s:%s', mount.client_system, mount.mountpoint) ret = validate_files_in_dir(self.clients[0], mount.mountpoint, test_type=LAYOUT_IS_COMPLETE, file_type=FILETYPE_DIRS) self.assertTrue(ret, "TEST_LAYOUT_IS_COMPLETE: FAILED") g.log.info("TEST_LAYOUT_IS_COMPLETE: PASS") # Log Volume Info and Status before shrinking the volume. g.log.info("Logging volume info and Status before shrinking volume") log_volume_info_and_status(self.mnode, self.volname) g.log.info("Successful in logging volume info and status of volume " "%s", self.volname) # Form bricks list for Shrinking volume self.remove_brick_list = form_bricks_list_to_remove_brick( self.mnode, self.volname, subvol_name=1) self.assertIsNotNone(self.remove_brick_list, ("Volume %s: Failed to " "form bricks list for " "shrink", self.volname)) g.log.info("Volume %s: Formed bricks list for shrink", self.volname) # Shrink volume by removing bricks with option start g.log.info("Start removing bricks for %s", self.volname) ret, _, _ = remove_brick(self.mnode, self.volname, self.remove_brick_list, "start") self.assertEqual(ret, 0, ("Volume %s: Remove-brick status failed", self.volname)) g.log.info("Volume %s: Remove-brick start success ", self.volname) # Log remove-brick status g.log.info("Logging Remove-brick status") ret, out, err = remove_brick(self.mnode, self.volname, self.remove_brick_list, "status") self.assertEqual(ret, 0, ("Volume %s: Remove-brick status failed", self.volname)) g.log.info("Volume %s: Remove-brick status", self.volname) g.log.info(out) # Start rebalance while volume shrink in-progress g.log.info("Volume %s: Start rebalance while volume shrink is " "in-progress") _, _, err = rebalance_start(self.mnode, self.volname) self.assertIn("Either commit or stop the remove-brick task.", err, "Rebalance started successfully while volume shrink" " is in-progress") g.log.info("Failed to start rebalance while volume shrink is " "in progress <EXPECTED>")
def test_rebalance_with_add_brick_and_lookup(self): """ Rebalance with add brick and then lookup on mount - Create a Distributed-Replicated volume. - Create deep dirs(200) and 100 files on the deepest directory. - Expand volume. - Initiate rebalance - Once rebalance is completed, do a lookup on mount and time it. """ # Create Deep dirs. cmd = ( "cd %s/; for i in {1..200};do mkdir dir${i}; cd dir${i};" " if [ ${i} -eq 100 ]; then for j in {1..100}; do touch file${j};" " done; fi; done;" % (self.mounts[0].mountpoint)) ret, _, _ = g.run(self.clients[0], cmd) self.assertEqual(ret, 0, "Failed to create the deep dirs and files") g.log.info("Deep dirs and files created.") # Expand the volume. ret = expand_volume(self.mnode, self.volname, self.servers, self.all_servers_info) self.assertTrue(ret, ("Failed to expand the volume %s", self.volname)) g.log.info("Expanding volume is successful on " "volume %s", self.volname) # Start Rebalance. ret, _, _ = rebalance_start(self.mnode, self.volname) self.assertEqual(ret, 0, ("Failed to start rebalance on the volume " "%s", self.volname)) g.log.info("Successfully started rebalance on the volume %s", self.volname) # Wait for rebalance to complete ret = wait_for_rebalance_to_complete(self.mnode, self.volname, timeout=500) self.assertTrue(ret, ("Rebalance is not yet complete on the volume " "%s", self.volname)) g.log.info("Rebalance is successfully complete on the volume %s", self.volname) # Do a lookup on the mountpoint and note the time taken to run. # The time used for comparison is taken as a benchmark on using a # RHGS 3.5.2 for this TC. For 3.5.2, the time takes came out to be # 4 seconds. Now the condition for subtest to pass is for the lookup # should not be more than 10% of this value, i.e. 4.4 seconds. cmd = ("ls -R %s/" % (self.mounts[0].mountpoint)) start_time = time() ret, _, _ = g.run(self.clients[0], cmd) end_time = time() self.assertEqual(ret, 0, "Failed to do a lookup") time_taken = end_time - start_time # ToDo: Implement a better approach to get benchmark value # self.assertTrue(time_taken <= 4.4, "Lookup takes more time " # "than the previously benchmarked value.") g.log.info("Lookup took : %d seconds", time_taken)
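# Illustrative sketch (not part of the original suite): the ad-hoc timing of
# `ls -R` above could be wrapped in a small helper so the benchmark ToDo can
# later be enabled in one place. The helper name is hypothetical; it assumes
# only g.run and time() as already used in the test above.
def _time_recursive_lookup(self, mountpoint):
    """Run `ls -R` on the mount point and return the elapsed seconds."""
    start_time = time()
    ret, _, _ = g.run(self.clients[0], "ls -R %s/" % mountpoint)
    end_time = time()
    self.assertEqual(ret, 0, "Failed to do a lookup")
    return end_time - start_time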
def test_rebalance_start_when_glusterd_down(self):
    # Expanding volume by adding bricks to the volume
    g.log.info("Start adding bricks to volume")
    ret = expand_volume(self.mnode, self.volname, self.servers,
                        self.all_servers_info)
    self.assertTrue(ret, ("Volume %s: Expand failed", self.volname))
    g.log.info("Volume %s: Expand success", self.volname)

    # Get all server IP addresses which are part of the volume
    bricks = get_all_bricks(self.mnode, self.volname)
    self.assertIsNotNone(bricks, ("Failed to get server IP list for "
                                  "volume %s", self.volname))
    list_of_servers_used = [brick.split(":")[0] for brick in bricks]
    g.log.info("Successfully got server IP list for volume %s",
               self.volname)

    # Form a new list of servers without mnode in it to prevent mnode
    # from glusterd failure
    list_of_servers_used = [server for server in list_of_servers_used
                            if server != self.mnode]

    # Stop glusterd on a server
    self.random_server = choice(list_of_servers_used)
    g.log.info("Stop glusterd on server %s", self.random_server)
    ret = stop_glusterd(self.random_server)
    self.assertTrue(ret, ("Server %s: Failed to stop glusterd",
                          self.random_server))
    g.log.info("Server %s: Stopped glusterd", self.random_server)

    # Start Rebalance
    g.log.info("Starting rebalance on the volume")
    ret, _, _ = rebalance_start(self.mnode, self.volname)
    self.assertEqual(ret, 0, ("Volume %s: Failed to start rebalance",
                              self.volname))
    g.log.info("Volume %s: Rebalance start success", self.volname)

    # Wait for rebalance; it is expected to fail because glusterd is
    # down on one of the nodes
    g.log.info("Waiting for rebalance to complete")
    ret = wait_for_rebalance_to_complete(self.mnode, self.volname)
    self.assertFalse(ret, ("Volume %s: Rebalance is completed",
                           self.volname))
    g.log.info("Expected: Rebalance failed on one or more nodes."
               " Check rebalance status for more details")

    # Confirm from the rebalance log that the failure is due to the
    # glusterd which was brought down
    error_msg1 = "\"fix layout on / failed\""
    error_msg2 = "\"Transport endpoint is not connected\""
    ret, _, _ = g.run(self.mnode, "grep -w %s /var/log/glusterfs/"
                      "%s-rebalance.log | grep -w %s"
                      % (error_msg1, self.volname, error_msg2))
    self.assertEqual(ret, 0, ("Unexpected: Rebalance failed on volume %s "
                              "not because of glusterd down on a node",
                              self.volname))
    g.log.info("Rebalance failed on volume %s due to glusterd down on "
               "one of the nodes", self.volname)
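# Illustrative sketch (not part of the original suite): the log grep above is
# tied to one pair of error strings; a generic helper like the hypothetical
# _rebalance_log_contains() keeps the quoting in one place. It assumes the
# default log path used above and relies only on g.run.
def _rebalance_log_contains(self, pattern):
    """Return True if the volume's rebalance log contains the pattern."""
    log_file = "/var/log/glusterfs/%s-rebalance.log" % self.volname
    ret, _, _ = g.run(self.mnode, "grep -w '%s' %s" % (pattern, log_file))
    return ret == 0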
def test_add_brick_rebalance_with_acl_set_to_files(self): """ Test case: 1. Create a volume, start it and mount it to a client. 2. Create 10 files on the mount point and set acls on the files. 3. Check the acl value and collect arequal-checksum. 4. Add bricks to the volume and start rebalance. 5. Check the value of acl(it should be same as step 3), collect and compare arequal-checksum with the one collected in step 3 """ # Create 10 files on the mount point. cmd = ("cd {}; for i in `seq 1 10`;do touch file$i;done".format( self.mount_point)) ret, _, _ = g.run(self.first_client, cmd) self.assertFalse(ret, "Failed to create files on mount point") for number in range(1, 11): ret = set_acl(self.first_client, 'u:joker:rwx', '{}/file{}'.format(self.mount_point, str(number))) self.assertTrue(ret, "Failed to set acl on files") # Collect arequal on mount point and check acl value arequal_checksum_before = collect_mounts_arequal(self.mounts[0]) self._check_acl_set_to_files() g.log.info("Files created and acl set to files properly") # Add brick to volume ret = expand_volume(self.mnode, self.volname, self.servers, self.all_servers_info) self.assertTrue(ret, "Failed to add brick on volume %s" % self.volname) # Trigger rebalance and wait for it to complete ret, _, _ = rebalance_start(self.mnode, self.volname, force=True) self.assertEqual( ret, 0, "Failed to start rebalance on the volume %s" % self.volname) # Wait for rebalance to complete ret = wait_for_rebalance_to_complete(self.mnode, self.volname, timeout=1200) self.assertTrue( ret, "Rebalance is not yet complete on the volume " "%s" % self.volname) g.log.info("Rebalance successfully completed") # Check acl value if it's same as before rebalance self._check_acl_set_to_files() # Check for data loss by comparing arequal before and after ops arequal_checksum_after = collect_mounts_arequal(self.mounts[0]) self.assertEqual(arequal_checksum_before, arequal_checksum_after, "arequal checksum is NOT MATCHNG") g.log.info("arequal checksum and acl value are SAME")
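# Illustrative sketch (not part of the original suite):
# _check_acl_set_to_files() is referenced above but not shown here. One
# plausible implementation, assuming the acl set above ('u:joker:rwx') and
# using plain getfacl over g.run, could be:
def _check_acl_set_to_files(self):
    """Verify that user:joker:rwx is present in the ACL of file1..file10."""
    for number in range(1, 11):
        path = '{}/file{}'.format(self.mount_point, number)
        ret, out, _ = g.run(self.first_client, 'getfacl %s' % path)
        self.assertEqual(ret, 0, "Failed to get acl of %s" % path)
        self.assertIn('user:joker:rwx', out,
                      "acl not set as expected on %s" % path)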
def test_rebalance_start_status_stop(self): # Getting arequal checksum before rebalance start g.log.info("Getting arequal before rebalance start") arequal_checksum_before_rebalance_start = collect_mounts_arequal( self.mounts) # Start Rebalance g.log.info("Starting Rebalance on the volume") ret, _, _ = rebalance_start(self.mnode, self.volname) self.assertEqual( ret, 0, ("Volume %s: Failed to start rebalance", self.volname)) g.log.info("Volume %s: Rebalance started ", self.volname) # Stop on-going rebalance g.log.info("Stop rebalance on the volume") ret, _, _ = rebalance_stop(self.mnode, self.volname) self.assertEqual(ret, 0, ("Volume %s: Failed to stop rebalance", self.volname)) g.log.info("Checking whether the migration is stopped or not") # Wait till the on-going file migration completes on all servers count = 0 while count < 80: rebalance_count = 0 for server in self.servers: ret, _, _ = g.run(server, "pgrep rebalance") if ret != 0: rebalance_count += 1 if rebalance_count == len(self.servers): break sleep(2) count += 1 g.log.info("Volume %s: Rebalance process is not running on servers", self.volname) # List all files and dirs from mount point g.log.info("List all files and directories:") ret = list_all_files_and_dirs_mounts(self.mounts) g.log.info("Listing all files and directories is successful") # Getting arequal checksum after the rebalance is stopped g.log.info("Getting arequal checksum after the rebalance is stopped") arequal_checksum_after_rebalance_stop = collect_mounts_arequal( self.mounts) # Comparing arequals checksum before start of rebalance and # after the rebalance is stopped g.log.info("Comparing arequals checksum before start of rebalance and" "after the rebalance is stopped") self.assertEqual(arequal_checksum_before_rebalance_start, arequal_checksum_after_rebalance_stop, "arequal checksum is NOT MATCHNG") g.log.info("arequal checksum is SAME")
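# Illustrative sketch (not part of the original suite): the polling loop above
# could be factored into a helper so the retry count and interval are explicit
# and the result can be asserted. The helper name is hypothetical; it relies
# only on g.run, pgrep and sleep, all already used in this test.
def _wait_for_rebalance_process_to_stop(self, retries=80, interval=2):
    """Poll all servers until no rebalance process is running."""
    for _ in range(retries):
        if all(g.run(server, "pgrep rebalance")[0] != 0
               for server in self.servers):
            return True
        sleep(interval)
    return False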
def test_rebalance_multiple_expansions(self): """ Test case: 1. Create a volume, start it and mount it 2. Create some file on mountpoint 3. Collect arequal checksum on mount point pre-rebalance 4. Do the following 3 times: 5. Expand the volume 6. Start rebalance and wait for it to finish 7. Collect arequal checksum on mount point post-rebalance and compare with value from step 3 """ # Create some file on mountpoint cmd = ("cd %s; for i in {1..500} ; do " "dd if=/dev/urandom of=file$i bs=10M count=1; done" % self.mounts[0].mountpoint) ret, _, _ = g.run(self.first_client, cmd) self.assertEqual(ret, 0, "IO failed on volume %s" % self.volname) # Collect arequal checksum before rebalance arequal_checksum_before = collect_mounts_arequal(self.mounts[0]) for _ in range(3): # Add brick to volume ret = expand_volume(self.mnode, self.volname, self.servers, self.all_servers_info) self.assertTrue(ret, "Failed to add brick on volume %s" % self.volname) # Trigger rebalance and wait for it to complete ret, _, _ = rebalance_start(self.mnode, self.volname, force=True) self.assertEqual( ret, 0, "Failed to start rebalance on " "volume %s" % self.volname) # Wait for rebalance to complete ret = wait_for_rebalance_to_complete(self.mnode, self.volname, timeout=1200) self.assertTrue( ret, "Rebalance is not yet complete on the volume " "%s" % self.volname) g.log.info("Rebalance successfully completed") # Collect arequal checksum after rebalance arequal_checksum_after = collect_mounts_arequal(self.mounts[0]) # Check for data loss by comparing arequal before and after # rebalance self.assertEqual(arequal_checksum_before, arequal_checksum_after, "arequal checksum is NOT MATCHNG") g.log.info("arequal checksum is SAME")
def test_add_brick_replace_brick_fix_layout(self): """ Test case: 1. Create a volume, start it and mount it. 2. Create files and dirs on the mount point. 3. Add bricks to the volume. 4. Replace 2 old bricks to the volume. 5. Trigger rebalance fix layout and wait for it to complete. 6. Check layout on all the bricks through trusted.glusterfs.dht. """ # Create directories with some files on mount point cmd = ("cd %s; for i in {1..10}; do mkdir dir$i; for j in {1..5};" " do dd if=/dev/urandom of=dir$i/file$j bs=1M count=1; done;" " done" % self.mounts[0].mountpoint) ret, _, _ = g.run(self.first_client, cmd) self.assertFalse(ret, "Failed to create dirs and files.") # Orginal brick list before add brick brick_list = get_all_bricks(self.mnode, self.volname) self.assertIsNotNone(brick_list, "Empty present brick list") # Add bricks to the volume add_brick_list = form_bricks_list_to_add_brick(self.mnode, self.volname, self.servers, self.all_servers_info) self.assertIsNotNone(add_brick_list, "Empty add brick list") ret, _, _ = add_brick(self.mnode, self.volname, add_brick_list) self.assertFalse(ret, "Failed to add bricks to the volume") g.log.info("Successfully added bricks to the volume") # Replace 2 old bricks to the volume for _ in range(0, 2): brick = choice(brick_list) self._replace_a_old_added_brick(brick) brick_list.remove(brick) # Start rebalance and wait for it to complete ret, _, _ = rebalance_start(self.mnode, self.volname, fix_layout=True) self.assertFalse(ret, "Failed to start rebalance on volume") ret = wait_for_fix_layout_to_complete(self.mnode, self.volname, timeout=800) self.assertTrue(ret, "Rebalance failed on volume") # Check layout on all the bricks through trusted.glusterfs.dht self._check_trusted_glusterfs_dht_on_all_bricks()
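# Illustrative sketch (not part of the original suite):
# _check_trusted_glusterfs_dht_on_all_bricks() is referenced above but not
# shown. A minimal version, assuming get_all_bricks and get_fattr as used
# elsewhere in these tests, could simply assert the xattr exists on every
# brick root after fix-layout:
def _check_trusted_glusterfs_dht_on_all_bricks(self):
    """Check that trusted.glusterfs.dht is set on the root of every brick."""
    bricks = get_all_bricks(self.mnode, self.volname)
    self.assertIsNotNone(bricks, "Failed to get the brick list")
    for brick in bricks:
        node, brick_path = brick.split(':')
        ret = get_fattr(node, brick_path, 'trusted.glusterfs.dht')
        self.assertIsNotNone(
            ret, "trusted.glusterfs.dht is not set on %s" % brick)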
def _start_rebalance_and_wait(self):
    """Start rebalance and wait"""
    # Start Rebalance
    ret, _, _ = rebalance_start(self.mnode, self.volname)
    self.assertEqual(ret, 0, ("Failed to start rebalance on the volume "
                              "%s", self.volname))
    g.log.info("Successfully started rebalance on the volume %s",
               self.volname)

    # Wait for rebalance to complete
    ret = wait_for_rebalance_to_complete(self.mnode, self.volname)
    self.assertTrue(ret, ("Rebalance is not yet complete on the volume "
                          "%s", self.volname))
    g.log.info("Rebalance is successfully complete on the volume %s",
               self.volname)
def _rebalance_completion(self, volume_name):
    """Rebalance start and completion after expansion."""
    ret, _, err = rebalance_ops.rebalance_start(
        'auto_get_gluster_endpoint', volume_name)
    self.assertFalse(
        ret,
        "Rebalance for {} volume not started with error {}".format(
            volume_name, err))

    for w in waiter.Waiter(240, 10):
        reb_status = rebalance_ops.get_rebalance_status(
            'auto_get_gluster_endpoint', volume_name)
        if reb_status["aggregate"]["statusStr"] == "completed":
            break
    if w.expired:
        raise AssertionError(
            "Failed to complete the rebalance in 240 seconds")
def _trigger_rebalance_and_wait(self, rebal_force=False):
    """Start rebalance with or without force and wait"""
    # Trigger rebalance on volume
    ret, _, _ = rebalance_start(self.mnode, self.volname,
                                force=rebal_force)
    self.assertEqual(ret, 0, "Failed to start rebalance on the volume %s"
                     % self.volname)

    # Wait for rebalance to complete
    ret = wait_for_rebalance_to_complete(self.mnode, self.volname,
                                         timeout=1200)
    self.assertTrue(ret, "Rebalance is not yet complete on the volume "
                         "%s" % self.volname)
    g.log.info("Rebalance successfully completed")
def _expand_volume_and_wait_for_rebalance_to_complete(self):
    """Expand volume and wait for rebalance to complete"""
    # Add brick to volume
    ret = expand_volume(self.mnode, self.volname, self.servers,
                        self.all_servers_info)
    self.assertTrue(ret, "Failed to add brick on volume %s" % self.volname)

    # Trigger rebalance and wait for it to complete
    ret, _, _ = rebalance_start(self.mnode, self.volname, force=True)
    self.assertEqual(ret, 0, "Failed to start rebalance on the volume %s"
                     % self.volname)

    # Wait for rebalance to complete
    ret = wait_for_rebalance_to_complete(self.mnode, self.volname,
                                         timeout=6000)
    self.assertTrue(ret, "Rebalance is not yet complete on the volume "
                         "%s" % self.volname)
    g.log.info("Rebalance successfully completed")
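# Illustrative usage (not part of the original suite): a test built on the
# helper above would typically bracket it with arequal collection, e.g.:
#
#     arequal_before = collect_mounts_arequal(self.mounts[0])
#     self._expand_volume_and_wait_for_rebalance_to_complete()
#     arequal_after = collect_mounts_arequal(self.mounts[0])
#     self.assertEqual(arequal_before, arequal_after,
#                      "arequal checksum is NOT MATCHING")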
def _expand_volume_and_verify_rebalance(self):
    """Expand the volume, trigger rebalance and verify the file is copied"""
    # Expand the volume
    ret = expand_volume(self.mnode, self.volname, self.servers,
                        self.all_servers_info)
    self.assertTrue(ret, "Failed to expand the volume")

    # Trigger rebalance
    ret, _, _ = rebalance_start(self.mnode, self.volname)
    self.assertEqual(ret, 0, "Failed to start rebalance on the volume %s"
                     % self.volname)

    # Wait for rebalance to complete
    ret = wait_for_rebalance_to_complete(self.mnode, self.volname,
                                         timeout=1200)
    self.assertTrue(ret, "Rebalance is not yet complete on the volume "
                         "%s" % self.volname)
    g.log.info("Rebalance successfully completed")
def test_spurious_rebalance(self): """ In this test case: 1. Trusted storage Pool of 3 nodes 2. Create a distributed volumes with 3 bricks 3. Start the volume 4. Fuse mount the gluster volume on out of trusted nodes 5. Remove a brick from the volume 6. Check remove-brick status 7. Stop the remove brick process 8. Perform fix-layoyt on the volume 9. Get the rebalance fix-layout status 10. Create a directory from mount point 11. Check trusted.glusterfs.dht extended attribue for newly created directory on the remove brick """ # pylint: disable=too-many-statements my_servers = self.servers[0:3] my_server_info = {} for server in self.servers[0:3]: my_server_info[server] = self.all_servers_info[server] for index in range(1, 3): ret, _, _ = peer_probe(self.servers[0], self.servers[index]) self.assertEqual(ret, 0, ("peer probe from %s to %s is failed", self.servers[0], self.servers[index])) g.log.info("peer probe is success from %s to " "%s", self.servers[0], self.servers[index]) self.volname = "testvol" bricks_list = form_bricks_list(self.mnode, self.volname, 3, my_servers, my_server_info) g.log.info("Creating a volume %s ", self.volname) ret, _, _ = volume_create(self.mnode, self.volname, bricks_list, force=False) self.assertEqual(ret, 0, ("Unable" "to create volume %s" % self.volname)) g.log.info("Volume created successfully %s", self.volname) ret, _, _ = volume_start(self.mnode, self.volname, False) self.assertEqual(ret, 0, ("Failed to start the " "volume %s", self.volname)) g.log.info("Get all the bricks of the volume") bricks_list = get_all_bricks(self.mnode, self.volname) self.assertIsNotNone(bricks_list, "Failed to get the brick list") g.log.info("Successfully got the list of bricks of volume") # Mounting a volume ret, _, _ = mount_volume(self.volname, mtype=self.mount_type, mpoint=self.mounts[0].mountpoint, mserver=self.mnode, mclient=self.mounts[0].client_system) self.assertEqual(ret, 0, ("Volume %s is not mounted") % self.volname) g.log.info("Volume mounted successfully : %s", self.volname) remove_brick_list = [] remove_brick_list.append(bricks_list[2]) ret, _, _ = remove_brick(self.mnode, self.volname, remove_brick_list, 'start') self.assertEqual(ret, 0, "Failed to start remove brick operation") g.log.info("Remove bricks operation started successfully") ret, _, _ = remove_brick(self.mnode, self.volname, remove_brick_list, 'stop') self.assertEqual(ret, 0, "Failed to stop remove brick operation") g.log.info("Remove bricks operation stopped successfully") g.log.info("Starting Fix-layoyt on the volume") ret, _, _ = rebalance_start(self.mnode, self.volname, True) self.assertEqual(ret, 0, ("Failed to start rebalance for fix-layout" "on the volume %s", self.volname)) g.log.info("Successfully started fix-layout on the volume %s", self.volname) # Wait for fix-layout to complete g.log.info("Waiting for fix-layout to complete") ret = wait_for_fix_layout_to_complete(self.mnode, self.volname) self.assertTrue(ret, ("Fix-layout is not yet complete on the volume " "%s", self.volname)) g.log.info("Fix-layout is successfully complete on the volume %s", self.volname) ret = mkdir(self.mounts[0].client_system, "%s/dir1" % self.mounts[0].mountpoint) self.assertTrue(ret, ("Failed to create directory dir1")) g.log.info("directory dir1 is created successfully") brick_server, brick_dir = remove_brick_list[0].split(':') folder_name = brick_dir + "/dir1" g.log.info("Check trusted.glusterfs.dht on host %s for directory %s", brick_server, folder_name) ret = get_fattr(brick_server, folder_name, 
'trusted.glusterfs.dht') self.assertTrue( ret, ("Failed to get trusted.glusterfs.dht for %s" % folder_name)) g.log.info("get trusted.glusterfs.dht xattr for %s successfully", folder_name)
def test_expanding_volume_when_io_in_progress(self): """Test expanding volume (Increase distribution) using existing servers bricks when IO is in progress. Description: - add bricks - starts rebalance - wait for rebalance to complete - validate IO """ # Log Volume Info and Status before expanding the volume. g.log.info("Logging volume info and Status before expanding volume") ret = log_volume_info_and_status(self.mnode, self.volname) self.assertTrue(ret, ("Logging volume info and status failed on " "volume %s", self.volname)) g.log.info("Successful in logging volume info and status of volume %s", self.volname) # Expanding volume by adding bricks to the volume when IO in progress g.log.info("Start adding bricks to volume when IO in progress") ret = expand_volume(self.mnode, self.volname, self.servers, self.all_servers_info) self.assertTrue(ret, ("Failed to expand the volume when IO in " "progress on volume %s", self.volname)) g.log.info( "Expanding volume when IO in progress is successful on " "volume %s", self.volname) # Wait for gluster processes to come online time.sleep(30) # Log Volume Info and Status after expanding the volume g.log.info("Logging volume info and Status after expanding volume") ret = log_volume_info_and_status(self.mnode, self.volname) self.assertTrue(ret, ("Logging volume info and status failed on " "volume %s", self.volname)) g.log.info("Successful in logging volume info and status of volume %s", self.volname) # Verify volume's all process are online g.log.info("Verifying volume's all process are online") ret = verify_all_process_of_volume_are_online(self.mnode, self.volname) self.assertTrue( ret, ("Volume %s : All process are not online", self.volname)) g.log.info("Volume %s : All process are online", self.volname) # Start Rebalance g.log.info("Starting Rebalance on the volume") ret, _, _ = rebalance_start(self.mnode, self.volname) self.assertEqual(ret, 0, ("Failed to start rebalance on the volume " "%s", self.volname)) g.log.info("Successfully started rebalance on the volume %s", self.volname) # Log Rebalance status g.log.info("Log Rebalance status") _, _, _ = rebalance_status(self.mnode, self.volname) # Wait for rebalance to complete g.log.info("Waiting for rebalance to complete") ret = wait_for_rebalance_to_complete(self.mnode, self.volname) self.assertTrue(ret, ("Rebalance is not yet complete on the volume " "%s", self.volname)) g.log.info("Rebalance is successfully complete on the volume %s", self.volname) # Check Rebalance status after rebalance is complete g.log.info("Checking Rebalance status") ret, _, _ = rebalance_status(self.mnode, self.volname) self.assertEqual(ret, 0, ("Failed to get rebalance status for the " "volume %s", self.volname)) g.log.info("Successfully got rebalance status of the volume %s", self.volname) # Validate IO g.log.info("Wait for IO to complete and validate IO ...") ret = validate_io_procs(self.all_mounts_procs, self.mounts) self.io_validation_complete = True self.assertTrue(ret, "IO failed on some of the clients") g.log.info("IO is successful on all mounts") # List all files and dirs created g.log.info("List all files and directories:") ret = list_all_files_and_dirs_mounts(self.mounts) self.assertTrue(ret, "Failed to list all files and dirs") g.log.info("Listing all files and directories is successful")
def test_rebalance_with_quota_enabled(self): """ Test rebalance with quota enabled on root. 1. Create Volume of type distribute 2. Set Quota limit on the root directory 3. Do some IO to reach the Hard limit 4. After IO ends, compute arequal checksum 5. Add bricks to the volume. 6. Start rebalance 7. After rebalance is completed, check arequal checksum """ # Enable Quota ret, _, _ = quota_enable(self.mnode, self.volname) self.assertEqual( ret, 0, ("Failed to enable quota on the volume %s", self.volname)) g.log.info("Successfully enabled quota on volume %s", self.volname) # Set the Quota timeouts to 0 for strict accounting ret, _, _ = quota_set_hard_timeout(self.mnode, self.volname, 0) self.assertEqual( ret, 0, ("Failed to set hard-timeout to 0 for %s", self.volname)) ret, _, _ = quota_set_soft_timeout(self.mnode, self.volname, 0) self.assertEqual( ret, 0, ("Failed to set soft-timeout to 0 for %s", self.volname)) g.log.info("Quota soft and hard timeout has been set to 0 for %s", self.volname) # Set the quota limit of 1 GB on root dir of the volume ret, _, _ = quota_limit_usage(self.mnode, self.volname, "/", "1GB") self.assertEqual(ret, 0, "Failed to set Quota for dir root") g.log.info("Successfully set quota limit for dir root") # Do some IO until hard limit is reached. cmd = ("/usr/bin/env python %s create_files " "-f 1024 --fixed-file-size 1M --base-file-name file %s" % (self.script_upload_path, self.mounts[0].mountpoint)) proc = g.run_async(self.mounts[0].client_system, cmd, user=self.mounts[0].user) self.all_mounts_procs.append(proc) # Wait for IO to complete and validate IO self.assertTrue( wait_for_io_to_complete(self.all_mounts_procs, self.mounts[0]), "IO failed on some of the clients") g.log.info("IO completed on the clients") # Validate quota ret = quota_validate(self.mnode, self.volname, path='/', hard_limit=1073741824, sl_exceeded=True, hl_exceeded=True) self.assertTrue(ret, "Quota validate Failed for '/'") g.log.info("Quota Validated for path '/'") # Compute arequal checksum. arequal_checksum_before_rebalance = collect_mounts_arequal(self.mounts) # Log Volume info and status before expanding volume. log_volume_info_and_status(self.mnode, self.volname) # Expand the volume. ret = expand_volume(self.mnode, self.volname, self.servers, self.all_servers_info) self.assertTrue(ret, ("Failed to expand the volume %s", self.volname)) g.log.info("Expanding volume is successful on " "volume %s", self.volname) # Log volume info and status after expanding volume. log_volume_info_and_status(self.mnode, self.volname) # Perform rebalance start operation. ret, _, _ = rebalance_start(self.mnode, self.volname) self.assertEqual(ret, 0, ("Failed to start rebalance on the volume " "%s", self.volname)) g.log.info("Rebalance started.") # Check rebalance is in progress rebalance_status = get_rebalance_status(self.mnode, self.volname) ret = rebalance_status['aggregate']['statusStr'] self.assertEqual(ret, "in progress", ("Rebalance is not in " "'in progress' state, either " "rebalance is in completed state" " or failed to get rebalance " "status")) g.log.info("Rebalance is 'in progress' state") # Wait till rebalance ends. 
ret = wait_for_rebalance_to_complete(self.mnode, self.volname) self.assertTrue(ret, ("Rebalance is not yet complete on the volume " "%s", self.volname)) g.log.info("Rebalance is successfully complete on the volume %s", self.volname) # Validate quota ret = quota_validate(self.mnode, self.volname, path='/', hard_limit=1073741824, sl_exceeded=True, hl_exceeded=True) self.assertTrue(ret, "Quota validate Failed for '/'") g.log.info("Quota Validated for path '/'") # Compute arequal checksum. arequal_checksum_after_rebalance = collect_mounts_arequal(self.mounts) # Comparing arequals checksum before and after rebalance. self.assertEqual(arequal_checksum_before_rebalance, arequal_checksum_after_rebalance, "arequal checksum is NOT MATCHING") g.log.info("arequal checksum is SAME")
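# Illustrative sketch (not part of the original suite): the same quota check
# is run before and after rebalance above; a tiny wrapper keeps the limit in
# one place. The helper name is hypothetical and it assumes quota_validate as
# already used in the test.
def _validate_root_quota(self, hard_limit=1073741824):
    """Validate that the root dir quota shows both limits exceeded."""
    ret = quota_validate(self.mnode, self.volname, path='/',
                         hard_limit=hard_limit, sl_exceeded=True,
                         hl_exceeded=True)
    self.assertTrue(ret, "Quota validate Failed for '/'")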
def test_rebalance_with_hidden_files(self): # pylint: disable=too-many-statements # Start IO on mounts g.log.info("Starting IO on all mounts...") self.all_mounts_procs = [] for mount_obj in self.mounts: g.log.info("Starting IO on %s:%s", mount_obj.client_system, mount_obj.mountpoint) cmd = ("python %s create_files " "--base-file-name . " "-f 99 %s" % (self.script_upload_path, mount_obj.mountpoint)) proc = g.run_async(mount_obj.client_system, cmd, user=mount_obj.user) self.all_mounts_procs.append(proc) # validate IO self.assertTrue(validate_io_procs(self.all_mounts_procs, self.mounts), "IO failed on some of the clients") # List all files and dirs created g.log.info("List all files and directories:") ret = list_all_files_and_dirs_mounts(self.mounts) self.assertTrue(ret, "Failed to list all files and dirs") g.log.info("Listing all files and directories is successful") # Verify DHT values across mount points for mount_obj in self.mounts: g.log.debug("Verifying hash layout values %s:%s", mount_obj.client_system, mount_obj.mountpoint) ret = validate_files_in_dir(mount_obj.client_system, mount_obj.mountpoint, test_type=FILE_ON_HASHED_BRICKS, file_type=FILETYPE_FILES) self.assertTrue( ret, "Expected - Files are created on only " "sub-volume according to its hashed value") g.log.info("Hash layout values are verified %s:%s", mount_obj.client_system, mount_obj.mountpoint) # Getting areequal checksum before rebalance g.log.info("Getting areequal checksum before rebalance") arequal_checksum_before_rebalance = collect_mounts_arequal(self.mounts) # Log Volume Info and Status before expanding the volume. g.log.info("Logging volume info and Status before expanding volume") log_volume_info_and_status(self.mnode, self.volname) # Expanding volume by adding bricks to the volume g.log.info("Start adding bricks to volume") ret = expand_volume(self.mnode, self.volname, self.servers, self.all_servers_info) self.assertTrue(ret, ("Failed to expand the volume %s", self.volname)) g.log.info("Expanding volume is successful on " "volume %s", self.volname) # Wait for gluster processes to come online g.log.info("Wait for gluster processes to come online") ret = wait_for_volume_process_to_be_online(self.mnode, self.volname) self.assertTrue(ret, ("Failed to wait for volume %s processes to " "be online", self.volname)) g.log.info( "Successful in waiting for volume %s processes to be " "online", self.volname) # Verify volume's all process are online g.log.info("Verifying volume's all process are online") ret = verify_all_process_of_volume_are_online(self.mnode, self.volname) self.assertTrue( ret, ("Volume %s : All process are not online ", self.volname)) g.log.info("Volume %s : All process are online", self.volname) # Log Volume Info and Status after expanding the volume g.log.info("Logging volume info and Status after expanding volume") log_volume_info_and_status(self.mnode, self.volname) # Start Rebalance g.log.info("Starting Rebalance on the volume") ret, _, _ = rebalance_start(self.mnode, self.volname) self.assertEqual(ret, 0, ("Failed to start rebalance on the volume " "%s", self.volname)) g.log.info("Successfully started rebalance on the volume %s", self.volname) # Wait for rebalance to complete g.log.info("Waiting for rebalance to complete") ret = wait_for_rebalance_to_complete(self.mnode, self.volname) self.assertTrue(ret, ("Rebalance is not yet complete on the volume " "%s", self.volname)) g.log.info("Rebalance is successfully complete on the volume %s", self.volname) # Checking if there are any migration failures 
status = get_rebalance_status(self.mnode, self.volname) for each_node in status['node']: failed_files_count = int(each_node['failures']) self.assertEqual( failed_files_count, 0, "Rebalance failed to migrate few files on %s" % each_node['nodeName']) g.log.info("There are no migration failures") # Getting arequal checksum after rebalance g.log.info("Getting arequal checksum after rebalance") arequal_checksum_after_rebalance = collect_mounts_arequal(self.mounts) # Comparing arequals checksum before and after rebalance g.log.info("Comparing arequals checksum before and after rebalance") self.assertEqual(arequal_checksum_before_rebalance, arequal_checksum_after_rebalance, "arequal checksum is NOT MATCHING") g.log.info("arequal checksum is SAME")
def test_brick_removal_with_quota(self): """ Test Brick removal with quota in place 1. Create Volume of type distribute 2. Set Quota limit on the directory 3. Do some IO to reach the Hard limit 4. After IO ends, remove bricks 5. Quota validation should succeed. """ # Enable Quota ret, _, _ = quota_enable(self.mnode, self.volname) self.assertEqual( ret, 0, ("Failed to enable quota on the volume 5s", self.volname)) g.log.info("Successfully enabled quota on volume %s", self.volname) # Set the Quota timeouts to 0 for strict accounting ret, _, _ = quota_set_hard_timeout(self.mnode, self.volname, 0) self.assertEqual( ret, 0, ("Failed to set hard-timeout to 0 for %s", self.volname)) ret, _, _ = quota_set_soft_timeout(self.mnode, self.volname, 0) self.assertEqual( ret, 0, ("Failed to set soft-timeout to 0 for %s", self.volname)) g.log.info("Quota soft and hard timeout has been set to 0 for %s", self.volname) # Set the quota limit of 100 MB on root dir of the volume ret, _, _ = quota_limit_usage(self.mnode, self.volname, "/", "100MB") self.assertEqual(ret, 0, "Failed to set Quota for dir root") g.log.info("Successfully set quota limit for dir root") # Do some IO until hard limit is reached. cmd = ("/usr/bin/env python %s create_files " "-f 100 --fixed-file-size 1M --base-file-name file %s" % (self.script_upload_path, self.mounts[0].mountpoint)) proc = g.run_async(self.mounts[0].client_system, cmd, user=self.mounts[0].user) self.all_mounts_procs.append(proc) # Wait for IO to complete and validate IO self.assertTrue( wait_for_io_to_complete(self.all_mounts_procs, self.mounts[0]), "IO failed on some of the clients") g.log.info("IO completed on the clients") # Validate quota ret = quota_validate(self.mnode, self.volname, path='/', hard_limit=104857600, sl_exceeded=True, hl_exceeded=True) self.assertTrue(ret, "Quota validate Failed for '/'") g.log.info("Quota Validated for path '/'") # Log Volume info and status before shrinking volume. log_volume_info_and_status(self.mnode, self.volname) # Shrink the volume. ret = shrink_volume(self.mnode, self.volname) self.assertTrue(ret, ("Failed to shrink volume on " "volume %s", self.volname)) g.log.info("Shrinking volume is successful on " "volume %s", self.volname) # Log volume info and status after shrinking volume. log_volume_info_and_status(self.mnode, self.volname) # Perform rebalance start operation. ret, _, _ = rebalance_start(self.mnode, self.volname) self.assertEqual(ret, 0, ("Failed to start rebalance on the volume " "%s", self.volname)) g.log.info("Rebalance started.") # Wait till rebalance ends. ret = wait_for_rebalance_to_complete(self.mnode, self.volname) self.assertTrue(ret, ("Rebalance is not yet complete on the volume " "%s", self.volname)) g.log.info("Rebalance is successfully complete on the volume %s", self.volname) # Validate quota ret = quota_validate(self.mnode, self.volname, path='/', hard_limit=104857600, sl_exceeded=True, hl_exceeded=True) self.assertTrue(ret, "Quota validate Failed for '/'") g.log.info("Quota Validated for path '/'")
def test_stack_overflow(self): """ Description: Tests to check that there is no stack overflow in readdirp with parallel-readdir enabled. Steps : 1) Create a volume. 2) Mount the volume using FUSE. 3) Enable performance.parallel-readdir and performance.readdir-ahead on the volume. 4) Create 10000 files on the mount point. 5) Add-brick to the volume. 6) Perform fix-layout on the volume (not rebalance). 7) From client node, rename all the files, this will result in creation of linkto files on the newly added brick. 8) Do ls -l (lookup) on the mount-point. """ # pylint: disable=too-many-statements # Enable performance.parallel-readdir and # performance.readdir-ahead on the volume options = {"performance.parallel-readdir": "enable", "performance.readdir-ahead": "enable"} ret = set_volume_options(self.mnode, self.volname, options) self.assertTrue(ret, "Failed to set volume options") g.log.info("Successfully set volume options") # Creating 10000 files on volume root m_point = self.mounts[0].mountpoint command = 'touch ' + m_point + '/file{1..10000}_0' ret, _, _ = g.run(self.clients[0], command) self.assertEqual(ret, 0, "File creation failed on %s" % m_point) g.log.info("Files successfully created on the mount point") # Add bricks to the volume ret = expand_volume(self.mnode, self.volname, self.servers, self.all_servers_info) self.assertTrue(ret, ("Failed to expand the volume %s", self.volname)) g.log.info("Expanding volume is successful on " "volume %s", self.volname) # Perform fix-layout on the volume ret, _, _ = rebalance_start(self.mnode, self.volname, fix_layout=True) self.assertEqual(ret, 0, 'Failed to start rebalance') g.log.info('Rebalance is started') # Wait for fix-layout to complete ret = wait_for_fix_layout_to_complete(self.mnode, self.volname, timeout=3000) self.assertTrue(ret, ("Fix-layout failed on volume %s", self.volname)) g.log.info("Fix-layout is successful on " "volume %s", self.volname) # Rename all files from client node for i in range(1, 10000): ret = move_file(self.clients[0], '{}/file{}_0'.format(m_point, i), '{}/file{}_1'.format(m_point, i)) self.assertTrue(ret, "Failed to rename files") g.log.info("Files renamed successfully") # Perform lookup from the mount-point cmd = "ls -lR " + m_point ret, _, _ = g.run(self.mounts[0].client_system, cmd) self.assertEqual(ret, 0, "Failed to lookup") g.log.info("Lookup successful")
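# Illustrative alternative (not part of the original suite): the rename loop
# above issues one move_file() call, i.e. one remote command, per file. When
# only the rename side effect matters, a single shell loop over g.run still
# creates the linkto files on the new brick but avoids thousands of round
# trips. A sketch, with a hypothetical helper name:
def _rename_all_files_in_one_shot(self, m_point):
    """Rename file<i>_0 to file<i>_1 for all files with a single command."""
    cmd = ("cd %s; for i in $(seq 1 10000); do "
           "mv file${i}_0 file${i}_1 || exit 1; done" % m_point)
    ret, _, _ = g.run(self.clients[0], cmd)
    self.assertEqual(ret, 0, "Failed to rename files on the mount point")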
def test_data_self_heal_algorithm_diff_heal_command(self): """ Test Volume Option - 'cluster.data-self-heal-algorithm' : 'diff' Description: - set the volume option "metadata-self-heal": "off" "entry-self-heal": "off" "data-self-heal": "off" "data-self-heal-algorithm": "diff" "self-heal-daemon": "off" - create IO - calculate arequal - bring down all bricks processes from selected set - modify the data - get arequal before getting bricks online - bring bricks online - expand volume by adding bricks to the volume - do rebalance - set the volume option "self-heal-daemon": "on" and check for daemons - start healing - check if heal is completed - check for split-brain - calculate arequal and compare with arequal before bringing bricks offline and after bringing bricks online """ # pylint: disable=too-many-branches,too-many-statements # Setting options g.log.info('Setting options...') options = { "metadata-self-heal": "off", "entry-self-heal": "off", "data-self-heal": "off", "data-self-heal-algorithm": "diff" } ret = set_volume_options(self.mnode, self.volname, options) self.assertTrue(ret, 'Failed to set options') g.log.info("Options " "'metadata-self-heal', " "'entry-self-heal', " "'data-self-heal', " "'self-heal-daemon' " "are set to 'off'," "'data-self-heal-algorithm' " "is set to 'diff' successfully") # Creating files on client side all_mounts_procs = [] g.log.info("Generating data for %s:%s", self.mounts[0].client_system, self.mounts[0].mountpoint) # Creating files command = "/usr/bin/env python %s create_files -f 100 %s" % ( self.script_upload_path, self.mounts[0].mountpoint) proc = g.run_async(self.mounts[0].client_system, command, user=self.mounts[0].user) all_mounts_procs.append(proc) # Validate IO self.assertTrue(validate_io_procs(all_mounts_procs, self.mounts), "IO failed on some of the clients") # Setting options g.log.info('Setting options...') options = {"self-heal-daemon": "off"} ret = set_volume_options(self.mnode, self.volname, options) self.assertTrue(ret, 'Failed to set options') g.log.info("Option 'self-heal-daemon' is set to 'off' successfully") # Select bricks to bring offline bricks_to_bring_offline_dict = (select_bricks_to_bring_offline( self.mnode, self.volname)) bricks_to_bring_offline = list( filter(None, (bricks_to_bring_offline_dict['hot_tier_bricks'] + bricks_to_bring_offline_dict['cold_tier_bricks'] + bricks_to_bring_offline_dict['volume_bricks']))) # Bring brick offline g.log.info('Bringing bricks %s offline...', bricks_to_bring_offline) ret = bring_bricks_offline(self.volname, bricks_to_bring_offline) self.assertTrue( ret, 'Failed to bring bricks %s offline' % bricks_to_bring_offline) ret = are_bricks_offline(self.mnode, self.volname, bricks_to_bring_offline) self.assertTrue(ret, 'Bricks %s are not offline' % bricks_to_bring_offline) g.log.info('Bringing bricks %s offline is successful', bricks_to_bring_offline) # Modify the data all_mounts_procs = [] g.log.info("Modifying data for %s:%s", self.mounts[0].client_system, self.mounts[0].mountpoint) command = ("/usr/bin/env python %s create_files -f 100 " "--fixed-file-size 1M %s" % (self.script_upload_path, self.mounts[0].mountpoint)) proc = g.run_async(self.mounts[0].client_system, command, user=self.mounts[0].user) all_mounts_procs.append(proc) # Validate IO self.assertTrue(validate_io_procs(all_mounts_procs, self.mounts), "IO failed on some of the clients") # Get arequal before getting bricks online g.log.info('Getting arequal before getting bricks online...') ret, result_before_online = 
collect_mounts_arequal(self.mounts) self.assertTrue(ret, 'Failed to get arequal') g.log.info('Getting arequal before getting bricks online ' 'is successful') # Bring brick online g.log.info('Bringing bricks %s online...', bricks_to_bring_offline) ret = bring_bricks_online(self.mnode, self.volname, bricks_to_bring_offline) self.assertTrue( ret, 'Failed to bring bricks %s online' % bricks_to_bring_offline) g.log.info('Bringing bricks %s online is successful', bricks_to_bring_offline) # Expand volume by adding bricks to the volume g.log.info("Start adding bricks to volume...") ret = expand_volume(self.mnode, self.volname, self.servers, self.all_servers_info) self.assertTrue(ret, ("Failed to expand the volume when IO in " "progress on volume %s", self.volname)) g.log.info("Expanding volume is successful on volume %s", self.volname) # Do rebalance ret, _, _ = rebalance_start(self.mnode, self.volname) self.assertEqual(ret, 0, 'Failed to start rebalance') g.log.info('Rebalance is started') ret = wait_for_rebalance_to_complete(self.mnode, self.volname) self.assertTrue(ret, 'Rebalance is not completed') g.log.info('Rebalance is completed successfully') # Setting options g.log.info('Setting options...') options = {"self-heal-daemon": "on"} ret = set_volume_options(self.mnode, self.volname, options) self.assertTrue(ret, 'Failed to set options') g.log.info("Option 'self-heal-daemon' is set to 'on' successfully") # Wait for self-heal-daemons to be online g.log.info("Waiting for self-heal-daemons to be online") ret = is_shd_daemonized(self.all_servers) self.assertTrue(ret, "Either No self heal daemon process found") g.log.info("All self-heal-daemons are online") # Start healing ret = trigger_heal(self.mnode, self.volname) self.assertTrue(ret, 'Heal is not started') g.log.info('Healing is started') # Monitor heal completion ret = monitor_heal_completion(self.mnode, self.volname) self.assertTrue(ret, 'Heal has not yet completed') # Check if heal is completed ret = is_heal_complete(self.mnode, self.volname) self.assertTrue(ret, 'Heal is not complete') g.log.info('Heal is completed successfully') # Check for split-brain ret = is_volume_in_split_brain(self.mnode, self.volname) self.assertFalse(ret, 'Volume is in split-brain state') g.log.info('Volume is not in split-brain state') # Get arequal after getting bricks online g.log.info('Getting arequal after getting bricks online...') ret, result_after_online = collect_mounts_arequal(self.mounts) self.assertTrue(ret, 'Failed to get arequal') g.log.info('Getting arequal after getting bricks online ' 'is successful') # Checking arequals before bringing bricks offline # and after bringing bricks online self.assertItemsEqual(result_before_online, result_after_online, 'Checksums are not equal') g.log.info('Checksums are equal')
def test_expanding_volume_when_io_in_progress(self): # pylint: disable=too-many-statements # Log Volume Info and Status before expanding the volume. g.log.info("Logging volume info and Status before expanding volume") log_volume_info_and_status(self.mnode, self.volname) # Expanding volume by adding bricks to the volume when IO in progress g.log.info("Start adding bricks to volume when IO in progress") ret = expand_volume(self.mnode, self.volname, self.servers, self.all_servers_info) self.assertTrue(ret, ("Failed to expand the volume while IO in " "progress on volume %s", self.volname)) g.log.info( "Expanding volume while IO in progress on " "volume %s : Success", self.volname) # Wait for gluster processes to come online g.log.info("Wait for gluster processes to come online") ret = wait_for_volume_process_to_be_online(self.mnode, self.volname) self.assertTrue(ret, ("Failed to wait for volume %s processes to " "be online", self.volname)) g.log.info("Waiting for volume %s process to be online", self.volname) # Log Volume Info and Status after expanding the volume g.log.info("Logging volume info and Status after expanding volume") log_volume_info_and_status(self.mnode, self.volname) # Verify volume's all process are online g.log.info("Verifying volume's all process are online") ret = verify_all_process_of_volume_are_online(self.mnode, self.volname) self.assertTrue( ret, ("Volume %s : All process are not online", self.volname)) g.log.info("Volume %s : All process are online", self.volname) # Start Rebalance g.log.info("Starting Rebalance on the volume") ret, _, _ = rebalance_start(self.mnode, self.volname) self.assertEqual(ret, 0, ("Failed to start rebalance on the volume " "%s", self.volname)) g.log.info("Started rebalance on the volume %s: Success", self.volname) # Wait for rebalance to complete g.log.info("Waiting for rebalance to complete") ret = wait_for_rebalance_to_complete(self.mnode, self.volname, timeout=1800) self.assertTrue(ret, ("Rebalance is not yet complete on the volume " "%s", self.volname)) g.log.info("Rebalance status on volume %s: Complete", self.volname) # Check Rebalance status after rebalance is complete g.log.info("Checking Rebalance status") ret, _, _ = rebalance_status(self.mnode, self.volname) self.assertEqual(ret, 0, ("Failed to get rebalance status for the " "volume %s", self.volname)) g.log.info("Rebalance status on volume %s: Complete", self.volname) # Validate IO g.log.info("Wait for IO to complete and validate IO ...") ret = validate_io_procs(self.all_mounts_procs, self.mounts) self.io_validation_complete = True self.assertTrue(ret, "IO failed on some of the clients") g.log.info("IO on all mounts: Complete") # List all files and dirs created g.log.info("List all files and directories:") ret = list_all_files_and_dirs_mounts(self.mounts) self.assertTrue(ret, "Failed to list all files and dirs") g.log.info("List all files and directories: Success") # DHT Layout validation g.log.debug("Verifying hash layout values %s:%s", self.clients[0], self.mounts[0].mountpoint) ret = validate_files_in_dir(self.clients[0], self.mounts[0].mountpoint, test_type=LAYOUT_IS_COMPLETE, file_type=FILETYPE_DIRS) self.assertTrue(ret, "LAYOUT_IS_COMPLETE: FAILED") g.log.info("LAYOUT_IS_COMPLETE: PASS") # Checking if there are any migration failures status = get_rebalance_status(self.mnode, self.volname) for each_node in status['node']: self.assertEqual( 0, int(each_node['failures']), "Rebalance failed to migrate few files on %s" % each_node['nodeName']) g.log.info("No migration failures on 
%s", each_node['nodeName'])
def test_rebalance_with_brick_down(self): """ Rebalance with brick down in replica - Create a Replica volume. - Bring down one of the brick down in the replica pair - Do some IO and create files on the mount point - Add a pair of bricks to the volume - Initiate rebalance - Bring back the brick which was down. - After self heal happens, all the files should be present. """ # Log the volume info and status before brick is down. log_volume_info_and_status(self.mnode, self.volname) # Bring one fo the bricks offline brick_list = get_all_bricks(self.mnode, self.volname) ret = bring_bricks_offline(self.volname, choice(brick_list)) # Log the volume info and status after brick is down. log_volume_info_and_status(self.mnode, self.volname) # Create files at mountpoint. cmd = ( "/usr/bin/env python %s create_files " "-f 2000 --fixed-file-size 1k --base-file-name file %s" % (self.script_upload_path, self.mounts[0].mountpoint)) proc = g.run_async( self.mounts[0].client_system, cmd, user=self.mounts[0].user) self.all_mounts_procs.append(proc) # Wait for IO to complete. self.assertTrue(wait_for_io_to_complete(self.all_mounts_procs, self.mounts[0]), "IO failed on some of the clients") g.log.info("IO completed on the clients") # Compute the arequal checksum before bringing all bricks online arequal_before_all_bricks_online = collect_mounts_arequal(self.mounts) # Log the volume info and status before expanding volume. log_volume_info_and_status(self.mnode, self.volname) # Expand the volume. ret = expand_volume(self.mnode, self.volname, self.servers, self.all_servers_info) self.assertTrue(ret, ("Failed to expand the volume %s", self.volname)) g.log.info("Expanding volume is successful on " "volume %s", self.volname) # Log the voluem info after expanding volume. log_volume_info_and_status(self.mnode, self.volname) # Start Rebalance. ret, _, _ = rebalance_start(self.mnode, self.volname) self.assertEqual(ret, 0, ("Failed to start rebalance on the volume " "%s", self.volname)) g.log.info("Successfully started rebalance on the volume %s", self.volname) # Wait for rebalance to complete ret = wait_for_rebalance_to_complete(self.mnode, self.volname) self.assertTrue(ret, ("Rebalance is not yet complete on the volume " "%s", self.volname)) g.log.info("Rebalance is successfully complete on the volume %s", self.volname) # Log the voluem info and status before bringing all bricks online log_volume_info_and_status(self.mnode, self.volname) # Bring all bricks online. ret, _, _ = volume_start(self.mnode, self.volname, force=True) self.assertEqual(ret, 0, "Not able to start volume with force option") g.log.info("Volume start with force option successful.") # Log the volume info and status after bringing all beicks online log_volume_info_and_status(self.mnode, self.volname) # Monitor heal completion. ret = monitor_heal_completion(self.mnode, self.volname) self.assertTrue(ret, "heal has not yet completed") g.log.info("Self heal completed") # Compute the arequal checksum after all bricks online. arequal_after_all_bricks_online = collect_mounts_arequal(self.mounts) # Comparing arequal checksum before and after the operations. self.assertEqual(arequal_before_all_bricks_online, arequal_after_all_bricks_online, "arequal checksum is NOT MATCHING") g.log.info("arequal checksum is SAME")
def test_remove_brick_no_commit_followed_by_rebalance(self):
    """
    Description: Tests to check that there is no data loss when
                 remove-brick operation is stopped and then new bricks
                 are added to the volume.
    Steps:
    1) Create a volume.
    2) Mount the volume using FUSE.
    3) Create files and dirs on the mount-point.
    4) Calculate the arequal-checksum on the mount-point.
    5) Start remove-brick operation on the volume.
    6) While migration is in progress, stop the remove-brick operation.
    7) Add-bricks to the volume and trigger rebalance.
    8) Wait for rebalance to complete.
    9) Calculate the arequal-checksum on the mount-point.
    """
    # Start IO on mounts
    m_point = self.mounts[0].mountpoint
    cmd = ("/usr/bin/env python %s create_deep_dirs_with_files "
           "--dir-length 10 --dir-depth 2 --max-num-of-dirs 1 "
           "--num-of-files 50 --file-type empty-file %s"
           % (self.script_upload_path, m_point))
    proc = g.run_async(self.mounts[0].client_system, cmd,
                       user=self.mounts[0].user)
    g.log.info("IO on %s:%s is started successfully",
               self.mounts[0].client_system, m_point)

    # Validate IO
    self.assertTrue(validate_io_procs([proc], self.mounts[0]),
                    "IO failed on some of the clients")

    # Calculate arequal-checksum before starting remove-brick
    ret, arequal_before = collect_mounts_arequal(self.mounts[0])
    self.assertTrue(ret, "Collecting arequal-checksum failed")

    # Form bricks list for volume shrink
    remove_brick_list = form_bricks_list_to_remove_brick(self.mnode,
                                                         self.volname,
                                                         subvol_name=1)
    self.assertIsNotNone(remove_brick_list, ("Volume %s: Failed to "
                                             "form bricks list for "
                                             "shrink", self.volname))
    g.log.info("Volume %s: Formed bricks list for shrink", self.volname)

    # Shrink volume by removing bricks
    ret, _, _ = remove_brick(self.mnode, self.volname,
                             remove_brick_list, "start")
    self.assertEqual(ret, 0, ("Volume %s shrink failed", self.volname))
    g.log.info("Volume %s shrink started", self.volname)

    # Log remove-brick status
    ret, out, _ = remove_brick(self.mnode, self.volname,
                               remove_brick_list, "status")
    self.assertEqual(ret, 0, ("Remove-brick status failed on %s",
                              self.volname))

    # Check if migration is in progress
    if 'in progress' in out:
        # Stop remove-brick process
        g.log.info("Stop removing bricks from volume")
        ret, out, _ = remove_brick(self.mnode, self.volname,
                                   remove_brick_list, "stop")
        self.assertEqual(ret, 0, "Failed to stop remove-brick process")
        g.log.info("Stopped remove-brick process successfully")
    else:
        g.log.error("Migration for remove-brick is already complete")

    # Sleep for 30 secs so that any running remove-brick process stops
    sleep(30)

    # Add bricks to the volume
    ret = expand_volume(self.mnode, self.volname, self.servers,
                        self.all_servers_info)
    self.assertTrue(ret, ("Volume %s: Add-brick failed", self.volname))
    g.log.info("Volume %s: Add-brick successful", self.volname)

    # Trigger rebalance
    ret, _, _ = rebalance_start(self.mnode, self.volname)
    self.assertEqual(ret, 0, ("Volume %s: Failed to start rebalance",
                              self.volname))
    g.log.info("Volume %s: Rebalance started", self.volname)

    # Wait for rebalance to complete
    ret = wait_for_rebalance_to_complete(self.mnode, self.volname)
    self.assertTrue(ret, "Rebalance has not completed")
    g.log.info("Rebalance has completed successfully")

    # Calculate arequal-checksum on mount-point
    ret, arequal_after = collect_mounts_arequal(self.mounts[0])
    self.assertTrue(ret, "Collecting arequal-checksum failed")

    # Check if there is any data loss
    self.assertEqual(set(arequal_before), set(arequal_after),
                     "There is data loss")
    g.log.info("The checksum before and after rebalance is same."
               " There is no data loss.")
def test_subdir_with_addbrick(self):
    # pylint: disable=too-many-statements
    """
    Mount the volume
    Create 2 subdirs on mount point, subdir1 and subdir2
    Auth allow - Client1(subdir1,subdir2),Client2(subdir1,subdir2)
    Mount the subdir1 on client 1 and subdir2 on client 2
    Start IO's on both subdirs
    Perform add-brick and rebalance
    """
    # Create directories subdir1 and subdir2 on mount point
    ret = mkdir(self.mounts[0].client_system,
                "%s/subdir1" % self.mounts[0].mountpoint)
    self.assertTrue(ret, ("Failed to create directory 'subdir1' on "
                          "volume %s from client %s"
                          % (self.mounts[0].volname,
                             self.mounts[0].client_system)))
    ret = mkdir(self.mounts[0].client_system,
                "%s/subdir2" % self.mounts[0].mountpoint)
    self.assertTrue(ret, ("Failed to create directory 'subdir2' on "
                          "volume %s from client %s"
                          % (self.mounts[0].volname,
                             self.mounts[0].client_system)))

    # Unmount volume
    ret = self.unmount_volume(self.mounts)
    self.assertTrue(ret, "Volumes unmount failed")
    g.log.info("Volumes unmounted successfully")

    # Set authentication on the subdirectories subdir1 and subdir2
    # so that they can be accessed by the 2 clients
    g.log.info('Setting authentication on subdir1 and subdir2 '
               'for clients %s and %s', self.clients[0], self.clients[1])
    ret = set_auth_allow(self.volname, self.mnode,
                         {'/subdir1': [self.clients[0], self.clients[1]],
                          '/subdir2': [self.clients[0], self.clients[1]]})
    self.assertTrue(ret, 'Failed to set authentication on volume %s'
                    % self.volname)

    # Creating mount list for subdirectories
    self.subdir_mounts = [copy.deepcopy(self.mounts[0]),
                          copy.deepcopy(self.mounts[1])]
    self.subdir_mounts[0].volname = "%s/subdir1" % self.volname
    self.subdir_mounts[1].volname = "%s/subdir2" % self.volname

    # Mount subdirectory "subdir1" on client 1 and "subdir2" on client 2
    for mount_obj in self.subdir_mounts:
        ret = mount_obj.mount()
        self.assertTrue(ret, ("Failed to mount %s on client %s"
                              % (mount_obj.volname,
                                 mount_obj.client_system)))
        g.log.info("Successfully mounted %s on client %s",
                   mount_obj.volname, mount_obj.client_system)
    g.log.info("Successfully mounted subdirectories on client 1 "
               "and client 2")

    # Start IO on all mounts
    all_mounts_procs = []
    count = 1
    for mount_obj in self.subdir_mounts:
        g.log.info("Starting IO on %s:%s", mount_obj.client_system,
                   mount_obj.mountpoint)
        cmd = ("/usr/bin/env python %s create_deep_dirs_with_files "
               "--dirname-start-num %d "
               "--dir-depth 2 "
               "--dir-length 10 "
               "--max-num-of-dirs 5 "
               "--num-of-files 5 %s" % (self.script_upload_path, count,
                                        mount_obj.mountpoint))
        proc = g.run_async(mount_obj.client_system, cmd,
                           user=mount_obj.user)
        all_mounts_procs.append(proc)
        count = count + 10

    # Validate IO
    g.log.info("Validating IO's")
    ret = validate_io_procs(all_mounts_procs, self.subdir_mounts)
    self.assertTrue(ret, "IO failed on some of the clients")
    g.log.info("Successfully validated all IO's")

    # Get stat of all the files/dirs created.
    g.log.info("Get stat of all the files/dirs created.")
    ret = get_mounts_stat(self.subdir_mounts)
    self.assertTrue(ret, "Stat failed on some of the clients")
    g.log.info("Successfully got stat of all files/dirs created")

    # Start add-brick (subvolume-increase)
    g.log.info("Start adding bricks to volume when IO in progress")
    ret = expand_volume(self.mnode, self.volname, self.servers,
                        self.all_servers_info)
    self.assertTrue(ret, ("Failed to expand the volume when IO in "
                          "progress on volume %s", self.volname))
    g.log.info("Expanding volume when IO in progress is successful on "
               "volume %s", self.volname)

    # Log Volume Info and Status after expanding the volume
    g.log.info("Logging volume info and Status after expanding volume")
    ret = log_volume_info_and_status(self.mnode, self.volname)
    self.assertTrue(ret, ("Logging volume info and status failed on "
                          "volume %s", self.volname))
    g.log.info("Successful in logging volume info and status of volume %s",
               self.volname)

    # Wait for volume processes to be online
    g.log.info("Wait for volume processes to be online")
    ret = wait_for_volume_process_to_be_online(self.mnode, self.volname)
    self.assertTrue(ret, ("All processes for volume %s are not "
                          "online", self.volname))
    g.log.info("All volume %s processes are now online", self.volname)

    # Start Rebalance
    g.log.info("Starting Rebalance on the volume")
    ret, _, _ = rebalance_start(self.mnode, self.volname)
    self.assertEqual(ret, 0, ("Failed to start rebalance on the volume "
                              "%s", self.volname))
    g.log.info("Successfully started rebalance on the volume %s",
               self.volname)

    # Wait for rebalance to complete
    g.log.info("Waiting for rebalance to complete")
    ret = wait_for_rebalance_to_complete(self.mnode, self.volname, 600)
    self.assertTrue(ret, "Rebalance did not complete "
                         "despite waiting for 10 minutes")
    g.log.info("Rebalance successfully completed on the volume %s",
               self.volname)

    # Again validate if subdirectories are still mounted post add-brick
    for mount_obj in self.subdir_mounts:
        ret = mount_obj.is_mounted()
        self.assertTrue(ret, ("Subdirectory %s is not mounted on client %s"
                              % (mount_obj.volname,
                                 mount_obj.client_system)))
        g.log.info("Subdirectory %s is mounted on client %s",
                   mount_obj.volname, mount_obj.client_system)
    g.log.info("Successfully validated that subdirectories are mounted "
               "on client 1 and client 2 post add-brick operation")
def test_status_string(self):
    '''
    -> Create Volume
    -> Start rebalance
    -> Check task type in volume status
    -> Check task status string in volume status
    -> Check task type in volume status xml
    -> Check task status string in volume status xml
    -> Start Remove brick operation
    -> Check task type in volume status
    -> Check task status string in volume status
    -> Check task type in volume status xml
    -> Check task status string in volume status xml
    '''
    # Start rebalance
    ret, _, _ = rebalance_start(self.mnode, self.volname)
    self.assertEqual(ret, 0, "Failed to start rebalance for volume %s"
                     % self.volname)
    g.log.info("Rebalance started successfully on volume %s", self.volname)

    # Wait for rebalance to complete
    ret = wait_for_rebalance_to_complete(self.mnode, self.volname)
    self.assertTrue(ret, "Rebalance failed for volume %s" % self.volname)
    g.log.info("Rebalance completed successfully on volume %s",
               self.volname)

    # Getting volume status after rebalance is complete
    ret, out, _ = volume_status(self.mnode, self.volname)
    self.assertEqual(ret, 0, "Failed to get volume status for volume %s"
                     % self.volname)
    g.log.info("Volume status successful on volume %s", self.volname)
    status_list = out.splitlines()

    # Verifying task type from volume status for rebalance
    self.assertIn('Rebalance', status_list[len(status_list) - 4],
                  "Incorrect task type found in volume status for %s"
                  % self.volname)
    g.log.info("Correct task type found in volume status for %s",
               self.volname)

    # Verifying task status string in volume status for rebalance
    self.assertIn('completed', status_list[len(status_list) - 2],
                  "Incorrect task status found in volume status for %s"
                  % self.volname)
    g.log.info("Correct task status found in volume status for %s",
               self.volname)

    # Getting volume status --xml after rebalance is complete
    vol_status = get_volume_status(self.mnode, self.volname,
                                   options='tasks')

    # Verifying task type from volume status --xml for rebalance
    self.assertEqual('Rebalance',
                     vol_status[self.volname]['task_status'][0]['type'],
                     "Incorrect task type found in volume status xml "
                     "for %s" % self.volname)
    g.log.info("Correct task type found in volume status xml for %s",
               self.volname)

    # Verifying task status string from volume status --xml for rebalance
    self.assertEqual(
        'completed',
        vol_status[self.volname]['task_status'][0]['statusStr'],
        "Incorrect task status found in volume status xml for %s"
        % self.volname)
    g.log.info("Correct task status found in volume status xml for %s",
               self.volname)

    # Getting sub vols
    subvol_dict = get_subvols(self.mnode, self.volname)
    subvol = subvol_dict['volume_subvols'][1]

    # Perform remove brick start
    ret, _, _ = remove_brick(self.mnode, self.volname, subvol, 'start',
                             replica_count=3)
    self.assertEqual(ret, 0, "Failed to start remove brick operation "
                     "for %s" % self.volname)
    g.log.info("Remove brick operation started successfully on volume %s",
               self.volname)

    # Getting volume status after remove brick start
    ret, out, _ = volume_status(self.mnode, self.volname)
    self.assertEqual(ret, 0, "Failed to get volume status for volume %s"
                     % self.volname)
    g.log.info("Volume status successful on volume %s", self.volname)
    status_list = out.splitlines()

    # Verifying task type from volume status after remove brick start
    self.assertIn('Remove brick', status_list[len(status_list) - 8],
                  "Incorrect task type found in volume status for "
                  "%s" % self.volname)
    g.log.info("Correct task type found in volume status task for %s",
               self.volname)

    # Verifying task status string in volume status after remove
    # brick start
    remove_status = ['completed', 'in progress']
    ret = (status_list[len(status_list) - 2].split(':')[1].strip()
           in remove_status)
    self.assertTrue(ret, "Incorrect task status found in volume status "
                    "task for %s" % self.volname)
    g.log.info("Correct task status found in volume status task for %s",
               self.volname)

    # Getting volume status --xml after remove brick start
    vol_status = get_volume_status(self.mnode, self.volname,
                                   options='tasks')

    # Verifying task type from volume status --xml after
    # remove brick start
    self.assertEqual('Remove brick',
                     vol_status[self.volname]['task_status'][0]['type'],
                     "Incorrect task type found in volume status xml for "
                     "%s" % self.volname)
    g.log.info("Correct task type found in volume status xml for %s",
               self.volname)

    # Verifying task status string from volume status --xml
    # after remove brick start
    ret = (vol_status[self.volname]['task_status'][0]['statusStr']
           in remove_status)
    self.assertTrue(ret, "Incorrect task status found in volume status "
                    "xml for %s" % self.volname)
    g.log.info("Correct task status found in volume status xml for %s",
               self.volname)
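# Note on the fixed offsets used above (len - 4, len - 2, len - 8): they
# assume `gluster volume status` ends with a "Task Status" block where the
# task type appears on a "Task : <type>" line and the status on a
# "Status : <statusStr>" line below it, with remove-brick adding one
# "Removed bricks" line per brick and pushing the task line further up.
# A minimal, position-independent sketch of reading those two fields from the
# same output could look like this (an illustration, not part of the test):
def _parse_task_block_sketch(status_output):
    """Return (task_type, task_status) read from the trailing task block of
    `gluster volume status` output, or (None, None) if none is present."""
    task_type = task_status = None
    in_task_block = False
    for line in status_output.splitlines():
        if line.startswith('Task Status of Volume'):
            in_task_block = True
        elif in_task_block and ':' in line:
            key, value = [part.strip() for part in line.split(':', 1)]
            if key == 'Task':
                task_type = value
            elif key == 'Status':
                task_status = value
    return task_type, task_status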
def test_snap_rebalance(self):
    # pylint: disable=too-many-statements, too-many-locals
    """
    Snapshot rebalance contains tests which verify snapshot clone,
    creating snapshot and performing I/O on mountpoints

    Steps:
    1. Create snapshot of a volume
    2. Activate snapshot
    3. Clone snapshot and Activate
    4. Mount Cloned volume
    5. Perform I/O on mount point
    6. Calculate arequal for bricks and mountpoints
    7. Add more bricks to the cloned volume
    8. Initiate rebalance
    9. Validate arequal of bricks and mountpoints
    """
    # Creating snapshot
    g.log.info("Starting to Create snapshot")
    ret, _, _ = snap_create(self.mnode, self.volname, self.snap)
    self.assertEqual(ret, 0, ("Failed to create snapshot for volume %s"
                              % self.volname))
    g.log.info("Snapshot %s created successfully for volume %s",
               self.snap, self.volname)

    # Activating snapshot
    g.log.info("Starting to Activate Snapshot")
    ret, _, _ = snap_activate(self.mnode, self.snap)
    self.assertEqual(ret, 0, ("Failed to Activate snapshot %s"
                              % self.snap))
    g.log.info("Snapshot %s activated successfully", self.snap)

    # Creating a Clone of snapshot
    g.log.info("Creating Clone of Snapshot")
    ret, _, _ = snap_clone(self.mnode, self.snap, self.clone)
    self.assertEqual(ret, 0, ("Failed to clone volume %s" % self.clone))
    g.log.info("Clone volume %s created successfully", self.clone)

    # Starting clone volume
    g.log.info("Starting clone volume")
    ret, _, _ = volume_start(self.mnode, self.clone)
    self.assertEqual(ret, 0, "Failed to start %s" % self.clone)
    g.log.info("Clone volume %s started successfully", self.clone)

    # Mounting the clone volume
    g.log.info("Mounting created clone volume")
    ret, _, _ = mount_volume(self.clone, self.mount_type, self.mount1,
                             self.mnode, self.clients[0])
    self.assertEqual(ret, 0, "Clone volume mount failed for %s"
                     % self.clone)
    g.log.info("Cloned volume %s mounted successfully", self.clone)

    # Validate clone volume is mounted or not
    g.log.info("Validate clone volume mounted or not")
    ret = is_mounted(self.clone, self.mount1, self.mnode,
                     self.clients[0], self.mount_type)
    self.assertTrue(ret, "Cloned volume not mounted on mount point: %s"
                    % self.mount1)
    g.log.info("Cloned volume %s mounted on %s", self.clone, self.mount1)

    # Write files to mountpoint
    g.log.info("Starting IO on %s mountpoint...", self.mount1)
    all_mounts_procs = []
    cmd = ("/usr/bin/env python %s create_files "
           "-f 10 --base-file-name file %s" % (self.script_upload_path,
                                               self.mount1))
    proc = g.run(self.clients[0], cmd)
    all_mounts_procs.append(proc)

    self.check_arequal()

    # Expanding the cloned volume
    g.log.info("Starting to expand volume")
    ret = expand_volume(self.mnode, self.clone, self.servers,
                        self.all_servers_info)
    self.assertTrue(ret, "Failed to expand volume %s" % self.clone)
    g.log.info("Expand volume successful")

    # Start rebalance on the cloned volume
    ret, _, _ = rebalance_start(self.mnode, self.clone)
    self.assertEqual(ret, 0, "Failed to start rebalance")
    g.log.info("Successfully started rebalance on the "
               "volume %s", self.clone)

    # Log Rebalance status
    g.log.info("Log Rebalance status")
    _, _, _ = rebalance_status(self.mnode, self.clone)

    # Wait for rebalance to complete
    g.log.info("Waiting for rebalance to complete")
    ret = wait_for_rebalance_to_complete(self.mnode, self.clone)
    self.assertTrue(ret, ("Rebalance is not yet complete "
                          "on the volume %s", self.clone))
    g.log.info("Rebalance is successfully complete on "
               "the volume %s", self.clone)

    # Check Rebalance status after rebalance is complete
    g.log.info("Checking Rebalance status")
    ret, _, _ = rebalance_status(self.mnode, self.clone)
    self.assertEqual(ret, 0, ("Failed to get rebalance status for "
                              "the volume %s", self.clone))
    g.log.info("Successfully got rebalance status of the "
               "volume %s", self.clone)

    self.check_arequal()
def test_snapshot_while_rebalance(self):
    # pylint: disable=too-many-statements, missing-docstring
    # Start IO on all mounts.
    all_mounts_procs = []
    count = 1
    for mount_obj in self.mounts:
        g.log.info("Starting IO on %s:%s", mount_obj.client_system,
                   mount_obj.mountpoint)
        cmd = ("python %s create_deep_dirs_with_files "
               "--dirname-start-num %d "
               "--dir-depth 2 "
               "--dir-length 10 "
               "--max-num-of-dirs 5 "
               "--num-of-files 5 %s" % (self.script_upload_path, count,
                                        mount_obj.mountpoint))
        proc = g.run_async(mount_obj.client_system, cmd,
                           user=mount_obj.user)
        all_mounts_procs.append(proc)
        count = count + 10

    # Validate IO
    g.log.info("Validating IO's")
    ret = validate_io_procs(all_mounts_procs, self.mounts)
    self.assertTrue(ret, "IO failed on some of the clients")
    g.log.info("Successfully validated all IO's")

    # Get stat of all the files/dirs created.
    g.log.info("Get stat of all the files/dirs created.")
    ret = get_mounts_stat(self.mounts)
    self.assertTrue(ret, "Stat failed on some of the clients")
    g.log.info("Successfully got stat of all files/dirs created")

    # Create one snapshot of volume using no-timestamp option
    cmd_str = ("gluster snapshot create %s %s %s"
               % ("snapy", self.volname, "no-timestamp"))
    ret, _, _ = g.run(self.mnode, cmd_str)
    self.assertEqual(ret, 0, ("Failed to create snapshot for %s"
                              % self.volname))
    g.log.info("Snapshot snapy created successfully "
               "for volume %s", self.volname)

    # Check the number of snaps using snap_list; it should be 1
    snap_list = get_snap_list(self.mnode)
    self.assertEqual(1, len(snap_list), "Expected 1 snapshot, "
                     "found %s snapshots" % len(snap_list))
    g.log.info("Successfully validated number of snaps.")

    # Validate snap name
    self.assertIn("snapy", snap_list, "snap not found")
    g.log.info("Successfully validated names of snap")

    # Get the bricks for the volume
    g.log.info("Fetching bricks for the volume : %s", self.volname)
    bricks_list = get_all_bricks(self.mnode, self.volname)
    g.log.info("Brick List : %s", bricks_list)

    # Expanding volume
    g.log.info("Start adding bricks to volume %s", self.volname)
    ret = expand_volume(self.mnode, self.volname, self.servers,
                        self.all_servers_info)
    self.assertTrue(ret, ("Failed to add bricks to "
                          "volume %s " % self.volname))
    g.log.info("Add brick successful")

    # Log Volume Info and Status after expanding the volume
    g.log.info("Logging volume info and Status after expanding volume")
    ret = log_volume_info_and_status(self.mnode, self.volname)
    self.assertTrue(ret, ("Logging volume info and status failed "
                          "on volume %s", self.volname))
    g.log.info("Successful in logging volume info and status "
               "of volume %s", self.volname)

    # Verify volume's all process are online for 60 sec
    g.log.info("Verifying volume's all process are online")
    ret = wait_for_volume_process_to_be_online(self.mnode, self.volname,
                                               60)
    self.assertTrue(ret, ("Volume %s : All process are not "
                          "online", self.volname))
    g.log.info("Successfully verified volume %s "
               "processes are online", self.volname)

    # Start Rebalance
    g.log.info("Starting Rebalance on the volume")
    ret, _, err = rebalance_start(self.mnode, self.volname)
    self.assertEqual(ret, 0, ("Failed to start rebalance on "
                              "the volume %s with error %s"
                              % (self.volname, err)))
    g.log.info("Successfully started rebalance on the "
               "volume %s", self.volname)

    # Log Rebalance status
    g.log.info("Log Rebalance status")
    ret, _, _ = rebalance_status(self.mnode, self.volname)
    self.assertEqual(ret, 0, "Failed to log rebalance status")
    g.log.info("Successfully logged rebalance status")

    # Attempt to create one snapshot of the volume during rebalance;
    # it is expected to fail while rebalance is in progress
    cmd_str = ("gluster snapshot create %s %s %s"
               % ("snapy_rebal", self.volname, "no-timestamp"))
    ret, _, _ = g.run(self.mnode, cmd_str)
    self.assertNotEqual(ret, 0, ("Successfully created 'snapy_rebal'"
                                 " for %s" % self.volname))
    g.log.info("Snapshot 'snapy_rebal' not created as rebalance is in "
               "progress, check log")

    # Check the number of snaps using snap_list; it should still be 1
    snap_list = get_snap_list(self.mnode)
    self.assertEqual(1, len(snap_list), "Expected 1 snapshot, "
                     "found %s snapshots" % len(snap_list))
    g.log.info("Successfully validated number of snaps.")

    # Wait for rebalance to complete
    g.log.info("Waiting for rebalance to complete")
    ret = wait_for_rebalance_to_complete(self.mnode, self.volname)
    self.assertTrue(ret, ("Rebalance is not yet complete "
                          "on the volume %s", self.volname))
    g.log.info("Rebalance is successfully complete on "
               "the volume %s", self.volname)

    # Check Rebalance status after rebalance is complete
    g.log.info("Checking Rebalance status")
    ret, _, _ = rebalance_status(self.mnode, self.volname)
    self.assertEqual(ret, 0, ("Failed to get rebalance status for "
                              "the volume %s", self.volname))
    g.log.info("Successfully got rebalance status of the "
               "volume %s", self.volname)

    # Create one snapshot of volume post rebalance with same name
    cmd_str = ("gluster snapshot create %s %s %s"
               % ("snapy_rebal", self.volname, "no-timestamp"))
    ret, _, _ = g.run(self.mnode, cmd_str)
    self.assertEqual(ret, 0, ("Failed to create snapshot for %s"
                              % self.volname))
    g.log.info("Snapshot snapy_rebal created successfully "
               "for volume %s", self.volname)

    # Check the number of snaps using snap_list; it should be 2
    snap_list = get_snap_list(self.mnode)
    self.assertEqual(2, len(snap_list), "Expected 2 snapshots, "
                     "found %s snapshots" % len(snap_list))
    g.log.info("Successfully validated number of snaps.")

    # Validate snap name
    self.assertIn("snapy_rebal", snap_list, "snap not found")
    g.log.info("Successfully validated names of snap")