def test_volume_start_force(self):
    """Check that a forced volume start does not revive a removed brick.

    Steps:
    1. Create a volume with as many bricks as there are servers.
    2. Delete the directory of one randomly chosen brick.
    3. A plain volume start is expected to fail.
    4. A forced volume start is expected to succeed.
    5. The brick whose directory was deleted must still be offline.
    """
    # Build the brick list and create the volume
    bricks = form_bricks_list(self.mnode, self.volname, len(self.servers),
                              self.servers, self.all_servers_info)
    rc, _, _ = volume_create(self.mnode, self.volname, bricks)
    self.assertEqual(rc, 0, "Failed to create volume")

    # Choose a random brick ("host:path") and wipe its directory
    victim_idx = random.randint(0, len(bricks) - 1)
    victim_parts = bricks[victim_idx].split(":")
    host = victim_parts[0]
    path = victim_parts[1]
    rc, _, _ = g.run(host, "rm -rf %s" % path)
    self.assertEqual(rc, 0, "Failed to delete the brick")
    g.log.info("Deleted the brick successfully")

    # Without force the start must fail, with force it must succeed
    rc, _, _ = volume_start(self.mnode, self.volname)
    self.assertNotEqual(rc, 0, "Volume start succeeded")

    rc, _, _ = volume_start(self.mnode, self.volname, force=True)
    self.assertEqual(rc, 0, "Volume start with force failed")

    # The forced start should not bring the removed brick online
    brick_is_online = are_bricks_online(self.mnode, self.volname,
                                        [bricks[victim_idx]])
    self.assertFalse(brick_is_online,
                     "Volume start force brought the bricks online")
    g.log.info("Volume start force didn't bring the brick online")
def _validate_brick_down_scenario(self, validate_heal=False,
                                  monitor_heal=False):
    """Run the steps shared by all volume types for a brick-down scenario.

    Args:
        validate_heal (bool): when True, sleep 180 seconds so IO can
            accumulate and then call ``self._validate_heal()`` before the
            volume is force started.
        monitor_heal (bool): when True, sleep 30 seconds after the volume
            processes are online and then require heal completion.
    """
    if validate_heal:
        # Give the clients ample time to write IO before checking heal info
        sleep(180)
        self._validate_heal()

    # Force start the volume and make sure every process comes online
    start_rc, _, _ = volume_start(self.mnode, self.volname, force=True)
    self.assertEqual(start_rc, 0, 'Unable to force start volume')

    all_online = wait_for_volume_process_to_be_online(self.mnode,
                                                      self.volname)
    self.assertTrue(
        all_online, 'Not able to confirm all process of volume are online')

    if not monitor_heal:
        return

    # Let some more IO land, then wait for self heal to finish
    sleep(30)
    healed = monitor_heal_completion(self.mnode, self.volname)
    self.assertTrue(healed, 'Self heal is not completed post brick online')
def test_ec_quota_errors_on_brick_down(self):
    """
    Steps:
    - Create and mount EC volume on two clients
    - Create two dirs on the mount and perform parallel IO from clients
    - Simulate disk full to validate EIO errors when no space is left
    - Remove simulation and apply different quota limits on two dirs
    - Bring down redundant bricks from the volume
    - Validate EDQUOT error on reaching quota limit and extend quota to
      validate absence of EDQUOT error
    - Reduce the quota limit and validate EDQUOT error upon reaching quota
    - Remove quota limits, unmount and cleanup the volume
    """
    self._perform_quota_ops_before_brick_down()

    # Pick 'redundancy_count' random bricks from every subvolume
    subvols = get_subvols(self.mnode, self.volname)
    self.assertTrue(subvols.get('volume_subvols'),
                    'Not able to get subvols of the volume')

    self.offline_bricks = []
    redundancy = self.volume.get('voltype')['redundancy_count']
    for bricks_in_subvol in subvols['volume_subvols']:
        self.offline_bricks.extend(sample(bricks_in_subvol, redundancy))

    went_offline = bring_bricks_offline(self.volname, self.offline_bricks)
    self.assertTrue(went_offline,
                    'Not able to bring redundant bricks offline')

    self._perform_quota_ops_after_brick_down()

    # Force start the volume to bring the offline bricks back
    start_rc, _, _ = volume_start(self.mnode, self.volname, force=True)
    self.assertEqual(start_rc, 0, 'Not able to bring offline bricks online')

    # Empty the list in place; tearDown only force starts when it is
    # non-empty
    del self.offline_bricks[:]

    g.log.info('Pass: Validating quota errors on brick down is successful')
def test_nfs_ganesha_export_after_vol_restart(self):
    """
    Tests script to check nfs-ganesha volume gets exported after
    multiple volume restarts.

    The volume is stopped and started five times; after every stop it
    must get unexported and after every start it must get exported again.
    """
    for i in range(1, 6):
        g.log.info(
            "Testing nfs ganesha export after volume stop/start. "
            "Count : %s", str(i))

        # Stopping volume.
        # volume_stop returns a (ret, out, err) tuple; the exit status has
        # to be checked explicitly because a non-empty tuple is always
        # truthy.
        ret, _, _ = volume_stop(self.mnode, self.volname)
        self.assertEqual(ret, 0,
                         "Failed to stop volume %s" % self.volname)

        # Waiting for few seconds for volume unexport. Max wait time is
        # 120 seconds.
        ret = wait_for_nfs_ganesha_volume_to_get_unexported(
            self.mnode, self.volname)
        self.assertTrue(ret, ("Failed to unexport volume %s after "
                              "stopping volume" % self.volname))

        # Starting volume (same tuple return convention as volume_stop)
        ret, _, _ = volume_start(self.mnode, self.volname)
        self.assertEqual(ret, 0,
                         "Failed to start volume %s" % self.volname)

        # Waiting for few seconds for volume export. Max wait time is
        # 120 seconds.
        ret = wait_for_nfs_ganesha_volume_to_get_exported(
            self.mnode, self.volname)
        self.assertTrue(ret, ("Failed to export volume %s after "
                              "starting volume" % self.volname))
def snap_restore_complete(mnode, volname, snapname):
    """Stop the volume, restore the snapshot and start the volume again.

    Args:
        mnode (str): Node on which cmd has to be executed.
        volname (str): volume name
        snapname (str): snapshot name

    Returns:
        bool: True on success, False on failure

    Example:
        snap_restore_complete(mnode, testvol, testsnap)
    """
    # Stopping volume before snap restore.
    # volume_stop/volume_start return a (ret, out, err) tuple, so the exit
    # status must be compared with 0; testing the tuple itself for
    # truthiness would never report a failure.
    ret, _, _ = volume_stop(mnode, volname)
    if ret != 0:
        g.log.error("Failed to stop volume %s before restoring snapshot "
                    "%s in node %s" % (volname, snapname, mnode))
        return False

    ret, _, _ = snap_restore(mnode, snapname)
    if ret != 0:
        g.log.error("snapshot restore cli execution failed")
        return False

    # Starting volume after snap restore
    ret, _, _ = volume_start(mnode, volname)
    if ret != 0:
        g.log.error("Failed to start volume %s after restoring snapshot "
                    "%s in node %s" % (volname, snapname, mnode))
        return False
    return True
def test_volume_status_xml(self):
    """Validate 'volume status' (plain and --xml) before and after start.

    Steps:
    1. Peer probe the second server from the first one.
    2. Create a single-brick volume on the first server.
    3. While the volume is not started, 'volume status' must fail with
       an "is not started" message and the --xml variant must return
       None.
    4. Start the volume; both status variants must now succeed.
    5. The test specific glusterd log must not contain error (' E ')
       entries.
    """
    # create a two node cluster
    ret = peer_probe_servers(self.servers[0], self.servers[1])
    self.assertTrue(
        ret,
        "Peer probe failed to %s from %s" % (self.mnode, self.servers[1]))

    # create a distributed volume with single node
    number_of_bricks = 1
    servers_info_from_single_node = {}
    servers_info_from_single_node[self.servers[0]] = self.all_servers_info[
        self.servers[0]]

    bricks_list = form_bricks_list(self.mnode, self.volname,
                                   number_of_bricks, self.servers[0],
                                   servers_info_from_single_node)
    ret, _, _ = volume_create(self.servers[0], self.volname, bricks_list)
    self.assertEqual(ret, 0, "Volume creation failed")
    g.log.info("Volume %s created successfully", self.volname)

    # Get volume status
    ret, _, err = volume_status(self.servers[1], self.volname)
    self.assertNotEqual(ret, 0, ("Unexpected: volume status is success for"
                                 " %s, even though volume is not started "
                                 "yet" % self.volname))
    self.assertIn("is not started", err, ("volume status exited with"
                                          " incorrect error message"))

    # Get volume status with --xml
    vol_status = get_volume_status(self.servers[1], self.volname)
    self.assertIsNone(vol_status, ("Unexpected: volume status --xml for %s"
                                   " is success even though the volume is"
                                   " not stared yet" % self.volname))

    # start the volume
    ret, _, _ = volume_start(self.servers[1], self.volname)
    self.assertEqual(ret, 0, "Failed to start volume %s" % self.volname)

    # Get volume status
    ret, _, _ = volume_status(self.servers[1], self.volname)
    self.assertEqual(ret, 0, ("Failed to get volume status for %s"
                              % self.volname))

    # Get volume status with --xml
    vol_status = get_volume_status(self.servers[1], self.volname)
    self.assertIsNotNone(vol_status, ("Failed to get volume "
                                      "status --xml for %s"
                                      % self.volname))

    # Verify there are no crashes while executing gluster volume status
    status = True
    glusterd_log = (self._get_test_specific_glusterd_log(
        self.mnode).split("\n"))
    for line in glusterd_log:
        # Inspect each log line; testing the list itself would only match
        # an element that is exactly ' E ' and so never flag an error.
        if ' E ' in line:
            status = False
            g.log.info("Unexpected! Error found %s", line)

    self.assertTrue(status, "Error found in glusterd logs")
def tearDown(self):
    """Restore the cluster after the test and delete all volumes.

    Starts the test volume, starts glusterd on all servers, makes sure
    all peers are connected (probing them again if they are not), cleans
    up every volume and finally calls the base class tearDown.

    Raises:
        ExecutionError: if glusterd cannot be started, peers cannot be
            probed, the volume list cannot be fetched or a volume cannot
            be deleted.
    """
    # start the volume, it should succeed
    ret, _, _ = volume_start(self.mnode, self.volname)
    self.assertEqual(ret, 0, "Volume start failed")

    # start glusterd on all servers
    ret = start_glusterd(self.servers)
    if not ret:
        raise ExecutionError("Failed to start glusterd on all servers")

    for server in self.servers:
        ret = wait_for_peers_to_connect(server, self.servers)
        if not ret:
            # Peers did not connect on their own; probe them again
            ret = peer_probe_servers(server, self.servers)
            if not ret:
                raise ExecutionError("Failed to peer probe all "
                                     "the servers")

    # clean up all volumes
    vol_list = get_volume_list(self.mnode)
    if vol_list is None:
        raise ExecutionError("Failed to get the volume list")

    for volume in vol_list:
        ret = cleanup_volume(self.mnode, volume)
        if not ret:
            raise ExecutionError("Unable to delete volume %s" % volume)
        g.log.info("Volume deleted successfully : %s", volume)

    self.get_super_method(self, 'tearDown')()
def setUp(self):
    """Setup Volume"""
    # Run the GlusterBaseClass setUp first
    self.get_super_method(self, 'setUp')()

    # Gather the first three servers and their info for volume_create
    list_of_three_servers = list(self.servers[0:3])
    server_info_for_three_nodes = {
        node: self.all_servers_info[node] for node in list_of_three_servers
    }
    bricks_list = form_bricks_list(self.mnode, self.volname, 3,
                                   list_of_three_servers,
                                   server_info_for_three_nodes)

    # Create and start the 2nd volume
    self.volname_2 = "test_volume"
    create_rc, _, _ = volume_create(self.mnode, self.volname_2,
                                    bricks_list)
    self.assertFalse(create_rc, "Volume creation failed")
    g.log.info("Volume %s created successfully", self.volname_2)

    start_rc, _, _ = volume_start(self.mnode, self.volname_2)
    if start_rc:
        raise ExecutionError("Failed to start volume {}".format(
            self.volname_2))

    # Setup and mount the main test volume
    if not self.setup_volume_and_mount_volume(mounts=self.mounts):
        raise ExecutionError("Failed to setup volume and mount it")
def test_nfs_ganesha_exportID_after_vol_restart(self):
    """
    Tests script to check nfs-ganesha volume gets exported with same
    Export ID after multiple volume restarts.
    Steps:
    1. Create and Export the Volume
    2. Stop and Start the volume multiple times
    3. Check for export ID
       Export ID should not change
    """
    for i in range(1, 4):
        g.log.info(
            "Testing nfs ganesha exportID after volume stop and "
            "start.\n Count : %s", str(i))

        # Stopping volume.
        # volume_stop returns a (ret, out, err) tuple; compare the exit
        # status with 0 because a non-empty tuple is always truthy.
        ret, _, _ = volume_stop(self.mnode, self.volname)
        self.assertEqual(ret, 0,
                         "Failed to stop volume %s" % self.volname)
        g.log.info("Volume is stopped")

        # Waiting for few seconds for volume unexport. Max wait time is
        # 120 seconds.
        ret = wait_for_nfs_ganesha_volume_to_get_unexported(
            self.mnode, self.volname)
        self.assertTrue(ret, ("Failed to unexport volume %s after "
                              "stopping volume" % self.volname))
        g.log.info("Volume is unexported via ganesha")

        # Starting volume (same tuple return convention as volume_stop)
        ret, _, _ = volume_start(self.mnode, self.volname)
        self.assertEqual(ret, 0,
                         "Failed to start volume %s" % self.volname)
        g.log.info("Volume is started")

        # Waiting for few seconds for volume export. Max wait time is
        # 120 seconds.
        ret = wait_for_nfs_ganesha_volume_to_get_exported(
            self.mnode, self.volname)
        self.assertTrue(ret, ("Failed to export volume %s after "
                              "starting volume" % self.volname))
        g.log.info("Volume is exported via ganesha")

        # Check for Export ID after every restart; it is expected to
        # stay "2"
        cmd = ("cat /run/gluster/shared_storage/nfs-ganesha/exports/"
               "export.*.conf | grep Export_Id | grep -Eo '[0-9]'")
        ret, out, _ = g.run(self.mnode, cmd)
        self.assertEqual(
            ret, 0,
            "Unable to get export ID of the volume %s" % self.volname)
        g.log.info("Successful in getting volume export ID: %s " % out)
        self.assertEqual(
            out.strip("\n"), "2",
            "Export ID changed after export and unexport "
            "of volume: %s" % out)
        g.log.info("Export ID of volume is same after export "
                   "and unexport: %s" % out)
def test_default_log_level_of_cli(self):
    """
    Test Case:
    1) Create and start a volume
    2) Run volume info command
    3) Run volume status command
    4) Run volume stop command
    5) Run volume start command
    6) Check the default log level of cli.log
    """
    node = self.mnode
    vol = self.volname

    # volume info
    rc, _, _ = volume_info(node)
    self.assertEqual(
        rc, 0,
        "Failed to execute volume info command on node: %s" % node)
    g.log.info(
        "Successfully executed the volume info command on node: %s", node)

    # volume status
    rc, _, _ = volume_status(node)
    self.assertEqual(
        rc, 0,
        "Failed to execute volume status command on node: %s" % node)
    g.log.info(
        "Successfully executed the volume status command on node: %s",
        node)

    # volume stop
    rc, _, _ = volume_stop(node, vol)
    self.assertEqual(
        rc, 0,
        "Failed to stop the volume %s on node: %s" % (vol, node))
    g.log.info("Successfully stopped the volume %s on node: %s", vol, node)

    # volume start
    rc, _, _ = volume_start(node, vol)
    self.assertEqual(
        rc, 0,
        "Failed to start the volume %s on node: %s" % (vol, node))
    g.log.info("Successfully started the volume %s on node: %s", vol, node)

    # Count debug ("] D [") entries in cli.log; none are expected when
    # the default log level is INFO
    count_debug_cmd = (
        'cat /var/log/glusterfs/cli.log | grep -F "] D [" | wc -l')
    rc, debug_lines, _ = g.run(node, count_debug_cmd)
    self.assertEqual(rc, 0, "Failed to execute the command")
    self.assertEqual(
        int(debug_lines), 0,
        "Unexpected: Default log level of cli.log is not INFO")
    g.log.info("Default log level of cli.log is INFO as expected")
def test_volume_create_start_stop_start(self):
    """Tests volume create, start, status, stop, start.
    Also Validates whether all the brick process are running after the
    start of the volume.
    """
    # Verify volume processes are online
    ret = verify_all_process_of_volume_are_online(self.mnode, self.volname)
    self.assertTrue(ret, ("Volume %s : All process are not online" %
                          self.volname))
    g.log.info("Successfully Verified volume %s processes are online",
               self.volname)

    # Stop Volume
    ret, _, _ = volume_stop(self.mnode, self.volname, force=True)
    self.assertEqual(ret, 0, "Failed to stop volume %s" % self.volname)
    g.log.info("Successfully stopped volume %s", self.volname)

    # Start Volume
    ret, _, _ = volume_start(self.mnode, self.volname)
    self.assertEqual(ret, 0, "Failed to start volume %s" % self.volname)
    g.log.info("Successfully started volume %s", self.volname)

    # Wait for volume processes to be online.
    # The assertion messages below are formatted with '%'; passing a
    # (format, arg) tuple as msg would print the raw tuple on failure.
    ret = wait_for_volume_process_to_be_online(self.mnode, self.volname)
    self.assertTrue(ret, ("Failed to wait for volume %s processes to "
                          "be online" % self.volname))

    # Log Volume Info and Status
    ret = log_volume_info_and_status(self.mnode, self.volname)
    self.assertTrue(ret, ("Failed to Log volume %s info and status" %
                          self.volname))
    g.log.info("Successfully logged Volume %s Info and Status",
               self.volname)

    # Verify volume's all process are online
    ret = verify_all_process_of_volume_are_online(self.mnode, self.volname)
    self.assertTrue(ret, ("Volume %s : All process are not online" %
                          self.volname))
    g.log.info("Successfully verified volume %s processes are online",
               self.volname)

    # Log Volume Info and Status
    ret = log_volume_info_and_status(self.mnode, self.volname)
    self.assertTrue(ret, ("Failed to Log volume %s info and status" %
                          self.volname))
    g.log.info("Successfully logged Volume %s Info and Status",
               self.volname)

    # Check if glusterd is running on all servers(expected: active)
    ret = is_glusterd_running(self.servers)
    self.assertEqual(ret, 0, "Glusterd is not running on all servers")
    g.log.info("Glusterd is running on all the servers")
def test_volume_absent_bricks(self):
    """
    Test Case:
    1) Create Volume
    2) Remove any one Brick directory
    3) Start Volume and compare the failure message
    4) Check the gluster volume status and compare the status message
    """
    # Get every brick of the volume
    all_bricks = get_all_bricks(self.mnode, self.volname)
    self.assertIsNotNone(all_bricks,
                         "Failed to get the bricks in the volume")

    # Remove the directory of one randomly chosen brick ("host:path")
    chosen_brick = random.choice(all_bricks)
    host, directory = chosen_brick.split(r':')
    rc, _, _ = g.run(host, 'rm -rf ' + directory)
    self.assertEqual(rc, 0, "Failed to remove brick dir")
    g.log.info("Brick directory removed successfully")

    # Volume start must fail now ...
    rc, _, start_err = volume_start(self.mnode, self.volname)
    self.assertNotEqual(
        rc, 0,
        "Unexpected: Volume started successfully "
        "even though brick directory is removed "
        "for %s" % self.volname)
    g.log.info("Expected: Failed to start volume %s", self.volname)

    # ... and report the missing brick directory
    expected = "Failed to find brick directory"
    self.assertIn(
        expected, start_err,
        "Expected message is %s but volume start "
        "command failed with this "
        "message %s" % (expected, start_err))
    g.log.info("Volume start failed with correct error message %s",
               start_err)

    # Volume status must fail because the volume is not started
    rc, _, status_err = volume_status(self.mnode, self.volname)
    self.assertNotEqual(
        rc, 0,
        "Success in getting volume status, volume "
        "status should fail when volume is in "
        "not started state ")
    g.log.info("Failed to get volume status which is expected")

    # The status error must name the volume as not started
    expected = ' '.join(['Volume', self.volname, 'is not started'])
    self.assertIn(expected, status_err,
                  'Incorrect error message for gluster vol status')
    g.log.info("Correct error message for volume status")
def _guster_volume_cleanup(self, vol_name):
    """Force start ``vol_name`` when its bricks are not all online.

    Switches to the registry project, fetches the brick list of the
    volume and, if the bricks are reported as not online, force starts
    the volume and asserts that the start succeeded.

    Args:
        vol_name (str): name of the gluster volume to check.
    """
    endpoint = "auto_get_gluster_endpoint"

    openshift_ops.switch_oc_project(self._master,
                                    self._registry_project_name)

    bricks = brick_libs.get_all_bricks(endpoint, vol_name)
    self.assertIsNotNone(bricks, "Failed to get brick list")

    if brick_libs.are_bricks_online(endpoint, vol_name, bricks):
        # Nothing to repair
        return

    # Some bricks are offline: restart the volume with force
    start_rc, _, _ = volume_ops.volume_start(endpoint, vol_name,
                                             force=True)
    self.assertFalse(start_rc, "Failed to start volume using force")
def tearDown(self):
    """Bring offline bricks back, wait for IO and clean up the volume.

    Raises:
        ExecutionError: if the force start fails, waiting for IO fails
            or the volume cannot be unmounted and cleaned up.
    """
    # Bricks left offline by a test are revived with a forced start
    if self.offline_bricks:
        start_rc, _, _ = volume_start(self.mnode, self.volname,
                                      force=True)
        if start_rc:
            raise ExecutionError('Not able to force start volume to bring '
                                 'offline bricks online')

    # Let any IO that is still running on the mounts finish
    if self.all_mount_procs and not wait_for_io_to_complete(
            self.all_mount_procs, self.mounts):
        raise ExecutionError('Wait for IO completion failed')

    if not self.unmount_volume_and_cleanup_volume(mounts=self.mounts):
        raise ExecutionError('Failed to unmount and cleanup volume')

    self.get_super_method(self, 'tearDown')()
def test_volume_status_xml(self):
    """Validate 'volume status' (plain and --xml) before and after start.

    While the volume is created but not started, the plain status command
    must fail with an "is not started" message and the --xml variant must
    return None. After the volume is started both must succeed.
    """
    first, second = self.servers[0], self.servers[1]

    # Form a two node cluster
    probed = peer_probe_servers(first, second)
    self.assertTrue(
        probed,
        "Peer probe failed to %s from %s" % (self.mnode, second))

    # Create a distributed volume with a single brick on the first node
    single_node_info = {first: self.all_servers_info[first]}
    bricks = form_bricks_list(self.mnode, self.volname, 1, first,
                              single_node_info)
    rc, _, _ = volume_create(first, self.volname, bricks)
    self.assertEqual(rc, 0, "Volume creation failed")
    g.log.info("Volume %s created successfully", self.volname)

    # Plain status must fail while the volume is not started
    rc, _, err = volume_status(second, self.volname)
    self.assertNotEqual(
        rc, 0,
        "Unexpected: volume status is success for %s, even though "
        "volume is not started yet" % self.volname)
    self.assertIn(
        "is not started", err,
        "volume status exited with incorrect error message")

    # --xml status must return nothing while the volume is not started
    xml_status = get_volume_status(second, self.volname)
    self.assertIsNone(
        xml_status,
        "Unexpected: volume status --xml for %s is success even though "
        "the volume is not stared yet" % self.volname)

    # Start the volume
    rc, _, _ = volume_start(second, self.volname)
    self.assertEqual(rc, 0, "Failed to start volume %s" % self.volname)

    # Plain status must succeed now
    rc, _, _ = volume_status(second, self.volname)
    self.assertEqual(
        rc, 0, "Failed to get volume status for %s" % self.volname)

    # --xml status must return data now
    xml_status = get_volume_status(second, self.volname)
    self.assertIsNotNone(
        xml_status,
        "Failed to get volume status --xml for %s" % self.volname)
def test_brickmux_brick_process(self):
    """
    1. Create a 3 node cluster.
    2. Set cluster.brick-multiplex to enable.
    3. Create volumes of type replica 1x3 (14 are requested below).
    4. Start all the volumes one by one.
    5. While the volumes are starting reboot one node.
    6. Check pgrep of glusterfsd; a single process should be visible.
    """
    servers = self.all_servers[:3]
    replica_voltype = {
        'type': 'replicated',
        'replica_count': 3,
        'transport': 'tcp'
    }
    volume_config = {
        'name': 'test',
        'servers': self.all_servers[:3],
        'voltype': replica_voltype
    }

    # Volume Creation
    created = bulk_volume_creation(self.mnode, 14, self.all_servers_info,
                                   volume_config, is_create_only=True)
    self.assertTrue(created, "Volume creation Failed")

    option_set = set_volume_options(self.mnode, 'all',
                                    {'cluster.brick-multiplex': 'enable'})
    self.assertTrue(option_set)

    # Start the volumes one by one; the third server is rebooted right
    # before the third volume is started
    for position, volname in enumerate(get_volume_list(self.mnode)):
        if position == 2:
            g.run(servers[2], "reboot")
        start_rc, _, _ = volume_start(self.mnode, volname)
        self.assertFalse(start_rc,
                         "Failed to start volume '{}'".format(volname))

    # NOTE(review): the rebooted node is asserted online on each of the
    # ten one-second polls - confirm this is the intended check
    for _attempt in range(10):
        sleep(1)
        _, node_result = are_nodes_online(servers[2])
        self.assertTrue(node_result, "Node is not Online")

    # With brick multiplexing exactly one glusterfsd pid is expected on
    # every server
    for server in servers:
        pgrep_rc, pgrep_out, _ = g.run(server, "pgrep glusterfsd")
        brick_pids = pgrep_out.split()
        self.assertFalse(pgrep_rc, "Failed to get 'glusterfsd' pid")
        self.assertEqual(len(brick_pids), 1,
                         "More then 1 brick process seen in glusterfsd")
def _volume_operations_in_loop(self):
    """Create, start, stop and delete 100 volumes in a loop.

    ``self.volname`` is set to each volume name in turn and
    ``self.volume_present`` records whether the volumes currently exist.
    """
    volume_names = ["volume-%d" % index for index in range(100)]

    # Create the 100 volumes in one bulk call
    voltype = {
        'type': 'distributed-replicated',
        'dist_count': 2,
        'replica_count': 3
    }
    self.volume_config = {
        'name': 'volume-',
        'servers': self.servers,
        'voltype': voltype,
    }
    created = bulk_volume_creation(self.mnode, 100, self.all_servers_info,
                                   self.volume_config, "", False, True)
    self.assertTrue(created, "Failed to create volumes")
    self.volume_present = True
    g.log.info("Successfully created all the volumes")

    # Start every volume
    for name in volume_names:
        self.volname = name
        rc, _, _ = volume_start(self.mnode, self.volname)
        self.assertEqual(rc, 0,
                         "Failed to start volume: %s" % self.volname)
    g.log.info("Successfully started all the volumes")

    # Stop every volume
    for name in volume_names:
        self.volname = name
        rc, _, _ = volume_stop(self.mnode, self.volname)
        self.assertEqual(rc, 0,
                         "Failed to stop volume: %s" % self.volname)
    g.log.info("Successfully stopped all the volumes")

    # Delete every volume
    for name in volume_names:
        self.volname = name
        deleted = volume_delete(self.mnode, self.volname)
        self.assertTrue(deleted,
                        "Failed to delete volume: %s" % self.volname)
    self.volume_present = False
    g.log.info("Successfully deleted all the volumes")
def create_snap(value, volname, snap, clone, counter):
    """Create snapshots, validate the snapshot list and clone a snapshot.

    ``self`` is not a parameter; it is taken from the enclosing scope.

    Args:
        value (iterable): suffixes; a snapshot named "snap<suffix>" is
            created on ``volname`` for each of them.
        volname (str): volume to snapshot.
        snap (str): name of the snapshot to clone.
        clone (str): name of the clone volume to create and start.
        counter (int): number of lines the snapshot list is expected to
            contain after the snapshots are created.

    Returns:
        int: 0 when ``counter`` is 40 (no clone is created in that case),
        otherwise ``counter + 10``.
    """
    # Creating snapshots
    g.log.info("Starting to Create snapshot")
    for snap_count in value:
        ret, _, _ = snap_create(self.mnode, volname, "snap%s" % snap_count)
        self.assertEqual(ret, 0, ("Failed to create "
                                  "snapshot for volume %s" % volname))
        g.log.info(
            "Snapshot snap%s created successfully"
            " for volume %s", snap_count, volname)

    # Validate snapshot list
    g.log.info("Starting to list all snapshots")
    ret, out, _ = snap_list(self.mnode)
    self.assertEqual(
        ret, 0, ("Failed to list snapshot of volume %s" % volname))
    v_list = out.strip().split('\n')
    self.assertEqual(len(v_list), counter, "Failed to validate "
                     "all snapshots")
    g.log.info(
        "Snapshot listed and Validated for volume %s"
        " successfully", volname)
    if counter == 40:
        return 0

    # Creating a Clone of snapshot:
    g.log.info("Starting to Clone Snapshot")
    ret, _, _ = snap_clone(self.mnode, snap, clone)
    self.assertEqual(ret, 0, "Failed to clone %s" % clone)
    g.log.info("Clone volume %s created successfully", clone)

    # Start cloned volumes
    g.log.info("starting to Validate clone volumes are started")
    ret, _, _ = volume_start(self.mnode, clone)
    self.assertEqual(ret, 0, "Failed to start %s" % clone)
    g.log.info("%s started successfully", clone)

    # log Cloned Volume information
    g.log.info("Logging Volume info and Volume status")
    ret = log_volume_info_and_status(self.mnode, clone)
    # Assert on the result; asserting on the message string alone is
    # always true and would hide a logging failure.
    self.assertTrue(
        ret, "Failed to Log Info and Status of Volume %s" % clone)
    g.log.info("Successfully Logged Info and Status")
    return counter + 10
def restart_block_hosting_volume(gluster_pod, block_hosting_vol,
                                 sleep_time=120, hostname=None):
    """Restart a block hosting volume (stop, wait, force start).

    Args:
        gluster_pod (podcmd | str): gluster pod class object has gluster
            pod and ocp master node or gluster pod name
        block_hosting_vol (str): name of block hosting volume
        sleep_time (int): seconds to wait between stopping and starting
            the volume
        hostname (str): hostname on which gluster pod exists

    Raises:
        AssertionError: if the volume status cannot be fetched or the
            stop, start or final status command fails.
    """
    def _fail(message):
        # Log the problem and abort with the same text
        g.log.error(message)
        raise AssertionError(message)

    gluster_pod = _get_gluster_pod(gluster_pod, hostname)

    status_before = get_volume_status(gluster_pod, block_hosting_vol)
    if not status_before:
        raise AssertionError("failed to get gluster volume status")

    g.log.info("Gluster volume %s status\n%s : " % (
        block_hosting_vol, status_before))

    rc, _, stderr = volume_stop(gluster_pod, block_hosting_vol)
    if rc != 0:
        _fail("failed to stop gluster volume %s on pod %s error: %s" % (
            block_hosting_vol, gluster_pod, stderr))

    # Explicit wait to stop ios and pvc creation (2 mins by default)
    time.sleep(sleep_time)

    rc, _, stderr = volume_start(gluster_pod, block_hosting_vol,
                                 force=True)
    if rc != 0:
        _fail("failed to start gluster volume %s on pod %s error: %s" % (
            block_hosting_vol, gluster_pod, stderr))

    rc, _, stderr = volume_status(gluster_pod, block_hosting_vol)
    if rc != 0:
        _fail("failed to get status for gluster volume %s on pod %s "
              "error: %s" % (block_hosting_vol, gluster_pod, stderr))
def _enable_xlator(self, xlator, parent, xtype, xsfail=False):
    """Enable an xlator option and restart the volume to apply it.

    Args:
        xlator (str): xlator name; last component of the option key.
        parent: value the option is set to via
            ``self._set_and_assert_volume_option``.
        xtype (str): option prefix; for 'user' the key becomes
            'user.xlator.<xlator>', otherwise '<xtype>.<xlator>'.
        xsfail (bool): when True the volume start is expected to fail and
            no further verification is done.
    """
    self.verified_bricks = []

    suffix = ''
    if xtype == 'user':
        suffix = '.xlator'
    option = '{0}{1}.{2}'.format(xtype, suffix, xlator)
    self._set_and_assert_volume_option(option, parent)

    # Restart the volume
    stop_rc, _, _ = volume_stop(self.mnode, self.volname)
    self.assertEqual(stop_rc, 0, 'Unable to stop volume')
    sleep(self.timeout)

    start_rc, _, _ = volume_start(self.mnode, self.volname)
    if xsfail:
        self.assertNotEqual(start_rc, 0, 'Expected volume start to fail')
    else:
        self.assertEqual(start_rc, 0, 'Unable to start a stopped volume')
        self._verify_position(xlator, parent, xtype)

        online = wait_for_volume_process_to_be_online(self.mnode,
                                                      self.volname)
        self.assertTrue(
            online, 'Not all volume processes are online after '
            'starting a stopped volume')
        sleep(self.timeout)
def restart_brick_process(hostname, gluster_pod, block_hosting_vol):
    """Kill brick processes of a block hosting volume and force start it.

    At most two pods get their brick process killed. After the killed
    processes are gone the volume is force started.

    Args:
        hostname (str): hostname on which gluster pod exists
        gluster_pod (podcmd | str): gluster pod class object has gluster
            pod and ocp master node or gluster pod name
        block_hosting_vol (str): block hosting volume name

    Raises:
        AssertionError: if a brick process cannot be killed or the volume
            cannot be force started.
    """
    brick_pids = get_brick_pids(gluster_pod, block_hosting_vol, hostname)
    brick_ips = brick_pids.keys()
    pods = oc_get_pods(hostname)

    # Limit the number of pods whose brick process is killed to 2
    max_kills = 2
    killed = {}

    for pod_name in pods.keys():
        pod_ip = pods[pod_name]["ip"]
        if pod_ip not in brick_ips or len(killed) >= max_kills:
            continue

        target_pid = brick_pids[pod_ip]
        rc, _, stderr = oc_rsh(hostname, pod_name,
                               "kill -9 %s" % target_pid)
        if rc != 0:
            message = "failed to kill process id %s error: %s" % (
                target_pid, stderr)
            g.log.error(message)
            raise AssertionError(message)

        killed[pod_name] = target_pid

    for pod_name, target_pid in killed.items():
        wait_for_process_to_kill_on_pod(pod_name, target_pid, hostname)

    rc, _, stderr = volume_start(gluster_pod, block_hosting_vol,
                                 force=True)
    if rc != 0:
        message = ("failed to start gluster volume %s on pod %s "
                   "error: %s" % (block_hosting_vol, gluster_pod, stderr))
        g.log.error(message)
        raise AssertionError(message)
def bricks_online_and_volume_reset(cls):
    """Bring all bricks online and reset the volume options.

    Force starts the volume when an offline-brick list could be obtained,
    waits for all bricks to be online and then resets the volume options
    that were set.

    Raises:
        ExecutionError: if the force start fails, the bricks do not come
            online or the volume reset fails.
    """
    bricks_offline = get_offline_bricks_list(cls.mnode, cls.volname)
    if bricks_offline is not None:
        # volume_start returns a (ret, out, err) tuple; the exit status
        # must be compared with 0 because the tuple itself is always
        # truthy.
        ret, _, _ = volume_start(cls.mnode, cls.volname, force=True)
        if ret != 0:
            raise ExecutionError("Failed to force start volume "
                                 "%s" % cls.volname)

    ret = wait_for_bricks_to_be_online(cls.mnode, cls.volname)
    if not ret:
        raise ExecutionError("Failed to bring bricks online "
                             "for volume %s" % cls.volname)

    ret, _, _ = volume_reset(cls.mnode, cls.volname, force=True)
    if ret:
        raise ExecutionError("Failed to reset volume %s" % cls.volname)
    g.log.info("Successful in volume reset %s", cls.volname)
def test_volume_create_start_stop_start(self):
    """Tests volume create, start, status, stop, start.
    Also Validates whether all the brick process are running after the
    start of the volume.
    """
    def _assert_all_processes_online():
        # Every process of the volume must be online
        online = verify_all_process_of_volume_are_online(self.mnode,
                                                         self.volname)
        self.assertTrue(
            online,
            ("Volume %s : All process are not online" % self.volname))

    def _assert_info_and_status_logged():
        # Volume info and status must be logged successfully
        logged = log_volume_info_and_status(self.mnode, self.volname)
        self.assertTrue(
            logged,
            ("Logging volume %s info and status failed" % self.volname))

    _assert_all_processes_online()

    # Force stop, then start the volume again
    stop_rc, _, _ = volume_stop(self.mnode, self.volname, force=True)
    self.assertEqual(stop_rc, 0,
                     "Failed to stop volume %s" % self.volname)

    start_rc, _, _ = volume_start(self.mnode, self.volname)
    self.assertEqual(start_rc, 0,
                     "Unable to start volume %s" % self.volname)

    # Fixed pause after the start before the checks below run
    time.sleep(15)

    _assert_info_and_status_logged()
    _assert_all_processes_online()
    _assert_info_and_status_logged()

    # glusterd must be running on every server
    glusterd_state = is_glusterd_running(self.servers)
    self.assertEqual(
        glusterd_state, 0,
        ("glusterd not running on all servers: %s" % self.servers))
def restart_file_volume(file_vol, sleep_time=120):
    """Restart file volume (stop and start volume).

    Args:
        file_vol (str): name of a file volume
        sleep_time (int): seconds to wait between stopping and starting
            the volume

    Raises:
        AssertionError: if the volume status cannot be fetched or the
            stop, start or final status command fails.
    """
    endpoint = "auto_get_gluster_endpoint"

    def _fail(message):
        # Log the problem and abort with the same text
        g.log.error(message)
        raise AssertionError(message)

    status_before = get_volume_status(endpoint, file_vol)
    if not status_before:
        raise AssertionError("failed to get gluster volume status")

    g.log.info("Gluster volume %s status\n%s : " % (
        file_vol, status_before))

    rc, _, stderr = volume_stop(endpoint, file_vol)
    if rc != 0:
        _fail("Failed to stop gluster volume %s. error: %s" % (
            file_vol, stderr))

    # Explicit wait to stop ios and pvc creation (2 mins by default)
    time.sleep(sleep_time)

    rc, _, stderr = volume_start(endpoint, file_vol, force=True)
    if rc != 0:
        _fail("failed to start gluster volume %s error: %s" % (
            file_vol, stderr))

    rc, _, stderr = volume_status(endpoint, file_vol)
    if rc != 0:
        _fail("Failed to get status for gluster volume %s error: %s" % (
            file_vol, stderr))
def restart_file_volume(file_vol, sleep_time=120):
    """Restart the file volume service (stop, wait, force start).

    Args:
        file_vol (str): name of a file volume
        sleep_time (int): seconds to wait between stopping and starting
            the volume

    Raises:
        AssertionError: if the volume status cannot be fetched or the
            stop, start or final status command fails.
    """
    node = "auto_get_gluster_endpoint"

    current_status = get_volume_status(node, file_vol)
    if not current_status:
        raise AssertionError("failed to get gluster volume status")
    g.log.info("Gluster volume %s status\n%s : " % (
        file_vol, current_status))

    failure = None

    exit_code, _, error_text = volume_stop(node, file_vol)
    if exit_code != 0:
        failure = "Failed to stop gluster volume %s. error: %s" % (
            file_vol, error_text)
        g.log.error(failure)
        raise AssertionError(failure)

    # Explicit wait to stop ios and pvc creation (2 mins by default)
    time.sleep(sleep_time)

    exit_code, _, error_text = volume_start(node, file_vol, force=True)
    if exit_code != 0:
        failure = "failed to start gluster volume %s error: %s" % (
            file_vol, error_text)
        g.log.error(failure)
        raise AssertionError(failure)

    exit_code, _, error_text = volume_status(node, file_vol)
    if exit_code != 0:
        failure = (
            "Failed to get status for gluster volume %s error: %s" % (
                file_vol, error_text))
        g.log.error(failure)
        raise AssertionError(failure)
def tearDown(self):
    """Start and clean up the three test volumes, then disable brick-mux.

    For test_volume_1 .. test_volume_3 the volume is started (best
    effort) and cleaned up. Afterwards cluster.brick-multiplex is set to
    disable and the base class tearDown is called.

    Raises:
        ExecutionError: if a volume cannot be cleaned up or
            cluster.brick-multiplex cannot be disabled.
    """
    for number in range(1, 4):

        # Starting volumes.
        self.volume['name'] = ("test_volume_%s" % number)
        self.volname = ("test_volume_%s" % number)
        ret, _, _ = volume_start(self.mnode, self.volname)
        # The start is best effort: a failure is not fatal here, but
        # success is only logged when the command really returned 0.
        if ret == 0:
            g.log.info("Volume %s was started successfully", self.volname)
        else:
            g.log.info("Volume %s could not be started (ret=%s), "
                       "continuing with cleanup", self.volname, ret)

        # Cleaning up volumes.
        ret = cleanup_volume(self.mnode, self.volname)
        if not ret:
            raise ExecutionError("Failed to cleanup %s" % self.volname)
        g.log.info("Successfully cleaned volume: %s", self.volname)

    # Setting cluster.brick-multiplex to disable.
    ret = set_volume_options(self.mnode, 'all',
                             {'cluster.brick-multiplex': 'disable'})
    if not ret:
        raise ExecutionError("Failed to disable cluster.brick-multiplex")
    g.log.info("Successfully set cluster.brick-multiplex to disable.")

    self.get_super_method(self, 'tearDown')()
def test_brick_process_not_started_on_read_only_node_disks(self):
    """Check that a brick on a read-only mounted directory fails to start.

    * create volume and start
    * kill one brick
    * start IO
    * unmount the brick directory from node
    * remount the brick directory with read-only option
    * start the volume with "force" option
    * check for error 'posix: initializing translator failed' in log file
    * remount the brick directory with read-write option
    * start the volume with "force" option
    * validate IO
    """
    # pylint: disable=too-many-locals,too-many-statements

    # Select bricks to bring offline
    bricks_to_bring_offline_dict = select_bricks_to_bring_offline(
        self.mnode, self.volname)
    # Materialize the filter() result as a list: it is logged, handed to
    # several helpers and indexed below, which a one-shot Python 3
    # iterator cannot support.
    bricks_to_bring_offline = list(filter(
        None, (bricks_to_bring_offline_dict['hot_tier_bricks']
               + bricks_to_bring_offline_dict['cold_tier_bricks']
               + bricks_to_bring_offline_dict['volume_bricks'])))

    # Bring brick offline
    g.log.info('Bringing bricks %s offline...', bricks_to_bring_offline)
    ret = bring_bricks_offline(self.volname, bricks_to_bring_offline)
    self.assertTrue(
        ret, 'Failed to bring bricks %s offline' % bricks_to_bring_offline)

    ret = are_bricks_offline(self.mnode, self.volname,
                             bricks_to_bring_offline)
    self.assertTrue(ret,
                    'Bricks %s are not offline' % bricks_to_bring_offline)
    g.log.info('Bringing bricks %s offline is successful',
               bricks_to_bring_offline)

    # Creating files for all volumes
    for mount_obj in self.mounts:
        g.log.info("Starting IO on %s:%s",
                   mount_obj.client_system, mount_obj.mountpoint)
        cmd = ("python %s create_files -f 100 %s/%s/test_dir"
               % (self.script_upload_path, mount_obj.mountpoint,
                  mount_obj.client_system))
        proc = g.run_async(mount_obj.client_system, cmd,
                           user=mount_obj.user)
        self.all_mounts_procs.append(proc)

    # umount brick
    brick_node, volume_brick = bricks_to_bring_offline[0].split(':')
    brick_path_parts = volume_brick.split('/')
    # First three '/'-separated components of the brick path; presumably
    # the mount point of the brick's filesystem -- confirm against setup.
    node_brick = '/'.join(brick_path_parts[0:3])
    g.log.info('Start umount brick %s...', node_brick)
    ret, _, _ = g.run(brick_node, 'umount -l %s' % node_brick)
    self.assertFalse(ret, 'Failed to umount brick %s' % node_brick)
    g.log.info('Successfully umounted %s', node_brick)

    # get time before remount the directory and checking logs for error
    g.log.info('Getting time before remount the directory and '
               'checking logs for error...')
    _, time_before_checking_logs, _ = g.run(brick_node, 'date -u +%s')
    g.log.info('Time before remount the directory and checking logs - %s',
               time_before_checking_logs)

    # remount the directory with read-only option
    g.log.info('Start remount brick %s with read-only option...',
               node_brick)
    ret, _, _ = g.run(brick_node, 'mount -o ro %s' % node_brick)
    self.assertFalse(ret, 'Failed to remount brick %s' % node_brick)
    g.log.info('Successfully remounted %s with read-only option',
               node_brick)

    # start volume with "force" option
    g.log.info('starting volume with "force" option...')
    ret, _, _ = volume_start(self.mnode, self.volname, force=True)
    self.assertFalse(
        ret,
        'Failed to start volume %s with "force" option' % self.volname)
    g.log.info('Successfully started volume %s with "force" option',
               self.volname)

    # check logs for an 'initializing translator failed' error
    g.log.info(
        "Checking logs for an 'initializing translator failed' "
        "error for %s brick...", node_brick)
    error_msg = 'posix: initializing translator failed'
    # The brick log file name is built from the last three components of
    # the brick path joined with '-'.
    cmd = ("cat /var/log/glusterfs/bricks/%s-%s-%s.log | "
           "grep '%s'"
           % (brick_path_parts[-3], brick_path_parts[-2],
              brick_path_parts[-1], error_msg))
    ret, log_msgs, _ = g.run(brick_node, cmd)
    # Only the most recent matching log line is inspected.
    log_msg = log_msgs.rstrip().split('\n')[-1]

    self.assertIn(error_msg, log_msg, 'No errors in logs')
    g.log.info('EXPECTED: %s', error_msg)

    # get time from log message
    # Takes the text before the first 'E', drops its first character and
    # last two characters, then cuts off everything from the first '.'.
    # NOTE(review): assumes lines look like
    # '[YYYY-MM-DD HH:MM:SS.ffffff] E ...' -- confirm the log format.
    log_time_msg = log_msg.split('E')[0][1:-2].split('.')[0]
    log_time_msg_converted = calendar.timegm(
        time.strptime(log_time_msg, '%Y-%m-%d %H:%M:%S'))
    g.log.info('Time_msg from logs - %s ', log_time_msg)
    g.log.info('Time from logs - %s ', log_time_msg_converted)

    # get time after remount the directory checking logs for error
    g.log.info('Getting time after remount the directory and '
               'checking logs for error...')
    _, time_after_checking_logs, _ = g.run(brick_node, 'date -u +%s')
    g.log.info('Time after remount the directory and checking logs - %s',
               time_after_checking_logs)

    # check time periods
    g.log.info('Checking if an error is in right time period...')
    self.assertTrue(
        int(time_before_checking_logs)
        <= int(log_time_msg_converted)
        <= int(time_after_checking_logs),
        'Expected error is not in right time period')
    g.log.info('Expected error is in right time period')

    # umount brick
    g.log.info('Start umount brick %s...', node_brick)
    ret, _, _ = g.run(brick_node, 'umount -l %s' % node_brick)
    self.assertFalse(ret, 'Failed to umount brick %s' % node_brick)
    g.log.info('Successfully umounted %s', node_brick)

    # remount the directory with read-write option
    g.log.info('Start remount brick %s with read-write option...',
               node_brick)
    ret, _, _ = g.run(brick_node, 'mount %s' % node_brick)
    self.assertFalse(ret, 'Failed to remount brick %s' % node_brick)
    g.log.info('Successfully remounted %s with read-write option',
               node_brick)

    # start volume with "force" option
    g.log.info('starting volume with "force" option...')
    ret, _, _ = volume_start(self.mnode, self.volname, force=True)
    self.assertFalse(
        ret,
        'Failed to start volume %s with "force" option' % self.volname)
    g.log.info('Successfully started volume %s with "force" option',
               self.volname)

    # Validate IO
    g.log.info('Validating IO on all mounts')
    self.assertTrue(validate_io_procs(self.all_mounts_procs, self.mounts),
                    "IO failed on some of the clients")
    g.log.info('Successfully Validated IO on all mounts')
    self.io_validation_complete = True
def test_remove_brick(self):
    """
    In this test case:
    1. Trusted storage Pool of 4 nodes
    2. Create a distributed-replicated volumes with 4 bricks
    3. Start the volume
    4. Fuse mount the gluster volume on out of trusted nodes
    5. Create some data file
    6. Start remove-brick operation for one replica pair
    7. Restart glusterd on all nodes
    8. Try to commit the remove-brick operation while rebalance
       is in progress, it should fail
    """
    # pylint: disable=too-many-statements
    my_servers = self.servers[0:4]
    my_server_info = {
        server: self.all_servers_info[server] for server in my_servers}

    for index in range(1, 4):
        ret, _, _ = peer_probe(self.servers[0], self.servers[index])
        # The failure message must be a formatted string; passing a tuple
        # would print the raw tuple instead of the interpolated text.
        self.assertEqual(ret, 0, "peer probe from %s to %s is failed"
                         % (self.servers[0], self.servers[index]))
        g.log.info("peer probe is success from %s to "
                   "%s", self.servers[0], self.servers[index])

    # Validating whether the peer are connected or not
    # In jenkins This case is failing saying peers are not in connected
    # state, that is reason adding a check whether peers are connected
    # or not
    # Poll up to 30 times, sleeping 3 seconds after each failed check.
    count = 0
    while count < 30:
        ret = is_peer_connected(self.mnode, my_servers)
        if ret:
            g.log.info("Peers are in connected state")
            break
        sleep(3)
        count = count + 1
    self.assertTrue(ret, "Some peers are not in connected state")

    self.volname = "testvol"
    bricks_list = form_bricks_list(self.mnode, self.volname, 4,
                                   my_servers, my_server_info)
    g.log.info("Creating a volume %s ", self.volname)
    ret, _, _ = volume_create(self.mnode, self.volname, bricks_list,
                              force=False, replica_count=2)
    self.assertEqual(ret, 0,
                     "Unable to create volume %s" % self.volname)
    g.log.info("Volume created successfully %s", self.volname)

    ret, _, _ = volume_start(self.mnode, self.volname, False)
    self.assertEqual(ret, 0,
                     "Failed to start the volume %s" % self.volname)

    g.log.info("Get all the bricks of the volume")
    bricks_list = get_all_bricks(self.mnode, self.volname)
    self.assertIsNotNone(bricks_list, "Failed to get the brick list")
    g.log.info("Successfully got the list of bricks of volume")

    # Mounting a volume
    ret, _, _ = mount_volume(self.volname, mtype=self.mount_type,
                             mpoint=self.mounts[0].mountpoint,
                             mserver=self.mnode,
                             mclient=self.mounts[0].client_system)
    self.assertEqual(ret, 0, "Volume %s is not mounted" % self.volname)
    g.log.info("Volume mounted successfully : %s", self.volname)

    self.all_mounts_procs = []
    # Creating files
    command = ("cd %s/ ; "
               "for i in `seq 1 10` ; "
               "do mkdir l1_dir.$i ; "
               "for j in `seq 1 5` ; "
               "do mkdir l1_dir.$i/l2_dir.$j ; "
               "for k in `seq 1 10` ; "
               "do dd if=/dev/urandom of=l1_dir.$i/l2_dir.$j/test.$k "
               "bs=128k count=$k ; "
               "done ; "
               "done ; "
               "done ; "
               % (self.mounts[0].mountpoint))

    proc = g.run_async(self.mounts[0].client_system, command,
                       user=self.mounts[0].user)
    self.all_mounts_procs.append(proc)
    self.io_validation_complete = False

    # Validate IO
    ret = validate_io_procs(self.all_mounts_procs, self.mounts)
    self.io_validation_complete = True
    self.assertTrue(ret, "IO failed on some of the clients")

    # Bricks at index 2 and 3 are the ones scheduled for removal.
    remove_brick_list = bricks_list[2:4]
    ret, _, _ = remove_brick(self.mnode, self.volname, remove_brick_list,
                             'start')
    self.assertEqual(ret, 0, "Failed to start remove brick operation")
    g.log.info("Remove bricks operation started successfully")

    g.log.info("Restart glusterd on servers %s", self.servers)
    ret = restart_glusterd(self.servers)
    self.assertTrue(
        ret, "Failed to restart glusterd on servers %s" % self.servers)
    g.log.info("Successfully restarted glusterd on servers %s",
               self.servers)

    # The commit is expected to be rejected (non-zero return code).
    ret, _, _ = remove_brick(self.mnode, self.volname, remove_brick_list,
                             'commit')
    self.assertNotEqual(ret, 0, "Remove brick commit ops should be fail")
    g.log.info("Remove bricks commit operation failure is expected")
def restart_gluster_vol_brick_processes(ocp_client_node, file_vol,
                                        gluster_nodes):
    """Restarts brick process of a file volume.

    The brick PIDs are read from the volume status, each one is killed
    with 'kill -9', the function waits for each killed PID to disappear
    and finally force-starts the volume.

    Args:
        ocp_client_node (str): Node to execute OCP commands on.
        file_vol (str): file volume name.
        gluster_nodes (str/list): One or several IPv4 addresses of Gluster
            nodes, where 'file_vol' brick processes must be recreated.

    Raises:
        AssertionError: if a brick PID is reported as "-1", if no PID is
            found for a requested node, or if the final volume start
            returns non-zero.
        exceptions.ExecutionError: if a killed PID is still present after
            the 60 second wait.
    """
    if not isinstance(gluster_nodes, (list, set, tuple)):
        gluster_nodes = [gluster_nodes]

    # Get Gluster vol brick PIDs
    gluster_volume_status = get_gluster_vol_status(file_vol)
    # Must be a list: (node, pid) pairs are appended below.
    pids = []
    for gluster_node in gluster_nodes:
        pid = None
        for g_node, g_node_data in gluster_volume_status.items():
            if g_node != gluster_node:
                continue
            for process_name, process_data in g_node_data.items():
                # Only entries whose name starts with "/var" are used;
                # presumably these are the brick paths -- confirm.
                if not process_name.startswith("/var"):
                    continue
                pid = process_data["pid"]
                # When brick is down, pid of the brick is returned as -1,
                # which is an unexpected situation, so fail explicitly.
                if pid == "-1":
                    raise AssertionError(
                        "Got unexpected PID (-1) for '%s' gluster vol "
                        "on '%s' node." % (file_vol, gluster_node))
        if not pid:
            raise AssertionError(
                "Could not find 'pid' in Gluster vol data for '%s' "
                "Gluster node. Data: %s" % (
                    gluster_node, gluster_volume_status))
        pids.append((gluster_node, pid))

    # Restart Gluster vol brick processes using found PIDs
    for gluster_node, pid in pids:
        cmd = "kill -9 %s" % pid
        cmd_run_on_gluster_pod_or_node(ocp_client_node, cmd, gluster_node)

    # Wait for Gluster vol brick processes to be recreated
    for gluster_node, pid in pids:
        # Interpolate the PID; without it grep would look for a literal
        # '%s' and the wait below would never observe the process.
        killed_pid_cmd = (
            "ps -eaf | grep %s | grep -v grep | awk '{print $2}'" % pid)
        _waiter = waiter.Waiter(timeout=60, interval=2)
        for w in _waiter:
            result = cmd_run_on_gluster_pod_or_node(
                ocp_client_node, killed_pid_cmd, gluster_node)
            if result.strip() == pid:
                # The old PID is still listed; keep waiting.
                continue
            g.log.info("Brick process '%s' was killed successfully on '%s'" % (
                pid, gluster_node))
            break
        if w.expired:
            error_msg = ("Process ID '%s' still exists on '%s' after waiting "
                         "for it 60 seconds to get killed." % (
                             pid, gluster_node))
            g.log.error(error_msg)
            raise exceptions.ExecutionError(error_msg)

    # Start volume after gluster vol brick processes recreation
    ret, _, err = volume_start(
        "auto_get_gluster_endpoint", file_vol, force=True)
    if ret != 0:
        # Report all requested nodes: the loop variable from above would
        # be undefined for empty input and names only the last node.
        err_msg = "Failed to start gluster volume %s on %s. error: %s" % (
            file_vol, gluster_nodes, err)
        g.log.error(err_msg)
        raise AssertionError(err_msg)
def test_nfs_ganesha_subdirectory_mount_from_server_side(self):
    """
    Tests script to verify nfs ganesha subdirectory mount from server
    side succeeds and able to write IOs.

    The export file of the volume is edited with sed so that 'Path' and
    'Pseudo' point at the sub directory and a 'volpath' entry is added,
    then the volume is stopped and started so the change takes effect,
    the sub directory mounts are mounted and IO is run on them.
    """
    # Path of the sub directory relative to the mount point, always with
    # a leading path separator.
    subdir_to_mount = self.subdir_path.replace(self.mounts[0].mountpoint,
                                               '')
    if not subdir_to_mount.startswith(os.path.sep):
        subdir_to_mount = os.path.sep + subdir_to_mount

    subdir = self.volname + subdir_to_mount

    for mount_obj in self.sub_dir_mounts:
        mount_obj.volname = subdir

    export_file = ("/var/run/gluster/shared_storage/nfs-ganesha/exports/"
                   "export.%s.conf" % self.volname)
    cmd = (r"sed -i  s/'Path = .*'/'Path = \"\/%s\";'/g %s"
           % (re.escape(subdir), export_file))
    ret, _, _ = g.run(self.mnode, cmd)
    self.assertEqual(ret, 0, ("Unable to change Path info to %s in %s"
                              % ("/" + subdir, export_file)))

    cmd = ("sed -i  's/volume=.*/& \\n volpath=\"%s\";/g' %s"
           % (re.escape(subdir_to_mount), export_file))
    ret, _, _ = g.run(self.mnode, cmd)
    self.assertEqual(ret, 0, ("Unable to add volpath info to %s in %s"
                              % ("/" + subdir, export_file)))

    cmd = (r"sed -i  s/'Pseudo=.*'/'Pseudo=\"\/%s\";'/g %s"
           % (re.escape(subdir), export_file))
    ret, _, _ = g.run(self.mnode, cmd)
    self.assertEqual(ret, 0, ("Unable to change pseudo Path info to "
                              "%s in %s" % ("/" + subdir, export_file)))

    # Stop and start volume to take the modified export file to effect.
    # Stopping volume
    # volume_stop/volume_start return a (ret, out, err) tuple, so the
    # return code has to be unpacked; asserting on the tuple itself
    # would always pass.
    ret, _, _ = volume_stop(self.mnode, self.volname)
    self.assertEqual(ret, 0, ("Failed to stop volume %s" % self.volname))

    # Waiting for few seconds for volume unexport. Max wait time is
    # 120 seconds.
    ret = wait_for_nfs_ganesha_volume_to_get_unexported(
        self.mnode, self.volname)
    self.assertTrue(ret, ("Failed to unexport volume %s after "
                          "stopping volume" % self.volname))

    # Starting volume
    ret, _, _ = volume_start(self.mnode, self.volname)
    self.assertEqual(ret, 0, ("Failed to start volume %s" % self.volname))

    # Waiting for few seconds for volume export. Max wait time is
    # 120 seconds.
    ret = wait_for_nfs_ganesha_volume_to_get_exported(self.mnode, subdir)
    self.assertTrue(ret, ("Failed to export sub directory %s after "
                          "starting volume" % subdir))

    for mount_obj in self.sub_dir_mounts:
        if not mount_obj.is_mounted():
            ret = mount_obj.mount()
            self.assertTrue(
                ret, ("Unable to mount volume '%s:%s' "
                      "on '%s:%s'" % (mount_obj.server_system,
                                      mount_obj.volname,
                                      mount_obj.client_system,
                                      mount_obj.mountpoint)))

    ret = self.start_and_wait_for_io_to_complete(self.sub_dir_mounts)
    self.assertTrue(ret, ("Failed to write IOs when sub directory is"
                          " mounted from server side"))
    g.log.info("IO successful on clients")
def test_volume_set_ops_sub_dirs_mounted(self):
    """
    Check volume start/volume stop/volume reset operations while sub-dirs
    are mounted

    Steps:
    1. Create two sub-directories on mounted volume.
    2. Unmount volume from clients.
    3. Mount each sub-directory to two different clients.
    4. Perform IO on mounts.
    5. Perform volume stop operation.
    6. Perform volume start operation.
    7. Perform volume reset operation.
    """
    first_mount = self.mounts[0]

    # Create sub directories 'd1' and 'd2' through the first mount
    created = mkdir(first_mount.client_system,
                    "%s/d1" % first_mount.mountpoint)
    self.assertTrue(
        created, ("Failed to create directory 'd1' in volume %s "
                  "from client %s"
                  % (first_mount.volname, first_mount.client_system)))
    created = mkdir(first_mount.client_system,
                    "%s/d2" % first_mount.mountpoint)
    self.assertTrue(
        created, ("Failed to create directory 'd2' in volume %s "
                  "from client %s"
                  % (first_mount.volname, first_mount.client_system)))

    # Unmount the whole-volume mounts
    unmounted = self.unmount_volume(self.mounts)
    self.assertTrue(unmounted, "Failed to un mount one or more volumes")
    g.log.info("Successfully un mounted all volumes")

    # One sub directory per client: copies of the first two mount
    # objects, re-pointed at 'd1' and 'd2' respectively.
    d1_mount = copy.deepcopy(self.mounts[0])
    d2_mount = copy.deepcopy(self.mounts[1])
    self.subdir_mounts = [d1_mount, d2_mount]
    d1_mount.volname = "%s/d1" % self.volname
    d2_mount.volname = "%s/d2" % self.volname

    for subdir_mount in self.subdir_mounts:
        mounted = subdir_mount.mount()
        self.assertTrue(
            mounted, ("Failed to mount sub directory %s on client"
                      " %s" % (subdir_mount.volname,
                               subdir_mount.client_system)))
        g.log.info("Successfully mounted sub directory %s on client %s",
                   subdir_mount.volname,
                   subdir_mount.client_system)
    g.log.info("Successfully mounted sub directories to clients.")

    # Kick off IO on every sub directory mount; the starting directory
    # number advances by 10 per mount (1, 11, ...).
    io_procs = []
    for position, subdir_mount in enumerate(self.subdir_mounts):
        dirname_start = 1 + 10 * position
        g.log.info("Starting IO on %s:%s", subdir_mount.client_system,
                   subdir_mount.mountpoint)
        io_cmd = ("/usr/bin/env python %s create_deep_dirs_with_files "
                  "--dirname-start-num %d "
                  "--dir-depth 2 "
                  "--dir-length 10 "
                  "--max-num-of-dirs 5 "
                  "--num-of-files 5 %s" % (self.script_upload_path,
                                           dirname_start,
                                           subdir_mount.mountpoint))
        io_procs.append(g.run_async(subdir_mount.client_system, io_cmd,
                                    user=subdir_mount.user))

    # Wait for the IO to finish and check its outcome
    g.log.info("Validating IO's")
    io_ok = validate_io_procs(io_procs, self.subdir_mounts)
    self.assertTrue(io_ok, "IO failed on some of the clients")
    g.log.info("Successfully validated all io's")

    # Stat everything that was created
    g.log.info("Get stat of all the files/dirs created.")
    stat_ok = get_mounts_stat(self.subdir_mounts)
    self.assertTrue(stat_ok, "Stat failed on some of the clients")
    g.log.info("Successfully got stat of all files/dirs created")

    # Stop, start and reset the volume, in that order; each operation
    # must return 0.
    volume_operations = (
        ("Stopping volume: %s", volume_stop,
         "Failed to stop volume: %s"),
        ("Starting volume again: %s", volume_start,
         "Failed to start volume: %s"),
        ("Resetting volume: %s", volume_reset,
         "Failed to reset volume: %s"),
    )
    for progress_fmt, operation, failure_fmt in volume_operations:
        g.log.info(progress_fmt, self.volname)
        rc, _, _ = operation(self.mnode, self.volname)
        self.assertEqual(rc, 0, failure_fmt % self.volname)
def test_volume_create(self):
    """Exercise volume creation in positive and negative scenarios.

    Scenarios covered, in order:
    1. Create and start a volume, bring two bricks down and check that
       volume start with force brings all bricks online.
    2. Creating a volume from previously used bricks must fail.
    3. Setting up a volume with an already existing name.
    4. Creating a volume with a non existing brick path must fail.
    5. After splitting the servers into two clusters, creating a volume
       with bricks from another cluster must fail.
    6. Creating a volume while glusterd is stopped on one node must fail.
    """
    # create and start a volume
    self.volume['name'] = "first_volume"
    self.volname = "first_volume"
    ret = setup_volume(self.mnode, self.all_servers_info, self.volume)
    self.assertTrue(ret, "Failed to create and start volume")

    # bring a brick down and volume start force should bring it to online
    g.log.info("Get all the bricks of the volume")
    bricks_list = get_all_bricks(self.mnode, self.volname)
    self.assertIsNotNone(bricks_list, "Failed to get the brick list")
    g.log.info("Successfully got the list of bricks of volume")

    ret = bring_bricks_offline(self.volname, bricks_list[0:2])
    self.assertTrue(ret, "Failed to bring down the bricks")
    g.log.info("Successfully brought the bricks down")

    ret, _, _ = volume_start(self.mnode, self.volname, force=True)
    self.assertEqual(ret, 0, "Failed to start the volume")
    g.log.info("Volume start with force is success")

    ret = wait_for_bricks_to_be_online(self.mnode, self.volname)
    self.assertTrue(ret, "Failed to bring the bricks online")
    g.log.info("Volume start with force successfully brought all the "
               "bricks online")

    # create volume with previously used bricks and different volume name
    self.volname = "second_volume"
    ret, _, _ = volume_create(self.mnode, self.volname, bricks_list)
    self.assertNotEqual(
        ret, 0, "Expected: It should fail to create a "
        "volume with previously used bricks. Actual: "
        "Successfully created the volume with previously"
        " used bricks")
    g.log.info("Failed to create the volume with previously used bricks")

    # create a volume with already existing volume name
    # NOTE(review): the assertion requires a truthy result although the
    # message and the log line talk about a failure; presumably
    # setup_volume reports success when the volume already exists --
    # confirm against setup_volume before changing this check.
    self.volume['name'] = "first_volume"
    ret = setup_volume(self.mnode, self.all_servers_info, self.volume)
    self.assertTrue(
        ret, "Expected: It should fail to create a volume"
        " with already existing volume name. Actual: "
        "Successfully created the volume with "
        "already existing volname")
    g.log.info("Failed to create the volume with already existing volname")

    # creating a volume with non existing brick path should fail
    self.volname = "second_volume"
    bricks_list = form_bricks_list(self.mnode, self.volname,
                                   len(self.servers), self.servers,
                                   self.all_servers_info)
    # Replace the path of one randomly chosen brick with a path that is
    # expected not to exist, keeping the brick's host part.
    nonexisting_brick_index = random.randint(0, len(bricks_list) - 1)
    non_existing_brick = bricks_list[nonexisting_brick_index].split(":")[0]
    non_existing_path = ":/brick/non_existing_path"
    non_existing_brick = non_existing_brick + non_existing_path
    bricks_list[nonexisting_brick_index] = non_existing_brick

    ret, _, _ = volume_create(self.mnode, self.volname, bricks_list)
    self.assertNotEqual(
        ret, 0, "Expected: Creating a volume with non "
        "existing brick path should fail. Actual: "
        "Successfully created the volume with "
        "non existing brick path")
    g.log.info("Failed to create the volume with non existing brick path")

    # cleanup the volume and peer detach all servers. form two clusters,try
    # to create a volume with bricks whose nodes are in different clusters

    # cleanup volumes
    vol_list = get_volume_list(self.mnode)
    self.assertIsNotNone(vol_list, "Failed to get the volume list")

    for volume in vol_list:
        ret = cleanup_volume(self.mnode, volume)
        self.assertTrue(ret, "Unable to delete volume % s" % volume)

    # peer detach all servers
    ret = peer_detach_servers(self.mnode, self.servers)
    self.assertTrue(ret, "Peer detach to all servers is failed")
    g.log.info("Peer detach to all the servers is success")

    # form cluster 1
    ret, _, _ = peer_probe(self.servers[0], self.servers[1])
    self.assertEqual(
        ret, 0, "Peer probe from %s to %s is failed"
        % (self.servers[0], self.servers[1]))
    g.log.info("Peer probe is success from %s to %s",
               self.servers[0], self.servers[1])

    # form cluster 2
    ret, _, _ = peer_probe(self.servers[2], self.servers[3])
    self.assertEqual(
        ret, 0, "Peer probe from %s to %s is failed"
        % (self.servers[2], self.servers[3]))
    g.log.info("Peer probe is success from %s to %s",
               self.servers[2], self.servers[3])

    # Creating a volume with bricks which are part of another
    # cluster should fail
    ret = setup_volume(self.mnode, self.all_servers_info, self.volume)
    self.assertFalse(
        ret, "Expected: Creating a volume with bricks"
        " which are part of another cluster should fail."
        " Actual: Successfully created the volume with "
        "bricks which are part of another cluster")
    g.log.info("Failed to create the volume with bricks which are "
               "part of another cluster")

    # form a cluster, bring a node down. try to create a volume when one of
    # the brick node is down
    ret, _, _ = peer_detach(self.servers[2], self.servers[3])
    self.assertEqual(ret, 0, "Peer detach is failed")
    g.log.info("Peer detach is success")

    ret = peer_probe_servers(self.mnode, self.servers)
    self.assertTrue(ret, "Peer probe is failed")
    g.log.info("Peer probe to all the servers is success")

    # Pick any server except the first one and stop glusterd on it.
    random_server = self.servers[random.randint(1, len(self.servers) - 1)]
    ret = stop_glusterd(random_server)
    # This message is shown when the stop FAILS, so it must say so.
    self.assertTrue(ret, "Failed to stop glusterd on %s" % random_server)
    g.log.info("Glusterd is stopped successfully on %s", random_server)

    self.volume['name'] = "third_volume"
    ret = setup_volume(self.mnode, self.all_servers_info, self.volume)
    self.assertFalse(
        ret, "Expected: It should fail to create a volume "
        "when one of the node is down. Actual: Successfully "
        "created the volume with brick whose node is down")
    g.log.info("Failed to create the volume with brick whose node is down")