def power_on_gluster_node_vm(self, vm_name, gluster_hostname,
                             timeout=300, wait_step=3):
    # NOTE(Nitin Goyal): Same timeout is used for all functions.

    # Bring up the target node
    power_on_vm_by_name(vm_name)

    # Wait for gluster node and pod to be ready
    if self.is_containerized_gluster():
        wait_for_ocp_node_be_ready(
            self.node, gluster_hostname, timeout=timeout,
            wait_step=wait_step)
        wait_for_gluster_pod_be_ready_on_specific_node(
            self.node, gluster_hostname, timeout=timeout,
            wait_step=wait_step)

    # Wait for gluster services to be up
    for service in ('glusterd', 'gluster-blockd'):
        wait_for_service_status_on_gluster_pod_or_node(
            self.node, service, 'active', 'running', gluster_hostname,
            raise_on_error=False, timeout=timeout, wait_step=wait_step)
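# Usage sketch for the helper above: a typical caller powers the VM off
# first and registers the power-on as a cleanup. 'power_off_vm_by_name'
# and 'find_vm_name_by_ip_or_hostname' are assumed counterparts of the
# 'power_on_vm_by_name' call used above, not names confirmed by this file.
#
#     vm_name = find_vm_name_by_ip_or_hostname(gluster_hostname)  # assumed helper
#     self.addCleanup(self.power_on_gluster_node_vm, vm_name, gluster_hostname)
#     power_off_vm_by_name(vm_name)  # assumed counterpart helper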
def test_targetcli_weak_permissions_config_files(self):
    """Validate permissions on config files"""
    ocp_node = self.ocp_master_node[0]
    gluster_node = self.gluster_servers[0]
    dir_perm_before, dir_perm_after = "drwxrwxrwx.", "drw-------."
    file_perm_before, file_perm_after = "-rwxrwxrwx.", "-rw-------."
    services = ("tcmu-runner", "gluster-block-target", "gluster-blockd")
    cmd = "chmod -R 777 /etc/target/"

    # Loosen the permissions on '/etc/target' and '/etc/target/backup'
    # and verify that the change took effect
    cmd_run_on_gluster_pod_or_node(ocp_node, cmd, gluster_node)
    for service in services:
        state = ('exited' if service == 'gluster-block-target'
                 else 'running')
        self.addCleanup(
            wait_for_service_status_on_gluster_pod_or_node,
            ocp_node, service, 'active', state, gluster_node)
        self.addCleanup(
            restart_service_on_gluster_pod_or_node, ocp_node, service,
            gluster_node)
    self._validate_permission(
        ocp_node, gluster_node, dir_perm_before, file_perm_before)

    # Restart the services
    for service in services:
        state = ('exited' if service == 'gluster-block-target'
                 else 'running')
        restart_service_on_gluster_pod_or_node(
            ocp_node, service, gluster_node)
        wait_for_service_status_on_gluster_pod_or_node(
            ocp_node, service, 'active', state, gluster_node)

    # Permission on '/etc/target' should be changed back to default
    self._validate_permission(
        ocp_node, gluster_node, dir_perm_after, file_perm_after)
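# A minimal sketch of the '_validate_permission' helper called above; its
# real body is not shown in this file, so the command strings and the set
# of files it inspects are assumptions. The idea: read the 'ls -l'
# permission column for '/etc/target' and its config files and compare
# against the expected strings.
def _validate_permission(self, ocp_node, gluster_node, dir_perm, file_perm):
    # Permission string of the directory itself, e.g. 'drw-------.'
    out = cmd_run_on_gluster_pod_or_node(
        ocp_node, "ls -ld /etc/target | awk '{print $1}'", gluster_node)
    self.assertEqual(out.strip(), dir_perm)

    # Permission string of every config file, e.g. '-rw-------.'
    out = cmd_run_on_gluster_pod_or_node(
        ocp_node,
        "find /etc/target -type f -exec ls -l {} + | awk '{print $1}'",
        gluster_node)
    for perm in out.strip().splitlines():
        self.assertEqual(perm, file_perm)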
def test_target_side_failures_brick_failure_on_block_hosting_volume(self):
    """Target side failures - Brick failure on block hosting volume"""
    skip_msg = ("Skipping this test case due to bugs "
                "BZ-1634745, BZ-1635736, BZ-1636477, BZ-1641668")

    # TODO(vamahaja): Add check for CRS version
    if not self.is_containerized_gluster():
        self.skipTest(
            skip_msg + " and CRS version check not being implemented")

    if get_openshift_storage_version() < "3.11.2":
        self.skipTest(skip_msg)

    self.deploy_and_verify_resouces()

    # get block hosting volume from pvc name
    block_hosting_vol = self.get_block_hosting_volume_by_pvc_name(
        self.pvc_name)

    # restarts 2 brick processes of block hosting volume
    g_nodes = get_gluster_vol_hosting_nodes(block_hosting_vol)
    self.assertGreater(len(g_nodes), 2)
    restart_gluster_vol_brick_processes(
        self.oc_node, block_hosting_vol, g_nodes[:2])

    # checks if all glusterfs services are in running state
    for g_node in g_nodes:
        for service in (SERVICE_BLOCKD, SERVICE_TCMU, SERVICE_TARGET):
            state = "exited" if service == SERVICE_TARGET else "running"
            self.assertTrue(
                wait_for_service_status_on_gluster_pod_or_node(
                    self.oc_node, service, 'active', state, g_node))

    # validates pvc, pv, heketi block and gluster block count after
    # service restarts
    self.validate_volumes_and_blocks()
def test_restart_services_provision_volume_and_run_io(self, service):
    """Restart gluster service then validate volumes"""
    block_hosting_vol = self.get_block_hosting_volume_by_pvc_name(
        self.pvc_name)
    g_nodes = get_gluster_vol_hosting_nodes(block_hosting_vol)
    self.assertGreater(len(g_nodes), 2)

    # restarts glusterfs service
    restart_service_on_gluster_pod_or_node(
        self.oc_node, service, g_nodes[0])

    # wait for deployed user pod to be in Running state after restarting
    # service
    wait_for_pod_be_ready(
        self.oc_node, self.pod_name, timeout=60, wait_step=5)

    # checks if all glusterfs services are in running state
    for g_node in g_nodes:
        for service in (SERVICE_BLOCKD, SERVICE_TCMU, SERVICE_TARGET):
            state = "exited" if service == SERVICE_TARGET else "running"
            self.assertTrue(
                wait_for_service_status_on_gluster_pod_or_node(
                    self.oc_node, service, 'active', state, g_node))

    # validates pvc, pv, heketi block and gluster block count after
    # service restarts
    self.validate_volumes_and_blocks()
def test_restart_services_provision_volume_and_run_io(self, service):
    """Restart gluster service then validate volumes"""
    skip_msg = ("Skipping this test case due to bugs "
                "BZ-1634745, BZ-1635736, BZ-1636477, BZ-1641668")

    # TODO(vamahaja): Add check for CRS version
    if not self.is_containerized_gluster():
        self.skipTest(
            skip_msg + " and CRS version check not being implemented")

    if get_openshift_storage_version() < "3.11.2":
        self.skipTest(skip_msg)

    self.deploy_and_verify_resouces()

    block_hosting_vol = self.get_block_hosting_volume_by_pvc_name(
        self.pvc_name)
    g_nodes = get_gluster_vol_hosting_nodes(block_hosting_vol)
    self.assertGreater(len(g_nodes), 2)

    # restarts glusterfs service
    restart_service_on_gluster_pod_or_node(
        self.oc_node, service, g_nodes[0])

    # wait for deployed user pod to be in Running state after restarting
    # service
    wait_for_pod_be_ready(
        self.oc_node, self.pod_name, timeout=60, wait_step=5)

    # checks if all glusterfs services are in running state
    for g_node in g_nodes:
        for service in (SERVICE_BLOCKD, SERVICE_TCMU, SERVICE_TARGET):
            state = "exited" if service == SERVICE_TARGET else "running"
            self.assertTrue(
                wait_for_service_status_on_gluster_pod_or_node(
                    self.oc_node, service, 'active', state, g_node))

    # validates pvc, pv, heketi block and gluster block count after
    # service restarts
    self.validate_volumes_and_blocks()
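# The 'service' argument of the test above is presumably injected by a
# ddt-style parametrization; a sketch of the assumed decorator (not
# present in this file):
#
#     @ddt.data(SERVICE_BLOCKD, SERVICE_TCMU, SERVICE_TARGET)
#     def test_restart_services_provision_volume_and_run_io(self, service):
#         ...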
def test_target_side_failures_brick_failure_on_block_hosting_volume(self):
    """Target side failures - Brick failure on block hosting volume"""
    # get block hosting volume from pvc name
    block_hosting_vol = self.get_block_hosting_volume_by_pvc_name(
        self.pvc_name)

    # restarts 2 brick processes of block hosting volume
    g_nodes = get_gluster_vol_hosting_nodes(block_hosting_vol)
    self.assertGreater(len(g_nodes), 2)
    restart_gluster_vol_brick_processes(
        self.oc_node, block_hosting_vol, g_nodes[:2])

    # checks if all glusterfs services are in running state
    for g_node in g_nodes:
        for service in (SERVICE_BLOCKD, SERVICE_TCMU, SERVICE_TARGET):
            state = "exited" if service == SERVICE_TARGET else "running"
            self.assertTrue(
                wait_for_service_status_on_gluster_pod_or_node(
                    self.oc_node, service, 'active', state, g_node))

    # validates pvc, pv, heketi block and gluster block count after
    # service restarts
    self.validate_volumes_and_blocks()
def test_heketi_manual_cleanup_operation_in_bhv(self):
    """Validate heketi db cleanup will resolve the mismatch
       in the free size of the block hosting volume with failed
       block device create operations.
    """
    bhv_size_before, bhv_size_after, vol_count = [], [], 5
    ocp_node, g_node = self.ocp_master_node[0], self.gluster_servers[0]
    h_node, h_url = self.heketi_client_node, self.heketi_server_url

    # Get existing heketi volume list
    existing_volumes = heketi_volume_list(h_node, h_url, json=True)

    # Add function to clean stale volumes created during test
    self.addCleanup(
        self._cleanup_heketi_volumes, existing_volumes.get("volumes"))

    # Get nodes id list
    node_id_list = heketi_node_list(h_node, h_url)

    # Disable 4th and other nodes
    for node_id in node_id_list[3:]:
        heketi_node_disable(h_node, h_url, node_id)
        self.addCleanup(heketi_node_enable, h_node, h_url, node_id)

    # Calculate heketi volume size
    free_space, nodenum = get_total_free_space(h_node, h_url)
    free_space_available = int(free_space / nodenum)
    if free_space_available > vol_count:
        h_volume_size = int(free_space_available / vol_count)
        if h_volume_size > 50:
            h_volume_size = 50
    else:
        h_volume_size, vol_count = 1, free_space_available

    # Create BHV in case blockvolume size is greater than default BHV size
    default_bhv_size = get_default_block_hosting_volume_size(
        h_node, self.heketi_dc_name)
    if default_bhv_size < h_volume_size:
        h_volume_name = "autotest-{}".format(utils.get_random_str())
        bhv_info = self.create_heketi_volume_with_name_and_wait(
            h_volume_name, free_space_available,
            raise_on_cleanup_error=False, block=True, json=True)
        free_space_available -= (
            int(bhv_info.get("blockinfo").get("reservedsize")) + 1)
        h_volume_size = int(free_space_available / vol_count)

    # Get BHV list
    h_bhv_list = get_block_hosting_volume_list(h_node, h_url).keys()
    self.assertTrue(h_bhv_list, "Failed to get the BHV list")

    # Get BHV size
    for bhv in h_bhv_list:
        vol_info = heketi_volume_info(h_node, h_url, bhv, json=True)
        bhv_vol_size_before = vol_info.get("freesize")
        bhv_size_before.append(bhv_vol_size_before)

    # Kill the tcmu-runner service
    services = ("tcmu-runner", "gluster-block-target", "gluster-blockd")
    kill_service_on_gluster_pod_or_node(ocp_node, "tcmu-runner", g_node)

    # Restore the services on cleanup
    for service in services:
        state = ('exited' if service == 'gluster-block-target'
                 else 'running')
        self.addCleanup(
            wait_for_service_status_on_gluster_pod_or_node,
            ocp_node, service, 'active', state, g_node)
        self.addCleanup(
            restart_service_on_gluster_pod_or_node, ocp_node, service,
            g_node)

    def run_async(cmd, hostname, raise_on_error=True):
        # Fire the command without waiting for it to finish, so the
        # blockvolume create requests below pile up as pending operations
        return g.run_async(host=hostname, command=cmd)

    # Create stale block volumes in async
    for count in range(vol_count):
        with mock.patch.object(json, 'loads', side_effect=(lambda j: j)):
            with mock.patch.object(
                    command, 'cmd_run', side_effect=run_async):
                heketi_blockvolume_create(
                    h_node, h_url, h_volume_size, json=True)

    # Wait for pending operations to get generated
    self._check_for_pending_operations(h_node, h_url)

    # Restart the services
    for service in services:
        state = ('exited' if service == 'gluster-block-target'
                 else 'running')
        restart_service_on_gluster_pod_or_node(ocp_node, service, g_node)
        wait_for_service_status_on_gluster_pod_or_node(
            ocp_node, service, 'active', state, g_node)

    # Cleanup pending operations
    heketi_server_operation_cleanup(h_node, h_url)

    # Wait for pending operations to get cleaned up
    for w in waiter.Waiter(timeout=120, interval=10):
        # Re-read the BHV sizes on each poll instead of accumulating
        bhv_size_after = []
        for bhv in h_bhv_list:
            vol_info = heketi_volume_info(h_node, h_url, bhv, json=True)
            bhv_vol_size_after = vol_info.get("freesize")
            bhv_size_after.append(bhv_vol_size_after)
        if set(bhv_size_before) == set(bhv_size_after):
            break
    if w.expired:
        raise exceptions.ExecutionError(
            "Failed to validate volume size. Actual: {}, expected: {}"
            .format(set(bhv_size_before), set(bhv_size_after)))
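# A minimal sketch of the '_check_for_pending_operations' helper used in
# the test above; its real implementation is not shown in this file.
# 'heketi_server_operations_list' is assumed to be the listing sibling of
# the 'heketi_server_operation_cleanup' call used above.
def _check_for_pending_operations(self, h_node, h_url):
    # Poll until heketi reports at least one pending operation
    for w in waiter.Waiter(timeout=120, interval=10):
        operations = heketi_server_operations_list(h_node, h_url)
        if operations:
            break
    if w.expired:
        raise exceptions.ExecutionError(
            "No pending operations found on heketi server %s" % h_url)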
def test_targetcli_when_block_hosting_volume_down(self):
    """Validate no inconsistencies occur in targetcli when block
       volumes are created with one block hosting volume down."""
    h_node, h_server = self.heketi_client_node, self.heketi_server_url
    cmd = "targetcli ls | egrep '%s' || echo unavailable"
    error_msg = ("targetcli has inconsistencies when block devices are "
                 "created while block hosting volume %s is down")

    # Delete BHVs that have no BV, or fill them completely
    bhv_list = get_block_hosting_volume_list(h_node, h_server).keys()
    for bhv in bhv_list:
        bhv_info = heketi_volume_info(h_node, h_server, bhv, json=True)
        if not bhv_info["blockinfo"].get("blockvolume", []):
            heketi_volume_delete(h_node, h_server, bhv)
            continue
        free_size = bhv_info["blockinfo"].get("freesize", 0)
        if free_size:
            bv = heketi_volume_create(
                h_node, h_server, free_size, json=True)
            self.addCleanup(
                heketi_volume_delete, h_node, h_server, bv["id"])

    # Create BV
    bv = heketi_blockvolume_create(h_node, h_server, 2, json=True)
    self.addCleanup(heketi_blockvolume_delete, h_node, h_server, bv["id"])

    # Bring down BHV
    bhv_name = get_block_hosting_volume_name(h_node, h_server, bv["id"])
    ret, out, err = volume_stop("auto_get_gluster_endpoint", bhv_name)
    if ret != 0:
        err_msg = "Failed to stop gluster volume %s. error: %s" % (
            bhv_name, err)
        g.log.error(err_msg)
        raise AssertionError(err_msg)
    self.addCleanup(
        podcmd.GlustoPod()(volume_start), "auto_get_gluster_endpoint",
        bhv_name)

    ocp_node = self.ocp_master_node[0]
    gluster_block_svc = "gluster-block-target"
    self.addCleanup(
        wait_for_service_status_on_gluster_pod_or_node,
        ocp_node, gluster_block_svc, "active", "exited",
        gluster_node=self.gluster_servers[0])
    self.addCleanup(
        restart_service_on_gluster_pod_or_node, ocp_node,
        gluster_block_svc, self.gluster_servers[0])
    for condition in ("continue", "break"):
        restart_service_on_gluster_pod_or_node(
            ocp_node, gluster_block_svc,
            gluster_node=self.gluster_servers[0])
        wait_for_service_status_on_gluster_pod_or_node(
            ocp_node, gluster_block_svc, "active", "exited",
            gluster_node=self.gluster_servers[0])

        targetcli = cmd_run_on_gluster_pod_or_node(
            ocp_node, cmd % bv["id"], self.gluster_servers[0])
        if condition == "continue":
            self.assertEqual(
                targetcli, "unavailable", error_msg % bhv_name)
        else:
            self.assertNotEqual(
                targetcli, "unavailable", error_msg % bhv_name)
            break

        # Bring up the same BHV before the second check
        ret, out, err = volume_start("auto_get_gluster_endpoint", bhv_name)
        if ret != 0:
            err = "Failed to start gluster volume %s on %s. error: %s" % (
                bhv_name, h_node, err)
            raise exceptions.ExecutionError(err)
def test_create_max_num_blockhostingvolumes(self):
    num_of_bv = 10
    new_bhv_list, bv_list, g_nodes = [], [], []
    free_space, nodenum = get_total_free_space(
        self.heketi_client_node, self.heketi_server_url)
    if nodenum < 3:
        self.skipTest("Skip the test case since number of "
                      "online nodes is less than 3.")
    free_space_available = int(free_space / nodenum)
    default_bhv_size = get_default_block_hosting_volume_size(
        self.heketi_client_node, self.heketi_dc_name)

    # Get existing list of BHVs
    existing_bhv_list = get_block_hosting_volume_list(
        self.heketi_client_node, self.heketi_server_url)

    # Skip the test if available space is less than default_bhv_size
    if free_space_available < default_bhv_size:
        self.skipTest("Skip the test case since free_space_available %s "
                      "is less than space_required_for_bhv %s." % (
                          free_space_available, default_bhv_size))

    # Create BHVs
    while free_space_available > default_bhv_size:
        block_host_create_info = heketi_volume_create(
            self.heketi_client_node, self.heketi_server_url,
            default_bhv_size, json=True, block=True)
        if block_host_create_info["id"] not in existing_bhv_list.keys():
            new_bhv_list.append(block_host_create_info["id"])
        self.addCleanup(
            heketi_volume_delete, self.heketi_client_node,
            self.heketi_server_url, block_host_create_info["id"],
            raise_on_error=False)
        block_vol_size = int(
            block_host_create_info["blockinfo"]["freesize"] / num_of_bv)

        # Create the specified number of BVs in the BHVs created
        for i in range(0, num_of_bv):
            block_vol = heketi_blockvolume_create(
                self.heketi_client_node, self.heketi_server_url,
                block_vol_size, json=True, ha=3, auth=True)
            self.addCleanup(
                heketi_blockvolume_delete, self.heketi_client_node,
                self.heketi_server_url, block_vol["id"],
                raise_on_error=False)
            bv_list.append(block_vol["id"])
        free_space_available = int(free_space_available - default_bhv_size)

    # Get gluster node ips
    h_nodes_ids = heketi_node_list(
        self.heketi_client_node, self.heketi_server_url)
    for h_node in h_nodes_ids[:2]:
        g_node = heketi_node_info(
            self.heketi_client_node, self.heketi_server_url, h_node,
            json=True)
        g_nodes.append(g_node['hostnames']['manage'][0])

    # Check if there is no crash in gluster related services & heketi
    services = (
        ("glusterd", "running"), ("gluster-blockd", "running"),
        ("tcmu-runner", "running"), ("gluster-block-target", "exited"))
    for g_node in g_nodes:
        for service, state in services:
            wait_for_service_status_on_gluster_pod_or_node(
                self.ocp_client[0], service, 'active', state, g_node,
                raise_on_error=False)
        out = hello_heketi(
            self.heketi_client_node, self.heketi_server_url)
        self.assertTrue(
            out, "Heketi server %s is not alive" % self.heketi_server_url)

    # Delete all the BHVs and BVs created
    for bv_volume in bv_list:
        heketi_blockvolume_delete(
            self.heketi_client_node, self.heketi_server_url, bv_volume)

    # Check if any blockvolume still exists in heketi & gluster
    for bhv_volume in new_bhv_list[:]:
        heketi_vol_info = heketi_volume_info(
            self.heketi_client_node, self.heketi_server_url, bhv_volume,
            json=True)
        self.assertNotIn(
            "blockvolume", heketi_vol_info["blockinfo"].keys())
        gluster_vol_info = get_block_list(
            'auto_get_gluster_endpoint', volname="vol_%s" % bhv_volume)
        self.assertIsNotNone(
            gluster_vol_info, "Failed to get volume info %s" % bhv_volume)
        new_bhv_list.remove(bhv_volume)
        for blockvol in gluster_vol_info:
            self.assertNotIn("blockvol_", blockvol)
        heketi_volume_delete(
            self.heketi_client_node, self.heketi_server_url, bhv_volume)

    # Check if all block hosting volumes are deleted from heketi
    self.assertFalse(new_bhv_list)
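# A minimal sketch of the 'get_total_free_space' helper used above (its
# real implementation is not shown in this file, so the field names and
# the KiB-to-GiB conversion are assumptions): it sums the free space that
# heketi reports for the devices of every online node and also returns
# the number of nodes it considered.
def get_total_free_space(heketi_client_node, heketi_server_url):
    node_ids = heketi_node_list(heketi_client_node, heketi_server_url)
    total_free_gib, node_count = 0, 0
    for node_id in node_ids:
        node_info = heketi_node_info(
            heketi_client_node, heketi_server_url, node_id, json=True)
        if node_info["state"] != "online":
            continue
        node_count += 1
        for device in node_info["devices"]:
            # heketi reports device sizes in KiB; convert to GiB
            total_free_gib += device["storage"]["free"] // (1024 ** 2)
    return total_free_gib, node_count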