def test_start_fio_job(
    tmp_path,
    fio_pvc_dict,
    fio_job_dict,
    fio_configmap_dict,
):
    """
    Start a fio job performing IO load, check that it's running, and keep it
    running even after the test finishes.

    Args:
        tmp_path: directory handed to ``ObjectConfFile`` for the generated
            yaml file (presumably the pytest ``tmp_path`` fixture)
        fio_pvc_dict (dict): PVC definition; the storage class name and the
            requested storage size are filled in by this test
        fio_job_dict (dict): Job definition, deployed as given
        fio_configmap_dict (dict): ConfigMap definition; the fio job file is
            stored under ``data["workload.fio"]``

    Raises:
        TimeoutExpiredError: re-raised when waiting for one Running pod in
            the project does not succeed
    """
    # The project is created directly (not through a fixture) so that its
    # name is fixed to TEST_NS and nothing deletes it when the test ends.
    project = ocp.OCP(kind="Project", namespace=TEST_NS)
    project.new_project(TEST_NS)

    # size of the volume for fio
    pvc_size = 10  # GiB

    # test uses cephfs based volume, could be either parametrized or we can
    # try to start more jobs
    storage_class_name = "ocs-storagecluster-cephfs"

    # fio config file: random mixed read and write IO will be running for one
    # day (we expect that the other test will stop it), only 1/2 of the volume
    # is used, we don't need to utilize the PV 100%
    fio_size = pvc_size // 2  # GiB
    # One section header / option per line, as the fio job file format
    # requires; dedent strips the common indentation of the literal.
    fio_conf = textwrap.dedent(
        f"""
        [readwrite]
        readwrite=randrw
        buffered=1
        blocksize=4k
        ioengine=libaio
        directory=/mnt/target
        size={fio_size}G
        time_based
        runtime=24h
        """
    )

    # put the dicts together into yaml file of the Job
    fio_configmap_dict["data"]["workload.fio"] = fio_conf
    fio_pvc_dict["spec"]["storageClassName"] = storage_class_name
    fio_pvc_dict["spec"]["resources"]["requests"]["storage"] = f"{pvc_size}Gi"
    fio_objs = [fio_pvc_dict, fio_configmap_dict, fio_job_dict]
    job_file = ObjectConfFile("fio_continuous", fio_objs, project, tmp_path)

    # deploy the Job to the cluster and start it
    job_file.create()

    # wait for a pod for the job to be deployed and running
    ocp_pod = ocp.OCP(kind="Pod", namespace=project.namespace)
    try:
        ocp_pod.wait_for_resource(
            resource_count=1,
            condition=constants.STATUS_RUNNING,
            timeout=300,
            sleep=30,
        )
    except TimeoutExpiredError:
        logger.error("pod for fio job wasn't deployed properly")
        raise
def test_scale_obc_creation_noobaa_pod_respin(self, tmp_path, pod_name, sc_name, mcg_job_factory):
    """
    Create OBCs in batches through kube jobs, restart the node hosting the
    given noobaa pod and verify that all OBCs are still in Bound state.

    Args:
        tmp_path: directory handed to ``ObjectConfFile`` for the generated
            yaml files (presumably the pytest ``tmp_path`` fixture)
        pod_name: passed to ``scale_noobaa_lib.noobaa_running_node_restart``
            to select the pod whose node is restarted
        sc_name: storage class name used for the created OBCs
        mcg_job_factory: callable invoked ``self.scale_obc_count_io`` times
            to start the jobs that run FIO against OBCs

    Raises:
        AssertionError: when the number of Bound OBCs after the node restart
            differs from ``self.scale_obc_count``
    """
    # Create OBCs with FIO running using mcg_job_factory().
    # The factory is called directly: the previous exec()-based assignment
    # could not create function locals anyway, so only the call mattered.
    for _ in range(self.scale_obc_count_io):
        mcg_job_factory()

    log.info(f"Start creating {self.scale_obc_count} "
             f"OBC in a batch of {self.num_obc_batch}")
    for _ in range(self.scale_obc_count // self.num_obc_batch):
        obc_dict_list = (
            scale_noobaa_lib.construct_obc_creation_yaml_bulk_for_kube_job(
                no_of_obc=self.num_obc_batch,
                sc_name=sc_name,
                namespace=self.namespace,
            )
        )
        # Create job profile
        job_file = ObjectConfFile(
            name="job_profile",
            obj_dict_list=obc_dict_list,
            project=self.namespace,
            tmp_path=tmp_path,
        )
        # Create kube_job
        job_file.create(namespace=self.namespace)

        # Check all the OBCs reached Bound state
        obc_bound_list = (
            scale_noobaa_lib.check_all_obc_reached_bound_state_in_kube_job(
                kube_job_obj=job_file,
                namespace=self.namespace,
                no_of_obc=self.num_obc_batch,
            )
        )
        log.info(f"Number of OBCs in Bound state: {len(obc_bound_list)}")

    # Reset node which noobaa pods is running on
    # And validate noobaa pods are re-spinned and in running state
    scale_noobaa_lib.noobaa_running_node_restart(pod_name=pod_name)

    # Verify all OBCs are in Bound state after node restart
    log.info("Verify all OBCs are in Bound state after node restart.....")
    obc_status_list = scale_noobaa_lib.check_all_obcs_status(
        namespace=self.namespace)
    # The first element of the returned status is used as the collection of
    # Bound OBCs (that is how the log message below labels it).
    bound_obcs = obc_status_list[0]
    log.info(f"Number of OBCs in Bound state after node reset: "
             f"{len(bound_obcs)}")
    assert (
        len(bound_obcs) == self.scale_obc_count
    ), "Not all OBCs in Bound state"
def test_bulk_clone_performance(self, tmp_path, interface_iterate):
    """
    Creates number of PVCs in a bulk using kube job
    Write 60% of PVC capacity to each one of the created PVCs
    Creates 1 clone per each PVC altogether in a bulk
    Measuring total and csi creation times for bulk of clones

    Args:
        tmp_path: directory handed to ``ObjectConfFile`` for the generated
            yaml files (presumably the pytest ``tmp_path`` fixture)
        interface_iterate: key into ``Interfaces_info`` selecting the
            interface under test; stored in ``self.interface``

    All kube job objects created by the test are deleted in the ``finally``
    section, whether the test body passed or failed, and ceph health is
    checked afterwards.
    """
    self.interface = interface_iterate
    # Kept as None until created, so the cleanup knows what to delete.
    job_pod_file = job_pvc_file = job_clone_file = None
    log.info(f"Start creating {self.interface} {self.pvc_count} PVC")

    try:
        pvc_dict_list = scale_lib.construct_pvc_creation_yaml_bulk_for_kube_job(
            no_of_pvc=self.pvc_count,
            access_mode=Interfaces_info[self.interface]["accessmode"],
            sc_name=Interfaces_info[self.interface]["sc_name"],
            pvc_size=self.vol_size,
        )

        job_pvc_file = ObjectConfFile(
            name="job_profile_pvc",
            obj_dict_list=pvc_dict_list,
            project=self.namespace,
            tmp_path=tmp_path,
        )

        # Create kube_job
        job_pvc_file.create(namespace=self.namespace)

        # Check all the PVC reached Bound state
        performance_lib.wait_for_resource_bulk_status(
            resource="pvc",
            resource_count=self.pvc_count,
            namespace=self.namespace,
            status=constants.STATUS_BOUND,
            timeout=120,
            sleep_time=5,
        )
        log.info(
            f"All the PVCs ({self.pvc_count}) was created and are in Bound state"
        )

        # Getting the list of the PVC names
        pvc_bound_list = [
            p.name for p in pvc.get_all_pvc_objs(namespace=self.namespace)
        ]

        # Kube_job to Create pod
        log.info(
            "Attaching PODs to the PVCs and filling them with data (60%)")
        pod_dict_list = self.attach_pvcs_to_pod_dict(pvc_bound_list)
        job_pod_file = ObjectConfFile(
            name="job_profile_pod",
            obj_dict_list=pod_dict_list,
            project=self.namespace,
            tmp_path=tmp_path,
        )
        job_pod_file.create(namespace=self.namespace)

        # Check all PODs are in Completed state
        performance_lib.wait_for_resource_bulk_status(
            resource="pod",
            resource_count=self.pvc_count,
            namespace=self.namespace,
            status=constants.STATUS_COMPLETED,
            timeout=1200,
            sleep_time=30,
        )
        log.info("All the PODs completed writing data to the PVC's")

        clone_dict_list = scale_lib.construct_pvc_clone_yaml_bulk_for_kube_job(
            pvc_dict_list,
            Interfaces_info[self.interface]["clone_yaml"],
            Interfaces_info[self.interface]["sc_name"],
        )
        log.info("Created clone dict list")

        # Taken right before the clones are submitted; used below as the
        # start point for the csi creation time measurement.
        csi_bulk_start_time = self.get_time(time_format="csi")

        job_clone_file = ObjectConfFile(
            name="job_profile_clone",
            obj_dict_list=clone_dict_list,
            project=self.namespace,
            tmp_path=tmp_path,
        )

        # Create kube_job that creates clones
        job_clone_file.create(namespace=self.namespace)

        log.info("Going to check bound status for clones")
        # Check all the clones reached Bound state.
        # The expected count is doubled: original PVCs plus their clones.
        try:
            performance_lib.wait_for_resource_bulk_status(
                resource="pvc",
                resource_count=self.pvc_count * 2,
                namespace=self.namespace,
                status=constants.STATUS_BOUND,
                timeout=1200,
                sleep_time=30,
            )
        except Exception:
            log.error("Failed to cvreate clones for PVCs")
            raise

        log.info(
            f"All the Clones ({self.pvc_count}) was created and are in Bound state"
        )

        # Clones are told apart from the source PVCs by their name prefix.
        all_pvc_objs = pvc.get_all_pvc_objs(namespace=self.namespace)
        clone_objs = [
            cl for cl in all_pvc_objs if re.match("clone", cl.name)
        ]
        for clone_yaml in clone_dict_list:
            name = clone_yaml["metadata"]["name"]
            size = clone_yaml["spec"]["resources"]["requests"]["storage"]
            log.info(f"Clone {name} of size {size} created")

        start_time = get_provision_time(self.interface, clone_objs, status="start")
        end_time = get_provision_time(self.interface, clone_objs, status="end")
        total_time = (end_time - start_time).total_seconds()
        speed = round(self.total_files_size / total_time, 2)

        csi_creation_time = performance_lib.csi_bulk_pvc_time_measure(
            self.interface, clone_objs, "create", csi_bulk_start_time)

        log.info(
            f"Total creation time = {total_time} secs, csi creation time = {csi_creation_time},"
            f" data size = {self.total_files_size} MB, speed = {speed} MB/sec "
            f"for {self.interface} clone in bulk of {self.pvc_count} clones."
        )

        # Produce ES report
        # Collecting environment information
        self.get_env_info()

        # Initialize the results' doc file.
        full_results = self.init_full_results(
            ResultsAnalyse(
                self.uuid,
                self.crd_data,
                self.full_log_path,
                "bulk_clone_perf_fullres",
            )
        )

        full_results.add_key("interface", self.interface)
        full_results.add_key("bulk_size", self.pvc_count)
        full_results.add_key("clone_size", self.vol_size)
        full_results.add_key("bulk_creation_time", total_time)
        full_results.add_key("bulk_csi_creation_time", csi_creation_time)
        full_results.add_key("data_size(MB)", self.total_files_size)
        full_results.add_key("speed", speed)
        full_results.add_key("es_results_link", full_results.results_link())

        # Write the test results into the ES server
        full_results.es_write()
        self.results_path = get_full_test_logs_path(cname=self)
        res_link = full_results.results_link()
        # write the ES link to the test results in the test log.
        log.info(f"The result can be found at : {res_link}")

        # Create text file with results of all subtest (3 - according to the parameters)
        self.write_result_to_file(res_link)

    # Finally, is used to clean up the resources created
    # Irrespective of try block pass/fail finally will be executed.
    finally:
        # Cleanup activities
        log.info(
            "Cleanup of all the resources created during test execution")
        for object_file in [job_pod_file, job_clone_file, job_pvc_file]:
            if object_file:
                object_file.delete(namespace=self.namespace)
                try:
                    object_file.wait_for_delete(
                        resource_name=object_file.name,
                        namespace=self.namespace)
                except Exception:
                    # Best effort: report and continue with the remaining
                    # objects. The name is an attribute, not a dict key.
                    log.error(f"{object_file.name} didnt deleted !")

        # Check ceph health status
        utils.ceph_health_check(tries=20)
def workload_fio_storageutilization(
    fixture_name,
    target_percentage,
    project,
    fio_pvc_dict,
    fio_job_dict,
    fio_configmap_dict,
    measurement_dir,
    tmp_path,
):
    """
    This function implements core functionality of fio storage utilization
    workload fixture. This is necessary because we can't parametrize single
    general fixture over multiple parameters (it would mess with test case id
    and polarion test case tracking).

    Args:
        fixture_name (str): name of the calling fixture; its suffix
            (``rbd`` or ``cephfs``) selects the storage class and ceph pool,
            and the name is also used for the Job file and the result file
        target_percentage: passed to ``get_storageutilization_size`` to
            compute the PVC size
        project: project object; its ``namespace`` is where the Job runs
        fio_pvc_dict (dict): PVC definition; storage class and requested
            size are filled in here
        fio_job_dict (dict): Job definition, deployed as given
        fio_configmap_dict (dict): ConfigMap definition; the fio job file is
            stored under ``data["workload.fio"]``
        measurement_dir: directory where ``<fixture_name>.json`` is placed
        tmp_path: directory handed to ``ObjectConfFile`` for the yaml file

    Returns:
        The value returned by ``measure_operation`` for the write operation
        (indexable by ``'first_run'``).

    Raises:
        UnexpectedVolumeType: when ``fixture_name`` ends with neither
            ``rbd`` nor ``cephfs``
    """
    is_rbd = fixture_name.endswith("rbd")
    if is_rbd:
        storage_class_name = "ocs-storagecluster-ceph-rbd"
        ceph_pool_name = "ocs-storagecluster-cephblockpool"
    elif fixture_name.endswith("cephfs"):
        storage_class_name = "ocs-storagecluster-cephfs"
        ceph_pool_name = "ocs-storagecluster-cephfilesystem-data0"
    else:
        raise UnexpectedVolumeType(
            "unexpected volume type, ocs-ci code is wrong")

    # make sure we communicate what is going to happen
    logger.info((f"starting {fixture_name} fixture, "
                 f"using {storage_class_name} storage class "
                 f"backed by {ceph_pool_name} ceph pool"))

    pvc_size = get_storageutilization_size(target_percentage, ceph_pool_name)

    # For cephfs we can't use fill_fs because of BZ 1763808 (the process
    # will get *Disk quota exceeded* error instead of *No space left on
    # device* error).
    # On the other hand, we can't use size={pvc_size} for rbd, as we can't
    # write pvc_size bytes to a filesystem on a block device of {pvc_size}
    # size (obviously, some space is used by filesystem metadata).
    # Both job files keep one section header / option per line, as the fio
    # job file format requires.
    if is_rbd:
        fio_conf = textwrap.dedent(
            """
            [simple-write]
            readwrite=write
            buffered=1
            blocksize=4k
            ioengine=libaio
            directory=/mnt/target
            fill_fs=1
            """
        )
    else:
        fio_conf = textwrap.dedent(
            f"""
            [simple-write]
            readwrite=write
            buffered=1
            blocksize=4k
            ioengine=libaio
            directory=/mnt/target
            size={pvc_size}G
            """
        )

    # put the dicts together into yaml file of the Job
    fio_configmap_dict["data"]["workload.fio"] = fio_conf
    fio_pvc_dict["spec"]["storageClassName"] = storage_class_name
    fio_pvc_dict["spec"]["resources"]["requests"]["storage"] = f"{pvc_size}Gi"
    fio_objs = [fio_pvc_dict, fio_configmap_dict, fio_job_dict]
    fio_job_file = ObjectConfFile(fixture_name, fio_objs, project, tmp_path)

    # how long do we let the job running while writing data to the volume
    # TODO: increase this value or make it configurable
    write_timeout = pvc_size * 30  # seconds
    logger.info((f"fixture will wait {write_timeout} seconds for the Job "
                 f"to write {pvc_size} Gi data on OCS backed volume"))

    def write_data():
        """
        Deploy the fio Job, wait for one pod in the project namespace to
        reach Completed state, and collect the fio report from its logs.

        Returns:
            dict: fio report (``fio``), PVC size (``pvc_size``), target
            percentage (``target_p``) and ``namespace`` of the project
        """
        # deploy the fio Job to the cluster
        fio_job_file.create()

        # This is a WORKAROUND of particular ocsci design choices: I just wait
        # for one pod in the namespace, and then ask for the pod again to get
        # it's name (but it would be much better to just wait for the job to
        # finish instead, then ask for a name of the successful pod and use it
        # to get logs ...)
        ocp_pod = ocp.OCP(kind="Pod", namespace=project.namespace)
        ocp_pod.wait_for_resource(
            resource_count=1,
            condition=constants.STATUS_COMPLETED,
            timeout=write_timeout,
            sleep=30,
        )
        pod_data = ocp_pod.get()

        # explicit list of assumptions, if these assumptions are not met, the
        # code won't work and it either means that something went terrible
        # wrong or that the code needs to be changed
        assert pod_data['kind'] == "List"
        pod_dict = pod_data['items'][0]
        assert pod_dict['kind'] == "Pod"
        pod_name = pod_dict['metadata']['name']
        logger.info(f"Identified pod name of the finished fio Job: {pod_name}")

        fio_output = ocp_pod.exec_oc_cmd(
            f"logs {pod_name}", out_yaml_format=False)

        # parse fio output
        fio_report = fio_to_dict(fio_output)
        logger.info(fio_report)

        # data which will be available to the test via:
        # fixture_name['result']
        result = {
            'fio': fio_report,
            'pvc_size': pvc_size,
            'target_p': target_percentage,
            'namespace': project.namespace,
        }
        return result

    test_file = os.path.join(measurement_dir, f"{fixture_name}.json")
    measured_op = measure_operation(
        write_data, test_file, measure_after=True, minimal_time=480)

    # we don't need to delete anything if this fixture has been already
    # executed
    if measured_op['first_run']:
        # make sure we communicate what is going to happen
        logger.info(f"going to delete {fixture_name} Job")
        fio_job_file.delete()

    return measured_op
class TestBulkPodAttachPerformance(PASTest):
    """
    Test to measure performance of attaching pods to pvc in a bulk
    """

    # Size requested for every PVC created by the test.
    pvc_size = "1Gi"

    def setup(self):
        """
        Setting up test parameters
        """
        log.info("Starting the test setup")
        super().setup()
        self.benchmark_name = "bulk_pod_attach_time"

        self.create_test_project()

        # Pulling the pod image to the worker node, so pull image will not calculate
        # in the total attach time
        helpers.pull_images(constants.PERF_IMAGE)

        # Initializing some parameters
        self.pvc_objs = list()
        # Set by the test once the pods kube object exists; None tells the
        # teardown there is nothing to delete.
        self.pods_obj = None

    def teardown(self):
        """
        Cleanup the test environment
        """
        log.info("Starting the test environment celanup")

        # Deleting All POD(s)
        log.info("Try to delete all created PODs")
        if self.pods_obj is None:
            log.info("No PODs object was created, nothing to delete")
        else:
            # Best effort: a failed delete is reported, teardown continues.
            try:
                self.pods_obj.delete(namespace=self.namespace)
            except Exception as ex:
                log.warning(f"Failed to delete POD(s) [{ex}]")
        log.info("Wait for all PODs to be deleted")
        performance_lib.wait_for_resource_bulk_status(
            "pod", 0, self.namespace, constants.STATUS_BOUND,
            len(self.pvc_objs) * 2, 10)
        log.info("All POD(s) was deleted")

        # Deleting PVC(s) for deletion time measurement
        log.info("Try to delete all created PVCs")
        for pvc_obj in self.pvc_objs:
            pvc_obj.delete()
        log.info("Wait for all PVC(s) to be deleted")
        performance_lib.wait_for_resource_bulk_status(
            "pvc", 0, self.namespace, constants.STATUS_BOUND,
            len(self.pvc_objs) * 2, 10)
        log.info("All PVC(s) was deleted")

        log.info("Wait for all PVC(s) backed PV(s) to be deleted")
        # Timeout for each PV to be deleted is 20 sec.
        # NOTE(review): the namespace is passed in the status position as
        # well for "pv" - kept as in the original, confirm against
        # performance_lib.wait_for_resource_bulk_status.
        performance_lib.wait_for_resource_bulk_status(
            "pv", 0, self.namespace, self.namespace,
            len(self.pvc_objs) * 20, 10)
        log.info("All backed PV(s) was deleted")

        # Delete the test project (namespace)
        self.delete_test_project()

        super().teardown()

    @pytest.mark.parametrize(
        argnames=["interface_type", "bulk_size"],
        argvalues=[
            pytest.param(constants.CEPHBLOCKPOOL, 120),
            pytest.param(constants.CEPHBLOCKPOOL, 240),
            pytest.param(constants.CEPHFILESYSTEM, 120),
            pytest.param(constants.CEPHFILESYSTEM, 240),
        ],
    )
    @polarion_id("OCS-1620")
    def test_bulk_pod_attach_performance(self, interface_type, bulk_size):
        """
        Measures pods attachment time in bulk_size bulk

        Args:
            interface_type (str): The interface type to be tested - CephBlockPool / CephFileSystem.
            bulk_size (int): Size of the bulk to be tested (replaced by 3
                when ``self.dev_mode`` is set)

        """
        self.interface = interface_type

        if self.dev_mode:
            bulk_size = 3

        # Initialize some variables
        timeout = bulk_size * 5
        pvc_names_list = list()
        pod_data_list = list()

        # Getting the test start time
        test_start_time = self.get_time()
        csi_start_time = self.get_time("csi")

        log.info(f"Start creating bulk of new {bulk_size} PVCs")
        self.pvc_objs, _ = helpers.create_multiple_pvcs(
            sc_name=Interfaces_info[self.interface]["sc"],
            namespace=self.namespace,
            number_of_pvc=bulk_size,
            size=self.pvc_size,
            burst=True,
            do_reload=False,
        )
        log.info("Wait for all of the PVCs to be in Bound state")
        performance_lib.wait_for_resource_bulk_status(
            "pvc", bulk_size, self.namespace, constants.STATUS_BOUND,
            timeout, 10)
        # in case of creation failure, the wait_for_resource_bulk_status function
        # will raise an exception. so in this point the creation succeed
        log.info("All PVCs was created and in Bound state.")

        # Reload all PVC(s) information
        for pvc_obj in self.pvc_objs:
            pvc_obj.reload()
            pvc_names_list.append(pvc_obj.name)
        log.debug(f"The PVCs names are : {pvc_names_list}")

        # Create kube_job for pod creation
        pod_data_list.extend(
            scale_lib.attach_multiple_pvc_to_pod_dict(
                pvc_list=pvc_names_list,
                namespace=self.namespace,
                pvcs_per_pod=1,
            )
        )
        self.pods_obj = ObjectConfFile(
            name="pod_kube_obj",
            obj_dict_list=pod_data_list,
            project=self.namespace,
            tmp_path=pathlib.Path(ocsci_log_path()),
        )
        log.debug(f"PODs data list is : {json.dumps(pod_data_list, indent=3)}")

        log.info(f"{self.interface} : Before pod attach")
        bulk_start_time = time.time()
        self.pods_obj.create(namespace=self.namespace)
        # Check all the PODs reached Running state
        log.info("Checking that pods are running")
        performance_lib.wait_for_resource_bulk_status(
            "pod", bulk_size, self.namespace, constants.STATUS_RUNNING,
            timeout, 2)
        log.info("All POD(s) are in Running State.")
        bulk_end_time = time.time()
        bulk_total_time = bulk_end_time - bulk_start_time
        log.info(
            f"Bulk attach time of {bulk_size} pods is {bulk_total_time} seconds"
        )

        csi_bulk_total_time = performance_lib.pod_bulk_attach_csi_time(
            self.interface, self.pvc_objs, csi_start_time, self.namespace)

        # Collecting environment information
        self.get_env_info()

        # Initialize the results doc file.
        full_results = self.init_full_results(
            ResultsAnalyse(
                self.uuid, self.crd_data, self.full_log_path,
                "pod_bulk_attachtime"))

        full_results.add_key(
            "storageclass", Interfaces_info[self.interface]["name"])
        full_results.add_key("pod_bulk_attach_time", bulk_total_time)
        full_results.add_key("pod_csi_bulk_attach_time", csi_bulk_total_time)
        full_results.add_key("pvc_size", self.pvc_size)
        full_results.add_key("bulk_size", bulk_size)

        # Getting the test end time
        test_end_time = self.get_time()

        # Add the test time to the ES report
        full_results.add_key(
            "test_time", {"start": test_start_time, "end": test_end_time})

        # Write the test results into the ES server
        self.results_path = helpers.get_full_test_logs_path(cname=self)
        if full_results.es_write():
            res_link = full_results.results_link()
            # write the ES link to the test results in the test log.
            log.info(f"The result can be found at : {res_link}")

            # Create text file with results of all subtests (4 - according to the parameters)
            self.write_result_to_file(res_link)

    def test_bulk_pod_attach_results(self):
        """
        This is not a test - it is only check that previous test ran and finish as expected
        and reporting the full results (links in the ES) of previous tests (4)
        """
        self.add_test_to_results_check(
            test="test_bulk_pod_attach_performance",
            test_count=4,
            test_name="Bulk Pod Attach Time",
        )
        self.check_results_and_push_to_dashboard()

    def init_full_results(self, full_results):
        """
        Initialize the full results object which will send to the ES server

        Args:
            full_results (obj): an empty ResultsAnalyse object

        Returns:
            ResultsAnalyse (obj): the input object filled with data

        """
        # Copy every entry of self.environment into the results object.
        for key in self.environment:
            full_results.add_key(key, self.environment[key])
        return full_results
def test_bulk_pod_attach_performance(self, interface_type, bulk_size):
    """
    Measure how long it takes to attach a bulk of pods to a bulk of PVCs.

    A bulk of PVCs is created and awaited in Bound state, one pod per PVC is
    then created through a single kube object, and the time until all pods
    are Running is reported together with the csi attach time.

    Args:
        interface_type (str): The interface type to be tested -
            CephBlockPool / CephFileSystem.
        bulk_size (int): Size of the bulk to be tested; forced to 3 when
            ``self.dev_mode`` is set
    """
    self.interface = interface_type
    if self.dev_mode:
        bulk_size = 3

    # The wait timeout grows with the size of the bulk.
    timeout = bulk_size * 5

    # Timestamps marking the beginning of the test
    test_start_time = self.get_time()
    csi_start_time = self.get_time("csi")

    log.info(f"Start creating bulk of new {bulk_size} PVCs")
    self.pvc_objs, _ = helpers.create_multiple_pvcs(
        sc_name=Interfaces_info[self.interface]["sc"],
        namespace=self.namespace,
        number_of_pvc=bulk_size,
        size=self.pvc_size,
        burst=True,
        do_reload=False,
    )

    log.info("Wait for all of the PVCs to be in Bound state")
    performance_lib.wait_for_resource_bulk_status(
        "pvc", bulk_size, self.namespace, constants.STATUS_BOUND, timeout, 10
    )
    # A failed creation makes the wait above raise, so reaching this line
    # means the PVCs exist.
    log.info("All PVCs was created and in Bound state.")

    # Refresh every PVC object and collect its name
    pvc_names_list = []
    for current_pvc in self.pvc_objs:
        current_pvc.reload()
        pvc_names_list.append(current_pvc.name)
    log.debug(f"The PVCs names are : {pvc_names_list}")

    # Build the pod definitions (one PVC per pod) and wrap them in one
    # kube object
    pod_data_list = list(
        scale_lib.attach_multiple_pvc_to_pod_dict(
            pvc_list=pvc_names_list,
            namespace=self.namespace,
            pvcs_per_pod=1,
        )
    )
    self.pods_obj = ObjectConfFile(
        name="pod_kube_obj",
        obj_dict_list=pod_data_list,
        project=self.namespace,
        tmp_path=pathlib.Path(ocsci_log_path()),
    )
    log.debug(f"PODs data list is : {json.dumps(pod_data_list, indent=3)}")

    log.info(f"{self.interface} : Before pod attach")
    bulk_start_time = time.time()
    self.pods_obj.create(namespace=self.namespace)

    # Wait until every pod is Running
    log.info("Checking that pods are running")
    performance_lib.wait_for_resource_bulk_status(
        "pod", bulk_size, self.namespace, constants.STATUS_RUNNING, timeout, 2
    )
    log.info("All POD(s) are in Running State.")
    bulk_total_time = time.time() - bulk_start_time
    log.info(
        f"Bulk attach time of {bulk_size} pods is {bulk_total_time} seconds"
    )

    csi_bulk_total_time = performance_lib.pod_bulk_attach_csi_time(
        self.interface, self.pvc_objs, csi_start_time, self.namespace
    )

    # Environment information goes into the report as well
    self.get_env_info()

    # Prepare the results document
    results_doc = ResultsAnalyse(
        self.uuid, self.crd_data, self.full_log_path, "pod_bulk_attachtime"
    )
    full_results = self.init_full_results(results_doc)

    storage_class = Interfaces_info[self.interface]["name"]
    full_results.add_key("storageclass", storage_class)
    full_results.add_key("pod_bulk_attach_time", bulk_total_time)
    full_results.add_key("pod_csi_bulk_attach_time", csi_bulk_total_time)
    full_results.add_key("pvc_size", self.pvc_size)
    full_results.add_key("bulk_size", bulk_size)

    # Record the duration of the whole test in the ES report
    test_end_time = self.get_time()
    test_time = {"start": test_start_time, "end": test_end_time}
    full_results.add_key("test_time", test_time)

    # Push the results to the ES server; the link is reported only when the
    # write succeeded
    self.results_path = helpers.get_full_test_logs_path(cname=self)
    if not full_results.es_write():
        return

    res_link = full_results.results_link()
    # Put the ES link into the test log
    log.info(f"The result can be found at : {res_link}")

    # Text file with results of all subtests (4 - according to the parameters)
    self.write_result_to_file(res_link)
def setup_netsplit(tmp_path, master_zones, worker_zones, x_addr_list=None, arbiter_zone=None):
    """
    Deploy the network split machineconfig (scripts and configuration)
    generated for the current state of the cluster, and wait until both
    machine config pools have processed it.

    Args:
        tmp_path(pathlib.Path): Directory where a temporary yaml file will
            be created. In test context, use pytest fixture ``tmp_path``.
        master_zones(list[str]): zones where master nodes are placed
        worker_zones(list[str]): zones where worker nodes are placed
        x_addr_list(list[str]): IP addressess of external services (zone x)
        arbiter_zone(str): name of arbiter zone if arbiter deployment is used

    Raises:
        UnexpectedDeploymentConfiguration: in case of invalid cluster
            configuration, which prevents deployment of network split scripts
        ValueError: in case given zone configuration doesn't make any sense
    """

    def _log_and_raise(exc_class, message):
        # Every validation failure is logged before it is raised.
        logger.error(message)
        raise exc_class(message)

    logger.info("going to deploy ocpnetsplit scripts")

    # Assumption 1: every node carries a zone label
    if not are_zone_labels_present():
        _log_and_raise(
            exceptions.UnexpectedDeploymentConfiguration,
            "to use network_split_setup, all nodes needs a zone label",
        )

    # Assumption 2: each zone with worker nodes has master nodes too
    worker_only_zones = set(worker_zones) - set(master_zones)
    if worker_only_zones:
        _log_and_raise(
            exceptions.UnexpectedDeploymentConfiguration,
            "there are zones which contains worker nodes, "
            f"but no master nodes: {worker_only_zones}",
        )

    # Assumption 3: the arbiter zone, when given, is one of the master zones
    if arbiter_zone is not None and arbiter_zone not in master_zones:
        _log_and_raise(
            ValueError, "given arbiter zone not found among master zones")

    # Assumption 4: there are exactly 3 zones
    if len(master_zones) != 3:
        _log_and_raise(
            exceptions.UnexpectedDeploymentConfiguration,
            "ocpnetsplit can handle only 3 zones, setup can't continue",
        )

    if arbiter_zone is None:
        zone_a, zone_b, zone_c = master_zones
    else:
        # With an arbiter, zone a is always the arbiter zone and the two
        # remaining zones keep their relative order.
        remaining_zones = master_zones.copy()
        remaining_zones.remove(arbiter_zone)
        zone_a = arbiter_zone
        zone_b, zone_c = remaining_zones

    # Assumption 5: there are just 2 machine config pools, master and worker
    mcp_h = OCP(kind="MachineConfigPool", namespace="openshift-config")
    mcp_names = [item["metadata"]["name"] for item in mcp_h.get()["items"]]
    if len(mcp_names) != 2:
        _log_and_raise(
            exceptions.UnexpectedDeploymentConfiguration,
            "ocpnetsplit can handle only 2 machine config pools, "
            f"but there are {mcp_names}",
        )
    for exp_pool in ("master", "worker"):
        if exp_pool not in mcp_names:
            _log_and_raise(
                exceptions.UnexpectedDeploymentConfiguration,
                f"MachineConfigPool/{exp_pool} not found",
            )

    # Zone config (node ip addresses per zone) rendered as an env file,
    # which is embedded into the machineconfig with the firewall scripts
    zone_config = ocpnetsplit.main.get_zone_config(
        zone_a, zone_b, zone_c, x_addr_list)
    mc = ocpnetsplit.main.get_networksplit_mc_spec(zone_config.get_env_file())

    # The machineconfig is deployed into the openshift-config namespace
    mc_file = ObjectConfFile("network-split", mc, None, tmp_path)
    mc_file.create(namespace="openshift-config")

    # First make sure the MCO (machine config operator) noticed the new
    # machineconfig and started to process it ...
    logger.info("waiting for both machineconfigpools to be updating "
                "as a result of deployment of network-split machineconfig")
    mcp_h.wait_for_resource(
        resource_count=2,
        condition="True",
        column="UPDATING",
        sleep=5,
        timeout=120,
    )

    # ... then wait for both pools to finish the update ...
    logger.info("waiting for both machineconfigpools to be updated and ready")
    mcp_h.wait_for_resource(
        resource_count=2,
        condition="True",
        column="UPDATED",
        sleep=60,
        timeout=1800,
    )

    # ... and finally check that no pool ended up degraded
    mcp_h.wait_for_resource(
        resource_count=2,
        condition="False",
        column="DEGRADED",
        sleep=10,
        timeout=120,
    )