def test_run_pgsql_node_drain(self, transactions=900, node_type='master'):
    """
    Test the pgsql workload while a node is drained and re-scheduled
    """
    # Create pgbench benchmark
    log.info("Create resource file for pgbench workload")
    pg_trans = transactions
    timeout = pg_trans * 3
    pg_data = templating.load_yaml(constants.PGSQL_BENCHMARK_YAML)
    pg_data['spec']['workload']['args']['transactions'] = pg_trans
    pg_obj = OCS(**pg_data)
    pg_obj.create()

    # Get the pgbench pod name
    for pgbench_pod in TimeoutSampler(
        pg_trans, 3, get_pod_name_by_pattern, 'pgbench', 'my-ripsaw'
    ):
        try:
            if pgbench_pod[0] is not None:
                pgbench_client_pod = pgbench_pod[0]
                break
        except IndexError:
            log.info("Bench pod is not found")

    # Wait for the pgbench pod to be in Running state
    log.info("Waiting for pgbench_pod to be in running state")
    pod_obj = OCP(kind='pod')
    pod_obj.wait_for_resource(
        condition='Running',
        resource_name=pgbench_client_pod,
        timeout=timeout,
        sleep=5,
    )

    # Select one node of the requested type to drain
    typed_nodes = node.get_typed_nodes(node_type=node_type, num_of_nodes=1)
    typed_node_name = typed_nodes[0].name

    # Node maintenance - to gracefully terminate all pods on the node
    node.drain_nodes([typed_node_name])

    # Make the node schedulable again
    node.schedule_nodes([typed_node_name])

    # Perform cluster and Ceph health checks
    self.sanity_helpers.health_check()

    # Wait for the pgbench pod to complete the workload
    log.info("Waiting for pgbench_client to complete")
    pod_obj.wait_for_resource(
        condition='Completed',
        resource_name=pgbench_client_pod,
        timeout=timeout,
        sleep=10,
    )

    # Parse the results
    output = run_cmd(f'oc logs {pgbench_client_pod}')
    pg_output = utils.parse_pgsql_logs(output)
    log.info(
        "*******PGBench output log*********\n"
        f"{pg_output}"
    )
    for data in pg_output:
        latency_avg = data['latency_avg']
        if not latency_avg:
            raise UnexpectedBehaviour(
                "PGBench failed to run, no data found on latency_avg"
            )
    log.info("PGBench has completed successfully")

    # Collect data and export to a Google Docs spreadsheet
    g_sheet = GoogleSpreadSheetAPI(sheet_name="OCS PGSQL", sheet_index=3)
    for lat in pg_output:
        lat_avg = lat['latency_avg']
        lat_stddev = lat['lat_stddev']
        tps_incl = lat['tps_incl']
        tps_excl = lat['tps_excl']
        g_sheet.insert_row(
            [int(lat_avg), int(lat_stddev), int(tps_incl), int(tps_excl)], 2
        )

    # Clean up pgbench benchmark
    log.info("Deleting PG bench benchmark")
    pg_obj.delete()
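# Minimal sketch of the drain/uncordon sequence exercised above, factored out
# so it can be reused in isolation. It assumes the same `node` helper module
# used by the test; node names are resolved from the live cluster.
def _drain_and_restore_one(node_type='worker'):
    """Drain a single node of the given type, then mark it schedulable again."""
    target = node.get_typed_nodes(node_type=node_type, num_of_nodes=1)[0].name
    node.drain_nodes([target])      # gracefully evict all pods from the node
    node.schedule_nodes([target])   # make the node schedulable again
    return target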
def subscribe_ocs(self):
    """
    This method creates the subscription manifest and subscribes to the
    OCS operator.
    """
    live_deployment = config.DEPLOYMENT.get("live_deployment")
    if (
        config.ENV_DATA["platform"] == constants.IBMCLOUD_PLATFORM
        and not live_deployment
    ):
        link_all_sa_and_secret_and_delete_pods(constants.OCS_SECRET, self.namespace)
    operator_selector = get_selector_for_ocs_operator()
    # Wait for the package manifest. For OCS version >= 4.9 we have odf-operator.
    ocs_version = version.get_semantic_ocs_version_from_config()
    if ocs_version >= version.VERSION_4_9:
        ocs_operator_name = defaults.ODF_OPERATOR_NAME
        subscription_file = constants.SUBSCRIPTION_ODF_YAML
    else:
        ocs_operator_name = defaults.OCS_OPERATOR_NAME
        subscription_file = constants.SUBSCRIPTION_YAML
    package_manifest = PackageManifest(
        resource_name=ocs_operator_name,
        selector=operator_selector,
    )
    # Wait for the package manifest to be ready
    package_manifest.wait_for_resource(timeout=300)
    default_channel = package_manifest.get_default_channel()
    subscription_yaml_data = templating.load_yaml(subscription_file)
    subscription_plan_approval = config.DEPLOYMENT.get("subscription_plan_approval")
    if subscription_plan_approval:
        subscription_yaml_data["spec"][
            "installPlanApproval"
        ] = subscription_plan_approval
    custom_channel = config.DEPLOYMENT.get("ocs_csv_channel")
    if custom_channel:
        logger.info(f"Custom channel will be used: {custom_channel}")
        subscription_yaml_data["spec"]["channel"] = custom_channel
    else:
        logger.info(f"Default channel will be used: {default_channel}")
        subscription_yaml_data["spec"]["channel"] = default_channel
    if config.DEPLOYMENT.get("stage"):
        subscription_yaml_data["spec"]["source"] = constants.OPERATOR_SOURCE_NAME
    if config.DEPLOYMENT.get("live_deployment"):
        subscription_yaml_data["spec"]["source"] = config.DEPLOYMENT.get(
            "live_content_source", defaults.LIVE_CONTENT_SOURCE
        )
    subscription_manifest = tempfile.NamedTemporaryFile(
        mode="w+", prefix="subscription_manifest", delete=False
    )
    templating.dump_data_to_temp_yaml(
        subscription_yaml_data, subscription_manifest.name
    )
    run_cmd(f"oc create -f {subscription_manifest.name}")
    logger.info("Sleeping for 90 seconds after subscribing to OCS")
    time.sleep(90)
    if subscription_plan_approval == "Manual":
        wait_for_install_plan_and_approve(self.namespace)
    csv_name = package_manifest.get_current_csv(channel=custom_channel)
    csv = CSV(resource_name=csv_name, namespace=self.namespace)
    csv.wait_for_phase("Installing", timeout=60)
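# Minimal sketch of the channel-selection rule applied above: the configured
# override wins, otherwise the package manifest's default channel is used.
# `package_manifest` is assumed to be a PackageManifest that is already ready.
def _resolve_subscription_channel(package_manifest):
    """Return the subscription channel: the config override, else the default."""
    custom_channel = config.DEPLOYMENT.get("ocs_csv_channel")
    return custom_channel if custom_channel else package_manifest.get_default_channel()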
def deploy_ocs_via_operator(self, image=None):
    """
    Method to deploy OCS via the OCS operator.

    Args:
        image (str): Image of the OCS registry.

    """
    ui_deployment = config.DEPLOYMENT.get("ui_deployment")
    live_deployment = config.DEPLOYMENT.get("live_deployment")
    arbiter_deployment = config.DEPLOYMENT.get("arbiter_deployment")

    if ui_deployment and ui_deployment_conditions():
        self.deployment_with_ui()
        # Skip the rest of the deployment when deploying via UI
        return
    else:
        logger.info("Deployment of OCS via OCS operator")
        self.label_and_taint_nodes()

    if not live_deployment:
        create_catalog_source(image)

    if config.DEPLOYMENT.get("local_storage"):
        setup_local_storage(storageclass=self.DEFAULT_STORAGECLASS_LSO)

    logger.info("Creating namespace and operator group.")
    run_cmd(f"oc create -f {constants.OLM_YAML}")

    # Create the multus network
    if config.ENV_DATA.get("is_multus_enabled"):
        logger.info("Creating multus network")
        multus_data = templating.load_yaml(constants.MULTUS_YAML)
        multus_config_str = multus_data["spec"]["config"]
        multus_config_dct = json.loads(multus_config_str)
        if config.ENV_DATA.get("multus_public_network_interface"):
            multus_config_dct["master"] = config.ENV_DATA.get(
                "multus_public_network_interface"
            )
        multus_data["spec"]["config"] = json.dumps(multus_config_dct)
        multus_data_yaml = tempfile.NamedTemporaryFile(
            mode="w+", prefix="multus", delete=False
        )
        templating.dump_data_to_temp_yaml(multus_data, multus_data_yaml.name)
        run_cmd(f"oc create -f {multus_data_yaml.name}")

    if config.ENV_DATA["platform"] == constants.IBMCLOUD_PLATFORM:
        ibmcloud.add_deployment_dependencies()
        if not live_deployment:
            create_ocs_secret(self.namespace)
    self.subscribe_ocs()
    operator_selector = get_selector_for_ocs_operator()
    subscription_plan_approval = config.DEPLOYMENT.get("subscription_plan_approval")
    ocs_version = version.get_semantic_ocs_version_from_config()
    if ocs_version >= version.VERSION_4_9:
        ocs_operator_names = [
            defaults.ODF_OPERATOR_NAME,
            defaults.OCS_OPERATOR_NAME,
        ]
        build_number = version.get_semantic_version(get_ocs_build_number())
        if build_number >= version.get_semantic_version("4.9.0-231"):
            ocs_operator_names.append(defaults.MCG_OPERATOR)
        else:
            ocs_operator_names.append(defaults.NOOBAA_OPERATOR)
    else:
        ocs_operator_names = [defaults.OCS_OPERATOR_NAME]
    channel = config.DEPLOYMENT.get("ocs_csv_channel")
    is_ibm_sa_linked = False
    for ocs_operator_name in ocs_operator_names:
        package_manifest = PackageManifest(
            resource_name=ocs_operator_name,
            selector=operator_selector,
            subscription_plan_approval=subscription_plan_approval,
        )
        package_manifest.wait_for_resource(timeout=300)
        csv_name = package_manifest.get_current_csv(channel=channel)
        csv = CSV(resource_name=csv_name, namespace=self.namespace)
        if (
            config.ENV_DATA["platform"] == constants.IBMCLOUD_PLATFORM
            and not live_deployment
        ):
            if not is_ibm_sa_linked:
                logger.info("Sleeping for 60 seconds before applying SA")
                time.sleep(60)
                link_all_sa_and_secret_and_delete_pods(
                    constants.OCS_SECRET, self.namespace
                )
                is_ibm_sa_linked = True
        csv.wait_for_phase("Succeeded", timeout=720)

    # Create the storage system
    if ocs_version >= version.VERSION_4_9:
        exec_cmd(f"oc apply -f {constants.STORAGE_SYSTEM_ODF_YAML}")

    ocp_version = version.get_semantic_ocp_version_from_config()
    if config.ENV_DATA["platform"] == constants.IBMCLOUD_PLATFORM:
        config_map = ocp.OCP(
            kind="configmap",
            namespace=self.namespace,
            resource_name=constants.ROOK_OPERATOR_CONFIGMAP,
        )
        config_map.get(retry=10, wait=5)
        config_map_patch = (
            '\'{"data": {"ROOK_CSI_KUBELET_DIR_PATH": "/var/data/kubelet"}}\''
        )
        logger.info("Patching config map to change KUBELET DIR PATH")
        exec_cmd(
            f"oc patch configmap -n {self.namespace} "
            f"{constants.ROOK_OPERATOR_CONFIGMAP} -p {config_map_patch}"
        )
        if config.DEPLOYMENT.get("create_ibm_cos_secret", True):
            logger.info("Creating secret for IBM Cloud Object Storage")
            with open(constants.IBM_COS_SECRET_YAML, "r") as cos_secret_fd:
                cos_secret_data = yaml.load(cos_secret_fd, Loader=yaml.SafeLoader)
            key_id = config.AUTH["ibmcloud"]["ibm_cos_access_key_id"]
            key_secret = config.AUTH["ibmcloud"]["ibm_cos_secret_access_key"]
            cos_secret_data["data"]["IBM_COS_ACCESS_KEY_ID"] = key_id
            cos_secret_data["data"]["IBM_COS_SECRET_ACCESS_KEY"] = key_secret
            cos_secret_data_yaml = tempfile.NamedTemporaryFile(
                mode="w+", prefix="cos_secret", delete=False
            )
            templating.dump_data_to_temp_yaml(
                cos_secret_data, cos_secret_data_yaml.name
            )
            exec_cmd(f"oc create -f {cos_secret_data_yaml.name}")

    # Modify the CSV with custom values if required
    if all(
        key in config.DEPLOYMENT for key in ("csv_change_from", "csv_change_to")
    ):
        modify_csv(
            csv=csv_name,
            replace_from=config.DEPLOYMENT["csv_change_from"],
            replace_to=config.DEPLOYMENT["csv_change_to"],
        )

    # Create a custom storage class for the StorageCluster CR if necessary
    if self.CUSTOM_STORAGE_CLASS_PATH is not None:
        with open(self.CUSTOM_STORAGE_CLASS_PATH, "r") as custom_sc_fo:
            custom_sc = yaml.load(custom_sc_fo, Loader=yaml.SafeLoader)
        # Set DEFAULT_STORAGECLASS to match the custom storage class
        self.DEFAULT_STORAGECLASS = custom_sc["metadata"]["name"]
        run_cmd(f"oc create -f {self.CUSTOM_STORAGE_CLASS_PATH}")

    # Set rook log level
    self.set_rook_log_level()

    # Create the StorageCluster
    if config.DEPLOYMENT.get("kms_deployment"):
        kms = KMS.get_kms_deployment()
        kms.deploy()
    if config.ENV_DATA["mcg_only_deployment"]:
        mcg_only_deployment()
        return
    cluster_data = templating.load_yaml(constants.STORAGE_CLUSTER_YAML)

    # Figure out all the OCS modules enabled/disabled.
    # The CLI parameter --disable-components takes precedence over
    # anything that comes from the config file.
    if config.ENV_DATA.get("disable_components"):
        for component in config.ENV_DATA["disable_components"]:
            config.COMPONENTS[f"disable_{component}"] = True
            logger.warning(f"disabling: {component}")

    # Update cluster_data with the respective component enable/disable
    for key in config.COMPONENTS.keys():
        comp_name = constants.OCS_COMPONENTS_MAP[key.split("_")[1]]
        if config.COMPONENTS[key]:
            if "noobaa" in key:
                merge_dict(
                    cluster_data,
                    {
                        "spec": {
                            "multiCloudGateway": {"reconcileStrategy": "ignore"}
                        }
                    },
                )
            else:
                merge_dict(
                    cluster_data,
                    {
                        "spec": {
                            "managedResources": {
                                f"{comp_name}": {"reconcileStrategy": "ignore"}
                            }
                        }
                    },
                )
    if arbiter_deployment:
        cluster_data["spec"]["arbiter"] = {}
        cluster_data["spec"]["nodeTopologies"] = {}
        cluster_data["spec"]["arbiter"]["enable"] = True
        cluster_data["spec"]["nodeTopologies"][
            "arbiterLocation"
        ] = self.get_arbiter_location()
        cluster_data["spec"]["storageDeviceSets"][0]["replica"] = 4

    cluster_data["metadata"]["name"] = config.ENV_DATA["storage_cluster_name"]

    deviceset_data = cluster_data["spec"]["storageDeviceSets"][0]
    device_size = int(config.ENV_DATA.get("device_size", defaults.DEVICE_SIZE))

    logger.info(
        "Flexible scaling is available from version 4.7 on LSO clusters "
        "with less than 3 zones"
    )
    zone_num = get_az_count()
    if (
        config.DEPLOYMENT.get("local_storage")
        and ocs_version >= version.VERSION_4_7
        and zone_num < 3
        and not config.DEPLOYMENT.get("arbiter_deployment")
    ):
        cluster_data["spec"]["flexibleScaling"] = True
        # https://bugzilla.redhat.com/show_bug.cgi?id=1921023
        cluster_data["spec"]["storageDeviceSets"][0]["count"] = 3
        cluster_data["spec"]["storageDeviceSets"][0]["replica"] = 1

    # Set the size of the storage request
    if self.platform.lower() == constants.BAREMETAL_PLATFORM:
        pv_size_list = helpers.get_pv_size(
            storageclass=self.DEFAULT_STORAGECLASS_LSO
        )
        pv_size_list.sort()
        deviceset_data["dataPVCTemplate"]["spec"]["resources"]["requests"][
            "storage"
        ] = f"{pv_size_list[0]}"
    else:
        deviceset_data["dataPVCTemplate"]["spec"]["resources"]["requests"][
            "storage"
        ] = f"{device_size}Gi"

    # Set the storage class to the OCS default on the current platform
    if self.DEFAULT_STORAGECLASS:
        deviceset_data["dataPVCTemplate"]["spec"][
            "storageClassName"
        ] = self.DEFAULT_STORAGECLASS

    # StorageCluster tweaks for LSO
    if config.DEPLOYMENT.get("local_storage"):
        cluster_data["spec"]["manageNodes"] = False
        cluster_data["spec"]["monDataDirHostPath"] = "/var/lib/rook"
        deviceset_data["name"] = constants.DEFAULT_DEVICESET_LSO_PVC_NAME
        deviceset_data["portable"] = False
        deviceset_data["dataPVCTemplate"]["spec"][
            "storageClassName"
        ] = self.DEFAULT_STORAGECLASS_LSO
        lso_type = config.DEPLOYMENT.get("type")
        if (
            self.platform.lower() == constants.AWS_PLATFORM
            and not lso_type == constants.AWS_EBS
        ):
            deviceset_data["count"] = 2
        # Set resource limits for AWS i3
        # https://access.redhat.com/documentation/en-us/red_hat_openshift_container_storage/4.6/html-single/deploying_openshift_container_storage_using_amazon_web_services/index#creating-openshift-container-storage-cluster-on-amazon-ec2_local-storage
        if (
            ocs_version >= version.VERSION_4_5
            and config.ENV_DATA.get("worker_instance_type")
            == constants.AWS_LSO_WORKER_INSTANCE
        ):
            deviceset_data["resources"] = {
                "limits": {"cpu": 2, "memory": "5Gi"},
                "requests": {"cpu": 1, "memory": "5Gi"},
            }
        if (ocp_version >= version.VERSION_4_6) and (
            ocs_version >= version.VERSION_4_6
        ):
            cluster_data["metadata"]["annotations"] = {
                "cluster.ocs.openshift.io/local-devices": "true"
            }
        count = config.DEPLOYMENT.get("local_storage_storagedeviceset_count")
        if count is not None:
            deviceset_data["count"] = count

    # Allow lower instance requests and limits for the OCS deployment.
    # The resources we need to change can be found here:
    # https://github.com/openshift/ocs-operator/blob/release-4.5/pkg/deploy-manager/storagecluster.go#L88-L116
    if config.DEPLOYMENT.get("allow_lower_instance_requirements"):
        none_resources = {"Requests": None, "Limits": None}
        deviceset_data["resources"] = deepcopy(none_resources)
        resources = [
            "mon",
            "mds",
            "rgw",
            "mgr",
            "noobaa-core",
            "noobaa-db",
        ]
        if ocs_version >= version.VERSION_4_5:
            resources.append("noobaa-endpoint")
        cluster_data["spec"]["resources"] = {
            resource: deepcopy(none_resources) for resource in resources
        }
        if ocs_version >= version.VERSION_4_5:
            cluster_data["spec"]["resources"]["noobaa-endpoint"] = {
                "limits": {"cpu": "100m", "memory": "100Mi"},
                "requests": {"cpu": "100m", "memory": "100Mi"},
            }
    else:
        local_storage = config.DEPLOYMENT.get("local_storage")
        platform = config.ENV_DATA.get("platform", "").lower()
        if local_storage and platform == "aws":
            resources = {
                "mds": {
                    "limits": {"cpu": 3, "memory": "8Gi"},
                    "requests": {"cpu": 1, "memory": "8Gi"},
                }
            }
            if ocs_version < version.VERSION_4_5:
                resources["noobaa-core"] = {
                    "limits": {"cpu": 2, "memory": "8Gi"},
                    "requests": {"cpu": 1, "memory": "8Gi"},
                }
                resources["noobaa-db"] = {
                    "limits": {"cpu": 2, "memory": "8Gi"},
                    "requests": {"cpu": 1, "memory": "8Gi"},
                }
            cluster_data["spec"]["resources"] = resources

    # Enable host network if enabled in the config (this requires all the
    # rules to be enabled on the underlying platform).
    if config.DEPLOYMENT.get("host_network"):
        cluster_data["spec"]["hostNetwork"] = True

    cluster_data["spec"]["storageDeviceSets"] = [deviceset_data]

    if self.platform == constants.IBMCLOUD_PLATFORM:
        mon_pvc_template = {
            "spec": {
                "accessModes": ["ReadWriteOnce"],
                "resources": {"requests": {"storage": "20Gi"}},
                "storageClassName": self.DEFAULT_STORAGECLASS,
                "volumeMode": "Filesystem",
            }
        }
        cluster_data["spec"]["monPVCTemplate"] = mon_pvc_template
        # Need to check if it's needed for IBM Cloud to set manageNodes
        cluster_data["spec"]["manageNodes"] = False

    if config.ENV_DATA.get("encryption_at_rest"):
        if ocs_version < version.VERSION_4_6:
            error_message = "Encryption at REST can be enabled only on OCS >= 4.6!"
            logger.error(error_message)
            raise UnsupportedFeatureError(error_message)
        logger.info("Enabling encryption at REST!")
        cluster_data["spec"]["encryption"] = {
            "enable": True,
        }
        if config.DEPLOYMENT.get("kms_deployment"):
            cluster_data["spec"]["encryption"]["kms"] = {
                "enable": True,
            }

    if config.DEPLOYMENT.get("ceph_debug"):
        setup_ceph_debug()
        cluster_data["spec"]["managedResources"] = {
            "cephConfig": {"reconcileStrategy": "ignore"}
        }
    if config.ENV_DATA.get("is_multus_enabled"):
        cluster_data["spec"]["network"] = {
            "provider": "multus",
            "selectors": {
                "public": f"{defaults.ROOK_CLUSTER_NAMESPACE}/ocs-public"
            },
        }

    cluster_data_yaml = tempfile.NamedTemporaryFile(
        mode="w+", prefix="cluster_storage", delete=False
    )
    templating.dump_data_to_temp_yaml(cluster_data, cluster_data_yaml.name)
    run_cmd(f"oc create -f {cluster_data_yaml.name}", timeout=1200)
    if config.DEPLOYMENT["infra_nodes"]:
        _ocp = ocp.OCP(kind="node")
        _ocp.exec_oc_cmd(
            command=f"annotate namespace {defaults.ROOK_CLUSTER_NAMESPACE} "
            f"{constants.NODE_SELECTOR_ANNOTATION}"
        )
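# Worked example of the component enable/disable merge performed above, using
# a plain dict instead of the real StorageCluster template (hypothetical
# minimal data, for illustration only). merge_dict deep-merges the patch into
# the target, touching only the reconcileStrategy key.
def _merge_dict_example():
    sample = {"spec": {"managedResources": {"cephObjectStores": {}}}}
    merge_dict(
        sample,
        {
            "spec": {
                "managedResources": {
                    "cephObjectStores": {"reconcileStrategy": "ignore"}
                }
            }
        },
    )
    # sample["spec"]["managedResources"]["cephObjectStores"]
    # -> {"reconcileStrategy": "ignore"}
    return sample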
def deploy_ocs_via_operator(self, image=None):
    """
    Method to deploy OCS via the OCS operator.

    Args:
        image (str): Image of the OCS registry.

    """
    ui_deployment = config.DEPLOYMENT.get("ui_deployment")
    live_deployment = config.DEPLOYMENT.get("live_deployment")
    arbiter_deployment = config.DEPLOYMENT.get("arbiter_deployment")

    if ui_deployment:
        self.deployment_with_ui()
        # Skip the rest of the deployment when deploying via UI
        return
    else:
        logger.info("Deployment of OCS via OCS operator")
        self.label_and_taint_nodes()

    if config.DEPLOYMENT.get("local_storage"):
        setup_local_storage(storageclass=self.DEFAULT_STORAGECLASS_LSO)

    logger.info("Creating namespace and operator group.")
    run_cmd(f"oc create -f {constants.OLM_YAML}")

    if config.ENV_DATA["platform"] == constants.IBMCLOUD_PLATFORM:
        ibmcloud.add_deployment_dependencies()
        if not live_deployment:
            create_ocs_secret(self.namespace)
            create_ocs_secret(constants.MARKETPLACE_NAMESPACE)
    if not live_deployment:
        self.create_ocs_operator_source(image)
    self.subscribe_ocs()
    operator_selector = get_selector_for_ocs_operator()
    subscription_plan_approval = config.DEPLOYMENT.get("subscription_plan_approval")
    package_manifest = PackageManifest(
        resource_name=defaults.OCS_OPERATOR_NAME,
        selector=operator_selector,
        subscription_plan_approval=subscription_plan_approval,
    )
    package_manifest.wait_for_resource(timeout=300)
    channel = config.DEPLOYMENT.get("ocs_csv_channel")
    csv_name = package_manifest.get_current_csv(channel=channel)
    csv = CSV(resource_name=csv_name, namespace=self.namespace)
    if (
        config.ENV_DATA["platform"] == constants.IBMCLOUD_PLATFORM
        and not live_deployment
    ):
        csv.wait_for_phase("Installing", timeout=720)
        logger.info("Sleeping for 30 seconds before applying SA")
        time.sleep(30)
        link_all_sa_and_secret(constants.OCS_SECRET, self.namespace)
        logger.info("Deleting all pods in openshift-storage namespace")
        exec_cmd(f"oc delete pod --all -n {self.namespace}")
    csv.wait_for_phase("Succeeded", timeout=720)

    ocp_version = float(get_ocp_version())
    if config.ENV_DATA["platform"] == constants.IBMCLOUD_PLATFORM:
        config_map = ocp.OCP(
            kind="configmap",
            namespace=self.namespace,
            resource_name=constants.ROOK_OPERATOR_CONFIGMAP,
        )
        config_map.get(retry=10, wait=5)
        config_map_patch = (
            '\'{"data": {"ROOK_CSI_KUBELET_DIR_PATH": "/var/data/kubelet"}}\''
        )
        logger.info("Patching config map to change KUBELET DIR PATH")
        exec_cmd(
            f"oc patch configmap -n {self.namespace} "
            f"{constants.ROOK_OPERATOR_CONFIGMAP} -p {config_map_patch}"
        )
        if config.DEPLOYMENT.get("create_ibm_cos_secret", True):
            logger.info("Creating secret for IBM Cloud Object Storage")
            with open(constants.IBM_COS_SECRET_YAML, "r") as cos_secret_fd:
                cos_secret_data = yaml.load(cos_secret_fd, Loader=yaml.SafeLoader)
            key_id = config.AUTH["ibmcloud"]["ibm_cos_access_key_id"]
            key_secret = config.AUTH["ibmcloud"]["ibm_cos_secret_access_key"]
            cos_secret_data["data"]["IBM_COS_ACCESS_KEY_ID"] = key_id
            cos_secret_data["data"]["IBM_COS_SECRET_ACCESS_KEY"] = key_secret
            cos_secret_data_yaml = tempfile.NamedTemporaryFile(
                mode="w+", prefix="cos_secret", delete=False
            )
            templating.dump_data_to_temp_yaml(
                cos_secret_data, cos_secret_data_yaml.name
            )
            exec_cmd(f"oc create -f {cos_secret_data_yaml.name}")

    # Modify the CSV with custom values if required
    if all(
        key in config.DEPLOYMENT for key in ("csv_change_from", "csv_change_to")
    ):
        modify_csv(
            csv=csv_name,
            replace_from=config.DEPLOYMENT["csv_change_from"],
            replace_to=config.DEPLOYMENT["csv_change_to"],
        )

    # Create a custom storage class for the StorageCluster CR if necessary
    if self.CUSTOM_STORAGE_CLASS_PATH is not None:
        with open(self.CUSTOM_STORAGE_CLASS_PATH, "r") as custom_sc_fo:
            custom_sc = yaml.load(custom_sc_fo, Loader=yaml.SafeLoader)
        # Set DEFAULT_STORAGECLASS to match the custom storage class
        self.DEFAULT_STORAGECLASS = custom_sc["metadata"]["name"]
        run_cmd(f"oc create -f {self.CUSTOM_STORAGE_CLASS_PATH}")

    # Create the StorageCluster
    if config.DEPLOYMENT.get("kms_deployment"):
        kms = KMS.get_kms_deployment()
        kms.deploy()
    cluster_data = templating.load_yaml(constants.STORAGE_CLUSTER_YAML)
    if arbiter_deployment:
        cluster_data["spec"]["arbiter"] = {}
        cluster_data["spec"]["nodeTopologies"] = {}
        cluster_data["spec"]["arbiter"]["enable"] = True
        cluster_data["spec"]["nodeTopologies"][
            "arbiterLocation"
        ] = self.get_arbiter_location()
        cluster_data["spec"]["storageDeviceSets"][0][
            "replica"
        ] = config.DEPLOYMENT.get("ocs_operator_nodes_to_label", 4)

    cluster_data["metadata"]["name"] = config.ENV_DATA["storage_cluster_name"]
    deviceset_data = cluster_data["spec"]["storageDeviceSets"][0]
    device_size = int(config.ENV_DATA.get("device_size", defaults.DEVICE_SIZE))

    logger.info(
        "Flexible scaling is available from version 4.7 on LSO clusters "
        "with less than 3 zones"
    )
    ocs_version = config.ENV_DATA["ocs_version"]
    zone_num = get_az_count()
    if (
        config.DEPLOYMENT.get("local_storage")
        and Version.coerce(ocs_version) >= Version.coerce("4.7")
        and zone_num < 3
    ):
        cluster_data["spec"]["flexibleScaling"] = True
        # https://bugzilla.redhat.com/show_bug.cgi?id=1921023
        cluster_data["spec"]["storageDeviceSets"][0]["count"] = 3
        cluster_data["spec"]["storageDeviceSets"][0]["replica"] = 1

    # Set the size of the storage request
    if self.platform.lower() == constants.BAREMETAL_PLATFORM:
        pv_size_list = helpers.get_pv_size(
            storageclass=self.DEFAULT_STORAGECLASS_LSO
        )
        pv_size_list.sort()
        deviceset_data["dataPVCTemplate"]["spec"]["resources"]["requests"][
            "storage"
        ] = f"{pv_size_list[0]}"
    else:
        deviceset_data["dataPVCTemplate"]["spec"]["resources"]["requests"][
            "storage"
        ] = f"{device_size}Gi"

    # Set the storage class to the OCS default on the current platform
    if self.DEFAULT_STORAGECLASS:
        deviceset_data["dataPVCTemplate"]["spec"][
            "storageClassName"
        ] = self.DEFAULT_STORAGECLASS

    ocs_version = float(config.ENV_DATA["ocs_version"])

    # StorageCluster tweaks for LSO
    if config.DEPLOYMENT.get("local_storage"):
        cluster_data["spec"]["manageNodes"] = False
        cluster_data["spec"]["monDataDirHostPath"] = "/var/lib/rook"
        deviceset_data["name"] = constants.DEFAULT_DEVICESET_LSO_PVC_NAME
        deviceset_data["portable"] = False
        deviceset_data["dataPVCTemplate"]["spec"][
            "storageClassName"
        ] = self.DEFAULT_STORAGECLASS_LSO
        lso_type = config.DEPLOYMENT.get("type")
        if (
            self.platform.lower() == constants.AWS_PLATFORM
            and not lso_type == constants.AWS_EBS
        ):
            deviceset_data["count"] = 2
        if ocs_version >= 4.5:
            deviceset_data["resources"] = {
                "limits": {"cpu": 2, "memory": "5Gi"},
                "requests": {"cpu": 1, "memory": "5Gi"},
            }
        if (ocp_version >= 4.6) and (ocs_version >= 4.6):
            cluster_data["metadata"]["annotations"] = {
                "cluster.ocs.openshift.io/local-devices": "true"
            }

    # Allow lower instance requests and limits for the OCS deployment.
    # The resources we need to change can be found here:
    # https://github.com/openshift/ocs-operator/blob/release-4.5/pkg/deploy-manager/storagecluster.go#L88-L116
    if config.DEPLOYMENT.get("allow_lower_instance_requirements"):
        none_resources = {"Requests": None, "Limits": None}
        deviceset_data["resources"] = deepcopy(none_resources)
        resources = [
            "mon",
            "mds",
            "rgw",
            "mgr",
            "noobaa-core",
            "noobaa-db",
        ]
        if ocs_version >= 4.5:
            resources.append("noobaa-endpoint")
        cluster_data["spec"]["resources"] = {
            resource: deepcopy(none_resources) for resource in resources
        }
        if ocs_version >= 4.5:
            cluster_data["spec"]["resources"]["noobaa-endpoint"] = {
                "limits": {"cpu": 1, "memory": "500Mi"},
                "requests": {"cpu": 1, "memory": "500Mi"},
            }
    else:
        local_storage = config.DEPLOYMENT.get("local_storage")
        platform = config.ENV_DATA.get("platform", "").lower()
        if local_storage and platform == "aws":
            resources = {
                "mds": {
                    "limits": {"cpu": 3, "memory": "8Gi"},
                    "requests": {"cpu": 1, "memory": "8Gi"},
                }
            }
            if ocs_version < 4.5:
                resources["noobaa-core"] = {
                    "limits": {"cpu": 2, "memory": "8Gi"},
                    "requests": {"cpu": 1, "memory": "8Gi"},
                }
                resources["noobaa-db"] = {
                    "limits": {"cpu": 2, "memory": "8Gi"},
                    "requests": {"cpu": 1, "memory": "8Gi"},
                }
            cluster_data["spec"]["resources"] = resources

    # Enable host network if enabled in the config (this requires all the
    # rules to be enabled on the underlying platform).
    if config.DEPLOYMENT.get("host_network"):
        cluster_data["spec"]["hostNetwork"] = True

    cluster_data["spec"]["storageDeviceSets"] = [deviceset_data]

    if self.platform == constants.IBMCLOUD_PLATFORM:
        mon_pvc_template = {
            "spec": {
                "accessModes": ["ReadWriteOnce"],
                "resources": {"requests": {"storage": "20Gi"}},
                "storageClassName": self.DEFAULT_STORAGECLASS,
                "volumeMode": "Filesystem",
            }
        }
        cluster_data["spec"]["monPVCTemplate"] = mon_pvc_template
        # Need to check if it's needed for IBM Cloud to set manageNodes
        cluster_data["spec"]["manageNodes"] = False

    if config.ENV_DATA.get("encryption_at_rest"):
        if ocs_version < 4.6:
            error_message = "Encryption at REST can be enabled only on OCS >= 4.6!"
            logger.error(error_message)
            raise UnsupportedFeatureError(error_message)
        logger.info("Enabling encryption at REST!")
        cluster_data["spec"]["encryption"] = {
            "enable": True,
        }
        if config.DEPLOYMENT.get("kms_deployment"):
            cluster_data["spec"]["encryption"]["kms"] = {
                "enable": True,
            }

    if config.DEPLOYMENT.get("ceph_debug"):
        setup_ceph_debug()
        cluster_data["spec"]["managedResources"] = {
            "cephConfig": {"reconcileStrategy": "ignore"}
        }

    cluster_data_yaml = tempfile.NamedTemporaryFile(
        mode="w+", prefix="cluster_storage", delete=False
    )
    templating.dump_data_to_temp_yaml(cluster_data, cluster_data_yaml.name)
    run_cmd(f"oc create -f {cluster_data_yaml.name}", timeout=1200)
    if config.DEPLOYMENT["infra_nodes"]:
        _ocp = ocp.OCP(kind="node")
        _ocp.exec_oc_cmd(
            command=f"annotate namespace {defaults.ROOK_CLUSTER_NAMESPACE} "
            f"{constants.NODE_SELECTOR_ANNOTATION}"
        )
def create_catalog_source(image=None, ignore_upgrade=False):
    """
    Prepare and deploy the catalog source manifest, so the OCS operator
    can be installed from the quay registry.

    Args:
        image (str): Image of the OCS registry.
        ignore_upgrade (bool): Ignore the upgrade parameter.

    """
    # Because the custom catalog source will be called redhat-operators, we
    # need to disable the default sources. This should not be an issue, as
    # OCS internal registry images are now based on the OCP registry image.
    disable_specific_source(constants.OPERATOR_CATALOG_SOURCE_NAME)
    logger.info("Adding CatalogSource")
    if not image:
        image = config.DEPLOYMENT.get("ocs_registry_image", "")
    if config.DEPLOYMENT.get("stage_rh_osbs"):
        image = config.DEPLOYMENT.get(
            "stage_index_image", constants.OSBS_BOUNDLE_IMAGE
        )
        ocp_version = version.get_semantic_ocp_version_from_config()
        osbs_image_tag = config.DEPLOYMENT.get(
            "stage_index_image_tag", f"v{ocp_version}"
        )
        image += f":{osbs_image_tag}"
        run_cmd(
            "oc patch image.config.openshift.io/cluster --type merge -p '"
            '{"spec": {"registrySources": {"insecureRegistries": '
            '["registry-proxy.engineering.redhat.com", "registry.stage.redhat.io"]'
            "}}}'"
        )
        run_cmd(f"oc apply -f {constants.STAGE_IMAGE_CONTENT_SOURCE_POLICY_YAML}")
        logger.info(
            "Sleeping for 60 seconds to allow the machineconfigpool status "
            "update to start"
        )
        time.sleep(60)
        wait_for_machineconfigpool_status("all", timeout=1800)
    if not ignore_upgrade:
        upgrade = config.UPGRADE.get("upgrade", False)
    else:
        upgrade = False
    image_and_tag = image.rsplit(":", 1)
    image = image_and_tag[0]
    image_tag = image_and_tag[1] if len(image_and_tag) == 2 else None
    if not image_tag and config.REPORTING.get("us_ds") == "DS":
        image_tag = get_latest_ds_olm_tag(
            upgrade, latest_tag=config.DEPLOYMENT.get("default_latest_tag", "latest")
        )
    catalog_source_data = templating.load_yaml(constants.CATALOG_SOURCE_YAML)
    if config.ENV_DATA["platform"] == constants.IBMCLOUD_PLATFORM:
        create_ocs_secret(constants.MARKETPLACE_NAMESPACE)
        catalog_source_data["spec"]["secrets"] = [constants.OCS_SECRET]
    cs_name = constants.OPERATOR_CATALOG_SOURCE_NAME
    change_cs_condition = (
        (image or image_tag)
        and catalog_source_data["kind"] == "CatalogSource"
        and catalog_source_data["metadata"]["name"] == cs_name
    )
    if change_cs_condition:
        default_image = config.DEPLOYMENT["default_ocs_registry_image"]
        image = image if image else default_image.rsplit(":", 1)[0]
        catalog_source_data["spec"][
            "image"
        ] = f"{image}:{image_tag if image_tag else 'latest'}"
    catalog_source_manifest = tempfile.NamedTemporaryFile(
        mode="w+", prefix="catalog_source_manifest", delete=False
    )
    templating.dump_data_to_temp_yaml(
        catalog_source_data, catalog_source_manifest.name
    )
    run_cmd(f"oc apply -f {catalog_source_manifest.name}", timeout=2400)
    catalog_source = CatalogSource(
        resource_name=constants.OPERATOR_CATALOG_SOURCE_NAME,
        namespace=constants.MARKETPLACE_NAMESPACE,
    )
    # Wait for the catalog source to be ready
    catalog_source.wait_for_state("READY")
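# Note on the image/tag split above: rsplit(":", 1) splits on the last colon,
# so the tag after the final colon is separated from the repository path.
# A minimal sketch of the same logic (helper name is hypothetical):
def _split_image_and_tag(image):
    """Return (repository, tag or None) for an image reference with a tag."""
    image_and_tag = image.rsplit(":", 1)
    return image_and_tag[0], image_and_tag[1] if len(image_and_tag) == 2 else None
# e.g. _split_image_and_tag("quay.io/rhceph-dev/ocs-registry:latest-stable-4.6")
# -> ("quay.io/rhceph-dev/ocs-registry", "latest-stable-4.6")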
def run_amq_benchmark(
    self,
    benchmark_pod_name="benchmark",
    kafka_namespace=constants.AMQ_NAMESPACE,
    tiller_namespace=AMQ_BENCHMARK_NAMESPACE,
    num_of_clients=8,
    worker=None,
    timeout=1800,
    amq_workload_yaml=None,
    run_in_bg=False,
):
    """
    Run the benchmark pod and get the results

    Args:
        benchmark_pod_name (str): Name of the benchmark pod
        kafka_namespace (str): Namespace where the kafka cluster is created
        tiller_namespace (str): Namespace where the tiller pod needs to be created
        num_of_clients (int): Number of clients to be created
        worker (str): Workers on which to create load, separated with commas, e.g.
            http://benchmark-worker-0.benchmark-worker:8080,
            http://benchmark-worker-1.benchmark-worker:8080
        timeout (int): Time to complete the run
        amq_workload_yaml (dict): Contains the amq workload information, keys and values:
            name (str): Name of the workload
            topics (int): Number of topics created
            partitions_per_topic (int): Number of partitions per topic
            message_size (int): Message size
            payload_file (str): Load to run on the workload
            subscriptions_per_topic (int): Number of subscriptions per topic
            consumer_per_subscription (int): Number of consumers per subscription
            producers_per_topic (int): Number of producers per topic
            producer_rate (int): Producer rate
            consumer_backlog_sizegb (int): Size of the backlog in GB
            test_duration_minutes (int): Time to run the workloads
        run_in_bg (bool): If True, the workload will run in the background

    Returns:
        result (str/Thread obj): Returns the benchmark run information if
            run_in_bg is False, otherwise a thread of the amq workload execution

    """
    # Namespace for helm/tiller
    try:
        self.create_namespace(tiller_namespace)
    except CommandFailed as ef:
        if (
            f'project.project.openshift.io "{tiller_namespace}" already exists'
            not in str(ef)
        ):
            raise ef

    # Create the rbac file
    try:
        sa_tiller = list(
            templating.load_yaml(constants.AMQ_RBAC_YAML, multi_document=True)
        )
        sa_tiller[0]["metadata"]["namespace"] = tiller_namespace
        sa_tiller[1]["subjects"][0]["namespace"] = tiller_namespace
        self.sa_tiller = OCS(**sa_tiller[0])
        self.crb_tiller = OCS(**sa_tiller[1])
        self.sa_tiller.create()
        self.crb_tiller.create()
    except (CommandFailed, CalledProcessError) as cf:
        log.error('Failed during creation of service account tiller')
        raise cf

    # Install the helm cli (helm v2, as we need the tiller component)
    # and create the tiller pods
    wget_cmd = f"wget -c --read-timeout=5 --tries=0 {URL}"
    untar_cmd = "tar -zxvf helm-v2.16.1-linux-amd64.tar.gz"
    tiller_cmd = (
        f"linux-amd64/helm init --tiller-namespace {tiller_namespace}"
        f" --service-account {tiller_namespace}"
    )
    exec_cmd(cmd=wget_cmd, cwd=self.dir)
    exec_cmd(cmd=untar_cmd, cwd=self.dir)
    exec_cmd(cmd=tiller_cmd, cwd=self.dir)

    # Validate that the tiller pod is running
    log.info("Waiting for 30s for tiller pod to come up")
    time.sleep(30)
    if self.is_amq_pod_running(
        pod_pattern="tiller", expected_pods=1, namespace=tiller_namespace
    ):
        log.info("Tiller pod is running")
    else:
        raise ResourceWrongStatusException("Tiller pod is not in running state")

    # Create the benchmark pods
    log.info("Create benchmark pods")
    values = templating.load_yaml(constants.AMQ_BENCHMARK_VALUE_YAML)
    values["numWorkers"] = num_of_clients
    benchmark_cmd = (
        f"linux-amd64/helm install {constants.AMQ_BENCHMARK_POD_YAML}"
        f" --name {benchmark_pod_name} --tiller-namespace {tiller_namespace}"
    )
    exec_cmd(cmd=benchmark_cmd, cwd=self.dir)

    # Make sure the benchmark pod and clients are running
    if self.is_amq_pod_running(
        pod_pattern="benchmark",
        expected_pods=(1 + num_of_clients),
        namespace=tiller_namespace,
    ):
        log.info("All benchmark pods are up and running")
    else:
        raise ResourceWrongStatusException(
            "Benchmark pod is not getting to running state"
        )

    # Update commonConfig with the kafka-bootstrap server details
    driver_kafka = templating.load_yaml(constants.AMQ_DRIVER_KAFKA_YAML)
    driver_kafka['commonConfig'] = (
        f'bootstrap.servers=my-cluster-kafka-bootstrap.{kafka_namespace}.svc.cluster.local:9092'
    )
    json_file = f'{self.dir}/driver_kafka'
    templating.dump_data_to_json(driver_kafka, json_file)
    cmd = f'cp {json_file} {benchmark_pod_name}-driver:/'
    self.pod_obj.exec_oc_cmd(cmd)

    # Update the workload yaml
    if not amq_workload_yaml:
        amq_workload_yaml = templating.load_yaml(constants.AMQ_WORKLOAD_YAML)
    yaml_file = f'{self.dir}/amq_workload.yaml'
    templating.dump_data_to_temp_yaml(amq_workload_yaml, yaml_file)
    cmd = f'cp {yaml_file} {benchmark_pod_name}-driver:/'
    self.pod_obj.exec_oc_cmd(cmd)

    self.benchmark = True

    # Run the benchmark
    if worker:
        cmd = f"bin/benchmark --drivers /driver_kafka --workers {worker} /amq_workload.yaml"
    else:
        cmd = "bin/benchmark --drivers /driver_kafka /amq_workload.yaml"
    log.info(f"Running benchmark command {cmd} inside the benchmark pod")

    if run_in_bg:
        executor = ThreadPoolExecutor(1)
        result = executor.submit(
            self.run_amq_workload, cmd, benchmark_pod_name, tiller_namespace, timeout
        )
        return result

    pod_obj = get_pod_obj(
        name=f"{benchmark_pod_name}-driver", namespace=tiller_namespace
    )
    result = pod_obj.exec_cmd_on_pod(
        command=cmd, out_yaml_format=False, timeout=timeout
    )
    return result
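# Sketch of how the background mode above is typically consumed (the `amq`
# object is hypothetical; it stands for the instance this method belongs to).
# ThreadPoolExecutor returns a Future, so the output is collected via .result().
def _run_benchmark_in_background(amq, timeout=1800):
    """Start the AMQ benchmark in the background and return its Future."""
    future = amq.run_amq_benchmark(run_in_bg=True, timeout=timeout)
    # other validations can run here while the workload executes
    return future  # callers obtain the benchmark output with future.result()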
def create_custom_machineset(
    role="app",
    instance_type=None,
    labels=None,
    taints=None,
    zone="a",
):
    """
    Function to create a custom machineset on IPI deployments. Using this,
    a user can create nodes with a different instance type and role.
    Supported platforms: AWS, Azure, RHV and vSphere.
    https://docs.openshift.com/container-platform/4.1/machine_management/creating-machineset.html

    Args:
        role (str): Role type to be added to the node, e.g. app, worker
        instance_type (str): Type of instance
        labels (list): List of labels (key, val) to be added to the node
        taints (list): List of taints to be applied
        zone (str): Machineset zone for node creation.

    Returns:
        machineset (str): Created machineset name

    Raises:
        ResourceNotFoundError: In case machineset creation failed
        UnsupportedPlatformError: In case of an unsupported platform

    """
    # Check for AWS and IPI platform
    if config.ENV_DATA["platform"].lower() == "aws":
        machinesets_obj = OCP(
            kind=constants.MACHINESETS,
            namespace=constants.OPENSHIFT_MACHINE_API_NAMESPACE,
        )
        m4_xlarge = "m4.xlarge"
        aws_instance = instance_type if instance_type else m4_xlarge
        for machine in machinesets_obj.get()["items"]:
            # Get inputs from the existing machineset config.
            region = (
                machine.get("spec").get("template").get("spec")
                .get("providerSpec").get("value").get("placement").get("region")
            )
            aws_zone = (
                machine.get("spec").get("template").get("spec")
                .get("providerSpec").get("value").get("placement")
                .get("availabilityZone")
            )
            cls_id = (
                machine.get("spec").get("selector").get("matchLabels")
                .get("machine.openshift.io/cluster-api-cluster")
            )
            ami_id = (
                machine.get("spec").get("template").get("spec")
                .get("providerSpec").get("value").get("ami").get("id")
            )
            if aws_zone == f"{region}{zone}":
                machineset_yaml = templating.load_yaml(constants.MACHINESET_YAML)

                # Update machineset_yaml with the required values.
                machineset_yaml["metadata"]["labels"][
                    "machine.openshift.io/cluster-api-cluster"
                ] = cls_id
                machineset_yaml["metadata"]["name"] = f"{cls_id}-{role}-{aws_zone}"
                machineset_yaml["spec"]["selector"]["matchLabels"][
                    "machine.openshift.io/cluster-api-cluster"
                ] = cls_id
                machineset_yaml["spec"]["selector"]["matchLabels"][
                    "machine.openshift.io/cluster-api-machineset"
                ] = f"{cls_id}-{role}-{aws_zone}"
                machineset_yaml["spec"]["template"]["metadata"]["labels"][
                    "machine.openshift.io/cluster-api-cluster"
                ] = cls_id
                machineset_yaml["spec"]["template"]["metadata"]["labels"][
                    "machine.openshift.io/cluster-api-machine-role"
                ] = role
                machineset_yaml["spec"]["template"]["metadata"]["labels"][
                    "machine.openshift.io/cluster-api-machine-type"
                ] = role
                machineset_yaml["spec"]["template"]["metadata"]["labels"][
                    "machine.openshift.io/cluster-api-machineset"
                ] = f"{cls_id}-{role}-{aws_zone}"
                machineset_yaml["spec"]["template"]["spec"]["providerSpec"][
                    "value"
                ]["ami"]["id"] = ami_id
                machineset_yaml["spec"]["template"]["spec"]["providerSpec"][
                    "value"
                ]["iamInstanceProfile"]["id"] = f"{cls_id}-worker-profile"
                machineset_yaml["spec"]["template"]["spec"]["providerSpec"][
                    "value"
                ]["instanceType"] = aws_instance
                machineset_yaml["spec"]["template"]["spec"]["providerSpec"][
                    "value"
                ]["placement"]["availabilityZone"] = aws_zone
                machineset_yaml["spec"]["template"]["spec"]["providerSpec"][
                    "value"
                ]["placement"]["region"] = region
                machineset_yaml["spec"]["template"]["spec"]["providerSpec"][
                    "value"
                ]["securityGroups"][0]["filters"][0]["values"][
                    0
                ] = f"{cls_id}-worker-sg"
                machineset_yaml["spec"]["template"]["spec"]["providerSpec"][
                    "value"
                ]["subnet"]["filters"][0]["values"][
                    0
                ] = f"{cls_id}-private-{aws_zone}"
                machineset_yaml["spec"]["template"]["spec"]["providerSpec"][
                    "value"
                ]["tags"][0]["name"] = f"kubernetes.io/cluster/{cls_id}"

                # Apply the labels
                if labels:
                    for label in labels:
                        machineset_yaml["spec"]["template"]["spec"]["metadata"][
                            "labels"
                        ][label[0]] = label[1]
                    # Remove the app label in case of infra nodes
                    if role == "infra":
                        machineset_yaml["spec"]["template"]["spec"]["metadata"][
                            "labels"
                        ].pop(constants.APP_LABEL, None)

                # Apply the taints
                # e.g. a taint list looks like:
                # [{'effect': 'NoSchedule',
                #   'key': 'node.ocs.openshift.io/storage',
                #   'value': 'true'},
                #  {'effect': 'Schedule', 'key': 'xyz', 'value': 'False'}]
                if taints:
                    machineset_yaml["spec"]["template"]["spec"].update(
                        {"taints": taints}
                    )

                # Create the new custom machineset
                ms_obj = OCS(**machineset_yaml)
                ms_obj.create()
                if check_machineset_exists(f"{cls_id}-{role}-{aws_zone}"):
                    log.info(f"Machineset {cls_id}-{role}-{aws_zone} created")
                    return f"{cls_id}-{role}-{aws_zone}"
                else:
                    raise ResourceNotFoundError("Machineset resource not found")

    # Check for Azure and IPI platform
    elif config.ENV_DATA["platform"] == "azure":
        machinesets_obj = OCP(
            kind=constants.MACHINESETS,
            namespace=constants.OPENSHIFT_MACHINE_API_NAMESPACE,
        )
        vmsize = constants.AZURE_PRODUCTION_INSTANCE_TYPE
        azure_instance = instance_type if instance_type else vmsize
        for machine in machinesets_obj.get()["items"]:
            # Get inputs from the existing machineset config.
            region = (
                machine.get("spec").get("template").get("spec")
                .get("providerSpec").get("value").get("location")
            )
            azure_zone = (
                machine.get("spec").get("template").get("spec")
                .get("providerSpec").get("value").get("zone")
            )
            cls_id = (
                machine.get("spec").get("selector").get("matchLabels")
                .get("machine.openshift.io/cluster-api-cluster")
            )
            if azure_zone == zone:
                az_zone = f"{region}{zone}"
                machineset_yaml = templating.load_yaml(
                    constants.MACHINESET_YAML_AZURE
                )

                # Update machineset_yaml with the required values.
                machineset_yaml["metadata"]["labels"][
                    "machine.openshift.io/cluster-api-cluster"
                ] = cls_id
                machineset_yaml["metadata"]["name"] = f"{cls_id}-{role}-{az_zone}"
                machineset_yaml["spec"]["selector"]["matchLabels"][
                    "machine.openshift.io/cluster-api-cluster"
                ] = cls_id
                machineset_yaml["spec"]["selector"]["matchLabels"][
                    "machine.openshift.io/cluster-api-machineset"
                ] = f"{cls_id}-{role}-{az_zone}"
                machineset_yaml["spec"]["template"]["metadata"]["labels"][
                    "machine.openshift.io/cluster-api-cluster"
                ] = cls_id
                machineset_yaml["spec"]["template"]["metadata"]["labels"][
                    "machine.openshift.io/cluster-api-machine-role"
                ] = role
                machineset_yaml["spec"]["template"]["metadata"]["labels"][
                    "machine.openshift.io/cluster-api-machine-type"
                ] = role
                machineset_yaml["spec"]["template"]["metadata"]["labels"][
                    "machine.openshift.io/cluster-api-machineset"
                ] = f"{cls_id}-{role}-{az_zone}"
                machineset_yaml["spec"]["template"]["spec"]["providerSpec"][
                    "value"
                ]["image"]["resourceID"] = (
                    f"/resourceGroups/{cls_id}-rg/providers/"
                    f"Microsoft.Compute/images/{cls_id}"
                )
                machineset_yaml["spec"]["template"]["spec"]["providerSpec"][
                    "value"
                ]["location"] = region
                machineset_yaml["spec"]["template"]["spec"]["providerSpec"][
                    "value"
                ]["managedIdentity"] = f"{cls_id}-identity"
                machineset_yaml["spec"]["template"]["spec"]["providerSpec"][
                    "value"
                ]["resourceGroup"] = f"{cls_id}-rg"
                machineset_yaml["spec"]["template"]["spec"]["providerSpec"][
                    "value"
                ]["subnet"] = f"{cls_id}-worker-subnet"
                machineset_yaml["spec"]["template"]["spec"]["providerSpec"][
                    "value"
                ]["vmSize"] = azure_instance
                machineset_yaml["spec"]["template"]["spec"]["providerSpec"][
                    "value"
                ]["vnet"] = f"{cls_id}-vnet"
                machineset_yaml["spec"]["template"]["spec"]["providerSpec"][
                    "value"
                ]["zone"] = zone

                # Apply the labels
                if labels:
                    for label in labels:
                        machineset_yaml["spec"]["template"]["spec"]["metadata"][
                            "labels"
                        ][label[0]] = label[1]
                    # Remove the app label in case of infra nodes
                    if role == "infra":
                        machineset_yaml["spec"]["template"]["spec"]["metadata"][
                            "labels"
                        ].pop(constants.APP_LABEL, None)

                if taints:
                    machineset_yaml["spec"]["template"]["spec"].update(
                        {"taints": taints}
                    )

                # Create the new custom machineset
                ms_obj = OCS(**machineset_yaml)
                ms_obj.create()
                if check_machineset_exists(f"{cls_id}-{role}-{az_zone}"):
                    log.info(f"Machineset {cls_id}-{role}-{az_zone} created")
                    return f"{cls_id}-{role}-{az_zone}"
                else:
                    raise ResourceNotFoundError("Machineset resource not found")

    # Check for RHV and IPI platform
    elif config.ENV_DATA["platform"] == "rhv":
        machinesets_obj = OCP(
            kind=constants.MACHINESETS,
            namespace=constants.OPENSHIFT_MACHINE_API_NAMESPACE,
        )
        for machine in machinesets_obj.get()["items"]:
            # Get inputs from the existing machineset config.
            cls_uuid = (
                machine.get("spec").get("template").get("spec")
                .get("providerSpec").get("value").get("cluster_id")
            )
            template_name = (
                machine.get("spec").get("template").get("spec")
                .get("providerSpec").get("value").get("template_name")
            )
            cls_id = (
                machine.get("spec").get("selector").get("matchLabels")
                .get("machine.openshift.io/cluster-api-cluster")
            )
            socket = (
                machine.get("spec").get("template").get("spec")
                .get("providerSpec").get("value").get("cpu").get("sockets")
            )
            machineset_yaml = templating.load_yaml(constants.MACHINESET_YAML_RHV)

            # Update machineset_yaml with the required values.
            machineset_yaml["metadata"]["labels"][
                "machine.openshift.io/cluster-api-cluster"
            ] = cls_id
            machineset_yaml["metadata"]["name"] = f"{cls_id}-{role}-{zone}"
            machineset_yaml["spec"]["selector"]["matchLabels"][
                "machine.openshift.io/cluster-api-cluster"
            ] = cls_id
            machineset_yaml["spec"]["selector"]["matchLabels"][
                "machine.openshift.io/cluster-api-machineset"
            ] = f"{cls_id}-{role}-{zone}"
            machineset_yaml["spec"]["template"]["metadata"]["labels"][
                "machine.openshift.io/cluster-api-cluster"
            ] = cls_id
            machineset_yaml["spec"]["template"]["metadata"]["labels"][
                "machine.openshift.io/cluster-api-machine-role"
            ] = role
            machineset_yaml["spec"]["template"]["metadata"]["labels"][
                "machine.openshift.io/cluster-api-machine-type"
            ] = role
            machineset_yaml["spec"]["template"]["metadata"]["labels"][
                "machine.openshift.io/cluster-api-machineset"
            ] = f"{cls_id}-{role}-{zone}"
            machineset_yaml["spec"]["template"]["spec"]["providerSpec"]["value"][
                "cluster_id"
            ] = cls_uuid
            machineset_yaml["spec"]["template"]["spec"]["providerSpec"]["value"][
                "template_name"
            ] = template_name
            machineset_yaml["spec"]["template"]["spec"]["providerSpec"]["value"][
                "cpu"
            ]["sockets"] = socket

            # Apply the labels
            if labels:
                for label in labels:
                    machineset_yaml["spec"]["template"]["spec"]["metadata"][
                        "labels"
                    ][label[0]] = label[1]
                # Remove the app label in case of infra nodes
                if role == "infra":
                    machineset_yaml["spec"]["template"]["spec"]["metadata"][
                        "labels"
                    ].pop(constants.APP_LABEL, None)

            if taints:
                machineset_yaml["spec"]["template"]["spec"].update(
                    {"taints": taints}
                )

            # Create the new custom machineset
            ms_obj = OCS(**machineset_yaml)
            ms_obj.create()
            if check_machineset_exists(f"{cls_id}-{role}-{zone}"):
                log.info(f"Machineset {cls_id}-{role}-{zone} created")
                return f"{cls_id}-{role}-{zone}"
            else:
                raise ResourceNotFoundError("Machineset resource not found")

    # Check for VMware and IPI platform
    elif config.ENV_DATA["platform"] == constants.VSPHERE_PLATFORM:
        machinesets_obj = OCP(
            kind=constants.MACHINESETS,
            namespace=constants.OPENSHIFT_MACHINE_API_NAMESPACE,
        )
        for machine in machinesets_obj.get()["items"]:
            # Get inputs from the existing machineset config.
            cls_id = machine.get("spec")["selector"]["matchLabels"][
                "machine.openshift.io/cluster-api-cluster"
            ]
            disk_size = machine.get("spec")["template"]["spec"]["providerSpec"][
                "value"
            ]["diskGiB"]
            memory = machine.get("spec")["template"]["spec"]["providerSpec"][
                "value"
            ]["memoryMiB"]
            network_name = machine.get("spec")["template"]["spec"]["providerSpec"][
                "value"
            ]["network"]["devices"][0]["networkName"]
            num_cpu = machine.get("spec")["template"]["spec"]["providerSpec"][
                "value"
            ]["numCPUs"]
            num_core = machine.get("spec")["template"]["spec"]["providerSpec"][
                "value"
            ]["numCoresPerSocket"]
            vm_template = machine.get("spec")["template"]["spec"]["providerSpec"][
                "value"
            ]["template"]
            datacenter = machine.get("spec")["template"]["spec"]["providerSpec"][
                "value"
            ]["workspace"]["datacenter"]
            datastore = machine.get("spec")["template"]["spec"]["providerSpec"][
                "value"
            ]["workspace"]["datastore"]
            ds_folder = machine.get("spec")["template"]["spec"]["providerSpec"][
                "value"
            ]["workspace"]["folder"]
            ds_resourcepool = machine.get("spec")["template"]["spec"][
                "providerSpec"
            ]["value"]["workspace"]["resourcePool"]
            ds_server = machine.get("spec")["template"]["spec"]["providerSpec"][
                "value"
            ]["workspace"]["server"]
            machineset_yaml = templating.load_yaml(constants.MACHINESET_YAML_VMWARE)

            # Update machineset_yaml with the required values.
            machineset_yaml["metadata"]["labels"][
                "machine.openshift.io/cluster-api-cluster"
            ] = cls_id
            machineset_yaml["metadata"]["name"] = f"{cls_id}-{role}"
            machineset_yaml["spec"]["selector"]["matchLabels"][
                "machine.openshift.io/cluster-api-cluster"
            ] = cls_id
            machineset_yaml["spec"]["selector"]["matchLabels"][
                "machine.openshift.io/cluster-api-machineset"
            ] = f"{cls_id}-{role}"
            machineset_yaml["spec"]["template"]["metadata"]["labels"][
                "machine.openshift.io/cluster-api-cluster"
            ] = cls_id
            machineset_yaml["spec"]["template"]["metadata"]["labels"][
                "machine.openshift.io/cluster-api-machine-role"
            ] = role
            machineset_yaml["spec"]["template"]["metadata"]["labels"][
                "machine.openshift.io/cluster-api-machine-type"
            ] = role
            machineset_yaml["spec"]["template"]["metadata"]["labels"][
                "machine.openshift.io/cluster-api-machineset"
            ] = f"{cls_id}-{role}"
            machineset_yaml["spec"]["template"]["spec"]["providerSpec"]["value"][
                "diskGiB"
            ] = disk_size
            machineset_yaml["spec"]["template"]["spec"]["providerSpec"]["value"][
                "memoryMiB"
            ] = memory
            machineset_yaml["spec"]["template"]["spec"]["providerSpec"]["value"][
                "network"
            ]["devices"][0]["networkName"] = network_name
            machineset_yaml["spec"]["template"]["spec"]["providerSpec"]["value"][
                "numCPUs"
            ] = num_cpu
            machineset_yaml["spec"]["template"]["spec"]["providerSpec"]["value"][
                "numCoresPerSocket"
            ] = num_core
            machineset_yaml["spec"]["template"]["spec"]["providerSpec"]["value"][
                "template"
            ] = vm_template
            machineset_yaml["spec"]["template"]["spec"]["providerSpec"]["value"][
                "workspace"
            ]["datacenter"] = datacenter
            machineset_yaml["spec"]["template"]["spec"]["providerSpec"]["value"][
                "workspace"
            ]["datastore"] = datastore
            machineset_yaml["spec"]["template"]["spec"]["providerSpec"]["value"][
                "workspace"
            ]["folder"] = ds_folder
            machineset_yaml["spec"]["template"]["spec"]["providerSpec"]["value"][
                "workspace"
            ]["resourcepool"] = ds_resourcepool
            machineset_yaml["spec"]["template"]["spec"]["providerSpec"]["value"][
                "workspace"
            ]["server"] = ds_server

            # Apply the labels
            if labels:
                for label in labels:
                    machineset_yaml["spec"]["template"]["spec"]["metadata"][
                        "labels"
                    ][label[0]] = label[1]
                # Remove the app label in case of infra nodes
                if role == "infra":
                    machineset_yaml["spec"]["template"]["spec"]["metadata"][
                        "labels"
                    ].pop(constants.APP_LABEL, None)

            if taints:
                machineset_yaml["spec"]["template"]["spec"].update(
                    {"taints": taints}
                )

            # Create the new custom machineset
            ms_obj = OCS(**machineset_yaml)
            ms_obj.create()
            if check_machineset_exists(f"{cls_id}-{role}"):
                log.info(f"Machineset {cls_id}-{role} created")
                return f"{cls_id}-{role}"
            else:
                raise ResourceNotFoundError("Machineset resource not found")
    else:
        raise UnsupportedPlatformError("Functionality not supported in this platform")
def request_aws_credentials(self):
    """
    Uses a CredentialsRequest CR to create AWS IAM credentials that allow
    the program to interact with S3

    Returns:
        tuple: The CredentialsRequest resource, the AWS access key ID and
            the AWS secret access key

    """
    awscreds_data = templating.load_yaml(constants.MCG_AWS_CREDS_YAML)
    req_name = create_unique_resource_name('awscredreq', 'credentialsrequests')
    awscreds_data['metadata']['name'] = req_name
    awscreds_data['metadata']['namespace'] = self.namespace
    awscreds_data['spec']['secretRef']['name'] = req_name
    awscreds_data['spec']['secretRef']['namespace'] = self.namespace
    creds_request = create_resource(**awscreds_data)
    sleep(5)

    secret_ocp_obj = OCP(kind='secret', namespace=self.namespace)
    try:
        cred_req_secret_dict = secret_ocp_obj.get(
            resource_name=creds_request.name, retry=5
        )
    except CommandFailed:
        logger.error('Failed to retrieve credentials request secret')
        raise CredReqSecretNotFound(
            'Please make sure that the cluster used is an AWS cluster, '
            'or that the `platform` var in your config is correct.'
        )

    aws_access_key_id = base64.b64decode(
        cred_req_secret_dict.get('data').get('aws_access_key_id')
    ).decode('utf-8')
    aws_access_key = base64.b64decode(
        cred_req_secret_dict.get('data').get('aws_secret_access_key')
    ).decode('utf-8')

    def _check_aws_credentials():
        try:
            s3_res = boto3.resource(
                's3',
                endpoint_url="https://s3.amazonaws.com",
                aws_access_key_id=aws_access_key_id,
                aws_secret_access_key=aws_access_key,
            )
            test_bucket = s3_res.create_bucket(
                Bucket=create_unique_resource_name('cred-verify', 's3-bucket')
            )
            test_bucket.delete()
            return True
        except ClientError:
            logger.info('Credentials are still not active. Retrying...')
            return False

    try:
        for api_test_result in TimeoutSampler(40, 5, _check_aws_credentials):
            if api_test_result:
                logger.info('AWS credentials created successfully.')
                break
    except TimeoutExpiredError:
        logger.error('Failed to create credentials')
        assert False

    return creds_request, aws_access_key_id, aws_access_key
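# The credential-activation wait above uses a common pattern in this codebase:
# TimeoutSampler(timeout, sleep, func) repeatedly yields func() results until
# the timeout expires, at which point it raises TimeoutExpiredError. A minimal
# sketch of the same polling loop with a generic predicate:
def _wait_until(predicate, timeout=40, sleep=5):
    """Poll `predicate` until it returns a truthy value; may raise TimeoutExpiredError."""
    for result in TimeoutSampler(timeout, sleep, predicate):
        if result:
            return True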
def test_vdbench_workload(
    self, template, with_ocs, load, label_nodes, ripsaw, servers, threads,
    blocksize, fileio, samples, width, depth, files, file_size, runtime, pause
):
    """
    Run the VDBench workload

    Args:
        template (str): Name of the yaml file that will be used as a template
        with_ocs (bool): Indicates if the test will run on the same nodes as the OCS
        load (int): Load to run on the storage, in percent of the capacity.
        label_nodes (fixture): Fixture that labels the worker(s) that will be
            used for the app pod(s)
        ripsaw (fixture): Fixture to deploy the ripsaw benchmarking operator
        servers (int): Number of servers (pods) that will run the IO
        threads (int): Number of threads that will run on each server
        blocksize (list of str): List of block sizes - the 'K' suffix must be included
        fileio (str): How to select files for the IO: random / sequential
        samples (int): Number of times to run each test
        width (int): Width of the directory tree to create
        depth (int): Depth of the directory tree to create
        files (int): Number of files to create in each directory
        file_size (int): File size (in MB) to create
        runtime (int): Time (in sec) for each test iteration
        pause (int): Time (in min) to pause between test iterations.
    """
    log.info(f'going to use {template} as template')
    log.info("Apply Operator CRD")
    crd = 'resources/crds/ripsaw_v1alpha1_ripsaw_crd.yaml'
    ripsaw.apply_crd(crd)

    log.info('Running vdbench benchmark')
    if template:
        template = os.path.join(constants.TEMPLATE_VDBENCH_DIR, template)
    else:
        template = constants.VDBENCH_BENCHMARK_YAML
    sf_data = templating.load_yaml(template)
    target_results = template + 'Results'

    log.info('Calculating Storage size....')
    ceph_cluster = CephCluster()
    total_capacity = ceph_cluster.get_ceph_capacity()
    assert total_capacity > constants.VDBENCH_MIN_CAPACITY, (
        "Storage capacity is too low for performance testing"
    )
    log.info(f'The total usable capacity is {total_capacity}')

    if load:
        width = constants.VDBENCH_WIDTH
        depth = constants.VDBENCH_DEPTH
        file_size = constants.VDBENCH_FILE_SIZE
        capacity_per_pod = constants.VDBENCH_CAP_PER_POD
        total_dirs = width ** depth
        log.info(f'The total dirs in the tree {total_dirs}')
        log.info(f'Going to run with {load} % of the capacity load.')
        tested_capacity = round(total_capacity * 1024 * load / 100)
        log.info(f'Tested capacity is {tested_capacity} MB')
        servers = round(tested_capacity / capacity_per_pod)

        # Spread the application pods evenly on all worker or application
        # nodes, with at least 2 app pods per node.
        nodes = len(node.get_typed_nodes(node_type=constants.WORKER_MACHINE))
        if not with_ocs:
            nodes = len(
                machine.get_labeled_nodes(
                    f'node-role.kubernetes.io/app={constants.APP_NODE_LABEL}'
                )
            )
        log.info(f'Going to use {nodes} nodes for the test !')
        servers = round(servers / nodes) * nodes
        if servers < (nodes * 2):
            servers = nodes * 2

        files = round(tested_capacity / servers / total_dirs)
        total_files = round(files * servers * total_dirs)
        log.info(f'number of pods is {servers}')
        log.info(f'Going to create {total_files} files !')
        log.info(f'number of files in dir is {files}')

    # Set up the parameters for this test
    if servers:
        sf_data['spec']['workload']['args']['servers'] = servers
        target_results = target_results + '-' + str(servers)
    if threads:
        sf_data['spec']['workload']['args']['threads'] = threads
        target_results = target_results + '-' + str(threads)
    if fileio:
        sf_data['spec']['workload']['args']['fileio'] = fileio
        target_results = target_results + '-' + str(fileio)
    if samples:
        sf_data['spec']['workload']['args']['samples'] = samples
        target_results = target_results + '-' + str(samples)
    if width:
        sf_data['spec']['workload']['args']['width'] = width
        target_results = target_results + '-' + str(width)
    if depth:
        sf_data['spec']['workload']['args']['depth'] = depth
        target_results = target_results + '-' + str(depth)
    if files:
        sf_data['spec']['workload']['args']['files'] = files
        target_results = target_results + '-' + str(files)
    if file_size:
        sf_data['spec']['workload']['args']['file_size'] = file_size
        target_results = target_results + '-' + str(file_size)
    if runtime:
        sf_data['spec']['workload']['args']['runtime'] = runtime
        target_results = target_results + '-' + str(runtime)
    if pause:
        sf_data['spec']['workload']['args']['pause'] = pause
        target_results = target_results + '-' + str(pause)
    if len(blocksize) > 0:
        sf_data['spec']['workload']['args']['bs'] = blocksize
        target_results = target_results + '-' + '_'.join(blocksize)
    if with_ocs:
        if sf_data['spec']['workload']['args']['pin_server']:
            del sf_data['spec']['workload']['args']['pin_server']

    # Calculate the size of the volume under test: it should be at least
    # twice the total size of the files, and at least 100Gi. Since file_size
    # is in KB and vol_size needs to be in GB, more calculation is needed.
    vol_size = int((files * total_dirs) * file_size * 1.3)
    log.info('number of files to create : {}'.format(int(files * (width ** depth))))
    log.info(f'The size of all files is : {vol_size}MB')
    vol_size = int(vol_size / 1024)
    if vol_size < 100:
        vol_size = 100
    sf_data['spec']['workload']['args']['storagesize'] = f'{vol_size}Gi'

    log.debug(f'output of configuration file is {sf_data}')

    timeout = 86400  # 3600 (1H) * 24 = one day
    sf_obj = OCS(**sf_data)
    sf_obj.create()

    # Wait for the benchmark pods to be created - this takes a while
    for bench_pod in TimeoutSampler(
        300, 10, get_pod_name_by_pattern, 'vdbench-client', 'my-ripsaw'
    ):
        try:
            if bench_pod[0] is not None:
                vdbench_client_pod = bench_pod[0]
                break
        except IndexError:
            log.info('Benchmark client pod not ready yet')

    bench_pod = OCP(kind='pod', namespace='my-ripsaw')
    log.info('Waiting for VDBench benchmark to Run')
    assert bench_pod.wait_for_resource(
        condition=constants.STATUS_RUNNING,
        resource_name=vdbench_client_pod,
        sleep=30,
        timeout=600,
    )

    start_time = time.time()
    while True:
        logs = bench_pod.exec_oc_cmd(
            f'logs {vdbench_client_pod}', out_yaml_format=False
        )
        if 'Test Run Finished' in logs:
            log.info('VdBench Benchmark Completed Successfully')
            break
        if timeout < (time.time() - start_time):
            raise TimeoutError('Timed out waiting for benchmark to complete')
        time.sleep(30)

    # Get the results file from the benchmark pod and store it with the
    # test logs.
    # TODO: find the place of the actual test log and not in the parent
    # logs path
    target_results = '{}/{}.tgz'.format(ocsci_log_path(), target_results)
    pod_results = constants.VDBENCH_RESULTS_FILE
    retrive_files_from_pod(vdbench_client_pod, target_results, pod_results)
def create_storage_class(
    interface_type, interface_name, secret_name,
    reclaim_policy=constants.RECLAIM_POLICY_DELETE, sc_name=None,
    provisioner=None
):
    """
    Create a storage class

    Args:
        interface_type (str): The type of the interface
            (e.g. CephBlockPool, CephFileSystem)
        interface_name (str): The name of the interface
        secret_name (str): The name of the secret
        reclaim_policy (str): Type of reclaim policy
            (eg., 'Delete', 'Retain'). Defaults to 'Delete'
        sc_name (str): The name of storage class to create
        provisioner (str): The provisioner to set. Defaults to the
            default provisioner of the given interface

    Returns:
        OCS: An OCS instance for the storage class
    """
    sc_data = dict()
    if interface_type == constants.CEPHBLOCKPOOL:
        sc_data = templating.load_yaml(constants.CSI_RBD_STORAGECLASS_YAML)
        sc_data['parameters'][
            'csi.storage.k8s.io/node-stage-secret-name'
        ] = secret_name
        sc_data['parameters'][
            'csi.storage.k8s.io/node-stage-secret-namespace'
        ] = defaults.ROOK_CLUSTER_NAMESPACE
        interface = constants.RBD_INTERFACE
        sc_data['provisioner'] = (
            provisioner if provisioner else defaults.RBD_PROVISIONER
        )
    elif interface_type == constants.CEPHFILESYSTEM:
        sc_data = templating.load_yaml(constants.CSI_CEPHFS_STORAGECLASS_YAML)
        sc_data['parameters'][
            'csi.storage.k8s.io/node-stage-secret-name'
        ] = secret_name
        sc_data['parameters'][
            'csi.storage.k8s.io/node-stage-secret-namespace'
        ] = defaults.ROOK_CLUSTER_NAMESPACE
        interface = constants.CEPHFS_INTERFACE
        sc_data['parameters']['fsName'] = get_cephfs_name()
        sc_data['provisioner'] = (
            provisioner if provisioner else defaults.CEPHFS_PROVISIONER
        )
    sc_data['parameters']['pool'] = interface_name

    sc_data['metadata']['name'] = (
        sc_name if sc_name else create_unique_resource_name(
            f'test-{interface}', 'storageclass'
        )
    )
    sc_data['metadata']['namespace'] = defaults.ROOK_CLUSTER_NAMESPACE
    sc_data['parameters'][
        'csi.storage.k8s.io/provisioner-secret-name'
    ] = secret_name
    sc_data['parameters'][
        'csi.storage.k8s.io/provisioner-secret-namespace'
    ] = defaults.ROOK_CLUSTER_NAMESPACE

    sc_data['parameters']['clusterID'] = defaults.ROOK_CLUSTER_NAMESPACE
    sc_data['reclaimPolicy'] = reclaim_policy

    try:
        del sc_data['parameters']['userid']
    except KeyError:
        pass
    return create_resource(**sc_data)
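
# Hypothetical usage sketch (not part of the original module): create an
# RBD-backed storage class. The pool and secret names are assumptions, not
# real resources.
def _example_create_rbd_storage_class():
    """Illustrative only: call create_storage_class for a CephBlockPool."""
    return create_storage_class(
        interface_type=constants.CEPHBLOCKPOOL,
        interface_name='example-rbd-pool',       # assumed pool name
        secret_name='example-csi-rbd-secret',    # assumed secret name
        reclaim_policy=constants.RECLAIM_POLICY_DELETE,
    )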
def create_pod(
    interface_type=None, pvc_name=None, do_reload=True,
    namespace=defaults.ROOK_CLUSTER_NAMESPACE, node_name=None,
    pod_dict_path=None, sa_name=None, dc_deployment=False,
    raw_block_pv=False, raw_block_device=constants.RAW_BLOCK_DEVICE,
    replica_count=1
):
    """
    Create a pod

    Args:
        interface_type (str): The interface type (CephFS, RBD, etc.)
        pvc_name (str): The PVC that should be attached to the newly
            created pod
        do_reload (bool): True for reloading the object after creation,
            False otherwise
        namespace (str): The namespace for the new resource creation
        node_name (str): The name of the specific node to schedule the pod
        pod_dict_path (str): YAML path for the pod
        sa_name (str): Serviceaccount name
        dc_deployment (bool): True if creating the pod as a deploymentconfig
        raw_block_pv (bool): True for creating a raw block PV based pod,
            False otherwise
        raw_block_device (str): Raw block device for the pod
        replica_count (int): Replica count for the deployment config

    Returns:
        Pod: A Pod instance

    Raises:
        AssertionError: In case of any failure
    """
    if interface_type == constants.CEPHBLOCKPOOL:
        pod_dict = pod_dict_path if pod_dict_path else constants.CSI_RBD_POD_YAML
        interface = constants.RBD_INTERFACE
    else:
        pod_dict = pod_dict_path if pod_dict_path else constants.CSI_CEPHFS_POD_YAML
        interface = constants.CEPHFS_INTERFACE
    if dc_deployment:
        pod_dict = pod_dict_path if pod_dict_path else constants.FEDORA_DC_YAML
    pod_data = templating.load_yaml(pod_dict)
    pod_name = create_unique_resource_name(f'test-{interface}', 'pod')
    pod_data['metadata']['name'] = pod_name
    pod_data['metadata']['namespace'] = namespace
    if dc_deployment:
        pod_data['metadata']['labels']['app'] = pod_name
        pod_data['spec']['template']['metadata']['labels']['name'] = pod_name
        pod_data['spec']['replicas'] = replica_count

    if pvc_name:
        if dc_deployment:
            pod_data['spec']['template']['spec']['volumes'][0][
                'persistentVolumeClaim']['claimName'] = pvc_name
        else:
            pod_data['spec']['volumes'][0]['persistentVolumeClaim'][
                'claimName'] = pvc_name

    if interface_type == constants.CEPHBLOCKPOOL and raw_block_pv:
        pod_data['spec']['containers'][0]['volumeDevices'][0][
            'devicePath'] = raw_block_device
        pod_data['spec']['containers'][0]['volumeDevices'][0][
            'name'] = pod_data.get('spec').get('volumes')[0].get('name')

    if node_name:
        pod_data['spec']['nodeName'] = node_name
    else:
        if 'nodeName' in pod_data.get('spec'):
            del pod_data['spec']['nodeName']

    if sa_name and dc_deployment:
        pod_data['spec']['template']['spec']['serviceAccountName'] = sa_name

    if dc_deployment:
        ocs_obj = create_resource(**pod_data)
        logger.info(ocs_obj.name)
        assert (ocp.OCP(kind='pod', namespace=namespace)).wait_for_resource(
            condition=constants.STATUS_COMPLETED,
            resource_name=pod_name + '-1-deploy',
            resource_count=0,
            timeout=180,
            sleep=3,
        )
        dpod_list = pod.get_all_pods(namespace=namespace)
        for dpod in dpod_list:
            if '-1-deploy' not in dpod.name:
                if pod_name in dpod.name:
                    return dpod
    else:
        pod_obj = pod.Pod(**pod_data)
        pod_name = pod_data.get('metadata').get('name')
        logger.info(f'Creating new Pod {pod_name} for test')
        created_resource = pod_obj.create(do_reload=do_reload)
        assert created_resource, f"Failed to create Pod {pod_name}"

        return pod_obj
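
# Hypothetical usage sketch (names are illustrative): attach a pod to an
# existing RBD PVC, letting the scheduler pick the node.
def _example_create_rbd_pod():
    """Illustrative only: call create_pod against an assumed PVC."""
    return create_pod(
        interface_type=constants.CEPHBLOCKPOOL,
        pvc_name='example-rbd-pvc',   # assumed PVC name
        node_name=None,               # no node pinning
    )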
def create_pgbench_benchmark(
    self,
    replicas,
    pgbench_name=None,
    postgres_name=None,
    clients=None,
    threads=None,
    transactions=None,
    scaling_factor=None,
    timeout=None,
    wait=True,
):
    """
    Create pgbench benchmark pods

    Args:
        replicas (int): Number of pgbench pods to be deployed
        pgbench_name (str): Name of the pgbench benchmark
        postgres_name (str): Name of the postgres pod
        clients (int): Number of clients
        threads (int): Number of threads
        transactions (int): Number of transactions
        scaling_factor (int): Scaling factor
        timeout (int): Time in seconds to wait
        wait (bool): On True, waits till pgbench reaches Completed state

    Returns:
        List: pgbench pod objects list
    """
    pg_obj_list = []
    pgbench_name = pgbench_name if pgbench_name else "pgbench-benchmark"
    postgres_name = postgres_name if postgres_name else "postgres"
    for i in range(replicas):
        log.info("Create resource file for pgbench workload")
        pg_data = templating.load_yaml(constants.PGSQL_BENCHMARK_YAML)
        pg_data["metadata"]["name"] = f"{pgbench_name}" + f"{i}"
        pg_data["spec"]["workload"]["args"]["databases"][0]["host"] = (
            f"{postgres_name}-" + f"{i}" + ".postgres"
        )

        if clients is not None:
            pg_data["spec"]["workload"]["args"]["clients"][0] = clients
        if threads is not None:
            pg_data["spec"]["workload"]["args"]["threads"] = threads
        if transactions is not None:
            pg_data["spec"]["workload"]["args"]["transactions"] = transactions
        if scaling_factor is not None:
            pg_data["spec"]["workload"]["args"]["scaling_factor"] = scaling_factor
        pg_obj = OCS(**pg_data)
        pg_obj_list.append(pg_obj)
        pg_obj.create()

    if wait:
        # Confirm that the expected pgbench pods have spun up
        log.info("Searching the pgbench pods by their name pattern")
        timeout = timeout if timeout else 300
        for pgbench_pods in TimeoutSampler(
            timeout,
            replicas,
            get_pod_name_by_pattern,
            "pgbench-1-dbs-client",
            RIPSAW_NAMESPACE,
        ):
            try:
                if len(pgbench_pods) == replicas:
                    log.info(
                        f"Expected number of pgbench pods found: {replicas}"
                    )
                    break
            except IndexError:
                log.info(
                    f"Expected number of pgbench pods is {replicas} "
                    f"but only found {len(pgbench_pods)}"
                )
    return pg_obj_list
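
# Hypothetical usage sketch: deploy two pgbench pods against matching
# postgres pods and wait for completion. The pgsql fixture and all values
# are illustrative assumptions.
def _example_run_pgbench(pgsql):
    """Illustrative only: pgsql is assumed to be the workload fixture."""
    return pgsql.create_pgbench_benchmark(
        replicas=2,
        clients=4,
        transactions=600,
        wait=True,
    )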
def attach_pgsql_pod_to_claim_pvc(
    self, pvc_objs, postgres_name, run_benchmark=True, pgbench_name=None
):
    """
    Attaches pgsql pod to created claim PVC

    Args:
        pvc_objs (list): List of PVC objs which need to be attached to pods
        postgres_name (str): Name of the postgres pod
        run_benchmark (bool): On True, runs pgbench benchmark on postgres pod
        pgbench_name (str): Name of pgbench benchmark

    Returns:
        pgsql_obj_list (list): List of pod objs created
    """
    pgsql_obj_list = []
    for pvc_obj in pvc_objs:
        try:
            pgsql_sset = templating.load_yaml(constants.PGSQL_STATEFULSET_YAML)
            del pgsql_sset["spec"]["volumeClaimTemplates"]
            pgsql_sset["metadata"]["name"] = (
                f"{postgres_name}" + f"{pvc_objs.index(pvc_obj)}"
            )
            pgsql_sset["spec"]["template"]["spec"]["containers"][0][
                "volumeMounts"][0]["name"] = pvc_obj.name
            pgsql_sset["spec"]["template"]["spec"]["volumes"] = [{
                "name": f"{pvc_obj.name}",
                "persistentVolumeClaim": {"claimName": f"{pvc_obj.name}"},
            }]
            pgsql_sset = OCS(**pgsql_sset)
            pgsql_sset.create()
            pgsql_obj_list.append(pgsql_sset)

            self.wait_for_postgres_status(
                status=constants.STATUS_RUNNING, timeout=300
            )

            if run_benchmark:
                pg_data = templating.load_yaml(constants.PGSQL_BENCHMARK_YAML)
                pg_data["metadata"]["name"] = (
                    f"{pgbench_name}" + f"{pvc_objs.index(pvc_obj)}"
                    if pgbench_name
                    else create_unique_resource_name("benchmark", "pgbench")
                )
                pg_data["spec"]["workload"]["args"]["databases"][0]["host"] = (
                    f"{postgres_name}" + f"{pvc_objs.index(pvc_obj)}-0"
                    + ".postgres"
                )
                pg_obj = OCS(**pg_data)
                pg_obj.create()
                pgsql_obj_list.append(pg_obj)

                wait_time = 120
                log.info(f"Wait {wait_time} seconds before mounting pod")
                time.sleep(wait_time)

        except (CommandFailed, CalledProcessError) as cf:
            log.error("Failed during creation of postgres pod")
            raise cf

    if run_benchmark:
        log.info("Checking all pgbench benchmarks reached Completed state")
        self.wait_for_pgbench_status(
            status=constants.STATUS_COMPLETED, timeout=1800
        )

    return pgsql_obj_list
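
# Hypothetical usage sketch: attach postgres pods to pre-created PVCs and run
# pgbench against each. The pgsql fixture and pvcs list are assumptions.
def _example_attach_pgsql(pgsql, pvcs):
    """Illustrative only: pvcs is an assumed list of PVC objects."""
    return pgsql.attach_pgsql_pod_to_claim_pvc(
        pvc_objs=pvcs,
        postgres_name='postgres',
        run_benchmark=True,
    )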
def setup_cb(self):
    """
    Creates admission parts, couchbase operator pod and
    couchbase worker secret
    """
    # Create admission controller
    log.info("Create admission controller process for Couchbase")
    switch_to_project("default")
    self.up_adm_chk = OCP(namespace="default")
    self.up_check = OCP(namespace=constants.COUCHBASE_OPERATOR)
    for adm_yaml in self.admission_parts:
        adm_data = templating.load_yaml(adm_yaml)
        adm_obj = OCS(**adm_data)
        adm_obj.create()

    # Wait for admission pod to be created
    for adm_pod in TimeoutSampler(
        self.WAIT_FOR_TIME,
        3,
        get_pod_name_by_pattern,
        "couchbase-operator-admission",
        "default",
    ):
        try:
            if self.is_up_and_running(adm_pod[0], self.up_adm_chk):
                self.admission_pod = adm_pod[0]
                break
        except IndexError:
            log.info("Admission pod is not ready yet")

    # Wait for admission pod to be running
    log.info("Waiting for admission pod to be running")
    self.pod_obj.wait_for_resource(
        condition="Running",
        resource_name=self.admission_pod,
        timeout=self.WAIT_FOR_TIME,
        sleep=10,
    )
    self.ns_obj.new_project(constants.COUCHBASE_OPERATOR)
    couchbase_data = templating.load_yaml(constants.COUCHBASE_CRD_YAML)
    self.couchbase_obj = OCS(**couchbase_data)
    self.couchbase_obj.create()

    op_data = templating.load_yaml(constants.COUCHBASE_OPERATOR_ROLE)
    self.operator_role = OCS(**op_data)
    self.operator_role.create()
    self.serviceaccount = OCP(namespace=constants.COUCHBASE_OPERATOR)
    self.serviceaccount.exec_oc_cmd("create serviceaccount couchbase-operator")

    dockercfgs = self.serviceaccount.exec_oc_cmd("get secrets")
    startloc = dockercfgs.find("couchbase-operator-dockercfg")
    newdockerstr = dockercfgs[startloc:]
    endloc = newdockerstr.find(" ")
    dockerstr = newdockerstr[:endloc]
    self.secretsadder.exec_oc_cmd(
        f"secrets link serviceaccount/couchbase-operator secrets/{dockerstr}"
    )
    self.rolebinding = OCP(namespace=constants.COUCHBASE_OPERATOR)
    rolebind_cmd = "".join([
        "create rolebinding couchbase-operator-rolebinding ",
        "--role couchbase-operator ",
        "--serviceaccount couchbase-operator-namespace:couchbase-operator",
    ])
    self.rolebinding.exec_oc_cmd(rolebind_cmd)
    dep_data = templating.load_yaml(constants.COUCHBASE_OPERATOR_DEPLOY)
    self.cb_deploy = OCS(**dep_data)
    self.cb_deploy.create()

    # Wait for couchbase operator pod to be running
    for couchbase_pod in TimeoutSampler(
        self.WAIT_FOR_TIME,
        3,
        get_pod_name_by_pattern,
        "couchbase-operator",
        constants.COUCHBASE_OPERATOR,
    ):
        try:
            if self.is_up_and_running(couchbase_pod[0], self.up_check):
                break
        except IndexError:
            log.info("Couchbase operator is not up")

    cb_work = templating.load_yaml(constants.COUCHBASE_WORKER_SECRET)
    self.cb_worker = OCS(**cb_work)
    self.cb_worker.create()
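
# The dockercfg lookup above is plain string slicing over `oc get secrets`
# output. A standalone sketch of the same parse, using made-up output (the
# secret suffix is an assumption):
def _example_parse_dockercfg_name(dockercfgs):
    """Illustrative only: extract the couchbase-operator dockercfg name."""
    startloc = dockercfgs.find("couchbase-operator-dockercfg")
    newdockerstr = dockercfgs[startloc:]
    return newdockerstr[:newdockerstr.find(" ")]

# _example_parse_dockercfg_name(
#     "couchbase-operator-dockercfg-abc12 kubernetes.io/dockercfg 1 5m"
# ) would return "couchbase-operator-dockercfg-abc12"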
def create_custom_machineset(
    role='app', instance_type='m4.xlarge', label='app-scale', zone='a'
):
    """
    Function to create a custom machineset. Works only for AWS IPI, i.e.
    with this, a user can create nodes with a different instance type and role.
    https://docs.openshift.com/container-platform/4.1/machine_management/creating-machineset.html

    Args:
        role (str): Role type to be added for the node, e.g. app, worker
        instance_type (str): Type of AWS instance
        label (str): Label to be added to the node
        zone (str): Machineset zone for node creation

    Returns:
        machineset (str): Created machineset name

    Raises:
        ResourceNotFoundError: In case machineset creation failed
        UnsupportedPlatformError: In case of wrong platform
    """
    # Check for platform, since it's supported only for IPI
    if config.ENV_DATA['deployment_type'] == 'ipi':
        machinesets_obj = OCP(
            kind=constants.MACHINESETS,
            namespace=constants.OPENSHIFT_MACHINE_API_NAMESPACE
        )
        for machine in machinesets_obj.get()['items']:
            # Get inputs from existing machineset config
            region = machine.get('spec').get('template').get('spec').get(
                'providerSpec').get('value').get('placement').get('region')
            aws_zone = machine.get('spec').get('template').get('spec').get(
                'providerSpec').get('value').get('placement').get(
                'availabilityZone')
            cls_id = machine.get('spec').get('selector').get(
                'matchLabels').get('machine.openshift.io/cluster-api-cluster')
            ami_id = machine.get('spec').get('template').get('spec').get(
                'providerSpec').get('value').get('ami').get('id')
            if aws_zone == f"{region}{zone}":
                machineset_yaml = templating.load_yaml(constants.MACHINESET_YAML)

                # Update machineset_yaml with the required values
                machineset_yaml['metadata']['labels'][
                    'machine.openshift.io/cluster-api-cluster'] = cls_id
                machineset_yaml['metadata']['name'] = f"{cls_id}-{role}-{aws_zone}"
                machineset_yaml['spec']['selector']['matchLabels'][
                    'machine.openshift.io/cluster-api-cluster'] = cls_id
                machineset_yaml['spec']['selector']['matchLabels'][
                    'machine.openshift.io/cluster-api-machineset'
                ] = f"{cls_id}-{role}-{aws_zone}"
                machineset_yaml['spec']['template']['metadata']['labels'][
                    'machine.openshift.io/cluster-api-cluster'] = cls_id
                machineset_yaml['spec']['template']['metadata']['labels'][
                    'machine.openshift.io/cluster-api-machine-role'] = role
                machineset_yaml['spec']['template']['metadata']['labels'][
                    'machine.openshift.io/cluster-api-machine-type'] = role
                machineset_yaml['spec']['template']['metadata']['labels'][
                    'machine.openshift.io/cluster-api-machineset'
                ] = f"{cls_id}-{role}-{aws_zone}"
                machineset_yaml['spec']['template']['spec']['metadata'][
                    'labels'][f"node-role.kubernetes.io/{role}"] = f"{label}"
                machineset_yaml['spec']['template']['spec']['providerSpec'][
                    'value']['ami']['id'] = ami_id
                machineset_yaml['spec']['template']['spec']['providerSpec'][
                    'value']['iamInstanceProfile'][
                    'id'] = f"{cls_id}-worker-profile"
                machineset_yaml['spec']['template']['spec']['providerSpec'][
                    'value']['instanceType'] = instance_type
                machineset_yaml['spec']['template']['spec']['providerSpec'][
                    'value']['placement']['availabilityZone'] = aws_zone
                machineset_yaml['spec']['template']['spec']['providerSpec'][
                    'value']['placement']['region'] = region
                machineset_yaml['spec']['template']['spec']['providerSpec'][
                    'value']['securityGroups'][0]['filters'][0]['values'][
                    0] = f"{cls_id}-worker-sg"
                machineset_yaml['spec']['template']['spec']['providerSpec'][
                    'value']['subnet']['filters'][0]['values'][
                    0] = f"{cls_id}-private-{aws_zone}"
                machineset_yaml['spec']['template']['spec']['providerSpec'][
                    'value']['tags'][0][
                    'name'] = f"kubernetes.io/cluster/{cls_id}"

                # Create new custom machineset
                ms_obj = OCS(**machineset_yaml)
                ms_obj.create()
                if check_machineset_exists(f"{cls_id}-{role}-{aws_zone}"):
                    logging.info(f"Machineset {cls_id}-{role}-{aws_zone} created")
                    return f"{cls_id}-{role}-{aws_zone}"
                else:
                    raise ResourceNotFoundError("Machineset resource not found")
    else:
        raise UnsupportedPlatformError("Functionality not supported in UPI")
def deploy_ocs_via_operator(self):
    """
    Method to deploy OCS via the OCS operator
    """
    ui_deployment = config.DEPLOYMENT.get('ui_deployment')
    if ui_deployment:
        self.create_operator_catalog_source()
        self.deployment_with_ui()
        # Skip the rest of the deployment when deploying via UI
        return
    else:
        logger.info("Deployment of OCS via OCS operator")
        self.label_and_taint_nodes()
    logger.info("Creating namespace and operator group.")
    run_cmd(f"oc create -f {constants.OLM_YAML}")
    self.create_operator_catalog_source()
    self.subscribe_ocs()
    package_manifest = PackageManifest(resource_name=defaults.OCS_OPERATOR_NAME)
    channel = config.DEPLOYMENT.get('ocs_csv_channel')
    csv_name = package_manifest.get_current_csv(channel=channel)
    csv = CSV(resource_name=csv_name, namespace=self.namespace)
    csv.wait_for_phase("Succeeded", timeout=720)
    cluster_data = templating.load_yaml(constants.STORAGE_CLUSTER_YAML)
    cluster_data['metadata']['name'] = config.ENV_DATA['storage_cluster_name']
    deviceset_data = cluster_data['spec']['storageDeviceSets'][0]
    device_size = int(config.ENV_DATA.get('device_size', defaults.DEVICE_SIZE))
    deviceset_data['dataPVCTemplate']['spec']['resources']['requests'][
        'storage'] = f"{device_size}Gi"

    # Allow lower instance requests and limits for OCS deployment
    if config.DEPLOYMENT.get('allow_lower_instance_requirements'):
        none_resources = {'Requests': None, 'Limits': None}
        deviceset_data["resources"] = deepcopy(none_resources)
        cluster_data['spec']['resources'] = {
            resource: deepcopy(none_resources) for resource in [
                'mon',
                'mds',
                'rgw',
                'mgr',
                'noobaa-core',
                'noobaa-db',
            ]
        }
    if self.platform.lower() == constants.VSPHERE_PLATFORM:
        deviceset_data['dataPVCTemplate']['spec'][
            'storageClassName'] = constants.DEFAULT_SC_VSPHERE

    # Enable host network if enabled in config (this requires all the
    # rules to be enabled on the underlying platform)
    if config.DEPLOYMENT.get('host_network'):
        cluster_data['spec']['hostNetwork'] = True

    cluster_data['spec']['storageDeviceSets'] = [deviceset_data]
    cluster_data_yaml = tempfile.NamedTemporaryFile(
        mode='w+', prefix='cluster_storage', delete=False
    )
    templating.dump_data_to_temp_yaml(cluster_data, cluster_data_yaml.name)
    run_cmd(f"oc create -f {cluster_data_yaml.name}", timeout=2400)
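
# For reference, the resource override above expands to one 'Requests'/'Limits'
# pair per component. A standalone sketch of the same dict comprehension
# (illustrative only, mirroring the branch above):
def _example_none_resources():
    """Illustrative only: build the lowered-resources mapping."""
    none_resources = {'Requests': None, 'Limits': None}
    return {
        component: deepcopy(none_resources)
        for component in ['mon', 'mds', 'rgw', 'mgr', 'noobaa-core', 'noobaa-db']
    }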
def test_pvc_snapshot_performance_multiple_files(
    self, ripsaw, file_size, files, threads, interface
):
    """
    Run the SmallFile workload and then take a snapshot. The test runs with
    1M, 2M and 4M files on the volume - the total data set is the same for
    all tests, ~30GiB - then takes a snapshot and measures how long it
    takes. The test runs 3 times to check consistency.

    Args:
        ripsaw: Benchmark operator fixture which will run the workload
        file_size (int): The size of the files to be created - in KiB
        files (int): Number of files each thread will create
        threads (int): Number of threads to be used in the workload
        interface (str): The volume interface that will be used -
            CephBlockPool / CephFileSystem

    Raises:
        TimeoutError: In case file creation takes too long
            (more than 2 hours)
    """

    # Loading the main template yaml file for the benchmark and updating some
    # fields with new values
    sf_data = templating.load_yaml(constants.SMALLFILE_BENCHMARK_YAML)

    if interface == constants.CEPHBLOCKPOOL:
        storageclass = constants.DEFAULT_STORAGECLASS_RBD
    else:
        storageclass = constants.DEFAULT_STORAGECLASS_CEPHFS
    log.info(f"Using {storageclass} Storageclass")

    # Setting up the parameters for this test
    sf_data["spec"]["workload"]["args"]["samples"] = 1
    sf_data["spec"]["workload"]["args"]["operation"] = ["create"]
    sf_data["spec"]["workload"]["args"]["file_size"] = file_size
    sf_data["spec"]["workload"]["args"]["files"] = files
    sf_data["spec"]["workload"]["args"]["threads"] = threads
    sf_data["spec"]["workload"]["args"]["storageclass"] = storageclass

    """
    Calculating the size of the volume that needs to be tested. It should
    be at least twice the size of the files, and at least 100Gi.
    Since the file_size is in KB and the vol_size needs to be in GB, more
    calculation is needed.
    """
    total_files = int(files * threads)
    total_data = int(files * threads * file_size / constants.GB2KB)
    data_set = int(total_data * 3)  # calculate data with replica
    vol_size = data_set if data_set >= 100 else 100
    sf_data["spec"]["workload"]["args"]["storagesize"] = f"{vol_size}Gi"

    environment = get_environment_info()
    if not environment["user"] == "":
        sf_data["spec"]["test_user"] = environment["user"]
    else:
        # Since the full results object needs this parameter, initialize it
        # from the CR file
        environment["user"] = sf_data["spec"]["test_user"]

    sf_data["spec"]["clustername"] = environment["clustername"]
    log.debug(f"The smallfile yaml file is {sf_data}")

    # Deploy the ripsaw operator
    log.info("Apply Operator CRD")
    ripsaw.apply_crd("resources/crds/ripsaw_v1alpha1_ripsaw_crd.yaml")

    all_results = []

    for test_num in range(self.tests_numbers):

        # Deploy the smallfile workload
        log.info("Running SmallFile bench")
        sf_obj = OCS(**sf_data)
        sf_obj.create()

        # Wait for benchmark pods to get created - takes a while
        for bench_pod in TimeoutSampler(
            240,
            10,
            get_pod_name_by_pattern,
            "smallfile-client",
            constants.RIPSAW_NAMESPACE,
        ):
            try:
                if bench_pod[0] is not None:
                    small_file_client_pod = bench_pod[0]
                    break
            except IndexError:
                log.info("Bench pod not ready yet")

        bench_pod = OCP(kind="pod", namespace=constants.RIPSAW_NAMESPACE)
        log.info("Waiting for SmallFile benchmark to Run")
        assert bench_pod.wait_for_resource(
            condition=constants.STATUS_RUNNING,
            resource_name=small_file_client_pod,
            sleep=30,
            timeout=600,
        )

        for item in bench_pod.get()["items"][1]["spec"]["volumes"]:
            if "persistentVolumeClaim" in item:
                pvc_name = item["persistentVolumeClaim"]["claimName"]
                break
        log.info(f"Benchmark PVC name is : {pvc_name}")

        # Creation of 4M files on CephFS can take a lot of time
        timeout = 7200
        while timeout >= 0:
            logs = bench_pod.get_logs(name=small_file_client_pod)
            if "RUN STATUS DONE" in logs:
                break
            timeout -= 30
            if timeout == 0:
                raise TimeoutError("Timed out waiting for benchmark to complete")
            time.sleep(30)
        log.info(f"Smallfile test ({test_num + 1}) finished.")

        snap_name = pvc_name.replace("claim", "snapshot-")
        log.info(f"Taking snapshot of the PVC {pvc_name}")
        log.info(f"Snapshot name : {snap_name}")
        creation_time = self.measure_create_snapshot_time(
            pvc_name=pvc_name, snap_name=snap_name, interface=interface
        )
        log.info(f"Snapshot creation time is {creation_time} seconds")
        all_results.append(creation_time)

        # Delete the smallfile workload
        log.info("Deleting the smallfile workload")
        if sf_obj.delete(wait=True):
            log.info("The smallfile workload was deleted successfully")

        # Delete VolumeSnapshots
        log.info("Deleting the snapshots")
        if self.snap_obj.delete(wait=True):
            log.info("The snapshot was deleted successfully")

    log.info(f"Full test report for {interface}:")
    log.info(
        f"Test ran {self.tests_numbers} times, "
        f"All results are {all_results}"
    )
    log.info(f"The average creation time is : {statistics.mean(all_results)}")
    log.info(
        f"Number of Files on the volume : {total_files:,}, "
        f"Total dataset : {int(data_set / 3)} GiB"
    )
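
# Worked example of the sizing above with hypothetical inputs
# (threads=4, files=1_000_000, file_size=8 KiB, GB2KB = 1024 * 1024):
#   total_data = int(1_000_000 * 4 * 8 / GB2KB) ~= 30 GiB of raw data
#   data_set   = 30 * 3 = 90 GiB (accounting for 3x replication)
#   vol_size   = max(90, 100) = 100 Gi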
def prepare_disconnected_ocs_deployment():
    """
    Prepare disconnected ocs deployment:
    - get related images from OCS operator bundle csv
    - mirror related images to mirror registry
    - create imageContentSourcePolicy for the mirrored images
    - disable the default OperatorSources

    Returns:
        str: OCS registry image prepared for disconnected installation
            (with sha256 digest) or None (for live deployment)
    """
    logger.info("Prepare for disconnected OCS installation")
    if config.DEPLOYMENT.get("live_deployment"):
        get_opm_tool()

        pull_secret_path = os.path.join(constants.TOP_DIR, "data", "pull-secret")
        ocp_version = get_ocp_version()
        index_image = f"{config.DEPLOYMENT['cs_redhat_operators_image']}:v{ocp_version}"
        mirrored_index_image = (
            f"{config.DEPLOYMENT['mirror_registry']}/{constants.MIRRORED_INDEX_IMAGE_NAMESPACE}/"
            f"{constants.MIRRORED_INDEX_IMAGE_NAME}:v{ocp_version}"
        )
        # prune an index image
        logger.info(
            f"Prune index image {index_image} -> {mirrored_index_image} "
            f"(packages: {', '.join(constants.DISCON_CL_REQUIRED_PACKAGES)})"
        )
        cmd = (
            f"opm index prune -f {index_image} "
            f"-p {','.join(constants.DISCON_CL_REQUIRED_PACKAGES)} "
            f"-t {mirrored_index_image}"
        )
        # opm tool doesn't have an --authfile parameter, so we have to supply
        # the auth file through an env variable
        os.environ["REGISTRY_AUTH_FILE"] = pull_secret_path
        exec_cmd(cmd)

        # login to mirror registry
        login_to_mirror_registry(pull_secret_path)

        # push pruned index image to mirror registry
        logger.info(
            f"Push pruned index image to mirror registry: {mirrored_index_image}"
        )
        cmd = f"podman push --authfile {pull_secret_path} --tls-verify=false {mirrored_index_image}"
        exec_cmd(cmd)

        # mirror related images (this might take a very long time)
        logger.info(f"Mirror images related to index image: {mirrored_index_image}")
        cmd = (
            f"oc adm catalog mirror {mirrored_index_image} -a {pull_secret_path} --insecure "
            f"{config.DEPLOYMENT['mirror_registry']} --index-filter-by-os='.*'"
        )
        oc_acm_result = exec_cmd(cmd, timeout=7200)

        for line in oc_acm_result.stdout.decode("utf-8").splitlines():
            if "wrote mirroring manifests to" in line:
                break
        else:
            raise NotFoundError(
                "Manifests directory not printed to stdout of "
                "'oc adm catalog mirror ...' command."
            )
        mirroring_manifests_dir = line.replace("wrote mirroring manifests to ", "")
        logger.debug(f"Mirrored manifests directory: {mirroring_manifests_dir}")

        # create ImageContentSourcePolicy
        icsp_file = os.path.join(
            f"{mirroring_manifests_dir}",
            "imageContentSourcePolicy.yaml",
        )
        exec_cmd(f"oc apply -f {icsp_file}")

        # Disable the default OperatorSources
        exec_cmd(
            """oc patch OperatorHub cluster --type json """
            """-p '[{"op": "add", "path": "/spec/disableAllDefaultSources", "value": true}]'"""
        )

        # create redhat-operators CatalogSource
        catalog_source_data = templating.load_yaml(constants.CATALOG_SOURCE_YAML)
        catalog_source_manifest = tempfile.NamedTemporaryFile(
            mode="w+", prefix="catalog_source_manifest", delete=False
        )
        catalog_source_data["spec"]["image"] = f"{mirrored_index_image}"
        catalog_source_data["metadata"]["name"] = "redhat-operators"
        catalog_source_data["spec"]["displayName"] = "Red Hat Operators - Mirrored"
        templating.dump_data_to_temp_yaml(
            catalog_source_data, catalog_source_manifest.name
        )
        exec_cmd(f"oc apply -f {catalog_source_manifest.name}")
        catalog_source = CatalogSource(
            resource_name="redhat-operators",
            namespace=constants.MARKETPLACE_NAMESPACE,
        )
        # Wait for the catalog source to be ready
        catalog_source.wait_for_state("READY")
        return

    if config.DEPLOYMENT.get("stage_rh_osbs"):
        raise NotImplementedError(
            "Disconnected installation from stage is not implemented!"
        )

    ocs_registry_image = config.DEPLOYMENT.get("ocs_registry_image", "")
    logger.debug(f"ocs-registry-image: {ocs_registry_image}")
    ocs_registry_image_and_tag = ocs_registry_image.split(":")
    ocs_registry_image = ocs_registry_image_and_tag[0]
    image_tag = (
        ocs_registry_image_and_tag[1]
        if len(ocs_registry_image_and_tag) == 2
        else None
    )
    if not image_tag and config.REPORTING.get("us_ds") == "DS":
        image_tag = get_latest_ds_olm_tag(
            upgrade=False,
            latest_tag=config.DEPLOYMENT.get("default_latest_tag", "latest"),
        )
        ocs_registry_image = (
            f"{config.DEPLOYMENT['default_ocs_registry_image'].split(':')[0]}"
            f":{image_tag}"
        )
    bundle_image = f"{constants.OCS_OPERATOR_BUNDLE_IMAGE}:{image_tag}"
    logger.debug(f"ocs-operator-bundle image: {bundle_image}")

    csv_yaml = get_csv_from_image(bundle_image)
    ocs_operator_image = (
        csv_yaml.get("spec", {})
        .get("install", {})
        .get("spec", {})
        .get("deployments", [{}])[0]
        .get("spec", {})
        .get("template", {})
        .get("spec", {})
        .get("containers", [{}])[0]
        .get("image")
    )
    logger.debug(f"ocs-operator-image: {ocs_operator_image}")

    # prepare list of related images (bundle, registry and operator images and
    # all images from the relatedImages section of the csv)
    ocs_related_images = []
    ocs_related_images.append(get_image_with_digest(bundle_image))
    ocs_registry_image_with_digest = get_image_with_digest(ocs_registry_image)
    ocs_related_images.append(ocs_registry_image_with_digest)
    ocs_related_images.append(get_image_with_digest(ocs_operator_image))
    ocs_related_images += [
        image["image"] for image in csv_yaml.get("spec").get("relatedImages")
    ]
    logger.debug(f"OCS Related Images: {ocs_related_images}")

    mirror_registry = config.DEPLOYMENT["mirror_registry"]
    # prepare images mapping file for mirroring
    mapping_file_content = [
        f"{image}={mirror_registry}{image[image.index('/'):image.index('@')]}\n"
        for image in ocs_related_images
    ]
    logger.debug(f"Mapping file content: {mapping_file_content}")

    name = "ocs-images"
    mapping_file = os.path.join(
        config.ENV_DATA["cluster_path"], f"{name}-mapping.txt"
    )
    # write mapping file to disk
    with open(mapping_file, "w") as f:
        f.writelines(mapping_file_content)

    # prepare ImageContentSourcePolicy for OCS images
    with open(constants.TEMPLATE_IMAGE_CONTENT_SOURCE_POLICY_YAML) as f:
        ocs_icsp = yaml.safe_load(f)
    ocs_icsp["metadata"]["name"] = name
    ocs_icsp["spec"]["repositoryDigestMirrors"] = []
    for image in ocs_related_images:
        ocs_icsp["spec"]["repositoryDigestMirrors"].append(
            {
                "mirrors": [
                    f"{mirror_registry}{image[image.index('/'):image.index('@')]}"
                ],
                "source": image[: image.index("@")],
            }
        )
    logger.debug(f"OCS imageContentSourcePolicy: {yaml.safe_dump(ocs_icsp)}")
    ocs_icsp_file = os.path.join(
        config.ENV_DATA["cluster_path"], f"{name}-imageContentSourcePolicy.yaml"
    )
    with open(ocs_icsp_file, "w+") as fs:
        yaml.safe_dump(ocs_icsp, fs)

    # create ImageContentSourcePolicy
    exec_cmd(f"oc apply -f {ocs_icsp_file}")

    # mirror images based on mapping file
    with prepare_customized_pull_secret(ocs_related_images) as authfile_fo:
        login_to_mirror_registry(authfile_fo.name)
        exec_cmd(
            f"oc image mirror --filter-by-os='.*' -f {mapping_file} --insecure "
            f"--registry-config={authfile_fo.name} --max-per-registry=2",
            timeout=3600,
        )

    # Disable the default OperatorSources
    exec_cmd(
        """oc patch OperatorHub cluster --type json """
        """-p '[{"op": "add", "path": "/spec/disableAllDefaultSources", "value": true}]'"""
    )

    # wait for the newly created imageContentSourcePolicy to be applied on
    # all nodes
    wait_for_machineconfigpool_status("all")
    return ocs_registry_image_with_digest
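
# The mapping-file entry above is derived purely from string slicing on the
# image reference. A standalone sketch with a made-up digest reference:
def _example_mapping_entry(image, mirror_registry):
    """Illustrative only: build one 'oc image mirror' mapping line."""
    # keep everything from the first '/' up to the '@' digest separator
    repo_path = image[image.index('/'):image.index('@')]
    return f"{image}={mirror_registry}{repo_path}"

# _example_mapping_entry(
#     "registry.example.com/ocs/ocs-operator@sha256:0000",
#     "mirror.example.com:5000",
# ) would return
# "registry.example.com/ocs/ocs-operator@sha256:0000=mirror.example.com:5000/ocs/ocs-operator"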