def _deploy_es(self):
    """
    Deploying the Elasticsearch server

    """
    # Create a PVC for the elasticsearch server and wait until it is bound
    log.info("Creating a 10 GiB PVC for the ElasticSearch cluster")
    self.pvc_obj = create_pvc(
        sc_name=constants.CEPHBLOCKPOOL_SC,
        namespace=self.namespace,
        pvc_name="elasticsearch-data-quickstart-es-default-0",
        access_mode=constants.ACCESS_MODE_RWO,
        size="10Gi",
    )
    wait_for_resource_state(self.pvc_obj, constants.STATUS_BOUND)
    self.pvc_obj.reload()

    log.info("Deploy the ElasticSearch cluster")
    self.ocp.apply(self.crd)

    sample = TimeoutSampler(
        timeout=300,
        sleep=10,
        func=self._pod_is_found,
        pattern="quickstart-es-default",
    )
    if not sample.wait_for_func_status(True):
        raise Exception("The ElasticSearch pod deployment failed")
    self.espod = get_pod_name_by_pattern("quickstart-es-default", self.namespace)[0]
    log.info(f"The ElasticSearch pod {self.espod} started")

    es_pod = OCP(kind="pod", namespace=self.namespace)
    log.info("Waiting for ElasticSearch to Run")
    assert es_pod.wait_for_resource(
        condition=constants.STATUS_RUNNING,
        resource_name=self.espod,
        sleep=30,
        timeout=600,
    )
    log.info("Elastic Search is ready !!!")
def verify_storage_system():
    """
    Verify storage system status

    """
    managed_service = (
        config.ENV_DATA["platform"].lower() in constants.MANAGED_SERVICE_PLATFORMS
    )
    ocs_version = version.get_semantic_ocs_version_from_config()
    if ocs_version >= version.VERSION_4_9 and not managed_service:
        log.info("Verifying storage system status")
        storage_system = OCP(
            kind=constants.STORAGESYSTEM,
            namespace=config.ENV_DATA["cluster_namespace"],
        )
        storage_system_data = storage_system.get()
        storage_system_status = {}
        for condition in storage_system_data["items"][0]["status"]["conditions"]:
            storage_system_status[condition["type"]] = condition["status"]
        log.debug(f"storage system status: {storage_system_status}")
        assert storage_system_status == constants.STORAGE_SYSTEM_STATUS, (
            f"Storage System status is not in expected state. "
            f"Expected {constants.STORAGE_SYSTEM_STATUS} but found {storage_system_status}"
        )
def get_machinesets():
    """
    Get machine sets

    Returns:
        machine_sets (list): list of machine sets

    """
    machine_sets = list()
    machinesets_obj = OCP(
        kind=constants.MACHINESETS,
        namespace=constants.OPENSHIFT_MACHINE_API_NAMESPACE,
    )
    for machine in machinesets_obj.get()["items"]:
        machine_sets.append(
            machine.get("spec")
            .get("selector")
            .get("matchLabels")
            .get("machine.openshift.io/cluster-api-machineset")
        )
    return machine_sets
def get_all_pvcs_in_storageclass(storage_class):
    """
    This function returns all the PVCs in a given storage class

    Args:
        storage_class (str): name of the storage class

    Returns:
        out: list of PVC objects

    """
    ocp_pvc_obj = OCP(kind=constants.PVC)
    pvc_list = ocp_pvc_obj.get(all_namespaces=True)['items']
    out = []
    for pvc in pvc_list:
        pvc_obj = PVC(**pvc)
        if pvc_obj.backed_sc == storage_class:
            out.append(pvc_obj)
    return out
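# A minimal usage sketch of get_all_pvcs_in_storageclass(). The constant
# constants.DEFAULT_STORAGECLASS_CEPHFS is assumed here as the CephFS storage
# class name; it lists every PVC backed by that storage class across all
# namespaces.
for pvc_obj in get_all_pvcs_in_storageclass(constants.DEFAULT_STORAGECLASS_CEPHFS):
    print(f"{pvc_obj.namespace}/{pvc_obj.name}")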
def taint_ocs_nodes(nodes_to_taint=None):
    """
    Function to taint nodes with "node.ocs.openshift.io/storage=true:NoSchedule"

    Args:
        nodes_to_taint (list): Nodes to taint

    """
    if not check_taint_on_ocs_nodes():
        ocp = OCP()
        ocs_nodes = get_ocs_nodes()
        nodes_to_taint = nodes_to_taint if nodes_to_taint else ocs_nodes
        log.info(f"Taint nodes with taint: {constants.OPERATOR_NODE_TAINT}")
        for node in nodes_to_taint:
            taint_cmd = f"adm taint nodes {node.name} {constants.OPERATOR_NODE_TAINT}"
            ocp.exec_oc_cmd(command=taint_cmd)
    else:
        log.info(
            f"One or more nodes already have taint {constants.OPERATOR_NODE_TAINT}"
        )
def __init__(self, **kwargs):
    """
    Initializer function

    kwargs:
        Copy of ocs/defaults.py::<some pod> dictionary
    """
    self.pod_data = kwargs
    super(Pod, self).__init__(**kwargs)

    self.temp_yaml = tempfile.NamedTemporaryFile(
        mode='w+', prefix='POD_', delete=False
    )
    self._name = self.pod_data.get('metadata').get('name')
    self._labels = self.get_labels()
    self._roles = []
    self.ocp = OCP(
        api_version=defaults.API_VERSION,
        kind=constants.POD,
        namespace=self.namespace,
    )
    self.fio_thread = None
def check_local_volume():
    """
    Function to check if Local-volume is present or not

    Returns:
        bool: True if LV present, False if LV not present

    """
    if csv.get_csvs_start_with_prefix(
        csv_prefix=defaults.LOCAL_STORAGE_OPERATOR_NAME,
        namespace=config.ENV_DATA['local_storage_namespace'],
    ):
        ocp_obj = OCP()
        command = (
            f"get localvolume local-block "
            f"-n {config.ENV_DATA['local_storage_namespace']}"
        )
        try:
            status = ocp_obj.exec_oc_cmd(command, out_yaml_format=False)
        except CommandFailed as ex:
            logger.debug(f"Local volume does not exist! Exception: {ex}")
            return False
        return "No resources found" not in status
def add_capacity_test():
    osd_size = storage_cluster.get_osd_size()
    result = storage_cluster.add_capacity(osd_size)
    pod = OCP(
        kind=constants.POD, namespace=config.ENV_DATA['cluster_namespace']
    )
    pod.wait_for_resource(
        timeout=300,
        condition=constants.STATUS_RUNNING,
        selector='app=rook-ceph-osd',
        resource_count=result * 3,
    )

    # Verify status of rook-ceph-osd-prepare pods. Verifies bug 1769061
    # pod.wait_for_resource(
    #     timeout=300,
    #     condition=constants.STATUS_COMPLETED,
    #     selector=constants.OSD_PREPARE_APP_LABEL,
    #     resource_count=result * 3
    # )
    # These lines are commented out as a workaround for bug 1842500
    ceph_health_check(namespace=config.ENV_DATA['cluster_namespace'], tries=80)
def wait_for_osd_pods_to_be_running(self, storagedeviceset_count):
    """
    The function gets the number of storage device sets in the cluster
    and waits for the osd pods to be in status running.

    Args:
        storagedeviceset_count (int): the number of storage device sets in the cluster

    """
    logging.info("starting function 'wait_for_osd_pods_to_be_running'")
    pod = OCP(kind=constants.POD, namespace=config.ENV_DATA["cluster_namespace"])
    pod.wait_for_resource(
        timeout=420,
        condition=constants.STATUS_RUNNING,
        selector="app=rook-ceph-osd",
        resource_count=storagedeviceset_count * 3,
    )
    self.new_pods_in_status_running = True
def add_storage_capacity(
    capacity, storagecluster_name, namespace=defaults.ROOK_CLUSTER_NAMESPACE
):
    """
    Add storage capacity to the cluster

    Args:
        capacity (str): Size of the storage
        storagecluster_name (str): Name of a storage cluster

    Returns:
        bool: True if the command executes successfully

    """
    ocp = OCP(namespace=namespace)
    # ToDo Update patch command with pr https://github.com/red-hat-storage/ocs-ci/pull/803
    cmd = f''' patch storagecluster/{storagecluster_name} --type='json' -p='[{{"op": "replace", "path": "/spec/storageDeviceSets/0/dataPVCTemplate/spec/resources/requests/storage", "value":{capacity}}}]' '''
    ocp.exec_oc_cmd(cmd)
    return True
def get_node_index_in_local_block(node_name):
    """
    Get the node index in the node values as it appears in the local block resource

    Args:
        node_name (str): The node name to search for its index

    Returns:
        int: The node index in the nodeSelector values

    """
    ocp_lvs_obj = OCP(
        kind=constants.LOCAL_VOLUME_SET,
        namespace=defaults.LOCAL_STORAGE_NAMESPACE,
        resource_name=constants.LOCAL_BLOCK_RESOURCE,
    )
    node_selector = ocp_lvs_obj.get().get("spec").get("nodeSelector")
    node_values = (
        node_selector.get("nodeSelectorTerms")[0]
        .get("matchExpressions")[0]
        .get("values")
    )
    return node_values.index(node_name)
def add_capacity(count, storagecluster_name, namespace=defaults.ROOK_CLUSTER_NAMESPACE):
    """
    Add capacity to the cluster

    Args:
        storagecluster_name (str): Name of a storage cluster
        count (int): Count of osds to add, for example: if the total count of
            osds is 3, it will add 3 more osds

    Returns:
        bool: True if the command executes successfully

    """
    ocp = OCP(namespace=namespace)
    # ToDo Update patch command with pr https://github.com/red-hat-storage/ocs-ci/pull/803
    cmd = f''' patch storagecluster/{storagecluster_name} --type='json' -p='[{{"op": "replace", "path": "/spec/storageDeviceSets/0/count", "value":{count}}}]' '''
    ocp.exec_oc_cmd(cmd)
    return True
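# A minimal usage sketch of add_capacity(). The storage cluster name
# "ocs-storagecluster" is an assumption (the usual default); the patch above
# writes `count` into /spec/storageDeviceSets/0/count, after which the
# operator reconciles the rook-ceph-osd pods (see
# wait_for_osd_pods_to_be_running above for one way to wait on them).
if add_capacity(count=2, storagecluster_name="ocs-storagecluster"):
    print("Capacity patch applied; waiting for new rook-ceph-osd pods")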
def scale_down_deployments(node_name):
    """
    Scale down the deployments of a node as described in the documents
    of node replacement with LSO

    Args:
        node_name (str): The node name

    """
    ocp = OCP(kind="node", namespace=defaults.ROOK_CLUSTER_NAMESPACE)
    pods_to_scale_down = get_node_pods_to_scale_down(node_name)
    for p in pods_to_scale_down:
        deployment_name = pod.get_deployment_name(p.name)
        log.info(f"Scale down deployment {deployment_name}")
        ocp.exec_oc_cmd(f"scale deployment {deployment_name} --replicas=0")

    log.info("Scale down rook-ceph-crashcollector")
    ocp.exec_oc_cmd(
        f"scale deployment --selector=app=rook-ceph-crashcollector,"
        f"node_name='{node_name}' --replicas=0"
    )
def get_build_name_by_pattern(self, pattern="client", namespace=None, filter=None):
    """
    Get build name by pattern

    Returns:
        list: List of build names matching the pattern

    """
    ocp_obj = OCP(kind="Build", namespace=namespace)
    build_names = ocp_obj.exec_oc_cmd("get build -o name", out_yaml_format=False)
    build_names = build_names.split("\n")
    build_list = []
    for name in build_names:
        if filter is not None and re.search(filter, name):
            log.info(f"build name filtered {name}")
        elif re.search(pattern, name):
            (_, name) = name.split("/")
            log.info(f"build name match found appending {name}")
            build_list.append(name)
    return build_list
def test_smallfile_workload(self, ripsaw):
    """
    Run SmallFile Workload
    """
    log.info("Apply Operator CRD")
    ripsaw.apply_crd('resources/crds/ripsaw_v1alpha1_ripsaw_crd.yaml')
    log.info("Running SmallFile bench")
    sf_data = templating.load_yaml_to_dict(constants.SMALLFILE_BENCHMARK_YAML)
    sf_obj = OCS(**sf_data)
    sf_obj.create()

    # wait for benchmark pods to get created - takes a while
    for bench_pod in TimeoutSampler(
        40, 3, get_pod_name_by_pattern, 'smallfile-client', 'my-ripsaw'
    ):
        try:
            if bench_pod[0] is not None:
                small_file_client_pod = bench_pod[0]
                break
        except IndexError:
            log.info("Bench pod not ready yet")

    bench_pod = OCP(kind='pod', namespace='my-ripsaw')
    log.info("Waiting for SmallFile benchmark to Run")
    assert bench_pod.wait_for_resource(
        condition=constants.STATUS_RUNNING,
        resource_name=small_file_client_pod,
        sleep=30,
        timeout=600,
    )
    start_time = time.time()
    timeout = 900
    while True:
        logs = bench_pod.exec_oc_cmd(
            f'logs {small_file_client_pod}', out_yaml_format=False
        )
        if "RUN STATUS DONE" in logs:
            log.info("SmallFile Benchmark Completed Successfully")
            break
        if timeout < (time.time() - start_time):
            raise TimeoutError("Timed out waiting for benchmark to complete")
        time.sleep(30)
def setup(self, interface, pvc_factory, service_account_factory, teardown_factory):
    """
    Create dc pod with replica 5
    """
    self.replica_count = 5
    pvc_obj = pvc_factory(interface=interface, size=3)
    sa_obj = service_account_factory(project=pvc_obj.project)
    try:
        pod1 = create_pod(
            interface_type=interface,
            pvc_name=pvc_obj.name,
            namespace=pvc_obj.namespace,
            sa_name=sa_obj.name,
            dc_deployment=True,
            replica_count=self.replica_count,
            deploy_pod_status=constants.STATUS_RUNNING,
        )
    except TimeoutExpiredError:
        # The test cannot be continued if all the pods are created on the same node
        pods = pod.get_all_pods(namespace=pvc_obj.namespace)
        pod_nodes = [pod.get_pod_node(pod_obj).name for pod_obj in pods]
        if len(set(pod_nodes)) == 1:
            pytest.skip(
                "All pods are created on the same node and reached Running state"
            )
        raise

    self.name = pod1.labels["name"]
    self.namespace = pod1.namespace

    dc_obj = OCP(
        kind=constants.DEPLOYMENTCONFIG,
        namespace=self.namespace,
        resource_name=self.name,
    )
    dc_info = dc_obj.get(resource_name=self.name, selector=f"app={self.name}")[
        "items"
    ][0]
    dc_obj = OCS(**dc_info)
    teardown_factory(dc_obj)
def create_configmap_cluster_monitoring_pod(sc_name):
    """
    Create a configmap named cluster-monitoring-config
    and configure pvc on monitoring pod

    Args:
        sc_name (str): Name of the storage class

    """
    logger.info("Creating configmap cluster-monitoring-config")
    config_map = templating.load_yaml_to_dict(
        constants.CONFIGURE_PVC_ON_MONITORING_POD
    )
    config = yaml.safe_load(config_map['data']['config.yaml'])
    config['prometheusK8s']['volumeClaimTemplate']['spec']['storageClassName'] = sc_name
    config['alertmanagerMain']['volumeClaimTemplate']['spec']['storageClassName'] = sc_name
    config = yaml.dump(config)
    config_map['data']['config.yaml'] = config
    assert helpers.create_resource(**config_map, wait=False)
    ocp = OCP('v1', 'ConfigMap', 'openshift-monitoring')
    assert ocp.get(resource_name='cluster-monitoring-config')
    logger.info("Successfully created configmap cluster-monitoring-config")
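# A minimal usage sketch of create_configmap_cluster_monitoring_pod().
# constants.DEFAULT_STORAGECLASS_RBD is assumed here as the RBD storage class
# constant; it backs the Prometheus and Alertmanager PVCs with that class.
create_configmap_cluster_monitoring_pod(sc_name=constants.DEFAULT_STORAGECLASS_RBD)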
def remove_nodes(nodes):
    """
    Remove the nodes from cluster

    Args:
        nodes (list): list of node instances to remove from cluster

    """
    ocp = OCP(kind="node")
    node_names = [node.get().get("metadata").get("name") for node in nodes]
    node_names_str = " ".join(node_names)

    # unschedule the nodes
    unschedule_nodes(node_names)

    # Drain all the pods from the nodes
    drain_nodes(node_names)

    # delete the nodes
    log.info(f"Deleting nodes {node_names_str}")
    ocp.exec_oc_cmd(f"delete nodes {node_names_str}")
def get_all_pods(namespace=None, selector=None):
    """
    Get all pods in a namespace.

    Args:
        namespace (str): Name of the namespace.
            If namespace is None - get all pods
        selector (list): List of the resource selector to search with.
            Example: ['alertmanager', 'prometheus']

    Returns:
        list: List of Pod objects

    """
    ocp_pod_obj = OCP(kind=constants.POD, namespace=namespace)
    pods = ocp_pod_obj.get()['items']
    if selector:
        pods_new = [
            pod for pod in pods
            if pod['metadata']['labels'].get('app') in selector
        ]
        pods = pods_new
    pod_objs = [Pod(**pod) for pod in pods]
    return pod_objs
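# A minimal usage sketch of get_all_pods(), reusing the selector example from
# the docstring above; the "openshift-monitoring" namespace is an assumption
# for illustration.
monitoring_pods = get_all_pods(
    namespace="openshift-monitoring", selector=['alertmanager', 'prometheus']
)
for p in monitoring_pods:
    print(p.name)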
def get_node_objs(node_names=None):
    """
    Get node objects by node names

    Args:
        node_names (list): The node names to get their objects for.
            If None, will return all cluster nodes

    Returns:
        list: Cluster node OCS objects

    """
    nodes_obj = OCP(kind='node')
    node_dicts = nodes_obj.get()['items']
    if not node_names:
        return [OCS(**node_obj) for node_obj in node_dicts]
    else:
        return [
            OCS(**node_obj) for node_obj in node_dicts
            if (node_obj.get('metadata').get('name') in node_names)
        ]
def refresh_connection(self):
    """
    Login into OCP, refresh endpoint and token.

    """
    ocp = OCP(
        kind=constants.ROUTE,
        namespace=defaults.OCS_MONITORING_NAMESPACE,
        threading_lock=self._threading_lock,
    )
    kubeconfig = os.getenv("KUBECONFIG")
    kube_data = ""
    with open(kubeconfig, "r") as kube_file:
        kube_data = kube_file.readlines()
    login_ok = ocp.login(self._user, self._password)
    if not login_ok:
        raise AuthError("Login to OCP failed")
    self._token = ocp.get_user_token()
    with open(kubeconfig, "w") as kube_file:
        kube_file.writelines(kube_data)
    route_obj = ocp.get(resource_name=defaults.PROMETHEUS_ROUTE)
    self._endpoint = "https://" + route_obj["spec"]["host"]
def get_node_ip_addresses(ipkind):
    """
    Gets a dictionary of required IP addresses for all nodes

    Args:
        ipkind: ExternalIP or InternalIP or Hostname

    Returns:
        dict: Internal or External IP addresses keyed off of node name

    """
    ocp = OCP(kind=constants.NODE)
    masternodes = ocp.get(selector=constants.MASTER_LABEL).get("items")
    workernodes = ocp.get(selector=constants.WORKER_LABEL).get("items")
    nodes = masternodes + workernodes

    return {
        node["metadata"]["name"]: each["address"]
        for node in nodes
        for each in node["status"]["addresses"]
        if each["type"] == ipkind
    }
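# A minimal usage sketch of get_node_ip_addresses(): map every node to its
# internal IP. "InternalIP" is one of the address kinds named in the
# docstring above.
for node_name, address in get_node_ip_addresses("InternalIP").items():
    print(f"{node_name} -> {address}")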
def __init__(self, namespace=None):
    self.namespace = (
        namespace if namespace else config.ENV_DATA["cluster_namespace"]
    )

    if storagecluster_independent_check():
        sc_name = constants.DEFAULT_EXTERNAL_MODE_STORAGECLASS_RGW
    else:
        sc_name = constants.DEFAULT_STORAGECLASS_RGW

    self.storageclass = OCP(
        kind="storageclass", namespace=namespace, resource_name=sc_name
    )
    self.s3_internal_endpoint = (
        self.storageclass.get().get("parameters").get("endpoint")
    )
    self.region = self.storageclass.get().get("parameters").get("region")
    # Todo: Implement retrieval in cases where CephObjectStoreUser is available
    self.key_id = None
    self.secret_key = None
    self.s3_resource = None
def check_all_obcs_status(namespace=None):
    """
    Check all OBCs status in given namespace

    Args:
        namespace (str): Namespace where endpoint is running

    Returns:
        obc_bound_list: A list of all OBCs in Bound state

    """
    all_obcs_in_namespace = (
        OCP(namespace=namespace, kind="ObjectBucketClaim").get().get("items")
    )
    obc_bound_list, obc_not_bound_list = ([] for i in range(2))
    for obc in all_obcs_in_namespace:
        status = obc.get("status").get("phase")
        if status == constants.STATUS_BOUND:
            obc_bound_list.append(status)
        else:
            obc_not_bound_list.append(status)
    return obc_bound_list
def get_csi_provisioner_pod(interface):
    """
    Get the provisioner pod based on interface

    Returns:
        Pod object: The provisioner pod object based on interface

    """
    ocp_pod_obj = OCP(
        kind=constants.POD, namespace=config.ENV_DATA['cluster_namespace']
    )
    selector = (
        'app=csi-rbdplugin-provisioner'
        if interface == constants.CEPHBLOCKPOOL
        else 'app=csi-cephfsplugin-provisioner'
    )
    provision_pod_items = ocp_pod_obj.get(selector=selector)['items']
    assert provision_pod_items, f"No {interface} provisioner pod found"
    provisioner_pod = (
        Pod(**provision_pod_items[0]).name,
        Pod(**provision_pod_items[1]).name,
    )
    return provisioner_pod
def get_machine_objs(machine_names=None):
    """
    Get machine objects by machine names

    Args:
        machine_names (list): The machine names to get their objects.
            If None, will return all cluster machines

    Returns:
        list: Cluster machine OCS objects

    """
    machines_obj = OCP(
        kind="Machine", namespace=constants.OPENSHIFT_MACHINE_API_NAMESPACE
    )
    machine_dicts = machines_obj.get()["items"]
    if not machine_names:
        return [OCS(**obj) for obj in machine_dicts]
    else:
        return [
            OCS(**obj) for obj in machine_dicts
            if (obj.get("metadata").get("name") in machine_names)
        ]
def test_add_capacity(self):
    """
    Test to add variable capacity to the OSD cluster while IOs running
    """
    osd_size = storage_cluster.get_osd_size()
    result = storage_cluster.add_capacity(osd_size)
    pod = OCP(
        kind=constants.POD, namespace=config.ENV_DATA['cluster_namespace']
    )
    pod.wait_for_resource(
        timeout=300,
        condition=constants.STATUS_RUNNING,
        selector='app=rook-ceph-osd',
        resource_count=result * 3,
    )

    # Verify status of rook-ceph-osd-prepare pods. Verifies bug 1769061
    pod.wait_for_resource(
        timeout=300,
        condition=constants.STATUS_COMPLETED,
        selector=constants.OSD_PREPARE_APP_LABEL,
        resource_count=result * 3,
    )
    ceph_health_check(namespace=config.ENV_DATA['cluster_namespace'], tries=80)
def check_pods_in_running_state(namespace=defaults.ROOK_CLUSTER_NAMESPACE):
    """
    Checks whether all the pods in a given namespace are in Running state or not

    Returns:
        Boolean: True, if all pods in Running state. False, otherwise

    """
    ret_val = True
    list_of_pods = get_all_pods(namespace)
    ocp_pod_obj = OCP(kind=constants.POD, namespace=namespace)
    for p in list_of_pods:
        # we don't want to compare osd-prepare and canary pods as they get
        # created freshly when an osd needs to be added.
        if (
            "rook-ceph-osd-prepare" not in p.name
            and "rook-ceph-drain-canary" not in p.name
        ):
            status = ocp_pod_obj.get_resource(p.name, 'STATUS')
            if status not in "Running":
                logging.error(
                    f"The pod {p.name} is in {status} state. Expected = Running"
                )
                ret_val = False
    return ret_val
def run(self):
    """
    Run the benchmark and wait until it completed
    """
    # Create the benchmark object
    self.sf_obj = OCS(**self.crd_data)
    self.sf_obj.create()

    # Wait for benchmark pods to get created - takes a while
    for bench_pod in TimeoutSampler(
        240,
        10,
        get_pod_name_by_pattern,
        "smallfile-client",
        benchmark_operator.BMO_NAME,
    ):
        try:
            if bench_pod[0] is not None:
                small_file_client_pod = bench_pod[0]
                break
        except IndexError:
            log.info("Bench pod not ready yet")

    bench_pod = OCP(kind="pod", namespace=benchmark_operator.BMO_NAME)
    log.info("Waiting for SmallFile benchmark to Run")
    assert bench_pod.wait_for_resource(
        condition=constants.STATUS_RUNNING,
        resource_name=small_file_client_pod,
        sleep=30,
        timeout=600,
    )
    log.info("The SmallFiles benchmark is running, wait for completion")
    bench_pod.wait_for_resource(
        condition=constants.STATUS_COMPLETED,
        resource_name=small_file_client_pod,
        timeout=18000,
        sleep=60,
    )
    log.info("The SmallFiles benchmark is completed")
def setup(self):
    """
    Setting up the environment for each performance and scale test

    Args:
        name (str): The test name that will be used in the performance dashboard

    """
    log.info("Setting up test environment")
    self.crd_data = None  # place holder for Benchmark CRD data
    self.es = None  # place holder for the in-cluster deployment of elasticsearch
    self.es_backup = None  # place holder for the elasticsearch backup
    self.main_es = None  # place holder for the main elasticsearch object
    self.benchmark_obj = None  # place holder for the benchmark object
    self.client_pod = None  # place holder for the client pod object
    self.dev_mode = config.RUN["cli_params"].get("dev_mode")
    self.pod_obj = OCP(kind="pod", namespace=benchmark_operator.BMO_NAME)

    # Place holders for test results file (all sub-tests together)
    self.results_path = ""
    self.results_file = ""

    # Getting the full path for the test logs
    self.full_log_path = os.environ.get("PYTEST_CURRENT_TEST").split("]")[0]
    self.full_log_path = self.full_log_path.replace("::", "/").replace("[", "-")
    log.info(f"Logs file path name is : {self.full_log_path}")

    # Collecting all Environment configuration Software & Hardware
    # for the performance report.
    self.environment = get_environment_info()
    self.environment["clusterID"] = get_running_cluster_id()

    self.ceph_cluster = CephCluster()
    self.used_capacity = self.get_cephfs_data()

    self.get_osd_info()
    self.get_node_info(node_type="master")
    self.get_node_info(node_type="worker")