Example #1
0
 def test_reclaim_policy_retain(self):
     """
     Verify that the PV of a PVC created from ``self.sc_obj_retain`` is kept
     after the PVC is deleted.

     Steps:
         1. Record the number of Ceph images in the pool ``self.cbp_obj``.
         2. Create a PVC from ``self.sc_obj_retain`` and wait until Bound.
         3. Delete the PVC and wait for the deletion to finish.
         4. Assert the PV phase is 'Released' and the pool holds exactly
            one more image than before the PVC was created.
         5. Delete the leftover PV.
     """
     pool_name = self.cbp_obj.name
     pvc_count = len(list_ceph_images(pool_name=pool_name))
     pvc_obj = helpers.create_pvc(
         sc_name=self.sc_obj_retain.name,
         pvc_name=helpers.create_unique_resource_name('retain', 'pvc'))
     helpers.wait_for_resource_state(pvc_obj, constants.STATUS_BOUND)
     pvc_obj.reload()
     # Fetch the PVC once; both the PV name and the namespace come from it
     pvc_data = pvc_obj.get()
     pv_name = pvc_data['spec']['volumeName']
     pv_namespace = pvc_data['metadata']['namespace']
     pv_obj = ocp.OCP(kind='PersistentVolume', namespace=pv_namespace)
     assert pvc_obj.delete()
     pvc_obj.ocp.wait_for_delete(resource_name=pvc_obj.name)
     # Report exactly the PV data that was checked if the assertion fails
     pv_data = pv_obj.get(pv_name)
     assert pv_data.get('status').get('phase') == 'Released', (
         f"Status of PV {pv_data} is not 'Released'")
     log.info("Status of PV is Released")
     assert pvc_count + 1 == len(list_ceph_images(pool_name=pool_name))
     assert pv_obj.delete(resource_name=pv_name)
Example #2
0
    def get_rhcos_workers(self):
        """
        Returns a list of rhcos worker names

        A node is selected when its 'node.openshift.io/os_id' label equals
        'rhcos' and it carries the 'node-role.kubernetes.io/worker' label.
        For each such node every address of type 'Hostname' is collected.

        Returns:
            rhcos_workers (list): list of rhcos worker nodes

        """
        rhcos_workers = []
        node_items = ocp.OCP(kind='node').get()['items']
        for node in node_items:
            labels = node['metadata']['labels']
            # .get(): a node without the os_id label is simply not RHCOS,
            # it must not abort the whole lookup with a KeyError
            is_rhcos = labels.get('node.openshift.io/os_id') == 'rhcos'
            if not (is_rhcos and 'node-role.kubernetes.io/worker' in labels):
                continue
            rhcos_workers.extend(
                address['address']
                for address in node['status']['addresses']
                if address['type'] == 'Hostname'
            )
        return rhcos_workers
Example #3
0
    def test_remove_mon_pod_from_cluster(self):
        """
        To remove mon pod from the cluster
        after the I/O is performed on the pool
        and waiting for the operator to create a
        new mon pod on its own

        The test is skipped when the cluster reports one mon or fewer.

        """
        ceph_cluster = CephCluster()
        pods = ocp.OCP(kind=constants.POD,
                       namespace=config.ENV_DATA['cluster_namespace'])
        list_mons = ceph_cluster.get_mons_from_cluster()
        # Explicit skip: the previous form hid pytest.skip() inside an
        # assert message, which is never evaluated when asserts are stripped
        if len(list_mons) <= 1:
            pytest.skip(
                "INVALID: Mon count should be more than one to delete.")
        self.pool_obj = create_ceph_block_pool()
        assert run_io_on_pool(self.pool_obj), 'Failed to run I/O on the pool'
        assert delete_cephblockpools([self.pool_obj]), 'Failed to delete pool'
        ceph_cluster.cluster_health_check(timeout=0)
        ceph_cluster.remove_mon_from_cluster()
        assert verify_mon_pod_up(pods), "Mon pods are not up and running state"
        ceph_cluster.cluster_health_check(timeout=60)
Example #4
0
def add_role_to_user(role_type, user, cluster_role=False, namespace=None):
    """
    Grant a role to a user through ``oc adm policy``

    Args:
        role_type (str): Name of the role to grant
        user (str): User that receives the role
        cluster_role (bool): True to run 'add-cluster-role-to-user',
            False to run 'add-role-to-user'
        namespace (str): When given, passed to the command as '-n <namespace>'

    Raises:
        AssertionError: When the oc command returns a falsy result

    """
    ocp_obj = ocp.OCP()
    role_prefix = 'cluster-' if cluster_role else ''
    ns_option = f'-n {namespace}' if namespace else ''
    command = (
        f"adm policy add-{role_prefix}role-to-user "
        f"{role_type} {user} {ns_option}"
    )
    added = ocp_obj.exec_oc_cmd(command=command)
    assert added, 'Adding role failed'
    logger.info(f"Role_type {role_type} added to the user {user}")
Example #5
0
 def post_ocp_deploy(self):
     """
     Run the steps needed right after the OCP deployment.

     Enables the container_use_cephfs SELinux boolean on every worker
     returned by ``get_typed_worker_nodes(os_id="rhel")`` and afterwards
     calls ``self.add_stage_cert()``.
     """
     # Workaround for #1777384 - enable container_use_cephfs on RHEL workers
     # Ticket: RHSTOR-787, see more details in the issue: #1151
     logger.info("Running WA for ticket: RHSTOR-787")
     ocp_obj = ocp.OCP()
     setsebool_cmd = ['/usr/sbin/setsebool -P container_use_cephfs on']
     rhel_workers = get_typed_worker_nodes(os_id="rhel")
     for rhel_worker in rhel_workers:
         cmd_list = setsebool_cmd.copy()
         node = rhel_worker.get().get('metadata').get('name')
         logger.info(
             f"{node} is a RHEL based worker - applying '{cmd_list}'")
         # Spawning the debug pod on RHEL failed a few times in the past,
         # so the call is wrapped with the retry decorator to keep CI stable.
         debug_cmd_with_retry = retry(CommandFailed)(
             ocp_obj.exec_oc_debug_cmd)
         debug_cmd_with_retry(node=node, cmd_list=cmd_list)
     # end of workaround
     self.add_stage_cert()
Example #6
0
def get_all_storageclass():
    """
    Collect every storageclass except the ones named by
    constants.IGNORE_SC_GP2 and constants.IGNORE_SC_FLEX

    Returns:
         list: storageclass items as returned by the OCP ``get`` call

    """
    sc_items = ocp.OCP(
        kind=constants.STORAGECLASS,
        namespace=defaults.ROOK_CLUSTER_NAMESPACE
    ).get()['items']

    storageclass = []
    for sc_item in sc_items:
        sc_name = sc_item.get('metadata').get('name')
        if sc_name in (constants.IGNORE_SC_GP2, constants.IGNORE_SC_FLEX):
            continue
        storageclass.append(sc_item)
    return storageclass
Example #7
0
def validate_pv_delete(pv_name):
    """
    validates if pv is deleted after pvc deletion

    Args:
        pv_name (str): name of the pv (taken from the pvc) to validate

    Returns:
        bool: True if fetching the PV raises CommandFailed, which is taken
            to mean the PV no longer exists.
            NOTE(review): if the lookup returns an empty result without
            raising, the function falls through and returns None, exactly
            as before - confirm whether that case should return True too.

    Raises:
        AssertionError: If the PV is still returned by the cluster
    """
    ocp_pv_obj = ocp.OCP(kind=constants.PV,
                         namespace=defaults.ROOK_CLUSTER_NAMESPACE)

    # Only the lookup itself can raise CommandFailed, keep the try minimal
    try:
        pv_data = ocp_pv_obj.get(resource_name=pv_name)
    except CommandFailed:
        return True

    if pv_data:
        raise AssertionError(
            f"PV {pv_name} still exists after PVC deletion"
        )
Example #8
0
def workload_stop_ceph_mgr():
    """
    Downscales Ceph Manager deployment, measures the time when it was
    downscaled and monitors alerts that were triggered during this event.

    The deployment is scaled back to 1 replica even when the measured
    operation raises, so a failure does not leave the cluster without a
    running manager.

    Returns:
        dict: Contains information about `start` and `stop` time for stopping
            Ceph Manager pod.
    """
    oc = ocp.OCP(kind=constants.DEPLOYMENT,
                 namespace=config.ENV_DATA['cluster_namespace'])
    mgr_deployments = oc.get(selector=constants.MGR_APP_LABEL)['items']
    mgr = mgr_deployments[0]['metadata']['name']

    def stop_mgr():
        """
        Downscale Ceph Manager deployment for 6 minutes. First 5 minutes
        the alert should be in 'Pending'.
        After 5 minutes it should be 'Firing'.
        This configuration of monitoring can be observed in ceph-mixins which
        are used in the project:
            https://github.com/ceph/ceph-mixins/blob/d22afe8c0da34490cb77e52a202eefcf4f62a869/config.libsonnet#L25

        Returns:
            The result of ``oc.get(mgr)`` for the downscaled deployment.
        """
        # run_time of operation
        run_time = 60 * 6
        # oc and mgr are only read here, so no nonlocal declaration is needed
        logger.info(f"Downscaling deployment {mgr} to 0")
        oc.exec_oc_cmd(f"scale --replicas=0 deployment/{mgr}")
        logger.info(f"Waiting for {run_time} seconds")
        time.sleep(run_time)
        return oc.get(mgr)

    try:
        measured_op = measure_operation(stop_mgr)
    finally:
        # Always restore the deployment, also when the measurement failed
        logger.info(f"Upscaling deployment {mgr} back to 1")
        oc.exec_oc_cmd(f"scale --replicas=1 deployment/{mgr}")
    return measured_op
Example #9
0
def validate_pdb_creation():
    """
    Validate creation of PDBs for MON, MDS and OSD pods.

    The required names are constants.MDS_PDB, constants.MON_PDB and
    constants.OSD_PDB suffixed with each index below the OSD count. Every
    required name must be present among the existing PDBs; additional,
    unrelated PDBs are ignored.

    Raises:
        AssertionError: If required PDBs were not created.

    """
    pdb_obj = ocp.OCP(kind="PodDisruptionBudget")
    item_list = pdb_obj.get().get("items")
    existing_pdbs = {item["metadata"]["name"] for item in item_list}
    osd_count = count_cluster_osd()
    pdb_required = [constants.MDS_PDB, constants.MON_PDB]
    pdb_required.extend(
        constants.OSD_PDB + str(num) for num in range(osd_count)
    )
    pdb_required.sort()

    # Membership check instead of pairing two sorted lists: pairing stops
    # at the shorter list and shifts out of alignment as soon as the
    # cluster has a missing or an extra PDB.
    for required in pdb_required:
        assert required in existing_pdbs, f"{required} was not created"

    logger.info(f"All required PDBs created: {pdb_required}")
Example #10
0
def modify_registry_pod_count(count):
    """
    Function to modify registry replica count(increase/decrease pod count)

    Args:
        count (int): registry replica count to be changed to

    Returns:
        bool: True once the number of pods matching 'image-registry'
            equals ``count + 1``

    Raises:
        AssertionError: When the patch command reports failure
        TimeoutExpiredError: When number of image registry pods doesn't match the count

    """
    params = '{"spec":{"replicas":%d}}' % count
    ocp_obj = ocp.OCP(
        kind=constants.IMAGE_REGISTRY_CONFIG,
        namespace=constants.OPENSHIFT_IMAGE_REGISTRY_NAMESPACE,
    )
    # The assert keyword was missing, so the patch result was discarded
    # and the failure message could never be reported.
    assert ocp_obj.patch(params=params, format_type="merge"), (
        "Failed to run patch command to increase number of image registry pod"
    )

    # Validate number of image registry pod should match the count.
    # NOTE(review): the "+ 1" presumably accounts for one more pod whose
    # name also matches the pattern (e.g. the operator pod) - TODO confirm.
    for pod_list in TimeoutSampler(
        300,
        10,
        get_pod_name_by_pattern,
        "image-registry",
        constants.OPENSHIFT_IMAGE_REGISTRY_NAMESPACE,
    ):
        if pod_list is not None and len(pod_list) == count + 1:
            return True
Example #11
0
def remove_kmsid(kmsid):
    """
    Remove the entry of the given kmsid from the configmap named by
    constants.VAULT_KMS_CSI_CONNECTION_DETAILS using a JSON patch

    Args:
        kmsid (str) : kmsid to be removed

    Raises:
        KMSResourceCleaneupError: If the kmsid is still listed by
            get_encryption_kmsid() after the patch

    """
    ocp_obj = ocp.OCP()
    json_patch = f'\'[{{"op": "remove", "path": "/data/{kmsid}"}}]\''
    patch_cmd = (
        f"patch -n {constants.OPENSHIFT_STORAGE_NAMESPACE} cm "
        f"{constants.VAULT_KMS_CSI_CONNECTION_DETAILS} --type json -p "
        f"{json_patch}"
    )
    ocp_obj.exec_oc_cmd(command=patch_cmd)
    # Verify the removal by listing the remaining KMS IDs
    if kmsid in get_encryption_kmsid():
        raise KMSResourceCleaneupError(f"KMS ID {kmsid} deletion failed")
    logger.info(f"KMS ID {kmsid} deleted")
Example #12
0
def remove_role_from_user(role_type, user, cluster_role=False, namespace=None):
    """
    Revoke a role from a user through ``oc adm policy``

    Args:
        role_type (str): Name of the role to revoke
        user (str): User that loses the role
        cluster_role (bool): True to run 'remove-cluster-role-from-user',
            False to run 'remove-role-from-user'
        namespace (str): When given, passed to the command as '-n <namespace>'

    Raises:
        AssertionError: When the oc command returns a falsy result

    """
    ocp_obj = ocp.OCP()
    role_prefix = "cluster-" if cluster_role else ""
    ns_option = f"-n {namespace}" if namespace else ""
    command = (
        f"adm policy remove-{role_prefix}role-from-user "
        f"{role_type} {user} {ns_option}"
    )
    removed = ocp_obj.exec_oc_cmd(command=command)
    assert removed, "Removing role failed"
    logger.info(f"Role_type {role_type} removed from user {user}")
Example #13
0
def test_noobaa_service_mon_after_ocs_upgrade():
    """
    Check that no servicemonitor named 'noobaa-service-monitor' exists
    after OCS upgrade.

    The test is skipped when the OCS version read from the CSV is lower
    than or equal to 4.7.4.

    Test Procedure:
    1.Upgrade OCS version
    2.List servicemonitors in defaults.ROOK_CLUSTER_NAMESPACE
    3.Verify none of them is named 'noobaa-service-monitor'

    """
    ocs_version = version.get_ocs_version_from_csv(
        only_major_minor=False, ignore_pre_release=True
    )
    unsupported_up_to = version.get_semantic_version("4.7.4")
    if ocs_version <= unsupported_up_to:
        pytest.skip("The test is not supported on version less than 4.7.4")

    servicemonitors = ocp.OCP(
        kind=constants.SERVICE_MONITORS,
        namespace=defaults.ROOK_CLUSTER_NAMESPACE,
    ).get()["items"]
    # all() stops at the first offending item, like the per-item assert did
    assert all(
        item["metadata"]["name"] != "noobaa-service-monitor"
        for item in servicemonitors
    ), "noobaa-service-monitor exist"
    log.info("noobaa-service-monitor does not exist")
Example #14
0
def cluster(request, log_cli_level):
    """
    Drive the deployment of the cluster according to the CLI parameters.

    When 'teardown' is set, a finalizer destroying the cluster is
    registered on ``request``. The OpenShift client is always fetched.
    When 'deploy' is set, the cluster is deployed through the deployer
    obtained from the deployment factory and the container_use_cephfs
    SELinux boolean is enabled on every RHEL worker.

    Args:
        request: object providing ``addfinalizer``
        log_cli_level: passed through to the deployer's deploy/destroy calls
    """
    log.info(f"All logs located at {ocsci_log_path()}")

    cli_params = config.RUN['cli_params']
    teardown = cli_params['teardown']
    deploy = cli_params['deploy']
    deployer = dep_factory.DeploymentFactory().get_deployment()

    # Register the teardown first so it runs after test execution is finished
    if teardown:

        def cluster_teardown_finalizer():
            deployer.destroy_cluster(log_cli_level)

        request.addfinalizer(cluster_teardown_finalizer)
        log.info("Will teardown cluster because --teardown was provided")

    # Download client
    force_download = (
        cli_params.get('deploy')
        and config.DEPLOYMENT['force_download_client']
    )
    get_openshift_client(force_download=force_download)

    if not deploy:
        return

    # Deploy cluster
    deployer.deploy_cluster(log_cli_level)
    # Workaround for #1777384 - enable container_use_cephfs on RHEL workers
    ocp_obj = ocp.OCP()
    setsebool_cmd = ['/usr/sbin/setsebool -P container_use_cephfs on']
    for rhel_worker in get_typed_worker_nodes(os_id="rhel"):
        cmd_list = setsebool_cmd.copy()
        node = rhel_worker.get().get('metadata').get('name')
        log.info(f"{node} is a RHEL based worker - applying '{cmd_list}'")
        ocp_obj.exec_oc_debug_cmd(node=node, cmd_list=cmd_list)
Example #15
0
def update_pull_secret():
    """
    Update pull secret with extra quay.io/rhceph-dev credentials.

    The current secret/pull-secret from the openshift-config namespace is
    read, its '.dockerconfigjson' payload is base64-decoded and parsed, the
    quay.io/rhceph-dev entry is added under the 'auths' mapping and the
    result is written back with 'oc set data'.

    Note: This is a hack done to allow odf to odf deployment before full addon is available.
    """
    oc = ocp.OCP(kind=constants.SECRET, namespace="openshift-config")
    logger.info("Update pull secret")
    pull_secret = oc.exec_oc_cmd(
        "get -n openshift-config secret/pull-secret -o yaml")
    encoded_config = pull_secret["data"][".dockerconfigjson"]
    decoded_config = base64.b64decode(encoded_config).decode()
    rhceph_dev_key = config.AUTH["quay-rhceph-dev-auth"]
    docker_config = json.loads(decoded_config)
    # Registry credentials of a dockerconfigjson document live under the
    # "auths" key; an entry placed at the top level is not picked up.
    docker_config.setdefault("auths", {})["quay.io/rhceph-dev"] = {
        "auth": rhceph_dev_key,
        "email": "",
    }
    new_secret = json.dumps(docker_config).encode()
    with tempfile.NamedTemporaryFile() as secret_file:
        secret_file.write(new_secret)
        # flush so the oc process sees the full content when reading the file
        secret_file.flush()
        exec_cmd(
            f"oc set data secret/pull-secret -n openshift-config --from-file=.dockerconfigjson={secret_file.name}"
        )
    def test_monitoring_after_restarting_prometheus_pod(self, pods):
        """
        Test case to validate prometheus pod restart
        should not have any functional impact

        Every prometheus pod is force-deleted; once pods with the
        'app=prometheus' selector are Running again, the pod must reference
        the same PVC claim as before. Finally the PVC data of every pod in
        ``pods`` must be collected on prometheus.

        """

        # Get the prometheus pod
        prometheus_pod_obj = pod.get_all_pods(
            namespace=defaults.OCS_MONITORING_NAMESPACE, selector=["prometheus"]
        )

        for pod_object in prometheus_pod_obj:
            # Get the pvc which mounted on prometheus pod
            pod_info = pod_object.get()
            pvc_name = pod_info["spec"]["volumes"][0]["persistentVolumeClaim"][
                "claimName"
            ]

            # Restart the prometheus pod
            pod_object.delete(force=True)
            pod_ocp = ocp.OCP(
                kind=constants.POD, namespace=defaults.OCS_MONITORING_NAMESPACE
            )
            assert pod_ocp.wait_for_resource(
                condition="Running", selector="app=prometheus", timeout=60
            )

            # Check the same pvc is mounted on new pod
            pod_info = pod_object.get()
            new_pvc_name = pod_info["spec"]["volumes"][0][
                "persistentVolumeClaim"
            ]["claimName"]
            # Equality, not "in": a substring of the old claim name must
            # not count as the same PVC
            assert (
                new_pvc_name == pvc_name
            ), f"Old pvc not found after restarting the prometheus pod {pod_object.name}"

        for pod_obj in pods:
            assert check_pvcdata_collected_on_prometheus(
                pod_obj.pvc.name
            ), f"On prometheus pod for created pvc {pod_obj.pvc.name} related data is not collected"
Example #17
0
File: node.py Project: leseb/ocs-ci
def delete_and_create_osd_node_aws_ipi(osd_node_name):
    """
    Unschedule, drain and delete osd node, and creating a new osd node.
    At the end of the function there should be the same number of osd nodes as
    it was in the beginning, and also ceph health should be OK.
    This function is for AWS IPI.

    Args:
        osd_node_name (str): the name of the osd node

    Raises:
        RuntimeError: If no machine sharing the name prefix of the deleted
            machine is found after the deletion

    """
    # Unscheduling node
    unschedule_nodes([osd_node_name])
    # Draining Node
    drain_nodes([osd_node_name])
    log.info("Getting machine name from specified node name")
    machine_name = machine.get_machine_from_node_name(osd_node_name)
    log.info(f"Node {osd_node_name} associated machine is {machine_name}")
    log.info(
        f"Deleting machine {machine_name} and waiting for new machine to come up"
    )
    machine.delete_machine_and_check_state_of_new_spinned_machine(machine_name)
    new_machine_name = None
    for candidate in machine.get_machines():
        # Trimming is done to get just machine name
        # eg:- machine_name:- prsurve-40-ocs-43-kbrvf-worker-us-east-2b-nlgkr
        # After trimming:- prsurve-40-ocs-43-kbrvf-worker-us-east-2b
        # A plain prefix comparison is used so that characters of the name
        # are never interpreted as regular expression syntax.
        if machine_name.startswith(candidate.name[:-6]):
            new_machine_name = candidate.name
    if new_machine_name is None:
        raise RuntimeError(
            f"No new machine found for deleted machine {machine_name}"
        )
    machineset_name = machine.get_machineset_from_machine_name(
        new_machine_name)
    log.info("Waiting for new worker node to be in ready state")
    machine.wait_for_new_node_to_be_ready(machineset_name)
    new_node_name = get_node_from_machine_name(new_machine_name)
    log.info("Adding ocs label to newly created worker node")
    node_obj = ocp.OCP(kind='node')
    node_obj.add_label(resource_name=new_node_name,
                       label=constants.OPERATOR_NODE_LABEL)
    log.info(f"Successfully labeled {new_node_name} with OCS storage label")
Example #18
0
def svt_cleanup():
    """
    Removes cloned SVT project and virtual environment and Projects
    Created while running SVT

    Removing the local directories is best effort: a failure is logged
    and the cleanup continues with the next step.

    Returns:
        bool: True if all projects were deleted, False if deleting the
            projects raised an exception

    """
    ns_obj = ocp.OCP(kind="namespace")
    # Each directory is handled on its own so that a failure on the first
    # one does not skip the second one. Only Exception is caught, so
    # KeyboardInterrupt and SystemExit are no longer swallowed.
    for local_dir in ("/tmp/svt", "/tmp/venv"):
        try:
            shutil.rmtree(local_dir)
        except Exception as err:
            log.error(
                f"Error while cleaning SVT project: "
                f"failed to remove {local_dir}: {err}"
            )

    project_list = [
        "cakephp-mysql0",
        "dancer-mysql0",
        "django-postgresql0",
        "eap64-mysql0",
        "nodejs-mongodb0",
        "rails-postgresql0",
        "tomcat8-mongodb0",
    ]
    try:
        # Reset namespace to default
        ocp.switch_to_default_rook_cluster_project()
        for project in project_list:
            run_cmd(f"oc delete project {project}")
            ns_obj.wait_for_delete(resource_name=project)
    except Exception as err:
        # Report the reason instead of failing silently
        log.error(f"Error while deleting SVT projects: {err}")
        return False
    return True
Example #19
0
def get_node_resource_utilization_from_adm_top(nodename=None,
                                               node_type='worker'):
    """
    Gets the node's cpu and memory utilization in percentage using adm top command.

    The output of 'oc adm top nodes' is parsed by whitespace separated
    columns (NAME, CPU(cores), CPU%, MEMORY(bytes), MEMORY%), so digits
    that are part of a node name cannot be mistaken for metric values.
    Nodes that are not listed, or whose percentages are not numeric, are
    left out of the result.

    Args:
        nodename (str) : The node name
        node_type (str) : The node type (e.g. master, worker), used only
            when nodename is not given

    Returns:
        dict : Node name and its cpu and memory utilization in
               percentage

    """

    node_names = [nodename] if nodename else [
        node.name for node in get_typed_nodes(node_type=node_type)
    ]
    obj = ocp.OCP()
    top_output_lines = obj.exec_oc_cmd(
        command='adm top nodes', out_yaml_format=False).split("\n")

    # Index the rows by their first column to match node names exactly
    # (a substring test would confuse e.g. "worker-1" with "worker-10")
    rows_by_name = {}
    for line in top_output_lines:
        columns = line.split()
        if len(columns) >= 5:
            rows_by_name[columns[0]] = columns

    utilization_dict = {}
    for node in node_names:
        columns = rows_by_name.get(node)
        if columns is None:
            continue
        cpu_utilization = columns[2].rstrip('%')
        memory_utilization = columns[4].rstrip('%')
        if not (cpu_utilization.isdigit() and memory_utilization.isdigit()):
            log.warning(
                f"Utilization of the node {node} is not available: {columns}"
            )
            continue
        log.info("The CPU utilized by the node "
                 f"{node} is {cpu_utilization}%")
        log.info("The memory utilized of the node "
                 f"{node} is {memory_utilization}%")
        utilization_dict[node] = {
            'cpu': int(cpu_utilization),
            'memory': int(memory_utilization)
        }
    return utilization_dict
Example #20
0
def check_mirroring_status_ok(replaying_images=None):
    """
    Check if mirroring status has health OK and expected number of replaying images

    The 'health', 'daemon_health' and 'image_health' fields of the
    mirroring status summary must each be exactly "OK". The number of
    replaying images is checked only when replaying_images is truthy.

    Args:
        replaying_images (int): Expected number of images in replaying state

    Returns:
        bool: True if status contains expected health and states values, False otherwise

    """
    cbp_obj = ocp.OCP(
        kind=constants.CEPHBLOCKPOOL,
        resource_name=constants.DEFAULT_CEPHBLOCKPOOL,
        namespace=constants.OPENSHIFT_STORAGE_NAMESPACE,
    )
    mirroring_status = cbp_obj.get().get("status").get("mirroringStatus").get("summary")
    logger.info(f"Mirroring status: {mirroring_status}")

    expected_value = "OK"
    for key in ("health", "daemon_health", "image_health"):
        current_value = mirroring_status.get(key)
        # Equality instead of "in": a substring test accepts "" or "O" and
        # raises TypeError when the key is missing (None)
        if current_value != expected_value:
            logger.warning(
                f"Unexpected {key} status. Current status is {current_value} but expected {expected_value}"
            )
            return False

    if replaying_images:
        key = "states"
        # Replaying images count can be higher due to presence of dummy images
        # There can be upto 2 dummy images in each ODF cluster
        expected_value = range(replaying_images, replaying_images + 3)
        current_value = mirroring_status.get("states").get("replaying")
        if current_value not in expected_value:
            logger.warning(
                f"Unexpected {key} status. Current status is {current_value} but expected {expected_value}"
            )
            return False

    return True
Example #21
0
def validate_cephfilesystem(fs_name):
    """
     Verify CephFileSystem exists at ceph and k8s

     The Ceph side is checked against every filesystem listed by
     'ceph fs ls', not only the first one.

     Args:
        fs_name (str): The name of the Ceph FileSystem

     Returns:
         bool: True if CephFileSystem is created at ceph and k8s side else
            will return False with valid msg i.e Failure cause
    """
    cfs_obj = ocp.OCP(
        kind=constants.CEPHFILESYSTEM,
        namespace=defaults.ROOK_CLUSTER_NAMESPACE
    )
    ct_pod = pod.get_ceph_tools_pod()
    logger.info(fs_name)
    out = ct_pod.exec_ceph_cmd(ceph_cmd="ceph fs ls")
    ceph_fs_names = [fs['name'] for fs in out] if out else []
    logger.info(ceph_fs_names)
    if fs_name not in ceph_fs_names:
        logger.error("FileSystem was not present at Ceph Side")
        return False
    logger.info("FileSystem got created from Ceph Side")

    result = cfs_obj.get(resource_name=fs_name)
    if not result['metadata']['name']:
        logger.error("Filesystem was not create at Kubernetes Side")
        return False
    logger.info("Filesystem got created from kubernetes Side")
    return True
Example #22
0
def wait_for_job_completion(namespace, timeout, error_msg):
    """
    This is a WORKAROUND of particular ocsci design choices: it waits
    for one pod in the namespace to reach the completed status, and then
    asks for the pods again to read the pod's name (it would be much
    better to wait for the job to finish instead, then ask for the name
    of the successful pod and use it to get logs ...)

    Args:
        namespace (str): namespace in which the pod is looked up
        timeout (int): timeout passed to the wait for the pod
        error_msg (str): message logged and stored in the exception when
            the wait times out

    Returns:
        str: name of Pod resource of the finished job

    Raises:
        TimeoutExpiredError: re-raised with its message set to error_msg

    """
    ocp_pod = ocp.OCP(kind="Pod", namespace=namespace)
    try:
        ocp_pod.wait_for_resource(
            resource_count=1,
            condition=constants.STATUS_COMPLETED,
            timeout=timeout,
            sleep=30,
        )
    except TimeoutExpiredError as timeout_err:
        # report some high level error as well
        logger.error(error_msg)
        # TODO: log both describe and the output from the fio pods, as DEBUG
        timeout_err.message = error_msg
        raise

    # identify pod of the completed job
    listing = ocp_pod.get()
    # explicit list of assumptions, if these assumptions are not met, the
    # code won't work and it either means that something went terribly
    # wrong or that the code needs to be changed
    assert listing['kind'] == "List"
    first_pod = listing['items'][0]
    assert first_pod['kind'] == "Pod"
    pod_name = first_pod['metadata']['name']
    logger.info(f"Identified pod name of the finished Job: {pod_name}")

    return pod_name
Example #23
0
def add_new_node_and_label_it(machineset_name):
    """
    Grow the given machine set by one replica and put the OCS operator
    label on the worker node that appears

    Args:
        machineset_name (str): Name of the machine set
    eg: add_new_node_and_label_it("new-tdesala-zlqzn-worker-us-east-2a")

    Returns:
        The first entry of the worker nodes that were not present before
        the replica count was increased
    """
    # Snapshot of the workers before scaling
    initial_nodes = tests.helpers.get_worker_nodes()
    log.info(f"Current available worker nodes are {initial_nodes}")

    # Current replica count of the machine set
    machineset_replica_count = machine.get_replica_count(machineset_name)
    log.info(
        f"{machineset_name} has replica count: {machineset_replica_count}")

    # Scale up by one
    log.info("Increasing the replica count by 1")
    target_count = machineset_replica_count + 1
    machine.add_node(machineset_name, count=target_count)
    log.info(f"{machineset_name} now has replica count: {target_count}")

    # Block until the new node is ready
    log.info("Waiting for the new node to be in ready state")
    machine.wait_for_new_node_to_be_ready(machineset_name)

    # The new node is whatever was not in the initial snapshot
    nodes_after_new_spun_node = tests.helpers.get_worker_nodes()
    new_spun_node = list(set(nodes_after_new_spun_node) - set(initial_nodes))
    log.info(f"New spun node is {new_spun_node}")

    # Label it
    labeled_node = new_spun_node[0]
    ocp.OCP(kind='node').add_label(
        resource_name=labeled_node,
        label=constants.OPERATOR_NODE_LABEL,
    )
    log.info(f"Successfully labeled {new_spun_node} with OCS storage label")
    return labeled_node
Example #24
0
def rsync_kubeconf_to_node(node):
    """
    Copy the directory holding the kubeconfig to /home/core/ on an OCP node

    The copy happens when listing /home/core/ shows no 'auth' entry, or
    when listing /home/core/auth shows no 'kubeconfig' entry.

    Args:
        node (str): OCP node to copy kubeconfig if not present

    """
    kubeconfig_file = os.path.join(config.ENV_DATA['cluster_path'],
                                   config.RUN['kubeconfig_location'])
    kubeconfig_dir = os.path.dirname(kubeconfig_file)
    master_list = get_master_nodes()
    ocp_obj = ocp.OCP()
    node_path = '/home/core/'

    def listing(path):
        # NOTE(review): the presence check runs on the first master node,
        # not on ``node`` - confirm this is intended.
        return ocp_obj.exec_oc_debug_cmd(
            node=master_list[0], cmd_list=[f"ls {path}"])

    # The second listing is only executed when 'auth' is present
    missing = (
        'auth' not in listing(node_path)
        or 'kubeconfig' not in listing(f"{node_path}auth")
    )
    if missing:
        ocp.rsync(
            src=kubeconfig_dir, dst=f"{node_path}", node=node, dst_node=True)
Example #25
0
    def factory(db2u_project_name, ldap_r_n, ldap_r_p, db2u_r_n, db2u_r_p):
        """
        Record the project and release names in the enclosing scope's
        lists and create the LDAP and DB2U password secrets.

        Args:
            db2u_project_name (str): Name of the db2u project.
            ldap_r_n (str): LDAP release name.
            ldap_r_p (str): LDAP release password.
            db2u_r_n (str): DB2U release name
            db2u_r_p (str): DB2U release password
        """
        db2u_project.append(db2u_project_name)
        ocp_proj.append(ocp.OCP(namespace=db2u_project_name))
        temp_ldap_r_n.append(ldap_r_n)
        temp_db2u_r_n.append(db2u_r_n)

        # NOTE(review): the first recorded OCP object is always used, not
        # the one appended by this call - confirm this is intended.
        ldap_secret_cmd = (
            f"create secret generic {ldap_r_n}-db2u-ldap-bluadmin "
            f"--from-literal=password={ldap_r_p}"
        )
        log.info("Creating LDAP secrets")
        ocp_proj[0].exec_oc_cmd(command=ldap_secret_cmd)

        db2u_secret_cmd = (
            f"create secret generic {db2u_r_n}-db2u-instance "
            f"--from-literal=password={db2u_r_p}"
        )
        log.info("Creating DB2U secrets")
        ocp_proj[0].exec_oc_cmd(command=db2u_secret_cmd)
Example #26
0
 def run_memory_leak_in_bg():
     """
     Function to run memory leak in background thread

     While get_flag_status() reports 'running', the 'top' output of every
     worker node is collected and each line containing "ceph-osd" is
     appended, prefixed with the current timestamp, to
     /tmp/<worker>-top-output.txt.

     Memory leak data is written in below format
     date time PID USER PR NI VIRT RES SHR S %CPU %MEM TIME+ COMMAND
     """
     oc = ocp.OCP(namespace=config.ENV_DATA['cluster_namespace'])
     while get_flag_status() == 'running':
         for worker in helpers.get_worker_nodes():
             filename = f"/tmp/{worker}-top-output.txt"
             top_cmd = f"debug nodes/{worker} -- chroot /host top -n 2 b"
             top_output = str(
                 oc.exec_oc_cmd(command=top_cmd, out_yaml_format=False))
             # The output is filtered in memory; it used to be written to
             # and read back from the fixed shared path /tmp/file.txt.
             osd_lines = [
                 line for line in top_output.splitlines(keepends=True)
                 if "ceph-osd" in line
             ]
             if not osd_lines:
                 # Do not create an empty output file
                 continue
             with open(filename, "a+") as top_file:
                 for line in osd_lines:
                     top_file.write(str(datetime.now()) + ' ' + line)
Example #27
0
def check_vr_state(state, namespace):
    """
    Check if all VR in the given namespace are in expected state

    A VR matches when both its spec 'replicationState' and its status
    'state' equal the expected state, compared case-insensitively.

    Args:
        state (str): The VR state to check for (e.g. 'primary', 'secondary')
        namespace (str): the namespace of the VR resources

    Returns:
        bool: True if all VR are in expected state, False otherwise

    """
    vr_obj = ocp.OCP(kind=constants.VOLUME_REPLICATION, namespace=namespace)
    vr_list = get_all_vrs(namespace)
    expected_state = state.lower()

    vr_state_mismatch = []
    for vr in vr_list:
        # Fetch the resource once so that spec and status come from the
        # same snapshot and only one call per VR is made
        vr_data = vr_obj.get(vr)
        desired_state = vr_data.get("spec").get("replicationState")
        current_state = vr_data.get("status").get("state")
        logger.info(
            f"VR: {vr} desired state is {desired_state}, current state is {current_state}"
        )

        if (
            desired_state.lower() != expected_state
            or current_state.lower() != expected_state
        ):
            vr_state_mismatch.append(vr)

    if vr_state_mismatch:
        logger.warning(
            f"Following {len(vr_state_mismatch)} VR are not in expected {state} state: {vr_state_mismatch}"
        )
        return False

    logger.info(f"All {len(vr_list)} VR are in expected state {state}")
    return True
Example #28
0
def change_registry_backend_to_ocs():
    """
    Switch the image registry storage to a CephFS backed RWX PVC.

    Creates the 100Gi 'registry-cephfs-rwx-pvc' PVC, patches the registry
    config to use it as storage, sets managementState to Managed on
    platforms outside constants.CLOUD_PLATFORMS, and finally validates the
    registry pod status and the PVC mount.

    Raises:
        AssertionError: When failure in change of registry backend to OCS

    """
    registry_pvc = helpers.create_pvc(
        sc_name=f"{constants.DEFAULT_STORAGECLASS_CEPHFS}",
        pvc_name='registry-cephfs-rwx-pvc',
        namespace=constants.OPENSHIFT_IMAGE_REGISTRY_NAMESPACE,
        size='100Gi',
        access_mode=constants.ACCESS_MODE_RWX,
    )
    helpers.wait_for_resource_state(registry_pvc, 'Bound')

    ocp_obj = ocp.OCP(
        kind=constants.CONFIG,
        namespace=constants.OPENSHIFT_IMAGE_REGISTRY_NAMESPACE,
    )
    storage_patch = f'[{{"op": "add", "path": "/spec/storage", "value": {{"pvc": {{"claim": "{registry_pvc.name}"}}}}}}]'
    patched = ocp_obj.patch(
        resource_name=constants.IMAGE_REGISTRY_RESOURCE_NAME,
        params=storage_patch,
        format_type='json',
    )
    assert patched, "Registry pod storage backend to OCS is not success"

    on_cloud_platform = (
        config.ENV_DATA['platform'] in constants.CLOUD_PLATFORMS
    )
    if not on_cloud_platform:
        run_cmd(
            f'oc patch {constants.IMAGE_REGISTRY_CONFIG} --type merge -p '
            f'\'{{"spec":{{"managementState": "Managed"}}}}\''
        )
        logger.info(
            "Waiting 30 seconds after change managementState of image-registry."
        )
        time.sleep(30)

    # Registry pod must be healthy and must have the PVC mounted
    validate_registry_pod_status()
    validate_pvc_mount_on_registry_pod()
Example #29
0
def mcg_workload_job(fio_job_dict_mcg, fio_configmap_dict_mcg, fio_conf_mcg,
                     fio_project_mcg, tmp_path, request):
    """
    Create the kubernetes job that should utilize MCG during upgrade and
    register its deletion as a finalizer on ``request``.

    The fio configuration is stored in the config map dict under
    data['workload.fio'] (the passed dict is modified in place).

    Returns:
        object: Job object

    """
    fio_configmap_dict_mcg["data"]["workload.fio"] = fio_conf_mcg
    job_name = fio_job_dict_mcg['metadata']['name']

    log.info(f"Creating job {job_name}")
    job_file = ObjectConfFile(
        "fio_continuous",
        [fio_configmap_dict_mcg, fio_job_dict_mcg],
        fio_project_mcg,
        tmp_path,
    )

    # deploy the Job to the cluster and start it
    job_file.create()
    log.info(f"Job {job_name} created")

    # wrap the created job resource into an OCS object
    job_data = ocp.OCP(
        kind=constants.JOB,
        namespace=fio_project_mcg.namespace,
    ).get(resource_name=job_name)
    job = OCS(**job_data)

    def teardown():
        """
        Delete mcg job and wait until it is gone
        """
        job.delete()
        job.ocp.wait_for_delete(job.name)

    request.addfinalizer(teardown)

    return job
Example #30
0
    def __init__(self):
        """
        Create a CephFS PVC and a pod using it, then fill it with test files.

        PVC and pod names are taken from constants.CSI_CEPHFS_POD_YAML and
        made unique with a UUID suffix. The 'teardown' CLI parameter in
        config.RUN is set to True as a side effect. The list returned by
        add_million_files() is stored in ``self.test_file_list``.
        """
        with open(constants.CSI_CEPHFS_POD_YAML, "r") as pod_fd:
            pod_info = yaml.safe_load(pod_fd)
        claim = pod_info["spec"]["volumes"][0]["persistentVolumeClaim"]
        # Make sure the pvc and pod names are unique, so AlreadyExists
        # exceptions are not thrown.
        pvc_name = claim["claimName"] + str(uuid.uuid4())
        self.pod_name = pod_info["metadata"]["name"] + str(uuid.uuid4())
        config.RUN["cli_params"]["teardown"] = True
        cluster_namespace = config.ENV_DATA["cluster_namespace"]

        self.cephfs_pvc = helpers.create_pvc(
            sc_name=constants.DEFAULT_STORAGECLASS_CEPHFS,
            namespace=cluster_namespace,
            pvc_name=pvc_name,
            size=SIZE,
        )
        helpers.wait_for_resource_state(
            self.cephfs_pvc, constants.STATUS_BOUND, timeout=1200
        )

        self.cephfs_pod = helpers.create_pod(
            interface_type=constants.CEPHFILESYSTEM,
            namespace=cluster_namespace,
            pvc_name=pvc_name,
            pod_name=self.pod_name,
        )
        helpers.wait_for_resource_state(
            self.cephfs_pod, constants.STATUS_RUNNING, timeout=300
        )
        logging.info("pvc and cephfs pod created")

        self.ocp_obj = ocp.OCP(
            kind=constants.POD,
            namespace=cluster_namespace,
        )
        self.test_file_list = add_million_files(self.pod_name, self.ocp_obj)
        logging.info("cephfs test files created")