def label_nodes(request):
    """
    Fixture to label the node(s) that will run the application pod.
    These will be all worker nodes that do not run the OCS cluster.
    """
    def teardown():
        log.info('Clear label from worker (Application) nodes')
        # Getting all Application nodes
        app_nodes = machine.get_labeled_nodes(constants.APP_NODE_LABEL)
        helpers.remove_label_from_worker_node(app_nodes,
                                              constants.APP_NODE_LABEL)

    request.addfinalizer(teardown)

    # Getting all OCS nodes (to verify the app pod will not run on them)
    ocs_nodes = machine.get_labeled_nodes(constants.OPERATOR_NODE_LABEL)
    # Add label to the worker nodes
    worker_nodes = helpers.get_worker_nodes()
    # Getting list of free nodes
    free_nodes = list(set(worker_nodes) - set(ocs_nodes))

    log.info('Adding the app-node label to Non-OCS workers')
    log.debug(f'The Workers nodes are : {worker_nodes}')
    log.debug(f'The OCS nodes are : {ocs_nodes}')
    log.debug(f'The free nodes are : {free_nodes}')

    assert free_nodes, \
        'Did not find any worker to run on, please deploy another worker'

    helpers.label_worker_node(free_nodes, constants.APP_NODE_LABEL,
                              constants.VDBENCH_NODE_LABEL)

    return
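
A minimal usage sketch of this fixture in a test (the test name is hypothetical; it assumes the fixture is collected from a conftest.py and the same machine/constants imports as above):

def test_app_pod_placement(label_nodes):
    # The fixture has already labeled the non-OCS workers, so any pod that
    # selects constants.APP_NODE_LABEL should land on one of them.
    app_nodes = machine.get_labeled_nodes(constants.APP_NODE_LABEL)
    assert app_nodes, 'Expected at least one labeled application node'
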
Example No. 2
    def cleanup(self):
        """
        Function to tear down
        """
        # Delete all pods, pvcs and namespaces
        for namespace in self.namespace_list:
            delete_objs_parallel(
                obj_list=pod.get_all_pods(namespace=namespace.namespace),
                namespace=namespace.namespace,
                kind=self.kind,
            )
            delete_objs_parallel(
                obj_list=pvc.get_all_pvc_objs(namespace=namespace.namespace),
                namespace=namespace.namespace,
                kind=constants.PVC,
            )
            ocp = OCP(kind=constants.NAMESPACE)
            ocp.delete(resource_name=namespace.namespace)

        # Remove scale label from worker nodes in cleanup
        scale_workers = machine.get_labeled_nodes(constants.SCALE_LABEL)
        helpers.remove_label_from_worker_node(node_list=scale_workers,
                                              label_key="scale-label")

        # Delete the machinesets, which also deletes their nodes (aws-ipi platform)
        if self.ms_name:
            for name in self.ms_name:
                machine.delete_custom_machineset(name)
Example No. 3
def uninstall_lso(lso_sc):
    """
    Function uninstalls local-volume objects from OCS cluster

    """
    ocp_obj = ocp.OCP()

    sc_obj = (ocp.OCP(kind=constants.STORAGECLASS,
                      resource_name=lso_sc,
                      namespace=config.ENV_DATA['local_storage_namespace']))

    lv_name = sc_obj.get().get('metadata').get('labels').get(
        'local.storage.openshift.io/owner-name')
    lv_obj = (ocp.OCP(kind=constants.LOCAL_VOLUME,
                      resource_name=lv_name,
                      namespace=config.ENV_DATA['local_storage_namespace']))

    log.info(
        f"Local storage was found. Using storage class: {lso_sc}, local volume: {lv_name}"
    )

    device_list = lv_obj.get().get('spec').get('storageClassDevices')[0].get(
        'devicePaths')
    storage_node_list = get_labeled_nodes(constants.OPERATOR_NODE_LABEL)

    pv_obj_list = (ocp.OCP(
        kind=constants.PV,
        selector=f'storage.openshift.com/local-volume-owner-name={lv_name}',
        namespace=config.ENV_DATA['local_storage_namespace']))

    log.info("Deleting local volume PVs")
    for pv in pv_obj_list.get().get('items'):
        log.info(f"deleting pv {pv.get('metadata').get('name')}")
        pv_obj_list.delete(resource_name=pv.get('metadata').get('name'))

    log.info("Removing local volume from storage nodes")
    for node in storage_node_list:
        log.info(f"Removing from node {node}")
        ocp_obj.exec_oc_debug_cmd(
            node=node, cmd_list=[f"rm -rfv /mnt/local-storage/{lso_sc}"])

    disk_list_str = ""
    for device in device_list:
        disk_list_str = disk_list_str + f" {device}"
    disk_list_str = f"DISKS=\"{disk_list_str}\""
    log.info(f"The disk list is {disk_list_str}")

    # sgdisk --zap-all wipes the GPT/MBR partition tables on each disk,
    # leaving the devices clean for a future deployment
    sgd_command = "for disk in $DISKS; do sgdisk --zap-all $disk;done"
    log.info("Wiping disks on storage nodes")
    for node in storage_node_list:
        log.info(f"Wiping on node {node}")
        cmd_list = [disk_list_str, sgd_command]
        ocp_obj.exec_oc_debug_cmd(node=node, cmd_list=cmd_list)

    log.info(f"Deleting storage class {lso_sc}")
    sc_obj.delete(resource_name=lso_sc)

    log.info(f"Deleting local volume {lv_name}")
    lv_obj.delete(resource_name=lv_name)
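
A usage sketch consistent with the uninstall flow shown later in this listing (Example No. 16): the LSO-backed storage class name is read from the StorageCluster spec before the StorageCluster is deleted.

# storage_cluster is an ocp.OCP(kind=constants.STORAGECLUSTER, ...) handle
lso_sc = storage_cluster.get()['spec']['storageDeviceSets'][0][
    'dataPVCTemplate']['spec']['storageClassName']
uninstall_lso(lso_sc)
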
Example No. 4
def delete_worker_node():
    # Remove scale label from worker nodes
    scale_workers = machine.get_labeled_nodes(constants.SCALE_LABEL)
    if scale_workers:
        helpers.remove_label_from_worker_node(node_list=scale_workers,
                                              label_key="scale-label")
    # Delete the machinesets (ms_name is the module-level list populated by
    # add_worker_node); deleting a machineset also removes its nodes
    if ms_name:
        for name in ms_name:
            machine.delete_custom_machineset(name)
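
These scale helpers are typically used as a pair; a sketch (run_scale_workload is a hypothetical placeholder for the actual scale test body):

add_worker_node(instance_type="m5.4xlarge")
try:
    run_scale_workload()  # hypothetical scale test body
finally:
    # Remove the scale label and the machinesets created above
    delete_worker_node()
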
Example No. 5
def is_node_labeled(node_name, label=constants.OPERATOR_NODE_LABEL):
    """
    Check if the node is labeled with a specified label.

    Args:
        node_name (str): The node name to check if it has the specific label
        label (str): The name of the label. Default value is the OCS label.

    Returns:
        bool: True if the node is labeled with the specified label. False otherwise

    """
    node_names_with_label = machine.get_labeled_nodes(label=label)
    return node_name in node_names_with_label
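
A minimal usage sketch (the node name is hypothetical):

if not is_node_labeled("compute-0"):
    log.info("compute-0 does not carry the OCS operator label yet")
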
Example No. 6
    def test_rolling_reboot_node(self, node_type):
        """
        Test rolling reboot of nodes
        """

        # Get info from SCALE_DATA_FILE for validation
        if os.path.exists(SCALE_DATA_FILE):
            file_data = templating.load_yaml(SCALE_DATA_FILE)
            namespace = file_data.get("NAMESPACE")
            pod_scale_list = file_data.get("POD_SCALE_LIST")
            pvc_scale_list = file_data.get("PVC_SCALE_LIST")
        else:
            raise FileNotFoundError(f"{SCALE_DATA_FILE} not found")

        node_list = list()

        # Rolling reboot nodes
        if node_type == constants.WORKER_MACHINE:
            tmp_list = get_nodes(node_type=node_type)
            ocs_node_list = machine.get_labeled_nodes(constants.OPERATOR_NODE_LABEL)
            for tmp in tmp_list:
                if tmp.name in ocs_node_list:
                    node_list.append(tmp)
        else:
            node_list = get_nodes(node_type=node_type)

        factory = platform_nodes.PlatformNodesFactory()
        nodes = factory.get_nodes_platform()

        for node in node_list:
            nodes.restart_nodes(nodes=[node])
            scale_lib.validate_node_and_oc_services_are_up_after_reboot()

        # Validate storage pods are running
        wait_for_storage_pods()

        # Validate cluster health ok and all pods are running
        assert utils.ceph_health_check(
            delay=180
        ), "Ceph health in bad state after node reboots"

        # Validate all PVCs from namespace are in Bound state
        assert scale_lib.validate_all_pvcs_and_check_state(
            namespace=namespace, pvc_scale_list=pvc_scale_list
        )

        # Validate all PODs from namespace are up and running
        assert scale_lib.validate_all_pods_and_check_state(
            namespace=namespace, pod_scale_list=pod_scale_list
        )
Example No. 7
    def teardown():

        if with_ocs:
            return

        if m_set != '':
            log.info(f'Destroy {m_set}')
            machine.delete_custom_machineset(m_set)
        else:
            log.info('Clear label from worker (Application) nodes')
            # Getting all Application nodes
            app_nodes = machine.get_labeled_nodes(constants.APP_NODE_LABEL)
            log.debug(f'The application nodes are : {app_nodes}')
            helpers.remove_label_from_worker_node(app_nodes,
                                                  constants.VDBENCH_NODE_LABEL)
Example No. 8
def get_ocs_nodes(num_of_nodes=None):
    """
    Gets the ocs nodes

    Args:
        num_of_nodes (int): The number of ocs nodes to return. If not specified,
            it returns all the ocs nodes.

    Returns:
        list: List of ocs nodes

    """
    ocs_node_names = machine.get_labeled_nodes(constants.OPERATOR_NODE_LABEL)
    ocs_nodes = get_node_objs(ocs_node_names)
    num_of_nodes = num_of_nodes or len(ocs_nodes)

    return ocs_nodes[:num_of_nodes]
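
A short usage sketch (the selection of two nodes is illustrative):

# Pick the first two OCS nodes, e.g. for a node-disruption scenario
ocs_nodes = get_ocs_nodes(num_of_nodes=2)
for ocs_node in ocs_nodes:
    log.info(f"Selected OCS node: {ocs_node.name}")
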
Example No. 9
def uninstall_lso(lso_sc):
    """
    Function uninstalls local-volume objects from OCS cluster

    """
    ocp_obj = ocp.OCP()
    sc_obj = ocp.OCP(
        kind=constants.STORAGECLASS,
        resource_name=lso_sc,
        namespace=config.ENV_DATA["local_storage_namespace"],
    )

    log.info("Deleting local volume set")
    lvs_obj = ocp.OCP(
        kind=constants.LOCAL_VOLUME_SET,
        namespace=config.ENV_DATA["local_storage_namespace"],
    )
    lvs_obj.delete(constants.LOCAL_VOLUME_SET_YAML)

    pv_obj_list = ocp.OCP(
        kind=constants.PV,
        namespace=config.ENV_DATA["local_storage_namespace"],
    )

    log.info("Deleting local volume PVs")
    for pv in pv_obj_list.get().get("items"):
        log.info(f"deleting pv {pv.get('metadata').get('name')}")
        pv_obj_list.delete(resource_name=pv.get("metadata").get("name"))

    log.info(f"Deleting storage class {lso_sc}")
    sc_obj.delete(resource_name=lso_sc)

    log.info("deleting local volume discovery")
    lvd_obj = ocp.OCP(
        kind=constants.LOCAL_VOLUME_DISCOVERY,
        namespace=config.ENV_DATA["local_storage_namespace"],
    )
    lvd_obj.delete(yaml_file=constants.LOCAL_VOLUME_DISCOVERY_YAML)

    log.info("Removing local volume from storage nodes")
    storage_node_list = get_labeled_nodes(constants.OPERATOR_NODE_LABEL)
    for node in storage_node_list:
        log.info(f"Removing from node {node}")
        ocp_obj.exec_oc_debug_cmd(
            node=node, cmd_list=[f"rm -rfv /mnt/local-storage/{lso_sc}"]
        )
Example No. 10
def check_and_add_enough_worker(worker_count):
    """
    Function to check if there are enough workers available to scale pods.
    If there are not enough workers, workers will be added on supported platforms.
    Function also adds scale label to the respective worker nodes.

    Args:
        worker_count (int): Expected worker count to be present in the setup

    Returns:
        bool: True if there is enough worker count, else an exception is raised.

    """
    # Check whether OCS workers should be used for scaling app pods and,
    # if so, label the chosen workers with the scale label
    worker_list = node.get_worker_nodes()
    ocs_worker_list = machine.get_labeled_nodes(constants.OPERATOR_NODE_LABEL)
    scale_worker = machine.get_labeled_nodes(constants.SCALE_LABEL)
    if config.RUN.get("use_ocs_worker_for_scale"):
        if not scale_worker:
            helpers.label_worker_node(node_list=worker_list,
                                      label_key="scale-label",
                                      label_value="app-scale")
    else:
        if not scale_worker:
            for node_item in ocs_worker_list:
                worker_list.remove(node_item)
            if worker_list:
                helpers.label_worker_node(
                    node_list=worker_list,
                    label_key="scale-label",
                    label_value="app-scale",
                )
    scale_worker_list = machine.get_labeled_nodes(constants.SCALE_LABEL)
    logging.info(f"Print existing scale worker {scale_worker_list}")

    # Check if there is enough nodes to continue scaling of app pods
    if len(scale_worker_list) >= worker_count:
        logging.info(f"Setup has the expected worker count {worker_count} "
                     "to continue scaling of pods")
        return True
    else:
        logging.info(
            "There are not enough workers in the setup, will add workers "
            "on the automation-supported platforms")
        # Add enough worker for AWS
        if (config.ENV_DATA["deployment_type"] == "ipi"
                and config.ENV_DATA["platform"].lower() == "aws"):
            # Create machineset for app worker nodes on each aws zone
            # Each zone will have one app worker node
            ms_name = list()
            labels = [("node-role.kubernetes.io/app", "app-scale")]
            for obj in machine.get_machineset_objs():
                if "app" in obj.name:
                    ms_name.append(obj.name)
            if not ms_name:
                if len(machine.get_machineset_objs()) == 3:
                    for zone in ["a", "b", "c"]:
                        ms_name.append(
                            machine.create_custom_machineset(
                                instance_type="m5.4xlarge",
                                labels=labels,
                                zone=zone,
                            ))
                else:
                    ms_name.append(
                        machine.create_custom_machineset(
                            instance_type="m5.4xlarge",
                            labels=labels,
                            zone="a",
                        ))
                for ms in ms_name:
                    machine.wait_for_new_node_to_be_ready(ms)
            if len(ms_name) == 3:
                exp_count = int(worker_count / 3)
            else:
                exp_count = worker_count
            for name in ms_name:
                machine.add_node(machine_set=name, count=exp_count)
            for ms in ms_name:
                machine.wait_for_new_node_to_be_ready(ms)
            worker_list = node.get_worker_nodes()
            ocs_worker_list = machine.get_labeled_nodes(
                constants.OPERATOR_NODE_LABEL)
            scale_label_worker = machine.get_labeled_nodes(
                constants.SCALE_LABEL)
            ocs_worker_list.extend(scale_label_worker)
            final_list = list(dict.fromkeys(ocs_worker_list))
            for node_item in final_list:
                if node_item in worker_list:
                    worker_list.remove(node_item)
            if worker_list:
                helpers.label_worker_node(
                    node_list=worker_list,
                    label_key="scale-label",
                    label_value="app-scale",
                )
            return True
        elif (config.ENV_DATA["deployment_type"] == "upi"
              and config.ENV_DATA["platform"].lower() == "vsphere"):
            raise UnsupportedPlatformError(
                "Unsupported Platform to add worker")
        elif (config.ENV_DATA["deployment_type"] == "upi"
              and config.ENV_DATA["platform"].lower() == "baremetal"):
            raise UnsupportedPlatformError(
                "Unsupported Platform to add worker")
        elif (config.ENV_DATA["deployment_type"] == "upi"
              and config.ENV_DATA["platform"].lower() == "azure"):
            raise UnsupportedPlatformError(
                "Unsupported Platform to add worker")
        else:
            raise UnavailableResourceException(
                "There are not enough worker nodes to continue app pod scaling")
Example No. 11
def uninstall_ocs():
    """
    The function uninstalls the OCS operator from an OpenShift
    cluster and removes all its settings and dependencies

    """
    ocp_obj = ocp.OCP()
    provisioners = constants.OCS_PROVISIONERS

    # List the storage classes that use OCS provisioners.
    # Build new lists instead of removing items from the list being
    # iterated, which would skip elements.
    sc_list = [
        sc for sc in get_all_storageclass()
        if sc.get('provisioner') in provisioners
    ]
    sc_name_list = [sc.get('metadata').get('name') for sc in sc_list]

    # Query for PVCs and OBCs that are using the storage class provisioners listed in the previous step.
    pvc_to_delete = []
    pvc_name_list = []
    for sc in sc_name_list:
        pvc_to_delete.extend(get_all_pvcs_in_storageclass(sc))

    # Ignore all noobaa PVCs and build the name list
    # (filter into a new list rather than removing while iterating)
    pvc_to_delete = [pvc for pvc in pvc_to_delete if "noobaa" not in pvc.name]
    pvc_name_list = [pvc.name for pvc in pvc_to_delete]

    pods_to_delete = []
    all_pods = get_all_pods()  # default openshift-storage namespace
    all_pods.extend(get_all_pods(namespace=constants.OPENSHIFT_IMAGE_REGISTRY_NAMESPACE))
    all_pods.extend(get_all_pods(namespace=constants.OPENSHIFT_MONITORING_NAMESPACE))

    for pod_obj in all_pods:
        try:
            pvc_name = get_pvc_name(pod_obj)
        except UnavailableResourceException:
            continue
        if pvc_name in pvc_name_list:
            pods_to_delete.append(pod_obj)

    log.info("Removing monitoring stack from OpenShift Container Storage")
    remove_monitoring_stack_from_ocs()

    log.info("Removing OpenShift Container Platform registry from OpenShift Container Storage")
    remove_ocp_registry_from_ocs(config.ENV_DATA['platform'])

    log.info("Removing the cluster logging operator from OpenShift Container Storage")
    csv = ocp.OCP(
        kind=constants.CLUSTER_SERVICE_VERSION,
        namespace=constants.OPENSHIFT_LOGGING_NAMESPACE
    )
    logging_csv = csv.get().get('items')
    if logging_csv:
        clusterlogging_obj = ocp.OCP(
            kind=constants.CLUSTER_LOGGING, namespace=constants.OPENSHIFT_LOGGING_NAMESPACE
        )
        clusterlogging_obj.delete(resource_name='instance')

    log.info("deleting pvcs")
    for pvc in pvc_to_delete:
        log.info(f"deleting pvc: {pvc.name}")
        pvc.delete()

    log.info("deleting pods")
    for pod in pods_to_delete:
        log.info(f"deleting pod {pod.name}")
        pod.delete()

    log.info("removing rook directory from nodes")
    nodes_list = get_labeled_nodes(constants.OPERATOR_NODE_LABEL)
    for node in nodes_list:
        log.info(f"removing rook from {node}")
        ocp_obj.exec_oc_debug_cmd(node=node, cmd_list=["rm -rf /var/lib/rook"])

    log.info("Deleting the storage classes that use openshift-storage provisioners")
    for storage_class in sc_list:
        log.info(f"deleting storage class {storage_class.get('metadata').get('name')}")
        sc_obj = ocp.OCP(kind=constants.STORAGECLASS)
        sc_obj.delete(resource_name=storage_class.get('metadata').get('name'))

    log.info("Unlabeling storage nodes")
    nodes_list = get_all_nodes()
    for node in nodes_list:
        node_obj = ocp.OCP(kind=constants.NODE, resource_name=node)
        node_obj.add_label(resource_name=node, label=constants.OPERATOR_NODE_LABEL[:-3] + '-')
        node_obj.add_label(resource_name=node, label=constants.TOPOLOGY_ROOK_LABEL + '-')

    log.info("deleting storageCluster object")
    storage_cluster = ocp.OCP(kind=constants.STORAGECLUSTER, resource_name=constants.DEFAULT_CLUSTERNAME)
    storage_cluster.delete(resource_name=constants.DEFAULT_CLUSTERNAME)

    log.info("removing CRDs")
    crd_list = ['backingstores.noobaa.io', 'bucketclasses.noobaa.io', 'cephblockpools.ceph.rook.io',
                'cephfilesystems.ceph.rook.io', 'cephnfses.ceph.rook.io',
                'cephobjectstores.ceph.rook.io', 'cephobjectstoreusers.ceph.rook.io', 'noobaas.noobaa.io',
                'ocsinitializations.ocs.openshift.io', 'storageclusterinitializations.ocs.openshift.io',
                'storageclusters.ocs.openshift.io', 'cephclusters.ceph.rook.io']
    for crd in crd_list:
        ocp_obj.exec_oc_cmd(f"delete crd {crd} --timeout=300m")

    log.info("deleting openshift-storage namespace")
    ocp_obj.delete_project('openshift-storage')
    ocp_obj.wait_for_delete('openshift-storage')
Example No. 12
def label_nodes(request, with_ocs):
    """
    Fixture to label the node(s) that will run the application pod.
    These will be all worker nodes that do not run the OCS cluster.
    """

    m_set = ''  # this will hold the name of the machineset that was added

    def teardown():

        ceph_health_check()

        if with_ocs:
            return

        if m_set != '':
            log.info(f'Destroy {m_set}')
            machine.delete_custom_machineset(m_set)
        else:
            log.info('Clear label from worker (Application) nodes')
            # Getting all Application nodes
            app_nodes = machine.get_labeled_nodes(constants.APP_NODE_LABEL)
            log.debug(f'The application nodes are : {app_nodes}')
            helpers.remove_label_from_worker_node(app_nodes,
                                                  constants.VDBENCH_NODE_LABEL)

    request.addfinalizer(teardown)

    if with_ocs:
        return

    # Add label to the worker nodes

    # Getting all OCS nodes (to verify the app pod will not run on them)
    ocs_nodes = machine.get_labeled_nodes(constants.OPERATOR_NODE_LABEL)
    worker_nodes = helpers.get_worker_nodes()
    # Getting list of free nodes
    free_nodes = list(set(worker_nodes) - set(ocs_nodes))

    if not free_nodes:
        # No free nodes -  Creating new machineset for application pods
        log.info('Adding new machineset, with worker for application pod')
        m_set = machine.create_custom_machineset(
            label=constants.APP_NODE_LABEL)
        machine.wait_for_new_node_to_be_ready(m_set)

        free_nodes = machine.get_labeled_nodes(
            f'node-role.kubernetes.io/app={constants.APP_NODE_LABEL}')

        # TODO: implement this for VMWare as well.

    log.info('Adding the app-node label to Non-OCS workers')
    log.debug(f'The Workers nodes are : {worker_nodes}')
    log.debug(f'The OCS nodes are : {ocs_nodes}')
    log.debug(f'The free nodes are : {free_nodes}')

    assert free_nodes, \
        'Did not find any worker to run on, please deploy another worker'

    helpers.label_worker_node(free_nodes, constants.APP_NODE_LABEL,
                              constants.VDBENCH_NODE_LABEL)

    return
Example No. 13
    def test_vdbench_workload(self, template, with_ocs, load, label_nodes,
                              ripsaw, servers, threads, blocksize, fileio,
                              samples, width, depth, files, file_size, runtime,
                              pause):
        """
        Run VDBench Workload

        Args :
            template (str) : Name of yaml file that will be used as a template
            with_ocs (bool) : This parameter will indicate if the test will
                              run on the same nodes as the OCS
            load (int) : load to run on the storage in percentage of the capacity.
            label_nodes (fixture) : This fixture labels the worker(s)
                                    that will be used for App. pod(s)
            ripsaw (fixture) : Fixture to deploy the ripsaw benchmarking operator
            servers (int) : Number of servers (pods) that will run the IO
            threads (int) : Number of threads that will run on each server
            blocksize (list - str): List of BlockSize - must add the 'K' to it
            fileio (str) : How to select file for the IO : random / sequential
            samples (int) : Number of time(s) to run each test
            width (int) : Width of directory tree to create
            depth (int) : Depth of directory tree to create
            files (int) : Number of files to create in each directory
            file_size (int) : File size (in MB) to create
            runtime (int) : Time (in Sec.) for each test iteration
            pause (int) : Time (in Min.) to pause between each test iteration.
        """
        log.info(f'going to use {template} as template')
        log.info("Apply Operator CRD")

        crd = 'resources/crds/ripsaw_v1alpha1_ripsaw_crd.yaml'
        ripsaw.apply_crd(crd)

        log.info('Running vdbench benchmark')
        if template:
            template = os.path.join(constants.TEMPLATE_VDBENCH_DIR, template)
        else:
            template = constants.VDBENCH_BENCHMARK_YAML
        sf_data = templating.load_yaml(template)

        target_results = template + 'Results'

        log.info('Calculating Storage size....')
        ceph_cluster = CephCluster()
        total_capacity = ceph_cluster.get_ceph_capacity()
        assert total_capacity > constants.VDBENCH_MIN_CAPACITY, (
            "Storage capacity is too low for performance testing")
        log.info(f'The Total usable capacity is {total_capacity}')

        if load:
            width = constants.VDBENCH_WIDTH
            depth = constants.VDBENCH_DEPTH
            file_size = constants.VDBENCH_FILE_SIZE
            capacity_per_pod = constants.VDBENCH_CAP_PER_POD
            total_dirs = width**depth
            log.info(f'The total dirs in the tree {total_dirs}')
            log.info(f'Going to run with {load} % of the capacity load.')
            tested_capacity = round(total_capacity * 1024 * load / 100)
            log.info(f'Tested capacity is {tested_capacity} MB')
            servers = round(tested_capacity / capacity_per_pod)
            """
            To spread the application pods evenly on all workers or application nodes and at least 2 app pods
            per node.
            """
            nodes = len(
                node.get_typed_nodes(node_type=constants.WORKER_MACHINE))
            if not with_ocs:
                nodes = len(
                    machine.get_labeled_nodes(
                        f'node-role.kubernetes.io/app={constants.APP_NODE_LABEL}'
                    ))
            log.info(f'Going to use {nodes} nodes for the test !')
            servers = round(servers / nodes) * nodes
            if servers < (nodes * 2):
                servers = nodes * 2

            files = round(tested_capacity / servers / total_dirs)
            total_files = round(files * servers * total_dirs)
            log.info(f'number of pods is {servers}')
            log.info(f'Going to create {total_files} files !')
            log.info(f'number of files in dir is {files}')
        """
            Setting up the parameters for this test
        """
        if servers:
            sf_data['spec']['workload']['args']['servers'] = servers
            target_results = target_results + '-' + str(servers)
        if threads:
            sf_data['spec']['workload']['args']['threads'] = threads
            target_results = target_results + '-' + str(threads)
        if fileio:
            sf_data['spec']['workload']['args']['fileio'] = fileio
            target_results = target_results + '-' + str(fileio)
        if samples:
            sf_data['spec']['workload']['args']['samples'] = samples
            target_results = target_results + '-' + str(samples)
        if width:
            sf_data['spec']['workload']['args']['width'] = width
            target_results = target_results + '-' + str(width)
        if depth:
            sf_data['spec']['workload']['args']['depth'] = depth
            target_results = target_results + '-' + str(depth)
        if files:
            sf_data['spec']['workload']['args']['files'] = files
            target_results = target_results + '-' + str(files)
        if file_size:
            sf_data['spec']['workload']['args']['file_size'] = file_size
            target_results = target_results + '-' + str(file_size)
        if runtime:
            sf_data['spec']['workload']['args']['runtime'] = runtime
            target_results = target_results + '-' + str(runtime)
        if pause:
            sf_data['spec']['workload']['args']['pause'] = pause
            target_results = target_results + '-' + str(pause)
        if len(blocksize) > 0:
            sf_data['spec']['workload']['args']['bs'] = blocksize
            target_results = target_results + '-' + '_'.join(blocksize)
        if with_ocs:
            if sf_data['spec']['workload']['args']['pin_server']:
                del sf_data['spec']['workload']['args']['pin_server']
        """
            Calculate the size of the volume to be tested. It should be at
            least twice the total size of the files, and at least 100Gi.
            Since file_size is in MB and vol_size needs to be in GB, an
            additional conversion is needed.
        """
        vol_size = int((files * total_dirs) * file_size * 1.3)
        log.info('number of files to create : {}'.format(
            int(files * (width**depth))))
        log.info(f'The size of all files is : {vol_size}MB')
        vol_size = int(vol_size / 1024)
        if vol_size < 100:
            vol_size = 100
        sf_data['spec']['workload']['args']['storagesize'] = f'{vol_size}Gi'

        log.debug(f'output of configuration file is {sf_data}')

        timeout = 86400  # 3600 (1H) * 24 (1D) = one day

        sf_obj = OCS(**sf_data)
        sf_obj.create()
        # wait for benchmark pods to get created - takes a while
        for bench_pod in TimeoutSampler(300, 10, get_pod_name_by_pattern,
                                        'vdbench-client', 'my-ripsaw'):
            try:
                if bench_pod[0] is not None:
                    vdbench_client_pod = bench_pod[0]
                    break
            except IndexError:
                log.info('Benchmark client pod not ready yet')

        bench_pod = OCP(kind='pod', namespace='my-ripsaw')
        log.info('Waiting for VDBench benchmark to Run')
        assert bench_pod.wait_for_resource(condition=constants.STATUS_RUNNING,
                                           resource_name=vdbench_client_pod,
                                           sleep=30,
                                           timeout=600)
        start_time = time.time()
        while True:
            logs = bench_pod.exec_oc_cmd(f'logs {vdbench_client_pod}',
                                         out_yaml_format=False)
            if 'Test Run Finished' in logs:
                log.info('VdBench Benchmark Completed Successfully')
                break

            if timeout < (time.time() - start_time):
                raise TimeoutError(
                    'Timed out waiting for benchmark to complete')
            time.sleep(30)

        # Get the results file from the benchmark pod and store it with the
        # test logs.
        # TODO: find the place of the actual test log and not in the parent
        #       logs path
        target_results = '{}/{}.tgz'.format(ocsci_log_path(), target_results)
        pod_results = constants.VDBENCH_RESULTS_FILE
        retrive_files_from_pod(vdbench_client_pod, target_results, pod_results)
Example No. 14
    def identify_and_add_nodes(self, scenario, num_of_nodes):
        """
        Fetches info about the worker nodes and adds nodes (if required)

        Args:
            scenario (str): Scenario of app pods running on OCS or dedicated nodes
                (eg., 'colocated', 'dedicated')
            num_of_nodes (int): number of nodes required for running test

        Returns:
            tuple: tuple containing:
                list: list of OCS node names
                list: list of non-OCS node names

        """
        nodes_to_add = 0
        initial_worker_nodes = node.get_worker_nodes()
        ocs_nodes = machine.get_labeled_nodes(constants.OPERATOR_NODE_LABEL)
        non_ocs_nodes = list(set(initial_worker_nodes) - set(ocs_nodes))

        if "colocated" in scenario and len(ocs_nodes) < num_of_nodes:
            nodes_to_add = num_of_nodes - len(initial_worker_nodes)

        if "dedicated" in scenario and len(non_ocs_nodes) < num_of_nodes:
            nodes_to_add = num_of_nodes - len(non_ocs_nodes)

        if nodes_to_add > 0:
            logger.info(f"{nodes_to_add} extra worker nodes needed")

            if config.ENV_DATA["deployment_type"] == "ipi":
                machine_name = random.choice(
                    machine.get_machines(
                        machine_type=constants.WORKER_MACHINE)).name
                machineset_name = machine.get_machineset_from_machine_name(
                    machine_name)
                node.add_new_node_and_label_it(
                    machineset_name=machineset_name,
                    num_nodes=nodes_to_add,
                    mark_for_ocs_label=False,
                )
            else:
                is_rhel = config.ENV_DATA.get(
                    "rhel_workers") or config.ENV_DATA.get("rhel_user")
                node_type = constants.RHEL_OS if is_rhel else constants.RHCOS
                node.add_new_node_and_label_upi(
                    node_type=node_type,
                    num_nodes=nodes_to_add,
                    mark_for_ocs_label=False,
                )

            new_worker_nodes = node.get_worker_nodes()
            new_nodes_added = list(
                set(new_worker_nodes) - set(initial_worker_nodes))
            assert (len(new_nodes_added) == nodes_to_add
                    ), "Extra nodes not added in the cluster"
            non_ocs_nodes += new_nodes_added

        if "colocated" in scenario and len(ocs_nodes) < num_of_nodes:
            logger.info("Adding OCS storage label to Non-OCS workers")
            node_obj = ocp.OCP(kind=constants.NODE)
            nodes_to_label = non_ocs_nodes[0:(num_of_nodes - len(ocs_nodes))]
            for node_name in nodes_to_label:
                node_obj.add_label(resource_name=node_name,
                                   label=constants.OPERATOR_NODE_LABEL)
                ocs_nodes.append(node_name)
            non_ocs_nodes = list(set(non_ocs_nodes) - set(ocs_nodes))

        logger.info(f"The OCS nodes are : {ocs_nodes}")
        logger.info(f"The Non-OCS nodes are: {non_ocs_nodes}")
        return ocs_nodes, non_ocs_nodes
Example No. 15
    def identify_and_add_nodes(self, scenario, num_of_nodes):
        """
        Fetches info about the worker nodes and adds nodes (if required)

        Args:
            scenario (str): Scenario of app pods running on OCS or dedicated nodes
                (eg., 'colocated', 'dedicated')
            num_of_nodes (int): number of nodes required for running test

        Returns:
            tuple: tuple containing:
                list: list of OCS node names
                list: list of non-OCS node names

        """
        nodes_to_add = 0
        initial_worker_nodes = node.get_worker_nodes()
        ocs_nodes = machine.get_labeled_nodes(constants.OPERATOR_NODE_LABEL)
        non_ocs_nodes = list(set(initial_worker_nodes) - set(ocs_nodes))

        if "colocated" in scenario and len(ocs_nodes) < num_of_nodes:
            nodes_to_add = num_of_nodes - len(initial_worker_nodes)

        if "dedicated" in scenario and len(non_ocs_nodes) < num_of_nodes:
            nodes_to_add = num_of_nodes - len(non_ocs_nodes)

        if nodes_to_add > 0:
            logger.info(f"{nodes_to_add} extra worker nodes needed")

            if config.ENV_DATA["deployment_type"] == "ipi":
                machine_name = machine.get_machine_from_node_name(
                    random.choice(initial_worker_nodes))
                machineset_name = machine.get_machineset_from_machine_name(
                    machine_name)
                machineset_replica_count = machine.get_replica_count(
                    machineset_name)
                machine.add_node(machineset_name,
                                 count=machineset_replica_count + nodes_to_add)
                logger.info("Waiting for the new node(s) to be in ready state")
                machine.wait_for_new_node_to_be_ready(machineset_name)
            else:
                if (config.ENV_DATA.get("platform").lower() ==
                        constants.VSPHERE_PLATFORM):
                    pytest.skip(
                        "Skipping add node in VSPHERE due to https://bugzilla.redhat.com/show_bug.cgi?id=1844521"
                    )
                is_rhel = config.ENV_DATA.get(
                    "rhel_workers") or config.ENV_DATA.get("rhel_user")
                node_type = constants.RHEL_OS if is_rhel else constants.RHCOS
                node.add_new_node_and_label_upi(
                    node_type=node_type,
                    num_nodes=nodes_to_add,
                    mark_for_ocs_label=False,
                )

            new_worker_nodes = node.get_worker_nodes()
            new_nodes_added = list(
                set(new_worker_nodes) - set(initial_worker_nodes))
            assert (len(new_nodes_added) == nodes_to_add
                    ), "Extra nodes not added in the cluster"
            non_ocs_nodes += new_nodes_added

        if "colocated" in scenario and len(ocs_nodes) < num_of_nodes:
            logger.info("Adding OCS storage label to Non-OCS workers")
            node_obj = ocp.OCP(kind=constants.NODE)
            nodes_to_label = non_ocs_nodes[0:(num_of_nodes - len(ocs_nodes))]
            for node_name in nodes_to_label:
                node_obj.add_label(resource_name=node_name,
                                   label=constants.OPERATOR_NODE_LABEL)
                ocs_nodes.append(node_name)
            non_ocs_nodes = list(set(non_ocs_nodes) - set(ocs_nodes))

        logger.info(f"The OCS nodes are : {ocs_nodes}")
        logger.info(f"The Non-OCS nodes are: {non_ocs_nodes}")
        return ocs_nodes, non_ocs_nodes
Example No. 16
def uninstall_ocs():
    """
    The function uninstalls the OCS operator from an OpenShift
    cluster and removes all its settings and dependencies

    """
    ocp_obj = ocp.OCP()
    provisioners = constants.OCS_PROVISIONERS

    # List the storage classes
    sc_list = [
        sc for sc in get_all_storageclass()
        if sc.get('provisioner') in provisioners
    ]

    # Query for PVCs and OBCs that are using the storage class provisioners listed in the previous step.
    pvc_to_delete = []
    for sc in sc_list:
        pvc_to_delete.extend(pvc for pvc in get_all_pvcs_in_storageclass(
            sc.get('metadata').get('name')) if 'noobaa' not in pvc.name)

    log.info("Removing monitoring stack from OpenShift Container Storage")
    remove_monitoring_stack_from_ocs()

    log.info(
        "Removing OpenShift Container Platform registry from OpenShift Container Storage"
    )
    remove_ocp_registry_from_ocs(config.ENV_DATA['platform'])

    log.info(
        "Removing the cluster logging operator from OpenShift Container Storage"
    )
    try:
        remove_cluster_logging_operator_from_ocs()
    except CommandFailed:
        log.info("No cluster logging found")

    log.info("Deleting pvcs")
    for pvc in pvc_to_delete:
        log.info(f"Deleting pvc: {pvc.name}")
        pvc.delete()

    storage_cluster = ocp.OCP(kind=constants.STORAGECLUSTER,
                              resource_name=constants.DEFAULT_CLUSTERNAME,
                              namespace='openshift-storage')

    log.info("Checking for local storage")
    lso_sc = None
    if check_local_volume():
        log.info("Local volume was found. Will be removed later")
        lso_sc = storage_cluster.get().get('spec').get('storageDeviceSets')[
            0].get('dataPVCTemplate').get('spec').get('storageClassName')

    log.info("Deleting storageCluster object")
    storage_cluster.delete(resource_name=constants.DEFAULT_CLUSTERNAME)

    log.info("Removing CRDs")
    crd_list = [
        'backingstores.noobaa.io', 'bucketclasses.noobaa.io',
        'cephblockpools.ceph.rook.io', 'cephfilesystems.ceph.rook.io',
        'cephnfses.ceph.rook.io', 'cephobjectstores.ceph.rook.io',
        'cephobjectstoreusers.ceph.rook.io', 'noobaas.noobaa.io',
        'ocsinitializations.ocs.openshift.io',
        'storageclusterinitializations.ocs.openshift.io',
        'storageclusters.ocs.openshift.io', 'cephclusters.ceph.rook.io'
    ]
    for crd in crd_list:
        ocp_obj.exec_oc_cmd(f"delete crd {crd} --timeout=300m")

    log.info("Deleting openshift-storage namespace")
    ocp_obj.delete_project('openshift-storage')
    ocp_obj.wait_for_delete('openshift-storage')
    switch_to_project("default")

    log.info("Removing rook directory from nodes")
    nodes_list = get_labeled_nodes(constants.OPERATOR_NODE_LABEL)
    for node in nodes_list:
        log.info(f"Removing rook from {node}")
        ocp_obj.exec_oc_debug_cmd(node=node, cmd_list=["rm -rf /var/lib/rook"])

    log.info("Removing LSO ")
    if lso_sc is not None:
        uninstall_lso(lso_sc)

    log.info(
        "Deleting the storage classes that use openshift-storage provisioners"
    )
    for storage_class in sc_list:
        log.info(
            f"Deleting storage class {storage_class.get('metadata').get('name')}"
        )
        sc_obj = ocp.OCP(kind=constants.STORAGECLASS)
        sc_obj.delete(resource_name=storage_class.get('metadata').get('name'))

    log.info("Unlabeling storage nodes")
    nodes_list = get_all_nodes()
    for node in nodes_list:
        node_obj = ocp.OCP(kind=constants.NODE, resource_name=node)
        node_obj.add_label(resource_name=node,
                           label=constants.OPERATOR_NODE_LABEL[:-3] + '-')
        node_obj.add_label(resource_name=node,
                           label=constants.TOPOLOGY_ROOK_LABEL + '-')

    log.info("OCS was removed successfully from the cluster")
Example No. 17
def add_worker_node(instance_type=None):
    global ms_name
    ms_name = list()
    worker_list = node.get_worker_nodes()
    ocs_worker_list = machine.get_labeled_nodes(constants.OPERATOR_NODE_LABEL)
    scale_worker = machine.get_labeled_nodes(constants.SCALE_LABEL)
    if config.RUN.get("use_ocs_worker_for_scale"):
        if not scale_worker:
            helpers.label_worker_node(node_list=worker_list,
                                      label_key="scale-label",
                                      label_value="app-scale")
    else:
        if not scale_worker:
            for node_item in ocs_worker_list:
                worker_list.remove(node_item)
            if worker_list:
                helpers.label_worker_node(
                    node_list=worker_list,
                    label_key="scale-label",
                    label_value="app-scale",
                )
    scale_worker_list = machine.get_labeled_nodes(constants.SCALE_LABEL)
    logging.info(f"Print existing scale worker {scale_worker_list}")

    if (config.ENV_DATA["deployment_type"] == "ipi"
            and config.ENV_DATA["platform"].lower() == "aws"):
        log.info("Adding worker nodes on the current cluster")
        labels = [("node-role.kubernetes.io/app", "app-scale")]
        # Create machineset for app worker nodes on each zone
        for obj in machine.get_machineset_objs():
            if "app" in obj.name:
                ms_name.append(obj.name)
        # Default to m5.4xlarge when no instance type was requested
        if instance_type is None:
            instance_type = "m5.4xlarge"
        if not ms_name:
            if len(machine.get_machineset_objs()) == 3:
                for zone in ["a", "b", "c"]:
                    ms_name.append(
                        machine.create_custom_machineset(
                            instance_type=instance_type,
                            labels=labels,
                            zone=zone,
                        ))
            else:
                ms_name.append(
                    machine.create_custom_machineset(
                        instance_type=instance_type,
                        labels=labels,
                        zone="a",
                    ))
            for ms in ms_name:
                machine.wait_for_new_node_to_be_ready(ms)

        worker_list = node.get_worker_nodes()
        ocs_worker_list = machine.get_labeled_nodes(
            constants.OPERATOR_NODE_LABEL)
        scale_label_worker = machine.get_labeled_nodes(constants.SCALE_LABEL)
        ocs_worker_list.extend(scale_label_worker)
        final_list = list(dict.fromkeys(ocs_worker_list))
        for node_item in final_list:
            if node_item in worker_list:
                worker_list.remove(node_item)
        if worker_list:
            helpers.label_worker_node(node_list=worker_list,
                                      label_key="scale-label",
                                      label_value="app-scale")
        return True
    elif (config.ENV_DATA["deployment_type"] == "upi"
          and config.ENV_DATA["platform"].lower() == "vsphere"):
        log.info("Running scale test on existing worker nodes.")
    elif (config.ENV_DATA["deployment_type"] == "upi"
          and config.ENV_DATA["platform"].lower() == "baremetal"):
        log.info("Running scale test on existing worker nodes.")
    elif (config.ENV_DATA["deployment_type"] == "upi"
          and config.ENV_DATA["platform"].lower() == "azure"):
        raise UnsupportedPlatformError("Unsupported Platform")
Example No. 18
def add_worker_node(instance_type=None):
    global ms_name
    ms_name = list()
    worker_list = helpers.get_worker_nodes()
    ocs_worker_list = machine.get_labeled_nodes(constants.OPERATOR_NODE_LABEL)
    scale_worker = machine.get_labeled_nodes(constants.SCALE_LABEL)
    if config.RUN.get('use_ocs_worker_for_scale'):
        if not scale_worker:
            helpers.label_worker_node(node_list=worker_list,
                                      label_key='scale-label',
                                      label_value='app-scale')
    else:
        if not scale_worker:
            for node_item in ocs_worker_list:
                worker_list.remove(node_item)
            if worker_list:
                helpers.label_worker_node(node_list=worker_list,
                                          label_key='scale-label',
                                          label_value='app-scale')
    scale_worker_list = machine.get_labeled_nodes(constants.SCALE_LABEL)
    logging.info(f"Print existing scale worker {scale_worker_list}")

    if config.ENV_DATA['deployment_type'] == 'ipi' and config.ENV_DATA[
            'platform'].lower() == 'aws':
        log.info("Adding worker nodes on the current cluster")
        # Create machineset for app worker nodes on each zone
        for obj in machine.get_machineset_objs():
            if 'app' in obj.name:
                ms_name.append(obj.name)
        # Default to m5.4xlarge when no instance type was requested
        if instance_type is None:
            instance_type = 'm5.4xlarge'
        if not ms_name:
            if len(machine.get_machineset_objs()) == 3:
                for zone in ['a', 'b', 'c']:
                    ms_name.append(
                        machine.create_custom_machineset(
                            instance_type=instance_type, zone=zone))
            else:
                ms_name.append(
                    machine.create_custom_machineset(
                        instance_type=instance_type, zone='a'))
            for ms in ms_name:
                machine.wait_for_new_node_to_be_ready(ms)

        worker_list = helpers.get_worker_nodes()
        ocs_worker_list = machine.get_labeled_nodes(
            constants.OPERATOR_NODE_LABEL)
        scale_label_worker = machine.get_labeled_nodes(constants.SCALE_LABEL)
        ocs_worker_list.extend(scale_label_worker)
        final_list = list(dict.fromkeys(ocs_worker_list))
        for node_item in final_list:
            if node_item in worker_list:
                worker_list.remove(node_item)
        if worker_list:
            helpers.label_worker_node(node_list=worker_list,
                                      label_key='scale-label',
                                      label_value='app-scale')
        return True
    elif config.ENV_DATA['deployment_type'] == 'upi' and config.ENV_DATA[
            'platform'].lower() == 'vsphere':
        log.info('Running pgsql on existing worker nodes')
    elif config.ENV_DATA['deployment_type'] == 'upi' and config.ENV_DATA[
            'platform'].lower() == 'baremetal':
        log.info('Running pgsql on existing worker nodes')
    elif config.ENV_DATA['deployment_type'] == 'upi' and config.ENV_DATA[
            'platform'].lower() == 'azure':
        raise UnsupportedPlatformError("Unsupported Platform")
Example No. 19
    def setup(
        self,
        request,
        scenario,
        nodes,
        multi_pvc_factory,
        service_account_factory,
        dc_pod_factory,
    ):
        """
        Identify the nodes and start multiple dc pods for the test

        Args:
            scenario (str): Scenario of app pods running on OCS or dedicated nodes
                (eg., 'colocated', 'dedicated')
            nodes: A fixture to get an instance of the relevant platform nodes class
            multi_pvc_factory: A fixture to create a set of new PVCs
            service_account_factory: A fixture to create a service account
            dc_pod_factory: A fixture to create dc pod

        Returns:
            list: dc pod objs

        """
        worker_nodes = node.get_worker_nodes()
        ocs_nodes = machine.get_labeled_nodes(constants.OPERATOR_NODE_LABEL)
        non_ocs_nodes = list(set(worker_nodes) - set(ocs_nodes))

        def finalizer():
            helpers.remove_label_from_worker_node(node_list=worker_nodes,
                                                  label_key="nodetype")

            # Check ceph health
            ceph_health_check(tries=80)

        request.addfinalizer(finalizer)

        if (scenario == "dedicated") and len(non_ocs_nodes) == 0:
            if config.ENV_DATA.get("deployment_type").lower() == "ipi":
                machines = machine.get_machinesets()
                node.add_new_node_and_label_it(machines[0],
                                               num_nodes=1,
                                               mark_for_ocs_label=False)
            else:
                if (config.ENV_DATA.get("platform").lower() ==
                        constants.VSPHERE_PLATFORM):
                    pytest.skip(
                        "Skipping add node in VSPHERE due to https://bugzilla.redhat.com/show_bug.cgi?id=1844521"
                    )
                is_rhel = config.ENV_DATA.get(
                    "rhel_workers") or config.ENV_DATA.get("rhel_user")
                node_type = constants.RHEL_OS if is_rhel else constants.RHCOS
                node.add_new_node_and_label_upi(node_type=node_type,
                                                num_nodes=1,
                                                mark_for_ocs_label=False)
            non_ocs_nodes = list(set(node.get_worker_nodes()) - set(ocs_nodes))

        app_pod_nodes = ocs_nodes if (scenario
                                      == "colocated") else non_ocs_nodes

        # Label nodes to be able to run app pods
        helpers.label_worker_node(node_list=app_pod_nodes,
                                  label_key="nodetype",
                                  label_value="app-pod")

        access_modes_rbd = [
            constants.ACCESS_MODE_RWO,
            f"{constants.ACCESS_MODE_RWX}-Block",
        ]

        access_modes_cephfs = [
            constants.ACCESS_MODE_RWO, constants.ACCESS_MODE_RWX
        ]

        pvcs_rbd = multi_pvc_factory(
            interface=constants.CEPHBLOCKPOOL,
            size=self.pvc_size,
            access_modes=access_modes_rbd,
            status=constants.STATUS_BOUND,
            num_of_pvc=len(access_modes_rbd),
        )

        project = pvcs_rbd[0].project

        pvcs_cephfs = multi_pvc_factory(
            interface=constants.CEPHFILESYSTEM,
            project=project,
            size=self.pvc_size,
            access_modes=access_modes_cephfs,
            status=constants.STATUS_BOUND,
            num_of_pvc=len(access_modes_cephfs),
        )

        pvcs = pvcs_cephfs + pvcs_rbd
        # Set volume mode on PVC objects
        for pvc_obj in pvcs:
            pvc_info = pvc_obj.get()
            setattr(pvc_obj, "volume_mode", pvc_info["spec"]["volumeMode"])

        sa_obj = service_account_factory(project=project)
        pods = []

        # Create pods
        for pvc_obj in pvcs:
            if constants.CEPHFS_INTERFACE in pvc_obj.storageclass.name:
                interface = constants.CEPHFILESYSTEM
            else:
                interface = constants.CEPHBLOCKPOOL

            num_pods = 2 if pvc_obj.access_mode == constants.ACCESS_MODE_RWX else 1
            logger.info("Creating app pods")
            for _ in range(num_pods):
                pods.append(
                    dc_pod_factory(
                        interface=interface,
                        pvc=pvc_obj,
                        node_selector={"nodetype": "app-pod"},
                        raw_block_pv=pvc_obj.volume_mode == "Block",
                        sa_obj=sa_obj,
                    ))

        logger.info(
            f"Created {len(pods)} pods using {len(pvcs_cephfs)} cephfs, {len(pvcs_rbd)} rbd PVCs."
        )

        return pods
Example No. 20
    def teardown():
        log.info('Clear label from worker (Application) nodes')
        # Getting all Application nodes
        app_nodes = machine.get_labeled_nodes(constants.APP_NODE_LABEL)
        helpers.remove_label_from_worker_node(app_nodes,
                                              constants.APP_NODE_LABEL)
Example No. 21
    def identify_and_add_nodes(self, scenario, num_of_nodes):
        """
        Fetches info about the worker nodes and adds nodes (if required)

        Args:
            scenario (str): Scenario of app pods running on OCS or dedicated nodes
                (eg., 'colocated', 'dedicated')
            num_of_nodes (int): number of nodes required for running test

        Returns:
            tuple: tuple containing:
                list: list of OCS node names
                list: list of non-OCS node names

        """
        nodes_to_add = 0
        initial_worker_nodes = helpers.get_worker_nodes()
        ocs_nodes = machine.get_labeled_nodes(constants.OPERATOR_NODE_LABEL)
        non_ocs_nodes = list(set(initial_worker_nodes) - set(ocs_nodes))

        if 'colocated' in scenario and len(ocs_nodes) < num_of_nodes:
            nodes_to_add = num_of_nodes - len(initial_worker_nodes)

        if 'dedicated' in scenario and len(non_ocs_nodes) < num_of_nodes:
            nodes_to_add = num_of_nodes - len(non_ocs_nodes)

        if nodes_to_add > 0:
            logger.info(f"{nodes_to_add} extra worker nodes needed")
            if config.ENV_DATA['deployment_type'] == 'ipi':
                machine_name = machine.get_machine_from_node_name(
                    random.choice(initial_worker_nodes)
                )
                machineset_name = machine.get_machineset_from_machine_name(
                    machine_name
                )
                machineset_replica_count = machine.get_replica_count(
                    machineset_name
                )
                machine.add_node(
                    machineset_name,
                    count=machineset_replica_count + nodes_to_add
                )
                logger.info("Waiting for the new node(s) to be in ready state")
                machine.wait_for_new_node_to_be_ready(machineset_name)
            else:
                # TODO: Add required num of nodes instead of skipping
                # https://github.com/red-hat-storage/ocs-ci/issues/1291
                pytest.skip("Add node not implemented for UPI, github issue #1291")

            new_worker_nodes = helpers.get_worker_nodes()
            new_nodes_added = list(set(new_worker_nodes) - set(initial_worker_nodes))
            assert len(new_nodes_added) > 0, 'Extra nodes not added in the cluster'
            non_ocs_nodes += new_nodes_added

        if 'colocated' in scenario and len(ocs_nodes) < num_of_nodes:
            logger.info('Adding OCS storage label to Non-OCS workers')
            node_obj = ocp.OCP(kind=constants.NODE)
            nodes_to_label = non_ocs_nodes[0:(num_of_nodes - len(ocs_nodes))]
            for node_name in nodes_to_label:
                node_obj.add_label(
                    resource_name=node_name, label=constants.OPERATOR_NODE_LABEL
                )
                ocs_nodes.append(node_name)
            non_ocs_nodes = list(set(non_ocs_nodes) - set(ocs_nodes))

        logger.info(f"The OCS nodes are : {ocs_nodes}")
        logger.info(f"The Non-OCS nodes are: {non_ocs_nodes}")
        return ocs_nodes, non_ocs_nodes
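
A sketch of how a disruption test might call this helper (the scenario and node count are hypothetical):

ocs_nodes, non_ocs_nodes = self.identify_and_add_nodes(
    scenario='dedicated', num_of_nodes=3
)
logger.info(f"App pods will be scheduled on: {non_ocs_nodes}")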