Example #1
def noobaa_running_node_restart(pod_name):
    """
    Restart the node on which the given noobaa pod is running

    Args:
        pod_name (str): Name of noobaa pod

    """

    nb_pod_obj = pod.get_pod_obj(
        (get_pod_name_by_pattern(
            pattern=pod_name,
            namespace=constants.OPENSHIFT_STORAGE_NAMESPACE))[0],
        namespace=constants.OPENSHIFT_STORAGE_NAMESPACE,
    )
    nb_node_name = pod.get_pod_node(nb_pod_obj).name
    factory = platform_nodes.PlatformNodesFactory()
    nodes = factory.get_nodes_platform()
    nb_nodes = get_node_objs(node_names=[nb_node_name])
    log.info(f"{pod_name} is running on {nb_node_name}")
    log.info(f"Restarting node: {nb_node_name}...")
    nodes.restart_nodes_by_stop_and_start(nodes=nb_nodes, force=True)

    # Validate nodes are up and running
    wait_for_nodes_status()
    ceph_health_check(tries=30, delay=60)
    helpers.wait_for_resource_state(nb_pod_obj,
                                    constants.STATUS_RUNNING,
                                    timeout=180)
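
A minimal usage sketch for the helper above, assuming the same ocs-ci
imports are in scope; the pod name pattern is an assumption and should
match a noobaa pod running in your cluster:

    # Hypothetical call; "noobaa-core" is assumed to match the noobaa core pod
    noobaa_running_node_restart(pod_name="noobaa-core")

The helper resolves the pod to its node, power-cycles that node through the
platform-specific nodes object, and returns only once node status, Ceph
health, and the pod itself are back to a healthy state.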
Example #2
    def test_scale_osds_reboot_nodes(self, interface, project_factory,
                                     multi_pvc_factory, dc_pod_factory):
        """
        Check storage utilization; if it is below 50%, run IO to fill it,
        then scale OSDs from 3 to 6, check for rebalance, and do a rolling
        reboot of the worker nodes
        """
        current_osd_count = count_cluster_osd()
        proj_obj = project_factory()
        if current_osd_count == 3:
            while not validate_osd_utilization(osd_used=50):
                # Create pvc
                pvc_objs = multi_pvc_factory(project=proj_obj,
                                             interface=interface,
                                             size=self.pvc_size,
                                             num_of_pvc=self.num_of_pvcs)

                dc_pod_objs = list()
                for pvc_obj in pvc_objs:
                    dc_pod_objs.append(dc_pod_factory(pvc=pvc_obj))

                wait_for_dc_app_pods_to_reach_running_state(dc_pod_objs)

                for pod_obj in dc_pod_objs:
                    pod_obj.run_io(storage_type='fs',
                                   size='3G',
                                   runtime='60',
                                   fio_filename=f'{pod_obj.name}_io')

        # Add capacity
        osd_size = storage_cluster.get_osd_size()
        count = storage_cluster.add_capacity(osd_size)
        pod = OCP(kind=constants.POD,
                  namespace=config.ENV_DATA['cluster_namespace'])
        pod.wait_for_resource(timeout=300,
                              condition=constants.STATUS_RUNNING,
                              selector='app=rook-ceph-osd',
                              resource_count=count * 3)
        assert ceph_health_check(), "Ceph health not OK after adding capacity"

        cluster = CephCluster()

        # Get rebalance status
        rebalance_status = cluster.get_rebalance_status()
        logger.info(rebalance_status)
        if rebalance_status:
            time_taken = cluster.time_taken_to_complete_rebalance()
            logger.info(f"Time taken to complete rebalance: {time_taken}")

        # Rolling reboot on worker nodes
        worker_nodes = get_typed_nodes(node_type='worker')

        factory = platform_nodes.PlatformNodesFactory()
        nodes = factory.get_nodes_platform()

        for node in worker_nodes:
            nodes.restart_nodes(nodes=[node])
            wait_for_nodes_status()

        assert ceph_health_check(
            delay=180), "Ceph health check failed after node reboots"
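
The rolling-reboot tail of this test is a reusable pattern on its own. A
trimmed sketch, assuming the same ocs-ci helpers used above
(platform_nodes, get_typed_nodes, wait_for_nodes_status, ceph_health_check):

    nodes = platform_nodes.PlatformNodesFactory().get_nodes_platform()
    for node in get_typed_nodes(node_type='worker'):
        # Reboot one worker at a time and wait for all nodes to be Ready
        # before touching the next one, so the cluster never loses quorum
        nodes.restart_nodes(nodes=[node])
        wait_for_nodes_status()
    assert ceph_health_check(delay=180)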
Example #3
def nodes():
    """
    Return an instance of the relevant platform nodes class
    (e.g. AWSNodes, VMWareNodes) to be used later in tests
    for node-related operations, such as node restart,
    volume detach/attach, etc.

    """
    factory = platform_nodes.PlatformNodesFactory()
    nodes = factory.get_nodes_platform()
    return nodes
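
In practice this function is registered as a pytest fixture and injected by
name; a sketch of a consumer test (the decorator and test body are
illustrative, not from the source):

    import pytest

    @pytest.fixture()
    def nodes():
        factory = platform_nodes.PlatformNodesFactory()
        return factory.get_nodes_platform()

    def test_restart_workers(nodes):
        # 'nodes' resolves to the platform class (AWSNodes, VMWareNodes, ...)
        # picked by the factory from the run configuration
        nodes.restart_nodes(nodes=get_nodes(node_type='worker'))
        wait_for_nodes_status()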
Example #4
    def test_rolling_reboot_node(self, node_type):
        """
        Test rolling reboot of nodes
        """

        # Get info from SCALE_DATA_FILE for validation
        if os.path.exists(SCALE_DATA_FILE):
            file_data = templating.load_yaml(SCALE_DATA_FILE)
            namespace = file_data.get("NAMESPACE")
            pod_scale_list = file_data.get("POD_SCALE_LIST")
            pvc_scale_list = file_data.get("PVC_SCALE_LIST")
        else:
            raise FileNotFoundError(f"{SCALE_DATA_FILE} does not exist")

        node_list = list()

        # Rolling reboot nodes
        if node_type == constants.WORKER_MACHINE:
            tmp_list = get_nodes(node_type=node_type)
            ocs_node_list = machine.get_labeled_nodes(constants.OPERATOR_NODE_LABEL)
            for tmp in tmp_list:
                if tmp.name in ocs_node_list:
                    node_list.append(tmp)
        else:
            node_list = get_nodes(node_type=node_type)

        factory = platform_nodes.PlatformNodesFactory()
        nodes = factory.get_nodes_platform()

        for node in node_list:
            nodes.restart_nodes(nodes=[node])
            scale_lib.validate_node_and_oc_services_are_up_after_reboot()

        # Validate storage pods are running
        wait_for_storage_pods()

        # Validate cluster health ok and all pods are running
        assert utils.ceph_health_check(
            delay=180
        ), "Ceph health in bad state after node reboots"

        # Validate all PVCs from namespace are in Bound state
        assert scale_lib.validate_all_pvcs_and_check_state(
            namespace=namespace, pvc_scale_list=pvc_scale_list
        )

        # Validate all PODs from namespace are up and running
        assert scale_lib.validate_all_pods_and_check_state(
            namespace=namespace, pod_scale_list=pod_scale_list
        )
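
The worker-filtering block above is an intersection of node objects with
labeled node names; a more compact equivalent under the same helpers (a
sketch, behavior unchanged):

    ocs_node_names = set(machine.get_labeled_nodes(constants.OPERATOR_NODE_LABEL))
    node_list = [
        node for node in get_nodes(node_type=node_type)
        if node.name in ocs_node_names
    ]

Restricting the reboots to OCS-labeled workers keeps the test from cycling
workers that host no storage pods.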
Example #5
def cycle_nodes(cluster_path, action):
    """
    Start/Stop AWS nodes to save costs when not in use.

    Args:
        cluster_path (str): Path to the cluster directory that holds the auth files
        action (str): Action to perform, either 'start' or 'stop'

    """
    node_obj_file = os.path.join(cluster_path, NODE_OBJ_FILE)
    nodes_file = os.path.join(cluster_path, NODE_FILE)
    instance_file = os.path.join(cluster_path, INSTANCE_FILE)
    if action == 'stop':
        ceph = CephCluster()
        ceph.set_noout()
        node_objs = get_node_objs()
        kls = platform_nodes.PlatformNodesFactory()
        nodes = kls.get_nodes_platform()
        with open(instance_file, "wb") as instance_fd:
            log.info("Storing ocs instances objects")
            pickle.dump(nodes.get_ec2_instances(nodes=node_objs),
                        instance_fd)
        with open(nodes_file, "wb") as nodes_fd:
            log.info("Storing ocp nodes objects")
            pickle.dump(nodes, nodes_fd)
        with open(node_obj_file, "wb") as node_obj_fd:
            log.info("Stopping all nodes")
            pickle.dump(node_objs, node_obj_fd)
            nodes.stop_nodes(nodes=node_objs)
    elif action == 'start':
        with open(instance_file, "rb") as instance_fd:
            log.info("Reading instance objects")
            instances = pickle.load(instance_fd)
        with open(nodes_file, "rb") as nodes_fd:
            log.info("Reading ocp nodes object")
            nodes = pickle.load(nodes_fd)
        with open(node_obj_file, "rb") as node_obj_fd:
            log.info("Starting ocs nodes")
            node_objs = pickle.load(node_obj_fd)
            nodes.start_nodes(instances=instances, nodes=node_objs)
            unset_noout()
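
A usage sketch for the cost-saving cycle (the cluster path is illustrative):

    cycle_nodes(cluster_path='/home/user/clusters/mycluster', action='stop')
    # ...later, when the cluster is needed again...
    cycle_nodes(cluster_path='/home/user/clusters/mycluster', action='start')

The 'stop' branch pickles the EC2 instance objects, the platform nodes
object, and the node objects next to the cluster auth files, so 'start' can
restore exactly the same set; the set_noout/unset_noout pair brackets the
outage so Ceph does not mark OSDs out and start rebalancing while the nodes
are down.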