예제 #1
0
def bring_bricks_offline(bricks_list, volname=None,
                         bring_bricks_offline_methods=None):
    """Bring the bricks specified in the bricks_list offline.

    Args:
        bricks_list (str|list): A brick|List of bricks to bring offline.
            Each brick is split on ':' into a node and a brick path.
        volname (str): Name of the volume. Not used by this function; kept
            so existing callers keep working.

    Kwargs:
        bring_bricks_offline_methods (str|list): Method|List of methods using
            which bricks will be brought offline. The method to bring a
            brick offline is randomly selected from this list for every
            brick. By default all bricks will be brought offline with the
            'service_kill' method, which is the only method implemented.

    Returns:
        bool : True on successfully bringing all bricks offline.
               False otherwise (including when an unknown method is
               selected or no method is given).
    """
    # Local import so the block does not depend on a module-level import.
    import random

    if bring_bricks_offline_methods is None:
        bring_bricks_offline_methods = ['service_kill']
    else:
        bring_bricks_offline_methods = to_list(bring_bricks_offline_methods)

    if not bring_bricks_offline_methods:
        g.log.error("No method specified to bring bricks offline")
        return False

    bricks_list = to_list(bricks_list)
    _rc = True
    failed_to_bring_offline_list = []
    for brick in bricks_list:
        bring_brick_offline_method = random.choice(
            bring_bricks_offline_methods)

        if bring_brick_offline_method == 'service_kill':
            brick_node, brick_path = brick.split(":")
            brick_path = brick_path.replace("/", "-")
            peer_id = get_peer_id(brick_node, brick_node)
            # Find the process whose command line mentions
            # '<peer_id><brick_path>.pid', try SIGTERM first and fall back
            # to SIGKILL.
            kill_cmd = ("pid=`ps -ef | grep -ve 'grep' | "
                        "grep -e '%s%s.pid' | awk '{print $2}'` && "
                        "kill -15 $pid || kill -9 $pid" %
                        (peer_id, brick_path))
            ret, _, _ = g.run(brick_node, kill_cmd)
            # A non-zero return code means the command failed, as in the
            # other helpers of this file.
            if ret:
                g.log.error("Unable to kill the brick %s", brick)
                failed_to_bring_offline_list.append(brick)
                _rc = False
        else:
            g.log.error("Invalid method '%s' to bring brick offline",
                        bring_brick_offline_method)
            return False

    if not _rc:
        g.log.error("Unable to bring some of the bricks %s offline",
                    failed_to_bring_offline_list)
        return False

    g.log.info("All the bricks : %s are brought offline", bricks_list)
    return True
예제 #2
0
def is_glusterd_running(servers):
    """Checks the glusterd2 status on specified servers.

    Runs 'systemctl status glusterd2' and 'pidof glusterd2' on every server
    and combines the two return codes.

    Args:
        servers (str|list): A server|List of server hosts on which glusterd2
            status has to be checked.

    Returns:
        int:
            0  : if glusterd2 is running on all servers
            1  : if glusterd2 is not running
           -1  : if glusterd2 is not running and its PID is alive

        NOTE(review): when several servers fail, the value reflects the
        last failing server that was inspected, so 1 and -1 are not
        aggregated across servers.
    """
    servers = to_list(servers)

    cmd1 = "systemctl status glusterd2"
    cmd2 = "pidof glusterd2"
    cmd1_results = g.run_parallel(servers, cmd1)
    cmd2_results = g.run_parallel(servers, cmd2)

    _rc = 0
    # .items() works on both Python 2 and Python 3 (iteritems is 2-only).
    for server, ret_values in cmd1_results.items():
        retcode, _, _ = ret_values
        if retcode:
            g.log.error("glusterd2 is not running on the server %s", server)
            _rc = 1
            # pidof returning 0 means a glusterd2 process still exists.
            if not cmd2_results[server][0]:
                g.log.error("PID of glusterd2 is alive and status is not "
                            "running")
                _rc = -1
    return _rc
예제 #3
0
def operate_glusterd(servers, operation):
    """Performs start/stop/restart of glusterd2 on the specified servers.

    Args:
        servers (str|list): A server|List of server hosts on which the
            operation has to be performed.
        operation (str) : One of 'start', 'stop' or 'restart'.

    Returns:
        bool : True if the operation performed on glusterd2 is successful
            on all servers. False otherwise, including when 'operation' is
            not one of the supported values.
    """
    commands = {
        # 'start' is a no-op when a glusterd2 process already exists.
        "start": "pgrep glusterd2 || systemctl start glusterd2",
        "stop": "systemctl stop glusterd2",
        "restart": "systemctl restart glusterd2",
    }

    cmd = commands.get(operation)
    if cmd is None:
        # Previously an unknown operation left 'cmd' unbound and crashed.
        g.log.error("Invalid operation '%s' requested for glusterd2",
                    operation)
        return False

    servers = to_list(servers)
    results = g.run_parallel(servers, cmd)

    _rc = True
    # .items() works on both Python 2 and Python 3 (iteritems is 2-only).
    for server, ret_values in results.items():
        retcode, _, _ = ret_values
        if retcode:
            g.log.error("Unable to %s glusterd2 on server "
                        "%s", operation, server)
            _rc = False

    return _rc
예제 #4
0
def get_glusterd_pids(nodes):
    """Collect the glusterd2 process ids of the given nodes.

    Runs 'pidof glusterd2' on every node and records what was found.

    Args:
        nodes ( str|list ) : Node/Nodes of the cluster

    Returns:
        tuple : Tuple containing two elements (ret, glusterd_pids).
        The first element 'ret' is of type 'bool', True if and only if
        glusterd2 is running on all the nodes in the list and each
        node contains only one instance of glusterd2 running.
        False otherwise.
        The second element 'glusterd_pids' is a dictionary keyed by node:
            - a one-element list with the pid when exactly one process runs
            - ['-1'] when the command failed or no process was found
            - the raw command output when more than one process was found
    """
    glusterd_pids = {}
    _rc = True
    nodes = to_list(nodes)

    cmd = "pidof glusterd2"
    g.log.info("Executing cmd: %s on node %s", cmd, nodes)
    results = g.run_parallel(nodes, cmd)
    for node in results:
        ret, out, _ = results[node]
        if ret:
            g.log.error(
                "Not able to get glusterd2 process "
                "or glusterd2 process is "
                "killed on node %s", node)
            _rc = False
            glusterd_pids[node] = ['-1']
            continue

        # pidof prints all pids on one line separated by spaces, so split on
        # any whitespace to be able to detect several instances.
        pids = out.split()
        if not pids:
            g.log.error(
                "NO glusterd2 process found or "
                "gd2 is not running on the node %s", node)
            _rc = False
            glusterd_pids[node] = ['-1']
        elif len(pids) > 1:
            g.log.error(
                "More than one glusterd2 process "
                "found on node %s", node)
            _rc = False
            glusterd_pids[node] = out
        else:
            g.log.info("glusterd2 process with "
                       "pid %s found on %s", pids, node)
            glusterd_pids[node] = pids
    return _rc, glusterd_pids
예제 #5
0
def peer_probe_servers(mnode, servers, validate=True):
    """Probe specified servers and validate whether probed servers
    are in cluster and connected state if validate is set to True.

    Args:
        mnode (str): Node on which command has to be executed.
        servers (str|list): A server|List of servers to be peer probed.
            'mnode' itself is skipped when present. The caller's list is
            not modified.

    Kwargs:
        validate (bool): True to validate if probed peer is in cluster and
            connected state. False otherwise. Defaults to True.

    Returns:
        bool: True on success and False on failure. With validate=False,
            True is returned once all probes succeeded.
    """
    from glustolibs.gluster.lib_utils import to_list

    # Build a new list instead of calling remove(), which could mutate the
    # list object passed in by the caller.
    servers = [server for server in to_list(servers) if server != mnode]

    # Get list of nodes from 'gluster pool list'
    nodes_in_pool_list = nodes_from_pool_list(mnode)
    if not nodes_in_pool_list:
        g.log.error("Unable to get nodes from gluster pool list. "
                    "Failing peer probe.")
        return False

    for server in servers:
        if server not in nodes_in_pool_list:
            ret, _, _ = peer_probe(mnode, server)
            if ret != 0:
                g.log.error("Failed to peer probe the node '%s'.", server)
                return False
            g.log.info("Successfully peer probed the node '%s'.", server)

    if not validate:
        return True

    # Validating whether peer is in connected state after peer probe.
    # Bounded retry: the previous 'while i < 200' never incremented 'i' and
    # therefore never terminated while peers stayed disconnected.
    for _ in range(200):
        if is_peer_connected(mnode, servers):
            g.log.info("All peers are in connected state")
            return True

    g.log.error("Peers are in not connected state")
    return False
예제 #6
0
def peer_detach_servers(mnode, servers, validate=True):
    """Detach peers and validate status of peer if validate is set to True.

    Args:
        mnode (str): Node on which command has to be executed.
        servers (str|list): A server|List of servers to be detached.
            'mnode' itself is skipped when present. The caller's list is
            not modified.

    Kwargs:
        validate (bool): True if status of the peer needs to be validated,
            False otherwise. Defaults to True.

    Returns:
        bool: True on success and False on failure. With validate=False,
            True is returned once all detach commands succeeded.
    """

    from glustolibs.gluster.lib_utils import to_list

    # Build a new list instead of calling remove(), which could mutate the
    # list object passed in by the caller.
    servers = [server for server in to_list(servers) if server != mnode]

    for server in servers:
        ret, _, _ = peer_detach(mnode, server)
        if ret:
            g.log.error("Failed to peer detach the node '%s'.", server)
            return False

    if not validate:
        return True

    # Validating whether peer detach is successful.
    # Bounded retry: the previous 'while i < 200' never incremented 'i' and
    # therefore never terminated while a peer was still listed in the pool.
    for _ in range(200):
        nodes_in_pool = nodes_from_pool_list(mnode)
        if nodes_in_pool is None:
            # NOTE(review): assumes None signals that the pool list could
            # not be fetched - confirm against nodes_from_pool_list.
            g.log.error("Unable to get nodes from gluster pool list")
            continue

        still_in_pool = [server for server in servers
                         if server in nodes_in_pool]
        if not still_in_pool:
            g.log.info("Validation after peer detach is successful")
            return True

        for server in still_in_pool:
            g.log.error("Peer '%s' still in pool", server)

    g.log.error("Validation after peer detach failed.")
    return False
예제 #7
0
def enable_and_validate_volume_options(mnode, volname, volume_options_list,
                                       time_delay=1):
    """Enable the volume options and validate whether each option has been
    successfully enabled or not.

    Every option is set to "on", read back and checked before moving on to
    the next one.

    Args:
        mnode (str): Node on which commands are executed.
        volname (str): Name of the volume.
        volume_options_list (str|list): A volume option|List of volume options
            to be enabled
        time_delay (int): Time delay in seconds after each volume set
            operation. Defaults to 1.

    Returns:
        bool: True when enabling and validating all volume options is
            successful. False otherwise
    """

    volume_options_list = to_list(volume_options_list)

    for option in volume_options_list:
        # Set volume option to 'enable'
        g.log.info("Setting the volume option : %s", option)
        ret = set_volume_options(mnode, volname, {option: "on"})
        if not ret:
            return False

        # Validate whether the option is set on the volume
        g.log.info("Validating the volume option : %s to be set to 'enable'",
                   option)
        option_dict = get_volume_options(mnode, volname, option)
        g.log.info("Options Dict: %s", option_dict)
        if not option_dict:
            g.log.error("%s is not enabled on the volume %s", option, volname)
            return False

        # The returned entry must describe this option and report it as on.
        if (option not in option_dict['name'] or
                "on" not in option_dict['value']):
            g.log.error("%s is not enabled on the volume %s", option, volname)
            return False

        g.log.info("%s is enabled on the volume %s", option, volname)
        time.sleep(time_delay)

    return True
예제 #8
0
def is_peer_connected(mnode, servers):
    """Checks whether specified peers are in cluster and 'Connected' state.

    For every server the peer status is queried and its JSON output is
    inspected for a truthy 'online' field.

    Args:
        mnode (str): Node from which the peer status has to be queried.
        servers (str|list): A server|List of servers to be validated.

    Returns:
        bool : True on success (all peers in cluster and connected), False
            on failure, including when the peer status command fails or its
            output cannot be interpreted.
    """
    from glustolibs.gluster.lib_utils import to_list

    servers = to_list(servers)

    for server in servers:
        ret, out, _ = peer_status(mnode, server)
        if ret:
            g.log.error("Unable to get the peer status of %s", server)
            return False

        try:
            online = json.loads(out)['online']
        except (ValueError, KeyError, TypeError):
            # Empty/invalid JSON or an unexpected structure: the peer state
            # cannot be confirmed, so report it as not connected.
            g.log.error("Unable to parse the peer status of %s", server)
            return False

        if not online:
            g.log.error("The peer %s is not connected", server)
            return False
    return True
예제 #9
0
def bring_bricks_online(mnode, volname, bricks_list,
                        bring_bricks_online_methods=None):
    """Bring the bricks specified in the bricks_list online.

    Args:
        mnode (str): Node on which commands will be executed.
        volname (str): Name of the volume.
        bricks_list (str|list): A brick|List of bricks to bring online.
            Each brick is split on ':' into a node and a brick path.

    Kwargs:
        bring_bricks_online_methods (str|list): Method|List of methods using
            which bricks will be brought online. One method is randomly
            selected from this list and used for all bricks.
            By default the method is selected from
            ['glusterd_restart', 'volume_start_force'].
            'glusterd_restart' restarts glusterd2 on the node of every
            brick; 'volume_start_force' runs a single forced volume start
            on mnode, which covers all the bricks at once.

    Returns:
        bool : True on successfully bringing all bricks online.
            False otherwise (including when an unknown method is selected
            or no method is given).
    """
    if bring_bricks_online_methods is None:
        bring_bricks_online_methods = ['glusterd_restart',
                                       'volume_start_force']
    else:
        bring_bricks_online_methods = to_list(bring_bricks_online_methods)

    if not bring_bricks_online_methods:
        g.log.error("No method specified to bring bricks online")
        return False

    # Select the method for both the default and the caller-supplied list.
    bring_brick_online_method = random.choice(bring_bricks_online_methods)
    bricks_list = to_list(bricks_list)

    g.log.info("Bringing bricks '%s' online with '%s'",
               bricks_list, bring_brick_online_method)

    _rc = True
    failed_to_bring_online_list = []
    if bring_brick_online_method == 'glusterd_restart':
        bring_brick_online_command = "systemctl restart glusterd2"
        for brick in bricks_list:
            brick_node, _ = brick.split(":")
            ret, _, _ = g.run(brick_node, bring_brick_online_command)
            # A non-zero return code means the command failed, as in the
            # other helpers of this file.
            if ret:
                g.log.error("Unable to restart glusterd on node %s",
                            brick_node)
                _rc = False
                failed_to_bring_online_list.append(brick)
                continue
            g.log.info("Successfully restarted glusterd on node %s to "
                       "bring back brick %s online", brick_node, brick)

    elif bring_brick_online_method == 'volume_start_force':
        bring_brick_online_command = ("glustercli volume start %s force" %
                                      volname)
        ret, _, _ = g.run(mnode, bring_brick_online_command)
        if ret:
            g.log.error("Unable to start the volume %s with force option",
                        volname)
            _rc = False
        else:
            g.log.info("Successfully restarted volume %s to bring all "
                       "the bricks '%s' online", volname, bricks_list)
    else:
        g.log.error("Invalid method '%s' to bring brick online",
                    bring_brick_online_method)
        return False

    if failed_to_bring_online_list:
        g.log.error("Unable to bring some of the bricks %s online",
                    failed_to_bring_online_list)

    g.log.info("Waiting for 10 seconds for all the bricks to be online")
    time.sleep(10)
    return _rc