def bring_bricks_offline(bricks_list, volname=None,
                         bring_bricks_offline_methods=None):
    """Bring the bricks specified in the bricks_list offline.

    Args:
        bricks_list (str|list): A brick | list of bricks to bring offline.
            Each brick is split on ':' into a node and a brick path.

    Kwargs:
        volname (str): Name of the volume. Not used by this function; kept
            so that existing callers keep working.
        bring_bricks_offline_methods (str|list): Method | list of methods
            using which bricks will be brought offline. The method to bring
            a brick offline is randomly selected from this list for every
            brick. By default all bricks will be brought offline with the
            'service_kill' method, which is the only supported method.

    Returns:
        bool : True on successfully bringing all bricks offline.
            False otherwise (including when an unsupported method is
            selected, in which case the remaining bricks are not processed).
    """
    # Local import keeps this block independent of the module-level imports.
    import random

    if bring_bricks_offline_methods is None:
        bring_bricks_offline_methods = ['service_kill']
    else:
        bring_bricks_offline_methods = to_list(bring_bricks_offline_methods)

    if not bring_bricks_offline_methods:
        g.log.error("No method specified to bring bricks offline")
        return False

    bricks_list = to_list(bricks_list)

    failed_to_bring_offline_list = []
    for brick in bricks_list:
        bring_brick_offline_method = random.choice(
            bring_bricks_offline_methods)

        if bring_brick_offline_method != 'service_kill':
            g.log.error("Invalid method '%s' to bring brick offline",
                        bring_brick_offline_method)
            return False

        brick_node, brick_path = brick.split(":")
        brick_path = brick_path.replace("/", "-")
        peer_id = get_peer_id(brick_node, brick_node)

        # Locate the process whose 'ps -ef' entry mentions
        # '<peer_id><brick_path>.pid'; try SIGTERM first and fall back to
        # SIGKILL if that fails.
        kill_cmd = ("pid=`ps -ef | grep -ve 'grep' | "
                    "grep -e '%s%s.pid' | awk '{print $2}'` && "
                    "kill -15 $pid || kill -9 $pid" %
                    (peer_id, brick_path))
        ret, _, _ = g.run(brick_node, kill_cmd)

        # A non-zero exit status means the kill command failed.
        if ret:
            g.log.error("Unable to kill the brick %s", brick)
            failed_to_bring_offline_list.append(brick)

    if failed_to_bring_offline_list:
        g.log.error("Unable to bring some of the bricks %s offline",
                    failed_to_bring_offline_list)
        return False

    g.log.info("All the bricks : %s are brought offline", bricks_list)
    return True
def is_glusterd_running(servers):
    """Checks the glusterd2 status on specified servers.

    Runs 'systemctl status glusterd2' and 'pidof glusterd2' on every server
    and derives a single status code from the exit statuses.

    Args:
        servers (str|list): A server|List of server hosts on which glusterd
            status has to be checked.

    Returns:
        0  : if glusterd2 is running on all the servers
        1  : if glusterd2 is not running
        -1 : if glusterd2 is not running and PID is alive

        When several servers are not running glusterd2, the value reflects
        the last such server examined.
    """
    servers = to_list(servers)

    cmd1 = "systemctl status glusterd2"
    cmd2 = "pidof glusterd2"
    cmd1_results = g.run_parallel(servers, cmd1)
    cmd2_results = g.run_parallel(servers, cmd2)

    _rc = 0
    # dict.items() is available on both Python 2 and Python 3, unlike
    # dict.iteritems().
    for server, ret_values in cmd1_results.items():
        retcode, _, _ = ret_values
        if retcode:
            g.log.error("glusterd2 is not running on the server %s", server)
            _rc = 1
            # 'pidof' exiting with 0 means a glusterd2 process still exists
            # even though systemctl does not report it as running.
            if not cmd2_results[server][0]:
                g.log.error("PID of glusterd2 is alive and status is not "
                            "running")
                _rc = -1
    return _rc
def operate_glusterd(servers, operation):
    """Performs start/stop/restart glusterd2 on specified servers according
    to mentioned operation.

    Args:
        servers (str|list): A server|List of server hosts on which glusterd2
            has to be started/stopped/restarted.
        operation (str) : start/stop/restart glusterd2

    Returns:
        bool : True if operation performed on glusterd2 is successful
            on all servers. False otherwise, including when 'operation'
            is not one of start/stop/restart.
    """
    commands = {
        # 'start' is skipped when a glusterd2 process already exists.
        "start": "pgrep glusterd2 || systemctl start glusterd2",
        "stop": "systemctl stop glusterd2",
        "restart": "systemctl restart glusterd2",
    }

    cmd = commands.get(operation)
    if cmd is None:
        # Without this guard an unknown operation would leave 'cmd' unbound
        # and crash with an UnboundLocalError further down.
        g.log.error("Invalid operation '%s' on glusterd2. Supported "
                    "operations are start/stop/restart", operation)
        return False

    servers = to_list(servers)
    results = g.run_parallel(servers, cmd)

    _rc = True
    # dict.items() is available on both Python 2 and Python 3.
    for server, ret_values in results.items():
        retcode, _, _ = ret_values
        if retcode:
            g.log.error("Unable to %s glusterd2 on server "
                        "%s", operation, server)
            _rc = False

    return _rc
def get_glusterd_pids(nodes):
    """Checks if glusterd2 process is running and returns the process ids
    in dictionary format.

    Args:
        nodes ( str|list ) : Node/Nodes of the cluster

    Returns:
        tuple : Tuple containing two elements (ret, glusterd_pids).
        The first element 'ret' is of type 'bool', True if and only if
        glusterd2 is running on all the nodes in the list and each
        node contains only one instance of glusterd2 running.
        False otherwise.

        The second element 'glusterd_pids' is of type dictionary and it
        maps every node to a list of process IDs (as strings) of glusterd2.
        The list is ['-1'] when no process was found or the lookup failed.
    """
    glusterd_pids = {}
    _rc = True
    nodes = to_list(nodes)

    cmd = "pidof glusterd2"
    g.log.info("Executing cmd: %s on node %s", cmd, nodes)
    results = g.run_parallel(nodes, cmd)

    for node, (ret, out, _) in results.items():
        # 'pidof' prints all matching PIDs on one line separated by spaces,
        # so split on any whitespace rather than on newlines only.
        pids = out.split()

        if ret:
            g.log.error("Not able to get glusterd2 process "
                        "or glusterd2 process is "
                        "killed on node %s", node)
            _rc = False
            glusterd_pids[node] = ['-1']
        elif not pids:
            g.log.error("NO glusterd2 process found or "
                        "gd2 is not running on the node %s", node)
            _rc = False
            glusterd_pids[node] = ['-1']
        elif len(pids) > 1:
            g.log.error("More than one glusterd2 process "
                        "found on node %s", node)
            _rc = False
            glusterd_pids[node] = pids
        else:
            g.log.info("glusterd2 process with "
                       "pid %s found on %s", pids, node)
            glusterd_pids[node] = pids

    return _rc, glusterd_pids
def peer_probe_servers(mnode, servers, validate=True):
    """Probe specified servers and validate whether probed servers
    are in cluster and connected state if validate is set to True.

    Args:
        mnode (str): Node on which command has to be executed.
        servers (str|list): A server|List of servers to be peer probed.
            'mnode' itself is skipped if present. The passed list is not
            modified.

    Kwargs:
        validate (bool): True to validate if probed peer is in cluster and
            connected state. False otherwise. Defaults to True.

    Returns:
        bool: True on success and False on failure. With validate=True,
            success additionally requires all servers to be reported as
            connected within 200 checks (one second apart).
    """
    import time

    from glustolibs.gluster.lib_utils import to_list

    # Build a fresh list instead of calling remove() so that a list handed
    # in by the caller is never mutated.
    servers = [server for server in to_list(servers) if server != mnode]

    # Get list of nodes from 'gluster pool list'
    nodes_in_pool_list = nodes_from_pool_list(mnode)
    if not nodes_in_pool_list:
        g.log.error("Unable to get nodes from gluster pool list. "
                    "Failing peer probe.")
        return False

    for server in servers:
        if server not in nodes_in_pool_list:
            ret, _, _ = peer_probe(mnode, server)
            if ret != 0:
                g.log.error("Failed to peer probe the node '%s'.", server)
                return False
            g.log.info("Successfully peer probed the node '%s'.", server)

    if not validate:
        return True

    # Validating whether peer is in connected state after peer probe.
    # The number of attempts is bounded so that a peer which never
    # connects cannot hang the caller forever.
    max_attempts = 200
    for attempt in range(max_attempts):
        if attempt:
            time.sleep(1)
        if is_peer_connected(mnode, servers):
            g.log.info("All peers are in connected state")
            return True

    g.log.error("Peers are in not connected state")
    return False
def peer_detach_servers(mnode, servers, validate=True):
    """Detach peers and validate status of peer if validate is set to True.

    Args:
        mnode (str): Node on which command has to be executed.
        servers (str|list): A server|List of servers to be detached.
            'mnode' itself is skipped if present. The passed list is not
            modified.

    Kwargs:
        validate (bool): True if status of the peer needs to be validated,
            False otherwise. Defaults to True.

    Returns:
        bool: True on success and False on failure. With validate=True,
            success additionally requires all servers to disappear from
            the pool list within 200 checks (one second apart).
    """
    import time

    from glustolibs.gluster.lib_utils import to_list

    # Build a fresh list instead of calling remove() so that a list handed
    # in by the caller is never mutated.
    servers = [server for server in to_list(servers) if server != mnode]

    for server in servers:
        ret, _, _ = peer_detach(mnode, server)
        if ret:
            g.log.error("Failed to peer detach the node '%s'.", server)
            return False

    if not validate:
        return True

    # Validating whether peer detach is successful. The number of attempts
    # is bounded so that a peer which never leaves the pool cannot hang
    # the caller forever.
    max_attempts = 200
    still_in_pool = []
    for attempt in range(max_attempts):
        if attempt:
            time.sleep(1)

        nodes_in_pool = nodes_from_pool_list(mnode)
        if not nodes_in_pool:
            # An empty result means the pool list could not be fetched;
            # nothing can be concluded from it, so try again.
            g.log.error("Unable to get nodes from gluster pool list.")
            continue

        still_in_pool = [server for server in servers
                         if server in nodes_in_pool]
        if not still_in_pool:
            g.log.info("Validation after peer detach is successful")
            return True

    for server in still_in_pool:
        g.log.error("Peer '%s' still in pool", server)
    g.log.error("Validation after peer detach failed.")
    return False
def enable_and_validate_volume_options(mnode, volname, volume_options_list,
                                       time_delay=1):
    """Enable the volume option and validate whether the option has been
    successfully enabled or not.

    Every option is set to "on", read back and checked before moving on to
    the next one.

    Args:
        mnode (str): Node on which commands are executed.
        volname (str): Name of the volume.
        volume_options_list (str|list): A volume option|List of volume
            options to be enabled
        time_delay (int): Time delay (in seconds) between 2 volume set
            operations

    Returns:
        bool: True when enabling and validating all volume options is
            successful. False otherwise
    """
    volume_options_list = to_list(volume_options_list)

    for option in volume_options_list:
        # Set volume option to 'on'
        g.log.info("Setting the volume option : %s", option)
        ret = set_volume_options(mnode, volname, {option: "on"})
        # NOTE(review): assumes set_volume_options returns a falsy value on
        # failure - confirm against its definition.
        if not ret:
            g.log.error("Unable to set the volume option %s on the "
                        "volume %s", option, volname)
            return False

        # Validate whether the option is set on the volume
        g.log.info("Validating the volume option : %s to be set to 'enable'",
                   option)
        option_dict = get_volume_options(mnode, volname, option)
        g.log.info("Options Dict: %s", option_dict)
        if not option_dict:
            g.log.error("%s is not enabled on the volume %s", option, volname)
            return False

        if (option not in option_dict['name'] or
                "on" not in option_dict['value']):
            g.log.error("%s is not enabled on the volume %s", option, volname)
            return False

        g.log.info("%s is enabled on the volume %s", option, volname)
        time.sleep(time_delay)

    return True
def is_peer_connected(mnode, servers):
    """Checks whether specified peer is in cluster and 'Connected' state.

    The peer status output of every server is parsed as JSON and its
    'online' field is inspected.

    Args:
        mnode (str): Node from which peer probe has to be executed.
        servers (str|list): A server| list of servers to be validated.

    Returns
        bool : True on success (peer in cluster and connected), False on
            failure. Output that cannot be parsed, or that carries no
            'online' field, is treated as "not connected" so that callers
            polling this function can simply retry.
    """
    from glustolibs.gluster.lib_utils import to_list

    servers = to_list(servers)

    for server in servers:
        _, out, _ = peer_status(mnode, server)
        try:
            status = json.loads(out)
        except (TypeError, ValueError):
            # Empty or non-JSON output, e.g. when the status command failed.
            g.log.error("Unable to parse the peer status of %s", server)
            return False

        if not (isinstance(status, dict) and status.get('online')):
            g.log.error("The peer %s is not connected", server)
            return False
    return True
def bring_bricks_online(mnode, volname, bricks_list,
                        bring_bricks_online_methods=None):
    """Bring the bricks specified in the bricks_list online.

    Args:
        mnode (str): Node on which commands will be executed.
        volname (str): Name of the volume.
        bricks_list (str|list): A brick | list of bricks to bring online.
            Each brick is split on ':' into a node and a brick path.

    Kwargs:
        bring_bricks_online_methods (str|list): Method | list of methods
            using which bricks will be brought online. One method is
            randomly selected from this list and used for the whole call.
            Defaults to ['glusterd_restart', 'volume_start_force'].
            'glusterd_restart' restarts glusterd2 on the node of every
            brick; 'volume_start_force' force-starts the volume once from
            mnode.

    Returns:
        bool : True on successfully bringing all bricks online.
            False otherwise
    """
    if bring_bricks_online_methods is None:
        bring_bricks_online_methods = ['glusterd_restart',
                                       'volume_start_force']
    else:
        bring_bricks_online_methods = to_list(bring_bricks_online_methods)

    if not bring_bricks_online_methods:
        g.log.error("No method specified to bring bricks online")
        return False

    bricks_list = to_list(bricks_list)

    # Select the method for both the default and the caller-supplied list.
    bring_brick_online_method = random.choice(bring_bricks_online_methods)
    g.log.info("Bringing bricks '%s' online with '%s'",
               bricks_list, bring_brick_online_method)

    _rc = True
    failed_to_bring_online_list = []
    if bring_brick_online_method == 'glusterd_restart':
        bring_brick_online_command = "systemctl restart glusterd2"
        for brick in bricks_list:
            brick_node, _ = brick.split(":")
            ret, _, _ = g.run(brick_node, bring_brick_online_command)
            # A non-zero exit status means the restart failed.
            if ret:
                g.log.error("Unable to restart glusterd on node %s",
                            brick_node)
                _rc = False
                failed_to_bring_online_list.append(brick)
            else:
                g.log.info("Successfully restarted glusterd on node %s to "
                           "bring back brick %s online", brick_node, brick)
    elif bring_brick_online_method == 'volume_start_force':
        # A single forced volume start covers every brick of the volume.
        bring_brick_online_command = ("glustercli volume start %s force" %
                                      volname)
        ret, _, _ = g.run(mnode, bring_brick_online_command)
        if ret:
            g.log.error("Unable to start the volume %s with force option",
                        volname)
            _rc = False
        else:
            g.log.info("Successfully restarted volume %s to bring all "
                       "the bricks '%s' online", volname, bricks_list)
    else:
        g.log.error("Invalid method '%s' to bring brick online",
                    bring_brick_online_method)
        return False

    if failed_to_bring_online_list:
        g.log.error("Unable to bring some of the bricks %s online",
                    failed_to_bring_online_list)

    g.log.info("Waiting for 10 seconds for all the bricks to be online")
    time.sleep(10)
    return _rc