def are_all_self_heal_daemons_are_online(mnode, volname):
    """Check that the self-heal daemon is online on every node of the pool.

    Pure distribute volumes are reported as online without any check.

    Args:
        mnode (str): Node on which cmd has to be executed.
        volname (str): volume name

    Returns:
        bool : True if the self-heal daemon status is '1' on every node
            returned by the pool list (or the volume is a distribute
            volume). False if the pool list is empty or at least one
            daemon reports a different status.
        NoneType: None if unable to get the volume status
    """
    from glustolibs.gluster.volume_libs import is_distribute_volume

    if is_distribute_volume(mnode, volname):
        g.log.info("Volume %s is a distribute volume. "
                   "Hence not checking for self-heal daemons "
                   "to be online", volname)
        return True

    failure_msg = ("Verifying all self-heal-daemons are online failed for "
                   "volume %s" % volname)

    # Fetch only the 'shd' section of the volume status.
    shd_status = get_volume_status(mnode=mnode, volname=volname,
                                   service='shd')
    if shd_status is None:
        g.log.error(failure_msg)
        return None

    # Every node in the trusted pool is expected to report a daemon status.
    from glustolibs.gluster.peer_ops import nodes_from_pool_list
    pool_nodes = nodes_from_pool_list(mnode)
    if not pool_nodes:
        g.log.error(failure_msg)
        return False

    # Read the status of every node up front (no short-circuit), so a
    # node missing from the status output still raises KeyError.
    per_node_status = [
        shd_status[volname][pool_node]['Self-heal Daemon']['status']
        for pool_node in pool_nodes
    ]
    offline = [value for value in per_node_status if value != '1']

    # The command result is not used here; presumably it is run so the
    # raw status output ends up in the logs.
    g.run(mnode, ("gluster volume status %s shd" % volname))

    if offline:
        g.log.error("Some of the self-heal Daemons are offline")
        return False

    g.log.info("All self-heal Daemons are online")
    return True
def wait_for_self_heal_daemons_to_be_online(mnode, volname, timeout=300):
    """Waits for the volume self-heal-daemons to be online until timeout

    The daemon status is polled every 10 seconds. A poll that returns
    False or None (volume status unavailable) is treated as "not online
    yet" and retried until the timeout is used up.

    Args:
        mnode (str): Node on which commands will be executed.
        volname (str): Name of the volume.

    Kwargs:
        timeout (int): timeout value in seconds to wait for
            self-heal-daemons to be online.

    Returns:
        True if all self-heal-daemons are online within timeout (or the
        volume is a distribute volume), False otherwise
    """
    from glustolibs.gluster.volume_libs import is_distribute_volume

    if is_distribute_volume(mnode, volname):
        g.log.info("Volume %s is a distribute volume. "
                   "Hence not waiting for self-heal daemons "
                   "to be online", volname)
        return True

    poll_interval = 10
    waited = 0
    while waited < timeout:
        if are_all_self_heal_daemons_are_online(mnode, volname):
            g.log.info("All self-heal-daemons of the volume '%s' are online ",
                       volname)
            return True
        time.sleep(poll_interval)
        waited += poll_interval

    # Pass the values as separate logging arguments. Handing over a single
    # tuple for a two-placeholder format string makes the logging module
    # fail while formatting, and the timeout message is lost.
    g.log.error("All self-heal-daemons of the volume '%s' are not online "
                "even after %d minutes", volname, timeout / 60.0)
    return False