def wait_for_all_monitors_to_upgrade(new_version, upgrade_key):
    """Block until every monitor has flagged its upgrade as done.

    The monitor names are read once from ``get_mon_map('admin')``.  The
    function then polls ``monitor_key_exists`` for the key
    ``mon_<name>_<new_version>_done`` of every monitor and returns as soon
    as all of them exist.

    :param new_version: str of the version to watch
    :param upgrade_key: the cephx key name to use
    :raises Exception: if not all monitors are done after 10 minutes.
        Any error raised by the helper calls (the previous implementation
        explicitly re-raised subprocess.CalledProcessError) propagates
        unchanged.
    """
    timeout = 10 * 60  # seconds to wait overall before giving up
    poll_interval = 30  # seconds between two rounds of checks
    start_time = time.time()

    mons = get_mon_map('admin')['monmap']['mons']
    monitor_list = [mon['name'] for mon in mons] if mons else []

    while True:
        all_done = all(
            monitor_key_exists(
                upgrade_key,
                "{}_{}_{}_done".format("mon", mon, new_version))
            for mon in monitor_list)
        if all_done:
            # Return straight away: do not sleep another interval, and do
            # not report a timeout for an upgrade that has just completed.
            return

        if time.time() > start_time + timeout:
            raise Exception(
                "Timed out after {} seconds waiting for all monitors to "
                "upgrade to {}".format(timeout, new_version))

        # Wait 30 seconds and test again if all monitors are upgraded
        time.sleep(poll_interval)
def wait_on_previous_node(previous_node):
    """Wait until the previous node has finished upgrading or looks dead.

    Polls ``monitor_key_exists('admin', '<previous_node>_done')``.  While
    that key is absent, the value stored under ``<previous_node>_start`` is
    read with ``monitor_key_get`` and compared against the local clock.  If
    the previous node started more than 10 minutes ago it is considered
    dead and this function returns; otherwise it sleeps a random 5-29
    seconds and polls again.

    NOTE: This assumes the clusters clocks are somewhat accurate.  If the
    hosts clock is really far off it may cause it to skip the previous
    node even though it shouldn't.

    NOTE(review): this function is defined twice in this file with the
    same body; the later definition is the one that ends up bound to the
    name.  Consider removing one of them.

    :param previous_node: name of the node to wait for; used to build the
        ``_done`` and ``_start`` key names.
    """
    timeout = 10 * 60  # seconds before the previous node is presumed dead
    done_key = "{}_done".format(previous_node)
    start_key = "{}_start".format(previous_node)

    log("Previous node is: {}".format(previous_node))

    previous_node_finished = monitor_key_exists('admin', done_key)

    while previous_node_finished is False:
        log("{} is not finished. Waiting".format(previous_node))
        current_timestamp = time.time()
        previous_node_start_time = monitor_key_get('admin', start_key)

        # NOTE(review): monitor_key_get is not visible here.  The original
        # None check implies the key may be missing, and the stored value
        # is presumably a textual timestamp - verify.  Coerce it to a float
        # so the comparison below is always number against number; an
        # absent or unparsable value means "start time unknown".
        try:
            start_time = float(previous_node_start_time)
        except (TypeError, ValueError):
            start_time = None

        cutoff = current_timestamp - timeout
        if start_time is not None and cutoff > start_time:
            # Previous node is probably dead.  Lets move on
            log(
                "Waited 10 mins on node {}. current time: {} > "
                "previous node start time: {} Moving on".format(
                    previous_node,
                    cutoff,
                    previous_node_start_time))
            return

        # I have to wait.  Sleep a random amount of time and then
        # check if I can lock,upgrade and roll.  This path is also taken
        # when the start time is unknown, so the loop never spins without
        # sleeping and always re-polls the done key.
        wait_time = random.randrange(5, 30)
        log('waiting for {} seconds'.format(wait_time))
        time.sleep(wait_time)
        previous_node_finished = monitor_key_exists('admin', done_key)
def wait_on_previous_node(previous_node):
    """Wait until the previous node has finished upgrading or looks dead.

    Polls ``monitor_key_exists('admin', '<previous_node>_done')``.  While
    that key is absent, the value stored under ``<previous_node>_start`` is
    read with ``monitor_key_get`` and compared against the local clock.  If
    the previous node started more than 10 minutes ago it is considered
    dead and this function returns; otherwise it sleeps a random 5-29
    seconds and polls again.

    NOTE: This assumes the clusters clocks are somewhat accurate.  If the
    hosts clock is really far off it may cause it to skip the previous
    node even though it shouldn't.

    NOTE(review): a definition with the same name and body appears
    directly above this one; being the later of the two, this is the one
    that ends up bound to the name.  Consider removing the duplicate.

    :param previous_node: name of the node to wait for; used to build the
        ``_done`` and ``_start`` key names.
    """
    timeout = 10 * 60  # seconds before the previous node is presumed dead
    done_key = "{}_done".format(previous_node)
    start_key = "{}_start".format(previous_node)

    log("Previous node is: {}".format(previous_node))

    previous_node_finished = monitor_key_exists('admin', done_key)

    while previous_node_finished is False:
        log("{} is not finished. Waiting".format(previous_node))
        current_timestamp = time.time()
        previous_node_start_time = monitor_key_get('admin', start_key)

        # NOTE(review): monitor_key_get is not visible here.  The original
        # None check implies the key may be missing, and the stored value
        # is presumably a textual timestamp - verify.  Coerce it to a float
        # so the comparison below is always number against number; an
        # absent or unparsable value means "start time unknown".
        try:
            start_time = float(previous_node_start_time)
        except (TypeError, ValueError):
            start_time = None

        cutoff = current_timestamp - timeout
        if start_time is not None and cutoff > start_time:
            # Previous node is probably dead.  Lets move on
            log(
                "Waited 10 mins on node {}. current time: {} > "
                "previous node start time: {} Moving on".format(
                    previous_node,
                    cutoff,
                    previous_node_start_time))
            return

        # I have to wait.  Sleep a random amount of time and then
        # check if I can lock,upgrade and roll.  This path is also taken
        # when the start time is unknown, so the loop never spins without
        # sleeping and always re-polls the done key.
        wait_time = random.randrange(5, 30)
        log('waiting for {} seconds'.format(wait_time))
        time.sleep(wait_time)
        previous_node_finished = monitor_key_exists('admin', done_key)