def wait_for_all_monitors_to_upgrade(new_version, upgrade_key,
                                     timeout=10 * 60, poll_interval=30):
    """
    Block until every monitor in the cluster has flagged its upgrade to
    new_version as done, or raise once the timeout has expired.

    The monitor names are read once from get_mon_map('admin'). A monitor
    counts as upgraded when the key "mon_<name>_<new_version>_done" exists
    according to monitor_key_exists.

    :param new_version: str of the version to watch
    :param upgrade_key: the cephx key name to use
    :param timeout: int seconds to keep waiting before giving up
                    (defaults to 10 minutes)
    :param poll_interval: int seconds to sleep between checks
                          (defaults to 30 seconds)
    :raises Exception: if some monitor has still not flagged completion
                       once the timeout has expired
    Any error raised by get_mon_map or monitor_key_exists propagates to
    the caller unchanged.
    """
    start_time = time.time()

    mon_map = get_mon_map('admin')
    # The monitor list may be empty or missing; treat both as "no monitors".
    # In that case all() below is vacuously True and we return immediately.
    monitor_list = [mon['name'] for mon in (mon_map['monmap']['mons'] or [])]
    done_keys = ["mon_{}_{}_done".format(mon, new_version)
                 for mon in monitor_list]

    while True:
        # Return as soon as everything is done; never sleep (or time out)
        # after the upgrade has already been observed as complete.
        if all(monitor_key_exists(upgrade_key, key) for key in done_keys):
            return
        if time.time() > (start_time + timeout):
            raise Exception(
                "Timed out after {} seconds waiting for all monitors to "
                "upgrade to {}".format(timeout, new_version))
        # Wait and test again if all monitors are upgraded
        time.sleep(poll_interval)
def roll_monitor_cluster(new_version, upgrade_key):
    """
    Upgrade this monitor in turn with the rest of the monitor cluster.

    Monitors are ordered by sorting their names. The monitor whose name
    sorts first rolls straight away; every other monitor first waits (via
    wait_on_previous_node) for the monitor directly before it in that
    order, and then rolls itself.

    If the monitor map lists no monitors the unit is set to blocked and
    the process exits with status 1. If this host's name is not in the
    monitor list the failure is logged and the unit is set to blocked.

    :param new_version: str of the version to upgrade to
    :param upgrade_key: the cephx key name to use when upgrading
    """
    # NOTE(review): in the source as reviewed, a later definition named
    # roll_monitor_cluster(new_version) rebinds this name - confirm which
    # signature callers are meant to use.
    log('roll_monitor_cluster called with {}'.format(new_version))
    my_name = socket.gethostname()

    mons = get_mon_map('admin')['monmap']['mons']
    monitor_list = [entry['name'] for entry in mons] if mons else []
    if not monitor_list:
        status_set('blocked', 'Unable to get monitor cluster information')
        sys.exit(1)
    log('monitor_list: {}'.format(monitor_list))

    # Monitor names in sorted order; this order is the upgrade order.
    ordered_names = sorted(monitor_list)

    try:
        position = ordered_names.index(my_name)
        log("upgrade position: {}".format(position))
        if position != 0:
            # Not first in line: wait for the monitor just ahead of us.
            predecessor = ordered_names[position - 1]
            status_set('blocked',
                       'Waiting on {} to finish upgrading'.format(
                           predecessor))
            wait_on_previous_node(upgrade_key=upgrade_key,
                                  service='mon',
                                  previous_node=predecessor,
                                  version=new_version)
        # Either we are first, or our predecessor is finished: roll.
        lock_and_roll(upgrade_key=upgrade_key,
                      service='mon',
                      my_name=my_name,
                      version=new_version)
    except ValueError:
        log("Failed to find {} in list {}.".format(
            my_name, ordered_names))
        status_set('blocked', 'failed to upgrade monitor')
def roll_monitor_cluster(new_version):
    """
    Upgrade this monitor in turn with the rest of the monitor cluster.

    Monitors are ordered by sorting their names. The monitor whose name
    sorts first rolls straight away; every other monitor first waits (via
    wait_on_previous_node) for the monitor directly before it in that
    order, and then rolls itself.

    If the monitor map lists no monitors the unit is set to blocked and
    the process exits with status 1. If this host's name is not in the
    monitor list the failure is logged and the unit is set to blocked.

    :param new_version: str of the version to upgrade to; in this function
                        it is only used in the initial log message
    """
    # NOTE(review): in the source as reviewed, an earlier definition named
    # roll_monitor_cluster(new_version, upgrade_key) exists; this definition
    # rebinds that name - confirm which signature callers are meant to use.
    log('roll_monitor_cluster called with {}'.format(new_version))
    my_name = socket.gethostname()

    mons = get_mon_map('admin')['monmap']['mons']
    monitor_list = [entry['name'] for entry in mons] if mons else []
    if not monitor_list:
        status_set('blocked', 'Unable to get monitor cluster information')
        sys.exit(1)
    log('monitor_list: {}'.format(monitor_list))

    # Monitor names in sorted order; this order is the upgrade order.
    ordered_names = sorted(monitor_list)

    try:
        position = ordered_names.index(my_name)
        log("upgrade position: {}".format(position))
        if position != 0:
            # Not first in line: wait for the monitor just ahead of us.
            predecessor = ordered_names[position - 1]
            status_set('blocked',
                       'Waiting on {} to finish upgrading'.format(
                           predecessor))
            wait_on_previous_node(previous_node=predecessor)
        # Either we are first, or our predecessor is finished: roll.
        lock_and_roll(my_name=my_name)
    except ValueError:
        log("Failed to find {} in list {}.".format(
            my_name, ordered_names))
        status_set('blocked', 'failed to upgrade monitor')