def ensure_host_provisioned(host, con_ssh=None):
    """
    Make sure the given host is provisioned, doing a lock/unlock cycle if it is not.

    If the host is currently the active controller, the active controller is
    swacted away first and swacted back once the host has been unlocked.

    Args:
        host (str): hostname or id in string format
        con_ssh (SSHClient):

    Returns (tuple): (return_code(int), msg(str))
        (0, "Host <host> is provisioned")   # already provisioned, nothing done
        (0, "Host <host> is provisioned after lock/unlock")

    Raises:
        exceptions.HostError: the lock is rejected, the unlock fails, or the host
            is still not provisioned after the lock/unlock cycle
        AssertionError: a swact_host call returns a non-zero code

    """
    LOG.info("Checking if host {} is already provisioned ....".format(host))
    if is_host_provisioned(host, con_ssh=con_ssh):
        return 0, "Host {} is provisioned".format(host)

    active_controller = system_helper.get_active_controller_name()
    swact_back_needed = False
    if active_controller == host:
        LOG.tc_step("Swact active controller and ensure active controller is changed")
        exit_code, output = swact_host(hostname=active_controller)
        assert 0 == exit_code, "{} is not recognized as active controller".format(active_controller)
        # Remember the new active controller so it can be swacted away again later
        active_controller = system_helper.get_active_controller_name()
        swact_back_needed = True

    LOG.info("Host {} not provisioned ; doing lock/unlock to provision the host ....".format(host))
    rc, output = lock_host(host, con_ssh=con_ssh)
    # rc -1 is tolerated as well as 0 (presumably "host already locked" - TODO confirm)
    if rc not in (0, -1):
        err_msg = "Lock host {} rejected".format(host)
        raise exceptions.HostError(err_msg)

    rc, output = unlock_host(host, available_only=True, con_ssh=con_ssh)
    if rc != 0:
        err_msg = "Unlock host {} failed: {}".format(host, output)
        raise exceptions.HostError(err_msg)

    if swact_back_needed:
        LOG.tc_step("Swact active controller back and ensure active controller is changed")
        exit_code, output = swact_host(hostname=active_controller)
        assert 0 == exit_code, "{} is not recognized as active controller".format(active_controller)

    LOG.info("Checking if host {} is provisioned after lock/unlock ....".format(host))
    if not is_host_provisioned(host, con_ssh=con_ssh):
        raise exceptions.HostError("Failed to provision host {}".format(host))

    # Delay for the alarm to clear . Could be improved.
    time.sleep(120)
    return 0, "Host {} is provisioned after lock/unlock".format(host)
def _wait_for_upgrade_data_migration_complete(timeout=1800, check_interval=60,
                                              auth_info=Tenant.get('admin_platform'),
                                              fail_ok=False, con_ssh=None):
    """
    Poll 'system upgrade-show' until the upgrade state reports that data
    migration completed or failed, or until the timeout expires.

    Args:
        timeout (int): MAX seconds to wait for data migration to complete
        check_interval (int): seconds to sleep between two polls
        auth_info (dict):
        fail_ok (bool): if True return an error code instead of raising
        con_ssh (SSHClient):

    Returns (tuple):
        (0, "Upgrade data migration is complete")
        (1, "Host Upgrade data migration failed.")    # only if fail_ok
        (3, "Timed out waiting for upgrade data migration to complete state")
            # only if fail_ok

    Raises:
        exceptions.HostError: migration failed or timed out, and fail_ok is False

    """
    complete_msg = "Upgrade data migration is complete"
    failed_msg = "Host Upgrade data migration failed."
    timeout_msg = "Timed out waiting for upgrade data migration to complete state"

    deadline = time.time() + timeout
    while time.time() < deadline:
        cli_output = cli.system('upgrade-show', ssh_client=con_ssh, auth_info=auth_info)[1]
        state_rows = table_parser.filter_table(table_parser.table(cli_output), Property="state")
        state_values = table_parser.get_column(state_rows, 'Value')

        if "data-migration-complete" in state_values:
            LOG.info(complete_msg)
            return 0, complete_msg

        if "data-migration-failed" in state_values:
            LOG.warning(failed_msg)
            if not fail_ok:
                raise exceptions.HostError(failed_msg)
            return 1, failed_msg

        time.sleep(check_interval)

    # The timeout is only logged when it is reported through the return code
    if not fail_ok:
        raise exceptions.HostError(timeout_msg)
    LOG.warning(timeout_msg)
    return 3, timeout_msg
def activate_upgrade(con_ssh=None, fail_ok=False):
    """
    Run 'system upgrade-activate', then wait for alarm 250.001 to clear and
    for the upgrade activation to complete.

    Args:
        con_ssh (SSHClient):
        fail_ok (bool): if True return an error code instead of raising

    Returns (tuple):
        (0, None) - success
        (<rc>, <cli output>)  # cli rejected, applicable if fail_ok is true
        (1, <err_msg>)  # alarm 250.001 not cleared or activation not complete,
                        # applicable if fail_ok is true

    Raises:
        exceptions.CLIRejected: cli rejected and fail_ok is False
        exceptions.HostError: alarm not cleared or activation not complete,
            and fail_ok is False

    """

    def _post_activate_failure(message):
        # Common handling for failures detected after the cli was accepted
        LOG.warning(message)
        if not fail_ok:
            raise exceptions.HostError(message)
        return 1, message

    code, cli_out = cli.system('upgrade-activate', ssh_client=con_ssh, fail_ok=True)
    if code != 0:
        rejected_msg = "CLI system upgrade-activate failed: {}".format(cli_out)
        LOG.warning(rejected_msg)
        if not fail_ok:
            raise exceptions.CLIRejected(rejected_msg)
        return code, cli_out

    alarm_cleared = system_helper.wait_for_alarm_gone("250.001", con_ssh=con_ssh, timeout=900,
                                                      check_interval=60, fail_ok=True)
    if not alarm_cleared:
        remaining_alarms = system_helper.get_alarms(alarm_id="250.001")
        return _post_activate_failure(
            "After activating upgrade alarms are not cleared : {}".format(remaining_alarms))

    if not wait_for_upgrade_activate_complete(fail_ok=True):
        return _post_activate_failure("Upgrade activate failed")

    LOG.info("Upgrade activation complete")
    return 0, None
def delete_imported_load(load_version=None, con_ssh=None, fail_ok=False):
    """
    Delete an imported load via 'system load-delete' and wait for it to be gone.

    Args:
        load_version (str): version used to look up the imported load id
        con_ssh (SSHClient):
        fail_ok (bool): if True return an error code instead of raising when the
            load is not deleted after the cli was accepted

    Returns (tuple):
        (0, "Imported load <load_id> deleted")
        (1, <cli output>)   # cli returned 1; returned regardless of fail_ok
        (1, "Unable to delete imported load <load_id>")  # only if fail_ok

    Raises:
        exceptions.HostError: load not deleted after the cli was accepted, and
            fail_ok is False

    """
    load_id = get_imported_load_id(load_version=load_version, con_ssh=con_ssh)

    rc, output = cli.system('load-delete', load_id, ssh_client=con_ssh, fail_ok=True)
    if rc == 1:
        # NOTE(review): a cli rejection is reported via the return code even when
        # fail_ok is False; kept as-is so existing callers are not broken.
        return 1, output

    if not wait_for_delete_imported_load(load_id, con_ssh=con_ssh, fail_ok=True):
        err_msg = "Unable to delete imported load {}".format(load_id)
        LOG.warning(err_msg)
        if fail_ok:
            return 1, err_msg
        raise exceptions.HostError(err_msg)

    # Return an explicit success tuple so callers can always unpack (rc, msg)
    return 0, "Imported load {} deleted".format(load_id)
def upgrade_hosts(hosts, timeout=HostTimeout.UPGRADE, fail_ok=False, con_ssh=None,
                  auth_info=Tenant.get('admin_platform'), lock=False, unlock=False):
    """
    Upgrade given hosts list one by one

    The active controller is never upgraded by this function: if it is in the
    given list it is skipped. Remaining hosts are upgraded in the order
    controllers, storages, then all other hosts, each group sorted by name.
    The caller's list is not modified.

    Args:
        hosts (list): list of hostname of hosts to be upgraded
        timeout (int): MAX seconds to wait for host to become online after upgrading
        fail_ok (bool):
        con_ssh (SSHClient):
        auth_info (dict):
        lock (bool): passed through to upgrade_host
        unlock (bool): passed through to upgrade_host

    Returns (tuple):
        (0, "hosts <hosts> upgrade done ")
        (1, " Active controller <name> is not upgraded. Must be upgraded first")
            # returned regardless of fail_ok
        (<rc>, <output>)    # first failing upgrade_host result, only if fail_ok

    Raises:
        exceptions.HostError: a host upgrade failed and fail_ok is False

    """
    LOG.info("Upgrading {}...".format(hosts))

    # Work on a copy so the list passed in by the caller is left untouched
    remaining_hosts = list(hosts)

    active_controller = system_helper.get_active_controller_name()
    if active_controller in remaining_hosts:
        remaining_hosts.remove(active_controller)

    LOG.info("Checking if active controller {} is already upgraded ....".format(active_controller))
    if get_hosts_upgrade_target_release(active_controller) in \
            get_hosts_upgrade_target_release(remaining_hosts):
        message = " Active controller {} is not upgraded.  Must be upgraded first".format(
            active_controller)
        LOG.info(message)
        return 1, message

    controllers = sorted(h for h in remaining_hosts if "controller" in h)
    storages = sorted(h for h in remaining_hosts if "storage" in h)
    computes = sorted(h for h in remaining_hosts if h not in storages and h not in controllers)
    hosts_to_upgrade = controllers + storages + computes

    for host in hosts_to_upgrade:
        rc, output = upgrade_host(host, timeout=timeout, fail_ok=fail_ok, con_ssh=con_ssh,
                                  auth_info=auth_info, lock=lock, unlock=unlock)
        if rc != 0:
            if fail_ok:
                return rc, output
            raise exceptions.HostError(output)
        LOG.info("Host {} upgrade completed".format(host))

    return 0, "hosts {} upgrade done ".format(hosts_to_upgrade)
def upgrade_host(host, timeout=InstallTimeout.UPGRADE, fail_ok=False, con_ssh=None,
                 auth_info=Tenant.get('admin_platform'), lock=False, unlock=False):
    """
    Upgrade given host

    On an AIO simplex system the work is delegated to simplex_host_upgrade and
    its result is returned directly.

    Args:
        host (str):
        timeout (int): MAX seconds for the host-upgrade cli and for the host to
            become online afterwards
        fail_ok (bool):
        con_ssh (SSHClient):
        auth_info (dict):
        lock (bool): lock the host before upgrading if it is unlocked
        unlock (bool): unlock the host after the upgrade and wait for the
            400.001 alarm to clear

    Returns (tuple):
        (0, None) - host is upgraded and in online state
        (1, "Cli host upgrade rejected. Applicable only if fail_ok")
        (2, "Host failed data migration. Applicable only if fail_ok")
        (3, "Host did not come online after upgrade. Applicable if fail_ok ")
        (4, "Host fail lock before starting upgrade". Applicable if lock arg is True and fail_ok")
        (5, "Host fail to unlock after host upgrade.  Applicable if unlock arg is True and fail_ok")
        (6, "Host unlocked after upgrade, but 400.001 alarm is not cleared.
            Applicable if unlock arg is True and fail_ok")

    Raises:
        exceptions.HostError: any of the failures above when fail_ok is False

    """
    LOG.info("Upgrading host {}...".format(host))

    if lock:
        admin_state = system_helper.get_host_values(host, 'administrative', con_ssh=con_ssh)[0]
        if admin_state == HostAdminState.UNLOCKED:
            message = "Host is not locked. Locking host before starting upgrade"
            LOG.info(message)
            rc, output = host_helper.lock_host(host, con_ssh=con_ssh, fail_ok=True)
            # rc -1 is tolerated as well as 0 (presumably "already locked" - TODO confirm)
            if rc not in (0, -1):
                err_msg = "Host {} fail on lock before starting upgrade: {}".format(host, output)
                if fail_ok:
                    return 4, err_msg
                raise exceptions.HostError(err_msg)

    if system_helper.is_aio_simplex():
        exitcode, output = simplex_host_upgrade(con_ssh=con_ssh)
        return exitcode, output

    exitcode, output = cli.system('host-upgrade', host, ssh_client=con_ssh, fail_ok=True,
                                  auth_info=auth_info, timeout=timeout)
    if exitcode == 1:
        err_msg = "Host {} cli upgrade host failed: {}".format(host, output)
        if fail_ok:
            return 1, err_msg
        raise exceptions.HostError(err_msg)

    # sleep for 180 seconds to let host be re-installed with upgrade release
    time.sleep(180)

    if not system_helper.wait_for_host_values(host, timeout=timeout, check_interval=60,
                                              availability=HostAvailState.ONLINE,
                                              con_ssh=con_ssh, fail_ok=fail_ok):
        err_msg = "Host {} did not become online after upgrade".format(host)
        if fail_ok:
            return 3, err_msg
        raise exceptions.HostError(err_msg)

    # Data migration is only waited for when the upgraded host is controller-1
    if host.strip() == "controller-1":
        rc, output = _wait_for_upgrade_data_migration_complete(timeout=timeout,
                                                               auth_info=auth_info,
                                                               fail_ok=fail_ok, con_ssh=con_ssh)
        if rc != 0:
            err_msg = "Host {} upgrade data migration failure: {}".format(host, output)
            if fail_ok:
                return 2, err_msg
            raise exceptions.HostError(err_msg)

    if unlock:
        rc, output = host_helper.unlock_host(host, fail_ok=True, available_only=True,
                                             con_ssh=con_ssh)
        if rc != 0:
            err_msg = "Host {} fail to unlock after host upgrade: {}".format(host, output)
            if fail_ok:
                return 5, err_msg
            raise exceptions.HostError(err_msg)

        # wait until 400.001 alarms get cleared
        if not system_helper.wait_for_alarm_gone("400.001", con_ssh=con_ssh, fail_ok=True):
            err_msg = "Alarms did not clear after host {} upgrade and unlock: ".format(host)
            if fail_ok:
                return 6, err_msg
            raise exceptions.HostError(err_msg)

    LOG.info("Upgrading host {} complete ...".format(host))
    return 0, None