def prepare_restore_env():
    # Full path to the backup on the test server
    global STORE_BACKUP_PATH
    global HAS_WIPE_CEPH_OSDS
    global WIPE_CEPH_OSDS

    STORE_BACKUP_PATH = RestoreVars.get_restore_var('BACKUP_SRC_PATH')
    HAS_WIPE_CEPH_OSDS = RestoreVars.get_restore_var('HAS_WIPE_CEPH_OSDS')
    WIPE_CEPH_OSDS = RestoreVars.get_restore_var('WIPE_CEPH_OSDS')

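# Sketch only: RestoreVars (and InstallVars) are assumed to behave like simple
# module-level stores keyed by upper-cased names, roughly equivalent to the
# stand-in below. The real classes live in the framework's setup/consts
# modules; this is only to illustrate the get_restore_var()/set_restore_var()
# pattern used throughout this file.
#
#     class RestoreVars:
#         _vars = {}
#
#         @classmethod
#         def set_restore_var(cls, **kwargs):
#             cls._vars.update({k.upper(): v for k, v in kwargs.items()})
#
#         @classmethod
#         def get_restore_var(cls, name):
#             return cls._vars.get(name.upper())
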
def test_restore_simplex_install(install_setup):
    """
    Complete fresh_install steps for a simplex lab

    Test Setups:
        - Retrieve dictionary containing lab information
        - Retrieve required paths to directories, images, and licenses
        - Initialize build server and boot server objects
        - Retrieve which steps are to be skipped

    Test Steps:
        - Boot controller-0
        - Run restore on controller-0
        - Unlock controller-0
    """
    lab = install_setup["lab"]
    controller0_node = lab["controller-0"]
    patch_dir = install_setup["directories"]["patches"]
    patch_server = install_setup["servers"]["patches"]

    do_boot_c0 = RestoreVars.get_restore_var('RESTORE_PRE_BOOT_CONTROLLER0')
    stop_before_ansible_restore = \
        RestoreVars.get_restore_var('STOP_BEFORE_ANSIBLE_RESTORE')

    if do_boot_c0:
        fresh_install_helper.install_controller(
            sys_type=SysType.AIO_SX, patch_dir=patch_dir,
            patch_server_conn=patch_server.ssh_conn, init_global_vars=True)
    else:
        LOG.tc_step("Skipping controller-0 install")

    if stop_before_ansible_restore:
        skip("Stopping test before restoring")

    if InstallVars.get_install_var('IPV6_OAM'):
        restore_helper.setup_ipv6_oam(controller0_node)

    restore_helper.restore_platform()

    fresh_install_helper.unlock_active_controller(controller0_node)

    controller0_node.telnet_conn.hostname = r"controller\-[01]"
    controller0_node.telnet_conn.set_prompt(Prompt.CONTROLLER_PROMPT)
    if controller0_node.ssh_conn is None:
        controller0_node.ssh_conn = install_helper.ssh_to_controller(
            controller0_node.host_ip)
    install_helper.update_auth_url(ssh_con=controller0_node.ssh_conn)

    if lab.get("floating ip"):
        setup_tis_ssh(lab)

    fresh_install_helper.wait_for_hosts_ready(controller0_node.name, lab=lab)

    fresh_install_helper.reset_global_vars()

def restore_volumes(con_ssh=None):
    LOG.info('Restore cinder volumes using new (UPSTREAM) cinder-backup CLIs')
    # Get all registered cinder volumes
    if con_ssh is None:
        con_ssh = ControllerClient.get_active_controller()
    using_cinder_backup = RestoreVars.get_restore_var('cinder_backup')
    volumes = cinder_helper.get_volumes()
    in_use_volumes = []
    if len(volumes) > 0:
        LOG.info("System has {} registered volumes: {}".format(
            len(volumes), volumes))
        if not using_cinder_backup:
            rc, restored_vols = \
                install_helper.restore_cinder_volumes_from_backup()
        else:
            in_use_volumes = create_dummy_rbd_images(volumes, con_ssh=con_ssh)
            rc, restored_vols = restore_from_cinder_backups(volumes, con_ssh)

        assert rc == 0, "All or some volumes failed to import: " \
                        "Restored volumes {}; Expected volumes {}".format(
                            restored_vols, volumes)
        LOG.info('All {} volumes are imported'.format(len(restored_vols)))

        LOG.info('Set back the original status for all in-use volumes: '
                 '{}'.format(in_use_volumes))
        for volume_id in in_use_volumes:
            con_ssh.exec_cmd('cinder reset-state --state in-use ' + volume_id)
    else:
        LOG.info("System has NO registered volumes; skipping cinder volume "
                 "restore")

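# Usage sketch: the restore test cases below call restore_volumes() with no
# arguments, so it falls back to ControllerClient.get_active_controller() for
# its ssh session. Passing a connection explicitly works too, e.g.
# (hypothetical call site):
#
#     restore_volumes(con_ssh=controller_node.ssh_conn)
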
def test_standard_restore_install(install_setup):
    """
    Configure the active controller

    Prerequisites:
        - pxeboot has been set up.

    Test Setups:
        - Retrieve dictionary containing lab information
        - Retrieve required paths to directories, images, and licenses
        - Determine active controller
        - Initialize build server and boot server objects
        - Retrieve which steps are to be skipped

    Test Steps:
        - Install controller-0
        - Unlock controller-0
        - Boot the other hosts
        - Unlock the other hosts
    """
    lab = install_setup["lab"]
    hosts = lab["hosts"]
    boot_device = lab['boot_device_dict']
    controller0_node = lab["controller-0"]
    patch_dir = install_setup["directories"]["patches"]
    patch_server = install_setup["servers"]["patches"]

    # Power off all hosts except controller-0
    hostnames = [
        hostname for hostname in lab['hosts']
        if 'controller-0' not in hostname
    ]
    vlm_helper.power_off_hosts(hostnames, lab=lab, count=2)

    do_boot_c0 = RestoreVars.get_restore_var('RESTORE_PRE_BOOT_CONTROLLER0')
    stop_before_ansible_restore = \
        RestoreVars.get_restore_var('STOP_BEFORE_ANSIBLE_RESTORE')

    if do_boot_c0:
        fresh_install_helper.install_controller(
            sys_type=SysType.REGULAR, patch_dir=patch_dir,
            patch_server_conn=patch_server.ssh_conn, init_global_vars=True)
    else:
        LOG.tc_step("Skipping controller-0 install")

    if stop_before_ansible_restore:
        skip("Stopping test before restoring")

    if InstallVars.get_install_var('IPV6_OAM'):
        restore_helper.setup_ipv6_oam(controller0_node)

    restore_helper.restore_platform()
    fresh_install_helper.unlock_active_controller(controller0_node)

    controller0_node.telnet_conn.hostname = r"controller\-[01]"
    controller0_node.telnet_conn.set_prompt(Prompt.CONTROLLER_PROMPT)
    if controller0_node.ssh_conn is None:
        controller0_node.ssh_conn = install_helper.ssh_to_controller(
            controller0_node.host_ip)
    install_helper.update_auth_url(ssh_con=controller0_node.ssh_conn)

    # Boot the hosts other than controller-0
    fresh_install_helper.restore_boot_hosts(boot_device)

    # Unlock controller-1
    fresh_install_helper.unlock_hosts(['controller-1'],
                                      con_ssh=controller0_node.ssh_conn)

    # Unlock computes
    fresh_install_helper.unlock_hosts(
        [host_ for host_ in hosts if 'compute' in host_],
        con_ssh=controller0_node.ssh_conn)

    fresh_install_helper.send_arp_cmd()

    if lab.get("floating ip"):
        collect_sys_net_info(lab)
        setup_tis_ssh(lab)

    fresh_install_helper.reset_global_vars()

    fresh_install_helper.verify_install_uuid(lab)

def test_upgrade_restore(restore_setup):
    """
    This test is the restore part of the simplex upgrade: it restores the
    backup files with the N+1 load. test_upgrade_simplex_system.py creates
    the backup files on the N-1 load.

    Args:
        restore_setup: fixture that checks backup availability and the
            parameters required for the restore

    Examples:
        To execute:
        py.test --lab=wcp_67 --backup-path=/sandbox/upgrade
        --backup-build-id='2018-03-16_11-04-06'
        --backup-builds-dir=TC_18.03_Host --skip_setup_feed
        tc_bnr/restore/test_upgrade_simplex_restore.py

    Steps:
        1. Set the feed in tuxlab
        2. Boot from tuxlab
        3. Log in and set password
        4. Move backup files to the controller
        5. Execute upgrade_controller_simplex <backup file>
        6. Restore volumes
        7. Restore images
        8. Restore computes
        9. Activate upgrade
        10. Complete upgrade
        11. Delete the imported load

    Teardown:
        Unreserve VLM
    """
    # restore_setup is called to set up the restore environment and files.
    controller0 = 'controller-0'
    lab = restore_setup["lab"]
    tis_backup_files = restore_setup['tis_backup_files']
    backup_src = RestoreVars.get_restore_var('backup_src'.upper())
    backup_src_path = RestoreVars.get_restore_var('backup_src_path'.upper())

    controller_node = lab[controller0]
    con_ssh = ControllerClient.get_active_controller(name=lab['short_name'],
                                                     fail_ok=True)

    if not con_ssh:
        LOG.info("Establish ssh connection with {}".format(controller0))
        controller_prompt = Prompt.TIS_NODE_PROMPT_BASE.format(
            lab['name'].split('_')[0]) + '|' + Prompt.CONTROLLER_0
        controller_node.ssh_conn = install_helper.ssh_to_controller(
            controller_node.host_ip, initial_prompt=controller_prompt)
        controller_node.ssh_conn.deploy_ssh_key()
        con_ssh = controller_node.ssh_conn

    LOG.info("Restore system from backup....")
    system_backup_file = [
        file for file in tis_backup_files if "system.tgz" in file
    ].pop()
    images_backup_file = [
        file for file in tis_backup_files if "images.tgz" in file
    ].pop()

    LOG.tc_step("Restoring controller-0")
    LOG.info("System config restore from backup file {} ...".format(
        system_backup_file))

    if backup_src.lower() == 'usb':
        system_backup_path = "{}/{}".format(BackupRestore.USB_BACKUP_PATH,
                                            system_backup_file)
    else:
        system_backup_path = "{}{}".format(HostLinuxUser.get_home(),
                                           system_backup_file)

    LOG.tc_step("Restoring the backup system files")
    install_helper.upgrade_controller_simplex(
        system_backup=system_backup_path,
        tel_net_session=controller_node.telnet_conn, fail_ok=True)

    LOG.info('Re-connect to the active controller using ssh')
    con_ssh.close()
    time.sleep(60)
    con_ssh = install_helper.ssh_to_controller(controller_node.host_ip,
                                               retry=True)
    controller_node.ssh_conn = con_ssh
    ControllerClient.set_active_controller(con_ssh)

    if backup_src.lower() == 'local':
        images_backup_path = "{}{}".format(HostLinuxUser.get_home(),
                                           images_backup_file)
        common.scp_from_test_server_to_active_controller(
            "{}/{}".format(backup_src_path, images_backup_file),
            HostLinuxUser.get_home())
    else:
        images_backup_path = "{}/{}".format(BackupRestore.USB_BACKUP_PATH,
                                            images_backup_file)

    LOG.tc_step(
        "Images restore from backup file {} ...".format(images_backup_file))

    new_prompt = r'{}.*~.*\$ |controller\-0.*~.*\$ '.format(
        lab['name'].split('_')[0])
    LOG.info('Set prompt to: {}'.format(new_prompt))
    con_ssh.set_prompt(new_prompt)

    install_helper.restore_controller_system_images(
        images_backup=images_backup_path,
        tel_net_session=controller_node.telnet_conn, fail_ok=True)

    LOG.debug('Wait 60 seconds for the system to be ready')
    time.sleep(60)

    LOG.tc_step("Copying backup files to /opt/backups ... ")
    if backup_src.lower() == 'local':
        con_ssh.exec_cmd("rm -f {} {}".format(system_backup_path,
                                              images_backup_path))

        cmd_rm_known_host = \
            r'sed -i "s/^[^#]\(.*\)"/#\1/g /etc/ssh/ssh_known_hosts; \sync'
        con_ssh.exec_sudo_cmd(cmd_rm_known_host)

        # Transfer all backup files from the test server to /opt/backups
        with con_ssh.login_as_root():
            con_ssh.scp_on_dest(source_user=TestFileServer.get_user(),
                                source_ip=TestFileServer.get_server(),
                                source_pswd=TestFileServer.get_password(),
                                source_path=backup_src_path + "/*",
                                dest_path=StxPath.BACKUPS + '/',
                                timeout=1200)
    else:
        # Copy all backup files from USB to /opt/backups
        cmd = " cp {}/* {}".format(BackupRestore.USB_BACKUP_PATH,
                                   StxPath.BACKUPS)
        con_ssh.exec_sudo_cmd(cmd, expect_timeout=600)

    LOG.tc_step("Checking if backup files are copied to /opt/backups ... ")
    assert int(con_ssh.exec_cmd(
        "ls {} | wc -l".format(StxPath.BACKUPS))[1]) >= 2, \
        "Missing backup files in {}".format(StxPath.BACKUPS)

    LOG.tc_step("Restoring Cinder Volumes ...")
    restore_volumes()

    LOG.tc_step("Delete backup files from {} ....".format(StxPath.BACKUPS))
    con_ssh.exec_sudo_cmd("rm -rf {}/*".format(StxPath.BACKUPS))

    LOG.tc_step("Restoring compute")
    install_helper.restore_compute(
        tel_net_session=controller_node.telnet_conn)

    # Activate the upgrade
    LOG.tc_step("Activating upgrade....")
    upgrade_helper.activate_upgrade()
    # LOG.info("Upgrade activate complete.....")

    # Complete upgrade
    LOG.tc_step("Completing upgrade")
    upgrade_helper.complete_upgrade()
    LOG.info("Upgrade is complete......")

    LOG.info("Lab: {} upgraded successfully".format(lab['name']))

    # Delete the previous load
    LOG.tc_step("Deleting imported load... ")
    upgrade_helper.delete_imported_load()

def pytest_configure(config):
    # Lab fresh_install params
    lab_arg = config.getoption('lab')
    resume_install = config.getoption('resumeinstall')
    skiplist = config.getoption('skiplist')
    wipedisk = config.getoption('wipedisk')
    controller0_ceph_mon_device = config.getoption('ceph_mon_dev_controller_0')
    controller1_ceph_mon_device = config.getoption('ceph_mon_dev_controller_1')
    ceph_mon_gib = config.getoption('ceph_mon_gib')
    install_conf = config.getoption('installconf')
    lab_file_dir = config.getoption('file_dir')
    build_server = config.getoption('build_server')
    boot_server = config.getoption('boot_server')
    tis_build_dir = config.getoption('tis_build_dir')
    tis_builds_dir = config.getoption('tis_builds_dir')
    install_license = config.getoption('upgrade_license')
    heat_templates = config.getoption('heat_templates')
    guest_image = config.getoption('guest_image_path')
    boot_type = config.getoption('boot_list')
    iso_path = config.getoption('iso_path')
    low_lat = config.getoption('low_latency')
    security = config.getoption('security')
    controller = config.getoption('controller')
    compute = config.getoption('compute')
    storage = config.getoption('storage')
    stop_step = config.getoption('stop_step')
    drop_num = config.getoption('drop_num')
    patch_dir = config.getoption('patch_dir')
    kubernetes = config.getoption('kubernetes_config')
    no_openstack = config.getoption('no_openstack')
    deploy_openstack_from_controller_1 = config.getoption(
        'deploy_openstack_from_controller_1')
    dc_ipv6 = config.getoption('dc_ipv6')
    helm_chart_path = config.getoption('helm_chart_path')
    no_manage = config.getoption('no_manage')
    extract_deploy_config = config.getoption('extract_deploy_config')
    vswitch_type = config.getoption('vswitch_type')
    ipv6_oam = config.getoption('ipv6_oam')
    subcloud_host = config.getoption('subcloud_host')

    # Restore parameters
    backup_src_path = config.getoption('backup_path')
    has_wipe_ceph_osds = config.getoption('has_wipe_ceph_osds')
    wipe_ceph_osds = config.getoption('wipe_ceph_osds')
    restore_pre_boot_controller0 = config.getoption(
        'restore_pre_boot_controller0')
    stop_before_ansible_restore = config.getoption(
        'stop_before_ansible_restore')

    RestoreVars.set_restore_var(backup_src_path=backup_src_path)
    RestoreVars.set_restore_var(has_wipe_ceph_osds=has_wipe_ceph_osds)
    RestoreVars.set_restore_var(wipe_ceph_osds=wipe_ceph_osds)
    RestoreVars.set_restore_var(
        restore_pre_boot_controller0=restore_pre_boot_controller0)
    RestoreVars.set_restore_var(
        stop_before_ansible_restore=stop_before_ansible_restore)

    if not lab_arg:
        raise ValueError("Lab name must be provided")

    vswitch_types = [
        VSwitchType.OVS, VSwitchType.OVS_DPDK, VSwitchType.AVS,
        VSwitchType.NONE
    ]
    if vswitch_type not in vswitch_types:
        raise ValueError(
            "Invalid vswitch type {}; valid types are: {}".format(
                vswitch_type, vswitch_types))

    lab_dict = setups.get_lab_dict(lab_arg)
    lab_name = lab_dict['name']

    if 'yow' in lab_name:
        lab_name = lab_name[4:]

    if subcloud_host:
        is_subcloud = False
        subcloud_name = None
        dc_float_ip = None
        dc_lab_name = None
    else:
        is_subcloud, subcloud_name, dc_float_ip, dc_lab_name = \
            setups.is_lab_subcloud(lab_dict, ipv6=ipv6_oam)
        if is_subcloud and 'yow' in dc_lab_name:
            dc_lab_name = dc_lab_name[4:]

    if resume_install is True:
        resume_install = fresh_install_helper.get_resume_step(lab_dict)
        LOG.info("Resume Install step at {}".format(resume_install))

    if not install_conf:
        build_server = build_server if build_server else \
            BuildServerPath.DEFAULT_BUILD_SERVER

        if not tis_builds_dir and not tis_build_dir:
            # Take the latest master load from CENGN
            host_build_dir_path = BuildServerPath.DEFAULT_HOST_BUILD_PATH
        elif tis_build_dir and os.path.isabs(tis_build_dir):
            host_build_dir_path = tis_build_dir
        else:
            # Take the in-house StarlingX_Upstream_build
            tis_builds_dir = tis_builds_dir if tis_builds_dir else ''
            tis_build_dir = tis_build_dir if tis_build_dir \
                else BuildServerPath.LATEST_BUILD
            host_build_dir_path = os.path.join(
                BuildServerPath.DEFAULT_WORK_SPACE, tis_builds_dir,
                tis_build_dir)

        host_build_dir_path = os.path.normpath(host_build_dir_path)
        if host_build_dir_path.endswith('/latest_build'):
            build_id = build_info.get_latest_host_build_dir(
                build_server=build_server,
                latest_build_simlink=host_build_dir_path)
            host_build_dir_path = \
                host_build_dir_path[:-len('latest_build')] + build_id

        files_server = build_server
        if lab_file_dir:
            if lab_file_dir.find(":/") != -1:
                files_server = lab_file_dir[:lab_file_dir.find(":/")]
                lab_file_dir = lab_file_dir[lab_file_dir.find(":") + 1:]
            if not os.path.isabs(lab_file_dir):
                lab_file_dir = "{}/lab/yow/{}".format(host_build_dir_path,
                                                      lab_file_dir)
        else:
            lab_file_dir = "{}/lab/yow/{}".format(
                host_build_dir_path, lab_name if lab_name else '') \
                if not is_subcloud else "{}/lab/yow/{}".format(
                    host_build_dir_path, dc_lab_name if dc_lab_name else '')

        if not heat_templates:
            if BuildServerPath.BldsDirNames.TC_19_05_BUILD in \
                    host_build_dir_path:
                heat_templates = os.path.join(
                    BuildServerPath.EAR_HOST_BUILD_PATH,
                    BuildServerPath.HEAT_TEMPLATES)
            else:
                heat_templates = os.path.join(
                    BuildServerPath.STX_HOST_BUILDS_DIR, 'latest_full_build',
                    BuildServerPath.HEAT_TEMPLATES)
        elif not os.path.isabs(heat_templates):
            heat_templates = os.path.join(host_build_dir_path, heat_templates)

        if not helm_chart_path:
            helm_path_in_build = BuildServerPath.STX_HELM_CHARTS_CENGN \
                if '/import/' in host_build_dir_path or '19.05' \
                in host_build_dir_path else \
                BuildServerPath.TITANIUM_HELM_CHARTS
            helm_chart_path = os.path.join(host_build_dir_path,
                                           helm_path_in_build)

        if boot_type.lower() in ('usb_burn', 'pxe_iso', 'iso_feed') and \
                not iso_path:
            iso_path_in_build = BuildServerPath.ISO_PATH_CENGN \
                if '/import/' in host_build_dir_path \
                else BuildServerPath.ISO_PATH
            iso_path = os.path.join(host_build_dir_path, iso_path_in_build)

        install_conf = setups.write_installconf(
            lab=lab_arg, controller=controller, compute=compute,
            storage=storage, lab_files_dir=lab_file_dir, patch_dir=patch_dir,
            tis_build_dir=host_build_dir_path, build_server=build_server,
            files_server=files_server, license_path=install_license,
            guest_image=guest_image, heat_templates=heat_templates,
            boot=boot_type, iso_path=iso_path, security=security,
            low_latency=low_lat, stop=stop_step, vswitch_type=vswitch_type,
            boot_server=boot_server, resume=resume_install, skip=skiplist,
            kubernetes=kubernetes, helm_chart_path=helm_chart_path)

    setups.set_install_params(
        lab=lab_arg, skip=skiplist, resume=resume_install, wipedisk=wipedisk,
        drop=drop_num, installconf_path=install_conf,
        controller0_ceph_mon_device=controller0_ceph_mon_device,
        controller1_ceph_mon_device=controller1_ceph_mon_device,
        ceph_mon_gib=ceph_mon_gib, boot=boot_type, iso_path=iso_path,
        security=security, low_latency=low_lat, stop=stop_step,
        patch_dir=patch_dir, vswitch_type=vswitch_type,
        boot_server=boot_server, dc_float_ip=dc_float_ip, ipv6_oam=ipv6_oam,
        install_subcloud=subcloud_name, kubernetes=kubernetes,
        no_openstack=no_openstack, dc_ipv6=dc_ipv6,
        helm_chart_path=helm_chart_path, no_manage=no_manage,
        deploy_openstack_from_controller_1=deploy_openstack_from_controller_1,
        extract_deploy_config=extract_deploy_config,
        subcloud_host=subcloud_host)

    frame_str = '*' * len('Install Arguments:')
    args = "\n\n{}\nInstall Arguments:\n{}\n".format(frame_str, frame_str)
    install_vars = InstallVars.get_install_vars()
    bs = install_vars['BUILD_SERVER']
    for var, value in install_vars.items():
        if (not value and value != 0) or \
                (value == bs and var != 'BUILD_SERVER'):
            continue
        elif var == 'LAB':
            for k, v in dict(value).items():
                if re.search('_nodes| ip', k):
                    args += "\n{:<20}: {}".format(k, v)
        else:
            args += "\n{:<20}: {}".format(var, value)
    args += "\n{:<20}: {}\n".format('LOG_DIR', ProjVar.get_var('LOG_DIR'))
    LOG.info(args)

    if resume_install:
        try:
            con0_ip = install_vars.get('LAB', {}).get('controller-0 ip')
            if con0_ip:
                with host_helper.ssh_to_host(con0_ip, timeout=60) as con0_ssh:
                    setups.set_build_info(con_ssh=con0_ssh)
                    setups.set_session(con_ssh=con0_ssh)
        except Exception:
            pass

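# The argument banner assembled above renders roughly as follows (sketch;
# the actual entries depend on which InstallVars are set for the run):
#
#   ******************
#   Install Arguments:
#   ******************
#
#   controller-0 ip     : <value>    (the LAB dict is expanded to its
#   ...                               *_nodes / ip keys only)
#   BUILD_SERVER        : <value>
#   LOG_DIR             : <log directory from ProjVar>
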
def test_restore(restore_setup):
    controller1 = 'controller-1'
    controller0 = 'controller-0'

    lab = restore_setup["lab"]
    is_aio_lab = lab.get('system_type', 'Standard') == 'CPE'
    is_sx = is_aio_lab and (len(lab['controller_nodes']) < 2)

    tis_backup_files = restore_setup['tis_backup_files']
    backup_src = RestoreVars.get_restore_var('backup_src'.upper())
    backup_src_path = RestoreVars.get_restore_var('backup_src_path'.upper())

    controller_node = lab[controller0]
    con_ssh = ControllerClient.get_active_controller(name=lab['short_name'],
                                                     fail_ok=True)
    sys_prompt = Prompt.TIS_NODE_PROMPT_BASE.format(
        '.*' + lab['name'].split('_')[0])
    controller_prompt = '{}|{}'.format(sys_prompt, Prompt.CONTROLLER_0)
    controller_node.telnet_conn.set_prompt(controller_prompt)

    if not con_ssh:
        LOG.info("Establish ssh connection with {}".format(controller0))
        controller_node.ssh_conn = install_helper.ssh_to_controller(
            controller_node.host_ip, initial_prompt=controller_prompt)
        controller_node.ssh_conn.deploy_ssh_key()
        con_ssh = controller_node.ssh_conn

    ControllerClient.set_active_controller(con_ssh)

    LOG.info("Restore system from backup....")
    system_backup_file = [
        file for file in tis_backup_files if "system.tgz" in file
    ].pop()
    images_backup_file = [
        file for file in tis_backup_files if "images.tgz" in file
    ].pop()

    LOG.tc_step("Restoring {}".format(controller0))
    LOG.info("System config restore from backup file {} ...".format(
        system_backup_file))

    if backup_src.lower() == 'usb':
        system_backup_path = "{}/{}".format(BackupRestore.USB_BACKUP_PATH,
                                            system_backup_file)
    else:
        system_backup_path = "{}{}".format(HostLinuxUser.get_home(),
                                           system_backup_file)

    compute_configured = install_helper.restore_controller_system_config(
        system_backup=system_backup_path, is_aio=is_aio_lab)[2]

    LOG.info('Re-connect to the active controller using ssh')
    con_ssh.close()
    controller_node.ssh_conn = install_helper.ssh_to_controller(
        controller_node.host_ip, initial_prompt=controller_prompt)

    LOG.info("Source Keystone user admin environment ...")
    LOG.info("Set prompt to: {}, telnet_conn: {}".format(
        controller_prompt, controller_node.telnet_conn))

    controller_node.telnet_conn.exec_cmd("cd; source /etc/platform/openrc")
    con_ssh = install_helper.ssh_to_controller(controller_node.host_ip)
    controller_node.ssh_conn = con_ssh
    ControllerClient.set_active_controller(con_ssh)

    make_sure_all_hosts_locked(con_ssh)

    if backup_src.lower() == 'local':
        images_backup_path = "{}{}".format(HostLinuxUser.get_home(),
                                           images_backup_file)
        common.scp_from_test_server_to_active_controller(
            "{}/{}".format(backup_src_path, images_backup_file),
            HostLinuxUser.get_home())
    else:
        images_backup_path = "{}/{}".format(BackupRestore.USB_BACKUP_PATH,
                                            images_backup_file)

    LOG.info(
        "Images restore from backup file {} ...".format(images_backup_file))

    new_prompt = r'{}.*~.*\$ |controller\-0.*~.*\$ '.format(
        lab['name'].split('_')[0])
    LOG.info('Set prompt to: {}'.format(new_prompt))
    con_ssh.set_prompt(new_prompt)

    install_helper.restore_controller_system_images(
        images_backup=images_backup_path,
        tel_net_session=controller_node.telnet_conn)
    # This is a workaround for CGTS-8190
    install_helper.update_auth_url(con_ssh)

    LOG.tc_step("Verifying restoring controller-0 is complete and is in "
                "available state ...")
    LOG.debug('Wait 60 seconds for the system to be ready')
    time.sleep(60)

    timeout = HostTimeout.REBOOT + 60
    availability = HostAvailState.AVAILABLE
    is_available = system_helper.wait_for_hosts_states(
        controller0, availability=HostAvailState.AVAILABLE, fail_ok=True,
        timeout=timeout)
    if not is_available:
        LOG.warn(
            'After {} seconds, the first node: {} does NOT reach {}'.format(
                timeout, controller0, availability))
        LOG.info('Check if drbd is still synchronizing data')
        con_ssh.exec_sudo_cmd('drbd-overview')
        is_degraded = system_helper.wait_for_hosts_states(
            controller0, availability=HostAvailState.DEGRADED, fail_ok=True,
            timeout=300)
        if is_degraded:
            LOG.warn('Node: {} is degraded: {}'.format(
                controller0, HostAvailState.DEGRADED))
            con_ssh.exec_sudo_cmd('drbd-overview')
        else:
            LOG.fatal('Node: {} is NOT in Available nor Degraded '
                      'status'.format(controller0))
            # The customer doc does have wording regarding this situation,
            # so continue
            # assert False, \
            #     'Node:{} is NOT in Available nor Degraded status'

    # Delete the system backup files from the sysadmin home directory
    LOG.tc_step("Copying backup files to /opt/backups ... ")
    if backup_src.lower() == 'local':
        con_ssh.exec_cmd("rm -f {} {}".format(system_backup_path,
                                              images_backup_path))

        cmd_rm_known_host = \
            r'sed -i "s/^[^#]\(.*\)"/#\1/g /etc/ssh/ssh_known_hosts; \sync'
        con_ssh.exec_sudo_cmd(cmd_rm_known_host)

        # Transfer all backup files from the test server to /opt/backups
        with con_ssh.login_as_root():
            con_ssh.scp_on_dest(source_user=TestFileServer.get_user(),
                                source_ip=TestFileServer.get_server(),
                                source_pswd=TestFileServer.get_password(),
                                source_path=backup_src_path + "/*",
                                dest_path=StxPath.BACKUPS + '/',
                                timeout=1200)
    else:
        # Copy all backup files from USB to /opt/backups
        cmd = " cp {}/* {}".format(BackupRestore.USB_BACKUP_PATH,
                                   StxPath.BACKUPS)
        con_ssh.exec_sudo_cmd(cmd, expect_timeout=600)

    LOG.tc_step("Checking if backup files are copied to /opt/backups ... ")
    assert int(con_ssh.exec_cmd(
        "ls {} | wc -l".format(StxPath.BACKUPS))[1]) >= 2, \
        "Missing backup files in {}".format(StxPath.BACKUPS)

    if is_aio_lab:
        LOG.tc_step("Restoring Cinder Volumes ...")
        restore_volumes()

        LOG.tc_step('Run restore-complete (CGTS-9756)')
        cmd = 'echo "{}" | sudo -S config_controller ' \
              '--restore-complete'.format(HostLinuxUser.get_password())
        controller_node.telnet_conn.login()
        controller_node.telnet_conn.exec_cmd(
            cmd, extra_expects=[' will reboot on completion'])

        LOG.info('- wait until reboot completes')
        time.sleep(120)
        LOG.info('- confirm the active controller is actually back online')
        controller_node.telnet_conn.login()

        LOG.tc_step(
            "Reconnecting to the active controller after restore-complete")
        con_ssh = install_helper.ssh_to_controller(controller_node.host_ip)

        if not compute_configured:
            LOG.tc_step(
                'Latest 18.07 EAR1 or old load on AIO/CPE lab: configure '
                'its compute functionalities')
            # install_helper.run_cpe_compute_config_complete(
            #     controller_node, controller0)
            # LOG.info('closing current ssh connection')
            # con_ssh.close()

            LOG.tc_step('Run restore-complete (CGTS-9756)')
            controller_node.telnet_conn.login()

            cmd = 'echo "{}" | sudo -S config_controller --restore-complete'.\
                format(HostLinuxUser.get_password())
            controller_node.telnet_conn.exec_cmd(
                cmd, extra_expects=' will reboot ')
            controller_node.telnet_conn.close()

            LOG.info('Wait until "config_controller" reboots the active '
                     'controller')
            time.sleep(180)

            controller_node.telnet_conn = install_helper.open_telnet_session(
                controller_node)
            controller_node.telnet_conn.login()
            time.sleep(120)

            con_ssh = install_helper.ssh_to_controller(
                controller_node.host_ip)
            controller_node.ssh_conn = con_ssh
            ControllerClient.set_active_controller(con_ssh)
            host_helper.wait_for_hosts_ready(controller0)

        LOG.tc_step('Install the standby controller: {}'.format(controller1))
        if not is_sx:
            install_non_active_node(controller1, lab)

    elif len(lab['controller_nodes']) >= 2:
        LOG.tc_step('Install the standby controller: {}'.format(controller1))
        install_non_active_node(controller1, lab)

        boot_interfaces = lab['boot_device_dict']

        hostnames = system_helper.get_hosts()
        storage_hosts = [host for host in hostnames if 'storage' in host]
        compute_hosts = [
            host for host in hostnames
            if 'storage' not in host and 'controller' not in host
        ]

        if len(storage_hosts) > 0:
            # con_ssh.exec_sudo_cmd(
            #     'touch /etc/ceph/ceph.client.None.keyring')
            for storage_host in storage_hosts:
                LOG.tc_step("Restoring {}".format(storage_host))
                install_helper.open_vlm_console_thread(
                    storage_host, boot_interface=boot_interfaces,
                    vlm_power_on=True)

                LOG.info("Verifying {} is Locked, Disabled and Online "
                         "...".format(storage_host))
                system_helper.wait_for_hosts_states(
                    storage_host, administrative=HostAdminState.LOCKED,
                    operational=HostOperState.DISABLED,
                    availability=HostAvailState.ONLINE)

                LOG.info("Unlocking {} ...".format(storage_host))
                rc, output = host_helper.unlock_host(storage_host,
                                                     available_only=True)
                assert rc == 0, "Host {} failed to unlock: rc = {}, " \
                    "msg: {}".format(storage_host, rc, output)

            LOG.info("Verifying the Ceph cluster is healthy ...")
            storage_helper.wait_for_ceph_health_ok(timeout=600)

            LOG.info("Importing images ...")
            image_backup_files = install_helper.get_backup_files(
                IMAGE_BACKUP_FILE_PATTERN, StxPath.BACKUPS, con_ssh)
            LOG.info("Image backup found: {}".format(image_backup_files))
            imported = install_helper.import_image_from_backup(
                image_backup_files)
            LOG.info("Images successfully imported: {}".format(imported))

        LOG.tc_step("Restoring Cinder Volumes ...")
        restore_volumes()

        LOG.tc_step('Run restore-complete (CGTS-9756), regular lab')
        controller_node.telnet_conn.login()
        cmd = 'echo "{}" | sudo -S config_controller ' \
              '--restore-complete'.format(HostLinuxUser.get_password())
        controller_node.telnet_conn.exec_cmd(
            cmd, extra_expects='controller-0 login:')

        LOG.info('Rebuild ssh connection')
        con_ssh = install_helper.ssh_to_controller(controller_node.host_ip)
        controller_node.ssh_conn = con_ssh

        LOG.tc_step("Restoring Compute Nodes ...")
        if len(compute_hosts) > 0:
            for compute_host in compute_hosts:
                LOG.tc_step("Restoring {}".format(compute_host))
                install_helper.open_vlm_console_thread(
                    compute_host, boot_interface=boot_interfaces,
                    vlm_power_on=True)

                LOG.info("Verifying {} is Locked, Disabled and Online "
                         "...".format(compute_host))
                system_helper.wait_for_hosts_states(
                    compute_host, administrative=HostAdminState.LOCKED,
                    operational=HostOperState.DISABLED,
                    availability=HostAvailState.ONLINE)

                LOG.info("Unlocking {} ...".format(compute_host))
                rc, output = host_helper.unlock_host(compute_host,
                                                     available_only=True)
                assert rc == 0, "Host {} failed to unlock: rc = {}, " \
                    "msg: {}".format(compute_host, rc, output)

        LOG.info("All nodes {} are restored ...".format(hostnames))
    else:
        LOG.warn('Only 1 controller, but not an AIO lab!?')

    LOG.tc_step("Delete backup files from {} ....".format(StxPath.BACKUPS))
    con_ssh.exec_sudo_cmd("rm -rf {}/*".format(StxPath.BACKUPS))

    LOG.tc_step('Perform post-restore testing/checking')
    post_restore_test(con_ssh)

    LOG.tc_step("Waiting until all alarms are cleared ....")
    timeout = 300
    healthy, alarms = system_helper.wait_for_all_alarms_gone(timeout=timeout,
                                                             fail_ok=True)
    if not healthy:
        LOG.warn('Alarms exist: {}, after waiting {} seconds'.format(
            alarms, timeout))
        rc, message = con_ssh.exec_sudo_cmd('drbd-overview')

        if rc != 0 or (r'[===>' not in message and
                       r'] sync\'ed: ' not in message):
            LOG.warn('Failed to get drbd-overview information')

        LOG.info('Wait for the system to be ready in {} seconds'.format(
            HostTimeout.REBOOT))
        system_helper.wait_for_all_alarms_gone(timeout=HostTimeout.REBOOT,
                                               fail_ok=False)

    LOG.tc_step("Verifying system health after restore ...")
    rc, failed = system_helper.get_system_health_query(con_ssh=con_ssh)
    assert rc == 0, "System health not OK: {}".format(failed)

    collect_logs()

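# Note on the restore-complete steps above: the commands are built with the
# common 'echo "<password>" | sudo -S <command>' idiom so that sudo reads the
# password from stdin over the telnet session, where no interactive password
# prompt is available.
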
def pre_restore_checkup():
    """
    Fixture to check the system state before doing a system restore,
    including:
        - collect logs
        - check if backup files exist on the backup media
        - check if the build ids match each other
        - wipe disks

    Args:

    Return:
        backup files:
            - the backup files to restore with
    """
    lab = InstallVars.get_install_var('LAB')
    LOG.info("Lab info: {}".format(lab))
    backup_build_id = RestoreVars.get_restore_var("BACKUP_BUILD_ID")
    controller_node = lab['controller-0']
    backup_src = RestoreVars.get_restore_var('backup_src'.upper())
    backup_src_path = RestoreVars.get_restore_var('backup_src_path'.upper())
    tis_backup_files = []
    extra_controller_prompt = Prompt.TIS_NODE_PROMPT_BASE.format(
        lab['name'].split('_')[0]) + '|' + Prompt.CONTROLLER_0
    controller_conn = install_helper.ssh_to_controller(
        controller_node.host_ip, initial_prompt=extra_controller_prompt,
        fail_ok=True)

    LOG.info('Collect logs before restore')
    if controller_conn:
        collect_logs(controller_conn)
        ControllerClient.set_active_controller(controller_conn)
    else:
        LOG.info('Cannot collect logs because no ssh connection to the lab')
        LOG.warn('Failed to collect logs because no ssh connection was '
                 'established to controller-0 of lab: {}'.format(
                     controller_node.host_ip))

    LOG.info('backup_src={}, backup_src_path={}'.format(
        backup_src, backup_src_path))
    if backup_src.lower() == 'usb':
        if controller_conn:
            LOG.info("Connection established with controller-0 ....")
            ControllerClient.set_active_controller(ssh_client=controller_conn)

            LOG.info("Checking if a USB flash drive with backup files is "
                     "plugged in... ")
            usb_device_name = install_helper.get_usb_device_name(
                con_ssh=controller_conn)
            assert usb_device_name, "No USB found"

            LOG.info("USB flash drive found, checking for backup files ... ")
            usb_part_info = install_helper.get_usb_device_partition_info(
                usb_device=usb_device_name, con_ssh=controller_conn)
            assert usb_part_info and len(usb_part_info) > 0, \
                "No USB or partition found"

            usb_part_name = "{}2".format(usb_device_name)
            assert usb_part_name in usb_part_info.keys(), \
                "No {} partition exists on the USB".format(usb_part_name)

            result, mount_point = install_helper.is_usb_mounted(
                usb_device=usb_part_name, con_ssh=controller_conn)
            if not result:
                assert install_helper.mount_usb(usb_device=usb_part_name,
                                                con_ssh=controller_conn), \
                    "Unable to mount USB partition {}".format(usb_part_name)

            tis_backup_files = \
                install_helper.get_titanium_backup_filenames_usb(
                    usb_device=usb_part_name, con_ssh=controller_conn)
            assert len(tis_backup_files) >= 2, \
                "Missing backup files: {}".format(tis_backup_files)

            # Extract the build id from the file name
            file_parts = tis_backup_files[0].split('_')
            file_backup_build_id = '_'.join([file_parts[3], file_parts[4]])

            assert re.match(TIS_BLD_DIR_REGEX, file_backup_build_id), \
                "Invalid build id format {} extracted from " \
                "backup_file {}".format(file_backup_build_id,
                                        tis_backup_files[0])

            if backup_build_id is not None:
                if backup_build_id != file_backup_build_id:
                    LOG.info("The build id extracted from the backup file is "
                             "different than specified; using the extracted "
                             "build id {} ....".format(file_backup_build_id))
                    backup_build_id = file_backup_build_id
            else:
                backup_build_id = file_backup_build_id

            RestoreVars.set_restore_var(backup_build_id=backup_build_id)
        else:
            LOG.info("SSH connection not available yet with controller-0; "
                     "USB will be checked after controller boot ....")
    else:
        test_server_attr = dict()
        test_server_attr['name'] = TestFileServer.get_hostname().split('.')[0]
        test_server_attr['server_ip'] = TestFileServer.get_server()
        test_server_attr['prompt'] = r'\[{}@{} {}\]\$ ' \
            .format(TestFileServer.get_user(), test_server_attr['name'],
                    TestFileServer.get_user())

        test_server_conn = install_helper.establish_ssh_connection(
            test_server_attr['name'], user=TestFileServer.get_user(),
            password=TestFileServer.get_password(),
            initial_prompt=test_server_attr['prompt'])

        test_server_conn.set_prompt(test_server_attr['prompt'])
        test_server_conn.deploy_ssh_key(install_helper.get_ssh_public_key())
        test_server_attr['ssh_conn'] = test_server_conn
        test_server_obj = Server(**test_server_attr)
        RestoreVars.set_restore_var(backup_src_server=test_server_obj)

        # Test if the backup path for the lab exists on the test server
        if os.path.basename(backup_src_path) != lab['short_name']:
            backup_src_path += '/{}'.format(lab['short_name'])
            RestoreVars.set_restore_var(backup_src_path=backup_src_path)

        assert not test_server_conn.exec_cmd(
            "test -e {}".format(backup_src_path))[0], \
            "Missing backup files from source {}: {}".format(
                test_server_attr['name'], backup_src_path)

        tis_backup_files = install_helper.get_backup_files(
            TITANIUM_BACKUP_FILE_PATTERN, backup_src_path, test_server_conn)
        assert len(tis_backup_files) >= 2, \
            "Missing backup files: {}".format(tis_backup_files)

        # Extract the build id from the file name
        file_parts = tis_backup_files[0].split('_')
        file_backup_build_id = '_'.join([file_parts[3], file_parts[4]])

        assert re.match(TIS_BLD_DIR_REGEX, file_backup_build_id), \
            "Invalid build id format {} extracted from " \
            "backup_file {}".format(file_backup_build_id,
                                    tis_backup_files[0])

        if backup_build_id is not None:
            if backup_build_id != file_backup_build_id:
                LOG.info("The build id extracted from the backup file is "
                         "different than specified; using the extracted "
                         "build id {} ....".format(file_backup_build_id))
                backup_build_id = file_backup_build_id
        else:
            backup_build_id = file_backup_build_id

        RestoreVars.set_restore_var(backup_build_id=backup_build_id)

    if controller_conn:
        # Wipe disks so that controller-0 does NOT boot from its hard disks
        # hosts = [k for k, v in lab.items() if isinstance(v, node.Node)]
        # install_helper.wipe_disk_hosts(hosts)
        if not RestoreVars.get_restore_var('skip_reinstall'):
            LOG.info('Try to do wipedisk_via_helper on controller-0')
            install_helper.wipedisk_via_helper(controller_conn)

    assert backup_build_id, \
        "The build id of the system backup must be provided."

    return tis_backup_files

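# Illustration of the build-id extraction above (the file name is
# hypothetical): with a backup file named like
# 'titanium_backup_wcp67_2018-03-16_11-04-06_system.tgz', splitting on '_'
# gives parts[3] = '2018-03-16' and parts[4] = '11-04-06', so the extracted
# build id is '2018-03-16_11-04-06', which TIS_BLD_DIR_REGEX is expected
# to match.
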
def restore_setup(pre_restore_checkup):
    """
    Fixture to do preparation before a system restore.

    Args:
        pre_restore_checkup:
            - actions done prior to this

    Return:
        a dictionary
            - containing information about the target system, output
              directory, build server and backup files.
    """
    LOG.debug('Restore with settings:\n{}'.format(
        RestoreVars.get_restore_vars()))
    lab = InstallVars.get_install_var('LAB')
    LOG.info("Lab info: {}".format(lab))
    hostnames = [k for k, v in lab.items() if isinstance(v, node.Node)]
    LOG.info("Lab hosts: {}".format(hostnames))

    backup_build_id = RestoreVars.get_restore_var("BACKUP_BUILD_ID")
    output_dir = ProjVar.get_var('LOG_DIR')
    controller_node = lab['controller-0']

    controller_prompt = ''
    extra_controller_prompt = Prompt.TIS_NODE_PROMPT_BASE.format(
        lab['name'].split('_')[0]) + '|' + Prompt.CONTROLLER_0

    if RestoreVars.get_restore_var('skip_reinstall'):
        LOG.info('Skip reinstall as instructed')
        LOG.info('Connect to controller-0 now')
        controller_node.ssh_conn = install_helper.ssh_to_controller(
            controller_node.host_ip, initial_prompt=extra_controller_prompt,
            fail_ok=True)
        bld_server_obj = None
    else:
        # bld_server = get_build_server_info(
        #     InstallVars.get_install_var('BUILD_SERVER'))
        bld_server = get_build_server_info(
            RestoreVars.get_restore_var('BUILD_SERVER'))
        LOG.info("Connecting to Build Server {} ....".format(
            bld_server['name']))
        bld_server_attr = dict()
        bld_server_attr['name'] = bld_server['name']
        bld_server_attr['server_ip'] = bld_server['ip']
        bld_server_attr['prompt'] = r'{}@{}\:(.*)\$ '.format(
            TestFileServer.get_user(), bld_server['name'])

        bld_server_conn = install_helper.establish_ssh_connection(
            bld_server_attr['name'], user=TestFileServer.get_user(),
            password=TestFileServer.get_password(),
            initial_prompt=bld_server_attr['prompt'])

        bld_server_conn.exec_cmd("bash")
        bld_server_conn.set_prompt(bld_server_attr['prompt'])
        bld_server_conn.deploy_ssh_key(install_helper.get_ssh_public_key())
        bld_server_attr['ssh_conn'] = bld_server_conn
        bld_server_obj = Server(**bld_server_attr)

        # If the controller is accessible, check if a USB with the backup
        # files is available
        load_path = os.path.join(
            BuildServerPath.DEFAULT_WORK_SPACE,
            RestoreVars.get_restore_var("BACKUP_BUILDS_DIR"),
            backup_build_id)
        InstallVars.set_install_var(tis_build_dir=load_path)

        # Set up the boot feed for the controller
        LOG.fixture_step(
            "Setting install feed in tuxlab for controller-0 ... ")
        if 'vbox' not in lab['name'] and not RestoreVars.get_restore_var(
                'skip_setup_feed'):
            assert install_helper.set_network_boot_feed(bld_server_conn,
                                                        load_path), \
                "Failed to set up the feed for the controller"

    if not RestoreVars.get_restore_var('skip_reinstall'):
        # Power off hosts
        LOG.fixture_step("Powering off system hosts ... ")
        install_helper.power_off_host(hostnames)

        LOG.fixture_step("Booting controller-0 ... ")
        is_cpe = (lab.get('system_type', 'Standard') == 'CPE')
        low_latency = RestoreVars.get_restore_var('low_latency')
        os.environ['XTERM'] = 'xterm'

        install_helper.boot_controller(small_footprint=is_cpe,
                                       system_restore=True,
                                       low_latency=low_latency)

        # Establish ssh connection with the controller
        LOG.fixture_step(
            "Establishing ssh connection with controller-0 after install...")

        node_name_in_ini = r'{}.*\~\$ '.format(
            install_helper.get_lab_info(controller_node.barcode)['name'])
        controller_prompt = re.sub(r'([^\d])0*(\d+)', r'\1\2',
                                   node_name_in_ini)

        controller_prompt = controller_prompt + '|' + \
            Prompt.TIS_NODE_PROMPT_BASE.format(
                lab['name'].split('_')[0]) + '|' + Prompt.CONTROLLER_0

        LOG.info('initial_prompt=' + controller_prompt)
        controller_node.ssh_conn = install_helper.ssh_to_controller(
            controller_node.host_ip, initial_prompt=controller_prompt)
        LOG.info('Deploy ssh key')
        controller_node.ssh_conn.deploy_ssh_key()

    ControllerClient.set_active_controller(
        ssh_client=controller_node.ssh_conn)
    con_ssh = controller_node.ssh_conn

    tis_backup_files = pre_restore_checkup
    backup_src = RestoreVars.get_restore_var('backup_src'.upper())
    backup_src_path = RestoreVars.get_restore_var('backup_src_path'.upper())

    if backup_src.lower() == 'local':
        LOG.fixture_step(
            "Transferring system backup file to controller-0 {} ... ".format(
                HostLinuxUser.get_home()))

        system_backup_file = [
            file for file in tis_backup_files if "system.tgz" in file
        ].pop()
        common.scp_from_test_server_to_active_controller(
            "{}/{}".format(backup_src_path, system_backup_file),
            HostLinuxUser.get_home())

        assert con_ssh.exec_cmd(
            "ls {}{}".format(HostLinuxUser.get_home(),
                             system_backup_file))[0] == 0, \
            "Missing backup file {} in dir {}".format(
                system_backup_file, HostLinuxUser.get_home())

    elif backup_src.lower() == 'usb':
        tis_backup_files = pre_restore_checkup
        usb_device_name = install_helper.get_usb_device_name(con_ssh=con_ssh)
        usb_part_name = "{}2".format(usb_device_name)
        assert usb_device_name, "No USB found"
        LOG.fixture_step(
            "USB flash drive found, checking for backup files ... ")

        if len(tis_backup_files) == 0:
            LOG.fixture_step("Checking for backup files in USB ... ")
            usb_part_info = install_helper.get_usb_device_partition_info(
                usb_device=usb_device_name, con_ssh=con_ssh)
            assert usb_part_info and len(usb_part_info) > 0, \
                "No USB or partition found"
            assert usb_part_name in usb_part_info.keys(), \
                "No {} partition exists on the USB".format(usb_part_name)

            result, mount_point = install_helper.is_usb_mounted(
                usb_device=usb_part_name)
            if not result:
                assert install_helper.mount_usb(usb_device=usb_part_name,
                                                con_ssh=con_ssh), \
                    "Unable to mount USB partition {}".format(usb_part_name)

            tis_backup_files = \
                install_helper.get_titanium_backup_filenames_usb(
                    usb_device=usb_part_name)
            assert len(tis_backup_files) >= 2, \
                "Missing backup files: {}".format(tis_backup_files)
        else:
            result, mount_point = install_helper.is_usb_mounted(
                usb_device=usb_part_name)
            if not result:
                assert install_helper.mount_usb(usb_device=usb_part_name,
                                                con_ssh=con_ssh), \
                    "Unable to mount USB partition {}".format(usb_part_name)

    _restore_setup = {
        'lab': lab,
        'output_dir': output_dir,
        'build_server': bld_server_obj,
        'tis_backup_files': tis_backup_files
    }

    return _restore_setup

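# Note on the prompt normalization in restore_setup() above (node name is
# hypothetical, for illustration only): re.sub(r'([^\d])0*(\d+)', r'\1\2',
# name) strips leading zeros from the numeric suffix of the node name read
# from the lab ini file, so the expected prompt regex matches the hostname
# the booted controller actually reports, e.g.:
#
#     re.sub(r'([^\d])0*(\d+)', r'\1\2', r'wildcat-007.*\~\$ ')
#     # -> 'wildcat-7.*\~\$ '
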
def pytest_configure(config):
    # Lab install params
    lab = config.getoption('lab')
    use_usb = config.getoption('use_usb')
    backup_src_path = config.getoption('backup_path')
    backup_build_id = config.getoption('backup_build_id')
    backup_src = 'usb' if use_usb else 'local'
    skip_setup_feed = config.getoption('skip_setup_feed')
    skip_reinstall = config.getoption('skip_reinstall')
    low_latency = config.getoption('low_latency')
    cinder_backup = config.getoption('cinder_backup')
    # build_server = config.getoption('build_server')
    backup_builds_dir = config.getoption('backup_builds_dir')
    build_server = config.getoption('build_server')
    # tis_build_dir = config.getoption('tis_build_dir')

    setups.set_install_params(lab=lab,
                              skip='feed' if skip_setup_feed else None,
                              resume=None, installconf_path=None, drop=None,
                              boot='usb' if use_usb else 'feed',
                              controller0_ceph_mon_device=None,
                              iso_path=None,
                              controller1_ceph_mon_device=None,
                              ceph_mon_gib=None, low_latency=low_latency,
                              security='standard', stop=None, wipedisk=False,
                              ovs=False, patch_dir=None, boot_server=None)

    if backup_src == 'usb':
        if (not backup_src_path) or (BackupRestore.USB_MOUNT_POINT
                                     not in backup_src_path):
            backup_src_path = BackupRestore.USB_BACKUP_PATH
    elif not backup_src_path:
        backup_src_path = BackupRestore.LOCAL_BACKUP_PATH

    if not backup_builds_dir:
        backup_builds_dir = os.path.basename(
            BuildServerPath.DEFAULT_HOST_BUILDS_DIR)

    RestoreVars.set_restore_vars(backup_src=backup_src,
                                 backup_src_path=backup_src_path,
                                 build_server=build_server,
                                 backup_build_id=backup_build_id,
                                 backup_builds_dir=backup_builds_dir)

    reinstall_storage = config.getoption('reinstall_storage')
    RestoreVars.set_restore_var(reinstall_storage=reinstall_storage)
    RestoreVars.set_restore_var(skip_setup_feed=skip_setup_feed)
    RestoreVars.set_restore_var(skip_reinstall=skip_reinstall)
    RestoreVars.set_restore_var(low_latency=low_latency)
    RestoreVars.set_restore_var(cinder_backup=cinder_backup)

    ProjVar.set_var(always_collect=True)
    ProjVar.set_var(SOURCE_OPENRC=True)
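
# Example invocation (taken from the upgrade-restore docstring above; adjust
# the lab, paths and build id to the target system):
#
#   py.test --lab=wcp_67 --backup-path=/sandbox/upgrade \
#       --backup-build-id='2018-03-16_11-04-06' \
#       --backup-builds-dir=TC_18.03_Host --skip_setup_feed \
#       tc_bnr/restore/test_upgrade_simplex_restore.py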