Example #1
def test_get_info():
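    # pv0, r720 and r730_3_7 are assumed to be lab config objects defined
    # elsewhere in the suite (not shown in this snippet)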
    pv0_storage_nodes = system_helper.get_storage_nodes(pv0)
    assert 'storage-0' in pv0_storage_nodes and 'storage-1' in pv0_storage_nodes
    assert not system_helper.get_storage_nodes(r720)
    assert not system_helper.is_aio_system()
    LOG.tc_func_start()
    assert system_helper.is_aio_system(r720)
    LOG.tc_func_end()
    LOG.tc_func_start()
    assert not system_helper.is_aio_system(r730_3_7)
    LOG.tc_func_end()
    LOG.tc_func_start()
    assert not system_helper.get_storage_nodes(r730_3_7)
    LOG.tc_func_end()
Example #2
def test_create_host_partition_on_storage():
    """
    This test attempts to create a host partition on a storage node.  It is
    expected to fail, since host partition creation is only supported on
    controllers and computes.

    Assumptions:
    * We run this on a storage system, otherwise we will skip the test.

    Test steps:
    * Query storage nodes for available disk space
    * Attempt to create a partition on a storage node
    * Check it is rejected
    """

    hosts = system_helper.get_storage_nodes()

    if not hosts:
        skip("This test requires storage nodes.")

    LOG.tc_step("Gather the disks available on each host")
    for host in hosts:
        disks = storage_helper.get_host_disks(host)
        free_disks = storage_helper.get_host_disks_with_free_space(host, disks)
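        # get_host_disks_with_free_space appears to return a mapping of disk
        # uuid -> free space (GiB), as used below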
        if not free_disks:
            continue
        for uuid in free_disks:
            rc, out = storage_helper.create_host_partition(
                host, uuid, int(free_disks[uuid]), fail_ok=True)
            assert rc != 0, \
                "Partition creation on storage node {} succeeded but should be rejected".format(host)
Example #3
def is_image_conversion_sufficient(img_file_path=None,
                                   guest_os=None,
                                   min_diff=0.05,
                                   con_ssh=None,
                                   img_host_ssh=None):
    """
    Check if image conversion space is sufficient to convert given image to
    raw format
    Args:
        img_file_path (str): e.g., ~/images/tis-centos-guest.img
        guest_os (str): has to be specified if img_file_path is unspecified,
            e.g., 'tis-centos-guest'
        min_diff (float): minimum required headroom between the available
            conversion space and the image size, in GB
        con_ssh:
        img_host_ssh:

    Returns (bool):

    """
    if con_ssh is None:
        con_ssh = ControllerClient.get_active_controller()

    if not system_helper.get_storage_nodes(con_ssh=con_ssh):
        return True

    avail_size = get_avail_image_conversion_space(con_ssh=con_ssh)
    file_size = get_image_size(img_file_path=img_file_path,
                               guest_os=guest_os,
                               virtual_size=True,
                               ssh_client=img_host_ssh)

    return avail_size - file_size >= min_diff
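A minimal usage sketch for the helper above (illustrative only; skip() is the
pytest skip used throughout these examples):

if not is_image_conversion_sufficient(guest_os='tis-centos-guest'):
    skip('Insufficient image conversion space to convert the image to raw')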
Example #4
def test_apply_storage_profile_negative(create_storage_profile, personality):

    if personality == 'controller':
        host_name = system_helper.get_standby_controller_name()
        assert host_name, "No standby controller available on system"
    else:
        host_name = host_helper.get_up_hypervisors()[0]

    # For storage systems, skip test if ceph isn't healthy
    if len(system_helper.get_storage_nodes()) > 0:
        ceph_healthy = storage_helper.is_ceph_healthy()
        if not ceph_healthy:
            skip('Skipping due to ceph not being healthy')

    profile_name = create_storage_profile['profile_name']
    origin_disk_num = create_storage_profile['disk_num']
    disks_num = len(storage_helper.get_host_disks(host_name, 'device_node'))

    expt_err_list = [
        "Please check if host's disks match profile criteria",
        "Failed to create storage function. Host personality must be 'storage'",
    ]
    if disks_num < origin_disk_num - 1:
        expt_err_list.append("profile has more disks than host does")

    positional_arg = host_name + ' ' + profile_name

    HostsToRecover.add(host_name)
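    # swact=True: if host_name is the active controller, swact away first so
    # the host can be locked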
    host_helper.lock_host(host_name, swact=True)
    exitcode, output = cli.system('host-apply-storprofile',
                                  positional_arg,
                                  fail_ok=True)
    host_helper.unlock_host(host_name)

    assert exitcode == 1 and any(expt in output for expt in expt_err_list), \
        "host-apply-storprofile should be rejected; actual output: {}".format(output)
Example #5
def test_lock_unlock_storage_hosts(no_simplex, no_duplex):
    """
    Lock - Unlock Storage Hosts
    """
    if ProjVar.get_var('SYS_TYPE') != SysType.STORAGE:
        skip('Only applicable to systems with storage nodes')

    storage_hosts = system_helper.get_storage_nodes()
    LOG.info(" Storage nodes found: {}".format(len(storage_hosts)))

    for host in storage_hosts:
        LOG.info("Storage Host: {}".format(host))

        # Lock
        host_helper.lock_host(host=host, fail_ok=False)
        host_helper.wait_for_hosts_ready(hosts=host)

        container_helper.wait_for_apps_status(apps="stx-openstack",
                                              status=AppStatus.APPLIED,
                                              timeout=600,
                                              check_interval=60)
        # Unlock
        host_helper.unlock_host(host=host, fail_ok=False)
        host_helper.wait_for_hosts_ready(hosts=host)
Example #6
def is_storage_node(self):
    return len(system_helper.get_storage_nodes()) > 0
Example #7
def storage_node_not_exist():
    return len(system_helper.get_storage_nodes()) == 0
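A minimal sketch of how such predicates might be consumed as a pytest skip
guard (hypothetical test name; assumes a controller connection is available at
collection time, since the condition is evaluated when the module is imported):

import pytest

@pytest.mark.skipif(storage_node_not_exist(),
                    reason="This test requires storage nodes")
def test_requires_storage_nodes():
    ...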
Example #8
def _test_increase_ceph_mon():
    """
    Increase the size of ceph-mon.  Only applicable to a storage system.

    Fails until CGTS-8216

    Test steps:
    1.  Determine the current size of ceph-mon
    2.  Attempt to modify ceph-mon to invalid values
    3.  Check if there is free space to increase ceph-mon
    4.  Attempt to increase ceph-mon
    5.  Wait for config out-of-date alarms to raise
    6.  Lock/unlock all affected nodes (controllers and storage)
    7.  Wait for alarms to clear
    8.  Check that ceph-mon has the correct updated value

    Enhancement:
    1.  Possibly check there is enough disk space for ceph-mon to increase.  Not sure if
    this is required since there always seems to be some space on the rootfs.

    """
    table_ = table_parser.table(cli.system("ceph-mon-list")[1])
    ceph_mon_gib = table_parser.get_values(table_, "ceph_mon_gib",
                                           **{"hostname": "controller-0"})[0]
    LOG.info("ceph_mon_gib is currently: {}".format(ceph_mon_gib))

    LOG.tc_step("Attempt to modify ceph-mon to invalid values")
    invalid_cmg = ['19', '41', 'fds']
    for value in invalid_cmg:
        host = "controller-0"
        cli.system("ceph-mon-modify {} ceph_mon_gib={}".format(host, value),
                   fail_ok=True)

    if int(ceph_mon_gib) >= 30:
        skip("Insufficient disk space to execute test")

    ceph_mon_gib_avail = 40 - int(ceph_mon_gib)
    new_ceph_mon_gib = math.trunc(ceph_mon_gib_avail / 10) + int(ceph_mon_gib)
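    # Worked example: if ceph_mon_gib is 20, then ceph_mon_gib_avail = 40 - 20 = 20
    # and new_ceph_mon_gib = math.trunc(20 / 10) + 20 = 22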

    LOG.tc_step("Increase ceph_mon_gib to {}".format(new_ceph_mon_gib))
    hosts = system_helper.get_controllers()
    # Modifying one controller is sufficient; the change applies to both
    cli.system("ceph-mon-modify {} ceph_mon_gib={}".format(
        hosts[0], new_ceph_mon_gib))

    LOG.info("Wait for expected alarms to appear")
    storage_hosts = system_helper.get_storage_nodes()
    total_hosts = hosts + storage_hosts
    for host in total_hosts:
        system_helper.wait_for_alarm(alarm_id=EventLogID.CONFIG_OUT_OF_DATE,
                                     entity_id="host={}".format(host))

    LOG.tc_step("Lock/unlock all affected nodes")
    for host in storage_hosts:
        HostsToRecover.add(host)
        host_helper.lock_host(host)
        host_helper.unlock_host(host)
        system_helper.wait_for_alarm_gone(
            alarm_id=EventLogID.CONFIG_OUT_OF_DATE,
            entity_id="host={}".format(host))
        time.sleep(10)

    standby = system_helper.get_standby_controller_name()
    active = system_helper.get_active_controller_name()
    HostsToRecover.add(standby)
    host_helper.lock_host(standby)
    host_helper.unlock_host(standby)
    system_helper.wait_for_alarm_gone(alarm_id=EventLogID.CONFIG_OUT_OF_DATE,
                                      entity_id="host={}".format(standby))
    time.sleep(10)
    host_helper.swact_host(active)
    HostsToRecover.add(active)
    host_helper.lock_host(active)
    host_helper.unlock_host(active)
    system_helper.wait_for_alarm_gone(alarm_id=EventLogID.CONFIG_OUT_OF_DATE,
                                      entity_id="host={}".format(active))

    table_ = table_parser.table(cli.system("ceph-mon-list")[1])
    ceph_mon_gib = table_parser.get_values(table_, "ceph_mon_gib",
                                           **{"hostname": "controller-0"})[0]
    assert int(ceph_mon_gib) == new_ceph_mon_gib, \
        "ceph_mon_gib should have been updated to {} but is {}".format(
            new_ceph_mon_gib, ceph_mon_gib)
Example #9
def test_ceph_reboot_storage_node(stx_openstack_required):
    """
    us69932_tc2_ceph_mon_process_kill from us69932_ceph_monitoring.odt

    Verify that ceph mon and OSD processes recover after storage nodes are
    rebooted.

    Args:
        - stx_openstack_required (fixture)

    Setup:
        - Requires system with storage nodes

    Test Steps:
        0.  Run CEPH pre-check fixture to check:
            - system has storage nodes
            - health of the ceph cluster is okay
            - that we have OSDs provisioned
        1.  Delete existing VMs
        2.  Boot new VMs and run dd on them
        3.  Reboot storage node and ensure both:
            - mon state goes down (if storage-0)
            - OSD state goes down
        4.  Ensure mon and OSD state recover afterwards
        5.  Cleanup VMs

    Potential rework:
        1.  Add the alarms checks for raise and clear
        2.  Maybe we don't want to reboot all storage nodes

    What defects this addresses:
        1.  CGTS-2975

    Update:
        This test was updated for the Storage and Robustness feature.
    """
    con_ssh = ControllerClient.get_active_controller()

    LOG.tc_step("Delete existing VMs")
    vm_helper.delete_vms()

    LOG.tc_step("Boot various VMs")
    vms = vm_helper.boot_vms_various_types(cleanup="function")

    vm_threads = []
    LOG.tc_step("SSH to VMs and write to disk")
    end_event = Events("End dd in vms")

    try:
        for vm in vms:
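            # write_in_vm starts a background writer (dd) in the VM; the
            # threads are checked and stopped in the finally block below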
            vm_thread = vm_helper.write_in_vm(vm, end_event=end_event, expect_timeout=40)
            vm_threads.append(vm_thread)

        storage_nodes = system_helper.get_storage_nodes(con_ssh)

        for host in storage_nodes:
            LOG.tc_step('Reboot {}'.format(host))
            HostsToRecover.add(host, scope='function')
            host_helper.reboot_hosts(host, wait_for_offline=True, wait_for_reboot_finish=False)

            LOG.tc_step('Check that the CEPH cluster goes unhealthy')
            ceph_healthy = True
            end_time = time.time() + 10
            while time.time() < end_time:
                ceph_healthy = storage_helper.is_ceph_healthy(con_ssh)
                if not ceph_healthy:
                    break

            assert not ceph_healthy, \
                "ceph is unexpectedly still healthy after rebooting {}".format(host)
            LOG.info('ceph is unhealthy as expected after rebooting {}'.format(host))

            LOG.tc_step('Check that OSDs are down')
            osd_list = storage_helper.get_osds(host, con_ssh)
            all_osds_up = True
            up_list = osd_list.copy()
            end_time = time.time() + 60
            while time.time() < end_time and all_osds_up:
                for osd_id in osd_list:
                    osd_up = storage_helper.is_osd_up(osd_id, con_ssh)
                    if not osd_up:
                        msg = 'OSD ID {} is down as expected'.format(osd_id)
                        LOG.info(msg)
                        up_list.remove(osd_id)
                if len(up_list) > 0:
                    osd_list = up_list.copy()
                else:
                    LOG.info('All OSDs are down as expected')
                    all_osds_up = False

            assert not all_osds_up, \
                "OSD(s) {} are up but should be down".format(up_list)

            system_helper.wait_for_host_values(host, availability='available')

            LOG.tc_step('Check that OSDs are up')
            osd_list = storage_helper.get_osds(host, con_ssh)
            down_list = osd_list.copy()
            all_osds_up = False
            end_time = time.time() + 60
            while time.time() < end_time and not all_osds_up:
                for osd_id in osd_list:
                    osd_up = storage_helper.is_osd_up(osd_id, con_ssh)
                    if osd_up:
                        msg = 'OSD ID {} is up as expected'.format(osd_id)
                        LOG.info(msg)
                        down_list.remove(osd_id)
                if len(down_list) > 0:
                    osd_list = down_list.copy()
                else:
                    LOG.info('All OSDs are up as expected')
                    all_osds_up = True

            assert all_osds_up, \
                "OSD(s) {} are down but should be up".format(down_list)

            LOG.tc_step('Check health of CEPH cluster')
            end_time = time.time() + 40
            while time.time() < end_time:
                ceph_healthy = storage_helper.is_ceph_healthy(con_ssh)
                if ceph_healthy is True:
                    break

            assert ceph_healthy, "ceph is not healthy"

        for vm_thread in vm_threads:
            assert vm_thread.res is True, "Writing in vm stopped unexpectedly"
    finally:
        end_event.set()
        for vm_thread in vm_threads:
            vm_thread.wait_for_thread_end(timeout=20)

    LOG.tc_step("Delete existing VMs")
    vm_helper.delete_vms()
Example #10
def pre_system_backup():
    """
    Actions before system backup, including:
        - check the USB device is ready if it is the destination
        - create folder for the backup files on destination server
        - collect logs on the current system

    Returns (dict): backup settings, including the destination, USB partition
        info, full destination path and destination server (if remote)
    """
    lab = InstallVars.get_install_var('LAB')

    LOG.info("Preparing lab for system backup....")
    backup_dest = BackupVars.get_backup_var("BACKUP_DEST")

    NATBoxClient.set_natbox_client()

    _backup_info = {
        'backup_dest': backup_dest,
        'usb_parts_info': None,
        'backup_dest_full_path': None,
        'dest_server': None
    }

    if backup_dest == 'usb':
        _backup_info['dest'] = 'usb'
        active_controller_name = system_helper.get_active_controller_name()
        if active_controller_name != 'controller-0':
            msg = "controller-0 is not the active controller"
            LOG.info(msg + ", try to swact the host")
            host_helper.swact_host(active_controller_name)
            active_controller_name = system_helper.get_active_controller_name()
            assert active_controller_name == 'controller-0', msg

        LOG.fixture_step(
            "Checking if a USB flash drive is plugged into controller-0...")
        usb_device = install_helper.get_usb_device_name()
        assert usb_device, "No USB found in controller-0"
        parts_info = install_helper.get_usb_device_partition_info(
            usb_device=usb_device)

        part1 = "{}1".format(usb_device)
        part2 = "{}2".format(usb_device)

        if len(parts_info) < 3:
            skip(
                "USB {} is not partitioned; create two partitions using fdisk: partition 1 = {}1, "
                "size = 2G, bootable; partition 2 = {}2, size equal to the available space."
                .format(usb_device, usb_device, usb_device))

        devices = parts_info.keys()
        LOG.info("Size of {} = {}".format(
            part1, install_helper.get_usb_partition_size(part1)))
        if not (part1 in devices
                and install_helper.get_usb_partition_size(part1) >= 2):
            skip("Insufficient size in {}; at least 2G is required. {}".format(
                part1, parts_info))

        if not (part2 in devices
                and install_helper.get_usb_partition_size(part2) >= 10):
            skip("Insufficient size in {}; at least 10G is required. {}".format(
                part2, parts_info))

        if not install_helper.mount_usb(part2):
            skip("Fail to mount USB for backups")

        LOG.tc_step("Erasing existing files from USB ... ")

        assert install_helper.delete_backup_files_from_usb(
            part2), "Failed to erase existing files from USB"
        _backup_info['usb_parts_info'] = parts_info
        _backup_info['backup_dest_full_path'] = BackupRestore.USB_BACKUP_PATH

    elif backup_dest == 'local':
        _backup_info['dest'] = 'local'

        # save the backup files on the test server when the destination is local
        backup_dest_path = BackupVars.get_backup_var('BACKUP_DEST_PATH')
        backup_dest_full_path = '{}/{}'.format(backup_dest_path,
                                               lab['short_name'])
        # ssh to test server
        test_server_attr = dict()
        test_server_attr['name'] = TestFileServer.get_hostname().split('.')[0]
        test_server_attr['server_ip'] = TestFileServer.get_server()
        test_server_attr['prompt'] = r'\[{}@{} {}\]\$ '\
            .format(TestFileServer.get_user(), test_server_attr['name'], TestFileServer.get_user())

        test_server_conn = install_helper.establish_ssh_connection(
            test_server_attr['name'],
            user=TestFileServer.get_user(),
            password=TestFileServer.get_password(),
            initial_prompt=test_server_attr['prompt'])

        test_server_conn.set_prompt(test_server_attr['prompt'])
        test_server_conn.deploy_ssh_key(install_helper.get_ssh_public_key())
        test_server_attr['ssh_conn'] = test_server_conn
        test_server_obj = Server(**test_server_attr)
        _backup_info['dest_server'] = test_server_obj
        # create the backup path for this lab on the test server if it does
        # not already exist
        if test_server_conn.exec_cmd(
                "test -e {}".format(backup_dest_full_path))[0]:
            test_server_conn.exec_cmd(
                "mkdir -p {}".format(backup_dest_full_path))
        # delete any existing files
        test_server_conn.exec_cmd("rm -rf {}/*".format(backup_dest_full_path))

        _backup_info['usb_parts_info'] = None
        _backup_info['backup_dest_full_path'] = backup_dest_full_path

    collect_logs('before_br')

    _backup_info['is_storage_lab'] = (len(system_helper.get_storage_nodes()) >
                                      0)
    return _backup_info
Example #11
def backup_sysconfig_images(backup_info):
    """
    Back up the system configuration, plus the glance images on storage labs

    Args:
        backup_info (dict): settings for the system backup

    Returns:
        None
    """

    backup_dest = backup_info['backup_dest']
    backup_dest_path = backup_info['backup_dest_full_path']
    dest_server = backup_info['dest_server']
    copy_to_usb = backup_info['copy_to_usb']

    install_helper.backup_system(backup_dest=backup_dest,
                                 backup_dest_path=backup_dest_path,
                                 dest_server=dest_server,
                                 copy_to_usb=copy_to_usb)

    # On storage labs (i.e., when storage nodes exist), image files are
    # backed up separately
    if len(system_helper.get_storage_nodes()) > 0:
        LOG.tc_step("Storage lab detected. Copying images to backup.")
        image_ids = glance_helper.get_images()
        for img_id in image_ids:
            prop_key = 'store'
            image_properties = glance_helper.get_image_properties(
                img_id, prop_key, rtn_dict=True)
            LOG.debug('image store backends:{}'.format(image_properties))

            if image_properties and image_properties.get(prop_key,
                                                         None) == 'rbd':
                LOG.info('rbd based image, exporting it: {}, store:{}'.format(
                    img_id, image_properties))

                install_helper.export_image(
                    img_id,
                    backup_dest=backup_info['backup_dest'],
                    backup_dest_path=backup_info['backup_dest_full_path'],
                    dest_server=backup_info['dest_server'],
                    copy_to_usb=backup_info['copy_to_usb'])
            else:
                LOG.warn(
                    'No store property found for image {}, properties:{}'.format(
                        img_id, image_properties))
                # fall back to direct_url to determine whether the image is
                # rbd-backed
                prop_key = 'direct_url'
                direct_url = glance_helper.get_image_properties(
                    img_id, prop_key)[0]

                if direct_url and direct_url.startswith('rbd://'):
                    LOG.info(
                        'direct_url indicates an rbd based image; exporting '
                        'it: {}, properties:{}'.format(img_id, image_properties))
                    install_helper.export_image(
                        img_id,
                        backup_dest=backup_dest,
                        backup_dest_path=backup_dest_path,
                        dest_server=dest_server,
                        copy_to_usb=copy_to_usb)
                else:
                    LOG.warn(
                        'non-rbd based image, skipping it: {}, store:{}'.format(
                            img_id, image_properties))
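Illustrative wiring of the two backup helpers above (a minimal sketch):
backup_sysconfig_images() reads a 'copy_to_usb' key that pre_system_backup()
does not populate, so the caller is assumed to supply it.

backup_info = pre_system_backup()
# assumption: None means "do not copy the backup files to USB"
backup_info.setdefault('copy_to_usb', None)
backup_sysconfig_images(backup_info)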