Example No. 1
def test_system_alarms_and_events_on_lock_unlock_compute(no_simplex):
    """
    Verify fm alarm-show command

    Test Steps:
    - Delete active alarms
    - Lock a host
    - Check active alarm generated for host lock
    - Check related fields have the same values in fm alarm-list and fm
      alarm-show <uuid>
    - Check host lock 'set' event logged via fm event-list
    - Unlock host
    - Check active alarms cleared via fm alarm-list
    - Check host lock 'clear' event logged via fm event-list
    """

    # Remove following step because it's unnecessary and fails the test when
    # alarm is re-generated
    # # Clear the alarms currently present
    # LOG.tc_step("Clear the alarms table")
    # system_helper.delete_alarms()

    # Raise a new alarm by locking a compute node
    # Get the compute
    compute_host = host_helper.get_up_hypervisors()[0]
    if compute_host == system_helper.get_active_controller_name():
        compute_host = system_helper.get_standby_controller_name()
        if not compute_host:
            skip('Standby controller unavailable')

    LOG.tc_step("Lock a nova hypervisor host {}".format(compute_host))
    pre_lock_time = common.get_date_in_format()
    HostsToRecover.add(compute_host)
    host_helper.lock_host(compute_host)

    LOG.tc_step("Check host lock alarm is generated")
    post_lock_alarms = \
        system_helper.wait_for_alarm(field='UUID', entity_id=compute_host,
                                     reason=compute_host,
                                     alarm_id=EventLogID.HOST_LOCK,
                                     strict=False,
                                     fail_ok=False)[1]

    LOG.tc_step(
        "Check related fields in fm alarm-list and fm alarm-show are of the "
        "same values")
    post_lock_alarms_tab = system_helper.get_alarms_table(uuid=True)

    alarms_l = ['Alarm ID', 'Entity ID', 'Severity', 'Reason Text']
    alarms_s = ['alarm_id', 'entity_instance_id', 'severity', 'reason_text']

    # Only 1 alarm since we are now checking the specific alarm ID
    for post_alarm in post_lock_alarms:
        LOG.tc_step(
            "Verify {} for alarm {} in alarm-list are in sync with "
            "alarm-show".format(
                alarms_l, post_alarm))

        alarm_show_tab = table_parser.table(cli.fm('alarm-show', post_alarm)[1])
        alarm_list_tab = table_parser.filter_table(post_lock_alarms_tab,
                                                   UUID=post_alarm)

        for i in range(len(alarms_l)):
            alarm_l_val = table_parser.get_column(alarm_list_tab,
                                                  alarms_l[i])[0]
            alarm_s_val = table_parser.get_value_two_col_table(alarm_show_tab,
                                                               alarms_s[i])

            assert alarm_l_val == alarm_s_val, \
                "{} value in alarm-list: {} is different than alarm-show: " \
                "{}".format(alarms_l[i], alarm_l_val, alarm_s_val)

    LOG.tc_step("Check host lock is logged via fm event-list")
    system_helper.wait_for_events(entity_instance_id=compute_host,
                                  start=pre_lock_time, timeout=60,
                                  event_log_id=EventLogID.HOST_LOCK,
                                  fail_ok=False, **{'state': 'set'})

    pre_unlock_time = common.get_date_in_format()
    LOG.tc_step("Unlock {}".format(compute_host))
    host_helper.unlock_host(compute_host)

    LOG.tc_step("Check host lock active alarm cleared")
    alarm_sets = [(EventLogID.HOST_LOCK, compute_host)]
    system_helper.wait_for_alarms_gone(alarm_sets, fail_ok=False)

    LOG.tc_step("Check host lock clear event logged")
    system_helper.wait_for_events(event_log_id=EventLogID.HOST_LOCK,
                                  start=pre_unlock_time,
                                  entity_instance_id=compute_host,
                                  fail_ok=False, **{'state': 'clear'})
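
# A minimal alternative sketch (an assumption, not part of the test above) showing the
# same alarm-list vs alarm-show comparison driven by a single column-to-field mapping.
# It reuses table_parser and cli from the test; the helper name and the pre-filtered
# alarm_list_tab argument are hypothetical.
ALARM_FIELD_MAP = {
    'Alarm ID': 'alarm_id',
    'Entity ID': 'entity_instance_id',
    'Severity': 'severity',
    'Reason Text': 'reason_text',
}


def _check_alarm_list_matches_alarm_show(alarm_uuid, alarm_list_tab):
    # Parse 'fm alarm-show <uuid>' output once, then compare each mapped field
    alarm_show_tab = table_parser.table(cli.fm('alarm-show', alarm_uuid)[1])
    for list_col, show_field in ALARM_FIELD_MAP.items():
        list_val = table_parser.get_column(alarm_list_tab, list_col)[0]
        show_val = table_parser.get_value_two_col_table(alarm_show_tab, show_field)
        assert list_val == show_val, \
            "{} differs between alarm-list ({}) and alarm-show ({})".format(
                list_col, list_val, show_val)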
Example No. 2
def upgrade_host(host, timeout=InstallTimeout.UPGRADE, fail_ok=False, con_ssh=None,
                 auth_info=Tenant.get('admin_platform'), lock=False, unlock=False):
    """
    Upgrade given host
    Args:
        host (str):
        timeout (int): MAX seconds to wait for host to become online after upgrade
        fail_ok (bool):
        con_ssh (SSHClient):
        auth_info (str):
        unlock (bool):
        lock (bool):


    Returns (tuple):
        (0, "Host is upgraded and in online state.")
        (1, "Cli host upgrade rejected. Applicable only if ail_ok")
        (2, "Host failed data migration. Applicable only if fail_ok")
        (3, "Host did not come online after upgrade. Applicable if fail_ok ")
        (4, "Host fail lock before starting upgrade". Applicable if lock arg is True and fail_ok")
        (5, "Host fail to unlock after host upgrade.  Applicable if unlock arg is True and fail_ok")
        (6, "Host unlocked after upgrade, but alarms are not cleared after 120 seconds.
        Applicable if unlock arg is True and fail_ok")

    """
    LOG.info("Upgrading host {}...".format(host))

    if lock:
        if system_helper.get_host_values(host, 'administrative', con_ssh=con_ssh)[0] == HostAdminState.UNLOCKED:
            message = "Host is not locked. Locking host  before starting upgrade"
            LOG.info(message)
            rc, output = host_helper.lock_host(host, con_ssh=con_ssh, fail_ok=True)
            if rc != 0 and rc != -1:
                err_msg = "Host {} fail on lock before starting upgrade: {}".format(host, output)
                if fail_ok:
                    return 4, err_msg
                else:
                    raise exceptions.HostError(err_msg)
    if system_helper.is_aio_simplex():
        exitcode, output = simplex_host_upgrade(con_ssh=con_ssh)
        return exitcode, output

    exitcode, output = cli.system('host-upgrade', host, ssh_client=con_ssh, fail_ok=True, auth_info=auth_info,
                                  timeout=timeout)
    if exitcode == 1:
        err_msg = "Host {} cli upgrade host failed: {}".format(host, output)
        if fail_ok:
            return 1, err_msg
        else:
            raise exceptions.HostError(err_msg)

    # sleep for 180 seconds to let host be re-installed with upgrade release
    time.sleep(180)

    if not system_helper.wait_for_host_values(host, timeout=timeout, check_interval=60,
                                                       availability=HostAvailState.ONLINE, con_ssh=con_ssh,
                                                       fail_ok=fail_ok):
        err_msg = "Host {} did not become online  after upgrade".format(host)
        if fail_ok:
            return 3, err_msg
        else:
            raise exceptions.HostError(err_msg)

    if host.strip() == "controller-1":
        rc, output = _wait_for_upgrade_data_migration_complete(timeout=timeout,
                                                               auth_info=auth_info, fail_ok=fail_ok, con_ssh=con_ssh)
        if rc != 0:
            err_msg = "Host {} upgrade data migration failure: {}".format(host, output)
            if fail_ok:
                return 2, err_msg
            else:
                raise exceptions.HostError(err_msg)

    if unlock:
        rc, output = host_helper.unlock_host(host, fail_ok=True, available_only=True)
        if rc != 0:
            err_msg = "Host {} fail to unlock after host upgrade: ".format(host, output)
            if fail_ok:
                return 5, err_msg
            else:
                raise exceptions.HostError(err_msg)

        # wait until 400.001 alarms get cleared
        if not system_helper.wait_for_alarm_gone("400.001", fail_ok=True):
            err_msg = "Alarms did not clear after host {} upgrade and unlock".format(host)
            if fail_ok:
                return 6, err_msg
            else:
                raise exceptions.HostError(err_msg)

    LOG.info("Upgrading host {} complete ...".format(host))
    return 0, None
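
# A hypothetical caller sketch (not part of the original module) showing how the return
# codes documented above might be handled when fail_ok=True; the wrapper name and the
# 'controller-1' target are placeholders.
def upgrade_host_example(con_ssh=None):
    rc, msg = upgrade_host('controller-1', fail_ok=True, lock=True, unlock=True,
                           con_ssh=con_ssh)
    if rc == 0:
        LOG.info("Host upgraded and online")
    elif rc in (1, 4):
        LOG.warning("Upgrade did not start: {}".format(msg))
    elif rc in (2, 3):
        LOG.warning("Upgrade started but host is not healthy: {}".format(msg))
    else:  # 5 or 6: post-upgrade unlock or alarm-clearing issue
        LOG.warning("Post-upgrade issue: {}".format(msg))
    return rc, msg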
Example No. 3
    def test_lock_with_vms(self, target_hosts, no_simplex,
                           add_admin_role_func):
        """
        Test lock host with vms on it.

        Args:
            target_hosts (tuple): storage backing and targeted host to lock,
                prepared by the target_hosts test fixture.

        Skip Conditions:
            - Less than 2 hypervisor hosts on the system

        Prerequisites:
            - Hosts' storage backing is pre-configured to the storage backing
              under test, i.e., 2 or more hosts should support the storage
              backing under test.
        Test Setups:
            - Set instances quota to 10 if it was less than 8
            - Determine storage backing(s) under test. i.e.,storage backings
            supported by at least 2 hosts on the system
            - Create flavors with storage extra specs set based on storage
            backings under test
            - Create vms_to_test that can be live migrated using created flavors
            - Determine target host(s) to perform lock based on which host(s)
            have the most vms_to_test
            - Live migrate vms to target host(s)
        Test Steps:
            - Lock target host
            - Verify lock succeeded and vms status unchanged
            - Repeat above steps if more than one target host
        Test Teardown:
            - Delete created vms and volumes
            - Delete created flavors
            - Unlock locked target host(s)

        """
        storage_backing, host = target_hosts
        vms_num = 5
        vm_helper.ensure_vms_quotas(vms_num=vms_num)

        LOG.tc_step(
            "Boot {} vms with various storage settings".format(vms_num))
        vms = vm_helper.boot_vms_various_types(cleanup='function',
                                               vms_num=vms_num,
                                               storage_backing=storage_backing,
                                               target_host=host)

        LOG.tc_step("Attempt to lock target host {}...".format(host))
        HostsToRecover.add(host)
        host_helper.lock_host(host=host,
                              check_first=False,
                              fail_ok=False,
                              swact=True)

        LOG.tc_step("Verify lock succeeded and vms still in good state")
        vm_helper.wait_for_vms_values(vms=vms, fail_ok=False)
        for vm in vms:
            vm_host = vm_helper.get_vm_host(vm_id=vm)
            assert vm_host != host, "VM is still on {} after lock".format(host)

            vm_helper.wait_for_vm_pingable_from_natbox(
                vm_id=vm, timeout=VMTimeout.DHCP_RETRY)
Example No. 4
    def test_sriov_robustness(self, sriov_prep, add_admin_role_func):
        """
        Exhaust all CPUs on one compute by spawning VMs with 2 SR-IOV interfaces each

        Args:
            sriov_prep: test fixture to set up test environment and get proper pci nets/hosts

        Setups:
            - select two hosts configured with same pci-sriov providernet
            - add the two hosts to cgcsauto aggregate to limit the vm hosts to the selected hosts
            - Select one network under above providernet

        Test Steps:
            - Boot 2+ pci-sriov vms with pci-sriov vif over selected network onto same host
            - Verify resource usage for providernet is increased as expected
            - Lock vms host and ensure vms are all migrated to other host
            - Verify vms' pci-sriov interfaces reachable and resource usage for pnet unchanged
            - 'sudo reboot -f' new vms host, and ensure vms are evacuated to initial host
            - Verify vms' pci-sriov interfaces reachable and resource usage for pnet unchanged

        Teardown:
            - Delete vms, volumes, flavor created
            - Remove admin role to tenant
            - Recover hosts if applicable
            - Remove cgcsauto aggregate (class scope)

        """
        net_type, pci_net, pci_hosts, pnet_id, nics, initial_host, other_host, vfs_use_init, vm_num, vm_vcpus = \
            sriov_prep
        vif_model = 'pci-sriov'

        # proc0_vm, proc1_vm = host_helper.get_logcores_counts(initial_host, functions='VMs')
        # if system_helper.is_hyperthreading_enabled(initial_host):
        #     proc0_vm *= 2
        #     proc1_vm *= 2
        # vm_vcpus = int(min(proc1_vm, proc0_vm) / (vm_num/2))

        # Create flavor with calculated vcpu number
        LOG.tc_step(
            "Create a flavor with dedicated cpu policy and {} vcpus".format(
                vm_vcpus))
        flavor_id = nova_helper.create_flavor(
            name='dedicated_{}vcpu'.format(vm_vcpus), ram=1024,
            vcpus=vm_vcpus)[1]
        ResourceCleanup.add('flavor', flavor_id, scope='module')
        extra_specs = {
            FlavorSpec.CPU_POLICY: 'dedicated',
        }
        # FlavorSpec.PCI_NUMA_AFFINITY: 'preferred'}    # LP1854516
        nova_helper.set_flavor(flavor=flavor_id, **extra_specs)

        # Boot vms with 2 {} vifs each, and wait for pingable
        LOG.tc_step("Boot {} vms with 2 {} vifs each".format(
            vm_num, vif_model))
        vms = []
        for i in range(vm_num):
            sriov_nics = nics.copy()
            sriov_nic2 = sriov_nics[-1].copy()
            sriov_nic2['port-id'] = network_helper.create_port(
                net_id=sriov_nic2.pop('net-id'),
                vnic_type='direct',
                name='sriov_port')[1]
            sriov_nics.append(sriov_nic2)
            LOG.info("Booting vm{}...".format(i + 1))
            vm_id = vm_helper.boot_vm(flavor=flavor_id,
                                      nics=sriov_nics,
                                      cleanup='function',
                                      vm_host=initial_host,
                                      avail_zone='cgcsauto')[1]
            vms.append(vm_id)
            vm_helper.wait_for_vm_pingable_from_natbox(vm_id)

        check_vm_pci_interface(vms=vms, net_type=net_type)
        # TODO: feature unavailable atm. Update required
        # vfs_use_post_boot = nova_helper.get_provider_net_info(pnet_id, field='pci_vfs_used')
        # assert vfs_use_post_boot - vfs_use_init == vm_num * 2, "Number of PCI vfs used is not as expected"

        HostsToRecover.add(pci_hosts)

        LOG.tc_step("Lock host of {} vms: {}".format(vif_model, initial_host))
        host_helper.lock_host(host=initial_host, check_first=False, swact=True)

        LOG.tc_step(
            "Check vms are migrated to other host: {}".format(other_host))
        for vm in vms:
            vm_host = vm_helper.get_vm_host(vm_id=vm)
            assert other_host == vm_host, "VM did not move to {} after locking {}".format(
                other_host, initial_host)

        check_vm_pci_interface(vms,
                               net_type=net_type,
                               ping_timeout=VMTimeout.DHCP_RETRY)
        # TODO: feature unavailable atm. Update required
        # vfs_use_post_lock = nova_helper.get_provider_net_info(pnet_id, field='pci_vfs_used')
        # assert vfs_use_post_boot == vfs_use_post_lock, "Number of PCI vfs used after locking host is not as expected"

        LOG.tc_step("Unlock {}".format(initial_host))
        host_helper.unlock_host(initial_host)

        LOG.tc_step("Reboot {} and ensure vms are evacuated to {}".format(
            other_host, initial_host))
        vm_helper.evacuate_vms(other_host,
                               vms,
                               post_host=initial_host,
                               wait_for_host_up=True)
        check_vm_pci_interface(vms, net_type=net_type)
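
# A hedged standalone sketch (hypothetical helper, not part of the test above) isolating
# the SR-IOV nic construction used in the boot loop: copy the last nic, create a second
# port with vnic_type='direct' on the same network, and return the extended nic list.
def build_sriov_nics(base_nics):
    sriov_nics = list(base_nics)
    sriov_nic2 = sriov_nics[-1].copy()
    # network_helper.create_port returns a tuple whose second element is the port id
    sriov_nic2['port-id'] = network_helper.create_port(
        net_id=sriov_nic2.pop('net-id'),
        vnic_type='direct',
        name='sriov_port')[1]
    sriov_nics.append(sriov_nic2)
    return sriov_nics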
Example No. 5
    def test_pcipt_robustness(self, pcipt_prep):
        """
        TC3_robustness: PCI-passthrough by locking and rebooting pci_vm host

        Args:
            pcipt_prep: test fixture to set up test environment and get proper pci nets/hosts/seg_id

        Setups:
            - select a providernet with pcipt interfaces configured
            - get pci hosts configured with same above providernet
            - get one network under above providernet (or two for CX4 nic)

        Test Steps:
            - Boot 2 pcipt vms with pci-passthrough vif over selected network
            - Verify resource usage for providernet is increased as expected
            - Lock pci_vm host and ensure vm migrated to other host (or fail to lock if no other pcipt host available)
            - (Delete above tested pcipt vm if only two pcipt hosts available)
            - Lock host for another pcipt vm, and lock is successful
            - Verify vms' pci-pt interfaces reachable and resource usage for pnet as expected
            - 'sudo reboot -f' pci_vm host, and ensure vm evacuated or up on same host if no other pcipt host available
            - Repeat above step for another pcipt vm
            - Verify vms' pci-pt interfaces reachable and resource usage for pnet unchanged

        Teardown:
            - Delete vms, volumes, flavor created
            - Recover hosts if applicable

        """
        net_type, pci_net_name, pci_hosts, pnet_id, nics, min_vcpu_host, seg_id, vm_num, vm_vcpus, pfs_use_init = \
            pcipt_prep
        vif_model = 'pci-passthrough'

        # Create flavor with calculated vcpu number
        LOG.fixture_step(
            "Create a flavor with dedicated cpu policy and {} vcpus".format(
                vm_vcpus))
        flavor_id = nova_helper.create_flavor(
            name='dedicated_{}vcpu'.format(vm_vcpus), ram=1024,
            vcpus=vm_vcpus)[1]
        ResourceCleanup.add('flavor', flavor_id, scope='module')
        extra_specs = {
            FlavorSpec.CPU_POLICY: 'dedicated',
        }
        # FlavorSpec.PCI_NUMA_AFFINITY: 'preferred'}    # LP1854516
        nova_helper.set_flavor(flavor=flavor_id, **extra_specs)

        # Boot vms with 2 {} vifs each, and wait for pingable
        LOG.tc_step("Boot {} vms with 2 {} vifs each".format(
            vm_num, vif_model))
        vms = []
        for i in range(vm_num):
            LOG.info("Booting pci-passthrough vm{}".format(i + 1))
            vm_id = vm_helper.boot_vm(flavor=flavor_id,
                                      nics=nics,
                                      cleanup='function')[1]
            vms.append(vm_id)
            vm_helper.wait_for_vm_pingable_from_natbox(vm_id)
            vm_helper.add_vlan_for_vm_pcipt_interfaces(vm_id,
                                                       seg_id,
                                                       init_conf=True)

        # TODO: feature unavailable atm. Update required
        # pfs_use_post_boot = nova_helper.get_provider_net_info(pnet_id, field='pci_pfs_used')
        # resource_change = 2 if isinstance(seg_id, dict) else 1
        # assert pfs_use_post_boot - pfs_use_init == vm_num * resource_change, "Number of PCI pfs used is not as expected"

        check_vm_pci_interface(vms=vms, net_type=net_type)
        HostsToRecover.add(pci_hosts)

        # pfs_use_pre_action = pfs_use_post_boot
        iter_count = 2 if len(pci_hosts) < 3 else 1
        for i in range(iter_count):
            if i == 1:
                LOG.tc_step(
                    "Delete a pcipt vm and test lock and reboot pcipt host again for success pass"
                )
                vm_helper.delete_vms(vms=vms[1])
                vms.pop()
                # TODO: feature unavailable atm. Update required
                # pfs_use_pre_action -= resource_change
                # common.wait_for_val_from_func(expt_val=pfs_use_pre_action, timeout=30, check_interval=3,
                #                               func=nova_helper.get_provider_net_info,
                #                               providernet_id=pnet_id, field='pci_pfs_used')

            LOG.tc_step("Test lock {} vms hosts started - iter{}".format(
                vif_model, i + 1))
            for vm in vms:
                pre_lock_host = vm_helper.get_vm_host(vm)
                assert pre_lock_host in pci_hosts, "VM is not booted on pci_host"

                LOG.tc_step("Lock host of {} vms: {}".format(
                    vif_model, pre_lock_host))
                code, output = host_helper.lock_host(host=pre_lock_host,
                                                     check_first=False,
                                                     swact=True,
                                                     fail_ok=True)
                post_lock_host = vm_helper.get_vm_host(vm)
                assert post_lock_host in pci_hosts, "VM is not on pci host after migrating"

                if len(pci_hosts) < 3 and i == 0:
                    assert 5 == code, "Expect host-lock fail due to migration of vm failure. Actual: {}".format(
                        output)
                    assert pre_lock_host == post_lock_host, "VM host should not change when no other host to migrate to"
                else:
                    assert 0 == code, "Expect host-lock successful. Actual: {}".format(
                        output)
                    assert pre_lock_host != post_lock_host, "VM host did not change"
                    LOG.tc_step("Unlock {}".format(pre_lock_host))

                check_vm_pci_interface(vms, net_type=net_type)
                host_helper.unlock_host(pre_lock_host, available_only=True)
            # TODO: feature unavailable atm. Update required
            # pfs_use_post_lock = nova_helper.get_provider_net_info(pnet_id, field='pci_pfs_used')
            # assert pfs_use_pre_action == pfs_use_post_lock, "Number of PCI pfs used after host-lock is not as expected"

            LOG.tc_step("Test evacuate {} vms started - iter{}".format(
                vif_model, i + 1))
            for vm in vms:
                pre_evac_host = vm_helper.get_vm_host(vm)

                LOG.tc_step(
                    "Reboot {} and ensure {} vm are evacuated when applicable".
                    format(pre_evac_host, vif_model))
                code, output = vm_helper.evacuate_vms(pre_evac_host,
                                                      vm,
                                                      fail_ok=True,
                                                      wait_for_host_up=True)

                if len(pci_hosts) < 3 and i == 0:
                    assert 1 == code, "Expect vm stay on same host due to migration fail. Actual:{}".format(
                        output)
                    vm_helper.wait_for_vm_status(vm_id=vm)
                else:
                    assert 0 == code, "Expect vm evacuated to other host. Actual: {}".format(
                        output)
                    post_evac_host = vm_helper.get_vm_host(vm)
                    assert post_evac_host in pci_hosts, "VM is not on pci host after evacuation"

                check_vm_pci_interface(vms, net_type=net_type)
Example No. 6
def test_vm_with_config_drive(hosts_per_stor_backing):
    """
    Skip Condition:
        - no host with local_image backend

    Test Steps:
        - Launch a vm using config drive
        - Add test data to config drive on vm
        - Do some operations (reboot vm for simplex; cold migrate and lock host for non-simplex) and
            check that test data persists in the config drive after each operation
    Teardown:
        - Delete created vm, volume, flavor

    """
    guest_os = 'cgcs-guest'
    # guest_os = 'tis-centos-guest'  # CGTS-6782
    img_id = glance_helper.get_guest_image(guest_os)
    hosts_num = len(hosts_per_stor_backing.get('local_image', []))
    if hosts_num < 1:
        skip("No host with local_image storage backing")

    volume_id = cinder_helper.create_volume(name='vol_inst1',
                                            source_id=img_id,
                                            guest_image=guest_os)[1]
    ResourceCleanup.add('volume', volume_id, scope='function')

    block_device = {
        'source': 'volume',
        'dest': 'volume',
        'id': volume_id,
        'device': 'vda'
    }
    vm_id = vm_helper.boot_vm(name='config_drive',
                              config_drive=True,
                              block_device=block_device,
                              cleanup='function',
                              guest_os=guest_os,
                              meta={'foo': 'bar'})[1]

    LOG.tc_step("Confirming the config drive is set to True in vm ...")
    assert str(vm_helper.get_vm_values(vm_id, "config_drive")[0]) == 'True', \
        "vm config-drive not true"

    LOG.tc_step("Add date to config drive ...")
    check_vm_config_drive_data(vm_id)

    vm_host = vm_helper.get_vm_host(vm_id)
    instance_name = vm_helper.get_vm_instance_name(vm_id)
    LOG.tc_step("Check config_drive vm files on hypervisor after vm launch")
    check_vm_files_on_hypervisor(vm_id,
                                 vm_host=vm_host,
                                 instance_name=instance_name)

    if not system_helper.is_aio_simplex():
        LOG.tc_step("Cold migrate VM")
        vm_helper.cold_migrate_vm(vm_id)

        LOG.tc_step("Check config drive after cold migrate VM...")
        check_vm_config_drive_data(vm_id)

        LOG.tc_step("Lock the compute host")
        compute_host = vm_helper.get_vm_host(vm_id)
        HostsToRecover.add(compute_host)
        host_helper.lock_host(compute_host, swact=True)

        LOG.tc_step("Check config drive after locking VM host")
        check_vm_config_drive_data(vm_id, ping_timeout=VMTimeout.DHCP_RETRY)
        vm_host = vm_helper.get_vm_host(vm_id)

    else:
        LOG.tc_step("Reboot vm")
        vm_helper.reboot_vm(vm_id)

        LOG.tc_step("Check config drive after vm rebooted")
        check_vm_config_drive_data(vm_id)

    LOG.tc_step("Check vm files exist after nova operations")
    check_vm_files_on_hypervisor(vm_id,
                                 vm_host=vm_host,
                                 instance_name=instance_name)
Example No. 7
def test_lock_unlock_host(host_type, collect_kpi):
    """
    Verify lock unlock host

    Test Steps:
        - Select a host per given type. If type is controller, select standby controller.
        - Lock selected host and ensure it is successfully locked
        - Unlock selected host and ensure it is successfully unlocked

    """
    init_time = None
    if collect_kpi:
        init_time = common.get_date_in_format(date_format=KPI_DATE_FORMAT)

    LOG.tc_step("Select a {} node from system if any".format(host_type))
    if host_type == 'controller':
        if system_helper.is_aio_simplex():
            host = 'controller-0'
        else:
            host = system_helper.get_standby_controller_name()
            assert host, "No standby controller available"

    else:
        if host_type == 'compute' and (system_helper.is_aio_duplex()
                                       or system_helper.is_aio_simplex()):
            skip("No compute host on AIO system")
        elif host_type == 'storage' and not system_helper.is_storage_system():
            skip("System does not have storage nodes")

        hosts = system_helper.get_hosts(personality=host_type,
                                        availability=HostAvailState.AVAILABLE,
                                        operational=HostOperState.ENABLED)

        assert hosts, "No good {} host on system".format(host_type)
        host = hosts[0]

    LOG.tc_step(
        "Lock {} host - {} and ensure it is successfully locked".format(
            host_type, host))
    HostsToRecover.add(host)
    host_helper.lock_host(host, swact=False)

    # wait for services to stabilize before unlocking
    time.sleep(20)

    # unlock the host and verify it is successfully unlocked
    LOG.tc_step(
        "Unlock {} host - {} and ensure it is successfully unlocked".format(
            host_type, host))
    host_helper.unlock_host(host)

    LOG.tc_step("Check helm list after host unlocked")
    con_ssh = ControllerClient.get_active_controller()
    con_ssh.exec_cmd('helm list', fail_ok=False)

    if collect_kpi:
        lock_kpi_name = HostLock.NAME.format(host_type)
        unlock_kpi_name = HostUnlock.NAME.format(host_type)
        unlock_host_type = host_type
        if container_helper.is_stx_openstack_deployed():
            if system_helper.is_aio_system():
                unlock_host_type = 'compute'
        else:
            lock_kpi_name += '_platform'
            unlock_kpi_name += '_platform'
            if unlock_host_type == 'compute':
                unlock_host_type = 'compute_platform'

        LOG.info("Collect kpi for lock/unlock {}".format(host_type))
        code_lock, out_lock = kpi_log_parser.record_kpi(
            local_kpi_file=collect_kpi,
            kpi_name=lock_kpi_name,
            host=None,
            log_path=HostLock.LOG_PATH,
            end_pattern=HostLock.END.format(host),
            start_pattern=HostLock.START.format(host),
            start_path=HostLock.START_PATH,
            init_time=init_time)

        time.sleep(30)  # delay in sysinv log vs nova hypervisor list
        code_unlock, out_unlock = kpi_log_parser.record_kpi(
            local_kpi_file=collect_kpi,
            kpi_name=unlock_kpi_name,
            host=None,
            log_path=HostUnlock.LOG_PATH,
            end_pattern=HostUnlock.END[unlock_host_type].format(host),
            init_time=init_time,
            start_pattern=HostUnlock.START.format(host),
            start_path=HostUnlock.START_PATH)

        assert code_lock == 0, 'Failed to collect kpi for host-lock {}. ' \
                               'Error: \n{}'.format(host, out_lock)
        assert code_unlock == 0, 'Failed to collect kpi for host-unlock {}. ' \
                                 'Error: \n{}'.format(host, out_unlock)
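
# A hedged sketch (an assumption, not from the original suite) of how this test might be
# driven over the host types its body branches on; pytest and the collect_kpi fixture
# are assumed to be available in the surrounding test framework.
import pytest


@pytest.mark.parametrize('host_type', ['controller', 'compute', 'storage'])
def test_lock_unlock_host_parametrized(host_type, collect_kpi):
    test_lock_unlock_host(host_type=host_type, collect_kpi=collect_kpi)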
Example No. 8
def test_snat_computes_lock_reboot(snat_setups):
    """
    Test VM external access after compute host reboot while all other computes are locked

    Args:
        snat_setups (tuple): returns vm id and fip. Enable snat, create vm and attach floating ip.

    Test Setups (module):
        - Find a tenant router that is dvr or non-dvr based on the parameter
        - Enable SNAT on tenant router
        - boot a vm and attach a floating ip
        - Ping vm from NatBox

    Steps:
        - Ping VM {} from NatBox
        - Lock all nova hosts except the vm host
        - Ping external from vm
        - Reboot VM host
        - Wait for vm host to complete reboot
        - Verify vm is recovered after host reboot complete and can still ping outside

    Test Teardown:
        - Unlock all hosts
        - Delete the created vm     (module)
        - Disable SNAT on router    (module)

    """
    hypervisors = host_helper.get_hypervisors(state='up')
    if len(hypervisors) > 3:
        skip("More than 3 hypervisors on system. Skip to reduce run time.")

    vm_ = snat_setups[0]
    LOG.tc_step("Ping VM {} from NatBox".format(vm_))
    vm_helper.wait_for_vm_pingable_from_natbox(vm_, timeout=60, use_fip=True)

    vm_host = vm_helper.get_vm_host(vm_)
    LOG.info("VM host is {}".format(vm_host))
    assert vm_host in hypervisors, "vm host is not in nova hypervisor-list"

    hosts_should_lock = set(hypervisors) - {vm_host}
    hosts_already_locked = set(system_helper.get_hosts(administrative='locked'))
    hosts_to_lock = list(hosts_should_lock - hosts_already_locked)
    LOG.tc_step("Lock all compute hosts {} except vm host {}".format(hosts_to_lock, vm_host))
    for host_ in hosts_to_lock:
        HostsToRecover.add(host_, scope='function')
        host_helper.lock_host(host_, swact=True)

    vm_helper.wait_for_vm_pingable_from_natbox(vm_id=vm_, timeout=120)
    LOG.tc_step("Ping external from vm {}".format(vm_))
    vm_helper.ping_ext_from_vm(vm_, use_fip=True)

    LOG.tc_step("Evacuate vm and expect VM to stay on same host")
    code, output = vm_helper.evacuate_vms(host=vm_host, vms_to_check=vm_, fail_ok=True)
    assert code > 0, "Actual: {}".format(output)

    LOG.tc_step("Verify vm is recovered and can still ping outside")
    host_helper.wait_for_hosts_ready(hosts=vm_host)
    vm_helper.wait_for_vm_status(vm_id=vm_)
    vm_helper.wait_for_vm_pingable_from_natbox(vm_id=vm_)
    vm_helper.wait_for_vm_pingable_from_natbox(vm_id=vm_, use_fip=True, timeout=60)
    vm_helper.ping_ext_from_vm(vm_, use_fip=True)
Example No. 9
def test_robustness_service_function_chaining(protocol, nsh_aware, same_host,
                                              add_protocol, symmetric,
                                              check_system,
                                              add_admin_role_module):
    """
        Test Service Function Chaining

        Test Steps:
            - Check if the system is compatible
            - Boot the source VM, dest VM & SFC VM in same host or diff host based on <same_host: True or False>
            - Install necessary software and package inside guest for packet forwarding test
            - Create port pair using nsh_aware <True:False>
            - Create port pair group
            - Create SFC flow classifier using protocol <tcp:icmp:udp>
            - Create port Chain
            - Check packet forwarding from source to dest vm via SFC vm
            - Migrate VM by force_lock compute host
            - Check packet forwarding from source to dest vm via SFC vm
            - Create new flow classifier with new protocol (add_protocol)
            - Update port chain with new flow classifier
            - Check packet forwarding from source to dest vm via SFC vm with new classifier
            - Evacuate VM by rebooting compute host
            - Verify VM evacuated
            - Check packet forwarding from source to dest vm via SFC vm with new classifier

        Test Teardown:
            - Delete port chain, port pair group, port pair, flow classifier, vms, volumes created

    """
    nsh_aware = (nsh_aware == 'nsh_aware')
    same_host = (same_host == 'same_host')
    symmetric = (symmetric == 'symmetric')

    LOG.tc_step("Check if the system is compatible to run this test")
    computes = check_system

    LOG.tc_step("Boot the VM in same host: {}".format(same_host))
    hosts_to_boot = [computes[0]] * 3 if same_host else computes[0:3]
    LOG.info("Boot the VM in following compute host 1:{}, 2:{}, 3:{}".format(
        hosts_to_boot[0], hosts_to_boot[1], hosts_to_boot[2]))

    LOG.tc_step("Boot the source and dest VM")
    vm_ids = []
    vm_ids, source_vm_id, dest_vm_id, internal_net_id, mgmt_net_id, mgmt_nic = _setup_vm(
        vm_ids, hosts_to_boot)
    vm_helper.ping_vms_from_vm(to_vms=source_vm_id,
                               from_vm=dest_vm_id,
                               net_types=['mgmt'],
                               retry=10)

    LOG.tc_step("Boot the SFC VM")
    sfc_vm_ids = []
    sfc_vm_ids, sfc_vm_under_test, ingress_port_id, egress_port_id = _setup_sfc_vm(
        sfc_vm_ids, hosts_to_boot, mgmt_nic, internal_net_id)
    vm_helper.ping_vms_from_vm(to_vms=source_vm_id,
                               from_vm=sfc_vm_under_test,
                               net_types=['mgmt'],
                               retry=10)

    # if protocol != 'icmp':
    LOG.tc_step("Install software package nc in vm {} {}".format(
        source_vm_id, dest_vm_id))
    _install_sw_packages_in_vm(source_vm_id)
    _install_sw_packages_in_vm(dest_vm_id)

    LOG.tc_step("copy vxlan tool in sfc vm {}".format(sfc_vm_under_test))
    vm_helper.scp_to_vm_from_natbox(vm_id=sfc_vm_under_test,
                                    source_file='/home/cgcs/sfc/vxlan_tool.py',
                                    dest_file='/root/vxlan_tool.py')

    LOG.tc_step("Create port pair")
    port_pair_ids = []
    port_pair_id = _setup_port_pair(nsh_aware, ingress_port_id, egress_port_id)
    port_pair_ids.append(port_pair_id)

    LOG.tc_step("Create port pair group")
    port_pair_group_ids = []
    port_pair_group_id = _setup_port_pair_groups(port_pair_id)
    port_pair_group_ids.append(port_pair_group_id)

    name = 'sfc_flow_classifier'
    LOG.tc_step("Create flow classifier:{}".format(name))
    flow_classifier, dest_vm_internal_net_ip = _setup_flow_classifier(
        name, source_vm_id, dest_vm_id, protocol)

    LOG.tc_step("Create port chain")
    port_chain_id = _setup_port_chain(port_pair_group_id, flow_classifier,
                                      symmetric)

    LOG.tc_step(
        "Execute vxlan.py tool and verify {} packet received VM1 to VM2".
        format(protocol))
    _check_packets_forwarded_in_sfc_vm(source_vm_id,
                                       dest_vm_id,
                                       sfc_vm_ids,
                                       dest_vm_internal_net_ip,
                                       protocol,
                                       nsh_aware,
                                       symmetric,
                                       load_balancing=False)

    LOG.tc_step("Force lock {}".format(hosts_to_boot))
    if not same_host:
        for host_to_boot in hosts_to_boot:
            HostsToRecover.add(host_to_boot)
            lock_code, lock_output = host_helper.lock_host(host_to_boot,
                                                           force=True,
                                                           check_first=True)
            assert lock_code == 0, "Failed to force lock {}. Details: {}".format(
                host_to_boot, lock_output)
    else:
        HostsToRecover.add(hosts_to_boot[0])
        lock_code, lock_output = host_helper.lock_host(hosts_to_boot[0],
                                                       force=True,
                                                       check_first=True)
        assert lock_code == 0, "Failed to force lock {}. Details: {}".format(
            hosts_to_boot[0], lock_output)

    # Expect VMs to migrate off force-locked host (non-gracefully)
    LOG.tc_step(
        "Wait for 'Active' status of VMs after host force lock completes")
    vm_helper.wait_for_vms_values(vm_ids, fail_ok=False)

    LOG.tc_step(
        "Execute vxlan.py tool and verify {} packet received VM1 to VM2".
        format(protocol))
    _check_packets_forwarded_in_sfc_vm(source_vm_id,
                                       dest_vm_id,
                                       sfc_vm_ids,
                                       dest_vm_internal_net_ip,
                                       protocol,
                                       nsh_aware,
                                       symmetric,
                                       load_balancing=False)

    LOG.tc_step(
        "Create new flow classifier with protocol {}".format(add_protocol))
    flow_classifier_name = 'new_sfc_flow_classifier'
    new_flow_classifier, dest_vm_internal_net_ip = _setup_flow_classifier(
        flow_classifier_name, source_vm_id, dest_vm_id, add_protocol)

    LOG.tc_step("Update port chain with new flow classifier:".format(
        new_flow_classifier))
    network_helper.set_sfc_port_chain(port_chain_id,
                                      port_pair_groups=port_pair_group_id,
                                      flow_classifiers=new_flow_classifier)

    LOG.tc_step(
        "Execute vxlan.py tool and verify {} packet received VM1 to VM2".
        format(add_protocol))
    _check_packets_forwarded_in_sfc_vm(source_vm_id,
                                       dest_vm_id,
                                       sfc_vm_ids,
                                       dest_vm_internal_net_ip,
                                       add_protocol,
                                       nsh_aware,
                                       symmetric,
                                       load_balancing=False)

    LOG.info("Get the host to reboot where the VMs launched")
    hosts_to_reboot = vm_helper.get_vms_hosts(vm_ids=vm_ids)

    LOG.tc_step(
        "Reboot VMs host {} and ensure vms are evacuated to other host".format(
            hosts_to_reboot))
    vm_helper.evacuate_vms(host=hosts_to_reboot,
                           vms_to_check=vm_ids,
                           ping_vms=True)

    LOG.tc_step(
        "Execute vxlan.py tool and verify {} packet received VM1 to VM2".
        format(add_protocol))
    _check_packets_forwarded_in_sfc_vm(source_vm_id,
                                       dest_vm_id,
                                       sfc_vm_ids,
                                       dest_vm_internal_net_ip,
                                       add_protocol,
                                       nsh_aware,
                                       symmetric,
                                       load_balancing=False)
Example No. 10
def test_modify_mtu_data_interface(mtu_range, revert_data_mtu):
    """
    23) Change the MTU value of the data interface using CLI
    Verify that the MTU on data interfaces of all compute nodes can be modified by CLI
    The min MTU for a data interface can be 1500, 9000 or 9216, in which case the MTU is unchangeable. Need to confirm
    Args:
        mtu_range (str): A string that contains the MTU to be tested
        revert_data_mtu: A fixture to restore changed MTUs, if any, to their original values

    Setup:
        - Nothing

    Test Steps:
        - lock standby controller
        - modify the imtu value of the compute node
        - unlock the controller
        - check the compute node has the expected MTU

    Teardown:
        - Revert data mtu

    """

    hypervisors = host_helper.get_hypervisors(state='up')
    if len(hypervisors) < 2:
        skip("Less than two hypervisors available.")

    if system_helper.is_aio_duplex():
        standby = system_helper.get_standby_controller_name()
        if not standby:
            skip("Standby controller unavailable on CPE system. Unable to lock host")
        hypervisors = [standby]
    else:
        if len(hypervisors) > 2:
            hypervisors = random.sample(hypervisors, 2)

    LOG.tc_step("Delete vms to reduce lock time")
    vm_helper.delete_vms()

    mtu = __get_mtu_to_mod(providernet_name='-data', mtu_range=mtu_range)

    LOG.tc_step("Modify data MTU to {} for hosts: {}".format(mtu, hypervisors))

    net_type = 'data'

    active_controller = system_helper.get_active_controller_name()
    hosts = hypervisors[:]
    if active_controller in hosts:
        hosts.remove(active_controller)
        hosts.append(active_controller)

    for host in hosts:
        interfaces = get_ifs_to_mod(host, net_type, mtu)
        revert_ifs = list(interfaces)
        if not revert_ifs:
            LOG.info('Skip host:{} because there is no interface to set MTU'.format(host))
            continue

        host_helper.lock_host(host, swact=True)

        revert_ifs.reverse()
        changed_ifs = []
        for interface in revert_ifs:
            LOG.tc_step('Checking the max MTU for the IF:{} on host:{}'.format(interface, host))
            max_mtu, cur_mtu, nic_name = get_max_allowed_mtus(host=host, network_type=net_type, if_name=interface)

            LOG.info('Checking the max MTU for the IF:{}, max MTU: {}, host:{}'.format(
                interface, max_mtu or 'NOT SET', host))

            expecting_pass = not max_mtu or mtu <= max_mtu
            if not expecting_pass:
                LOG.warn('Expecting to fail in changing MTU: changing to:{}, max-mtu:{}'.format(mtu, max_mtu))

            pre_mtu = int(host_helper.get_host_interface_values(host, interface, 'imtu')[0])

            LOG.tc_step('Modify MTU of IF:{} on host:{} to:{}, expecting: {}'.format(
                interface, host, mtu, 'PASS' if expecting_pass else 'FAIL'))

            code, res = host_helper.modify_mtu_on_interface(host, interface, mtu_val=mtu, network_type=net_type,
                                                            lock_unlock=False, fail_ok=True)
            msg_result = "PASS" if expecting_pass else "FAIL"
            msg = "Failed to modify data MTU, expecting to {}, \nnew MTU:{}, max MTU:{}, old MTU:{}, " \
                  "Return code:{}; Details: {}".format(msg_result, pre_mtu, max_mtu, pre_mtu, code, res)

            if 0 == code:
                if mtu != cur_mtu:
                    changed_ifs.append(interface)
                    HOSTS_IF_MODIFY_ARGS.append((host, pre_mtu, mtu, max_mtu, interface, net_type))
                assert expecting_pass, msg
            else:
                assert not expecting_pass, msg

            LOG.info('OK, modification of MTU of data interface {} as expected'.format(msg_result))

        host_helper.unlock_host(host)
        for interface in revert_ifs:
            if interface in changed_ifs:
                actual_mtu = int(host_helper.get_host_interface_values(host,
                                                                       interface=interface, fields=['imtu'])[0])
                assert actual_mtu == mtu, \
                    'Actual MTU after modification did not match expected, expected:{}, actual:{}'.format(
                        mtu, actual_mtu)
        changed_ifs[:] = []

    if not HOSTS_IF_MODIFY_ARGS:
        skip('No data interface changed!')
        return

    HOSTS_IF_MODIFY_ARGS.reverse()
Example No. 11
def lock_unlock_host(backup_info, con_ssh, vms):
    """
    Do lock & unlock hosts test before system backup.

    Args:
        backup_info:
            - options for system backup

        con_ssh:
            - current ssh connection to the target

        vms:
            - VMs whose hosts are to be tested
    Return:
        None
    """

    active_controller_name = system_helper.get_active_controller_name()

    target_vm = random.choice(vms)
    LOG.info('lock and unlock the host of VM:{}'.format(target_vm))

    target_host = vm_helper.get_vm_host(target_vm, con_ssh=con_ssh)
    if target_host == active_controller_name:
        if not system_helper.is_aio_simplex():
            LOG.warning(
                'Attempt to lock the active controller on a non-simplex system'
            )
            host_helper.swact_host()

    active_controller_name = system_helper.get_active_controller_name()

    LOG.info('lock and unlock:{}'.format(target_host))

    host_helper.lock_host(target_host)
    if not system_helper.is_aio_simplex():
        LOG.info('check if the VM is pingable')
        vm_helper.ping_vms_from_natbox(target_vm)
    else:
        LOG.info(
            'skip pinging vm after locking the only node in a simplex system')

    LOG.info('unlock:{}'.format(target_host))
    host_helper.unlock_host(target_host)

    system_helper.wait_for_host_values(target_host,
                                       administrative='unlocked',
                                       availability='available',
                                       vim_progress_status='services-enabled')
    for tried in range(5):
        pingable, message = vm_helper.ping_vms_from_natbox(target_vm,
                                                           fail_ok=(tried < 4))
        if not pingable:
            LOG.info('Failed to ping VM:{}, try again in 20 seconds'.format(
                target_vm))
            time.sleep(20)
        else:
            LOG.info('Succeeded to ping VM:{}'.format(target_vm))
            break
    if backup_info.get('dest', 'local') == 'usb':
        if active_controller_name != 'controller-0':
            LOG.info(
                'current active_controller: ' + active_controller_name +
                ', swact back to controller-0 since it may have changed earlier')
            host_helper.swact_host()
            active_controller_name = system_helper.get_active_controller_name()
            LOG.info(
                'current active_controller should be restored to controller-0, actual:'
                + active_controller_name)
Example No. 12
def lock_unlock_hosting_node(vm_type, vm_id, force_lock=False):
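    """
    Lock and unlock the hosting node of the given VM, then rescue the VM.

    Args:
        vm_type (str): type of the VM under test, passed through to rescue_vm()
        vm_id (str): VM whose hosting node will be locked and unlocked
        force_lock (bool): whether to force-lock the hosting node
    """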
    host = vm_helper.get_vm_host(vm_id)
    host_helper.lock_host(host, force=force_lock)
    host_helper.unlock_host(host)

    rescue_vm(vm_type, vm_id)