Example 1
def test_lab_setup_kpi(collect_kpi):
    """
    This test extracts only the time required to run lab_setup.sh.
    """

    if not collect_kpi:
        skip("KPI only test. Skip due to kpi collection is not enabled")

    lab_name = ProjVar.get_var("LAB_NAME")
    log_path = LabSetup.LOG_PATH
    kpi_name = LabSetup.NAME
    host = "controller-0"
    start_pattern = LabSetup.START
    end_pattern = LabSetup.END

    kpi_log_parser.record_kpi(local_kpi_file=collect_kpi,
                              kpi_name=kpi_name,
                              log_path=log_path,
                              lab_name=lab_name,
                              host=host,
                              start_pattern=start_pattern,
                              end_pattern=end_pattern,
                              sudo=True,
                              topdown=True,
                              uptime=15,
                              fail_ok=False)
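
Every example in this collection funnels into the same helper, so a reading aid may help. The following is a hypothetical, stdlib-only sketch of what the log-parsing mode of kpi_log_parser.record_kpi appears to do, inferred purely from how it is called in these examples (the real helper clearly supports more: sudo, topdown, a direct kpi_val mode, and so on): locate a start pattern and an end pattern in a log and report the seconds between their timestamps.

import re
from datetime import datetime

# Hypothetical timestamp shape; real log formats may differ.
_TS = r'\d{4}-\d{2}-\d{2}[T ]\d{2}:\d{2}:\d{2}'


def duration_between_patterns(log_text, start_pattern, end_pattern):
    """Seconds between the first start-pattern hit and the last end-pattern hit."""
    def timestamp_of(pattern, lines):
        for line in lines:
            if re.search(pattern, line):
                ts = re.search(_TS, line)
                if ts:
                    return datetime.strptime(ts.group().replace(' ', 'T'),
                                             '%Y-%m-%dT%H:%M:%S')
        return None

    lines = log_text.splitlines()
    start = timestamp_of(start_pattern, lines)
    end = timestamp_of(end_pattern, list(reversed(lines)))
    if start is None or end is None:
        raise ValueError('start or end pattern not found in log')
    return (end - start).total_seconds()

Treat this purely as a reading aid, not as the actual implementation; flags such as topdown presumably control details like the scan direction.
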
Example 2
def test_system_install_kpi(collect_kpi):
    """
    This measures the time to install the full system from beginning to end.

    The caveat is that it only works with auto-install, due to the way the
    end_pattern is constructed.
    """

    if not collect_kpi:
        skip("KPI only test. Skip due to kpi collection is not enabled")

    lab_name = ProjVar.get_var("LAB_NAME")
    host = "controller-0"
    kpi_name = SystemInstall.NAME
    log_path = SystemInstall.LOG_PATH
    start_pattern = SystemInstall.START
    start_path = SystemInstall.START_PATH
    end_pattern = SystemInstall.END

    kpi_log_parser.record_kpi(local_kpi_file=collect_kpi,
                              kpi_name=kpi_name,
                              log_path=log_path,
                              lab_name=lab_name,
                              host=host,
                              start_pattern=start_pattern,
                              end_pattern=end_pattern,
                              start_path=start_path,
                              sudo=True,
                              topdown=True,
                              start_pattern_init=True,
                              fail_ok=False)
Example 3
def test_kpi_cyclictest_vm(collect_kpi, prepare_test_session,
                           get_rt_guest_image, get_hypervisor,
                           add_admin_role_func):
    if not collect_kpi:
        skip("KPI only test.  Skip due to kpi collection is not enabled")

    hypervisor = get_hypervisor
    testable_hypervisors[hypervisor]['for_vm_test'] = True
    LOG.info('Hypervisor chosen to host rt vm: {}'.format(hypervisor))

    vm_id, vcpu_count, non_rt_core = create_rt_vm(hypervisor)
    vm_helper.wait_for_vm_pingable_from_natbox(vm_id=vm_id)

    cyclictest_dir = '/root/cyclictest/'
    program = os.path.join(os.path.normpath(cyclictest_dir),
                           os.path.basename(CYCLICTEST_EXE))
    program_active_con = os.path.join(os.path.normpath(CYCLICTEST_DIR),
                                      os.path.basename(CYCLICTEST_EXE))

    cpu_info = {
        'vm_cores': [id_ for id_ in range(vcpu_count) if id_ != non_rt_core]
    }

    with vm_helper.ssh_to_vm_from_natbox(vm_id) as vm_ssh:
        prep_test_on_host(vm_ssh,
                          vm_id,
                          program_active_con,
                          ControllerClient.get_active_controller().host,
                          cyclictest_dir=cyclictest_dir)
        run_log, hist_file = run_cyclictest(vm_ssh,
                                            program,
                                            vm_id,
                                            cyclictest_dir=cyclictest_dir,
                                            cpu_info=cpu_info)

        LOG.info("Process and upload test results")
        local_run_log, local_hist_file = fetch_results_from_target(
            target_ssh=vm_ssh,
            target_host=vm_id,
            run_log=run_log,
            hist_file=hist_file,
            is_guest=True)

    testable_hypervisors[hypervisor]['for_vm_test'] = False

    avg_val, six_nines_val = calculate_results(run_log=local_run_log,
                                               hist_file=local_hist_file,
                                               cores_to_ignore=None,
                                               num_cores=(vcpu_count - 1))

    kpi_log_parser.record_kpi(local_kpi_file=collect_kpi,
                              kpi_name=CyclicTest.NAME_VM_AVG,
                              kpi_val=avg_val,
                              uptime=15,
                              unit=CyclicTest.UNIT)
    kpi_log_parser.record_kpi(local_kpi_file=collect_kpi,
                              kpi_name=CyclicTest.NAME_VM_6_NINES,
                              kpi_val=six_nines_val,
                              uptime=15,
                              unit=CyclicTest.UNIT)
Example 4
def _test_heat_kpi(collect_kpi):
    """
    Measures the time to launch heat stacks. Only applies to labs where
    .heat_resources is present.
    """

    if not collect_kpi:
        skip("KPI only test. Skip due to kpi collection is not enabled")

    lab_name = ProjVar.get_var("LAB_NAME")
    log_path = HeatStacks.LOG_PATH
    kpi_name = HeatStacks.NAME
    host = "controller-0"
    start_pattern = HeatStacks.START
    end_pattern = HeatStacks.END

    kpi_log_parser.record_kpi(local_kpi_file=collect_kpi,
                              kpi_name=kpi_name,
                              log_path=log_path,
                              lab_name=lab_name,
                              host=host,
                              start_pattern=start_pattern,
                              end_pattern=end_pattern,
                              sudo=True,
                              topdown=True,
                              start_pattern_init=True,
                              uptime=15,
                              fail_ok=False)
Example 5
def collected_upgrade_controller0_kpi(lab, collect_kpi, init_time=None):
    """

    Args:
        lab:
        collect_kpi:
        init_time

    Returns:

    """

    if not collect_kpi:
        LOG.info("KPI only test.  Skip due to kpi collection is not enabled")
        return

    lab_name = lab['short_name']
    log_path = UpgradeController0.LOG_PATH
    kpi_name = UpgradeController0.NAME
    host = "controller-1"
    start_pattern = UpgradeController0.START
    start_path = UpgradeController0.START_PATH
    end_pattern = UpgradeController0.END

    kpi_log_parser.record_kpi(local_kpi_file=collect_kpi, kpi_name=kpi_name,
                              log_path=log_path, lab_name=lab_name, host=host,
                              start_pattern=start_pattern, start_path=start_path,
                              end_pattern=end_pattern, init_time=init_time, sudo=True, topdown=True)
Example 6
def test_node_install_kpi(collect_kpi):
    """
    This test measures the install time for each node in the system.
    """

    if not collect_kpi:
        skip("KPI only test. Skip due to kpi collection is not enabled")

    lab_name = ProjVar.get_var("LAB_NAME")
    hosts = system_helper.get_hosts()
    print("System has hosts: {}".format(hosts))

    log_path = NodeInstall.LOG_PATH
    start_cmd = 'head -n 1 {}'.format(log_path)
    end_cmd = 'tail -n 1 {}'.format(log_path)
    date_cmd = '{} -n 1 /var/log/bash.log'
    with host_helper.ssh_to_host('controller-0') as con0_ssh:
        bash_start = con0_ssh.exec_sudo_cmd(date_cmd.format('head'),
                                            fail_ok=False)[1]
        bash_end = con0_ssh.exec_sudo_cmd(date_cmd.format('tail'),
                                          fail_ok=False)[1]
    bash_start = re.findall(TIMESTAMP_PATTERN, bash_start.strip())[0]
    bash_end = re.findall(TIMESTAMP_PATTERN, bash_end.strip())[0]
    date_ = bash_start.split('T')[0]

    def _get_time_delta(start_, end_):
        start_ = start_.replace(',', '.')
        end_ = end_.replace(',', '.')
        start_t = '{}T{}'.format(date_, start_)
        end_t = '{}T{}'.format(date_, end_)

        time_delta = common.get_timedelta_for_isotimes(start_t,
                                                       end_t).total_seconds()
        if time_delta < 0:
            end_t = '{}T{}'.format(bash_end.split('T')[0], end_)
            time_delta = common.get_timedelta_for_isotimes(
                start_t, end_t).total_seconds()
        return time_delta

    for host in hosts:
        with host_helper.ssh_to_host(hostname=host) as host_ssh:
            start_output = host_ssh.exec_sudo_cmd(start_cmd,
                                                  fail_ok=False)[1].strip()
            end_output = host_ssh.exec_sudo_cmd(end_cmd,
                                                fail_ok=False)[1].strip()

        kpi_name = NodeInstall.NAME.format(host)
        start_time = re.findall(NodeInstall.TIMESTAMP_PATTERN, start_output)[0]
        end_time = re.findall(NodeInstall.TIMESTAMP_PATTERN, end_output)[0]

        install_duration = _get_time_delta(start_time, end_time)
        kpi_log_parser.record_kpi(local_kpi_file=collect_kpi,
                                  kpi_name=kpi_name,
                                  log_path=log_path,
                                  lab_name=lab_name,
                                  kpi_val=install_duration,
                                  fail_ok=False)
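
The day-rollover handling in _get_time_delta above is easy to miss, so here is a self-contained rework of the same logic using only the stdlib (the real code delegates to common.get_timedelta_for_isotimes, and the example values below are made up): if the naive delta comes out negative, the install crossed midnight, so the end time is re-anchored to the date taken from the last bash.log entry.

from datetime import datetime


def install_duration(start_date, end_date, start_, end_):
    # Timestamps arrive as 'HH:MM:SS,mmm'; normalize the comma first.
    fmt = '%Y-%m-%dT%H:%M:%S.%f'
    start_ = start_.replace(',', '.')
    end_ = end_.replace(',', '.')
    start_t = datetime.strptime('{}T{}'.format(start_date, start_), fmt)
    end_t = datetime.strptime('{}T{}'.format(start_date, end_), fmt)
    if end_t < start_t:
        # Negative naive delta: the install crossed midnight.
        end_t = datetime.strptime('{}T{}'.format(end_date, end_), fmt)
    return (end_t - start_t).total_seconds()


# An install that starts just before midnight and ends just after it:
print(install_duration('2023-05-01', '2023-05-02',
                       '23:58:10,000', '00:12:40,500'))  # 870.5
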
Example 7
def test_kpi_cyclictest_hypervisor(collect_kpi, prepare_test_session,
                                   get_hypervisor):
    if not collect_kpi:
        skip("KPI only test.  Skip due to kpi collection is not enabled")

    global testable_hypervisors
    chosen_hypervisor = get_hypervisor
    cpu_info = testable_hypervisors[chosen_hypervisor]
    cpu_info['for_host_test'] = True

    LOG.info(
        'Hypervisor chosen to run cyclictest: {}'.format(chosen_hypervisor))
    active_controller_name = system_helper.get_active_controller_name()
    program = os.path.join(os.path.normpath(CYCLICTEST_DIR),
                           os.path.basename(CYCLICTEST_EXE))
    LOG.debug('program={}'.format(program))

    with host_helper.ssh_to_host(chosen_hypervisor) as target_ssh:
        prep_test_on_host(target_ssh, chosen_hypervisor, program,
                          active_controller_name)
        run_log, hist_file = run_cyclictest(target_ssh,
                                            program,
                                            chosen_hypervisor,
                                            cpu_info=cpu_info)

        LOG.info("Process and upload test results")
        local_run_log, local_hist_file = fetch_results_from_target(
            target_ssh=target_ssh,
            target_host=chosen_hypervisor,
            active_con_name=active_controller_name,
            run_log=run_log,
            hist_file=hist_file)

    testable_hypervisors[chosen_hypervisor]['for_host_test'] = False

    avg_val, six_nines_val = calculate_results(run_log=local_run_log,
                                               hist_file=local_hist_file,
                                               cores_to_ignore=None,
                                               num_cores=len(
                                                   cpu_info['vm_cores']))

    kpi_log_parser.record_kpi(local_kpi_file=collect_kpi,
                              kpi_name=CyclicTest.NAME_HYPERVISOR_AVG,
                              kpi_val=avg_val,
                              uptime=15,
                              unit=CyclicTest.UNIT)
    kpi_log_parser.record_kpi(local_kpi_file=collect_kpi,
                              kpi_name=CyclicTest.NAME_HYPERVISOR_6_NINES,
                              kpi_val=six_nines_val,
                              uptime=15,
                              unit=CyclicTest.UNIT)
Example 8
def test_swact_controller_platform(wait_for_con_drbd_sync_complete,
                                   collect_kpi):
    """
    Verify swact active controller

    Test Steps:
        - Swact active controller
        - Verify standby controller and active controller are swapped
        - Verify nodes are ready in kubectl get nodes

    """
    if system_helper.is_aio_simplex():
        skip("Simplex system detected")

    if not wait_for_con_drbd_sync_complete:
        skip(SkipSysType.LESS_THAN_TWO_CONTROLLERS)

    LOG.tc_step('retrieve active and available controllers')
    pre_active_controller, pre_standby_controller = \
        system_helper.get_active_standby_controllers()
    assert pre_standby_controller, "No standby controller available"

    collect_kpi = None if container_helper.is_stx_openstack_deployed() \
        else collect_kpi
    init_time = None
    if collect_kpi:
        init_time = common.get_date_in_format(date_format=KPI_DATE_FORMAT)

    LOG.tc_step(
        "Swact active controller and ensure active controller is changed")
    host_helper.swact_host(hostname=pre_active_controller)

    LOG.tc_step("Check hosts are Ready in kubectl get nodes after swact")
    kube_helper.wait_for_nodes_ready(hosts=(pre_active_controller,
                                            pre_standby_controller),
                                     timeout=30)

    if collect_kpi:
        kpi_name = SwactPlatform.NAME
        kpi_log_parser.record_kpi(local_kpi_file=collect_kpi,
                                  kpi_name=kpi_name,
                                  init_time=init_time,
                                  log_path=SwactPlatform.LOG_PATH,
                                  end_pattern=SwactPlatform.END,
                                  host=pre_standby_controller,
                                  start_host=pre_active_controller,
                                  start_pattern=SwactPlatform.START,
                                  start_path=SwactPlatform.START_PATH,
                                  uptime=1,
                                  fail_ok=False)
Example 9
def collect_upgrade_start_kpi(lab, collect_kpi):

    lab_name = lab['short_name']
    log_path = UpgradeStart.LOG_PATH
    kpi_name = UpgradeStart.NAME
    host = "controller-0"
    start_pattern = UpgradeStart.START
    end_pattern = UpgradeStart.END

    try:
        kpi_log_parser.record_kpi(local_kpi_file=collect_kpi, kpi_name=kpi_name,
                                  log_path=log_path, lab_name=lab_name, host=host,
                                  start_pattern=start_pattern,
                                  end_pattern=end_pattern, sudo=True, topdown=True, uptime=15)
    except ValueError as evalue:
        LOG.info("Unable to collect upgrade start kpi for lab {}: {}".format(lab_name, evalue))
Example 10
def collect_upgrade_complete_kpi(lab, collect_kpi):
    """
    This measures the time to run upgrade-activate.
    """

    if not collect_kpi:
        LOG.info("KPI only test. Skip due to kpi collection is not enabled")

    lab_name = lab['short_name']
    host = "controller-0"

    kpi_name = UpgradeComplete.NAME
    log_path = UpgradeComplete.LOG_PATH
    start_pattern = UpgradeComplete.START
    end_pattern = UpgradeComplete.END

    kpi_log_parser.record_kpi(local_kpi_file=collect_kpi, kpi_name=kpi_name,
                              log_path=log_path, lab_name=lab_name, host=host,
                              start_pattern=start_pattern,
                              end_pattern=end_pattern, sudo=True, topdown=True, uptime=15)
Example 11
def test_kpi_cinder_volume_creation(collect_kpi):
    """
    KPI test - cinder volume creation
    Args:
        collect_kpi:

    Test Steps:
        - Create a 20g cinder volume using default tis guest
        - Collect duration kpi from cinder create cli sent to volume available

    """
    if not collect_kpi:
        skip("KPI only test. Skip due to kpi collection is not enabled.")

    LOG.tc_step(
        "Create a 20g volume from default tis guest and collect image download rate, "
        "image conversion rate, and total volume creation time")
    # init_time = common.get_date_in_format(date_format=KPI_DATE_FORMAT)
    image = glance_helper.get_guest_image(guest_os='tis-centos-guest-qcow2',
                                          cleanup='function')

    vol_id = cinder_helper.create_volume(name='20g',
                                         source_id=image,
                                         size=20,
                                         cleanup='function')[1]
    vol_updated = cinder_helper.get_volume_show_values(
        vol_id, 'updated_at')[0].split('.')[0]

    # Logs no longer available for image downloading and conversion.
    # code_download, out_download = \
    #     kpi_log_parser.record_kpi(local_kpi_file=collect_kpi, kpi_name=ImageDownload.NAME,
    #                               host=None,
    #                               log_path=ImageDownload.LOG_PATH,
    #                               end_pattern=ImageDownload.GREP_PATTERN,
    #                               python_pattern=ImageDownload.PYTHON_PATTERN,
    #                               init_time=init_time, uptime=1,
    #                               unit=ImageDownload.UNIT)
    # code_conv, out_conv = \
    #     kpi_log_parser.record_kpi(local_kpi_file=collect_kpi, kpi_name=ImageConversion.NAME,
    #                               host=None,
    #                               log_path=ImageConversion.LOG_PATH,
    #                               end_pattern=ImageConversion.GREP_PATTERN,
    #                               python_pattern=ImageConversion.PYTHON_PATTERN,
    #                               init_time=init_time, uptime=1,
    #                               unit=ImageConversion.UNIT)
    code_create, out_create = \
        kpi_log_parser.record_kpi(local_kpi_file=collect_kpi, kpi_name=VolCreate.NAME, host=None,
                                  log_path=VolCreate.LOG_PATH, end_pattern=vol_updated,
                                  start_pattern=VolCreate.START, uptime=1)
    #
    # assert code_download == 0, out_download
    # assert code_conv == 0, out_conv
    assert code_create == 0, out_create
Example 12
    def test_kpi_evacuate(self, vm_type, get_hosts, collect_kpi):
        if not collect_kpi:
            skip("KPI only test. Skip due to kpi collection is not enabled.")
        if not system_helper.is_avs() and vm_type in ('dpdk', 'avp'):
            skip('avp and dpdk vifs are unsupported by OVS')

        def operation(vm_id_, host_):
            vm_helper.evacuate_vms(host=host_,
                                   vms_to_check=vm_id_,
                                   ping_vms=True)

        vm_test, vm_observer = vm_helper.launch_vm_pair(
            vm_type=vm_type, storage_backing='local_image')

        host_src_evacuation, host_observer = self._prepare_test(
            vm_test, vm_observer, get_hosts.copy(), with_router=True)
        time.sleep(60)
        with_router_kpi = vm_helper.get_traffic_loss_duration_on_operation(
            vm_test, vm_observer, operation, vm_test, host_src_evacuation)
        assert with_router_kpi > 0, "Traffic loss duration is not properly detected"
        kpi_log_parser.record_kpi(local_kpi_file=collect_kpi,
                                  kpi_name=Evacuate.NAME.format(
                                      vm_type, 'with'),
                                  kpi_val=with_router_kpi / 1000,
                                  uptime=5)

        host_helper.wait_for_hosts_ready(hosts=host_src_evacuation)

        if len(get_hosts) > 2:
            host_src_evacuation, host_observer = self._prepare_test(
                vm_test, vm_observer, get_hosts.copy(), with_router=False)
            time.sleep(60)
            without_router_kpi = vm_helper.get_traffic_loss_duration_on_operation(
                vm_test, vm_observer, operation, vm_test, host_src_evacuation)
            assert without_router_kpi > 0, "Traffic loss duration is not properly detected"
            kpi_log_parser.record_kpi(local_kpi_file=collect_kpi,
                                      kpi_name=Evacuate.NAME.format(
                                          vm_type, 'no'),
                                      kpi_val=without_router_kpi / 1000,
                                      uptime=5)
Example 13
def collect_upgrade_orchestration_kpi(lab, collect_kpi):
    """

    Args:
        lab:
        collect_kpi:

    Returns:

    """
    if not collect_kpi:
        LOG.info("KPI only test. Skip due to kpi collection is not enabled")

    lab_name = lab['short_name']
    print("Upgrade host: {}".format(upgrade_host))

    kpi_name = UpgradeOrchestration.NAME.format(upgrade_host)

    orchestration_duration = orchestration_helper.get_current_strategy_phase_duration("upgrade", "apply")

    kpi_log_parser.record_kpi(local_kpi_file=collect_kpi, kpi_name=kpi_name, lab_name=lab_name,
                              kpi_val=orchestration_duration)
Example 14
def test_swact_uncontrolled_kpi_platform(collect_kpi):
    if not collect_kpi or container_helper.is_stx_openstack_deployed():
        skip(
            "Platform-only KPI test. Skipped because KPI collection is not "
            "enabled or the stx-openstack application is deployed.")

    start_host, end_host = system_helper.get_active_standby_controllers()
    if not end_host:
        skip("No standby host to swact to")

    init_time = common.get_date_in_format(date_format=KPI_DATE_FORMAT)
    host_helper.reboot_hosts(hostnames=start_host)
    kpi_name = SwactUncontrolledPlatform.NAME
    kpi_log_parser.record_kpi(local_kpi_file=collect_kpi,
                              kpi_name=kpi_name,
                              init_time=init_time,
                              log_path=SwactUncontrolledPlatform.LOG_PATH,
                              end_pattern=SwactUncontrolledPlatform.END,
                              host=end_host,
                              start_host=start_host,
                              start_pattern=SwactUncontrolledPlatform.START,
                              start_path=SwactUncontrolledPlatform.START_PATH,
                              uptime=5,
                              fail_ok=False)
Example 15
def test_drbd_kpi(no_simplex, collect_kpi):
    """
    This test extracts the DRBD sync time from log files
    """

    if not collect_kpi:
        skip("KPI only test.  Skip due to kpi collection is not enabled")

    lab_name = ProjVar.get_var('LAB_NAME')
    log_path = DRBDSync.LOG_PATH
    kpi_name = DRBDSync.NAME
    end_pattern = DRBDSync.GREP_PATTERN
    python_pattern = DRBDSync.PYTHON_PATTERN

    kpi_log_parser.record_kpi(local_kpi_file=collect_kpi,
                              kpi_name=kpi_name,
                              log_path=log_path,
                              python_pattern=python_pattern,
                              lab_name=lab_name,
                              unit=DRBDSync.UNIT,
                              average_for_all=True,
                              end_pattern=end_pattern,
                              uptime=15,
                              fail_ok=False)
Example 16
def test_kpi_vm_launch_migrate_rebuild(ixia_required, collect_kpi, hosts_per_backing, boot_from):
    """
    KPI test - vm startup time.
    Args:
        collect_kpi:
        hosts_per_backing:
        boot_from:

    Test Steps:
        - Create a flavor with 2 vcpus, dedicated cpu policy and storage backing (if boot-from-image)
        - Launch a vm from specified boot source
        - Collect the vm startup time via event log

    """
    if not collect_kpi:
        skip("KPI only test. Skip due to kpi collection is not enabled.")

    # vm launch KPI
    if boot_from != 'volume':
        storage_backing = boot_from
        hosts = hosts_per_backing.get(boot_from)
        if not hosts:
            skip(SkipStorageBacking.NO_HOST_WITH_BACKING.format(boot_from))

        target_host = hosts[0]
        LOG.tc_step("Clear local storage cache on {}".format(target_host))
        storage_helper.clear_local_storage_cache(host=target_host)

        LOG.tc_step("Create a flavor with 2 vcpus, dedicated cpu policy, and {} storage".format(storage_backing))
        boot_source = 'image'
        flavor = nova_helper.create_flavor(name=boot_from, vcpus=2, storage_backing=storage_backing)[1]
    else:
        target_host = None
        boot_source = 'volume'
        storage_backing = keywords.host_helper.get_storage_backing_with_max_hosts()[0]
        LOG.tc_step("Create a flavor with 2 vcpus, and dedicated cpu policy and {} storage".format(storage_backing))
        flavor = nova_helper.create_flavor(vcpus=2, storage_backing=storage_backing)[1]

    ResourceCleanup.add('flavor', flavor)
    nova_helper.set_flavor(flavor, **{FlavorSpec.CPU_POLICY: 'dedicated'})

    host_str = ' on {}'.format(target_host) if target_host else ''
    LOG.tc_step("Boot a vm from {}{} and collect vm startup time".format(boot_from, host_str))

    mgmt_net_id = network_helper.get_mgmt_net_id()
    tenant_net_id = network_helper.get_tenant_net_id()
    internal_net_id = network_helper.get_internal_net_id()
    nics = [{'net-id': mgmt_net_id},
            {'net-id': tenant_net_id},
            {'net-id': internal_net_id}]

    vm_id = vm_helper.boot_vm(boot_from, flavor=flavor, source=boot_source, nics=nics, cleanup='function')[1]

    code_boot, out_boot = \
        kpi_log_parser.record_kpi(local_kpi_file=collect_kpi, kpi_name=VmStartup.NAME.format(boot_from),
                                  log_path=VmStartup.LOG_PATH, end_pattern=VmStartup.END.format(vm_id),
                                  start_pattern=VmStartup.START.format(vm_id), uptime=1)

    vm_helper.wait_for_vm_pingable_from_natbox(vm_id)
    # Migration KPI
    if ('ixia_ports' in ProjVar.get_var("LAB")) and (len(hosts_per_backing.get(storage_backing)) >= 2):

        LOG.info("Run migrate tests when more than 2 {} hosts available".format(storage_backing))
        LOG.tc_step("Launch an observer vm")

        mgmt_net_observer = network_helper.get_mgmt_net_id(auth_info=Tenant.get_secondary())
        tenant_net_observer = network_helper.get_tenant_net_id(auth_info=Tenant.get_secondary())
        nics_observer = [{'net-id': mgmt_net_observer},
                         {'net-id': tenant_net_observer},
                         {'net-id': internal_net_id}]
        vm_observer = vm_helper.boot_vm('observer', flavor=flavor, source=boot_source,
                                        nics=nics_observer, cleanup='function', auth_info=Tenant.get_secondary())[1]

        vm_helper.wait_for_vm_pingable_from_natbox(vm_observer)
        vm_helper.setup_kernel_routing(vm_observer)
        vm_helper.setup_kernel_routing(vm_id)
        vm_helper.route_vm_pair(vm_observer, vm_id)

        if 'local_lvm' != boot_from:
            # live migration unsupported for boot-from-image vm with local_lvm storage
            LOG.tc_step("Collect live migrate KPI for vm booted from {}".format(boot_from))

            def operation_live(vm_id_):
                code, msg = vm_helper.live_migrate_vm(vm_id=vm_id_)
                assert 0 == code, msg
                vm_helper.wait_for_vm_pingable_from_natbox(vm_id=vm_id_)
                # kernel routing
                vm_helper.ping_between_routed_vms(vm_id, vm_observer, vshell=False)

            time.sleep(30)
            duration = vm_helper.get_traffic_loss_duration_on_operation(vm_id, vm_observer, operation_live, vm_id)
            assert duration > 0, "No traffic loss detected during live migration for {} vm".format(boot_from)
            kpi_log_parser.record_kpi(local_kpi_file=collect_kpi, kpi_name=LiveMigrate.NAME.format(boot_from),
                                      kpi_val=duration, uptime=1, unit='Time(ms)')

            vim_duration = vm_helper.get_live_migrate_duration(vm_id=vm_id)
            kpi_log_parser.record_kpi(local_kpi_file=collect_kpi, kpi_name=LiveMigrate.NOVA_NAME.format(boot_from),
                                      kpi_val=vim_duration, uptime=1, unit='Time(s)')

        LOG.tc_step("Collect cold migrate KPI for vm booted from {}".format(boot_from))

        def operation_cold(vm_id_):
            code, msg = vm_helper.cold_migrate_vm(vm_id=vm_id_)
            assert 0 == code, msg
            vm_helper.wait_for_vm_pingable_from_natbox(vm_id=vm_id_)
            vm_helper.ping_between_routed_vms(vm_id, vm_observer, vshell=False)

        time.sleep(30)
        duration = vm_helper.get_traffic_loss_duration_on_operation(vm_id, vm_observer, operation_cold, vm_id)
        assert duration > 0, "No traffic loss detected during cold migration for {} vm".format(boot_from)
        kpi_log_parser.record_kpi(local_kpi_file=collect_kpi, kpi_name=ColdMigrate.NAME.format(boot_from),
                                  kpi_val=duration, uptime=1, unit='Time(ms)')

        vim_duration = vm_helper.get_cold_migrate_duration(vm_id=vm_id)
        kpi_log_parser.record_kpi(local_kpi_file=collect_kpi, kpi_name=ColdMigrate.NOVA_NAME.format(boot_from),
                                  kpi_val=vim_duration, uptime=1, unit='Time(s)')

    # Rebuild KPI
    if 'volume' != boot_from:
        LOG.info("Run rebuild test for vm booted from image")

        def operation_rebuild(vm_id_):
            code, msg = vm_helper.rebuild_vm(vm_id=vm_id_)
            assert 0 == code, msg
            vm_helper.wait_for_vm_pingable_from_natbox(vm_id=vm_id_)
            vm_helper.ping_vms_from_vm(vm_id, vm_id, net_types=('data', 'internal'))

        LOG.tc_step("Collect vm rebuild KPI for vm booted from {}".format(boot_from))
        time.sleep(30)
        duration = vm_helper.get_ping_loss_duration_on_operation(vm_id, 300, 0.5, operation_rebuild, vm_id)
        assert duration > 0, "No ping loss detected during rebuild for {} vm".format(boot_from)
        kpi_log_parser.record_kpi(local_kpi_file=collect_kpi, kpi_name=Rebuild.NAME.format(boot_from),
                                  kpi_val=duration, uptime=1, unit='Time(ms)')

    # Check the vm boot result at the end after collecting other KPIs
    assert code_boot == 0, out_boot
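
The traffic-loss KPIs in this example (and in Example 12) hinge on vm_helper.get_traffic_loss_duration_on_operation, whose internals are not shown in this collection. The following is a hypothetical sketch of its general shape, inferred only from the call sites (the returned value is evidently in milliseconds, given the / 1000 conversions elsewhere): ping the target while the operation runs, and report the longest window of failed pings. The names traffic_loss_duration_ms and target_ip are made up for the sketch.

import subprocess
import threading
import time


def traffic_loss_duration_ms(target_ip, operation, *args):
    """Ping target_ip every ~100 ms while operation(*args) runs in a thread;
    return the longest stretch of consecutive failed pings, in ms."""
    worker = threading.Thread(target=operation, args=args)
    worker.start()
    longest = 0.0
    loss_started = None
    while worker.is_alive() or loss_started is not None:
        ok = subprocess.call(['ping', '-c', '1', '-W', '1', target_ip],
                             stdout=subprocess.DEVNULL) == 0
        now = time.monotonic()
        if not ok and loss_started is None:
            loss_started = now          # loss window opens
        elif ok and loss_started is not None:
            longest = max(longest, now - loss_started)
            loss_started = None         # loss window closes
        time.sleep(0.1)
    worker.join()
    return longest * 1000

The real helper pings between the test and observer VMs rather than from the test host, so treat this only as an illustration of the measurement pattern.
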
Example 17
def test_idle_kpi(collect_kpi):
    if not collect_kpi:
        skip("KPI only test. Skip due to kpi collection is not enabled")

    LOG.tc_step("Delete vms and volumes on system if any")
    vm_helper.delete_vms()

    is_aio = system_helper.is_aio_system()
    active_con = system_helper.get_active_controller_name()
    con_ssh = ControllerClient.get_active_controller()
    cpu_arg = ''
    if is_aio:
        LOG.info("AIO system found, check platform cores only")
        cpu_arg = ' -P '
        platform_cores_per_proc = host_helper.get_host_cpu_cores_for_function(
            hostname=active_con,
            func='Platform',
            core_type='log_core',
            thread=None,
            con_ssh=con_ssh)
        platform_cpus = []
        for proc in platform_cores_per_proc:
            platform_cpus += platform_cores_per_proc[proc]

        cpu_arg += ','.join([str(val) for val in platform_cpus])

    LOG.tc_step(
        "Sleep for 5 minutes, then monitor for cpu and memory usage every 10 seconds for 5 minutes"
    )
    time.sleep(300)
    output = con_ssh.exec_cmd(
        'sar -u{} 10 30 -r | grep --color=never "Average"'.format(cpu_arg),
        expect_timeout=600,
        fail_ok=False)[1]

    # Sample output:
    # controller-1:~$ sar -u -P 0,1 1 3 -r | grep Average
    # Average:        CPU     %user     %nice   %system   %iowait    %steal     %idle
    # Average:          0      8.52      0.00      4.92      1.97      0.00     84.59
    # Average:          1     14.19      0.00      4.73      0.00      0.00     81.08
    # Average:    kbmemfree kbmemused  %memused kbbuffers  kbcached  kbcommit   %commit  kbactive   kbinact   kbdirty
    # Average:    105130499  26616873     20.20    203707    782956  63556293     48.24  24702756    529517       579

    lines = output.splitlines()
    start_index = 0
    for i, line in enumerate(lines):
        if line.startswith('Average:'):
            start_index = i
            break
    lines = lines[start_index:]

    # Parse mem usage stats
    mem_vals = lines.pop(-1).split()
    mem_headers = lines.pop(-1).split()
    mem_usage_index = mem_headers.index('%memused')
    mem_usage = float(mem_vals[mem_usage_index])

    # Parse cpu usage stats
    cpu_headers = lines.pop(0).split()
    cpu_lines = [line.split() for line in lines]
    idle_cpu_index = cpu_headers.index('%idle')
    cpus_idle = [float(cpu_vals[idle_cpu_index]) for cpu_vals in cpu_lines]
    avg_cpu_idle = sum(cpus_idle) / len(cpu_lines)
    avg_cpu_usage = round(100 - avg_cpu_idle, 4)

    cpu_kpi_name = Idle.NAME_CPU
    mem_kpi_name = Idle.NAME_MEM
    if not container_helper.is_stx_openstack_deployed():
        cpu_kpi_name += '_platform'
        mem_kpi_name += '_platform'
    kpi_log_parser.record_kpi(local_kpi_file=collect_kpi,
                              kpi_name=cpu_kpi_name,
                              kpi_val=avg_cpu_usage,
                              uptime=5,
                              unit='Percentage',
                              fail_ok=False)

    kpi_log_parser.record_kpi(local_kpi_file=collect_kpi,
                              kpi_name=mem_kpi_name,
                              kpi_val=mem_usage,
                              uptime=5,
                              unit='Percentage',
                              fail_ok=False)
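
The sar parsing in this test can be sanity-checked offline against the sample output quoted in the comments above (assuming that output is representative):

# Standalone check of the parsing logic, using the quoted sample.
sample = '''Average:        CPU     %user     %nice   %system   %iowait    %steal     %idle
Average:          0      8.52      0.00      4.92      1.97      0.00     84.59
Average:          1     14.19      0.00      4.73      0.00      0.00     81.08
Average:    kbmemfree kbmemused  %memused kbbuffers  kbcached  kbcommit   %commit  kbactive   kbinact   kbdirty
Average:    105130499  26616873     20.20    203707    782956  63556293     48.24  24702756    529517       579'''

lines = sample.splitlines()

# Memory: the last two lines are the header row and the value row.
mem_vals = lines.pop(-1).split()
mem_headers = lines.pop(-1).split()
mem_usage = float(mem_vals[mem_headers.index('%memused')])

# CPU: the first line is the header row; one row per monitored core follows.
cpu_headers = lines.pop(0).split()
idle_index = cpu_headers.index('%idle')
idles = [float(line.split()[idle_index]) for line in lines]
avg_cpu_usage = round(100 - sum(idles) / len(idles), 4)

print(mem_usage, avg_cpu_usage)  # 20.2 17.165
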
Example 18
    def test_lock_with_vms(self, target_hosts, no_simplex, add_admin_role_func,
                           collect_kpi):
        """
        Test lock host with vms on it.

        Args:
            target_hosts (list): targeted host(s) to lock, prepared by the target_hosts test fixture.

        Skip Conditions:
            - Less than 2 hypervisor hosts on the system

        Prerequisites:
            - Hosts' storage backing is pre-configured to the storage backing under test,
                i.e., 2 or more hosts should support the storage backing under test.
        Test Setups:
            - Set instances quota to 10 if it was less than 8
            - Determine storage backing(s) under test, i.e., storage backings supported by at least 2 hosts on the system
            - Create flavors with storage extra specs set based on storage backings under test
            - Create vms_to_test that can be live migrated using created flavors
            - Determine target host(s) to perform lock based on which host(s) have the most vms_to_test
            - Live migrate vms to target host(s)
        Test Steps:
            - Lock target host
            - Verify lock succeeded and vms status unchanged
            - Repeat above steps if more than one target host
        Test Teardown:
            - Delete created vms and volumes
            - Delete created flavors
            - Unlock locked target host(s)

        """
        storage_backing, host = target_hosts
        vms_num = 5
        vm_helper.ensure_vms_quotas(vms_num=vms_num)

        LOG.tc_step(
            "Boot {} vms with various storage settings".format(vms_num))
        vms = vm_helper.boot_vms_various_types(cleanup='function',
                                               vms_num=vms_num,
                                               storage_backing=storage_backing,
                                               target_host=host)

        LOG.tc_step("Attempt to lock target host {}...".format(host))
        HostsToRecover.add(host)

        init_time = None
        if collect_kpi:
            init_time = common.get_date_in_format(date_format=KPI_DATE_FORMAT)

        host_helper.lock_host(host=host,
                              check_first=False,
                              fail_ok=False,
                              swact=True)

        LOG.tc_step("Verify lock succeeded and vms still in good state")
        vm_helper.wait_for_vms_values(vms=vms, fail_ok=False)
        for vm in vms:
            vm_host = vm_helper.get_vm_host(vm_id=vm)
            assert vm_host != host, "VM is still on {} after lock".format(host)

            vm_helper.wait_for_vm_pingable_from_natbox(
                vm_id=vm, timeout=VMTimeout.DHCP_RETRY)

        if collect_kpi:
            LOG.info("Collect kpi for lock host with vms")
            kpi_log_parser.record_kpi(
                local_kpi_file=collect_kpi,
                kpi_name=HostLock.WITH_VM.format(storage_backing),
                host=None,
                log_path=HostLock.LOG_PATH,
                end_pattern=HostLock.END.format(host),
                start_pattern=HostLock.START.format(host),
                start_path=HostLock.START_PATH,
                init_time=init_time,
                uptime=5,
                fail_ok=False)
Example 19
def test_lock_unlock_host(host_type, collect_kpi):
    """
    Verify lock unlock host

    Test Steps:
        - Select a host per given type. If type is controller, select standby controller.
        - Lock selected host and ensure it is successfully locked
        - Unlock selected host and ensure it is successfully unlocked

    """
    init_time = None
    if collect_kpi:
        init_time = common.get_date_in_format(date_format=KPI_DATE_FORMAT)

    LOG.tc_step("Select a {} node from system if any".format(host_type))
    if host_type == 'controller':
        if system_helper.is_aio_simplex():
            host = 'controller-0'
        else:
            host = system_helper.get_standby_controller_name()
            assert host, "No standby controller available"

    else:
        if host_type == 'compute' and (system_helper.is_aio_duplex()
                                       or system_helper.is_aio_simplex()):
            skip("No compute host on AIO system")
        elif host_type == 'storage' and not system_helper.is_storage_system():
            skip("System does not have storage nodes")

        hosts = system_helper.get_hosts(personality=host_type,
                                        availability=HostAvailState.AVAILABLE,
                                        operational=HostOperState.ENABLED)

        assert hosts, "No good {} host on system".format(host_type)
        host = hosts[0]

    LOG.tc_step(
        "Lock {} host - {} and ensure it is successfully locked".format(
            host_type, host))
    HostsToRecover.add(host)
    host_helper.lock_host(host, swact=False)

    # wait for services to stabilize before unlocking
    time.sleep(20)

    # unlock the host and verify it is successfully unlocked
    LOG.tc_step(
        "Unlock {} host - {} and ensure it is successfully unlocked".format(
            host_type, host))
    host_helper.unlock_host(host)

    LOG.tc_step("Check helm list after host unlocked")
    con_ssh = ControllerClient.get_active_controller()
    con_ssh.exec_cmd('helm list', fail_ok=False)

    if collect_kpi:
        lock_kpi_name = HostLock.NAME.format(host_type)
        unlock_kpi_name = HostUnlock.NAME.format(host_type)
        unlock_host_type = host_type
        if container_helper.is_stx_openstack_deployed():
            if system_helper.is_aio_system():
                unlock_host_type = 'compute'
        else:
            lock_kpi_name += '_platform'
            unlock_kpi_name += '_platform'
            if unlock_host_type == 'compute':
                unlock_host_type = 'compute_platform'

        LOG.info("Collect kpi for lock/unlock {}".format(host_type))
        code_lock, out_lock = kpi_log_parser.record_kpi(
            local_kpi_file=collect_kpi,
            kpi_name=lock_kpi_name,
            host=None,
            log_path=HostLock.LOG_PATH,
            end_pattern=HostLock.END.format(host),
            start_pattern=HostLock.START.format(host),
            start_path=HostLock.START_PATH,
            init_time=init_time)

        time.sleep(30)  # delay in sysinv log vs nova hypervisor list
        code_unlock, out_unlock = kpi_log_parser.record_kpi(
            local_kpi_file=collect_kpi,
            kpi_name=unlock_kpi_name,
            host=None,
            log_path=HostUnlock.LOG_PATH,
            end_pattern=HostUnlock.END[unlock_host_type].format(host),
            init_time=init_time,
            start_pattern=HostUnlock.START.format(host),
            start_path=HostUnlock.START_PATH)

        assert code_lock == 0, 'Failed to collect kpi for host-lock {}. ' \
                               'Error:\n{}'.format(host, out_lock)
        assert code_unlock == 0, 'Failed to collect kpi for host-unlock {}. ' \
                                 'Error:\n{}'.format(host, out_unlock)