def check_interfaces_and_services(dut,
                                  interfaces,
                                  xcvr_skip_list,
                                  reboot_type=None):
    """
    Perform further checks after the reboot-cause check, including transceiver status and interface status
    @param dut: The AnsibleHost object of DUT.
    @param interfaces: DUT's interfaces defined by minigraph
    @param xcvr_skip_list: list of DUT's interfaces for which transceiver checks are skipped
    @param reboot_type: The type of reboot that was performed, if any
    """
    logging.info("Wait until all critical services are fully started")
    wait_critical_processes(dut)

    if reboot_type is not None:
        logging.info("Check reboot cause")
        assert wait_until(MAX_WAIT_TIME_FOR_REBOOT_CAUSE, 20, check_reboot_cause, dut, reboot_type), \
            "got reboot-cause failed after rebooted by %s" % reboot_type

        if reboot_ctrl_dict[reboot_type]["test_reboot_cause_only"]:
            logging.info(
                "Further checking skipped for %s test which intends to verify reboot-cause only"
                % reboot_type)
            return

    if dut.is_supervisor_node():
        logging.info("skipping interfaces related check for supervisor")
    else:
        logging.info(
            "Wait {} seconds for all the transceivers to be detected".format(
                MAX_WAIT_TIME_FOR_INTERFACES))
        result = wait_until(MAX_WAIT_TIME_FOR_INTERFACES, 20,
                            check_all_interface_information, dut, interfaces,
                            xcvr_skip_list)
        assert result, "Not all transceivers are detected or interfaces are up in {} seconds".format(
            MAX_WAIT_TIME_FOR_INTERFACES)

        logging.info("Check transceiver status")
        for asic_index in dut.get_frontend_asic_ids():
            # Get the interfaces pertaining to that asic
            interface_list = get_port_map(dut, asic_index)
            interfaces_per_asic = {
                k: v
                for k, v in interface_list.items() if k in interfaces
            }
            check_transceiver_basic(dut, asic_index, interfaces_per_asic,
                                    xcvr_skip_list)

        logging.info("Check pmon daemon status")
        assert check_pmon_daemon_status(dut), "Not all pmon daemons are running."

    if dut.facts["asic_type"] in ["mellanox"]:

        from .mellanox.check_hw_mgmt_service import check_hw_management_service
        from .mellanox.check_sysfs import check_sysfs

        logging.info("Check the hw-management service")
        check_hw_management_service(dut)

        logging.info("Check sysfs")
        check_sysfs(dut)
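
Nearly every example below leans on the same polling helper. The snippet that follows is a minimal, self-contained sketch of that wait_until(timeout, interval, condition, *args) retry pattern, written here for illustration only; it is not the repo's implementation, and some later examples call a variant that also takes an initial-delay argument before the condition.

import time

def wait_until_sketch(timeout, interval, condition, *args, **kwargs):
    """Poll `condition` every `interval` seconds until it returns a truthy
    value or `timeout` seconds have elapsed. Returns True on success."""
    deadline = time.time() + timeout
    while time.time() < deadline:
        try:
            if condition(*args, **kwargs):
                return True
        except Exception:
            # Treat a transient exception the same as a failed check and retry.
            pass
        time.sleep(interval)
    return False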
Example #2
def test_config_reload_toggle_reset(duthost, k8scluster):
    """
    Test case to ensure that when DUT is joined to master (disable=false, unsaved) but config is saved with disable=true, DUT resets from master after config reload

    Saves config with disable=true

    Joins master, which sets disable=false unsaved

    Performs config reload

    Ensures that DUT has reset from the master after config reload, as disable=true was saved 
    
    Args:
        duthost: DUT host object
        k8scluster: shortcut fixture for getting cluster of Kubernetes master hosts
    """
    dut_cmds = ['sudo config kube server disable on',
                'sudo config save -y']
    duthost.shell_cmds(cmds=dut_cmds)

    ku.join_master(duthost, k8scluster.vip) 

    config_reload(duthost)
    wait_critical_processes(duthost)

    server_connect_exp_status = False
    server_connect_act_status = ku.check_connected(duthost)
    server_connect_status_updated = ku.poll_for_status_change(duthost, server_connect_exp_status)
    pytest_assert(server_connect_status_updated, "Unexpected k8s server connection status after config reload, Expected server connected status: {}, Found server connected status: {}".format(server_connect_exp_status, server_connect_act_status))
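
The Kubernetes examples in this collection share one expect-then-poll shape: record the expected server-connected status, then wait for ku.check_connected to report it. The helper below is a hedged sketch of that poll_for_status_change-style loop; the check_connected_fn callable and the timing defaults are illustrative stand-ins, not the repo's implementation.

import time

def poll_for_status_change_sketch(check_connected_fn, expected_status,
                                  timeout=120, interval=10):
    # Keep polling the actual k8s server-connected status until it matches
    # the expected value or the timeout expires.
    deadline = time.time() + timeout
    while time.time() < deadline:
        if check_connected_fn() == expected_status:
            return True
        time.sleep(interval)
    return False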
Example #3
def test_power_off_reboot(duthosts, enum_rand_one_per_hwsku_hostname,
                          localhost, conn_graph_facts, xcvr_skip_list,
                          pdu_controller, power_off_delay):
    """
    @summary: This test case is to perform reboot via powercycle and check platform status
    @param duthosts: Fixture providing the DUT AnsibleHost objects
    @param enum_rand_one_per_hwsku_hostname: Fixture selecting one DUT hostname per HWSKU
    @param localhost: Fixture for interacting with localhost through ansible
    @param conn_graph_facts: Fixture to parse and return the lab connection graph
    @param xcvr_skip_list: list of DUT's interfaces for which transceiver checks are skipped
    @param pdu_controller: The PDU controller object
    @param power_off_delay: Pytest parameter. The delay between turning off and on the PSU
    """
    duthost = duthosts[enum_rand_one_per_hwsku_hostname]
    pdu_ctrl = pdu_controller
    if pdu_ctrl is None:
        pytest.skip(
            "No PSU controller for %s, skip rest of the testing in this case" %
            duthost.hostname)

    all_outlets = pdu_ctrl.get_outlet_status()
    # If the PDU supports reporting output_watts, make sure that all outlets have power.
    no_power = [
        item for item in all_outlets if int(item.get('output_watts', '1')) == 0
    ]
    pytest_assert(not no_power,
                  "Not all outlets have power output: {}".format(no_power))

    # Purpose of this list is to control the sequence of turning on PSUs in power off testing.
    # If there are 2 PSUs, then 3 scenarios are covered (a standalone illustration follows this example):
    # 1. Turn off all PSUs, turn on PSU1, then check.
    # 2. Turn off all PSUs, turn on PSU2, then check.
    # 3. Turn off all PSUs, turn on one of the PSUs, then turn on the other PSU, then check.
    power_on_seq_list = []
    if all_outlets:
        power_on_seq_list = [[item] for item in all_outlets]
        power_on_seq_list.append(all_outlets)

    logging.info("Got all power on sequences {}".format(power_on_seq_list))

    poweroff_reboot_kwargs = {"dut": duthost}

    try:
        for power_on_seq in power_on_seq_list:
            poweroff_reboot_kwargs["pdu_ctrl"] = pdu_ctrl
            poweroff_reboot_kwargs["all_outlets"] = all_outlets
            poweroff_reboot_kwargs["power_on_seq"] = power_on_seq
            poweroff_reboot_kwargs["delay_time"] = power_off_delay
            reboot_and_check(localhost, duthost,
                             conn_graph_facts["device_conn"][duthost.hostname],
                             xcvr_skip_list, REBOOT_TYPE_POWEROFF,
                             _power_off_reboot_helper, poweroff_reboot_kwargs)
    except Exception as e:
        logging.debug("Restore power after test failure")
        for outlet in all_outlets:
            logging.debug("turning on {}".format(outlet))
            pdu_ctrl.turn_on_outlet(outlet)
        # Sleep 120 seconds for the DUT to boot up
        time.sleep(120)
        wait_critical_processes(duthost)
        raise e
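
The comment in the example above lists the three power-on scenarios for a two-PSU device. The snippet below is a standalone illustration of how that power_on_seq_list is assembled, using made-up outlet descriptors:

all_outlets = [{"outlet_id": 0}, {"outlet_id": 1}]        # hypothetical PDU outlet records
power_on_seq_list = [[item] for item in all_outlets]      # scenarios 1 and 2: each PSU alone
power_on_seq_list.append(all_outlets)                     # scenario 3: both PSUs together
assert power_on_seq_list == [
    [{"outlet_id": 0}],
    [{"outlet_id": 1}],
    [{"outlet_id": 0}, {"outlet_id": 1}],
]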
Example #4
def test_config_reload_toggle_join(duthost, k8scluster):
    """
    Test case to ensure that when DUT is not joined to the master due to (unsaved) disable=true, but config is saved with disable=false, DUT joins after config reload

    Saves config with configured VIP and disable=false

    Sets disable=true without saving config, and ensure that DUT resets from master

    Performs config reload

    Ensures that DUT is joined to master after config reload

    Args:
        duthost: DUT host object
        k8scluster: shortcut fixture for getting cluster of Kubernetes master hosts
    """
    dut_cmds = ['sudo config kube server ip {}'.format(k8scluster.vip),
                'sudo config kube server disable off',
                'sudo config save -y']
    duthost.shell_cmds(cmds=dut_cmds)
  
    duthost.shell('sudo config kube server disable on')
    server_connect_exp_status = False
    server_connect_act_status = ku.check_connected(duthost)
    server_connect_status_updated = ku.poll_for_status_change(duthost, server_connect_exp_status)
    pytest_assert(server_connect_status_updated, "Unexpected k8s server connection status after setting disable=true, Expected server connected status: {}, Found server connected status: {}".format(server_connect_exp_status, server_connect_act_status))
    
    config_reload(duthost)
    wait_critical_processes(duthost)

    server_connect_exp_status = True
    server_connect_act_status = ku.check_connected(duthost)
    server_connect_status_updated = ku.poll_for_status_change(duthost, server_connect_exp_status)
    pytest_assert(server_connect_status_updated, "Unexpected k8s server connection status after config reload, Expected server connected status: {}, Found server connected status: {}".format(server_connect_exp_status, server_connect_act_status))
Example #5
def test_reload_configuration(duthost, conn_graph_facts):
    """
    @summary: This test case is to reload the configuration and check platform status
    """
    interfaces = conn_graph_facts["device_conn"]
    asic_type = duthost.facts["asic_type"]

    logging.info("Reload configuration")
    duthost.shell("sudo config reload -y &>/dev/null", executable="/bin/bash")

    logging.info("Wait until all critical services are fully started")
    wait_critical_processes(duthost)

    logging.info("Wait some time for all the transceivers to be detected")
    assert wait_until(300, 20, check_interface_information, duthost, interfaces), \
        "Not all transceivers are detected in 300 seconds"

    logging.info("Check transceiver status")
    check_transceiver_basic(duthost, interfaces)

    if asic_type in ["mellanox"]:

        from .mellanox.check_hw_mgmt_service import check_hw_management_service
        from .mellanox.check_sysfs import check_sysfs

        logging.info("Check the hw-management service")
        check_hw_management_service(duthost)

        logging.info("Check sysfs")
        check_sysfs(duthost)
def restart_service_and_check(localhost, dut, service, interfaces):
    """
    Restart specified service and check platform status
    """

    logging.info("Restart the %s service" % service)
    dut.command("sudo systemctl restart %s" % service)

    logging.info("Wait until all critical services are fully started")
    wait_critical_processes(dut)

    logging.info("Wait some time for all the transceivers to be detected")
    pytest_assert(
        wait_until(300, 20, check_interface_information, dut, interfaces),
        "Not all interface information are detected within 300 seconds")

    logging.info("Check transceiver status")
    check_transceiver_basic(dut, interfaces)

    if dut.facts["asic_type"] in ["mellanox"]:

        from .mellanox.check_hw_mgmt_service import check_hw_management_service
        from .mellanox.check_sysfs import check_sysfs

        logging.info("Check the hw-management service")
        check_hw_management_service(dut)

        logging.info("Check sysfs")
        check_sysfs(dut)

    logging.info("Check that critical processes are healthy for 60 seconds")
    check_critical_processes(dut, 60)
Example #7
    def test_memory_exhaustion(self, duthost, localhost):
        dut_ip = duthost.mgmt_ip
        hostname = duthost.hostname
        dut_datetime = duthost.get_now_time()

        # Use `tail /dev/zero` to run out of memory completely. Since this command will cause the
        # DUT to reboot, we need to run it in the background (using &) so pytest does not get stuck.
        # We also need to add `nohup` so the process survives the SSH session teardown.
        cmd = 'nohup tail /dev/zero &'
        res = duthost.shell(cmd)
        if not res.is_successful:
            pytest.fail('DUT {} failed to run command {}'.format(hostname, cmd))

        # Waiting for SSH connection shutdown
        pytest_assert(
            self.check_ssh_state(localhost, dut_ip, SSH_STATE_ABSENT,
                                 SSH_SHUTDOWN_TIMEOUT),
            'DUT {} did not shutdown'.format(hostname))
        # Waiting for SSH connection startup
        pytest_assert(
            self.check_ssh_state(localhost, dut_ip, SSH_STATE_STARTED,
                                 SSH_STARTUP_TIMEOUT),
            'DUT {} did not startup'.format(hostname))
        # Wait until all critical processes are healthy.
        wait_critical_processes(duthost)
        # Verify DUT uptime is later than the time when the test case started running.
        dut_uptime = duthost.get_up_time()
        pytest_assert(dut_uptime > dut_datetime,
                      "Device {} did not reboot".format(hostname))
Example #8
def test_features_state(duthosts, enum_dut_hostname, localhost):
    """Checks whether the state of each feature is valid or not.
    Args:
      duthosts: Fixture that returns a list of DUT Ansible host objects.
      enum_dut_hostname: Fixture that returns the hostname of the DUT.
      localhost: Fixture for interacting with localhost through ansible.

    Returns:
      None.
    """
    duthost = duthosts[enum_dut_hostname]
    logger.info("Checking the state of each feature in 'CONFIG_DB' ...")
    if not wait_until(180, FEATURE_STATE_VERIFYING_INTERVAL_SECS, 0,
                      verify_features_state, duthost):
        logger.warning(
            "Not all states of features in 'CONFIG_DB' are valid, rebooting DUT {}"
            .format(duthost.hostname))
        reboot(duthost, localhost)
        # Some services are not ready immediately after reboot
        wait_critical_processes(duthost)

    pytest_assert(
        wait_until(FEATURE_STATE_VERIFYING_THRESHOLD_SECS,
                   FEATURE_STATE_VERIFYING_INTERVAL_SECS, 0,
                   verify_features_state, duthost),
        "Not all service states are valid!")
    logger.info("The states of features in 'CONFIG_DB' are all valid.")
Example #9
def reload_testbed(duthosts, enum_rand_one_per_hwsku_frontend_hostname):
    """
        Reload the DUT after the test function finishes
    """
    duthost = duthosts[enum_rand_one_per_hwsku_frontend_hostname]
    yield None
    logging.info("Reloading config and restarting swss...")
    config_reload(duthost)
    wait_critical_processes(duthost)
def test_interface_binding(duthosts, rand_one_dut_hostname, dut_dhcp_relay_data):
    duthost = duthosts[rand_one_dut_hostname]
    skip_release(duthost, ["201811", "201911", "202106"])
    config_reload(duthost)
    wait_critical_processes(duthost)
    wait_until(120, 5, 0, check_interface_status, duthosts, rand_one_dut_hostname)
    output = duthost.shell("docker exec -it dhcp_relay ss -nlp | grep dhcp6relay")["stdout"].encode("utf-8")
    logger.info(output)
    for dhcp_relay in dut_dhcp_relay_data:
        assert "*:{}".format(dhcp_relay['downlink_vlan_iface']['name']) in output, "{} is not found in {}".format("*:{}".format(dhcp_relay['downlink_vlan_iface']['name']), output)
Example #11
def reload_testbed_on_failed(request, duthosts,
                             enum_rand_one_per_hwsku_frontend_hostname):
    """
        Reload the DUT after the test function finishes
    """
    duthost = duthosts[enum_rand_one_per_hwsku_frontend_hostname]
    yield None
    if request.node.rep_call.failed:
        # If the test case failed, the BGP sessions or port channel status may not have recovered, so execute a config reload
        logging.info("Reloading config and restarting swss...")
        config_reload(duthost)
        wait_critical_processes(duthost)
Example #12
def test_interface_binding(duthosts, rand_one_dut_hostname, dut_dhcp_relay_data):
    duthost = duthosts[rand_one_dut_hostname]
    skip_release(duthost, ["201811", "201911", "202106"])
    if not check_interface_status(duthost):
        config_reload(duthost)
        wait_critical_processes(duthost)
        pytest_assert(wait_until(120, 5, 0, check_interface_status, duthost))
    output = duthost.shell("docker exec -it dhcp_relay ss -nlp | grep dhcrelay", module_ignore_errors=True)["stdout"].encode("utf-8")
    logger.info(output)
    for dhcp_relay in dut_dhcp_relay_data:
        assert "{}:67".format(dhcp_relay['downlink_vlan_iface']['name']) in output, "{} is not found in {}".format("{}:67".format(dhcp_relay['downlink_vlan_iface']['name']), output)
        for iface in dhcp_relay['uplink_interfaces']:
            assert "{}:67".format(iface) in output, "{} is not found in {}".format("{}:67".format(iface), output)
Example #13
def test_TSA_B_C_with_no_neighbors(duthost, bgpmon_setup_teardown, nbrhosts, tbinfo):
    """
    Test TSA, TSB, TSC with no neighbors on ASIC0 in case of multi-asic and single-asic.
    """
    bgp_neighbors = {}
    asic_index = 0 if duthost.is_multi_asic else DEFAULT_ASIC_ID

    try:
        routes_4 = parse_rib(duthost, 4)
        routes_6 = parse_rib(duthost, 6)
        # Remove the Neighbors for the particular BGP instance
        bgp_neighbors = remove_bgp_neighbors(duthost, asic_index)

        # Issue TSA on DUT
        output = duthost.shell("TSA")['stdout_lines']

        # Set the DUT in maintenance state
        # Verify ASIC0 has no neighbors message.
        pytest_assert(verify_traffic_shift_per_asic(duthost, output, TS_NO_NEIGHBORS, asic_index), "ASIC did not report the expected 'no neighbors' state")

        # Recover to Normal state
        duthost.shell("TSB")['stdout_lines']

        # Verify DUT is in Normal state, and ASIC0 has no neighbors message.
        pytest_assert(verify_traffic_shift_per_asic(duthost, output, TS_NO_NEIGHBORS, asic_index), "ASIC did not report the expected 'no neighbors' state")

        # Check the traffic state
        duthost.shell("TSC")['stdout_lines']

        # Verify DUT is in Normal state, and ASIC0 has no neighbors message.
        pytest_assert(verify_traffic_shift_per_asic(duthost, output, TS_NO_NEIGHBORS, asic_index), "ASIC did not report the expected 'no neighbors' state")

    finally:
        # Restore BGP neighbors
        restore_bgp_neighbors(duthost, asic_index, bgp_neighbors)

        # Recover to Normal state
        duthost.shell("TSB")
        wait_critical_processes(duthost)

        # Wait until bgp sessions are established on DUT
        pytest_assert(wait_until(100, 10, 0, duthost.check_bgp_session_state, bgp_neighbors.keys()),
                      "Not all BGP sessions are established on DUT")

        # Wait until all routes are announced to neighbors
        pytest_assert(wait_until(300, 3, 0, verify_all_routes_announce_to_neighs, duthost, nbrhosts, routes_4, 4),
                      "Not all ipv4 routes are announced to neighbors")
        pytest_assert(wait_until(300, 3, 0, verify_all_routes_announce_to_neighs, duthost, nbrhosts, routes_6, 6),
                      "Not all ipv6 routes are announced to neighbors")
Example #14
def restart_service_and_check(localhost, dut, enum_frontend_asic_index,
                              service, interfaces, xcvr_skip_list):
    """
    Restart specified service and check platform status
    """
    logging.info("Restart the %s service on asic %s" %
                 (service, enum_frontend_asic_index))

    asichost = dut.asic_instance(enum_frontend_asic_index)
    service_name = asichost.get_docker_name(service)
    dut.command("sudo systemctl restart {}".format(service_name))

    for container in dut.get_default_critical_services_list():
        if is_service_hiting_start_limit(dut, container) is True:
            logging.info(
                "{} hit the start limit; clearing the reset-failed flag".format(
                    container))
            dut.shell(
                "sudo systemctl reset-failed {}.service".format(container))
            dut.shell("sudo systemctl start {}.service".format(container))

    logging.info("Wait until all critical services are fully started")
    wait_critical_processes(dut)

    logging.info("Wait some time for all the transceivers to be detected")
    pytest_assert(
        wait_until(300, 20, check_interface_information, dut,
                   enum_frontend_asic_index, interfaces, xcvr_skip_list),
        "Not all interface information are detected within 300 seconds")

    logging.info("Check transceiver status on asic %s" %
                 enum_frontend_asic_index)
    check_transceiver_basic(dut, enum_frontend_asic_index, interfaces,
                            xcvr_skip_list)

    if dut.facts["asic_type"] in ["mellanox"]:

        from .mellanox.check_hw_mgmt_service import check_hw_management_service
        from .mellanox.check_sysfs import check_sysfs

        logging.info("Check the hw-management service")
        check_hw_management_service(dut)

        logging.info("Check sysfs")
        check_sysfs(dut)

    logging.info("Check that critical processes are healthy for 60 seconds")
    check_critical_processes(dut, 60)
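
A hedged sketch of what is_service_hiting_start_limit is assumed to detect: systemd refusing to start a container service because it tripped its start-rate limit. The exact message text varies across systemd versions, so the strings matched below are assumptions.

def is_service_hitting_start_limit_sketch(dut, container):
    # Inspect the unit status; systemd reports "start request repeated too
    # quickly" (or a start-limit-hit result) when StartLimitBurst is exceeded.
    status = dut.shell("sudo systemctl status {}.service".format(container),
                       module_ignore_errors=True)["stdout"]
    return ("start request repeated too quickly" in status
            or "start-limit-hit" in status)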
Example #15
def test_TSA_B_C_with_no_neighbors(duthost, bgpmon_setup_teardown):
    """
    Test TSA, TSB, TSC with no neighbors on ASIC0 in case of multi-asic and single-asic.
    """
    bgp_neighbors = {}
    asic_index = 0 if duthost.is_multi_asic else DEFAULT_ASIC_ID

    try:
        # Remove the Neighbors for the particular BGP instance
        bgp_neighbors = remove_bgp_neighbors(duthost, asic_index)

        # Issue TSA on DUT
        output = duthost.shell("TSA")['stdout_lines']

        # Set the DUT in maintenance state
        # Verify ASIC0 has no neighbors message.
        pytest_assert(
            verify_traffic_shift_per_asic(duthost, output, TS_NO_NEIGHBORS,
                                          asic_index),
            "ASIC is not having no neighbors")

        # Recover to Normal state
        duthost.shell("TSB")['stdout_lines']

        # Verify DUT is in Normal state, and ASIC0 has no neighbors message.
        pytest_assert(
            verify_traffic_shift_per_asic(duthost, output, TS_NO_NEIGHBORS,
                                          asic_index),
            "ASIC is not having no neighbors")

        # Check the traffic state
        duthost.shell("TSC")['stdout_lines']

        # Verify DUT is in Normal state, and ASIC0 has no neighbors message.
        pytest_assert(
            verify_traffic_shift_per_asic(duthost, output, TS_NO_NEIGHBORS,
                                          asic_index),
            "ASIC is not having no neighbors")

    finally:
        # Restore BGP neighbors
        restore_bgp_neighbors(duthost, asic_index, bgp_neighbors)

        # Recover to Normal state
        duthost.shell("TSB")
        wait_critical_processes(duthost)
def test_memory_exhaustion(duthosts, enum_frontend_dut_hostname, localhost):
    """validate kernel will panic and reboot the DUT when runs out of memory and hits oom event"""

    duthost = duthosts[enum_frontend_dut_hostname]
    dut_ip = duthost.mgmt_ip
    hostname = duthost.hostname
    dut_datetime = duthost.get_now_time()

    # Use `tail /dev/zero` to run out of memory completely. Since this command will cause the DUT
    # to reboot, we need to run it in the background (using &) so pytest does not get stuck.
    # We also need to add `nohup` so the process survives the SSH session teardown.
    cmd = 'nohup tail /dev/zero &'
    res = duthost.shell(cmd)
    if not res.is_successful:
        raise Exception('DUT {} failed to run command {}'.format(hostname, cmd))

    logging.info('waiting for ssh to drop on {}'.format(hostname))
    res = localhost.wait_for(host=dut_ip,
                             port=SONIC_SSH_PORT,
                             state='absent',
                             search_regex=SONIC_SSH_REGEX,
                             delay=10,
                             timeout=120,
                             module_ignore_errors=True)
    pytest_assert(not res.is_failed and 'Timeout' not in res.get('msg', ''),
                  'DUT {} did not shutdown'.format(hostname))

    logging.info('waiting for ssh to startup on {}'.format(hostname))
    res = localhost.wait_for(host=dut_ip,
                             port=SONIC_SSH_PORT,
                             state='started',
                             search_regex=SONIC_SSH_REGEX,
                             delay=10,
                             timeout=120,
                             module_ignore_errors=True)
    pytest_assert(not res.is_failed and 'Timeout' not in res.get('msg', ''),
                  'DUT {} did not startup'.format(hostname))

    # Wait until all critical processes are healthy.
    wait_critical_processes(duthost)

    # Verify DUT uptime is later than the time when the test case started running.
    dut_uptime = duthost.get_up_time()
    pytest_assert(dut_uptime > dut_datetime,
                  "Device {} did not reboot".format(hostname))
def test_reload_configuration(duthosts, rand_one_dut_hostname,
                              conn_graph_facts, xcvr_skip_list):
    """
    @summary: This test case is to reload the configuration and check platform status
    """
    duthost = duthosts[rand_one_dut_hostname]
    interfaces = conn_graph_facts["device_conn"][duthost.hostname]
    asic_type = duthost.facts["asic_type"]

    if config_force_option_supported(duthost):
        assert wait_until(300, 20, 0, config_system_checks_passed, duthost)

    logging.info("Reload configuration")
    duthost.shell("sudo config reload -y &>/dev/null", executable="/bin/bash")

    logging.info("Wait until all critical services are fully started")
    wait_critical_processes(duthost)

    logging.info("Wait some time for all the transceivers to be detected")
    assert wait_until(300, 20, 0, check_all_interface_information, duthost, interfaces, xcvr_skip_list), \
        "Not all transceivers are detected in 300 seconds"

    logging.info("Check transceiver status")
    for asic_index in duthost.get_frontend_asic_ids():
        # Get the interfaces pertaining to that asic
        interface_list = get_port_map(duthost, asic_index)
        interfaces_per_asic = {
            k: v
            for k, v in interface_list.items() if k in interfaces
        }
        check_transceiver_basic(duthost, asic_index, interfaces_per_asic,
                                xcvr_skip_list)

    if asic_type in ["mellanox"]:

        from .mellanox.check_hw_mgmt_service import check_hw_management_service
        from .mellanox.check_sysfs import check_sysfs

        logging.info("Check the hw-management service")
        check_hw_management_service(duthost)

        logging.info("Check sysfs")
        check_sysfs(duthost)
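
The per-ASIC loop above narrows the full get_port_map output down to the minigraph-defined interfaces before running the transceiver check. The snippet below is a self-contained illustration of that dict-comprehension filter, using made-up data:

port_map = {"Ethernet0": [0], "Ethernet4": [1], "Ethernet-BP0": [2]}   # hypothetical get_port_map() result
interfaces = ["Ethernet0", "Ethernet4"]                                # minigraph front-panel ports
interfaces_per_asic = {k: v for k, v in port_map.items() if k in interfaces}
assert interfaces_per_asic == {"Ethernet0": [0], "Ethernet4": [1]}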
Example #18
def check_interfaces_and_services(dut, interfaces, reboot_type=None):
    """
    Perform further checks after the reboot-cause check, including transceiver status and interface status
    @param dut: The AnsibleHost object of DUT.
    @param interfaces: DUT's interfaces defined by minigraph
    @param reboot_type: The type of reboot that was performed, if any
    """
    logging.info("Wait until all critical services are fully started")
    wait_critical_processes(dut)

    if reboot_type is not None:
        logging.info("Check reboot cause")
        assert wait_until(MAX_WAIT_TIME_FOR_REBOOT_CAUSE, 20, check_reboot_cause, dut, reboot_type), \
            "got reboot-cause failed after rebooted by %s" % reboot_type

        if reboot_ctrl_dict[reboot_type]["test_reboot_cause_only"]:
            logging.info(
                "Further checking skipped for %s test which intends to verify reboot-cause only"
                % reboot_type)
            return

    logging.info("Wait %d seconds for all the transceivers to be detected" %
                 MAX_WAIT_TIME_FOR_INTERFACES)
    assert wait_until(MAX_WAIT_TIME_FOR_INTERFACES, 20, check_interface_information, dut, interfaces), \
        "Not all transceivers are detected or interfaces are up in %d seconds" % MAX_WAIT_TIME_FOR_INTERFACES

    logging.info("Check transceiver status")
    check_transceiver_basic(dut, interfaces)

    logging.info("Check pmon daemon status")
    assert check_pmon_daemon_status(dut), "Not all pmon daemons are running."

    if dut.facts["asic_type"] in ["mellanox"]:

        from .mellanox.check_hw_mgmt_service import check_hw_management_service
        from .mellanox.check_sysfs import check_sysfs

        logging.info("Check the hw-management service")
        check_hw_management_service(dut)

        logging.info("Check sysfs")
        check_sysfs(dut)
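
check_reboot_cause is not part of this collection either. As a rough, hedged sketch of what it is assumed to do, the helper below runs the SONiC "show reboot-cause" CLI and looks for a keyword tied to the requested reboot type; the keyword mapping is illustrative only, not the repo's table.

def check_reboot_cause_sketch(dut, reboot_type):
    expected_keyword = {          # illustrative mapping, not the repo's data
        "cold": "reboot",
        "fast": "fast-reboot",
        "warm": "warm-reboot",
        "power off": "Power Loss",
    }.get(reboot_type, reboot_type)
    output = dut.shell("show reboot-cause")["stdout"]
    return expected_keyword in output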
Example #19
    def teardown(self, duthost, localhost, pdu_controller):
        yield
        # If the SSH connection is not established, or any critical process has exited,
        # try to recover the DUT by PDU reboot.
        dut_ip = duthost.mgmt_ip
        hostname = duthost.hostname
        if not self.check_ssh_state(localhost, dut_ip, SSH_STATE_STARTED):
            if pdu_controller is None:
                logging.error(
                    "No PDU controller for {}, failed to recover DUT!".format(
                        hostname))
                return
            self.pdu_reboot(pdu_controller)
            # Waiting for SSH connection startup
            pytest_assert(
                self.check_ssh_state(localhost, dut_ip, SSH_STATE_STARTED,
                                     SSH_STARTUP_TIMEOUT),
                'Recover {} by PDU reboot failed'.format(hostname))
            # Wait until all critical processes are healthy.
            wait_critical_processes(duthost)
Example #20
def test_config_reload_no_toggle(duthost, k8scluster):
    """
    Test case to ensure that when DUT starts as joined to master, and config is saved with disable=false, DUT is still joined to master after config reload
    
    Joins master

    Performs config reload

    Ensures that DUT is still joined to master after config reload

    Args:
        duthost: DUT host object
        k8scluster: shortcut fixture for getting cluster of Kubernetes master hosts
    """
    ku.join_master(duthost, k8scluster.vip) # Assertion within to ensure successful join
    duthost.shell('sudo config save -y')
    config_reload(duthost)
    wait_critical_processes(duthost)

    server_connect_exp_status = True
    server_connect_act_status = ku.check_connected(duthost)
    server_connect_status_updated = ku.poll_for_status_change(duthost, server_connect_exp_status)
    pytest_assert(server_connect_status_updated, "Unexpected k8s server connection status after config reload, Expected server connected status: {}, Found server connected status: {}".format(server_connect_exp_status, server_connect_act_status))
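
Taken together, the examples repeat one recovery recipe: reload (or reboot), call wait_critical_processes, then poll a readiness check with wait_until. The helper below is a hedged distillation of that recipe into a reusable form; it assumes the same repo helpers the examples import, and the helper name and timeout values are arbitrary.

def reload_and_wait_ready(duthost, readiness_check, *check_args):
    """Reload config, wait for critical processes, then poll `readiness_check`
    (called as readiness_check(duthost, *check_args)) for up to 300 seconds."""
    config_reload(duthost)
    wait_critical_processes(duthost)
    return wait_until(300, 20, 0, readiness_check, duthost, *check_args)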