Example #1
def setup_ntp(ptfhost, duthost):
    """setup ntp client and server"""

    # enable ntp server
    ptfhost.service(name="ntp", state="started")

    # setup ntp on dut to sync with ntp server
    config_facts = duthost.config_facts(host=duthost.hostname, source="running")['ansible_facts']
    ntp_servers = config_facts.get('NTP_SERVER', {})
    for ntp_server in ntp_servers:
        duthost.command("config ntp del %s" % ntp_server)

    ptfip = ptfhost.host.options['inventory_manager'].get_host(ptfhost.hostname).vars['ansible_host']
    duthost.command("config ntp add %s" % ptfip)

    wait_until(120, 5, check_ntp_status, ptfhost)

    yield

    # stop ntp server
    ptfhost.service(name="ntp", state="stopped")

    # reset ntp client configuration
    duthost.command("config ntp del %s" % ptfip)
    for ntp_server in ntp_servers:
        duthost.command("config ntp add %s" % ntp_server)
Example #2
    def setup_reboot_standby(self, duthost2, localhost, delay=10, timeout=180):
        dut2_ports = natsorted(
            g_vars['dut2_port_alias']
            ['port_name_map'].keys())[:len(g_vars['dut2_all_interfaces'])]
        for port in dut2_ports:
            duthost2.shell("config interface shutdown {}".format(port))
        duthost2.shell("config save -y")
        duthost2.shell("nohup reboot &", module_ignore_errors=True)
        time.sleep(20)

        yield
        # waiting for ssh to startup
        dut_ip = duthost2.host.options['inventory_manager'].get_host(
            duthost2.hostname).address
        localhost.wait_for(host=dut_ip,
                           port=SONIC_SSH_PORT,
                           state='started',
                           search_regex=SONIC_SSH_REGEX,
                           delay=delay,
                           timeout=timeout)

        wait_until(120, 10, duthost2.critical_services_fully_started)
        for port in dut2_ports:
            duthost2.shell("config interface startup {}".format(port))
        duthost2.shell("config save -y")
        time.sleep(5)
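Example #3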
def turn_off_psu_and_check_thermal_control(dut, psu_ctrl, psu, mocker):
    """
    @summary: Turn off a PSU and check that all FAN speeds are set to 100% according to the thermal
              control policy file.
    """
    logging.info("Turn off PSU %s" % str(psu["psu_id"]))
    psu_ctrl.turn_off_psu(psu["psu_id"])
    time.sleep(5)

    psu_under_test = None
    psu_line_pattern = re.compile(r"PSU\s+\d+\s+(OK|NOT OK|NOT PRESENT)")
    cli_psu_status = dut.command(CMD_PLATFORM_PSUSTATUS)
    for line in cli_psu_status["stdout_lines"][2:]:
        assert psu_line_pattern.match(line), "Unexpected PSU status output"
        fields = line.split()
        if fields[2] != "OK":
            psu_under_test = fields[1]

    assert psu_under_test is not None, "No PSU is turned off"
    logging.info('Wait and check that all FAN speeds turn to 100%...')
    wait_until(THERMAL_CONTROL_TEST_WAIT_TIME,
               THERMAL_CONTROL_TEST_CHECK_INTERVAL, mocker.check_all_fan_speed,
               100)

    psu_ctrl.turn_on_psu(psu["psu_id"])
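Example #4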
def test_thermal_control_fan_status(testbed_devices, mocker_factory):
    """
    @summary: Make a FAN absent, over speed and under speed; check logs and LED color.
    """
    dut = testbed_devices["dut"]
    loganalyzer = LogAnalyzer(ansible_host=dut, marker_prefix='thermal_control')
    loganalyzer.load_common_config()

    with ThermalPolicyFileContext(dut, THERMAL_POLICY_VALID_FILE):
        fan_mocker = mocker_factory(dut, 'FanStatusMocker')
        if fan_mocker is None:
            pytest.skip("No FanStatusMocker for %s, skip rest of the testing in this case" % dut.facts['asic_type'])

        logging.info('Mock FAN status data...')
        fan_mocker.mock_data()  # make data random
        restart_thermal_control_daemon(dut)
        wait_until(THERMAL_CONTROL_TEST_WAIT_TIME, THERMAL_CONTROL_TEST_CHECK_INTERVAL, fan_mocker.check_all_fan_speed,
                   60)
        check_thermal_algorithm_status(dut, mocker_factory, False)

        single_fan_mocker = mocker_factory(dut, 'SingleFanMocker')
        time.sleep(THERMAL_CONTROL_TEST_WAIT_TIME)

        if single_fan_mocker.is_fan_removable():
            loganalyzer.expect_regex = [LOG_EXPECT_FAN_REMOVE_RE]
            with loganalyzer:
                logging.info('Mocking an absent FAN...')
                single_fan_mocker.mock_absence()
                check_cli_output_with_mocker(dut, single_fan_mocker, CMD_PLATFORM_FANSTATUS, THERMAL_CONTROL_TEST_WAIT_TIME)

            loganalyzer.expect_regex = [LOG_EXPECT_FAN_REMOVE_CLEAR_RE]
            with loganalyzer:
                logging.info('Bring the absent FAN back to presence...')
                single_fan_mocker.mock_presence()
                check_cli_output_with_mocker(dut, single_fan_mocker, CMD_PLATFORM_FANSTATUS, THERMAL_CONTROL_TEST_WAIT_TIME)

        loganalyzer.expect_regex = [LOG_EXPECT_FAN_OVER_SPEED_RE]
        with loganalyzer:
            logging.info('Mocking an over speed FAN...')
            single_fan_mocker.mock_over_speed()
            check_cli_output_with_mocker(dut, single_fan_mocker, CMD_PLATFORM_FANSTATUS, THERMAL_CONTROL_TEST_WAIT_TIME)

        loganalyzer.expect_regex = [LOG_EXPECT_FAN_OVER_SPEED_CLEAR_RE]
        with loganalyzer:
            logging.info('Make the over speed FAN back to normal...')
            single_fan_mocker.mock_normal_speed()
            check_cli_output_with_mocker(dut, single_fan_mocker, CMD_PLATFORM_FANSTATUS, THERMAL_CONTROL_TEST_WAIT_TIME)

        loganalyzer.expect_regex = [LOG_EXPECT_FAN_UNDER_SPEED_RE]
        with loganalyzer:
            logging.info('Mocking an under speed FAN...')
            single_fan_mocker.mock_under_speed()
            check_cli_output_with_mocker(dut, single_fan_mocker, CMD_PLATFORM_FANSTATUS, THERMAL_CONTROL_TEST_WAIT_TIME)

        loganalyzer.expect_regex = [LOG_EXPECT_FAN_UNDER_SPEED_CLEAR_RE]
        with loganalyzer:
            logging.info('Make the under speed FAN back to normal...')
            single_fan_mocker.mock_normal_speed()
            check_cli_output_with_mocker(dut, single_fan_mocker, CMD_PLATFORM_FANSTATUS, THERMAL_CONTROL_TEST_WAIT_TIME)
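Example #5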
def test_set_psu_fan_speed(duthost, mocker_factory):
    hwsku = duthost.facts["hwsku"]
    psu_num = SWITCH_MODELS[hwsku]['psus']['number']
    hot_swappable = SWITCH_MODELS[hwsku]['psus']['hot_swappable']
    if not hot_swappable:
        pytest.skip(
            'The SKU {} does not support this test case.'.format(hwsku))

    logging.info('Create mocker, it may take a few seconds...')
    single_fan_mocker = mocker_factory(duthost, 'SingleFanMocker')
    logging.info('Mock FAN absence...')
    single_fan_mocker.mock_absence()
    assert wait_until(THERMAL_CONTROL_TEST_WAIT_TIME, THERMAL_CONTROL_TEST_CHECK_INTERVAL, check_cooling_cur_state, duthost, 10, operator.eq), \
        'Current cooling state is {}'.format(get_cooling_cur_state(duthost))

    logging.info('Wait {} seconds for the policy to take effect...'.format(
        THERMAL_CONTROL_TEST_CHECK_INTERVAL))
    time.sleep(THERMAL_CONTROL_TEST_CHECK_INTERVAL)
    full_speeds = []
    for index in range(psu_num):
        speed = get_psu_speed(duthost, index)
        full_speeds.append(speed)

    logging.info('Full speed={}'.format(full_speeds))
    logging.info('Mock FAN presence...')
    single_fan_mocker.mock_presence()
    assert wait_until(THERMAL_CONTROL_TEST_WAIT_TIME, THERMAL_CONTROL_TEST_CHECK_INTERVAL, check_cooling_cur_state, duthost, 10, operator.ne), \
        'Current cooling state is {}'.format(get_cooling_cur_state(duthost))
    logging.info('Wait {} seconds for the policy to take effect...'.format(
        THERMAL_CONTROL_TEST_CHECK_INTERVAL))
    time.sleep(THERMAL_CONTROL_TEST_CHECK_INTERVAL)
    cooling_cur_state = get_cooling_cur_state(duthost)
    logging.info('Cooling level changed to {}'.format(cooling_cur_state))
    current_speeds = []
    for index in range(psu_num):
        speed = get_psu_speed(duthost, index)
        current_speeds.append(speed)

    logging.info('Current speed={}'.format(current_speeds))
    index = 0
    if cooling_cur_state < 6:
        cooling_cur_state = 6
    expect_multiple = float(10) / cooling_cur_state
    while index < psu_num:
        full_speed = full_speeds[index]
        current_speed = current_speeds[index]
        index += 1
        if not full_speed or not current_speed:
            continue

        actual_multiple = float(full_speed) / current_speed
        if expect_multiple > actual_multiple:
            assert actual_multiple > expect_multiple * (1 -
                                                        PSU_SPEED_TOLERANCE)
        elif expect_multiple < actual_multiple:
            assert actual_multiple < expect_multiple * (1 +
                                                        PSU_SPEED_TOLERANCE)
Example #6
    def __verify_lag_minlink(self,
                             host,
                             lag_name,
                             intf,
                             neighbor_intf,
                             po_interfaces,
                             po_flap,
                             deselect_time,
                             wait_timeout=30):
        delay = 5

        try:
            host.shutdown(neighbor_intf)

            # Let the PortChannel react to the neighbor interface shutdown
            time.sleep(deselect_time)

            # Verify PortChannel interfaces are up correctly
            for po_intf in po_interfaces.keys():
                if po_intf != intf:
                    command = 'bash -c "teamdctl %s state dump" | python -c "import sys, json; print json.load(sys.stdin)[\'ports\'][\'%s\'][\'runner\'][\'selected\']"' % (
                        lag_name, po_intf)
                    wait_until(wait_timeout, delay, self.__check_shell_output,
                               self.duthost, command)

            # Refresh lag facts
            lag_facts = self.__get_lag_facts()

            # Verify lag member is marked deselected for the shutdown port and all other lag member interfaces are marked selected
            for po_intf in po_interfaces.keys():
                if po_intf != intf:
                    assert lag_facts['lags'][lag_name]['po_stats']['ports'][
                        po_intf]['runner']['selected']
                else:
                    assert not lag_facts['lags'][lag_name]['po_stats'][
                        'ports'][po_intf]['runner']['selected']

            # Verify the PortChannel interface is marked down/up correctly when it should go down/up
            if po_flap == True:
                assert lag_facts['lags'][lag_name]['po_intf_stat'] == 'Down'
            else:
                assert lag_facts['lags'][lag_name]['po_intf_stat'] == 'Up'
        finally:
            # Bring the port back up in case the test errored out and left the testbed in an unknown state
            # Bring up neighbor interface
            host.no_shutdown(neighbor_intf)

            # Verify PortChannel interfaces are up correctly
            for po_intf in po_interfaces.keys():
                if po_intf != intf:
                    command = 'bash -c "teamdctl %s state dump" | python -c "import sys, json; print json.load(sys.stdin)[\'ports\'][\'%s\'][\'link\'][\'up\']"' % (
                        lag_name, po_intf)
                    wait_until(wait_timeout, delay, self.__check_shell_output,
                               self.duthost, command)
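Example #7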
def test_thermal_control_psu_absence(testbed_devices, psu_controller, mocker_factory):
    """
    @summary: Turn off/on PSUs, check thermal control is working as expected.
    """
    dut = testbed_devices["dut"]
    psu_num = get_psu_num(dut)
    if psu_num < 2:
        pytest.skip("At least 2 PSUs required for rest of the testing in this case")

    logging.info("Create PSU controller for testing")
    psu_ctrl = psu_controller
    if psu_ctrl is None:
        pytest.skip("No PSU controller for %s, skip rest of the testing in this case" % dut.hostname)

    logging.info("To avoid DUT being shutdown, need to turn on PSUs that are not powered")
    turn_all_psu_on(psu_ctrl)

    logging.info("Initialize test results")
    psu_test_results = {}
    if not check_all_psu_on(dut, psu_test_results):
        pytest.skip("Some PSU are still down, skip rest of the testing in this case")

    with ThermalPolicyFileContext(dut, THERMAL_POLICY_VALID_FILE):
        fan_mocker = mocker_factory(dut, 'FanStatusMocker')
        if fan_mocker is None:
            pytest.skip("No FanStatusMocker for %s, skip rest of the testing in this case" % dut.facts['asic_type'])

        logging.info('Mock FAN status data...')
        fan_mocker.mock_data()  # make data random
        restart_thermal_control_daemon(dut)
        logging.info('Wait and check that all FAN speeds turn to 60%...')
        wait_until(THERMAL_CONTROL_TEST_WAIT_TIME, THERMAL_CONTROL_TEST_CHECK_INTERVAL, fan_mocker.check_all_fan_speed,
                   60)

        check_thermal_algorithm_status(dut, mocker_factory, False)

        logging.info('Shutdown first PSU and check thermal control result...')
        all_psu_status = psu_ctrl.get_psu_status()
        psu = all_psu_status[0]
        turn_off_psu_and_check_thermal_control(dut, psu_ctrl, psu, fan_mocker)
        psu_test_results.clear()
        if not check_all_psu_on(dut, psu_test_results):
            pytest.skip("Some PSU are still down, skip rest of the testing in this case")

        logging.info('Shutdown second PSU and check thermal control result...')
        psu = all_psu_status[1]
        turn_off_psu_and_check_thermal_control(dut, psu_ctrl, psu, fan_mocker)
        psu_test_results.clear()
        if not check_all_psu_on(dut, psu_test_results):
            pytest.skip("Some PSU are still down, skip rest of the testing in this case")

        logging.info('Wait and check that all FAN speeds turn to 65%...')
        wait_until(THERMAL_CONTROL_TEST_WAIT_TIME, THERMAL_CONTROL_TEST_CHECK_INTERVAL, fan_mocker.check_all_fan_speed,
                   65)
Example #8
    def run_lag_fallback_test(self, lag_name):
        logging.info("Start checking lag fall back for: %s" % lag_name)

        lag_facts = self.__get_lag_facts()
        intf, po_interfaces = self.__get_lag_intf_info(lag_facts, lag_name)
        po_fallback = lag_facts['lags'][lag_name]['po_config']['runner'][
            'fallback']

        # Figure out remote VM and interface info for the lag member and run lag fallback test
        peer_device = self.vm_neighbors[intf]['name']
        neighbor_intf = self.vm_neighbors[intf]['port']
        vm_host = self.nbrhosts[peer_device]['host']

        wait_timeout = 120
        delay = 5
        try:
            # Shut down neighbor interface
            vm_host.shutdown(neighbor_intf)
            wait_until(wait_timeout, delay, self.__check_intf_state, vm_host,
                       neighbor_intf, False)

            # Refresh lag facts
            lag_facts = self.__get_lag_facts()

            # Get teamshow result
            teamshow_result = self.duthost.shell('teamshow')
            logging.debug("Teamshow result: %s" % teamshow_result)

            # Verify lag members
            # 1. All other lag members should keep the selected state
            # 2. The shutdown port should keep the selected state if fallback is enabled
            # 3. The shutdown port should be marked as deselected if fallback is disabled
            for po_intf in po_interfaces.keys():
                if po_intf != intf or po_fallback:
                    assert lag_facts['lags'][lag_name]['po_stats']['ports'][
                        po_intf]['runner']['selected']
                else:
                    assert not lag_facts['lags'][lag_name]['po_stats'][
                        'ports'][po_intf]['runner']['selected']

            # The PortChannel should be marked Up/Down correctly according to the po fallback setting
            if po_fallback:
                assert lag_facts['lags'][lag_name]['po_intf_stat'] == 'Up'
            else:
                assert lag_facts['lags'][lag_name]['po_intf_stat'] == 'Down'

        finally:
            # Bring up neighbor interface
            vm_host.no_shutdown(neighbor_intf)
            wait_until(wait_timeout, delay, self.__check_intf_state, vm_host,
                       neighbor_intf, True)
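Example #9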
def restart_service_and_check(localhost, dut, service, interfaces):
    """
    Restart specified service and check platform status
    """

    logging.info("Restart the %s service" % service)
    dut.command("sudo systemctl restart %s" % service)

    logging.info("Wait until all critical services are fully started")
    check_critical_services(dut)

    logging.info("Wait some time for all the transceivers to be detected")
    assert wait_until(300, 20, check_interface_information, dut, interfaces), \
        "Not all interface information are detected within 300 seconds"

    logging.info("Check transceiver status")
    check_transceiver_basic(dut, interfaces)

    if dut.facts["asic_type"] in ["mellanox"]:

        current_file_dir = os.path.dirname(os.path.realpath(__file__))
        sub_folder_dir = os.path.join(current_file_dir, "mellanox")
        if sub_folder_dir not in sys.path:
            sys.path.append(sub_folder_dir)
        from check_hw_mgmt_service import check_hw_management_service
        from check_sysfs import check_sysfs

        logging.info("Check the hw-management service")
        check_hw_management_service(dut)

        logging.info("Check sysfs")
        check_sysfs(dut)
Example #10
    def test_fastboot(self, duthost, localhost, testbed):

        duthost.command('sudo config save -y')
        reboot(duthost, localhost, reboot_type='fast')
        assert wait_until(300, 20, duthost.critical_services_fully_started
                          ), "Not all critical services are fully started"
        self.basic_check_after_reboot(duthost, localhost, testbed)
Example #11
    def test_standby_up(self, duthost, duthost2, ptfhost, testbed):
        dut1_status = duthost.shell(
            "mclagdctl -i {} dump state|grep 'keepalive'".format(
                g_vars['mclag_domain_id']))['stdout'].split(":")[-1].strip()
        dut2_status = duthost2.shell(
            "mclagdctl -i {} dump state|grep 'keepalive'".format(
                g_vars['mclag_domain_id']))['stdout'].split(":")[-1].strip()
        assert dut1_status == dut2_status == "OK", "Mclag keepalive status should be OK on both peers after the active peer reboots and comes back up"

        # before sending pkts, wait until the standby mclag re-aggregates successfully after the router_mac change
        assert wait_until(150, 10, check_teamd_status, duthost2, g_vars['dut1_router_mac']), \
                "Standby teamd status should be up and sysid should be same as active's mac"

        ptf_runner(
            ptfhost,
            "ptftests",
            "mclag_test.MclagTest",
            platform_dir="ptftests",
            params={
                "router_mac":
                g_vars['dut1_router_mac'],
                "router_mac_dut2":
                g_vars['dut2_router_mac'],
                "testbed_type":
                testbed['topo'],
                "switch_info":
                "/tmp/mclag/mclag_switch_info_{}.txt".format(test_scenario),
                "test_scenario":
                test_scenario,
                "ignore_ports": []
            },
            log_file="/tmp/mclag/log/mclag_{}_[{}]_[{}].log".format(
                test_scenario, self.__class__.__name__,
                sys._getframe().f_code.co_name))
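Example #12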
    def test_ntp(self, duthost):
        force_ntp = "ntpd -gq"
        duthost.service(name='ntp', state='stopped')
        logging.info("Ntp restart in mgmt vrf")
        execute_dut_command(duthost, force_ntp)
        duthost.service(name='ntp', state='restarted')
        assert wait_until(100, 10, self.check_ntp_status, duthost), "Ntp not started"
Example #13
def test_ntp(duthost, setup_ntp):
    """ verify the LLDP message on DUT """

    duthost.service(name='ntp', state='stopped')
    duthost.command("ntpd -gq")
    duthost.service(name='ntp', state='restarted')
    assert wait_until(120, 5, check_ntp_status, duthost), "Ntp not in sync"
Example #14
def test_reload_configuration(testbed_devices, conn_graph_facts):
    """
    @summary: This test case is to reload the configuration and check platform status
    """
    ans_host = testbed_devices["dut"]
    interfaces = conn_graph_facts["device_conn"]
    asic_type = ans_host.facts["asic_type"]

    logging.info("Reload configuration")
    ans_host.command("sudo config reload -y")

    logging.info("Wait until all critical services are fully started")
    check_critical_services(ans_host)

    logging.info("Wait some time for all the transceivers to be detected")
    assert wait_until(300, 20, check_interface_information, ans_host, interfaces), \
        "Not all transceivers are detected in 300 seconds"

    logging.info("Check transceiver status")
    check_transceiver_basic(ans_host, interfaces)

    if asic_type in ["mellanox"]:

        current_file_dir = os.path.dirname(os.path.realpath(__file__))
        sub_folder_dir = os.path.join(current_file_dir, "mellanox")
        if sub_folder_dir not in sys.path:
            sys.path.append(sub_folder_dir)
        from check_hw_mgmt_service import check_hw_management_service
        from check_sysfs import check_sysfs

        logging.info("Check the hw-management service")
        check_hw_management_service(ans_host)

        logging.info("Check sysfs")
        check_sysfs(ans_host)
Example #15
def setup_ntp(ptfhost, duthost, creds):
    """setup ntp client and server"""
    if creds.get('proxy_env'):
        # If the testbed is behind a proxy, force ntpd inside the ptf container to use the local clock
        ptfhost.lineinfile(path="/etc/ntp.conf",
                           line="server 127.127.1.0 prefer")

    # enable ntp server
    ntp_en_res = ptfhost.service(name="ntp", state="started")

    # setup ntp on dut to sync with ntp server
    config_facts = duthost.config_facts(host=duthost.hostname,
                                        source="running")['ansible_facts']
    ntp_servers = config_facts.get('NTP_SERVER', {})
    for ntp_server in ntp_servers:
        duthost.command("config ntp del %s" % ntp_server)

    ptfip = ptfhost.host.options['inventory_manager'].get_host(
        ptfhost.hostname).vars['ansible_host']
    duthost.command("config ntp add %s" % ptfip)
    pytest_assert(wait_until(120, 5, check_ntp_status, ptfhost), \
        "NTP server was not started in PTF container {}; NTP service start result {}".format(ptfhost.hostname, ntp_en_res))

    yield

    # stop ntp server
    ptfhost.service(name="ntp", state="stopped")
    # reset ntp client configuration
    duthost.command("config ntp del %s" % ptfip)
    for ntp_server in ntp_servers:
        duthost.command("config ntp add %s" % ntp_server)
Example #16
def check_interfaces_and_services(dut, interfaces, reboot_type=None):
    """
    Perform a further check after reboot-cause, including transceiver status, interface status
    @param localhost: The Localhost object.
    @param dut: The AnsibleHost object of DUT.
    @param interfaces: DUT's interfaces defined by minigraph
    """
    logging.info("Wait until all critical services are fully started")
    check_critical_services(dut)

    if reboot_type is not None:
        logging.info("Check reboot cause")
        reboot_cause = reboot_ctrl_dict[reboot_type]["cause"]
        assert wait_until(MAX_WAIT_TIME_FOR_REBOOT_CAUSE, 20, check_reboot_cause, dut, reboot_cause), \
            "got reboot-cause failed after rebooted by %s" % reboot_cause

        if reboot_ctrl_dict[reboot_type]["test_reboot_cause_only"]:
            logging.info(
                "Further checking skipped for %s test which intends to verify reboot-cause only"
                % reboot_type)
            return

    logging.info("Wait %d seconds for all the transceivers to be detected" %
                 MAX_WAIT_TIME_FOR_INTERFACES)
    assert wait_until(MAX_WAIT_TIME_FOR_INTERFACES, 20, check_interface_information, dut, interfaces), \
        "Not all transceivers are detected or interfaces are up in %d seconds" % MAX_WAIT_TIME_FOR_INTERFACES

    logging.info("Check transceiver status")
    check_transceiver_basic(dut, interfaces)

    logging.info("Check pmon daemon status")
    assert check_pmon_daemon_status(dut), "Not all pmon daemons running."

    if dut.facts["asic_type"] in ["mellanox"]:

        current_file_dir = os.path.dirname(os.path.realpath(__file__))
        sub_folder_dir = os.path.join(current_file_dir, "mellanox")
        if sub_folder_dir not in sys.path:
            sys.path.append(sub_folder_dir)
        from check_hw_mgmt_service import check_hw_management_service
        from check_sysfs import check_sysfs

        logging.info("Check the hw-management service")
        check_hw_management_service(dut)

        logging.info("Check sysfs")
        check_sysfs(dut)
Example #17
    def test_reboot(self, localhost, testbed_devices, testbed):
        duthost = testbed_devices["dut"]
        duthost.command('sudo config save -y')
        reboot(duthost, localhost)
        assert wait_until(300, 20, duthost.critical_services_fully_started
                          ), "Not all critical services are fully started"
        self.basic_check_after_reboot(duthost, localhost, testbed_devices,
                                      testbed)
Example #18
def reboot_and_check(localhost, dut, interfaces, reboot_type="cold"):
    """
    Perform the specified type of reboot and check platform status.
    """
    logging.info("Run %s reboot on DUT" % reboot_type)
    if reboot_type == "cold":
        reboot_cmd = "reboot"
        reboot_timeout = 300
    elif reboot_type == "fast":
        reboot_cmd = "fast-reboot"
        reboot_timeout = 180
    elif reboot_type == "warm":
        reboot_cmd = "warm-reboot"
        reboot_timeout = 180
    else:
        assert False, "Reboot type %s is not supported" % reboot_type
    process, queue = dut.command(reboot_cmd, module_async=True)

    logging.info("Wait for DUT to go down")
    res = localhost.wait_for(host=dut.hostname, port=22, state="stopped", delay=10, timeout=120,
        module_ignore_errors=True)
    if "failed" in res:
        if process.is_alive():
            logging.error("Command '%s' is not completed" % reboot_cmd)
            process.terminate()
        logging.error("reboot result %s" % str(queue.get()))
        assert False, "DUT did not go down"

    logging.info("Wait for DUT to come back")
    localhost.wait_for(host=dut.hostname, port=22, state="started", delay=10, timeout=reboot_timeout)

    logging.info("Wait until all critical services are fully started")
    check_critical_services(dut)

    logging.info("Wait some time for all the transceivers to be detected")
    assert wait_until(300, 20, all_transceivers_detected, dut, interfaces), \
        "Not all transceivers are detected in 300 seconds"

    logging.info("Check interface status")
    check_interface_status(dut, interfaces)

    logging.info("Check transceiver status")
    check_transceiver_basic(dut, interfaces)

    if dut.facts["asic_type"] in ["mellanox"]:

        current_file_dir = os.path.dirname(os.path.realpath(__file__))
        sub_folder_dir = os.path.join(current_file_dir, "mellanox")
        if sub_folder_dir not in sys.path:
            sys.path.append(sub_folder_dir)
        from check_hw_mgmt_service import check_hw_management_service
        from check_sysfs import check_sysfs

        logging.info("Check the hw-management service")
        check_hw_management_service(dut)

        logging.info("Check sysfs")
        check_sysfs(dut)
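Example #19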
def check_critical_services(dut):
    """
    @summary: Use systemctl to check whether all the critical services have expected status. ActiveState of all
        services must be "active". SubState of all services must be "running".
    @param dut: The AnsibleHost object of DUT. For interacting with DUT.
    """
    logging.info("Wait until all critical services are fully started")
    assert wait_until(300, 20, _all_critical_services_fully_started,
                      dut), "Not all critical services are fully started"
def verify_drop_counters(duthost, dut_iface, get_cnt_cli_cmd, column_key):
    """ Verify drop counter incremented on specific interface """
    get_drops = lambda: int(get_pkt_drops(duthost, get_cnt_cli_cmd)[dut_iface][column_key].replace(",", ""))
    check_drops_on_dut = lambda: PKT_NUMBER == get_drops()
    if not wait_until(5, 1, check_drops_on_dut):
        fail_msg = "'{}' drop counter was not incremented on iface {}. DUT {} == {}; Sent == {}".format(
            column_key, dut_iface, column_key, get_drops(), PKT_NUMBER
        )
        pytest.fail(fail_msg)
Example #21
def reboot(duthost,
           localhost,
           delay=10,
           timeout=180,
           wait=120,
           basic_check=True):
    """
    cold reboots DUT
    :param duthost: DUT host object
    :param localhost:  local host object
    :param delay: delay between ssh availability checks
    :param timeout: timeout for waiting ssh port state change
    :param wait: time to wait for DUT to initialize
    :param basic_check: check duthost.critical_services_fully_started after DUT initialize
    :return:
    """

    dut_ip = duthost.host.options['inventory_manager'].get_host(
        duthost.hostname).address
    duthost.shell("nohup reboot &")

    logging.info('waiting for ssh to drop')
    res = localhost.wait_for(host=dut_ip,
                             port=SONIC_SSH_PORT,
                             state='absent',
                             search_regex=SONIC_SSH_REGEX,
                             delay=delay,
                             timeout=timeout)

    if res.is_failed:
        raise Exception('DUT did not shutdown')

    # TODO: add serial output during reboot for better debuggability
    #       This feature requires serial information to be present in
    #       testbed information

    logging.info('waiting for ssh to startup')
    res = localhost.wait_for(host=dut_ip,
                             port=SONIC_SSH_PORT,
                             state='started',
                             search_regex=SONIC_SSH_REGEX,
                             delay=delay,
                             timeout=timeout)

    if res.is_failed:
        raise Exception('DUT did not startup')

    logging.info('ssh has started up')

    logging.info('waiting for switch to initialize')
    time.sleep(wait)

    if basic_check:
        assert wait_until(timeout, 10, duthost.critical_services_fully_started), \
               "All critical services should fully started!{}".format(duthost.CRITICAL_SERVICES)
Example #22
def setup_bgp_graceful_restart(duthost, nbrhosts):

    config_facts = duthost.config_facts(host=duthost.hostname,
                                        source="running")['ansible_facts']
    bgp_neighbors = config_facts.get('BGP_NEIGHBOR', {})

    for k, nbr in nbrhosts.items():
        logger.info("enable graceful restart on neighbor {}".format(k))
        logger.info("bgp asn {}".format(nbr['conf']['bgp']['asn']))
        res = nbr['host'].eos_config(lines=["graceful-restart restart-time 300"], \
                               parents=["router bgp {}".format(nbr['conf']['bgp']['asn'])])
        logger.info("abc {}".format(res))
        res = nbr['host'].eos_config(lines=["graceful-restart"], \
                               parents=["router bgp {}".format(nbr['conf']['bgp']['asn']), "address-family ipv4"])
        logger.info("abc {}".format(res))
        res = nbr['host'].eos_config(lines=["graceful-restart"], \
                               parents=["router bgp {}".format(nbr['conf']['bgp']['asn']), "address-family ipv6"])
        logger.info("abc {}".format(res))

    # Changing the graceful restart option will clear the bgp sessions,
    # so wait for all bgp sessions to come back up
    logger.info("bgp neighbors: {}".format(bgp_neighbors.keys()))
    if not wait_until(300, 10, duthost.check_bgp_session_state,
                      bgp_neighbors.keys()):
        pytest.fail(
            "not all bgp sessions are up after enable graceful restart")

    yield

    for k, nbr in nbrhosts.items():
        # start bgpd if not started
        nbr['host'].start_bgpd()
        logger.info("disable graceful restart on neighbor {}".format(k))
        nbr['host'].eos_config(lines=["no graceful-restart"], \
                               parents=["router bgp {}".format(nbr['conf']['bgp']['asn']), "address-family ipv4"])
        nbr['host'].eos_config(lines=["no graceful-restart"], \
                               parents=["router bgp {}".format(nbr['conf']['bgp']['asn']), "address-family ipv6"])

    if not wait_until(300, 10, duthost.check_bgp_session_state,
                      bgp_neighbors.keys()):
        pytest.fail(
            "not all bgp sessions are up after disable graceful restart")
def test_techsupport(request, config, duthost, testbed):
    """
    test the "show techsupport" command in a loop
    :param config: fixture to configure additional setups_list on dut.
    :param duthost: DUT host
    :param testbed: testbed
    """
    loop_range = request.config.getoption("--loop_num") or DEFAULT_LOOP_RANGE
    loop_delay = request.config.getoption("--loop_delay") or DEFAULT_LOOP_DELAY
    since = request.config.getoption("--logs_since") or str(randint(1, 23)) + " minute ago"

    logger.debug("Loop_range is {} and loop_delay is {}".format(loop_range, loop_delay))

    for i in range(loop_range):
        logger.debug("Running show techsupport ... ")
        wait_until(300, 20, execute_command, duthost, str(since))
        tar_file = [j for j in pytest.tar_stdout.split('\n') if j != ''][-1]
        stdout = duthost.command("rm -rf {}".format(tar_file))
        logger.debug("Sleeping for {} seconds".format(loop_delay))
        time.sleep(loop_delay)
Example #24
    def testFastreboot(self, sflowbase_config, duthost, localhost, partial_ptf_runner, ptfhost):

        config_sflow(duthost,sflow_status='enable')
        verify_show_sflow(duthost,status='up',collector=['collector0','collector1'])
        duthost.command('sudo config save -y')
        reboot(duthost, localhost,reboot_type='fast')
        assert wait_until(300, 20, duthost.critical_services_fully_started), "Not all critical services are fully started"
        verify_show_sflow(duthost,status='up',collector=['collector0','collector1'])
        for intf in var['sflow_ports']:
            var['sflow_ports'][intf]['ifindex'] = get_ifindex(duthost,intf)
            verify_sflow_interfaces(duthost,intf,'up',512)
        var['portmap'] = json.dumps(var['sflow_ports'])
        ptfhost.copy(content=var['portmap'],dest="/tmp/sflow_ports.json")
        partial_ptf_runner(
              enabled_sflow_interfaces=var['sflow_ports'].keys(),
              active_collectors="['collector0','collector1']" )
Example #25
def check_critical_services(dut):
    """
    @summary: Use systemctl to check whether all the critical services have expected status. ActiveState of all
        services must be "active". SubState of all services must be "running".
    @param dut: The AnsibleHost object of DUT. For interacting with DUT.
    """
    logging.info("Wait until all critical services are fully started")
    assert wait_until(300, 20, dut.critical_services_fully_started
                      ), "Not all critical services are fully started"

    logging.info("Check critical service status")
    for service in dut.CRITICAL_SERVICES:
        status = dut.get_service_props(service)
        assert status["ActiveState"] == "active", \
            "ActiveState of %s is %s, expected: active" % (service, status["ActiveState"])
        assert status["SubState"] == "running", \
            "SubState of %s is %s, expected: active" % (service, status["SubState"])
Example #26
    def testRebootSflowDisable(self, sflowbase_config, duthost,
                               testbed_devices, localhost, partial_ptf_runner,
                               ptfhost):
        config_sflow(duthost, sflow_status='disable')
        verify_show_sflow(duthost, status='down')
        partial_ptf_runner(enabled_sflow_interfaces=var['sflow_ports'].keys(),
                           active_collectors="[]")
        duthost.command('sudo config save -y')
        reboot(duthost, localhost)
        assert wait_until(300, 20, duthost.critical_services_fully_started
                          ), "Not all critical services are fully started"
        verify_show_sflow(duthost, status='down')
        for intf in var['sflow_ports']:
            var['sflow_ports'][intf]['ifindex'] = get_ifindex(duthost, intf)
        var['portmap'] = json.dumps(var['sflow_ports'])
        ptfhost.copy(content=var['portmap'], dest="/tmp/sflow_ports.json")
        partial_ptf_runner(enabled_sflow_interfaces=var['sflow_ports'].keys(),
                           active_collectors="[]")
Example #27
    def testRebootSflowEnable(self, sflowbase_config, duthost, testbed_devices,
                              localhost, partial_ptf_runner, ptfhost):
        duthost = testbed_devices["dut"]
        duthost.command("config sflow polling-interval 80")
        verify_show_sflow(duthost, status='up', polling_int=80)
        duthost.command('sudo config save -y')
        reboot(duthost, localhost)
        assert wait_until(300, 20, duthost.critical_services_fully_started
                          ), "Not all critical services are fully started"
        verify_show_sflow(duthost,
                          status='up',
                          collector=['collector0', 'collector1'],
                          polling_int=80)
        for intf in var['sflow_ports']:
            var['sflow_ports'][intf]['ifindex'] = get_ifindex(duthost, intf)
            verify_sflow_interfaces(duthost, intf, 'up', 512)
        var['portmap'] = json.dumps(var['sflow_ports'])
        ptfhost.copy(content=var['portmap'], dest="/tmp/sflow_ports.json")
        partial_ptf_runner(enabled_sflow_interfaces=var['sflow_ports'].keys(),
                           active_collectors="['collector0','collector1']")
        # Test Polling
        partial_ptf_runner(polling_int=80,
                           active_collectors="['collector0','collector1']")
Example #28
def wait_until_fan_speed_set_to_default(dut, timeout=300, interval=10):
    wait_until(timeout, interval, fan_speed_set_to_default, dut)
Example #29
def test_bgp_gr_helper_routes_perserved(duthost, nbrhosts,
                                        setup_bgp_graceful_restart):
    """
    Verify that DUT routes are preserved when the peer performs graceful restart
    """

    config_facts = duthost.config_facts(host=duthost.hostname,
                                        source="running")['ansible_facts']
    bgp_neighbors = config_facts.get('BGP_NEIGHBOR', {})
    po = config_facts.get('PORTCHANNEL', {})
    dev_nbr = config_facts.get('DEVICE_NEIGHBOR', {})

    rtinfo_v4 = duthost.get_ip_route_info(ipaddress.ip_address(u'0.0.0.0'))
    if len(rtinfo_v4['nexthops']) == 0:
        pytest.skip("there is no next hop for v4 default route")

    rtinfo_v6 = duthost.get_ip_route_info(ipaddress.ip_address(u'::'))
    if len(rtinfo_v6['nexthops']) == 0:
        pytest.skip("there is no next hop for v6 default route")

    ifnames_v4 = [nh[1] for nh in rtinfo_v4['nexthops']]
    ifnames_v6 = [nh[1] for nh in rtinfo_v6['nexthops']]

    ifnames_common = [ifname for ifname in ifnames_v4 if ifname in ifnames_v6]
    ifname = ifnames_common[0]

    # get neighbor device connected ports
    nbr_ports = []
    if ifname.startswith("PortChannel"):
        for member in po[ifname]['members']:
            nbr_ports.append(dev_nbr[member]['port'])
    else:
        pytest.skip(
            "Do not support peer device not connected via port channel")
    logger.info("neighbor device connected ports {}".format(nbr_ports))

    # get nexthop ip
    for nh in rtinfo_v4['nexthops']:
        if nh[1] == ifname:
            bgp_nbr_ipv4 = nh[0]

    for nh in rtinfo_v6['nexthops']:
        if nh[1] == ifname:
            bgp_nbr_ipv6 = nh[0]

    # get the bgp neighbor
    bgp_nbr = bgp_neighbors[str(bgp_nbr_ipv4)]
    nbr_hostname = bgp_nbr['name']
    nbrhost = nbrhosts[nbr_hostname]['host']
    exabgp_sessions = ['exabgp_v4', 'exabgp_v6']
    pytest_assert(nbrhost.check_bgp_session_state([], exabgp_sessions), \
            "exabgp sessions {} are not up before graceful restart".format(exabgp_sessions))

    # shut down the RIB agent (bgpd) to start the graceful restart process
    logger.info("shutdown rib process on neighbor {}".format(nbr_hostname))
    nbrhost.kill_bgpd()

    # wait till the DUT enters NSF state
    pytest_assert(wait_until(60, 5, duthost.check_bgp_session_nsf, bgp_nbr_ipv4), \
            "neighbor {} does not enter NSF state".format(bgp_nbr_ipv4))
    pytest_assert(wait_until(60, 5, duthost.check_bgp_session_nsf, bgp_nbr_ipv6), \
            "neighbor {} does not enter NSF state".format(bgp_nbr_ipv6))

    # confirm ip route still there
    rtinfo_v4 = duthost.get_ip_route_info(ipaddress.ip_address(u'0.0.0.0'))
    pytest_assert(ipaddress.ip_address(bgp_nbr_ipv4) in [ nh[0] for nh in rtinfo_v4['nexthops'] ], \
        "cannot find nexthop {} in the new default route nexthops. {}".format(bgp_nbr_ipv4, rtinfo_v4))

    rtinfo_v6 = duthost.get_ip_route_info(ipaddress.ip_address(u'::'))
    pytest_assert(ipaddress.ip_address(bgp_nbr_ipv6) in [ nh[0] for nh in rtinfo_v6['nexthops'] ], \
        "cannot find nexthop {} in the new default route nexthops. {}".format(bgp_nbr_ipv6, rtinfo_v6))

    # shutdown the connected ports from nbr
    for nbr_port in nbr_ports:
        nbrhost.shutdown(nbr_port)

    try:
        # start Rib agent
        logger.info("startup rib process on neighbor {}".format(nbr_hostname))
        nbrhost.start_bgpd()

        # wait for exabgp sessions to establish
        pytest_assert(wait_until(300, 10, nbrhost.check_bgp_session_state, [], exabgp_sessions), \
            "exabgp sessions {} are not coming back".format(exabgp_sessions))
    except:
        raise
    finally:
        # unshut the connected ports from nbr
        for nbr_port in nbr_ports:
            nbrhost.no_shutdown(nbr_port)

    # confirm bgp session up
    graceful_restarted_bgp_sessions = [str(bgp_nbr_ipv4), str(bgp_nbr_ipv6)]
    pytest_assert(wait_until(300, 10, duthost.check_bgp_session_state, graceful_restarted_bgp_sessions), \
            "graceful restarted bgp sessions {} are not coming back".format(graceful_restarted_bgp_sessions))
Example #30
def reboot_and_check(localhost,
                     dut,
                     interfaces,
                     reboot_type=REBOOT_TYPE_COLD,
                     reboot_helper=None,
                     reboot_kwargs=None):
    """
    Perform the specified type of reboot and check platform status.
    @param localhost: The Localhost object.
    @param dut: The AnsibleHost object of DUT.
    @param interfaces: DUT's interfaces defined by minigraph
    @param reboot_type: The reboot type, pre-defined const that has name convention of REBOOT_TYPE_XXX.
    @param reboot_helper: The helper function used only by power off reboot
    @param reboot_kwargs: The argument used by reboot_helper
    """
    logging.info("Run %s reboot on DUT" % reboot_type)

    assert reboot_type in reboot_ctrl_dict.keys(
    ), "Unknown reboot type %s" % reboot_type

    reboot_timeout = reboot_ctrl_dict[reboot_type]["timeout"]
    reboot_cause = reboot_ctrl_dict[reboot_type]["cause"]

    dut_datetime = datetime.strptime(
        dut.command('date -u +"%Y-%m-%d %H:%M:%S"')["stdout"],
        "%Y-%m-%d %H:%M:%S")

    if reboot_type == REBOOT_TYPE_POWEROFF:
        assert reboot_helper is not None, "A reboot function must be provided for power off reboot"

        reboot_helper(reboot_kwargs)

        localhost.wait_for(host=dut.hostname,
                           port=22,
                           state="stopped",
                           delay=10,
                           timeout=120)
    else:
        reboot_cmd = reboot_ctrl_dict[reboot_type]["command"]
        reboot_task, reboot_res = dut.command(reboot_cmd,
                                              module_ignore_errors=True,
                                              module_async=True)

        logging.info("Wait for DUT to go down")
        res = localhost.wait_for(host=dut.hostname,
                                 port=22,
                                 state="stopped",
                                 timeout=180,
                                 module_ignore_errors=True)
        if "failed" in res:
            try:
                logging.error(
                    "Wait for switch down failed, try to kill any possible stuck reboot task"
                )
                pid = dut.command("pgrep -f '%s'" % reboot_cmd)["stdout"]
                dut.command("kill -9 %s" % pid)
                reboot_task.terminate()
                logging.error("Result of command '%s': " +
                              str(reboot_res.get(timeout=0)))
            except Exception as e:
                logging.error(
                    "Exception raised while cleanup reboot task and get result: "
                    + repr(e))

    logging.info("Wait for DUT to come back")
    localhost.wait_for(host=dut.hostname,
                       port=22,
                       state="started",
                       delay=10,
                       timeout=reboot_timeout)

    logging.info("Check the uptime to verify whether reboot was performed")
    dut_uptime = datetime.strptime(
        dut.command("uptime -s")["stdout"], "%Y-%m-%d %H:%M:%S")
    assert float(dut_uptime.strftime("%s")) - float(
        dut_datetime.strftime("%s")) > 10, "Device did not reboot"

    logging.info("Wait until all critical services are fully started")
    check_critical_services(dut)

    logging.info("Check reboot cause")
    check_reboot_cause(dut, reboot_cause)

    if reboot_ctrl_dict[reboot_type]["test_reboot_cause_only"]:
        logging.info(
            "Further checking skipped for %s test which intends to verify reboot-cause only"
            % reboot_type)
        return

    logging.info("Wait some time for all the transceivers to be detected")
    assert wait_until(300, 20, check_interface_information, dut, interfaces), \
        "Not all transceivers are detected or interfaces are up in 300 seconds"

    logging.info("Check transceiver status")
    check_transceiver_basic(dut, interfaces)

    logging.info("Check pmon daemon status")
    assert check_pmon_daemon_status(dut), "Not all pmon daemons running."

    if dut.facts["asic_type"] in ["mellanox"]:

        current_file_dir = os.path.dirname(os.path.realpath(__file__))
        sub_folder_dir = os.path.join(current_file_dir, "mellanox")
        if sub_folder_dir not in sys.path:
            sys.path.append(sub_folder_dir)
        from check_hw_mgmt_service import check_hw_management_service
        from check_sysfs import check_sysfs

        logging.info("Check the hw-management service")
        check_hw_management_service(dut)

        logging.info("Check sysfs")
        check_sysfs(dut)