def run_test(self, duthost, storm_hndle, expect_regex, syslog_marker, action): """ Storm generation/restoration on all ports and verification Args: duthost (AnsibleHost): DUT instance storm_hndle (PFCMultiStorm): class PFCMultiStorm intance expect_regex (list): list of expect regexs to be matched in the syslog syslog_marker (string): marker prefix written to the syslog action (string): storm/restore action """ loganalyzer = LogAnalyzer(ansible_host=duthost, marker_prefix=syslog_marker) ignore_file = os.path.join(TEMPLATES_DIR, "ignore_pfc_wd_messages") reg_exp = loganalyzer.parse_regexp_file(src=ignore_file) loganalyzer.ignore_regex.extend(reg_exp) loganalyzer.expect_regex = [] loganalyzer.expect_regex.extend(expect_regex) loganalyzer.match_regex = [] with loganalyzer: if action == "storm": storm_hndle.start_pfc_storm() elif action == "restore": storm_hndle.stop_pfc_storm() time.sleep(5)
def storm_detect_path(self, dut, port, action): """ Storm detection action and associated verifications Args: dut(AnsibleHost) : DUT instance port(string) : DUT port action(string) : PTF test action Returns: loganalyzer(Loganalyzer) : instance """ restore_time = self.timers['pfc_wd_restore_time_large'] detect_time = self.timers['pfc_wd_detect_time'] loganalyzer = LogAnalyzer( ansible_host=self.dut, marker_prefix="pfc_function_storm_detect_{}_port_{}".format( action, port)) marker = loganalyzer.init() ignore_file = os.path.join(TEMPLATES_DIR, "ignore_pfc_wd_messages") reg_exp = loganalyzer.parse_regexp_file(src=ignore_file) loganalyzer.ignore_regex.extend(reg_exp) loganalyzer.expect_regex = [] loganalyzer.expect_regex.extend([EXPECT_PFC_WD_DETECT_RE]) loganalyzer.match_regex = [] if action != "dontcare": start_wd_on_ports(dut, port, restore_time, detect_time, action) if not self.pfc_wd['fake_storm']: self.storm_hndle.start_storm() if action == "dontcare": self.traffic_inst.fill_buffer() start_wd_on_ports(dut, port, restore_time, detect_time, "drop") # placing this here to cover all action types. for 'dontcare' action, wd is started much later after the pfc storm is started if self.pfc_wd['fake_storm']: PfcCmd.set_storm_status(dut, self.queue_oid, "enabled") time.sleep(5) # storm detect logger.info("Verify if PFC storm is detected on port {}".format(port)) loganalyzer.analyze(marker) self.stats.get_pkt_cnts(self.queue_oid, begin=True) # test pfcwd functionality on a storm self.traffic_inst.verify_wd_func( action if action != "dontcare" else "drop") return loganalyzer
def execute_test(self, duthost, syslog_marker, ignore_regex=None, expect_regex=None, expect_errors=False): """ Helper function that loads each template on the DUT and verifies the expected behavior Args: duthost (AnsibleHost): instance syslog_marker (string): marker prefix name to be inserted in the syslog ignore_regex (string): file containing regexs to be ignored by loganalyzer expect_regex (string): regex pattern that is expected to be present in the syslog expect_erros (bool): if the test expects an error msg in the syslog or not. Default: False Returns: None """ loganalyzer = LogAnalyzer(ansible_host=duthost, marker_prefix=syslog_marker) if ignore_regex: ignore_file = os.path.join(TEMPLATES_DIR, ignore_regex) reg_exp = loganalyzer.parse_regexp_file(src=ignore_file) loganalyzer.ignore_regex.extend(reg_exp) if expect_regex: loganalyzer.expect_regex = [] loganalyzer.expect_regex.extend(expect_regex) loganalyzer.match_regex = [] with loganalyzer(fail=not expect_errors): cmd = "sonic-cfggen -j {}/{}.json --write-to-db".format( DUT_RUN_DIR, syslog_marker) out = duthost.command(cmd) pytest_assert( out["rc"] == 0, "Failed to execute cmd {}: Error: {}".format( cmd, out["stderr"]))
def test_pfcwd_port_toggle(self, request, fake_storm, setup_pfc_test, fanout_graph_facts, tbinfo, ptfhost, duthosts, rand_one_dut_hostname, fanouthosts): """ Test PfCWD functionality after toggling port Test verifies the following: 1. Select the port and lossless queue 2. Start PFCWD on selected test port 3. Start PFC storm on selected test port and lossless queue 4. Verify that PFC storm is detected 5. Stop PFC storm on selected test port and lossless queue 6. Verify that PFC storm is restored 7. Toggle test port (put administrativelly down and then up) 8. Verify that PFC storm is not detected Args: request(object) : pytest request object fake_storm(fixture) : Module scoped fixture for enable/disable fake storm setup_pfc_test(fixture) : Module scoped autouse fixture for PFCWD fanout_graph_facts(fixture) : Fanout graph info tbinfo(fixture) : Testbed info ptfhost(AnsibleHost) : PTF host instance duthost(AnsibleHost) : DUT instance fanouthosts(AnsibleHost): Fanout instance """ duthost = duthosts[rand_one_dut_hostname] setup_info = setup_pfc_test self.fanout_info = fanout_graph_facts self.ptf = ptfhost self.dut = duthost self.fanout = fanouthosts self.timers = setup_info['pfc_timers'] self.ports = setup_info['selected_test_ports'] self.neighbors = setup_info['neighbors'] dut_facts = self.dut.facts self.peer_dev_list = dict() self.fake_storm = fake_storm self.storm_hndle = None action = "dontcare" for idx, port in enumerate(self.ports): logger.info("") logger.info("--- Testing port toggling with PFCWD enabled on {} ---".format(port)) self.setup_test_params(port, setup_info['vlan'], init=not idx) self.traffic_inst = SendVerifyTraffic(self.ptf, dut_facts['router_mac'], self.pfc_wd) pfc_wd_restore_time_large = request.config.getoption("--restore-time") # wait time before we check the logs for the 'restore' signature. 'pfc_wd_restore_time_large' is in ms. self.timers['pfc_wd_wait_for_restore_time'] = int(pfc_wd_restore_time_large / 1000 * 2) try: # Verify that PFC storm is detected and restored self.stats = PfcPktCntrs(self.dut, action) logger.info("{} on port {}".format(WD_ACTION_MSG_PFX[action], port)) self.run_test(self.dut, port, action) # Toggle test port and verify that PFC storm is not detected loganalyzer = LogAnalyzer(ansible_host=self.dut, marker_prefix="pfc_function_storm_detect_{}_port_{}".format(action, port)) marker = loganalyzer.init() ignore_file = os.path.join(TEMPLATES_DIR, "ignore_pfc_wd_messages") reg_exp = loganalyzer.parse_regexp_file(src=ignore_file) loganalyzer.ignore_regex.extend(reg_exp) loganalyzer.expect_regex = [] loganalyzer.expect_regex.extend([EXPECT_PFC_WD_DETECT_RE]) loganalyzer.match_regex = [] port_toggle(self.dut, tbinfo, ports=[port]) logger.info("Verify that PFC storm is not detected on port {}".format(port)) result = loganalyzer.analyze(marker, fail=False) if result["total"]["expected_missing_match"] == 0: pytest.fail(result) except Exception as e: pytest.fail(str(e)) finally: if self.storm_hndle: logger.info("--- Stop PFC storm on port {}".format(port)) self.storm_hndle.stop_storm() else: logger.info("--- Disabling fake storm on port {} queue {}".format(port, self.queue_oid)) PfcCmd.set_storm_status(self.dut, self.queue_oid, "disabled") logger.info("--- Stop PFCWD ---") self.dut.command("pfcwd stop")
def advanceboot_loganalyzer(duthosts, rand_one_dut_hostname, request): """ Advance reboot log analysis. This fixture starts log analysis at the beginning of the test. At the end, the collected expect messages are verified and timing of start/stop is calculated. Args: duthosts : List of DUT hosts rand_one_dut_hostname: hostname of a randomly selected DUT """ duthost = duthosts[rand_one_dut_hostname] test_name = request.node.name if "warm" in test_name: reboot_type = "warm" elif "fast" in test_name: reboot_type = "fast" else: reboot_type = "unknown" # Currently, advanced reboot test would skip for kvm platform if the test has no device_type marker for vs. # Doing the same skip logic in this fixture to avoid running loganalyzer without the test executed if duthost.facts['platform'] == 'x86_64-kvm_x86_64-r0': device_marks = [ arg for mark in request.node.iter_markers(name='device_type') for arg in mark.args ] if 'vs' not in device_marks: pytest.skip('Testcase not supported for kvm') loganalyzer = LogAnalyzer( ansible_host=duthost, marker_prefix="test_advanced_reboot_{}".format(test_name), additional_files={ '/var/log/swss/sairedis.rec': 'recording on: /var/log/swss/sairedis.rec', '/var/log/frr/bgpd.log': '' }) marker = loganalyzer.init() loganalyzer.load_common_config() ignore_file = os.path.join(TEMPLATES_DIR, "ignore_boot_messages") expect_file = os.path.join(TEMPLATES_DIR, "expect_boot_messages") ignore_reg_exp = loganalyzer.parse_regexp_file(src=ignore_file) expect_reg_exp = loganalyzer.parse_regexp_file(src=expect_file) loganalyzer.ignore_regex.extend(ignore_reg_exp) loganalyzer.expect_regex = [] loganalyzer.expect_regex.extend(expect_reg_exp) loganalyzer.match_regex = [] yield result = loganalyzer.analyze(marker, fail=False) analyze_result = {"time_span": dict(), "offset_from_kexec": dict()} offset_from_kexec = dict() for key, messages in result["expect_messages"].items(): if "syslog" in key or "bgpd.log" in key: analyze_log_file(duthost, messages, analyze_result, offset_from_kexec) elif "sairedis.rec" in key: analyze_sairedis_rec(messages, analyze_result, offset_from_kexec) for marker, time_data in analyze_result["offset_from_kexec"].items(): marker_start_time = time_data.get("timestamp", {}).get("Start") reboot_start_time = analyze_result.get("reboot_time", {}).get("timestamp", {}).get("Start") if reboot_start_time and reboot_start_time != "N/A" and marker_start_time: time_data["time_taken"] = (datetime.strptime(marker_start_time, FMT) -\ datetime.strptime(reboot_start_time, FMT)).total_seconds() else: time_data["time_taken"] = "N/A" get_data_plane_report(analyze_result, reboot_type) result_summary = get_report_summary(analyze_result, reboot_type) logging.info(json.dumps(analyze_result, indent=4)) logging.info(json.dumps(result_summary, indent=4)) report_file_name = request.node.name + "_report.json" summary_file_name = request.node.name + "_summary.json" report_file_dir = os.path.realpath((os.path.join(os.path.dirname(__file__),\ "../logs/platform_tests/"))) report_file_path = report_file_dir + "/" + report_file_name summary_file_path = report_file_dir + "/" + summary_file_name if not os.path.exists(report_file_dir): os.makedirs(report_file_dir) with open(report_file_path, 'w') as fp: json.dump(analyze_result, fp, indent=4) with open(summary_file_path, 'w') as fp: json.dump(result_summary, fp, indent=4)
def advanceboot_loganalyzer(duthosts, rand_one_dut_hostname): """ Advance reboot log analysis. This fixture starts log analysis at the beginning of the test. At the end, the collected expect messages are verified and timing of start/stop is calculated. Args: duthosts : List of DUT hosts rand_one_dut_hostname: hostname of a randomly selected DUT """ duthost = duthosts[rand_one_dut_hostname] loganalyzer = LogAnalyzer(ansible_host=duthost, marker_prefix="test_advanced_reboot") marker = loganalyzer.init() loganalyzer.load_common_config() ignore_file = os.path.join(TEMPLATES_DIR, "ignore_boot_messages") expect_file = os.path.join(TEMPLATES_DIR, "expect_boot_messages") ignore_reg_exp = loganalyzer.parse_regexp_file(src=ignore_file) expect_reg_exp = loganalyzer.parse_regexp_file(src=expect_file) loganalyzer.ignore_regex.extend(ignore_reg_exp) loganalyzer.expect_regex = [] loganalyzer.expect_regex.extend(expect_reg_exp) loganalyzer.match_regex = [] yield result = loganalyzer.analyze(marker, fail=False) messages = result["expect_messages"].values() if not messages: logging.error("Expected messages not found in syslog") return messages = messages[0] service_restart_times = dict() service_patterns = { "Stopping": re.compile(r'.*Stopping.*service.*'), "Stopped": re.compile(r'.*Stopped.*service.*'), "Starting": re.compile(r'.*Starting.*service.*'), "Started": re.compile(r'.*Started.*service.*') } def service_time_check(message, status): time = message.split(duthost.hostname)[0].strip() service_name = message.split(status + " ")[1].split()[0] service_dict = service_restart_times.get(service_name, {"timestamp": {}}) timestamps = service_dict.get("timestamp") if status in timestamps: service_dict[status + " count"] = service_dict.get(status + " count", 1) + 1 timestamps[status] = time service_restart_times.update({service_name: service_dict}) for message in messages: for status, pattern in service_patterns.items(): if re.search(pattern, message): service_time_check(message, status) loganalyzer.save_extracted_log(dest="/tmp/log/syslog") logging.info(json.dumps(service_restart_times, indent=4)) FMT = "%b %d %H:%M:%S.%f" for _, timings in service_restart_times.items(): timestamps = timings["timestamp"] timings["stop_time"] = (datetime.strptime(timestamps["Stopped"], FMT) -\ datetime.strptime(timestamps["Stopping"], FMT)).total_seconds() \ if "Stopped" in timestamps and "Stopping" in timestamps else None timings["start_time"] = (datetime.strptime(timestamps["Started"], FMT) -\ datetime.strptime(timestamps["Starting"], FMT)).total_seconds() \ if "Started" in timestamps and "Starting" in timestamps else None timings["reboot_time"] = (datetime.strptime(timestamps["Started"], FMT) -\ datetime.strptime(timestamps["Stopped"], FMT)).total_seconds() \ if "Started" in timestamps and "Stopped" in timestamps else None files = glob.glob('/tmp/*-report.json') if files: filepath = files[0] with open(filepath) as json_file: report = json.load(json_file) service_restart_times.update(report) result = service_restart_times logging.info(json.dumps(result, indent=4))
class TestPfcwdWb(SetupPfcwdFunc): """ Test PFCwd warm-reboot function and supporting methods """ def storm_detect_path(self, port, queue, first_detect_after_wb=False): """ Storm detection action and associated verifications Args: port(string) : DUT port queue(int): queue on the port that will be stormed first_detect_after_wb(bool): first detect iteration after warm reboot (default: False) """ # for the first iteration after wb, do not write a marker to the log but specify the start msg from # where to search the logs start_marker = None if first_detect_after_wb: start_marker = "NOTICE swss#orchagent: :- setWarmStartState: orchagent warm start state changed to initialized" self.loganalyzer = LogAnalyzer(ansible_host=self.dut, marker_prefix="pfcwd_wb_storm_detect_port_{}_queue_{}".format(port, queue), start_marker=start_marker) marker = self.loganalyzer.init() time.sleep(5) ignore_file = os.path.join(TEMPLATES_DIR, "ignore_pfc_wd_messages") reg_exp = self.loganalyzer.parse_regexp_file(src=ignore_file) self.loganalyzer.ignore_regex.extend(reg_exp) self.loganalyzer.expect_regex = [] self.loganalyzer.expect_regex.extend([EXPECT_PFC_WD_DETECT_RE]) self.loganalyzer.match_regex = [] # ongoing storm. no need to start a new one if not first_detect_after_wb: if not self.pfc_wd['fake_storm']: self.storm_handle[port][queue].start_storm() time.sleep(15 * len(self.pfc_wd['queue_indices'])) else: PfcCmd.set_storm_status(self.dut, self.oid_map[(port, queue)], "enabled") time.sleep(5) else: # for the first iteration after wb, check the log for detect msgs for the ongoing storms self.loganalyzer.expected_matches_target = len(self.ports) * len(self.pfc_wd['queue_indices']) time.sleep(20) # storm detect check logger.info("Verify if PFC storm is detected on port {} queue {}".format(port, queue)) self.loganalyzer.analyze(marker) def storm_restore_path(self, port, queue): """ Storm restoration action and associated verifications Args: port(string) : DUT port queue(int): queue on the port where storm would be restored """ marker = self.loganalyzer.update_marker_prefix("pfcwd_wb_storm_restore_port_{}_queue_{}".format(port, queue)) time.sleep(5) ignore_file = os.path.join(TEMPLATES_DIR, "ignore_pfc_wd_messages") reg_exp = self.loganalyzer.parse_regexp_file(src=ignore_file) self.loganalyzer.ignore_regex.extend(reg_exp) self.loganalyzer.expect_regex = [] self.loganalyzer.expect_regex.extend([EXPECT_PFC_WD_RESTORE_RE]) self.loganalyzer.match_regex = [] self.loganalyzer.expected_matches_target = 0 if not self.pfc_wd['fake_storm']: self.storm_handle[port][queue].stop_storm() time.sleep(15) else: PfcCmd.set_storm_status(self.dut, self.oid_map[(port, queue)], "disabled") time.sleep(5) # storm restore check logger.info("Verify if PFC storm is restored on port {}".format(port)) self.loganalyzer.analyze(marker) def defer_fake_storm(self, port, queue, start_defer, stop_defer): time.sleep(start_defer) DUT_ACTIVE.wait() PfcCmd.set_storm_status(self.dut, self.oid_map[(port, queue)], "enabled") time.sleep(stop_defer) DUT_ACTIVE.wait() PfcCmd.set_storm_status(self.dut, self.oid_map[(port, queue)], "disabled") def run_test(self, port, queue, detect=True, storm_start=True, first_detect_after_wb=False, storm_defer=False): """ Test method that invokes the storm detection and restoration path which includes the traffic test Args: port(string) : DUT port queue(int): queue on the port which would be stormed/restored detect(bool): if the detect logic needs to be called (default: True) storm_start(bool): used to decide certain actions in the detect logic (default: True) first_detect_after_wb(bool): used to decide certain actions in the detect logic (default: False) storm_defer(bool): use the storm defer logic or not (default: False) """ # for deferred storm, return to main loop for next action which is warm boot if storm_defer: if not self.pfc_wd['fake_storm']: self.storm_handle[port][queue].start_storm() self.storm_handle[port][queue].stop_storm() else: thread = InterruptableThread( target=self.defer_fake_storm, args=(port, queue, self.pfc_wd['storm_start_defer'], self.pfc_wd['storm_stop_defer'])) thread.daemon = True thread.start() self.storm_threads.append(thread) return if detect: if storm_start or first_detect_after_wb: logger.info("--- Storm detection path for port {} queue {} ---".format(port, queue)) self.storm_detect_path(port, queue, first_detect_after_wb=first_detect_after_wb) else: logger.info("--- Storm restoration path for port {} queue {} ---".format(port, queue)) self.storm_restore_path(port, queue) # test pfcwd functionality on a storm/restore self.traffic_inst.verify_wd_func(detect=detect) @pytest.fixture(autouse=True) def pfcwd_wb_test_cleanup(self): """ Cleanup method """ yield # stop all threads that might stuck in wait DUT_ACTIVE.set() for thread in self.storm_threads: thread_exception = thread.join(timeout=0.1, suppress_exception=True) if thread_exception: logger.debug("Exception in thread %r:", thread) logger.debug( "".join(traceback.format_exception(*thread_exception)) ) self.stop_all_storm() time.sleep(5) logger.info("--- Stop PFC WD ---") self.dut.command("pfcwd stop") def stop_all_storm(self): """ Stop all the storms after each test run """ if self.storm_handle: logger.info("--- Stopping storm on all ports ---") for port in self.storm_handle: for queue in self.storm_handle[port]: if self.storm_handle[port][queue]: logger.info("--- Stop pfc storm on port {} queue {}".format(port, queue)) self.storm_handle[port][queue].stop_storm() else: logger.info("--- Disabling fake storm on port {} queue {}".format(port, queue)) PfcCmd.set_storm_status(self.dut, self.oid_map[(port, queue)], "disabled") def pfcwd_wb_helper(self, fake_storm, testcase_actions, setup_pfc_test, fanout_graph_facts, ptfhost, duthost, localhost, fanouthosts, two_queues): """ Helper method that initializes the vars and starts the test execution Args: fake_storm(bool): if fake storm is enabled or disabled testcase_actions(list): list of actions that the test will go through setup_pfc_test(fixture): module scoped autouse fixture fanout_graph_facts(fixture): fanout info ptfhost(AnsibleHost): PTF instance duthost(AnsibleHost): DUT instance localhost(AnsibleHost): local instance fanouthosts(AnsibleHost): fanout instance """ setup_info = setup_pfc_test self.fanout_info = fanout_graph_facts self.ptf = ptfhost self.dut = duthost self.fanout = fanouthosts self.timers = setup_info['pfc_timers'] self.ports = setup_info['selected_test_ports'] self.neighbors = setup_info['neighbors'] dut_facts = self.dut.facts self.peer_dev_list = dict() self.seed = int(datetime.datetime.today().day) self.two_queues = two_queues self.storm_handle = dict() bitmask = 0 storm_deferred = 0 storm_restored = 0 self.max_wait = 0 self.fake_storm = fake_storm self.oid_map = dict() self.storm_threads = [] for t_idx, test_action in enumerate(testcase_actions): if 'warm-reboot' in test_action: reboot(self.dut, localhost, reboot_type="warm") continue # one of the factors to decide if the storm needs to be started storm_restored = bitmask and (bitmask & 2) # if the action prior to the warm-reboot was a 'storm_defer', ensure that all the storms are # stopped storm_deferred = bitmask and (bitmask & 4) if storm_deferred: logger.info("Wait for all the deferred storms to start and stop ...") join_all(self.storm_threads, self.max_wait) self.storm_threads = [] self.storm_handle = dict() bitmask = (1 << ACTIONS[test_action]) for p_idx, port in enumerate(self.ports): logger.info("") logger.info("--- Testing on {} ---".format(port)) self.setup_test_params(port, setup_info['vlan'], p_idx) for q_idx, queue in enumerate(self.pfc_wd['queue_indices']): if not t_idx or storm_deferred: if not q_idx: self.storm_handle[port] = dict() self.storm_handle[port][queue] = None # setup the defer parameters if the storm is deferred currently if (bitmask & 4): self.storm_defer_setup() if not self.pfc_wd['fake_storm']: self.storm_setup(port, queue, storm_defer=(bitmask & 4)) else: self.oid_map[(port, queue)] = PfcCmd.get_queue_oid(self.dut, port, queue) self.traffic_inst = SendVerifyTraffic(self.ptf, dut_facts['router_mac'], self.pfc_wd, queue) self.run_test(port, queue, detect=(bitmask & 1), storm_start=not t_idx or storm_deferred or storm_restored, first_detect_after_wb=(t_idx == 2 and not p_idx and not q_idx and not storm_deferred), storm_defer=(bitmask & 4)) @pytest.fixture(params=['no_storm', 'storm', 'async_storm']) def testcase_action(self, request): """ Parameters to invoke the pfcwd warm boot test Args: request(pytest) : pytest request object Yields: testcase_action(string) : testcase to execute """ yield request.param def test_pfcwd_wb(self, fake_storm, testcase_action, setup_pfc_test, fanout_graph_facts, ptfhost, duthosts, rand_one_dut_hostname, localhost, fanouthosts, two_queues): """ Tests PFCwd warm reboot with various testcase actions Args: fake_storm(fixture): fake storm status testcase_action(fixture): testcase to execute (values: 'no_storm', 'storm', 'async_storm') 'no_storm' : PFCwd storm detection/restore before and after warm reboot 'storm' : PFC storm started and detected before warm-reboot. Storm is ongoing during warm boot and lasts past the warm boot finish. Verifies if the storm is detected after warm-reboot. PFC storm is stopped and 465 restored after warm boot 'async_storm': PFC storm asynchronously starts at a random time and lasts a random period at fanout. Warm reboot is done. Wait for all the storms to finish and then verify the storm detect/restore logic setup_pfc_test(fixture) : Module scoped autouse fixture for PFCwd fanout_graph_facts(fixture) : fanout graph info ptfhost(AnsibleHost) : ptf host instance duthost(AnsibleHost) : DUT instance localhost(AnsibleHost) : localhost instance fanouthosts(AnsibleHost): fanout instance """ duthost = duthosts[rand_one_dut_hostname] logger.info("--- {} ---".format(TESTCASE_INFO[testcase_action]['desc'])) self.pfcwd_wb_helper(fake_storm, TESTCASE_INFO[testcase_action]['test_sequence'], setup_pfc_test, fanout_graph_facts, ptfhost, duthost, localhost, fanouthosts, two_queues)