def recover_ports(duthosts, fanouthosts):
    """Module level fixture that automatically does the following job:
    1. Build global candidate test ports
    2. Save fanout port state before the test
    3. Restore fanout and DUT after test

    Args:
        duthosts: DUT host objects
        fanouthosts: Fanout objects
    """
    global cadidate_test_ports
    fanout_original_port_states = {}
    logger.info('Collecting existing port configuration for DUT and fanout...')
    for duthost in duthosts:
        # Only do the sampling when there are no candidates
        if duthost.hostname in cadidate_test_ports:
            continue
        all_ports = build_test_candidates(duthost, fanouthosts, 'all_ports')
        # Testing all ports takes too much time (sometimes more than an hour),
        # so we choose at most 3 ports randomly as the candidate ports
        candidates = random.sample(all_ports, min(3, len(all_ports)))
        cadidate_test_ports[duthost.hostname] = {}
        for dut_port, fanout, fanout_port in candidates:
            auto_neg_mode = fanout.get_auto_negotiation_mode(fanout_port)
            # Only keep ports whose fanout supports auto-negotiation control
            if auto_neg_mode is None:
                continue
            cadidate_test_ports[duthost.hostname][dut_port] = (
                duthost, dut_port, fanout, fanout_port)
            # Save this fanout port's original (auto-neg mode, speed) so it can
            # be restored after the test.
            # BUGFIX: previously the state was saved in a separate loop that
            # reused the stale auto_neg_mode from the last candidate iteration,
            # so every port was recorded with the wrong mode; capture it
            # per-port here instead.
            speed = fanout.get_speed(fanout_port)
            if fanout not in fanout_original_port_states:
                fanout_original_port_states[fanout] = {}
            fanout_original_port_states[fanout][fanout_port] = (auto_neg_mode, speed)
        # BUGFIX: check the per-DUT candidate dict; the outer dict is always
        # non-empty once the hostname key is inserted above, so the previous
        # check (len(cadidate_test_ports) > 0) could never trigger the skip.
        pytest_require(
            len(cadidate_test_ports[duthost.hostname]) > 0,
            "Skip test due to fanout port does not support setting auto-neg mode")
    yield
    # Restore each fanout port to its pre-test auto-neg mode and speed.
    logger.info('Recovering port configuration for fanout...')
    for fanout, port_data in fanout_original_port_states.items():
        for port, state in port_data.items():
            fanout.set_auto_negotiation_mode(port, state[0])
            fanout.set_speed(port, state[1])
    # Reload config to bring the DUT ports back to their original state.
    logger.info('Recovering port configuration for DUT...')
    for duthost in duthosts:
        config_reload(duthost)
def run_link_flap_test(self, dut, fanouthosts, port):
    """Test runner of link flap test.

    Builds the candidate (dut_port, fanout, fanout_port) tuples and toggles
    each candidate link once via toggle_one_link.

    Args:
        dut: DUT host object
        fanouthosts: List of fanout switch instances.
        port: DUT port selector passed to build_test_candidates
            (e.g. a specific port name, or 'all_ports' as used by other
            callers in this module).
    """
    # self.completeness_level presumably scales how many candidates are
    # tested -- defined on the enclosing test class; confirm there.
    candidates = build_test_candidates(dut, fanouthosts, port,
                                       self.completeness_level)
    pytest_require(
        candidates,
        "Didn't find any port that is admin up and present in the connection graph")
    for dut_port, fanout, fanout_port in candidates:
        toggle_one_link(dut, dut_port, fanout, fanout_port)
def recover_ports(duthosts, enum_dut_portname_module_fixture, fanouthosts):
    """Module level fixture that automatically does the following job:
    1. Build global candidate test ports
    2. Save fanout port state before the test
    3. Restore fanout and DUT after test

    Args:
        duthosts: DUT host objects
        enum_dut_portname_module_fixture (str): DUT port name
        fanouthosts: Fanout objects
    """
    global cadidate_test_ports
    fanout_original_port_states = {}
    dutname, portname = decode_dut_port_name(enum_dut_portname_module_fixture)
    logger.info('Collecting existing port configuration for DUT and fanout...')
    for duthost in duthosts:
        if dutname == 'unknown' or dutname == duthost.hostname:
            all_ports = build_test_candidates(duthost, fanouthosts, portname)
            # Testing all ports takes too much time (sometimes more than an
            # hour), so we choose at most 3 ports randomly as the candidates.
            # BUGFIX: cap the sample size at len(all_ports); random.sample
            # raises ValueError when the sample size exceeds the population.
            cadidate_test_ports[duthost] = random.sample(
                all_ports, min(3, len(all_ports)))
            # Save each fanout port's original (auto-neg mode, speed) so it
            # can be restored after the test.
            for _, fanout, fanout_port in cadidate_test_ports[duthost]:
                auto_neg_mode = fanout.get_auto_negotiation_mode(fanout_port)
                speed = fanout.get_speed(fanout_port)
                if fanout not in fanout_original_port_states:
                    fanout_original_port_states[fanout] = {}
                fanout_original_port_states[fanout][fanout_port] = (
                    auto_neg_mode, speed)
    yield
    # Restore each fanout port to its pre-test auto-neg mode and speed.
    logger.info('Recovering port configuration for fanout...')
    for fanout, port_data in fanout_original_port_states.items():
        for port, state in port_data.items():
            fanout.set_auto_negotiation_mode(port, state[0])
            fanout.set_speed(port, state[1])
    # Reload config to bring the DUT ports back to their original state.
    logger.info('Recovering port configuration for DUT...')
    for duthost in duthosts:
        config_reload(duthost)
def test_cont_link_flap(self, request, duthosts, nbrhosts,
                        enum_rand_one_per_hwsku_frontend_hostname,
                        fanouthosts, bring_up_dut_interfaces, tbinfo):
    """Validates that continuous link flap works as expected.

    Test steps:
        1.) Flap all interfaces one by one in 1-3 iteration
            to cause BGP Flaps.
        2.) Flap all interfaces on peer (FanOutLeaf) one by one 1-3 iteration
            to cause BGP Flaps.
        3.) Watch for memory (show system-memory), orchagent CPU Utilization
            and Redis_memory.

    Pass Criteria: All routes must be re-learned with < 5% increase in Redis
        and ORCH agent CPU consumption below threshold after 3 mins after
        stopping flaps.

    Args:
        request: pytest request object (used for --orch_cpu_threshold).
        duthosts: DUT host objects.
        nbrhosts: Neighbor (VM) host objects, used for BGP debug collection.
        enum_rand_one_per_hwsku_frontend_hostname: hostname of the DUT under test.
        fanouthosts: Fanout switch objects.
        bring_up_dut_interfaces: fixture ensuring DUT interfaces are up.
        tbinfo: testbed info dict passed through to port_toggle.
    """
    duthost = duthosts[enum_rand_one_per_hwsku_frontend_hostname]
    orch_cpu_threshold = request.config.getoption("--orch_cpu_threshold")

    # Record memory status at start
    memory_output = duthost.shell("show system-memory")["stdout"]
    logging.info("Memory Status at start: %s", memory_output)

    # Record Redis Memory at start.
    # NOTE: raw string so the sed \( \) \1 escapes reach the shell unchanged;
    # the previous non-raw literal relied on the invalid escape "\(" which
    # emits a SyntaxWarning on modern Python (runtime value is identical).
    start_time_redis_memory = duthost.shell(
        r"redis-cli info memory | grep used_memory_human | sed -e 's/.*:\(.*\)M/\1/'"
    )["stdout"]
    logging.info("Redis Memory: %s M", start_time_redis_memory)

    # Record ipv4/ipv6 route counts at start
    sumv4, sumv6 = duthost.get_ip_route_summary()
    totalsv4 = sumv4.get('Totals', {})
    totalsv6 = sumv6.get('Totals', {})
    start_time_ipv4_route_counts = totalsv4.get('routes', 0)
    start_time_ipv6_route_counts = totalsv6.get('routes', 0)
    logging.info("IPv4 routes: start {}, summary {}".format(
        start_time_ipv4_route_counts, sumv4))
    logging.info("IPv6 routes: start {}, summary {}".format(
        start_time_ipv6_route_counts, sumv6))

    # Make Sure Orch CPU < orch_cpu_threshold before starting test.
    logging.info(
        "Make Sure orchagent CPU utilization is less that %d before link flap",
        orch_cpu_threshold)
    pytest_assert(
        wait_until(100, 2, 0, check_orch_cpu_utilization, duthost,
                   orch_cpu_threshold),
        "Orch CPU utilization {} > orch cpu threshold {} before link flap".format(
            duthost.shell(
                "show processes cpu | grep orchagent | awk '{print $9}'")
            ["stdout"], orch_cpu_threshold))

    # Flap all interfaces one by one on DUT
    for iteration in range(3):
        logging.info("%d Iteration flap all interfaces one by one on DUT",
                     iteration + 1)
        port_toggle(duthost, tbinfo, watch=True)

    # Flap all interfaces one by one on Peer Device
    for iteration in range(3):
        logging.info(
            "%d Iteration flap all interfaces one by one on Peer Device",
            iteration + 1)
        candidates = build_test_candidates(duthost, fanouthosts, 'all_ports')
        pytest_require(
            candidates,
            "Didn't find any port that is admin up and present in the connection graph"
        )
        for dut_port, fanout, fanout_port in candidates:
            toggle_one_link(duthost, dut_port, fanout, fanout_port, watch=True)

    # Every port channel on the DUT must be operationally up after the flaps.
    config_facts = duthost.get_running_config_facts()
    for portchannel in config_facts['PORTCHANNEL'].keys():
        pytest_assert(
            check_portchannel_status(duthost, portchannel, "up", verbose=True),
            "Fail: dut interface {}: link operational down".format(portchannel))

    # Make Sure all ipv4/ipv6 routes are relearned with jitter of ~5
    if not wait_until(120, 2, 0, check_bgp_routes, duthost,
                      start_time_ipv4_route_counts,
                      start_time_ipv6_route_counts):
        endv4, endv6 = duthost.get_ip_route_summary()
        failmsg = []
        failmsg.append(
            "IP routes are not equal after link flap: before ipv4 {} ipv6 {}, after ipv4 {} ipv6 {}"
            .format(sumv4, sumv6, endv4, endv6))
        # Collect BGP summaries from each known neighbor for the failure report.
        nei_meta = config_facts.get('DEVICE_NEIGHBOR_METADATA', {})
        for k in nei_meta.keys():
            nbrhost = nbrhosts[k]['host']
            # BUGFIX: the original set res = "" for unknown neighbor types and
            # then unconditionally did failmsg.append(res['stdout']), raising
            # TypeError (string indices must be integers); extract stdout only
            # for the known host types.
            if isinstance(nbrhost, EosHost):
                res = nbrhost.eos_command(commands=['show ip bgp sum'])['stdout']
            elif isinstance(nbrhost, SonicHost):
                res = nbrhost.command('vtysh -c "show ip bgp sum"')['stdout']
            else:
                res = ""
            failmsg.append(res)
        pytest.fail(str(failmsg))

    # Record memory status at end
    memory_output = duthost.shell("show system-memory")["stdout"]
    logging.info("Memory Status at end: %s", memory_output)

    # Record orchagent CPU utilization at end
    orch_cpu = duthost.shell(
        "show processes cpu | grep orchagent | awk '{print $9}'")["stdout"]
    logging.info("Orchagent CPU Util at end: %s", orch_cpu)

    # Record Redis Memory at end (same raw-string note as above)
    end_time_redis_memory = duthost.shell(
        r"redis-cli info memory | grep used_memory_human | sed -e 's/.*:\(.*\)M/\1/'"
    )["stdout"]
    logging.info("Redis Memory at start: %s M", start_time_redis_memory)
    logging.info("Redis Memory at end: %s M", end_time_redis_memory)

    # Calculate diff in Redis memory
    incr_redis_memory = float(end_time_redis_memory) - float(
        start_time_redis_memory)
    logging.info("Redis absolute difference: %d", incr_redis_memory)

    # Check redis memory only if it is increased else default to pass
    if incr_redis_memory > 0.0:
        percent_incr_redis_memory = (incr_redis_memory /
                                     float(start_time_redis_memory)) * 100
        logging.info("Redis Memory percentage Increase: %d",
                     percent_incr_redis_memory)
        pytest_assert(
            percent_incr_redis_memory < 5,
            "Redis Memory Increase more than expected: {}".format(
                percent_incr_redis_memory))

    # Orchagent CPU should consume < orch_cpu_threshold at last.
    logging.info("watch orchagent CPU utilization when it goes below %d",
                 orch_cpu_threshold)
    pytest_assert(
        wait_until(45, 2, 0, check_orch_cpu_utilization, duthost,
                   orch_cpu_threshold),
        # BUGFIX: the message said "before link flap" (copy-pasted from the
        # pre-test check) although this assertion runs after the flaps.
        "Orch CPU utilization {} > orch cpu threshold {} after link flap".format(
            duthost.shell(
                "show processes cpu | grep orchagent | awk '{print $9}'")
            ["stdout"], orch_cpu_threshold))
def test_cont_link_flap(self, request, duthosts, rand_one_dut_hostname,
                        fanouthosts, bring_up_dut_interfaces, tbinfo):
    """Validates that continuous link flap works as expected.

    Test steps:
        1.) Flap all interfaces one by one in 1-3 iteration
            to cause BGP Flaps.
        2.) Flap all interfaces on peer (FanOutLeaf) one by one 1-3 iteration
            to cause BGP Flaps.
        3.) Watch for memory (show system-memory), orchagent CPU Utilization
            and Redis_memory.

    Pass Criteria: All routes must be re-learned with < 5% increase in Redis
        and ORCH agent CPU consumption below threshold after 3 mins after
        stopping flaps.

    Args:
        request: pytest request object (used for --orch_cpu_threshold).
        duthosts: DUT host objects.
        rand_one_dut_hostname: hostname of the DUT under test.
        fanouthosts: Fanout switch objects.
        bring_up_dut_interfaces: fixture ensuring DUT interfaces are up.
        tbinfo: testbed info dict passed through to port_toggle.
    """
    duthost = duthosts[rand_one_dut_hostname]
    orch_cpu_threshold = request.config.getoption("--orch_cpu_threshold")

    # Record memory status at start
    memory_output = duthost.shell("show system-memory")["stdout"]
    logging.info("Memory Status at start: %s", memory_output)

    # Record Redis Memory at start.
    # NOTE: raw string so the sed \( \) \1 escapes reach the shell unchanged;
    # the previous non-raw literal relied on the invalid escape "\(" which
    # emits a SyntaxWarning on modern Python (runtime value is identical).
    start_time_redis_memory = duthost.shell(
        r"redis-cli info memory | grep used_memory_human | sed -e 's/.*:\(.*\)M/\1/'"
    )["stdout"]
    logging.info("Redis Memory: %s M", start_time_redis_memory)

    # Record ipv4 route counts at start
    start_time_ipv4_route_counts = duthost.shell(
        "show ip route summary | grep Total | awk '{print $2}'")["stdout"]
    logging.info("IPv4 routes at start: %s", start_time_ipv4_route_counts)

    # Record ipv6 route counts at start
    start_time_ipv6_route_counts = duthost.shell(
        "show ipv6 route summary | grep Total | awk '{print $2}'")["stdout"]
    logging.info("IPv6 routes at start %s", start_time_ipv6_route_counts)

    # Make Sure Orch CPU < orch_cpu_threshold before starting test.
    logging.info(
        "Make Sure orchagent CPU utilization is less that %d before link flap",
        orch_cpu_threshold)
    pytest_assert(
        wait_until(100, 2, check_orch_cpu_utilization, duthost,
                   orch_cpu_threshold),
        "Orch CPU utilization {} > orch cpu threshold {} before link flap".format(
            duthost.shell(
                "show processes cpu | grep orchagent | awk '{print $9}'")
            ["stdout"], orch_cpu_threshold))

    # Flap all interfaces one by one on DUT
    for iteration in range(3):
        logging.info("%d Iteration flap all interfaces one by one on DUT",
                     iteration + 1)
        port_toggle(duthost, tbinfo, watch=True)

    # Flap all interfaces one by one on Peer Device
    for iteration in range(3):
        logging.info(
            "%d Iteration flap all interfaces one by one on Peer Device",
            iteration + 1)
        candidates = build_test_candidates(duthost, fanouthosts, 'all_ports')
        pytest_require(
            candidates,
            "Didn't find any port that is admin up and present in the connection graph"
        )
        for dut_port, fanout, fanout_port in candidates:
            toggle_one_link(duthost, dut_port, fanout, fanout_port, watch=True)

    # Make Sure all ipv4 routes are relearned with jitter of ~5
    logging.info("IPv4 routes at start: %s", start_time_ipv4_route_counts)
    pytest_assert(
        wait_until(60, 1, check_bgp_routes, duthost,
                   start_time_ipv4_route_counts, True),
        "Ipv4 routes are not equal after link flap")

    # Make Sure all ipv6 routes are relearned with jitter of ~5
    logging.info("IPv6 routes at start: %s", start_time_ipv6_route_counts)
    pytest_assert(
        wait_until(60, 1, check_bgp_routes, duthost,
                   start_time_ipv6_route_counts),
        "Ipv6 routes are not equal after link flap")

    # Record memory status at end
    memory_output = duthost.shell("show system-memory")["stdout"]
    logging.info("Memory Status at end: %s", memory_output)

    # Record orchagent CPU utilization at end
    orch_cpu = duthost.shell(
        "show processes cpu | grep orchagent | awk '{print $9}'")["stdout"]
    logging.info("Orchagent CPU Util at end: %s", orch_cpu)

    # Record Redis Memory at end (same raw-string note as above)
    end_time_redis_memory = duthost.shell(
        r"redis-cli info memory | grep used_memory_human | sed -e 's/.*:\(.*\)M/\1/'"
    )["stdout"]
    logging.info("Redis Memory at start: %s M", start_time_redis_memory)
    logging.info("Redis Memory at end: %s M", end_time_redis_memory)

    # Calculate diff in Redis memory
    incr_redis_memory = float(end_time_redis_memory) - float(
        start_time_redis_memory)
    logging.info("Redis absolute difference: %d", incr_redis_memory)

    # Check redis memory only if it is increased else default to pass
    if incr_redis_memory > 0.0:
        percent_incr_redis_memory = (incr_redis_memory /
                                     float(start_time_redis_memory)) * 100
        logging.info("Redis Memory percentage Increase: %d",
                     percent_incr_redis_memory)
        pytest_assert(
            percent_incr_redis_memory < 5,
            "Redis Memory Increase more than expected: {}".format(
                percent_incr_redis_memory))

    # Orchagent CPU should consume < orch_cpu_threshold at last.
    logging.info("watch orchagent CPU utilization when it goes below %d",
                 orch_cpu_threshold)
    pytest_assert(
        wait_until(45, 2, check_orch_cpu_utilization, duthost,
                   orch_cpu_threshold),
        # BUGFIX: the message said "before link flap" (copy-pasted from the
        # pre-test check) although this assertion runs after the flaps.
        "Orch CPU utilization {} > orch cpu threshold {} after link flap".format(
            duthost.shell(
                "show processes cpu | grep orchagent | awk '{print $9}'")
            ["stdout"], orch_cpu_threshold))