def _get_vendor_id(duthost):
    """Map the DUT's ASIC type to the vendor id used in syncd image names.

    Args:
        duthost (SonicHost): The target device.

    Returns:
        str: "brcm" for Broadcom devices, "mlnx" for Mellanox devices.

    Raises:
        ValueError: If the ASIC type is not supported by swap_syncd.
    """
    if is_broadcom_device(duthost):
        return "brcm"
    if is_mellanox_device(duthost):
        return "mlnx"

    error_message = '"{}" does not currently support swap_syncd'.format(duthost.facts["asic_type"])
    logger.error(error_message)
    raise ValueError(error_message)
def swap_syncd(dut):
    """
    Replaces the running syncd container with the RPC version of it.

    This will download a new Docker image to the DUT and restart the swss
    service.

    Args:
        dut (SonicHost): The target device.
    """
    # Resolve the vendor suffix used in syncd docker image names.
    if is_broadcom_device(dut):
        vendor_id = "brcm"
    elif is_mellanox_device(dut):
        vendor_id = "mlnx"
    else:
        error_message = "\"{}\" is not currently supported".format(
            dut.facts["asic_type"])
        _LOGGER.error(error_message)
        raise ValueError(error_message)

    syncd_image = "docker-syncd-{}".format(vendor_id)
    rpc_image = syncd_image + "-rpc"

    dut.command("systemctl stop swss")
    delete_container(dut, "syncd")

    # Set sysctl RCVBUF parameter for tests
    dut.command("sysctl -w net.core.rmem_max=609430500")
    # Set sysctl SENDBUF parameter for tests
    dut.command("sysctl -w net.core.wmem_max=609430500")

    # TODO: Getting the base image version should be a common utility
    version_output = dut.command(
        "sonic-cfggen -y /etc/sonic/sonic_version.yml -v build_version")
    sonic_version = version_output["stdout_lines"][0].strip()

    # Pull the RPC image from the registry and tag it so swss starts it
    # in place of the stock syncd container.
    registry = load_docker_registry_info(dut)
    download_image(dut, registry, rpc_image, sonic_version)
    tag_image(dut,
              "{}:latest".format(syncd_image),
              "{}/{}".format(registry.host, rpc_image),
              sonic_version)

    dut.command("systemctl reset-failed swss")
    dut.command("systemctl start swss")

    _LOGGER.info(
        "swss has been restarted, waiting 60 seconds to initialize...")
    time.sleep(60)
def restore_default_syncd(dut):
    """
    Replaces the running syncd with the default syncd that comes with the
    image.

    This will restart the swss service.

    Args:
        dut (SonicHost): The target device.
    """
    # Resolve the vendor suffix used in syncd docker image names.
    if is_broadcom_device(dut):
        vendor_id = "brcm"
    elif is_mellanox_device(dut):
        vendor_id = "mlnx"
    else:
        error_message = "\"{}\" is not currently supported".format(
            dut.facts["asic_type"])
        _LOGGER.error(error_message)
        raise ValueError(error_message)

    syncd_image = "docker-syncd-{}".format(vendor_id)

    dut.command("systemctl stop swss")
    delete_container(dut, "syncd")

    # TODO: Getting the base image version should be a common utility
    version_output = dut.command(
        "sonic-cfggen -y /etc/sonic/sonic_version.yml -v build_version")
    sonic_version = version_output["stdout_lines"][0].strip()

    # Re-tag the stock syncd image as :latest so swss starts it again.
    tag_image(dut,
              "{}:latest".format(syncd_image),
              syncd_image,
              sonic_version)

    dut.command("systemctl reset-failed swss")
    dut.command("systemctl start swss")

    _LOGGER.info(
        "swss has been restarted, waiting 60 seconds to initialize...")
    time.sleep(60)

    # Remove the RPC image from the DUT
    rpc_image = syncd_image + "-rpc"
    registry = load_docker_registry_info(dut)
    dut.command("docker rmi {}/{}:{}".format(registry.host,
                                             rpc_image,
                                             sonic_version))
def _get_sai_running_vendor_id(duthost):
    """
    Get the vendor id.

    Args:
        duthost (SonicHost): The target device.

    Returns:
        str: "brcm", "mlnx", or "bfn" depending on the detected vendor.

    Raises:
        ValueError: If the ASIC type is not supported by saitest.
    """
    if is_broadcom_device(duthost):
        return "brcm"
    if is_mellanox_device(duthost):
        return "mlnx"
    if is_barefoot_device(duthost):
        return "bfn"

    error_message = '"{}" does not currently support saitest'.format(duthost.facts["asic_type"])
    logger.error(error_message)
    raise ValueError(error_message)
def get_manufacturer_program_to_check(duthost):
    """
    Return the vendor-specific SDK program whose counters should be checked.

    Args:
        duthost (SonicHost): The target device.

    Returns:
        The Mellanox SX SDK program constant for Mellanox devices, or
        None when the device's vendor has no program to check.
    """
    if is_mellanox_device(duthost):
        return CounterpollConstants.SX_SDK
    # Explicitly signal "nothing to check" for other vendors instead of
    # relying on the implicit None fall-through.
    return None
def analyze_log_file(duthost, messages, result, offset_from_kexec):
    """
    Parse syslog messages and compute service restart and boot-event timings.

    Args:
        duthost (SonicHost): The target device; its hostname is used to split
            the timestamp out of each message and its os_version selects the
            image-specific regexes.
        messages (list): Syslog lines to analyze.
        result (dict): Result accumulator; its "time_span" dict and
            "offset_from_kexec" entry are updated in place.
        offset_from_kexec (dict): Accumulator for event times measured
            relative to kexec; updated in place.

    Returns:
        dict or None: The updated result dict, or None if messages is empty.
    """
    service_restart_times = dict()
    # Copy COMMON: updating the dict returned by .get() in place would
    # permanently pollute the shared OTHER_PATTERNS["COMMON"] table with
    # platform/image-specific patterns across invocations.
    derived_patterns = dict(OTHER_PATTERNS.get("COMMON"))
    service_patterns = dict()
    # get platform specific regexes
    if is_broadcom_device(duthost):
        derived_patterns.update(OTHER_PATTERNS.get("BRCM"))
    elif is_mellanox_device(duthost):
        derived_patterns.update(OTHER_PATTERNS.get("MLNX"))
    # get image specific regexes
    if "20191130" in duthost.os_version:
        derived_patterns.update(OTHER_PATTERNS.get("201911"))
        service_patterns.update(SERVICE_PATTERNS.get("201911"))
    else:
        derived_patterns.update(OTHER_PATTERNS.get("LATEST"))
        service_patterns.update(SERVICE_PATTERNS.get("LATEST"))

    if not messages:
        logging.error("Expected messages not found in syslog")
        return None

    def service_time_check(message, status):
        # Record the timestamp at which a service entered `status`; repeated
        # occurrences of the same status bump a "<status> count" entry.
        time = datetime.strptime(
            message.split(duthost.hostname)[0].strip(), FMT)
        time = time.strftime(FMT)
        service_name = message.split(status + " ")[1].split()[0]
        service_name = service_name.upper()
        if service_name == "ROUTER":
            service_name = "RADV"
        service_dict = service_restart_times.get(
            service_name, {"timestamp": {}})
        timestamps = service_dict.get("timestamp")
        if status in timestamps:
            service_dict[status + " count"] = service_dict.get(
                status + " count", 1) + 1
        timestamps[status] = time
        service_restart_times.update({service_name: service_dict})

    for message in messages:
        # Get stopping to started timestamps for services (swss, bgp, etc)
        for status, pattern in service_patterns.items():
            if re.search(pattern, message):
                service_time_check(message, status)
                break
        # Get timestamps of all other entities
        for state, pattern in derived_patterns.items():
            if re.search(pattern, message):
                timestamp = datetime.strptime(
                    message.split(duthost.hostname)[0].strip(), FMT)
                state_name = state.split("|")[0].strip()
                # States without a matching "<name>|End" pattern are measured
                # relative to kexec; paired Start/End states are tracked with
                # the service timings.
                if state_name + "|End" not in derived_patterns:
                    state_times = get_state_times(
                        timestamp, state, offset_from_kexec)
                    offset_from_kexec.update(state_times)
                else:
                    state_times = get_state_times(
                        timestamp, state, service_restart_times)
                    service_restart_times.update(state_times)
                break

    # Calculate time that services took to stop/start
    for _, timings in service_restart_times.items():
        timestamps = timings["timestamp"]
        timings["stop_time"] = \
            (datetime.strptime(timestamps["Stopped"], FMT) -
             datetime.strptime(timestamps["Stopping"], FMT)).total_seconds() \
            if "Stopped" in timestamps and "Stopping" in timestamps else None
        timings["start_time"] = \
            (datetime.strptime(timestamps["Started"], FMT) -
             datetime.strptime(timestamps["Starting"], FMT)).total_seconds() \
            if "Started" in timestamps and "Starting" in timestamps else None
        if "Started" in timestamps and "Stopped" in timestamps:
            timings["time_span"] = \
                (datetime.strptime(timestamps["Started"], FMT) -
                 datetime.strptime(timestamps["Stopped"], FMT)).total_seconds()
        elif "Start" in timestamps and "End" in timestamps:
            # Prefer the last occurrence when an event repeats.
            if "last_occurence" in timings:
                timings["time_span"] = \
                    (datetime.strptime(timings["last_occurence"], FMT) -
                     datetime.strptime(timestamps["Start"], FMT)
                     ).total_seconds()
            else:
                timings["time_span"] = \
                    (datetime.strptime(timestamps["End"], FMT) -
                     datetime.strptime(timestamps["Start"], FMT)
                     ).total_seconds()

    result["time_span"].update(service_restart_times)
    result["offset_from_kexec"] = offset_from_kexec
    return result
def swap_syncd(dut, creds):
    """
    Replaces the running syncd container with the RPC version of it.

    This will download a new Docker image to the DUT and restart the swss
    service.

    Args:
        dut (SonicHost): The target device.
    """
    # Resolve the vendor suffix used in syncd docker image names.
    if is_broadcom_device(dut):
        vendor_id = "brcm"
    elif is_mellanox_device(dut):
        vendor_id = "mlnx"
    else:
        error_message = "\"{}\" is not currently supported".format(
            dut.facts["asic_type"])
        _LOGGER.error(error_message)
        raise ValueError(error_message)

    base_image = "docker-syncd-{}".format(vendor_id)
    rpc_image = base_image + "-rpc"

    # Force image download to go through mgmt network
    dut.command("config bgp shutdown all")
    dut.command("systemctl stop swss")
    delete_container(dut, "syncd")

    # Set sysctl RCVBUF parameter for tests
    dut.command("sysctl -w net.core.rmem_max=609430500")
    # Set sysctl SENDBUF parameter for tests
    dut.command("sysctl -w net.core.wmem_max=609430500")

    # TODO: Getting the base image version should be a common utility
    version_output = dut.command(
        "sonic-cfggen -y /etc/sonic/sonic_version.yml -v build_version")
    sonic_version = version_output["stdout_lines"][0].strip()

    def ready_for_swap():
        # Both containers must be gone ("docker ps" shows only its header
        # line) and every BGP neighbor must be administratively idle.
        syncd_rows = dut.command("docker ps -f name=syncd")["stdout_lines"]
        if len(syncd_rows) > 1:
            return False
        swss_rows = dut.command("docker ps -f name=swss")["stdout_lines"]
        if len(swss_rows) > 1:
            return False

        bgp_summary = dut.command("show ip bgp summary")["stdout_lines"]
        idle_count = 0
        expected_idle_count = 0
        for line in bgp_summary:
            if "Idle (Admin)" in line:
                idle_count += 1
            if "Total number of neighbors" in line:
                expected_idle_count = int(line.split()[-1])
        return idle_count == expected_idle_count

    pytest_assert(wait_until(30, 3, ready_for_swap),
                  "Docker and/or BGP failed to shut down")

    registry = load_docker_registry_info(dut, creds)
    download_image(dut, registry, rpc_image, sonic_version)
    tag_image(dut,
              "{}:latest".format(base_image),
              "{}/{}".format(registry.host, rpc_image),
              sonic_version)

    _LOGGER.info("Reloading config and restarting swss...")
    config_reload(dut)
def test_nhop(request, duthost, tbinfo):
    """
    Test next hop group resource count.

    Steps:
        - Add test IP address to an active IP interface
        - Add static ARPs
        - Create unique next hop groups
        - Add IP route and nexthop
        - check CRM resource
        - clean up
        - Verify no errors and crash
    """
    skip_release(duthost, ["201811", "201911"])

    default_max_nhop_paths = 32
    nhop_group_limit = 1024
    # program more than the advertised limit
    extra_nhops = 10

    asic = duthost.asic_instance()

    # find out MAX NHOP group count supported on the platform
    result = asic.run_redis_cmd(
        argv=["redis-cli", "-n", 6, "HGETALL", "SWITCH_CAPABILITY|switch"])
    it = iter(result)
    switch_capability = dict(zip(it, it))
    max_nhop = switch_capability.get("MAX_NEXTHOP_GROUP_COUNT")
    # Fall back to the test's own limit when the platform does not report one.
    max_nhop = nhop_group_limit if max_nhop is None else int(max_nhop)
    nhop_group_count = min(max_nhop, nhop_group_limit) + extra_nhops

    # find out an active IP port
    # (wrap in list(): dict_keys is not subscriptable on Python 3)
    ip_ifaces = list(asic.get_active_ip_interfaces(tbinfo).keys())
    pytest_assert(len(ip_ifaces), "No IP interfaces found")
    eth_if = ip_ifaces[0]

    # Generate ARP entries
    arp_count = 40
    arplist = Arp(duthost, asic, arp_count, eth_if)
    arplist.arps_add()

    # each unique combination of ARP indices becomes one next hop group
    indices = range(arp_count)
    ip_indices = combinations(indices, default_max_nhop_paths)

    # initialize log analyzer
    marker = "NHOP TEST PATH COUNT {} {}".format(nhop_group_count, eth_if)
    loganalyzer = LogAnalyzer(ansible_host=duthost, marker_prefix=marker)
    marker = loganalyzer.init()
    loganalyzer.load_common_config()
    loganalyzer.expect_regex = []
    loganalyzer.ignore_regex.extend(loganalyzer_ignore_regex_list())

    ip_prefix = ipaddr.IPAddress("192.168.0.0")

    crm_before = get_crm_info(duthost, asic)

    # increase CRM polling time
    asic.command("crm config polling interval 10")
    logging.info("Adding {} next hops on {}".format(nhop_group_count, eth_if))

    # create nexthop group
    nhop = IPRoutes(duthost, asic)
    try:
        for i, indx_list in zip(range(nhop_group_count), ip_indices):
            # get a list of unique group of next hop IPs
            ips = [arplist.ip_mac_list[x].ip for x in indx_list]
            ip_route = "{}/31".format(ip_prefix + (2 * i))

            # add IP route with the next hop group created
            nhop.add_ip_route(ip_route, ips)

        nhop.program_routes()
        # wait for routes to be synced and programmed
        time.sleep(120)
        crm_after = get_crm_info(duthost, asic)
    finally:
        nhop.delete_routes()
        arplist.clean_up()
        asic.command("crm config polling interval {}".format(
            crm_before["polling"]))

    # check for any errors or crash
    loganalyzer.analyze(marker)

    # verify the test used up all the NHOP group resources
    # skip this check on Mellanox as ASIC resources are shared
    if not is_mellanox_device(duthost):
        pytest_assert(
            crm_after["available"] == 0,
            "Unused NHOP group resource: {}, used:{}".format(
                crm_after["available"], crm_after["used"]))