def bsd_flow_ctrl(node, inf, fc_type, fc_val, pattern):
    """Program flow-control settings on a FreeBSD ionic interface and verify.

    Sets dev.<ionic>.flow_ctrl to *fc_type* and dev.<ionic>.link_pause to
    *fc_val* via sysctl, then inspects the ifconfig media output of *inf*.

    Returns 0 on success, -1 on any failure.
    """
    import re

    req = api.Trigger_CreateExecuteCommandsRequest(serial=True)
    api.Trigger_AddHostCommand(req, node, 'sysctl dev.%s.flow_ctrl=%d' %
                               (host.GetNaplesSysctl(inf), fc_type))
    api.Trigger_AddHostCommand(req, node, 'sysctl dev.%s.link_pause=%d' %
                               (host.GetNaplesSysctl(inf), fc_val))
    api.Trigger_AddHostCommand(req, node, BSD_IFCONFIG_MEDIA_CMD % inf)
    api.Logger.info("Setting %s link type: %d value: %d pattern: %s" %
                    (inf, fc_type, fc_val, pattern))
    resp = api.Trigger(req)
    if resp is None:
        return -1

    # We are interested in only last command response.
    cmd = resp.commands[2]
    if cmd.exit_code != 0:
        api.Logger.error("Failed exit code: %d link type: %d value: %d, stderr: %s" %
                         (cmd.exit_code, fc_type, fc_val, cmd.stderr))
        api.PrintCommandResults(cmd)
        return -1

    # Bug fix: the original used str.find() on a regex-style needle
    # ("[\n\t]*" + pattern + "[\n\t]*"), which can never appear literally in
    # the output, so this verification was a no-op.  Use a real regex search.
    # Polarity kept from the original code: an error is flagged when *pattern*
    # matches the media output — TODO(review): confirm intended polarity with
    # the callers' pattern values.
    if re.search(pattern, cmd.stdout):
        # Bug fix: the original format string had 3 placeholders for 4
        # arguments and raised TypeError whenever this branch was taken.
        api.Logger.error("Failed exit code: %d link type: %d value: %d, stdout: %s" %
                         (cmd.exit_code, fc_type, fc_val, cmd.stdout))
        api.PrintCommandResults(cmd)
        return -1
    return 0
def Trigger(tc):
    """Reset each Naples interface and sample media status before and after.

    For every Naples host interface, queues a media_status read, a LIF
    reset, and a second media_status read; fails if any command errored.
    """
    # These sysctl nodes are only exposed by the FreeBSD driver.
    if tc.os != host.OS_TYPE_BSD:
        api.Logger.info("test not supported yet for os %s" % tc.os)
        return api.types.status.SUCCESS

    req = api.Trigger_CreateExecuteCommandsRequest(serial=True)
    tc.cmd_cookies = []
    for node in tc.nodes:
        for intf in api.GetNaplesHostInterfaces(node):
            api.Logger.info("getting Media status info from %s" % intf)
            sysctl_name = host.GetNaplesSysctl(intf)
            api.Trigger_AddHostCommand(
                req, node, "sysctl dev.%s.media_status" % sysctl_name)
            tc.cmd_cookies.append('before')
            api.Trigger_AddHostCommand(
                req, node, "sysctl dev.%s.reset=1" % sysctl_name)
            api.Trigger_AddHostCommand(
                req, node, "sysctl dev.%s.media_status" % sysctl_name)
            tc.cmd_cookies.append('after')

    tc.resp = api.Trigger(req)
    if tc.resp is None:
        return api.types.status.FAILURE

    # Report the first failing command, if any.
    for result in tc.resp.commands:
        if result.exit_code == 0:
            continue
        api.Logger.error("Failed to get media status info (check if cable is plugged in)")
        api.Logger.info(result.stderr)
        return api.types.status.FAILURE
    return api.types.status.SUCCESS
def do_lif_reset_test(node, os):
    """Exercise LIF reset across driver reloads on *node*.

    Runs three loops of: unload ionic, reload it, re-add workloads, then for
    each interface do a single LIF reset and verify the VLAN and MAC-filter
    lists survive the reset; finally fires a burst of background resets as a
    stress test.  Returns api.types.status SUCCESS/FAILURE.
    """
    for i in range(3):
        api.Logger.info("LIF reset and driver reload test loop %d" % i)
        if host.UnloadDriver(os, node, "all") is api.types.status.FAILURE:
            api.Logger.error("ionic unload failed loop %d" % i)
            return api.types.status.FAILURE
        if host.LoadDriver(os, node) is api.types.status.FAILURE:
            api.Logger.error("ionic load failed loop %d" % i)
            return api.types.status.FAILURE
        # Workload objects must be re-bound after the driver bounce.
        wl_api.ReAddWorkloads(node)
        if api.GetNaplesHostInterfaces(node) is None:
            api.Logger.error("No ionic interface after loop %d" % i)
            return api.types.status.FAILURE
        for intf in api.GetNaplesHostInterfaces(node):
            req = api.Trigger_CreateExecuteCommandsRequest(serial=True)
            # Snapshot config state before the reset.
            vlan_list = getVlanList(node, intf)
            filter_list = getFilterList(node, intf)
            # Single LIF reset
            api.Trigger_AddHostCommand(
                req, node,
                "sysctl dev.%s.reset=1" % (host.GetNaplesSysctl(intf)))
            resp = api.Trigger(req)
            # Give the LIF time to come back before re-reading state.
            time.sleep(5)
            vlan_list1 = getVlanList(node, intf)
            filter_list1 = getFilterList(node, intf)
            if vlan_list != vlan_list1:
                api.Logger.error(
                    "VLAN list doesn't match for %s, before: %s after: %s" %
                    (intf, str(vlan_list), str(vlan_list1)))
                return api.types.status.FAILURE
            if filter_list != filter_list1:
                api.Logger.error(
                    "Filter list doesn't match for %s, before: %s after: %s" %
                    (intf, str(filter_list), str(filter_list1)))
                return api.types.status.FAILURE
            api.Logger.info(
                "Success running LIF reset test on %s VLAN: %s, Filters; %s" %
                (intf, str(vlan_list), str(filter_list)))
        # Now stress test LIF reset
        for intf in api.GetNaplesHostInterfaces(node):
            req = api.Trigger_CreateExecuteCommandsRequest(serial=True)
            api.Trigger_AddHostCommand(
                req, node,
                "for ((i=0;i<10;i++)); do sysctl dev.%s.reset=1; done &" %
                (host.GetNaplesSysctl(intf)))
            # Some of LIF reset will fill fail since it will be running in
            # background with reload of driver.
            resp = api.Trigger(req)
    return api.types.status.SUCCESS
def getBsdStats(node, intf, pattern):
    """Return a list of integer counters from 'sysctl dev.<ionic>' matching *pattern*.

    Runs sysctl on the host, greps the output for *pattern*, strips the
    "name:" prefix, and converts each remaining line to int.

    Returns the list of ints, or None on any failure / empty output.
    """
    sysctl_cmd = ('sysctl dev.' + host.GetNaplesSysctl(intf) +
                  ' | grep -e ' + pattern + ' | cut -d ":" -f 2')
    req = api.Trigger_CreateExecuteCommandsRequest(serial=True)
    #api.Logger.info("Getting sysctl map for: %s on host: %s intf: %s"
    #                %(sysctl_cmd, node.node_name, intf))
    api.Trigger_AddHostCommand(req, node.node_name, sysctl_cmd)
    resp = api.Trigger(req)
    if resp is None:
        api.Logger.error("Failed to run: %s on host: %s intf: %s" %
                         (sysctl_cmd, node.node_name, intf))
        return None

    cmd = resp.commands[0]
    if cmd.exit_code != 0:
        # Bug fix: the command-string variable was previously clobbered by the
        # response object before these logs ran, so "%s" printed the object
        # repr instead of the sysctl command line.
        api.Logger.error(
            "Failed to run: %s for host: %s, stderr: %s" %
            (sysctl_cmd, node.node_name, cmd.stderr))
        api.PrintCommandResults(cmd)
        return None
    if cmd.stdout == "":
        api.Logger.error("Output is empty for: %s on host: %s intf: %s" %
                         (sysctl_cmd, node.node_name, intf))
        api.PrintCommandResults(cmd)
        return None
    # One counter value per line.
    return [int(line) for line in cmd.stdout.splitlines()]
def Trigger(tc):
    """Clear per-interface stats, then dump any non-zero sysctl counters.

    FreeBSD-only; other OSes are reported as not implemented (IGNORED).
    """
    if tc.os != host.OS_TYPE_BSD:
        api.Logger.info("Not implemented")
        return api.types.status.IGNORED

    for node in tc.nodes:
        # One request per node; tc.resp ends up holding the last node's
        # response, same as the original behavior.
        batch = api.Trigger_CreateExecuteCommandsRequest(serial=True)
        for intf in api.GetNaplesHostInterfaces(node):
            sysctl_name = host.GetNaplesSysctl(intf)
            api.Trigger_AddHostCommand(
                batch, node, 'sysctl dev.%s.reset_stats=1' % sysctl_name)
            api.Trigger_AddHostCommand(
                batch, node, 'sysctl dev.%s | grep -v ": 0"' % sysctl_name)
        tc.resp = api.Trigger(batch)
    return api.types.status.SUCCESS
def Main(tc):
    """Collect (and on BSD clear) packet statistics from every Naples interface.

    For each node/device pair, queues the per-OS stats command for every host
    interface, runs the batch, and fails if any command errored.
    """
    for node in api.GetNaplesHostnames():
        ionic_utils.checkForIonicError(node)
        # The OS is a per-node property: look it up once instead of once per
        # interface (was loop-invariant work inside the inner loop).
        node_os = api.GetNodeOs(node)
        for dev_name in api.GetDeviceNames(node):
            req = api.Trigger_CreateExecuteCommandsRequest(serial=True)
            for i in api.GetNaplesHostInterfaces(node, dev_name):
                if node_os == host.OS_TYPE_BSD:
                    api.Trigger_AddHostCommand(
                        req, node,
                        "bash " + IONIC_STATS_FILE +
                        " -i %s -c" % (host.GetNaplesSysctl(i)))
                    # Clear the stats.
                    api.Trigger_AddHostCommand(
                        req, node,
                        'sysctl dev.%s.reset_stats=1 1>/dev/null' %
                        host.GetNaplesSysctl(i))
                elif node_os == host.OS_TYPE_WINDOWS:
                    intf = workload_api.GetNodeInterface(node, dev_name)
                    name = intf.WindowsIntName(i)
                    api.Trigger_AddHostCommand(
                        req, node,
                        "/mnt/c/Windows/temp/drivers-windows/IonicConfig.exe portstats -n '%s'" % name)
                else:
                    api.Trigger_AddHostCommand(
                        req, node, 'ethtool -S %s | grep packets' % i)
            resp = api.Trigger(req)
            if resp is None:
                # NOTE(review): `i` here is the last interface iterated (and
                # unbound if the device exposed none) — confirm intent.
                api.Logger.error(
                    "Failed to get stats for %s, is driver loaded?" % i)
                return api.types.status.FAILURE
            for cmd in resp.commands:
                if cmd.exit_code == 0:
                    if cmd.stdout:
                        # Log for debugging for now.
                        api.Logger.info("Stats output for %s: %s" % (i, cmd.stdout))
                else:
                    api.Logger.error("Command failed to run: %s" % cmd.stderr)
                    return api.types.status.FAILURE
    return api.types.status.SUCCESS
def Verify(tc):
    """Read back the interrupt coalescing value from each workload interface
    and check it matches what Trigger programmed.

    Returns api.types.status SUCCESS/FAILURE.
    """
    req = api.Trigger_CreateExecuteCommandsRequest(serial=True)
    coales_period = tc.args.coales_period
    if tc.resp is None:
        api.Logger.error("Command failed to respond")
        return api.types.status.FAILURE
    # get the current coalescing value from FW/Driver
    api.Logger.info("Retrieve coalescing value from interfaces")
    for wl in tc.workloads:
        # TODO: Maybe revisit this. Ignore 802.1q vlan workloads for now.
        if wl.interface_type == topo_svc.INTERFACE_TYPE_VSS:
            api.Logger.info("Verify: Skipping vlan workload")
            continue
        if tc.os == 'linux':
            api.Trigger_AddCommand(req, wl.node_name, wl.workload_name,
                                   "ethtool -c %s" % wl.interface)
        elif tc.os == 'freebsd':
            # Bug fix: this branch previously called Trigger_AddHostCommand
            # with workload-command arguments; workload commands are queued
            # with Trigger_AddCommand (matching the linux branch above).
            api.Trigger_AddCommand(req, wl.node_name, wl.workload_name,
                                   "sysctl dev.%s.curr_coal_us" %
                                   (host.GetNaplesSysctl(wl.interface)))
    tc.resp = api.Trigger(req)
    if tc.resp is None:
        api.Logger.error("Command failed to respond")
        return api.types.status.FAILURE
    # expecting the following value back from FW/Driver
    if tc.os == 'linux':
        # linux driver returns coalescing interval as uSecs
        # 3 is Naples interrupt period
        current_coalescing = str(int(tc.iterators.coales_interval / coales_period)
                                 * coales_period)
    elif tc.os == 'freebsd':
        # freebsd returns coalescing value, same as
        # what user programmed.
        current_coalescing = str(int(tc.iterators.coales_interval))
    for cmd in tc.resp.commands:
        if cmd.exit_code != 0:
            api.Logger.error("Failed to read interrupt coalescing value")
            api.Logger.info(cmd.stderr)
            return api.types.status.FAILURE
        # for all values < max, validate returned value
        if tc.iterators.coales_interval < tc.args.max_coales_interval:
            # Bug fix: Logger.info was given the value as a second positional
            # argument; format it into the message instead.
            api.Logger.info("Expecting Coalescing Value: %s" % current_coalescing)
            if cmd.stdout.find(current_coalescing) == -1:
                api.Logger.info("Failed to set coalescing value")
                api.PrintCommandResults(cmd)
                return api.types.status.FAILURE
    return api.types.status.SUCCESS
def get_rdmaresets(req, node, intf, tc):
    """Queue a host command that reads the RDMA reset counter for *intf*."""
    if tc.os == host.OS_TYPE_BSD:
        read_cmd = 'sysctl -n dev.' + host.GetNaplesSysctl(intf) + '.rdma.info.reset_cnt'
        return api.Trigger_AddHostCommand(req, node, read_cmd)

    # Non-BSD path: the counter lives in debugfs, keyed by the PCI address.
    pci = tc.pci[(node, intf)]
    if pci is None:
        # No PCI address known; queue a stand-in that yields "0".
        read_cmd = 'echo 0'
    else:
        read_cmd = ('grep reset_cnt /sys/kernel/debug/ionic/' + pci +
                    '/lif0/rdma/info | cut -f 2')
    return api.Trigger_AddHostCommand(req, node, read_cmd)
def set_rdma_lif_reset(req, node, intf, tc):
    """Queue a host command that triggers an RDMA LIF reset on *intf*."""
    if tc.os == host.OS_TYPE_BSD:
        reset_cmd = 'sysctl dev.' + host.GetNaplesSysctl(intf) + '.rdma.reset=1'
        return api.Trigger_AddHostCommand(req, node, reset_cmd)

    # Non-BSD path: the reset knob lives in debugfs, keyed by PCI address.
    pci = tc.pci[(node, intf)]
    if pci is None:
        # No PCI address known; queue a harmless placeholder command.
        reset_cmd = 'echo 0'
    else:
        reset_cmd = 'echo 1 > /sys/kernel/debug/ionic/' + pci + '/lif0/rdma/reset'
    return api.Trigger_AddHostCommand(req, node, reset_cmd)
def Trigger(tc):
    """Program the interrupt coalescing value on every Naples host interface
    and validate the per-OS driver response.

    Values below tc.args.max_coales_interval must be accepted; values at or
    above it must be rejected with the driver's out-of-range message.
    Returns api.types.status SUCCESS/FAILURE/IGNORED.
    """
    req = api.Trigger_CreateExecuteCommandsRequest(serial=True)
    if tc.os != host.OS_TYPE_BSD and tc.os != host.OS_TYPE_LINUX:
        api.Logger.info("Not implemented for %s" % tc.os)
        return api.types.status.IGNORED
    # set interrupt coalescing value
    # NOTE(review): the branches below compare tc.os to the literals
    # 'linux'/'freebsd' — presumably equal to the OS_TYPE_* constants used
    # above; confirm.
    for node in tc.nodes:
        interfaces = api.GetNaplesHostInterfaces(node)
        for interface in interfaces:
            api.Logger.info("Set Interrupt Coalescing on %s:%s to %d" %
                            (node, interface,
                             tc.iterators.coales_interval))
            if tc.os == 'linux':
                api.Trigger_AddHostCommand(req, node,
                                           "ethtool -C %s rx-usecs %d" %
                                           (interface,
                                            tc.iterators.coales_interval))
            elif tc.os == 'freebsd':
                api.Trigger_AddHostCommand(req, node,
                                           "sysctl dev.%s.intr_coal=%d" %
                                           (host.GetNaplesSysctl(interface),
                                            tc.iterators.coales_interval))
    tc.resp = api.Trigger(req)
    if tc.resp is None:
        api.Logger.error("Command failed to respond")
        return api.types.status.FAILURE
    # validate the command response
    # for > than max, expect an error and a specific message
    for cmd in tc.resp.commands:
        if tc.iterators.coales_interval < tc.args.max_coales_interval:
            if cmd.exit_code != 0:
                #linux ethtool will not set the value if same as current
                if cmd.stderr.find("unmodified, ignoring") == -1:
                    api.Logger.error("Failed to set interrupt coalescing")
                    api.Logger.info(cmd.stderr)
                    return api.types.status.FAILURE
        else:
            if tc.os == 'linux':
                if cmd.stderr.find("out of range") == -1:
                    api.Logger.error("ionic did not error when coales value set (%d) > than supported (%d)" \
                                     %(tc.iterators.coales_interval, tc.args.max_coales_interval))
                    api.Logger.info(cmd.stderr)
                    return api.types.status.FAILURE
            elif tc.os == 'freebsd':
                # FreeBSD driver reports a "too large" style error.
                if cmd.stderr.find("large") == -1:
                    api.Logger.error(
                        "ionic did not error when coales value set > than supported"
                    )
                    api.Logger.info(cmd.stderr)
                    return api.types.status.FAILURE
    return api.types.status.SUCCESS
def Trigger(tc):
    """Program the interrupt coalescing value on each workload interface and
    validate the per-OS driver response.

    Values below tc.args.max_coales_interval must be accepted; values at or
    above it must be rejected with the driver's out-of-range message.
    Returns api.types.status SUCCESS/FAILURE.
    """
    req = api.Trigger_CreateExecuteCommandsRequest(serial=True)
    # set interrupt coalescing value
    for wl in tc.workloads:
        # TODO: Maybe revisit this. Ignore 802.1q vlan workloads for now.
        if wl.interface_type == topo_svc.INTERFACE_TYPE_VSS:
            api.Logger.info("Set Interrupt Coalescing: Skipping vlan workload")
            continue
        api.Logger.info("Set Interrupt Coalescing on %s:%s:%s to %d" %
                        (wl.node_name, wl.workload_name, wl.interface,
                         tc.iterators.coales_interval))
        if tc.os == 'linux':
            api.Trigger_AddCommand(req, wl.node_name, wl.workload_name,
                                   "ethtool -C %s rx-usecs %d" %
                                   (wl.interface, tc.iterators.coales_interval))
        elif tc.os == 'freebsd':
            api.Trigger_AddCommand(req, wl.node_name, wl.workload_name,
                                   "sysctl dev.%s.intr_coal=%d" %
                                   (host.GetNaplesSysctl(wl.interface),
                                    tc.iterators.coales_interval))
    tc.resp = api.Trigger(req)
    if tc.resp is None:
        api.Logger.error("Command failed to respond")
        return api.types.status.FAILURE
    # validate the command response
    # for > than max, expect an error and a specific message
    for cmd in tc.resp.commands:
        if tc.iterators.coales_interval < tc.args.max_coales_interval:
            if cmd.exit_code != 0:
                #linux ethtool will not set the value if same as current
                if cmd.stderr.find("unmodified, ignoring") == -1:
                    api.Logger.error("Failed to set interrupt coalescing")
                    api.Logger.info(cmd.stderr)
                    return api.types.status.FAILURE
        else:
            if tc.os == 'linux':
                if cmd.stderr.find("out of range") == -1:
                    # Bug fix: message was missing the opening parenthesis
                    # around the first value (now matches the host-side
                    # variant of this test).
                    api.Logger.error("ionic did not error when coales value set (%d) > than supported (%d)" \
                                     % (tc.iterators.coales_interval, tc.args.max_coales_interval))
                    api.Logger.info(cmd.stderr)
                    return api.types.status.FAILURE
            elif tc.os == 'freebsd':
                if cmd.stderr.find("large") == -1:
                    api.Logger.error(
                        "ionic did not error when coales value set > than supported"
                    )
                    api.Logger.info(cmd.stderr)
                    return api.types.status.FAILURE
    return api.types.status.SUCCESS
def Verify(tc):
    """Read back the interrupt coalescing value from each Naples host
    interface and check it matches what Trigger programmed.

    Returns api.types.status SUCCESS/FAILURE.
    """
    req = api.Trigger_CreateExecuteCommandsRequest(serial=True)
    coales_period = tc.args.coales_period
    if tc.resp is None:
        api.Logger.error("Command failed to respond")
        return api.types.status.FAILURE
    # get the current coalescing value from FW/Driver
    for n in tc.nodes:
        intfs = api.GetNaplesHostInterfaces(n)
        api.Logger.info("Retrieve coalescing value from interfaces")
        for i in intfs:
            if tc.os == 'linux':
                api.Trigger_AddHostCommand(req, n, "ethtool -c %s" % i)
            elif tc.os == 'freebsd':
                api.Trigger_AddHostCommand(req, n,
                                           "sysctl dev.%s.curr_coal_us" %
                                           (host.GetNaplesSysctl(i)))
    tc.resp = api.Trigger(req)
    if tc.resp is None:
        api.Logger.error("Command failed to respond")
        return api.types.status.FAILURE
    # expecting the following value back from FW/Driver
    if tc.os == 'linux':
        # linux driver returns coalescing interval as uSecs
        # 3 is Naples interrupt period
        current_coalescing = str(int(tc.iterators.coales_interval / coales_period)
                                 * coales_period)
    elif tc.os == 'freebsd':
        # freebsd returns coalescing value, same as
        # what user programmed.
        current_coalescing = str(int(tc.iterators.coales_interval))
    for cmd in tc.resp.commands:
        if cmd.exit_code != 0:
            api.Logger.error("Failed to read interrupt coalescing value")
            api.Logger.info(cmd.stderr)
            return api.types.status.FAILURE
        # for all values < max, validate returned value
        if tc.iterators.coales_interval < tc.args.max_coales_interval:
            # Bug fix: Logger.info was given the value as a second positional
            # argument; format it into the message instead.
            api.Logger.info("Expecting Coalescing Value: %s" % current_coalescing)
            if cmd.stdout.find(current_coalescing) == -1:
                api.Logger.info("Failed to set coalescing value")
                api.PrintCommandResults(cmd)
                return api.types.status.FAILURE
    return api.types.status.SUCCESS
def Trigger(tc):
    """FreeBSD firmware-update test: reload the driver with a target fw
    version and run 'sysctl dev.<ionic>.fw_update=1' on every interface.

    Returns api.types.status SUCCESS/FAILURE/IGNORED.
    """
    if tc.os != host.OS_TYPE_BSD:
        api.Logger.info("Not implemented")
        return api.types.status.IGNORED
    # Unload ionic and ionic_fw
    for node in tc.nodes:
        host.UnloadDriver(tc.os, node, "all")
        host.UnloadDriver(tc.os, node, "ionic_fw")
    for node in tc.nodes:
        req = api.Trigger_CreateExecuteCommandsRequest(serial=True)
        # XXX: Find the Naples_fw.tar version.
        api.Trigger_AddHostCommand(
            req, node, "kenv hw.ionic.fw_update_ver=FILL_FW_VERSION")
        resp = api.Trigger(req)
        if resp is None:
            api.Logger.info("Failed kenv hw.ionic.fw_update_ver=X")
            return api.types.status.FAILURE
        if host.LoadDriver(tc.os, node) is api.types.status.FAILURE:
            api.Logger.info("ionic already loaded")
            return api.types.status.FAILURE
        if LoadFwDriver(tc.os, node) is api.types.status.FAILURE:
            return api.types.status.FAILURE
        # NOTE(review): `req` is reused below after it was already triggered
        # above, so the kenv command appears to be queued (and re-run) along
        # with the fw_update commands — confirm against the Trigger API's
        # request semantics.
        for i in api.GetNaplesHostInterfaces(node):
            #
            # In local testing, this step completes in 35-40s, but the default
            # timeout is 30s. Therefore, increase the timeout to 60s.
            #
            # The iota logs may contain messages such as "CHECK_ERR: Nicmgr
            # crashed for host: node2?" Please note, this is due to finding
            # the string "fw heartbeat stuck" in the host dmesg. This is
            # currently the expected behavior when doing fw update. If nicmgr
            # does crash, than expect subsequent tests to fail, otherwise the
            # CHECK_ERR message in the iota test logs may be ignored.
            #
            api.Trigger_AddHostCommand(req, node,
                                       "sysctl dev.%s.fw_update=1" % host.GetNaplesSysctl(i),
                                       timeout=60)
        tc.resp = api.Trigger(req)
    return api.types.status.SUCCESS
def grep_qps(tc):
    """Dump active RDMA QP ids from every Naples interface and print the results."""
    req = api.Trigger_CreateExecuteCommandsRequest(serial=True)
    for node in tc.nodes:
        for intf in api.GetNaplesHostInterfaces(node):
            if tc.os == host.OS_TYPE_LINUX:
                pci = host.GetNaplesPci(node, intf)
                if pci is None:
                    # Cannot locate the debugfs tree without a PCI address.
                    continue
                qp_cmd = "grep qpid /sys/kernel/debug/ionic/{}/lif0/rdma/qp/*/info".format(pci)
            else:
                qp_cmd = "sysctl dev.{}.rdma.qp | grep qpid".format(
                    host.GetNaplesSysctl(intf))
            api.Trigger_AddHostCommand(req, node, qp_cmd)
    resp = api.Trigger(req)
    for result in resp.commands:
        api.PrintCommandResults(result)
def getFilterList(node, intf):
    """Return the list of MAC filter strings programmed on *intf*, or None.

    Reads 'sysctl dev.<ionic>.filters' on the host and extracts the MAC
    column from lines mentioning MAC.
    """
    host_cmd = ("sysctl dev." + host.GetNaplesSysctl(intf) +
                ".filters | grep MAC | cut -d ' ' -f 2")
    req = api.Trigger_CreateExecuteCommandsRequest(serial=True)
    api.Trigger_AddHostCommand(req, node, host_cmd)
    resp = api.Trigger(req)
    if resp is None:
        api.Logger.error("Failed to run host cmd: %s on host: %s" %
                         (host_cmd, node))
        return None
    cmd = resp.commands[0]
    if cmd.exit_code != 0:
        api.Logger.error("HOST CMD: %s failed on host: %s," %
                         (host_cmd, node))
        api.PrintCommandResults(cmd)
        return None
    # splitlines() already yields str objects; the previous
    # list(map(str, ...)) wrapper was a no-op and has been removed.
    return cmd.stdout.splitlines()
def Trigger(tc):
    """QoS traffic test: launch background iperf and RDMA traffic between a
    Naples server/client pair, record next_qpid, then run and aggregate.

    Requires FreeBSD and at least one Naples workload; class_type (1 or 2) is
    a mandatory test argument.  Returns api.types.status
    SUCCESS/FAILURE/DISABLED.
    """
    #==============================================================
    # trigger the commands
    #==============================================================
    req = api.Trigger_CreateExecuteCommandsRequest(serial = True)
    if tc.os != 'freebsd':
        api.Logger.info("Not FreeBSD - unsupported configuration")
        return api.types.status.DISABLED
    # Pick server/client indices based on which workloads are Naples.
    # NOTE(review): indices 2/3 assume tc.w holds at least four workloads —
    # confirm against the testcase topology.
    if tc.w[0].IsNaples():
        if tc.w[1].IsNaples():
            tc.server_idx = 2
            tc.client_idx = 3
        else:
            tc.server_idx = 3
            tc.client_idx = 2
        w1 = tc.w[0]
        w2 = tc.w[1]
    else:
        if tc.w[1].IsNaples():
            tc.server_idx = 2
            tc.client_idx = 3
        else:
            api.Logger.info("No naples - unsupported configuration")
            return api.types.status.DISABLED
        w1 = tc.w[1]
        w2 = tc.w[0]
    ws = tc.w[tc.server_idx]
    wc = tc.w[tc.client_idx]
    # class_type is mandatory and must be 1 or 2.
    if hasattr(tc.args, 'class_type'):
        tc.class_type = int(getattr(tc.args, 'class_type'))
        if tc.class_type != 1 and tc.class_type != 2:
            api.Logger.error("invalid class_type passed: {}".format(tc.class_type))
            return api.types.status.FAILURE
    else:
        api.Logger.error("mandatory argument class_type not passed")
        return api.types.status.FAILURE
    # The cps lists must be present whenever their counts are non-zero.
    num_rdma_cps = getattr(tc.args, 'num_rdma_cps', 0)
    rdma_cps = getattr(tc.args, 'rdma_cps', None)
    if num_rdma_cps != 0 and rdma_cps == None:
        api.Logger.error("num_rdma_cps is non zero but no rdma cps passed")
        return api.types.status.FAILURE
    num_iperf_cps = getattr(tc.args, 'num_iperf_cps', 0)
    iperf_cps = getattr(tc.args, 'iperf_cps', None)
    if num_iperf_cps != 0 and iperf_cps == None:
        api.Logger.error("num_iperf_cps is non zero but no iperf cps passed")
        return api.types.status.FAILURE
    # Run iperf tests first. All in background.
    for i_iperf in range(num_iperf_cps):
        iperf_cp = iperf_cps[i_iperf]
        qos.TriggerTrafficTest(req, tc, ws, wc, 2, iperf_cp, True)
    # Run the RDMA tests. All in background except the last one.
    for i_rdma in range(num_rdma_cps-1):
        rdma_cp = rdma_cps[i_rdma]
        qos.TriggerTrafficTest(req, tc, ws, wc, 1, rdma_cp, True)
    # Run the last RDMA test in the foreground
    if num_rdma_cps != 0:
        rdma_cp = rdma_cps[num_rdma_cps-1]
        qos.TriggerTrafficTest(req, tc, ws, wc, 1, rdma_cp, False)
    # print the next_qpid
    for w in [w1, w2]:
        if not w.IsNaples():
            continue
        if tc.os == host.OS_TYPE_BSD:
            api.Logger.info("{}".format(w.interface))
            cmd = 'sysctl dev.' + host.GetNaplesSysctl(w.interface) + '.rdma.info.next_qpid'
        elif tc.os == host.OS_TYPE_LINUX:
            pci = host.GetNaplesPci(w.node_name, w.interface)
            if pci is None:
                continue
            cmd = 'grep next_qpid /sys/kernel/debug/ionic/' + pci + '/lif0/rdma/info'
        else:
            continue
        api.Trigger_AddCommand(req, w.node_name, w.workload_name, cmd)
        tc.cmd_cookies.append(cmd)
    # Sleep for a while for all the tests to complete
    cmd = 'sleep 5'
    api.Trigger_AddCommand(req, ws.node_name, ws.workload_name, cmd)
    tc.cmd_cookies.append(cmd)
    #==============================================================
    # trigger the request
    #==============================================================
    trig_resp = api.Trigger(req)
    term_resp = api.Trigger_TerminateAllCommands(trig_resp)
    tc.resp = api.Trigger_AggregateCommandsResponse(trig_resp, term_resp)
    return api.types.status.SUCCESS
def get_lifresets(req, n, i):
    """Queue a host command that reads the lif_resets counter of interface *i*."""
    read_cmd = "sysctl -n dev.%s.lif_resets" % host.GetNaplesSysctl(i)
    return api.Trigger_AddHostCommand(req, n, read_cmd)
def enable_adq_hb_interval(req, n, i):
    """Queue a host command that sets the ADQ heartbeat interval to 1500 on *i*."""
    set_cmd = "sysctl dev.%s.adq.hb_interval=1500" % host.GetNaplesSysctl(i)
    return api.Trigger_AddHostCommand(req, n, set_cmd)
def enable_txq_timeout(req, n, i):
    """Queue a host command that sets the TX queue watchdog timeout to 5000 on *i*."""
    set_cmd = "sysctl dev.%s.txq_wdog_timeout=5000" % host.GetNaplesSysctl(i)
    return api.Trigger_AddHostCommand(req, n, set_cmd)
def Trigger(tc):
    """Run an RDMA perftest (ib_send_bw etc.) between two workloads.

    Builds the perftest command line from tc.iterators (duration/count, msg
    size, MTU, QPs, SGEs, transport, bidir, depths, atomics, ...), starts the
    server(s) in the background, runs the client(s), optionally flaps the
    port, captures/dumps RDMA traffic, toggles DCQCN, and finally aggregates
    all command responses into tc.resp.  Returns api.types.status.SUCCESS.
    """
    #==============================================================
    # trigger the commands
    #==============================================================
    req = api.Trigger_CreateExecuteCommandsRequest(serial=True)
    # Populate bw lookup table - manual entry to speed up development
    # Key is (numsges, msg_size); value is the expected bandwidth (Gbit)
    # passed via ' -w ' when check_bw is requested.
    bw_dict = {}
    bw_dict[(1, 4096)] = 10
    bw_dict[(1, 8192)] = 10
    bw_dict[(1, 65536)] = 50
    bw_dict[(2, 4000)] = 10
    bw_dict[(2, 4096)] = 10
    bw_dict[(2, 8192)] = 10
    bw_dict[(2, 16384)] = 10
    bw_dict[(2, 32768)] = 30
    bw_dict[(2, 65536)] = 50
    bw_dict[(2, 8000)] = 10
    bw_dict[(2, 16000)] = 10
    bw_dict[(2, 32000)] = 30
    bw_dict[(2, 64000)] = 50
    bw_dict[(3, 4095)] = 5
    bw_dict[(3, 3072)] = 5
    bw_dict[(3, 3000)] = 5
    bw_dict[(3, 12288)] = 10
    bw_dict[(3, 24576)] = 20
    bw_dict[(3, 12000)] = 10
    bw_dict[(3, 24000)] = 20
    bw_dict[(4, 4000)] = 5
    bw_dict[(4, 4096)] = 5
    bw_dict[(4, 8192)] = 10
    bw_dict[(4, 16384)] = 10
    bw_dict[(4, 32768)] = 30
    bw_dict[(4, 65536)] = 50
    bw_dict[(4, 16000)] = 10
    bw_dict[(4, 32000)] = 30
    bw_dict[(4, 64000)] = 50
    bw_dict[(5, 20480)] = 20
    bw_dict[(5, 20000)] = 10
    bw_dict[(5, 10000)] = 5
    bw_dict[(6, 12288)] = 10
    bw_dict[(6, 24576)] = 20
    bw_dict[(6, 24000)] = 20
    bw_dict[(7, 28672)] = 20
    bw_dict[(7, 28000)] = 30
    bw_dict[(7, 7700)] = 4
    bw_dict[(8, 16384)] = 5
    bw_dict[(8, 32768)] = 10
    bw_dict[(8, 65536)] = 10
    bw_dict[(8, 32000)] = 10
    bw_dict[(8, 64000)] = 10
    #==============================================================
    # init cmd options
    #==============================================================
    iter_opt = ' -n 10 '
    misc_opt = ' -F --report_gbits '
    cm_opt = ''
    enable_dcqcn = False
    transport_opt = ''
    msg_size = 65536
    size_opt = ' -a '
    mtu_opt = ' -m 4096 '
    qp_opt = ''
    numsges_opt = ''
    bidir_opt = ''
    rxdepth_opt = ''
    txdepth_opt = ''
    atomic_opt = ''
    tc.client_bkg = False
    s_port = 12340
    e_port = s_port + 1
    server_idx = 0
    client_idx = 1
    bkg_timeout = 130
    sq_drain_opt = ''
    async_event_stats_opt = ''
    bw_opt = ''
    port_flap = False
    tc.tcpdump = False
    #==============================================================
    # update non-default cmd options
    #==============================================================
    # if use both duration '-D' and count '-n', count will take precedence
    if hasattr(tc.iterators, 'duration'):
        iter_opt = ' -D {} '.format(tc.iterators.duration)
        # For scale tests, we noticed all 8 threads not started early,
        # so need to give extra timeout
        bkg_timeout = tc.iterators.duration + 60
    if hasattr(tc.iterators, 'count'):
        iter_opt = ' -n {} '.format(tc.iterators.count)
    if getattr(tc.iterators, 'rdma_cm', None) == 'yes':
        cm_opt = ' -R '
    if getattr(tc.iterators, 'transport', None) == 'UD':
        transport_opt = ' -c UD '
    if hasattr(tc.iterators, 'size'):
        msg_size = int(tc.iterators.size)
        size_opt = ' -s {} '.format(msg_size)
    if hasattr(tc.iterators, 'mtu'):
        mtu_opt = ' -m {} '.format(tc.iterators.mtu)
    numsges = getattr(tc.iterators, 'numsges', 1)
    if numsges > 1:
        numsges_opt = ' -W {} '.format(numsges)
    num_qp = getattr(tc.iterators, 'num_qp', 1)
    if num_qp > 1:
        qp_opt = ' -q {} '.format(num_qp)
    num_threads = getattr(tc.iterators, 'threads', 1)
    if num_threads > 1:
        # One port (hence one server/client pair) per thread.
        tc.client_bkg = True
        e_port = s_port + tc.iterators.threads
    if getattr(tc.iterators, 'server', None) == 'no':
        server_idx = 1
        client_idx = 0
    if getattr(tc.iterators, 'bidir', None) == 'yes':
        bidir_opt = ' -b '
    if hasattr(tc.iterators, 'rxdepth'):
        rxdepth_opt = ' -r {} '.format(tc.iterators.rxdepth)
    if hasattr(tc.iterators, 'txdepth'):
        txdepth_opt = ' -t {} '.format(tc.iterators.txdepth)
    if getattr(tc.iterators, 'cmp_swp', None) == 'yes':
        atomic_opt = ' -A CMP_AND_SWAP '
    if getattr(tc.iterators, 'enable_dcqcn', None) == 'yes':
        enable_dcqcn = True
    if getattr(tc.iterators, 'sq_drain', None) == 'yes':
        sq_drain_opt = ' --sq-drain '
    if getattr(tc.iterators, 'async_event_stats', None) == 'yes':
        async_event_stats_opt = ' --report-async-ev-stats '
    # Bandwidth check only when the (numsges, msg_size) combination has a
    # known expectation; the budget is split across threads.
    if getattr(tc.iterators, 'check_bw', None) == 'yes' and \
       num_qp == 1 and \
       (numsges, msg_size) in bw_dict:
        bw_opt = ' -w {} '.format(
            math.ceil(bw_dict[(numsges, msg_size)] / num_threads))
    if getattr(tc.iterators, 'port_flap', None) == 'true' and \
       hasattr(tc.iterators, 'duration'):
        port_flap = True
        tc.client_bkg = True
    if getattr(tc.iterators, 'tcpdump', None) == 'yes' and \
       not hasattr(tc.iterators, 'duration'):
        # Keep the capture small: only 5 iterations when sniffing.
        tc.tcpdump = True
        iter_opt = ' -n 5 '
    #==============================================================
    # run the cmds
    #==============================================================
    w1 = tc.w[server_idx]
    w2 = tc.w[client_idx]
    tc.cmd_descr = "Server: %s(%s) <--> Client: %s(%s)" %\
        (w1.workload_name, w1.ip_address, w2.workload_name, w2.ip_address)
    api.Logger.info("Starting %s test from %s" %
                    (tc.iterators.command, tc.cmd_descr))
    # Enable rdma sniffer and start tcpdump on Naples Hosts
    if tc.tcpdump == True:
        for w in [w1, w2]:
            if not w.IsNaples():
                continue
            tcpdump_intf = w.interface.split('.')[0]  # Get the parent interface
            tcpdump_cmd = "sudo tcpdump -l --immediate-mode -i {} -XXX udp dst port 4791 -w rdma_capture.pcap &".format(
                tcpdump_intf)
            if tc.os == host.OS_TYPE_BSD:
                sniffer_cmd = 'sysctl dev.' + host.GetNaplesSysctl(
                    w.interface) + '.rdma_sniffer=1'
            elif tc.os == host.OS_TYPE_LINUX:
                sniffer_cmd = 'sudo ethtool --set-priv-flags ' + tcpdump_intf + ' rdma-sniffer on'
            else:
                continue
            api.Trigger_AddCommand(req, w.node_name, w.workload_name,
                                   sniffer_cmd)
            api.Trigger_AddCommand(req, w.node_name, w.workload_name,
                                   tcpdump_cmd, background=True)
    if enable_dcqcn == True:
        for w in [w1, w2]:
            if not w.IsNaples():
                continue
            if tc.os == host.OS_TYPE_BSD:
                cmd = 'sysctl sys.class.infiniband.' + host.GetNaplesSysClassSysctl(
                    w.interface) + '.dcqcn.match_default="1"'
            elif tc.os == host.OS_TYPE_LINUX:
                cmd = 'echo 1 > /sys/class/infiniband/' + host.GetNaplesSysClassSysctl(
                    w.interface) + '/dcqcn/match_default'
            else:
                continue
            api.Trigger_AddCommand(req, w.node_name, w.workload_name, cmd,
                                   timeout=120)
    #==============================================================
    # cmd for server
    #==============================================================
    for p in range(s_port, e_port):
        port_opt = ' -p {} '.format(p)
        dev_opt = ' -d {} '.format(tc.devices[server_idx])
        gid_opt = ' -x {} '.format(tc.gid[server_idx])
        cmd = tc.iterators.command
        cmd += dev_opt + iter_opt + gid_opt
        cmd += size_opt + mtu_opt + qp_opt
        cmd += cm_opt + transport_opt + misc_opt + port_opt + bidir_opt + rxdepth_opt + txdepth_opt + atomic_opt + bw_opt
        # add numsges_opt only for Naples
        if w1.IsNaples():
            cmd += numsges_opt
        api.Trigger_AddCommand(req, w1.node_name, w1.workload_name,
                               tc.ib_prefix[server_idx] + cmd,
                               background=True, timeout=120)
    # On Naples-Mellanox setups, with Mellanox as server, it takes a few
    # seconds before the server starts listening. So sleep for a few seconds
    # before trying to start the client
    cmd = 'sleep 2'
    api.Trigger_AddCommand(req, w2.node_name, w2.workload_name, cmd)
    #==============================================================
    # cmd for client
    #==============================================================
    for p in range(s_port, e_port):
        port_opt = ' -p {} '.format(p)
        dev_opt = ' -d {} '.format(tc.devices[client_idx])
        gid_opt = ' -x {} '.format(tc.gid[client_idx])
        cmd = tc.iterators.command
        cmd += dev_opt + iter_opt + gid_opt
        cmd += size_opt + mtu_opt + qp_opt
        cmd += cm_opt + transport_opt + misc_opt + port_opt + bidir_opt + rxdepth_opt + txdepth_opt + atomic_opt
        # add numsges_opt only for Naples
        if w2.IsNaples():
            cmd += numsges_opt + sq_drain_opt + async_event_stats_opt
        # append server's ip_address
        cmd += w1.ip_address
        api.Trigger_AddCommand(
            req, w2.node_name, w2.workload_name,
            tc.ib_prefix[client_idx] + cmd,
            background=tc.client_bkg,
            timeout=125)  #5 secs more than def test timeout=120
    #Do the port flap only for duration tests
    if hasattr(tc.iterators, 'duration') and port_flap == True:
        num_flaps = int(getattr(tc.iterators, 'duration')) // 20
        num_flaps = num_flaps - 2  #Reduce the number of flaps so that we don't flap during connection close
        export_path_cmd = "export PATH=$PATH:/platform/bin:/nic/bin:/platform/tools:/nic/tools"
        export_ld_path_cmd = "export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/platform/lib:/nic/lib"
        port_down_cmd = "/nic/bin/halctl debug port --port 1 --admin-state down"
        port_up_cmd = "/nic/bin/halctl debug port --port 1 --admin-state up"
        #Sleep for 10 to make sure that we don't flap during connection create
        cmd = 'sleep 10'
        api.Trigger_AddCommand(req, w1.node_name, w1.workload_name, cmd,
                               timeout=20)
        for i in range(num_flaps):
            api.Trigger_AddNaplesCommand(req, w1.node_name, export_path_cmd)
            api.Trigger_AddNaplesCommand(req, w2.node_name, export_path_cmd)
            api.Trigger_AddNaplesCommand(req, w1.node_name, export_ld_path_cmd)
            api.Trigger_AddNaplesCommand(req, w2.node_name, export_ld_path_cmd)
            api.Trigger_AddNaplesCommand(req, w1.node_name, port_down_cmd)
            api.Trigger_AddNaplesCommand(req, w2.node_name, port_down_cmd)
            api.Trigger_AddNaplesCommand(req, w2.node_name, "sleep 1")
            api.Trigger_AddNaplesCommand(req, w1.node_name, port_up_cmd)
            api.Trigger_AddNaplesCommand(req, w2.node_name, port_up_cmd)
            api.Trigger_AddNaplesCommand(req, w2.node_name, "sleep 20")
        #Sleep to let the tests complete before Terminating
        cmd = 'sleep 30'
        api.Trigger_AddCommand(req, w1.node_name, w1.workload_name, cmd,
                               timeout=40)
    if tc.client_bkg and port_flap == False:
        # since the client is running in the background, sleep for 30 secs
        # to allow the test to complete before verifying the result
        # override default timeout to 35, slightly above the sleep duration 30 secs
        cmd = 'sleep ' + str(bkg_timeout)
        api.Trigger_AddCommand(req, w1.node_name, w1.workload_name, cmd,
                               timeout=(bkg_timeout + 5))
    # try to kill lingering processes
    for w in [w1, w2]:
        if not w.IsNaples():
            continue
        cmd = 'killall ' + tc.iterators.command
        api.Trigger_AddCommand(req, w.node_name, w.workload_name, cmd,
                               timeout=(bkg_timeout + 5))
    # print the next_qpid
    for w in [w1, w2]:
        if not w.IsNaples():
            continue
        if tc.os == host.OS_TYPE_BSD:
            cmd = 'sysctl dev.' + host.GetNaplesSysctl(
                w.interface) + '.rdma.info.next_qpid'
        elif tc.os == host.OS_TYPE_LINUX:
            pci = host.GetNaplesPci(w.node_name, w.interface)
            if pci is None:
                continue
            cmd = 'grep next_qpid /sys/kernel/debug/ionic/' + pci + '/lif0/rdma/info'
        else:
            continue
        api.Trigger_AddCommand(req, w.node_name, w.workload_name, cmd,
                               timeout=(bkg_timeout + 5))
    if tc.tcpdump == True:
        api.Trigger_AddCommand(req, w1.node_name, w1.workload_name, "sleep 5")
        tshark_cmd = "sudo tshark -r rdma_capture.pcap -T fields -e ip.addr -e infiniband.bth.opcode -e infiniband.aeth.msn"
        for w in [w1, w2]:
            if not w.IsNaples():
                continue
            api.Trigger_AddCommand(req, w.node_name, w.workload_name,
                                   "sudo killall tcpdump")
            api.Trigger_AddCommand(req, w.node_name, w.workload_name,
                                   tshark_cmd, timeout=60)
    #if dcqcn was enabled, disable it at the end of the test
    if enable_dcqcn == True:
        for w in [w1, w2]:
            if not w.IsNaples():
                continue
            if tc.os == host.OS_TYPE_BSD:
                cmd = 'sysctl sys.class.infiniband.' + host.GetNaplesSysClassSysctl(
                    w.interface) + '.dcqcn.match_default="0"'
            elif tc.os == host.OS_TYPE_LINUX:
                cmd = 'echo 0 > /sys/class/infiniband/' + host.GetNaplesSysClassSysctl(
                    w.interface) + '/dcqcn/match_default'
            else:
                continue
            api.Trigger_AddCommand(req, w.node_name, w.workload_name, cmd,
                                   timeout=120)
    #==============================================================
    # trigger the request
    #==============================================================
    trig_resp = api.Trigger(req)
    term_resp = api.Trigger_TerminateAllCommands(trig_resp)
    tc.resp = api.Trigger_AggregateCommandsResponse(trig_resp, term_resp)
    return api.types.status.SUCCESS