def __launch_tcpdump(self):
    """Launches the tcpdump processes.

    We launch one process per ethXX interface, each capturing all packets
    on the sender's port range for that interface.

    Returns:
        The list of tcpdump procs and the tcpdump output paths.
    """
    if not self.__save_pcap:
        LOG.info('Not saving pcap info per user request.')
        return ([], [])

    # tcp port 1 or tcp port 2 ...
    ports = ['port %d' % port for port in self.__ports()]
    ports = ' or '.join(ports)
    ifaces = self.get_all_ifaces()
    out_paths = []
    procs = []
    for iface in ifaces:
        out_path = os.path.join(path.get_tmp_dir(), iface + '.pcap')
        tcpdump_cmd = 'tcpdump -s128 -w %s -i %s "host %s and (%s)"' % (
            out_path, iface, self.__recv, ports)
        procs.append(shell.bg(tcpdump_cmd))
        out_paths.append(out_path)
    return (procs, out_paths)
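# Illustrative lifecycle (mirrors __do_run below): the returned procs are
# terminated and the pcaps collected at the end of the run, e.g.:
#
#   tcpdump_procs, pcap_files = self.__launch_tcpdump()
#   ...  # run the experiment
#   for proc in tcpdump_procs:
#       shell.terminate(proc)
#   for f in pcap_files:
#       shutil.move(f, out_dir)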
def _merge_pcaps(exp_dir):
    """Merges all the pcaps in the experiment directory."""
    pcaps = {}
    for d, f in all_files(exp_dir, regex=r'.*\.pcap$'):
        if d == exp_dir:
            continue
        if f not in pcaps:
            pcaps[f] = []
        pcaps[f].append(os.path.join(d, f))

    procs = []
    for f in pcaps:
        procs.append(shell.bg('mergecap -F pcap -w %s %s' % (
            os.path.join(exp_dir, 'all.' + f), ' '.join(pcaps[f]))))
    for p in procs:
        shell.wait(p)
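# Example (hypothetical directory layout): with per-host captures
#   <exp_dir>/s1/eth1.pcap and <exp_dir>/s2/eth1.pcap
# the loop above spawns the equivalent of:
#   mergecap -F pcap -w <exp_dir>/all.eth1.pcap \
#       <exp_dir>/s1/eth1.pcap <exp_dir>/s2/eth1.pcap
# producing the all.eth1.pcap file that the result pages link to.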
def pull_bg(self, srcs, dst, dst_is_dir=True, use_rootns=True):
    """Fetches a file or a directory from remote using rsync in background.

    Args:
        srcs: The (remote) source path(s). Note: src should not include
            the conn_string.
        dst: The destination path.
        dst_is_dir: We are specifying a directory to copy into, not a file.
        use_rootns: Whether we should execute this command in the root
            namespace of the physical server pointed to by this Ssh object
            (see self.addr()) or in the context of the optionally specified
            virtual node (see self.host()).

    Returns:
        The process.

    Raises:
        RuntimeError: if we have multiple sources to copy but are writing
            to a file and not a directory.
    """
    # Arguments:
    # -r: Recursive copy.
    # -L: Follow symlinks and copy underlying files.
    # -p: Preserve permissions.
    # -t: Preserve modification time.
    # -u: Only copy over files that are newer than local copies. This fixes
    #     a bug where stale data from a previous run can overwrite fresh
    #     data from a new run.
    args = ['-rLptu']
    srcs = make_iterable(srcs)
    if len(srcs) > 1 and not dst_is_dir:
        raise RuntimeError('Destination %s:%s must be dir, >1 srcs!: %s' %
                           (self.conn_string, dst, srcs))
    if not use_rootns:
        srcs = [self.__containerize_path(src) for src in srcs]
    # Prefix for source node if we're operating remotely.
    src_pfx = self.conn_string + ':' if self.conn_string else ''
    if len(srcs) == 1:
        abs_srcs = src_pfx + srcs[0]
    else:
        abs_srcs = ' '.join(['%s%s' % (src_pfx, src) for src in srcs])
    dst = os.path.normpath(dst)
    if dst_is_dir:
        dst += os.path.sep
    return shell.bg(' '.join(['rsync'] + args + [abs_srcs, dst]))
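# Usage sketch (illustrative; assumes `node` is an instance of this class
# with conn_string set to 'user@host'):
#
#   proc = node.pull_bg(['/tmp/transperf/conn.info',
#                        '/tmp/transperf/tool.out'], '/data/results')
#   shell.wait(proc)
#
# which runs, in the background:
#   rsync -rLptu user@host:/tmp/transperf/conn.info \
#       user@host:/tmp/transperf/tool.out /data/results/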
def push_bg(self, srcs, dst, dst_is_dir=True, use_rootns=True):
    """Copies a file or a directory to remote using rsync in background.

    Args:
        srcs: The source path(s).
        dst: The (remote) destination path. Note: dst should not include
            the conn_string.
        dst_is_dir: We are specifying a directory to copy into, not a file.
        use_rootns: Whether we should execute this command in the root
            namespace of the physical server pointed to by this Ssh object
            (see self.addr()) or in the context of the optionally specified
            virtual node (see self.host()).

    Returns:
        The process.

    Raises:
        RuntimeError: if we have multiple sources to copy but are writing
            to a file and not a directory.
    """
    # Arguments:
    # -r: Recursive copy.
    # -L: Follow symlinks and copy underlying files.
    # -p: Preserve permissions.
    # -t: Preserve modification time.
    # Note that -u is not present; when copying files over for remote
    # machines to use, we unconditionally copy what we have.
    opts = ['-rLpt']
    assert srcs is not None
    srcs = make_iterable(srcs)
    assert srcs
    if len(srcs) > 1 and not dst_is_dir:
        raise RuntimeError('Destination %s:%s must be dir, >1 srcs!: %s' %
                           (self.conn_string, dst, srcs))
    dst = os.path.normpath(dst)
    if not use_rootns:
        dst = self.__containerize_path(dst)
    # Append rsync+ssh target if appropriate.
    abs_dst = dst
    if self.conn_string:
        abs_dst = '%s:%s' % (self.conn_string, dst)
    # Rsync requires a trailing slash to treat the target as a directory.
    if dst_is_dir:
        abs_dst += os.path.sep
    return shell.bg(' '.join(['rsync'] + opts + srcs + [abs_dst]))
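# Usage sketch (illustrative):
#
#   proc = node.push_bg('/local/cfg/experiment.py', '/tmp/transperf/cfg')
#   shell.wait(proc)
#
# Note the asymmetry with pull_bg: -u is omitted here, so pushed files
# always overwrite the remote copies.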
def __do_run(self, tools, start_ts, dur, nsenders, out_dir):
    """Runs the experiment."""
    self.__servers = []
    till_start_sec = start_ts - calendar.timegm(
        datetime.datetime.utcnow().utctimetuple())

    # Build a set of unique tools and their associated ports.
    tool_to_ports = {}
    for tool, port in zip(tools, self.__port_to_addr.keys()):
        existing = tool_to_ports.setdefault(tool, [])
        existing.append((port, self.__port_to_addr[port]))

    # Have each tool add receiver commands to support the senders.
    for tool, ports in tool_to_ports.iteritems():
        toolobj = transperf.TOOLS[tool]
        toolobj.options_dict['ip_mode'] = (
            '-6' if self.__ip_mode == socket.AF_INET6 else '-4')
        for cmd in toolobj.receiver_cmds(ports, till_start_sec):
            proc = shell.bg(cmd)
            self.__servers.append(proc)
            if proc.poll():
                raise RuntimeError('cannot run ' + cmd)
    if not self.__servers:
        raise RuntimeError('no server to run')

    LOG.debug('creating commands')
    if self.__qdisc_noop(nsenders):
        # If there is no RTT, BW, or policer, don't install any qdisc.
        cmds = []
    else:
        # Set up root qdiscs.
        for iface in self.get_all_ifaces():
            # Skip setting up eth0 and ifb0 if bandwidth is noop.
            if iface == self.get_bond_iface() and self.__bw_qdisc_noop():
                continue
            iface_ifb = self.get_ifb_for_iface(iface)
            _, err, _ = shell.run('''
                %(tc)s qdisc replace dev %(iface)s handle 1: root htb
                %(tc)s qdisc replace dev %(iface)s handle ffff: ingress
                %(tc)s class replace dev %(iface)s parent 1: classid 1:1 \
                    htb rate 100Gbit
                ''' % {
                    'tc': path.tc(),
                    'iface': iface,
                })
            # Some tc versions print 'Success' to stderr.
            if any(l and l != 'RTNETLINK answers: Success'
                   for l in err.split('\n')):
                raise RuntimeError('Error in setting up %s: %s' %
                                   (iface, err))
            _, err, _ = shell.run('''
                %(tc)s qdisc replace dev %(iface)s handle 1: root htb
                %(tc)s class replace dev %(iface)s parent 1: classid 1:1 \
                    htb rate 100Gbit
                ''' % {
                    'tc': path.tc(),
                    'iface': iface_ifb,
                })
            if any(l and l != 'RTNETLINK answers: Success'
                   for l in err.split('\n')):
                raise RuntimeError('Error setting up %s: %s' %
                                   (iface_ifb, err))

        # We generate commands and their wait times before starting the
        # loop. Copy self.__cmds so that appending the generated commands
        # does not mutate it.
        cmds = list(self.__cmds)
        cmds += self.__bw_cmds()
        cmds += self.__rtt_cmds(nsenders)
        cmds += self.__filter_cmds(nsenders)
        cmds += self.__policer_cmds()
        cmds.sort(key=lambda c: c[1])
        for cmd in cmds:
            LOG.debug('at %s will run %s', cmd[1], cmd[0])

    cmds_at_zero = [cmd for cmd in cmds if not cmd[1]]
    cmds_rest = [cmd for cmd in cmds if cmd[1]]

    # Run all the commands that should run at time 0.
    for cmd in cmds_at_zero:
        shell.run(cmd[0])

    now = calendar.timegm(datetime.datetime.utcnow().utctimetuple())
    sdur = start_ts - now
    LOG.debug('sleeping for %s seconds', sdur)
    if start_ts > now:
        time.sleep(start_ts - now)

    now = 0.0
    # Run the commands that have a later deadline.
    for cmd in cmds_rest:
        if cmd[1] < now:
            LOG.warning('command %s ran after its deadline', cmd)
        if cmd[1] > now:
            LOG.debug('sleeping from %s till %s', now, cmd[1])
            time.sleep(cmd[1] - now)
            now = cmd[1]
        shell.run(cmd[0])

    end_time = datetime.datetime.utcnow().utctimetuple()
    delta = calendar.timegm(end_time) - start_ts
    if delta < dur:
        time.sleep(dur - delta)

    LOG.info('saving qdisc state in %s', out_dir)
    if os.path.exists(out_dir):
        shutil.rmtree(out_dir)
    os.makedirs(out_dir)

    # Save qdisc stats.
    tcs = '\n'.join([shell.run(path.tc() + ' -d -s -p qdisc show')[0],
                     shell.run(path.tc() + ' -d -s -p class show')[0],
                     shell.run(path.tc() + ' -d -s -p filter show')[0]])
    tcf = open(os.path.join(out_dir, 'tc.out'), 'w')
    tcf.write(tcs)
    tcf.close()

    hostname = socket.gethostname()
    if self.__singlesrv_mode:
        assert hostname in self.__ip_map
    if hostname in self.__ip_map:
        rcv_ip = self.__ip_map[hostname]
    else:
        rcv_ip = socket.getaddrinfo(hostname, 0, self.__ip_mode,
                                    socket.SOCK_STREAM,
                                    socket.IPPROTO_TCP)[0][4][0]
    ipf = open(os.path.join(out_dir, 'recv.info'), 'w')
    ipf.write(rcv_ip)
    ipf.close()
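# For reference, the qdisc snapshot above is the concatenated output of
# (with tc resolved via path.tc()):
#   tc -d -s -p qdisc show
#   tc -d -s -p class show
#   tc -d -s -p filter show
# written to <out_dir>/tc.out, alongside the receiver IP in
# <out_dir>/recv.info.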
def gen_transperf_pages(data_dir, has_xplot=False, open_page=True,
                        skip_pcap_scan=False):
    """Generates transperf pages for all the experiments in the data directory.

    Args:
        data_dir: The path to the data directory.
        has_xplot: Whether we have generated xplots.
        open_page: Whether to launch the browser at the end.
        skip_pcap_scan: Whether to skip pcap scan.

    Returns:
        1 if test case errors are present, or 0.
    """
    html = '''
        <table>
            <thead>
                <tr>
                    <th>#</th>
                    <th>Connections</th>
                    <th>RTT (ms)</th>
                    <th>BW (Mbps)</th>
                    <th>Buf (pkts)</th>
                    <th>Slot</th>
                    <th>Policer (Mbps)</th>
                    <th>ILoss (%)</th>
                    <th>OLoss (%)</th>
                    <th>Dur (sec)</th>
                    <th>Tputs (Mbps)</th>
                    <th>Retx</th>
                    <th>p95 RTT (ms)</th>
                    <th>Med RTT (ms)</th>
                    <th>Lock on BW (Mbps)</th>
                    <th>Status</th>
                    <th>Links</th>
                </tr>
            </thead>
            <tbody>'''
    param_cols = [('conn', ''), ('rtt', ''), ('bw', ''), ('buf', 'pkts'),
                  ('slot', ''), ('policer', ''), ('loss', '%'),
                  ('out_loss', '%'), ('dur', 'sec')]
    metric_cols = [
        ('tool_tputs', 'Mbps', '%s', 1.0),  # Throughput from netperf or similar.
        ('retx', '', '%.2f%%', 100.0),      # Convert retx fraction to percent.
        ('p95_rtt', 'ms', '%s', 1.0),
        ('med_rtt', 'ms', '%s', 1.0),
        ('lock_on_bw', 'Mbps', '%s', 1.0),
    ]
    exps = cfgutil.exps(data_dir)
    has_error = 0
    for i, (exp, cfg_dir, cfg_file, exp_name, exp_dir) in enumerate(exps):
        metrics, errs = gen_exp(exp, exp_dir, has_xplot, skip_pcap_scan)
        if errs:
            has_error = 1

        if open_page:
            shell.bg('x-www-browser %s/index.html' % exp_dir)

        exp_info = _exp_info(exp_dir)
        fields = exp_info.fields()
        esc_dir = urllib.quote(os.path.join('__out', cfg_dir, exp_name))
        html += '<tr>'
        html += '<td>%s</td>' % (i + 1)
        for name, unit in param_cols:
            v = fields[name]
            html += '<td>%s %s</td>' % (v, unit)
        for name, unit, fmt, mul in metric_cols:
            v = ', '.join([(fmt % (m * mul))
                           for m in metrics[name].as_array()])
            html += '<td>%s %s</td>' % (v, unit)
        html += '<td>'
        if not errs:
            html += '<span class="info">PASSED</span>'
        else:
            html += '<span class="error">FAILED</span><br>'
            html += '<br>'.join(errs)
        html += '</td>'
        html += '<td>'
        html += ('<a href="%(dir)s/index.html">dashboard</a><br>'
                 '<a href="%(dir)s/timeseq.html">time seq</a><br>'
                 '<a href="%(dir)s/util.html">utilization</a><br>'
                 '<a href="%(dir)s/klog.html">klog graphs</a><br>'
                 '<a href="%(dir)s/all.eth1.pcap">pcap</a><br>'
                 '<a href="%(dir)s/metrics">metrics</a><br>'
                 '<a href="%(dir)s/sys.out">sys params</a><br>'
                 '<a href="%(cfg)s">config file</a><br>') % {
                     'dir': esc_dir,
                     'cfg': cfg_file,
                 }
        if has_xplot:
            html += '<a href="%s/xplots.tbz2">xplots</a><br>' % esc_dir
        html += '</td></tr>'

    html += '</tbody></table>'
    inf = open(os.path.join(data_dir, 'index.html'), 'w')
    inf.write(templates.INDEX % {
        'title': 'experiments',
        'exps': html,
    })
    inf.close()
    return has_error
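# Worked example (values illustrative): for the ('retx', '', '%.2f%%', 100.0)
# column, a retransmission fraction of 0.0123 renders as
# '%.2f%%' % (0.0123 * 100.0) == '1.23%', so the cell becomes
# '<td>1.23% </td>'.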
def gen_xplots(data_dir):
    """Generates xplots for all the experiments in the data directory."""
    for _, _, _, _, exp_dir in cfgutil.exps(data_dir):
        xpl_paths = []
        conn_info = outparser.ConnInfo([
            os.path.join(d, f)
            for d, f in all_files(exp_dir, name='conn.info')
        ])
        rcv_ip = outparser.RecvInfo(os.path.join(exp_dir, 'R',
                                                 'recv.info')).ip
        ports = conn_info.ports()
        all_lines = []
        procs = []
        for d, f in all_files(exp_dir, regex=r'.*\.pcap$'):
            if d == exp_dir:
                continue
            procs.append(
                shell.bg('tcptrace -CRSzxy --output_dir="%s" "%s"' %
                         (d, os.path.join(d, f))))
        for p in procs:
            shell.wait(p)

        for d, f in all_files(exp_dir, regex=r'.*\.pcap$'):
            for xd, xf in all_files(d, regex=r'.*\.xpl$'):
                # Only process time sequence graphs.
                if xf.find('_tsg') == -1:
                    continue

                xplf = open(os.path.join(xd, xf))
                lines = xplf.readlines()
                # The first 3 lines in the xplot are for the title. The
                # last line is the draw command. The rest (3:-1) is data.
                # We save the rest in all_lines in order to create one
                # xplot that contains the time sequence graphs for all
                # flows.
                all_lines += lines[3:-1]

                # Parse the ip and port from the xplot's title. Note that
                # the addresses may be either IPv4 or IPv6.
                parts = lines[2].split('_==>_')[0].split(':')
                ip_base = ':'.join(parts[:-1])
                port = int(parts[-1])
                try:
                    ip = socket.getaddrinfo(ip_base, 0, socket.AF_INET,
                                            socket.SOCK_STREAM,
                                            socket.IPPROTO_TCP)[0][4][0]
                except socket.gaierror:
                    ip = socket.getaddrinfo(ip_base, 0, socket.AF_INET6,
                                            socket.SOCK_STREAM,
                                            socket.IPPROTO_TCP)[0][4][0]

                # If the ip and port are not from this experiment, ignore
                # this file.
                if ip == rcv_ip or port not in ports:
                    continue

                # Rewrite the title of the xplot as:
                #   ==> CC -- IP:PORT
                addr, _, cc, _, _, _, _ = conn_info.conn_info(port)
                lines[2] = '==>%s -- %s:%s\n' % (cc, addr, port)

                # Save the file.
                xpath = os.path.join(xd, 'out-%s.xpl' % port)
                xpl_paths.append(xpath)
                oxplf = open(xpath, 'w')
                oxplf.writelines(lines)
                oxplf.close()

        # Prepend the title to all_lines and append the draw command
        # (i.e., go).
        all_lines = (['dtime signed\n', 'title\n', '===> All flows\n'] +
                     all_lines + ['go'])
        axpath = os.path.join(exp_dir, 'out-all.xpl')
        xpl_paths.append(axpath)
        axplf = open(axpath, 'w')
        axplf.writelines(all_lines)
        axplf.close()

        shell.run('tar -C %s -cvjf %s %s' %
                  (exp_dir, os.path.join(exp_dir, 'xplots.tbz2'),
                   ' '.join([os.path.relpath(p, exp_dir)
                             for p in xpl_paths])))
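# Note: tcptrace emits several .xpl graph types per connection into
# --output_dir; only the *_tsg (time sequence graph) files are merged
# above. The resulting out-all.xpl is a regular xplot file and can be
# opened with an xplot viewer (e.g., xplot or xplot.org).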
def __do_run(self, start_ts, dur, out_dir):
    """Runs the experiment.

    Args:
        start_ts: When to start the experiment.
        dur: The duration of the experiment.
        out_dir: The output directory.
    """
    # We wait for 1 second in netperf to establish the control channel.
    dur += 1
    tcpdump_procs, pcap_files = self.__launch_tcpdump()
    self.__truncate_log()

    now = calendar.timegm(datetime.datetime.utcnow().utctimetuple())
    if now < start_ts:
        LOG.debug('sleeping for %s seconds', start_ts - now)
        time.sleep(start_ts - now)

    LOG.info('starting at %s', datetime.datetime.now())
    ss_thread, ss_log_path = self.launch_ss(dur)

    wait = 0
    live_conns = []
    port = self.__first_port
    tasks = self.__conns + self.__cmds
    tasks.sort(key=lambda t: t.start)
    for t in tasks:
        if t.start > wait:
            delta = t.start - wait
            # TODO(soheil): This may drift. Use an absolute TS instead?
            LOG.info('sleeping until the next connection for %s seconds',
                     delta)
            time.sleep(delta)
            wait += delta
        if isinstance(t, transperf.Conn):
            LOG.info('starting connection %s', t)
            n = t.num
            while n:
                # Make sure the duration of netperf is always 1+ seconds.
                cmd = t.tool.sender_cmd(t, self.__recv, port,
                                        max(1, dur - wait), self.__ip_addr)
                LOG.info('running %s', cmd)
                np_proc = shell.bg(cmd)
                live_conns.append((t, np_proc, port, t.tool))
                port += 1
                n -= 1
        elif isinstance(t, transperf.MachineCommand):
            shell.bg(t.cmd)

    # Wait until the end of the experiment.
    if wait < dur:
        time.sleep(dur - wait)
    ss_thread.join()

    # Collect results.
    LOG.info('saving results in %s', out_dir)
    if os.path.exists(out_dir):
        shutil.rmtree(out_dir)
    os.makedirs(out_dir)

    # Kill all the tool processes and collect their outputs.
    conn_infos = ''
    tool_stats = ''
    for conn, np_proc, port, tool in live_conns:
        out, err, _ = shell.wait(np_proc)
        if err:
            # TODO(soheil): Clean up the output directories.
            LOG.error('error in netperf of %s: %s', conn, err)
        throughput = tool.throughput(out)
        tool_stats += '%s of %s:\n%s\n' % (tool, conn, out)
        conn_infos += '%d=%s,%s,%s,%s,%s,%s,%s\n' % (
            port, self.__ip_addr, conn.tool.name(), conn.cc, conn.start,
            conn.dur, throughput, conn.params)
    LOG.debug('experiment successfully concluded')

    npf = open(os.path.join(out_dir, 'tool.out'), 'w')
    npf.write(tool_stats)
    npf.close()

    cif = open(os.path.join(out_dir, 'conn.info'), 'w')
    cif.write(conn_infos)
    cif.close()

    if ss_log_path:
        shutil.move(ss_log_path, out_dir)

    # Save tcpdump.
    time.sleep(1)
    for proc in tcpdump_procs:
        shell.terminate(proc)
    for f in pcap_files:
        shutil.move(f, out_dir)

    # Save sysctl.
    mod_params = ''
    for cc in set([c.cc for c in self.__conns]):
        params_dir = self.__cc_parameters_path(cc)
        mod_params += shell.run('grep . %s/*' % params_dir)[0]
        mod_params += '\n'
    modf = open(os.path.join(out_dir, 'mod.out'), 'w')
    modf.write(mod_params)
    modf.close()

    sysctl = shell.run('sysctl -a')[0]
    sysf = open(os.path.join(out_dir, 'sys.out'), 'w')
    sysf.write(sysctl)
    sysf.close()

    # Save kernel debug logs if commanded to do so.
    if self.__save_kern_debug:
        self.__save_kernel_debug_logs(out_dir)
    else:
        LOG.info('Not saving kernel debug log per user request.')
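# Each line of conn.info has the form (values illustrative):
#   5001=10.0.0.2,netperf,cubic,0,30,940.1,<params>
# i.e. port=ip,tool,cc,start,dur,throughput,params; this is the file that
# outparser.ConnInfo consumes when generating xplots and result pages.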
def bg(self, cmd, use_rootns=True):
    """Runs a command in background (without blocking)."""
    return shell.bg(self._build_executor_cmd(cmd, use_rootns))