def _collect_results(r_exec, s_execs, out_dir, singlesrv_cfg):
    """Pulls experiment output from the receiver and senders into out_dir.

    Pull results are deliberately not inspected: partial results are still
    useful for debugging, and this is the last stage of the experiment, so
    ignoring errors has no adverse effects.

    Args:
        r_exec: The receiver executor session.
        s_execs: The sender executor sessions.
        out_dir: Where to put the data.
        singlesrv_cfg: Single server mode config params.
    """
    in_singlesrv = singlesrv_cfg['enabled']
    is_local = singlesrv_cfg['local']
    rootns = not in_singlesrv
    nodes = [r_exec] + s_execs

    # Kick off all pulls in the background, then wait for every one of them.
    pulls = [
        node.pull_bg(transperf.path.get_exp_out_dir(), out_dir,
                     use_rootns=rootns)
        for node in nodes
    ]
    if in_singlesrv:
        if is_local:
            extra_src = os.path.join(_root_log_dir(), 'orch.log')
        else:
            # We need orch.log and exp.info for each experiment. Since there
            # are multiple experiments, pull the entire tree.
            extra_src = os.path.join(singlesrv_cfg['out_dir'], '__out')
        pulls.append(r_exec.pull_bg(extra_src, out_dir))
    for pending in pulls:
        shell.wait(pending)

    # Only clean up outside single server mode. In single server mode we may
    # want to go back and look at the outputs of a specific run.
    if in_singlesrv:
        return

    cleanups = []
    for node in nodes:
        wipe_cmd = '{sudo} rm -rf %(out)s/*' % {
            'out': transperf.path.get_exp_out_dir()
        }
        cleanups.append(node.bg(wipe_cmd, use_rootns=rootns))
    for pending in cleanups:
        shell.wait(pending)
def _merge_pcaps(exp_dir):
    """Merges same-named pcaps found below the experiment directory.

    Pcaps located directly in exp_dir are skipped. For every remaining file
    name, all files with that name are merged by mergecap into
    `<exp_dir>/all.<name>`. The merges run in the background and are all
    waited for before returning.

    Args:
        exp_dir: The experiment directory to scan.
    """
    by_name = {}
    for parent, fname in all_files(exp_dir, regex=r'.*\.pcap$'):
        if parent != exp_dir:
            by_name.setdefault(fname, []).append(os.path.join(parent, fname))

    merges = [
        shell.bg('mergecap -F pcap -w %s %s' %
                 (os.path.join(exp_dir, 'all.' + fname), ' '.join(paths)))
        for fname, paths in by_name.items()
    ]
    for merge in merges:
        shell.wait(merge)
def pull(self, srcs, dst, dst_is_dir=True, ignore_errors=False):
    """Fetches a file or a directory from the remote host using rsync.

    This is the blocking counterpart of pull_bg: it starts the background
    pull and waits for it to finish.

    Args:
        srcs: The source path(s). Note: src should not include the
            conn_string.
        dst: The destination path.
        dst_is_dir: We are specifying a directory to copy into, not a file.
        ignore_errors: Do not raise an IOError if there was an error in
            rsync.

    Raises:
        IOError: If the transfer produced error output and exited with a
            non-zero status, unless ignore_errors is set.
    """
    transfer = self.pull_bg(srcs, dst, dst_is_dir)
    _, stderr, status = shell.wait(transfer)
    if ignore_errors:
        return
    if stderr and status != 0:
        raise IOError(stderr)
def _start_servers(r_exec, s_execs, rport, sport, sslog_interval,
                   ifacecfg_rel, singlesrv_cfg, ip_mode, save_pcap,
                   save_kern_debug, out_dir):
    """Starts servers on the receiver and on the sender machines.

    Launches recv.py on the receiver and send.py on every sender in the
    background, gives them a grace period to come up, then runs orch.py on
    the receiver in the foreground. Once the orchestrator returns, the
    receiver and sender processes are terminated.

    Args:
        r_exec: The receiver executor session.
        s_execs: The sender executor sessions.
        rport: The port used by the receiver.
        sport: The port used by the senders.
        sslog_interval: The time interval in seconds to sample ss log.
        ifacecfg_rel: The name of the staged per-node interface config or
            None.
        singlesrv_cfg: Single server mode config params.
        ip_mode: Whether we are using ipv4 or ipv6.
        save_pcap: Whether we save pcaps or not.
        save_kern_debug: Whether we scrape/save kernel debug info or not.
        out_dir: Output directory for experiment.

    Raises:
        RuntimeError: When the receiver or a sender has already exited with
            a truthy status right after launch, or when the orchestrator
            returns a non-zero status together with error output.
    """
    singlesrv_mode = singlesrv_cfg['enabled']
    singlesrv_local = singlesrv_cfg['local']
    use_rootns = not singlesrv_mode

    ifacecfg_params = []
    if ifacecfg_rel is not None:
        node_ifacecfg_dir = os.path.join(transperf.path.get_transperf_home(),
                                         transperf.path.IFACE_CFG_DIR)
        ifacecfg = os.path.join(node_ifacecfg_dir, ifacecfg_rel)
        ifacecfg_params = ['--ifacecfg', ifacecfg]

    # Launch the receiver.
    env_vars = ([] if not singlesrv_mode else
                [(transperf.path.TRANSPERF_CONTAINER_ROOT_KEY,
                  r_exec.get_container_root_dir())])
    recv_params = [env_vars]
    recv_params.extend([
        'recv.py', '-v',
        '-p', rport,
        '-n', r_exec.host(),
        '--ip_mode', str(ip_mode),
        '-s' if singlesrv_mode else '',
    ])
    if singlesrv_mode:
        recv_params.extend(['--hosts', singlesrv_cfg['hosts']])
    recv_params.extend(ifacecfg_params)
    recv_log = os.path.join(
        transperf.path.get_exp_out_dir(r_exec.get_container_root_dir()),
        'receiver.log')
    recv_params.append('>%s 2>&1' % recv_log)
    rproc = r_exec.bg(shell.py_cmd(*recv_params), use_rootns=use_rootns)

    if rproc.poll():
        _, err, returncode = shell.wait(rproc)
        raise RuntimeError('cannot start receiver: %d: %s' % (returncode, err))

    # Launch the senders.
    sprocs = []
    for s_exec in s_execs:
        env_vars = ([] if not singlesrv_mode else
                    [(transperf.path.TRANSPERF_CONTAINER_ROOT_KEY,
                      s_exec.get_container_root_dir())])
        send_params = [env_vars]
        send_params.extend([
            'send.py', '-v',
            '-p', sport,
            '-n', s_exec.host(),
            '--ip_mode', str(ip_mode),
            '-s' if singlesrv_mode else '',
            '' if save_pcap else '--no_pcap',
            '' if save_kern_debug else '--no_kern_debug',
        ])
        if singlesrv_mode:
            send_params.extend(['--hosts', singlesrv_cfg['hosts']])
        send_params.extend(ifacecfg_params)
        send_log = os.path.join(
            transperf.path.get_exp_out_dir(s_exec.get_container_root_dir()),
            'sender.%s.log' % s_exec.host())
        send_params.append('>%s 2>&1' % send_log)
        sproc = s_exec.bg(shell.py_cmd(*send_params), use_rootns=use_rootns)
        if sproc.poll():
            # Collect the dead sender's own output. Previously this branch
            # read `err` before it was ever assigned, so a failed sender
            # surfaced as UnboundLocalError instead of RuntimeError.
            _, err, returncode = shell.wait(sproc)
            raise RuntimeError('cannot start sender: %d: %s' %
                               (returncode, err))
        sprocs.append(sproc)

    # Sleep for 500ms for each machine and let the receiver and senders
    # start.
    grace_period = 0.5 * (len(s_execs) + 1)
    LOG.debug('sleeping for %s seconds', grace_period)
    time.sleep(grace_period)

    # Run the orchestrator on the receiver and block until it is done.
    r_addr = '%s:%d' % (r_exec.host(), rport)
    s_addrs = ['%s:%d' % (s_exec.host(), sport) for s_exec in s_execs]
    env_vars = []
    orch_params = [env_vars]
    orch_out_dir = (singlesrv_cfg['out_dir']
                    if singlesrv_mode and not singlesrv_local else out_dir)
    orch_log_dir = (os.path.join(orch_out_dir, '__out')
                    if singlesrv_mode and not singlesrv_local
                    else _root_log_dir())
    r_exec.run('mkdir -p {orch_dir}'.format(orch_dir=orch_out_dir))
    r_exec.run('mkdir -p {orch_log_dir}'.format(orch_log_dir=orch_log_dir))
    orch_params.extend([
        'orch.py', '-v',
        '-r', r_addr,
        '-s', ','.join(s_addrs),
        '--ip_mode', str(ip_mode),
        '--sslog_interval', str(sslog_interval),
        '--out_dir', orch_out_dir
    ])
    if singlesrv_mode:
        orch_params.extend(['--hosts', singlesrv_cfg['hosts']])
    orch_log = os.path.join(orch_log_dir, 'orch.log')
    orch_params.append('>%s 2>&1' % orch_log)
    orch_stdout, err, returncode = r_exec.run(shell.py_cmd(*orch_params))
    LOG.debug('Orch stdout: [%s]', orch_stdout)
    LOG.debug('Orch err: [%s]', err)
    LOG.debug('Orch code: %s', returncode)

    # Always tear down the receiver and senders before reporting an
    # orchestrator failure.
    LOG.debug('terminating recv proc')
    shell.terminate(rproc)
    for sp in sprocs:
        LOG.debug('terminating send proc')
        s_out, s_err, s_ret = shell.terminate(sp)
        LOG.info('Send_ret:[%s]\nSend_out: [%s]\nSend_err: [%s]\n',
                 s_ret, s_out, s_err)

    if err and returncode != 0:
        raise RuntimeError(err)
def _init_servers(r_exec, s_execs, binary_dirs, out_dir, sync, staged_src,
                  singlesrv_cfg):
    """Initializes the receiver and senders.

    Runs per-node cleanup commands, creates the config and interface-config
    directories, then pushes binaries (when syncing), experiment configs,
    interface configs and tc distribution files to the nodes.

    Args:
        r_exec: The receiver executor session.
        s_execs: The sender executor sessions.
        binary_dirs: Where to fetch binaries (e.g., tc, netperf, ...). This
            is a list of directories to search in.
        out_dir: Where to put the data.
        sync: Whether to sync the python files on sender and receiver.
        staged_src: Staged transperf source ready for transfer.
        singlesrv_cfg: Single server mode config params.

    Raises:
        RuntimeError: When encountered a critical error, i.e. a transfer
            exits non-zero with error output.
    """
    # In single server mode we do not use the root namespaces.
    in_singlesrv = singlesrv_cfg['enabled']
    rootns = not in_singlesrv
    nodes = [r_exec] + s_execs

    # Per-node preparation commands, keyed by executor.
    prep_cmds = {}
    for node in nodes:
        node_exp_dir = transperf.path.get_exp_out_dir(
            node.get_container_root_dir())
        steps = [
            '{sudo} pkill -f transperf',
            'rm -rf {exp_dir}'.format(exp_dir=node_exp_dir),
            'mkdir -p {exp_dir}'.format(exp_dir=node_exp_dir),
        ]
        if sync:
            steps.append('mkdir -p ' + transperf.path.get_transperf_home(
                node.get_container_root_dir()))
        prep_cmds[node] = steps

    to_sync = _stage_transperf_binaries(binary_dirs, sync, prep_cmds,
                                        staged_src, nodes)
    LOG.info('Staged files list: %s', to_sync)

    # The preparation commands run one at a time: when in single server
    # mode, trying to run too many commands at the same time intermittently
    # fails.
    for node in nodes:
        for step in prep_cmds[node]:
            node.run(step, use_rootns=rootns)
        if rootns:
            LOG.debug('disabling containers on %s', node.addr())

    # Everything below runs in the background where possible to improve the
    # initial launch time.

    # Create the directory for configuration files. It is needed on the
    # receiver node and, in single server mode, for the orchestrator too.
    config_dir = os.path.join(transperf.path.get_transperf_home(), '__config')
    make_cfg_dir = (
        'rm -rf %(cfg)s && mkdir -p %(cfg)s && rm -rf %(cfg)s/*'
        % {'cfg': config_dir})
    make_cfg_dir = '{sudo} %(cmd)s' % {'cmd': make_cfg_dir}
    dir_jobs = [r_exec.bg(make_cfg_dir, use_rootns=rootns)]
    if in_singlesrv:
        dir_jobs.append(r_exec.bg(make_cfg_dir, use_rootns=True))  # for orch

    # Create the directory for node interface configuration on the receiver
    # and on every sender. NB: orch.py does not need this, so there is no
    # single server special case here.
    node_ifacecfg_dir = os.path.join(transperf.path.get_transperf_home(),
                                     transperf.path.IFACE_CFG_DIR)
    make_iface_dir = '{sudo} mkdir -p %s' % node_ifacecfg_dir
    for node in nodes:
        dir_jobs.append(node.bg(make_iface_dir, use_rootns=rootns))

    # Wait for directory creation/cleanup to complete.
    for job in dir_jobs:
        shell.wait(job)

    transfers = []
    if sync:
        for node in nodes:
            transfers.append(
                node.push_bg(to_sync, transperf.path.get_transperf_home(),
                             use_rootns=rootns))

    # Push configs.
    cfg_items = glob.glob(os.path.join(out_dir, '*.py'))
    transfers.append(r_exec.push_bg(cfg_items, config_dir, use_rootns=rootns))
    if in_singlesrv:
        transfers.append(
            r_exec.push_bg(cfg_items, config_dir, use_rootns=True))

    # Also push the interface config files, if any, to the receiver and to
    # the senders.
    iface_cfgs = glob.glob(
        os.path.join(out_dir, transperf.path.IFACE_CFG_DIR, '*.py'))
    if iface_cfgs:
        for node in nodes:
            transfers.append(
                node.push_bg(iface_cfgs, node_ifacecfg_dir,
                             use_rootns=rootns))

    # Install data files needed for tc distributions.
    dist_files = glob.glob(os.path.join(out_dir, 'data', '*.dist'))
    if dist_files:
        # Special case: tc_lib_dir might or might not be in a
        # node-virtualized directory, and we need to be careful which. The
        # root namespace is the default; it is only avoided when the tc lib
        # directory is virtualized and we are in single server mode.
        tc_lib_dir = transperf.path.tc_lib_dir()
        tc_lib_is_virt = any(
            os.path.commonprefix([pfx, tc_lib_dir]) == pfx
            for pfx in virtsetup.Constants.VIRTUALIZED_PATHS)
        dist_rootns = not (tc_lib_is_virt and in_singlesrv)
        transfers.append(
            r_exec.push_bg(dist_files, transperf.path.tc_lib_dir(),
                           use_rootns=dist_rootns))

    # Wait for transfers to complete.
    for job in transfers:
        _, stderr, status = shell.wait(job)
        if stderr and status != 0:
            raise RuntimeError(stderr)
def gen_xplots(data_dir):
    """Generates xplots for all the experiments in the data directory.

    For every experiment directory this runs tcptrace on each pcap that is
    not directly in the experiment directory, retitles the resulting time
    sequence graphs as `out-<port>.xpl`, writes a combined `out-all.xpl`,
    and archives the generated files into `xplots.tbz2`.

    Args:
        data_dir: The data directory holding the experiments.
    """
    for _, _, _, _, exp_dir in cfgutil.exps(data_dir):
        xpl_paths = []
        conn_info = outparser.ConnInfo([
            os.path.join(d, f)
            for d, f in all_files(exp_dir, name='conn.info')
        ])
        rcv_ip = outparser.RecvInfo(os.path.join(exp_dir, 'R', 'recv.info')).ip
        ports = conn_info.ports()
        all_lines = []

        # Run tcptrace on all pcaps in parallel and wait for completion.
        procs = []
        for d, f in all_files(exp_dir, regex=r'.*\.pcap$'):
            if d == exp_dir:
                continue
            procs.append(
                shell.bg('tcptrace -CRSzxy --output_dir="%s" "%s"' %
                         (d, os.path.join(d, f))))
        for p in procs:
            shell.wait(p)

        for d, _ in all_files(exp_dir, regex=r'.*\.pcap$'):
            for xd, xf in all_files(d, regex=r'.*\.xpl$'):
                # Only process time sequence graphs.
                if '_tsg' not in xf:
                    continue

                # Use a context manager so the handle is always released;
                # it was previously never closed.
                with open(os.path.join(xd, xf)) as xplf:
                    lines = xplf.readlines()

                # The first 3 lines in the xplot are for the title.
                # The last line is the draw command. The rest (3:-1)
                # is data. We save the rest in all_lines in order to
                # create one xplot that contains the time sequence
                # graphs for all flows.
                all_lines += lines[3:-1]

                # Parse the ip and port from the xplot's title. Note that the
                # addresses may be either IPv4 or IPv6.
                parts = lines[2].split('_==>_')[0].split(':')
                ip_base = ':'.join(parts[:-1])
                port = int(parts[-1])
                try:
                    ip = socket.getaddrinfo(ip_base, 0, socket.AF_INET,
                                            socket.SOCK_STREAM,
                                            socket.IPPROTO_TCP)[0][4][0]
                except socket.gaierror:
                    ip = socket.getaddrinfo(ip_base, 0, socket.AF_INET6,
                                            socket.SOCK_STREAM,
                                            socket.IPPROTO_TCP)[0][4][0]

                # If the ip and port are not from this experiment ignore this
                # file.
                if ip == rcv_ip or port not in ports:
                    continue

                # Rewrite the title of the xplot as:
                #   ==> CC -- IP:PORT
                addr, _, cc, _, _, _, _ = conn_info.conn_info(port)
                lines[2] = '==>%s -- %s:%s\n' % (cc, addr, port)

                # Save the file.
                xpath = os.path.join(xd, 'out-%s.xpl' % port)
                xpl_paths.append(xpath)
                with open(xpath, 'w') as oxplf:
                    oxplf.writelines(lines)

        # Prepend the title to all_lines and append the draw command (ie, go).
        all_lines = (['dtime signed\n', 'title\n', '===> All flows\n'] +
                     all_lines + ['go'])
        axpath = os.path.join(exp_dir, 'out-all.xpl')
        xpl_paths.append(axpath)
        with open(axpath, 'w') as axplf:
            axplf.writelines(all_lines)

        shell.run('tar -C %s -cvjf %s %s' %
                  (exp_dir, os.path.join(exp_dir, 'xplots.tbz2'), ' '.join(
                      [os.path.relpath(p, exp_dir) for p in xpl_paths])))
def __do_run(self, start_ts, dur, out_dir):
    """Runs the experiment.

    Starts tcpdump and ss logging, launches every connection and machine
    command at its scheduled start offset, waits for the experiment to end,
    and then writes tool output, connection info, pcaps, module parameters
    and sysctl values into out_dir.

    Args:
        start_ts: When to start the experiment, compared against the current
            UTC time in seconds since the epoch.
        dur: The duration of the experiment.
        out_dir: The output directory. It is removed first if it exists.
    """
    # We wait for 1 second in netperf to establish the control channel.
    dur += 1

    tcpdump_procs, pcap_files = self.__launch_tcpdump()
    self.__truncate_log()

    now = calendar.timegm(datetime.datetime.utcnow().utctimetuple())
    if now < start_ts:
        LOG.debug('sleeping for %s seconds', start_ts - now)
        time.sleep(start_ts - now)

    LOG.info('starting at %s', datetime.datetime.now())
    ss_thread, ss_log_path = self.launch_ss(dur)

    # `wait` tracks how many seconds into the experiment we have slept.
    wait = 0
    live_conns = []
    port = self.__first_port
    tasks = self.__conns + self.__cmds
    tasks.sort(key=lambda t: t.start)
    for t in tasks:
        if t.start > wait:
            delta = t.start - wait
            # TODO(soheil): This may drift. Use an absolute TS instead?
            LOG.info('sleeping til the next connection for %s seconds', delta)
            time.sleep(delta)
            wait += delta

        if isinstance(t, transperf.Conn):
            LOG.info('starting connection %s', t)
            n = t.num
            while n:
                # Make sure the duration of netperf is always 1+ seconds.
                cmd = t.tool.sender_cmd(t, self.__recv, port,
                                        max(1, dur - wait), self.__ip_addr)
                LOG.info('running %s', cmd)
                np_proc = shell.bg(cmd)
                live_conns.append((t, np_proc, port, t.tool))
                port += 1
                n -= 1
        elif isinstance(t, transperf.MachineCommand):
            shell.bg(t.cmd)

    # Wait until the end of the experiment.
    if wait < dur:
        time.sleep(dur - wait)
    ss_thread.join()

    # Collect results.
    LOG.info('saving results in %s', out_dir)
    if os.path.exists(out_dir):
        shutil.rmtree(out_dir)
    os.makedirs(out_dir)

    # Wait for all the tool processes and collect their outputs. The report
    # pieces are gathered in lists and joined once at the end.
    conn_infos = []
    tool_stats = []
    for conn, np_proc, port, tool in live_conns:
        out, err, _ = shell.wait(np_proc)
        if err:
            # TODO(soheil): cleanup the output directories.
            LOG.error('error in netperf of %s: %s', conn, err)
        throughput = tool.throughput(out)
        tool_stats.append('%s of %s:\n%s\n' % (tool, conn, out))
        conn_infos.append('%d=%s,%s,%s,%s,%s,%s,%s\n' % (
            port, self.__ip_addr, conn.tool.name(), conn.cc, conn.start,
            conn.dur, throughput, conn.params))

    LOG.debug('experiment successfully concluded')

    # Context managers guarantee the handles are closed even if a write
    # fails.
    with open(os.path.join(out_dir, 'tool.out'), 'w') as npf:
        npf.write(''.join(tool_stats))

    with open(os.path.join(out_dir, 'conn.info'), 'w') as cif:
        cif.write(''.join(conn_infos))

    if ss_log_path:
        shutil.move(ss_log_path, out_dir)

    # Save tcpdump.
    time.sleep(1)
    for proc in tcpdump_procs:
        shell.terminate(proc)
    for f in pcap_files:
        shutil.move(f, out_dir)

    # Save the module parameters of every congestion control in use.
    mod_params = ''
    for cc in set([c.cc for c in self.__conns]):
        params_dir = self.__cc_parameters_path(cc)
        mod_params += shell.run('grep . %s/*' % params_dir)[0]
        mod_params += '\n'
    with open(os.path.join(out_dir, 'mod.out'), 'w') as modf:
        modf.write(mod_params)

    # Save sysctl.
    sysctl = shell.run('sysctl -a')[0]
    with open(os.path.join(out_dir, 'sys.out'), 'w') as sysf:
        sysf.write(sysctl)

    # Save kernel debug logs if commanded to do so.
    if self.__save_kern_debug:
        self.__save_kernel_debug_logs(out_dir)
    else:
        LOG.info('Not saving kernel debug log per user request.')