Example #1
def _collect_results(r_exec, s_execs, out_dir, singlesrv_cfg):
    """Collects the output on the sender and receiver machines.

    Args:
        r_exec: The receiver executor session.
        s_execs: The sender executor sessions.
        out_dir: Where to put the data.
        singlesrv_cfg: Single server mode config params.
    """
    singlesrv_mode = singlesrv_cfg['enabled']
    singlesrv_local = singlesrv_cfg['local']
    use_rootns = not singlesrv_mode

    # We ignore errors in the pull to make sure we collect any existing
    # results from the experiment. The partial results are useful in
    # debugging transperf and the experiment.
    # Also, since this is the last stage of the experiment, ignoring errors
    # does not have adverse effects.
    procs = []
    for target in [r_exec] + s_execs:
        procs.append(
            target.pull_bg(transperf.path.get_exp_out_dir(),
                           out_dir,
                           use_rootns=use_rootns))
    if singlesrv_mode:
        if singlesrv_local:
            orch_log = os.path.join(_root_log_dir(), 'orch.log')
            procs.append(r_exec.pull_bg(orch_log, out_dir))
        else:
            # We'll need orch.log and exp.info for each experiment.
            # Since there are multiple experiments, pull the entire tree.
            procs.append(
                r_exec.pull_bg(os.path.join(singlesrv_cfg['out_dir'], '__out'),
                               out_dir))
    for p in procs:
        shell.wait(p)

    procs = []
    if not singlesrv_mode:
        # Only cleanup if not in single server mode. In single server mode, we
        # may want to go back and look at outputs for a specific run.
        for target in [r_exec] + s_execs:
            cleanup_cmd = '{sudo} rm -rf %(out)s/*' % {
                'out': transperf.path.get_exp_out_dir()
            }
            procs.append(target.bg(cleanup_cmd, use_rootns=use_rootns))
    for p in procs:
        shell.wait(p)
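
A self-contained sketch of the fan-out/fan-in pattern used above, with plain
subprocess in place of the executor and shell helpers (the paths and the rsync
invocation are hypothetical):

import subprocess

# Launch one background copy per source, then reap them all; errors are
# intentionally ignored, mirroring the comment in _collect_results.
procs = [subprocess.Popen(['rsync', '-a', src, '/tmp/results/'])
         for src in ('/srv/r/out', '/srv/s1/out')]
for p in procs:
    p.wait()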
Example #2
def _merge_pcaps(exp_dir):
    """Merges all the pcaps in the experiment directory."""
    pcaps = {}
    for d, f in all_files(exp_dir, regex=r'.*\.pcap$'):
        if d == exp_dir:
            continue
        if f not in pcaps:
            pcaps[f] = []
        pcaps[f].append(os.path.join(d, f))
    procs = []
    for f in pcaps:
        procs.append(
            shell.bg('mergecap -F pcap -w %s %s' %
                     (os.path.join(exp_dir, 'all.' + f), ' '.join(pcaps[f]))))
    for p in procs:
        shell.wait(p)
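
The grouping step above can be exercised in isolation; a runnable sketch with
hypothetical paths:

import os

# Group capture files by basename, as _merge_pcaps does.
pcaps = {}
for d, f in [('/exp/s1', 'eth0.pcap'), ('/exp/s2', 'eth0.pcap')]:
    pcaps.setdefault(f, []).append(os.path.join(d, f))
assert pcaps == {'eth0.pcap': ['/exp/s1/eth0.pcap', '/exp/s2/eth0.pcap']}
# Each group is then merged per capture name:
#   mergecap -F pcap -w <exp_dir>/all.eth0.pcap /exp/s1/eth0.pcap /exp/s2/eth0.pcap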
Example #3
    def pull(self, srcs, dst, dst_is_dir=True, ignore_errors=False):
        """Fetches a file or a directory from the remote host using rsync.

        Args:
            srcs: The source path(s). Note: srcs should not include the
                  conn_string.
            dst: The destination path.
            dst_is_dir: We are specifying a directory to copy into, not a file.
            ignore_errors: If True, do not raise an IOError when rsync fails.

        Raises:
            IOError: If rsync fails to copy the file/directory and
                     ignore_errors is False.
        """
        _, err, returncode = shell.wait(self.pull_bg(srcs, dst, dst_is_dir))
        if err and returncode != 0 and not ignore_errors:
            raise IOError(err)
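
A usage sketch; the session object below is hypothetical, since its
construction is not shown in the source:

# exec_session: any executor exposing pull()/pull_bg() as defined above.
try:
    exec_session.pull(['/remote/exp/out'], '/local/results')
except IOError as err:
    print('rsync failed: %s' % err)  # partial files may still have copied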
Example #4
def _start_servers(r_exec, s_execs, rport, sport, sslog_interval, ifacecfg_rel,
                   singlesrv_cfg, ip_mode, save_pcap, save_kern_debug,
                   out_dir):
    """Starts servers on the receiver and on the sender machines.

    Args:
        r_exec: The receiver executor session.
        s_execs: The sender executor sessions.
        rport: The port used by the receiver.
        sport: The port used by the senders.
        sslog_interval: The time interval in seconds to sample ss log.
        ifacecfg_rel: The name of the staged per-node interface config or None.
        singlesrv_cfg: Single server mode config params.
        ip_mode: Whether we are using IPv4 or IPv6.
        save_pcap: Whether we save pcaps or not.
        save_kern_debug: Whether we scrape/save kernel debug info or not.
        out_dir: Output directory for experiment.

    Raises:
        RuntimeError: If a critical error is encountered.
    """
    singlesrv_mode = singlesrv_cfg['enabled']
    singlesrv_local = singlesrv_cfg['local']
    use_rootns = not singlesrv_mode
    ifacecfg_params = []
    if ifacecfg_rel is not None:
        node_ifacecfg_dir = os.path.join(transperf.path.get_transperf_home(),
                                         transperf.path.IFACE_CFG_DIR)
        ifacecfg = os.path.join(node_ifacecfg_dir, ifacecfg_rel)
        ifacecfg_params = ['--ifacecfg', ifacecfg]

    env_vars = ([] if not singlesrv_mode else
                [(transperf.path.TRANSPERF_CONTAINER_ROOT_KEY,
                  r_exec.get_container_root_dir())])
    recv_params = [env_vars]
    recv_params.extend([
        'recv.py',
        '-v',
        '-p',
        rport,
        '-n',
        r_exec.host(),
        '--ip_mode',
        str(ip_mode),
        '-s' if singlesrv_mode else '',
    ])
    if singlesrv_mode:
        recv_params.extend(['--hosts', singlesrv_cfg['hosts']])
    recv_params.extend(ifacecfg_params)
    recv_log = os.path.join(
        transperf.path.get_exp_out_dir(r_exec.get_container_root_dir()),
        'receiver.log')
    recv_params.append('>%s 2>&1' % recv_log)

    rproc = r_exec.bg(shell.py_cmd(*recv_params), use_rootns=use_rootns)

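    # poll() on the Popen-style handle returns None while the child is still
    # running, so a truthy exit status here means the receiver died at launch.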
    if rproc.poll():
        _, err, returncode = shell.wait(rproc)
        raise RuntimeError('cannot start receiver: %d: %s' % (returncode, err))

    sprocs = []
    for s_exec in s_execs:
        env_vars = ([] if not singlesrv_mode else
                    [(transperf.path.TRANSPERF_CONTAINER_ROOT_KEY,
                      s_exec.get_container_root_dir())])
        send_params = [env_vars]
        send_params.extend([
            'send.py',
            '-v',
            '-p',
            sport,
            '-n',
            s_exec.host(),
            '--ip_mode',
            str(ip_mode),
            '-s' if singlesrv_mode else '',
            '' if save_pcap else '--no_pcap',
            '' if save_kern_debug else '--no_kern_debug',
        ])
        if singlesrv_mode:
            send_params.extend(['--hosts', singlesrv_cfg['hosts']])
        send_params.extend(ifacecfg_params)
        send_log = os.path.join(
            transperf.path.get_exp_out_dir(s_exec.get_container_root_dir()),
            'sender.%s.log' % s_exec.host())
        send_params.append('>%s 2>&1' % send_log)
        sproc = s_exec.bg(shell.py_cmd(*send_params), use_rootns=use_rootns)

        if sproc.poll():
            _, err, returncode = shell.wait(sproc)
            raise RuntimeError(
                'cannot start sender: %d: %s' % (returncode, err))
        sprocs.append(sproc)

    # Sleep 500 ms per machine to let the receiver and senders start.
    grace_period = 0.5 * (len(s_execs) + 1)
    LOG.debug('sleeping for %s seconds', grace_period)
    time.sleep(grace_period)
    r_addr = '%s:%d' % (r_exec.host(), rport)
    s_addrs = ['%s:%d' % (s_exec.host(), sport) for s_exec in s_execs]
    env_vars = []
    orch_params = [env_vars]
    orch_out_dir = (singlesrv_cfg['out_dir']
                    if singlesrv_mode and not singlesrv_local else out_dir)
    orch_log_dir = (os.path.join(orch_out_dir, '__out')
                    if singlesrv_mode and not singlesrv_local
                    else _root_log_dir())
    r_exec.run('mkdir -p {orch_dir}'.format(orch_dir=orch_out_dir))
    r_exec.run('mkdir -p {orch_log_dir}'.format(orch_log_dir=orch_log_dir))
    orch_params.extend([
        'orch.py', '-v', '-r', r_addr, '-s', ','.join(s_addrs), '--ip_mode',
        str(ip_mode), '--sslog_interval',
        str(sslog_interval), '--out_dir', orch_out_dir
    ])
    if singlesrv_mode:
        orch_params.extend(['--hosts', singlesrv_cfg['hosts']])
    orch_log = os.path.join(orch_log_dir, 'orch.log')
    orch_params.append('>%s 2>&1' % orch_log)
    orch_stdout, err, returncode = r_exec.run(shell.py_cmd(*orch_params))
    LOG.debug('Orch stdout: [%s]', orch_stdout)
    LOG.debug('Orch err: [%s]', err)
    LOG.debug('Orch code: %s', returncode)
    LOG.debug('terminating recv proc')
    shell.terminate(rproc)
    for sp in sprocs:
        LOG.debug('terminating send proc')
        s_out, s_err, s_ret = shell.terminate(sp)
        LOG.info('Send_ret:[%s]\nSend_out: [%s]\nSend_err: [%s]\n', s_ret,
                 s_out, s_err)

    if err and returncode != 0:
        raise RuntimeError(err)
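
For orientation, a sketch of the flat parameter-list style these helpers hand
to shell.py_cmd; py_cmd's actual rendering is not shown in the source, so the
join below is only illustrative:

recv_params = ['recv.py', '-v', '-p', 5001, '-n', 'receiver-host',
               '--ip_mode', '4']
print('python ' + ' '.join(str(p) for p in recv_params))
# -> python recv.py -v -p 5001 -n receiver-host --ip_mode 4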
Example #5
def _init_servers(r_exec, s_execs, binary_dirs, out_dir, sync, staged_src,
                  singlesrv_cfg):
    """Initializes the receiver and senders.

    Args:
        r_exec: The receiver executor session.
        s_execs: The sender executor sessions.
        binary_dirs: Where to fetch binaries (e.g., tc, netperf, ...). This is
            a list of directories to search in.
        out_dir: Where to put the data.
        sync: Whether to sync the python files on sender and receiver.
        staged_src: Staged transperf source ready for transfer.
        singlesrv_cfg: Single server mode config params.

    Raises:
        RuntimeError: If a critical error is encountered.
    """
    # Check if single server mode. If so, we do not use the root namespaces.
    singlesrv_mode = singlesrv_cfg['enabled']
    use_rootns = not singlesrv_mode
    all_targets = [r_exec] + s_execs

    cleanup_cmds = {}
    for target in all_targets:
        cleanup_cmds[target] = ['{sudo} pkill -f transperf']
        tgt_exp_dir = transperf.path.get_exp_out_dir(
            target.get_container_root_dir())
        cleanup_cmds[target].append(
            'rm -rf {exp_dir}'.format(exp_dir=tgt_exp_dir))
        cleanup_cmds[target].append(
            'mkdir -p {exp_dir}'.format(exp_dir=tgt_exp_dir))
        if sync:
            cleanup_cmds[target].append('mkdir -p ' +
                                        transperf.path.get_transperf_home(
                                            target.get_container_root_dir()))

    to_sync = _stage_transperf_binaries(binary_dirs, sync, cleanup_cmds,
                                        staged_src, all_targets)
    LOG.info('Staged files list: %s', to_sync)

    # Background procs improve initial launch time: we run as much as we can
    # in parallel.
    procs = []
    for target in all_targets:
        for cmd in cleanup_cmds[target]:
            # When in single server mode, launching too many commands at once
            # intermittently fails, so run these cleanup commands serially.
            target.run(cmd, use_rootns=use_rootns)
        if not singlesrv_mode:
            LOG.debug('disabling containers on %s', target.addr())

    # Create directory for configuration file.
    config_dir = os.path.join(transperf.path.get_transperf_home(), '__config')
    cfg_dir_make_cmd = 'rm -rf %(cfg)s && mkdir -p %(cfg)s && rm -rf %(cfg)s/*'
    cfg_dir_make_cmd %= {'cfg': config_dir}
    cfg_dir_make_cmd = '{sudo} %(cmd)s' % {'cmd': cfg_dir_make_cmd}
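    # Note: the %-substitutions above leave the literal '{sudo}' intact; it is
    # a placeholder the executor presumably expands (inferred from its use in
    # cleanup_cmds above, not confirmed by the source).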
    # We push it for the receiver node and orchestrator (if single server mode).
    procs.append(r_exec.bg(cfg_dir_make_cmd, use_rootns=use_rootns))
    if singlesrv_mode:
        procs.append(r_exec.bg(cfg_dir_make_cmd, use_rootns=True))  # for orch

    # Create directory for node interface configuration.
    node_ifacecfg_dir = os.path.join(transperf.path.get_transperf_home(),
                                     transperf.path.IFACE_CFG_DIR)
    scp_node_iface_cmd = '{sudo} mkdir -p %s' % node_ifacecfg_dir
    procs.append(r_exec.bg(scp_node_iface_cmd, use_rootns=use_rootns))
    # NB: orch.py does not need this so no single server special case here.

    # We also push ifacecfg to the sender nodes; prepare directories for them.
    for s_exec in s_execs:
        procs.append(s_exec.bg(scp_node_iface_cmd, use_rootns=use_rootns))

    # Wait for directory creation/cleanup to complete.
    for p in procs:
        shell.wait(p)

    procs = []

    if sync:
        for target in all_targets:
            procs.append(
                target.push_bg(to_sync,
                               transperf.path.get_transperf_home(),
                               use_rootns=use_rootns))

    # Push configs.
    cfg_items = glob.glob(os.path.join(out_dir, '*.py'))
    procs.append(r_exec.push_bg(cfg_items, config_dir, use_rootns=use_rootns))
    if singlesrv_mode:
        procs.append(r_exec.push_bg(cfg_items, config_dir, use_rootns=True))

    # Also push the interface config files if any.
    local_ifacecfg_dir = os.path.join(out_dir, transperf.path.IFACE_CFG_DIR)
    iface_cfgs = glob.glob(os.path.join(local_ifacecfg_dir, '*.py'))
    if iface_cfgs:
        procs.append(
            r_exec.push_bg(iface_cfgs,
                           node_ifacecfg_dir,
                           use_rootns=use_rootns))
        # Push ifacecfg to senders too.
        for s_exec in s_execs:
            procs.append(
                s_exec.push_bg(iface_cfgs,
                               node_ifacecfg_dir,
                               use_rootns=use_rootns))

    # Install data files needed for tc distributions.
    dist_files = glob.glob(os.path.join(out_dir, 'data', '*.dist'))
    if dist_files:
        # Special case here; tc_lib_dir might or might not be in a
        # node-virtualized directory, and we need to be careful which.
        use_rootns_dist_files = True  # Default behaviour
        tc_lib_dir = transperf.path.tc_lib_dir()
        tc_lib_is_virt = False
        for pfx in virtsetup.Constants.VIRTUALIZED_PATHS:
            if os.path.commonprefix([pfx, tc_lib_dir]) == pfx:
                tc_lib_is_virt = True
                break
        if tc_lib_is_virt and singlesrv_mode:
            use_rootns_dist_files = False
        procs.append(
            r_exec.push_bg(dist_files,
                           transperf.path.tc_lib_dir(),
                           use_rootns=use_rootns_dist_files))

    # Wait for transfers to complete.
    for p in procs:
        _, err, returncode = shell.wait(p)
        if err and returncode != 0:
            raise RuntimeError(err)
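
The virtualized-path test above hinges on os.path.commonprefix; a runnable
sketch of its behavior with hypothetical paths:

import os.path

# commonprefix() compares strings character by character, not path components.
assert os.path.commonprefix(['/var/virt', '/var/virt/tc_lib']) == '/var/virt'
# Caveat: '/var/virt' is also a "prefix" of '/var/virtual', so component-safe
# code would use os.path.commonpath(); the loop above assumes the configured
# prefixes make that ambiguity harmless.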
Example #6
def gen_xplots(data_dir):
    """Generates xplots for all the experiments in the data directory."""
    for _, _, _, _, exp_dir in cfgutil.exps(data_dir):
        xpl_paths = []
        conn_info = outparser.ConnInfo([
            os.path.join(d, f) for d, f in all_files(exp_dir, name='conn.info')
        ])
        rcv_ip = outparser.RecvInfo(os.path.join(exp_dir, 'R', 'recv.info')).ip
        ports = conn_info.ports()
        all_lines = []
        procs = []
        for d, f in all_files(exp_dir, regex=r'.*\.pcap$'):
            if d == exp_dir:
                continue
            procs.append(
                shell.bg('tcptrace -CRSzxy --output_dir="%s" "%s"' %
                         (d, os.path.join(d, f))))
        for p in procs:
            shell.wait(p)

        for d, f in all_files(exp_dir, regex=r'.*\.pcap$'):
            for xd, xf in all_files(d, regex=r'.*\.xpl$'):
                # Only process time sequence graphs.
                if xf.find('_tsg') == -1:
                    continue

                with open(os.path.join(xd, xf)) as xplf:
                    lines = xplf.readlines()

                # The first 3 lines in the xplot are for the title.
                # The last line is the draw command. The rest (3:-1)
                # is data. We save the rest in all_lines in order to
                # create one xplot that contains the time sequence
                # graphs for all flows.
                all_lines += lines[3:-1]

                # Parse the ip and port from the xplot's title. Note that the
                # addresses may be either IPv4 or IPv6.
                parts = lines[2].split('_==>_')[0].split(':')
                ip_base = ':'.join(parts[:-1])
                port = int(parts[-1])
                try:
                    ip = socket.getaddrinfo(ip_base, 0, socket.AF_INET,
                                            socket.SOCK_STREAM,
                                            socket.IPPROTO_TCP)[0][4][0]
                except socket.gaierror:
                    ip = socket.getaddrinfo(ip_base, 0, socket.AF_INET6,
                                            socket.SOCK_STREAM,
                                            socket.IPPROTO_TCP)[0][4][0]

                # If the ip and port are not from this experiment ignore this
                # file.
                if ip == rcv_ip or port not in ports:
                    continue

                # Rewrite the title of the xplot as:
                #   ==> CC -- IP:PORT
                addr, _, cc, _, _, _, _ = conn_info.conn_info(port)
                lines[2] = '==>%s -- %s:%s\n' % (cc, addr, port)

                # Save the file.
                xpath = os.path.join(xd, 'out-%s.xpl' % port)
                xpl_paths.append(xpath)
                with open(xpath, 'w') as oxplf:
                    oxplf.writelines(lines)

        # Prepend the title to all_lines and append the draw command (i.e., 'go').
        all_lines = (['dtime signed\n', 'title\n', '===> All flows\n'] +
                     all_lines + ['go'])
        axpath = os.path.join(exp_dir, 'out-all.xpl')
        xpl_paths.append(axpath)
        with open(axpath, 'w') as axplf:
            axplf.writelines(all_lines)

        shell.run('tar -C %s -cvjf %s %s' %
                  (exp_dir, os.path.join(exp_dir, 'xplots.tbz2'), ' '.join(
                      [os.path.relpath(p, exp_dir) for p in xpl_paths])))
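
The title parsing above keeps IPv6 addresses intact by rejoining everything
before the last colon; a runnable sketch with an invented title line:

title = 'fe80::1:33445_==>_10.0.0.1:5001'
parts = title.split('_==>_')[0].split(':')
ip_base, port = ':'.join(parts[:-1]), int(parts[-1])
assert (ip_base, port) == ('fe80::1', 33445)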
Example #7
    def __do_run(self, start_ts, dur, out_dir):
        """Runs the experiment.

        Args:
            start_ts: When to start the experiment.
            dur: The duration of the experiment.
            out_dir: The output directory.
        """
        # Add 1 second to let netperf establish its control channel.
        dur += 1

        tcpdump_procs, pcap_files = self.__launch_tcpdump()
        self.__truncate_log()

        now = calendar.timegm(datetime.datetime.utcnow().utctimetuple())
        if now < start_ts:
            LOG.debug('sleeping for %s seconds', start_ts - now)
            time.sleep(start_ts - now)

        LOG.info('starting at %s', datetime.datetime.now())
        ss_thread, ss_log_path = self.launch_ss(dur)
        wait = 0
        live_conns = []
        port = self.__first_port

        tasks = self.__conns + self.__cmds
        tasks.sort(key=lambda t: t.start)

        for t in tasks:
            if t.start > wait:
                delta = t.start - wait
                # TODO(soheil): This may drift. Use an absolute TS instead?
                LOG.info('sleeping until the next connection for %s seconds',
                         delta)
                time.sleep(delta)
                wait += delta

            if isinstance(t, transperf.Conn):
                LOG.info('starting connection %s', t)
                n = t.num
                while n:
                    # Make sure the duration of netperf is always 1+ seconds.
                    cmd = t.tool.sender_cmd(t, self.__recv, port,
                                            max(1, dur - wait), self.__ip_addr)
                    LOG.info('running %s', cmd)
                    np_proc = shell.bg(cmd)
                    live_conns.append((t, np_proc, port, t.tool))
                    port += 1
                    n -= 1
            elif isinstance(t, transperf.MachineCommand):
                shell.bg(t.cmd)

        # Wait until the end of the experiment.
        if wait < dur:
            time.sleep(dur - wait)
        ss_thread.join()

        # Collect results.
        LOG.info('saving results in %s', out_dir)
        if os.path.exists(out_dir):
            shutil.rmtree(out_dir)
        os.makedirs(out_dir)

        # Kill all the tool processes and collect their outputs.
        conn_infos = ''
        tool_stats = ''
        for conn, np_proc, port, tool in live_conns:
            out, err, _ = shell.wait(np_proc)
            if err:
                # TODO(soheil): cleanup the output directories.
                LOG.error('error in netperf of %s: %s', conn, err)
            throughput = tool.throughput(out)
            tool_stats += '%s of %s:\n%s\n' % (tool, conn, out)
            conn_infos += '%d=%s,%s,%s,%s,%s,%s,%s\n' % (port,
                                                         self.__ip_addr,
                                                         conn.tool.name(),
                                                         conn.cc,
                                                         conn.start,
                                                         conn.dur,
                                                         throughput,
                                                         conn.params)

        LOG.debug('experiment successfully concluded')

        with open(os.path.join(out_dir, 'tool.out'), 'w') as npf:
            npf.write(tool_stats)

        with open(os.path.join(out_dir, 'conn.info'), 'w') as cif:
            cif.write(conn_infos)

        if ss_log_path:
            shutil.move(ss_log_path, out_dir)

        # Save tcpdump.
        time.sleep(1)
        for proc in tcpdump_procs:
            shell.terminate(proc)
        for f in pcap_files:
            shutil.move(f, out_dir)

        # Save sysctl.
        mod_params = ''
        for cc in set([c.cc for c in self.__conns]):
            params_dir = self.__cc_parameters_path(cc)
            mod_params += shell.run(
                'grep . %s/*' % params_dir)[0]
            mod_params += '\n'
        with open(os.path.join(out_dir, 'mod.out'), 'w') as modf:
            modf.write(mod_params)

        sysctl = shell.run('sysctl -a')[0]
        with open(os.path.join(out_dir, 'sys.out'), 'w') as sysf:
            sysf.write(sysctl)

        # Save kernel debug logs if commanded to do so.
        if self.__save_kern_debug:
            self.__save_kernel_debug_logs(out_dir)
        else:
            LOG.info('Not saving kernel debug log per user request.')
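
The TODO in the scheduling loop notes that relative sleeps can drift; a sketch
of the absolute-timestamp variant it hints at (an illustration, not the
project's actual fix):

import time

def run_at_offsets(tasks, start_fn):
    """Starts each task at an absolute offset from a fixed epoch."""
    epoch = time.time()
    for task in sorted(tasks, key=lambda t: t.start):
        delay = epoch + task.start - time.time()
        if delay > 0:
            time.sleep(delay)  # immune to drift accumulated inside start_fn
        start_fn(task)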