def create_ssh_connections(config):
    """Open a connection to every host that plays a role in the experiment.

    Only the ``sender``, ``inbox``, ``outbox`` and ``receiver`` entries of
    ``config['topology']`` are visited. The entry flagged ``self`` is reached
    through ``localhost`` and is also stored as ``config['self']``; any other
    host is connected to once, even when it serves several roles.

    Returns:
        ``(conns, machines)``: ``conns`` maps hostname -> connection and
        ``machines`` maps role -> connection.
    """
    agenda.task("Creating SSH connections")
    cli = config['args']
    roles_of_interest = ("sender", "inbox", "outbox", "receiver")
    selected = [
        (role, node)
        for role, node in config['topology'].items()
        if role in roles_of_interest
    ]

    by_host = {}
    by_role = {}
    for role, node in selected:
        host = node['name']
        runs_here = 'self' in node and node['self']
        if runs_here:
            agenda.subtask(host)
            local_conn = ConnectionWrapper(
                'localhost',
                nickname=role,
                dry=cli.dry_run,
                verbose=cli.verbose,
                interact=cli.interact,
            )
            by_host[host] = local_conn
            config['self'] = local_conn
        elif host not in by_host:
            agenda.subtask(host)
            # Optional per-node SSH settings; None when not configured.
            ssh_user = node['user'] if 'user' in node else None
            ssh_port = node['port'] if 'port' in node else None
            by_host[host] = ConnectionWrapper(
                host,
                nickname=role,
                user=ssh_user,
                port=ssh_port,
                dry=cli.dry_run,
                verbose=cli.verbose,
                interact=cli.interact,
            )
        by_role[role] = by_host[host]
    return (by_host, by_role)
def start_server(srv_addr, srv_port, ghostunnel, burrito_root, n):
    """Launch the rpcbench server in the background, locally or over ssh.

    ``srv_addr`` may carry a ``:suffix``; only the part before the first
    colon is used. For ``127.0.0.1`` the launch script is run from the
    working tree. For any other address the server binary and the launch
    script are first copied to the remote home directory with ``scp`` and
    then started through ``ssh``.

    ``ghostunnel`` and ``burrito_root`` enable the corresponding script
    arguments when they are not None; ``n == 'off'`` adds
    ``--no-negotiation``.
    """
    agenda.subtask(f"encr arg: {ghostunnel}")
    agenda.subtask(f"bertha arg: {burrito_root}")
    agenda.subtask(f"neg arg: {n}")

    neg = '--no-negotiation' if n == 'off' else ''
    srv_addr = srv_addr.split(":")[0]
    encr_arg = "none" if ghostunnel is None else f"{ghostunnel}"
    burrito_root_arg = "" if burrito_root is None else "--burrito-root=/burrito"

    run_locally = srv_addr == '127.0.0.1'
    if run_locally:
        agenda.task("local rpcbench-server")
        cmd = f"./scripts/start-rpcbench-server.sh \
                ./target/release \
                {srv_port} \
                {encr_arg} \
                {burrito_root_arg} \
                {neg} \
                &"
    else:
        agenda.task("remote rpcbench-server")
        agenda.subtask("copying binary + script")
        for artifact in ("./target/release/bincode-pingserver",
                         "./scripts/start-rpcbench-server.sh"):
            sh.run(f"scp {artifact} {srv_addr}:", shell=True)
        cmd = f"ssh {srv_addr} ./start-rpcbench-server.sh \
                . \
                {srv_port} \
                '{encr_arg}' \
                {burrito_root_arg} \
                {neg} \
                &"

    agenda.subtask("running launch script")
    agenda.subtask(cmd)
    sh.run(cmd, shell=True)
    agenda.subtask("launched")
def start_server_unix(neg_arg):
    """Start the local unix-socket rpcbench server in the background.

    ``--no-negotiation`` is passed to the launch script only when
    ``neg_arg`` is ``'off'``.
    """
    agenda.task("local rpcbench-server unix")
    if neg_arg == 'off':
        neg = '--no-negotiation'
    else:
        neg = ''
    launch_cmd = f"./scripts/start-rpcbench-unix-server.sh ./target/release {neg} &"
    agenda.subtask("running launch script")
    sh.run(launch_cmd, shell=True)
    agenda.subtask("launched")
def read_config(args):
    """Load the TOML config named by ``args.config`` and add derived paths.

    The parsed config gets ``experiment_name`` from ``args.name``, is
    validated with ``check_config``, and is then extended with the
    directories and tool paths that live under
    ``config['structure']['bundler_root']``.

    Returns:
        The augmented config dict.
    """
    agenda.task(f"Reading config file: {args.config}")
    with open(args.config) as cfg_file:
        try:
            raw = cfg_file.read()
            config = toml.loads(raw)
            config['experiment_name'] = args.name
        except Exception as e:
            print(e)
            fatal_error("Failed to parse config")
            raise e
    check_config(config)

    root = config['structure']['bundler_root']
    name = config['experiment_name']
    config['box_root'] = os.path.join(root, "bundler")
    config['experiment_root'] = os.path.join(root, "experiments")
    config['distribution_dir'] = os.path.join(root, 'distributions')
    config['etg_client_path'] = os.path.join(
        root, "empirical-traffic-gen/bin/etgClient")
    config['etg_server_path'] = os.path.join(
        root, "empirical-traffic-gen/run-servers.py")
    # Remote results live under bundler_root; local copies under ./experiments.
    config['experiment_dir'] = os.path.join(config['experiment_root'], name)
    config['local_experiment_dir'] = os.path.join("experiments", name)
    config['ccp_dir'] = os.path.join(root, 'ccp')
    return config
def start_localnamectl(srv_addr, burrito_root):
    """Start ``burrito-localname`` in the background for a local server.

    Nothing happens when ``burrito_root`` is None. Otherwise the daemon is
    started only if the host part of ``srv_addr`` is ``127.0.0.1``; for any
    other host a message is logged and nothing is launched.
    """
    host = srv_addr.split(":")[0]
    if burrito_root is None:
        return

    if host != "127.0.0.1":
        agenda.task(f"no localname-ctl for {host}")
        return

    agenda.task("starting localname-ctl")
    sh.run("sudo RUST_LOG=debug ./target/release/burrito-localname -f &", shell=True)
def check_ccp_alg(config, node):
    """Bring every configured CCP algorithm on ``node`` to the requested revision.

    For each entry of ``config['ccp']`` this clones the repository if its
    directory is missing, switches to the configured branch, pulls and/or
    checks out the configured commit (``'latest'`` means "whatever the pull
    brings"), and for algorithms whose ``language`` is ``rust`` rebuilds the
    binary with cargo when the code changed or the binary is missing.

    Every git/cargo step is wrapped in ``expect`` with a step-specific
    failure message.
    """
    # git changed this message from "Already up-to-date." to
    # "Already up to date." (Git 2.15); accept both so that an unchanged
    # checkout is not rebuilt needlessly on newer git versions.
    up_to_date_markers = ('Already up to date.', 'Already up-to-date.')

    for alg, details in config['ccp'].items():
        agenda.task(alg)
        alg_dir = get_ccp_alg_dir(config, alg)

        if not node.file_exists(alg_dir):
            expect(
                node.run("git clone {} {}".format(details['repo'], alg_dir)),
                "node failed to clone {}".format(alg))

        branch = node.run(
            "git -C {} rev-parse --abbrev-ref HEAD".format(alg_dir)
        ).stdout.strip()
        if branch != details['branch']:
            expect(
                node.run("git -C {} checkout {}".format(
                    alg_dir, details['branch'])),
                "node failed to checkout branch {} of {}".format(
                    details['branch'], alg))

        commit = node.run(
            "git -C {} rev-parse HEAD".format(alg_dir)).stdout.strip()

        should_recompile = False
        # A configured commit that is a substring of HEAD counts as "already
        # there"; 'latest' never matches and therefore always pulls.
        if details['commit'] not in commit:
            pull = expect(
                node.run("git -C {} pull".format(alg_dir)),
                "node failed to pull latest code for {}".format(alg)
            ).stdout.strip()
            if details['commit'] == 'latest':
                if not any(marker in pull for marker in up_to_date_markers):
                    should_recompile = True
            else:
                expect(
                    node.run("git -C {} checkout {}".format(
                        alg_dir, details['commit'])),
                    "node failed to checkout commit {} of {}".format(
                        details['commit'], alg))
                should_recompile = True

        if details['language'] == 'rust':
            ccp_binary = get_ccp_binary_path(config, alg)
            if not node.file_exists(ccp_binary):
                print("could not find ccp binary")
                should_recompile = True

            if should_recompile:
                new_commit = node.run(
                    "git -C {} rev-parse HEAD".format(alg_dir)).stdout.strip()
                if commit != new_commit:
                    print("updated {}: {} -> {}".format(
                        alg, commit[:6], new_commit[:6]))

                agenda.subtask("compiling ccp algorithm")
                # Build profile is inferred from the expected binary path.
                build_flag = '--release' if 'release' in ccp_binary else ''
                expect(
                    node.run("~/.cargo/bin/cargo build {}".format(build_flag),
                             wd=alg_dir),
                    "node failed to build {}".format(alg))
def update_sysctl(machines, config):
    """Apply every ``config['sysctl']`` setting on the four experiment nodes.

    Does nothing when the config has no ``sysctl`` section. Each key/value
    pair is written with ``sysctl -w`` under sudo on every connection whose
    role is ``sender``, ``inbox``, ``outbox`` or ``receiver``.
    """
    if 'sysctl' not in config:
        return

    agenda.task("Updating sysctl")
    roles = ("sender", "inbox", "outbox", "receiver")
    targets = set((role, machines[role]) for role in machines if role in roles)
    for name, conn in targets:
        agenda.subtask(f"{name}")
        for key in config['sysctl']:
            value = config['sysctl'][key]
            outcome = conn.run(f"sysctl -w {key}=\"{value}\"", sudo=True)
            expect(outcome, f"Failed to set {key} on {conn.addr}")
def start_redis(machine, use_sudo=False):
    """(Re)start the ``burrito-shard-redis`` docker container on ``machine``.

    Any existing container of that name is force-removed first. After
    starting, ``docker ps`` is grepped to confirm that the container is
    running. ``use_sudo`` is accepted but not read; every command runs
    with ``sudo=True``.

    Returns:
        The redis address as ``"<machine.alt>:6379"``.
    """
    container = "burrito-shard-redis"
    machine.run(f"docker rm -f {container}", sudo=True)

    agenda.task("Starting redis")
    started = machine.run("docker run \
            --name burrito-shard-redis \
            -d -p 6379:6379 redis:6", sudo=True, wd="~/burrito")
    check(started, "start redis", machine.addr)

    listed = machine.run(f"docker ps | grep {container}", sudo=True)
    check(listed, "start redis", machine.addr)

    agenda.subtask(f"Started redis on {machine.host}")
    return f"{machine.alt}:6379"
def setup_routing(self, config):
    r"""Install the static routes that force traffic through the boxes.

    sender --> inbox --> (mahimahi --> outbox    )
                         (         \             )
                         (          -> receiver  )

    The sender routes the receiver address via the inbox (with the
    configured ``initcwnd``, default 10), the inbox forwards in both
    directions, and the outbox routes the sender address back via the
    inbox. IP forwarding is switched on at the inbox and the outbox.
    """
    agenda.task("Setting up routing tables")
    machines = self.machines

    initcwnd = config['topology']['sender'].get('initcwnd', 10)

    agenda.subtask("sender")
    sender_addr = get_iface(config, 'sender')['addr']
    receiver_addr = get_iface(config, 'receiver')['addr']
    inbox_addr = get_iface(config, 'inbox')['addr']
    sender_route = (
        f"ip route del {receiver_addr}; "
        f"ip route add {receiver_addr} via {inbox_addr} src {sender_addr} initcwnd {initcwnd}"
    )
    expect(machines['sender'].run(sender_route, sudo=True),
           "Failed to set routing tables at sender")

    agenda.subtask("inbox")
    expect(machines['inbox'].run("sysctl net.ipv4.ip_forward=1", sudo=True),
           "Failed to set IP forwarding at inbox")
    # Both directions leave through the same inbox device.
    inbox_dev = get_iface(config, 'inbox')['dev']
    forward_route = f"ip route del {receiver_addr}; ip route add {receiver_addr} dev {inbox_dev}"
    expect(machines['inbox'].run(forward_route, sudo=True),
           "Failed to set forward route at inbox")
    reverse_route = f"ip route del {sender_addr}; ip route add {sender_addr} dev {inbox_dev}"
    expect(machines['inbox'].run(reverse_route, sudo=True),
           "Failed to set reverse route at inbox")

    agenda.subtask("outbox")
    outbox_route = f"ip route del {sender_addr}; ip route add {sender_addr} via {inbox_addr}"
    expect(machines['outbox'].run(outbox_route, sudo=True),
           "Failed to set routing tables at outbox")
    expect(machines['outbox'].run("sysctl net.ipv4.ip_forward=1", sudo=True),
           "Failed to set IP forwarding at outbox")
def parse_outputs(config, replot=False, interact=False, graph_kwargs=None):
    """Parse all logs of one experiment and render the R Markdown report.

    Args:
        config: experiment config; ``local_experiment_dir`` and
            ``structure.bundler_root`` are read.
        replot: forwarded to the individual log parsers.
        interact: accepted for compatibility; not read by this function.
        graph_kwargs: optional keyword arguments forwarded to ``write_rmd``.
            Its ``downsample`` entry, when present, is also used as the
            sample rate for the ccp and mahimahi parsers (default 1).
    """
    # A fresh dict per call instead of a shared mutable default argument.
    if graph_kwargs is None:
        graph_kwargs = {}

    experiment_root = os.path.abspath(os.path.expanduser(config['local_experiment_dir']))
    agenda.task(f'parsing experiment_root: {experiment_root}')

    sample_rate = graph_kwargs.get('downsample', 1)

    global_out_fname, num_ccp = parse_ccp_logs(experiment_root, sample_rate, replot)
    parse_mahimahi_logs(experiment_root, sample_rate, replot, config['structure']['bundler_root'])
    parse_etg_logs(experiment_root, replot)
    write_rmd(experiment_root, global_out_fname, num_ccp, **graph_kwargs)
def disable_tcp_offloads(config, machines):
    """Switch TSO, GSO and GRO off on every interface of the experiment nodes.

    Covers the ``sender``, ``inbox``, ``outbox`` and ``receiver`` roles and
    runs ``ethtool -K`` under sudo for each device listed in the node's
    ``ifaces``.
    """
    agenda.task("Turn off TSO, GSO, and GRO")
    roles = ("sender", "inbox", "outbox", "receiver")
    targets = set((role, machines[role]) for role in machines if role in roles)
    for name, conn in targets:
        agenda.subtask(name)
        for iface in config['topology'][name]['ifaces']:
            ethtool_cmd = "ethtool -K {} tso off gso off gro off".format(iface['dev'])
            expect(conn.run(ethtool_cmd, sudo=True),
                   "Failed to turn off optimizations")
def check_existing_experiment(driver):
    """Open the first experiment listed on the CloudLab dashboard, if any.

    Returns:
        The result of ``get_machines_from_experiment(driver)`` for the first
        listed experiment, or None when the dashboard shows no experiment
        table or the table has no non-empty rows.
    """
    agenda.task("Check for existing experiment")
    driver.get("https://www.cloudlab.us/user-dashboard.php#experiments")

    try:
        table = driver.find_element_by_id("experiments_table")
    except Exception:
        # A missing table is the normal "nothing running" case. Catching
        # Exception (not a bare except) lets Ctrl-C still interrupt.
        agenda.subfailure("No existing experiment found")
        return None

    # First whitespace-separated word of each non-empty table body.
    names = []
    for body in table.find_elements_by_xpath("//table/tbody"):
        words = body.text.split()
        if words:
            names.append(words[0])

    if not names:
        # Table present but empty: treat like "no experiment" rather than
        # failing with an IndexError below.
        agenda.subfailure("No existing experiment found")
        return None

    agenda.subtask("Existing experiment found")
    driver.find_element_by_link_text(names[0]).click()
    time.sleep(6)  # give the experiment page time to load
    return get_machines_from_experiment(driver)
def prepare_directories(config, conns):
    """Create the local and remote result directories for this experiment.

    If local results already exist, the run must have been started with
    either the skip or the overwrite option; with overwrite the user is
    asked to confirm interactively (``n`` exits with status 1). On every
    connection the remote experiment directory is removed first when
    overwriting, then it and the ccp directory are created. Finally the
    config file is copied into the local experiment directory.
    """
    import shutil

    agenda.task("Preparing result directories")
    cli = config['args']
    local_experiment_dir = config['local_experiment_dir']
    experiment_dir = config['experiment_dir']
    ccp_dir = config['ccp_dir']

    if os.path.exists(local_experiment_dir):
        if not (cli.skip_existing or cli.overwrite_existing):
            fatal_warn("There are existing results for this experiment.\nYou must run this script with either --skip or --overwrite to specify how to proceed.")
        if cli.overwrite_existing:
            # Keep asking until the answer is an explicit y or n.
            while True:
                warn("Overwrite existing results set to TRUE. Are you sure you want to continue? (y/n)", exit=False)
                got = input().strip()
                if got == 'y':
                    break
                elif got == 'n':
                    sys.exit(1)
    os.makedirs(local_experiment_dir, exist_ok=True)

    for addr, conn in conns.items():
        if cli.verbose:
            agenda.subtask(addr)

        if cli.overwrite_existing:
            expect(
                conn.run("rm -rf {}".format(experiment_dir)),
                "Failed to remove existing experiment directory {}".format(experiment_dir)
            )

        expect(
            conn.run("mkdir -p {}".format(experiment_dir)),
            "Failed to create experiment directory {}".format(experiment_dir)
        )
        # Report the directory that actually failed (the ccp dir).
        expect(
            conn.run("mkdir -p {}".format(ccp_dir)),
            "Failed to create ccp directory {}".format(ccp_dir)
        )

    # Keep a copy of the config in the experiment directory for future
    # reference. shutil.copy avoids passing unquoted paths through a shell.
    shutil.copy(cli.config, local_experiment_dir)
def exp(srv_addr, mode, args, n):
    """Run the rpcbench client script once against ``srv_addr``.

    ``srv_addr`` is ``host`` or ``host:exp_host``; when the second part is
    present it is the address the client connects to. Encryption is enabled
    when ``args.ghostunnel`` is set and ``mode`` does not contain ``rel``;
    the burrito root is passed when ``args.burrito_root`` is set and
    ``mode`` contains ``fp``. ``n`` is the negotiation setting.
    """
    pieces = srv_addr.split(":")
    srv_addr = pieces[0]
    exp_addr = pieces[1] if len(pieces) == 2 else srv_addr

    neg = f'--negotiation={n}'
    encrypted = args.ghostunnel and 'rel' not in mode
    encr_arg = f"{args.ghostunnel}" if encrypted else "none"
    with_burrito = args.burrito_root and 'fp' in mode
    burrito_root_arg = "--burrito-root=/burrito" if with_burrito else "none"

    # Classify the target by address; used in the output file name.
    if exp_addr == '127.0.0.1':
        is_local = 'local'
    else:
        is_local = 'remote' if '10.1' in exp_addr else 'farther'

    outfile_arg = f"{is_local}-mode:{mode}-msgs:{args.reqs}-perconn:{args.perconn}-neg:{n}"

    # Drop a leading "user@" if present.
    exp_addr = exp_addr.split('@')[-1]

    if is_local == 'local':
        addr_arg = f"{args.server_port}"
    else:
        addr_arg = f"{exp_addr}:{args.server_port}"

    cmd = f"./scripts/run-rpcbench-client.sh \
        {args.outdir} \
        {addr_arg} \
        -i={args.reqs} \
        --reqs-per-iter={args.perconn} \
        {outfile_arg} \
        {encr_arg} \
        {burrito_root_arg} \
        {neg} \
        "

    agenda.task(f"run client: mode {mode} \
        {exp_addr}:{args.server_port} ({is_local}), \
        {args.reqs} reqs, \
        {args.perconn} /conn, \
        encrypt {encr_arg != 'none'}, \
        negotiation {neg} \
        burrito {burrito_root_arg != 'none'}")
    agenda.subtask(f"outfile: {outfile_arg}")
    sh.run(cmd, shell=True)
def get_interfaces(config, machines):
    """Fill in ``config['topology'][m]['ifaces']`` for nodes that lack it.

    Nodes named ``self`` or that already list ``ifaces`` are only reported.
    For every other node the output of ``ip -4 -o addr`` is matched line by
    line against ``ip_addr_rgx``; non-matching lines and the ``lo`` device
    are dropped.

    Returns:
        The (mutated) config.

    Raises:
        Exception: when no interface is found on a node.
    """
    agenda.section("Get node interfaces")
    topology = config['topology']
    for m, conn in machines.items():
        already_known = m == 'self' or 'ifaces' in topology[m]
        if not already_known:
            agenda.task(conn.addr)
            ifaces_raw = conn.run("ip -4 -o addr").stdout.strip().split("\n")
            found = []
            for line in ifaces_raw:
                hit = ip_addr_rgx.match(line)
                if hit is not None and hit["dev"] != "lo":
                    found.append(hit.groupdict())
            if len(found) == 0:
                raise Exception(
                    f"Could not find ifaces on {conn.addr}: {ifaces_raw}")
            topology[m]['ifaces'] = found

        agenda.subtask(f"{conn.addr}: {topology[m]['ifaces']}")

    return config
def init_repo(config, machines):
    """Clone or update the evaluation repo on every remote node and build it.

    The ``self`` entry is skipped. A node without the repo gets a recursive
    clone into ``config['structure']['bundler_root']``; an existing checkout
    is updated from ``origin cloudlab`` and its submodules are refreshed.
    ``make`` output is redirected to ``<root>/<node>.out.mk`` and
    ``<root>/<node>.err.mk``.
    """
    agenda.section("Init nodes")
    root = config['structure']['bundler_root']
    clone = f'git clone --recurse-submodules https://github.com/bundler-project/evaluation {root}'

    remote_nodes = ((m, conn) for m, conn in machines.items() if m != 'self')
    for m, conn in remote_nodes:
        agenda.task(f"init {m}: {conn.addr}")
        agenda.subtask("cloning eval repo")
        if conn.file_exists(root):
            # previously cloned, update to latest commit
            conn.run(f"cd {root} && git pull origin cloudlab")
            conn.run(f"cd {root} && git submodule update --init --recursive")
        else:
            conn.run(clone)

        agenda.subtask("compiling experiment tools")
        conn.run(
            f"make -C {root}",
            stdout=f"{root}/{m}.out.mk",
            stderr=f"{root}/{m}.err.mk",
        )
def launch(driver):
    """Click through the CloudLab instantiate wizard for the bundler profile.

    The wizard is driven with fixed two-second pauses between steps. The
    user is prompted twice on stdin: once to verify cluster availability
    and once before the final launch. The experiment duration field is set
    to ``16``.

    Returns:
        The result of ``get_machines_from_experiment(driver)`` once
        ``launch_wait`` returns.
    """
    def settle():
        time.sleep(2)

    def click_wizard(anchor):
        driver.execute_script(f"$(\"[href='#{anchor}']\").click()")

    agenda.task("Launch new cloudlab experiment")
    driver.get("https://www.cloudlab.us/instantiate.php#")

    agenda.subtask("Select bundler profile")
    settle()
    driver.find_element_by_id("change-profile").click()
    settle()
    driver.find_element_by_name("bundler-local").click()
    settle()
    driver.find_element_by_id("showtopo_select").click()

    # click through
    for _ in range(2):
        settle()
        click_wizard("next")

    agenda.subprompt("Press [Enter] to verify cluster availability>")
    input()

    settle()
    click_wizard("next")

    settle()
    duration = driver.find_element_by_id("experiment_duration")
    duration.clear()
    driver.find_element_by_id("experiment_duration").send_keys("16")

    agenda.subprompt("Press [Enter] to launch")
    input()

    settle()
    click_wizard("finish")

    agenda.subtask("Launch")
    launch_wait(driver)

    return get_machines_from_experiment(driver)
def login(driver, username, pw):
    """Log in through the page's login form, if one is present.

    Returns silently when the form cannot be filled in (for example because
    no ``login`` element exists) or when the form is gone after submitting.

    Raises:
        Exception: when the login form is still present after submitting,
            which is taken to mean that the credentials were rejected.
    """
    try:
        # Raises if the page has no login element; handled below.
        driver.find_element_by_name("login")

        agenda.task("Login")
        time.sleep(2)
        driver.find_element_by_name("uid").send_keys(username)
        driver.find_element_by_name("password").send_keys(pw)
        driver.find_element_by_name("login").click()
    except Exception:
        # Catch Exception rather than everything, so Ctrl-C and SystemExit
        # are not mistaken for a missing login form.
        agenda.failure("Could not attempt login")
        return

    time.sleep(2)
    try:
        # If the login worked, the form is gone and this lookup raises.
        driver.find_element_by_name("login")
    except Exception:
        return

    agenda.failure("Login attempt failed, check username/password")
    raise Exception("Login failed")
def timerange_get_and_group_paths(tr, outf1, outf_mx):
    """Fetch all paths measured in the time range ``tr`` and write them out.

    Args:
        tr: ``(start, end)`` tuple; it is unpacked into
            ``measurements_in_timerange`` and ``tr[0]`` labels the progress
            messages and is passed to ``add_fields``.
        outf1: output file handed to ``write_result``.
        outf_mx: lock handed to ``write_result``.

    Returns:
        True once the results have been handed to ``write_result``.
    """
    agenda.task(f"{tr[0]} starting")
    ms = measurements_in_timerange(*tr)
    # The generator expression is the sole argument and must not be followed
    # by a trailing comma: that form is a SyntaxError on Python 3.7+.
    per_measurement_paths = (all_paths(m['id']) for m in ms['results'])
    ps = list(add_fields(
        tr[0],
        itertools.chain.from_iterable(per_measurement_paths),
    ))
    agenda.task(f"{tr[0]} writing")
    # Write to the file passed in, not to a module-level ``outf``.
    write_result(ps, outf1, outf_mx)
    agenda.task(f"{tr[0]} done")
    return True
agenda.failure("need at least 2 shards") sys.exit(1) if args.shardtype is None: args.shardtype = ['server'] for t in args.shardtype: if t not in ['client', 'server']: agenda.failure(f"Unknown shardtype {t}") sys.exit(1) for t in args.wrk: if not t.endswith(".access") or '-' not in t: agenda.failure(f"Workload file should be <name>-<concurrency>.access, got {t}") sys.exit(1) agenda.task(f"connecting to lb machine") lb_conn, lb_commit = check_machine(args.lb.split(':')) agenda.task(f"connecting to shard machines") shard_conns, shard_commits = zip(*[check_machine(i.split(':')) for i in args.shard]) shard_conns = list(shard_conns) agenda.task(f"connecting to client machines") client_conns, client_commits = zip(*[check_machine(i.split(':')) for i in args.client]) client_conns = list(client_conns) commits = shard_commits + client_commits if not all(c == lb_commit for c in commits): agenda.failure(f"not all commits equal: {lb_commit}, {shard_commits} {client_commits}") sys.exit(1) if lb_conn.host in ['127.0.0.1', '::1', 'localhost']: agenda.subtask(f"Local conn: {lb_conn.host}/{lb_conn.addr}")
return True def write_result(ps, outf, outf_mx): try: outf_mx.acquire() # write out results for p in ps: outf.write(" ".join(str(s) for s in [p['src_ip'], p['dst_ip'], p['aspair'], p['timeslot'], p['ip-path']]) + '\n') except Exception as e: agenda.failure(e) finally: outf_mx.release() start = int(sys.argv[2]) timerange = (start, start+3600) outf_mx = threading.Lock() # one lock for both files with open(f"{sys.argv[1]}-{start}.data", 'w') as outf: outf.write(" ".join(['srcip', 'dstip', 'aspair', 'timeslot', 'path']) + '\n') with concurrent.futures.ThreadPoolExecutor() as rt: futs = [] ctr = 0 while ctr < int(sys.argv[3]): agenda.task(f"{timerange[0]} submitting") fut = rt.submit(timerange_get_and_group_paths, timerange, outf, outf_mx) futs.append(fut) timerange = (timerange[0] + 3600, timerange[1] + 3600) ctr += 1 for f in futs: f.result()
def write_rmd(experiment_root, csv_name, num_ccp, downsample=None, interact=False, fields="zt, rout, rin, curr_rate, curr_q, elasticity2", rows=None, cols=None, **kwargs):
    """Write ``exp.Rmd`` for an experiment directory and render it to HTML.

    The report contains one mahimahi plot per ``mm-graph.tmp`` found below
    ``experiment_root``, a "Nimbus" plot of ``csv_name`` when ``num_ccp`` is
    non-zero, a flow-completion-time plot when ``fcts.data`` exists, and the
    text of the single ``.toml`` config found in the directory.

    Args:
        experiment_root: experiment directory (``~`` is expanded).
        csv_name: CSV file name, relative to ``experiment_root``, read by the
            Nimbus plot.
        num_ccp: when 0 the Nimbus section is left empty.
        downsample: not read in this function.
        interact: when true the ``ggplotly`` lines are active and the static
            plot lines are commented out in the generated R; otherwise the
            reverse.
        fields: columns gathered into the Nimbus plot.
        rows, cols: turned into ``rows=vars(..)`` / ``cols=vars(..)`` strings.
        **kwargs: not read in this function.

    Raises:
        AssertionError: unless exactly one ``.toml`` file is in the directory.
    """
    experiment_root = os.path.abspath(os.path.expanduser(experiment_root))
    experiment_name = os.path.basename(experiment_root)

    tomls = glob.glob(os.path.join(experiment_root, '*.toml'))
    assert len(tomls) == 1, f"there should be exactly 1 .toml (config) in the experiment directory: {experiment_root} -> {tomls}"
    with open(tomls[0], 'r') as f:
        config = f.read()

    # NOTE(review): wrap_str and grid_str are passed to str.format() below,
    # but no placeholder in the visible templates refers to them.
    wrap_str = rows
    grid = []
    if rows:
        rows = 'rows=vars({})'.format(rows)
        grid.append(rows)
    if cols:
        cols = 'cols=vars({})'.format(cols)
        grid.append(cols)
    grid_str = ','.join(grid)

    # Exactly one of the two prefixes is "#", which comments out either the
    # interactive or the static plot line in the generated R code.
    if interact:
        interact_str = ""
        static_str = "#"
    else:
        interact_str = "#"
        static_str = ""

    def format_title(experiment_name):
        return experiment_name

    # Per-run mahimahi plot; {remove} is "#" when there is no usable
    # ccp_switch.parsed file, which comments out the lines that read it.
    mm_plt_fmt = """
**{title}**
```{{r mm{i}, fig.width=15, fig.align='center', echo=FALSE}}
df_m_{i} <- read.csv("{path}", sep=" ") # header=FALSE, col.names=c("t", "total", "delay","bundle", "cross"))
df_m_{i} <- df_m_{i} %>% gather("measurement", "value", total, delay, bundle, cross)
{remove}df_switch_{i} <- read.csv("{switch_path}", sep=",")
plt_m_{i} <- ggplot(df_m_{i}, aes(x=t, y=value, color=measurement)) +
    geom_line() +
    {remove}geom_rect(data=df_switch_{i}, inherit.aes=FALSE, aes(xmin=xmin,xmax=xmax,ymin=0,ymax=max(df_m_{i}$value),fill="xtcp"), alpha=0.2) +
    scale_fill_manual('Mode', values="black", labels=c("xtcp"))
{interact_str}ggplotly(plt_m_{i})
{static_str}plt_m_{i}
```"""

    g = glob.glob(experiment_root + '/**/mm-graph.tmp', recursive=True)
    mm_plots = []
    for (i,path) in enumerate(g):
        switch_path = "/".join(path.split("/")[:-1])+"/ccp_switch.parsed"
        # A missing file and a file with fewer than two lines are treated
        # alike: both end up in the except branch.
        try:
            with open(switch_path) as f:
                if sum(1 for _ in f) < 2:
                    raise Exception("") # goto except
            remove = ""
        except:
            remove = "#"
        mm_plots.append(
            mm_plt_fmt.format(
                i=i,
                path=path,
                title=format_title(path.split(experiment_name)[1]),
                switch_path=switch_path,
                remove=remove,
                interact_str=interact_str,
                static_str=static_str,
            )
        )
    mm_plots_str = "\n".join(mm_plots)

    if num_ccp == 0:
        nimbus_plots = ""
    else:
        # Figure height depends on the number of mahimahi runs, but is
        # never below 15.
        if len(g) < 3:
            nimbus_fig_height = len(g) * 4
        elif len(g) < 10:
            nimbus_fig_height = len(g) * 2
        elif len(g) < 50:
            nimbus_fig_height = len(g) * 1
        else:
            nimbus_fig_height = 30
        nimbus_fig_height = max(nimbus_fig_height, 15)
        nimbus_plots = """
#### Nimbus
```{{r plot1, fig.width=15, fig.height={fig_height}, fig.align='center', echo=FALSE}}
df <- read.csv("{csv}", sep=",", na.strings=c("","none"))
if (nrow(df) == 0) {{
    print("no ccp output")
}} else {{
df <- df %>% gather("measurement", "value", {fields})
plt <- ggplot(df, aes(x=elapsed, y=value, color=measurement)) +
    geom_line() +
    facet_wrap(~interaction(sch, alg, rate, rtt, bundle, cross, seed), labeller = labeller(.default=label_both, .multi_line=FALSE), nrow={nrow}, ncol=1) +
    scale_x_continuous(breaks=seq(0, max(df$elapsed), by=5))
{interact_str}ggplotly(plt)
{static_str}plt
}}
```
""".format(
            csv = os.path.join(experiment_root, csv_name),
            fields = fields,
            wrap_str_check = "1" if wrap_str is not None else "0",
            wrap_str = wrap_str,
            nrow = len(g),
            fig_height = nimbus_fig_height,
            interact_str=interact_str,
            static_str=static_str,
        )

    # The flow-completion-time section is only emitted when fcts.data exists.
    fct_path = os.path.join(experiment_root, 'fcts.data')
    if os.path.isfile(fct_path):
        fct_plots = """
#### Flow Completion Times
```{{r fcts, fig.width=15, fig.height=6, fig.align='center', echo=FALSE}}
df_fct <- read.csv("{csv}", sep=" ")
df_fct$Duration <- df_fct$Duration.usec. / 1e6
bw <- 12e6 # TODO make this configurable
df_fct$ofct <- (df_fct$Size / bw) + 0.05
df_fct$NormFct <- df_fct$Duration / df_fct$ofct
df_fct$scheme <- paste(df_fct$sch, "_", df_fct$alg, sep="")
fct_plt <- ggplot(df_fct, aes(x=NormFct, colour=scheme)) + stat_ecdf() + scale_x_log10()
fct_plt
```""".format(
            csv = fct_path,
        )
    else:
        fct_plots = ""

    contents = """
---
title: "{title}"
output: html_document
---
<style type="text/css">
.main-container {{
  max-width: 1400px;
  margin-left: auto;
  margin-right: auto;
}}
</style>

```{{r, echo=FALSE}}
suppressWarnings(suppressMessages(library(ggplot2)))
suppressWarnings(suppressMessages(library(plotly)))
suppressWarnings(suppressMessages(library(dplyr)))
suppressWarnings(suppressMessages(library(tidyr)))
```

### Overall
{fct_plots}

### Per-Experiment
{nimbus_plots}

#### Mahimahi
{mm_plots}

### Config
```{{r config, eval=FALSE}}
{config}
```
""".format(
        title = experiment_name,
        config = config,
        grid_str = grid_str,
        nimbus_plots = nimbus_plots,
        fct_plots = fct_plots,
        mm_plots = mm_plots_str,
    )

    rmd = os.path.join(experiment_root, 'exp.Rmd')
    html = os.path.join(experiment_root, 'index.html')
    with open(rmd, 'w') as f:
        f.write(contents)

    agenda.task("Rendering Rmd as HTML...")
    # A failed render is reported together with R's output, not re-raised.
    try:
        out = subprocess.check_output("R -e rmarkdown::render\"('{}', output_file='{}')\"".format(
            rmd, html
        ), shell=True)
    except subprocess.CalledProcessError as e:
        agenda.failure("Failed to render Rmd as HTML:")
        print(e.output.decode())
def do_exp(outdir, lb, shards, clients, shardtype, ops_per_sec, wrkload):
    """Run one sharding experiment and collect its output files.

    The run is skipped (returning True) when the first client's ``.data``
    file already exists locally. Otherwise redis, the shards and the load
    balancer are started, the first client primes the server with loads,
    all clients run concurrently, the server processes are killed, and the
    log/data files are fetched. Each client's ``.data1`` file is then
    prefixed with ``ShardType NumShards Ops`` columns via awk.

    Args:
        outdir: output directory, relative to ``~/burrito`` on the machines.
        lb: connection to the load-balancer machine (also hosts redis).
        shards: connections to the shard machines.
        clients: connections to the client machines.
        shardtype: shard type label used in file names and passed to clients.
        ops_per_sec: offered load; converted to a per-thread interarrival.
        wrkload: workload file path; its basename must end in
            ``-<num_client_threads>`` before the extension.

    Returns:
        True on success or skip, False when client files could not be
        fetched or post-processed.
    """
    wrkname = wrkload.split("/")[-1].split(".")[0]
    num_shards = len(shards)
    server_prefix = f"{outdir}/{shardtype}shard-{ops_per_sec}-{wrkname}-lb"
    shard_prefix = f"{outdir}/{shardtype}shard-{ops_per_sec}-{wrkname}-shard"
    outf = f"{outdir}/{shardtype}shard-{ops_per_sec}-{wrkname}-client"

    first_result = f"{outf}0-{clients[0].addr}.data"
    agenda.task(f"checking {first_result}")
    if os.path.exists(first_result):
        agenda.task(f"skipping: server = {lb.addr}, shardtype = {shardtype}, load = {ops_per_sec} ops/s")
        return True
    agenda.task(f"running: server = {lb.addr}, shardtype = {shardtype}, load = {ops_per_sec} ops/s")

    # interarrival (per client thread) =
    #   client threads per process * number of client machines / ops_per_sec
    num_client_threads = int(wrkname.split('-')[-1])
    interarrival_secs = num_client_threads * len(clients) / ops_per_sec
    interarrival_us = int(interarrival_secs * 1e6)

    redis_addr = start_redis(lb)
    time.sleep(5)
    server_addr = lb.addr
    agenda.task(f"starting: server = {server_addr}, shardtype = {shardtype}, load = {ops_per_sec}, ops/s -> interarrival_us = {interarrival_us}, num_clients = {len(clients)}")

    agenda.subtask("starting shards")
    for s in shards:
        start_shard(s, shard_prefix)
    time.sleep(5)

    agenda.subtask("starting lb")
    # The lb runs on the redis host, so it talks to redis over loopback.
    redis_port = redis_addr.split(":")[-1]
    start_lb(lb, f"127.0.0.1:{redis_port}", [s.addr for s in shards], server_prefix)
    time.sleep(5)

    # prime the server with loads
    agenda.task("doing loads")
    run_loads(clients[0], server_addr, redis_addr, outf, wrkload)

    # others are clients
    agenda.task("starting clients")
    client_threads = [
        threading.Thread(
            target=run_client,
            args=(m, server_addr, redis_addr, interarrival_us, shardtype, outf, wrkload),
        )
        for m in clients
    ]
    for t in client_threads:
        t.start()
    for t in client_threads:
        t.join()
    agenda.task("all clients returned")

    # kill the server
    lb.run("sudo pkill -9 burrito-lb")
    lb.run("sudo pkill -9 iokerneld")
    for s in shards:
        s.run("sudo pkill -9 single-shard")
        s.run("sudo pkill -9 iokerneld")

    lb.run("rm ~/burrito/*.config")
    for m in shards:
        m.run("rm ~/burrito/*.config")
    for m in clients:
        m.run("rm ~/burrito/*.config")

    agenda.task("get lb files")
    if not lb.local:
        for ext in ("out", "err"):
            lb.get(f"burrito/{server_prefix}.{ext}", local=f"{server_prefix}.{ext}", preserve_mode=False)

    agenda.task("get shard files")
    for s in shards:
        if not s.local:
            for ext in ("out", "err"):
                s.get(f"burrito/{shard_prefix}-{s.addr}.{ext}", local=f"{shard_prefix}-{s.addr}.{ext}", preserve_mode=False)

    def get_files(c, num):
        """Fetch the err/out/data1 files of client ``c`` for process ``num``."""
        fn = c.get
        if c.local:
            agenda.subtask(f"Use get_local: {c.host}")
            fn = get_local

        for ext in ("err", "out", "data1"):
            agenda.subtask(f"getting {outf}{num}-{c.addr}.{ext}")
            fn(
                f"burrito/{outf}{num}.{ext}",
                local=f"{outf}{num}-{c.addr}.{ext}",
                preserve_mode=False,
            )

    agenda.task("get client files")
    ok = True
    for c in clients:
        try:
            get_files(c, 0)
        except Exception as e:
            agenda.subfailure(f"At least one file missing for {c}: {e}")
            ok = False
    if not ok:
        return ok

    def awk_files(c, num):
        """Prepend the experiment parameters to every row of a .data1 file."""
        subprocess.run(f"awk '{{if (!hdr) {{hdr=$1; print \"ShardType NumShards Ops \"$0;}} else {{print \"{shardtype} {num_shards} {ops_per_sec} \"$0}} }}' {outf}{num}-{c.addr}.data1 > {outf}{num}-{c.addr}.data", shell=True, check=True)

    for c in clients:
        agenda.subtask(f"adding experiment info for {c.addr}")
        try:
            awk_files(c, 0)
        except Exception as e:
            # Catch Exception (not a bare except) so Ctrl-C still works,
            # and report what actually failed.
            agenda.subfailure(f"At least one file missing for {c.addr}: {e}")
            return False

    agenda.task("done")
    return True
def check_receiver(config, receiver):
    """Warn fatally unless ``mm-delay`` (mahimahi) is available on the receiver.

    ``config`` is accepted but not read.
    """
    agenda.task("mahimahi (receiver)")
    has_mahimahi = receiver.prog_exists("mm-delay")
    if has_mahimahi:
        return
    fatal_warn("Receiver does not have mahimahi installed.")
if 'all' in args.negotiate: args.negotiate = neg_opts os.makedirs(args.outdir, exist_ok = True) for n in args.negotiate: for srv in args.server: is_remote = '127.0.0.1' not in srv for m in args.mode: if m not in modes: agenda.failure(f"unknown mode {m}") break if m != 'rel' and args.ghostunnel is None: agenda.failure("need ghostunnel arg for non-rel exp") break agenda.task(f"mode: {m}, negotiate {n}") if is_remote and m == 'rel-ux': agenda.subfailure("No remote for unix mode") continue start_localnamectl(srv, args.burrito_root if 'fp' in m else None) if m == 'rel-ux': start_server_unix(n) time.sleep(15) exp_unix(args, n) else: start_server( srv, args.server_port, args.ghostunnel if 'rel' not in m else None, args.burrito_root if 'fp' in m else None, n)
def check_inbox(config, inbox):
    """Run the ccp algorithm check (``check_ccp_alg``) on the inbox node."""
    agenda.task("inbox")
    check_ccp_alg(config=config, node=inbox)
config = make_cloudlab_topology(config, headless=args.headless) topo = MahimahiTopo(config) topo.setup_routing(config) machines = topo.machines conns = topo.conns disable_tcp_offloads(config, machines) update_sysctl(machines, config) agenda.section("Setup") prepare_directories(config, conns) details_md = os.path.join(config['local_experiment_dir'], 'details.md') results_md = os.path.join(config['local_experiment_dir'], 'results.md') agenda.task("Fetch build logs") topo.fetch_build_logs(config) if not os.path.exists(details_md): with open(details_md, 'w') as f: f.write(args.details + "\n") if not os.path.exists(results_md): with open(results_md, 'w') as f: f.write("TODO\n") agenda.section("Synchronizing code versions") if not args.skip_git: check_inbox(config, machines['inbox']) check_receiver(config, machines['receiver']) exps = enumerate_experiments(config)
#!/usr/bin/env python3 import agenda agenda.section("Set up network") agenda.task("Create Virtual Private Cloud") agenda.task("Attach internet gateway") agenda.task("Allocate subnet #1") agenda.subtask("Hook in internet-enabled route table") agenda.task("Allocate subnet #2") agenda.task("Generate VPC key-pair") agenda.subfailure("Could not create key-pair") agenda.subtask("Attempting to delete old key-pair") agenda.subtask("Attempting to generate new key-pair") agenda.section("Launch instances") agenda.task("Launch instances in cluster #1") agenda.task("Launch instances in cluster #2") agenda.task("Wait for HQ to start running") agenda.subtask("Still in 'pending' state") agenda.subtask("Still in 'pending' state") agenda.task("Wait for workers to reach 'running' state") agenda.task("Wait for HQ to become pingable") print("54.84.179.156 | UNREACHABLE!") print("54.84.179.156 | UNREACHABLE!") print('54.84.179.156 | SUCCESS => {"changed": false, "ping": "pong"}') agenda.task("Wait for workers to become pingable") print('10.0.1.237 | SUCCESS => {"changed": false, "ping": "pong"}') agenda.section("Deploy application") print("""\ PLAY [ansible-playbook]
def do_exp(iter_num, outdir=None, machines=None, num_shards=None, shardtype=None, ops_per_sec=None, client_batch=None, server_batch=None, poisson_arrivals=None, stack_frag=None, wrkload=None, overwrite=None):
    """Run one kvserver experiment iteration and fetch its output files.

    ``machines[0]`` hosts redis and the server; ``machines[1:]`` are the
    clients, the first of which also primes the server with loads. All
    keyword arguments are required (asserted not None). The run is skipped
    when ``overwrite`` is false and the first client's ``.data`` file
    already exists locally.

    Returns:
        True, both when the run is skipped and when it completes. Missing
        client files are reported but do not change the result.
    """
    required = (outdir, machines, num_shards, shardtype, ops_per_sec,
                client_batch, server_batch, poisson_arrivals, stack_frag,
                wrkload, overwrite)
    assert all(value is not None for value in required)

    wrkname = wrkload.split("/")[-1].split(".")[0]
    server_prefix = f"{outdir}/{num_shards}-{shardtype}shard-{ops_per_sec}-poisson={poisson_arrivals}-clientbatch={client_batch}-server_batch={server_batch}-stackfrag={stack_frag}-{wrkname}-{iter_num}-kvserver"
    outf = f"{outdir}/{num_shards}-{shardtype}shard-{ops_per_sec}-poisson={poisson_arrivals}-client_batch={client_batch}-server_batch={server_batch}-stackfrag={stack_frag}-{wrkname}-{iter_num}-client"

    server = machines[0]
    loader = machines[1]
    client_machines = machines[1:]

    # Remote machines start from a clean output directory; a local machine
    # keeps what it has.
    for m in machines:
        if not m.local:
            m.run(f"rm -rf {outdir}", wd="~/burrito")
        m.run(f"mkdir -p {outdir}", wd="~/burrito")

    first_result = f"{outf}0-{loader.addr}.data"
    if not overwrite and os.path.exists(first_result):
        agenda.task(
            f"skipping: server = {server.addr}, num_shards = {num_shards}, shardtype = {shardtype}, client_batch = {client_batch}, server_batch = {server_batch}, stack_fragmentation = {stack_frag}, load = {ops_per_sec} ops/s"
        )
        return True
    agenda.task(f"running: {first_result}")

    # interarrival (per client thread) =
    #   client threads per process * number of client machines / ops_per_sec
    num_client_threads = int(wrkname.split('-')[-1])
    interarrival_secs = num_client_threads * len(client_machines) / ops_per_sec
    interarrival_us = int(interarrival_secs * 1e6)

    redis_addr = start_redis(server)
    time.sleep(5)
    server_addr = server.addr

    agenda.task(
        f"starting: server = {server.addr}, num_shards = {num_shards}, shardtype = {shardtype}, client_batch = {client_batch}, server_batch = {server_batch}, load = {ops_per_sec} ops/s -> interarrival_us = {interarrival_us}, num_clients = {len(machines)-1}"
    )

    # first one is the server, start the server
    agenda.subtask("starting server")
    redis_port = redis_addr.split(":")[-1]
    start_server(server, f"127.0.0.1:{redis_port}", server_prefix, shards=num_shards, ebpf=False, server_batch=server_batch, stack_frag=stack_frag)
    time.sleep(5)

    # prime the server with loads
    agenda.task("doing loads")
    run_loads(loader, server_addr, redis_addr, outf, wrkload)
    try:
        for ext in ("out", "err"):
            loader.get(f"{outf}-loads.{ext}", local=f"{outf}-loads.{ext}", preserve_mode=False)
    except Exception as e:
        agenda.subfailure(f"Could not get file from loads client: {e}")

    # others are clients
    agenda.task("starting clients")
    workers = []
    for m in client_machines:
        workers.append(threading.Thread(
            target=run_client,
            args=(m, server_addr, redis_addr, interarrival_us, poisson_arrivals, client_batch, shardtype, stack_frag, outf, wrkload),
        ))
    for worker in workers:
        worker.start()
    for worker in workers:
        worker.join()
    agenda.task("all clients returned")

    # kill the server
    for kill_cmd in ("sudo pkill -9 kvserver-ebpf",
                     "sudo pkill -9 kvserver-noebpf",
                     "sudo pkill -INT iokerneld"):
        server.run(kill_cmd)

    for m in machines:
        m.run("rm ~/burrito/*.config")

    agenda.task("get server files")
    if not server.local:
        for ext in ("out", "err"):
            server.get(f"~/burrito/{server_prefix}.{ext}", local=f"{server_prefix}.{ext}", preserve_mode=False)

    def get_files(c, num):
        """Fetch the err/out/data/trace files of client ``c``, process ``num``."""
        fn = c.get
        if c.local:
            agenda.subtask(f"Use get_local: {c.host}")
            fn = get_local

        for ext in ("err", "out", "data", "trace"):
            agenda.subtask(f"getting {outf}{num}-{c.addr}.{ext}")
            fn(
                f"burrito/{outf}{num}.{ext}",
                local=f"{outf}{num}-{c.addr}.{ext}",
                preserve_mode=False,
            )

    agenda.task("get client files")
    for c in client_machines:
        try:
            get_files(c, 0)
        except Exception as e:
            agenda.subfailure(f"At least one file missing for {c}: {e}")

    agenda.task("done")
    return True
def check_config(config):
    """Validate the parsed experiment configuration, failing on the first error.

    Checks are performed in this order:

    1. ``topology``: either the explicit four-node layout (sender, inbox,
       outbox, receiver) or, when ``topology.cloudlab`` is present, only
       ``topology.inbox`` (the other nodes must be absent).
    2. ``sysctl``: every value must be a string.
    3. ``parameters`` and ``structure``: required keys must be present.
    4. ``experiment``: non-empty seed/sch/alg/rate/rtt/bdp lists, plus the
       per-source required fields of every bundle/cross traffic entry.

    Args:
        config: Nested mapping containing the ``topology``, ``sysctl``,
            ``parameters``, ``structure`` and ``experiment`` sections.

    Raises:
        AssertionError: A requirement is violated; the message names the
            offending key.
        KeyError: One of the top-level sections listed above is missing.

    NOTE: validation relies on ``assert`` and is therefore skipped entirely
    when Python runs with ``-O``.
    """
    agenda.task("Checking config file")

    # ---- topology -------------------------------------------------------
    topology = config['topology']
    if 'cloudlab' not in topology:
        for node in ('sender', 'inbox', 'outbox', 'receiver'):
            assert node in topology, "Missing key topology.{}".format(node)
            spec = topology[node]
            assert 'name' in spec, \
                "topology.{} is missing 'name' key".format(node)
            assert 'ifaces' in spec, \
                "topology.{} is missing 'ifaces' key".format(node)
            assert len(spec['ifaces']) > 0, \
                "topology.{} must have at least 1 interface".format(node)
            for i, iface in enumerate(spec['ifaces']):
                assert 'dev' in iface, \
                    "topology.{} iface {} is missing 'dev' key".format(node, i)
                assert 'addr' in iface, \
                    "topology.{} iface {} is missing 'addr' key".format(node, i)

        assert len(topology['inbox']['ifaces']) > 1, \
            "topology.inbox must have at least 2 interaces"
        assert 'listen_port' in topology['inbox'], \
            "topology.inbox must define listen_port"

        # Exactly one node (of any name) must be flagged as the local machine.
        num_self = sum(
            1 for node in topology
            if 'self' in topology[node] and topology[node]['self']
        )
        assert num_self > 0, \
            "One node in topology section must be labeled with \"self = true\""
        assert num_self == 1, \
            "Only one node in topology section can be labeled self"
    else:
        # Guard the lookup below so a missing inbox yields a readable message
        # instead of a bare KeyError.
        assert 'inbox' in topology, "Missing key topology.inbox"
        assert 'listen_port' in topology['inbox'], \
            "topology.inbox must define listen_port"
        for node in ('sender', 'outbox', 'receiver'):
            assert node not in topology, \
                "Don't use key topology.{} with cloudlab; it will be auto-populated".format(
                    node)

    # ---- sysctl ---------------------------------------------------------
    # A non-string value presumably means an unquoted dotted key was parsed
    # as a nested table -- hence the wording of the message.
    for value in config['sysctl'].values():
        assert isinstance(value, str), \
            "key names with dots must be enclosed in quotes (sysctl)"

    # ---- parameters / structure ----------------------------------------
    parameters = [
        'initial_sample_rate', 'bg_port_start', 'bg_port_end',
        'qdisc_buf_size', 'fifo_uplink', 'fifo_downlink'
    ]
    for param in parameters:
        assert param in config['parameters'], \
            "parameters must include {}".format(param)

    structure_fields = [
        ('bundler_root', 'root directory for all experiments and code'),
    ]
    for field, detail in structure_fields:
        assert field in config['structure'], \
            "[structure] missing key '{}': {}".format(field, detail)

    # ---- experiment -----------------------------------------------------
    # .get(..., ()) makes an absent key fail with the intended message rather
    # than a KeyError.
    experiment = config['experiment']
    assert len(experiment.get('seed', ())) > 0, \
        "must specify at least one seed"
    assert len(experiment.get('sch', ())) > 0, \
        "must specify at least one scheduler (sch)"
    assert len(experiment.get('alg', ())) > 0, \
        "must specify at least one algorithm (alg)"
    assert all('name' in a for a in experiment['alg']), \
        "algs must have key name"
    assert len(experiment.get('rate', ())) > 0, \
        "must specify at least one rate"
    assert len(experiment.get('rtt', ())) > 0, \
        "must specify at least one rtt"
    assert len(experiment.get('bdp', ())) > 0, \
        "must specify at least one bdp"

    assert 'bundle_traffic' in experiment, \
        "must specify at least one type of bundle traffic"
    assert len(experiment['bundle_traffic']) > 0, \
        "must specify at least one type of bundle traffic"
    assert 'cross_traffic' in experiment, \
        "must specify at least one type of cross traffic"
    assert len(experiment['cross_traffic']) > 0, \
        "must specify at least one type of cross traffic"

    # ---- traffic specs --------------------------------------------------
    # Fields are read with .get() so that a missing key triggers the
    # "missing ..." assertion. As before, a present-but-falsy value (0, "")
    # is also rejected, except for keys checked by presence only.
    sources = ['iperf', 'poisson', 'cbr']
    for traffic_type in ('bundle_traffic', 'cross_traffic'):
        for traffic in experiment[traffic_type]:
            for t in traffic:
                print('traffic:', t)
                source = t.get('source')
                assert source in sources, \
                    "{} traffic source must be one of ({})".format(
                        traffic_type, "|".join(sources))
                assert 'start_delay' in t, \
                    "{} missing start_delay (int)".format(traffic_type)

                if source == 'iperf':
                    assert t.get('alg'), \
                        "{} missing 'alg' (str)".format(traffic_type)
                    assert t.get('flows'), \
                        "{} missing 'flows' (int)".format(traffic_type)
                    assert t.get('length'), \
                        "{} missing 'length' (int)".format(traffic_type)

                if source == 'poisson':
                    assert t.get('conns'), \
                        "{} missing 'conns' (int)".format(traffic_type)
                    assert t.get('start_port'), \
                        "{} missing 'start_port' (int)".format(traffic_type)
                    assert t.get('reqs'), \
                        "{} missing 'reqs' (int)".format(traffic_type)
                    assert t.get('dist'), \
                        "{} missing 'dist' (str)".format(traffic_type)
                    assert t.get('load'), \
                        "{} missing 'load' (str)".format(traffic_type)
                    assert t.get('alg'), \
                        "{} missing 'alg' (str)".format(traffic_type)
                    # Presence check only: 0 is a legitimate value here.
                    assert 'backlogged' in t, \
                        "{} missing 'backlogged' (int)".format(traffic_type)

                if source == 'cbr':
                    assert t.get('length'), \
                        "{} missing 'length (int)'".format(traffic_type)
                    assert t.get('port'), \
                        "{} missing 'port (int)'".format(traffic_type)
                    assert t.get('rate'), \
                        "{} missing 'rate (int)'".format(traffic_type)