# This excerpt is from a Fabric 1.x fabfile targeting Python 2 (note the
# iteritems() calls below). The standard-library imports used in this section
# are listed here; helpers such as get_cfgs, get_execfile_name,
# get_outfile_name, experiment_map, set_hosts, color(), and the various
# execute() targets are defined elsewhere in this repository.
import datetime
import glob
import os
import pprint
import shlex
import subprocess

from fabric.api import env, execute, local, puts


def check_binaries(exps):
    # if not os.path.isdir("binaries"):
    #     execute(compile_binaries,exps)
    #     return
    # if len(glob.glob("binaries/*")) == 0:
    #     execute(compile_binaries,exps)
    #     return
    if not os.path.isdir("binaries") or len(glob.glob("binaries/*")) == 0:
        local("mkdir -p binaries")
        local("rm -rf binaries/*")
    fmt, experiments = experiment_map[exps]()
    for e in experiments:
        cfgs = get_cfgs(fmt, e)
        # Apply the same environment-driven overrides as compile_binary so
        # the executable-name prefix matches what was built.
        # if env.remote and not env.same_node:
        if env.cluster == "ec2":
            cfgs["ENVIRONMENT_EC2"] = "true"
        else:
            cfgs["ENVIRONMENT_EC2"] = "false"
        if env.cluster == "istc":
            cfgs["CORE_CNT"] = 64
        else:
            cfgs["CORE_CNT"] = 8
        if env.remote:
            cfgs["TPORT_TYPE"] = "TCP"
        if env.shmem:
            cfgs["SHMEM_ENV"] = "true"
        else:
            cfgs["SHMEM_ENV"] = "false"
        # output_f = get_outfile_name(cfgs,fmt,env.hosts)
        output_f = get_execfile_name(cfgs, fmt, env.hosts)
        executables = glob.glob("{}*".format(os.path.join("binaries", output_f)))
        has_rundb, has_runcl, has_config = False, False, False
        # has_rundb, has_runcl, has_runsq, has_config = False, False, False, False
        for executable in executables:
            if executable.endswith("rundb"):
                has_rundb = True
            elif executable.endswith("runcl"):
                has_runcl = True
            # elif executable.endswith("runsq"):
            #     has_runsq = True
            elif executable.endswith("cfg"):
                has_config = True
        # if not has_rundb or not has_runcl or not has_runsq or not has_config:
        if not has_rundb or not has_runcl or not has_config:
            # Recompile only if any of the three artifacts is missing.
            execute(compile_binary, fmt, e)
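
# A minimal usage sketch (illustrative, never called by this file): given the
# copy commands in compile_binary below, check_binaries expects each experiment
# to leave three artifacts in binaries/, all sharing the prefix returned by
# get_execfile_name. The prefix literal here is hypothetical.
def _example_expected_artifacts():
    prefix = "NODE_CNT2_CC_ALGMVCC_"  # hypothetical get_execfile_name() output
    return [os.path.join("binaries", prefix + suffix)
            for suffix in ("rundb", "runcl", "cfg")]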
def compile_binary(fmt, e):
    ecfgs = get_cfgs(fmt, e)
    cfgs = dict(configs)
    # Strip keys that are command-line flags rather than config.h parameters,
    # so they never reach write_config.
    for c in dict(ecfgs):
        if c not in CONFIG_PARAMS and c in FLAG:
            del ecfgs[c]
    cfgs.update(ecfgs)
    # if env.remote and not env.same_node:
    if env.cluster == "ec2":
        cfgs["ENVIRONMENT_EC2"] = "true"
    else:
        cfgs["ENVIRONMENT_EC2"] = "false"
    if env.cluster == "istc":
        cfgs["CORE_CNT"] = 64
    else:
        cfgs["CORE_CNT"] = 8
    if env.remote:
        cfgs["TPORT_TYPE"] = "TCP"
    if env.shmem:
        cfgs["SHMEM_ENV"] = "true"
    else:
        cfgs["SHMEM_ENV"] = "false"
    execute(write_config, cfgs)
    execute(compile)
    # Stash the freshly built server/client binaries and the exact config.h
    # they were built from, keyed by the experiment's executable-name prefix.
    # output_f = get_outfile_name(cfgs,fmt,env.hosts)
    output_f = get_execfile_name(cfgs, fmt, env.hosts)
    local("cp rundb binaries/{}rundb".format(output_f))
    local("cp runcl binaries/{}runcl".format(output_f))
    # local("cp runsq binaries/{}runsq".format(output_f))
    local("cp config.h binaries/{}cfg".format(output_f))
    if EXECUTE_EXPS:
        cmd = "mkdir -p {}".format(env.result_dir)
        local(cmd)
        set_hosts()  # ????
        execute(copy_binaries, output_f)
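
# A minimal sketch (illustrative values, not part of this repo) of the merge
# compile_binary performs: repo-wide defaults in `configs` are overridden by
# per-experiment settings, and the merged dict is what write_config turns
# into config.h.
def _example_cfg_merge():
    defaults = {"NODE_CNT": 2, "CC_ALG": "NO_WAIT", "TPORT_TYPE": "IPC"}
    experiment = {"NODE_CNT": 4, "CC_ALG": "MVCC"}
    merged = dict(defaults)
    merged.update(experiment)  # experiment values win on key collisions
    return merged  # {'NODE_CNT': 4, 'CC_ALG': 'MVCC', 'TPORT_TYPE': 'IPC'}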
def run_exp_old(exps, network_test=False, delay=''):
    if env.shmem:
        schema_path = "/dev/shm/"
    else:
        schema_path = "{}/".format(env.rem_homedir)
    good_hosts = []
    if not network_test and EXECUTE_EXPS:
        good_hosts = get_good_hosts()
        with color():
            puts("good host list =\n{}".format(
                pprint.pformat(good_hosts, depth=3)), show_prefix=True)
        execute(copy_schema)

    fmt, experiments = experiment_map[exps]()
    batch_size = 0
    nids = {}
    outfiles = {}
    exps = {}
    runfiles = {}
    for e in experiments:
        print(e)
        cfgs = get_cfgs(fmt, e)
        output_fbase = get_outfile_name(cfgs, fmt, env.hosts)
        output_exec_fname = get_execfile_name(cfgs, fmt, env.hosts)
        output_f = output_fbase + STRNOW
        last_exp = experiments.index(e) == len(experiments) - 1
        skip_exp = False

        # Check whether the experiment has already been run in this batch
        if SKIP:
            if len(glob.glob('{}*{}*.out'.format(env.result_dir, output_fbase))) > 0:
                with color("warn"):
                    puts("experiment exists in results folder... skipping",
                         show_prefix=True)
                if last_exp:
                    skip_exp = True
                else:
                    continue

        global CC_ALG
        CC_ALG = cfgs["CC_ALG"]
        if EXECUTE_EXPS:
            cfg_srcpath = "{}cfg".format(os.path.join("binaries", output_exec_fname))
            cfg_destpath = "{}.cfg".format(
                os.path.join(env.result_dir, output_exec_fname + STRNOW))
            local("cp {} {}".format(cfg_srcpath, cfg_destpath))
            nnodes = cfgs["NODE_CNT"]
            nclnodes = cfgs["CLIENT_NODE_CNT"]
            try:
                ntotal = nnodes + nclnodes
            except TypeError:
                # CLIENT_NODE_CNT may name another config key rather than a number
                nclnodes = cfgs[cfgs["CLIENT_NODE_CNT"]]
                ntotal = nnodes + nclnodes
            # if CC_ALG == 'CALVIN':
            #     ntotal += 1
            if env.same_node:
                ntotal = 1
            if env.overlap:
                ntotal = max(nnodes, nclnodes)
            if env.cram:
                ntotal = max(max(nnodes, nclnodes) / 8, 1)

            if env.remote:
                if not network_test:
                    set_hosts(good_hosts)
                # if ntotal > len(env.hosts):
                #     msg = "Not enough nodes to run experiment!\n"
                #     msg += "\tRequired nodes: {}, ".format(ntotal)
                #     msg += "Actual nodes: {}".format(len(env.hosts))
                #     with color():
                #         puts(msg,show_prefix=True)
                #     cmd = "rm -f config.h {}".format(cfg_destpath)
                #     local(cmd)
                #     continue
                if not skip_exp:
                    if env.batch_mode:
                        # If full, execute all exps in batch and reset everything
                        full = (batch_size + ntotal) > len(env.hosts)
                        if full:
                            if env.cluster != 'istc' and not env.dry_run:
                                # Sync clocks before each experiment
                                execute(sync_clocks)
                            with color():
                                puts("Batch is full, deploying batch...{}/{}".format(
                                    batch_size, len(good_hosts)), show_prefix=True)
                            with color("debug"):
                                puts(pprint.pformat(outfiles, depth=3),
                                     show_prefix=False)
                            set_hosts(env.hosts[:batch_size])
                            with color():
                                puts("Starttime: {}".format(
                                    datetime.datetime.now().strftime("%H:%M:%S")),
                                    show_prefix=True)
                            execute(deploy, schema_path, nids, exps, runfiles, fmt)
                            with color():
                                puts("Endtime: {}".format(
                                    datetime.datetime.now().strftime("%H:%M:%S")),
                                    show_prefix=True)
                            execute(get_results, outfiles, nids)
                            if not env.dry_run:
                                good_hosts = get_good_hosts()
                            env.roledefs = None
                            batch_size = 0
                            nids = {}
                            exps = {}
                            runfiles = {}
                            outfiles = {}
                            set_hosts(good_hosts)
                        with color():
                            puts("Adding experiment to current batch: {}".format(
                                output_f), show_prefix=True)
                        machines = env.hosts[batch_size:batch_size + ntotal]
                        batch_size += ntotal
                    else:
                        machines = env.hosts[:ntotal]
                    set_hosts(machines)
                    new_roles = execute(assign_roles, nnodes, nclnodes,
                                        append=env.batch_mode)[env.host]
                    new_nids, new_exps, new_runfiles = execute(
                        write_ifconfig, new_roles, e, output_exec_fname)[env.host]
                    nids.update(new_nids)
                    exps.update(new_exps)
                    runfiles.update(new_runfiles)
                    for host, nid in new_nids.iteritems():
                        outfiles[host] = "{}.out".format(output_f)
                        # if env.same_node:
                        #     outfiles[host] = "{}.out".format(output_f)
                        # else:
                        #     outfiles[host] = "{}_{}.out".format(nid[0],output_f)
                    print(nids)

                    if cfgs["WORKLOAD"] == "TPCC":
                        schema = "benchmarks/TPCC_full_schema.txt"
                        # schema = "benchmarks/TPCC_short_schema.txt"
                    elif cfgs["WORKLOAD"] == "YCSB":
                        schema = "benchmarks/YCSB_schema.txt"
                    elif cfgs["WORKLOAD"] == "PPS":
                        schema = "benchmarks/PPS_schema.txt"
                    # NOTE: copy_files will fail if any (possibly) stray processes
                    # are still running one of the executables. Set the 'kill'
                    # flag in environment.py to true to kill these processes. This
                    # is useful for running real experiments but dangerous when
                    # both of us are debugging...
                    # execute(copy_files,schema,output_exec_fname)
                    execute(copy_ifconfig)

                if not env.batch_mode or last_exp and len(exps) > 0:
                    if env.batch_mode:
                        set_hosts(good_hosts[:batch_size])
                        puts("Deploying last batch...{}/{}".format(
                            batch_size, len(good_hosts)), show_prefix=True)
                    else:
                        print("Deploying: {}".format(output_f))
                    if env.cluster != 'istc':
                        # Sync clocks before each experiment
                        print("Syncing Clocks...")
                        execute(sync_clocks)
                    if delay != '':
                        execute(set_delay, delay=delay)
                    with color():
                        puts("Starttime: {}".format(
                            datetime.datetime.now().strftime("%H:%M:%S")),
                            show_prefix=True)
                    execute(deploy, schema_path, nids, exps, runfiles, fmt)
                    with color():
                        puts("Endtime: {}".format(
                            datetime.datetime.now().strftime("%H:%M:%S")),
                            show_prefix=True)
                    if delay != '':
                        execute(reset_delay)
                    execute(get_results, outfiles, nids)
                    if not env.dry_run:
                        good_hosts = get_good_hosts()
                        set_hosts(good_hosts)
                    batch_size = 0
                    nids = {}
                    exps = {}
                    outfiles = {}
                    env.roledefs = None
            else:
                # Local (non-remote) run: launch every process on this machine
                # and wait for all of them to finish.
                pids = []
                print("Deploying: {}".format(output_f))
                for n in range(ntotal):
                    if n < nnodes:
                        cmd = "./rundb -nid{}".format(n)
                    elif n < nnodes + nclnodes:
                        cmd = "./runcl -nid{}".format(n)
                    # elif n == nnodes+nclnodes:
                    #     assert CC_ALG == 'CALVIN'
                    #     cmd = "./runsq -nid{}".format(n)
                    else:
                        assert False  # unreachable: n beyond server+client range
                    print(cmd)
                    cmd = shlex.split(cmd)
                    ofile_n = "{}{}_{}.out".format(env.result_dir, n, output_f)
                    ofile = open(ofile_n, 'w')
                    p = subprocess.Popen(cmd, stdout=ofile, stderr=ofile)
                    pids.insert(0, p)
                for n in range(ntotal):
                    pids[n].wait()
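
# A minimal sketch of the host-budget rule shared by both drivers, pulled out
# for clarity. same_node/overlap/cram mirror the env flags used above; this
# helper is illustrative and is not called by the code in this file.
def _example_ntotal(nnodes, nclnodes, same_node=False, overlap=False, cram=False):
    ntotal = nnodes + nclnodes          # default: one host per process
    if same_node:
        ntotal = 1                      # everything co-located on one host
    if overlap:
        ntotal = max(nnodes, nclnodes)  # server and client processes share hosts
    if cram:
        # ~8 processes per host (Python 2 integer division, as above)
        ntotal = max(max(nnodes, nclnodes) / 8, 1)
    return ntotal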
def run_exp(exps, network_test=False, delay=''):
    if env.shmem:
        schema_path = "/dev/shm/"
    else:
        schema_path = "{}/".format(env.rem_homedir)
    good_hosts = []
    if not network_test and EXECUTE_EXPS:
        good_hosts = get_good_hosts()
        with color():
            puts("good host list =\n{}".format(
                pprint.pformat(good_hosts, depth=3)), show_prefix=True)
    fmt, experiments = experiment_map[exps]()
    batch_size = 0
    nids = {}
    outfiles = {}
    exps = {}
    runfiles = {}  # not populated by this driver, but deploy() still expects it
    if SKIP:
        # Drop any experiment that already has results on disk
        for e in experiments[:]:
            cfgs = get_cfgs(fmt, e)
            output_fbase = get_outfile_name(cfgs, fmt, env.hosts)
            if len(glob.glob('{}*{}*.out'.format(env.result_dir, output_fbase))) > 0:
                with color("warn"):
                    puts("experiment exists in results folder... skipping",
                         show_prefix=True)
                experiments.remove(e)
    # Schedule the experiments with the largest node demand first
    experiments.sort(key=lambda x: x[fmt.index("NODE_CNT")] +
                     x[fmt.index("CLIENT_NODE_CNT")], reverse=True)

    # Fill experiment pool
    while len(experiments) > 0:
        round_exps = []
        batch_total = 0
        for e in experiments[:]:
            cfgs = get_cfgs(fmt, e)
            nnodes = cfgs["NODE_CNT"]
            nclnodes = cfgs["CLIENT_NODE_CNT"]
            ccalg = cfgs["CC_ALG"]
            ntotal = cfgs["NODE_CNT"] + cfgs["CLIENT_NODE_CNT"]
            # if ccalg == 'CALVIN':
            #     ntotal += 1
            if env.same_node:
                ntotal = 1
            if env.overlap:
                ntotal = max(nnodes, nclnodes)
            if env.cram:
                ntotal = max(max(nnodes, nclnodes) / 8, 1)
            if ntotal > len(env.hosts):
                msg = "Not enough nodes to run experiment!\n"
                msg += "\tRequired nodes: {}, ".format(ntotal)
                msg += "Actual nodes: {}".format(len(env.hosts))
                with color():
                    puts(msg, show_prefix=True)
                experiments.remove(e)
                continue
            if (batch_total + ntotal) > len(env.hosts):
                continue
            batch_total += ntotal
            round_exps.append(e)
            experiments.remove(e)
        if not EXECUTE_EXPS:
            continue

        batch_size = 0
        for e in round_exps:
            set_hosts(good_hosts)
            cfgs = get_cfgs(fmt, e)
            global CC_ALG
            nnodes = cfgs["NODE_CNT"]
            nclnodes = cfgs["CLIENT_NODE_CNT"]
            CC_ALG = cfgs["CC_ALG"]
            ntotal = cfgs["NODE_CNT"] + cfgs["CLIENT_NODE_CNT"]
            # if ccalg == 'CALVIN':
            #     ntotal += 1
            if env.same_node:
                ntotal = 1
            if env.overlap:
                ntotal = max(nnodes, nclnodes)
            if env.cram:
                ntotal = max(max(nnodes, nclnodes) / 8, 1)
            output_fbase = get_outfile_name(cfgs, fmt, env.hosts)
            output_exec_fname = get_execfile_name(cfgs, fmt, env.hosts)
            output_f = output_fbase + STRNOW
            cfg_srcpath = "{}cfg".format(os.path.join("binaries", output_exec_fname))
            cfg_destpath = "{}.cfg".format(
                os.path.join(env.result_dir, output_exec_fname + STRNOW))
            local("cp {} {}".format(cfg_srcpath, cfg_destpath))
            with color():
                puts("Adding experiment to current batch: {}".format(output_f),
                     show_prefix=True)
            machines = env.hosts[batch_size:batch_size + ntotal]
            batch_size += ntotal
            set_hosts(machines)
            new_roles = execute(assign_roles, nnodes, nclnodes,
                                append=env.batch_mode)[env.host]
            new_nids, new_exps = execute(write_ifconfig, new_roles, e)[env.host]
            nids.update(new_nids)
            exps.update(new_exps)
            for host, nid in new_nids.iteritems():
                outfiles[host] = "{}.out".format(output_f)
            if cfgs["WORKLOAD"] == "TPCC":
                schema = "benchmarks/TPCC_full_schema.txt"
                # schema = "benchmarks/TPCC_short_schema.txt"
            elif cfgs["WORKLOAD"] == "YCSB":
                schema = "benchmarks/YCSB_schema.txt"
            elif cfgs["WORKLOAD"] == "PPS":
                schema = "benchmarks/PPS_schema.txt"
            # NOTE: copy_files will fail if any (possibly) stray processes
            # are still running one of the executables. Set the 'kill'
            # flag in environment.py to true to kill these processes. This
            # is useful for running real experiments but dangerous when both
            # of us are debugging...
            # execute(copy_files,schema,output_exec_fname)
            execute(copy_ifconfig)

        if env.remote:
            set_hosts(good_hosts[:batch_size])
            if env.cluster != 'istc' and not env.dry_run:
                # Sync clocks before each experiment
                execute(sync_clocks)
            with color():
                puts("Batch is full, deploying batch...{}/{}".format(
                    batch_size, len(good_hosts)), show_prefix=True)
            with color("debug"):
                puts(pprint.pformat(outfiles, depth=3), show_prefix=False)
            with color():
                puts("Starttime: {}".format(
                    datetime.datetime.now().strftime("%H:%M:%S")), show_prefix=True)
            execute(deploy, schema_path, nids, exps, runfiles, fmt)
            with color():
                puts("Endtime: {}".format(
                    datetime.datetime.now().strftime("%H:%M:%S")), show_prefix=True)
            execute(get_results, outfiles, nids)
            good_hosts = get_good_hosts()
            batch_size = 0
            nids = {}
            exps = {}
            outfiles = {}
            set_hosts(good_hosts)
            env.roledefs = None
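
# A minimal sketch of run_exp's scheduling policy under the same assumptions:
# experiments are sorted by total node demand (largest first), then greedily
# packed into rounds until the host pool is exhausted; anything that can never
# fit is dropped, mirroring the "Not enough nodes" branch above. Illustrative
# helper only; not called by this file.
def _example_round_packing(demands, capacity):
    demands = sorted(demands, reverse=True)
    rounds = []
    while demands:
        used, this_round = 0, []
        for d in demands[:]:
            if d > capacity:
                demands.remove(d)       # can never run; drop it entirely
            elif used + d <= capacity:
                used += d
                this_round.append(d)
                demands.remove(d)
        rounds.append(this_round)
    return rounds  # e.g. _example_round_packing([5, 4, 3, 2], 8) -> [[5, 3], [4, 2]]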
# Fragment of the results-summary routine; the enclosing function definition,
# which supplies `exp`, is not part of this excerpt.
# summary = {}
# summary_client = {}
fmt, experiments = experiment_map[exp]()
for e in experiments:
    s = {}
    s2 = {}
    timestamp = 0
    # if "HSTORE" in e or "HSTORE_SPEC" in e:
    #     nfmt = fmt
    #     ne = e
    # else:
    #     nfmt = fmt[:-1]
    #     ne = e[:-1]
    # cfgs = get_cfgs(nfmt,ne)
    cfgs = get_cfgs(fmt, e)
    # Wildcard host fields so the name matches results from any host pair
    output_f = get_outfile_name(cfgs, fmt, ["*", "*"])
    # output_f = get_outfile_name(cfgs,nfmt,["*","*"])
    nnodes = cfgs["NODE_CNT"]
    nclients = cfgs["CLIENT_NODE_CNT"]
    try:
        ntotal = nnodes + nclients
    except TypeError:
        # CLIENT_NODE_CNT may name another config key rather than a number
        nclients = cfgs[cfgs["CLIENT_NODE_CNT"]]
        ntotal = nnodes + nclients
    cc = cfgs["CC_ALG"]
    is_network_test = cfgs["NETWORK_TEST"] == "true"
    if is_network_test:
        r = {}
        r2 = {}
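
# A minimal sketch (hypothetical helper) of how result files are matched: the
# SKIP checks in both drivers glob for any timestamped .out file containing an
# experiment's base name, and the summary fragment above builds output_f with
# wildcard host fields for the same reason.
def _example_result_glob(result_dir, output_fbase):
    return glob.glob('{}*{}*.out'.format(result_dir, output_fbase))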