def main(argv):
    """Allocation profiling: 5 alexnet_25 jobs on Salus, then the same 5 on TF.

    When actions are given on the command line they are run instead of the
    fixed alexnet sequence.
    """
    scfg = maybe_forced_preset(presets.AllocProf)
    if argv:
        # explicit command-line actions take precedence over the canned run
        run_seq(scfg.copy(output_dir=FLAGS.save_dir), *parse_actions_from_cmd(argv))
        return

    # five identical alexnet jobs, concurrently on Salus
    salus_jobs = [WTL.create("alexnet", 25, 200) for _ in range(5)]
    run_seq(scfg.copy(output_dir=FLAGS.save_dir), *salus_jobs)

    # the same five jobs on plain TF, one after another (Pause.Wait between)
    tf_actions = []
    for idx in range(5):
        if idx:
            tf_actions.append(Pause.Wait)
        tf_actions.append(WTL.create("alexnet", 25, 200, executor=Executor.TF))
    run_tf(FLAGS.save_dir, *tf_actions)
def single(scfg):
    """Run inception3, resnet50 and resnet152 one at a time, with a manual
    pause between each so the operator can inspect state."""
    actions = [
        WTL.create("inception3", 100, 165),
        Pause.Manual,
        WTL.create("resnet50", 50, 798),
        Pause.Manual,
        WTL.create("resnet152", 75, 19),
    ]
    run_seq(scfg.copy(output_dir=FLAGS.save_dir), *actions)
def main(argv):
    """Trace ops for one workload, first on TF (verbose logging), then on Salus.

    argv: optional `[name, batch_size]`; defaults to vgg11 with batch size 25.

    Fix: the original ended with `return` followed by an unreachable
    "Run 2 on Salus" run_seq call; the dead code has been removed
    (behavior is unchanged).
    """
    scfg = maybe_forced_preset(presets.OpTracing)

    # defaults, overridable from the command line
    name, bs = 'vgg11', 25
    if len(argv) > 0:
        name = argv[0]
    if len(argv) > 1:
        bs = int(argv[1])

    def create_wl(ex):
        # same model/batch on either executor, 10 iterations
        return WTL.create(name, bs, 10, executor=ex)

    # Run on TF with maximum vlog output for tracing
    wl = create_wl(Executor.TF)
    wl.env['TF_CPP_MIN_VLOG_LEVEL'] = '2'
    wl.env['TF_CPP_MIN_LOG_LEVEL'] = ''
    run_tf(FLAGS.save_dir / 'tf', wl)

    # Run on Salus
    wl = create_wl(Executor.Salus)
    run_seq(scfg.copy(output_dir=FLAGS.save_dir / "salus" / '1'), wl)
def twoinfer(argv):
    # type: (Sequence[str]) -> None
    """Run two concurrent inference jobs on Salus for each SM factor.

    argv holds SM factors (floats); defaults to [1.0, 1.5, 2.0, 2.5, 3.0]
    when empty. The two jobs are started, both are waited on until ready,
    then both are released so they run together.
    """
    base_cfg = maybe_forced_preset(presets.MostEfficient)

    sm_factors = [float(v) for v in argv]
    if not sm_factors:
        sm_factors = [1.0, 1.5, 2.0, 2.5, 3.0]

    # NOTE: idx is currently unused; enumerate kept for parity with siblings
    for idx, factor in enumerate(sm_factors):
        scfg = base_cfg.copy(output_dir=FLAGS.save_dir / "twoinfer" / "salus" / f"{factor:.2f}")
        scfg.extra_args += [
            '--sm-factor', f'{factor:.2f}'
        ]
        # temp dir holds the pipes used to coordinate with the jobs
        with tempfile.TemporaryDirectory() as td:
            # create the first inference job
            wl1, pipe1 = create_infer(Executor.Salus, 10, td)
            # create the second inference job
            wl2, pipe2 = create_infer(Executor.Salus, 10, td)
            run_seq(scfg,
                    wl1,  # start the first job
                    wl2,  # start the second job
                    # wait for both jobs to be ready
                    RunFn(lambda *args, **kwargs: wait_on_pipe(pipe1)),
                    RunFn(lambda *args, **kwargs: wait_on_pipe(pipe2)),
                    # release 1st job
                    RunFn(lambda *args, **kwargs: release_on_pipe(pipe1)),
                    # release 2nd job
                    RunFn(lambda *args, **kwargs: release_on_pipe(pipe2)),
                    # run_seq automatically join all jobs at the end of the sequence
                    )
def main(argv):
    """Op-trace one workload on Salus, and optionally also on TF.

    argv: optional `[name, batch_size, batch_num]`. The batch size falls
    back to the raw string when it is not an integer (e.g. 'small').
    """
    scfg = maybe_forced_preset(presets.OpTracing)

    # defaults, overridable positionally from the command line
    name, bs, bn = 'vgg11', 25, 10
    if len(argv) > 0:
        name = argv[0]
    if len(argv) > 1:
        bs = argv[1]
    # keep the raw value if it does not parse as an int
    bs = try_with_default(int, bs, ValueError)(bs)
    if len(argv) > 2:
        bn = int(argv[2])

    def create_wl(ex):
        return WTL.create(name, bs, bn, executor=ex)

    # Salus run
    workload = create_wl(Executor.Salus)
    run_seq(scfg.copy(output_dir=FLAGS.save_dir / "salus" / '1'), workload)

    if FLAGS.also_tf:
        workload = create_wl(Executor.TF)
        workload.env['TF_CPP_MIN_VLOG_LEVEL'] = '1'
        workload.env['TF_CPP_MIN_LOG_LEVEL'] = ''
        run_tf(FLAGS.save_dir / "tf", workload)
        # rename the first produced file to a more convenient name
        for produced in (FLAGS.save_dir / "tf").iterdir():
            produced.rename(produced.with_name('perf.output'))
            break
def case1():
    """Run 3 copies of each eval model concurrently on Salus, blocking eval
    with a 10s interval and randomization factor 3."""
    scfg = maybe_forced_preset(presets.MostEfficient)
    model_names = [
        'vgg19', 'vgg16', 'vgg11',
        'resnet50', 'resnet101', 'resnet152',
        'overfeat', 'inception3', 'inception4',
        'googlenet', 'alexnet', 'seq2seq',
        'vae', 'superres',
    ] * 3

    wls = []
    for model in model_names:
        # seq2seq uses a named batch size instead of a numeric one
        batch = 'small' if model == 'seq2seq' else 1
        wl = WTL.create(model + 'eval', batch, 500, executor=Executor.Salus)
        wl.env['SALUS_TFBENCH_EVAL_INTERVAL'] = '10'
        wl.env['SALUS_TFBENCH_EVAL_RAND_FACTOR'] = '3'
        wl.env['SALUS_TFBENCH_EVAL_BLOCK'] = 'true'
        wls.append(wl)

    scfg.env['SALUS_DISABLE_LANEMGR'] = '1'
    scfg.logconf = 'log'
    run_seq(scfg.copy(output_dir=FLAGS.save_dir / 'case1'), *wls)
def case4():
    """Scale 1/2/4/8 concurrent eval instances of each model on Salus.

    Fix: the inner WTL.create hard-coded "inception3eval" even though the
    outer loop iterates over several models (and the output directory is
    already named per-model); it now uses the loop's `model`.
    """
    scfg = maybe_forced_preset(presets.MostEfficient)
    for model in ['inception3eval', 'vgg19eval']:
        for i in [1, 2, 4, 8]:
            run_seq(scfg.copy(output_dir=FLAGS.save_dir / 'case4' / f'{model}-{i}'),
                    *[WTL.create(model, 1, 1000) for _ in range(i)]
                    )
def case1():
    """Baseline: a single inception3eval job, once on TF and once on Salus."""
    scfg = maybe_forced_preset(presets.MostEfficient)
    out_dir = FLAGS.save_dir / 'case1'
    run_tf(out_dir,
           WTL.create("inception3eval", 1, 1000, executor=Executor.TF))
    run_seq(scfg.copy(output_dir=out_dir),
            WTL.create("inception3eval", 1, 1000))
def case2():
    """Run super_res on TF first, then on Salus, waiting between the two."""
    scfg = maybe_forced_preset(presets.MostEfficient)
    # BUG: seems we must run a single job first otherwise it will hang
    tf_job = WTL.create("super_res", 128, 20, executor=Executor.TF)
    salus_job = WTL.create("super_res", 128, 20)
    run_seq(scfg.copy(output_dir=FLAGS.save_dir / 'case2'),
            tf_job,
            Pause.Wait,
            salus_job)
def case2():
    """Use OpTracing to see if each iteration is exclusive"""
    scfg = maybe_forced_preset(presets.OpTracing)
    scfg.logconf = 'memop'
    specs = [
        ("inception3", 100, 165),
        ("resnet50", 50, 798),
        ("resnet152", 75, 19),
    ]
    run_seq(scfg.copy(output_dir=FLAGS.save_dir),
            *[WTL.create(model, bs, bn) for model, bs, bn in specs])
def case2():
    """Profile allocations for two concurrent inception3_100 jobs,
    with the lane manager disabled."""
    scfg = maybe_forced_preset(presets.AllocProf)
    scfg.env['SALUS_DISABLE_LANEMGR'] = '1'
    pair = [WTL.create("inception3", 100, 20) for _ in range(2)]
    run_seq(scfg.copy(output_dir=FLAGS.save_dir / 'case2'), *pair)
def test():
    """Smoke test: alexnet on TF, wait for it, then alexnet on Salus."""
    scfg = maybe_forced_preset(presets.MostEfficient)
    # BUG: seems we must run a single job first otherwise it will hang
    tf_job = WTL.create("alexnet", 25, 20, executor=Executor.TF)
    salus_job = WTL.create("alexnet", 25, 20)
    run_seq(scfg.copy(output_dir=FLAGS.save_dir / 'test'),
            tf_job,
            Pause.Wait,
            salus_job)
def test(scfg):
    """Run the inception3/resnet50/resnet152 trio twice, waiting in between."""
    def trio():
        # one fresh instance of each of the three workloads
        return [
            WTL.create("inception3", 100, 165),
            WTL.create("resnet50", 50, 798),
            WTL.create("resnet152", 75, 19),
        ]

    run_seq(scfg.copy(output_dir=FLAGS.save_dir),
            *trio(),
            Pause.Wait,
            *trio())
def main(argv):
    """Op-trace two concurrent resnet101 jobs on Salus with the pack scheduler."""
    scfg = maybe_forced_preset(presets.OpTracing)
    scfg.scheduler = 'pack'
    # Firstly run concurrently on salus
    pair = [WTL.create("resnet101", 50, 47) for _ in range(2)]
    run_seq(scfg.copy(output_dir=FLAGS.save_dir / "salus"), *pair)
def case1(argv):
    """Profile a single inception3_50 job under the pack scheduler."""
    model, bs, bn = 'inception3', 50, 10
    # use this function's own name as the output sub-directory
    name = inspect.currentframe().f_code.co_name
    scfg = maybe_forced_preset(presets.AllocProf)
    scfg.scheduler = 'pack'
    workload = WTL.create(model, bs, bn)
    run_seq(scfg.copy(output_dir=FLAGS.save_dir / name), workload)
def plotrun(scfg):
    """One short (5-iteration) run per model, each in its own directory,
    to generate data for plotting."""
    specs = [
        ('inception3', 100, 5),
        ("resnet50", 50, 5),
        ("resnet152", 75, 5),
    ]
    for name, bs, bn in specs:
        out = FLAGS.save_dir / "plotrun" / "{}_{}".format(name, bs)
        run_seq(scfg.copy(output_dir=out), WTL.create(name, bs, bn))
def case1():
    """Memop-trace inception3 alone, then inception3 + resnet50 together."""
    scfg = maybe_forced_preset(presets.OpTracing)
    scfg.logconf = 'memop'
    # BUG: seems we must run a single job first otherwise it will hang
    warmup = WTL.create("inception3", 100, 20)
    run_seq(scfg.copy(output_dir=FLAGS.save_dir / 'case1'),
            warmup,
            Pause.Wait,
            WTL.create("inception3", 100, 20),
            WTL.create("resnet50", 50, 20))
def case2():
    """Memop-trace a short alexnet alone, then two longer alexnets together."""
    scfg = maybe_forced_preset(presets.OpTracing)
    scfg.logconf = 'memop'
    # BUG: seems we must run a single job first otherwise it will hang
    warmup = WTL.create("alexnet", 25, 10)
    run_seq(scfg.copy(output_dir=FLAGS.save_dir / 'case2'),
            warmup,
            Pause.Wait,
            WTL.create("alexnet", 25, 50),
            WTL.create("alexnet", 25, 50))
def test(argv):
    """Run vae_64 workload(s) under the pack scheduler."""
    model, bs, bn = 'vae', 64, 500
    # use this function's own name as the output sub-directory
    name = inspect.currentframe().f_code.co_name
    scfg = maybe_forced_preset(presets.MostEfficient)
    scfg.scheduler = 'pack'
    # NOTE(review): original comment said "create 300 vae" but range(1)
    # creates only one workload — presumably dialed down for debugging;
    # confirm the intended count.
    wls = [WTL.create(model, bs, bn) for _ in range(1)]
    run_seq(scfg.copy(output_dir=FLAGS.save_dir/name), *wls)
def case2():
    """Run inception3eval at each request rate in the module-level `rates`,
    non-blocking eval with no randomization."""
    scfg = maybe_forced_preset(presets.MostEfficient)
    for rate in rates:
        interval = str(1 / rate)
        wl = WTL.create("inception3eval", 1, 500, executor=Executor.Salus)
        wl.env['SALUS_TFBENCH_EVAL_INTERVAL'] = interval
        print("using interval " + interval)
        wl.env['SALUS_TFBENCH_EVAL_RAND_FACTOR'] = '1'
        wl.env['SALUS_TFBENCH_EVAL_BLOCK'] = 'false'
        run_seq(scfg.copy(output_dir=FLAGS.save_dir / 'case2' / str(rate)), wl)
def test():
    """Run the inception3/resnet50/resnet152 trio twice under the Debugging
    preset, waiting between rounds."""
    scfg = maybe_forced_preset(presets.Debugging)

    def one_round():
        return [
            WTL.create("inception3", 100, 165),
            WTL.create("resnet50", 50, 798),
            WTL.create("resnet152", 75, 19),
        ]

    run_seq(scfg.copy(output_dir=FLAGS.save_dir),
            *one_round(),
            Pause.Wait,
            *one_round())
def case2():
    """Trace super_res on TF with verbose logging, then on Salus with memop
    tracing."""
    # TF run with full vlog output
    wl = WTL.create("super_res", 128, 20, executor=Executor.TF)
    wl.env['TF_CPP_MIN_VLOG_LEVEL'] = '2'
    wl.env['TF_CPP_MIN_LOG_LEVEL'] = ''
    run_tf(FLAGS.save_dir / 'case2' / 'tf', wl)

    # Salus run with memory-op tracing
    scfg = maybe_forced_preset(presets.OpTracing)
    scfg.logconf = 'memop'
    run_seq(scfg.copy(output_dir=FLAGS.save_dir / 'case2' / 'salus'),
            WTL.create("super_res", 128, 20))
def case3():
    """With a specially compiled salus there is no restriction on how
    iterations run (multiple iterations may run together); collect memory
    data and fragmentation."""
    scfg = maybe_forced_preset(presets.AllocProf)
    scfg.env['SALUS_DISABLE_LANEMGR'] = '1'
    pair = [WTL.create("inception3", 25, 20) for _ in range(2)]
    run_seq(scfg.copy(output_dir=FLAGS.save_dir / 'case3'), *pair)
def main(argv):
    """Profile two concurrent resnet50 jobs, or the actions given in argv."""
    scfg = maybe_forced_preset(presets.Profiling)
    if argv:
        # explicit command-line actions take precedence
        run_seq(scfg.copy(output_dir=FLAGS.save_dir), *parse_actions_from_cmd(argv))
        return

    pair = [WTL.create("resnet50", 50, 265) for _ in range(2)]
    run_seq(scfg.copy(output_dir=FLAGS.save_dir), *pair)
def case2():
    """Inception3_100 is missing some dealloc log entry"""
    scfg = maybe_forced_preset(presets.Debugging)
    actions = []
    for batch_size in (100, 50, 25):
        # each run followed by a wait before the next batch size
        actions.append(WTL.create("inception3", batch_size, 10))
        actions.append(Pause.Wait)
    run_seq(scfg.copy(output_dir=FLAGS.save_dir), *actions)
def do_measure(scfg, name, batch_sizes):
    """Measure `name` at each batch size, first on Salus then on TF,
    writing each executor's results under its own sub-directory."""
    batch_num = 100
    # previously used sizes: [1, 2, 4, 8, 16, 32] and [1024, 1536, 2048, 4096]
    for bs in batch_sizes:
        for executor, subdir in ((Executor.Salus, "salus"), (Executor.TF, "tf")):
            wl = WTL.create(name, bs, batch_num, executor=executor)
            set_env(wl)
            run_seq(scfg.copy(output_dir=FLAGS.save_dir / subdir), wl)
def case3():
    """Start three inception3eval jobs 10s apart under the fair scheduler,
    with the lane manager disabled."""
    scfg = maybe_forced_preset(presets.MostEfficient)
    scfg.scheduler = 'fair'
    scfg.env['SALUS_DISABLE_LANEMGR'] = '1'

    actions = []
    for i in range(3):
        if i:
            actions.append(Pause(10))
        actions.append(WTL.create("inception3eval", 50, 250))
    run_seq(scfg.copy(output_dir=FLAGS.save_dir / 'case3'), *actions)
def case4():
    """Compare sequential vs parallel execution of inception3 + resnet50.

    Fix: the parallel run previously wrote under 'case3'/'par2', which
    belongs to a different case; all of case4's output now goes under
    'case4'.
    """
    scfg = maybe_forced_preset(presets.MostEfficient)
    # BUG: seems we must run a single job first otherwise it will hang
    run_seq(scfg.copy(output_dir=FLAGS.save_dir / 'case4' / 'seq2'),
            WTL.create("inception3", 100, 100),
            Pause.Wait,
            WTL.create("resnet50", 50, 100))
    run_seq(scfg.copy(output_dir=FLAGS.save_dir / 'case4' / 'par2'),
            WTL.create("inception3", 100, 100),
            WTL.create("resnet50", 50, 100))
def case1():
    """The first 3 jobs have some probability of finishing, but the second
    round almost surely deadlocks."""
    scfg = maybe_forced_preset(presets.Debugging)

    def one_round():
        return [
            WTL.create("inception3", 100, 165),
            WTL.create("resnet50", 50, 798),
            WTL.create("resnet152", 75, 19),
        ]

    run_seq(scfg.copy(output_dir=FLAGS.save_dir),
            *one_round(),
            Pause.Wait,
            Pause.Manual,
            *one_round())
def case2():
    """Start three inception3eval jobs 10s apart under the pack scheduler,
    with shared lanes disabled."""
    scfg = maybe_forced_preset(presets.MostEfficient)
    scfg.scheduler = 'pack'
    scfg.logconf = 'log'
    scfg.env['SALUS_DISABLE_SHARED_LANE'] = '1'

    actions = [WTL.create("inception3eval", 50, 250)]
    for _ in range(2):
        actions.append(Pause(10))
        actions.append(WTL.create("inception3eval", 50, 250))
    run_seq(scfg.copy(output_dir=FLAGS.save_dir / 'case2'), *actions)