def case3():
    """Run inception3eval (bs=50, 250 iters) three times in sequence under the
    'fair' scheduler with SALUS_DISABLE_LANEMGR set in the server environment.
    Results go to FLAGS.save_dir/'case3'.
    """
    cfg = maybe_forced_preset(presets.MostEfficient)
    cfg.scheduler = 'fair'
    cfg.env['SALUS_DISABLE_LANEMGR'] = '1'

    # Build the action plan: workload, 10s pause, workload, 10s pause, workload.
    plan = []
    for rep in range(3):
        if rep:
            plan.append(Pause(10))
        plan.append(WTL.create("inception3eval", 50, 250))

    run_seq(cfg.copy(output_dir=FLAGS.save_dir / 'case3'), *plan)
def tfmps2(argv):
    # type: (Sequence[str]) -> None
    """Run a foreground inference job against a background inception4 training
    job on the TF executor, once per requested inference batch size.

    If more than one argument is given, argv[0] names the inference workload
    (default "alexneteval"); argv[1:] are its batch sizes (default [1, 2, 4, 8]).
    Both jobs block on a FIFO until released, so they can be started together.
    """
    name = "alexneteval"
    if len(argv) > 1:
        name = argv[0]
    batch_sizes = [int(v) for v in argv[1:]] or [1, 2, 4, 8]
    batch_num = 300

    for run_id, bs in enumerate(batch_sizes):
        with tempfile.TemporaryDirectory() as tmpdir:
            # Background training job; the batch number only serves to
            # distinguish runs, and SALUS_ITER_SECONDS keeps it alive long enough.
            bg = WTL.create('inception4', 50, 100 + run_id, executor=Executor.TF)
            bg.env['SALUS_ITER_SECONDS'] = '300'
            bg.extra_args += ['--min_mem']

            # FIFO that holds the training job until explicitly released.
            train_fifo = str(pathlib.Path(tmpdir).joinpath('fifotrain'))
            os.mkfifo(train_fifo)
            bg.env['SALUS_WAIT_FOR_SIGNAL'] = train_fifo

            # Foreground inference job, gated by its own FIFO.
            fg = WTL.create(name, bs, batch_num, executor=Executor.TF)
            set_env(fg)
            fg.env['SALUS_ITER_SECONDS'] = '150'
            fg.extra_args += ['--min_mem']

            infer_fifo = str(pathlib.Path(tmpdir).joinpath('fifo'))
            os.mkfifo(infer_fifo)
            fg.env['SALUS_WAIT_FOR_SIGNAL'] = infer_fifo

            run_tf(FLAGS.save_dir / "tfmps2" / (name + "-inception4"),
                   fg,          # launch the inference job first
                   Pause(20),
                   bg,          # then the training job
                   # block until both jobs report ready on their FIFOs
                   RunFn(lambda *args, **kwargs: wait_on_pipe(train_fifo)),
                   RunFn(lambda *args, **kwargs: wait_on_pipe(infer_fifo)),
                   # release training first, then inference 10 seconds later
                   RunFn(lambda *args, **kwargs: release_on_pipe(train_fifo)),
                   Pause(10),
                   RunFn(lambda *args, **kwargs: release_on_pipe(infer_fifo)),
                   # run_tf automatically joins all jobs at the end of the sequence
                   )
def case2():
    """Run inception3eval (bs=50, 250 iters) three times in sequence under the
    'pack' scheduler with the 'log' logconf and SALUS_DISABLE_SHARED_LANE set.
    Results go to FLAGS.save_dir/'case2'.
    """
    cfg = maybe_forced_preset(presets.MostEfficient)
    cfg.scheduler = 'pack'
    cfg.logconf = 'log'
    cfg.env['SALUS_DISABLE_SHARED_LANE'] = '1'

    # Build the action plan: workload, 10s pause, workload, 10s pause, workload.
    plan = []
    for rep in range(3):
        if rep:
            plan.append(Pause(10))
        plan.append(WTL.create("inception3eval", 50, 250))

    run_seq(cfg.copy(output_dir=FLAGS.save_dir / 'case2'), *plan)
def case1():
    """Run inception3 (bs=50) three times with decreasing iteration counts
    (145, 75, 35), 15-second pauses in between, under the 'rr' scheduler with
    disable_wc set and SALUS_DISABLE_LANEMGR in the environment.
    Results go to FLAGS.save_dir/'case1'.
    """
    cfg = maybe_forced_preset(presets.MostEfficient)
    cfg.scheduler = 'rr'
    cfg.disable_wc = True
    cfg.env['SALUS_DISABLE_LANEMGR'] = '1'

    plan = []
    for iters in (145, 75, 35):
        plan += [WTL.create("inception3", 50, iters), Pause(15)]
    # drop the trailing pause so the sequence ends on the last workload
    run_seq(cfg.copy(output_dir=FLAGS.save_dir / 'case1'), *plan[:-1])
def case1():
    """Run inception3eval (bs=50, 250 iters) five times in sequence under the
    'fair' scheduler, with a 5-second pause between consecutive runs.
    Results go to FLAGS.save_dir/'case1'.
    """
    cfg = maybe_forced_preset(presets.MostEfficient)
    cfg.scheduler = 'fair'

    # first run, then four (pause, run) pairs
    plan = [WTL.create("inception3eval", 50, 250)]
    for _ in range(4):
        plan += [Pause(5), WTL.create("inception3eval", 50, 250)]

    run_seq(cfg.copy(output_dir=FLAGS.save_dir / 'case1'), *plan)
def tfmps(argv):
    # type: (Sequence[str]) -> None
    """Run a foreground inference job against a background training job on the
    TF executor, once per requested inference batch size.

    argv[1:] are parsed as inference batch sizes (default [1, 2, 4, 8]).
    NOTE(review): argv[0] is never consulted here, unlike tfmps2 where it
    selects the workload name — confirm that dropping it is intentional.
    """
    batch_sizes = [int(v) for v in argv[1:]] or [1, 2, 4, 8]

    for run_id, bs in enumerate(batch_sizes):
        with tempfile.TemporaryDirectory() as tmpdir:
            # background training job, gated on a FIFO created by the helper
            bg, train_fifo = create_train(Executor.TF, run_id, tmpdir)
            bg.extra_args += ['--min_mem']

            # foreground inference job with its own FIFO
            fg, infer_fifo = create_infer(Executor.TF, bs, tmpdir)
            fg.extra_args += ['--min_mem']

            run_tf(FLAGS.save_dir / "tfmps",
                   bg,   # start the background job
                   fg,   # start the foreground job
                   # block until both jobs report ready on their FIFOs
                   RunFn(lambda *args, **kwargs: wait_on_pipe(train_fifo)),
                   RunFn(lambda *args, **kwargs: wait_on_pipe(infer_fifo)),
                   # release training first, then inference 10 seconds later
                   RunFn(lambda *args, **kwargs: release_on_pipe(train_fifo)),
                   Pause(10),
                   RunFn(lambda *args, **kwargs: release_on_pipe(infer_fifo)),
                   # run_tf automatically joins all jobs at the end of the sequence
                   )
def salus(argv):
    # type: (Sequence[str]) -> None
    """Sweep the Salus '--sm-factor' setting while running a background
    training job against a foreground inference job (bs=10).

    argv holds the SM factors to test (default [1.0, 1.5, 2.0, 2.5, 3.0]);
    each factor's results go to FLAGS.save_dir/'salus'/<factor>.
    """
    base_cfg = maybe_forced_preset(presets.MostEfficient)

    factors = [float(v) for v in argv] or [1.0, 1.5, 2.0, 2.5, 3.0]
    for sm in factors:
        cfg = base_cfg.copy(output_dir=FLAGS.save_dir / "salus" / f"{sm:.2f}")
        # NOTE(review): if copy() is shallow, this += mutates the list shared
        # with base_cfg and factors would accumulate across iterations — verify.
        cfg.extra_args += [
            '--sm-factor', f'{sm:.2f}'
        ]

        with tempfile.TemporaryDirectory() as tmpdir:
            # background training job (fixed run id 0) and foreground inference job
            bg, train_fifo = create_train(Executor.Salus, 0, tmpdir)
            fg, infer_fifo = create_infer(Executor.Salus, 10, tmpdir)

            run_seq(cfg,
                    bg,   # start the background job
                    fg,   # start the foreground job
                    # block until both jobs report ready on their FIFOs
                    RunFn(lambda *args, **kwargs: wait_on_pipe(train_fifo)),
                    RunFn(lambda *args, **kwargs: wait_on_pipe(infer_fifo)),
                    # release training first, then inference 10 seconds later
                    RunFn(lambda *args, **kwargs: release_on_pipe(train_fifo)),
                    Pause(10),
                    RunFn(lambda *args, **kwargs: release_on_pipe(infer_fifo)),
                    # run_seq automatically joins all jobs at the end of the sequence
                    )
def main(argv):
    """Profile inception3 against alexnet under the 'preempt' scheduler with
    disable_wc set.

    With explicit argv, run those parsed actions instead and stop.  When
    FLAGS.with_ref is set, also collect reference alexnet runs (default
    executor and TF) into FLAGS.save_dir/'reference'.
    """
    cfg = maybe_forced_preset(presets.Profiling)
    cfg.scheduler = 'preempt'
    cfg.disable_wc = True

    if argv:
        # actions given on the command line take precedence
        run_seq(cfg.copy(output_dir=FLAGS.save_dir), *parse_actions_from_cmd(argv))
        return

    run_seq(cfg.copy(output_dir=FLAGS.save_dir),
            WTL.create("inception3", 25, 1298),
            Pause(60),
            WTL.create("alexnet", 100, 508),
            )

    if not FLAGS.with_ref:
        return

    # reference data: the same alexnet workload, then again on the TF executor
    run_seq(presets.MostEfficient(output_dir=FLAGS.save_dir / 'reference'),
            WTL.create("alexnet", 100, 508),
            Pause.Wait,
            WTL.create("alexnet", 100, 508, executor=Executor.TF),
            )
def main(argv):
    """Run five networks back-to-back under the AllocProf preset, separated by
    10-second pauses; with explicit argv, run those parsed actions instead."""
    cfg = maybe_forced_preset(presets.AllocProf)

    if argv:
        run_seq(cfg.copy(output_dir=FLAGS.save_dir), *parse_actions_from_cmd(argv))
        return

    # (network, batch size, iterations) in execution order
    lineup = [
        ("resnet50", 50, 265),
        ("googlenet", 100, 200),
        ("inception3", 25, 170),
        ("vgg16", 50, 50),
        ("overfeat", 100, 80),
    ]
    plan = []
    for pos, (net, bs, iters) in enumerate(lineup):
        if pos:
            plan.append(Pause(10))
        plan.append(WTL.create(net, bs, iters))

    run_seq(cfg.copy(output_dir=FLAGS.save_dir), *plan)
def salus(argv):
    # type: (Sequence[str]) -> None
    """Run a foreground inference job against a background inception4 training
    job on Salus, once per requested inference batch size.

    If more than one argument is given, argv[0] names the inference workload
    (default "alexneteval"); argv[1:] are its batch sizes (default [1, 2, 4, 8]).
    """
    cfg = maybe_forced_preset(presets.MostEfficient)

    name = "alexneteval"
    if len(argv) > 1:
        name = argv[0]
    batch_sizes = [int(v) for v in argv[1:]] or [1, 2, 4, 8]
    batch_num = 300

    for run_id, bs in enumerate(batch_sizes):
        with tempfile.TemporaryDirectory() as tmpdir:
            # background training job and foreground inference job, each gated
            # on a FIFO created by the respective helper
            bg, train_fifo = create_train(Executor.Salus, run_id, tmpdir)
            fg, infer_fifo = create_infer(Executor.Salus, name, bs, batch_num, tmpdir)

            run_seq(cfg.copy(output_dir=FLAGS.save_dir / "salus" / (name + "-inception4")),
                    bg,   # start the background job
                    fg,   # start the foreground job
                    # block until both jobs report ready on their FIFOs
                    RunFn(lambda *args, **kwargs: wait_on_pipe(train_fifo)),
                    RunFn(lambda *args, **kwargs: wait_on_pipe(infer_fifo)),
                    # release training first, then inference 10 seconds later
                    RunFn(lambda *args, **kwargs: release_on_pipe(train_fifo)),
                    Pause(10),
                    RunFn(lambda *args, **kwargs: release_on_pipe(infer_fifo)),
                    # run_seq automatically joins all jobs at the end of the sequence
                    )