예제 #1
0
def case3():
    """Run three identical inception3eval workloads sequentially under the
    'fair' scheduler with the lane manager disabled, pausing 10s between jobs.
    """
    scfg = maybe_forced_preset(presets.MostEfficient)
    scfg.scheduler = 'fair'
    scfg.env['SALUS_DISABLE_LANEMGR'] = '1'

    # Three identical eval workloads separated by 10-second pauses.
    actions = []
    for _ in range(3):
        if actions:
            actions.append(Pause(10))
        actions.append(WTL.create("inception3eval", 50, 250))
    run_seq(scfg.copy(output_dir=FLAGS.save_dir/'case3'), *actions)
예제 #2
0
def tfmps2(argv):
    # type: (Sequence[str]) -> None
    """Measure a foreground inference job next to a background inception4
    training job under TF (MPS), once per requested batch size.

    argv: optional workload name followed by batch sizes, e.g.
    ``['resnet50eval', '1', '2']``. Defaults: name 'alexneteval',
    batch sizes [1, 2, 4, 8].
    """
    name = "alexneteval"
    if argv:
        # BUGFIX: was `len(argv) > 1`, which silently dropped a lone name
        # argument — batch sizes come from argv[1:], so a single positional
        # arg can only be the workload name.
        name = argv[0]
    batch_sizes = [int(v) for v in argv[1:]]

    if not batch_sizes:
        batch_sizes = [1, 2, 4, 8]

    batch_num = 300
    for idx, bs in enumerate(batch_sizes):
        with tempfile.TemporaryDirectory() as td:
            # Create a background training job; the batch number has no
            # effect here, it is only used to distinguish different runs.
            trainWl = WTL.create('inception4', 50, 100 + idx, executor=Executor.TF)
            # Make sure it runs long enough to outlast the inference job.
            trainWl.env['SALUS_ITER_SECONDS'] = '300'
            trainWl.extra_args += ['--min_mem']

            # Named pipe used to gate the training job's start.
            pipetrain = str(pathlib.Path(td).joinpath('fifotrain'))
            os.mkfifo(pipetrain)
            trainWl.env['SALUS_WAIT_FOR_SIGNAL'] = pipetrain

            # Create the foreground inference job.
            wl = WTL.create(name, bs, batch_num, executor=Executor.TF)
            set_env(wl)
            wl.env['SALUS_ITER_SECONDS'] = '150'
            wl.extra_args += ['--min_mem']

            # Named pipe used to gate the inference job's start.
            pipe = str(pathlib.Path(td).joinpath('fifo'))
            os.mkfifo(pipe)
            wl.env['SALUS_WAIT_FOR_SIGNAL'] = pipe

            # Pipe paths are bound as lambda defaults so the callbacks stay
            # correct even if RunFn defers execution past this iteration
            # (guards the classic late-binding closure pitfall).
            run_tf(FLAGS.save_dir / "tfmps2" / (name + "-inception4"),
                   wl,  # start the foreground job
                   Pause(20),
                   trainWl,  # start the background job
                   # wait for both jobs to be ready
                   RunFn(lambda *args, _p=pipetrain, **kwargs: wait_on_pipe(_p)),
                   RunFn(lambda *args, _p=pipe, **kwargs: wait_on_pipe(_p)),
                   # start the train job first
                   RunFn(lambda *args, _p=pipetrain, **kwargs: release_on_pipe(_p)),
                   # wait 10 seconds
                   Pause(10),
                   # release the inference job
                   RunFn(lambda *args, _p=pipe, **kwargs: release_on_pipe(_p)),
                   # all jobs are joined at the end of the sequence
                   )
예제 #3
0
def case2():
    """Run three identical inception3eval workloads sequentially under the
    'pack' scheduler with shared lanes disabled, pausing 10s between jobs.
    """
    scfg = maybe_forced_preset(presets.MostEfficient)
    scfg.scheduler = 'pack'
    scfg.logconf = 'log'
    scfg.env['SALUS_DISABLE_SHARED_LANE'] = '1'

    # First workload, then alternate pause/workload twice more.
    seq = [WTL.create("inception3eval", 50, 250)]
    for _ in range(2):
        seq.append(Pause(10))
        seq.append(WTL.create("inception3eval", 50, 250))
    run_seq(scfg.copy(output_dir=FLAGS.save_dir/'case2'), *seq)
예제 #4
0
def case1():
    """Run three inception3 training jobs of decreasing length under the
    'rr' scheduler (work conservation off, lane manager disabled), with a
    15-second pause between jobs.
    """
    scfg = maybe_forced_preset(presets.MostEfficient)
    scfg.scheduler = 'rr'
    scfg.disable_wc = True
    scfg.env['SALUS_DISABLE_LANEMGR'] = '1'

    # Same network and batch size, shrinking iteration counts.
    actions = []
    for iters in (145, 75, 35):
        if actions:
            actions.append(Pause(15))
        actions.append(WTL.create("inception3", 50, iters))
    run_seq(scfg.copy(output_dir=FLAGS.save_dir/'case1'), *actions)
예제 #5
0
def case1():
    """Run five identical inception3eval workloads sequentially under the
    'fair' scheduler, pausing 5 seconds between jobs.
    """
    scfg = maybe_forced_preset(presets.MostEfficient)
    scfg.scheduler = 'fair'

    # Five identical eval workloads separated by 5-second pauses.
    actions = []
    for i in range(5):
        if i:
            actions.append(Pause(5))
        actions.append(WTL.create("inception3eval", 50, 250))
    run_seq(scfg.copy(output_dir=FLAGS.save_dir/'case1'), *actions)
예제 #6
0
def tfmps(argv):
    # type: (Sequence[str]) -> None
    """Pair a foreground inference job with a background training job under
    TF (MPS), once per requested batch size (argv[1:], default [1, 2, 4, 8]).
    """
    batch_sizes = [int(v) for v in argv[1:]] or [1, 2, 4, 8]

    for idx, bs in enumerate(batch_sizes):
        with tempfile.TemporaryDirectory() as workdir:
            # Background training job; idx distinguishes the runs.
            train_wl, pipetrain = create_train(Executor.TF, idx, workdir)
            train_wl.extra_args += ['--min_mem']

            # Foreground inference job at this batch size.
            wl, pipe = create_infer(Executor.TF, bs, workdir)
            wl.extra_args += ['--min_mem']

            run_tf(FLAGS.save_dir / "tfmps",
                   train_wl,  # start the background job
                   wl,  # start the foreground job
                   # block until both jobs signal readiness
                   RunFn(lambda *a, **kw: wait_on_pipe(pipetrain)),
                   RunFn(lambda *a, **kw: wait_on_pipe(pipe)),
                   # let the training job proceed first
                   RunFn(lambda *a, **kw: release_on_pipe(pipetrain)),
                   # give it a 10-second head start
                   Pause(10),
                   # then release the inference job
                   RunFn(lambda *a, **kw: release_on_pipe(pipe)),
                   # all jobs are joined at the end of the sequence
                   )
예제 #7
0
def salus(argv):
    # type: (Sequence[str]) -> None
    """Sweep the Salus --sm-factor setting over a fixed train+infer job pair.

    argv: the factors to sweep; defaults to [1.0, 1.5, 2.0, 2.5, 3.0].
    """
    base_cfg = maybe_forced_preset(presets.MostEfficient)

    sm_factors = [float(v) for v in argv] or [1.0, 1.5, 2.0, 2.5, 3.0]

    for factor in sm_factors:
        tag = f"{factor:.2f}"
        scfg = base_cfg.copy(output_dir=FLAGS.save_dir / "salus" / tag)
        scfg.extra_args += ['--sm-factor', tag]
        with tempfile.TemporaryDirectory() as workdir:
            # Background training job.
            train_wl, pipetrain = create_train(Executor.Salus, 0, workdir)

            # Foreground inference job.
            wl, pipe = create_infer(Executor.Salus, 10, workdir)

            run_seq(scfg,
                    train_wl,  # start the background job
                    wl,  # start the foreground job
                    # block until both jobs signal readiness
                    RunFn(lambda *a, **kw: wait_on_pipe(pipetrain)),
                    RunFn(lambda *a, **kw: wait_on_pipe(pipe)),
                    # let the training job proceed first
                    RunFn(lambda *a, **kw: release_on_pipe(pipetrain)),
                    # give it a 10-second head start
                    Pause(10),
                    # then release the inference job
                    RunFn(lambda *a, **kw: release_on_pipe(pipe)),
                    # run_seq automatically joins all jobs at the end
                    )
예제 #8
0
def main(argv):
    """Profile inception3 overlapped with alexnet under the 'preempt'
    scheduler; optionally also collect reference alexnet runs.

    Explicit actions passed on the command line take precedence over the
    built-in sequence.
    """
    scfg = maybe_forced_preset(presets.Profiling)
    scfg.scheduler = 'preempt'
    scfg.disable_wc = True

    if argv:
        run_seq(scfg.copy(output_dir=FLAGS.save_dir),
                *parse_actions_from_cmd(argv))
        return

    run_seq(scfg.copy(output_dir=FLAGS.save_dir),
            WTL.create("inception3", 25, 1298),
            Pause(60),
            WTL.create("alexnet", 100, 508))

    if not FLAGS.with_ref:
        return

    # Reference data: alexnet alone, then alexnet on plain TF.
    ref_cfg = presets.MostEfficient(output_dir=FLAGS.save_dir / 'reference')
    run_seq(ref_cfg,
            WTL.create("alexnet", 100, 508),
            Pause.Wait,
            WTL.create("alexnet", 100, 508, executor=Executor.TF))
예제 #9
0
def main(argv):
    """Allocation-profile a fixed suite of five workloads sequentially,
    pausing 10 seconds between jobs; explicit command-line actions take
    precedence over the built-in suite.
    """
    scfg = maybe_forced_preset(presets.AllocProf)
    if argv:
        run_seq(scfg.copy(output_dir=FLAGS.save_dir),
                *parse_actions_from_cmd(argv))
        return

    # (network, batch size, iterations) for each workload in the suite.
    suite = [
        ("resnet50", 50, 265),
        ("googlenet", 100, 200),
        ("inception3", 25, 170),
        ("vgg16", 50, 50),
        ("overfeat", 100, 80),
    ]
    actions = []
    for net, bs, iters in suite:
        if actions:
            actions.append(Pause(10))
        actions.append(WTL.create(net, bs, iters))
    run_seq(scfg.copy(output_dir=FLAGS.save_dir), *actions)
예제 #10
0
def salus(argv):
    # type: (Sequence[str]) -> None
    """Measure a foreground inference job next to a background training job
    on Salus, once per requested batch size.

    argv: optional workload name followed by batch sizes, e.g.
    ``['resnet50eval', '1', '2']``. Defaults: name 'alexneteval',
    batch sizes [1, 2, 4, 8].
    """
    scfg = maybe_forced_preset(presets.MostEfficient)

    name = "alexneteval"
    if argv:
        # BUGFIX: was `len(argv) > 1`, which silently dropped a lone name
        # argument — batch sizes come from argv[1:], so a single positional
        # arg can only be the workload name.
        name = argv[0]
    batch_sizes = [int(v) for v in argv[1:]]

    if not batch_sizes:
        batch_sizes = [1, 2, 4, 8]

    batch_num = 300
    for idx, bs in enumerate(batch_sizes):
        with tempfile.TemporaryDirectory() as td:
            # Create a background training job; idx distinguishes runs.
            train_wl, pipetrain = create_train(Executor.Salus, idx, td)

            # Create the foreground inference job.
            wl, pipe = create_infer(Executor.Salus, name, bs, batch_num, td)

            # Pipe paths are bound as lambda defaults so the callbacks stay
            # correct even if RunFn defers execution past this iteration
            # (guards the classic late-binding closure pitfall).
            run_seq(scfg.copy(output_dir=FLAGS.save_dir / "salus" / (name + "-inception4")),
                    train_wl,  # start the background job
                    wl,  # start the foreground job
                    # wait for both jobs to be ready
                    RunFn(lambda *args, _p=pipetrain, **kwargs: wait_on_pipe(_p)),
                    RunFn(lambda *args, _p=pipe, **kwargs: wait_on_pipe(_p)),
                    # start the train job first
                    RunFn(lambda *args, _p=pipetrain, **kwargs: release_on_pipe(_p)),
                    # wait 10 seconds
                    Pause(10),
                    # release the inference job
                    RunFn(lambda *args, _p=pipe, **kwargs: release_on_pipe(_p)),
                    # run_seq automatically joins all jobs at the end
                    )