Example #1
0
def main(argv):
    """Run alexnet workloads under the AllocProf preset, then the same workloads on TF.

    If `argv` is non-empty, the actions parsed from it are passed to
    `run_seq` instead and nothing else is run.
    """
    base_cfg = maybe_forced_preset(presets.AllocProf)
    salus_cfg = base_cfg.copy(output_dir=FLAGS.save_dir)
    if argv:
        run_seq(salus_cfg, *parse_actions_from_cmd(argv))
        return

    # five identical alexnet workloads handed to run_seq with no pause in between
    run_seq(salus_cfg, *[WTL.create("alexnet", 25, 200) for _ in range(5)])

    # the same five workloads with the TF executor, with Pause.Wait between
    # consecutive workloads
    tf_dir = FLAGS.save_dir
    tf_actions = []
    for idx in range(5):
        if idx > 0:
            tf_actions.append(Pause.Wait)
        tf_actions.append(WTL.create("alexnet", 25, 200, executor=Executor.TF))
    run_tf(tf_dir, *tf_actions)
Example #2
0
def main(argv):
    """Run inception3 followed by alexnet with the 'preempt' scheduler.

    If `argv` is non-empty, the actions parsed from it are run instead.
    Reference data is additionally collected when `FLAGS.with_ref` is set.
    """
    base_cfg = maybe_forced_preset(presets.Profiling)
    base_cfg.scheduler = 'preempt'
    base_cfg.disable_wc = True

    run_cfg = base_cfg.copy(output_dir=FLAGS.save_dir)
    if argv:
        run_seq(run_cfg, *parse_actions_from_cmd(argv))
        return

    run_seq(run_cfg,
            WTL.create("inception3", 25, 1298),
            Pause(60),
            WTL.create("alexnet", 100, 508))

    if FLAGS.with_ref:
        # reference data: alexnet under the MostEfficient preset and with
        # the TF executor
        ref_cfg = presets.MostEfficient(output_dir=FLAGS.save_dir / 'reference')
        run_seq(ref_cfg,
                WTL.create("alexnet", 100, 508),
                Pause.Wait,
                WTL.create("alexnet", 100, 508, executor=Executor.TF))
Example #3
0
def single(scfg):
    """Run inception3, resnet50 and resnet152 with a Pause.Manual between them.

    :param scfg: configuration to copy; the copy writes to `FLAGS.save_dir`
    """
    run_cfg = scfg.copy(output_dir=FLAGS.save_dir)
    actions = [
        WTL.create("inception3", 100, 165),
        Pause.Manual,
        WTL.create("resnet50", 50, 798),
        Pause.Manual,
        WTL.create("resnet152", 75, 19),
    ]
    run_seq(run_cfg, *actions)
Example #4
0
def test():
    """Run a short alexnet job with the TF executor, then one with the default executor."""
    base_cfg = maybe_forced_preset(presets.MostEfficient)
    run_cfg = base_cfg.copy(output_dir=FLAGS.save_dir/'test')

    # BUG: seems we must run a single job first otherwise it will hang
    first_job = WTL.create("alexnet", 25, 20, executor=Executor.TF)
    run_seq(run_cfg, first_job, Pause.Wait, WTL.create("alexnet", 25, 20))
Example #5
0
def case2():
    """Use OpTracing (with the 'memop' log config) to see if each iteration is exclusive."""
    tracing_cfg = maybe_forced_preset(presets.OpTracing)
    tracing_cfg.logconf = 'memop'

    run_cfg = tracing_cfg.copy(output_dir=FLAGS.save_dir)
    # (model, batch size, number of batches) for each workload in the run
    specs = (
        ("inception3", 100, 165),
        ("resnet50", 50, 798),
        ("resnet152", 75, 19),
    )
    run_seq(run_cfg, *[WTL.create(*spec) for spec in specs])
Example #6
0
def case2():
    """Run two inception3 (batch size 100) workloads under AllocProf with SALUS_DISABLE_LANEMGR set."""
    prof_cfg = maybe_forced_preset(presets.AllocProf)
    prof_cfg.env['SALUS_DISABLE_LANEMGR'] = '1'

    run_cfg = prof_cfg.copy(output_dir=FLAGS.save_dir/'case2')
    workloads = [WTL.create("inception3", 100, 20) for _ in range(2)]
    run_seq(run_cfg, *workloads)
Example #7
0
def case1():
    """Run inception3eval once through run_tf and once through run_seq, both under 'case1'."""
    base_cfg = maybe_forced_preset(presets.MostEfficient)
    out_dir = FLAGS.save_dir/'case1'

    # TF executor first
    tf_job = WTL.create("inception3eval", 1, 1000, executor=Executor.TF)
    run_tf(out_dir, tf_job)

    # then the same workload with the default executor
    run_cfg = base_cfg.copy(output_dir=out_dir)
    run_seq(run_cfg, WTL.create("inception3eval", 1, 1000))
Example #8
0
def case2():
    """Run super_res with the TF executor, then again with the default executor."""
    base_cfg = maybe_forced_preset(presets.MostEfficient)
    run_cfg = base_cfg.copy(output_dir=FLAGS.save_dir/'case2')

    # BUG: seems we must run a single job first otherwise it will hang
    actions = [
        WTL.create("super_res", 128, 20, executor=Executor.TF),
        Pause.Wait,
        WTL.create("super_res", 128, 20),
    ]
    run_seq(run_cfg, *actions)
Example #9
0
def main(argv):
    """Run two resnet101 workloads with OpTracing and the 'pack' scheduler.

    `argv` is accepted but not used.
    """
    tracing_cfg = maybe_forced_preset(presets.OpTracing)
    tracing_cfg.scheduler = 'pack'

    # Firstly run concurrently on salus
    salus_cfg = tracing_cfg.copy(output_dir=FLAGS.save_dir / "salus")
    pair = [WTL.create("resnet101", 50, 47) for _ in range(2)]
    run_seq(salus_cfg, *pair)
Example #10
0
def case2():
    """Trace a short alexnet job, then two longer alexnet jobs, using the 'memop' log config."""
    tracing_cfg = maybe_forced_preset(presets.OpTracing)
    tracing_cfg.logconf = 'memop'

    run_cfg = tracing_cfg.copy(output_dir=FLAGS.save_dir/'case2')
    # BUG: seems we must run a single job first otherwise it will hang
    actions = [WTL.create("alexnet", 25, 10), Pause.Wait]
    actions.extend(WTL.create("alexnet", 25, 50) for _ in range(2))
    run_seq(run_cfg, *actions)
Example #11
0
def case1():
    """Trace inception3 alone, then inception3 together with resnet50, using the 'memop' log config."""
    tracing_cfg = maybe_forced_preset(presets.OpTracing)
    tracing_cfg.logconf = 'memop'

    run_cfg = tracing_cfg.copy(output_dir=FLAGS.save_dir/'case1')
    # BUG: seems we must run a single job first otherwise it will hang
    first_job = WTL.create("inception3", 100, 20)
    run_seq(run_cfg,
            first_job,
            Pause.Wait,
            WTL.create("inception3", 100, 20),
            WTL.create("resnet50", 50, 20))
Example #12
0
def case2():
    """Inception3_100 is missing some dealloc log entry.

    Runs inception3 at batch sizes 100, 50 and 25 under the Debugging preset,
    each followed by Pause.Wait.
    """
    debug_cfg = maybe_forced_preset(presets.Debugging)
    run_cfg = debug_cfg.copy(output_dir=FLAGS.save_dir)

    actions = []
    for batch_size in (100, 50, 25):
        actions.append(WTL.create("inception3", batch_size, 10))
        actions.append(Pause.Wait)
    run_seq(run_cfg, *actions)
Example #13
0
def main(argv):
    """Run two resnet50 workloads under the Profiling preset.

    If `argv` is non-empty, the actions parsed from it are run instead.
    """
    prof_cfg = maybe_forced_preset(presets.Profiling)
    run_cfg = prof_cfg.copy(output_dir=FLAGS.save_dir)

    if argv:
        actions = parse_actions_from_cmd(argv)
    else:
        actions = [WTL.create("resnet50", 50, 265) for _ in range(2)]
    run_seq(run_cfg, *actions)
Example #14
0
def case2():
    """Run super_res on TF with verbose logging env vars, then with OpTracing/'memop' via run_seq."""
    case_dir_name = 'case2'

    # Run on TF
    tf_job = WTL.create("super_res", 128, 20, executor=Executor.TF)
    tf_job.env['TF_CPP_MIN_VLOG_LEVEL'] = '2'
    tf_job.env['TF_CPP_MIN_LOG_LEVEL'] = ''
    run_tf(FLAGS.save_dir/case_dir_name/'tf', tf_job)

    # Run with the default executor under the tracing configuration
    tracing_cfg = maybe_forced_preset(presets.OpTracing)
    tracing_cfg.logconf = 'memop'
    salus_cfg = tracing_cfg.copy(output_dir=FLAGS.save_dir/case_dir_name/'salus')
    run_seq(salus_cfg, WTL.create("super_res", 128, 20))
Example #15
0
def case3():
    """With specially compiled salus, no restriction for how iteration runs, i.e. multiple iter can run
    together, to collect mem data and fragmentation.

    Runs two inception3 (batch size 25) workloads under AllocProf with
    SALUS_DISABLE_LANEMGR set.
    """
    prof_cfg = maybe_forced_preset(presets.AllocProf)
    prof_cfg.env['SALUS_DISABLE_LANEMGR'] = '1'

    run_cfg = prof_cfg.copy(output_dir=FLAGS.save_dir/'case3')
    workloads = [WTL.create("inception3", 25, 20) for _ in range(2)]
    run_seq(run_cfg, *workloads)
Example #16
0
def case3():
    """Run three inception3eval workloads, separated by Pause(10), with the 'fair' scheduler.

    SALUS_DISABLE_LANEMGR is set in the environment for this run.
    """
    base_cfg = maybe_forced_preset(presets.MostEfficient)
    base_cfg.scheduler = 'fair'
    base_cfg.env['SALUS_DISABLE_LANEMGR'] = '1'

    run_cfg = base_cfg.copy(output_dir=FLAGS.save_dir/'case3')
    actions = [WTL.create("inception3eval", 50, 250)]
    for _ in range(2):
        actions.append(Pause(10))
        actions.append(WTL.create("inception3eval", 50, 250))
    run_seq(run_cfg, *actions)
Example #17
0
def case4():
    """Run inception3 and resnet50 first with Pause.Wait between them, then without.

    Output goes to `<save_dir>/case4/seq2` for the run with Pause.Wait and to
    `<save_dir>/case4/par2` for the run without it.
    """
    scfg = maybe_forced_preset(presets.MostEfficient)
    case_dir = FLAGS.save_dir/'case4'

    # BUG: seems we must run a single job first otherwise it will hang
    run_seq(scfg.copy(output_dir=case_dir/'seq2'),
            WTL.create("inception3", 100, 100),
            Pause.Wait,
            WTL.create("resnet50", 50, 100))

    # Keep both runs under this case's own directory; the second run used to
    # be written under 'case3', mixing its output with another case.
    run_seq(scfg.copy(output_dir=case_dir/'par2'),
            WTL.create("inception3", 100, 100),
            WTL.create("resnet50", 50, 100))
Example #18
0
def do_measure(scfg, name, batch_sizes):
    """Run workload `name` at every batch size, once per executor.

    :param scfg: configuration copied for each run
    :param name: workload name passed to `WTL.create`
    :param batch_sizes: iterable of batch sizes to measure

    For each batch size the Salus-executor workload is run first (output
    under `<save_dir>/salus`), then the TF-executor one (`<save_dir>/tf`).
    """
    batch_num = 100
    # executor paired with the output sub-directory of its runs, in run order
    targets = ((Executor.Salus, "salus"), (Executor.TF, "tf"))
    for bs in batch_sizes:
        for executor, subdir in targets:
            workload = WTL.create(name, bs, batch_num, executor=executor)
            set_env(workload)
            run_seq(scfg.copy(output_dir=FLAGS.save_dir / subdir), workload)
Example #19
0
def tfmps2(argv):
    # type: (Sequence[str]) -> None
    """Run a foreground inference job together with a background inception4 training job on TF.

    :param argv: optional ``[name, batch_size, ...]``. ``argv[0]`` is the
        inference workload name (default ``"alexneteval"``); any remaining
        items are batch sizes (default ``[1, 2, 4, 8]``).

    Both jobs are given a FIFO path in ``SALUS_WAIT_FOR_SIGNAL``. The action
    sequence waits on both FIFOs, releases the training job, pauses 10
    seconds and then releases the inference job.
    """
    # A lone workload name without batch sizes must be honoured too; it was
    # previously ignored unless at least one batch size followed it.
    name = argv[0] if argv else "alexneteval"
    batch_sizes = [int(v) for v in argv[1:]] or [1, 2, 4, 8]

    batch_num = 300
    for idx, bs in enumerate(batch_sizes):
        with tempfile.TemporaryDirectory() as td:
            # create a background training job, the batch number has no effect here,
            # only used to distinguish different runs
            trainWl = WTL.create('inception4', 50, 100 + idx, executor=Executor.TF)
            # make sure it runs long enough
            trainWl.env['SALUS_ITER_SECONDS'] = '300'
            trainWl.extra_args += ['--min_mem']

            # create a pipe to signal trainWl
            pipetrain = str(pathlib.Path(td).joinpath('fifotrain'))
            os.mkfifo(pipetrain)
            trainWl.env['SALUS_WAIT_FOR_SIGNAL'] = pipetrain

            # create the foreground inference job
            wl = WTL.create(name, bs, batch_num, executor=Executor.TF)
            set_env(wl)
            wl.env['SALUS_ITER_SECONDS'] = '150'
            wl.extra_args += ['--min_mem']

            # create a pipe to signal the inference job
            pipe = str(pathlib.Path(td).joinpath('fifo'))
            os.mkfifo(pipe)
            wl.env['SALUS_WAIT_FOR_SIGNAL'] = pipe

            # The lambdas close over this iteration's pipe paths; run_tf is
            # called inside the same iteration, while those names still
            # refer to this iteration's FIFOs.
            run_tf(FLAGS.save_dir / "tfmps2" / (name + "-inception4"),
                   wl,  # start the foreground job
                   Pause(20),
                   trainWl,  # start the background job
                   # wait for both jobs to be ready
                   RunFn(lambda *args, **kwargs: wait_on_pipe(pipetrain)),
                   RunFn(lambda *args, **kwargs: wait_on_pipe(pipe)),
                   # start train job
                   RunFn(lambda *args, **kwargs: release_on_pipe(pipetrain)),
                   # wait 10 seconds
                   Pause(10),
                   # release inference job
                   RunFn(lambda *args, **kwargs: release_on_pipe(pipe)),
                   # NOTE(review): presumably run_tf joins all jobs at the end of
                   # the sequence, as the original note said of run_seq - confirm
                   )
Example #20
0
def case2():
    """Run three inception3eval workloads, separated by Pause(10), with the 'pack' scheduler.

    Uses the 'log' log config and sets SALUS_DISABLE_SHARED_LANE.
    """
    base_cfg = maybe_forced_preset(presets.MostEfficient)
    base_cfg.scheduler = 'pack'
    base_cfg.logconf = 'log'
    base_cfg.env['SALUS_DISABLE_SHARED_LANE'] = '1'

    run_cfg = base_cfg.copy(output_dir=FLAGS.save_dir/'case2')
    total_jobs = 3
    actions = []
    for idx in range(total_jobs):
        if idx > 0:
            # a 10 second pause between consecutive workloads
            actions.append(Pause(10))
        actions.append(WTL.create("inception3eval", 50, 250))
    run_seq(run_cfg, *actions)
Example #21
0
def case3(argv):
    """Run one resnet50 job through run_tfdist, then 300 resnet50 jobs with the 'pack' scheduler.

    `argv` is accepted but not used. Output goes to a directory named after
    this function.
    """
    model, bs, bn = 'resnet50', 50, 500
    # use this function's own name as the output directory name
    name = inspect.currentframe().f_code.co_name

    # first run one job alone to get its JCT
    solo_job = WTL.create(model, bs, bn, executor=Executor.TFDist)
    run_tfdist(FLAGS.save_dir/name, solo_job)

    # then run 300 identical workloads in one sequence
    pack_cfg = maybe_forced_preset(presets.MostEfficient)
    pack_cfg.scheduler = 'pack'

    wls = []
    for _ in range(300):
        wls.append(WTL.create(model, bs, bn))
    run_seq(pack_cfg.copy(output_dir=FLAGS.save_dir/name), *wls)
Example #22
0
def case1():
    """Trace inception3 and resnet50 separately, then back to back, using the 'memop' log config."""
    tracing_cfg = maybe_forced_preset(presets.OpTracing)
    tracing_cfg.logconf = 'memop'

    run_cfg = tracing_cfg.copy(output_dir=FLAGS.save_dir/'case1')
    # BUG: seems we must run a single job first otherwise it will hang
    actions = [
        WTL.create("inception3", 100, 10),
        Pause.Wait,
        WTL.create("resnet50", 50, 10),
        Pause.Wait,
    ]
    actions.append(WTL.create("inception3", 100, 20))
    # resnet50 seems to start earlier than inception3 and finishes too early, use more iters (40)
    actions.append(WTL.create("resnet50", 50, 40))
    run_seq(run_cfg, *actions)
Example #23
0
def case1():
    """Run three inception3 workloads of decreasing length, 15 seconds apart, with the 'rr' scheduler.

    Sets `disable_wc` and SALUS_DISABLE_LANEMGR for the run.
    """
    base_cfg = maybe_forced_preset(presets.MostEfficient)
    base_cfg.scheduler = 'rr'
    base_cfg.disable_wc = True
    base_cfg.env['SALUS_DISABLE_LANEMGR'] = '1'

    run_cfg = base_cfg.copy(output_dir=FLAGS.save_dir/'case1')
    actions = []
    # number of batches for each successive workload
    for idx, batch_num in enumerate((145, 75, 35)):
        if idx > 0:
            actions.append(Pause(15))
        actions.append(WTL.create("inception3", 50, batch_num))
    run_seq(run_cfg, *actions)
Example #24
0
def case2():
    """Run ten identical inception3eval (batch size 1) workloads in one sequence."""
    base_cfg = maybe_forced_preset(presets.MostEfficient)
    run_cfg = base_cfg.copy(output_dir=FLAGS.save_dir/'case2')

    workloads = [WTL.create("inception3eval", 1, 1000) for _ in range(10)]
    run_seq(run_cfg, *workloads)
Example #25
0
def case2():
    """Trace inception3 and resnet50 together and extract the TF allocator log lines.

    The server's stdout/stderr are saved (`save_outerr`); afterwards the
    lines of `server.stderr` matching ``] (\\+|-)`` are written to
    `tfalloc.output` in the same directory.
    """
    scfg = maybe_forced_preset(presets.OpTracing)
    scfg.logconf = 'memop'
    scfg.env['TF_CPP_MIN_VLOG_LEVEL'] = '1'
    scfg.env['TF_CPP_MIN_LOG_LEVEL'] = ''
    scfg.save_outerr = True

    out_dir = FLAGS.save_dir/'case2'
    run_seq(scfg.copy(output_dir=out_dir),
            WTL.create("inception3", 100, 20),
            WTL.create("resnet50", 50, 20))

    # filter the TF allocator output; read the stderr produced by THIS run
    # (it used to point at 'case1', a different run's output directory)
    f = out_dir/'server.stderr'
    with f.with_name('tfalloc.output').open('w') as file:
        grep = execute(['egrep', r"] (\+|-)", f.name], stdout=file, cwd=str(f.parent))
        grep.wait()
Example #26
0
def case1():
    """Run five inception3eval workloads, 5 seconds apart, with the 'fair' scheduler."""
    base_cfg = maybe_forced_preset(presets.MostEfficient)
    base_cfg.scheduler = 'fair'

    run_cfg = base_cfg.copy(output_dir=FLAGS.save_dir/'case1')
    actions = [WTL.create("inception3eval", 50, 250)]
    for _ in range(4):
        # every further workload is preceded by a 5 second pause
        actions.append(Pause(5))
        actions.append(WTL.create("inception3eval", 50, 250))
    run_seq(run_cfg, *actions)
Example #27
0
def case1():
    """Run inception3eval through run_tfdist once for every entry of `rates`.

    The evaluation interval env var is set to ``1 / rate`` and each run
    writes to `<save_dir>/case1/<rate>`.
    """
    for rate in rates:
        workload = WTL.create("inception3eval", 1, 500, executor=Executor.TFDist)
        env_settings = {
            'SALUS_TFBENCH_EVAL_INTERVAL': str(1 / rate),
            'SALUS_TFBENCH_EVAL_RAND_FACTOR': '1',
            'SALUS_TFBENCH_EVAL_BLOCK': 'false',
        }
        for key, value in env_settings.items():
            workload.env[key] = value
        run_tfdist(FLAGS.save_dir/'case1'/str(rate), workload)
Example #28
0
def main(argv):
    """Run two resnet101 workloads with the default executor, then two with the TF executor.

    Both runs use the MostEfficient preset with the 'pack' scheduler;
    `argv` is accepted but not used.
    """
    base_cfg = maybe_forced_preset(presets.MostEfficient)
    base_cfg.scheduler = 'pack'

    # Firstly run concurrently on salus
    salus_cfg = base_cfg.copy(output_dir=FLAGS.save_dir / "salus")
    run_seq(salus_cfg, *[WTL.create("resnet101", 50, 47) for _ in range(2)])

    # Then run on tf
    tf_cfg = base_cfg.copy(output_dir=FLAGS.save_dir / "tf")
    tf_actions = [
        WTL.create("resnet101", 50, 47, executor=Executor.TF),
        Pause.Wait,
        WTL.create("resnet101", 50, 47, executor=Executor.TF),
    ]
    run_seq(tf_cfg, *tf_actions)
Example #29
0
def select_workloads(argv):
    # type: (Iterable[str]) -> list
    """Select workloads based on commandline

        Example: alexnet,vgg11

    Each item of `argv` is a comma separated list of workload specs. A spec
    is either ``<name>`` (every batch size reported by
    `available_batch_sizes()` is selected) or ``<name>_<batch size>``.
    With an empty `argv`, all of `WTL.known_workloads` are selected.

    :return: list of workloads created with 1 batch and the TF executor
    """
    if not argv:
        names = WTL.known_workloads.keys()
    else:
        names = unique((
            name
            for piece in argv
            for name in piece.split(',')
        ), stable=True)

    def getbs(spec):
        """Expand one spec into a list of (workload name, batch size) pairs."""
        # Split on the LAST underscore only and accept the suffix as a batch
        # size only when it is an integer, so workload names that contain
        # an underscore themselves (e.g. "super_res", "super_res_128") work.
        base, sep, suffix = spec.rpartition('_')
        if sep:
            try:
                return [(base, int(suffix))]
            except ValueError:
                pass  # the underscore is part of the workload name
        bss = WTL.from_name(spec).available_batch_sizes()
        return [(spec, bs) for bs in bss]

    return [WTL.create(n, batch_size, 1, executor=Executor.TF)
            for name in names
            for n, batch_size in getbs(name)]
Example #30
0
def do_mem(logdir, network, batch_size):
    """Run one workload and collect its memory allocation log.

    :param logdir: base directory for results
    :param network: workload name
    :param batch_size: batch size to run with
    :return: the final output directory,
        `logdir / <"salus" or "tf"> / <canonical workload name>`

    Runs 20 batches (5 for "speech"), on Salus when `FLAGS.use_salus` is set
    and on TF otherwise.
    """
    batch_num = 5 if network == "speech" else 20

    logger.info(f'Measuring memory for {network}_{batch_size} for {batch_num} iter')

    if FLAGS.use_salus:
        ex = "salus"
    else:
        ex = "tf"
    executor_dir = logdir / ex
    final_dst = executor_dir / WTL.from_name(network).canonical_name(RunConfig(batch_size, batch_num, None))

    with atomic_directory(final_dst) as outputdir:
        if FLAGS.use_salus:
            salus_cfg = maybe_forced_preset(presets.AllocProf)
            salus_cfg.logconf = "memop"
            salus_cfg.output_dir = outputdir
            salus_server = SalusServer(salus_cfg)
            with salus_server.run():
                logger.info('    Running on Salus')
                WTL.block_run(network, batch_size, batch_num, Executor.Salus, outputdir / 'rpc.output')
        else:
            logger.info('    Running on TF')
            tf_job = WTL.create(network, batch_size, batch_num, Executor.TF)
            tf_job.env['TF_CPP_MIN_VLOG_LEVEL'] = '1'
            tf_job.env['TF_CPP_MIN_LOG_LEVEL'] = ''
            run_tf(outputdir, tf_job)
            # filter the first file found into a more convenient name,
            # then remove the unfiltered file
            raw_log = next(iter(pathlib.Path(outputdir).iterdir()), None)
            if raw_log is not None:
                with raw_log.with_name('alloc.output').open('w') as file:
                    grep = execute(['egrep', r"] (\+|-)", raw_log.name], stdout=file, cwd=str(raw_log.parent))
                    grep.wait()
                raw_log.unlink()

    return final_dst