Beispiel #1
0
def test_simulate():
    """Check that run() with simulate=True leaves the real database untouched
    and writes the simulated results to <calc_dir>.simulate instead."""
    workdir = tempfile.mkdtemp(prefix='psweep_test_simulate_')
    psets = [{'a': 1}, {'a': 2}, {'a': 3}, {'a': 4}]
    psets_sim = [{'a': 88}, {'a': 99}]
    calc_dir = "{}/calc".format(workdir)
    calc_dir_sim = calc_dir + '.simulate'

    df_real = ps.run(func, psets, calc_dir=calc_dir)
    df_sim = ps.run(func, psets_sim, calc_dir=calc_dir, simulate=True)
    dbfn_real = "{}/results.pk".format(calc_dir)
    dbfn_sim = "{}/results.pk".format(calc_dir_sim)

    # simulated db holds the 4 real psets plus the 2 simulated ones; both
    # databases must exist on disk and match the returned frames
    assert len(df_sim) == 6
    assert len(df_real) == 4
    assert os.path.exists(dbfn_real)
    assert os.path.exists(dbfn_sim)
    assert df_real.equals(ps.df_read(dbfn_real))
    assert df_sim.equals(ps.df_read(dbfn_sim))

    # the real rows are carried over unchanged; simulated rows have no result
    assert df_real.iloc[:4].equals(df_sim.iloc[:4])
    assert np.isnan(df_sim.result.values[-2:]).all()

    # a subsequent real run of the new psets appends actual results
    df_final = ps.run(func, psets_sim, calc_dir=calc_dir)
    assert len(df_final) == 6
    assert df_real.iloc[:4].equals(df_final.iloc[:4])
    assert (df_final.result.values[-2:] == np.array([880.0, 990.0])).all()

    shutil.rmtree(workdir)
Beispiel #2
0
def test_run():
    """Check that two consecutive run() calls append to the database and that
    the second call (tmpsave=True) also writes per-pset tmp result files."""
    workdir = tempfile.mkdtemp(prefix='psweep_test_run_')
    psets = [{'a': 1}, {'a': 2}, {'a': 3}, {'a': 4}]
    calc_dir = "{}/calc".format(workdir)

    # first run: a single run_id, one pset_id per pset
    df = ps.run(func, psets, calc_dir=calc_dir)
    assert len(df) == 4
    assert len(df._run_id.unique()) == 1
    assert len(df._pset_id.unique()) == 4

    # second run: database doubles, new run_id, tmp results requested
    df = ps.run(func, psets, calc_dir=calc_dir, poolsize=2, tmpsave=True)
    assert len(df) == 8
    assert len(df._run_id.unique()) == 2
    assert len(df._pset_id.unique()) == 8
    expected_cols = {'_calc_dir', '_pset_id', '_run_id', '_time_utc',
                     'a', 'result'}
    assert set(df.columns) == expected_cols

    dbfn = "{}/results.pk".format(calc_dir)
    assert os.path.exists(dbfn)
    assert df.equals(ps.df_read(dbfn))

    # every pset of the second run must have a tmp result file on disk
    last_run_id = df._run_id.unique()[-1]
    for pset_id in df[df._run_id == last_run_id]._pset_id:
        tmpsave_fn = "{calc_dir}/tmpsave/{run_id}/{pset_id}.pk".format(
            calc_dir=calc_dir, run_id=last_run_id, pset_id=pset_id)
        assert os.path.exists(tmpsave_fn)
    shutil.rmtree(workdir)
Beispiel #3
0
def get_holomaps():
    """Build one holoviews HoloMap per varied parameter ("study").

    Returns:
        dict: {study_name: hv.HoloMap}, each HoloMap keyed by the values the
        varied parameter takes in the parameter sweep database.
    """
    # dict with jpeg byte strings of all images from the parameter study
    #   {pset_id: jpegstr}
    jpegstr_dct = common.pkread('img_dct_rgb.pk')

    # shape: assume all imgs have the same shape
    first_pset_id = list(jpegstr_dct.keys())[0]
    shape = common.jpegstr2imgarr(jpegstr_dct[first_pset_id]).shape

    # the parameter sweep database (created by the psweep package)
    df = ps.df_read('results.pk')
    df = df[df.fail_state.isna()]

    vary_cols = [
        'style_weight', 'tv_weight', 'learning_rate', 'style_scales',
        'content_weight_blend', 'style_layer_weight_exp'
    ]

    holos = {}
    print("creating holomaps ...")
    for study in vary_cols:
        print("    " + study)
        this_df = df[df.study == study].sort_values(study)

        # {value of varied param (study): array shape (width, height, 3),...}
        # NOTE: use positional .iloc[0] instead of the previous label-based
        # [0]: after the filtering above the integer row labels are arbitrary,
        # so series[0] would KeyError unless a row happens to carry label 0
        imgs = {
            this_df.loc[this_df._pset_id == pset_id, study].iloc[0]:
                hv.RGB(common.jpegstr2imgarr(jpegstr_dct[pset_id]))
            for pset_id in this_df._pset_id
        }

        holos[study] = hv.HoloMap(imgs, kdims=study)

    # holoviews settings for matplotlib
    hv.util.opts({
        'RGB': {
            'plot': {
                'fig_latex': False,
                'aspect': shape[1] / shape[0],
                'fig_size': 200,
                'xaxis': False,
                'yaxis': False
            }
        }
    })

    print("\nhang tight, we're rendering stuff ...")
    return holos
Beispiel #4
0
        savefig(fig, '{}_{}'.format(study, maxsize_str))


def savefig(fig, name):
    """Write *fig* to pics/<name>.<ext> at 300 dpi for each enabled format.

    The pics/ directory is created on demand.
    """
    os.makedirs('pics', exist_ok=True)
    # only png enabled; 'pdf' output was used at some point as well
    extensions = ['png']
    for ext in extensions:
        target = "pics/{name}.{ext}".format(name=name, ext=ext)
        fig.savefig(target, dpi=300)


if __name__ == '__main__':
    if len(sys.argv) > 1:
        results = sys.argv[1]
    else:
        results = 'calc/results.pk'
    dfall = ps.df_read(results)
    if 'share_leafs' in dfall.columns:
        dfall.share_leafs = dfall.share_leafs.astype(bool)
    for maxsize_str in np.unique(dfall.maxsize_str.values):
        df = dfall[dfall.maxsize_str == maxsize_str]
        if 'main_blocksize_single' in df.study.values:
            plot('main_blocksize_single',
                 df,
                 'blocksize',
                 'timing',
                 'filesize_str',
                 plot='semilogx')
        if 'main_filesize_single' in df.study.values:
            plot('main_filesize_single', df, 'filesize', 'timing',
                 'blocksize_str')
        if 'main_blocksize' in df.study.values:
Beispiel #5
0
from matplotlib import pyplot as plt
import numpy as np

from psweep import psweep as ps

import common


def nearest_idx(src, tgt):
    """For each value in *tgt*, return the index of the closest entry in the
    numpy array *src* (first match wins on ties)."""
    indices = []
    for target_value in tgt:
        indices.append(np.abs(src - target_value).argmin())
    return indices


if __name__ == '__main__':

    df = ps.df_read('results.pk')
    df = df[df.fail_state.isna()]

    # all possible parameters, with data limits
    vary_cols = dict(
        style_weight=[1, 30],
        tv_weight=None,
        learning_rate=None,
        style_scales=[None, 2],
        content_weight_blend=[None, 0.5],
        style_layer_weight_exp=None,
    )

    img_dct = {
        key: common.jpegstr2imgarr(val)
        for key, val in common.pkread('img_dct_rgb.pk').items()
Beispiel #6
0
    os.makedirs('pics', exist_ok=True)

    # anonymize dirs for plots which we publish
    anon_dirs = True

    # group commands which perform roughly the same thing together, the last
    # group's commands are unrelated
    cmd_groups = \
        [('s', r"^(findsame|jdupes -rQ|rdfind|duff -ra)$"),
         ('o', r"^(jdupes -r|duff -rat)$"),
         ('^', r"^(findsame -l 4K|jdupes -rTT)$"),
         ('*', r"^findsame.*(-t1|-l 512K).*$"),
         ]

    df = ps.df_read('calc/results.pk')

    letters = iter(string.ascii_uppercase)
    datadirs = [
        analyze.DataDir(pth, next(letters)) for pth in df.datadir.unique()
    ]
    datadirs.append(analyze.DataDir(os.environ['HOME'], 'HOME'))
    print(datadirs)

    # plot bench data, exclude HOME, for which we only calculate the histogram
    # later
    for datadir in datadirs[:-1]:
        fig, ax = plt.subplots()
        for cache, color in [('cold', 'tab:blue'), ('warm', 'tab:orange')]:
            this_df = df[(df.cache == cache) & (df.datadir == datadir.path)]
            cmds = list(map(filter_cmd, this_df.tool_cmd))
Beispiel #7
0
    Find all _run_ids
        $ {this} {db} | jq -r '.[]|._run_id' | sort -u
        02ca9694-696e-4fdd-ac08-8c343080bb63
        0a1fb364-8681-4178-869e-1126f3719da4
        97058ee2-2e81-426f-b674-04b7ec718c43

    Print a table of some columns
        $ {this} {db} | jq -r '.[]|[._time_utc,.study,._run_id]|@tsv' | column -t

    Show which _run_ids have which study
        $ {this} {db} | jq -r '.[]|[.study,._run_id]|@tsv' | uniq | column -t | sort -k1
        foo           02ca9694-696e-4fdd-ac08-8c343080bb63
        bar           0a1fb364-8681-4178-869e-1126f3719da4
        prod=foo:bar  97058ee2-2e81-426f-b674-04b7ec718c43

    Complex example: show the start time of each run
        $ {this} {db} > /tmp/json
        $ for x in $(jq -r '.[]|._run_id' /tmp/json | sort -u); do \
        ... echo $x $(jq -r "[.[]|select(._run_id==\"$x\")|._time_utc]|min" /tmp/json)
        ... done
        02ca9694-696e-4fdd-ac08-8c343080bb63 2018-09-03T00:00:24Z
        0a1fb364-8681-4178-869e-1126f3719da4 2018-09-02T22:23:47Z
        97058ee2-2e81-426f-b674-04b7ec718c43 2018-09-02T22:09:44Z
""".format(this=os.path.basename(__file__), db='results.pk')

if __name__ == '__main__':
    # parse CLI arguments as declared in the module docstring (docopt usage)
    cli = docopt.docopt(__doc__)
    # load the psweep database and dump it to stdout as JSON
    database = ps.df_read(cli['<file>'])
    print(ps.df_to_json(database, orient=cli['-o']))
Beispiel #8
0
def test_df_io():
    """Round-trip a DataFrame of mixed dtypes through ps.df_write/ps.df_read
    for both the pickle and the json backend and check equality.

    Modernized against current pandas:
      * pandas.util.testing was removed; assert_frame_equal lives in
        pandas.testing
      * DataFrame.append was removed in pandas 2.0; use pd.concat
      * check_less_precise is deprecated; use rtol
      * tempfile.mktemp is insecure/deprecated; use mkstemp
    """
    from pandas.testing import assert_frame_equal
    letters = string.ascii_letters
    ri = np.random.randint
    rn = np.random.rand

    def rs(n):
        # random string of length n
        return ''.join(letters[ii] for ii in ri(0, len(letters), n))

    for fmt in ['pickle', 'json']:
        df = pd.DataFrame()
        for _ in range(2):
            # one row of scalars, strings, tuples, lists, arrays and dicts
            vals = [
                ri(0, 100),
                rs(5),
                np.nan,
                '"{}"'.format(rs(5)),
                "'{}'".format(rs(5)),
                (ri(0, 99), rn(), '{}'.format(rs(5))),
                [ri(0, 99), rn(), "{}".format(rs(5))],
                rn(),
                rn(5),
                rn(5, 5),
                list(rn(5)),
                {
                    'a': 1,
                    'b': 3,
                    'c': [1, 2, 3]
                },
            ]
            if fmt == 'pickle':
                # types only the pickle backend can round-trip
                vals += [
                    True,
                    False,
                    None,
                    set(ri(0, 99, 10)),
                ]
            row = pd.DataFrame([dict(zip(letters, vals))])
            df = pd.concat([df, row], ignore_index=True)

        if fmt == 'json':
            for orient in [
                    None, 'split', 'records', 'index', 'columns', '_default_'
            ]:
                print("orient: ", orient)
                fd, fn = tempfile.mkstemp(
                    prefix='psweep_test_df_io_{}_{}_'.format(fmt, orient))
                os.close(fd)
                if orient != '_default_':
                    ps.df_write(df, fn, fmt=fmt, orient=orient)
                    read = ps.df_read(fn, fmt=fmt, orient=orient)
                else:
                    ps.df_write(df, fn, fmt=fmt)
                    read = ps.df_read(fn, fmt=fmt)
                os.remove(fn)
                # json round-trips floats only approximately
                assert_frame_equal(df, read, check_exact=False, rtol=1e-12)
        elif fmt == 'pickle':
            fd, fn = tempfile.mkstemp(
                prefix='psweep_test_df_io_{}_'.format(fmt))
            os.close(fd)
            # pickle must round-trip exactly
            ps.df_write(df, fn, fmt=fmt)
            read = ps.df_read(fn, fmt=fmt)
            os.remove(fn)
            assert_frame_equal(df, read)
        else:
            raise Exception("unknown fmt")
import os

import common

from psweep import psweep as ps

pj = os.path.join

if __name__ == '__main__':

    # jpeg compression quality (percent)
    quality = 40

    # all data, not part of this repo
    basedir = ps.fullpath('~/work/data/hackathon/calc')
    df = ps.df_read(f'{basedir}/results.pk')

    # new column, detect failed run
    df = df.reindex(columns=df.columns.tolist() + ['fail_state'])
    cases = [
        (r'std::bad_alloc', 'bad_alloc'),
        (r'Killed', 'killed'),
    ]

    img_dct_rgb = {}
    img_dct_gray = {}
    for ii, pset_id in enumerate(df._pset_id.values):
        with open(pj(basedir, pset_id, 'log')) as fd:
            txt = fd.read()
        go = True
        for regex, fail_state in cases: