Beispiel #1
0
def explain_dir(dirname, keys):
    """
    Print the parameters stored in `dirname`/soops-parameters.csv.

    The first row of the CSV file (indexed by its 'pkey' column) is printed
    one column per line via output(), with the column names right-aligned to
    the longest name. Columns whose name is in `keys` are marked by '*'.
    """
    table = pd.read_csv(op.join(dirname, 'soops-parameters.csv'),
                        index_col='pkey')

    # Width of the name column = the longest column name.
    width = max(len(name) for name in table.keys())
    line = '{{}} {{:>{}s}}: {{}}'.format(width)

    first_row = table.iloc[0].to_dict()
    for name, value in first_row.items():
        if name in keys:
            mark = '*'

        else:
            mark = ' '

        output(line.format(mark, name, value))
Beispiel #2
0
def build_pdf(filename):
    """
    Build `filename` with pdflatex in non-stop mode using run_command().

    A truthy status returned by run_command() is reported via output();
    nothing is returned.
    """
    command = 'pdflatex -interaction=nonstopmode'
    # repeat=3 presumably re-runs LaTeX to resolve references - see
    # run_command() for the exact semantics.
    status = run_command(command, filename, repeat=3, silent=True)
    if not status:
        return

    output('build_pdf() failed with status {}!'.format(status))
Beispiel #3
0
def get_plot_style(indices, styles):
    """
    Select plot style keyword arguments according to `indices`.

    `styles` maps a key to a dict of {style key : sequence of values}. For
    each key present in `indices`, every one of its style keys gets the value
    at position ``indices[key]``, wrapped around the sequence length. A
    message is printed whenever a style key has already been selected by
    a previous key (the later selection, if any, then overrides it).

    Returns the dict of selected {style key : value} pairs.
    """
    selected = {}
    for name, name_styles in styles.items():
        for style_name, choices in name_styles.items():
            if style_name in selected:
                output('style key "{}" of "{}" already in use!'.format(
                    style_name, name))

            if name not in indices:
                continue

            # Cycle through the available values.
            position = indices[name] % len(choices)
            selected[style_name] = choices[position]

    return selected
Beispiel #4
0
def find_studies(options):
    """
    Collect all 'soops-parameters.csv' files found below
    `options.directories` into a single DataFrame and optionally query it.

    The column names are normalized (leading '-' stripped, remaining '-'
    replaced by '_') and the rows are sorted by the 'output_dir' column. When
    `options.query` is given, each matching row is printed. When
    `options.shell` is set or no query is given, an interactive shell is
    started with the DataFrame available as `apdf`.

    Returns the DataFrame of all parameter sets; it is empty when no
    parameter file was found.
    """
    output.prefix = 'find:'

    dfs = []
    for root_dir in options.directories:
        for fname in locate_files('soops-parameters.csv', root_dir=root_dir):
            if not op.exists(fname):
                continue

            try:
                df = pd.read_csv(fname, index_col='pkey')

            except pd.errors.EmptyDataError:
                # An empty parameter file carries no information - skip it.
                continue

            dfs.append(df)

    # Always define the result: without this, finding no parameter file left
    # `apdf` unbound and the code below failed with a NameError.
    apdf = pd.DataFrame()

    if len(dfs):
        apdf = pd.concat(dfs)
        apdf = apdf.rename(columns=lambda x: x.lstrip('-').replace('-', '_'))
        apdf = apdf.sort_values('output_dir', ignore_index=True)

        if options.query is not None:
            sdf = apdf.query(options.query)

            for ii in range(len(sdf)):
                row = sdf.iloc[ii]
                output('result {} in {}:\n{}'.format(ii, row['output_dir'], row))

    if options.shell or (options.query is None):
        output('{} parameter sets stored in `apdf` DataFrame'.format(len(apdf)))
        output('column names:\n{}'.format(apdf.keys()))
        from soops.base import shell
        shell()

    return apdf
Beispiel #5
0
def print_info(options):
    """
    Print information about the parametric keys of a run module.

    The module `options.run_mod` is imported and its get_run_info() is used
    to collect the keys (the output directory key and 'script_dir' are
    omitted). Without `options.explain`, the numbered keys are listed;
    otherwise the stored parameters of each directory in `options.explain`
    are printed by explain_dir(). Optionally ends in an interactive shell.
    """
    output.prefix = 'info:'

    run_mod = import_file(options.run_mod)
    if not hasattr(run_mod, 'get_run_info'):
        output('no get_run_info() in {}, exiting'.format(options.run_mod))
        return

    run_info = run_mod.get_run_info()
    run_cmd, opt_args, output_dir_key, _is_finished = run_info

    omitted = (output_dir_key, 'script_dir')
    keys = collect_keys(run_cmd, opt_args, omit=omitted)

    if options.explain is not None:
        for dirname in options.explain:
            output(dirname)
            explain_dir(dirname, keys)

    else:
        for ik, key in enumerate(keys):
            output('{:3d}: {}'.format(ik, key))

    if options.shell:
        from soops.base import shell
        shell()
Beispiel #6
0
def run_plugins(info, df, output_dir):
    """
    Call the plugin functions in `info` one after another.

    Each plugin is called as ``fun(df, data=data)`` and its return value
    becomes the `data` passed to the next one. The initial `data` is a Struct
    holding the parametric columns of `df` and `output_dir`.

    Returns the `data` returned by the last plugin, or None when `info` is
    empty.
    """
    if len(info) == 0:
        return None

    output('run plugins:')
    data = Struct(par_cols=get_parametric_columns(df), output_dir=output_dir)

    for plugin in info:
        name = plugin.__name__
        output('running {}()...'.format(name))
        data = plugin(df, data=data)
        output('...done')

    return data
Beispiel #7
0
def run_plugins(info,
                df,
                output_dir,
                par_keys,
                store_filename,
                plugin_args=None):
    """
    Call the plugin functions in `info` one after another.

    Each plugin is called as ``fun(df, data=data)``; when `plugin_args` has
    an entry under the plugin's ``__name__``, that dict is passed as
    additional keyword arguments. A plugin returning None leaves `data`
    unchanged, any other return value replaces it. Entries of `plugin_args`
    that match no plugin are reported by a warning.

    Returns the final `data`, or None when `info` is empty.
    """
    if len(info) == 0:
        return None

    plugin_args = {} if plugin_args is None else plugin_args

    # Names of the plugins that have some arguments given.
    used = set()
    for plugin in info:
        if plugin.__name__ in plugin_args:
            used.add(plugin.__name__)

    unused = set(plugin_args.keys()).difference(used)
    if len(unused) > 0:
        output('WARNING: unused plugin arguments:', unused)

    output('run plugins:')
    data = init_plugin_data(df, par_keys, output_dir, store_filename)
    for plugin in info:
        name = plugin.__name__
        output('running {}()...'.format(name))

        extra = plugin_args.get(name)
        if extra is None:
            result = plugin(df, data=data)

        else:
            result = plugin(df, data=data, **extra)

        if result is not None:
            data = result

        output('...done')

    return data
Beispiel #8
0
def apply_scoops(info, directories, debug_mode=False):
    """
    Apply the scoop functions in `info` to results found in `directories`.

    Each item of `info` is either ``(filename, fun)`` or
    ``(filename, fun, has_parameters)``. The file name of the first item is
    searched for by locate_files() in every directory; the directory of each
    file found is a results directory. For every results directory, each
    `fun` is called with the path of its file (or with a list of paths, see
    below) and ``rdata=`` the dict of data collected so far for that
    directory. The dict returned by `fun` is merged into that data.

    Returns the tuple ``(df, mdf, par_keys)``: `df` has one row per results
    directory, `mdf` has one row per successfully scooped file and
    `par_keys` is the set of keys returned by the functions whose item has
    a true `has_parameters`. For empty `info`, two empty DataFrames and None
    are returned.

    Raises ValueError for an `info` item of length other than 2 or 3. Other
    exceptions raised by a scoop function are only reported, unless
    `debug_mode` is true.
    """
    if not len(info):
        return pd.DataFrame({}), pd.DataFrame({}), None

    data = []
    metadata = []
    par_keys = set()
    for idir, directory in enumerate(directories):
        output('directory {}: {}'.format(idir, directory))

        # The file of the first scoop item identifies the results
        # directories.
        name0 = info[0][0]
        filenames = locate_files(name0, directory)
        home = op.expanduser('~')
        for ir, filename in enumerate(filenames):
            rdir = op.dirname(filename)
            output('results directory {}: {}'.format(ir, rdir))

            rdata = {'rdir': rdir.replace(home, '~'), 'rfiles': []}
            # NOTE: this dict is shared by all items of this results
            # directory and updated in place for each of them.
            rmetadata = {}
            output('results files:')
            for item in info:
                # NOTE: `filename` of the outer loop is rebound here - it is
                # not needed anymore, as `rdir` is already set.
                if len(item) == 2:
                    filename, fun = item
                    has_parameters = False

                elif len(item) == 3:
                    filename, fun, has_parameters = item

                else:
                    raise ValueError('scoop info item has to have length'
                                     ' 2 or 3! ({})'.format(item))

                output(filename)
                path = op.join(rdir, filename)
                if not op.exists(path):
                    # The path does not exist as given - try it with
                    # locate_files() (presumably as a pattern - verify
                    # against locate_files()).
                    paths = list(locate_files(path))
                    output('expanded:',
                           [path.replace(rdir, '<rdir>') for path in paths])
                    if len(paths) == 0:
                        paths = None

                else:
                    paths = None

                try:
                    # `paths` is None for an existing file or when nothing
                    # was found by the expansion.
                    if paths is None:
                        out = fun(path, rdata=rdata)

                    else:
                        out = fun(paths, rdata=rdata)

                except KeyboardInterrupt:
                    raise

                except Exception as exc:
                    # Report the failure and go on with the next item.
                    output('- failed with:')
                    output(exc)
                    if debug_mode: raise
                    continue

                else:
                    if out is None:
                        output('- nothing returned!')
                        out = {}

                    if paths is None:
                        paths = [path]

                    rdata['rfiles'].append(filename)
                    mtimes = []
                    # NOTE: this loop rebinds `path`, so the 'filename'
                    # metadata entry below holds the last item of `paths`.
                    for path in paths:
                        try:
                            mtime = datetime.fromtimestamp(op.getmtime(path))

                        except FileNotFoundError:
                            mtime = np.nan

                        mtimes.append(mtime)

                    # 'data_row' is the index of the row of `df` that is
                    # going to hold the data of this results directory.
                    rmetadata.update({
                        'data_row': len(data),
                        'data_columns': tuple(out.keys()),
                        'filename': path,
                        'filenames': paths,
                        'mtimes': mtimes,
                    })
                    rdata.update(out)
                    metadata.append(pd.Series(rmetadata))
                    if has_parameters:
                        par_keys.update(out.keys())

            # The time of scooping this results directory (naive UTC).
            rdata['time'] = datetime.utcnow()

            data.append(pd.Series(rdata))

    df = pd.DataFrame(data)
    mdf = pd.DataFrame(metadata)

    return df, mdf, par_keys
Beispiel #9
0
def scoop_outputs(options):
    """
    Collect results of a parametric study into DataFrames, store them and
    optionally run plugins on them.

    Unless `options.reuse` is set and the file `options.results` exists, the
    results are scooped from `options.directories` by apply_scoops() using
    get_scoop_info() of the module `options.scoop_mod`, and optionally
    restricted by `options.filter` to the rows whose 'rfiles' intersect the
    filter. Otherwise the DataFrames are loaded from the existing HDF5 file.

    New results (or any results with `options.write`) are written to the
    HDF5 file `options.results` under the keys 'df', 'mdf' and 'par_keys',
    together with 'results-meta.csv' and, with `options.save_csv`,
    'results.csv' in `options.output_dir`.

    With `options.call_plugins`, the functions given by get_plugin_info() of
    `options.plugin_mod` (or of the scoop module) are run by run_plugins(),
    subject to `options.use_plugins` / `options.omit_plugins`.

    Raises ValueError for an unknown name in `options.use_plugins`.
    """
    output.prefix = ''

    scoop_mod = import_file(options.scoop_mod)

    if (not options.reuse or
            not (op.exists(options.results) and op.isfile(options.results))):
        new_results = True

        if hasattr(scoop_mod, 'get_scoop_info'):
            scoop_info = scoop_mod.get_scoop_info()

        else:
            output('no get_scoop_info() in {}, exiting'.format(
                options.scoop_mod))
            return

        df, mdf, par_keys = apply_scoops(scoop_info, options.directories,
                                         options.debug)

        if options.filter is not None:
            # Keep only the results having some of the requested files.
            idf = [
                ii for ii, rfiles in df['rfiles'].items()
                if options.filter.intersection(rfiles)
            ]
            df = df.iloc[idf]
            df.index = np.arange(len(df))

            # A set gives constant-time membership tests below.
            kept_rows = set(idf)
            imdf = [
                ii for ii, data_row in mdf['data_row'].items()
                if data_row in kept_rows
            ]
            mdf = mdf.iloc[imdf]
            mdf.index = np.arange(len(mdf))

    else:
        new_results = False
        with pd.HDFStore(options.results, mode='r') as store:
            df = store.get('df')
            mdf = store.get('mdf')
            par_keys = set(store.get('par_keys').to_list())
            # Anything else in the store is not written by this function.
            std_keys = ('/df', '/mdf', '/par_keys')
            user_keys = set(store.keys()).difference(std_keys)
            output('user data:')
            output(user_keys)

    output('data keys:')
    output(df.keys())
    output('metadata keys:')
    output(mdf.keys())

    if options.sort:
        df = df.sort_values(options.sort)
        df.index = np.arange(len(df))

    warnings.simplefilter(action='ignore',
                          category=pd.errors.PerformanceWarning)

    results_filename = options.results
    ensure_path(results_filename)
    if new_results or options.write:
        with pd.HDFStore(results_filename, mode='w') as store:
            store.put('df', df)
            store.put('mdf', mdf)
            store.put('par_keys', pd.Series(list(par_keys)))

        if options.save_csv:
            filename = op.join(options.output_dir, 'results.csv')
            df.to_csv(filename)

        filename = op.join(options.output_dir, 'results-meta.csv')
        mdf.to_csv(filename)

    if options.call_plugins:
        if options.plugin_mod is not None:
            plugin_mod = import_file(options.plugin_mod)

        else:
            plugin_mod = scoop_mod

        if hasattr(plugin_mod, 'get_plugin_info'):
            plugin_info = plugin_mod.get_plugin_info()
            fun_names = [fun.__name__ for fun in plugin_info]
            output('available plugins:', fun_names)

            if options.use_plugins is not None:
                # Use the plugins in the order given by the user.
                aux = []
                for fun_name in options.use_plugins:
                    try:
                        ii = fun_names.index(fun_name)

                    except ValueError:
                        raise ValueError(
                            'unknown plugin! ({})'.format(fun_name))

                    aux.append(plugin_info[ii])

                plugin_info = aux

            elif options.omit_plugins is not None:
                plugin_info = [
                    fun for fun in plugin_info
                    if fun.__name__ not in options.omit_plugins
                ]

            data = run_plugins(plugin_info,
                               df,
                               options.output_dir,
                               par_keys,
                               results_filename,
                               plugin_args=options.plugin_args)
            # run_plugins() returns None when there is no plugin to run
            # (e.g. all of them omitted) - do not fail on data.keys() then.
            if data is not None:
                output('plugin data keys:')
                output(data.keys())

        else:
            output('no get_plugin_info() in {}'.format(plugin_mod.__name__))

    if options.shell:
        from soops.base import shell
        shell()
Beispiel #10
0
def apply_scoops(info, directories):
    """
    Apply the scoop functions in `info` to results found in `directories`.

    `info` is a sequence of ``(filename, fun)`` pairs. The file name of the
    first pair is searched for by locate_files() in every directory; the
    directory of each file found is a results directory. There, each `fun`
    is called with the path of its file and ``rdata=`` the dict of data
    collected so far, and the dict it returns is merged into that data.
    A function that raises is reported and skipped.

    Returns ``(df, mdf)``: `df` has one row per results directory, `mdf` one
    row per successfully scooped file. Both are empty for empty `info`.
    """
    if len(info) == 0:
        return pd.DataFrame({}), pd.DataFrame({})

    rows = []
    meta_rows = []
    for idir, directory in enumerate(directories):
        output('directory {}: {}'.format(idir, directory))

        # The file of the first scoop item identifies results directories.
        first_name = info[0][0]
        found_files = locate_files(first_name, directory)
        home = op.expanduser('~')
        for ir, found in enumerate(found_files):
            rdir = op.dirname(found)
            output('results directory {}: {}'.format(ir, rdir))

            rdata = {'rdir': rdir.replace(home, '~')}
            # Shared by all the files of this results directory.
            rmetadata = {}
            output('results files:')
            for name, scoop in info:
                output(name)
                path = op.join(rdir, name)
                try:
                    out = scoop(path, rdata=rdata)

                except KeyboardInterrupt:
                    raise

                except Exception as exc:
                    output('- failed with:')
                    output(exc)
                    continue

                try:
                    mtime = datetime.fromtimestamp(op.getmtime(path))

                except FileNotFoundError:
                    mtime = np.nan

                # 'data_row' is the index of the row of `df` that is going
                # to hold the data of this results directory.
                rmetadata.update(
                    data_row=len(rows),
                    data_columns=tuple(out.keys()),
                    filename=path,
                    mtime=mtime,
                )
                rdata.update(out)
                meta_rows.append(pd.Series(rmetadata))

            rdata['time'] = datetime.utcnow()

            rows.append(pd.Series(rdata))

    return pd.DataFrame(rows), pd.DataFrame(meta_rows)
Beispiel #11
0
def scoop_outputs(options):
    """
    Collect results of a parametric study into DataFrames, store them and
    optionally run plugins on them.

    When `options.results` is None or not an existing file, the results are
    scooped from `options.directories` by apply_scoops() using
    get_scoop_info() of the module `options.scoop_mod`. Otherwise the
    DataFrames 'df' and 'mdf' are read from that HDF5 file.

    The data are written to 'results.csv', 'results-meta.csv' and
    'results.h5' in `options.output_dir`.

    With `options.call_plugins`, the functions given by get_plugin_info() of
    `options.plugin_mod` (or of the scoop module) are run by run_plugins(),
    restricted by `options.use_plugins` or `options.omit_plugins`.
    """
    output.prefix = ''

    scoop_mod = import_file(options.scoop_mod)

    if (options.results is None or
            not (op.exists(options.results) and op.isfile(options.results))):

        if hasattr(scoop_mod, 'get_scoop_info'):
            scoop_info = scoop_mod.get_scoop_info()

        else:
            output('no get_scoop_info() in {}, exiting'.format(
                options.scoop_mod))
            return

        df, mdf = apply_scoops(scoop_info, options.directories)

    else:
        df = pd.read_hdf(options.results, 'df')
        mdf = pd.read_hdf(options.results, 'mdf')

    output('data keys:')
    output(df.keys())
    output('metadata keys:')
    output(mdf.keys())

    if options.sort:
        df = df.sort_values(options.sort)
        df.index = np.arange(len(df))

    warnings.simplefilter(action='ignore',
                          category=pd.errors.PerformanceWarning)

    filename = op.join(options.output_dir, 'results.csv')
    ensure_path(filename)
    df.to_csv(filename)
    filename = op.join(options.output_dir, 'results-meta.csv')
    mdf.to_csv(filename)

    filename = op.join(options.output_dir, 'results.h5')
    # The context manager closes the store even if a put() fails.
    with pd.HDFStore(filename, mode='w') as store:
        store.put('df', df)
        store.put('mdf', mdf)

    if options.call_plugins:
        if options.plugin_mod is not None:
            plugin_mod = import_file(options.plugin_mod)

        else:
            plugin_mod = scoop_mod

        if hasattr(plugin_mod, 'get_plugin_info'):
            plugin_info = plugin_mod.get_plugin_info()
            output('available plugins:', [fun.__name__ for fun in plugin_info])

            if options.use_plugins is not None:
                plugin_info = [
                    fun for fun in plugin_info
                    if fun.__name__ in options.use_plugins
                ]

            elif options.omit_plugins is not None:
                plugin_info = [
                    fun for fun in plugin_info
                    if fun.__name__ not in options.omit_plugins
                ]

            run_plugins(plugin_info, df, options.output_dir)

        else:
            output('no get_plugin_info() in {}'.format(plugin_mod.__name__))

    if options.shell:
        from soops.base import shell
        shell()
Beispiel #12
0
def run_parametric(options):
    """
    Run a command for all combinations of parameters in `options.conf`.

    The module `options.run_mod` has to provide get_run_info() returning
    ``(run_cmd, opt_args, output_dir_key, is_finished)``. `is_finished` is
    either a callable taking the output directory of a parameter set, or
    a name of a file whose existence in that directory marks a finished run.

    The commands are submitted to a local dask cluster with
    `options.n_workers` workers. A parameter set is run when
    `options.recompute` is greater than one, or when it is nonzero and the
    set is not finished yet; otherwise a no-op task is submitted. The
    options are saved to 'options.txt' and the output is logged to
    'output_log.txt' in `options.output_dir`.
    """
    output.prefix = 'run:'

    run_mod = import_file(options.run_mod)
    if hasattr(run_mod, 'get_run_info'):
        (run_cmd, opt_args, output_dir_key,
         _is_finished) = run_mod.get_run_info()

    else:
        output('no get_run_info() in {}, exiting'.format(options.run_mod))
        return

    if isinstance(_is_finished, str):
        is_finished = lambda x: op.exists(op.join(x, _is_finished))

    else:
        is_finished = _is_finished

    dconf = parse_as_dict(options.conf, free_word=True)
    key_order = sorted(dconf.keys())

    filename = op.join(options.output_dir, 'options.txt')
    ensure_path(filename)
    save_options(filename, [('options', vars(options))],
                 quote_command_line=True)

    output.set_output(filename=op.join(options.output_dir, 'output_log.txt'),
                      combined=options.verbose)

    recompute = options.recompute

    par_seqs = [make_key_list(key, dconf[key]) for key in key_order]

    count = sum(1 for _all_pars in itertools.product(*par_seqs)
                if check_contracted(_all_pars, options, key_order))
    output('number of parameter sets:', count)

    cluster = LocalCluster(n_workers=options.n_workers, threads_per_worker=1)
    client = Client(cluster)
    # Release the workers also when submitting or collecting fails.
    try:
        calls = []
        iset = 0
        for _all_pars in itertools.product(*par_seqs):
            if not check_contracted(_all_pars, options, key_order): continue
            output('parameter set:', iset)
            output(_all_pars)

            _it, keys, vals = zip(*_all_pars)
            all_pars = dict(zip(keys, vals))
            it = '_'.join('%d' % ii for ii in _it)

            # The output directory value is a template filled in with the
            # indices of the parameter values.
            podir = all_pars[output_dir_key] % it
            all_pars[output_dir_key] = podir

            all_pars['script_dir'] = op.normpath(op.dirname(options.run_mod))

            if (recompute > 1) or (recompute and not is_finished(podir)):
                cmd = make_cmd(run_cmd, opt_args, all_pars)
                output(cmd)

                call = client.submit(subprocess.call, cmd, shell=True,
                                     pure=False)

            else:
                # Nothing to compute - submit a no-op so that the parameter
                # set is still reported below.
                call = client.submit(lambda: None)

            call.iset = iset
            call.it = it
            call.all_pars = all_pars
            calls.append(call)

            iset += 1

        for call in as_completed(calls):
            output(call.iset)
            output(call.it)
            output(call.all_pars)
            output(call, call.result())

    finally:
        client.close()
        cluster.close()

    if options.shell:
        from soops.base import shell
        shell()
Beispiel #13
0
def run_parametric(options):
    """
    Run a command for all combinations of parameters in `options.conf`,
    keeping track of the parameter sets in CSV files.

    The module `options.run_mod` has to provide get_run_info() returning
    ``(run_cmd, opt_args, output_dir_key, is_finished)``. `is_finished` is
    either a callable ``is_finished(pars, options)``, or a name of a file
    whose existence in the output directory marks a finished run.

    Parameter values given as '@arange...' or '@linspace...' strings are
    expanded using numpy, values equal to '@generate' are provided by the
    function named in `options.generate_pars`, and additional parameters can
    be computed by the class named in `options.compute_pars`.

    Each parameter set is identified by the MD5 hash (`pkey`) of its
    parameters. Sets already stored in a 'soops-parameters.csv' file below
    the output directory root reuse their directory. A parameter set is run
    when `options.recompute` is greater than one, or when it is nonzero and
    the set is not finished; the commands are submitted to a local dask
    cluster with `options.n_workers` workers. The parameters and their
    'finished' status are saved to 'soops-parameters.csv' in the directory
    of each set and to 'all_parameters.csv' in `options.output_dir`.

    Raises ValueError when the generated keys or the parametric keys do not
    match the expected ones.
    """
    output.prefix = 'run:'

    run_mod = import_file(options.run_mod)
    if hasattr(run_mod, 'get_run_info'):
        (run_cmd, opt_args, output_dir_key,
         _is_finished) = run_mod.get_run_info()

    else:
        output('no get_run_info() in {}, exiting'.format(options.run_mod))
        return

    if isinstance(_is_finished, str):
        is_finished = (lambda pars, options: op.exists(
            op.join(pars[output_dir_key], _is_finished)))

    else:
        is_finished = _is_finished

    dconf = parse_as_dict(options.conf, free_word=True)

    seq_keys = [
        key for key, val in dconf.items()
        if isinstance(val, str) and val.startswith(('@arange', '@linspace'))
    ]
    for key in seq_keys:
        sfun = 'np.' + dconf[key][1:]
        # NOTE(security): the configuration text is evaluated as Python code.
        # Passing only `np` in the globals does not sandbox it (builtins are
        # still available), so use trusted configurations only.
        dconf[key] = list(eval(sfun, {'np': np}, {}))

    if options.generate_pars is not None:
        dgenerate_pars = options.generate_pars.copy()

        fun_name = dgenerate_pars.pop('function')
        generate_pars = getattr(run_mod, fun_name)

        gkeys = [key for key, val in dconf.items() if val == '@generate']
        output('generated parameters:', gkeys)

        gconf = generate_pars(Struct(dgenerate_pars), gkeys, dconf, options)
        if set(gkeys) != set(gconf.keys()):
            raise ValueError(
                'generated keys mismatch! (conf: {}, generated: {})'.format(
                    set(gkeys), set(gconf.keys())))

        dconf.update(gconf)

    keys = set(dconf.keys())
    keys.update(opt_args.keys())

    if options.compute_pars is not None:
        dcompute_pars = options.compute_pars.copy()

        class_name = dcompute_pars.pop('class')
        ComputePars = getattr(run_mod, class_name)

        keys.update(dcompute_pars.keys())

    key_order = collect_keys(run_cmd,
                             opt_args,
                             omit=(output_dir_key, 'script_dir'))
    # All the known keys have to be exactly the collected keys plus the
    # output directory key.
    if not (keys.issuperset(key_order) and
            (keys.difference(key_order) == set([output_dir_key]))):
        raise ValueError(
            'parametric keys mismatch! (conf: {},  collected: {})'.format(
                keys, key_order))

    filename = op.join(options.output_dir, 'options.txt')
    ensure_path(filename)
    save_options(filename, [('options', vars(options))],
                 quote_command_line=True)

    output.set_output(filename=op.join(options.output_dir, 'output_log.txt'),
                      combined=options.verbose)

    par_seqs = [
        make_key_list(key, dconf.get(key, '@undefined')) for key in key_order
    ]

    contracts = get_contracts(options.contract, par_seqs, key_order)

    if options.compute_pars is not None:
        compute_pars = ComputePars(dcompute_pars, par_seqs, key_order, options)

    else:
        compute_pars = lambda x: {}

    output_dir_template = dconf[output_dir_key]

    # Load existing parameter sets.
    dfs = []
    root_dir = output_dir_template.split('%s')[0]
    for fname in locate_files('soops-parameters.csv', root_dir=root_dir):
        if op.exists(fname):
            try:
                df = pd.read_csv(fname, index_col='pkey')

            except pd.errors.EmptyDataError:
                continue

            else:
                dfs.append(df)

    if len(dfs):
        apdf = pd.concat(dfs)
        # Continue numbering after the last existing parameter set.
        iseq = apdf[output_dir_key].apply(_get_iset).max() + 1

    else:
        apdf = pd.DataFrame()
        iseq = 0

    pkeys = set(apdf.index)

    count = sum(1 for _all_pars in product(*par_seqs, contracts=contracts))
    output('number of parameter sets:', count)

    cluster = LocalCluster(n_workers=options.n_workers, threads_per_worker=1)
    client = Client(cluster)

    calls = []
    for _all_pars in product(*par_seqs, contracts=contracts):
        _it, keys, vals = zip(*_all_pars)
        all_pars = dict(zip(keys, vals))
        all_pars.update(compute_pars(all_pars))
        it = ' '.join('%d' % ii for ii in _it)

        # The hash of the parameters identifies the parameter set.
        pkey = hashlib.md5(str(all_pars).encode('utf-8')).hexdigest()
        if pkey in pkeys:
            podir = apdf.loc[pkey, output_dir_key]
            iset = _get_iset(podir)

        else:
            iset = iseq
            podir = output_dir_template % ('{:03d}-{}'.format(iset, pkey))

        output('parameter set:', iset)
        output(_all_pars)

        all_pars[output_dir_key] = podir
        ensure_path(podir + op.sep)

        all_pars['script_dir'] = op.normpath(op.dirname(options.run_mod))

        recompute = options.recompute
        if ((recompute > 1)
                or (recompute and not is_finished(all_pars, options))):

            sdf = pd.DataFrame({'finished': False, **all_pars}, index=[pkey])
            sdf.to_csv(op.join(podir, 'soops-parameters.csv'),
                       index_label='pkey')

            if pkey in pkeys:
                apdf.loc[pkey] = sdf.iloc[0]

            else:
                # DataFrame.append() was removed in pandas 2.0.
                apdf = pd.concat([apdf, sdf])

            cmd = make_cmd(run_cmd, opt_args, all_pars)
            dtime = datetime.now()
            output('submitting at', get_timestamp(dtime=dtime))
            output(cmd)

            if options.dry_run:
                call = client.submit(lambda: None)

            elif options.run_function == 'subprocess.run':
                call = client.submit(subprocess.run,
                                     cmd,
                                     shell=True,
                                     pure=False)

            elif options.run_function == 'psutil.Popen':
                call = client.submit(run_with_psutil, cmd, options, pure=False)

            else:
                call = client.submit(os.system, cmd, pure=False)

            call.iset = iset
            call.it = it
            call.pkey = pkey
            call.podir = podir
            call.update_parameters = True
            call.all_pars = all_pars
            call.dtime = dtime
            calls.append(call)

            iseq += 1

        else:
            # Nothing to compute - submit a no-op so that the parameter set
            # is still reported below.
            call = client.submit(lambda: None)
            call.iset = iset
            call.it = it
            call.pkey = pkey
            call.podir = podir
            # A skipped parameter set that has no stored row (e.g. a new set
            # with recompute == 0) has nothing to update; looking it up
            # would raise KeyError.
            call.update_parameters = ((pkey in apdf.index)
                                      and not apdf.loc[pkey, 'finished'])
            call.all_pars = all_pars
            call.dtime = datetime.now()
            calls.append(call)

    pfilename = op.join(options.output_dir, 'all_parameters.csv')
    apdf.to_csv(pfilename, mode='w', index_label='pkey')

    for call in as_completed(calls):
        dtime = datetime.now()
        output(call.iset)
        output(call.it)
        output('in', call.podir)
        output('completed at', get_timestamp(dtime=dtime), 'in',
               dtime - call.dtime)
        output(call.all_pars)
        output(call)
        output(call.result())
        if call.update_parameters:
            finished = True
            if options.timeout is not None:
                import psutil
                # A run that timed out is not marked as finished.
                if isinstance(call.result(), psutil.TimeoutExpired):
                    finished = False

            apdf.loc[call.pkey, 'finished'] = finished
            sdf = apdf.loc[[call.pkey]]
            sdf.to_csv(op.join(call.podir, 'soops-parameters.csv'),
                       index_label='pkey')
            apdf.to_csv(pfilename, mode='w', index_label='pkey')

    client.close()

    if options.shell:
        from soops.base import shell
        shell()