def explain_dir(dirname, keys):
    """
    Print the parameter set stored in `dirname`, one key per line.

    Keys contained in `keys` are flagged with a '*' marker.
    """
    csv_path = op.join(dirname, 'soops-parameters.csv')
    pdf = pd.read_csv(csv_path, index_col='pkey')

    # Right-align key names to the longest column name.
    width = max(len(name) for name in pdf.keys())
    line_fmt = '{{}} {{:>{}s}}: {{}}'.format(width)

    row = pdf.iloc[0].to_dict()
    for name, value in row.items():
        marker = '*' if name in keys else ' '
        output(line_fmt.format(marker, name, value))
def build_pdf(filename):
    """
    Build a PDF from `filename` by running pdflatex three times.

    A non-zero exit status is reported via output().
    """
    cmd = 'pdflatex -interaction=nonstopmode'
    status = run_command(cmd, filename, repeat=3, silent=True)
    if not status:
        return

    output('build_pdf() failed with status {}!'.format(status))
def get_plot_style(indices, styles):
    """
    Select plot style keyword arguments according to `indices`.

    For each entry ``key -> {skey : style_vals}`` in `styles`, pick
    ``style_vals[indices[key] % len(style_vals)]`` when `key` is present in
    `indices`. A warning is printed when the same style keyword `skey`
    appears more than once (the later value wins).
    """
    selected = {}
    for key, key_styles in styles.items():
        for skey, style_vals in key_styles.items():
            if skey in selected:
                output('style key "{}" of "{}" already in use!'.format(
                    skey, key))
            if key not in indices:
                continue
            # Wrap around when the index exceeds the number of values.
            pos = indices[key] % len(style_vals)
            selected[skey] = style_vals[pos]

    return selected
def find_studies(options):
    """
    Collect all soops parameter CSV files under ``options.directories`` into a
    single DataFrame, optionally query it and/or drop into an interactive
    shell.

    Returns the combined DataFrame, or None when no parameter files were
    found.
    """
    output.prefix = 'find:'

    frames = []
    for root_dir in options.directories:
        for fname in locate_files('soops-parameters.csv', root_dir=root_dir):
            if not op.exists(fname):
                continue
            try:
                frames.append(pd.read_csv(fname, index_col='pkey'))
            except pd.errors.EmptyDataError:
                # Skip empty parameter files.
                continue

    if not len(frames):
        return

    # Normalize column names: strip leading option dashes, use underscores.
    apdf = (pd.concat(frames)
            .rename(columns=lambda x: x.lstrip('-').replace('-', '_'))
            .sort_values('output_dir', ignore_index=True))

    if options.query is not None:
        sdf = apdf.query(options.query)
        for ii in range(len(sdf)):
            row = sdf.iloc[ii]
            output('result {} in {}:\n{}'.format(ii, row['output_dir'], row))

    if options.shell or (options.query is None):
        output('{} parameter sets stored in `apdf` DataFrame'.format(len(apdf)))
        output('column names:\n{}'.format(apdf.keys()))
        from soops.base import shell
        shell()

    return apdf
def print_info(options):
    """
    Print the parametric keys of the run module given by ``options.run_mod``,
    or explain the parameter sets stored in ``options.explain`` directories.
    """
    output.prefix = 'info:'

    run_mod = import_file(options.run_mod)
    if not hasattr(run_mod, 'get_run_info'):
        output('no get_run_info() in {}, exiting'.format(options.run_mod))
        return

    run_cmd, opt_args, output_dir_key, _is_finished = run_mod.get_run_info()
    keys = collect_keys(run_cmd, opt_args,
                        omit=(output_dir_key, 'script_dir'))

    if options.explain is not None:
        # Explain the parameter sets stored in the given directories.
        for dirname in options.explain:
            output(dirname)
            explain_dir(dirname, keys)
    else:
        # Just list the collected keys.
        for ik, key in enumerate(keys):
            output('{:3d}: {}'.format(ik, key))

    if options.shell:
        from soops.base import shell
        shell()
def run_plugins(info, df, output_dir):
    """
    Run the plugin functions in `info` on `df` in order, threading the shared
    `data` object through the chain.

    Returns the final `data`, or None when `info` is empty.
    """
    if not len(info):
        return

    output('run plugins:')
    data = Struct(par_cols=get_parametric_columns(df),
                  output_dir=output_dir)
    for plugin in info:
        output('running {}()...'.format(plugin.__name__))
        data = plugin(df, data=data)
        output('...done')

    return data
def run_plugins(info, df, output_dir, par_keys, store_filename,
                plugin_args=None):
    """
    Run the plugin functions in `info` on `df` in order.

    Per-plugin keyword arguments can be passed in `plugin_args`, a dict keyed
    by plugin function names. Arguments naming no plugin in `info` are
    reported as unused. Plugins returning None leave the shared `data`
    unchanged.

    Returns the final `data`, or None when `info` is empty.
    """
    if not len(info):
        return

    plugin_args = {} if plugin_args is None else plugin_args

    # Warn about plugin_args entries that match no plugin in `info`.
    known = {fun.__name__ for fun in info}
    unused = set(plugin_args.keys()).difference(known)
    if len(unused):
        output('WARNING: unused plugin arguments:', unused)

    def make_call(fun):
        # Bind the extra keyword arguments for `fun`, if any were given.
        extra = plugin_args.get(fun.__name__)
        if extra is None:
            return fun

        def call(df, data=None):
            return fun(df, data=data, **extra)

        return call

    output('run plugins:')
    data = init_plugin_data(df, par_keys, output_dir, store_filename)
    for fun in info:
        output('running {}()...'.format(fun.__name__))
        result = make_call(fun)(df, data=data)
        if result is not None:
            data = result
        output('...done')

    return data
def apply_scoops(info, directories, debug_mode=False):
    """
    Scoop results files found in `directories`.

    Parameters
    ----------
    info : sequence of (filename, fun) or (filename, fun, has_parameters)
        The scoop specification: for each results directory, `fun` is applied
        to the file `filename` found there. When `has_parameters` is set, the
        keys returned by `fun` are collected into the returned parameter key
        set.
    directories : sequence of str
        The root directories searched for results.
    debug_mode : bool
        If True, re-raise exceptions raised by scoop functions instead of
        skipping the file.

    Returns
    -------
    df : pandas.DataFrame
        One row of scooped data per results directory.
    mdf : pandas.DataFrame
        One row of metadata per scooped file.
    par_keys : set
        The collected parametric key names (None when `info` is empty).
    """
    if not len(info):
        return pd.DataFrame({}), pd.DataFrame({}), None

    data = []
    metadata = []
    par_keys = set()
    for idir, directory in enumerate(directories):
        output('directory {}: {}'.format(idir, directory))

        # Results directories are located via the first scoop file name.
        name0 = info[0][0]
        filenames = locate_files(name0, directory)
        home = op.expanduser('~')
        for ir, filename in enumerate(filenames):
            rdir = op.dirname(filename)
            output('results directory {}: {}'.format(ir, rdir))

            rdata = {'rdir': rdir.replace(home, '~'), 'rfiles': []}
            rmetadata = {}
            output('results files:')
            for item in info:
                if len(item) == 2:
                    filename, fun = item
                    has_parameters = False
                elif len(item) == 3:
                    filename, fun, has_parameters = item
                else:
                    raise ValueError('scoop info item has to have length'
                                     ' 2 or 3! ({})'.format(item))
                output(filename)
                path = op.join(rdir, filename)
                if not op.exists(path):
                    # Treat `path` as a glob-like pattern and expand it;
                    # presumably locate_files() supports this — when nothing
                    # matches, fall back to calling `fun` with the (missing)
                    # path itself.
                    paths = list(locate_files(path))
                    output('expanded:', [path.replace(rdir, '<rdir>')
                                         for path in paths])
                    if len(paths) == 0:
                        paths = None

                else:
                    paths = None

                try:
                    if paths is None:
                        out = fun(path, rdata=rdata)

                    else:
                        out = fun(paths, rdata=rdata)

                except KeyboardInterrupt:
                    raise

                except Exception as exc:
                    # Scooping is best-effort: report and skip the file,
                    # unless debugging.
                    output('- failed with:')
                    output(exc)
                    if debug_mode:
                        raise

                    continue

                else:
                    if out is None:
                        output('- nothing returned!')
                        out = {}

                if paths is None:
                    paths = [path]

                rdata['rfiles'].append(filename)

                # Modification times of all scooped files; NaN for files
                # that vanished meanwhile.
                mtimes = []
                for path in paths:
                    try:
                        mtime = datetime.fromtimestamp(op.getmtime(path))

                    except FileNotFoundError:
                        mtime = np.nan

                    mtimes.append(mtime)

                # 'data_row' links this metadata row to the data row being
                # built for the current results directory.
                rmetadata.update({
                    'data_row': len(data),
                    'data_columns': tuple(out.keys()),
                    'filename': path,
                    'filenames': paths,
                    'mtimes': mtimes,
                })
                rdata.update(out)
                metadata.append(pd.Series(rmetadata))

                if has_parameters:
                    par_keys.update(out.keys())

            # One data row per results directory, stamped with the scoop
            # time (naive UTC).
            rdata['time'] = datetime.utcnow()
            data.append(pd.Series(rdata))

    df = pd.DataFrame(data)
    mdf = pd.DataFrame(metadata)
    return df, mdf, par_keys
def scoop_outputs(options):
    """
    Scoop results according to ``options.scoop_mod``, or reuse previously
    stored results, then optionally sort, save and post-process them with
    plugins.

    Results are stored in the HDF5 file ``options.results`` (keys 'df',
    'mdf', 'par_keys'); CSV copies are written to ``options.output_dir``
    when ``options.save_csv`` is set.
    """
    output.prefix = ''

    scoop_mod = import_file(options.scoop_mod)

    if (not options.reuse
            or not (op.exists(options.results)
                    and op.isfile(options.results))):
        # Scoop fresh results.
        new_results = True
        if hasattr(scoop_mod, 'get_scoop_info'):
            scoop_info = scoop_mod.get_scoop_info()

        else:
            output('no get_scoop_info() in {}, exiting'.format(
                options.scoop_mod))
            return

        df, mdf, par_keys = apply_scoops(scoop_info, options.directories,
                                         options.debug)

        if options.filter is not None:
            # Keep only rows whose scooped files intersect the filter; the
            # fresh df has a RangeIndex, so the labels from .items() can be
            # used with .iloc.
            idf = [ii for ii, rfiles in df['rfiles'].items()
                   if options.filter.intersection(rfiles)]
            df = df.iloc[idf]
            df.index = np.arange(len(df))

            # Keep metadata rows referring to the kept data rows.
            imdf = [ii for ii, data_row in mdf['data_row'].items()
                    if data_row in idf]
            mdf = mdf.iloc[imdf]
            mdf.index = np.arange(len(mdf))

    else:
        # Reuse stored results.
        new_results = False
        with pd.HDFStore(options.results, mode='r') as store:
            df = store.get('df')
            mdf = store.get('mdf')
            par_keys = set(store.get('par_keys').to_list())

            std_keys = ('/df', '/mdf', '/par_keys')
            user_keys = set(store.keys()).difference(std_keys)
            output('user data:')
            output(user_keys)

    output('data keys:')
    output(df.keys())
    output('metadata keys:')
    output(mdf.keys())

    if options.sort:
        df = df.sort_values(options.sort)
        df.index = np.arange(len(df))

    warnings.simplefilter(action='ignore',
                          category=pd.errors.PerformanceWarning)

    results_filename = options.results
    ensure_path(results_filename)
    if new_results or options.write:
        with pd.HDFStore(results_filename, mode='w') as store:
            store.put('df', df)
            store.put('mdf', mdf)
            store.put('par_keys', pd.Series(list(par_keys)))

    if options.save_csv:
        filename = op.join(options.output_dir, 'results.csv')
        df.to_csv(filename)
        filename = op.join(options.output_dir, 'results-meta.csv')
        mdf.to_csv(filename)

    if options.call_plugins:
        if options.plugin_mod is not None:
            plugin_mod = import_file(options.plugin_mod)

        else:
            plugin_mod = scoop_mod

        if hasattr(plugin_mod, 'get_plugin_info'):
            plugin_info = plugin_mod.get_plugin_info()
            fun_names = [fun.__name__ for fun in plugin_info]
            output('available plugins:', fun_names)

            if options.use_plugins is not None:
                # Select plugins in the user-given order.
                aux = []
                for fun_name in options.use_plugins:
                    try:
                        ii = fun_names.index(fun_name)

                    except ValueError:
                        raise ValueError(
                            'unknown plugin! ({})'.format(fun_name))

                    aux.append(plugin_info[ii])

                plugin_info = aux

            elif options.omit_plugins is not None:
                plugin_info = [fun for fun in plugin_info
                               if fun.__name__ not in options.omit_plugins]

            data = run_plugins(plugin_info, df, options.output_dir, par_keys,
                               results_filename,
                               plugin_args=options.plugin_args)
            # run_plugins() returns None when the (filtered) plugin list is
            # empty - guard against calling .keys() on None.
            if data is not None:
                output('plugin data keys:')
                output(data.keys())

        else:
            output('no get_plugin_info() in {}'.format(plugin_mod.__name__))

    if options.shell:
        from soops.base import shell
        shell()
def apply_scoops(info, directories):
    """
    Scoop results files found in `directories`.

    For each results directory (located via the first file name in `info`),
    apply each ``(filename, fun)`` pair from `info` and collect the returned
    dict into a data row; failures are reported and skipped.

    Returns two DataFrames: the scooped data (one row per results directory)
    and per-file metadata.
    """
    if not len(info):
        return pd.DataFrame({}), pd.DataFrame({})

    home = op.expanduser('~')
    rows = []
    meta_rows = []
    for idir, directory in enumerate(directories):
        output('directory {}: {}'.format(idir, directory))
        for ir, found in enumerate(locate_files(info[0][0], directory)):
            rdir = op.dirname(found)
            output('results directory {}: {}'.format(ir, rdir))

            rdata = {'rdir': rdir.replace(home, '~')}
            rmetadata = {}
            output('results files:')
            for fname, fun in info:
                output(fname)
                path = op.join(rdir, fname)
                try:
                    out = fun(path, rdata=rdata)

                except KeyboardInterrupt:
                    raise

                except Exception as exc:
                    # Best-effort scooping: report and skip the file.
                    output('- failed with:')
                    output(exc)
                    continue

                try:
                    mtime = datetime.fromtimestamp(op.getmtime(path))

                except FileNotFoundError:
                    mtime = np.nan

                rmetadata.update({
                    'data_row': len(rows),
                    'data_columns': tuple(out.keys()),
                    'filename': path,
                    'mtime': mtime,
                })
                rdata.update(out)
                meta_rows.append(pd.Series(rmetadata))

            # One data row per results directory, stamped with the scoop
            # time.
            rdata['time'] = datetime.utcnow()
            rows.append(pd.Series(rdata))

    return pd.DataFrame(rows), pd.DataFrame(meta_rows)
def scoop_outputs(options):
    """
    Scoop results according to ``options.scoop_mod``, or load previously
    stored results from ``options.results``, then optionally sort, save and
    post-process them with plugins.

    CSV and HDF5 copies of the data and metadata are written to
    ``options.output_dir``.
    """
    output.prefix = ''

    scoop_mod = import_file(options.scoop_mod)

    if (options.results is None
            or not (op.exists(options.results)
                    and op.isfile(options.results))):
        # Scoop fresh results.
        if hasattr(scoop_mod, 'get_scoop_info'):
            scoop_info = scoop_mod.get_scoop_info()

        else:
            output('no get_scoop_info() in {}, exiting'.format(
                options.scoop_mod))
            return

        df, mdf = apply_scoops(scoop_info, options.directories)

    else:
        # Reuse stored results.
        df = pd.read_hdf(options.results, 'df')
        mdf = pd.read_hdf(options.results, 'mdf')

    output('data keys:')
    output(df.keys())
    output('metadata keys:')
    output(mdf.keys())

    if options.sort:
        df = df.sort_values(options.sort)
        df.index = np.arange(len(df))

    warnings.simplefilter(action='ignore',
                          category=pd.errors.PerformanceWarning)

    filename = op.join(options.output_dir, 'results.csv')
    ensure_path(filename)
    df.to_csv(filename)
    filename = op.join(options.output_dir, 'results-meta.csv')
    mdf.to_csv(filename)

    filename = op.join(options.output_dir, 'results.h5')
    # Use a context manager so the store is closed even if put() raises.
    with pd.HDFStore(filename, mode='w') as store:
        store.put('df', df)
        store.put('mdf', mdf)

    if options.call_plugins:
        if options.plugin_mod is not None:
            plugin_mod = import_file(options.plugin_mod)

        else:
            plugin_mod = scoop_mod

        if hasattr(plugin_mod, 'get_plugin_info'):
            plugin_info = plugin_mod.get_plugin_info()
            output('available plugins:',
                   [fun.__name__ for fun in plugin_info])

            if options.use_plugins is not None:
                plugin_info = [fun for fun in plugin_info
                               if fun.__name__ in options.use_plugins]

            elif options.omit_plugins is not None:
                plugin_info = [fun for fun in plugin_info
                               if fun.__name__ not in options.omit_plugins]

            run_plugins(plugin_info, df, options.output_dir)

        else:
            output('no get_plugin_info() in {}'.format(plugin_mod.__name__))

    if options.shell:
        from soops.base import shell
        shell()
def run_parametric(options):
    """
    Run a parametric study: submit one job per parameter set via a local dask
    cluster.

    The run module given by ``options.run_mod`` must provide
    ``get_run_info()`` returning the command to run, its optional arguments,
    the output directory key and a "is finished" indicator (a file name or a
    callable).
    """
    output.prefix = 'run:'

    run_mod = import_file(options.run_mod)
    if hasattr(run_mod, 'get_run_info'):
        (run_cmd, opt_args, output_dir_key,
         _is_finished) = run_mod.get_run_info()

    else:
        output('no get_run_info() in {}, exiting'.format(options.run_mod))
        return

    # A string indicator means "this file exists in the output directory".
    if isinstance(_is_finished, str):
        is_finished = lambda x: op.exists(op.join(x, _is_finished))

    else:
        is_finished = _is_finished

    dconf = parse_as_dict(options.conf, free_word=True)
    key_order = sorted(dconf.keys())

    # Save the invocation options and set up logging.
    filename = op.join(options.output_dir, 'options.txt')
    ensure_path(filename)
    save_options(filename, [('options', vars(options))],
                 quote_command_line=True)

    output.set_output(filename=op.join(options.output_dir, 'output_log.txt'),
                      combined=options.verbose)

    recompute = options.recompute

    cluster = LocalCluster(n_workers=options.n_workers, threads_per_worker=1)
    client = Client(cluster)

    par_seqs = [make_key_list(key, dconf[key]) for key in key_order]

    # First pass: only count the parameter sets surviving the contraction
    # check.
    count = 0
    for _all_pars in itertools.product(*par_seqs):
        if not check_contracted(_all_pars, options, key_order):
            continue
        count += 1

    output('number of parameter sets:', count)

    calls = []
    iset = 0
    for _all_pars in itertools.product(*par_seqs):
        if not check_contracted(_all_pars, options, key_order):
            continue

        output('parameter set:', iset)
        output(_all_pars)

        _it, keys, vals = zip(*_all_pars)
        all_pars = dict(zip(keys, vals))

        # The per-key indices form a unique id substituted into the output
        # directory template.
        it = '_'.join('%d' % ii for ii in _it)

        podir = all_pars[output_dir_key] % it
        all_pars[output_dir_key] = podir
        all_pars['script_dir'] = op.normpath(op.dirname(options.run_mod))

        # recompute > 1 forces a re-run; recompute == 1 re-runs only
        # unfinished sets.
        if (recompute > 1) or (recompute and not is_finished(podir)):
            cmd = make_cmd(run_cmd, opt_args, all_pars)
            output(cmd)

            call = client.submit(subprocess.call, cmd, shell=True,
                                 pure=False)
            call.iset = iset
            call.it = it
            call.all_pars = all_pars
            calls.append(call)

        else:
            # Nothing to do - submit a no-op so the bookkeeping below is
            # uniform.
            call = client.submit(lambda: None)
            call.iset = iset
            call.it = it
            call.all_pars = all_pars
            calls.append(call)

        iset += 1

    # Report each job as it finishes.
    for call in as_completed(calls):
        output(call.iset)
        output(call.it)
        output(call.all_pars)
        output(call, call.result())

    client.close()

    if options.shell:
        from soops.base import shell
        shell()
def run_parametric(options):
    """
    Run a parametric study: submit one job per parameter set via a local dask
    cluster, tracking parameter sets and their finished state in
    per-directory 'soops-parameters.csv' files and a global
    'all_parameters.csv'.

    The run module given by ``options.run_mod`` must provide
    ``get_run_info()`` returning the command to run, its optional arguments,
    the output directory key and a "is finished" indicator (a file name or a
    callable taking ``(all_pars, options)``).
    """
    output.prefix = 'run:'

    run_mod = import_file(options.run_mod)
    if hasattr(run_mod, 'get_run_info'):
        (run_cmd, opt_args, output_dir_key,
         _is_finished) = run_mod.get_run_info()

    else:
        output('no get_run_info() in {}, exiting'.format(options.run_mod))
        return

    # A string indicator means "this file exists in the output directory".
    if isinstance(_is_finished, str):
        is_finished = (lambda pars, options:
                       op.exists(op.join(pars[output_dir_key],
                                         _is_finished)))

    else:
        is_finished = _is_finished

    dconf = parse_as_dict(options.conf, free_word=True)

    # Expand '@arange(...)'/'@linspace(...)' values into explicit lists.
    # NOTE(review): eval() on the configuration string - assumes the
    # configuration is trusted input.
    seq_keys = [key for key, val in dconf.items()
                if isinstance(val, str)
                and (val.startswith('@arange')
                     or val.startswith('@linspace'))]
    for key in seq_keys:
        sfun = 'np.' + dconf[key][1:]
        dconf[key] = list(eval(sfun, {'np': np}, {}))

    if options.generate_pars is not None:
        # Let a user function generate the values of '@generate' keys.
        dgenerate_pars = options.generate_pars.copy()
        fun_name = dgenerate_pars.pop('function')
        generate_pars = getattr(run_mod, fun_name)

        gkeys = [key for key, val in dconf.items() if val == '@generate']
        output('generated parameters:', gkeys)
        gconf = generate_pars(Struct(dgenerate_pars), gkeys, dconf, options)
        if set(gkeys) != set(gconf.keys()):
            raise ValueError(
                'generated keys mismatch! (conf: {}, generated: {})'.format(
                    set(gkeys), set(gconf.keys())))

        dconf.update(gconf)

    keys = set(dconf.keys())
    keys.update(opt_args.keys())

    if options.compute_pars is not None:
        dcompute_pars = options.compute_pars.copy()
        class_name = dcompute_pars.pop('class')
        ComputePars = getattr(run_mod, class_name)

        keys.update(dcompute_pars.keys())

    key_order = collect_keys(run_cmd, opt_args,
                             omit=(output_dir_key, 'script_dir'))
    if not (keys.issuperset(key_order)
            and (keys.difference(key_order) == set([output_dir_key]))):
        raise ValueError('parametric keys mismatch! (conf: {},'
                         ' collected: {})'.format(keys, key_order))

    # Save the invocation options and set up logging.
    filename = op.join(options.output_dir, 'options.txt')
    ensure_path(filename)
    save_options(filename, [('options', vars(options))],
                 quote_command_line=True)

    output.set_output(filename=op.join(options.output_dir, 'output_log.txt'),
                      combined=options.verbose)

    par_seqs = [make_key_list(key, dconf.get(key, '@undefined'))
                for key in key_order]

    contracts = get_contracts(options.contract, par_seqs, key_order)

    if options.compute_pars is not None:
        compute_pars = ComputePars(dcompute_pars, par_seqs, key_order,
                                   options)

    else:
        compute_pars = lambda x: {}

    output_dir_template = dconf[output_dir_key]

    # Load existing parameter sets.
    dfs = []
    root_dir = output_dir_template.split('%s')[0]
    for fname in locate_files('soops-parameters.csv', root_dir=root_dir):
        if op.exists(fname):
            try:
                df = pd.read_csv(fname, index_col='pkey')

            except pd.errors.EmptyDataError:
                continue

            else:
                dfs.append(df)

    if len(dfs):
        apdf = pd.concat(dfs)
        # Continue numbering after the highest existing set number.
        iseq = apdf[output_dir_key].apply(_get_iset).max() + 1

    else:
        apdf = pd.DataFrame()
        iseq = 0

    pkeys = set(apdf.index)

    # First pass: only count the parameter sets.
    count = 0
    for _all_pars in product(*par_seqs, contracts=contracts):
        count += 1

    output('number of parameter sets:', count)

    cluster = LocalCluster(n_workers=options.n_workers, threads_per_worker=1)
    client = Client(cluster)

    calls = []
    for _all_pars in product(*par_seqs, contracts=contracts):
        _it, keys, vals = zip(*_all_pars)
        all_pars = dict(zip(keys, vals))
        all_pars.update(compute_pars(all_pars))

        it = ' '.join('%d' % ii for ii in _it)

        # Parameter sets are identified by the md5 hash of their repr.
        pkey = hashlib.md5(str(all_pars).encode('utf-8')).hexdigest()
        if pkey in pkeys:
            # A known set - reuse its output directory and number.
            podir = apdf.loc[pkey, output_dir_key]
            iset = _get_iset(podir)

        else:
            iset = iseq
            podir = output_dir_template % ('{:03d}-{}'.format(iset, pkey))

        output('parameter set:', iset)
        output(_all_pars)

        all_pars[output_dir_key] = podir
        ensure_path(podir + op.sep)
        all_pars['script_dir'] = op.normpath(op.dirname(options.run_mod))

        recompute = options.recompute
        # recompute > 1 forces a re-run; recompute == 1 re-runs only
        # unfinished sets.
        if ((recompute > 1)
                or (recompute and not is_finished(all_pars, options))):
            sdf = pd.DataFrame({'finished': False, **all_pars},
                               index=[pkey])
            sdf.to_csv(op.join(podir, 'soops-parameters.csv'),
                       index_label='pkey')
            if pkey in pkeys:
                apdf.loc[pkey] = sdf.iloc[0]

            else:
                # DataFrame.append() was removed in pandas 2.0 - use
                # pd.concat() instead.
                apdf = pd.concat([apdf, sdf])

            cmd = make_cmd(run_cmd, opt_args, all_pars)
            dtime = datetime.now()
            output('submitting at', get_timestamp(dtime=dtime))
            output(cmd)

            if options.dry_run:
                call = client.submit(lambda: None)

            elif options.run_function == 'subprocess.run':
                call = client.submit(subprocess.run, cmd, shell=True,
                                     pure=False)

            elif options.run_function == 'psutil.Popen':
                call = client.submit(run_with_psutil, cmd, options,
                                     pure=False)

            else:
                call = client.submit(os.system, cmd, pure=False)

            call.iset = iset
            call.it = it
            call.pkey = pkey
            call.podir = podir
            call.update_parameters = True
            call.all_pars = all_pars
            call.dtime = dtime
            calls.append(call)

            iseq += 1

        else:
            # Nothing to run - submit a no-op so the bookkeeping below is
            # uniform; parameters are updated only for unfinished sets.
            call = client.submit(lambda: None)
            call.iset = iset
            call.it = it
            call.pkey = pkey
            call.podir = podir
            call.update_parameters = not apdf.loc[pkey, 'finished']
            call.all_pars = all_pars
            call.dtime = datetime.now()
            calls.append(call)

    pfilename = op.join(options.output_dir, 'all_parameters.csv')
    apdf.to_csv(pfilename, mode='w', index_label='pkey')

    # Report each job as it finishes and update the finished state.
    for call in as_completed(calls):
        dtime = datetime.now()
        output(call.iset)
        output(call.it)
        output('in', call.podir)
        output('completed at', get_timestamp(dtime=dtime),
               'in', dtime - call.dtime)
        output(call.all_pars)
        output(call)
        output(call.result())
        if call.update_parameters:
            finished = True
            if options.timeout is not None:
                import psutil
                if isinstance(call.result(), psutil.TimeoutExpired):
                    finished = False

            apdf.loc[call.pkey, 'finished'] = finished
            sdf = apdf.loc[[call.pkey]]
            sdf.to_csv(op.join(call.podir, 'soops-parameters.csv'),
                       index_label='pkey')
            apdf.to_csv(pfilename, mode='w', index_label='pkey')

    client.close()

    if options.shell:
        from soops.base import shell
        shell()