def test_default_values():
    """Tests the default values of the configuration file.

    First with a simple configuration and then with some parameters customized,
    to check if missing dict entries are set correctly.
    """
    # First with a simple configuration
    config_file = './study/profit_default.yaml'
    config = Config.from_file(config_file)
    assert config.get('base_dir') == path.abspath('./study')
    assert config.get('run_dir') == config.get('base_dir')
    assert config['files'].get('input') == path.join(config.get('base_dir'), 'input.txt')
    assert config['files'].get('output') == path.join(config.get('base_dir'), 'output.txt')
    assert config['fit'].get('surrogate') == 'GPy'
    assert config['fit'].get('kernel') == 'RBF'

    # Now check when dicts are only partially set
    config_file = './study/profit_default_2.yaml'
    config = Config.from_file(config_file)
    assert config['files'].get('input') == path.join(config.get('base_dir'), 'custom_input.in')
    assert config['files'].get('output') == path.join(config.get('base_dir'), 'output.txt')
    assert config['fit'].get('surrogate') == 'GPy'
    assert config['fit'].get('kernel') == 'RBF'
    assert config['fit'].get('plot') is True
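# --- Assumed shared test fixtures (sketch) ----------------------------------
# The tests in this section use TIMEOUT, PARAM_RTOL, NLL_ATOL and a clean()
# helper without defining them; they live elsewhere in the suite (the runner
# tests further below additionally assume RUN_ID, VALUE_U, VALUE_V, VALUE_F,
# VALUE_T and assert_wif()). The following is a minimal sketch of plausible
# definitions, not the project's actual values:
from glob import glob
from os import path, remove
from shutil import rmtree

TIMEOUT = 300       # seconds before a `profit run` subprocess is aborted (assumed)
PARAM_RTOL = 1e-2   # relative tolerance for fitted hyperparameters (assumed)
NLL_ATOL = 1e-7     # absolute tolerance for noise variances (assumed)


def clean(config):
    """Remove run directories and generated input/output files (assumed layout)."""
    for run_dir in glob(path.join(config['run_dir'], 'run_*')):
        rmtree(run_dir, ignore_errors=True)
    for filename in (config['files']['input'], config['files']['output']):
        if path.exists(filename):
            remove(filename)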
def test_1D():
    """Test a simple function f(u) = cos(10*u) + u."""
    config_file = 'study_1D/profit_1D.yaml'
    config = Config.from_file(config_file)
    model_file = './study_1D/model_1D_Custom.hdf5'
    try:
        run(f"profit run {config_file}", shell=True, timeout=TIMEOUT)
        sur = Surrogate.load_model(model_file)
        assert sur.get_label() == 'Custom'
        assert sur.trained
        assert sur.kernel.__name__ == 'RBF'
        assert allclose(sur.hyperparameters['length_scale'], 0.15975022, rtol=PARAM_RTOL)
        assert allclose(sur.hyperparameters['sigma_f'], 0.91133526, rtol=PARAM_RTOL)
        assert allclose(sur.hyperparameters['sigma_n'], 0.00014507, rtol=PARAM_RTOL)
    finally:
        clean(config)
        if path.exists(model_file):
            remove(model_file)
def test_2D():
    """Test a Rosenbrock 2D function with two random inputs."""
    config_file = 'study_2D/profit_2D.yaml'
    config = Config.from_file(config_file)
    model_file = './study_2D/model_2D_Custom.hdf5'
    try:
        run(f"profit run {config_file}", shell=True, timeout=TIMEOUT)
        sur = Surrogate.load_model(model_file)
        assert sur.get_label() == 'Custom'
        assert sur.trained
        assert sur.kernel.__name__ == 'RBF'
        assert sur.ndim == 2
        assert allclose(sur.hyperparameters['length_scale'], 0.96472754, rtol=PARAM_RTOL)
        assert allclose(sur.hyperparameters['sigma_f'], 15.02288291, rtol=PARAM_RTOL)
        assert allclose(sur.hyperparameters['sigma_n'], 8.83125694e-06, rtol=PARAM_RTOL)
    finally:
        clean(config)
        if path.exists(model_file):
            remove(model_file)
def __init__(self, config=None, yaml=None):
    from chaospy import (generate_quadrature, orth_ttr, fit_quadrature,
                         E, Std, descriptives)

    self.params = OrderedDict()
    self.backend = None
    self.param_files = None

    if yaml:
        print(' load configuration from %s' % yaml)
        config = Config.from_file(yaml)

    if config:
        if config['uq']['backend'] == 'ChaosPy':
            self.backend = ChaosPy(config['uq']['order'])
            # TODO: extend
        self.Normal = self.backend.Normal
        self.Uniform = self.backend.Uniform

        params = config['uq']['params']
        for pkey in params:
            if params[pkey]['dist'] == 'Uniform':
                self.params[pkey] = self.Uniform(params[pkey]['min'],
                                                 params[pkey]['max'])
        if 'param_files' in config['uq']:
            self.param_files = config['uq']['param_files']

    self.template_dir = 'template/'
    self.run_dir = 'run/'
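# For reference, the `uq` section consumed by the constructor above is assumed
# to parse into something like the following (keys taken from the code,
# values purely illustrative):
EXAMPLE_UQ_CONFIG = {
    'uq': {
        'backend': 'ChaosPy',   # selects the ChaosPy backend
        'order': 3,             # polynomial expansion order passed to ChaosPy()
        'params': {
            'u': {'dist': 'Uniform', 'min': 4.7, 'max': 5.3},
        },
        'param_files': ['params.txt'],  # optional
    },
}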
def test_1D():
    """Test a simple function f(u) = cos(10*u) + u."""
    config_file = 'study_1D/profit_1D.yaml'
    config = Config.from_file(config_file)
    model_file = './study_1D/model_1D.hdf5'
    try:
        run(f"profit run {config_file}", shell=True, timeout=TIMEOUT)
        run(f"profit fit {config_file}", shell=True, timeout=TIMEOUT)
        sur = Surrogate.load_model(model_file)
        assert sur.get_label() == 'GPy'
        assert sur.trained
        assert sur.model.kern.name == 'rbf'
        assert allclose(sur.model.likelihood.variance[0], 4.809421284738159e-11, atol=NLL_ATOL)
        assert allclose(sur.model.kern.variance[0], 1.6945780226638725, rtol=PARAM_RTOL)
        assert allclose(sur.model.kern.lengthscale, 0.22392982500520792, rtol=PARAM_RTOL)
    finally:
        clean(config)
        if path.exists(model_file):
            remove(model_file)
def test_2D():
    """Test a Rosenbrock 2D function with two random inputs."""
    config_file = 'study_2D/profit_2D.yaml'
    config = Config.from_file(config_file)
    model_file = './study_2D/model_2D.hdf5'
    try:
        run(f"profit run {config_file}", shell=True, timeout=TIMEOUT)
        run(f"profit fit {config_file}", shell=True, timeout=TIMEOUT)
        sur = Surrogate.load_model(model_file)
        assert sur.get_label() == 'GPy'
        assert sur.trained
        assert sur.model.kern.name == 'rbf'
        assert sur.model.kern.input_dim == 2
        assert allclose(sur.model.likelihood.variance[0], 2.657441549034709e-08, atol=NLL_ATOL)
        assert allclose(sur.model.kern.variance[0], 270.2197671669302, rtol=PARAM_RTOL)
        assert allclose(sur.model.kern.lengthscale[0], 1.079943283873971, rtol=PARAM_RTOL)
    finally:
        clean(config)
        if path.exists(model_file):
            remove(model_file)
def test_txt_input():
    """Tests if the input files in the single run directories are created from the template."""
    config_file = './study/profit.yaml'
    config = Config.from_file(config_file)
    # Wrap in try/finally so the run directories are cleaned up even when the
    # assertion fails, consistent with the other tests.
    try:
        run(f"profit run {config_file}", shell=True, timeout=TIMEOUT)
        assert path.isfile('./study/run_000/mockup.in')
    finally:
        clean(config)
def from_env(cls, label='run'):
    from profit.config import Config

    base_config = Config.from_file(checkenv('PROFIT_CONFIG_PATH'))
    config = base_config[label]
    if config['custom']:
        cls.handle_config(config, base_config)
    run_id = int(checkenv('PROFIT_RUN_ID')) + int(os.environ.get('PROFIT_ARRAY_ID', 0))
    return cls.from_config(config, run_id)
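# `checkenv` is assumed to be a small helper that retrieves a required
# environment variable; the project's actual implementation may behave
# differently. A plausible sketch:
import os


def checkenv(name):
    """Return the value of an environment variable, failing loudly if unset."""
    value = os.environ.get(name)
    if value is None:
        raise RuntimeError(f'environment variable {name} is not set')
    return value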
def test_yaml_py_config():
    """Tests if .yaml and .py configuration files are equal by comparing dict keys and values."""
    yaml_file = './study/profit.yaml'
    py_file = './study/profit_config.py'
    config_yaml = Config.from_file(yaml_file)
    config_py = Config.from_file(py_file)

    def assert_dict(dict_items1, dict_items2):
        for (key1, value1), (key2, value2) in zip(dict_items1, dict_items2):
            assert key1 == key2
            if type(value1) is dict:
                assert_dict(value1.items(), value2.items())
            elif type(value1) is ndarray:
                assert value1.dtype == value2.dtype
                assert value1.shape == value2.shape
            elif key1 != 'config_path':
                assert value1 == value2

    assert_dict(config_yaml.items(), config_py.items())
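# Note: zip() stops at the shorter of the two item sequences, so a key present
# in one config but missing from the other passes silently. A stricter variant
# (a sketch, not part of the original suite) compares the key sets up front:
from numpy import ndarray


def assert_dict_strict(d1, d2):
    assert d1.keys() == d2.keys()
    for key in d1:
        v1, v2 = d1[key], d2[key]
        if isinstance(v1, dict):
            assert_dict_strict(v1, v2)
        elif isinstance(v1, ndarray):
            assert v1.dtype == v2.dtype and v1.shape == v2.shape
        elif key != 'config_path':
            assert v1 == v2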
def test_hdf5_input_output():
    """Checks the data inside a .hdf5 input file."""
    config_file = './study/profit_hdf5.yaml'
    config = Config.from_file(config_file)
    try:
        run(f"profit run {config_file}", shell=True, timeout=TIMEOUT)
        data_in = load(config['files'].get('input'))
        assert data_in.shape == (2, 1)
        assert data_in.dtype.names == ('u', 'v', 'w')
    finally:
        clean(config)
def multi_test_1d(study, config_file, output_file):
    """Test 1D with different config files."""
    config_file = path.join(study, config_file)
    output_file = path.join(study, output_file)
    config = Config.from_file(config_file)
    try:
        run(f"profit run {config_file}", shell=True, timeout=TIMEOUT)
        output = load(output_file)
        assert output.shape == (7, 1)
        # Compare against reference values using the absolute difference, so
        # deviations in both directions are caught (the original unsigned
        # difference let arbitrarily large negative deviations pass).
        expected = array([0.7836, -0.5511, 1.0966, 0.4403,
                          1.6244, -0.4455, 0.0941]).reshape((7, 1))
        assert all(abs(output['f'] - expected) < 1e-4)
    finally:
        clean(config)
def test_symlinks():
    """Checks if relative symbolic links are handled correctly."""
    config_file = './study/profit_symlink.yaml'
    config = Config.from_file(config_file)
    base_file = './study/run_000/mockup.in'
    link_file = './study/run_000/some_subdir/symlink_link.txt'
    try:
        run(f"profit run {config_file}", shell=True, timeout=TIMEOUT)
        with open(link_file, 'r') as link, open(base_file, 'r') as base:
            link_data = link.read()
            base_data = base.read()
        assert link_data == base_data and not link_data.startswith('{')
    finally:
        clean(config)
def test_txt_json_input():
    """Checks if the numpy arrays resulting from a text and a json input are equal."""
    config_file = './study/profit_json.yaml'
    config = Config.from_file(config_file)
    try:
        run(f"profit run {config_file}", shell=True, timeout=TIMEOUT)
        with open(path.join(config['run_dir'], 'run_000', 'mockup_json.in')) as jf:
            json_input = jload(jf)
        json_input = array([float(val) for val in json_input.values()])
        with open(path.join(config['run_dir'], 'run_000', 'mockup.in')) as tf:
            txt_input = genfromtxt(tf)
        assert json_input.dtype == txt_input.dtype
        assert json_input.shape == txt_input.shape
    finally:
        clean(config)
def test_numpytxt():
    from numpy import array
    from profit.run.default import NumpytxtPostprocessor

    BASE_CONFIG = Config.from_file('numpy.yaml')
    config = {
        'class': 'numpytxt',
        'path': 'numpytxt.csv',
        'options': {'delimiter': ','},
    }
    data = array([0], dtype=[('f', float, (3,)), ('g', float)])[0]

    NumpytxtPostprocessor.handle_config(config, BASE_CONFIG)
    post = NumpytxtPostprocessor(config)
    post(data)

    assert all(data['f'] == [1.4, 1.3, 1.2])
    assert data['g'] == 10
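# The asserted values imply that `numpytxt.csv` (not shown here) holds a single
# row whose first three columns map to the vector output f and the fourth to g,
# e.g. `1.4,1.3,1.2,10` — an assumption inferred from the asserts, not a file
# taken from the repository.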
def test_2D_independent():
    """Test a Fermi function which returns a vector over energy
    and is sampled over different temperatures."""
    config_file = 'study_independent/profit_independent.yaml'
    config = Config.from_file(config_file)
    model_file = config['fit'].get('save')
    try:
        run(f"profit run {config_file}", shell=True, timeout=TIMEOUT)
        run(f"profit fit {config_file}", shell=True, timeout=TIMEOUT)
        sur = Surrogate.load_model(model_file)
        assert sur.get_label() == 'GPy'
        assert sur.trained
        assert sur.model.kern.name == 'rbf'
        assert sur.model.kern.input_dim == 1
        assert allclose(sur.model.likelihood.variance[0], 2.8769632382230903e-05, atol=NLL_ATOL)
        assert allclose(sur.model.kern.variance[0], 0.4382486018781694, rtol=PARAM_RTOL)
        assert allclose(sur.model.kern.lengthscale[0], 0.24077767526116695, rtol=PARAM_RTOL)
    finally:
        clean(config)
        if path.exists(model_file):
            remove(model_file)
def test_zeromq():
    from threading import Thread
    from time import sleep
    from profit.run.zeromq import ZeroMQInterface, ZeroMQRunnerInterface

    BASE_CONFIG = Config.from_file('numpy.yaml')
    MAX_IDS = BASE_CONFIG['ntrain']
    config = {'class': 'zeromq'}
    ZeroMQRunnerInterface.handle_config(config, BASE_CONFIG)

    def runner():
        rif = ZeroMQRunnerInterface(config, MAX_IDS, BASE_CONFIG['input'], BASE_CONFIG['output'])
        try:
            rif.input[['u', 'v']][RUN_ID] = VALUE_U, VALUE_V
            for i in range(3):
                rif.poll()
                sleep(0.5)
            assert rif.output['f'][RUN_ID] == VALUE_F
            assert rif.internal['TIME'][RUN_ID] == VALUE_T
            assert rif.internal['DONE'][RUN_ID]
        finally:
            rif.clean()

    def worker():
        wif = ZeroMQInterface(config, run_id=RUN_ID)
        assert_wif(wif)
        wif.output['f'] = VALUE_F
        wif.time = VALUE_T
        wif.done()

    rt = Thread(target=runner)
    wt = Thread(target=worker)
    rt.start()
    wt.start()
    wt.join()
    rt.join()
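# Caveat: an AssertionError raised inside runner() or worker() dies with its
# thread and does not fail the test on its own. A common remedy (a sketch, not
# part of the original suite) is to collect thread exceptions and re-raise
# them in the main thread:
from threading import Thread


def run_in_threads_and_reraise(*targets):
    """Run each target in a thread; re-raise the first captured exception."""
    errors = []

    def wrap(fn):
        def inner():
            try:
                fn()
            except BaseException as exc:  # capture AssertionError as well
                errors.append(exc)
        return inner

    threads = [Thread(target=wrap(t)) for t in targets]
    for t in threads:
        t.start()
    for t in threads:
        t.join()
    if errors:
        raise errors[0]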
def test_multi_output():
    """Test a 1D function with two outputs."""
    config_file = 'study_multi_output/profit_multi_output.yaml'
    config = Config.from_file(config_file)
    model_file = config['fit'].get('save')
    try:
        run(f"profit run {config_file}", shell=True, timeout=TIMEOUT)
        run(f"profit fit {config_file}", shell=True, timeout=TIMEOUT)
        sur = Surrogate.load_model(model_file)
        assert sur.get_label() == 'GPy'
        assert sur.trained
        assert sur.model.kern.name == 'ICM'
        # One likelihood per output: check the noise variance of each
        # (the original checked likelihoods_list[0] twice with different
        # values, which cannot both hold).
        assert allclose(sur.model.likelihood.likelihoods_list[0].variance[0],
                        0.00032075301845035454, atol=NLL_ATOL)
        assert allclose(sur.model.likelihood.likelihoods_list[1].variance[0],
                        3.773865299540149e-09, atol=NLL_ATOL)
        assert allclose(sur.model.kern.rbf.variance[0], 0.52218353, rtol=PARAM_RTOL)
        assert allclose(sur.model.kern.rbf.lengthscale, 0.20184872, rtol=PARAM_RTOL)
    finally:
        clean(config)
        from os.path import splitext
        # .hdf5 is not yet supported for the multi-output model, so it is saved as .pkl instead.
        model_file = splitext(model_file)[0] + '.pkl'
        if path.exists(model_file):
            remove(model_file)
def test_memmap():
    import os
    from profit.run.default import MemmapInterface, MemmapRunnerInterface

    BASE_CONFIG = Config.from_file('numpy.yaml')
    MAX_IDS = BASE_CONFIG['ntrain']
    config = {'class': 'memmap'}
    try:
        MemmapRunnerInterface.handle_config(config, BASE_CONFIG)
        rif = MemmapRunnerInterface(config, MAX_IDS, BASE_CONFIG['input'], BASE_CONFIG['output'])
        # Index with RUN_ID consistently (the original used a literal 1,
        # which only works while RUN_ID == 1).
        rif.input[['u', 'v']][RUN_ID] = VALUE_U, VALUE_V

        wif = MemmapInterface(config, RUN_ID)
        assert_wif(wif)
        wif.output['f'] = VALUE_F
        wif.time = VALUE_T
        wif.done()

        assert rif.output['f'][RUN_ID] == VALUE_F
        assert rif.internal['TIME'][RUN_ID] == VALUE_T
        assert rif.internal['DONE'][RUN_ID]
    finally:
        if 'path' in config and os.path.exists(config['path']):
            os.remove(config['path'])
def main():
    print(sys.argv)
    if len(sys.argv) < 2:
        print_usage()
        return
    if len(sys.argv) < 3:
        config_file = os.path.join(os.getcwd(), 'profit.yaml')
    else:
        config_file = os.path.abspath(sys.argv[2])

    config = Config.from_file(config_file)
    sys.path.append(config['base_dir'])

    if sys.argv[1] == 'pre':
        eval_points = get_eval_points(config)
        try:
            profit.fill_run_dir(eval_points, template_dir=config['template_dir'],
                                run_dir=config['run_dir'], overwrite=False)
        except RuntimeError:
            question = ("Warning: Run directories in {} already exist "
                        "and will be overwritten. Continue? (y/N) ").format(config['run_dir'])
            if yes:
                print(question + 'y')
            else:
                answer = input(question)
                if not (answer == 'y' or answer == 'Y'):
                    exit()
            profit.fill_run_dir(eval_points, template_dir=config['template_dir'],
                                run_dir=config['run_dir'], overwrite=True)

    elif sys.argv[1] == 'run':
        print(read_input(config['base_dir']))
        if config['run']:
            run = profit.run.LocalCommand(config['run']['cmd'], config['run']['ntask'])
            run.start()
        else:
            raise RuntimeError('No "run" entry in profit.yaml')

    elif sys.argv[1] == 'collect':
        from numpy import array, empty, nan, savetxt
        from .util import save_txt
        spec = importlib.util.spec_from_file_location('interface', config['interface'])
        interface = importlib.util.module_from_spec(spec)
        spec.loader.exec_module(interface)
        data = empty((config['ntrain'], len(config['output'])))
        for krun in range(config['ntrain']):
            # zfill(3) pads krun to 3 digits, matching the run directory names
            run_dir_single = os.path.join(config['run_dir'], str(krun).zfill(3))
            print(run_dir_single)
            try:
                os.chdir(run_dir_single)
                data[krun, :] = interface.get_output()
            except Exception:  # a failed run yields NaN instead of aborting collection
                data[krun, :] = nan
            finally:
                os.chdir(config['base_dir'])
        savetxt('output.txt', data, header=' '.join(config['output']))

    elif sys.argv[1] == 'fit':
        from numpy import loadtxt
        from h5py import File
        x = loadtxt('input.txt')
        y = loadtxt('output.txt')
        fresp = fit(x, y)
        with File('profit.hdf5', 'w') as h5f:
            h5f['xtrain'] = fresp.xtrain
            h5f['ytrain'] = fresp.ytrain
            h5f['yscale'] = fresp.yscale
            h5f['ndim'] = fresp.ndim
            h5f['variables'] = [v.numpy() for v in fresp.m.variables]

    elif sys.argv[1] == 'ui':
        from profit.ui import app
        app.app.run_server(debug=True)

    else:
        print_usage()
        return
def main():
    """Main command line interface.

    sys.argv is the entered series of commands,
    e.g. sys.argv = ['profit', 'run', '--active-learning', '/home/user/example'].
    """
    # Get parameters from argv
    parser = ArgumentParser(
        usage='profit <mode> (base-dir)',
        description="Probabilistic Response Model Fitting with Interactive Tools",
        formatter_class=RawTextHelpFormatter)
    parser.add_argument(
        'mode',  # ToDo: subparsers?
        metavar='mode',
        choices=['run', 'fit', 'ui', 'clean'],
        help='run ... start simulation runs \n'
             'fit ... fit data with Gaussian Process \n'
             'ui ... visualise results \n'
             'clean ... remove run directories and input/output files')
    parser.add_argument(
        'base_dir',
        metavar='base-dir',
        help='path to config file (default: current working directory)',
        default=getcwd(),
        nargs='?')
    args = parser.parse_args()
    print(args)

    # Instantiate the Config class from the given file
    config_file = safe_path_to_file(args.base_dir, default='profit.yaml')
    config = Config.from_file(config_file)
    sys.path.append(config['base_dir'])

    if args.mode == 'run':
        from tqdm import tqdm
        from profit.pre import get_eval_points, write_input
        from profit.util import save

        runner = Runner.from_config(config['run'], config)
        eval_points = get_eval_points(config)
        write_input(config['files']['input'], eval_points)

        if 'activelearning' in (safe_str(v['kind']) for v in config['input'].values()):
            from profit.fit import ActiveLearning
            from profit.sur.sur import Surrogate
            runner.fill(eval_points)
            if 'active_learning' not in config:
                config['active_learning'] = {}
            ActiveLearning.handle_config(config['active_learning'], config)
            al = ActiveLearning.from_config(runner, config['active_learning'], config)
            al.run_first()
            al.learn()
            if config['active_learning'].get('save'):
                al.save(config['active_learning']['save'])
        else:
            params_array = [row[0] for row in eval_points]
            runner.spawn_array(tqdm(params_array), blocking=True)

        if config['run']['clean']:
            runner.clean()

        if config['files']['output'].endswith('.txt'):
            data = runner.structured_output_data
            save(config['files']['output'], data.reshape(data.size, 1))
        else:
            save(config['files']['output'], runner.output_data)

    elif args.mode == 'fit':
        from numpy import arange, hstack, meshgrid
        from profit.util import load
        from profit.sur.sur import Surrogate

        sur = Surrogate.from_config(config['fit'], config)
        if not sur.trained:
            x = load(config['files']['input'])
            y = load(config['files']['output'])
            x = hstack([x[key] for key in x.dtype.names])
            y = hstack([y[key] for key in y.dtype.names])
            sur.train(x, y)
        if config['fit'].get('save'):
            sur.save_model(config['fit']['save'])
        if config['fit'].get('plot'):
            try:
                xpred = [arange(minv, maxv, step)
                         for minv, maxv, step in config['fit']['plot'].get('xpred')]
                xpred = hstack([xi.flatten().reshape(-1, 1) for xi in meshgrid(*xpred)])
            except AttributeError:
                xpred = None
            sur.plot(xpred, independent=config['independent'], show=True)

    elif args.mode == 'ui':
        from profit.ui import init_app
        app = init_app(config)
        app.run_server(debug=True)

    elif args.mode == 'clean':
        from shutil import rmtree
        from os import path, remove

        run_dir = config['run_dir']
        question = ("Are you sure you want to remove the run directories in {} "
                    "and input/output files?\n(y/N) ").format(config['run_dir'])
        if yes:
            print(question + 'y')
        else:
            answer = input(question)
            if not answer.lower().startswith('y'):
                print('exit...')
                sys.exit()

        for krun in range(config['ntrain']):
            single_run_dir = path.join(run_dir, f'run_{krun:03d}')
            if path.exists(single_run_dir):
                rmtree(single_run_dir)
        if path.exists(config['files']['input']):
            remove(config['files']['input'])
        if path.exists(config['files']['output']):
            remove(config['files']['output'])
        runner = Runner.from_config(config['run'], config)
        runner.clean()
        try:
            rmtree(config['run']['log_path'])
        except FileNotFoundError:
            pass