def __call__(self, inp):
    import os
    import datetime
    import dps
    from dps import cfg  # noqa
    from dps.config import DEFAULT_CONFIG
    from dps.utils import ExperimentStore

    os.nice(10)  # lower this worker's scheduling priority

    print("Entered _BuildDataset at: ")
    print(datetime.datetime.now())

    idx, seed, n_examples = inp
    print("idx: {}, seed: {}, n_examples: {}".format(idx, seed, n_examples))

    dps.reset_config()

    params = self.params.copy()
    params.update(seed=seed, n_examples=n_examples)

    # Build the dataset inside a fresh copy of the default config.
    with DEFAULT_CONFIG.copy():
        cfg.update_from_command_line()
        print(cfg)

        experiment_store = ExperimentStore(
            os.path.join(cfg.local_experiments_dir, cfg.env_name))
        exp_dir = experiment_store.new_experiment(
            "", seed, add_date=1, force_fresh=1, update_latest=False)
        params["data_dir"] = exp_dir.path

        print(params)
        self.cls(**params)

    print("Leaving _BuildDataset at: ")
    print(datetime.datetime.now())
def test_hyper_bare(test_config):
    config = DEFAULT_CONFIG.copy()
    config.update(a2c.config)
    config.update(simple_addition.config)
    config.update(test_config)

    config['max_steps'] = 101
    config['checkpoint_step'] = 43
    config['n_train'] = 2**5

    _raw_run(config)
def test_time_limit(test_config):
    config = DEFAULT_CONFIG.copy()
    config.update(simple_addition.config)
    config.update(reinforce_config)
    config.update(max_time=120, max_steps=10000, seed=100)
    config.update(test_config)

    start = time.time()
    with config:
        training_loop()

    elapsed = time.time() - start
    assert elapsed < config.max_time + 1
def test_time_limit_between_stages(test_config):
    config = DEFAULT_CONFIG.copy()
    config.update(simple_addition.config)
    config.update(reinforce_config)
    config.update(max_time=120, max_steps=10, seed=100)
    config.update(hooks=[AlarmHook(False, 0)])
    config.update(test_config)

    start = time.time()
    with config:
        result = training_loop()
        print(result)

    elapsed = time.time() - start
    assert elapsed < 20
def _run(env_str, alg_str, _config=None, **kwargs):
    env_config, alg_config = parse_env_alg(env_str, alg_str)

    config = DEFAULT_CONFIG.copy()
    config.update(alg_config)
    config.update(env_config)
    if _config is not None:
        config.update(_config)
    config.update(kwargs)

    with config:
        cfg.update_from_command_line()
        return training_loop()
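# Illustrative sketch, not from the original source: a hypothetical direct call to
# _run, assuming "simple_addition" and "a2c" are identifiers that parse_env_alg
# accepts (they match the env/alg modules used in the other snippets). Extra
# keyword arguments act as config overrides.
if __name__ == "__main__":
    data = _run("simple_addition", "a2c", max_steps=101, seed=0)
    print(data)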
def test_stage_hook(test_config):
    """ Test that we can safely use hooks to add new stages. """
    config = DEFAULT_CONFIG.copy()
    config.update(simple_addition.config)
    config.update(reinforce_config)
    config.update(
        max_steps=11,
        eval_step=10,
        n_train=100,
        seed=100,
        hooks=[DummyHook(3, dict(max_steps=21))],
        curriculum=[dict()],
        width=1,
    )
    config.update(test_config)

    with config:
        data = training_loop()

    assert data.n_stages == 3
    assert not data.history[0]["stage_config"]
    assert data.history[1]["stage_config"]["max_steps"] == 21
    assert data.history[2]["stage_config"]["max_steps"] == 21
def test_hyper_time_limited(test_config):
    config = DEFAULT_CONFIG.copy()
    config.update(a2c.config)
    config.update(simple_addition.config)
    config.update(test_config)

    config['max_steps'] = 100000
    config['checkpoint_step'] = 43

    distributions = dict(n_train=2**np.array([5, 6, 7]))
    n_repeats = 2

    session = build_and_submit(
        name="test_hyper", config=config, distributions=distributions,
        n_repeats=n_repeats, kind='parallel', host_pool=':', wall_time='1min',
        cleanup_time='10seconds', slack_time='5seconds', ppn=2,
        load_avg_threshold=1e6)

    path = session.job_path
    files = os.listdir(path)
    assert set(files) == set(
        ['orig.zip', 'experiments', 'os_environ.txt', 'results.zip', 'pip_freeze.txt',
         'dps_git_summary.txt', 'nodefile.txt', 'results.txt', 'job_log.txt',
         'uname.txt', 'lscpu.txt']
    )

    experiments = os.listdir(os.path.join(path, 'experiments'))
    for exp in experiments:
        assert exp.startswith('exp_')
        assert os.path.isdir(os.path.join(path, 'experiments', exp))

    assert len(experiments) == n_repeats * len(distributions['n_train'])

    with open(os.path.join(path, 'results.txt'), 'r') as f:
        results = f.read()

    assert "n_ops: 6" in results
    assert "n_completed_ops: 6" in results
    assert "n_partially_completed_ops: 0" in results
    assert "n_ready_incomplete_ops: 0" in results
    assert "n_not_ready_incomplete_ops: 0" in results
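# Note on the expected counts above: with 3 sampled values of n_train and
# n_repeats = 2, the search launches 2 * 3 = 6 operations, which is why the
# results summary is expected to report "n_ops: 6" and "n_completed_ops: 6".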
def __call__(self, new):
    start_time = time.time()

    print("Entered _RunTrainingLoop at: ")
    print(datetime.datetime.now())

    os.nice(10)  # lower this worker's scheduling priority

    print("Sampled values: ")
    print(new)
    print("Base config: ")
    print(self.base_config)

    exp_name = '_'.join("{}={}".format(k, new[k]) for k in 'idx repeat'.split())

    dps.reset_config()

    config = DEFAULT_CONFIG.copy()
    config.update(self.base_config)
    config.update(new)
    config.update(
        start_tensorboard=False,
        show_plots=False,
    )

    with config:
        cfg.update_from_command_line()

        from dps.train import training_loop
        result = training_loop(exp_name=exp_name, start_time=start_time)

    print("Leaving _RunTrainingLoop at: ")
    print(datetime.datetime.now())

    return result
parser = argparse.ArgumentParser()
parser.add_argument("kind", choices="long med".split())
parser.add_argument("size", choices="14 21".split())
parser.add_argument("task", choices="addition arithmetic".split())
parser.add_argument("--c", action="store_true")
args, _ = parser.parse_known_args()
kind = args.kind

distributions = dict(
    math_weight=list(2**np.linspace(-2, 2, 8)),
)

env_config = envs.get_mnist_config(size=args.size, colour=args.c, task=args.task)

config = DEFAULT_CONFIG.copy()
config.update(alg_config)
config.update(env_config)
config.update(
    render_step=10000,
    eval_step=1000,
    per_process_gpu_memory_fraction=0.23,
    patience=1000000,
    max_experiences=100000000,
    max_steps=100000000,
    count_prior_decay_steps=1000,
    final_count_prior_log_odds=0.0125,
    hw_prior_std=0.5,
    kernel_size=1,
def run_experiment(name, config, readme, distributions=None, durations=None,
                   alg=None, task="grid", name_variables=None, env_kwargs=None):
    name = sanitize(name)
    durations = durations or {}

    parser = argparse.ArgumentParser()
    parser.add_argument("duration", choices=list(durations.keys()) + ["local"])
    args, _ = parser.parse_known_args()

    _config = DEFAULT_CONFIG.copy()

    env_kwargs = env_kwargs or {}
    env_kwargs['task'] = task
    env_config = get_env_config(**env_kwargs)
    _config.update(env_config)

    if alg:
        alg_config = getattr(alg_module, "{}_config".format(alg))
        _config.update(alg_config)
        alg_name = sanitize(alg_config.alg_name)
    else:
        alg_name = ""

    _config.update(config)
    _config.update_from_command_line()

    _config.env_name = "{}_env={}".format(name, sanitize(env_config.env_name))

    if args.duration == "local":
        _config.exp_name = "alg={}".format(alg_name)
        with _config:
            return training_loop()
    else:
        run_kwargs = Config(
            kind="slurm",
            pmem=5000,
            ignore_gpu=False,
        )

        duration_args = durations[args.duration]
        if 'config' in duration_args:
            _config.update(duration_args['config'])
            del duration_args['config']

        run_kwargs.update(durations[args.duration])
        run_kwargs.update_from_command_line()

        if name_variables is not None:
            name_variables_str = "_".join(
                "{}={}".format(sanitize(str(k)), sanitize(str(getattr(_config, k))))
                for k in name_variables.split(","))
            _config.env_name = "{}_{}".format(_config.env_name, name_variables_str)

        exp_name = "{}_alg={}_duration={}".format(_config.env_name, alg_name, args.duration)

        build_and_submit(name=exp_name, config=_config, distributions=distributions, **run_kwargs)
        lambda inp, output_size: fully_connected(inp, output_size, activation_fn=None), 1)

    estimator = NeuralValueEstimator(controller, env.obs_shape)
    alg = cfg.alg_class(estimator, name="critic")
    updater = RLUpdater(env, policy, alg)
    return updater


config = DEFAULT_CONFIG.copy(
    get_updater=get_updater,
    build_env=build_env,
    log_name="policy_evaluation",
    max_steps=100000,
    display_step=100,
    T=3,
    reward_radius=0.2,
    max_step=0.1,
    restart_prob=0.0,
    l2l=False,
    n_val=200,
    threshold=1e-4,
    verbose=False,
)

x = int(sys.argv[1])

if x == 0:
    print("TRPE")
    config.update(
        name="TRPE",
        delta_schedule='0.01',
        max_cg_steps=10,
    return ClassificationEnv(train, val, one_hot=True)


# For training networks on EMNIST datasets.
EMNIST_CONFIG = DEFAULT_CONFIG.copy(
    name="emnist",
    env_name='emnist_pretrained',
    get_updater=get_differentiable_updater,
    build_env=build_emnist_env,
    shape=(14, 14),
    batch_size=128,
    eval_step=100,
    max_steps=100000,
    patience=10000,
    lr_schedule="Exp(0.001, 0, 10000, 0.9)",
    optimizer_spec="adam",
    threshold=-np.inf,
    n_train=60000,
    n_val=100,
    include_blank=True,
    classes=list(range(10)),
    n_controller_units=100,
    use_gpu=True,
    gpu_allow_growth=True,
    seed=347405995,
    stopping_criteria="01_loss,min",
)


# OMNIGLOT ***************************************
def run_experiment(
        name, base_config, readme, distributions=None, durations=None,
        name_variables=None, alg_configs=None, env_configs=None, late_config=None):

    name = sanitize(name)
    durations = durations or {}

    parser = argparse.ArgumentParser()
    if env_configs is not None:
        parser.add_argument('env')
    if alg_configs is not None:
        parser.add_argument('alg')
    parser.add_argument("duration", choices=list(durations.keys()) + ["local"],
                        default="local", nargs="?")
    args, _ = parser.parse_known_args()

    config = DEFAULT_CONFIG.copy()
    config.update(base_config)

    if env_configs is not None:
        env_config = env_configs[args.env]
        config.update(env_config)

    if alg_configs is not None:
        alg_config = alg_configs[args.alg]
        config.update(alg_config)

    if late_config is not None:
        config.update(late_config)

    config.update_from_command_line()

    env_name = sanitize(config.get('env_name', ''))
    if name:
        config.env_name = "{}_env={}".format(name, env_name)
    else:
        config.env_name = "env={}".format(env_name)

    alg_name = sanitize(config.get("alg_name", ""))

    if args.duration == "local":
        config.exp_name = "alg={}".format(alg_name)
        with config:
            return training_loop()

    run_kwargs = Config(
        kind="slurm",
        pmem=5000,
        ignore_gpu=False,
    )

    duration_args = durations[args.duration]
    if 'config' in duration_args:
        config.update(duration_args['config'])
        del duration_args['config']

    run_kwargs.update(durations[args.duration])
    run_kwargs.update_from_command_line()

    if name_variables is not None:
        name_variables_str = "_".join(
            "{}={}".format(sanitize(str(k)), sanitize(str(getattr(config, k))))
            for k in name_variables.split(","))
        config.env_name = "{}_{}".format(config.env_name, name_variables_str)

    exp_name = "{}_alg={}_duration={}".format(config.env_name, alg_name, args.duration)

    build_and_submit(name=exp_name, config=config, distributions=distributions, **run_kwargs)
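# Illustrative sketch, not part of the original source: a hypothetical driver script
# built on run_experiment. The names, configs, readme text and duration settings are
# placeholders; the wall_time / n_repeats keywords forwarded to build_and_submit follow
# the usage shown in the hyper-search test snippet above. It would be invoked with
# positional args for env, alg and duration, e.g.: python script.py my_env my_alg short
if __name__ == "__main__":
    durations = dict(
        short=dict(wall_time="1hour", n_repeats=2),
        long=dict(wall_time="24hours", n_repeats=6, config=dict(max_steps=1000000)),
    )
    run_experiment(
        "example", base_config=Config(max_steps=100000), readme="Example experiment.",
        alg_configs=dict(my_alg=Config(alg_name="my_alg")),
        env_configs=dict(my_env=Config(env_name="my_env")),
        durations=durations,
    )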
import numpy as np

from dps.utils import Config
from dps.env import simple_addition
from dps.rl.algorithms import a2c
from dps.rl.policy import BuildEpsilonSoftmaxPolicy, BuildLstmController
from dps.config import DEFAULT_CONFIG


config = DEFAULT_CONFIG.copy(
    name="SimpleAddition",

    n_train=10000,
    n_val=100,

    max_steps=1000000,
    display_step=10,
    eval_step=10,
    patience=np.inf,
    power_through=False,
    load_path=-1,
    start_tensorboard=True,
    verbose=False,
    show_plots=True,
    use_gpu=False,
    threshold=0.01,
    # render_hook=rl_render_hook,
    render_hook=None,
    cpu_ram_limit_mb=5 * 1024,
)


alg_config = Config(
    get_updater=a2c.A2C,
    build_policy=BuildEpsilonSoftmaxPolicy(),
    build_controller=BuildLstmController(),
    optimizer_spec="adam",