def test_time_limit(test_config):
    config = DEFAULT_CONFIG.copy()
    config.update(simple_addition.config)
    config.update(reinforce_config)
    config.update(max_time=120, max_steps=10000, seed=100)
    config.update(test_config)

    start = time.time()
    with config:
        training_loop()
    elapsed = time.time() - start

    assert elapsed < config.max_time + 1
def test_time_limit_between_stages(test_config):
    config = DEFAULT_CONFIG.copy()
    config.update(simple_addition.config)
    config.update(reinforce_config)
    config.update(max_time=120, max_steps=10, seed=100)
    config.update(hooks=[AlarmHook(False, 0)])
    config.update(test_config)

    start = time.time()
    with config:
        result = training_loop()
    print(result)

    elapsed = time.time() - start
    assert elapsed < 20
def _run(env_str, alg_str, _config=None, **kwargs):
    env_config, alg_config = parse_env_alg(env_str, alg_str)

    config = DEFAULT_CONFIG.copy()
    config.update(alg_config)
    config.update(env_config)

    if _config is not None:
        config.update(_config)
    config.update(kwargs)

    with config:
        cfg.update_from_command_line()
        return training_loop()
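# Illustrative sketch (not from the source) of one way _run might be invoked.
# It assumes parse_env_alg accepts names like "simple_addition" and "reinforce"
# (both appear elsewhere in this file as configs); any extra keyword arguments
# are folded into the config before training starts.
if __name__ == "__main__":
    _run("simple_addition", "reinforce", max_steps=1000, seed=0)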
def __call__(self, new):
    import os
    stdout_path = f"./stdout_pid={os.getpid()}"

    with redirect_stream('stdout', stdout_path, tee=True):
        start_time = time.time()

        print("Entered _RunTrainingLoop at: ")
        print(datetime.datetime.now())

        os.nice(10)

        print("Sampled values: ")
        print(new)

        print("Base config: ")
        print(self.base_config)

        exp_name = '_'.join("{}={}".format(k, new[k]) for k in 'idx repeat'.split())

        config = get_default_config()
        config.update(self.base_config)
        config.update(new)
        config.update(
            start_tensorboard=False,
            show_plots=False,
            update_latest=False,
            git_record_mode='none',
            in_parallel_session=True,

            # Need these present so that they're picked up when we get args from the command line.
            local_experiments_dir='',
            backup_dir='',
            env_name='',
            max_time=0,
        )

        with config:
            # This is used for passing args 'local_experiments_dir', 'backup_dir', 'env_name', and 'max_time'.
            cfg.update_from_command_line(strict=False)

            from dps.train import training_loop
            result = training_loop(exp_name=exp_name, start_time=start_time)

        print("Leaving _RunTrainingLoop at: ")
        print(datetime.datetime.now())

        return result
def test_stage_hook(test_config):
    """ Test that we can safely use hooks to add new stages. """
    config = DEFAULT_CONFIG.copy()
    config.update(simple_addition.config)
    config.update(reinforce_config)
    config.update(
        max_steps=11,
        eval_step=10,
        n_train=100,
        seed=100,
        hooks=[DummyHook(3, dict(max_steps=21))],
        curriculum=[dict()],
        width=1,
    )
    config.update(test_config)

    with config:
        data = training_loop()

    assert data.n_stages == 3
    assert not data.history[0]["stage_config"]
    assert data.history[1]["stage_config"]["max_steps"] == 21
    assert data.history[2]["stage_config"]["max_steps"] == 21
def __call__(self, new):
    start_time = time.time()

    print("Entered _RunTrainingLoop at: ")
    print(datetime.datetime.now())

    os.nice(10)

    print("Sampled values: ")
    print(new)

    print("Base config: ")
    print(self.base_config)

    exp_name = '_'.join("{}={}".format(k, new[k]) for k in 'idx repeat'.split())

    dps.reset_config()

    config = DEFAULT_CONFIG.copy()
    config.update(self.base_config)
    config.update(new)
    config.update(
        start_tensorboard=False,
        show_plots=False,
    )

    with config:
        cfg.update_from_command_line()

        from dps.train import training_loop
        result = training_loop(exp_name=exp_name, start_time=start_time)

    print("Leaving _RunTrainingLoop at: ")
    print(datetime.datetime.now())

    return result
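# Hypothetical sketch (an assumption, not shown in the source) of the class the
# two __call__ methods above belong to: the source only shows __call__, which
# reads self.base_config, so presumably the object is built once with a base
# config and then called with each sampled parameter setting (`new`).
class _RunTrainingLoop:
    def __init__(self, base_config):
        self.base_config = base_config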
def run_experiment(name, config, readme, distributions=None, durations=None,
                   alg=None, task="grid", name_variables=None, env_kwargs=None):
    name = sanitize(name)
    durations = durations or {}

    parser = argparse.ArgumentParser()
    parser.add_argument("duration", choices=list(durations.keys()) + ["local"])
    args, _ = parser.parse_known_args()

    _config = DEFAULT_CONFIG.copy()

    env_kwargs = env_kwargs or {}
    env_kwargs['task'] = task
    env_config = get_env_config(**env_kwargs)
    _config.update(env_config)

    if alg:
        alg_config = getattr(alg_module, "{}_config".format(alg))
        _config.update(alg_config)
        alg_name = sanitize(alg_config.alg_name)
    else:
        alg_name = ""

    _config.update(config)
    _config.update_from_command_line()

    _config.env_name = "{}_env={}".format(name, sanitize(env_config.env_name))

    if args.duration == "local":
        _config.exp_name = "alg={}".format(alg_name)
        with _config:
            return training_loop()
    else:
        run_kwargs = Config(
            kind="slurm",
            pmem=5000,
            ignore_gpu=False,
        )

        duration_args = durations[args.duration]

        if 'config' in duration_args:
            _config.update(duration_args['config'])
            del duration_args['config']

        run_kwargs.update(durations[args.duration])
        run_kwargs.update_from_command_line()

        if name_variables is not None:
            name_variables_str = "_".join(
                "{}={}".format(sanitize(str(k)), sanitize(str(getattr(_config, k))))
                for k in name_variables.split(","))
            _config.env_name = "{}_{}".format(_config.env_name, name_variables_str)

        exp_name = "{}_alg={}_duration={}".format(_config.env_name, alg_name, args.duration)

        build_and_submit(name=exp_name, config=_config, distributions=distributions, **run_kwargs)
def _raw_run(config):
    with config:
        return training_loop()
def run_experiment(name, base_config, readme, distributions=None, durations=None,
                   name_variables=None, alg_configs=None, env_configs=None,
                   late_config=None, cl_mode='lax', run_kwargs_base=None):
    name = sanitize(name)
    durations = durations or {}

    parser = argparse.ArgumentParser()
    if env_configs is not None:
        parser.add_argument('env')
    if alg_configs is not None:
        parser.add_argument('alg')
    parser.add_argument("duration", choices=list(durations.keys()) + ["local"], default="local", nargs="?")

    args, _ = parser.parse_known_args()

    config = get_default_config()
    config.update(base_config)

    if env_configs is not None:
        env_config = env_configs[args.env]
        config.update(env_config)

    if alg_configs is not None:
        alg_config = alg_configs[args.alg]
        config.update(alg_config)

    if late_config is not None:
        config.update(late_config)

    env_name = sanitize(config.get('env_name', ''))
    alg_name = sanitize(config.get("alg_name", ""))

    run_kwargs = Config(
        kind="slurm",
        ignore_gpu=False,
    )
    if run_kwargs_base is not None:
        run_kwargs.update(run_kwargs_base)

    if args.duration == "local":
        run_kwargs.update(durations.get('local', {}))
    else:
        run_kwargs.update(durations[args.duration])

    if 'config' in run_kwargs:
        config.update(run_kwargs.config)
        del run_kwargs['config']

    if cl_mode is not None:
        if cl_mode == 'strict':
            config.update_from_command_line(strict=True)
        elif cl_mode == 'lax':
            config.update_from_command_line(strict=False)
        else:
            raise Exception("Unknown value for cl_mode: {}".format(cl_mode))

    if args.duration == "local":
        config.exp_name = "alg={}".format(alg_name)
        with config:
            return training_loop()
    else:
        if 'distributions' in run_kwargs:
            distributions = run_kwargs['distributions']
            del run_kwargs['distributions']

        if name_variables is not None:
            name_variables_str = "_".join(
                "{}={}".format(sanitize(k), sanitize(getattr(config, k)))
                for k in name_variables.split(","))
            env_name = "{}_{}".format(env_name, name_variables_str)

        config.env_name = env_name

        exp_name = "env={}_alg={}_duration={}".format(env_name, alg_name, args.duration)

        init()

        build_and_submit(name, exp_name, config, distributions=distributions, **run_kwargs)
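# Illustrative sketch (an assumption, not from the source) of the `durations`
# dict that run_experiment consumes: each key becomes a legal value of the
# positional `duration` argument, a nested 'config' entry is merged into the
# training config, a nested 'distributions' entry would override the
# distributions argument, and the remaining keys are forwarded as run_kwargs
# to build_and_submit. Only keys that appear elsewhere in this file are used.
example_durations = dict(
    long=dict(
        kind="slurm",
        pmem=10000,
        ignore_gpu=False,
        config=dict(max_steps=100000),   # merged into the training config
    ),
    short=dict(
        kind="slurm-local",
        pmem=5000,
        ignore_gpu=True,
        config=dict(max_steps=100),
    ),
)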
def build_and_submit(
        category, exp_name, config, distributions, n_param_settings=0, n_repeats=1,
        do_local_test=False, kind="local", readme="", tasks_per_gpu=1, **run_kwargs):
    """ Build a job and submit it. Meant to be called from within a script.

    Parameters
    ----------
    category: str
        High-level category of the experiment. Determines the ExperimentStore
        where the experiment data will be stored.
    exp_name: str
        Low-level name of the experiment.
    config: Config instance or dict
        Configuration to use as the base config for all jobs.
    distributions: dict
        Object used to generate variations of the base config (so that
        different jobs test different parameters).
    n_param_settings: int
        Number of different configurations to sample from `distributions`. If
        not supplied, it is assumed that `distributions` actually specifies a
        grid search, and an attempt is made to generate all possible
        configurations in that grid search.
    n_repeats: int
        Number of experiments to run (with different random seeds) for each
        generated configuration.
    do_local_test: bool
        If True, sample one of the generated configurations and use it to run a
        short test locally, to ensure that the jobs will run properly.
    kind: str
        One of pbs, slurm, slurm-local, parallel, local. Specifies which method
        should be used to run the jobs in parallel.
    readme: str
        A string outlining the purpose/context for the created experiment.
    **run_kwargs:
        Additional arguments that are ultimately passed to `ParallelSession` in
        order to run the job.

    """
    # Get run_kwargs from command line
    sig = inspect.signature(ParallelSession.__init__)
    default_run_kwargs = sig.bind_partial()
    default_run_kwargs.apply_defaults()
    cl_run_kwargs = clify.command_line(default_run_kwargs.arguments).parse()
    run_kwargs.update(cl_run_kwargs)

    if config.seed is None or config.seed < 0:
        config.seed = gen_seed()

    assert kind in "pbs slurm slurm-local parallel local".split()
    assert 'build_command' not in config
    config['build_command'] = ' '.join(sys.argv)
    print(config['build_command'])

    if kind == "local":
        with config:
            from dps.train import training_loop
            return training_loop()
    else:
        config.name = category

        config = config.copy()

        if readme == "_vim_":
            readme = edit_text(prefix="dps_readme_", editor="vim", initial_text="README.md: \n")

        scratch = os.path.join(cfg.parallel_experiments_build_dir, category)

        archive_path, n_tasks = build_search(
            scratch, exp_name, distributions, config, add_date=1, _zip=True,
            do_local_test=do_local_test, n_param_settings=n_param_settings,
            n_repeats=n_repeats, readme=readme)

        run_kwargs.update(
            archive_path=archive_path, category=category, exp_name=exp_name, kind=kind)

        gpu_kind = run_kwargs.get('gpu_kind', None)
        resources = compute_required_resources(n_tasks, tasks_per_gpu, gpu_kind)
        run_kwargs.update(resources)

        parallel_session = submit_job(**run_kwargs)

        return parallel_session
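# Minimal usage sketch for build_and_submit, based only on the docstring above.
# The category, experiment name, and the hyperparameter inside `distributions`
# are made-up placeholders; `config` is assumed to be a Config built elsewhere.
# With kind="local" the call just runs training_loop(); with a cluster kind it
# builds a search archive and submits it.
def _example_submit(config):
    return build_and_submit(
        category="my_experiments",            # hypothetical ExperimentStore category
        exp_name="lr_sweep",                  # hypothetical experiment name
        config=config,
        distributions=dict(lr_schedule=[1e-3, 1e-4, 1e-5]),  # treated as a grid search
        n_repeats=3,
        kind="slurm",
        readme="Sweep over learning rate schedules.",
    )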
x = int(sys.argv[1])

if x == 0:
    print("TRPE")
    config.update(
        name="TRPE", delta_schedule='0.01', max_cg_steps=10,
        max_line_search_steps=10, alg_class=TrustRegionPolicyEvaluation)
elif x == 1:
    print("PPE")
    config.update(
        name="PPE", optimizer_spec="rmsprop", lr_schedule="1e-2", epsilon=0.2,
        opt_steps_per_update=100, S=1, alg_class=ProximalPolicyEvaluation)
else:
    print("PE")
    config.update(
        name="PolicyEvaluation", optimizer_spec='rmsprop', lr_schedule='1e-5',
        opt_steps_per_update=100, alg_class=PolicyEvaluation)

with config:
    cfg.update_from_command_line()
    training_loop()
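# Usage note (an assumption, not stated in the source): this fragment presumably
# sits at the bottom of a script in which `config` has already been built, so it
# would be launched along the lines of
#     python <script>.py 0    ->  TrustRegionPolicyEvaluation
#     python <script>.py 1    ->  ProximalPolicyEvaluation
#     python <script>.py 2    ->  PolicyEvaluation
# with any further overrides picked up by cfg.update_from_command_line().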
def run_experiment(
        name, base_config, readme, distributions=None, durations=None,
        name_variables=None, alg_configs=None, env_configs=None, late_config=None):

    name = sanitize(name)
    durations = durations or {}

    parser = argparse.ArgumentParser()
    if env_configs is not None:
        parser.add_argument('env')
    if alg_configs is not None:
        parser.add_argument('alg')
    parser.add_argument("duration", choices=list(durations.keys()) + ["local"], default="local", nargs="?")

    args, _ = parser.parse_known_args()

    config = DEFAULT_CONFIG.copy()
    config.update(base_config)

    if env_configs is not None:
        env_config = env_configs[args.env]
        config.update(env_config)

    if alg_configs is not None:
        alg_config = alg_configs[args.alg]
        config.update(alg_config)

    if late_config is not None:
        config.update(late_config)

    config.update_from_command_line()

    env_name = sanitize(config.get('env_name', ''))
    if name:
        config.env_name = "{}_env={}".format(name, env_name)
    else:
        config.env_name = "env={}".format(env_name)

    alg_name = sanitize(config.get("alg_name", ""))

    if args.duration == "local":
        config.exp_name = "alg={}".format(alg_name)
        with config:
            return training_loop()

    run_kwargs = Config(
        kind="slurm",
        pmem=5000,
        ignore_gpu=False,
    )

    duration_args = durations[args.duration]

    if 'config' in duration_args:
        config.update(duration_args['config'])
        del duration_args['config']

    run_kwargs.update(durations[args.duration])
    run_kwargs.update_from_command_line()

    if name_variables is not None:
        name_variables_str = "_".join(
            "{}={}".format(sanitize(str(k)), sanitize(str(getattr(config, k))))
            for k in name_variables.split(","))
        config.env_name = "{}_{}".format(config.env_name, name_variables_str)

    exp_name = "{}_alg={}_duration={}".format(config.env_name, alg_name, args.duration)

    build_and_submit(name=exp_name, config=config, distributions=distributions, **run_kwargs)