def __call__(self, inp):
    # Build one dataset shard in a subprocess; `inp` is an (idx, seed, n_examples) tuple.
    import os
    import datetime
    import dps
    from dps import cfg  # noqa
    from dps.config import DEFAULT_CONFIG
    from dps.utils import ExperimentStore

    os.nice(10)

    print("Entered _BuildDataset at: ")
    print(datetime.datetime.now())

    idx, seed, n_examples = inp
    print("idx: {}, seed: {}, n_examples: {}".format(idx, seed, n_examples))

    dps.reset_config()

    params = self.params.copy()
    params.update(seed=seed, n_examples=n_examples)

    with DEFAULT_CONFIG.copy():
        cfg.update_from_command_line()
        print(cfg)

        experiment_store = ExperimentStore(
            os.path.join(cfg.local_experiments_dir, cfg.env_name))
        exp_dir = experiment_store.new_experiment(
            "", seed, add_date=1, force_fresh=1, update_latest=False)
        params["data_dir"] = exp_dir.path

        print(params)

        # Instantiating the dataset class builds the data and writes it to `data_dir`.
        self.cls(**params)

    print("Leaving _BuildDataset at: ")
    print(datetime.datetime.now())
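# Invocation sketch (a hypothetical driver, not the actual dps job runner;
# _BuildDataset's constructor is not shown in this snippet, only that the
# instance carries `cls` and `params`): each worker call receives a single
# (idx, seed, n_examples) tuple describing the shard to build.
#
#     builder = _BuildDataset(...)  # constructed elsewhere with `cls` and `params`
#     builder((0, 42, 1000))        # shard 0, seed 42, 1000 examples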
def _run(env_str, alg_str, _config=None, **kwargs):
    env_config, alg_config = parse_env_alg(env_str, alg_str)

    config = DEFAULT_CONFIG.copy()
    config.update(alg_config)
    config.update(env_config)

    if _config is not None:
        config.update(_config)
    config.update(kwargs)

    with config:
        cfg.update_from_command_line()
        return training_loop()
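# Usage sketch for _run (the env/alg strings are placeholders; which names
# parse_env_alg accepts depends on the surrounding codebase). Keyword overrides
# are applied after the env/alg configs, and command-line args win last inside
# the function; `seed` and `max_time` are config keys that appear elsewhere in
# these snippets.
#
#     _run("some_env", "some_alg", _config=dict(seed=0), max_time=3600)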
def __call__(self, new):
    import os

    stdout_path = f"./stdout_pid={os.getpid()}"

    with redirect_stream('stdout', stdout_path, tee=True):
        start_time = time.time()

        print("Entered _RunTrainingLoop at: ")
        print(datetime.datetime.now())

        os.nice(10)

        print("Sampled values: ")
        print(new)

        print("Base config: ")
        print(self.base_config)

        exp_name = '_'.join("{}={}".format(k, new[k]) for k in 'idx repeat'.split())

        config = get_default_config()
        config.update(self.base_config)
        config.update(new)
        config.update(
            start_tensorboard=False,
            show_plots=False,
            update_latest=False,
            git_record_mode='none',
            in_parallel_session=True,

            # These need to be present so that they're picked up when we get
            # args from the command line.
            local_experiments_dir='',
            backup_dir='',
            env_name='',
            max_time=0,
        )

        with config:
            # This is used for passing the args 'local_experiments_dir',
            # 'backup_dir', 'env_name' and 'max_time'.
            cfg.update_from_command_line(strict=False)

            from dps.train import training_loop
            result = training_loop(exp_name=exp_name, start_time=start_time)

        print("Leaving _RunTrainingLoop at: ")
        print(datetime.datetime.now())

        return result
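# Worked example of the experiment-naming scheme above (pure stdlib, safe to
# run standalone): only the 'idx' and 'repeat' keys of `new` contribute to the
# name, regardless of what other sampled values it carries.
new = dict(idx=3, repeat=1, seed=0)
exp_name = '_'.join("{}={}".format(k, new[k]) for k in 'idx repeat'.split())
assert exp_name == "idx=3_repeat=1"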
def __call__(self, new):
    start_time = time.time()

    print("Entered _RunTrainingLoop at: ")
    print(datetime.datetime.now())

    os.nice(10)

    print("Sampled values: ")
    print(new)

    print("Base config: ")
    print(self.base_config)

    exp_name = '_'.join("{}={}".format(k, new[k]) for k in 'idx repeat'.split())

    dps.reset_config()

    config = DEFAULT_CONFIG.copy()
    config.update(self.base_config)
    config.update(new)
    config.update(
        start_tensorboard=False,
        show_plots=False,
    )

    with config:
        cfg.update_from_command_line()

        from dps.train import training_loop
        result = training_loop(exp_name=exp_name, start_time=start_time)

    print("Leaving _RunTrainingLoop at: ")
    print(datetime.datetime.now())

    return result
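# Hypothetical driver sketch (dps's real scheduler is not shown in these
# snippets): because the callable above and its argument dicts are picklable,
# it can be mapped over sampled parameter settings with a standard process
# pool. `run_loop` stands in for a constructed _RunTrainingLoop instance.
#
#     from multiprocessing import Pool
#
#     settings = [dict(idx=i, repeat=r) for i in range(4) for r in range(2)]
#     with Pool(processes=4) as pool:
#         results = pool.map(run_loop, settings)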
from dps import cfg
from dps.utils import Config

from auto_yolo.envs import run_experiment


if __name__ == "__main__":
    _config = Config()

    with _config:
        cfg.update_from_command_line()

    run_experiment("local_run", _config, "")
def build_and_submit(
        name, config, distributions, n_param_settings=0, n_repeats=1,
        do_local_test=False, kind="local", readme="", **run_kwargs):
    """ Build a job and submit it. Meant to be called from within a script.

    Parameters
    ----------
    name: str
        Name of the experiment.
    config: Config instance or dict
        Configuration to use as the base config for all jobs.
    distributions: dict
        Object used to generate variations of the base config (so that
        different jobs test different parameters).
    n_param_settings: int
        Number of different configurations to sample from `distributions`.
        If not supplied, it is assumed that `distributions` actually specifies
        a grid search, and an attempt is made to generate all possible
        configurations in that grid search.
    n_repeats: int
        Number of experiments to run (with different random seeds) for each
        generated configuration.
    do_local_test: bool
        If True, sample one of the generated configurations and use it to run
        a short test locally, to ensure that the jobs will run properly.
    kind: str
        One of pbs, slurm, slurm-local, parallel, local. Specifies which
        method should be used to run the jobs in parallel.
    readme: str
        A string outlining the purpose/context for the created search.
    **run_kwargs:
        Additional arguments that are ultimately passed to `ParallelSession`
        in order to run the job.

    """
    # Get run_kwargs from the command line.
    sig = inspect.signature(ParallelSession.__init__)
    default_run_kwargs = sig.bind_partial()
    default_run_kwargs.apply_defaults()
    cl_run_kwargs = clify.command_line(default_run_kwargs.arguments).parse()
    run_kwargs.update(cl_run_kwargs)

    if config.seed is None or config.seed < 0:
        config.seed = gen_seed()

    assert kind in "pbs slurm slurm-local parallel local".split()
    assert 'build_command' not in config
    config['build_command'] = ' '.join(sys.argv)
    print(config['build_command'])

    with config:
        cfg.update_from_command_line()

    if kind == "local":
        with config:
            from dps.train import training_loop
            return training_loop()
    else:
        config.name = name

        config = config.copy(
            start_tensorboard=False,
            show_plots=False,
        )

        if readme == "_vim_":
            readme = edit_text(
                prefix="dps_readme_", editor="vim", initial_text="README.md: \n")

        archive_path = build_search(
            cfg.parallel_experiments_build_dir, name, distributions, config,
            add_date=1, _zip=True, do_local_test=do_local_test,
            n_param_settings=n_param_settings, n_repeats=n_repeats,
            readme=readme)

        run_kwargs.update(
            archive_path=archive_path, name=name, kind=kind,
            parallel_exe=cfg.parallel_exe)

        parallel_session = submit_job(**run_kwargs)

        return parallel_session
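# Usage sketch for build_and_submit (illustrative only: 'lr' is a hypothetical
# hyperparameter key, and the experiment name and readme are placeholders;
# running this would actually build and submit a search):
if __name__ == "__main__":
    from dps.utils import Config

    config = Config()
    config.seed = -1  # seed < 0 triggers gen_seed() above

    # Grid over a hypothetical hyperparameter; with n_param_settings left at
    # its default, `distributions` is treated as a full grid search.
    distributions = dict(lr=[1e-3, 1e-4])

    build_and_submit(
        "example_search", config, distributions,
        n_repeats=2, kind="slurm", readme="illustrative submission")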