def sweep_2_jobs(sweep_runner, overrides):
    """
    Runs a sweep with two jobs and verifies the outcome of each job.

    The sweep override ``a=0,1`` is added to the supplied overrides, so the
    sweep is expected to return exactly two jobs (``a=0`` and ``a=1``).

    :param sweep_runner: callable invoked with the keyword arguments
        ``calling_file``, ``calling_module``, ``config_path`` and ``overrides``;
        its result is used as a context manager exposing ``temp_dir`` and
        ``returns``.
    :param overrides: list of additional overrides. It is not modified.
    """
    # Build a new list rather than appending in place, so a list object shared
    # by the caller (e.g. reused between tests) is left untouched.
    overrides = list(overrides) + ["a=0,1"]
    sweep = sweep_runner(
        calling_file=None,
        calling_module="hydra.test_utils.a_module",
        config_path="configs/compose.yaml",
        overrides=overrides,
    )
    base = OmegaConf.create({"foo": 10, "bar": 100, "a": 0})

    with sweep:
        temp_dir = Path(sweep.temp_dir)
        job_returns = sweep.returns[0]
        assert len(job_returns) == 2
        for i, job_ret in enumerate(job_returns):
            # The job config is the base config with the job's own override applied.
            expected_conf = OmegaConf.merge(
                base, OmegaConf.from_dotlist(job_ret.overrides)
            )
            assert job_ret.overrides == ["a={}".format(i)]
            assert job_ret.cfg == expected_conf
            assert job_ret.hydra_cfg.hydra.job.name == "a_module"
            verify_dir_outputs(job_ret, job_ret.overrides)
            # A directory named after the job index must exist in the sweep dir.
            path = temp_dir / str(i)
            assert path.exists(), "'{}' does not exist, dirs: {}".format(
                path, [x for x in temp_dir.iterdir() if x.is_dir()]
            )
def run_job(config, task_function, job_dir_key, job_subdir_key):
    """
    Runs ``task_function`` from inside the job's working directory.

    The working directory is read from ``config`` at ``job_dir_key`` (joined
    with the value at ``job_subdir_key`` when that key is not None), created if
    missing, and made the current directory for the duration of the call. The
    task config, the hydra config and the task overrides are saved under the
    hydra output sub directory before the task function is invoked.

    :param config: full config, including the ``hydra`` node
    :param task_function: callable invoked with the config stripped of ``hydra``
    :param job_dir_key: config key holding the job directory
    :param job_subdir_key: config key holding the job sub directory, or None
    :return: a JobReturn populated with working_dir, cfg, hydra_cfg, overrides,
        return_value and task_name
    """
    original_cwd = os.getcwd()
    job_dir = str(config.select(job_dir_key))
    if job_subdir_key is not None:
        # The sub directory key is evaluated lazily: this runs on the client
        # side in a sweep, and the value contains things such as job:id which
        # are only available there.
        job_subdir = str(config.select(job_subdir_key))
        job_dir = os.path.join(job_dir, job_subdir)

    try:
        job_return = JobReturn()
        job_return.working_dir = job_dir

        # Split a private copy of the config into the task part and the hydra part.
        task_cfg = copy.deepcopy(config)
        # TODO: update this after https://github.com/omry/omegaconf/issues/42 is resolved
        hydra_cfg = OmegaConf.create({"hydra": task_cfg["hydra"]})
        del task_cfg["hydra"]

        job_return.cfg = task_cfg
        job_return.hydra_cfg = copy.deepcopy(HydraConfig())
        job_return.overrides = config.hydra.overrides.task.to_container()

        # Output directories are handled here: create the job dir and enter it.
        Path(str(job_dir)).mkdir(parents=True, exist_ok=True)
        os.chdir(job_dir)
        output_subdir = Path(hydra_cfg.hydra.output_subdir)

        configure_log(hydra_cfg.hydra.job_logging, hydra_cfg.hydra.verbose)
        _save_config(task_cfg, "config.yaml", output_subdir)
        _save_config(hydra_cfg, "hydra.yaml", output_subdir)
        _save_config(config.hydra.overrides.task, "overrides.yaml", output_subdir)

        job_return.return_value = task_function(task_cfg)
        job_return.task_name = JobRuntime().get("name")
        return job_return
    finally:
        # Always restore the caller's working directory, even if the task fails.
        os.chdir(original_cwd)
def test_to_absolute_path(orig_cwd, path, expected):
    """
    Checks that ``utils.to_absolute_path`` maps ``path`` to ``expected`` when the
    hydra runtime cwd is ``orig_cwd``.
    """
    # Normalize all paths to the current OS.
    orig_cwd, path, expected = (
        str(Path(value)) for value in (orig_cwd, path, expected)
    )
    runtime_cfg = OmegaConf.create({"hydra": {"runtime": {"cwd": orig_cwd}}})
    HydraConfig().set_config(runtime_cfg)
    assert utils.to_absolute_path(path) == expected
def test_interpolating_dir_hydra_to_app(
    task_runner, calling_file, calling_module  # noqa: F811
):
    """
    Runs the task with ``experiment.base_dir`` overridden and checks that a
    directory with that name exists under the task's temp dir.
    """
    basedir = "foo"
    runner = task_runner(
        calling_file=calling_file,
        calling_module=calling_module,
        config_path="config.yaml",
        overrides=["experiment.base_dir={}".format(basedir)],
    )
    with runner as task:
        expected_dir = Path(task.temp_dir) / basedir
        assert expected_dir.exists()
def to_absolute_path(path):
    """
    Converts the specified path to an absolute path.

    A relative input is interpreted as relative to the original working
    directory (as returned by ``get_original_cwd()``); an absolute input is
    returned as is.

    :param path: path to convert
    :return: the absolute path, as a string
    """
    resolved = Path(path)
    if not resolved.is_absolute():
        resolved = Path(get_original_cwd()) / resolved
    return str(resolved)
def launch(self, job_overrides):
    """
    Runs one job per entry of ``job_overrides``, one after another, via run_job.

    :param job_overrides: sequence where each item holds the overrides of one job
    :return: list with the run_job result of every job, in input order
    """
    setup_globals()
    configure_log(self.config.hydra.hydra_logging, self.config.hydra.verbose)
    Path(str(self.config.hydra.sweep.dir)).mkdir(parents=True, exist_ok=True)
    log.info("Launching {} jobs locally".format(len(job_overrides)))

    job_returns = []
    for job_num, overrides in enumerate(job_overrides):
        shown_overrides = " ".join(filter_overrides(overrides))
        log.info("\t#{} : {}".format(job_num, shown_overrides))

        sweep_config = self.config_loader.load_sweep_config(
            self.config, list(overrides)
        )
        # Both the job id and the job number are the job's position in the sweep.
        with open_dict(sweep_config):
            sweep_config.hydra.job.id = job_num
            sweep_config.hydra.job.num = job_num
        HydraConfig().set_config(sweep_config)

        job_returns.append(
            run_job(
                config=sweep_config,
                task_function=self.task_function,
                job_dir_key="hydra.sweep.dir",
                job_subdir_key="hydra.sweep.subdir",
            )
        )
        # Re-apply the hydra logging configuration after the job has run.
        configure_log(self.config.hydra.hydra_logging, self.config.hydra.verbose)
    return job_returns
def launch(self, job_overrides):
    """
    Submits one Ray remote call per entry of ``job_overrides`` and waits for all
    of them.

    :param job_overrides: a List of List<String>, where each inner list is the
                          arguments for one job run.
    :return: a list of the values obtained with ``ray.get`` for each submitted
             call, with indexes corresponding to the input list indexes.
    """
    configure_log(self.config.hydra.hydra_logging, self.config.hydra.verbose)
    sweep_dir = Path(str(self.config.hydra.sweep.dir))
    sweep_dir.mkdir(parents=True, exist_ok=True)
    log.info(
        "Ray Launcher is launching {} jobs locally".format(len(job_overrides))
    )
    log.info("Sweep output dir : {}".format(sweep_dir))

    pending_runs = []
    for job_num, overrides in enumerate(job_overrides):
        shown_overrides = " ".join(filter_overrides(overrides))
        log.info("\t#{} : {}".format(job_num, shown_overrides))

        sweep_config = self.config_loader.load_sweep_config(
            self.config, list(overrides)
        )
        with open_dict(sweep_config):
            # The job id typically comes from the underlying scheduler
            # (SLURM_JOB_ID for instance). In that case it is not available
            # here, because we are still in the main process; it should instead
            # be populated remotely before calling the task_function.
            sweep_config.hydra.job.id = job_num
            sweep_config.hydra.job.num = job_num
        HydraConfig().set_config(sweep_config)

        # NOTE(review): inside a method the bare name `launch` resolves to a
        # module-level name, not to this method - confirm which callable is meant.
        remote_options = get_key(self.config, 'ray.remote')
        remote_launch = (
            ray.remote(**remote_options)(launch)
            if remote_options
            else ray.remote(launch)
        )
        pending_runs.append(
            remote_launch.remote(
                config=sweep_config,
                task_function=self.task_function,
                job_dir_key="hydra.sweep.dir",
                job_subdir_key="hydra.sweep.subdir",
            )
        )
        configure_log(self.config.hydra.hydra_logging, self.config.hydra.verbose)

    return [ray.get(pending) for pending in pending_runs]
def launch(self, job_overrides):
    """
    Runs one job per entry of ``job_overrides``.

    When ``self.ncpu`` is greater than 1 the jobs are handed to ``MPI`` and the
    results are sorted by job index; otherwise the jobs are run one by one.

    :param job_overrides: sequence where each item holds the overrides of one job
    :return: list with the run_job result of every job, in input order
    """
    setup_globals()
    configure_log(self.config.hydra.hydra_logging, self.config.hydra.verbose)
    Path(str(self.config.hydra.sweep.dir)).mkdir(parents=True, exist_ok=True)
    LOGGER.info("Launching {} jobs locally".format(len(job_overrides)))

    def run_task(job):
        """Runs a single ``(index, overrides)`` job; returns ``(index, result)``."""
        job_num, overrides = job
        shown_overrides = " ".join(filter_overrides(overrides))
        LOGGER.info("\t#{} : {}".format(job_num, shown_overrides))

        sweep_config = self.config_loader.load_sweep_config(
            self.config, list(overrides)
        )
        with open_dict(sweep_config):
            # The job id is the sorted overrides joined with underscores.
            sweep_config.hydra.job.id = '_'.join(sorted(overrides))
            sweep_config.hydra.job.num = job_num
        HydraConfig().set_config(sweep_config)

        job_return = run_job(
            config=sweep_config,
            task_function=self.task_function,
            job_dir_key="hydra.sweep.dir",
            job_subdir_key="hydra.sweep.subdir",
        )
        # Re-apply the hydra logging configuration after the job has run.
        configure_log(self.config.hydra.hydra_logging, self.config.hydra.verbose)
        return (job_num, job_return)

    if self.ncpu > 1:
        indexed_jobs = list(enumerate(job_overrides))
        # Sorting the (index, result) pairs puts the results in job order.
        indexed_results = sorted(
            MPI(jobs=indexed_jobs, func=run_task, ncpu=int(self.ncpu), batch=1)
        )
        return [pair[1] for pair in indexed_results]

    return [run_task(job)[1] for job in enumerate(job_overrides)]
def _save_config(cfg, filename, output_dir):
    """
    Writes ``cfg.pretty()`` to ``filename`` inside ``output_dir``.

    The directory (including missing parents) is created when needed.

    :param cfg: config object exposing a ``pretty()`` method returning text
    :param filename: name of the file to write inside ``output_dir``
    :param output_dir: target directory, given as a str or a Path
    """
    # Normalize once so that both the mkdir and the path join below work when
    # the directory is passed as a plain string.
    output_dir = Path(str(output_dir))
    output_dir.mkdir(parents=True, exist_ok=True)
    with open(str(output_dir / filename), "w") as file:
        file.write(cfg.pretty())
def create_files(in_files):
    """
    Creates every file listed in ``in_files``, together with any missing parent
    directories.

    Existing files and directories are left in place, so several files may share
    a directory and the function may be called repeatedly with the same input.

    :param in_files: iterable of file paths to create
    """
    for f in in_files:
        path = Path(f)
        # mkdir must be called on a Path instance; exist_ok lets files share a
        # directory without failing on the second one.
        path.parent.mkdir(parents=True, exist_ok=True)
        path.touch(exist_ok=True)