def __call__(
    self,
    sweep_overrides: List[str],
    job_dir_key: str,
    job_num: int,
    job_id: str,
    singleton_state: Dict[type, "Singleton"],
):
    """Run one sweep job in the remote worker process.

    :param sweep_overrides: overrides applied to this particular job
    :param job_dir_key: config key that points at the job output directory
    :param job_id: fallback job id used when SLURM_JOB_ID is not set
    :param job_num: index of this job within the sweep
    :param singleton_state: Singleton state serialized by the launcher process
    """
    # Restore singletons serialized by the launching process before any
    # Hydra machinery is touched.
    Singleton.set_state(singleton_state)
    configure_log(self.config.hydra.job_logging, self.config.hydra.verbose)
    setup_globals()
    sweep_config = self.config_loader.load_sweep_config(
        self.config, sweep_overrides)

    with open_dict(sweep_config.hydra.job) as job:
        # Populate new job variables
        # Prefer the scheduler-provided id when running under SLURM.
        if "SLURM_JOB_ID" in os.environ:
            job.id = os.environ["SLURM_JOB_ID"]
        else:
            job.id = job_id
        sweep_config.hydra.job.num = job_num

    return run_job(
        config=sweep_config,
        task_function=self.task_function,
        job_dir_key=job_dir_key,
        job_subdir_key="hydra.sweep.subdir",
    )
def launch_jobs(temp_dir: str) -> None:
    """Entry point executed on the remote instance.

    Loads the job spec pickled by the launcher, runs every sweep config on
    the local Ray cluster, then dumps the collected results back to disk.

    :param temp_dir: directory containing the pickled job spec
    """
    runs = []
    with open(os.path.join(temp_dir, JOB_SPEC_PICKLE), "rb") as f:
        # NOTE(review): unpickling a launcher-produced spec; trusted input by
        # design (`# nosec`), do not point this at untrusted files.
        job_spec = pickle.load(f)  # nosec
        singleton_state = job_spec["singleton_state"]
        sweep_configs = job_spec["sweep_configs"]
        task_function = job_spec["task_function"]

        instance_id = _get_instance_id()

        sweep_dir = None

        for sweep_config in sweep_configs:
            with open_dict(sweep_config):
                # Job id is derived from the EC2 instance id plus the
                # job number assigned by the launcher.
                sweep_config.hydra.job.id = (
                    f"{instance_id}_{sweep_config.hydra.job.num}"
                )
            setup_globals()
            Singleton.set_state(singleton_state)
            HydraConfig.instance().set_config(sweep_config)
            ray_init_cfg = sweep_config.hydra.launcher.ray_init_cfg
            ray_remote_cfg = sweep_config.hydra.launcher.ray_remote_cfg

            # Create the sweep dir once, using the first job's config.
            if not sweep_dir:
                sweep_dir = Path(str(HydraConfig.get().sweep.dir))
                sweep_dir.mkdir(parents=True, exist_ok=True)

            start_ray(ray_init_cfg)
            ray_obj = launch_job_on_ray(
                ray_remote_cfg, sweep_config, task_function, singleton_state
            )
            runs.append(ray_obj)

    # Block until every Ray task completes, then persist the JobReturns.
    result = [ray.get(run) for run in runs]
    _dump_job_return(result, temp_dir)
def __call__(
    self,
    sweep_overrides: List[str],
    job_dir_key: str,
    job_num: int,
    job_id: str,
    singleton_state: Dict[type, Singleton],
) -> JobReturn:
    """Executed inside the submitit job: restore launcher state, then run
    the task function for a single sweep configuration."""
    # lazy import to ensure plugin discovery remains fast
    import submitit

    assert self.config_loader is not None
    assert self.config is not None
    assert self.task_function is not None

    Singleton.set_state(singleton_state)
    setup_globals()

    cfg = self.config_loader.load_sweep_config(self.config, sweep_overrides)
    with open_dict(cfg.hydra.job) as job:
        # Populate new job variables
        job.id = submitit.JobEnvironment().job_id  # type: ignore
        cfg.hydra.job.num = job_num

    return run_job(
        config=cfg,
        task_function=self.task_function,
        job_dir_key=job_dir_key,
        job_subdir_key="hydra.sweep.subdir",
    )
def launch(self, job_overrides: Sequence[Sequence[str]], initial_job_idx: int) -> Sequence[JobReturn]:
    """Run every override set sequentially in the current process."""
    setup_globals()
    assert self.config is not None
    assert self.task_function is not None
    assert self.config_loader is not None
    configure_log(self.config.hydra.hydra_logging, self.config.hydra.verbose)

    Path(str(self.config.hydra.sweep.dir)).mkdir(parents=True, exist_ok=True)
    log.info(f"Launching {len(job_overrides)} jobs locally")

    results: List[JobReturn] = []
    for offset, overrides in enumerate(job_overrides):
        job_idx = initial_job_idx + offset
        log.info(f"\t#{job_idx} : {' '.join(filter_overrides(overrides))}")
        cfg = self.config_loader.load_sweep_config(self.config, list(overrides))
        with open_dict(cfg):
            cfg.hydra.job.id = job_idx
            cfg.hydra.job.num = job_idx
        results.append(
            run_job(
                config=cfg,
                task_function=self.task_function,
                job_dir_key="hydra.sweep.dir",
                job_subdir_key="hydra.sweep.subdir",
            )
        )
    # run_job reconfigured logging for the job; restore Hydra's own logging.
    configure_log(self.config.hydra.hydra_logging, self.config.hydra.verbose)
    return results
def execute_job(
    idx: int,
    overrides: Sequence[str],
    config_loader: ConfigLoader,
    config: DictConfig,
    task_function: TaskFunction,
    singleton_state: Dict[Any, Any],
) -> JobReturn:
    """Calls `run_job` in parallel
    """
    setup_globals()
    Singleton.set_state(singleton_state)

    cfg = config_loader.load_sweep_config(config, list(overrides))
    with open_dict(cfg):
        cfg.hydra.job.id = "{}_{}".format(cfg.hydra.job.name, idx)
        cfg.hydra.job.num = idx
    HydraConfig.instance().set_config(cfg)

    return run_job(
        config=cfg,
        task_function=task_function,
        job_dir_key="hydra.sweep.dir",
        job_subdir_key="hydra.sweep.subdir",
    )
def test_sweep_config_cache(self, hydra_restore_singletons: Any, path: str, monkeypatch: Any) -> None:
    """The `now` resolver cache is copied into sweep configs; `env` is not."""
    setup_globals()
    loader = ConfigLoaderImpl(
        config_search_path=create_config_search_path(path)
    )
    overrides = ["time='${now:%H-%M-%S}'", "home='${env:HOME}'"]
    master_cfg = loader.load_configuration(
        config_name="config.yaml",
        strict=False,
        overrides=overrides,
        run_mode=RunMode.RUN,
    )

    # trigger resolution by type assertion
    assert isinstance(master_cfg.time, str)
    assert isinstance(master_cfg.home, str)

    master_cfg_cache = OmegaConf.get_cache(master_cfg)
    assert "now" in master_cfg_cache.keys() and "env" in master_cfg_cache.keys()
    assert master_cfg.home == os.environ["HOME"]

    sweep_cfg = loader.load_sweep_config(
        master_config=master_cfg,
        sweep_overrides=overrides,
    )
    sweep_cfg_cache = OmegaConf.get_cache(sweep_cfg)
    # only the deterministic `now` entry is carried over
    assert list(sweep_cfg_cache.keys()) == ["now"]
    assert sweep_cfg_cache["now"] == master_cfg_cache["now"]

    # env lookups re-resolve live instead of being cached
    monkeypatch.setenv("HOME", "/another/home/dir/")
    assert sweep_cfg.home == os.getenv("HOME")
def launch(self, job_overrides: Sequence[Sequence[str]]) -> Sequence[JobReturn]:
    """Run each job serially in the launcher's own process."""
    setup_globals()
    assert self.config is not None
    assert self.task_function is not None
    assert self.config_loader is not None
    configure_log(self.config.hydra.hydra_logging, self.config.hydra.verbose)

    Path(str(self.config.hydra.sweep.dir)).mkdir(parents=True, exist_ok=True)
    log.info("Launching {} jobs locally".format(len(job_overrides)))

    runs: List[JobReturn] = []
    for idx, overrides in enumerate(job_overrides):
        log.info("\t#{} : {}".format(idx, " ".join(filter_overrides(overrides))))
        cfg = self.config_loader.load_sweep_config(self.config, list(overrides))
        with open_dict(cfg):
            cfg.hydra.job.id = idx
            cfg.hydra.job.num = idx
        HydraConfig.instance().set_config(cfg)
        runs.append(
            run_job(
                config=cfg,
                task_function=self.task_function,
                job_dir_key="hydra.sweep.dir",
                job_subdir_key="hydra.sweep.subdir",
            )
        )
    # restore Hydra's logging, clobbered by run_job
    configure_log(self.config.hydra.hydra_logging, self.config.hydra.verbose)
    return runs
def dispatch_job(
    idx: int,
    overrides: Sequence[str],
    config_loader: ConfigLoader,
    config: DictConfig,
    task_function: TaskFunction,
    singleton_state: Dict[Any, Any],
) -> JobReturn:
    """Calls `run_job` in parallel

    Note that Joblib's default backend runs isolated Python processes, see
    https://joblib.readthedocs.io/en/latest/parallel.html#shared-memory-semantics
    """
    setup_globals()
    Singleton.set_state(singleton_state)
    log.info(f"\t#{idx} : {' '.join(filter_overrides(overrides))}")

    cfg = config_loader.load_sweep_config(config, list(overrides))
    with open_dict(cfg):
        cfg.hydra.job.id = f"{cfg.hydra.job.name}_{idx}"
        cfg.hydra.job.num = idx
    HydraConfig.instance().set_config(cfg)

    return run_job(
        config=cfg,
        task_function=task_function,
        job_dir_key="hydra.sweep.dir",
        job_subdir_key="hydra.sweep.subdir",
    )
def launch(
    self, job_overrides: Sequence[Sequence[str]], initial_job_idx: int
) -> Sequence[JobReturn]:
    """Write slurm/sh files for every job and submit them to the scheduler.

    Jobs run remotely, so the returned list is empty — nothing is collected
    in this process.
    """
    setup_globals()
    assert self.hydra_context is not None
    assert self.config is not None
    assert self.task_function is not None
    configure_log(self.config.hydra.hydra_logging, self.config.hydra.verbose)

    Path(str(self.config.hydra.sweep.dir)).mkdir(parents=True, exist_ok=True)
    log.info("Launching {} jobs on slurm".format(len(job_overrides)))

    runs: List[JobReturn] = []
    for offset, overrides in enumerate(job_overrides):
        idx = initial_job_idx + offset
        override_str = " ".join(filter_overrides(overrides))
        log.info(f"\t#{idx} : {override_str}")
        sweep_config = self.hydra_context.config_loader.load_sweep_config(
            self.config, list(overrides)
        )
        with open_dict(sweep_config):
            sweep_config.hydra.job.id = idx
            sweep_config.hydra.job.num = idx
        HydraConfig.instance().set_config(sweep_config)

        log.info("\tJob name : {}".format(slurm_utils.resolve_name(sweep_config.slurm.job_name)))
        slurm_utils.write_slurm(sweep_config)
        slurm_utils.write_sh(sweep_config, override_str)
        slurm_utils.launch_job(sweep_config)

    configure_log(self.config.hydra.hydra_logging, self.config.hydra.verbose)
    # NOTE(review): uses the last loop iteration's sweep_config; raises
    # NameError when job_overrides is empty — preserved from the original.
    if sweep_config.wait:
        time.sleep(1)
    return runs
def launch(self, job_overrides: Sequence[Sequence[str]], initial_job_idx: int) -> Sequence[JobReturn]:
    """
    :param job_overrides: a List of List<String>, where each inner list is the arguments for one job run.
    :param initial_job_idx: Initial job idx in batch.
    :return: an array of return values from run_job with indexes corresponding to the input list indexes.
    """
    setup_globals()
    assert self.config is not None
    assert self.hydra_context is not None
    assert self.task_function is not None
    configure_log(self.config.hydra.hydra_logging, self.config.hydra.verbose)

    sweep_dir = Path(str(self.config.hydra.sweep.dir))
    sweep_dir.mkdir(parents=True, exist_ok=True)
    log.info(
        f"Example Launcher(foo={self.foo}, bar={self.bar}) "
        f"is launching {len(job_overrides)} jobs locally"
    )
    log.info(f"Sweep output dir : {sweep_dir}")

    runs = []
    for offset, overrides in enumerate(job_overrides):
        idx = initial_job_idx + offset
        log.info(f"\t#{idx} : {' '.join(filter_overrides(overrides))}")
        sweep_config = self.hydra_context.config_loader.load_sweep_config(
            self.config, list(overrides)
        )
        with open_dict(sweep_config):
            # The job id typically comes from the underlying scheduler
            # (SLURM_JOB_ID for instance). It is not available here because we
            # are still in the main process; a remote launcher would populate
            # it before calling the task_function.
            sweep_config.hydra.job.id = f"job_id_for_{idx}"
            sweep_config.hydra.job.num = idx

        # A launcher executing code in a different process must restore the
        # singleton state there — serialize it along with the other
        # parameters passed to the child process.
        # happening on this process (executing launcher)
        state = Singleton.get_state()
        # happening on the spawned process (executing task_function in run_job)
        Singleton.set_state(state)

        ret = run_job(
            hydra_context=self.hydra_context,
            task_function=self.task_function,
            config=sweep_config,
            job_dir_key="hydra.sweep.dir",
            job_subdir_key="hydra.sweep.subdir",
        )
        runs.append(ret)
        # reconfigure the logging subsystem for Hydra as the run_job call
        # configured it for the Job; needed for launchers that call run_job
        # in the same process rather than spawning a new one.
        configure_log(self.config.hydra.hydra_logging, self.config.hydra.verbose)
    return runs
def launch(
    launcher: RayAWSLauncher,
    job_overrides: Sequence[Sequence[str]],
    initial_job_idx: int,
) -> Sequence[JobReturn]:
    """Pickle the sweep jobs and hand them off to a Ray AWS cluster.

    :param launcher: configured RayAWSLauncher instance
    :param job_overrides: one override list per job
    :param initial_job_idx: index of the first job in this batch
    :return: JobReturns collected from the remote cluster
    """
    setup_globals()
    assert launcher.config is not None
    assert launcher.config_loader is not None
    assert launcher.task_function is not None

    # Extend the cluster setup commands with pip installs pinning the
    # configured package versions.
    setup_commands = launcher.env_setup.commands
    with read_write(setup_commands):
        setup_commands.extend([
            f"pip install {package}=={version}"
            for package, version in launcher.env_setup.pip_packages.items()
        ])
        setup_commands.extend(launcher.ray_cfg.cluster.setup_commands)

    with read_write(launcher.ray_cfg.cluster):
        launcher.ray_cfg.cluster.setup_commands = setup_commands

    configure_log(launcher.config.hydra.hydra_logging, launcher.config.hydra.verbose)

    log.info(f"Ray Launcher is launching {len(job_overrides)} jobs, ")

    with tempfile.TemporaryDirectory() as local_tmp_dir:
        sweep_configs = []
        for idx, overrides in enumerate(job_overrides):
            idx = initial_job_idx + idx
            ostr = " ".join(filter_overrides(overrides))
            log.info(f"\t#{idx} : {ostr}")
            sweep_config = launcher.config_loader.load_sweep_config(
                launcher.config, list(overrides))
            with open_dict(sweep_config):
                # job.id will be set on the EC2 instance before running the job.
                sweep_config.hydra.job.num = idx
            sweep_configs.append(sweep_config)

        # Serialize configs + task function + singleton state for the remote side.
        _pickle_jobs(
            tmp_dir=local_tmp_dir,
            sweep_configs=sweep_configs,  # type: ignore
            task_function=launcher.task_function,
            singleton_state=Singleton.get_state(),
        )
        # Persist the cluster config to a yaml file for the ray tooling.
        # delete=False: the file must outlive this context manager.
        with tempfile.NamedTemporaryFile(suffix=".yaml", delete=False) as f:
            with open(f.name, "w") as file:
                OmegaConf.save(config=launcher.ray_cfg.cluster, f=file.name, resolve=True)
            launcher.ray_yaml_path = f.name
            log.info(
                f"Saving RayClusterConf in a temp yaml file: {launcher.ray_yaml_path}."
            )

        return launch_jobs(launcher, local_tmp_dir, Path(HydraConfig.get().sweep.dir))
def test_py_version_resolver(hydra_restore_singletons: Any, monkeypatch: Any) -> Any:
    """The `python_version` resolver formats the patched sys.version_info."""
    monkeypatch.setattr(sys, "version_info", (3, 7, 2))
    utils.setup_globals()
    # expected output per requested precision level
    expected = {
        "": "3.7",
        "major": "3",
        "minor": "3.7",
        "micro": "3.7.2",
    }
    for level, version in expected.items():
        assert OmegaConf.create({"key": f"${{python_version:{level}}}"}).key == version
def __init__(self, task_name: str, config_loader: ConfigLoader) -> None:
    """Initialize Hydra with the given task name and config loader.

    :param task_name: task name
    :param config_loader: config loader
    """
    setup_globals()
    JobRuntime().set("name", task_name)
    self.config_loader = config_loader
def launch(
    launcher: RayAWSLauncher,
    job_overrides: Sequence[Sequence[str]],
    initial_job_idx: int,
) -> Sequence[JobReturn]:
    """Pickle the sweep jobs and hand them off to a Ray AWS cluster.

    :param launcher: configured RayAWSLauncher instance
    :param job_overrides: one override list per job
    :param initial_job_idx: index of the first job in this batch
    :return: JobReturns collected from the remote cluster
    """
    setup_globals()
    assert launcher.config is not None
    assert launcher.hydra_context is not None
    assert launcher.task_function is not None

    setup_commands = launcher.env_setup.commands
    # Skip packages whose pinned version is None (not to be installed).
    packages = filter(
        lambda x: x[1] is not None, launcher.env_setup.pip_packages.items()
    )
    with read_write(setup_commands):
        setup_commands.extend(
            [f"pip install {package}=={version}" for package, version in packages]
        )
        setup_commands.extend(launcher.ray_cfg.cluster.setup_commands)

    with read_write(launcher.ray_cfg.cluster):
        launcher.ray_cfg.cluster.setup_commands = setup_commands

    configure_log(launcher.config.hydra.hydra_logging, launcher.config.hydra.verbose)

    # Forward the launcher's logging config to the Ray SDK.
    logging_config = OmegaConf.to_container(
        launcher.logging, resolve=True, enum_to_str=True
    )
    sdk.configure_logging(**logging_config)

    log.info(f"Ray Launcher is launching {len(job_overrides)} jobs, ")

    with tempfile.TemporaryDirectory() as local_tmp_dir:
        sweep_configs = []
        for idx, overrides in enumerate(job_overrides):
            idx = initial_job_idx + idx
            ostr = " ".join(filter_overrides(overrides))
            log.info(f"\t#{idx} : {ostr}")
            sweep_config = launcher.hydra_context.config_loader.load_sweep_config(
                launcher.config, list(overrides)
            )
            with open_dict(sweep_config):
                # job.id will be set on the EC2 instance before running the job.
                sweep_config.hydra.job.num = idx
            sweep_configs.append(sweep_config)

        # Serialize configs + context + task function + singleton state
        # for the remote side.
        _pickle_jobs(
            tmp_dir=local_tmp_dir,
            hydra_context=launcher.hydra_context,
            sweep_configs=sweep_configs,  # type: ignore
            task_function=launcher.task_function,
            singleton_state=Singleton.get_state(),
        )
        return launch_jobs(
            launcher, local_tmp_dir, Path(launcher.config.hydra.sweep.dir)
        )
def launch(
    launcher: JoblibLauncher,
    job_overrides: Sequence[Sequence[str]],
    initial_job_idx: int,
) -> Sequence[JobReturn]:
    """
    :param job_overrides: a List of List<String>, where each inner list is the arguments for one job run.
    :param initial_job_idx: Initial job idx in batch.
    :return: an array of return values from run_job with indexes corresponding to the input list indexes.
    """
    setup_globals()
    assert launcher.config is not None
    assert launcher.task_function is not None
    assert launcher.hydra_context is not None

    configure_log(launcher.config.hydra.hydra_logging, launcher.config.hydra.verbose)
    sweep_dir = Path(str(launcher.config.hydra.sweep.dir))
    sweep_dir.mkdir(parents=True, exist_ok=True)

    # Joblib's backend is hard-coded to loky since the threading
    # backend is incompatible with Hydra
    joblib_cfg = launcher.joblib
    joblib_cfg["backend"] = "loky"
    process_joblib_cfg(joblib_cfg)

    log.info(
        "Joblib.Parallel({}) is launching {} jobs".format(
            ",".join([f"{k}={v}" for k, v in joblib_cfg.items()]),
            len(job_overrides),
        )
    )
    log.info("Launching jobs, sweep output dir : {}".format(sweep_dir))
    for idx, overrides in enumerate(job_overrides):
        # BUGFIX: log the offset job index (initial_job_idx + idx) so the
        # printed job numbers match the ids assigned in execute_job and the
        # behavior of the other launchers.
        log.info(
            "\t#{} : {}".format(initial_job_idx + idx, " ".join(filter_overrides(overrides)))
        )

    singleton_state = Singleton.get_state()
    runs = Parallel(**joblib_cfg)(
        delayed(execute_job)(
            initial_job_idx + idx,
            overrides,
            launcher.hydra_context,
            launcher.config,
            launcher.task_function,
            singleton_state,
        )
        for idx, overrides in enumerate(job_overrides)
    )

    assert isinstance(runs, List)
    for run in runs:
        assert isinstance(run, JobReturn)
    return runs
def test_foo(restore_singletons: Any) -> Any:
    """Hydra-derived job values stay accessible after `hydra` is deleted."""
    utils.setup_globals()
    loader = ConfigLoaderImpl(
        config_search_path=create_config_search_path("pkg://hydra.test_utils.configs")
    )
    cfg = loader.load_configuration(
        config_name="accessing_hydra_config", overrides=[]
    )
    HydraConfig.instance().set_config(cfg)

    with open_dict(cfg):
        del cfg["hydra"]

    assert cfg.job_name == "UNKNOWN_NAME"
    assert cfg.config_name == "accessing_hydra_config"
def _run_job(
    sweep_config: DictConfig,
    task_function: TaskFunction,
    singleton_state: Dict[Any, Any],
) -> JobReturn:
    """Restore launcher state in this process, then execute one sweep job.

    :param sweep_config: fully-resolved config for this job
    :param task_function: the user task to execute
    :param singleton_state: Singleton state serialized by the launcher
    :return: the JobReturn produced by run_job
    """
    # Order matters: globals/resolvers first, then singletons, then the
    # per-job HydraConfig.
    setup_globals()
    Singleton.set_state(singleton_state)
    HydraConfig.instance().set_config(sweep_config)
    return run_job(
        config=sweep_config,
        task_function=task_function,
        job_dir_key="hydra.sweep.dir",
        job_subdir_key="hydra.sweep.subdir",
    )
def launch(self, job_overrides: Sequence[Sequence[str]], initial_job_idx: int) -> Sequence[JobReturn]:
    """
    :param job_overrides: a List of List<String>, where each inner list is the arguments for one job run.
    :param initial_job_idx: Initial job idx in batch.
    :return: an array of return values from run_job with indexes corresponding to the input list indexes.
    """
    setup_globals()
    assert self.config is not None
    assert self.config_loader is not None
    assert self.task_function is not None
    configure_log(self.config.hydra.hydra_logging, self.config.hydra.verbose)

    sweep_dir = Path(str(self.config.hydra.sweep.dir))
    sweep_dir.mkdir(parents=True, exist_ok=True)
    log.info(
        f"TaskSpooler Launcher is launching {len(job_overrides)} jobs locally"
    )
    log.info(f"Sweep output dir : {sweep_dir}")

    singleton_state = Singleton.get_state()
    runs = []
    for offset, overrides in enumerate(job_overrides):
        # append the launcher-level hydra overrides to each job's overrides
        extended = tuple([*overrides, *self.hydra_overrides])
        runs.append(
            execute_job(
                initial_job_idx + offset,
                extended,
                self.config_loader,
                self.config,
                self.task_function,
                singleton_state,
                self.cmd_prefix,
                self.tsp_prefix,
            )
        )
        # throttle submissions to the task spooler
        time.sleep(self.time_between_submit)

    assert isinstance(runs, List)
    for run in runs:
        assert isinstance(run, JobReturn)

    if self.tail_jobs:
        Parallel(n_jobs=len(job_overrides), backend='threading')(
            delayed(self.tail_job)(run.return_value) for run in runs
        )
    return runs
def launch(self, job_overrides: Sequence[Sequence[str]], initial_job_idx: int) -> Sequence[JobReturn]:
    """
    :param job_overrides: a List of List<String>, where each inner list is the arguments for one job run.
    :param initial_job_idx: Initial job idx in batch.
    :return: an array of return values from run_job with indexes corresponding to the input list indexes.
    """
    setup_globals()
    assert self.config is not None
    assert self.config_loader is not None
    assert self.task_function is not None
    configure_log(self.config.hydra.hydra_logging, self.config.hydra.verbose)

    sweep_dir = Path(str(self.config.hydra.sweep.dir))
    sweep_dir.mkdir(parents=True, exist_ok=True)
    log.info(
        f"Example Launcher(foo={self.foo}, bar={self.bar}) "
        f"is launching {len(job_overrides)} jobs locally"
    )
    log.info(f"Sweep output dir : {sweep_dir}")

    runs = []
    for offset, overrides in enumerate(job_overrides):
        idx = initial_job_idx + offset
        log.info(f"\t#{idx} : {' '.join(filter_overrides(overrides))}")
        sweep_config = self.config_loader.load_sweep_config(
            self.config, list(overrides)
        )
        with open_dict(sweep_config):
            # The job id typically comes from the underlying scheduler
            # (SLURM_JOB_ID for instance). It is not available here because we
            # are still in the main process; a remote launcher would populate
            # it before calling the task_function.
            sweep_config.hydra.job.id = f"job_id_for_{idx}"
            sweep_config.hydra.job.num = idx
        HydraConfig.instance().set_config(sweep_config)

        runs.append(
            run_job(
                config=sweep_config,
                task_function=self.task_function,
                job_dir_key="hydra.sweep.dir",
                job_subdir_key="hydra.sweep.subdir",
            )
        )
        # reconfigure the logging subsystem for Hydra as the run_job call
        # configured it for the Job; needed for launchers that call run_job
        # in the same process rather than spawning a new one.
        configure_log(self.config.hydra.hydra_logging, self.config.hydra.verbose)
    return runs
def __init__(self, task_name: str, config_loader: ConfigLoader) -> None:
    """Initialize Hydra, validating the primary config directory.

    :param task_name: task name
    :param config_loader: config loader
    :raises MissingConfigException: if the main config search path does not exist
    """
    setup_globals()
    self.config_loader = config_loader

    # if specified, make sure main config search path exists
    main_sources = (
        s for s in config_loader.get_sources() if s.provider == "main"
    )
    for source in main_sources:
        if not source.exists(""):
            raise MissingConfigException(
                missing_cfg_file=source.path,
                message=f"Primary config dir not found: {source}",
            )

    JobRuntime().set("name", task_name)
def execute_job(
    idx: int,
    overrides: Sequence[str],
    config_loader: ConfigLoader,
    config: DictConfig,
    task_function: TaskFunction,
    singleton_state: Dict[Any, Any],
    cmd_prefix: str,
    tsp_prefix: str,
) -> JobReturn:
    """Calls `run_job` in parallel

    Submits one job to the task spooler (tsp) via a shell command and stores
    the spooler's numeric job id in the JobReturn's `id` field.
    """
    setup_globals()
    Singleton.set_state(singleton_state)
    lst = " ".join(overrides)
    sweep_config = config_loader.load_sweep_config(config, list(overrides))
    with open_dict(sweep_config):
        sweep_config.hydra.job.id = "{}_{}".format(sweep_config.hydra.job.name, idx)
        sweep_config.hydra.job.num = idx
    HydraConfig.instance().set_config(sweep_config)

    # Closure passed to run_job in place of the user's task function: instead
    # of running the task, it enqueues the equivalent command line in tsp.
    def tsp_task_function(task_cfg):
        working_dir = os.getcwd()
        cmd = f"{cmd_prefix} {lst}"
        log.info(f"\t#{idx} : {lst}")
        cmd = f"cd {hydra.utils.get_original_cwd()} && {cmd} hydra.run.dir={working_dir}"
        # NOTE(review): shell=True on a command assembled from config values —
        # safe only as long as overrides/cmd_prefix are trusted; review if
        # this can ever receive untrusted input.
        job_id = int(subprocess.check_output(cmd, shell=True).rstrip())
        log.info(
            f"Submitted {idx} to TaskSpooler. View logs: {tsp_prefix} -t {job_id}"
        )
        return job_id

    ret = run_job(
        config=sweep_config,
        task_function=tsp_task_function,
        job_dir_key="hydra.sweep.dir",
        job_subdir_key="hydra.sweep.subdir",
    )
    # Expose the tsp job id (returned by the closure) as the JobReturn id.
    ret.id = ret.return_value
    return ret
def launch(
    launcher: RayLocalLauncher,
    job_overrides: Sequence[Sequence[str]],
    initial_job_idx: int,
) -> Sequence[JobReturn]:
    """Queue every sweep job on a local Ray cluster and block for results."""
    setup_globals()
    assert launcher.config is not None
    assert launcher.config_loader is not None
    assert launcher.task_function is not None
    configure_log(launcher.config.hydra.hydra_logging, launcher.config.hydra.verbose)

    sweep_dir = Path(str(launcher.config.hydra.sweep.dir))
    sweep_dir.mkdir(parents=True, exist_ok=True)
    log.info(
        f"Ray Launcher is launching {len(job_overrides)} jobs, "
        f"sweep output dir: {sweep_dir}"
    )

    start_ray(launcher.ray_init_cfg)

    pending = []
    for offset, overrides in enumerate(job_overrides):
        idx = initial_job_idx + offset
        log.info(f"\t#{idx} : {' '.join(filter_overrides(overrides))}")
        sweep_config = launcher.config_loader.load_sweep_config(
            launcher.config, list(overrides)
        )
        with open_dict(sweep_config):
            # The real job id typically comes from the underlying scheduler
            # and is not available in the main process; synthesize one here.
            sweep_config.hydra.job.id = f"job_id_for_{idx}"
            sweep_config.hydra.job.num = idx
        pending.append(
            launch_job_on_ray(
                launcher.ray_remote_cfg,
                sweep_config,
                launcher.task_function,
                Singleton.get_state(),
            )
        )

    # Block until every Ray task completes.
    return [ray.get(task) for task in pending]
def launch(self, job_overrides: Sequence[Sequence[str]]) -> Sequence[JobReturn]:
    """
    :param job_overrides: a List of List<String>, where each inner list is the arguments for one job run.
    :return: an array of return values from run_job with indexes corresponding to the input list indexes.
    """
    setup_globals()
    assert self.config is not None
    assert self.config_loader is not None
    assert self.task_function is not None
    configure_log(self.config.hydra.hydra_logging, self.config.hydra.verbose)

    sweep_dir = Path(str(self.config.hydra.sweep.dir))
    sweep_dir.mkdir(parents=True, exist_ok=True)

    joblib_args = ",".join([f"{k}={v}" for k, v in self.joblib.items()])
    log.info(f"Joblib.Parallel({joblib_args}) is launching {len(job_overrides)} jobs")
    log.info(f"Launching jobs, sweep output dir : {sweep_dir}")

    singleton_state = Singleton.get_state()
    for idx, overrides in enumerate(job_overrides):
        log.info(f"\t#{idx} : {' '.join(filter_overrides(overrides))}")

    # Fan the jobs out through joblib; each worker re-hydrates the
    # singleton state before running.
    runs = Parallel(**self.joblib)(
        delayed(execute_job)(
            idx,
            overrides,
            self.config_loader,
            self.config,
            self.task_function,
            singleton_state,
        )
        for idx, overrides in enumerate(job_overrides)
    )

    assert isinstance(runs, List)
    for run in runs:
        assert isinstance(run, JobReturn)
    return runs
def test_sweep_config_cache(self, hydra_restore_singletons: Any, path: str, monkeypatch: Any) -> None:
    """Only the `now` resolver cache is propagated into sweep configs."""
    setup_globals()
    monkeypatch.setenv("TEST_ENV", "test_env")
    loader = ConfigLoaderImpl(
        config_search_path=create_config_search_path(path)
    )
    overrides = ["+time=${now:%H-%M-%S}", "+test_env=${oc.env:TEST_ENV}"]
    master_cfg = loader.load_configuration(
        config_name="config.yaml",
        overrides=overrides,
        run_mode=RunMode.RUN,
    )

    # trigger resolution by type assertion
    assert isinstance(master_cfg.time, str)
    assert isinstance(master_cfg.test_env, str)

    master_cfg_cache = OmegaConf.get_cache(master_cfg)
    assert "now" in master_cfg_cache.keys()
    # oc.env is not cached as of OmegaConf 2.1
    assert "oc.env" not in master_cfg_cache.keys()
    assert master_cfg.test_env == "test_env"

    sweep_cfg = loader.load_sweep_config(
        master_config=master_cfg,
        sweep_overrides=overrides,
    )
    sweep_cfg_cache = OmegaConf.get_cache(sweep_cfg)
    # only the deterministic `now` entry is carried over
    assert list(sweep_cfg_cache.keys()) == ["now"]
    assert sweep_cfg_cache["now"] == master_cfg_cache["now"]

    # env lookups re-resolve live instead of being cached
    monkeypatch.setenv("TEST_ENV", "test_env2")
    assert sweep_cfg.test_env == "test_env2"
def launch(self, job_overrides: Sequence[Sequence[str]], initial_job_idx: int) -> Sequence[JobReturn]:
    """Implementation of Launcher.launch

    Runs jobs in batches of `self._n_jobs` worker processes.

    :param job_overrides: a List of List<String>, where each inner list is the arguments for one job run.
    :param initial_job_idx: Initial job idx in batch.
    :return: an array of return values from run_job with indexes corresponding to the input list indexes.
    """
    setup_globals()
    assert self.config is not None
    assert self.config_loader is not None
    assert self.task_function is not None
    configure_log(self.config.hydra.hydra_logging, self.config.hydra.verbose)
    sweep_dir = self.config.hydra.sweep.dir
    Path(str(sweep_dir)).mkdir(parents=True, exist_ok=True)
    logger.info(
        f"Local Launcher is launching {len(job_overrides)} jobs locally")
    logger.info(f"Launching jobs, sweep output dir : {sweep_dir}")
    # BUGFIX: removed a redundant pre-loop that logged every override set a
    # second time with un-offset indices; the main loop below already logs
    # each job with its true (offset) index.
    results = []
    workers = []
    for i, overrides in enumerate(job_overrides):
        idx = initial_job_idx + i
        lst = " ".join(filter_overrides(overrides))
        logger.info(f"\t#{idx} : {lst}")
        sweep_config = self.config_loader.load_sweep_config(
            self.config, list(overrides))
        with open_dict(sweep_config):
            sweep_config.hydra.job.id = f"job_id_for_{idx}"
            sweep_config.hydra.job.num = idx
        # NOTE(review): relies on a Process class exposing exception() and
        # result(), which the stdlib multiprocessing.Process does not —
        # presumably a project-local subclass; confirm.
        p = Process(
            target=run_job,
            kwargs=dict(
                config=sweep_config,
                task_function=self.task_function,
                job_dir_key="hydra.sweep.dir",
                job_subdir_key="hydra.sweep.subdir",
            ),
        )
        p.start()
        workers.append(p)

        # wait for current/last batch of workers
        if ((i + 1) % self._n_jobs == 0) or ((i + 1) == len(job_overrides)):
            for w in workers:
                w.join()
                # forward exceptions from the workers
                if w.exception():
                    raise w.exception()
            # book keeping
            results.extend([p.result() for p in workers])
            workers = []

    assert len(results) == len(job_overrides)
    return results
def launch(
    launcher: RQLauncher,
    job_overrides: Sequence[Sequence[str]],
    initial_job_idx: int,
) -> Sequence[JobReturn]:
    """
    :param job_overrides: a List of List<String>, where each inner list is the arguments for one job run.
    :param initial_job_idx: Initial job idx in batch.
    :return: an array of return values from run_job with indexes corresponding to the input list indexes.
    """
    # BUGFIX: return annotation corrected from JobReturn to Sequence[JobReturn]
    # (the function builds and returns a list, as the docstring states).
    setup_globals()
    assert launcher.config is not None
    assert launcher.config_loader is not None
    assert launcher.task_function is not None
    configure_log(launcher.config.hydra.hydra_logging, launcher.config.hydra.verbose)

    sweep_dir = Path(str(launcher.config.hydra.sweep.dir))
    sweep_dir.mkdir(parents=True, exist_ok=True)

    # RQ configuration
    rq_cfg = launcher.rq

    # Redis configuration: the mock connection runs jobs synchronously.
    is_async = not rq_cfg.redis.mock
    if is_async:
        connection = Redis(
            host=rq_cfg.redis.host,
            port=rq_cfg.redis.port,
            db=rq_cfg.redis.db,
            password=rq_cfg.redis.password,
        )
    else:
        log.info("Running in synchronous mode")
        connection = FakeStrictRedis()
    queue = Queue(
        name=rq_cfg.queue,
        connection=connection,
        is_async=is_async,
        serializer=cloudpickle,
    )

    # Enqueue jobs
    jobs = []
    singleton_state = Singleton.get_state()
    log.info(
        f"RQ Launcher is enqueuing {len(job_overrides)} job(s) in queue : {rq_cfg.queue}"
    )
    log.info("Sweep output dir : {}".format(sweep_dir))
    if not sweep_dir.is_absolute():
        # BUGFIX: Logger.warn is a deprecated alias of Logger.warning.
        log.warning(
            "Using relative sweep dir: Please be aware that dir will be relative to where workers are started from."
        )

    for idx, overrides in enumerate(job_overrides):
        description = " ".join(filter_overrides(overrides))
        enqueue_keywords = OmegaConf.to_container(rq_cfg.enqueue, resolve=True)
        # rq expects -1 (not None) for "no timeout / keep forever"
        if enqueue_keywords["job_timeout"] is None:
            enqueue_keywords["job_timeout"] = -1
        if enqueue_keywords["result_ttl"] is None:
            enqueue_keywords["result_ttl"] = -1
        if enqueue_keywords["failure_ttl"] is None:
            enqueue_keywords["failure_ttl"] = -1
        if enqueue_keywords["job_id"] is None:
            enqueue_keywords["job_id"] = str(uuid.uuid4())
        if enqueue_keywords["description"] is None:
            enqueue_keywords["description"] = description

        sweep_config = launcher.config_loader.load_sweep_config(
            launcher.config, list(overrides))
        with open_dict(sweep_config):
            sweep_config.hydra.job.id = enqueue_keywords["job_id"]
            sweep_config.hydra.job.num = initial_job_idx + idx

        job = queue.enqueue(
            execute_job,
            sweep_config=sweep_config,
            task_function=launcher.task_function,
            singleton_state=singleton_state,
            **enqueue_keywords,
        )
        jobs.append(job)
        log.info(f"Enqueued {job.get_id()}")
        log.info(f"\t#{idx+1} : {description}")

    log.info("Finished enqueuing")
    if rq_cfg.stop_after_enqueue:
        raise StopAfterEnqueue

    # Poll until every job has either finished or failed.
    log.info(f"Polling job statuses every {rq_cfg.wait_polling} sec")
    while True:
        job_ids_done = [
            job.get_id()
            for job in jobs
            if job.get_status() in ["finished", "failed"]
        ]
        if len(job_ids_done) == len(jobs):
            break
        else:
            time.sleep(rq_cfg.wait_polling)

    runs = []
    for job in jobs:
        result = job.result if job.result is not None else None
        runs.append(result)

    assert isinstance(runs, List)
    for run in runs:
        assert isinstance(run, JobReturn)
    return runs