def hydra_restore_singletons() -> None:
    """
    Put the singletons back the way they were once the caller is done.

    A deep copy of ``Singleton.get_state()`` is taken before yielding; when
    the generator is resumed that copy is passed to ``Singleton.set_state``.
    """
    # NOTE(review): the body yields, so this is a generator even though the
    # return annotation says None.
    snapshot = copy.deepcopy(Singleton.get_state())
    yield
    Singleton.set_state(snapshot)
def launch_jobs(temp_dir: str) -> None:
    """
    Load the pickled job spec from ``temp_dir`` and launch every sweep config on Ray.

    The spec provides ``singleton_state``, ``sweep_configs`` and
    ``task_function``. Each sweep config gets a job id built from the instance
    id and its job number, the sweep directory is created once, and the job is
    submitted through ``launch_job_on_ray``. After all jobs are submitted their
    results are collected with ``ray.get`` and handed to ``_dump_job_return``.

    :param temp_dir: directory holding the job spec pickle; also passed to
        ``_dump_job_return`` together with the results.
    """
    spec_path = os.path.join(temp_dir, JOB_SPEC_PICKLE)
    with open(spec_path, "rb") as spec_file:
        # NOTE(review): unpickling executes arbitrary code; presumably this
        # file is written by the launching process -- confirm it is trusted.
        job_spec = pickle.load(spec_file)  # nosec

    singleton_state = job_spec["singleton_state"]
    sweep_configs = job_spec["sweep_configs"]
    task_function = job_spec["task_function"]

    instance_id = _get_instance_id()
    sweep_dir = None
    pending = []

    for sweep_config in sweep_configs:
        with open_dict(sweep_config):
            sweep_config.hydra.job.id = (
                f"{instance_id}_{sweep_config.hydra.job.num}"
            )

        setup_globals()
        Singleton.set_state(singleton_state)
        HydraConfig.instance().set_config(sweep_config)

        ray_init_cfg = sweep_config.hydra.launcher.ray_init_cfg
        ray_remote_cfg = sweep_config.hydra.launcher.ray_remote_cfg

        # The sweep directory only needs to be created on the first iteration.
        if sweep_dir is None:
            sweep_dir = Path(str(HydraConfig.get().sweep.dir))
            sweep_dir.mkdir(parents=True, exist_ok=True)

        start_ray(ray_init_cfg)
        pending.append(
            launch_job_on_ray(
                ray_remote_cfg, sweep_config, task_function, singleton_state
            )
        )

    result = [ray.get(handle) for handle in pending]
    _dump_job_return(result, temp_dir)
def __call__(
    self,
    sweep_overrides: List[str],
    job_dir_key: str,
    job_num: int,
    job_id: str,
    singleton_state: Dict[type, Singleton],
) -> JobReturn:
    """
    Run one sweep job: restore singletons, build the sweep config, call ``run_job``.

    :param sweep_overrides: overrides used to load the sweep config.
    :param job_dir_key: forwarded to ``run_job`` as ``job_dir_key``.
    :param job_num: stored in ``hydra.job.num`` of the sweep config.
    :param job_id: not read by this method; the job id is taken from
        ``submitit.JobEnvironment()`` instead.
    :param singleton_state: state passed to ``Singleton.set_state``.
    :return: whatever ``run_job`` returns.
    """
    # Imported lazily so that plugin discovery remains fast.
    import submitit

    assert self.config_loader is not None
    assert self.config is not None
    assert self.task_function is not None

    Singleton.set_state(singleton_state)
    setup_globals()

    job_config = self.config_loader.load_sweep_config(self.config, sweep_overrides)

    with open_dict(job_config.hydra.job) as job_node:
        # Fill in the per-job variables.
        job_node.id = submitit.JobEnvironment().job_id  # type: ignore
        job_config.hydra.job.num = job_num

    return run_job(
        config=job_config,
        task_function=self.task_function,
        job_dir_key=job_dir_key,
        job_subdir_key="hydra.sweep.subdir",
    )
def __call__(
    self,
    sweep_overrides: List[str],
    job_dir_key: str,
    job_num: int,
    job_id: str,
    singleton_state: Dict[type, "Singleton"],
):
    """
    Run one sweep job: restore singletons, configure logging, call ``run_job``.

    :param sweep_overrides: overrides used to load the sweep config.
    :param job_dir_key: forwarded to ``run_job`` as ``job_dir_key``.
    :param job_num: stored in ``hydra.job.num`` of the sweep config.
    :param job_id: used as ``hydra.job.id`` unless the ``SLURM_JOB_ID``
        environment variable is set, in which case that value is used.
    :param singleton_state: state passed to ``Singleton.set_state``.
    :return: whatever ``run_job`` returns.
    """
    Singleton.set_state(singleton_state)
    configure_log(self.config.hydra.job_logging, self.config.hydra.verbose)
    setup_globals()

    job_config = self.config_loader.load_sweep_config(self.config, sweep_overrides)

    with open_dict(job_config.hydra.job) as job_node:
        # Fill in the per-job variables; the SLURM job id wins when present.
        job_node.id = os.environ.get("SLURM_JOB_ID", job_id)
        job_config.hydra.job.num = job_num

    return run_job(
        config=job_config,
        task_function=self.task_function,
        job_dir_key=job_dir_key,
        job_subdir_key="hydra.sweep.subdir",
    )
def dispatch_job(
    idx: int,
    overrides: Sequence[str],
    config_loader: ConfigLoader,
    config: DictConfig,
    task_function: TaskFunction,
    singleton_state: Dict[Any, Any],
) -> JobReturn:
    """Calls `run_job` in parallel

    Note that Joblib's default backend runs isolated Python processes, see
    https://joblib.readthedocs.io/en/latest/parallel.html#shared-memory-semantics

    :param idx: job number; logged, stored in ``hydra.job.num`` and used as
        the suffix of ``hydra.job.id``.
    :param overrides: overrides used to load the sweep config.
    :param config_loader: loader that builds the sweep config.
    :param config: base config passed to ``load_sweep_config``.
    :param task_function: forwarded to ``run_job``.
    :param singleton_state: state passed to ``Singleton.set_state``.
    :return: whatever ``run_job`` returns.
    """
    setup_globals()
    Singleton.set_state(singleton_state)

    shown_overrides = " ".join(filter_overrides(overrides))
    log.info("\t#{} : {}".format(idx, shown_overrides))

    sweep_config = config_loader.load_sweep_config(config, list(overrides))
    with open_dict(sweep_config):
        job_name = sweep_config.hydra.job.name
        sweep_config.hydra.job.id = "{}_{}".format(job_name, idx)
        sweep_config.hydra.job.num = idx
    HydraConfig.instance().set_config(sweep_config)

    return run_job(
        config=sweep_config,
        task_function=task_function,
        job_dir_key="hydra.sweep.dir",
        job_subdir_key="hydra.sweep.subdir",
    )
def execute_job(
    idx: int,
    overrides: Sequence[str],
    config_loader: ConfigLoader,
    config: DictConfig,
    task_function: TaskFunction,
    singleton_state: Dict[Any, Any],
) -> JobReturn:
    """Calls `run_job` in parallel

    :param idx: job number; stored in ``hydra.job.num`` and used as the
        suffix of ``hydra.job.id``.
    :param overrides: overrides used to load the sweep config.
    :param config_loader: loader that builds the sweep config.
    :param config: base config passed to ``load_sweep_config``.
    :param task_function: forwarded to ``run_job``.
    :param singleton_state: state passed to ``Singleton.set_state``.
    :return: whatever ``run_job`` returns.
    """
    setup_globals()
    Singleton.set_state(singleton_state)

    sweep_config = config_loader.load_sweep_config(config, list(overrides))
    with open_dict(sweep_config):
        job_name = sweep_config.hydra.job.name
        sweep_config.hydra.job.id = "{}_{}".format(job_name, idx)
        sweep_config.hydra.job.num = idx
    HydraConfig.instance().set_config(sweep_config)

    return run_job(
        config=sweep_config,
        task_function=task_function,
        job_dir_key="hydra.sweep.dir",
        job_subdir_key="hydra.sweep.subdir",
    )
def launch(self, job_overrides: Sequence[Sequence[str]], initial_job_idx: int) -> Sequence[JobReturn]:
    """
    :param job_overrides: a List of List<String>, where each inner list is the arguments for one job run.
    :param initial_job_idx: Initial job idx in batch.
    :return: an array of return values from run_job with indexes corresponding to the input list indexes.
    """
    setup_globals()
    assert self.config is not None
    assert self.hydra_context is not None
    assert self.task_function is not None

    configure_log(self.config.hydra.hydra_logging, self.config.hydra.verbose)
    sweep_dir = Path(str(self.config.hydra.sweep.dir))
    sweep_dir.mkdir(parents=True, exist_ok=True)
    log.info(
        f"Example Launcher(foo={self.foo}, bar={self.bar}) is launching {len(job_overrides)} jobs locally"
    )
    log.info(f"Sweep output dir : {sweep_dir}")

    job_returns = []
    # Number the jobs starting from the batch's initial index.
    for idx, overrides in enumerate(job_overrides, start=initial_job_idx):
        lst = " ".join(filter_overrides(overrides))
        log.info(f"\t#{idx} : {lst}")

        sweep_config = self.hydra_context.config_loader.load_sweep_config(
            self.config, list(overrides)
        )
        with open_dict(sweep_config):
            # The job id typically comes from the underlying scheduler (SLURM_JOB_ID for instance).
            # In that case it is not available here, because we are still in the main process;
            # it should instead be populated remotely before calling the task_function.
            sweep_config.hydra.job.id = f"job_id_for_{idx}"
            sweep_config.hydra.job.num = idx

        # If your launcher executes code in a different process, it is important to restore
        # the singleton state in the new process.
        # To do this, you will likely need to serialize the singleton state along with the other
        # parameters passed to the child process.

        # happening on this process (executing launcher)
        singleton_state = Singleton.get_state()

        # happening on the spawned process (executing task_function in run_job)
        Singleton.set_state(singleton_state)

        job_returns.append(
            run_job(
                hydra_context=self.hydra_context,
                task_function=self.task_function,
                config=sweep_config,
                job_dir_key="hydra.sweep.dir",
                job_subdir_key="hydra.sweep.subdir",
            )
        )

        # Reconfigure the logging subsystem for Hydra, as the run_job call configured it for the job.
        # This is needed for launchers that call run_job in the same process instead of spawning a new one.
        configure_log(self.config.hydra.hydra_logging, self.config.hydra.verbose)

    return job_returns
def restore_singletons() -> Any:
    """
    A fixture that restores the singletons state after the function using it.

    Useful for functions that make a one-off change to singletons which
    should not affect other tests: a deep copy of ``Singleton.get_state()``
    is taken before yielding and passed to ``Singleton.set_state`` afterwards.
    """
    snapshot = copy.deepcopy(Singleton.get_state())
    yield
    Singleton.set_state(snapshot)
def hydra_restore_singletons() -> None:
    """
    Put the singletons and the registered resolvers back once the caller is done.

    Deep copies of ``Singleton.get_state()`` and ``BaseContainer._resolvers``
    are taken before yielding; when the generator is resumed the first is
    passed to ``Singleton.set_state`` and the second is assigned back to
    ``BaseContainer._resolvers``.
    """
    # NOTE(review): the body yields, so this is a generator even though the
    # return annotation says None.
    saved_singletons = copy.deepcopy(Singleton.get_state())
    saved_resolvers = copy.deepcopy(BaseContainer._resolvers)
    yield
    Singleton.set_state(saved_singletons)
    BaseContainer._resolvers = saved_resolvers
def test_restore_singleton_state_hack() -> None:
    """
    This is a hack that allows us to undo changes to the ConfigStore.
    During this test, the config store is being modified in Python imports.
    Python imports can only run once, so clearing the state during the tests would break
    the tests because it would not be reinitialized.

    A solution is to undo the changes after the last test.
    The reason this logic is in a test is that if it were outside, it would be executed during
    Pytest's test collection phase, which is before the tests are running - so it would not
    solve the problem.
    """
    # NOTE(review): `state` is not defined in this function; presumably it is a
    # module-level snapshot of the singleton state captured earlier -- confirm.
    Singleton.set_state(state)
def _run_job(
    sweep_config: DictConfig,
    task_function: TaskFunction,
    singleton_state: Dict[Any, Any],
) -> JobReturn:
    """
    Restore the singleton state, register the sweep config and call ``run_job``.

    :param sweep_config: config installed on ``HydraConfig`` and passed to ``run_job``.
    :param task_function: forwarded to ``run_job``.
    :param singleton_state: state passed to ``Singleton.set_state``.
    :return: whatever ``run_job`` returns.
    """
    setup_globals()
    Singleton.set_state(singleton_state)
    HydraConfig.instance().set_config(sweep_config)

    job_return = run_job(
        config=sweep_config,
        task_function=task_function,
        job_dir_key="hydra.sweep.dir",
        job_subdir_key="hydra.sweep.subdir",
    )
    return job_return
def execute_job(
    idx: int,
    overrides: Sequence[str],
    config_loader: ConfigLoader,
    config: DictConfig,
    task_function: TaskFunction,
    singleton_state: Dict[Any, Any],
    cmd_prefix: str,
    tsp_prefix: str,
) -> JobReturn:
    """Calls `run_job` in parallel

    The function actually run by ``run_job`` is a local wrapper that submits a
    shell command and returns the integer that command prints; that integer is
    also stored as ``id`` on the returned object.

    :param idx: job number; logged, stored in ``hydra.job.num`` and used as
        the suffix of ``hydra.job.id``.
    :param overrides: overrides used to load the sweep config and appended
        to the submitted command line.
    :param config_loader: loader that builds the sweep config.
    :param config: base config passed to ``load_sweep_config``.
    :param task_function: not used here; the local wrapper is passed to
        ``run_job`` instead.
    :param singleton_state: state passed to ``Singleton.set_state``.
    :param cmd_prefix: shell text placed in front of the overrides; inserted
        verbatim, so it must already be valid shell.
    :param tsp_prefix: only used in the log message that tells the user how
        to view the job's logs.
    :return: the ``run_job`` result with ``id`` set to its ``return_value``.
    :raises subprocess.CalledProcessError: if the submitted command exits
        with a non-zero status.
    :raises ValueError: if the command's output is not an integer.
    """
    # Local import: only needed to build the shell command below.
    import shlex

    setup_globals()
    Singleton.set_state(singleton_state)

    # Human-readable form for the log, and a shell-safe form for the command.
    # Without quoting, an override containing spaces, `$`, brackets or quotes
    # would be re-split or expanded by the shell.
    lst = " ".join(overrides)
    quoted_overrides = " ".join(shlex.quote(override) for override in overrides)

    sweep_config = config_loader.load_sweep_config(config, list(overrides))
    with open_dict(sweep_config):
        sweep_config.hydra.job.id = "{}_{}".format(sweep_config.hydra.job.name, idx)
        sweep_config.hydra.job.num = idx
    HydraConfig.instance().set_config(sweep_config)

    def tsp_task_function(task_cfg):
        # Read the cwd when the wrapper runs, not when it is defined.
        working_dir = os.getcwd()
        log.info(f"\t#{idx} : {lst}")

        original_cwd = shlex.quote(hydra.utils.get_original_cwd())
        run_dir_override = shlex.quote(f"hydra.run.dir={working_dir}")
        cmd = f"cd {original_cwd} && {cmd_prefix} {quoted_overrides} {run_dir_override}"

        # The command is expected to print the spooler's job id on stdout.
        job_id = int(subprocess.check_output(cmd, shell=True).rstrip())
        log.info(
            f"Submitted {idx} to TaskSpooler. View logs: {tsp_prefix} -t {job_id}"
        )
        return job_id

    ret = run_job(
        config=sweep_config,
        task_function=tsp_task_function,
        job_dir_key="hydra.sweep.dir",
        job_subdir_key="hydra.sweep.subdir",
    )
    ret.id = ret.return_value
    return ret
def test_singleton_get_state(hydra_restore_singletons: Any) -> None:
    """
    Check that ``Plugins`` stays registered across a get_state/set_state round trip.

    The state returned by ``Singleton.get_state()`` must not list ``Plugins``
    under ``"instances"``, yet ``Plugins`` must be present in
    ``Singleton._instances`` both before and after that state is applied.
    """
    captured = Singleton.get_state()
    assert Plugins not in captured["instances"]
    assert Plugins in Singleton._instances

    Singleton.set_state(captured)
    assert Plugins in Singleton._instances