def launch(self, job_overrides: Sequence[Sequence[str]], initial_job_idx: int) -> Sequence[JobReturn]:
    """Execute each override set sequentially in the current process.

    :param job_overrides: one list of override strings per job.
    :param initial_job_idx: offset added to each job's index within the batch.
    :return: the JobReturn of every job, in input order.
    """
    setup_globals()
    assert self.config is not None
    assert self.task_function is not None
    assert self.config_loader is not None
    configure_log(self.config.hydra.hydra_logging, self.config.hydra.verbose)
    Path(str(self.config.hydra.sweep.dir)).mkdir(parents=True, exist_ok=True)
    log.info(f"Launching {len(job_overrides)} jobs locally")
    results: List[JobReturn] = []
    for job_idx, job_args in enumerate(job_overrides, start=initial_job_idx):
        shown = " ".join(filter_overrides(job_args))
        log.info(f"\t#{job_idx} : {shown}")
        cfg = self.config_loader.load_sweep_config(self.config, list(job_args))
        with open_dict(cfg):
            cfg.hydra.job.id = job_idx
            cfg.hydra.job.num = job_idx
        results.append(
            run_job(
                config=cfg,
                task_function=self.task_function,
                job_dir_key="hydra.sweep.dir",
                job_subdir_key="hydra.sweep.subdir",
            )
        )
    # run_job reconfigured logging for the job; restore Hydra's own logging.
    configure_log(self.config.hydra.hydra_logging, self.config.hydra.verbose)
    return results
def run(
    self,
    config_name: Optional[str],
    task_function: TaskFunction,
    overrides: List[str],
    with_log_configuration: bool = True,
) -> JobReturn:
    """Compose a config and execute a single run, firing run callbacks around it."""
    composed = self.compose_config(
        config_name=config_name,
        overrides=overrides,
        with_log_configuration=with_log_configuration,
        run_mode=RunMode.RUN,
    )
    cbs = Callbacks(composed)
    cbs.on_run_start(config=composed, config_name=config_name)
    result = run_job(
        hydra_context=HydraContext(config_loader=self.config_loader, callbacks=cbs),
        task_function=task_function,
        config=composed,
        job_dir_key="hydra.run.dir",
        job_subdir_key=None,
        configure_logging=with_log_configuration,
    )
    cbs.on_run_end(config=composed, config_name=config_name, job_return=result)
    # Touch return_value so a failed job re-raises its exception here.
    _ = result.return_value
    return result
def __call__(
    self,
    sweep_overrides: List[str],
    job_dir_key: str,
    job_num: int,
    job_id: str,
    singleton_state: Dict[type, Singleton],
) -> JobReturn:
    """Entry point executed on the remote worker: restore state and run one job."""
    # Lazy import keeps Hydra's plugin discovery fast.
    import submitit

    assert self.config_loader is not None
    assert self.config is not None
    assert self.task_function is not None
    Singleton.set_state(singleton_state)
    setup_globals()
    cfg = self.config_loader.load_sweep_config(self.config, sweep_overrides)
    with open_dict(cfg.hydra.job) as job:
        # The scheduler-assigned id is only known here, on the worker.
        job.id = submitit.JobEnvironment().job_id  # type: ignore
        cfg.hydra.job.num = job_num
    return run_job(
        config=cfg,
        task_function=self.task_function,
        job_dir_key=job_dir_key,
        job_subdir_key="hydra.sweep.subdir",
    )
def __call__(
    self,
    sweep_overrides: List[str],
    job_dir_key: str,
    job_num: int,
    job_id: str,
    singleton_state: Dict[type, "Singleton"],
):
    """Entry point executed on the remote worker: restore state and run one job."""
    Singleton.set_state(singleton_state)
    configure_log(self.config.hydra.job_logging, self.config.hydra.verbose)
    setup_globals()
    cfg = self.config_loader.load_sweep_config(self.config, sweep_overrides)
    with open_dict(cfg.hydra.job) as job:
        # Prefer the scheduler-assigned SLURM id; fall back to the id we were given.
        job.id = os.environ.get("SLURM_JOB_ID", job_id)
        cfg.hydra.job.num = job_num
    return run_job(
        config=cfg,
        task_function=self.task_function,
        job_dir_key=job_dir_key,
        job_subdir_key="hydra.sweep.subdir",
    )
def dispatch_job(
    idx: int,
    overrides: Sequence[str],
    config_loader: ConfigLoader,
    config: DictConfig,
    task_function: TaskFunction,
    singleton_state: Dict[Any, Any],
) -> JobReturn:
    """Calls `run_job` in parallel

    Note that Joblib's default backend runs isolated Python processes, see
    https://joblib.readthedocs.io/en/latest/parallel.html#shared-memory-semantics
    """
    setup_globals()
    Singleton.set_state(singleton_state)
    shown = " ".join(filter_overrides(overrides))
    log.info(f"\t#{idx} : {shown}")
    cfg = config_loader.load_sweep_config(config, list(overrides))
    with open_dict(cfg):
        cfg.hydra.job.id = f"{cfg.hydra.job.name}_{idx}"
        cfg.hydra.job.num = idx
    HydraConfig.instance().set_config(cfg)
    return run_job(
        config=cfg,
        task_function=task_function,
        job_dir_key="hydra.sweep.dir",
        job_subdir_key="hydra.sweep.subdir",
    )
def execute_job(
    idx: int,
    overrides: Sequence[str],
    config_loader: ConfigLoader,
    config: DictConfig,
    task_function: TaskFunction,
    singleton_state: Dict[Any, Any],
) -> JobReturn:
    """Calls `run_job` in parallel """
    setup_globals()
    Singleton.set_state(singleton_state)
    cfg = config_loader.load_sweep_config(config, list(overrides))
    with open_dict(cfg):
        cfg.hydra.job.id = f"{cfg.hydra.job.name}_{idx}"
        cfg.hydra.job.num = idx
    HydraConfig.instance().set_config(cfg)
    return run_job(
        config=cfg,
        task_function=task_function,
        job_dir_key="hydra.sweep.dir",
        job_subdir_key="hydra.sweep.subdir",
    )
def run(
    self,
    config_name: Optional[str],
    task_function: TaskFunction,
    overrides: List[str],
    with_log_configuration: bool = True,
) -> JobReturn:
    """Compose a config for a single run and execute the task function."""
    composed = self.compose_config(
        config_name=config_name,
        overrides=overrides,
        with_log_configuration=with_log_configuration,
        run_mode=RunMode.RUN,
    )
    result = run_job(
        config=composed,
        task_function=task_function,
        job_dir_key="hydra.run.dir",
        job_subdir_key=None,
        configure_logging=with_log_configuration,
    )
    # Touch return_value so a failed job re-raises its exception here.
    _ = result.return_value
    return result
def launch(self, job_overrides: Sequence[Sequence[str]]) -> Sequence[JobReturn]:
    """Execute each override set sequentially in the current process.

    :param job_overrides: one list of override strings per job.
    :return: the JobReturn of every job, in input order.
    """
    setup_globals()
    assert self.config is not None
    assert self.task_function is not None
    assert self.config_loader is not None
    configure_log(self.config.hydra.hydra_logging, self.config.hydra.verbose)
    Path(str(self.config.hydra.sweep.dir)).mkdir(parents=True, exist_ok=True)
    log.info("Launching {} jobs locally".format(len(job_overrides)))
    results: List[JobReturn] = []
    for job_idx, job_args in enumerate(job_overrides):
        shown = " ".join(filter_overrides(job_args))
        log.info("\t#{} : {}".format(job_idx, shown))
        cfg = self.config_loader.load_sweep_config(self.config, list(job_args))
        with open_dict(cfg):
            cfg.hydra.job.id = job_idx
            cfg.hydra.job.num = job_idx
        HydraConfig.instance().set_config(cfg)
        results.append(
            run_job(
                config=cfg,
                task_function=self.task_function,
                job_dir_key="hydra.sweep.dir",
                job_subdir_key="hydra.sweep.subdir",
            )
        )
    # run_job reconfigured logging for the job; restore Hydra's own logging.
    configure_log(self.config.hydra.hydra_logging, self.config.hydra.verbose)
    return results
def launch(self, job_overrides: Sequence[Sequence[str]], initial_job_idx: int) -> Sequence[JobReturn]:
    """
    :param job_overrides: a List of List<String>, where each inner list is the arguments for one job run.
    :param initial_job_idx: Initial job idx in batch.
    :return: an array of return values from run_job with indexes corresponding to the input list indexes.
    """
    setup_globals()
    assert self.config is not None
    assert self.hydra_context is not None
    assert self.task_function is not None
    configure_log(self.config.hydra.hydra_logging, self.config.hydra.verbose)
    sweep_dir = Path(str(self.config.hydra.sweep.dir))
    sweep_dir.mkdir(parents=True, exist_ok=True)
    log.info(
        f"Example Launcher(foo={self.foo}, bar={self.bar}) is launching {len(job_overrides)} jobs locally"
    )
    log.info(f"Sweep output dir : {sweep_dir}")
    results = []
    for offset, job_args in enumerate(job_overrides):
        job_idx = initial_job_idx + offset
        shown = " ".join(filter_overrides(job_args))
        log.info(f"\t#{job_idx} : {shown}")
        cfg = self.hydra_context.config_loader.load_sweep_config(
            self.config, list(job_args)
        )
        with open_dict(cfg):
            # A real scheduler (e.g. SLURM_JOB_ID) provides the job id remotely;
            # in the main process we can only fabricate a placeholder, so a real
            # launcher would populate it on the worker before the task function runs.
            cfg.hydra.job.id = f"job_id_for_{job_idx}"
            cfg.hydra.job.num = job_idx
        # Launchers that execute in another process must serialize the singleton
        # state with the other parameters and restore it in the child process.
        # Done here in the launcher process:
        state = Singleton.get_state()
        # Done here in the spawned process (run_job executes the task function):
        Singleton.set_state(state)
        results.append(
            run_job(
                hydra_context=self.hydra_context,
                task_function=self.task_function,
                config=cfg,
                job_dir_key="hydra.sweep.dir",
                job_subdir_key="hydra.sweep.subdir",
            )
        )
    # run_job configured logging for the job; launchers that call run_job
    # in-process (no spawn) must restore Hydra's own logging afterwards.
    configure_log(self.config.hydra.hydra_logging, self.config.hydra.verbose)
    return results
def _run_job(
    sweep_config: DictConfig,
    task_function: TaskFunction,
    singleton_state: Dict[Any, Any],
) -> JobReturn:
    """Restore global/singleton state in the worker and execute one sweep job."""
    setup_globals()
    Singleton.set_state(singleton_state)
    HydraConfig.instance().set_config(sweep_config)
    result = run_job(
        config=sweep_config,
        task_function=task_function,
        job_dir_key="hydra.sweep.dir",
        job_subdir_key="hydra.sweep.subdir",
    )
    return result
def launch(self, job_overrides: Sequence[Sequence[str]], initial_job_idx: int) -> Sequence[JobReturn]:
    """
    :param job_overrides: a List of List<String>, where each inner list is the arguments for one job run.
    :param initial_job_idx: Initial job idx in batch.
    :return: an array of return values from run_job with indexes corresponding to the input list indexes.
    """
    setup_globals()
    assert self.config is not None
    assert self.config_loader is not None
    assert self.task_function is not None
    configure_log(self.config.hydra.hydra_logging, self.config.hydra.verbose)
    sweep_dir = Path(str(self.config.hydra.sweep.dir))
    sweep_dir.mkdir(parents=True, exist_ok=True)
    log.info(
        "Example Launcher(foo={}, bar={}) is launching {} jobs locally".format(
            self.foo, self.bar, len(job_overrides)
        )
    )
    log.info("Sweep output dir : {}".format(sweep_dir))
    results = []
    for job_idx, job_args in enumerate(job_overrides, start=initial_job_idx):
        shown = " ".join(filter_overrides(job_args))
        log.info("\t#{} : {}".format(job_idx, shown))
        cfg = self.config_loader.load_sweep_config(self.config, list(job_args))
        with open_dict(cfg):
            # A real scheduler (e.g. SLURM_JOB_ID) provides the job id remotely;
            # in the main process we can only fabricate a placeholder, so a real
            # launcher would populate it on the worker before the task function runs.
            cfg.hydra.job.id = "job_id_for_{}".format(job_idx)
            cfg.hydra.job.num = job_idx
        HydraConfig.instance().set_config(cfg)
        results.append(
            run_job(
                config=cfg,
                task_function=self.task_function,
                job_dir_key="hydra.sweep.dir",
                job_subdir_key="hydra.sweep.subdir",
            )
        )
    # run_job configured logging for the job; launchers that call run_job
    # in-process (no spawn) must restore Hydra's own logging afterwards.
    configure_log(self.config.hydra.hydra_logging, self.config.hydra.verbose)
    return results
def run(
    self,
    config_name: Optional[str],
    task_function: TaskFunction,
    overrides: List[str],
) -> JobReturn:
    """Compose a config for a single run and execute the task function."""
    composed = self.compose_config(
        config_name=config_name,
        overrides=overrides,
        with_log_configuration=True,
    )
    HydraConfig.instance().set_config(composed)
    return run_job(
        config=composed,
        task_function=task_function,
        job_dir_key="hydra.run.dir",
        job_subdir_key=None,
    )
def execute_job(
    idx: int,
    overrides: Sequence[str],
    config_loader: ConfigLoader,
    config: DictConfig,
    task_function: TaskFunction,
    singleton_state: Dict[Any, Any],
    cmd_prefix: str,
    tsp_prefix: str,
) -> JobReturn:
    """Submit one sweep job to TaskSpooler via `run_job`.

    The config/bookkeeping side of `run_job` runs here, but the actual task is
    executed by submitting a shell command to TaskSpooler (`tsp`), so the
    JobReturn's return_value is the TaskSpooler job id rather than the task's
    own result.
    """
    setup_globals()
    # Restore singletons serialized from the parent process.
    Singleton.set_state(singleton_state)
    # Raw (unfiltered) overrides: this string is re-used verbatim on the
    # submitted command line, not just for display.
    lst = " ".join(overrides)
    sweep_config = config_loader.load_sweep_config(config, list(overrides))
    with open_dict(sweep_config):
        sweep_config.hydra.job.id = "{}_{}".format(sweep_config.hydra.job.name, idx)
        sweep_config.hydra.job.num = idx
    HydraConfig.instance().set_config(sweep_config)

    def tsp_task_function(task_cfg) -> int:
        # Runs inside run_job with cwd already set to the job's output dir;
        # pin that dir on the submitted command via hydra.run.dir.
        working_dir = os.getcwd()
        cmd = f"{cmd_prefix} {lst}"
        log.info(f"\t#{idx} : {lst}")
        cmd = f"cd {hydra.utils.get_original_cwd()} && {cmd} hydra.run.dir={working_dir}"
        # NOTE(review): shell=True with interpolated overrides/paths — assumes
        # inputs are trusted (local CLI use); confirm before exposing further.
        job_id = int(subprocess.check_output(cmd, shell=True).rstrip())
        log.info(
            f"Submitted {idx} to TaskSpooler. View logs: {tsp_prefix} -t {job_id}"
        )
        return job_id

    ret = run_job(
        config=sweep_config,
        task_function=tsp_task_function,
        job_dir_key="hydra.sweep.dir",
        job_subdir_key="hydra.sweep.subdir",
    )
    # Surface the TaskSpooler job id as the JobReturn id as well.
    ret.id = ret.return_value
    return ret
def run(
    self,
    config_name: Optional[str],
    task_function: TaskFunction,
    overrides: List[str],
    with_log_configuration: bool = True,
) -> JobReturn:
    """Compose a config for a single run and execute the task function."""
    composed = self.compose_config(
        config_name=config_name,
        overrides=overrides,
        with_log_configuration=with_log_configuration,
        run_mode=RunMode.RUN,
    )
    return run_job(
        config=composed,
        task_function=task_function,
        job_dir_key="hydra.run.dir",
        job_subdir_key=None,
        configure_logging=with_log_configuration,
    )