def launch(self, job_overrides: Sequence[Sequence[str]], initial_job_idx: int) -> Sequence[JobReturn]:
    """Run every sweep job sequentially in the current process.

    :param job_overrides: one list of override strings per job.
    :param initial_job_idx: offset added to the enumeration index to form job ids.
    :return: the JobReturn of each executed job, in input order.
    """
    setup_globals()
    assert self.config is not None
    assert self.task_function is not None
    assert self.config_loader is not None
    configure_log(self.config.hydra.hydra_logging, self.config.hydra.verbose)
    sweep_dir = self.config.hydra.sweep.dir
    Path(str(sweep_dir)).mkdir(parents=True, exist_ok=True)
    log.info(f"Launching {len(job_overrides)} jobs locally")

    runs: List[JobReturn] = []
    for offset, override_set in enumerate(job_overrides):
        idx = initial_job_idx + offset
        lst = " ".join(filter_overrides(override_set))
        log.info(f"\t#{idx} : {lst}")
        sweep_config = self.config_loader.load_sweep_config(
            self.config, list(override_set)
        )
        with open_dict(sweep_config):
            sweep_config.hydra.job.id = idx
            sweep_config.hydra.job.num = idx
        runs.append(
            run_job(
                config=sweep_config,
                task_function=self.task_function,
                job_dir_key="hydra.sweep.dir",
                job_subdir_key="hydra.sweep.subdir",
            )
        )
        # run_job configured logging for the job; restore Hydra's own logging
        # since this launcher runs everything in the same process.
        configure_log(self.config.hydra.hydra_logging, self.config.hydra.verbose)
    return runs
def compose_config(
    self,
    config_name: Optional[str],
    overrides: List[str],
    run_mode: RunMode,
    with_log_configuration: bool = False,
    from_shell: bool = True,
    validate_sweep_overrides: bool = True,
) -> DictConfig:
    """
    Load and return the fully composed config.

    :param config_name: name of the primary config to compose (may be None)
    :param overrides: list of override strings to apply
    :param run_mode: compose config for run or for multirun?
    :param with_log_configuration: True to configure logging subsystem from the loaded config
    :param from_shell: True if the parameters are passed from the shell. used for more helpful error messages
    :param validate_sweep_overrides: presumably toggles validation of sweep-style
        overrides for the given run mode — TODO confirm against ConfigLoader
    :return: the composed DictConfig
    """
    cfg = self.config_loader.load_configuration(
        config_name=config_name,
        overrides=overrides,
        run_mode=run_mode,
        from_shell=from_shell,
        validate_sweep_overrides=validate_sweep_overrides,
    )
    if with_log_configuration:
        configure_log(cfg.hydra.hydra_logging, cfg.hydra.verbose)
        # Re-bind the module-level logger now that logging was reconfigured.
        global log
        log = logging.getLogger(__name__)
        self._print_debug_info(config_name, overrides, run_mode)
    return cfg
def compose_config(
    self,
    config_name: Optional[str],
    overrides: List[str],
    run_mode: RunMode,
    strict: Optional[bool] = None,
    with_log_configuration: bool = False,
    from_shell: bool = True,
) -> DictConfig:
    """
    Load and return the fully composed config.

    :param config_name: name of the primary config to compose (may be None)
    :param overrides: list of override strings to apply
    :param run_mode: compose config for run or for multirun?
    :param with_log_configuration: True to configure logging subsystem from the loaded config
    :param strict: None for default behavior (default to true for config file, false if no config file).
                   otherwise forces specific behavior.
    :param from_shell: True if the parameters are passed from the shell. used for more helpful error messages
    :return: the composed DictConfig
    """
    cfg = self.config_loader.load_configuration(
        config_name=config_name,
        overrides=overrides,
        strict=strict,
        run_mode=run_mode,
        from_shell=from_shell,
    )
    if with_log_configuration:
        configure_log(cfg.hydra.hydra_logging, cfg.hydra.verbose)
        # Re-bind the module-level logger now that logging was reconfigured.
        global log
        log = logging.getLogger(__name__)
        self._print_debug_info(cfg)
    return cfg
def __call__(
    self,
    sweep_overrides: List[str],
    job_dir_key: str,
    job_num: int,
    job_id: str,
    singleton_state: Dict[type, "Singleton"],
):
    """Execute a single sweep job inside the worker process.

    Restores the singleton state shipped over from the launcher process,
    rebuilds the job config from the sweep overrides, then delegates to
    run_job.
    """
    Singleton.set_state(singleton_state)
    configure_log(self.config.hydra.job_logging, self.config.hydra.verbose)
    setup_globals()
    sweep_config = self.config_loader.load_sweep_config(
        self.config, sweep_overrides
    )

    with open_dict(sweep_config.hydra.job) as job:
        # Populate new job variables; prefer the scheduler-assigned id when
        # running under SLURM, otherwise keep the id provided by the launcher.
        job.id = os.environ.get("SLURM_JOB_ID", job_id)
        sweep_config.hydra.job.num = job_num

    return run_job(
        config=sweep_config,
        task_function=self.task_function,
        job_dir_key=job_dir_key,
        job_subdir_key="hydra.sweep.subdir",
    )
def compose_config(
    self,
    config_name: Optional[str],
    overrides: List[str],
    strict: Optional[bool] = None,
    with_log_configuration: bool = False,
) -> DictConfig:
    """
    Load the composed config and record runtime metadata on it.

    :param self:
    :param config_name: name of the primary config to compose (may be None)
    :param overrides: list of override strings to apply
    :param with_log_configuration: True to configure logging subsystem from the loaded config
    :param strict: None for default behavior (default to true for config file, false if no config file).
                   otherwise forces specific behavior.
    :return: the composed DictConfig
    """
    cfg = self.config_loader.load_configuration(
        config_name=config_name, overrides=overrides, strict=strict
    )
    with open_dict(cfg):
        # Stamp the running Hydra version and the original working directory
        # into the runtime section of the config.
        from hydra import __version__

        cfg.hydra.runtime.version = __version__
        cfg.hydra.runtime.cwd = os.getcwd()
    if with_log_configuration:
        configure_log(cfg.hydra.hydra_logging, cfg.hydra.verbose)
        # Re-bind the module-level logger now that logging was reconfigured.
        global log
        log = logging.getLogger(__name__)
        self._print_debug_info()
    return cfg
def launch(
    self, job_overrides: Sequence[Sequence[str]], initial_job_idx: int
) -> Sequence[JobReturn]:
    # Submit one slurm job per override set by writing slurm/sh scripts and
    # launching them via slurm_utils. Jobs run out-of-process, so no
    # JobReturns are collected here.
    setup_globals()
    assert self.hydra_context is not None
    assert self.config is not None
    assert self.task_function is not None
    configure_log(self.config.hydra.hydra_logging, self.config.hydra.verbose)
    sweep_dir = self.config.hydra.sweep.dir
    Path(str(sweep_dir)).mkdir(parents=True, exist_ok=True)
    log.info("Launching {} jobs on slurm".format(len(job_overrides)))
    # Stays empty: submission is fire-and-forget, results are not gathered.
    runs: List[JobReturn] = []
    for idx, overrides in enumerate(job_overrides):
        idx = initial_job_idx + idx
        lst = " ".join(filter_overrides(overrides))
        log.info(f"\t#{idx} : {lst}")
        sweep_config = self.hydra_context.config_loader.load_sweep_config(
            self.config, list(overrides)
        )
        with open_dict(sweep_config):
            sweep_config.hydra.job.id = idx
            sweep_config.hydra.job.num = idx
        HydraConfig.instance().set_config(sweep_config)
        log.info(
            "\tJob name : {}".format(
                slurm_utils.resolve_name(sweep_config.slurm.job_name)
            )
        )
        # Write the slurm batch script and the shell wrapper, then submit.
        slurm_utils.write_slurm(sweep_config)
        slurm_utils.write_sh(sweep_config, " ".join(filter_overrides(overrides)))
        slurm_utils.launch_job(sweep_config)
    configure_log(self.config.hydra.hydra_logging, self.config.hydra.verbose)
    # NOTE(review): this reads `wait` from the LAST iteration's sweep_config
    # after the loop ends — presumably all jobs share the same setting, and the
    # sleep gives the scheduler time to register the submissions. Confirm.
    if sweep_config.wait:
        time.sleep(1)
    return runs
def launch(self, job_overrides: Sequence[Sequence[str]]) -> Sequence[JobReturn]:
    """Execute every sweep job one after the other in this process.

    :param job_overrides: one list of override strings per job.
    :return: the JobReturn of each executed job, in input order.
    """
    setup_globals()
    assert self.config is not None
    assert self.task_function is not None
    assert self.config_loader is not None
    configure_log(self.config.hydra.hydra_logging, self.config.hydra.verbose)
    sweep_dir = self.config.hydra.sweep.dir
    Path(str(sweep_dir)).mkdir(parents=True, exist_ok=True)
    log.info("Launching {} jobs locally".format(len(job_overrides)))

    runs: List[JobReturn] = []
    for job_idx, override_set in enumerate(job_overrides):
        log.info(
            "\t#{} : {}".format(job_idx, " ".join(filter_overrides(override_set)))
        )
        sweep_config = self.config_loader.load_sweep_config(
            self.config, list(override_set)
        )
        with open_dict(sweep_config):
            sweep_config.hydra.job.id = job_idx
            sweep_config.hydra.job.num = job_idx
        HydraConfig.instance().set_config(sweep_config)
        runs.append(
            run_job(
                config=sweep_config,
                task_function=self.task_function,
                job_dir_key="hydra.sweep.dir",
                job_subdir_key="hydra.sweep.subdir",
            )
        )
        # run_job configured logging for the job; restore Hydra's own logging.
        configure_log(self.config.hydra.hydra_logging, self.config.hydra.verbose)
    return runs
def launch(self, job_overrides: Sequence[Sequence[str]], initial_job_idx: int) -> Sequence[JobReturn]:
    """
    :param job_overrides: a List of List<String>, where each inner list is the arguments for one job run.
    :param initial_job_idx: Initial job idx in batch.
    :return: an array of return values from run_job with indexes corresponding to the input list indexes.
    """
    setup_globals()
    assert self.config is not None
    assert self.hydra_context is not None
    assert self.task_function is not None
    configure_log(self.config.hydra.hydra_logging, self.config.hydra.verbose)
    sweep_dir = Path(str(self.config.hydra.sweep.dir))
    sweep_dir.mkdir(parents=True, exist_ok=True)
    log.info(
        f"Example Launcher(foo={self.foo}, bar={self.bar}) is launching {len(job_overrides)} jobs locally"
    )
    log.info(f"Sweep output dir : {sweep_dir}")

    runs = []
    for offset, override_set in enumerate(job_overrides):
        job_idx = initial_job_idx + offset
        log.info(f"\t#{job_idx} : {' '.join(filter_overrides(override_set))}")
        sweep_config = self.hydra_context.config_loader.load_sweep_config(
            self.config, list(override_set)
        )
        with open_dict(sweep_config):
            # A real job id typically comes from the underlying scheduler
            # (SLURM_JOB_ID for instance) and would be populated remotely
            # before calling the task function; fabricate a placeholder since
            # this launcher runs everything locally.
            sweep_config.hydra.job.id = f"job_id_for_{job_idx}"
            sweep_config.hydra.job.num = job_idx

        # A launcher that executes in a different process must serialize the
        # singleton state along with the other parameters and restore it in
        # the child; this in-process launcher round-trips it to demonstrate
        # the pattern.
        Singleton.set_state(Singleton.get_state())

        runs.append(
            run_job(
                hydra_context=self.hydra_context,
                task_function=self.task_function,
                config=sweep_config,
                job_dir_key="hydra.sweep.dir",
                job_subdir_key="hydra.sweep.subdir",
            )
        )
        # run_job configured logging for the job; restore Hydra's own logging
        # since run_job is called in the same process here.
        configure_log(self.config.hydra.hydra_logging, self.config.hydra.verbose)
    return runs
def launch(
    launcher: RayAWSLauncher,
    job_overrides: Sequence[Sequence[str]],
    initial_job_idx: int,
) -> Sequence[JobReturn]:
    # Launch a batch of sweep jobs on an AWS Ray cluster: extend the cluster
    # setup commands with the pinned pip packages, pickle the job payloads to
    # a temp dir, dump the cluster config to a temp yaml, then hand everything
    # to launch_jobs.
    setup_globals()
    assert launcher.config is not None
    assert launcher.config_loader is not None
    assert launcher.task_function is not None
    setup_commands = launcher.env_setup.commands
    with read_write(setup_commands):
        # Pin each configured package version on the remote cluster.
        setup_commands.extend(
            [
                f"pip install {package}=={version}"
                for package, version in launcher.env_setup.pip_packages.items()
            ]
        )
        setup_commands.extend(launcher.ray_cfg.cluster.setup_commands)
    with read_write(launcher.ray_cfg.cluster):
        launcher.ray_cfg.cluster.setup_commands = setup_commands
    configure_log(launcher.config.hydra.hydra_logging, launcher.config.hydra.verbose)
    log.info(f"Ray Launcher is launching {len(job_overrides)} jobs, ")
    with tempfile.TemporaryDirectory() as local_tmp_dir:
        sweep_configs = []
        for idx, overrides in enumerate(job_overrides):
            idx = initial_job_idx + idx
            ostr = " ".join(filter_overrides(overrides))
            log.info(f"\t#{idx} : {ostr}")
            sweep_config = launcher.config_loader.load_sweep_config(
                launcher.config, list(overrides)
            )
            with open_dict(sweep_config):
                # job.id will be set on the EC2 instance before running the job.
                sweep_config.hydra.job.num = idx
            sweep_configs.append(sweep_config)
        _pickle_jobs(
            tmp_dir=local_tmp_dir,
            sweep_configs=sweep_configs,  # type: ignore
            task_function=launcher.task_function,
            singleton_state=Singleton.get_state(),
        )
        with tempfile.NamedTemporaryFile(suffix=".yaml", delete=False) as f:
            with open(f.name, "w") as file:
                OmegaConf.save(
                    config=launcher.ray_cfg.cluster, f=file.name, resolve=True
                )
            # NOTE(review): delete=False leaves this yaml on disk after the
            # launch — presumably intentional so the cluster config survives
            # this call; confirm it is cleaned up elsewhere.
            launcher.ray_yaml_path = f.name
            log.info(
                f"Saving RayClusterConf in a temp yaml file: {launcher.ray_yaml_path}."
            )
        return launch_jobs(
            launcher, local_tmp_dir, Path(HydraConfig.get().sweep.dir)
        )
def launch(
    launcher: RayAWSLauncher,
    job_overrides: Sequence[Sequence[str]],
    initial_job_idx: int,
) -> Sequence[JobReturn]:
    # Launch a batch of sweep jobs on an AWS Ray cluster: extend the cluster
    # setup commands with the pinned pip packages, pickle the job payloads to
    # a temp dir, then hand everything to launch_jobs.
    setup_globals()
    assert launcher.config is not None
    assert launcher.hydra_context is not None
    assert launcher.task_function is not None
    setup_commands = launcher.env_setup.commands
    # Only pin packages that have an explicit version configured.
    packages = filter(
        lambda x: x[1] is not None, launcher.env_setup.pip_packages.items()
    )
    with read_write(setup_commands):
        setup_commands.extend(
            [f"pip install {package}=={version}" for package, version in packages]
        )
        setup_commands.extend(launcher.ray_cfg.cluster.setup_commands)
    with read_write(launcher.ray_cfg.cluster):
        launcher.ray_cfg.cluster.setup_commands = setup_commands
    configure_log(launcher.config.hydra.hydra_logging, launcher.config.hydra.verbose)
    logging_config = OmegaConf.to_container(
        launcher.logging, resolve=True, enum_to_str=True
    )
    # Forward the launcher's logging settings to the Ray autoscaler SDK.
    sdk.configure_logging(**logging_config)
    log.info(f"Ray Launcher is launching {len(job_overrides)} jobs, ")
    with tempfile.TemporaryDirectory() as local_tmp_dir:
        sweep_configs = []
        for idx, overrides in enumerate(job_overrides):
            idx = initial_job_idx + idx
            ostr = " ".join(filter_overrides(overrides))
            log.info(f"\t#{idx} : {ostr}")
            sweep_config = launcher.hydra_context.config_loader.load_sweep_config(
                launcher.config, list(overrides)
            )
            with open_dict(sweep_config):
                # job.id will be set on the EC2 instance before running the job.
                sweep_config.hydra.job.num = idx
            sweep_configs.append(sweep_config)
        _pickle_jobs(
            tmp_dir=local_tmp_dir,
            hydra_context=launcher.hydra_context,
            sweep_configs=sweep_configs,  # type: ignore
            task_function=launcher.task_function,
            singleton_state=Singleton.get_state(),
        )
        return launch_jobs(
            launcher, local_tmp_dir, Path(launcher.config.hydra.sweep.dir)
        )
def launch(
    launcher: JoblibLauncher,
    job_overrides: Sequence[Sequence[str]],
    initial_job_idx: int,
) -> Sequence[JobReturn]:
    """Fan the sweep out over a Joblib process pool.

    :param job_overrides: a List of List<String>, where each inner list is the
        arguments for one job run.
    :param initial_job_idx: Initial job idx in batch.
    :return: an array of return values from run_job with indexes corresponding
        to the input list indexes.
    """
    setup_globals()
    assert launcher.config is not None
    assert launcher.task_function is not None
    assert launcher.hydra_context is not None
    configure_log(launcher.config.hydra.hydra_logging, launcher.config.hydra.verbose)
    sweep_dir = Path(str(launcher.config.hydra.sweep.dir))
    sweep_dir.mkdir(parents=True, exist_ok=True)

    # Joblib's backend is hard-coded to loky since the threading
    # backend is incompatible with Hydra
    joblib_cfg = launcher.joblib
    joblib_cfg["backend"] = "loky"
    process_joblib_cfg(joblib_cfg)

    cfg_desc = ",".join(f"{key}={val}" for key, val in joblib_cfg.items())
    log.info(
        "Joblib.Parallel({}) is launching {} jobs".format(cfg_desc, len(job_overrides))
    )
    log.info("Launching jobs, sweep output dir : {}".format(sweep_dir))
    for job_no, override_set in enumerate(job_overrides):
        log.info(
            "\t#{} : {}".format(job_no, " ".join(filter_overrides(override_set)))
        )

    singleton_state = Singleton.get_state()
    runs = Parallel(**joblib_cfg)(
        delayed(execute_job)(
            initial_job_idx + job_no,
            override_set,
            launcher.hydra_context,
            launcher.config,
            launcher.task_function,
            singleton_state,
        )
        for job_no, override_set in enumerate(job_overrides)
    )

    assert isinstance(runs, List)
    for run in runs:
        assert isinstance(run, JobReturn)
    return runs
def launch(self, job_overrides: Sequence[Sequence[str]], initial_job_idx: int) -> Sequence[JobReturn]:
    """Submit each job to Task Spooler, one at a time.

    :param job_overrides: a List of List<String>, where each inner list is the arguments for one job run.
    :param initial_job_idx: Initial job idx in batch.
    :return: an array of return values from run_job with indexes corresponding to the input list indexes.
    """
    setup_globals()
    assert self.config is not None
    assert self.config_loader is not None
    assert self.task_function is not None
    configure_log(self.config.hydra.hydra_logging, self.config.hydra.verbose)
    sweep_dir = Path(str(self.config.hydra.sweep.dir))
    sweep_dir.mkdir(parents=True, exist_ok=True)
    log.info(
        f"TaskSpooler Launcher is launching {len(job_overrides)} jobs locally"
    )
    log.info(f"Sweep output dir : {sweep_dir}")

    runs = []
    singleton_state = Singleton.get_state()
    for offset, override_set in enumerate(job_overrides):
        # Append the launcher-level hydra overrides to each job's overrides.
        merged_overrides = (*override_set, *self.hydra_overrides)
        runs.append(
            execute_job(
                initial_job_idx + offset,
                merged_overrides,
                self.config_loader,
                self.config,
                self.task_function,
                singleton_state,
                self.cmd_prefix,
                self.tsp_prefix,
            )
        )
        # Throttle submissions to the spooler.
        time.sleep(self.time_between_submit)

    assert isinstance(runs, List)
    for run in runs:
        assert isinstance(run, JobReturn)

    if self.tail_jobs:
        # Follow each job's output concurrently until it finishes.
        Parallel(n_jobs=len(job_overrides), backend="threading")(
            delayed(self.tail_job)(run.return_value) for run in runs
        )
    return runs
def launch(self, job_overrides: Sequence[Sequence[str]], initial_job_idx: int) -> Sequence[JobReturn]:
    """Run the sweep jobs one after the other in this process.

    :param job_overrides: a List of List<String>, where each inner list is the arguments for one job run.
    :param initial_job_idx: Initial job idx in batch.
    :return: an array of return values from run_job with indexes corresponding to the input list indexes.
    """
    setup_globals()
    assert self.config is not None
    assert self.config_loader is not None
    assert self.task_function is not None
    configure_log(self.config.hydra.hydra_logging, self.config.hydra.verbose)
    sweep_dir = Path(str(self.config.hydra.sweep.dir))
    sweep_dir.mkdir(parents=True, exist_ok=True)
    log.info(
        "Example Launcher(foo={}, bar={}) is launching {} jobs locally".format(
            self.foo, self.bar, len(job_overrides)
        )
    )
    log.info("Sweep output dir : {}".format(sweep_dir))

    runs = []
    for offset, override_set in enumerate(job_overrides):
        job_idx = initial_job_idx + offset
        log.info(
            "\t#{} : {}".format(job_idx, " ".join(filter_overrides(override_set)))
        )
        sweep_config = self.config_loader.load_sweep_config(
            self.config, list(override_set)
        )
        with open_dict(sweep_config):
            # A real job id typically comes from the underlying scheduler
            # (SLURM_JOB_ID for instance) and would be populated remotely
            # before calling the task function; fabricate a placeholder since
            # everything runs in-process here.
            sweep_config.hydra.job.id = "job_id_for_{}".format(job_idx)
            sweep_config.hydra.job.num = job_idx
        HydraConfig.instance().set_config(sweep_config)
        runs.append(
            run_job(
                config=sweep_config,
                task_function=self.task_function,
                job_dir_key="hydra.sweep.dir",
                job_subdir_key="hydra.sweep.subdir",
            )
        )
        # run_job configured logging for the job; restore Hydra's own logging
        # since run_job is called in the same process here.
        configure_log(self.config.hydra.hydra_logging, self.config.hydra.verbose)
    return runs
def launch(
    launcher: RayLocalLauncher,
    job_overrides: Sequence[Sequence[str]],
    initial_job_idx: int,
) -> Sequence[JobReturn]:
    """Run each sweep job as a Ray remote task on a local Ray instance.

    :param job_overrides: one list of override strings per job.
    :param initial_job_idx: offset added to the enumeration index to form job ids.
    :return: the JobReturn of every job, gathered via ray.get in input order.
    """
    setup_globals()
    assert launcher.config is not None
    assert launcher.config_loader is not None
    assert launcher.task_function is not None
    configure_log(launcher.config.hydra.hydra_logging, launcher.config.hydra.verbose)
    sweep_dir = Path(str(launcher.config.hydra.sweep.dir))
    sweep_dir.mkdir(parents=True, exist_ok=True)
    log.info(
        f"Ray Launcher is launching {len(job_overrides)} jobs, "
        f"sweep output dir: {sweep_dir}"
    )

    start_ray(launcher.ray_init_cfg)

    pending = []
    for offset, override_set in enumerate(job_overrides):
        job_idx = initial_job_idx + offset
        log.info(f"\t#{job_idx} : {' '.join(filter_overrides(override_set))}")
        sweep_config = launcher.config_loader.load_sweep_config(
            launcher.config, list(override_set)
        )
        with open_dict(sweep_config):
            # This typically coming from the underlying scheduler
            # (SLURM_JOB_ID for instance); it is not available in the main
            # process and should be populated remotely before calling the
            # task function, so a placeholder is used here.
            sweep_config.hydra.job.id = f"job_id_for_{job_idx}"
            sweep_config.hydra.job.num = job_idx
        pending.append(
            launch_job_on_ray(
                launcher.ray_remote_cfg,
                sweep_config,
                launcher.task_function,
                Singleton.get_state(),
            )
        )

    # Block until every remote task completes and gather the results.
    return [ray.get(obj_ref) for obj_ref in pending]
def launch(self, job_overrides: Sequence[Sequence[str]]) -> Sequence[JobReturn]:
    """Launch all sweep jobs through a Joblib Parallel pool.

    :param job_overrides: a List of List<String>, where each inner list is the
        arguments for one job run.
    :return: an array of return values from run_job with indexes corresponding
        to the input list indexes.
    """
    setup_globals()
    assert self.config is not None
    assert self.config_loader is not None
    assert self.task_function is not None
    configure_log(self.config.hydra.hydra_logging, self.config.hydra.verbose)
    sweep_dir = Path(str(self.config.hydra.sweep.dir))
    sweep_dir.mkdir(parents=True, exist_ok=True)
    joblib_desc = ",".join(f"{k}={v}" for k, v in self.joblib.items())
    log.info(
        "Joblib.Parallel({}) is launching {} jobs".format(
            joblib_desc, len(job_overrides)
        )
    )
    log.info("Launching jobs, sweep output dir : {}".format(sweep_dir))

    # Snapshot singleton state once so every worker gets the same view.
    singleton_state = Singleton.get_state()
    for job_no, override_set in enumerate(job_overrides):
        log.info(
            "\t#{} : {}".format(job_no, " ".join(filter_overrides(override_set)))
        )
    runs = Parallel(**self.joblib)(
        delayed(execute_job)(
            job_no,
            override_set,
            self.config_loader,
            self.config,
            self.task_function,
            singleton_state,
        )
        for job_no, override_set in enumerate(job_overrides)
    )
    assert isinstance(runs, List)
    for run in runs:
        assert isinstance(run, JobReturn)
    return runs
def compose_config(
    self,
    config_name: Optional[str],
    overrides: List[str],
    strict: Optional[bool] = None,
    with_log_configuration: bool = False,
) -> DictConfig:
    """
    Validate the primary config source exists, then load the composed config
    and record runtime metadata on it.

    :param self:
    :param config_name: name of the primary config to compose (may be None)
    :param overrides: list of override strings to apply
    :param with_log_configuration: True to configure logging subsystem from the loaded config
    :param strict: None for default behavior (default to true for config file, false if no config file).
                   otherwise forces specific behavior.
    :raises MissingConfigException: if the main provider's config dir is missing
    :return: the composed DictConfig
    """
    for source in self.config_loader.get_sources():
        # if specified, make sure main config search path exists
        if source.provider == "main":
            if not source.exists(""):
                raise MissingConfigException(
                    missing_cfg_file=source.path,
                    message=f"Primary config dir not found: {source}",
                )
    cfg = self.config_loader.load_configuration(
        config_name=config_name, overrides=overrides, strict=strict
    )
    with open_dict(cfg):
        # Stamp the running Hydra version and the original working directory
        # into the runtime section of the config.
        from hydra import __version__

        cfg.hydra.runtime.version = __version__
        cfg.hydra.runtime.cwd = os.getcwd()
    if with_log_configuration:
        configure_log(cfg.hydra.hydra_logging, cfg.hydra.verbose)
        # Re-bind the module-level logger now that logging was reconfigured.
        global log
        log = logging.getLogger(__name__)
        self._print_debug_info()
    return cfg
def launch(
    launcher: RQLauncher,
    job_overrides: Sequence[Sequence[str]],
    initial_job_idx: int,
) -> Sequence[JobReturn]:
    """Enqueue every sweep job on an RQ queue and poll until all complete.

    :param job_overrides: a List of List<String>, where each inner list is the arguments for one job run.
    :param initial_job_idx: Initial job idx in batch.
    :return: an array of return values from run_job with indexes corresponding to the input list indexes.
    :raises StopAfterEnqueue: if the launcher is configured to stop after enqueuing.
    """
    # BUGFIX: the return annotation said JobReturn but a list of JobReturn is
    # returned (as the docstring states).
    setup_globals()
    assert launcher.config is not None
    assert launcher.config_loader is not None
    assert launcher.task_function is not None
    configure_log(launcher.config.hydra.hydra_logging, launcher.config.hydra.verbose)
    sweep_dir = Path(str(launcher.config.hydra.sweep.dir))
    sweep_dir.mkdir(parents=True, exist_ok=True)

    # RQ configuration
    rq_cfg = launcher.rq

    # Redis configuration: mock mode swaps in an in-memory fake server and
    # makes the queue synchronous.
    is_async = not rq_cfg.redis.mock
    if is_async:
        connection = Redis(
            host=rq_cfg.redis.host,
            port=rq_cfg.redis.port,
            db=rq_cfg.redis.db,
            password=rq_cfg.redis.password,
        )
    else:
        log.info("Running in synchronous mode")
        connection = FakeStrictRedis()
    queue = Queue(
        name=rq_cfg.queue,
        connection=connection,
        is_async=is_async,
        serializer=cloudpickle,
    )

    # Enqueue jobs
    jobs = []
    singleton_state = Singleton.get_state()
    log.info(
        f"RQ Launcher is enqueuing {len(job_overrides)} job(s) in queue : {rq_cfg.queue}"
    )
    log.info("Sweep output dir : {}".format(sweep_dir))
    if not sweep_dir.is_absolute():
        # BUGFIX: Logger.warn is a deprecated alias; use warning().
        log.warning(
            "Using relative sweep dir: Please be aware that dir will be relative to where workers are started from."
        )
    for idx, overrides in enumerate(job_overrides):
        description = " ".join(filter_overrides(overrides))
        enqueue_keywords = OmegaConf.to_container(rq_cfg.enqueue, resolve=True)
        # RQ uses -1 to mean "no limit" for these settings; None in the plugin
        # config is translated accordingly.
        for ttl_key in ("job_timeout", "result_ttl", "failure_ttl"):
            if enqueue_keywords[ttl_key] is None:
                enqueue_keywords[ttl_key] = -1
        if enqueue_keywords["job_id"] is None:
            enqueue_keywords["job_id"] = str(uuid.uuid4())
        if enqueue_keywords["description"] is None:
            enqueue_keywords["description"] = description
        sweep_config = launcher.config_loader.load_sweep_config(
            launcher.config, list(overrides)
        )
        with open_dict(sweep_config):
            sweep_config.hydra.job.id = enqueue_keywords["job_id"]
            sweep_config.hydra.job.num = initial_job_idx + idx
        job = queue.enqueue(
            execute_job,
            sweep_config=sweep_config,
            task_function=launcher.task_function,
            singleton_state=singleton_state,
            **enqueue_keywords,
        )
        jobs.append(job)
        log.info(f"Enqueued {job.get_id()}")
        log.info(f"\t#{idx+1} : {description}")
    log.info("Finished enqueuing")
    if rq_cfg.stop_after_enqueue:
        raise StopAfterEnqueue

    # Poll until every job is either finished or failed.
    log.info(f"Polling job statuses every {rq_cfg.wait_polling} sec")
    while True:
        job_ids_done = [
            job.get_id()
            for job in jobs
            if job.get_status() in ["finished", "failed"]
        ]
        if len(job_ids_done) == len(jobs):
            break
        else:
            time.sleep(rq_cfg.wait_polling)

    runs = []
    for job in jobs:
        # Simplified: `job.result if job.result is not None else None` is just
        # job.result. A failed job yields None here and fails the assert below.
        runs.append(job.result)
    assert isinstance(runs, List)
    for run in runs:
        assert isinstance(run, JobReturn)
    return runs
def launch(self, job_overrides: Sequence[Sequence[str]], initial_job_idx: int) -> Sequence[JobReturn]:
    """Implementation of Launcher.launch

    Runs the jobs in worker processes, waiting in batches of self._n_jobs.

    :param job_overrides: a List of List<String>, where each inner list is the arguments for one job run.
    :param initial_job_idx: Initial job idx in batch.
    :return: an array of return values from run_job with indexes corresponding to the input list indexes.
    """
    setup_globals()
    assert self.config is not None
    assert self.config_loader is not None
    assert self.task_function is not None
    configure_log(self.config.hydra.hydra_logging, self.config.hydra.verbose)
    sweep_dir = self.config.hydra.sweep.dir
    Path(str(sweep_dir)).mkdir(parents=True, exist_ok=True)
    logger.info(
        f"Local Launcher is launching {len(job_overrides)} jobs locally")
    logger.info(f"Launching jobs, sweep output dir : {sweep_dir}")
    # BUGFIX: removed a redundant pre-loop that logged every override set a
    # second time with an index missing the initial_job_idx offset; the main
    # loop below already logs each job once with the correct index.
    results = []
    workers = []
    for i, overrides in enumerate(job_overrides):
        idx = initial_job_idx + i
        lst = " ".join(filter_overrides(overrides))
        logger.info(f"\t#{idx} : {lst}")
        sweep_config = self.config_loader.load_sweep_config(
            self.config, list(overrides))
        with open_dict(sweep_config):
            # Fabricated id: a real one would come from an external scheduler.
            sweep_config.hydra.job.id = f"job_id_for_{idx}"
            sweep_config.hydra.job.num = idx
        # NOTE(review): assumes Process is a custom subclass exposing
        # exception() and result() (multiprocessing.Process has neither) —
        # confirm against its definition.
        p = Process(
            target=run_job,
            kwargs=dict(
                config=sweep_config,
                task_function=self.task_function,
                job_dir_key="hydra.sweep.dir",
                job_subdir_key="hydra.sweep.subdir",
            ),
        )
        p.start()
        workers.append(p)

        # wait for current/last batch of workers
        if ((i + 1) % self._n_jobs == 0) or ((i + 1) == len(job_overrides)):
            for w in workers:
                w.join()
                # forward exceptions from the workers (fetch once instead of
                # calling exception() twice)
                exc = w.exception()
                if exc:
                    raise exc
            # book keeping
            results.extend([w.result() for w in workers])
            workers = []

    assert len(results) == len(job_overrides)
    return results