Example #1
def execute_job(
    idx: int,
    overrides: Sequence[str],
    config_loader: ConfigLoader,
    config: DictConfig,
    task_function: TaskFunction,
    singleton_state: Dict[Any, Any],
) -> JobReturn:
    """Calls `run_job` in parallel
    """
    setup_globals()
    Singleton.set_state(singleton_state)

    sweep_config = config_loader.load_sweep_config(config, list(overrides))
    with open_dict(sweep_config):
        sweep_config.hydra.job.id = "{}_{}".format(sweep_config.hydra.job.name,
                                                   idx)
        sweep_config.hydra.job.num = idx
    HydraConfig.instance().set_config(sweep_config)

    ret = run_job(
        config=sweep_config,
        task_function=task_function,
        job_dir_key="hydra.sweep.dir",
        job_subdir_key="hydra.sweep.subdir",
    )

    return ret
Example #2
    def __call__(
        self,
        sweep_overrides: List[str],
        job_dir_key: str,
        job_num: int,
        job_id: str,
        singleton_state: Dict[type, "Singleton"],
    ):
        Singleton.set_state(singleton_state)
        configure_log(self.config.hydra.job_logging, self.config.hydra.verbose)
        setup_globals()
        sweep_config = self.config_loader.load_sweep_config(
            self.config, sweep_overrides)
        with open_dict(sweep_config.hydra.job) as job:
            # Populate new job variables
            if "SLURM_JOB_ID" in os.environ:
                job.id = os.environ["SLURM_JOB_ID"]
            else:
                job.id = job_id
            sweep_config.hydra.job.num = job_num

        return run_job(
            config=sweep_config,
            task_function=self.task_function,
            job_dir_key=job_dir_key,
            job_subdir_key="hydra.sweep.subdir",
        )
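The SLURM_JOB_ID fallback above can be expressed more compactly with os.environ.get. A minimal equivalent sketch of the same logic (same behavior, just a tighter idiom, not a change to Hydra's API):

        with open_dict(sweep_config.hydra.job) as job:
            # Prefer the scheduler-assigned id when present, otherwise use the launcher's id.
            job.id = os.environ.get("SLURM_JOB_ID", job_id)
            job.num = job_num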
Example #3
    def __call__(
        self,
        sweep_overrides: List[str],
        job_dir_key: str,
        job_num: int,
        job_id: str,
        singleton_state: Dict[type, Singleton],
    ) -> JobReturn:
        # lazy import to ensure plugin discovery remains fast
        import submitit

        assert self.config_loader is not None
        assert self.config is not None
        assert self.task_function is not None

        Singleton.set_state(singleton_state)
        setup_globals()
        sweep_config = self.config_loader.load_sweep_config(
            self.config, sweep_overrides)

        with open_dict(sweep_config.hydra.job) as job:
            # Populate new job variables
            job.id = submitit.JobEnvironment().job_id  # type: ignore
            sweep_config.hydra.job.num = job_num

        return run_job(
            config=sweep_config,
            task_function=self.task_function,
            job_dir_key=job_dir_key,
            job_subdir_key="hydra.sweep.subdir",
        )
Example #4
def dispatch_job(
    idx: int,
    overrides: Sequence[str],
    config_loader: ConfigLoader,
    config: DictConfig,
    task_function: TaskFunction,
    singleton_state: Dict[Any, Any],
) -> JobReturn:
    """Calls `run_job` in parallel

    Note that Joblib's default backend runs isolated Python processes, see
    https://joblib.readthedocs.io/en/latest/parallel.html#shared-memory-semantics
    """
    setup_globals()
    Singleton.set_state(singleton_state)

    log.info("\t#{} : {}".format(idx, " ".join(filter_overrides(overrides))))
    sweep_config = config_loader.load_sweep_config(config, list(overrides))
    with open_dict(sweep_config):
        sweep_config.hydra.job.id = "{}_{}".format(sweep_config.hydra.job.name,
                                                   idx)
        sweep_config.hydra.job.num = idx
    HydraConfig.instance().set_config(sweep_config)

    ret = run_job(
        config=sweep_config,
        task_function=task_function,
        job_dir_key="hydra.sweep.dir",
        job_subdir_key="hydra.sweep.subdir",
    )

    return ret
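The shared-memory note in the docstring is the reason for the setup_globals / Singleton.set_state preamble: with Joblib's default loky backend, each worker is a separate Python process with its own module globals. A self-contained sketch (independent of Hydra) that makes the isolation visible:

import os

from joblib import Parallel, delayed

# Each worker reports its own PID. With the default loky backend these differ from
# the parent's PID, so state set up in the parent process is not visible in the workers
# unless it is passed in explicitly, as dispatch_job does with singleton_state.
parent_pid = os.getpid()
worker_pids = Parallel(n_jobs=2)(delayed(os.getpid)() for _ in range(2))
print(parent_pid, worker_pids)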
Example #5
def hydra_restore_singletons() -> None:
    """
    Restore singletons state after the function returns
    """
    state = copy.deepcopy(Singleton.get_state())
    yield
    Singleton.set_state(state)
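Examples #5, #8 and #9 are generator functions; they only do their job when registered as pytest fixtures, so the decorator (presumably stripped when these snippets were extracted) is essential. A minimal sketch of how such a fixture is declared and consumed, assuming pytest:

import copy

import pytest

from hydra.core.singleton import Singleton

@pytest.fixture
def hydra_restore_singletons() -> None:
    """Restore the singletons' state after the test function returns."""
    state = copy.deepcopy(Singleton.get_state())
    yield
    Singleton.set_state(state)

def test_mutates_singletons(hydra_restore_singletons: None) -> None:
    ...  # any singleton mutation made here is rolled back after the test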
Example #6
def launch_jobs(temp_dir: str) -> None:
    runs = []
    with open(os.path.join(temp_dir, JOB_SPEC_PICKLE), "rb") as f:
        job_spec = pickle.load(f)  # nosec
        singleton_state = job_spec["singleton_state"]
        sweep_configs = job_spec["sweep_configs"]
        task_function = job_spec["task_function"]

        instance_id = _get_instance_id()

        sweep_dir = None

        for sweep_config in sweep_configs:
            with open_dict(sweep_config):
                sweep_config.hydra.job.id = (
                    f"{instance_id}_{sweep_config.hydra.job.num}"
                )
            setup_globals()
            Singleton.set_state(singleton_state)
            HydraConfig.instance().set_config(sweep_config)
            ray_init_cfg = sweep_config.hydra.launcher.ray_init_cfg
            ray_remote_cfg = sweep_config.hydra.launcher.ray_remote_cfg

            if not sweep_dir:
                sweep_dir = Path(str(HydraConfig.get().sweep.dir))
                sweep_dir.mkdir(parents=True, exist_ok=True)

            start_ray(ray_init_cfg)
            ray_obj = launch_job_on_ray(
                ray_remote_cfg, sweep_config, task_function, singleton_state
            )
            runs.append(ray_obj)

    result = [ray.get(run) for run in runs]
    _dump_job_return(result, temp_dir)
Example #7
    def launch(self, job_overrides: Sequence[Sequence[str]],
               initial_job_idx: int) -> Sequence[JobReturn]:
        """
        :param job_overrides: a list of override lists, where each inner list holds the arguments for one job run.
        :param initial_job_idx: initial job idx in the batch.
        :return: a list of run_job return values, with indexes corresponding to the input list.
        """
        setup_globals()
        assert self.config is not None
        assert self.hydra_context is not None
        assert self.task_function is not None

        configure_log(self.config.hydra.hydra_logging,
                      self.config.hydra.verbose)
        sweep_dir = Path(str(self.config.hydra.sweep.dir))
        sweep_dir.mkdir(parents=True, exist_ok=True)
        log.info(
            f"Example Launcher(foo={self.foo}, bar={self.bar}) is launching {len(job_overrides)} jobs locally"
        )
        log.info(f"Sweep output dir : {sweep_dir}")
        runs = []

        for idx, overrides in enumerate(job_overrides):
            idx = initial_job_idx + idx
            lst = " ".join(filter_overrides(overrides))
            log.info(f"\t#{idx} : {lst}")
            sweep_config = self.hydra_context.config_loader.load_sweep_config(
                self.config, list(overrides))
            with open_dict(sweep_config):
                # The job id typically comes from the underlying scheduler (e.g. SLURM_JOB_ID).
                # In that case it will not be available here, because we are still in the main
                # process; it should instead be populated remotely, before calling the task_function.
                sweep_config.hydra.job.id = f"job_id_for_{idx}"
                sweep_config.hydra.job.num = idx

            # If your launcher is executing code in a different process, it is important to restore
            # the singleton state in the new process.
            # To do this, you will likely need to serialize the singleton state along with the other
            # parameters passed to the child process.

            # happening on this process (executing launcher)
            state = Singleton.get_state()

            # happening on the spawned process (executing task_function in run_job)
            Singleton.set_state(state)

            ret = run_job(
                hydra_context=self.hydra_context,
                task_function=self.task_function,
                config=sweep_config,
                job_dir_key="hydra.sweep.dir",
                job_subdir_key="hydra.sweep.subdir",
            )
            runs.append(ret)
            # Reconfigure the logging subsystem for Hydra, as the run_job call configured it for the job.
            # This is needed for launchers that call run_job in the same process instead of spawning a new one.
            configure_log(self.config.hydra.hydra_logging,
                          self.config.hydra.verbose)
        return runs
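The comments in Example #7 describe the hand-off a real launcher must perform when it runs jobs in another process: capture Singleton.get_state() in the launcher, ship it with the job, and call Singleton.set_state() first thing in the worker. A hedged sketch of that hand-off with the standard library's multiprocessing (child_main is an illustrative name, not Hydra API; whether the state pickles with the standard pickler depends on which singletons it holds, which is why the RQ launcher below passes serializer=cloudpickle):

import multiprocessing as mp

from hydra.core.singleton import Singleton

def child_main(singleton_state):  # hypothetical worker entry point
    # Restore the launcher's singleton state before touching any Hydra machinery.
    Singleton.set_state(singleton_state)
    # ... then load the sweep config and call run_job(...), as in the examples above.

if __name__ == "__main__":
    state = Singleton.get_state()     # captured in the launcher process
    ctx = mp.get_context("spawn")     # a clean interpreter, as most schedulers provide
    p = ctx.Process(target=child_main, args=(state,))
    p.start()
    p.join()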
Example #8
def restore_singletons() -> Any:
    """
    A fixture that restores the singletons' state after the test function returns.
    This is useful for tests that make a one-off change to singletons that should not
    affect other tests.
    """
    state = copy.deepcopy(Singleton.get_state())
    yield
    Singleton.set_state(state)
Example #9
def hydra_restore_singletons() -> None:
    """
    Restore singletons state after the function returns
    """
    state = copy.deepcopy(Singleton.get_state())
    resolvers = copy.deepcopy(BaseContainer._resolvers)
    yield
    Singleton.set_state(state)
    BaseContainer._resolvers = resolvers
Example #10
def test_restore_singleton_state_hack() -> None:
    """
    This is a hack that allows us to undo changes to the ConfigStore.
    During these tests, the config store is modified by Python imports.
    Python imports only run once, so clearing the state during the tests would break
    them, because the store would not be reinitialized.

    The solution is to undo the changes after the last test.
    The reason this logic is in a test is that, if it were outside, it would execute during
    pytest's test collection phase, which runs before the tests - so it would not solve the problem.
    """
    Singleton.set_state(state)
Example #11
def _run_job(
    sweep_config: DictConfig,
    task_function: TaskFunction,
    singleton_state: Dict[Any, Any],
) -> JobReturn:
    setup_globals()
    Singleton.set_state(singleton_state)
    HydraConfig.instance().set_config(sweep_config)
    return run_job(
        config=sweep_config,
        task_function=task_function,
        job_dir_key="hydra.sweep.dir",
        job_subdir_key="hydra.sweep.subdir",
    )
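A helper with this exact shape is convenient to ship to a process pool. A hedged sketch of how _run_job might be driven (sweep_configs and task_function are assumed to exist and to pickle cleanly):

from concurrent.futures import ProcessPoolExecutor

from hydra.core.singleton import Singleton

state = Singleton.get_state()
with ProcessPoolExecutor(max_workers=4) as pool:
    futures = [pool.submit(_run_job, cfg, task_function, state) for cfg in sweep_configs]
    results = [f.result() for f in futures]  # JobReturn objects, in submission order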
Example #12
    def launch(
        self, job_overrides: Sequence[Sequence[str]], initial_job_idx: int
    ) -> Sequence[JobReturn]:
        # lazy import to ensure plugin discovery remains fast
        import submitit

        num_jobs = len(job_overrides)
        assert num_jobs > 0
        params = self.params
        # build executor
        init_params = {"folder": self.params["submitit_folder"]}
        specific_init_keys = {"max_num_timeout"}

        init_params.update(
            **{
                f"{self._EXECUTOR}_{x}": y
                for x, y in params.items()
                if x in specific_init_keys
            }
        )
        init_keys = specific_init_keys | {"submitit_folder"}
        executor = submitit.AutoExecutor(cluster=self._EXECUTOR, **init_params)

        # specify resources/parameters
        baseparams = set(dataclasses.asdict(BaseTarget()).keys())
        params = {
            x if x in baseparams else f"{self._EXECUTOR}_{x}": y
            for x, y in params.items()
            if x not in init_keys
        }
        executor.update_parameters(**params)

        log.info(
            f"Submitit '{self._EXECUTOR}' sweep output dir : "
            f"{self.config.hydra.sweep.dir}"
        )
        sweep_dir = Path(str(self.config.hydra.sweep.dir))
        sweep_dir.mkdir(parents=True, exist_ok=True)
        if "mode" in self.config.hydra.sweep:
            mode = int(str(self.config.hydra.sweep.mode), 8)
            os.chmod(sweep_dir, mode=mode)

        params = []

        for idx, overrides in enumerate(job_overrides):
            idx = initial_job_idx + idx
            lst = " ".join(filter_overrides(overrides))
            log.info(f"\t#{idx} : {lst}")
            params.append(
                (
                    list(overrides),
                    "hydra.sweep.dir",
                    idx,
                    f"job_id_for_{idx}",
                    Singleton.get_state(),
                )
            )

        jobs = executor.map_array(self, *zip(*params))
        return [j.results()[0] for j in jobs]
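The two dict comprehensions above split the launcher's parameters into constructor arguments, prefixed with the executor name so AutoExecutor can route them, and runtime parameters passed to update_parameters. A toy demonstration of the same partitioning, detached from submitit (the parameter names are illustrative):

params = {"submitit_folder": "/tmp/sub", "max_num_timeout": 2, "timeout_min": 60}
executor_name = "slurm"                    # stands in for self._EXECUTOR
specific_init_keys = {"max_num_timeout"}

init_params = {"folder": params["submitit_folder"]}
init_params.update(
    {f"{executor_name}_{k}": v for k, v in params.items() if k in specific_init_keys}
)
init_keys = specific_init_keys | {"submitit_folder"}
run_params = {k: v for k, v in params.items() if k not in init_keys}

print(init_params)  # {'folder': '/tmp/sub', 'slurm_max_num_timeout': 2}
print(run_params)   # {'timeout_min': 60}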
Example #13
def launch(
    launcher: RayAWSLauncher,
    job_overrides: Sequence[Sequence[str]],
    initial_job_idx: int,
) -> Sequence[JobReturn]:
    setup_globals()
    assert launcher.config is not None
    assert launcher.config_loader is not None
    assert launcher.task_function is not None

    setup_commands = launcher.env_setup.commands
    with read_write(setup_commands):
        setup_commands.extend([
            f"pip install {package}=={version}"
            for package, version in launcher.env_setup.pip_packages.items()
        ])
        setup_commands.extend(launcher.ray_cfg.cluster.setup_commands)

    with read_write(launcher.ray_cfg.cluster):
        launcher.ray_cfg.cluster.setup_commands = setup_commands

    configure_log(launcher.config.hydra.hydra_logging,
                  launcher.config.hydra.verbose)

    log.info(f"Ray Launcher is launching {len(job_overrides)} jobs, ")

    with tempfile.TemporaryDirectory() as local_tmp_dir:
        sweep_configs = []
        for idx, overrides in enumerate(job_overrides):
            idx = initial_job_idx + idx
            ostr = " ".join(filter_overrides(overrides))
            log.info(f"\t#{idx} : {ostr}")
            sweep_config = launcher.config_loader.load_sweep_config(
                launcher.config, list(overrides))
            with open_dict(sweep_config):
                # job.id will be set on the EC2 instance before running the job.
                sweep_config.hydra.job.num = idx

            sweep_configs.append(sweep_config)

        _pickle_jobs(
            tmp_dir=local_tmp_dir,
            sweep_configs=sweep_configs,  # type: ignore
            task_function=launcher.task_function,
            singleton_state=Singleton.get_state(),
        )

        with tempfile.NamedTemporaryFile(suffix=".yaml", delete=False) as f:
            with open(f.name, "w") as file:
                OmegaConf.save(config=launcher.ray_cfg.cluster,
                               f=file.name,
                               resolve=True)
            launcher.ray_yaml_path = f.name
            log.info(
                f"Saving RayClusterConf in a temp yaml file: {launcher.ray_yaml_path}."
            )

            return launch_jobs(launcher, local_tmp_dir,
                               Path(HydraConfig.get().sweep.dir))
Example #14
    def launch(
        self, job_overrides: Sequence[Sequence[str]], initial_job_idx: int
    ) -> Sequence[JobReturn]:
        # lazy import to ensure plugin discovery remains fast
        import submitit

        num_jobs = len(job_overrides)
        assert num_jobs > 0

        # make sure you don't change inplace
        queue_parameters = self.queue_parameters.copy()
        OmegaConf.set_struct(queue_parameters, True)
        if self.queue == "auto":
            max_num_timeout = self.queue_parameters.auto.max_num_timeout
            with open_dict(queue_parameters):
                del queue_parameters.auto["max_num_timeout"]
            executor = submitit.AutoExecutor(
                folder=self.folder, max_num_timeout=max_num_timeout
            )
        elif self.queue == "slurm":
            max_num_timeout = self.queue_parameters.slurm.max_num_timeout
            with open_dict(queue_parameters):
                del queue_parameters.slurm["max_num_timeout"]
            executor = submitit.SlurmExecutor(
                folder=self.folder, max_num_timeout=max_num_timeout
            )
        elif self.queue == "local":
            executor = submitit.LocalExecutor(folder=self.folder)
        else:
            raise RuntimeError("Unsupported queue type {}".format(self.queue))

        executor.update_parameters(**queue_parameters[self.queue])

        log.info("Sweep output dir : {}".format(self.config.hydra.sweep.dir))
        sweep_dir = Path(str(self.config.hydra.sweep.dir))
        sweep_dir.mkdir(parents=True, exist_ok=True)
        if "mode" in self.config.hydra.sweep:
            mode = int(str(self.config.hydra.sweep.mode), 8)
            os.chmod(sweep_dir, mode=mode)

        params = []

        for idx, overrides in enumerate(job_overrides):
            idx = initial_job_idx + idx
            lst = " ".join(filter_overrides(overrides))
            log.info(f"\t#{idx} : {lst}")
            params.append(
                (
                    list(overrides),
                    "hydra.sweep.dir",
                    idx,
                    f"job_id_for_{idx}",
                    Singleton.get_state(),
                )
            )

        jobs = executor.map_array(self, *zip(*params))
        return [j.results()[0] for j in jobs]
Example #15
    def launch(self, job_overrides: Sequence[Sequence[str]],
               initial_job_idx: int) -> Sequence[JobReturn]:
        # lazy import to ensure plugin discovery remains fast
        import submitit

        num_jobs = len(job_overrides)
        assert num_jobs > 0

        # make sure you don't change inplace
        queue_parameters = self.queue_parameters.copy()
        OmegaConf.set_struct(queue_parameters, True)
        executors = {
            QueueType.auto: submitit.AutoExecutor,
            QueueType.slurm: submitit.SlurmExecutor,
            QueueType.local: submitit.LocalExecutor,
        }
        init_parameters = {
            "cluster", "max_num_timeout", "slurm_max_num_timeout"
        }
        executor = executors[self.queue](
            folder=self.folder,
            **{
                x: y
                for x, y in queue_parameters[self.queue.value].items()
                if x in init_parameters
            },
        )
        executor.update_parameters(
            **{
                x: y
                for x, y in queue_parameters[self.queue.value].items()
                if x not in init_parameters
            })

        log.info("Submitit '{}' sweep output dir : {}".format(
            self.queue.value, self.config.hydra.sweep.dir))
        sweep_dir = Path(str(self.config.hydra.sweep.dir))
        sweep_dir.mkdir(parents=True, exist_ok=True)
        if "mode" in self.config.hydra.sweep:
            mode = int(str(self.config.hydra.sweep.mode), 8)
            os.chmod(sweep_dir, mode=mode)

        params = []

        for idx, overrides in enumerate(job_overrides):
            idx = initial_job_idx + idx
            lst = " ".join(filter_overrides(overrides))
            log.info(f"\t#{idx} : {lst}")
            params.append((
                list(overrides),
                "hydra.sweep.dir",
                idx,
                f"job_id_for_{idx}",
                Singleton.get_state(),
            ))

        jobs = executor.map_array(self, *zip(*params))
        return [j.results()[0] for j in jobs]
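Both variants above copy queue_parameters and enable struct mode before mutating, and both parse hydra.sweep.mode as an octal string. A small self-contained demonstration of these two idioms, which recur throughout the examples:

from omegaconf import OmegaConf, open_dict

cfg = OmegaConf.create({"job": {"name": "train"}})
OmegaConf.set_struct(cfg, True)   # struct mode: writing unknown keys now raises

with open_dict(cfg):              # temporarily relax struct mode
    cfg.job.id = "train_0"        # adding a new key is allowed inside the block

print(cfg.job.id)                 # train_0
print(int("770", 8))              # 504 == 0o770, as in the sweep.mode / os.chmod handling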
Example #16
def execute_job(
    idx: int,
    overrides: Sequence[str],
    config_loader: ConfigLoader,
    config: DictConfig,
    task_function: TaskFunction,
    singleton_state: Dict[Any, Any],
    cmd_prefix: str,
    tsp_prefix: str,
) -> JobReturn:
    """Calls `run_job` in parallel
    """
    setup_globals()
    Singleton.set_state(singleton_state)

    lst = " ".join(overrides)

    sweep_config = config_loader.load_sweep_config(config, list(overrides))
    with open_dict(sweep_config):
        sweep_config.hydra.job.id = "{}_{}".format(sweep_config.hydra.job.name,
                                                   idx)
        sweep_config.hydra.job.num = idx
    HydraConfig.instance().set_config(sweep_config)

    def tsp_task_function(task_cfg):
        working_dir = os.getcwd()
        cmd = f"{cmd_prefix} {lst}"
        log.info(f"\t#{idx} : {lst}")
        cmd = f"cd {hydra.utils.get_original_cwd()} && {cmd} hydra.run.dir={working_dir}"
        job_id = int(subprocess.check_output(cmd, shell=True).rstrip())
        log.info(
            f"Submitted {idx} to TaskSpooler. View logs: {tsp_prefix} -t {job_id}"
        )
        return job_id

    ret = run_job(
        config=sweep_config,
        task_function=tsp_task_function,
        job_dir_key="hydra.sweep.dir",
        job_subdir_key="hydra.sweep.subdir",
    )
    ret.id = ret.return_value

    return ret
Example #17
def launch(
    launcher: RayAWSLauncher,
    job_overrides: Sequence[Sequence[str]],
    initial_job_idx: int,
) -> Sequence[JobReturn]:
    setup_globals()
    assert launcher.config is not None
    assert launcher.hydra_context is not None
    assert launcher.task_function is not None

    setup_commands = launcher.env_setup.commands
    packages = filter(
        lambda x: x[1] is not None, launcher.env_setup.pip_packages.items()
    )
    with read_write(setup_commands):
        setup_commands.extend(
            [f"pip install {package}=={version}" for package, version in packages]
        )
        setup_commands.extend(launcher.ray_cfg.cluster.setup_commands)

    with read_write(launcher.ray_cfg.cluster):
        launcher.ray_cfg.cluster.setup_commands = setup_commands

    configure_log(launcher.config.hydra.hydra_logging, launcher.config.hydra.verbose)
    logging_config = OmegaConf.to_container(
        launcher.logging, resolve=True, enum_to_str=True
    )
    sdk.configure_logging(**logging_config)

    log.info(f"Ray Launcher is launching {len(job_overrides)} jobs, ")

    with tempfile.TemporaryDirectory() as local_tmp_dir:
        sweep_configs = []
        for idx, overrides in enumerate(job_overrides):
            idx = initial_job_idx + idx
            ostr = " ".join(filter_overrides(overrides))
            log.info(f"\t#{idx} : {ostr}")
            sweep_config = launcher.hydra_context.config_loader.load_sweep_config(
                launcher.config, list(overrides)
            )
            with open_dict(sweep_config):
                # job.id will be set on the EC2 instance before running the job.
                sweep_config.hydra.job.num = idx

            sweep_configs.append(sweep_config)

        _pickle_jobs(
            tmp_dir=local_tmp_dir,
            hydra_context=launcher.hydra_context,
            sweep_configs=sweep_configs,  # type: ignore
            task_function=launcher.task_function,
            singleton_state=Singleton.get_state(),
        )
        return launch_jobs(
            launcher, local_tmp_dir, Path(launcher.config.hydra.sweep.dir)
        )
Example #18
def launch(
    launcher: JoblibLauncher,
    job_overrides: Sequence[Sequence[str]],
    initial_job_idx: int,
) -> Sequence[JobReturn]:
    """
    :param job_overrides: a list of override lists, where each inner list holds the arguments for one job run.
    :param initial_job_idx: initial job idx in the batch.
    :return: a list of run_job return values, with indexes corresponding to the input list.
    """
    setup_globals()
    assert launcher.config is not None
    assert launcher.task_function is not None
    assert launcher.hydra_context is not None

    configure_log(launcher.config.hydra.hydra_logging, launcher.config.hydra.verbose)
    sweep_dir = Path(str(launcher.config.hydra.sweep.dir))
    sweep_dir.mkdir(parents=True, exist_ok=True)

    # Joblib's backend is hard-coded to loky since the threading
    # backend is incompatible with Hydra
    joblib_cfg = launcher.joblib
    joblib_cfg["backend"] = "loky"
    process_joblib_cfg(joblib_cfg)

    log.info(
        "Joblib.Parallel({}) is launching {} jobs".format(
            ",".join([f"{k}={v}" for k, v in joblib_cfg.items()]),
            len(job_overrides),
        )
    )
    log.info("Launching jobs, sweep output dir : {}".format(sweep_dir))
    for idx, overrides in enumerate(job_overrides):
        log.info("\t#{} : {}".format(idx, " ".join(filter_overrides(overrides))))

    singleton_state = Singleton.get_state()

    runs = Parallel(**joblib_cfg)(
        delayed(execute_job)(
            initial_job_idx + idx,
            overrides,
            launcher.hydra_context,
            launcher.config,
            launcher.task_function,
            singleton_state,
        )
        for idx, overrides in enumerate(job_overrides)
    )

    assert isinstance(runs, List)
    for run in runs:
        assert isinstance(run, JobReturn)
    return runs
Example #19
    def launch(self, job_overrides: Sequence[Sequence[str]],
               initial_job_idx: int) -> Sequence[JobReturn]:
        """
        :param job_overrides: a list of override lists, where each inner list holds the arguments for one job run.
        :param initial_job_idx: initial job idx in the batch.
        :return: a list of run_job return values, with indexes corresponding to the input list.
        """
        setup_globals()
        assert self.config is not None
        assert self.config_loader is not None
        assert self.task_function is not None

        configure_log(self.config.hydra.hydra_logging,
                      self.config.hydra.verbose)
        sweep_dir = Path(str(self.config.hydra.sweep.dir))
        sweep_dir.mkdir(parents=True, exist_ok=True)
        log.info(
            f"TaskSpooler Launcher is launching {len(job_overrides)} jobs locally"
        )
        log.info(f"Sweep output dir : {sweep_dir}")
        runs = []
        singleton_state = Singleton.get_state()

        for idx, overrides in enumerate(job_overrides):
            overrides = list(overrides)
            overrides.extend(self.hydra_overrides)
            overrides = tuple(overrides)

            ret = execute_job(
                initial_job_idx + idx,
                overrides,
                self.config_loader,
                self.config,
                self.task_function,
                singleton_state,
                self.cmd_prefix,
                self.tsp_prefix,
            )
            runs.append(ret)
            time.sleep(self.time_between_submit)

        assert isinstance(runs, List)
        for run in runs:
            assert isinstance(run, JobReturn)

        if self.tail_jobs:
            Parallel(n_jobs=len(job_overrides), backend='threading')(
                delayed(self.tail_job)(run.return_value) for run in runs)
        return runs
Example #20
def launch(
    launcher: RayLocalLauncher,
    job_overrides: Sequence[Sequence[str]],
    initial_job_idx: int,
) -> Sequence[JobReturn]:
    setup_globals()
    assert launcher.config is not None
    assert launcher.config_loader is not None
    assert launcher.task_function is not None

    configure_log(launcher.config.hydra.hydra_logging, launcher.config.hydra.verbose)
    sweep_dir = Path(str(launcher.config.hydra.sweep.dir))
    sweep_dir.mkdir(parents=True, exist_ok=True)
    log.info(
        f"Ray Launcher is launching {len(job_overrides)} jobs, "
        f"sweep output dir: {sweep_dir}"
    )

    start_ray(launcher.ray_init_cfg)

    runs = []
    for idx, overrides in enumerate(job_overrides):
        idx = initial_job_idx + idx
        ostr = " ".join(filter_overrides(overrides))
        log.info(f"\t#{idx} : {ostr}")
        sweep_config = launcher.config_loader.load_sweep_config(
            launcher.config, list(overrides)
        )
        with open_dict(sweep_config):
            # The job id typically comes from the underlying scheduler (e.g. SLURM_JOB_ID).
            # In that case it will not be available here, because we are still in the main
            # process; it should instead be populated remotely, before calling the task_function.
            sweep_config.hydra.job.id = f"job_id_for_{idx}"
            sweep_config.hydra.job.num = idx
            ray_obj = launch_job_on_ray(
                launcher.ray_remote_cfg,
                sweep_config,
                launcher.task_function,
                Singleton.get_state(),
            )
            runs.append(ray_obj)

    return [ray.get(run) for run in runs]
Example #21
    def launch(self,
               job_overrides: Sequence[Sequence[str]]) -> Sequence[JobReturn]:
        """
        :param job_overrides: a list of override lists, where each inner list holds the arguments for one job run.
        :return: a list of run_job return values, with indexes corresponding to the input list.
        """
        setup_globals()
        assert self.config is not None
        assert self.config_loader is not None
        assert self.task_function is not None

        configure_log(self.config.hydra.hydra_logging,
                      self.config.hydra.verbose)
        sweep_dir = Path(str(self.config.hydra.sweep.dir))
        sweep_dir.mkdir(parents=True, exist_ok=True)
        log.info("Joblib.Parallel({}) is launching {} jobs".format(
            ",".join([f"{k}={v}" for k, v in self.joblib.items()]),
            len(job_overrides),
        ))
        log.info("Launching jobs, sweep output dir : {}".format(sweep_dir))

        singleton_state = Singleton.get_state()

        for idx, overrides in enumerate(job_overrides):
            log.info("\t#{} : {}".format(idx, " ".join(
                filter_overrides(overrides))))

        runs = Parallel(**self.joblib)(delayed(execute_job)(
            idx,
            overrides,
            self.config_loader,
            self.config,
            self.task_function,
            singleton_state,
        ) for idx, overrides in enumerate(job_overrides))

        assert isinstance(runs, List)
        for run in runs:
            assert isinstance(run, JobReturn)
        return runs
Example #22
def launch(
    launcher: RQLauncher,
    job_overrides: Sequence[Sequence[str]],
    initial_job_idx: int,
) -> Sequence[JobReturn]:
    """
    :param job_overrides: a list of override lists, where each inner list holds the arguments for one job run.
    :param initial_job_idx: initial job idx in the batch.
    :return: a list of run_job return values, with indexes corresponding to the input list.
    """
    setup_globals()
    assert launcher.config is not None
    assert launcher.config_loader is not None
    assert launcher.task_function is not None

    configure_log(launcher.config.hydra.hydra_logging,
                  launcher.config.hydra.verbose)
    sweep_dir = Path(str(launcher.config.hydra.sweep.dir))
    sweep_dir.mkdir(parents=True, exist_ok=True)

    # RQ configuration
    rq_cfg = launcher.rq

    # Redis configuration
    is_async = not rq_cfg.redis.mock
    if is_async:
        connection = Redis(
            host=rq_cfg.redis.host,
            port=rq_cfg.redis.port,
            db=rq_cfg.redis.db,
            password=rq_cfg.redis.password,
        )
    else:
        log.info("Running in synchronous mode")
        connection = FakeStrictRedis()
    queue = Queue(
        name=rq_cfg.queue,
        connection=connection,
        is_async=is_async,
        serializer=cloudpickle,
    )

    # Enqueue jobs
    jobs = []
    singleton_state = Singleton.get_state()
    log.info(
        f"RQ Launcher is enqueuing {len(job_overrides)} job(s) in queue : {rq_cfg.queue}"
    )
    log.info("Sweep output dir : {}".format(sweep_dir))
    if not sweep_dir.is_absolute():
        log.warn(
            "Using relative sweep dir: Please be aware that dir will be relative to where workers are started from."
        )

    for idx, overrides in enumerate(job_overrides):
        description = " ".join(filter_overrides(overrides))

        enqueue_keywords = OmegaConf.to_container(rq_cfg.enqueue, resolve=True)
        if enqueue_keywords["job_timeout"] is None:
            enqueue_keywords["job_timeout"] = -1
        if enqueue_keywords["result_ttl"] is None:
            enqueue_keywords["result_ttl"] = -1
        if enqueue_keywords["failure_ttl"] is None:
            enqueue_keywords["failure_ttl"] = -1
        if enqueue_keywords["job_id"] is None:
            enqueue_keywords["job_id"] = str(uuid.uuid4())
        if enqueue_keywords["description"] is None:
            enqueue_keywords["description"] = description

        sweep_config = launcher.config_loader.load_sweep_config(
            launcher.config, list(overrides))
        with open_dict(sweep_config):
            sweep_config.hydra.job.id = enqueue_keywords["job_id"]
            sweep_config.hydra.job.num = initial_job_idx + idx

        job = queue.enqueue(
            execute_job,
            sweep_config=sweep_config,
            task_function=launcher.task_function,
            singleton_state=singleton_state,
            **enqueue_keywords,
        )
        jobs.append(job)

        log.info(f"Enqueued {job.get_id()}")
        log.info(f"\t#{idx+1} : {description}")

    log.info("Finished enqueuing")
    if rq_cfg.stop_after_enqueue:
        raise StopAfterEnqueue

    log.info(f"Polling job statuses every {rq_cfg.wait_polling} sec")
    while True:
        job_ids_done = [
            job.get_id() for job in jobs
            if job.get_status() in ["finished", "failed"]
        ]
        if len(job_ids_done) == len(jobs):
            break
        else:
            time.sleep(rq_cfg.wait_polling)

    runs = []
    for job in jobs:
        result = job.result if job.result is not None else None
        runs.append(result)

    assert isinstance(runs, List)
    for run in runs:
        assert isinstance(run, JobReturn)

    return runs
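Example #22 only enqueues; a worker process has to consume the queue for the jobs to run. A minimal sketch of the worker side, assuming the same Redis settings (the queue name and serializer must match what the launcher used):

from redis import Redis
from rq import Queue, Worker

connection = Redis(host="localhost", port=6379, db=0)
queue = Queue(name="default", connection=connection)

# Blocks and executes enqueued execute_job calls as they arrive.
# Roughly equivalent to the CLI: rq worker default --url redis://localhost:6379/0
Worker(queues=[queue], connection=connection).work()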
Example #23
    def instance(*args: Any, **kwargs: Any) -> "GlobalHydra":
        return Singleton.instance(GlobalHydra, *args, **kwargs)  # type: ignore
Example #24
def test_singleton_get_state(hydra_restore_singletons: Any) -> None:
    s = Singleton.get_state()
    assert Plugins not in s["instances"]
    assert Plugins in Singleton._instances
    Singleton.set_state(s)
    assert Plugins in Singleton._instances
Example #25
from hydra._internal.core_plugins.file_config_source import FileConfigSource
from hydra._internal.core_plugins.importlib_resources_config_source import (
    ImportlibResourcesConfigSource,
)
from hydra._internal.core_plugins.structured_config_source import StructuredConfigSource
from hydra.core.default_element import GroupDefault, InputDefault
from hydra.core.plugins import Plugins
from hydra.core.singleton import Singleton
from hydra.plugins.config_source import ConfigSource
from hydra.test_utils.config_source_common_tests import ConfigSourceTestSuite
from hydra.test_utils.test_utils import chdir_hydra_root

chdir_hydra_root()

# Manually save and restore the singletons to work around an issue with entries added to the
# ConfigStore at import time. Restoring is done in test_restore_singleton_state_hack(), which
# must be the last test in this file.
state = copy.deepcopy(Singleton.get_state())


@pytest.mark.parametrize(
    "type_, path",
    [
        pytest.param(
            FileConfigSource,
            "file://tests/test_apps/config_source_test/dir",
            id="FileConfigSource",
        ),
        pytest.param(
            ImportlibResourcesConfigSource,
            "pkg://tests.test_apps.config_source_test.dir",
            id="ImportlibResourcesConfigSource",
        ),
Example #26
    def instance(*args: Any, **kwargs: Any) -> "SourcesRegistry":
        return Singleton.instance(SourcesRegistry, *args, **kwargs)  # type: ignore
Example #27
    def instance(*args: Any, **kwargs: Any) -> "Plugins":
        ret = Singleton.instance(Plugins, *args, **kwargs)
        assert isinstance(ret, Plugins)
        return ret
Example #28
    def instance(*args: Any, **kwargs: Any) -> "ConfigStore":
        return Singleton.instance(ConfigStore, *args, **kwargs)  # type: ignore
Example #29
    def instance(*args: Any, **kwargs: Any) -> "HydraConfig":
        return Singleton.instance(HydraConfig, *args, **kwargs)  # type: ignore
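The instance() wrappers in Examples #23 and #26 through #29 all follow the same pattern: Singleton.instance memoizes one object per class, and that per-class cache is exactly what get_state captures and set_state restores. A short demonstration:

from hydra.core.hydra_config import HydraConfig
from hydra.core.singleton import Singleton

a = HydraConfig.instance()
b = HydraConfig.instance()
assert a is b                      # one cached instance per class

state = Singleton.get_state()      # snapshot includes the cached instances
Singleton.set_state(state)         # restoring the snapshot keeps them intact
assert HydraConfig.instance() is a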