Beispiel #1
0
    def compose_config(
        self,
        config_file: Optional[str],
        overrides: List[str],
        strict: Optional[bool] = None,
        with_log_configuration: bool = False,
    ) -> DictConfig:
        """
        Load the configuration through the config loader and record runtime details on it.

        The returned config has ``hydra.runtime.version`` set to the package version
        and ``hydra.runtime.cwd`` set to the current working directory.

        :param config_file: config file handed to the config loader (may be None)
        :param overrides: override strings handed to the config loader
        :param strict: None for default behavior (default to true for config file, false if no config file),
                       otherwise forces specific behavior.
        :param with_log_configuration: True to configure the logging subsystem from the loaded config,
                                       rebind the module-level ``log`` and print debug info
        :return: the loaded configuration
        """
        global log

        composed = self.config_loader.load_configuration(
            config_file=config_file, overrides=overrides, strict=strict
        )

        from .. import __version__

        # open_dict lets us add the runtime keys to the loaded config.
        with open_dict(composed):
            runtime = composed.hydra.runtime
            runtime.version = __version__
            runtime.cwd = os.getcwd()

        if not with_log_configuration:
            return composed

        configure_log(composed.hydra.hydra_logging, composed.hydra.verbose)
        # Re-fetch the module logger once logging has been configured.
        log = logging.getLogger(__name__)
        self._print_debug_info()
        return composed
Beispiel #2
0
    def launch(self, job_overrides):
        """
        Run each job of the sweep one after another and collect the results.

        :param job_overrides: an iterable of override lists, one inner list per job run.
        :return: a list with the value returned by run_job for each job, in input order.
        """

        def _configure_hydra_logging():
            # Always read from self.config so each call sees the current values.
            configure_log(self.config.hydra.hydra_logging,
                          self.config.hydra.verbose)

        setup_globals()
        _configure_hydra_logging()

        # Make sure the sweep output directory exists before any job runs.
        Path(str(self.config.hydra.sweep.dir)).mkdir(parents=True,
                                                     exist_ok=True)
        job_count = len(job_overrides)
        log.info("Launching {} jobs locally".format(job_count))

        results = []
        for job_num, job_args in enumerate(job_overrides):
            shown_args = " ".join(filter_overrides(job_args))
            log.info("\t#{} : {}".format(job_num, shown_args))

            job_config = self.config_loader.load_sweep_config(
                self.config, list(job_args))
            with open_dict(job_config):
                # Both the job id and the job number are the position in the sweep.
                job_config.hydra.job.id = job_num
                job_config.hydra.job.num = job_num
            HydraConfig().set_config(job_config)

            results.append(
                run_job(
                    config=job_config,
                    task_function=self.task_function,
                    job_dir_key="hydra.sweep.dir",
                    job_subdir_key="hydra.sweep.subdir",
                ))
            # NOTE(review): presumably run_job reconfigures logging for the job,
            # hence the launcher logging is set up again here - confirm.
            _configure_hydra_logging()

        return results
    def launch(self, job_overrides):
        """
        Submit every job of the sweep as a Ray remote call and wait for all results.

        :param job_overrides: a List of List<String>, where each inner list is the arguments for one job run.
        :return: a list of the values obtained with ray.get for each submitted job,
                 with indexes corresponding to the input list indexes.
        """
        configure_log(self.config.hydra.hydra_logging,
                      self.config.hydra.verbose)
        sweep_dir = Path(str(self.config.hydra.sweep.dir))
        sweep_dir.mkdir(parents=True, exist_ok=True)
        log.info("Ray Launcher is launching {} jobs locally".format(
            len(job_overrides)))
        log.info("Sweep output dir : {}".format(sweep_dir))

        # The remote wrapper depends only on self.config, so build it once
        # instead of re-creating it for every job inside the loop.
        # Note: `launch` below resolves to the module-level name, not this method.
        ray_remote_cfg = get_key(self.config, 'ray.remote')
        if ray_remote_cfg:
            # Forward the 'ray.remote' config entries as options to ray.remote.
            run_job_ray = ray.remote(**ray_remote_cfg)(launch)
        else:
            run_job_ray = ray.remote(launch)

        runs = []
        for idx, overrides in enumerate(job_overrides):
            log.info("\t#{} : {}".format(idx, " ".join(
                filter_overrides(overrides))))
            sweep_config = self.config_loader.load_sweep_config(
                self.config, list(overrides))
            with open_dict(sweep_config):
                # The job id typically comes from the underlying scheduler (SLURM_JOB_ID for instance).
                # In that case it is not available here, because we are still in the main process;
                # it should instead be populated remotely before calling the task_function.
                sweep_config.hydra.job.id = idx
                sweep_config.hydra.job.num = idx
            HydraConfig().set_config(sweep_config)

            # Submission returns a handle right away; results are fetched below.
            ret = run_job_ray.remote(
                config=sweep_config,
                task_function=self.task_function,
                job_dir_key="hydra.sweep.dir",
                job_subdir_key="hydra.sweep.subdir",
            )

            runs.append(ret)
            configure_log(self.config.hydra.hydra_logging,
                          self.config.hydra.verbose)

        # Resolve the handles in submission order so results line up with job_overrides.
        return [ray.get(run) for run in runs]
Beispiel #4
0
    def launch(
        self, job_overrides: Sequence[Sequence[str]]
    ) -> Sequence[JobReturn]:
        """
        Run each job of the sweep one after another and collect the results.

        :param job_overrides: a sequence of override sequences, where each inner sequence
                              holds the arguments for one job run.
        :return: the values returned by run_job, with indexes corresponding to the input indexes.
        """
        setup_globals()
        # The launcher must be fully set up (config, loader, task function) before use.
        assert self.config is not None
        assert self.config_loader is not None
        assert self.task_function is not None

        configure_log(self.config.hydra.hydra_logging,
                      self.config.hydra.verbose)

        sweep_dir = Path(str(self.config.hydra.sweep.dir))
        sweep_dir.mkdir(parents=True, exist_ok=True)

        banner = "Example Launcher(foo={}, bar={}) is launching {} jobs locally".format(
            self.foo, self.bar, len(job_overrides))
        log.info(banner)
        log.info("Sweep output dir : {}".format(sweep_dir))

        results = []
        for job_num, job_args in enumerate(job_overrides):
            shown_args = " ".join(filter_overrides(job_args))
            log.info("\t#{} : {}".format(job_num, shown_args))

            job_config = self.config_loader.load_sweep_config(
                self.config, list(job_args))
            with open_dict(job_config):
                # The job id typically comes from the underlying scheduler (SLURM_JOB_ID for instance).
                # In that case it is not available here, because we are still in the main process;
                # it should instead be populated remotely before calling the task_function.
                job_config.hydra.job.id = "job_id_for_{}".format(job_num)
                job_config.hydra.job.num = job_num
            HydraConfig().set_config(job_config)

            results.append(
                run_job(
                    config=job_config,
                    task_function=self.task_function,
                    job_dir_key="hydra.sweep.dir",
                    job_subdir_key="hydra.sweep.subdir",
                ))
            # NOTE(review): presumably run_job reconfigures logging for the job,
            # hence the launcher logging is set up again here - confirm.
            configure_log(self.config.hydra.hydra_logging,
                          self.config.hydra.verbose)

        return results
Beispiel #5
0
 def run_task(job):
     """
     Run a single sweep job.

     :param job: an (index, overrides) pair describing the job.
     :return: an (index, run_job return value) pair.
     """
     job_num, job_args = job
     shown_args = " ".join(filter_overrides(job_args))
     LOGGER.info("\t#{} : {}".format(job_num, shown_args))

     job_config = self.config_loader.load_sweep_config(
         self.config, list(job_args))
     with open_dict(job_config):
         # The job id is the sorted overrides joined with underscores.
         job_config.hydra.job.id = "_".join(sorted(job_args))
         job_config.hydra.job.num = job_num
     HydraConfig().set_config(job_config)

     result = run_job(
         config=job_config,
         task_function=self.task_function,
         job_dir_key="hydra.sweep.dir",
         job_subdir_key="hydra.sweep.subdir",
     )
     # NOTE(review): presumably run_job reconfigures logging for the job,
     # hence the launcher logging is set up again here - confirm.
     configure_log(self.config.hydra.hydra_logging,
                   self.config.hydra.verbose)
     return job_num, result
Beispiel #6
0
    def launch(self, job_overrides):
        """
        Run every job of the sweep, through MPI when more than one CPU is requested.

        :param job_overrides: an iterable of override lists, one inner list per job run.
        :return: a list with the value returned by run_job for each job, in input order.
        """
        setup_globals()
        configure_log(self.config.hydra.hydra_logging,
                      self.config.hydra.verbose)
        # Make sure the sweep output directory exists before any job runs.
        Path(str(self.config.hydra.sweep.dir)).mkdir(parents=True,
                                                     exist_ok=True)
        LOGGER.info("Launching {} jobs locally".format(len(job_overrides)))

        def run_task(job):
            """Run one (index, overrides) job and return (index, run_job result)."""
            job_num, job_args = job
            shown_args = " ".join(filter_overrides(job_args))
            LOGGER.info("\t#{} : {}".format(job_num, shown_args))

            job_config = self.config_loader.load_sweep_config(
                self.config, list(job_args))
            with open_dict(job_config):
                # The job id is the sorted overrides joined with underscores.
                job_config.hydra.job.id = "_".join(sorted(job_args))
                job_config.hydra.job.num = job_num
            HydraConfig().set_config(job_config)

            result = run_job(
                config=job_config,
                task_function=self.task_function,
                job_dir_key="hydra.sweep.dir",
                job_subdir_key="hydra.sweep.subdir",
            )
            configure_log(self.config.hydra.hydra_logging,
                          self.config.hydra.verbose)
            return job_num, result

        # Serial path: run the jobs in order in the current call.
        if not (self.ncpu > 1):
            return [run_task(job)[1] for job in enumerate(job_overrides)]

        # Parallel path: results are sorted so they follow the job index
        # before the index is stripped off again.
        indexed_jobs = list(enumerate(job_overrides))
        finished = sorted(
            MPI(jobs=indexed_jobs,
                func=run_task,
                ncpu=int(self.ncpu),
                batch=1))
        return [item[1] for item in finished]
from hydra.experimental import initialize, compose
from hydra.plugins.common.utils import configure_log

from core_lib.helpers.generate_data import generate_email, generate_random_string
from examples.combined_core_lib.core_lib.combined_core_lib import CombineCoreLib

import pymysql

#
# Docker
#
from core_lib.client.solr_client import SolrClient
from examples.test_core_lib.core_lib.data_layers.data.db.user import User

# Module-level test setup: everything below runs at import time, in this order.

# Let pymysql stand in for the MySQLdb module.
pymysql.install_as_MySQLdb()
# Configure logging with no explicit logging config and the verbose argument set to True.
configure_log(None, True)

# Directory containing this file; the other paths are derived from it.
current_path = os.path.join(os.path.dirname(os.path.realpath(__file__)))
# <this dir>/test_output
test_output_path = os.path.normpath(os.path.join(current_path, 'test_output'))
# 'examples' directory next to this file's directory (one level up, then 'examples').
example_path = os.path.normpath(os.path.join(current_path, '../', 'examples'))
combined_core_lib_path = os.path.join(example_path, 'combined_core_lib')

from pathlib import Path

# Load environment variables from <this dir>/test_data/load_env.env.
env_path = Path(os.path.join(current_path, 'test_data', 'load_env.env'))
load_dotenv(dotenv_path=env_path)

#
# Config
#
# Configuration directory of the combined_core_lib example.
config_directory = os.path.join(combined_core_lib_path, 'config')