def test_masked_copy_is_deep() -> None:
    """Check that ``OmegaConf.masked_copy`` is independent of its source.

    Also checks that passing a plain string instead of a config raises
    ``ValueError``.
    """
    cfg = OmegaConf.create({"a": {"b": 1, "c": 2}})
    expected = {"a": {"b": 1, "c": 2}}
    masked = OmegaConf.masked_copy(cfg, keys=["a"])
    assert masked == expected

    # Mutate the source after the copy was taken ...
    cfg.a.b = 2
    assert cfg != expected
    # ... the copy must still hold the original values; this is the check
    # that actually makes the test about depth of the copy.
    assert masked == expected

    with pytest.raises(ValueError):
        OmegaConf.masked_copy("fail", [])  # type: ignore
def test_masked_copy_is_deep():
    """Check that ``OmegaConf.masked_copy`` is independent of its source."""
    cfg = OmegaConf.create({"a": {"b": 1, "c": 2}})
    expected = {"a": {"b": 1, "c": 2}}
    masked = OmegaConf.masked_copy(cfg, keys=["a"])
    assert masked == expected

    # Mutate the source after the copy was taken ...
    cfg.a.b = 2
    assert cfg != expected
    # ... the copy must still hold the original values; without this check
    # the test would pass even for a shallow copy.
    assert masked == expected
def cli(cfg):
    """Application entry point: prepare the config, instantiate and run the task.

    Steps, in order:

    1. Record the current working directory in ``cfg.workdir``.
    2. If ``cfg.fromdir`` is set, load the config and state stored in that
       directory via ``from_workdir`` and merge them underneath ``cfg``
       (only keys that ``cfg`` does not define are taken from the stored
       config).
    3. Optionally seed torch and enable autograd anomaly detection.
    4. Instantiate the wave function via ``instantiate_wf``, move
       ``cfg['task']['wf']`` to ``cfg.device`` and call ``cfg['task']``.

    A ``hydra.errors.InstantiationException`` is unwrapped so the caller
    sees the underlying cause; when it has no cause it is re-raised as is.
    ``KeyboardInterrupt`` is logged and swallowed.
    """
    log.info('Entering application')
    cfg.workdir = str(Path.cwd())
    fromdir = OmegaConf.select(cfg, 'fromdir', throw_on_missing=True)
    state = None
    if fromdir:
        fromdir = Path(to_absolute_path(fromdir))
        cfg_orig, state = from_workdir(fromdir, cfg.state)
        # Keep only the stored keys that the current config does not define,
        # so values in ``cfg`` always win in the merge below.
        cfg_orig = OmegaConf.masked_copy(cfg_orig, cfg_orig.keys() - cfg.keys())
        cfg = OmegaConf.merge(cfg_orig, cfg)
    try:
        if cfg.seed is not None:
            log.info(f'Setting random seed: {cfg.seed}')
            torch.manual_seed(cfg.seed)
        # Read the device before ``cfg`` is replaced by the instantiated config.
        device = cfg.device
        if cfg.anomaly:
            torch.autograd.set_detect_anomaly(True)
            # ``warn`` is a deprecated alias; use ``warning`` like the
            # KeyboardInterrupt handler below.
            log.warning('Setting anomaly detection on')
        cfg = instantiate_wf(cfg, fromdir, state)
        log.info(f'Moving to {device}...')
        cfg['task']['wf'].to(device)
        log.info(f'Moved to {device}')
        call(cfg['task'], _convert_='all')
    except hydra.errors.InstantiationException as e:
        if e.__cause__ is None:
            # Nothing to unwrap: ``raise None`` would itself fail with a
            # TypeError and hide the real error.
            raise
        raise e.__cause__ from None
    except KeyboardInterrupt:
        log.warning('Interrupted!')
def get_current_configuration_copy(
    file_name: str = "threeML_config.yml", overwrite: bool = False
) -> None:
    """
    Write a copy of the CURRENT configuration to the user config directory.

    The top-level sections ``LAT``, ``GBM`` and ``catalogs`` are left out of
    the copy. The remaining sections are written as YAML with sorted keys
    and resolved interpolations.

    :param file_name: name of the file created inside the directory
        returned by ``get_path_of_user_config()``
    :param overwrite: allow replacing an already existing file
    :raises RuntimeError: if the file exists and ``overwrite`` is False
    """
    outfile: Path = get_path_of_user_config() / file_name

    if outfile.exists() and (not overwrite):
        raise RuntimeError(f"{outfile} exists! Set overwrite to True")

    _read_only_keys = ["LAT", "GBM", "catalogs"]
    _valid_keys = [
        k for k, _ in threeML_config.items() if k not in _read_only_keys
    ]

    config_copy = OmegaConf.masked_copy(threeML_config, _valid_keys)

    # Render the YAML *before* opening the file: opening with "w" truncates
    # it, so a failure while serialising must not wipe an existing config.
    yaml_text = OmegaConf.to_yaml(config_copy, sort_keys=True, resolve=True)

    with outfile.open("w") as f:
        f.write(yaml_text)
def run_job(
    config: DictConfig,
    task_function: TaskFunction,
    job_dir_key: str,
    job_subdir_key: Optional[str],
    configure_logging: bool = True,
) -> "JobReturn":
    """
    Run ``task_function`` inside the job's working directory and collect the result.

    The working directory is read from ``config`` at ``job_dir_key`` (joined
    with the value at ``job_subdir_key`` when that key is given), created if
    necessary and made the current directory while the task runs. During the
    run ``HydraConfig.instance().cfg`` points at a copy of the config that
    holds only the ``hydra`` node; the previous value and the previous
    working directory are restored in the ``finally`` clause.

    :param config: config containing a ``hydra`` node
    :param task_function: callable invoked with the config stripped of its
        ``hydra`` node
    :param job_dir_key: key in ``config`` holding the job directory
    :param job_subdir_key: optional key in ``config`` holding a sub directory
        appended to the job directory
    :param configure_logging: when True, call ``configure_log`` with
        ``config.hydra.job_logging`` and ``config.hydra.verbose``
    :return: a ``JobReturn`` with ``working_dir``, ``cfg``, ``hydra_cfg``,
        ``overrides``, ``return_value`` and ``task_name`` filled in
    """
    # Captured up front so the ``finally`` clause can restore them.
    old_cwd = os.getcwd()
    working_dir = str(OmegaConf.select(config, job_dir_key))
    orig_hydra_cfg = HydraConfig.instance().cfg
    if job_subdir_key is not None:
        # evaluate job_subdir_key lazily.
        # this is running on the client side in sweep and contains things such as job:id which
        # are only available there.
        subdir = str(OmegaConf.select(config, job_subdir_key))
        working_dir = os.path.join(working_dir, subdir)
    try:
        ret = JobReturn()
        ret.working_dir = working_dir
        # Work on a private copy so the caller's ``config`` keeps its hydra node.
        task_cfg = copy.deepcopy(config)
        hydra_cfg = OmegaConf.masked_copy(task_cfg, "hydra")
        # maintain parent to preserve interpolation links from hydra_cfg to job_cfg
        hydra_cfg._set_parent(task_cfg)
        # Remove the hydra node from the config handed to the task; the two
        # context managers presumably lift read-only / struct restrictions
        # for the duration of the deletion.
        with read_write(task_cfg):
            with open_dict(task_cfg):
                del task_cfg["hydra"]
        HydraConfig.instance().cfg = hydra_cfg  # type: ignore

        ret.cfg = task_cfg
        ret.hydra_cfg = hydra_cfg
        overrides = OmegaConf.to_container(config.hydra.overrides.task)
        assert isinstance(overrides, list)
        ret.overrides = overrides
        # handle output directories here
        Path(str(working_dir)).mkdir(parents=True, exist_ok=True)
        os.chdir(working_dir)

        if configure_logging:
            configure_log(config.hydra.job_logging, config.hydra.verbose)

        # Saving the config snapshots is skipped when output_subdir is None.
        if config.hydra.output_subdir is not None:
            hydra_output = Path(config.hydra.output_subdir)
            _save_config(task_cfg, "config.yaml", hydra_output)
            _save_config(hydra_cfg, "hydra.yaml", hydra_output)
            _save_config(config.hydra.overrides.task, "overrides.yaml", hydra_output)

        # Run the task with the overrides from hydra.job.env_set in effect.
        with env_override(hydra_cfg.hydra.job.env_set):
            ret.return_value = task_function(task_cfg)
        ret.task_name = JobRuntime.instance().get("name")

        _flush_loggers()

        return ret
    finally:
        # Undo the global changes even when the task raised.
        HydraConfig.instance().cfg = orig_hydra_cfg
        os.chdir(old_cwd)
def set_config(self, cfg: DictConfig) -> None:
    """Store a copy of ``cfg`` that keeps only its ``hydra`` node.

    ``cfg.hydra`` is marked read-only first and must be of type
    ``HydraConf``. The stored ``hydra`` node keeps ``cfg`` as its parent.
    """
    assert cfg is not None
    OmegaConf.set_readonly(cfg.hydra, True)
    hydra_node_type = OmegaConf.get_type(cfg, "hydra")
    assert hydra_node_type == HydraConf
    # This is emulating a node that is hidden.
    # It's quite a hack, but it will be much better once
    # https://github.com/omry/omegaconf/issues/280 is done.
    # The motivation is that this allows for interpolations from the hydra
    # node into the user's config.
    self.cfg = OmegaConf.masked_copy(cfg, keys="hydra")  # type: ignore
    self.cfg.hydra._set_parent(cfg)  # type: ignore
def run_job(
    config: DictConfig,
    task_function: TaskFunction,
    job_dir_key: str,
    job_subdir_key: Optional[str],
) -> "JobReturn":
    """
    Run ``task_function`` inside the job's working directory and collect the result.

    The working directory is read from ``config`` at ``job_dir_key`` (joined
    with the value at ``job_subdir_key`` when that key is given), created if
    necessary and made the current directory while the task runs. The
    previous working directory is restored in the ``finally`` clause.
    ``logging.shutdown()`` is called before returning.

    :param config: config containing a ``hydra`` node
    :param task_function: callable invoked with a copy of ``config`` whose
        ``hydra`` node has been removed
    :param job_dir_key: key in ``config`` holding the job directory
    :param job_subdir_key: optional key in ``config`` holding a sub directory
        appended to the job directory
    :return: a ``JobReturn`` with ``working_dir``, ``cfg``, ``hydra_cfg``,
        ``overrides``, ``return_value`` and ``task_name`` filled in
    """
    # Captured up front so the ``finally`` clause can restore it.
    old_cwd = os.getcwd()
    working_dir = str(OmegaConf.select(config, job_dir_key))
    if job_subdir_key is not None:
        # evaluate job_subdir_key lazily.
        # this is running on the client side in sweep and contains things such as job:id which
        # are only available there.
        subdir = str(OmegaConf.select(config, job_subdir_key))
        working_dir = os.path.join(working_dir, subdir)
    try:
        ret = JobReturn()
        ret.working_dir = working_dir
        # Work on a private copy so the caller's ``config`` keeps its hydra node.
        task_cfg = copy.deepcopy(config)
        # Remove the hydra node from the config handed to the task; the two
        # context managers presumably lift read-only / struct restrictions
        # for the duration of the deletion.
        with read_write(task_cfg):
            with open_dict(task_cfg):
                del task_cfg["hydra"]
        ret.cfg = task_cfg
        ret.hydra_cfg = OmegaConf.create({"hydra": HydraConfig.get()})
        overrides = OmegaConf.to_container(config.hydra.overrides.task)
        assert isinstance(overrides, list)
        ret.overrides = overrides
        # handle output directories here
        Path(str(working_dir)).mkdir(parents=True, exist_ok=True)
        os.chdir(working_dir)

        configure_log(config.hydra.job_logging, config.hydra.verbose)

        # Copy of ``config`` that contains only the ``hydra`` node.
        hydra_cfg = OmegaConf.masked_copy(config, "hydra")
        assert isinstance(hydra_cfg, DictConfig)

        # Saving the config snapshots is skipped when output_subdir is None.
        if config.hydra.output_subdir is not None:
            hydra_output = Path(config.hydra.output_subdir)
            _save_config(task_cfg, "config.yaml", hydra_output)
            _save_config(hydra_cfg, "hydra.yaml", hydra_output)
            _save_config(config.hydra.overrides.task, "overrides.yaml", hydra_output)

        # Run the task with the overrides from hydra.job.env_set in effect.
        with env_override(hydra_cfg.hydra.job.env_set):
            ret.return_value = task_function(task_cfg)
        ret.task_name = JobRuntime.instance().get("name")

        # shut down logging to ensure job log files are closed.
        # If logging is still required after run_job caller is responsible to re-initialize it.
        logging.shutdown()

        return ret
    finally:
        # Go back to the original directory even when the task raised.
        os.chdir(old_cwd)
def run_job(
    config: DictConfig,
    task_function: TaskFunction,
    job_dir_key: str,
    job_subdir_key: Optional[str],
) -> "JobReturn":
    """
    Run ``task_function`` inside the job's working directory and collect the result.

    The working directory is read from ``config`` at ``job_dir_key`` (joined
    with the value at ``job_subdir_key`` when that key is given), created if
    necessary and made the current directory while the task runs. The
    previous working directory is restored in the ``finally`` clause.

    :param config: config containing a ``hydra`` node
    :param task_function: callable invoked with a copy of ``config`` whose
        ``hydra`` node has been removed
    :param job_dir_key: key in ``config`` holding the job directory
    :param job_subdir_key: optional key in ``config`` holding a sub directory
        appended to the job directory
    :return: a ``JobReturn`` with ``working_dir``, ``cfg``, ``hydra_cfg``,
        ``overrides``, ``return_value`` and ``task_name`` filled in
    """
    # Captured up front so the ``finally`` clause can restore it.
    old_cwd = os.getcwd()
    working_dir = str(config.select(job_dir_key))
    if job_subdir_key is not None:
        # evaluate job_subdir_key lazily.
        # this is running on the client side in sweep and contains things such as job:id which
        # are only available there.
        subdir = str(config.select(job_subdir_key))
        working_dir = os.path.join(working_dir, subdir)
    try:
        ret = JobReturn()
        ret.working_dir = working_dir
        # Work on a private copy so the caller's ``config`` keeps its hydra node.
        task_cfg = copy.deepcopy(config)
        del task_cfg["hydra"]
        ret.cfg = task_cfg
        hc: DictConfig = HydraConfig.instance()
        ret.hydra_cfg = copy.deepcopy(hc)
        overrides = OmegaConf.to_container(config.hydra.overrides.task)
        assert isinstance(overrides, list)
        ret.overrides = overrides
        # handle output directories here
        Path(str(working_dir)).mkdir(parents=True, exist_ok=True)
        os.chdir(working_dir)
        # NOTE(review): ``Path(None)`` raises TypeError, so this assumes
        # ``hydra.output_subdir`` is always set -- confirm against the schema.
        hydra_output = Path(config.hydra.output_subdir)

        configure_log(config.hydra.job_logging, config.hydra.verbose)

        # Copy of ``config`` that contains only the ``hydra`` node.
        hydra_cfg = OmegaConf.masked_copy(config, "hydra")
        assert isinstance(hydra_cfg, DictConfig)

        _save_config(task_cfg, "config.yaml", hydra_output)
        _save_config(hydra_cfg, "hydra.yaml", hydra_output)
        _save_config(config.hydra.overrides.task, "overrides.yaml", hydra_output)
        ret.return_value = task_function(task_cfg)
        ret.task_name = JobRuntime.instance().get("name")
        return ret
    finally:
        # Go back to the original directory even when the task raised.
        os.chdir(old_cwd)
def test_masked_copy(
    in_cfg: Dict[str, Any], mask_keys: Union[str, List[str]], expected: Any
) -> None:
    """Masking a config built from ``in_cfg`` with ``mask_keys`` yields ``expected``."""
    source = OmegaConf.create(in_cfg)
    assert OmegaConf.masked_copy(source, keys=mask_keys) == expected
def test_masked_copy(cfg, mask_keys, expected):
    """Masking a config built from ``cfg`` with ``mask_keys`` yields ``expected``."""
    source = OmegaConf.create(cfg)
    assert OmegaConf.masked_copy(source, keys=mask_keys) == expected
def set_config(self, cfg: DictConfig) -> None:
    """Store a copy of ``cfg`` that keeps only its ``hydra`` node.

    ``cfg.hydra`` is marked read-only first and must be of type
    ``HydraConf``.
    """
    assert cfg is not None
    OmegaConf.set_readonly(cfg.hydra, True)
    hydra_node_type = OmegaConf.get_type(cfg, "hydra")
    assert hydra_node_type == HydraConf
    self.cfg = OmegaConf.masked_copy(cfg, keys="hydra")  # type: ignore
def from_config(cls,
                catalog: DictConfig,
                data_dir: str,
                dataset_names: List[str] = None,
                ml_stages: Union[str, List[str]] = None,
                load_versions: Union[str, Dict[str, str]] = None,
                save_version: str = None) -> "DataCatalog":
    """Build a ``DataCatalog`` from a catalog configuration.

    One data set is instantiated for every (data set name, ML stage) pair
    that is both requested and listed in the data set's own ``ml_stages``
    entry. It is registered under the key ``"<name>_<stage>"``.

    Parameters
    ----------
    catalog : omegaconf.dictconfig.DictConfig
        Catalog configuration describing the data sets to instantiate.
    data_dir : str
        Directory where the data is stored; handed to
        ``parse_catalog_configuration`` together with ``catalog``.
    dataset_names : list of str, optional
        Names of the data sets to instantiate. When ``None``, every entry
        of the parsed catalog configuration is used.
    ml_stages : str or list of str, optional
        Stages for which data sets are created, e.g. ``train``, ``val``,
        ``test``. Defaults to ``['train', 'test']`` when ``None``.
    load_versions : str or dict, optional
        Mapping from data set name to the version (i.e. MLflow run ID) to
        load. Any other non-``None`` value is used as the version for
        every data set.
    save_version : str, optional
        Version string (i.e. MLflow run ID) stored as ``save_run_id`` in
        the config of every data set.

    Returns
    -------
    DataCatalog
        Catalog holding all data sets that were created.

    Raises
    ------
    DataSetError
        Per the original documentation, when a data set cannot be created
        from its config (raised by code outside this method).
    """
    # Normalise ``load_versions`` into a per-name mapping plus a fallback
    # that applies to names missing from the mapping.
    if isinstance(load_versions, dict):
        versions_by_name = copy.deepcopy(load_versions)
        fallback_version = None
    else:
        versions_by_name = {}
        fallback_version = load_versions

    if ml_stages is None:
        requested_stages = ['train', 'test']
    else:
        requested_stages = as_list(ml_stages)

    config = parse_catalog_configuration(catalog, data_dir)

    if dataset_names is None:
        selected_names = config.keys()
    else:
        selected_names = as_list(dataset_names)

    built = {}
    for ds_name in selected_names:
        for stage in config[ds_name].ml_stages:
            if stage not in requested_stages:
                continue
            # Take a fresh copy per stage: the config is modified below.
            ds_config = OmegaConf.masked_copy(config, [ds_name])[ds_name]
            ds_config.pop('ml_stages')
            ds_config.ml_stage = stage
            ds_config.load_run_id = versions_by_name.get(
                ds_name, fallback_version)
            ds_config.save_run_id = save_version
            built[f'{ds_name}_{stage}'] = AbstractDataSet.from_config(
                ds_config)
    return cls(datasets=built)