def test_masked_copy_is_deep() -> None:
    """Check that a masked copy is independent of the config it came from.

    The copy must still equal the original content after a nested value of
    the source config is changed.  Also checks that passing something that is
    not a config raises ``ValueError``.
    """
    cfg = OmegaConf.create({"a": {"b": 1, "c": 2}})
    expected = {"a": {"b": 1, "c": 2}}
    masked = OmegaConf.masked_copy(cfg, keys=["a"])
    assert masked == expected

    # Mutate a nested value in the source config only.
    cfg.a.b = 2
    assert cfg != expected
    # The copy must not observe the mutation.  Without this assertion the
    # test never looks at the copy after the source has changed.
    assert masked == expected

    with pytest.raises(ValueError):
        OmegaConf.masked_copy("fail", [])  # type: ignore
def test_masked_copy_is_deep():
    """Check that a masked copy is independent of the config it came from.

    The copy must still equal the original content after a nested value of
    the source config is changed.
    """
    cfg = OmegaConf.create({"a": {"b": 1, "c": 2}})
    expected = {"a": {"b": 1, "c": 2}}
    masked = OmegaConf.masked_copy(cfg, keys=["a"])
    assert masked == expected

    # Mutate a nested value in the source config only.
    cfg.a.b = 2
    assert cfg != expected
    # The copy must not observe the mutation.  Without this assertion the
    # test never looks at the copy after the source has changed.
    assert masked == expected
Beispiel #3
0
def cli(cfg):
    """Run the configured task, optionally continuing from an earlier workdir.

    The current working directory is stored in ``cfg.workdir``.  When the
    ``fromdir`` entry of ``cfg`` is set, ``from_workdir`` is used to load the
    configuration and state stored there, and every top-level key that ``cfg``
    itself does not define is taken over from that stored configuration.

    Afterwards the random seed and autograd anomaly detection are set up as
    configured, ``instantiate_wf`` is applied, ``cfg['task']['wf']`` is moved
    to ``cfg.device`` and ``cfg['task']`` is called.

    A ``KeyboardInterrupt`` is logged and swallowed.  A Hydra
    ``InstantiationException`` is replaced by the exception that caused it.
    """
    log.info('Entering application')
    cfg.workdir = str(Path.cwd())
    fromdir = OmegaConf.select(cfg, 'fromdir', throw_on_missing=True)
    state = None
    if fromdir:
        fromdir = Path(to_absolute_path(fromdir))
        cfg_orig, state = from_workdir(fromdir, cfg.state)
        # Keep only the stored top-level keys that the current config lacks,
        # so stored values are used solely for keys absent from ``cfg``.
        # masked_copy takes a str or a list of keys, hence the list().
        inherited_keys = list(cfg_orig.keys() - cfg.keys())
        cfg_orig = OmegaConf.masked_copy(cfg_orig, inherited_keys)
        cfg = OmegaConf.merge(cfg_orig, cfg)
    try:
        if cfg.seed is not None:
            log.info(f'Setting random seed: {cfg.seed}')
            torch.manual_seed(cfg.seed)
        # Read the device before ``cfg`` is replaced by instantiate_wf below.
        device = cfg.device
        if cfg.anomaly:
            torch.autograd.set_detect_anomaly(True)
            # ``Logger.warn`` is a deprecated alias of ``Logger.warning``.
            log.warning('Setting anomaly detection on')
        cfg = instantiate_wf(cfg, fromdir, state)
        log.info(f'Moving to {device}...')
        cfg['task']['wf'].to(device)
        log.info(f'Moved to {device}')
        call(cfg['task'], _convert_='all')
    except hydra.errors.InstantiationException as e:
        # Surface the underlying error instead of Hydra's wrapper.  Fall back
        # to the wrapper itself when no cause was recorded, because raising
        # ``None`` would fail with an unrelated TypeError.
        raise (e.__cause__ or e) from None
    except KeyboardInterrupt:
        log.warning('Interrupted!')
Beispiel #4
0
def get_current_configuration_copy(file_name: str = "threeML_config.yml",
                                   overwrite: bool = False) -> None:
    """
    Write a copy of the CURRENT configuration to the config directory.

    The top-level sections "LAT", "GBM" and "catalogs" are treated as
    read-only and are left out of the copy.

    :param file_name: name of the file to create inside the directory
        returned by ``get_path_of_user_config()``
    :param overwrite: if False, refuse to replace an already existing file
    :raises RuntimeError: if the file exists and ``overwrite`` is False
    """

    outfile: Path = get_path_of_user_config() / file_name

    if outfile.exists() and (not overwrite):

        raise RuntimeError(f"{outfile} exists! Set overwrite to True")

    _read_only_keys = ("LAT", "GBM", "catalogs")

    # Only the key names are needed to build the mask, so iterate over the
    # keys instead of unpacking (and discarding) every value.
    _valid_keys = [k for k in threeML_config if k not in _read_only_keys]

    config_copy = OmegaConf.masked_copy(threeML_config, _valid_keys)

    with outfile.open("w") as f:

        f.write(
            OmegaConf.to_yaml(config_copy, sort_keys=True, resolve=True))
Beispiel #5
0
def run_job(
    config: DictConfig,
    task_function: TaskFunction,
    job_dir_key: str,
    job_subdir_key: Optional[str],
    configure_logging: bool = True,
) -> "JobReturn":
    """Run ``task_function`` inside its own working directory.

    The working directory is selected from ``config`` with ``job_dir_key``
    and, when ``job_subdir_key`` is given, extended by the sub-directory
    selected with that key.  The task receives a deep copy of ``config`` with
    the ``hydra`` key removed; a masked copy holding only the ``hydra`` key is
    installed as ``HydraConfig.instance().cfg`` while the job runs.

    The previous ``HydraConfig`` cfg and the previous current working
    directory are restored in the ``finally`` clause, whether or not the task
    raises.

    :param config: full configuration, including the ``hydra`` node
    :param task_function: callable invoked with the task configuration
    :param job_dir_key: key selecting the job directory from ``config``
    :param job_subdir_key: optional key selecting a sub-directory
    :param configure_logging: when True, ``configure_log`` is called with
        ``config.hydra.job_logging`` and ``config.hydra.verbose``
    :return: a ``JobReturn`` with ``working_dir``, ``cfg``, ``hydra_cfg``,
        ``overrides``, ``return_value`` and ``task_name`` filled in
    """
    # Remember the global state that is modified below so that it can be
    # restored in the ``finally`` clause.
    old_cwd = os.getcwd()
    working_dir = str(OmegaConf.select(config, job_dir_key))
    orig_hydra_cfg = HydraConfig.instance().cfg
    if job_subdir_key is not None:
        # evaluate job_subdir_key lazily.
        # this is running on the client side in sweep and contains things such as job:id which
        # are only available there.
        subdir = str(OmegaConf.select(config, job_subdir_key))
        working_dir = os.path.join(working_dir, subdir)
    try:
        ret = JobReturn()
        ret.working_dir = working_dir
        # Work on a private copy so the caller's config is not modified.
        task_cfg = copy.deepcopy(config)
        hydra_cfg = OmegaConf.masked_copy(task_cfg, "hydra")
        # maintain parent to preserve interpolation links from hydra_cfg to job_cfg
        hydra_cfg._set_parent(task_cfg)
        # Remove the hydra node from the config that is handed to the task.
        with read_write(task_cfg):
            with open_dict(task_cfg):
                del task_cfg["hydra"]
        HydraConfig.instance().cfg = hydra_cfg  # type: ignore

        ret.cfg = task_cfg
        ret.hydra_cfg = hydra_cfg
        overrides = OmegaConf.to_container(config.hydra.overrides.task)
        assert isinstance(overrides, list)
        ret.overrides = overrides
        # handle output directories here
        Path(str(working_dir)).mkdir(parents=True, exist_ok=True)
        os.chdir(working_dir)

        if configure_logging:
            configure_log(config.hydra.job_logging, config.hydra.verbose)

        # Saving the config files is skipped when output_subdir is None.
        if config.hydra.output_subdir is not None:
            hydra_output = Path(config.hydra.output_subdir)
            _save_config(task_cfg, "config.yaml", hydra_output)
            _save_config(hydra_cfg, "hydra.yaml", hydra_output)
            _save_config(config.hydra.overrides.task, "overrides.yaml",
                         hydra_output)

        # Run the task under the env override built from hydra.job.env_set.
        with env_override(hydra_cfg.hydra.job.env_set):
            ret.return_value = task_function(task_cfg)
        ret.task_name = JobRuntime.instance().get("name")

        _flush_loggers()

        return ret
    finally:
        # Undo the global changes made above, even if the task raised.
        HydraConfig.instance().cfg = orig_hydra_cfg
        os.chdir(old_cwd)
Beispiel #6
0
 def set_config(self, cfg: DictConfig) -> None:
     """Store a copy of ``cfg`` that contains only its ``hydra`` node.

     The ``hydra`` node of ``cfg`` is marked read-only first and its type is
     asserted to be ``HydraConf``.  The stored copy's ``hydra`` node is then
     re-parented to the original ``cfg``.
     """
     assert cfg is not None
     OmegaConf.set_readonly(cfg.hydra, True)
     assert OmegaConf.get_type(cfg, "hydra") == HydraConf
     # This is emulating a node that is hidden.
     # It's quite a hack but it will be much better once
     # https://github.com/omry/omegaconf/issues/280 is done
     # The motivation is that this allows for interpolations from the hydra node
     # into the user's config.
     self.cfg = OmegaConf.masked_copy(cfg, "hydra")  # type: ignore
     self.cfg.hydra._set_parent(cfg)  # type: ignore
Beispiel #7
0
def run_job(
    config: DictConfig,
    task_function: TaskFunction,
    job_dir_key: str,
    job_subdir_key: Optional[str],
) -> "JobReturn":
    """Execute ``task_function`` from within the job's own directory.

    The directory is looked up in ``config`` under ``job_dir_key`` and, if
    ``job_subdir_key`` is given, extended by the value found under that key.
    The task is called with a deep copy of ``config`` from which the
    ``hydra`` key has been deleted.  The previous working directory is
    restored on exit, and logging is shut down once the task has returned.

    Returns a ``JobReturn`` carrying the working directory, both configs, the
    task overrides, the task's return value and the task name.
    """
    original_cwd = os.getcwd()
    job_dir = str(OmegaConf.select(config, job_dir_key))
    if job_subdir_key is not None:
        # The sub-directory key is resolved lazily: this runs on the client
        # side of a sweep and may refer to things such as job:id which are
        # only available there.
        job_dir = os.path.join(
            job_dir, str(OmegaConf.select(config, job_subdir_key))
        )
    try:
        result = JobReturn()
        result.working_dir = job_dir

        # The task gets a private copy of the config without the hydra node.
        task_cfg = copy.deepcopy(config)
        with read_write(task_cfg), open_dict(task_cfg):
            del task_cfg["hydra"]
        result.cfg = task_cfg
        result.hydra_cfg = OmegaConf.create({"hydra": HydraConfig.get()})

        task_overrides = OmegaConf.to_container(config.hydra.overrides.task)
        assert isinstance(task_overrides, list)
        result.overrides = task_overrides

        # Create the output directory and make it the current directory.
        Path(job_dir).mkdir(parents=True, exist_ok=True)
        os.chdir(job_dir)

        configure_log(config.hydra.job_logging, config.hydra.verbose)

        hydra_cfg = OmegaConf.masked_copy(config, "hydra")
        assert isinstance(hydra_cfg, DictConfig)

        output_subdir = config.hydra.output_subdir
        if output_subdir is not None:
            save_dir = Path(output_subdir)
            _save_config(task_cfg, "config.yaml", save_dir)
            _save_config(hydra_cfg, "hydra.yaml", save_dir)
            _save_config(config.hydra.overrides.task, "overrides.yaml", save_dir)

        with env_override(hydra_cfg.hydra.job.env_set):
            result.return_value = task_function(task_cfg)
        result.task_name = JobRuntime.instance().get("name")

        # Shut logging down so that the job's log files get closed.  A caller
        # that still needs logging after run_job has to set it up again.
        logging.shutdown()

        return result
    finally:
        os.chdir(original_cwd)
Beispiel #8
0
def run_job(
    config: DictConfig,
    task_function: TaskFunction,
    job_dir_key: str,
    job_subdir_key: Optional[str],
) -> "JobReturn":
    """Run ``task_function`` inside its own working directory.

    The working directory is selected from ``config`` with ``job_dir_key``
    and, when ``job_subdir_key`` is given, extended by the sub-directory
    selected with that key.  The task receives a deep copy of ``config`` with
    the ``hydra`` key removed.  The previous working directory is restored in
    the ``finally`` clause.

    :param config: full configuration, including the ``hydra`` node
    :param task_function: callable invoked with the task configuration
    :param job_dir_key: key selecting the job directory from ``config``
    :param job_subdir_key: optional key selecting a sub-directory
    :return: a ``JobReturn`` with ``working_dir``, ``cfg``, ``hydra_cfg``,
        ``overrides``, ``return_value`` and ``task_name`` filled in
    """
    old_cwd = os.getcwd()
    # Use OmegaConf.select rather than the deprecated DictConfig.select method.
    working_dir = str(OmegaConf.select(config, job_dir_key))
    if job_subdir_key is not None:
        # evaluate job_subdir_key lazily.
        # this is running on the client side in sweep and contains things such as job:id which
        # are only available there.
        subdir = str(OmegaConf.select(config, job_subdir_key))
        working_dir = os.path.join(working_dir, subdir)
    try:
        ret = JobReturn()
        ret.working_dir = working_dir
        # Work on a private copy so the caller's config is not modified.
        task_cfg = copy.deepcopy(config)
        del task_cfg["hydra"]
        ret.cfg = task_cfg
        # NOTE(review): annotated as DictConfig in the original; confirm that
        # HydraConfig.instance() really returns one.
        hc: DictConfig = HydraConfig.instance()
        ret.hydra_cfg = copy.deepcopy(hc)
        overrides = OmegaConf.to_container(config.hydra.overrides.task)
        assert isinstance(overrides, list)
        ret.overrides = overrides
        # handle output directories here
        Path(working_dir).mkdir(parents=True, exist_ok=True)
        os.chdir(working_dir)

        configure_log(config.hydra.job_logging, config.hydra.verbose)

        hydra_cfg = OmegaConf.masked_copy(config, "hydra")
        assert isinstance(hydra_cfg, DictConfig)

        # Skip saving when no output sub-directory is configured; Path(None)
        # would otherwise fail with a TypeError before the task even starts.
        if config.hydra.output_subdir is not None:
            hydra_output = Path(config.hydra.output_subdir)
            _save_config(task_cfg, "config.yaml", hydra_output)
            _save_config(hydra_cfg, "hydra.yaml", hydra_output)
            _save_config(config.hydra.overrides.task, "overrides.yaml",
                         hydra_output)
        ret.return_value = task_function(task_cfg)
        ret.task_name = JobRuntime.instance().get("name")
        return ret
    finally:
        os.chdir(old_cwd)
def test_masked_copy(
    in_cfg: Dict[str, Any], mask_keys: Union[str, List[str]], expected: Any
) -> None:
    """Masking a config built from ``in_cfg`` with ``mask_keys`` yields ``expected``."""
    source = OmegaConf.create(in_cfg)
    assert OmegaConf.masked_copy(source, keys=mask_keys) == expected
def test_masked_copy(cfg, mask_keys, expected):
    """Masking a config built from ``cfg`` with ``mask_keys`` yields ``expected``."""
    source = OmegaConf.create(cfg)
    assert OmegaConf.masked_copy(source, keys=mask_keys) == expected
Beispiel #11
0
 def set_config(self, cfg: DictConfig) -> None:
     """Keep a copy of ``cfg`` that contains only its ``hydra`` node.

     Before copying, the ``hydra`` node of ``cfg`` is made read-only and its
     type is asserted to be ``HydraConf``.
     """
     assert cfg is not None
     hydra_node = cfg.hydra
     OmegaConf.set_readonly(hydra_node, True)
     hydra_type = OmegaConf.get_type(cfg, "hydra")
     assert hydra_type == HydraConf
     self.cfg = OmegaConf.masked_copy(cfg, keys="hydra")  # type: ignore
Beispiel #12
0
    def from_config(cls,
                    catalog: DictConfig,
                    data_dir: str,
                    dataset_names: Union[List[str], None] = None,
                    ml_stages: Union[str, List[str], None] = None,
                    load_versions: Union[str, Dict[str, str], None] = None,
                    save_version: Union[str, None] = None) -> "DataCatalog":
        """Create a ``DataCatalog`` instance from configuration. This is a factory method
        used to instantiate ``DataCatalog`` with configuration parsed from configuration files.

        Parameters
        ----------
        catalog : An omegaconf.dictconfig.DictConfig object
            This is the catalog configuration with information for instantiating the DataSets
        data_dir : str
            The directory where the data is stored.
        dataset_names : List of str (default=None)
            The names of the datasets to instantiate. If None, all datasets in the catalog
            configuration will be instantiated.
        ml_stages : str or list of str (default=None)
            The type of the processed data for which datasets should be created: `train`, `val`, `test`.
            If None, ``['train', 'test']`` is used. A dataset is only created for a stage
            that is also listed in that dataset's own ``ml_stages`` configuration entry.
        load_versions : str or dict (default=None)
            A mapping between dataset names and versions (i.e. MLflow run IDs) to load. If only one
            value is passed, it is assumed that it applies to all datasets.
        save_version : str (default=None)
            Version string (i.e. MLflow run ID) to be used for ``save`` operations by all datasets
            that extend the ``AbstractVersionedDataSet`` class.

        Returns
        -------
        An instantiated ``DataCatalog`` containing all specified data sets, created and ready to use.
        The datasets are keyed by ``<dataset name>_<stage>``.

        Raises
        ------
        DataSetError: When the method fails to create any of the data sets from their config.
        """
        default_load_version = None

        if load_versions is None:
            load_versions = {}
        elif isinstance(load_versions, dict):
            # Copy so that the caller's mapping is never shared with this method.
            load_versions = copy.deepcopy(load_versions)
        else:
            # A single (non-dict) value applies to every dataset.
            default_load_version = load_versions
            load_versions = {}

        ml_stages = as_list(ml_stages) if ml_stages is not None else [
            'train', 'test'
        ]
        config = parse_catalog_configuration(catalog, data_dir)
        dataset_names = as_list(
            dataset_names) if dataset_names is not None else config.keys()

        datasets = {}
        for ds_name in dataset_names:
            for stage in config[ds_name].ml_stages:
                if stage not in ml_stages:
                    continue
                # Take a fresh copy for every stage: the copy is modified
                # below and handed over to the dataset factory.
                ds_config = OmegaConf.masked_copy(config, [ds_name])[ds_name]
                ds_config.pop('ml_stages')
                ds_config.ml_stage = stage
                ds_config.load_run_id = load_versions.get(
                    ds_name, default_load_version)
                ds_config.save_run_id = save_version
                datasets[f'{ds_name}_{stage}'] = AbstractDataSet.from_config(
                    ds_config)

        return cls(datasets=datasets)