def test_env_dict_initialized_with_replaced_env_dict():
    """An EnvDict built from a replaced EnvDict keeps the replaced values."""
    source = EnvDict({'a': {'b': 1}})
    replaced = source._replace_flatten_keys({'env__a__b': 2})
    rebuilt = EnvDict(replaced)
    # the new object must carry the updated nested value
    assert rebuilt['a']['b'] == 2
def decorator(fn):
    """Wrap *fn* so it runs with an initialized Env as its first argument.

    NOTE(review): this is a closure — ``source`` comes from the enclosing
    scope (presumably the outer ``with_env`` decorator factory; confirm).
    """
    _validate_and_modify_signature(fn)

    try:
        # FIXME: we should deprecate initializing from a decorator
        # with a dictionary, it isn't useful. leaving it for now
        if isinstance(source, Mapping):
            env_dict = EnvDict(source)
        else:
            # when the decorator is called without args, look for
            # 'env.yaml'
            env_dict = EnvDict.find(source or 'env.yaml')
    except Exception as e:
        # chain the original error so the root cause stays visible
        raise RuntimeError('Failed to resolve environment using '
                           '@with_env decorator in function "{}". '
                           'Tried to call Env with argument: {}'.format(
                               _get_function_name_w_module(fn),
                               source)) from e

    # expose the resolved environment on the decorated function itself
    fn._env_dict = env_dict

    @wraps(fn)
    def wrapper(*args, **kwargs):
        # callers may override env values via env__-prefixed kwargs;
        # collect them and remove them from the kwargs passed to fn
        to_replace = {
            k: v
            for k, v in kwargs.items() if k.startswith('env__')
        }

        for key in to_replace.keys():
            kwargs.pop(key)

        env_dict_new = env_dict._replace_flatten_keys(to_replace)

        try:
            Env._init_from_decorator(env_dict_new,
                                     _get_function_name_w_module(fn))
        except Exception as e:
            # include the currently active environment in the error to
            # help diagnose "already started" conflicts
            current = Env.load()
            raise RuntimeError('Failed to initialize environment using '
                               '@with_env decorator in function "{}". '
                               'Current environment: {}'.format(
                                   _get_function_name_w_module(fn),
                                   repr(current))) from e

        Env._ref = _get_function_name_w_module(fn)

        try:
            res = fn(Env.load(), *args, **kwargs)
        except Exception as e:
            # always tear down the environment, even on failure
            Env.end()
            raise e

        Env.end()

        return res

    return wrapper
def _process_file_dir_or_glob(parser, dagspec_arg=None): """ Process a file entry point file, directory or glob-like pattern, the initialized dag and parsed args Parameters ---------- parser : CustomParser CLI arg parser """ # NOTE: we must use parser.parse_entry_point_value() instead or # args.parse_args because calling the latter wont allow us to add more # cli parameters, but we want that to expose parms from env entry_point_value = dagspec_arg or parser.parse_entry_point_value() entry = EntryPoint(entry_point_value) if entry.type in {EntryPoint.Directory, EntryPoint.Pattern}: # pipelines initialized from directories or patterns cannot be # parametrized path_to_env = None # file else: path_to_env = default.path_to_env_from_spec(entry_point_value) if path_to_env: env_dict = EnvDict(path_to_env, path_to_here=Path(entry_point_value).parent if entry.type == EntryPoint.File else None) _add_cli_args_from_env_dict_keys(parser, env_dict) args = parser.parse_args() dagspec_arg = dagspec_arg or args.entry_point if hasattr(args, 'log'): if args.log is not None: logging.basicConfig(level=args.log.upper()) entry_point = EntryPoint(dagspec_arg) # directory if entry_point.type == EntryPoint.Directory: dag = DAGSpec.from_directory(dagspec_arg).to_dag() # pattern elif entry_point.type == EntryPoint.Pattern: dag = DAGSpec.from_files(dagspec_arg).to_dag() # file else: if path_to_env: # and replace keys depending on passed cli args replaced = _env_keys_to_override(args, parser.static_args) env = env_dict._replace_flatten_keys(replaced) dag = DAGSpec(dagspec_arg, env=env).to_dag() else: dag = DAGSpec(dagspec_arg).to_dag() return dag, args
def test_replace_value_casts_if_possible():
    """_replace_value coerces string input to the existing value's type."""
    env = EnvDict({'a': False, 'b': 1, 'c': 1.1})

    for raw, keys in (('True', ['a']), ('2', ['b']), ('2.2', ['c'])):
        env._replace_value(raw, keys)

    assert env.a is True
    assert env.b == 2
    assert env.c == 2.2
def test_env_dict_initialized_with_env_dict(data, keys):
    """Copy-constructing an EnvDict preserves defaults and nested access."""
    original = EnvDict(data)
    copy = EnvDict(original)

    # object initialized correctly: repr/str do not blow up
    assert repr(copy)
    assert str(copy)

    # default keys carried over from the source object
    assert copy._default_keys == original._default_keys

    # nested keys remain reachable, bottoming out at the stored value
    current = copy
    for key in keys:
        current = current[key]
    assert current == 1
def __init__(self, source='env.yaml'):
    """Start the environment

    Parameters
    ----------
    source: dict, pathlib.Path, str, optional
        If dict, loads it directly, if pathlib.Path or path, reads the
        file (assumes yaml format).

    Raises
    ------
    FileNotFoundError
        If source is None and an environment file cannot be found
        automatically

    RuntimeError
        If one environment has already started

    Returns
    -------
    ploomber.Env
        An environment object
    """
    if not isinstance(source, EnvDict):
        # try to initialize an EnvDict to perform validation, if any
        # errors occur, discard object
        try:
            source = EnvDict(source)
        except Exception:
            # NOTE(review): clears the (name-mangled) class-level singleton
            # holder so a failed start does not leave a half-initialized
            # Env registered — confirm against the rest of the class
            Env.__instance = None
            raise

    self._data = source
    # set later, when started via the @with_env decorator
    self._fn_name = None
def test_dagspec_initialization_from_yaml_and_env(tmp_nbs, monkeypatch):
    """
    DAGSpec can be initialized with a path to a spec or a dictionary, but
    they have a slightly different behavior. This ensure the cli passes
    the path, instead of a dictionary

    NOTE(review): tmp_nbs is a fixture — presumably it sets up a sample
    project with pipeline.yaml and env.yaml in a temp cwd; confirm.
    """
    # wrap (not replace) the real objects so behavior is unchanged but
    # calls can be asserted on
    mock_DAGSpec = Mock(wraps=parsers.DAGSpec)
    mock_default_path_to_env = Mock(
        wraps=parsers.default.path_to_env_from_spec)
    mock_EnvDict = Mock(wraps=parsers.EnvDict)

    # simulate a bare CLI invocation (no extra args)
    monkeypatch.setattr(sys, 'argv', ['python'])
    monkeypatch.setattr(parsers, 'DAGSpec', mock_DAGSpec)
    monkeypatch.setattr(parsers.default, 'path_to_env_from_spec',
                        mock_default_path_to_env)
    monkeypatch.setattr(parsers, 'EnvDict', mock_EnvDict)

    parser = CustomParser()

    with parser:
        pass

    dag, args = _custom_command(parser)

    # ensure called using the path to the yaml spec
    mock_DAGSpec.assert_called_once_with('pipeline.yaml',
                                         env=EnvDict({'sample': False},
                                                     path_to_here='.'))

    # and EnvDict initialized from env.yaml
    mock_EnvDict.assert_called_once_with(str(Path('env.yaml').resolve()),
                                         path_to_here=Path('.'))
def test_default_with_root(monkeypatch):
    """env.root is sourced from default.find_root_recursively."""
    fake_find_root = Mock(return_value='some_value')
    monkeypatch.setattr(default, 'find_root_recursively', fake_find_root)

    env = EnvDict(dict())

    assert env.root == 'some_value'
def test_default(monkeypatch):
    """Default keys expose the current user and working directory."""
    monkeypatch.setattr(getpass, 'getuser', Mock(return_value='User'))
    monkeypatch.setattr(os, 'getcwd', Mock(return_value='/some_path'))

    env = EnvDict(dict())

    assert env.user == 'User'
    assert env.cwd == str(Path('/some_path').resolve())
def test_attribute_error_message():
    """Missing keys raise AttributeError/KeyError with descriptive text."""
    env = EnvDict({'user': '******', 'cwd': 'cwd', 'root': 'root'})

    with pytest.raises(AttributeError) as attr_err:
        env.aa

    with pytest.raises(KeyError) as key_err:
        env['aa']

    # NOTE: 'atttribute' matches the library's actual (misspelled) message
    assert str(attr_err.value) == f"{env!r} object has no atttribute 'aa'"
    assert str(key_err.value) == f'"{env!r} object has no key \'aa\'"'
def test_error_when_loaded_obj_is_not_dict(content, type_, tmp_directory):
    """EnvDict rejects YAML files whose top-level object is not a mapping."""
    Path(tmp_directory, 'file.yaml').write_text(content)

    with pytest.raises(ValueError) as excinfo:
        EnvDict('file.yaml')

    expected = ("Expected object loaded from 'file.yaml' to be "
                "a dict but got '{}' instead, "
                "verify the content").format(type_)
    assert str(excinfo.value) == expected
def test_find(tmp_directory):
    """EnvDict.find locates env.yaml by walking up from the cwd."""
    nested = Path('some', 'dir')
    nested.mkdir(parents=True)
    Path('some', 'env.yaml').write_text('key: value')

    # env.yaml lives in 'some', so that's where 'here' should point
    expected_here = str(Path('some').resolve())

    os.chdir(nested)
    env = EnvDict.find('env.yaml')

    assert env.here == expected_here
    assert env.cwd == str(Path('.').resolve())
def test_serialize_env_dict():
    """EnvDict survives a pickle round trip.

    Edge case in EnvDict's implementation: it customizes __getattr__ so
    dictionary values are readable as attributes. During unpickling,
    Python probes for __getstate__ before any instance attributes exist;
    the failed lookup falls through __getattr__ into __getitem__, which
    reads self.preprocessed — not set yet at unserialization time — and
    that lookup re-enters __getattr__, looping forever unless special
    methods are prevented from reaching __getitem__ when absent. EnvDict
    and Env are not expected to be serialized, but it must not crash.
    """
    env = EnvDict({'a': 1})
    assert pickle.loads(pickle.dumps(env))
def test_expand_raw_dict_nested():
    """Placeholders are expanded inside nested dicts and inside lists."""
    mapping = EnvDict({'key': 'value'})
    raw = {
        'section': {
            'some_settting': '{{key}}'
        },
        'list': ['{{key}}', '{{key}}'],
    }
    expected = {
        'section': {
            'some_settting': 'value'
        },
        'list': ['value', 'value'],
    }
    assert expand_raw_dictionary(raw, mapping) == expected
def test_expand_raw_dictionaries_and_extract_tags():
    """Expanding a list of dicts yields expanded copies plus used tags."""
    mapping = EnvDict({'key': 'value'})
    raw = [{'some_setting': '{{key}}'}, {'another_setting': '{{key}}'}]

    expanded, tags = expand_raw_dictionaries_and_extract_tags(raw, mapping)

    assert expanded == ({'some_setting': 'value'},
                        {'another_setting': 'value'})
    assert tags == {'key'}
def test_adds_default_keys_if_they_dont_exist(monkeypatch):
    """cwd, here, user, root and now are injected when not provided."""
    monkeypatch.setattr(getpass, 'getuser', Mock(return_value='User'))
    monkeypatch.setattr(os, 'getcwd', Mock(return_value='/some_path'))

    # both env and expand modules resolve the project root
    fake_find_root = Mock(return_value='some_value')
    monkeypatch.setattr(default, 'find_root_recursively', fake_find_root)
    monkeypatch.setattr(expand.default, 'find_root_recursively',
                        fake_find_root)

    env = EnvDict({'a': 1}, path_to_here='/dir')

    assert env.user == 'User'
    assert env.cwd == str(Path('/some_path').resolve())
    assert env.here == str(Path('/dir').resolve())
    assert env.root == 'some_value'
    assert env.default_keys == {'cwd', 'here', 'user', 'root', 'now'}
def test_dagspec_initialization_from_yaml_and_env(tmp_nbs, monkeypatch):
    """
    DAGSpec can be initialized with a path to a spec or a dictionary, but
    they have a slightly different behavior. This ensure the cli passes
    the path, instead of a dictionary

    NOTE(review): tmp_nbs is a fixture — presumably it sets up a sample
    project with pipeline.yaml and env.yaml in a temp cwd; confirm.
    """
    # wrap (not replace) the real objects so behavior is unchanged but
    # calls can be asserted on
    mock_DAGSpec = Mock(wraps=parsers.DAGSpec)
    mock_default_path_to_env = Mock(
        wraps=parsers.default.path_to_env_from_spec)
    mock_EnvDict = Mock(wraps=parsers.EnvDict)

    # simulate a bare CLI invocation (no extra args)
    monkeypatch.setattr(sys, 'argv', ['python'])
    monkeypatch.setattr(parsers, 'DAGSpec', mock_DAGSpec)
    monkeypatch.setattr(parsers.default, 'path_to_env_from_spec',
                        mock_default_path_to_env)
    monkeypatch.setattr(parsers, 'EnvDict', mock_EnvDict)

    # ensure current timestamp does not change
    mock = Mock()
    mock.datetime.now().isoformat.return_value = 'current-timestamp'
    monkeypatch.setattr(expand, "datetime", mock)

    parser = CustomParser()

    with parser:
        pass

    dag, args = parser.load_from_entry_point_arg()

    # ensure called using the path to the yaml spec
    mock_DAGSpec.assert_called_once_with('pipeline.yaml',
                                         env=EnvDict({'sample': False},
                                                     path_to_here='.'))

    # and EnvDict initialized from env.yaml
    mock_EnvDict.assert_called_once_with(str(Path('env.yaml').resolve()),
                                         path_to_here=Path('.'))
def test_replace_nested_flatten_key_env_dict():
    """_replace_flatten_key on a nested key returns an updated copy."""
    env = EnvDict({'a': {'b': 1}})
    updated = env._replace_flatten_key(2, 'env__a__b')
    assert updated.a.b == 2
    assert updated is not env  # must return a copy
def test_error_when_flatten_key_doesnt_exist():
    """Replacing a flattened key that is not present raises KeyError."""
    env = EnvDict({'a': 1})

    with pytest.raises(KeyError):
        env._replace_flatten_key(2, 'env__b')
def test_default_keys(kwargs, expected):
    """default_keys matches the expected set for each constructor call."""
    env = EnvDict(**kwargs)
    assert env.default_keys == expected
def test_default_with_here_absolute(tmp_directory):
    """An absolute path_to_here is exposed unchanged via env.here."""
    absolute_here = str(Path(tmp_directory, 'dir').resolve())

    env = EnvDict(dict(), path_to_here=absolute_here)

    assert env.here == absolute_here
def test_replace_flatten_key_env_dict():
    """_replace_flatten_key returns a copy with the value replaced."""
    env = EnvDict({'a': 1})
    updated = env._replace_flatten_key(2, 'env__a')
    assert updated.a == 2
    assert updated is not env  # must return a copy
def test_default_with_here_relative(tmp_directory):
    """A relative path_to_here is resolved to an absolute path."""
    relative = 'dir'
    Path(relative).mkdir()

    env = EnvDict(dict(), path_to_here=relative)

    expected = str(Path(tmp_directory, 'dir').resolve())
    assert env.here == expected
def test_expand_raw_dictionary():
    """Placeholders in a flat dict are replaced from the mapping."""
    mapping = EnvDict({'key': 'value'})
    raw = {'some_setting': '{{key}}'}

    expanded = expand_raw_dictionary(raw, mapping)

    assert expanded == {'some_setting': 'value'}
def test_replace_nested_flatten_keys_env_dict():
    """_replace_flatten_keys updates several nested keys in a copy."""
    env = EnvDict({'a': {'b': 1, 'c': 1}})
    updated = env._replace_flatten_keys({'env__a__b': 2, 'env__a__c': 2})
    assert updated.a.b == 2
    assert updated.a.c == 2
    assert updated is not env  # must return a copy
def _init(self, data, env, lazy_import, reload, parent_path,
          look_up_project_root_recursively):
    """Initialize the spec from a YAML path or an in-memory dictionary.

    Parameters
    ----------
    data : str, pathlib.Path, dict or list
        Path to a YAML spec, a spec dictionary, or a list of tasks
    env : dict or None
        Environment values used to expand {{placeholders}} in the spec
    lazy_import : bool
        Forwarded to TaskSpec; delays importing task sources
    reload : bool
        Forwarded to TaskSpec
    parent_path : str or None
        Base directory for relative paths; must be None when data is a
        path (the YAML file's parent is used instead)
    look_up_project_root_recursively : bool
        If True, search upwards for the project root; otherwise use
        parent_path as the root
    """
    self._lazy_import = lazy_import

    # initialized with a path to a yaml file...
    if isinstance(data, (str, Path)):
        # TODO: test this
        if parent_path is not None:
            raise ValueError('parent_path must be None when '
                             f'initializing {type(self).__name__} with '
                             'a path to a YAML spec')

        # resolve the parent path to make sources and products unambiguous
        # even if the current working directory changes
        self._path = Path(data).resolve()
        self._parent_path = str(self._path.parent)

        if not Path(data).is_file():
            raise FileNotFoundError(
                'Error initializing DAGSpec with argument '
                f'{data!r}: Expected it to be a path to a YAML file, but '
                'such file does not exist')

        content = Path(data).read_text()

        try:
            data = yaml.safe_load(content)
        except (yaml.parser.ParserError,
                yaml.constructor.ConstructorError) as e:
            # defer raising: we first check for the common un-quoted
            # placeholder mistake to give a better error message
            error = e
        else:
            error = None

        if error:
            if '{{' in content or '}}' in content:
                raise DAGSpecInitializationError(
                    'Failed to initialize spec. It looks like '
                    'you\'re using placeholders (i.e. {{placeholder}}). '
                    'Make sure values are enclosed in parentheses '
                    '(e.g. key: "{{placeholder}}"). Original '
                    'parser error:\n\n'
                    f'{error}')
            else:
                raise error

    # initialized with a dictionary...
    else:
        self._path = None
        # FIXME: add test cases, some of those features wont work if
        # _parent_path is None. We should make sure that we either raise
        # an error if _parent_path is needed or use the current working
        # directory if it's appropriate - this is mostly to make relative
        # paths consistent: they should be relative to the file that
        # contains them
        self._parent_path = (None if not parent_path else str(
            Path(parent_path).resolve()))

    self.data = data

    # a bare list is shorthand for {'tasks': [...]}
    if isinstance(self.data, list):
        self.data = {'tasks': self.data}

    # validate keys defined at the top (nested keys are not validated here)
    self._validate_top_keys(self.data, self._path)

    logger.debug('DAGSpec enviroment:\n%s', pp.pformat(env))

    env = env or dict()

    # look for an env.yaml next to the spec to use as default values
    path_to_defaults = default.path_to_env_from_spec(
        path_to_spec=self._path)

    if path_to_defaults:
        defaults = yaml.safe_load(Path(path_to_defaults).read_text())
        self.env = EnvDict(env,
                           path_to_here=self._parent_path,
                           defaults=defaults)
    else:
        self.env = EnvDict(env, path_to_here=self._parent_path)

    # expand {{placeholders}} and record which tags the spec used
    self.data, tags = expand_raw_dictionary_and_extract_tags(
        self.data, self.env)

    logger.debug('Expanded DAGSpec:\n%s', pp.pformat(data))

    # if there is a "location" top key, we don't have to do anything else
    # as we will just load the dotted path when .to_dag() is called
    if 'location' not in self.data:

        Meta.initialize_inplace(self.data)

        import_tasks_from = self.data['meta']['import_tasks_from']

        if import_tasks_from is not None:
            # when using a relative path in "import_tasks_from", we must
            # make it absolute...
            if not Path(import_tasks_from).is_absolute():
                # use _parent_path if there is one
                if self._parent_path:
                    self.data['meta']['import_tasks_from'] = str(
                        Path(self._parent_path, import_tasks_from))
                # otherwise just make it absolute
                else:
                    self.data['meta']['import_tasks_from'] = str(
                        Path(import_tasks_from).resolve())

            imported = yaml.safe_load(
                Path(self.data['meta']['import_tasks_from']).read_text())

            if self.env is not None:
                # imported tasks may also use placeholders; expand them
                # and merge their tags with the main spec's
                (imported,
                 tags_other) = expand_raw_dictionaries_and_extract_tags(
                     imported, self.env)
                tags = tags | tags_other

            # relative paths here are relative to the file where they
            # are declared
            base_path = Path(self.data['meta']['import_tasks_from']).parent

            for task in imported:
                add_base_path_to_source_if_relative(task,
                                                    base_path=base_path)

            self.data['tasks'].extend(imported)

        # check if there are any params declared in env, not used in
        # in the pipeline
        extra = set(self.env) - self.env.default_keys - tags

        if extra:
            warnings.warn('The following placeholders are declared in the '
                          'environment but '
                          f'unused in the spec: {extra}')

        self.data['tasks'] = [
            normalize_task(task) for task in self.data['tasks']
        ]

        # NOTE: for simple projects, project root is the parent folder
        # of pipeline.yaml, for package projects is the parent folder
        # of setup.py
        if look_up_project_root_recursively:
            project_root = (
                None if not self._parent_path else
                default.find_root_recursively(
                    starting_dir=self._parent_path,
                    filename=None if not self._path else self._path.name))
        else:
            project_root = self._parent_path

        # make sure the folder where the pipeline is located is in
        # sys.path otherwise dynamic imports needed by TaskSpec will fail
        with add_to_sys_path(self._parent_path, chdir=False):
            self.data['tasks'] = [
                TaskSpec(t,
                         self.data['meta'],
                         project_root=project_root,
                         lazy_import=lazy_import,
                         reload=reload) for t in self.data['tasks']
            ]
    else:
        self.data['meta'] = Meta.empty()
def test_add_cli_args_from_env_dict_keys():
    """Each env key becomes an env__-prefixed CLI argument."""
    parser = ArgumentParser()

    _add_cli_args_from_env_dict_keys(parser, EnvDict({'a': 1}))

    registered = {action.dest for action in parser._actions}
    assert registered == {'env__a', 'help'}