def get_config(
    file_glob: str, globals_dict: Optional[Dict[str, Any]] = None
) -> Dict[str, Any]:
    """Load the configuration matching ``file_glob``.

    A ``TemplatedConfigLoader`` is created over ``conf/base`` and
    ``conf/local`` below ``BASE_DIR``, with ``*globals.yml`` as its globals
    pattern and ``globals_dict`` as its globals dictionary.

    Args:
        file_glob: Pattern handed to the loader's ``get`` method.
        globals_dict: Optional globals dictionary forwarded to the loader.

    Returns:
        The result of ``TemplatedConfigLoader.get(file_glob)``.
    """
    search_dirs = [
        str(BASE_DIR / sub_dir) for sub_dir in ("conf/base", "conf/local")
    ]
    loader = TemplatedConfigLoader(
        search_dirs,
        globals_pattern="*globals.yml",
        globals_dict=globals_dict,
    )
    return loader.get(file_glob)
def get_config(env, patterns, globals_pattern="globals*.y*ml"):
    """Load templated configuration for one or more environments.

    Args:
        env: A single environment name, or a list/tuple of names. Each name
            is mapped to the configuration directory ``conf/<name>``.
        patterns: A single glob pattern, or an iterable of glob patterns,
            passed to the loader's ``get`` method.
        globals_pattern: Globals pattern forwarded to
            ``TemplatedConfigLoader``.

    Returns:
        The result of ``TemplatedConfigLoader.get`` for the given patterns.
    """
    # Accept tuples as well as lists; anything else is one environment.
    envs = list(env) if isinstance(env, (list, tuple)) else [env]

    # A bare string would otherwise be star-unpacked character by character.
    if isinstance(patterns, str):
        patterns = [patterns]

    conf_loader = TemplatedConfigLoader(
        [f"conf/{single_env}" for single_env in envs],
        globals_pattern=globals_pattern,
    )
    return conf_loader.get(*patterns)
def register_config_loader(
    self, conf_paths: Iterable[str]
) -> ConfigLoader:
    """Create the config loader for ``conf_paths``.

    The globals dictionary handed to ``TemplatedConfigLoader`` is whatever
    ``self.read_env()`` returns.
    """
    env_globals = self.read_env()
    return TemplatedConfigLoader(conf_paths, globals_dict=env_globals)
def test_catalog_with_jinja2_syntax(self, tmp_path, conf_paths, template_config):
    """Catalog loaded with ``template_config`` globals equals the expected
    fast/slow trains and cars entries."""
    local_dir = tmp_path / "local"
    local_dir.mkdir(exist_ok=True)

    expected = {
        "fast-trains": {"type": "MemoryDataSet"},
        "fast-cars": {
            "type": "pandas.CSVDataSet",
            "filepath": "s3a://boat-and-car-bucket/fast-cars.csv",
            "save_args": {"index": True},
        },
        "slow-trains": {"type": "MemoryDataSet"},
        "slow-cars": {
            "type": "pandas.CSVDataSet",
            "filepath": "s3a://boat-and-car-bucket/slow-cars.csv",
            "save_args": {"index": True},
        },
    }

    loader = TemplatedConfigLoader(conf_paths, globals_dict=template_config)
    loaded = loader.get("catalog*.yml")
    assert loaded == expected
def test_catalog_parameterized_no_params_no_default(self, tmp_path, conf_paths):
    """Loading the catalog with no globals supplied must raise a
    ``ValueError`` matching "Failed to format pattern"."""
    local_dir = tmp_path / "local"
    local_dir.mkdir(exist_ok=True)

    expect_failure = pytest.raises(ValueError, match="Failed to format pattern")
    with expect_failure:
        # Both construction and loading stay inside the guarded block.
        loader = TemplatedConfigLoader(conf_paths)
        loader.get("catalog*.yml")
def register_config_loader(
    self, conf_paths: Iterable[str]
) -> TemplatedConfigLoader:
    """Create a ``TemplatedConfigLoader`` for ``conf_paths``.

    The loader receives ``*globals.yml`` as its globals pattern and an
    empty globals dictionary.
    """
    loader_options = {
        "globals_pattern": "*globals.yml",
        "globals_dict": {},
    }
    return TemplatedConfigLoader(conf_paths, **loader_options)
def test_catalog_parameterized_empty_params_with_default(
    self, tmp_path, conf_paths
):
    """With an empty globals dictionary the boats users list is still
    ``["fred", "ron"]``."""
    local_dir = tmp_path / "local"
    local_dir.mkdir(exist_ok=True)

    loader = TemplatedConfigLoader(conf_paths, globals_dict={})
    boats = loader.get("catalog*.yml")["boats"]
    assert boats["users"] == ["fred", "ron"]
def register_config_loader(self, conf_paths: Iterable[str]) -> ConfigLoader:
    """Create the config loader with commit and branch globals.

    ``commit_id`` and ``branch_name`` are read from the
    ``KEDRO_KUBEFLOW_COMMIT`` and ``KEDRO_KUBEFLOW_BRANCH`` environment
    variables, each looked up with a default of ``None``.
    """
    env_sources = (
        ("commit_id", "KEDRO_KUBEFLOW_COMMIT"),
        ("branch_name", "KEDRO_KUBEFLOW_BRANCH"),
    )
    template_globals = {
        key: getenv(variable, default=None) for key, variable in env_sources
    }
    return TemplatedConfigLoader(conf_paths, globals_dict=template_globals)
def test_catalog_parameterized_exceptional(
    self, tmp_path, conf_paths, template_config_exceptional
):
    """Templating with ``template_config_exceptional`` globals resolves the
    ``postcode`` entry to ``"NW10 2JK"``."""
    local_dir = tmp_path / "local"
    local_dir.mkdir(exist_ok=True)

    loader = TemplatedConfigLoader(
        conf_paths, globals_dict=template_config_exceptional
    )
    loaded = loader.get("catalog*.yml")
    assert loaded["postcode"] == "NW10 2JK"
def register_config_loader(
    self, conf_paths: Iterable[str], env: str, extra_params: Dict[str, Any]
) -> TemplatedConfigLoader:
    """Create a ``TemplatedConfigLoader`` for ``conf_paths``.

    The only global supplied is ``AWS_S3_FEATURE_STORE_BUCKET``, read from
    the environment variable of the same name (``None`` when unset).
    ``env`` and ``extra_params`` are accepted but not used here.
    """
    bucket_value = os.environ.get("AWS_S3_FEATURE_STORE_BUCKET")
    template_globals = {"AWS_S3_FEATURE_STORE_BUCKET": bucket_value}
    return TemplatedConfigLoader(conf_paths, globals_dict=template_globals)
def test_catlog_parameterized_no_params(self, tmp_path, conf_paths):
    """Without globals, the ``${...}`` placeholders in the boats entry are
    returned unreplaced."""
    local_dir = tmp_path / "local"
    local_dir.mkdir(exist_ok=True)

    loader = TemplatedConfigLoader(conf_paths)
    boats = loader.get("catalog*.yml")["boats"]

    assert boats["type"] == "${boat_data_type}"
    assert (
        boats["filepath"]
        == "${s3_bucket}/${raw_data_folder}/${boat_file_name}"
    )

    columns = boats["columns"]
    assert columns["id"] == "${string_type}"
    assert columns["name"] == "${string_type}"
    assert columns["top_speed"] == "${float_type}"

    assert boats["users"] == ["fred", "${write_only_user}"]
def register_config_loader(self, conf_paths: Iterable[str]) -> ConfigLoader:
    """Create the config loader, adding DB connection globals when available.

    When all of ``DB_HOST``, ``DB_USER`` and ``DB_PASSWORD`` are present in
    the environment, a ``dbconn`` entry with ``host``, ``user`` and
    ``password`` keys is added to the globals dictionary. If any of them is
    missing, the globals dictionary is left empty.

    Args:
        conf_paths: Configuration paths forwarded to the loader.

    Returns:
        A ``TemplatedConfigLoader`` using ``*globals.yml`` as its globals
        pattern and the dictionary described above as its globals.
    """
    required_vars = {"DB_HOST", "DB_USER", "DB_PASSWORD"}
    template_globals = {}

    # All-or-nothing: a partial set of credentials is ignored.
    if required_vars.issubset(os.environ):
        template_globals["dbconn"] = {
            "host": os.environ["DB_HOST"],
            "user": os.environ["DB_USER"],
            "password": os.environ["DB_PASSWORD"],
        }

    return TemplatedConfigLoader(
        conf_paths,
        globals_pattern="*globals.yml",
        globals_dict=template_globals,
    )
def test_catalog_parameterized_w_globals(self, tmp_path, conf_paths):
    """With the ``*globals.yml`` globals pattern, the boats entry is fully
    resolved to concrete values."""
    local_dir = tmp_path / "local"
    local_dir.mkdir(exist_ok=True)

    loader = TemplatedConfigLoader(conf_paths, globals_pattern="*globals.yml")
    boats = loader.get("catalog*.yml")["boats"]

    assert boats["type"] == "SparkDataSet"
    assert boats["filepath"] == "s3a://boat-and-car-bucket/01_raw/boats.csv"

    columns = boats["columns"]
    assert columns["id"] == "VARCHAR"
    assert columns["name"] == "VARCHAR"
    assert columns["top_speed"] == "FLOAT"

    assert boats["users"] == ["fred", "ron"]
def test_catalog_parameterized_w_dict(self, tmp_path, conf_paths, template_config):
    """With ``template_config`` passed as the globals dictionary, the boats
    entry is fully resolved to concrete values."""
    local_dir = tmp_path / "local"
    local_dir.mkdir(exist_ok=True)

    loader = TemplatedConfigLoader(conf_paths, globals_dict=template_config)
    boats = loader.get("catalog*.yml")["boats"]

    assert boats["type"] == "SparkDataSet"
    assert boats["filepath"] == "s3a://boat-and-car-bucket/01_raw/boats.csv"

    expected_columns = (
        ("id", "VARCHAR"),
        ("name", "VARCHAR"),
        ("top_speed", "FLOAT"),
    )
    for column, sql_type in expected_columns:
        assert boats["columns"][column] == sql_type

    assert boats["users"] == ["fred", "ron"]
def config_loader(self) -> ConfigLoader:
    """Return a ``TemplatedConfigLoader`` carrying the extra git parameters.

    The loader reuses ``conf_paths`` from ``self._get_config_loader()``.
    Its globals are ``path`` (taken from the extra params, defaulting to
    the current working directory, with ``~`` expanded), ``before`` and
    ``after`` (each ``None`` when absent from the extra params).
    """
    import os

    base_loader = self._get_config_loader()

    params = self._extra_params or {}
    repo_path = os.path.expanduser(params.get("path", os.getcwd()))
    template_globals = {
        "path": repo_path,
        "before": params.get("before"),
        "after": params.get("after"),
    }

    return TemplatedConfigLoader(
        base_loader.conf_paths, globals_dict=template_globals
    )
def test_catalog_parameterized_advanced(
    self, tmp_path, conf_paths, template_config_advanced
):
    """Advanced templating: the planes entry resolves to nested credentials,
    an integer batch size, a truthy flag and a list of tables."""
    local_dir = tmp_path / "local"
    local_dir.mkdir(exist_ok=True)

    loader = TemplatedConfigLoader(
        conf_paths, globals_dict=template_config_advanced
    )
    planes = loader.get("catalog*.yml")["planes"]

    assert planes["type"] == "SparkJDBCDataSet"

    credentials = planes["postgres_credentials"]
    assert credentials["user"] == "Fakeuser"
    assert credentials["password"] == "F@keP@55word"

    assert planes["batch_size"] == 10000
    assert planes["need_permission"]
    assert planes["secret_tables"] == ["models", "pilots", "engines"]
def test_catlog_parameterized_w_dict_namespaced(
    self, tmp_path, conf_paths, template_config, get_environ
):
    """With globals nested under the ``global`` and ``env`` keys, the boats
    entry is fully resolved to concrete values."""
    local_dir = tmp_path / "local"
    local_dir.mkdir(exist_ok=True)

    namespaced_globals = {"global": template_config, "env": get_environ}
    loader = TemplatedConfigLoader(conf_paths, globals_dict=namespaced_globals)
    boats = loader.get("catalog*.yml")["boats"]

    assert boats["type"] == "SparkDataSet"
    assert boats["filepath"] == "s3a://boat-and-car-bucket/01_raw/boats.csv"

    columns = boats["columns"]
    assert columns["id"] == "VARCHAR"
    assert columns["name"] == "VARCHAR"
    assert columns["top_speed"] == "FLOAT"

    assert boats["users"] == ["fred", "ron"]
def _create_config_loader(  # pylint: disable=no-self-use
    self, conf_paths
) -> ConfigLoader:
    """Return a ``TemplatedConfigLoader`` built from ``conf_paths`` alone,
    with no globals pattern or globals dictionary supplied."""
    templated_loader = TemplatedConfigLoader(conf_paths)
    return templated_loader