def test_DataContextConfig_with_FilesystemStoreBackendDefaults_and_simple_defaults_no_root_directory(
    construct_data_context_config, default_pandas_datasource_config
):
    """
    What does this test and why?
    A minimal DataContextConfig built with FilesystemStoreBackendDefaults — and
    without the optional root_directory parameter — should serialize to exactly
    the expected default configuration and pass validation.
    """
    pandas_datasource = DatasourceConfig(
        class_name="PandasDatasource",
        batch_kwargs_generators={
            "subdir_reader": {
                "class_name": "SubdirReaderBatchKwargsGenerator",
                "base_directory": "../data/",
            }
        },
    )
    config_under_test = DataContextConfig(
        datasources={"my_pandas_datasource": pandas_datasource},
        store_backend_defaults=FilesystemStoreBackendDefaults(),
    )

    # Build the expected serialized form via the shared fixture helper, reusing
    # the auto-generated data_context_id from the config under test.
    expected = construct_data_context_config(
        data_context_id=config_under_test.anonymous_usage_statistics.data_context_id,
        datasources=default_pandas_datasource_config,
    )

    assert DataContextConfigSchema().dump(config_under_test) == expected
    assert DataContext.validate_config(project_config=config_under_test)
def substitute_all_config_variables(data, replace_variables_dict):
    """
    Substitute all config variables of the form ${SOME_VARIABLE} in a dictionary-like
    config object for their values.

    The method traverses the dictionary recursively.

    :param data: config object to process — a DataContextConfig, dict, list, or scalar
    :param replace_variables_dict: mapping of variable names to replacement values
    :return: a dictionary with all the variables replaced with their values
    """
    if isinstance(data, DataContextConfig):
        # Serialize to a plain dict so the generic traversal below applies.
        data = DataContextConfigSchema().dump(data)

    # NOTE: OrderedDict is a dict subclass, so a single isinstance(data, dict)
    # check covers both (the previous `or isinstance(data, OrderedDict)` was redundant).
    if isinstance(data, dict):
        return {
            k: substitute_all_config_variables(v, replace_variables_dict)
            for k, v in data.items()
        }
    if isinstance(data, list):
        return [
            substitute_all_config_variables(v, replace_variables_dict) for v in data
        ]
    # Leaf value: perform the actual ${VAR} substitution.
    return substitute_config_variable(data, replace_variables_dict)
def test_DataContextConfig_with_BaseStoreBackendDefaults_and_simple_defaults(
    construct_data_context_config, default_pandas_datasource_config
):
    """
    What does this test and why?
    A minimal DataContextConfig built with BaseStoreBackendDefaults should
    serialize to the expected default configuration and produce a valid
    DataContextConfig.
    """
    generator_config = {
        "subdir_reader": {
            "class_name": "SubdirReaderBatchKwargsGenerator",
            "base_directory": "../data/",
        }
    }
    config_under_test = DataContextConfig(
        datasources={
            "my_pandas_datasource": DatasourceConfig(
                class_name="PandasDatasource",
                batch_kwargs_generators=generator_config,
            )
        },
        store_backend_defaults=BaseStoreBackendDefaults(),
    )

    # Expected serialization, reusing the auto-generated data_context_id.
    expected = construct_data_context_config(
        data_context_id=config_under_test.anonymous_usage_statistics.data_context_id,
        datasources=default_pandas_datasource_config,
    )

    assert DataContextConfigSchema().dump(config_under_test) == expected
    assert DataContext.validate_config(project_config=config_under_test)
def test_create_pandas_datasource(data_context, tmp_path_factory):
    """
    What does this test and why?
    Adding a PandasDatasource via add_datasource should update both the in-memory
    config and the great_expectations.yml on disk, including the supplied
    batch_kwargs_generators.
    """
    basedir = tmp_path_factory.mktemp("test_create_pandas_datasource")
    name = "test_pandas_datasource"
    class_name = "PandasDatasource"
    data_context.add_datasource(
        name,
        class_name=class_name,
        batch_kwargs_generators={
            "subdir_reader": {
                "class_name": "SubdirReaderBatchKwargsGenerator",
                "base_directory": str(basedir),
            }
        },
    )
    data_context_config = data_context.get_config()
    assert name in data_context_config["datasources"]
    assert data_context_config["datasources"][name]["class_name"] == class_name

    # We should now see updated configs.
    # The on-disk project config should reflect the newly added datasource.
    with open(
        os.path.join(data_context.root_directory, "great_expectations.yml"), "r"
    ) as data_context_config_file:
        # `yaml` is the module-level ruamel.yaml.YAML() instance, so no Loader arg is needed.
        data_context_file_config = yaml.load(data_context_config_file)

    assert (
        data_context_file_config["datasources"][name]
        == DataContextConfigSchema().dump(data_context_config)["datasources"][name]
    )

    # We should have added a default generator built from the default config.
    assert (
        data_context_file_config["datasources"][name]["batch_kwargs_generators"][
            "subdir_reader"
        ]["class_name"]
        == "SubdirReaderBatchKwargsGenerator"
    )
def test_DataContextConfig_with_FilesystemStoreBackendDefaults_and_simple_defaults(
    construct_data_context_config, default_pandas_datasource_config
):
    """
    What does this test and why?
    Ensure that a very simple DataContextConfig setup using FilesystemStoreBackendDefaults
    is created accurately
    This test sets the root_dir parameter
    """
    test_root_directory = "test_root_dir"

    store_backend_defaults = FilesystemStoreBackendDefaults(
        root_directory=test_root_directory
    )
    data_context_config = DataContextConfig(
        datasources={
            "my_pandas_datasource": DatasourceConfig(
                class_name="PandasDatasource",
                batch_kwargs_generators={
                    "subdir_reader": {
                        "class_name": "SubdirReaderBatchKwargsGenerator",
                        "base_directory": "../data/",
                    }
                },
            )
        },
        store_backend_defaults=store_backend_defaults,
    )

    # Create desired config
    data_context_id = data_context_config.anonymous_usage_statistics.data_context_id
    desired_config = construct_data_context_config(
        data_context_id=data_context_id, datasources=default_pandas_datasource_config
    )

    # Add root_directory to every filesystem-backed store and the local data docs
    # site (previously four copy-pasted assignments; de-duplicated into a loop).
    for store_name_key in (
        "expectations_store_name",
        "validations_store_name",
        "checkpoint_store_name",
    ):
        desired_config["stores"][desired_config[store_name_key]]["store_backend"][
            "root_directory"
        ] = test_root_directory
    desired_config["data_docs_sites"]["local_site"]["store_backend"][
        "root_directory"
    ] = test_root_directory

    data_context_config_schema = DataContextConfigSchema()
    assert filter_properties_dict(
        properties=data_context_config_schema.dump(data_context_config)
    ) == filter_properties_dict(properties=desired_config)
    assert DataContext.validate_config(project_config=data_context_config)
def test_override_general_defaults(
    construct_data_context_config,
    default_pandas_datasource_config,
    default_spark_datasource_config,
):
    """
    What does this test and why?
    A DataContextConfig should be able to be created by passing items into the constructor that override any defaults.
    It should also be able to handle multiple datasources, even if they are configured with a dictionary or a DatasourceConfig.
    """
    data_context_config = DataContextConfig(
        config_version=999,
        plugins_directory="custom_plugins_directory",
        config_variables_file_path="custom_config_variables_file_path",
        datasources={
            # One datasource supplied as a plain dict ...
            "my_spark_datasource": {
                "data_asset_type": {
                    "class_name": "SparkDFDataset",
                    "module_name": "great_expectations.dataset",
                },
                "class_name": "SparkDFDatasource",
                "module_name": "great_expectations.datasource",
                "batch_kwargs_generators": {},
            },
            # ... and one as a DatasourceConfig instance; both forms must be accepted.
            "my_pandas_datasource": DatasourceConfig(
                class_name="PandasDatasource",
                batch_kwargs_generators={
                    "subdir_reader": {
                        "class_name": "SubdirReaderBatchKwargsGenerator",
                        "base_directory": "../data/",
                    }
                },
            ),
        },
        stores={
            "expectations_S3_store": {
                "class_name": "ExpectationsStore",
                "store_backend": {
                    "class_name": "TupleS3StoreBackend",
                    "bucket": "REPLACE_ME",
                    "prefix": "REPLACE_ME",
                },
            },
            "expectations_S3_store2": {
                "class_name": "ExpectationsStore",
                "store_backend": {
                    "class_name": "TupleS3StoreBackend",
                    "bucket": "REPLACE_ME",
                    "prefix": "REPLACE_ME",
                },
            },
            "validations_S3_store": {
                "class_name": "ValidationsStore",
                "store_backend": {
                    "class_name": "TupleS3StoreBackend",
                    "bucket": "REPLACE_ME",
                    "prefix": "REPLACE_ME",
                },
            },
            "validations_S3_store2": {
                "class_name": "ValidationsStore",
                "store_backend": {
                    "class_name": "TupleS3StoreBackend",
                    "bucket": "REPLACE_ME",
                    "prefix": "REPLACE_ME",
                },
            },
            "custom_evaluation_parameter_store": {
                "class_name": "EvaluationParameterStore"
            },
        },
        expectations_store_name="custom_expectations_store_name",
        validations_store_name="custom_validations_store_name",
        evaluation_parameter_store_name="custom_evaluation_parameter_store_name",
        data_docs_sites={
            "s3_site": {
                "class_name": "SiteBuilder",
                "store_backend": {
                    "class_name": "TupleS3StoreBackend",
                    "bucket": "REPLACE_ME",
                },
                "site_index_builder": {
                    "class_name": "DefaultSiteIndexBuilder",
                    "show_cta_footer": True,
                },
            },
            "local_site": {
                "class_name": "SiteBuilder",
                "show_how_to_buttons": True,
                "site_index_builder": {
                    "class_name": "DefaultSiteIndexBuilder",
                    "show_cta_footer": True,
                },
                "store_backend": {
                    "base_directory": "uncommitted/data_docs/local_site/",
                    "class_name": "TupleFilesystemStoreBackend",
                },
            },
        },
        validation_operators={
            "custom_action_list_operator": {
                "class_name": "ActionListValidationOperator",
                "action_list": [
                    {
                        "name": "custom_store_validation_result",
                        "action": {"class_name": "CustomStoreValidationResultAction"},
                    },
                    {
                        "name": "store_evaluation_params",
                        "action": {"class_name": "StoreEvaluationParametersAction"},
                    },
                    {
                        "name": "update_data_docs",
                        "action": {"class_name": "UpdateDataDocsAction"},
                    },
                ],
            }
        },
        anonymous_usage_statistics={"enabled": True},
    )

    # Expected serialized stores: identical content to the overrides passed above
    # (dict equality below is key-order independent).
    desired_stores = {
        "custom_evaluation_parameter_store": {"class_name": "EvaluationParameterStore"},
        "expectations_S3_store": {
            "class_name": "ExpectationsStore",
            "store_backend": {
                "bucket": "REPLACE_ME",
                "class_name": "TupleS3StoreBackend",
                "prefix": "REPLACE_ME",
            },
        },
        "expectations_S3_store2": {
            "class_name": "ExpectationsStore",
            "store_backend": {
                "bucket": "REPLACE_ME",
                "class_name": "TupleS3StoreBackend",
                "prefix": "REPLACE_ME",
            },
        },
        "validations_S3_store": {
            "class_name": "ValidationsStore",
            "store_backend": {
                "bucket": "REPLACE_ME",
                "class_name": "TupleS3StoreBackend",
                "prefix": "REPLACE_ME",
            },
        },
        "validations_S3_store2": {
            "class_name": "ValidationsStore",
            "store_backend": {
                "bucket": "REPLACE_ME",
                "class_name": "TupleS3StoreBackend",
                "prefix": "REPLACE_ME",
            },
        },
    }

    desired_data_docs_sites_config = {
        "local_site": {
            "class_name": "SiteBuilder",
            "show_how_to_buttons": True,
            "site_index_builder": {
                "class_name": "DefaultSiteIndexBuilder",
                "show_cta_footer": True,
            },
            "store_backend": {
                "base_directory": "uncommitted/data_docs/local_site/",
                "class_name": "TupleFilesystemStoreBackend",
            },
        },
        "s3_site": {
            "class_name": "SiteBuilder",
            "site_index_builder": {
                "class_name": "DefaultSiteIndexBuilder",
                "show_cta_footer": True,
            },
            "store_backend": {
                "bucket": "REPLACE_ME",
                "class_name": "TupleS3StoreBackend",
            },
        },
    }

    desired_validation_operators = {
        "custom_action_list_operator": {
            "class_name": "ActionListValidationOperator",
            "action_list": [
                {
                    "name": "custom_store_validation_result",
                    "action": {"class_name": "CustomStoreValidationResultAction"},
                },
                {
                    "name": "store_evaluation_params",
                    "action": {"class_name": "StoreEvaluationParametersAction"},
                },
                {
                    "name": "update_data_docs",
                    "action": {"class_name": "UpdateDataDocsAction"},
                },
            ],
        }
    }

    desired_config = construct_data_context_config(
        data_context_id=data_context_config.anonymous_usage_statistics.data_context_id,
        # Both datasources (dict-configured and DatasourceConfig-configured) must appear.
        datasources={
            **default_pandas_datasource_config,
            **default_spark_datasource_config,
        },
        # config_version is serialized as a float even though an int was passed in.
        config_version=999.0,
        expectations_store_name="custom_expectations_store_name",
        validations_store_name="custom_validations_store_name",
        evaluation_parameter_store_name="custom_evaluation_parameter_store_name",
        stores=desired_stores,
        validation_operators=desired_validation_operators,
        data_docs_sites=desired_data_docs_sites_config,
        plugins_directory="custom_plugins_directory",
    )
    desired_config["config_variables_file_path"] = "custom_config_variables_file_path"

    data_context_config_schema = DataContextConfigSchema()
    assert data_context_config_schema.dump(data_context_config) == desired_config
    assert DataContext.validate_config(project_config=data_context_config)
def test_DataContextConfig_with_DatabaseStoreBackendDefaults_using_all_parameters(
    construct_data_context_config, default_pandas_datasource_config
):
    """
    What does this test and why?
    Make sure that DatabaseStoreBackendDefaults parameters are handled appropriately
    E.g. Make sure that default_credentials is ignored if individual store credentials are passed
    """
    data_context_config = DataContextConfig(
        datasources={
            "my_pandas_datasource": DatasourceConfig(
                class_name="PandasDatasource",
                module_name="great_expectations.datasource",
                data_asset_type={
                    "module_name": "great_expectations.dataset",
                    "class_name": "PandasDataset",
                },
                batch_kwargs_generators={
                    "subdir_reader": {
                        "class_name": "SubdirReaderBatchKwargsGenerator",
                        "base_directory": "../data/",
                    }
                },
            )
        },
        store_backend_defaults=DatabaseStoreBackendDefaults(
            # default_credentials should be superseded by the per-store credentials below.
            default_credentials={
                "drivername": "postgresql",
                "host": "localhost",
                "port": "65432",
                "username": "******",
                "password": "******",
                "database": "ge_tutorials",
            },
            expectations_store_credentials={
                "drivername": "custom_expectations_store_drivername",
                "host": "custom_expectations_store_host",
                "port": "custom_expectations_store_port",
                "username": "******",
                "password": "******",
                "database": "custom_expectations_store_database",
            },
            validations_store_credentials={
                "drivername": "custom_validations_store_drivername",
                "host": "custom_validations_store_host",
                "port": "custom_validations_store_port",
                "username": "******",
                "password": "******",
                "database": "custom_validations_store_database",
            },
            expectations_store_name="custom_expectations_database_store_name",
            validations_store_name="custom_validations_database_store_name",
            evaluation_parameter_store_name="custom_evaluation_parameter_store_name",
        ),
    )

    # Create desired config
    desired_stores_config = {
        "custom_evaluation_parameter_store_name": {
            "class_name": "EvaluationParameterStore"
        },
        "custom_expectations_database_store_name": {
            "class_name": "ExpectationsStore",
            "store_backend": {
                "class_name": "DatabaseStoreBackend",
                # The per-store credentials win over default_credentials.
                "credentials": {
                    "database": "custom_expectations_store_database",
                    "drivername": "custom_expectations_store_drivername",
                    "host": "custom_expectations_store_host",
                    "password": "******",
                    "port": "custom_expectations_store_port",
                    "username": "******",
                },
            },
        },
        "custom_validations_database_store_name": {
            "class_name": "ValidationsStore",
            "store_backend": {
                "class_name": "DatabaseStoreBackend",
                "credentials": {
                    "database": "custom_validations_store_database",
                    "drivername": "custom_validations_store_drivername",
                    "host": "custom_validations_store_host",
                    "password": "******",
                    "port": "custom_validations_store_port",
                    "username": "******",
                },
            },
        },
    }
    # Data docs stay on the filesystem backend; database credentials never reach them.
    desired_data_docs_sites_config = {
        "local_site": {
            "class_name": "SiteBuilder",
            "show_how_to_buttons": True,
            "site_index_builder": {
                "class_name": "DefaultSiteIndexBuilder",
                "show_cta_footer": True,
            },
            "store_backend": {
                "base_directory": "uncommitted/data_docs/local_site/",
                "class_name": "TupleFilesystemStoreBackend",
            },
        }
    }

    desired_config = construct_data_context_config(
        data_context_id=data_context_config.anonymous_usage_statistics.data_context_id,
        datasources=default_pandas_datasource_config,
        expectations_store_name="custom_expectations_database_store_name",
        validations_store_name="custom_validations_database_store_name",
        evaluation_parameter_store_name="custom_evaluation_parameter_store_name",
        stores=desired_stores_config,
        data_docs_sites=desired_data_docs_sites_config,
    )

    data_context_config_schema = DataContextConfigSchema()
    assert data_context_config_schema.dump(data_context_config) == desired_config
    assert DataContext.validate_config(project_config=data_context_config)
def test_DataContextConfig_with_DatabaseStoreBackendDefaults(
    construct_data_context_config, default_pandas_datasource_config
):
    """
    What does this test and why?
    Make sure that using DatabaseStoreBackendDefaults as the store_backend_defaults applies appropriate
    defaults, including default_credentials getting propagated to stores and not data_docs
    """
    data_context_config = DataContextConfig(
        datasources={
            "my_pandas_datasource": DatasourceConfig(
                class_name="PandasDatasource",
                module_name="great_expectations.datasource",
                data_asset_type={
                    "module_name": "great_expectations.dataset",
                    "class_name": "PandasDataset",
                },
                batch_kwargs_generators={
                    "subdir_reader": {
                        "class_name": "SubdirReaderBatchKwargsGenerator",
                        "base_directory": "../data/",
                    }
                },
            )
        },
        store_backend_defaults=DatabaseStoreBackendDefaults(
            default_credentials={
                "drivername": "postgresql",
                "host": "localhost",
                "port": "65432",
                "username": "******",
                "password": "******",
                "database": "ge_tutorials",
            },
        ),
    )

    # Create desired config
    desired_stores_config = {
        "evaluation_parameter_store": {"class_name": "EvaluationParameterStore"},
        "expectations_database_store": {
            "class_name": "ExpectationsStore",
            "store_backend": {
                "class_name": "DatabaseStoreBackend",
                # default_credentials should propagate to every database-backed store ...
                "credentials": {
                    "drivername": "postgresql",
                    "host": "localhost",
                    "port": "65432",
                    "username": "******",
                    "password": "******",
                    "database": "ge_tutorials",
                },
            },
        },
        "validations_database_store": {
            "class_name": "ValidationsStore",
            "store_backend": {
                "class_name": "DatabaseStoreBackend",
                "credentials": {
                    "drivername": "postgresql",
                    "host": "localhost",
                    "port": "65432",
                    "username": "******",
                    "password": "******",
                    "database": "ge_tutorials",
                },
            },
        },
    }
    # ... but NOT to the data docs site, which stays on the filesystem backend.
    desired_data_docs_sites_config = {
        "local_site": {
            "class_name": "SiteBuilder",
            "show_how_to_buttons": True,
            "site_index_builder": {
                "class_name": "DefaultSiteIndexBuilder",
                "show_cta_footer": True,
            },
            "store_backend": {
                "base_directory": "uncommitted/data_docs/local_site/",
                "class_name": "TupleFilesystemStoreBackend",
            },
        }
    }

    desired_config = construct_data_context_config(
        data_context_id=data_context_config.anonymous_usage_statistics.data_context_id,
        datasources=default_pandas_datasource_config,
        expectations_store_name="expectations_database_store",
        validations_store_name="validations_database_store",
        evaluation_parameter_store_name=DataContextConfigDefaults.DEFAULT_EVALUATION_PARAMETER_STORE_NAME.value,
        stores=desired_stores_config,
        data_docs_sites=desired_data_docs_sites_config,
    )

    data_context_config_schema = DataContextConfigSchema()
    assert data_context_config_schema.dump(data_context_config) == desired_config
    assert DataContext.validate_config(project_config=data_context_config)
def test_DataContextConfig_with_GCSStoreBackendDefaults_using_all_parameters(
    construct_data_context_config, default_pandas_datasource_config
):
    """
    What does this test and why?
    Make sure that GCSStoreBackendDefaults parameters are handled appropriately
    E.g. Make sure that default_bucket_name is ignored if individual bucket names are passed
    """
    data_context_config = DataContextConfig(
        datasources={
            "my_pandas_datasource": DatasourceConfig(
                class_name="PandasDatasource",
                module_name="great_expectations.datasource",
                data_asset_type={
                    "module_name": "great_expectations.dataset",
                    "class_name": "PandasDataset",
                },
                batch_kwargs_generators={
                    "subdir_reader": {
                        "class_name": "SubdirReaderBatchKwargsGenerator",
                        "base_directory": "../data/",
                    }
                },
            )
        },
        store_backend_defaults=GCSStoreBackendDefaults(
            # The default bucket/project should be ignored because per-store
            # values are supplied for every store and the data docs site.
            default_bucket_name="custom_default_bucket_name",
            default_project_name="custom_default_project_name",
            expectations_store_bucket_name="custom_expectations_store_bucket_name",
            validations_store_bucket_name="custom_validations_store_bucket_name",
            data_docs_bucket_name="custom_data_docs_store_bucket_name",
            expectations_store_project_name="custom_expectations_store_project_name",
            validations_store_project_name="custom_validations_store_project_name",
            data_docs_project_name="custom_data_docs_store_project_name",
            expectations_store_prefix="custom_expectations_store_prefix",
            validations_store_prefix="custom_validations_store_prefix",
            data_docs_prefix="custom_data_docs_prefix",
            expectations_store_name="custom_expectations_GCS_store_name",
            validations_store_name="custom_validations_GCS_store_name",
            evaluation_parameter_store_name="custom_evaluation_parameter_store_name",
        ),
    )

    # Create desired config
    desired_stores_config = {
        "custom_evaluation_parameter_store_name": {
            "class_name": "EvaluationParameterStore"
        },
        "custom_expectations_GCS_store_name": {
            "class_name": "ExpectationsStore",
            "store_backend": {
                "bucket": "custom_expectations_store_bucket_name",
                "project": "custom_expectations_store_project_name",
                "class_name": "TupleGCSStoreBackend",
                "prefix": "custom_expectations_store_prefix",
            },
        },
        "custom_validations_GCS_store_name": {
            "class_name": "ValidationsStore",
            "store_backend": {
                "bucket": "custom_validations_store_bucket_name",
                "project": "custom_validations_store_project_name",
                "class_name": "TupleGCSStoreBackend",
                "prefix": "custom_validations_store_prefix",
            },
        },
    }
    desired_data_docs_sites_config = {
        "gcs_site": {
            "class_name": "SiteBuilder",
            "show_how_to_buttons": True,
            "site_index_builder": {
                "class_name": "DefaultSiteIndexBuilder",
                "show_cta_footer": True,
            },
            "store_backend": {
                "bucket": "custom_data_docs_store_bucket_name",
                "project": "custom_data_docs_store_project_name",
                "class_name": "TupleGCSStoreBackend",
                "prefix": "custom_data_docs_prefix",
            },
        }
    }

    desired_config = construct_data_context_config(
        data_context_id=data_context_config.anonymous_usage_statistics.data_context_id,
        datasources=default_pandas_datasource_config,
        expectations_store_name="custom_expectations_GCS_store_name",
        validations_store_name="custom_validations_GCS_store_name",
        evaluation_parameter_store_name="custom_evaluation_parameter_store_name",
        stores=desired_stores_config,
        data_docs_sites=desired_data_docs_sites_config,
    )

    data_context_config_schema = DataContextConfigSchema()
    assert data_context_config_schema.dump(data_context_config) == desired_config
    assert DataContext.validate_config(project_config=data_context_config)
def test_DataContextConfig_with_GCSStoreBackendDefaults(
    construct_data_context_config, default_pandas_datasource_config
):
    """
    What does this test and why?
    Make sure that using GCSStoreBackendDefaults as the store_backend_defaults applies appropriate
    defaults, including default_bucket_name & default_project_name getting propagated
    to all stores.
    """
    data_context_config = DataContextConfig(
        datasources={
            "my_pandas_datasource": DatasourceConfig(
                class_name="PandasDatasource",
                module_name="great_expectations.datasource",
                data_asset_type={
                    "module_name": "great_expectations.dataset",
                    "class_name": "PandasDataset",
                },
                batch_kwargs_generators={
                    "subdir_reader": {
                        "class_name": "SubdirReaderBatchKwargsGenerator",
                        "base_directory": "../data/",
                    }
                },
            )
        },
        store_backend_defaults=GCSStoreBackendDefaults(
            default_bucket_name="my_default_bucket",
            default_project_name="my_default_project",
        ),
    )

    # Create desired config
    data_context_id = data_context_config.anonymous_usage_statistics.data_context_id
    desired_stores_config = {
        "evaluation_parameter_store": {"class_name": "EvaluationParameterStore"},
        "expectations_GCS_store": {
            "class_name": "ExpectationsStore",
            "store_backend": {
                # The default bucket/project propagate to every GCS-backed store.
                "bucket": "my_default_bucket",
                "project": "my_default_project",
                "class_name": "TupleGCSStoreBackend",
                "prefix": "expectations",
            },
        },
        "validations_GCS_store": {
            "class_name": "ValidationsStore",
            "store_backend": {
                "bucket": "my_default_bucket",
                "project": "my_default_project",
                "class_name": "TupleGCSStoreBackend",
                "prefix": "validations",
            },
        },
    }
    desired_data_docs_sites_config = {
        "gcs_site": {
            "class_name": "SiteBuilder",
            "show_how_to_buttons": True,
            "site_index_builder": {
                "class_name": "DefaultSiteIndexBuilder",
                "show_cta_footer": True,
            },
            "store_backend": {
                "bucket": "my_default_bucket",
                "project": "my_default_project",
                "class_name": "TupleGCSStoreBackend",
                "prefix": "data_docs",
            },
        }
    }

    desired_config = construct_data_context_config(
        data_context_id=data_context_id,
        datasources=default_pandas_datasource_config,
        expectations_store_name="expectations_GCS_store",
        validations_store_name="validations_GCS_store",
        evaluation_parameter_store_name=DataContextConfigDefaults.DEFAULT_EVALUATION_PARAMETER_STORE_NAME.value,
        stores=desired_stores_config,
        data_docs_sites=desired_data_docs_sites_config,
    )

    data_context_config_schema = DataContextConfigSchema()
    assert data_context_config_schema.dump(data_context_config) == desired_config
    assert DataContext.validate_config(project_config=data_context_config)
def test_DataContextConfig_with_S3StoreBackendDefaults_and_simple_defaults_with_variable_sub(
    monkeypatch, construct_data_context_config, default_pandas_datasource_config
):
    """
    What does this test and why?
    Ensure that a very simple DataContextConfig setup with many defaults is created accurately
    and produces a valid DataContextConfig
    """
    monkeypatch.setenv("SUBSTITUTED_BASE_DIRECTORY", "../data/")

    data_context_config = DataContextConfig(
        datasources={
            "my_pandas_datasource": DatasourceConfig(
                class_name="PandasDatasource",
                batch_kwargs_generators={
                    "subdir_reader": {
                        "class_name": "SubdirReaderBatchKwargsGenerator",
                        # Kept unsubstituted in the stored config; the env var is
                        # only resolved when the context is actually built (below).
                        "base_directory": "${SUBSTITUTED_BASE_DIRECTORY}",
                    }
                },
            )
        },
        store_backend_defaults=S3StoreBackendDefaults(
            default_bucket_name="my_default_bucket"
        ),
    )

    # Create desired config
    desired_stores_config = {
        "evaluation_parameter_store": {"class_name": "EvaluationParameterStore"},
        "expectations_S3_store": {
            "class_name": "ExpectationsStore",
            "store_backend": {
                "bucket": "my_default_bucket",
                "class_name": "TupleS3StoreBackend",
                "prefix": "expectations",
            },
        },
        "validations_S3_store": {
            "class_name": "ValidationsStore",
            "store_backend": {
                "bucket": "my_default_bucket",
                "class_name": "TupleS3StoreBackend",
                "prefix": "validations",
            },
        },
    }
    desired_data_docs_sites_config = {
        "s3_site": {
            "class_name": "SiteBuilder",
            "show_how_to_buttons": True,
            "site_index_builder": {
                "class_name": "DefaultSiteIndexBuilder",
                "show_cta_footer": True,
            },
            "store_backend": {
                "bucket": "my_default_bucket",
                "class_name": "TupleS3StoreBackend",
                "prefix": "data_docs",
            },
        }
    }

    desired_config = construct_data_context_config(
        data_context_id=data_context_config.anonymous_usage_statistics.data_context_id,
        datasources=default_pandas_datasource_config,
        expectations_store_name="expectations_S3_store",
        validations_store_name="validations_S3_store",
        evaluation_parameter_store_name=DataContextConfigDefaults.DEFAULT_EVALUATION_PARAMETER_STORE_NAME.value,
        stores=desired_stores_config,
        data_docs_sites=desired_data_docs_sites_config,
    )
    # The serialized config should still contain the raw ${...} placeholder.
    desired_config["datasources"]["my_pandas_datasource"]["batch_kwargs_generators"][
        "subdir_reader"
    ]["base_directory"] = "${SUBSTITUTED_BASE_DIRECTORY}"

    data_context_config_schema = DataContextConfigSchema()
    assert data_context_config_schema.dump(data_context_config) == desired_config
    assert DataContext.validate_config(project_config=data_context_config)

    # Building a live context should substitute the env var into the generator.
    data_context = BaseDataContext(project_config=data_context_config)
    assert (
        data_context.datasources["my_pandas_datasource"]
        .get_batch_kwargs_generator("subdir_reader")
        ._base_directory
        == "../data/"
    )
def test_create_data_context_and_config_vars_in_code(tmp_path_factory, monkeypatch):
    """
    What does this test and why?
    Creating a DataContext via .create(), then using .save_config_variable() to save a
    variable that will eventually be substituted (e.g. ${SOME_VAR}) should result in the
    proper escaping of $.
    This is in response to issue #2196
    """
    project_path = str(tmp_path_factory.mktemp("data_context"))
    context = ge.DataContext.create(
        project_root_dir=project_path,
        usage_statistics_enabled=False,
    )

    CONFIG_VARS = {
        "DB_HOST": "${DB_HOST_FROM_ENV_VAR}",
        "DB_NAME": "DB_NAME",
        "DB_USER": "******",
        "DB_PWD": "pas$word",
    }
    for k, v in CONFIG_VARS.items():
        context.save_config_variable(k, v)

    config_vars_file_contents = context._load_config_variables_file()

    # Add escaping for DB_PWD since it is not of the form ${SOMEVAR} or $SOMEVAR
    CONFIG_VARS_WITH_ESCAPING = CONFIG_VARS.copy()
    CONFIG_VARS_WITH_ESCAPING["DB_PWD"] = r"pas\$word"

    # Ensure all config vars saved are in the config_variables.yml file
    # and that escaping was added for "pas$word" -> "pas\$word"
    assert all(
        item in config_vars_file_contents.items()
        for item in CONFIG_VARS_WITH_ESCAPING.items()
    )
    # The unescaped originals must NOT all be present (DB_PWD was rewritten).
    assert not all(
        item in config_vars_file_contents.items() for item in CONFIG_VARS.items()
    )

    # Add env var for substitution
    monkeypatch.setenv("DB_HOST_FROM_ENV_VAR", "DB_HOST_FROM_ENV_VAR_VALUE")

    datasource_config = DatasourceConfig(
        class_name="SqlAlchemyDatasource",
        credentials={
            "drivername": "postgresql",
            "host": "$DB_HOST",
            "port": "65432",
            "database": "${DB_NAME}",
            "username": "******",
            "password": "******",
        },
    )
    datasource_config_schema = DatasourceConfigSchema()

    # use context.add_datasource to test this by adding a datasource with values to substitute.
    context.add_datasource(
        initialize=False,
        name="test_datasource",
        **datasource_config_schema.dump(datasource_config),
    )

    assert context.list_datasources()[0]["credentials"] == {
        "drivername": "postgresql",
        "host": "DB_HOST_FROM_ENV_VAR_VALUE",
        "port": "65432",
        "database": "DB_NAME",
        "username": "******",
        # Note masking of "password" field
        "password": "******",
    }

    # Check context substitutes escaped variables appropriately
    data_context_config_schema = DataContextConfigSchema()
    context_with_variables_substituted_dict = data_context_config_schema.dump(
        context.get_config_with_variables_substituted()
    )
    test_datasource_credentials = context_with_variables_substituted_dict[
        "datasources"
    ]["test_datasource"]["credentials"]
    assert test_datasource_credentials["host"] == "DB_HOST_FROM_ENV_VAR_VALUE"
    assert test_datasource_credentials["username"] == "DB_USER"
    # The escaped \$ must round-trip back to a literal "pas$word".
    assert test_datasource_credentials["password"] == "pas$word"
    assert test_datasource_credentials["database"] == "DB_NAME"

    # Ensure skip_if_substitution_variable=False works as documented
    context.save_config_variable(
        "escaped", "$SOME_VAR", skip_if_substitution_variable=False
    )
    context.save_config_variable(
        "escaped_curly", "${SOME_VAR}", skip_if_substitution_variable=False
    )
    config_vars_file_contents = context._load_config_variables_file()
    assert config_vars_file_contents["escaped"] == r"\$SOME_VAR"
    assert config_vars_file_contents["escaped_curly"] == r"\${SOME_VAR}"
DatasourceConfig, DatasourceConfigSchema, ) from great_expectations.data_context.util import ( file_relative_path, substitute_all_config_variables, substitute_config_variable, ) from great_expectations.exceptions import InvalidConfigError, MissingConfigVariableError from tests.data_context.conftest import create_data_context_files yaml = YAML() yaml.indent(mapping=2, sequence=4, offset=2) yaml.default_flow_style = False dataContextConfigSchema = DataContextConfigSchema() def test_config_variables_on_context_without_config_variables_filepath_configured( data_context_without_config_variables_filepath_configured, ): # test the behavior on a context that does not config_variables_filepath (the location of # the file with config variables values) configured. context = data_context_without_config_variables_filepath_configured # an attempt to save a config variable should raise an exception with pytest.raises(InvalidConfigError) as exc: context.save_config_variable("var_name_1", {"n1": "v1"}) assert ("'config_variables_file_path' property is not found in config"