def test_DataContextConfig_with_FilesystemStoreBackendDefaults_and_simple_defaults(
    construct_data_context_config, default_pandas_datasource_config
):
    """
    Build a minimal DataContextConfig that relies on
    FilesystemStoreBackendDefaults and check that its serialized form matches
    the config produced by the construct_data_context_config fixture, and that
    the config passes DataContext.validate_config.
    """
    pandas_datasource = LegacyDatasourceConfig(
        class_name="PandasDatasource",
        batch_kwargs_generators={
            "subdir_reader": {
                "class_name": "SubdirReaderBatchKwargsGenerator",
                "base_directory": "../data/",
            }
        },
    )
    data_context_config = DataContextConfig(
        datasources={"my_pandas_datasource": pandas_datasource},
        store_backend_defaults=FilesystemStoreBackendDefaults(),
    )

    # The expected config reuses the data_context_id held by the config under test.
    usage_statistics = data_context_config.anonymous_usage_statistics
    expected_config = construct_data_context_config(
        data_context_id=usage_statistics.data_context_id,
        datasources=default_pandas_datasource_config,
    )

    dumped_config = DataContextConfigSchema().dump(data_context_config)
    assert dumped_config == expected_config
    assert DataContext.validate_config(project_config=data_context_config)
def test_SparkDatasource_config(default_spark_datasource_config):
    """
    Serialize a LegacyDatasourceConfig for a SparkDFDatasource with no batch
    kwargs generators and compare it to the "my_spark_datasource" fixture entry.
    """
    spark_config = LegacyDatasourceConfig(
        class_name="SparkDFDatasource",
        batch_kwargs_generators={},
    )
    expected_config = default_spark_datasource_config["my_spark_datasource"]

    dumped_config = LegacyDatasourceConfigSchema().dump(spark_config)
    assert dumped_config == expected_config
def test_PandasDatasource_config(default_pandas_datasource_config):
    """
    Serialize a LegacyDatasourceConfig for a PandasDatasource with a
    subdir_reader generator and compare it to the "my_pandas_datasource"
    fixture entry.
    """
    generators = {
        "subdir_reader": {
            "class_name": "SubdirReaderBatchKwargsGenerator",
            "base_directory": "../data/",
        }
    }
    pandas_config = LegacyDatasourceConfig(
        class_name="PandasDatasource",
        batch_kwargs_generators=generators,
    )
    expected_config = default_pandas_datasource_config["my_pandas_datasource"]

    dumped_config = LegacyDatasourceConfigSchema().dump(pandas_config)
    assert dumped_config == expected_config
def test_SqlAlchemyDatasource_config(default_sql_alchemy_datasource_config):
    """
    Serialize a LegacyDatasourceConfig for a SqlAlchemyDatasource carrying
    explicit credentials and compare it to the "my_sql_alchemy_datasource"
    fixture entry.
    """
    connection_credentials = {
        "drivername": "custom_drivername",
        "host": "custom_host",
        "port": "custom_port",
        "username": "******",
        "password": "******",
        "database": "custom_database",
    }
    sql_config = LegacyDatasourceConfig(
        class_name="SqlAlchemyDatasource",
        credentials=connection_credentials,
    )
    expected_config = default_sql_alchemy_datasource_config[
        "my_sql_alchemy_datasource"
    ]

    dumped_config = LegacyDatasourceConfigSchema().dump(sql_config)
    assert dumped_config == expected_config
def test_override_general_defaults(
    construct_data_context_config,
    default_pandas_datasource_config,
    default_spark_datasource_config,
):
    """
    Pass explicit values for the general DataContextConfig settings (version,
    directories, stores, data docs sites, validation operators) and check that
    the serialized config carries those values instead of defaults.

    Two datasources are supplied in mixed form: one as a plain dictionary and
    one as a LegacyDatasourceConfig.
    """

    # Each helper returns brand-new dictionaries on every call, so the input
    # config and the expected config never share nested objects.
    def s3_store(store_class_name):
        return {
            "class_name": store_class_name,
            "store_backend": {
                "class_name": "TupleS3StoreBackend",
                "bucket": "REPLACE_ME",
                "prefix": "REPLACE_ME",
            },
        }

    def site_index_builder():
        return {
            "class_name": "DefaultSiteIndexBuilder",
            "show_cta_footer": True,
        }

    def s3_site():
        return {
            "class_name": "SiteBuilder",
            "store_backend": {
                "class_name": "TupleS3StoreBackend",
                "bucket": "REPLACE_ME",
            },
            "site_index_builder": site_index_builder(),
        }

    def local_site():
        return {
            "class_name": "SiteBuilder",
            "show_how_to_buttons": True,
            "site_index_builder": site_index_builder(),
            "store_backend": {
                "base_directory": "uncommitted/data_docs/local_site/",
                "class_name": "TupleFilesystemStoreBackend",
            },
        }

    def action(action_name, action_class_name):
        return {
            "name": action_name,
            "action": {"class_name": action_class_name},
        }

    def validation_operators():
        return {
            "custom_action_list_operator": {
                "class_name": "ActionListValidationOperator",
                "action_list": [
                    action(
                        "custom_store_validation_result",
                        "CustomStoreValidationResultAction",
                    ),
                    action(
                        "store_evaluation_params",
                        "StoreEvaluationParametersAction",
                    ),
                    action("update_data_docs", "UpdateDataDocsAction"),
                ],
            }
        }

    spark_datasource = {
        "data_asset_type": {
            "class_name": "SparkDFDataset",
            "module_name": "great_expectations.dataset",
        },
        "class_name": "SparkDFDatasource",
        "module_name": "great_expectations.datasource",
        "batch_kwargs_generators": {},
    }
    pandas_datasource = LegacyDatasourceConfig(
        class_name="PandasDatasource",
        batch_kwargs_generators={
            "subdir_reader": {
                "class_name": "SubdirReaderBatchKwargsGenerator",
                "base_directory": "../data/",
            }
        },
    )

    data_context_config = DataContextConfig(
        config_version=999,
        plugins_directory="custom_plugins_directory",
        config_variables_file_path="custom_config_variables_file_path",
        datasources={
            "my_spark_datasource": spark_datasource,
            "my_pandas_datasource": pandas_datasource,
        },
        stores={
            "expectations_S3_store": s3_store("ExpectationsStore"),
            "expectations_S3_store2": s3_store("ExpectationsStore"),
            "validations_S3_store": s3_store("ValidationsStore"),
            "validations_S3_store2": s3_store("ValidationsStore"),
            "custom_evaluation_parameter_store": {
                "class_name": "EvaluationParameterStore"
            },
        },
        expectations_store_name="custom_expectations_store_name",
        validations_store_name="custom_validations_store_name",
        evaluation_parameter_store_name="custom_evaluation_parameter_store_name",
        data_docs_sites={
            "s3_site": s3_site(),
            "local_site": local_site(),
        },
        validation_operators=validation_operators(),
        anonymous_usage_statistics={"enabled": True},
    )

    expected_stores = {
        "custom_evaluation_parameter_store": {
            "class_name": "EvaluationParameterStore"
        },
        "expectations_S3_store": s3_store("ExpectationsStore"),
        "expectations_S3_store2": s3_store("ExpectationsStore"),
        "validations_S3_store": s3_store("ValidationsStore"),
        "validations_S3_store2": s3_store("ValidationsStore"),
    }
    expected_data_docs_sites = {
        "local_site": local_site(),
        "s3_site": s3_site(),
    }
    expected_validation_operators = validation_operators()

    usage_statistics = data_context_config.anonymous_usage_statistics
    expected_config = construct_data_context_config(
        data_context_id=usage_statistics.data_context_id,
        datasources={
            **default_pandas_datasource_config,
            **default_spark_datasource_config,
        },
        # The integer 999 passed above is expected back as a float.
        config_version=999.0,
        expectations_store_name="custom_expectations_store_name",
        validations_store_name="custom_validations_store_name",
        evaluation_parameter_store_name="custom_evaluation_parameter_store_name",
        stores=expected_stores,
        validation_operators=expected_validation_operators,
        data_docs_sites=expected_data_docs_sites,
        plugins_directory="custom_plugins_directory",
    )
    # This key is not passed to the fixture, so it is set on its result directly.
    expected_config[
        "config_variables_file_path"
    ] = "custom_config_variables_file_path"

    dumped_config = DataContextConfigSchema().dump(data_context_config)
    assert dumped_config == expected_config
    assert DataContext.validate_config(project_config=data_context_config)
def test_DataContextConfig_with_DatabaseStoreBackendDefaults_using_all_parameters(
    construct_data_context_config, default_pandas_datasource_config
):
    """
    Supply every DatabaseStoreBackendDefaults parameter and check the
    serialized config.

    In particular, the expected stores carry the per-store credentials rather
    than default_credentials, and the custom store names are used.
    """
    pandas_datasource = LegacyDatasourceConfig(
        class_name="PandasDatasource",
        module_name="great_expectations.datasource",
        data_asset_type={
            "module_name": "great_expectations.dataset",
            "class_name": "PandasDataset",
        },
        batch_kwargs_generators={
            "subdir_reader": {
                "class_name": "SubdirReaderBatchKwargsGenerator",
                "base_directory": "../data/",
            }
        },
    )
    database_defaults = DatabaseStoreBackendDefaults(
        default_credentials={
            "drivername": "postgresql",
            "host": "localhost",
            "port": "65432",
            "username": "******",
            "password": "******",
            "database": "ge_tutorials",
        },
        expectations_store_credentials={
            "drivername": "custom_expectations_store_drivername",
            "host": "custom_expectations_store_host",
            "port": "custom_expectations_store_port",
            "username": "******",
            "password": "******",
            "database": "custom_expectations_store_database",
        },
        validations_store_credentials={
            "drivername": "custom_validations_store_drivername",
            "host": "custom_validations_store_host",
            "port": "custom_validations_store_port",
            "username": "******",
            "password": "******",
            "database": "custom_validations_store_database",
        },
        expectations_store_name="custom_expectations_database_store_name",
        validations_store_name="custom_validations_database_store_name",
        evaluation_parameter_store_name="custom_evaluation_parameter_store_name",
    )
    data_context_config = DataContextConfig(
        datasources={"my_pandas_datasource": pandas_datasource},
        store_backend_defaults=database_defaults,
    )

    # Expected values: stores keyed by the custom names, each with its own credentials.
    expected_expectations_backend = {
        "class_name": "DatabaseStoreBackend",
        "credentials": {
            "database": "custom_expectations_store_database",
            "drivername": "custom_expectations_store_drivername",
            "host": "custom_expectations_store_host",
            "password": "******",
            "port": "custom_expectations_store_port",
            "username": "******",
        },
    }
    expected_validations_backend = {
        "class_name": "DatabaseStoreBackend",
        "credentials": {
            "database": "custom_validations_store_database",
            "drivername": "custom_validations_store_drivername",
            "host": "custom_validations_store_host",
            "password": "******",
            "port": "custom_validations_store_port",
            "username": "******",
        },
    }
    expected_stores = {
        "custom_evaluation_parameter_store_name": {
            "class_name": "EvaluationParameterStore"
        },
        "custom_expectations_database_store_name": {
            "class_name": "ExpectationsStore",
            "store_backend": expected_expectations_backend,
        },
        "custom_validations_database_store_name": {
            "class_name": "ValidationsStore",
            "store_backend": expected_validations_backend,
        },
    }
    expected_data_docs_sites = {
        "local_site": {
            "class_name": "SiteBuilder",
            "show_how_to_buttons": True,
            "site_index_builder": {
                "class_name": "DefaultSiteIndexBuilder",
                "show_cta_footer": True,
            },
            "store_backend": {
                "base_directory": "uncommitted/data_docs/local_site/",
                "class_name": "TupleFilesystemStoreBackend",
            },
        }
    }

    usage_statistics = data_context_config.anonymous_usage_statistics
    expected_config = construct_data_context_config(
        data_context_id=usage_statistics.data_context_id,
        datasources=default_pandas_datasource_config,
        expectations_store_name="custom_expectations_database_store_name",
        validations_store_name="custom_validations_database_store_name",
        evaluation_parameter_store_name="custom_evaluation_parameter_store_name",
        stores=expected_stores,
        data_docs_sites=expected_data_docs_sites,
    )

    dumped_config = DataContextConfigSchema().dump(data_context_config)
    assert dumped_config == expected_config
    assert DataContext.validate_config(project_config=data_context_config)
def test_DataContextConfig_with_DatabaseStoreBackendDefaults(
    construct_data_context_config, default_pandas_datasource_config
):
    """
    Use DatabaseStoreBackendDefaults with only default_credentials and check
    the serialized config.

    The expected expectations and validations stores both carry the default
    credentials, while the expected data docs site is a local filesystem site
    without credentials.
    """

    def postgres_credentials():
        # A new dictionary per call keeps the input and expected values independent.
        return {
            "drivername": "postgresql",
            "host": "localhost",
            "port": "65432",
            "username": "******",
            "password": "******",
            "database": "ge_tutorials",
        }

    def database_store(store_class_name):
        return {
            "class_name": store_class_name,
            "store_backend": {
                "class_name": "DatabaseStoreBackend",
                "credentials": postgres_credentials(),
            },
        }

    pandas_datasource = LegacyDatasourceConfig(
        class_name="PandasDatasource",
        module_name="great_expectations.datasource",
        data_asset_type={
            "module_name": "great_expectations.dataset",
            "class_name": "PandasDataset",
        },
        batch_kwargs_generators={
            "subdir_reader": {
                "class_name": "SubdirReaderBatchKwargsGenerator",
                "base_directory": "../data/",
            }
        },
    )
    data_context_config = DataContextConfig(
        datasources={"my_pandas_datasource": pandas_datasource},
        store_backend_defaults=DatabaseStoreBackendDefaults(
            default_credentials=postgres_credentials(),
        ),
    )

    expected_stores = {
        "evaluation_parameter_store": {"class_name": "EvaluationParameterStore"},
        "expectations_database_store": database_store("ExpectationsStore"),
        "validations_database_store": database_store("ValidationsStore"),
    }
    expected_data_docs_sites = {
        "local_site": {
            "class_name": "SiteBuilder",
            "show_how_to_buttons": True,
            "site_index_builder": {
                "class_name": "DefaultSiteIndexBuilder",
                "show_cta_footer": True,
            },
            "store_backend": {
                "base_directory": "uncommitted/data_docs/local_site/",
                "class_name": "TupleFilesystemStoreBackend",
            },
        }
    }

    usage_statistics = data_context_config.anonymous_usage_statistics
    default_evaluation_store_name = (
        DataContextConfigDefaults.DEFAULT_EVALUATION_PARAMETER_STORE_NAME.value
    )
    expected_config = construct_data_context_config(
        data_context_id=usage_statistics.data_context_id,
        datasources=default_pandas_datasource_config,
        expectations_store_name="expectations_database_store",
        validations_store_name="validations_database_store",
        evaluation_parameter_store_name=default_evaluation_store_name,
        stores=expected_stores,
        data_docs_sites=expected_data_docs_sites,
    )

    dumped_config = DataContextConfigSchema().dump(data_context_config)
    assert dumped_config == expected_config
    assert DataContext.validate_config(project_config=data_context_config)
def test_DataContextConfig_with_GCSStoreBackendDefaults_using_all_parameters(
    construct_data_context_config, default_pandas_datasource_config
):
    """
    Supply every GCSStoreBackendDefaults parameter and check the serialized
    config.

    In particular, the expected stores and data docs site use the individually
    passed bucket, project and prefix values rather than default_bucket_name
    and default_project_name.
    """
    pandas_datasource = LegacyDatasourceConfig(
        class_name="PandasDatasource",
        module_name="great_expectations.datasource",
        data_asset_type={
            "module_name": "great_expectations.dataset",
            "class_name": "PandasDataset",
        },
        batch_kwargs_generators={
            "subdir_reader": {
                "class_name": "SubdirReaderBatchKwargsGenerator",
                "base_directory": "../data/",
            }
        },
    )
    gcs_defaults = GCSStoreBackendDefaults(
        default_bucket_name="custom_default_bucket_name",
        default_project_name="custom_default_project_name",
        expectations_store_bucket_name="custom_expectations_store_bucket_name",
        validations_store_bucket_name="custom_validations_store_bucket_name",
        data_docs_bucket_name="custom_data_docs_store_bucket_name",
        expectations_store_project_name="custom_expectations_store_project_name",
        validations_store_project_name="custom_validations_store_project_name",
        data_docs_project_name="custom_data_docs_store_project_name",
        expectations_store_prefix="custom_expectations_store_prefix",
        validations_store_prefix="custom_validations_store_prefix",
        data_docs_prefix="custom_data_docs_prefix",
        expectations_store_name="custom_expectations_GCS_store_name",
        validations_store_name="custom_validations_GCS_store_name",
        evaluation_parameter_store_name="custom_evaluation_parameter_store_name",
    )
    data_context_config = DataContextConfig(
        datasources={"my_pandas_datasource": pandas_datasource},
        store_backend_defaults=gcs_defaults,
    )

    # Expected values: every backend reflects its own custom bucket/project/prefix.
    expected_expectations_backend = {
        "bucket": "custom_expectations_store_bucket_name",
        "project": "custom_expectations_store_project_name",
        "class_name": "TupleGCSStoreBackend",
        "prefix": "custom_expectations_store_prefix",
    }
    expected_validations_backend = {
        "bucket": "custom_validations_store_bucket_name",
        "project": "custom_validations_store_project_name",
        "class_name": "TupleGCSStoreBackend",
        "prefix": "custom_validations_store_prefix",
    }
    expected_data_docs_backend = {
        "bucket": "custom_data_docs_store_bucket_name",
        "project": "custom_data_docs_store_project_name",
        "class_name": "TupleGCSStoreBackend",
        "prefix": "custom_data_docs_prefix",
    }
    expected_stores = {
        "custom_evaluation_parameter_store_name": {
            "class_name": "EvaluationParameterStore"
        },
        "custom_expectations_GCS_store_name": {
            "class_name": "ExpectationsStore",
            "store_backend": expected_expectations_backend,
        },
        "custom_validations_GCS_store_name": {
            "class_name": "ValidationsStore",
            "store_backend": expected_validations_backend,
        },
    }
    expected_data_docs_sites = {
        "gcs_site": {
            "class_name": "SiteBuilder",
            "show_how_to_buttons": True,
            "site_index_builder": {
                "class_name": "DefaultSiteIndexBuilder",
                "show_cta_footer": True,
            },
            "store_backend": expected_data_docs_backend,
        }
    }

    usage_statistics = data_context_config.anonymous_usage_statistics
    expected_config = construct_data_context_config(
        data_context_id=usage_statistics.data_context_id,
        datasources=default_pandas_datasource_config,
        expectations_store_name="custom_expectations_GCS_store_name",
        validations_store_name="custom_validations_GCS_store_name",
        evaluation_parameter_store_name="custom_evaluation_parameter_store_name",
        stores=expected_stores,
        data_docs_sites=expected_data_docs_sites,
    )

    dumped_config = DataContextConfigSchema().dump(data_context_config)
    assert dumped_config == expected_config
    assert DataContext.validate_config(project_config=data_context_config)
def test_DataContextConfig_with_GCSStoreBackendDefaults(
    construct_data_context_config, default_pandas_datasource_config
):
    """
    Use GCSStoreBackendDefaults with only default_bucket_name and
    default_project_name and check the serialized config.

    The expected expectations store, validations store and data docs site all
    carry the default bucket and project, each with its own prefix.
    """

    def gcs_backend(prefix):
        # Builds a fresh backend dictionary that differs only by prefix.
        return {
            "bucket": "my_default_bucket",
            "project": "my_default_project",
            "class_name": "TupleGCSStoreBackend",
            "prefix": prefix,
        }

    pandas_datasource = LegacyDatasourceConfig(
        class_name="PandasDatasource",
        module_name="great_expectations.datasource",
        data_asset_type={
            "module_name": "great_expectations.dataset",
            "class_name": "PandasDataset",
        },
        batch_kwargs_generators={
            "subdir_reader": {
                "class_name": "SubdirReaderBatchKwargsGenerator",
                "base_directory": "../data/",
            }
        },
    )
    data_context_config = DataContextConfig(
        datasources={"my_pandas_datasource": pandas_datasource},
        store_backend_defaults=GCSStoreBackendDefaults(
            default_bucket_name="my_default_bucket",
            default_project_name="my_default_project",
        ),
    )

    context_id = data_context_config.anonymous_usage_statistics.data_context_id
    expected_stores = {
        "evaluation_parameter_store": {"class_name": "EvaluationParameterStore"},
        "expectations_GCS_store": {
            "class_name": "ExpectationsStore",
            "store_backend": gcs_backend("expectations"),
        },
        "validations_GCS_store": {
            "class_name": "ValidationsStore",
            "store_backend": gcs_backend("validations"),
        },
    }
    expected_data_docs_sites = {
        "gcs_site": {
            "class_name": "SiteBuilder",
            "show_how_to_buttons": True,
            "site_index_builder": {
                "class_name": "DefaultSiteIndexBuilder",
                "show_cta_footer": True,
            },
            "store_backend": gcs_backend("data_docs"),
        }
    }

    default_evaluation_store_name = (
        DataContextConfigDefaults.DEFAULT_EVALUATION_PARAMETER_STORE_NAME.value
    )
    expected_config = construct_data_context_config(
        data_context_id=context_id,
        datasources=default_pandas_datasource_config,
        expectations_store_name="expectations_GCS_store",
        validations_store_name="validations_GCS_store",
        evaluation_parameter_store_name=default_evaluation_store_name,
        stores=expected_stores,
        data_docs_sites=expected_data_docs_sites,
    )

    dumped_config = DataContextConfigSchema().dump(data_context_config)
    assert dumped_config == expected_config
    assert DataContext.validate_config(project_config=data_context_config)
def test_DataContextConfig_with_S3StoreBackendDefaults_and_simple_defaults_with_variable_sub(
    monkeypatch, construct_data_context_config, default_pandas_datasource_config
):
    """
    Build a simple DataContextConfig on S3StoreBackendDefaults whose generator
    base_directory is the placeholder ${SUBSTITUTED_BASE_DIRECTORY}.

    The serialized config is expected to keep the placeholder, while the
    generator obtained from a BaseDataContext built on that config is expected
    to expose the value of the environment variable.
    """

    def s3_backend(prefix):
        # Builds a fresh backend dictionary that differs only by prefix.
        return {
            "bucket": "my_default_bucket",
            "class_name": "TupleS3StoreBackend",
            "prefix": prefix,
        }

    monkeypatch.setenv("SUBSTITUTED_BASE_DIRECTORY", "../data/")

    pandas_datasource = LegacyDatasourceConfig(
        class_name="PandasDatasource",
        batch_kwargs_generators={
            "subdir_reader": {
                "class_name": "SubdirReaderBatchKwargsGenerator",
                "base_directory": "${SUBSTITUTED_BASE_DIRECTORY}",
            }
        },
    )
    data_context_config = DataContextConfig(
        datasources={"my_pandas_datasource": pandas_datasource},
        store_backend_defaults=S3StoreBackendDefaults(
            default_bucket_name="my_default_bucket"
        ),
    )

    expected_stores = {
        "evaluation_parameter_store": {"class_name": "EvaluationParameterStore"},
        "expectations_S3_store": {
            "class_name": "ExpectationsStore",
            "store_backend": s3_backend("expectations"),
        },
        "validations_S3_store": {
            "class_name": "ValidationsStore",
            "store_backend": s3_backend("validations"),
        },
    }
    expected_data_docs_sites = {
        "s3_site": {
            "class_name": "SiteBuilder",
            "show_how_to_buttons": True,
            "site_index_builder": {
                "class_name": "DefaultSiteIndexBuilder",
                "show_cta_footer": True,
            },
            "store_backend": s3_backend("data_docs"),
        }
    }

    usage_statistics = data_context_config.anonymous_usage_statistics
    default_evaluation_store_name = (
        DataContextConfigDefaults.DEFAULT_EVALUATION_PARAMETER_STORE_NAME.value
    )
    expected_config = construct_data_context_config(
        data_context_id=usage_statistics.data_context_id,
        datasources=default_pandas_datasource_config,
        expectations_store_name="expectations_S3_store",
        validations_store_name="validations_S3_store",
        evaluation_parameter_store_name=default_evaluation_store_name,
        stores=expected_stores,
        data_docs_sites=expected_data_docs_sites,
    )
    # Overwrite the fixture's base_directory with the raw placeholder, which is
    # what the serialized config is compared against.
    expected_generators = expected_config["datasources"]["my_pandas_datasource"][
        "batch_kwargs_generators"
    ]
    expected_generators["subdir_reader"][
        "base_directory"
    ] = "${SUBSTITUTED_BASE_DIRECTORY}"

    dumped_config = DataContextConfigSchema().dump(data_context_config)
    assert dumped_config == expected_config
    assert DataContext.validate_config(project_config=data_context_config)

    # The live context's generator should see the environment variable's value.
    data_context = BaseDataContext(project_config=data_context_config)
    live_datasource = data_context.datasources["my_pandas_datasource"]
    subdir_reader = live_datasource.get_batch_kwargs_generator("subdir_reader")
    assert subdir_reader._base_directory == "../data/"