Example #1
    def _ge_context(self) -> Iterator[GEContext]:
        with self.base_engine.connect() as conn:
            data_context = BaseDataContext(project_config=DataContextConfig(
                # The datasource will be added via add_datasource().
                datasources={},
                store_backend_defaults=InMemoryStoreBackendDefaults(),
                anonymous_usage_statistics={
                    "enabled": False,
                    # "data_context_id": <not set>,
                },
            ))

            datasource_name = f"{self._datasource_name_base}-{uuid.uuid4()}"
            datasource_config = DatasourceConfig(
                class_name="SqlAlchemyDatasource",
                credentials={
                    # This isn't actually used since we pass the connection directly,
                    # but GE parses it to change some of its behavior so it's useful
                    # to emulate that here.
                    "url": conn.engine.url,
                },
            )
            with _inject_connection_into_datasource(conn):
                # Using the add_datasource method ensures that the datasource is added to
                # the GE-internal cache, which avoids problems when calling GE methods later on.
                assert data_context.add_datasource(
                    datasource_name,
                    initialize=True,
                    **dict(datasourceConfigSchema.dump(datasource_config)),
                )
            assert data_context.get_datasource(datasource_name)

            yield GEContext(data_context, datasource_name)
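
A hedged usage sketch for the generator above, assuming it is wrapped with contextlib.contextmanager somewhere upstream (the Iterator[GEContext] annotation and the yield suggest this); the call site, the `checker` object, and the GEContext field names (inferred from the constructor call) are all assumptions:

# Hypothetical call site; `checker` stands in for the owning object.
with checker._ge_context() as ge_context:
    datasource = ge_context.data_context.get_datasource(
        ge_context.datasource_name)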
def test_DataContextConfig_with_BaseStoreBackendDefaults_and_simple_defaults(
    construct_data_context_config, default_pandas_datasource_config
):
    """
    What does this test and why?
    Ensure that a very simple DataContextConfig setup with many defaults is created accurately
    and produces a valid DataContextConfig
    """

    data_context_config = DataContextConfig(
        datasources={
            "my_pandas_datasource": DatasourceConfig(
                class_name="PandasDatasource",
                batch_kwargs_generators={
                    "subdir_reader": {
                        "class_name": "SubdirReaderBatchKwargsGenerator",
                        "base_directory": "../data/",
                    }
                },
            )
        },
        store_backend_defaults=BaseStoreBackendDefaults(),
    )

    desired_config = construct_data_context_config(
        data_context_id=data_context_config.anonymous_usage_statistics.data_context_id,
        datasources=default_pandas_datasource_config,
    )

    data_context_config_schema = DataContextConfigSchema()
    assert data_context_config_schema.dump(data_context_config) == desired_config
    assert DataContext.validate_config(project_config=data_context_config)
    def __init__(self, conn, report):
        self.conn = conn
        self.report = report

        data_context_config = DataContextConfig(
            datasources={
                self.datasource_name:
                DatasourceConfig(
                    class_name="SqlAlchemyDatasource",
                    credentials={
                        # This isn't actually used since we pass the connection directly,
                        # but GE parses it to change some of its behavior so it's useful
                        # to emulate that here.
                        "url": self.conn.engine.url,
                    },
                )
            },
            store_backend_defaults=InMemoryStoreBackendDefaults(),
            anonymous_usage_statistics={
                "enabled": False,
                # "data_context_id": <not set>,
            },
        )

        with _properly_init_datasource(self.conn):
            self.data_context = BaseDataContext(
                project_config=data_context_config)
def test_DataContextConfig_with_FilesystemStoreBackendDefaults_and_simple_defaults_no_root_directory(
    construct_data_context_config, default_pandas_datasource_config
):
    """
    What does this test and why?
    Ensure that a very simple DataContextConfig setup using FilesystemStoreBackendDefaults is created accurately.
    This test does not set the optional root_directory parameter.
    """

    data_context_config = DataContextConfig(
        datasources={
            "my_pandas_datasource": DatasourceConfig(
                class_name="PandasDatasource",
                batch_kwargs_generators={
                    "subdir_reader": {
                        "class_name": "SubdirReaderBatchKwargsGenerator",
                        "base_directory": "../data/",
                    }
                },
            )
        },
        store_backend_defaults=FilesystemStoreBackendDefaults(),
    )

    # Create desired config
    data_context_id = data_context_config.anonymous_usage_statistics.data_context_id
    desired_config = construct_data_context_config(
        data_context_id=data_context_id, datasources=default_pandas_datasource_config
    )

    data_context_config_schema = DataContextConfigSchema()
    assert data_context_config_schema.dump(data_context_config) == desired_config
    assert DataContext.validate_config(project_config=data_context_config)
    def _upgrade_configuration_automatically(self):
        if not self.upgrade_log["skipped_checkpoint_store_upgrade"]:
            config_commented_map: CommentedMap = (
                self.data_context.get_config().commented_map)
            for key, config in self.upgrade_checklist["automatic"][
                    "stores"].items():
                config_commented_map["stores"][key] = config

            for key, value in self.upgrade_checklist["automatic"][
                    "store_names"].items():
                config_commented_map[key] = value

            data_context_config: DataContextConfig = (
                DataContextConfig.from_commented_map(
                    commented_map=config_commented_map))
            self.data_context.set_config(project_config=data_context_config)
            self.data_context._save_project_config()

            checkpoint_log_entry = {
                "stores": {
                    DataContextConfigDefaults.DEFAULT_CHECKPOINT_STORE_NAME.value:
                    data_context_config.stores[
                        DataContextConfigDefaults.
                        DEFAULT_CHECKPOINT_STORE_NAME.value],
                },
                "checkpoint_store_name":
                data_context_config.checkpoint_store_name,
            }
            self.upgrade_log["added_checkpoint_store"].update(
                checkpoint_log_entry)
def test_substituted_config_variables_not_written_to_file(tmp_path_factory):
    # this test uses a great_expectations.yml with almost all values replaced
    # with substitution variables

    project_path = str(tmp_path_factory.mktemp("data_context"))
    context_path = os.path.join(project_path, "great_expectations")
    asset_config_path = os.path.join(context_path, "expectations")

    create_data_context_files(
        context_path,
        asset_config_path,
        ge_config_fixture_filename=
        "great_expectations_basic_with_exhaustive_variables.yml",
        config_variables_fixture_filename="config_variables_exhaustive.yml",
    )

    # load ge config fixture for expected
    path_to_yml = (
        "../test_fixtures/great_expectations_basic_with_exhaustive_variables.yml"
    )
    path_to_yml = file_relative_path(__file__, path_to_yml)
    with open(path_to_yml) as data:
        config_dict = yaml.load(data)
    expected_config = DataContextConfig.from_commented_map(config_dict)
    expected_config_dict = dataContextConfigSchema.dump(expected_config)
    expected_config_dict.pop("anonymous_usage_statistics")

    # instantiate data_context twice to go through cycle of loading config from file then saving
    context = ge.data_context.DataContext(context_path)
    context._save_project_config()
    context_config_dict = dataContextConfigSchema.dump(
        ge.data_context.DataContext(context_path)._project_config)
    context_config_dict.pop("anonymous_usage_statistics")

    assert context_config_dict == expected_config_dict
    def _retrieve_data_context_config_from_ge_cloud(self) -> DataContextConfig:
        """
        Utilizes the GeCloudConfig instantiated in the constructor to issue a request to the Cloud API.
        Given proper authorization, the request retrieves a data context config that is pre-populated with
        GE objects specific to the user's Cloud environment (datasources, data connectors, etc.).

        Please note that substitution for ${VAR} variables is performed in GE Cloud before being sent
        over the wire.

        :return: the configuration object retrieved from the Cloud API
        """
        ge_cloud_url = (
            self.ge_cloud_config.base_url +
            f"/organizations/{self.ge_cloud_config.organization_id}/data-context-configuration"
        )
        auth_headers = {
            "Content-Type": "application/vnd.api+json",
            "Authorization": f"Bearer {self.ge_cloud_config.access_token}",
        }

        response = requests.get(ge_cloud_url, headers=auth_headers)
        if response.status_code != 200:
            raise ge_exceptions.GeCloudError(
                f"Bad request made to GE Cloud; {response.text}")
        config = response.json()
        return DataContextConfig(**config)
def basic_data_context_config():
    return DataContextConfig(
        **{
            "commented_map": {},
            "config_version": 1,
            "plugins_directory": "plugins/",
            "evaluation_parameter_store_name": "evaluation_parameter_store",
            "validations_store_name": "does_not_have_to_be_real",
            "expectations_store_name": "expectations_store",
            "config_variables_file_path": "uncommitted/config_variables.yml",
            "datasources": {},
            "stores": {
                "expectations_store": {
                    "class_name": "ExpectationsStore",
                    "store_backend": {
                        "class_name": "TupleFilesystemStoreBackend",
                        "base_directory": "expectations/",
                    },
                },
                "evaluation_parameter_store": {
                    "module_name": "great_expectations.data_context.store",
                    "class_name": "EvaluationParameterStore",
                }
            },
            "data_docs_sites": {},
            "validation_operators": {
                "default": {
                    "class_name": "ActionListValidationOperator",
                    "action_list": []
                }
            }
        })
def in_memory_data_context_config_usage_stats_enabled():
    return DataContextConfig(
        **{
            "commented_map": {},
            "config_version": 1,
            "plugins_directory": None,
            "evaluation_parameter_store_name": "evaluation_parameter_store",
            "validations_store_name": "validations_store",
            "expectations_store_name": "expectations_store",
            "config_variables_file_path": None,
            "datasources": {},
            "stores": {
                "expectations_store": {
                    "class_name": "ExpectationsStore",
                },
                "validations_store": {
                    "class_name": "ValidationsStore",
                },
                "evaluation_parameter_store": {
                    "class_name": "EvaluationParameterStore",
                },
            },
            "data_docs_sites": {},
            "validation_operators": {
                "default": {
                    "class_name": "ActionListValidationOperator",
                    "action_list": []
                }
            },
            "anonymous_usage_statistics": {
                "enabled": True,
                "data_context_id": "6a52bdfa-e182-455b-a825-e69f076e67d6",
                "usage_statistics_url": USAGE_STATISTICS_QA_URL
            }
        })
Example #10
    def create_data_context_config(self):
        # Get the credentials information for the BigQuery data source from the BigQuery Airflow connection
        conn = BaseHook.get_connection(self.bigquery_conn_id)
        connection_json = conn.extra_dejson
        credentials_path = connection_json[
            'extra__google_cloud_platform__key_path']
        data_context_config = DataContextConfig(
            config_version=2,
            datasources={
                "bq_datasource": {
                    "credentials": {
                        "url":
                        "bigquery://" + self.gcp_project + "/" +
                        self.bq_dataset_name + "?credentials_path=" +
                        credentials_path
                    },
                    "class_name": "SqlAlchemyDatasource",
                    "module_name": "great_expectations.datasource",
                    "data_asset_type": {
                        "module_name": "great_expectations.dataset",
                        "class_name": "SqlAlchemyDataset"
                    }
                }
            },
            store_backend_defaults=GCSStoreBackendDefaults(
                default_bucket_name=self.gcs_bucket,
                default_project_name=self.gcp_project,
                validations_store_prefix=self.gcs_validations_prefix,
                expectations_store_prefix=self.gcs_expectations_prefix,
                data_docs_prefix=self.gcs_datadocs_prefix,
            ),
        )

        return data_context_config
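
A minimal sketch of consuming the config built above, mirroring the BaseDataContext(project_config=...) pattern used in the other snippets on this page; `operator` is a hypothetical instance of the surrounding class:

from great_expectations.data_context import BaseDataContext

# Hypothetical: build an in-memory GE context from the BigQuery-backed config.
context = BaseDataContext(project_config=operator.create_data_context_config())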
def basic_data_context_config_for_validation_operator():
    return DataContextConfig(
        config_version=1,
        plugins_directory=None,
        evaluation_parameter_store_name="evaluation_parameter_store",
        expectations_store_name="expectations_store",
        datasources={},
        stores={
            "expectations_store": {
                "class_name": "ExpectationsStore"
            },
            "evaluation_parameter_store": {
                "class_name": "EvaluationParameterStore"
            },
            "validation_result_store": {
                "class_name": "ValidationsStore"
            },
            "metrics_store": {
                "class_name": "MetricStore"
            }
        },
        validations_store_name="validation_result_store",
        data_docs_sites={},
        validation_operators={
            "store_val_res_and_extract_eval_params": {
                "class_name":
                "ActionListValidationOperator",
                "action_list": [{
                    "name": "store_validation_result",
                    "action": {
                        "class_name": "StoreValidationResultAction",
                        "target_store_name": "validation_result_store",
                    }
                }, {
                    "name": "extract_and_store_eval_parameters",
                    "action": {
                        "class_name": "StoreEvaluationParametersAction",
                        "target_store_name": "evaluation_parameter_store",
                    }
                }]
            },
            "errors_and_warnings_validation_operator": {
                "class_name":
                "WarningAndFailureExpectationSuitesValidationOperator",
                "action_list": [{
                    "name": "store_validation_result",
                    "action": {
                        "class_name": "StoreValidationResultAction",
                        "target_store_name": "validation_result_store",
                    }
                }, {
                    "name": "extract_and_store_eval_parameters",
                    "action": {
                        "class_name": "StoreEvaluationParametersAction",
                        "target_store_name": "evaluation_parameter_store",
                    }
                }]
            }
        })
def test_data_context_concurrency_property():
    data_context = BaseDataContext(
        project_config=DataContextConfig(
            concurrency=ConcurrencyConfig(enabled=True),
            store_backend_defaults=InMemoryStoreBackendDefaults(),
        )
    )
    assert data_context.concurrency.enabled
def test_in_memory_data_context_configuration(
    titanic_pandas_data_context_with_v013_datasource_with_checkpoints_v1_with_empty_store_stats_enabled,
):
    project_config_dict: dict = titanic_pandas_data_context_with_v013_datasource_with_checkpoints_v1_with_empty_store_stats_enabled.get_config(
        mode=ConfigOutputModes.DICT)
    project_config_dict["plugins_directory"] = None
    project_config_dict["validation_operators"] = {
        "action_list_operator": {
            "class_name":
            "ActionListValidationOperator",
            "action_list": [
                {
                    "name": "store_validation_result",
                    "action": {
                        "class_name": "StoreValidationResultAction"
                    },
                },
                {
                    "name": "store_evaluation_params",
                    "action": {
                        "class_name": "StoreEvaluationParametersAction"
                    },
                },
                {
                    "name": "update_data_docs",
                    "action": {
                        "class_name": "UpdateDataDocsAction"
                    },
                },
            ],
        }
    }

    # Roundtrip through schema validation to remove any illegal fields and/or restore any missing fields.
    project_config_dict = dataContextConfigSchema.dump(project_config_dict)
    project_config_dict = dataContextConfigSchema.load(project_config_dict)

    project_config: DataContextConfig = DataContextConfig(
        **project_config_dict)
    data_context = BaseDataContext(
        project_config=project_config,
        context_root_dir=
        titanic_pandas_data_context_with_v013_datasource_with_checkpoints_v1_with_empty_store_stats_enabled
        .root_directory,
    )

    my_validator: Validator = data_context.get_validator(
        datasource_name="my_datasource",
        data_connector_name="my_basic_data_connector",
        data_asset_name="Titanic_1912",
        create_expectation_suite_with_name="my_test_titanic_expectation_suite",
    )

    assert my_validator.expect_table_row_count_to_equal(1313)["success"]
    assert my_validator.expect_table_column_count_to_equal(7)["success"]
Example #14
def test_DataContextConfig_with_FilesystemStoreBackendDefaults_and_simple_defaults(
    construct_data_context_config, default_pandas_datasource_config
):
    """
    What does this test and why?
    Ensure that a very simple DataContextConfig setup using FilesystemStoreBackendDefaults is created accurately.
    This test sets the root_directory parameter.
    """

    test_root_directory = "test_root_dir"

    store_backend_defaults = FilesystemStoreBackendDefaults(
        root_directory=test_root_directory
    )
    data_context_config = DataContextConfig(
        datasources={
            "my_pandas_datasource": DatasourceConfig(
                class_name="PandasDatasource",
                batch_kwargs_generators={
                    "subdir_reader": {
                        "class_name": "SubdirReaderBatchKwargsGenerator",
                        "base_directory": "../data/",
                    }
                },
            )
        },
        store_backend_defaults=store_backend_defaults,
    )

    # Create desired config
    data_context_id = data_context_config.anonymous_usage_statistics.data_context_id
    desired_config = construct_data_context_config(
        data_context_id=data_context_id, datasources=default_pandas_datasource_config
    )
    # Add root_directory to stores and data_docs
    desired_config["stores"][desired_config["expectations_store_name"]][
        "store_backend"
    ]["root_directory"] = test_root_directory
    desired_config["stores"][desired_config["validations_store_name"]]["store_backend"][
        "root_directory"
    ] = test_root_directory
    desired_config["stores"][desired_config["checkpoint_store_name"]]["store_backend"][
        "root_directory"
    ] = test_root_directory
    desired_config["data_docs_sites"]["local_site"]["store_backend"][
        "root_directory"
    ] = test_root_directory

    data_context_config_schema = DataContextConfigSchema()
    assert filter_properties_dict(
        properties=data_context_config_schema.dump(data_context_config)
    ) == filter_properties_dict(properties=desired_config)
    assert DataContext.validate_config(project_config=data_context_config)
Example #15
def test_in_memory_data_context_configuration(
    titanic_pandas_data_context_with_v013_datasource_with_checkpoints_v1_with_empty_store,
):
    project_config_dict: dict = titanic_pandas_data_context_with_v013_datasource_with_checkpoints_v1_with_empty_store.get_config(
        mode="dict")
    project_config_dict["plugins_directory"] = None
    project_config_dict["validation_operators"] = {
        "action_list_operator": {
            "class_name":
            "ActionListValidationOperator",
            "action_list": [
                {
                    "name": "store_validation_result",
                    "action": {
                        "class_name": "StoreValidationResultAction"
                    },
                },
                {
                    "name": "store_evaluation_params",
                    "action": {
                        "class_name": "StoreEvaluationParametersAction"
                    },
                },
                {
                    "name": "update_data_docs",
                    "action": {
                        "class_name": "UpdateDataDocsAction"
                    },
                },
            ],
        }
    }
    project_config: DataContextConfig = DataContextConfig(
        **project_config_dict)
    data_context = BaseDataContext(
        project_config=project_config,
        context_root_dir=
        titanic_pandas_data_context_with_v013_datasource_with_checkpoints_v1_with_empty_store
        .root_directory,
    )

    my_validator: Validator = data_context.get_validator(
        datasource_name="my_datasource",
        data_connector_name="my_basic_data_connector",
        data_asset_name="Titanic_1912",
        create_expectation_suite_with_name="my_test_titanic_expectation_suite",
    )

    assert my_validator.expect_table_row_count_to_equal(1313)["success"]
    assert my_validator.expect_table_column_count_to_equal(7)["success"]
Example #16
    def validate_with_great_expectations(
        self,
        dataframe: TypeVar("pyspark.sql.DataFrame"),  # noqa: F821
        expectation_suite: TypeVar("ge.core.ExpectationSuite"),  # noqa: F821
        ge_validate_kwargs: Optional[dict],
    ):
        # NOTE: InMemoryStoreBackendDefaults SHOULD NOT BE USED in normal settings. You
        # may experience data loss as it persists nothing. It is used here for testing.
        # Please refer to docs to learn how to instantiate your DataContext.
        store_backend_defaults = InMemoryStoreBackendDefaults()
        data_context_config = DataContextConfig(
            store_backend_defaults=store_backend_defaults,
            checkpoint_store_name=store_backend_defaults.checkpoint_store_name,
        )
        context = BaseDataContext(project_config=data_context_config)

        datasource = {
            "name": "my_spark_dataframe",
            "class_name": "Datasource",
            "execution_engine": {
                "class_name": "SparkDFExecutionEngine",
                "force_reuse_spark_context": True,
            },
            "data_connectors": {
                "default_runtime_data_connector_name": {
                    "class_name": "RuntimeDataConnector",
                    "batch_identifiers": ["batch_id"],
                }
            },
        }
        context.add_datasource(**datasource)

        # Here is a RuntimeBatchRequest using a dataframe
        batch_request = RuntimeBatchRequest(
            datasource_name="my_spark_dataframe",
            data_connector_name="default_runtime_data_connector_name",
            data_asset_name="<YOUR_MEANINGFUL_NAME>",  # This can be anything that identifies this data_asset for you
            batch_identifiers={"batch_id": "default_identifier"},
            runtime_parameters={"batch_data":
                                dataframe},  # Your dataframe goes here
        )
        context.save_expectation_suite(expectation_suite)
        validator = context.get_validator(
            batch_request=batch_request,
            expectation_suite_name=expectation_suite.expectation_suite_name,
        )
        report = validator.validate(**ge_validate_kwargs)

        return report
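
A hedged call-site sketch for the method above; `engine`, `spark_df`, and `suite` are hypothetical stand-ins for the owning object, a pyspark.sql.DataFrame, and a ge.core.ExpectationSuite:

# Hypothetical usage; an empty dict runs validate() with its defaults.
report = engine.validate_with_great_expectations(
    dataframe=spark_df,
    expectation_suite=suite,
    ge_validate_kwargs={},
)
assert report.success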
Example #17
    def build_context(self):
        """
        Purpose:
            Create a data context and datasource and add them to the object.
        Returns:
            None; saves the data context and datasource to self.
        """
        self.context = ge.get_context()

        # Create the datasource configuration
        datasource_config = {
            "name": "example_datasource",
            "class_name": "Datasource",
            "module_name": "great_expectations.datasource",
            "execution_engine": {
                "module_name": "great_expectations.execution_engine",
                "class_name": "PandasExecutionEngine",
            },
            "data_connectors": {
                "default_runtime_data_connector_name": {
                    "class_name": "RuntimeDataConnector",
                    "batch_identifiers": ["default_identifier_name"],
                },
            },
        }

        # Create the data context configuration
        data_context_config = DataContextConfig(
            datasources={
                "pandas": DatasourceConfig(
                    class_name="Datasource",
                    execution_engine={
                        "class_name": "PandasExecutionEngine"
                    },
                    data_connectors={
                        "default_runtime_data_connector_name": {
                            "class_name": "RuntimeDataConnector",
                            "batch_identifiers": ["default_identifier_name"],
                        }
                    },
                )
            },
            store_backend_defaults=FilesystemStoreBackendDefaults(
                root_directory=os.path.join(os.getcwd(), "great_expectations")),
        )

        # Build the context and add the datasource
        self.context = BaseDataContext(project_config=data_context_config)
        # self.context.test_yaml_config(yaml.dump(datasource_config))
        self.context.add_datasource(**datasource_config)
def build_in_memory_runtime_context():
    data_context_config: DataContextConfig = DataContextConfig(
        datasources={
            "pandas_datasource": {
                "execution_engine": {
                    "class_name": "PandasExecutionEngine",
                    "module_name": "great_expectations.execution_engine",
                },
                "class_name": "Datasource",
                "module_name": "great_expectations.datasource",
                "data_connectors": {
                    "runtime_data_connector": {
                        "class_name": "RuntimeDataConnector",
                        "batch_identifiers": [
                            "id_key_0",
                            "id_key_1",
                        ],
                    }
                },
            },
            "spark_datasource": {
                "execution_engine": {
                    "class_name": "SparkDFExecutionEngine",
                    "module_name": "great_expectations.execution_engine",
                },
                "class_name": "Datasource",
                "module_name": "great_expectations.datasource",
                "data_connectors": {
                    "runtime_data_connector": {
                        "class_name": "RuntimeDataConnector",
                        "batch_identifiers": [
                            "id_key_0",
                            "id_key_1",
                        ],
                    }
                },
            },
        },
        expectations_store_name="expectations_store",
        validations_store_name="validations_store",
        evaluation_parameter_store_name="evaluation_parameter_store",
        checkpoint_store_name="checkpoint_store",
        store_backend_defaults=InMemoryStoreBackendDefaults(),
    )

    context: BaseDataContext = BaseDataContext(
        project_config=data_context_config)

    return context
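
A minimal usage sketch for the helper above, reusing the RuntimeBatchRequest/get_validator pattern from Example #16; the asset name, identifier values, and suite name are assumptions:

import pandas as pd

from great_expectations.core.batch import RuntimeBatchRequest

context = build_in_memory_runtime_context()
# The batch_identifiers keys match the "runtime_data_connector" configured above.
batch_request = RuntimeBatchRequest(
    datasource_name="pandas_datasource",
    data_connector_name="runtime_data_connector",
    data_asset_name="my_asset",  # hypothetical; anything that identifies the data
    runtime_parameters={"batch_data": pd.DataFrame({"x": [1, 2, 3]})},
    batch_identifiers={"id_key_0": "a", "id_key_1": "b"},
)
validator = context.get_validator(
    batch_request=batch_request,
    create_expectation_suite_with_name="my_suite",
)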
Example #19
def test_data_context_variables_get_with_substitutions(
    data_context_config_dict: dict, ) -> None:
    env_var_name: str = "MY_CONFIG_VERSION"
    value_associated_with_env_var: float = 7.0

    data_context_config_dict[
        DataContextVariableSchema.CONFIG_VERSION] = f"${env_var_name}"
    config: DataContextConfig = DataContextConfig(**data_context_config_dict)
    substitutions: dict = {
        env_var_name: value_associated_with_env_var,
    }

    variables: DataContextVariables = EphemeralDataContextVariables(
        config=config, substitutions=substitutions)
    assert variables.config_version == value_associated_with_env_var
    def _upgrade_configuration(self):
        if self.upgrade_log["skipped_upgrade"]:
            return
        config_commented_map: CommentedMap = (
            self.data_context.get_config().commented_map)
        for name, value in self.upgrade_checklist.items():
            if isinstance(value, dict):
                for key, config in value.items():
                    config_commented_map[name][key] = config
            else:
                config_commented_map[name] = value
        data_context_config: DataContextConfig = DataContextConfig.from_commented_map(
            commented_map=config_commented_map)
        self.data_context.set_config(project_config=data_context_config)
        # noinspection PyProtectedMember
        self.data_context._save_project_config()
        self._update_upgrade_log()
Example #21
    def _get_ge_context_local(ge_project_path: str) -> BaseDataContext:
        """
        This is configured to work with an in-memory pandas DataFrame.
        This setup allows us to run validations before (perhaps unnecessarily) writing any data
        to disk, as well as at any other stage.

        Currently using local storage.

        Args:
            ge_project_path (str): The path to the Great Expectations project,
                e.g. `/home/viadot/my_flow`. Expectation suites need to be placed inside the
                `expectations` folder, e.g. `/home/viadot/my_flow/expectations/failure.json`.

        Returns:
            BaseDataContext: The GE context (i.e. config) required to run the validations.
        """
        data_context_config = DataContextConfig(
            datasources={
                "pandas": DatasourceConfig(
                    class_name="PandasDatasource",
                    batch_kwargs_generators={},  # override the CSV default
                )
            },
            store_backend_defaults=FilesystemStoreBackendDefaults(ge_project_path),
            validation_operators={
                "action_list_operator": {
                    "class_name": "ActionListValidationOperator",
                    "action_list": [
                        {
                            "name": "store_validation_result",
                            "action": {"class_name": "StoreValidationResultAction"},
                        },
                        {
                            "name": "store_evaluation_params",
                            "action": {"class_name": "StoreEvaluationParametersAction"},
                        },
                        {
                            "name": "update_data_docs",
                            "action": {"class_name": "UpdateDataDocsAction"},
                        },
                    ],
                }
            },
        )
        context = BaseDataContext(project_config=data_context_config)
        return context
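
A hedged sketch of driving the action_list_operator configured above through the v2-style batch_kwargs API implied by PandasDatasource; the suite name is an assumption (the docstring expects suites under the expectations folder):

import pandas as pd

context = _get_ge_context_local("/home/viadot/my_flow")
batch = context.get_batch(
    batch_kwargs={"datasource": "pandas", "dataset": pd.DataFrame({"x": [1]})},
    expectation_suite_name="failure",  # hypothetical suite name
)
results = context.run_validation_operator(
    "action_list_operator", assets_to_validate=[batch]
)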
    def _set(self, key: Tuple[str, ...], value: Any, **kwargs: dict) -> None:
        (
            resource_type,
            resource_name,
        ) = InlineStoreBackend._determine_resource_type_and_name_from_key(key)

        project_config: DataContextConfig = self._data_context.config

        if resource_type is DataContextVariableSchema.ALL_VARIABLES:
            config_commented_map_from_yaml = yaml.load(value)
            value = DataContextConfig.from_commented_map(
                commented_map=config_commented_map_from_yaml)
            self._data_context.set_config(value)
        elif resource_name is not None:
            project_config[resource_type][resource_name] = value
        else:
            project_config[resource_type] = value

        self._save_changes()
Example #23
def test_file_data_context_variables_e2e(
        monkeypatch, file_data_context: FileDataContext,
        progress_bars: ProgressBarsConfig) -> None:
    """
    What does this test do and why?

    Tests the E2E workflow with a FileDataContextVariables instance.
      1. User updates certain values and sets them as attributes.
      2. User persists changes utilizing the save_config call defined by the Variables API.
      3. Upon reading the result config from disk, we can confirm that changes were appropriately persisted.

    It is also important to note that in the case of $VARS syntax, we NEVER want to persist the underlying
    value in order to preserve sensitive information.
    """
    # Prepare updated progress bars to set and serialize to disk
    updated_progress_bars: ProgressBarsConfig = copy.deepcopy(progress_bars)
    updated_progress_bars.globally = False
    updated_progress_bars.profilers = True

    # Prepare updated plugins directory to set and serialize to disk (ensuring we hide the true value behind $VARS syntax)
    env_var_name: str = "MY_PLUGINS_DIRECTORY"
    value_associated_with_env_var: str = "foo/bar/baz"
    monkeypatch.setenv(env_var_name, value_associated_with_env_var)

    # Set attributes defined above
    file_data_context.variables.progress_bars = updated_progress_bars
    file_data_context.variables.plugins_directory = f"${env_var_name}"
    file_data_context.variables.save_config()

    # Review great_expectations.yml where values were written and confirm changes
    config_filepath = pathlib.Path(file_data_context.root_directory).joinpath(
        file_data_context.GE_YML)

    with open(config_filepath) as f:
        contents: dict = yaml.load(f)
        config_saved_to_disk: DataContextConfig = DataContextConfig(**contents)

    assert config_saved_to_disk.progress_bars == updated_progress_bars.to_dict()
    assert (file_data_context.variables.plugins_directory ==
            value_associated_with_env_var)
    assert config_saved_to_disk.plugins_directory == f"${env_var_name}"
    def _load_project_config(self):
        """
        Reads the project configuration from the project configuration file.
        The file may contain ${SOME_VARIABLE} variables - see self.project_config_with_variables_substituted
        for how these are substituted.

        For Data Contexts in GE Cloud mode, a user-specific template is retrieved from the Cloud API
        - see self._retrieve_data_context_config_from_ge_cloud for more details.

        :return: the configuration object read from the file or template
        """
        if self.ge_cloud_mode:
            config = self._retrieve_data_context_config_from_ge_cloud()
            return config

        path_to_yml = os.path.join(self._context_root_directory, self.GE_YML)
        try:
            with open(path_to_yml) as data:
                config_commented_map_from_yaml = yaml.load(data)

        except YAMLError as err:
            raise ge_exceptions.InvalidConfigurationYamlError(
                "Your configuration file is not a valid yml file likely due to a yml syntax error:\n\n{}".format(
                    err
                )
            )
        except DuplicateKeyError:
            raise ge_exceptions.InvalidConfigurationYamlError(
                "Error: duplicate key found in project YAML file."
            )
        except OSError:
            raise ge_exceptions.ConfigNotFoundError()

        try:
            return DataContextConfig.from_commented_map(
                commented_map=config_commented_map_from_yaml
            )
        except ge_exceptions.InvalidDataContextConfigError:
            # Just to be explicit about what we intended to catch
            raise
def basic_data_context_v013_config():
    return DataContextConfig(
        **{
            "commented_map": {},
            "config_version": 3,
            "plugins_directory": "plugins/",
            "evaluation_parameter_store_name": "evaluation_parameter_store",
            "validations_store_name": "does_not_have_to_be_real",
            "expectations_store_name": "expectations_store",
            "checkpoint_store_name": "checkpoint_store",
            "config_variables_file_path": "uncommitted/config_variables.yml",
            "datasources": {},
            "stores": {
                "expectations_store": {
                    "class_name": "ExpectationsStore",
                    "store_backend": {
                        "class_name": "TupleFilesystemStoreBackend",
                        "base_directory": "expectations/",
                    },
                },
                "evaluation_parameter_store": {
                    "module_name": "great_expectations.data_context.store",
                    "class_name": "EvaluationParameterStore",
                },
                "checkpoint_store": {
                    "class_name": "CheckpointStore",
                    "store_backend": {
                        "class_name": "TupleFilesystemStoreBackend",
                        "base_directory": "checkpoints/",
                    },
                },
            },
            "data_docs_sites": {},
            "anonymous_usage_statistics": {
                "enabled": True,
                "data_context_id": "6a52bdfa-e182-455b-a825-e69f076e67d6",
                "usage_statistics_url": USAGE_STATISTICS_QA_URL,
            },
        }
    )
def in_memory_data_context_config_usage_stats_enabled():

    return DataContextConfig(
        **{
            "commented_map": {},
            "config_version": 3,
            "plugins_directory": None,
            "evaluation_parameter_store_name": "evaluation_parameter_store",
            "validations_store_name": "validations_store",
            "expectations_store_name": "expectations_store",
            "checkpoint_store_name": "checkpoints_store",
            "config_variables_file_path": None,
            "datasources": {},
            "stores": {
                "expectations_store": {
                    "class_name": "ExpectationsStore",
                },
                "validations_store": {
                    "class_name": "ValidationsStore",
                },
                "checkpoints_store": {
                    "class_name": "CheckpointStore",
                },
                "evaluation_parameter_store": {
                    "class_name": "EvaluationParameterStore",
                },
            },
            "data_docs_sites": {},
            "validation_operators": {
                "default": {
                    "class_name": "ActionListValidationOperator",
                    "action_list": [],
                }
            },
            "anonymous_usage_statistics": {
                "enabled": True,
                "data_context_id": DATA_CONTEXT_ID,
                "usage_statistics_url": USAGE_STATISTICS_URL,
            },
        })
Example #27
def basic_in_memory_data_context_config_just_stores():
    return DataContextConfig(
        config_version=3.0,
        plugins_directory=None,
        evaluation_parameter_store_name="evaluation_parameter_store",
        expectations_store_name="expectations_store",
        datasources={},
        stores={
            "expectations_store": {
                "class_name": "ExpectationsStore"
            },
            "evaluation_parameter_store": {
                "class_name": "EvaluationParameterStore"
            },
            "validation_result_store": {
                "class_name": "ValidationsStore"
            },
        },
        validations_store_name="validation_result_store",
        data_docs_sites={},
        validation_operators={},
    )
    def get_config_with_variables_substituted(
            self,
            config: Optional[DataContextConfig] = None) -> DataContextConfig:
        """
        Substitute vars in config of form ${var} or $(var) with values found in the following places,
        in order of precedence: ge_cloud_config (for Data Contexts in GE Cloud mode), runtime_environment,
        environment variables, config_variables, or ge_cloud_config_variable_defaults (allows certain variables to
        be optional in GE Cloud mode).
        """
        if not config:
            config = self.config

        substitutions: dict = self._determine_substitutions()

        ge_cloud_config_variable_defaults = {
            "plugins_directory":
            self._normalize_absolute_or_relative_path(
                path=DataContextConfigDefaults.DEFAULT_PLUGINS_DIRECTORY.value
            ),
            "usage_statistics_url":
            DEFAULT_USAGE_STATISTICS_URL,
        }
        for config_variable, value in ge_cloud_config_variable_defaults.items():
            if substitutions.get(config_variable) is None:
                logger.info(
                    f'Config variable "{config_variable}" was not found in environment or global config ('
                    f'{self.GLOBAL_CONFIG_PATHS}). Using default value "{value}" instead. If you would '
                    f"like to "
                    f"use a different value, please specify it in an environment variable or in a "
                    f"great_expectations.conf file located at one of the above paths, in a section named "
                    f'"ge_cloud_config".')
                substitutions[config_variable] = value

        return DataContextConfig(**substitute_all_config_variables(
            config, substitutions, self.DOLLAR_SIGN_ESCAPE_STRING))
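
For illustration, a hedged, self-contained sketch of the ${var} replacement described in the docstring above; this stand-in handles only the ${var} form and ignores the $(var) syntax and the dollar-sign escaping performed by the real substitute_all_config_variables:

import re

def substitute(value: str, substitutions: dict) -> str:
    # Replace each "${NAME}" token with its value from the substitutions map.
    return re.sub(r"\$\{(\w+)\}",
                  lambda m: str(substitutions[m.group(1)]), value)

assert substitute("${PLUGINS}/custom", {"PLUGINS": "plugins"}) == "plugins/custom"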
def test_override_general_defaults(
    construct_data_context_config,
    default_pandas_datasource_config,
    default_spark_datasource_config,
):
    """
    What does this test and why?
    A DataContextConfig should be able to be created by passing items into the constructor that override any defaults.
    It should also be able to handle multiple datasources, whether they are configured with a dictionary or a DatasourceConfig.
    """

    data_context_config = DataContextConfig(
        config_version=999,
        plugins_directory="custom_plugins_directory",
        config_variables_file_path="custom_config_variables_file_path",
        datasources={
            "my_spark_datasource": {
                "data_asset_type": {
                    "class_name": "SparkDFDataset",
                    "module_name": "great_expectations.dataset",
                },
                "class_name": "SparkDFDatasource",
                "module_name": "great_expectations.datasource",
                "batch_kwargs_generators": {},
            },
            "my_pandas_datasource": DatasourceConfig(
                class_name="PandasDatasource",
                batch_kwargs_generators={
                    "subdir_reader": {
                        "class_name": "SubdirReaderBatchKwargsGenerator",
                        "base_directory": "../data/",
                    }
                },
            ),
        },
        stores={
            "expectations_S3_store": {
                "class_name": "ExpectationsStore",
                "store_backend": {
                    "class_name": "TupleS3StoreBackend",
                    "bucket": "REPLACE_ME",
                    "prefix": "REPLACE_ME",
                },
            },
            "expectations_S3_store2": {
                "class_name": "ExpectationsStore",
                "store_backend": {
                    "class_name": "TupleS3StoreBackend",
                    "bucket": "REPLACE_ME",
                    "prefix": "REPLACE_ME",
                },
            },
            "validations_S3_store": {
                "class_name": "ValidationsStore",
                "store_backend": {
                    "class_name": "TupleS3StoreBackend",
                    "bucket": "REPLACE_ME",
                    "prefix": "REPLACE_ME",
                },
            },
            "validations_S3_store2": {
                "class_name": "ValidationsStore",
                "store_backend": {
                    "class_name": "TupleS3StoreBackend",
                    "bucket": "REPLACE_ME",
                    "prefix": "REPLACE_ME",
                },
            },
            "custom_evaluation_parameter_store": {
                "class_name": "EvaluationParameterStore"
            },
        },
        expectations_store_name="custom_expectations_store_name",
        validations_store_name="custom_validations_store_name",
        evaluation_parameter_store_name="custom_evaluation_parameter_store_name",
        data_docs_sites={
            "s3_site": {
                "class_name": "SiteBuilder",
                "store_backend": {
                    "class_name": "TupleS3StoreBackend",
                    "bucket": "REPLACE_ME",
                },
                "site_index_builder": {
                    "class_name": "DefaultSiteIndexBuilder",
                    "show_cta_footer": True,
                },
            },
            "local_site": {
                "class_name": "SiteBuilder",
                "show_how_to_buttons": True,
                "site_index_builder": {
                    "class_name": "DefaultSiteIndexBuilder",
                    "show_cta_footer": True,
                },
                "store_backend": {
                    "base_directory": "uncommitted/data_docs/local_site/",
                    "class_name": "TupleFilesystemStoreBackend",
                },
            },
        },
        validation_operators={
            "custom_action_list_operator": {
                "class_name": "ActionListValidationOperator",
                "action_list": [
                    {
                        "name": "custom_store_validation_result",
                        "action": {"class_name": "CustomStoreValidationResultAction"},
                    },
                    {
                        "name": "store_evaluation_params",
                        "action": {"class_name": "StoreEvaluationParametersAction"},
                    },
                    {
                        "name": "update_data_docs",
                        "action": {"class_name": "UpdateDataDocsAction"},
                    },
                ],
            }
        },
        anonymous_usage_statistics={"enabled": True},
    )

    desired_stores = {
        "custom_evaluation_parameter_store": {"class_name": "EvaluationParameterStore"},
        "expectations_S3_store": {
            "class_name": "ExpectationsStore",
            "store_backend": {
                "bucket": "REPLACE_ME",
                "class_name": "TupleS3StoreBackend",
                "prefix": "REPLACE_ME",
            },
        },
        "expectations_S3_store2": {
            "class_name": "ExpectationsStore",
            "store_backend": {
                "bucket": "REPLACE_ME",
                "class_name": "TupleS3StoreBackend",
                "prefix": "REPLACE_ME",
            },
        },
        "validations_S3_store": {
            "class_name": "ValidationsStore",
            "store_backend": {
                "bucket": "REPLACE_ME",
                "class_name": "TupleS3StoreBackend",
                "prefix": "REPLACE_ME",
            },
        },
        "validations_S3_store2": {
            "class_name": "ValidationsStore",
            "store_backend": {
                "bucket": "REPLACE_ME",
                "class_name": "TupleS3StoreBackend",
                "prefix": "REPLACE_ME",
            },
        },
    }

    desired_data_docs_sites_config = {
        "local_site": {
            "class_name": "SiteBuilder",
            "show_how_to_buttons": True,
            "site_index_builder": {
                "class_name": "DefaultSiteIndexBuilder",
                "show_cta_footer": True,
            },
            "store_backend": {
                "base_directory": "uncommitted/data_docs/local_site/",
                "class_name": "TupleFilesystemStoreBackend",
            },
        },
        "s3_site": {
            "class_name": "SiteBuilder",
            "site_index_builder": {
                "class_name": "DefaultSiteIndexBuilder",
                "show_cta_footer": True,
            },
            "store_backend": {
                "bucket": "REPLACE_ME",
                "class_name": "TupleS3StoreBackend",
            },
        },
    }
    desired_validation_operators = {
        "custom_action_list_operator": {
            "class_name": "ActionListValidationOperator",
            "action_list": [
                {
                    "name": "custom_store_validation_result",
                    "action": {"class_name": "CustomStoreValidationResultAction"},
                },
                {
                    "name": "store_evaluation_params",
                    "action": {"class_name": "StoreEvaluationParametersAction"},
                },
                {
                    "name": "update_data_docs",
                    "action": {"class_name": "UpdateDataDocsAction"},
                },
            ],
        }
    }

    desired_config = construct_data_context_config(
        data_context_id=data_context_config.anonymous_usage_statistics.data_context_id,
        datasources={
            **default_pandas_datasource_config,
            **default_spark_datasource_config,
        },
        config_version=999.0,
        expectations_store_name="custom_expectations_store_name",
        validations_store_name="custom_validations_store_name",
        evaluation_parameter_store_name="custom_evaluation_parameter_store_name",
        stores=desired_stores,
        validation_operators=desired_validation_operators,
        data_docs_sites=desired_data_docs_sites_config,
        plugins_directory="custom_plugins_directory",
    )
    desired_config["config_variables_file_path"] = "custom_config_variables_file_path"

    data_context_config_schema = DataContextConfigSchema()
    assert data_context_config_schema.dump(data_context_config) == desired_config
    assert DataContext.validate_config(project_config=data_context_config)
def test_DataContextConfig_with_DatabaseStoreBackendDefaults_using_all_parameters(
    construct_data_context_config, default_pandas_datasource_config
):
    """
    What does this test and why?
    Make sure that DatabaseStoreBackendDefaults parameters are handled appropriately
    E.g. Make sure that default_credentials is ignored if individual store credentials are passed
    """

    data_context_config = DataContextConfig(
        datasources={
            "my_pandas_datasource": DatasourceConfig(
                class_name="PandasDatasource",
                module_name="great_expectations.datasource",
                data_asset_type={
                    "module_name": "great_expectations.dataset",
                    "class_name": "PandasDataset",
                },
                batch_kwargs_generators={
                    "subdir_reader": {
                        "class_name": "SubdirReaderBatchKwargsGenerator",
                        "base_directory": "../data/",
                    }
                },
            )
        },
        store_backend_defaults=DatabaseStoreBackendDefaults(
            default_credentials={
                "drivername": "postgresql",
                "host": "localhost",
                "port": "65432",
                "username": "******",
                "password": "******",
                "database": "ge_tutorials",
            },
            expectations_store_credentials={
                "drivername": "custom_expectations_store_drivername",
                "host": "custom_expectations_store_host",
                "port": "custom_expectations_store_port",
                "username": "******",
                "password": "******",
                "database": "custom_expectations_store_database",
            },
            validations_store_credentials={
                "drivername": "custom_validations_store_drivername",
                "host": "custom_validations_store_host",
                "port": "custom_validations_store_port",
                "username": "******",
                "password": "******",
                "database": "custom_validations_store_database",
            },
            expectations_store_name="custom_expectations_database_store_name",
            validations_store_name="custom_validations_database_store_name",
            evaluation_parameter_store_name="custom_evaluation_parameter_store_name",
        ),
    )

    # Create desired config
    desired_stores_config = {
        "custom_evaluation_parameter_store_name": {
            "class_name": "EvaluationParameterStore"
        },
        "custom_expectations_database_store_name": {
            "class_name": "ExpectationsStore",
            "store_backend": {
                "class_name": "DatabaseStoreBackend",
                "credentials": {
                    "database": "custom_expectations_store_database",
                    "drivername": "custom_expectations_store_drivername",
                    "host": "custom_expectations_store_host",
                    "password": "******",
                    "port": "custom_expectations_store_port",
                    "username": "******",
                },
            },
        },
        "custom_validations_database_store_name": {
            "class_name": "ValidationsStore",
            "store_backend": {
                "class_name": "DatabaseStoreBackend",
                "credentials": {
                    "database": "custom_validations_store_database",
                    "drivername": "custom_validations_store_drivername",
                    "host": "custom_validations_store_host",
                    "password": "******",
                    "port": "custom_validations_store_port",
                    "username": "******",
                },
            },
        },
    }
    desired_data_docs_sites_config = {
        "local_site": {
            "class_name": "SiteBuilder",
            "show_how_to_buttons": True,
            "site_index_builder": {
                "class_name": "DefaultSiteIndexBuilder",
                "show_cta_footer": True,
            },
            "store_backend": {
                "base_directory": "uncommitted/data_docs/local_site/",
                "class_name": "TupleFilesystemStoreBackend",
            },
        }
    }

    desired_config = construct_data_context_config(
        data_context_id=data_context_config.anonymous_usage_statistics.data_context_id,
        datasources=default_pandas_datasource_config,
        expectations_store_name="custom_expectations_database_store_name",
        validations_store_name="custom_validations_database_store_name",
        evaluation_parameter_store_name="custom_evaluation_parameter_store_name",
        stores=desired_stores_config,
        data_docs_sites=desired_data_docs_sites_config,
    )

    data_context_config_schema = DataContextConfigSchema()
    assert data_context_config_schema.dump(data_context_config) == desired_config
    assert DataContext.validate_config(project_config=data_context_config)