Example #1
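A SqlAlchemyDatasource test exercising three ways to control dataset options: process_batch_parameters with caching disabled, process_batch_parameters with caching enabled, and dataset_options embedded directly in batch_kwargs.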
def test_sqlalchemy_datasource_processes_dataset_options(
        test_db_connection_string):
    datasource = SqlAlchemyDatasource(
        "SqlAlchemy", credentials={"url": test_db_connection_string})
    batch_kwargs = datasource.process_batch_parameters(
        dataset_options={"caching": False})
    batch_kwargs["query"] = "select * from table_1;"
    batch = datasource.get_batch(batch_kwargs)
    validator = BridgeValidator(batch,
                                ExpectationSuite(expectation_suite_name="foo"))
    dataset = validator.get_dataset()
    assert dataset.caching is False

    batch_kwargs = datasource.process_batch_parameters(
        dataset_options={"caching": True})
    batch_kwargs["query"] = "select * from table_1;"
    batch = datasource.get_batch(batch_kwargs)
    validator = BridgeValidator(batch,
                                ExpectationSuite(expectation_suite_name="foo"))
    dataset = validator.get_dataset()
    assert dataset.caching is True

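    # dataset_options can also be passed directly inside batch_kwargs: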
    batch_kwargs = {
        "query": "select * from table_1;",
        "dataset_options": {
            "caching": False
        },
    }
    batch = datasource.get_batch(batch_kwargs)
    validator = BridgeValidator(batch,
                                ExpectationSuite(expectation_suite_name="foo"))
    dataset = validator.get_dataset()
    assert dataset.caching is False
Example #2
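A profiler's _profile method that walks the properties of an object-typed JSON schema, collects an expectation per recognized facet (existence, type, range, boolean, set, string length, nullability), and wraps them in an ExpectationSuite annotated with the schema description and a citation.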
    def _profile(self,
                 schema: Dict,
                 suite_name: str = None) -> ExpectationSuite:
        if not suite_name:
            raise ValueError(
                "Please provide a suite name when using this profiler.")
        expectations = []
        # TODO add recursion to allow creation of suites for nested schema files
        if schema["type"] == JsonSchemaTypes.OBJECT.value:
            for key, details in schema["properties"].items():
                expectations.append(
                    self._create_existence_expectation(key, details))

                type_expectation = self._create_type_expectation(key, details)
                if type_expectation:
                    expectations.append(type_expectation)

                range_expectation = self._create_range_expectation(
                    key, details)
                if range_expectation:
                    expectations.append(range_expectation)

                boolean_expectation = self._create_boolean_expectation(
                    key, details)
                if boolean_expectation:
                    expectations.append(boolean_expectation)

                set_expectation = self._create_set_expectation(key, details)
                if set_expectation:
                    expectations.append(set_expectation)

                string_len_expectation = self._create_string_length_expectation(
                    key, details)
                if string_len_expectation:
                    expectations.append(string_len_expectation)

                null_or_not_null_expectation = self._create_null_or_not_null_column_expectation(
                    key, details)
                if null_or_not_null_expectation:
                    expectations.append(null_or_not_null_expectation)
        description = schema.get("description", None)
        meta = None
        if description:
            meta = {
                "notes": {
                    "format": "markdown",
                    "content": [f"### Description:\n{description}"],
                }
            }
        suite = ExpectationSuite(suite_name,
                                 expectations=expectations,
                                 meta=meta)
        suite.add_citation(
            comment=f"This suite was built by the {self.__class__.__name__}")
        return suite
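For orientation, here is a minimal sketch of how such a profiler might be driven; the JsonSchemaProfiler name and import path are assumptions based on Great Expectations' JSON-schema profiler, not taken from the snippet above.

# Minimal usage sketch (assumed API; verify the class name and import path for your version).
import json

from great_expectations.profile.json_schema_profiler import JsonSchemaProfiler

with open("my_schema.json") as f:
    schema = json.load(f)  # a JSON Schema dict with "type": "object"

profiler = JsonSchemaProfiler()
# A suite name is required; _profile raises ValueError without one.
suite = profiler.profile(schema, suite_name="my_suite")
print(suite.expectation_suite_name, len(suite.expectations))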
Example #3
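Citation and validation helpers: recording the batch_request used to create a suite, validating the Titanic data against a suite loaded from JSON and comparing normalized results, and a fixture combining column, column-pair, and table expectations in one suite.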
def add_citation_with_batch_request(
    data_context: DataContext,
    expectation_suite: ExpectationSuite,
    batch_request: Optional[Dict[str, Union[str, int, Dict[str, Any]]]] = None,
) -> None:
    if (expectation_suite is not None and batch_request
            and isinstance(batch_request, dict)
            and BatchRequest(**batch_request)):
        expectation_suite.add_citation(
            comment="Created suite added via CLI",
            batch_request=batch_request,
        )
        data_context.save_expectation_suite(
            expectation_suite=expectation_suite)

def test_validate_with_invalid_result(empty_data_context):
    context: DataContext = empty_data_context
    with open(
        file_relative_path(__file__, "./test_sets/titanic_expectations.json")
    ) as f:
        my_expectation_suite_dict: dict = expectationSuiteSchema.loads(f.read())
        my_expectation_suite: ExpectationSuite = ExpectationSuite(
            **my_expectation_suite_dict, data_context=context
        )

    with mock.patch("uuid.uuid1") as uuid:
        uuid.return_value = "1234"
        my_df = ge.read_csv(
            file_relative_path(__file__, "./test_sets/Titanic.csv"),
            expectation_suite=my_expectation_suite,
        )
    my_df.set_default_expectation_argument("result_format", "COMPLETE")

    results = my_df.validate()  # catch_exceptions=True is default

    with open(
        file_relative_path(
            __file__,
            "./test_sets/titanic_expected_data_asset_validate_results_with_exceptions.json",
        )
    ) as f:
        expected_results = expectationSuiteValidationResultSchema.loads(f.read())

    del results.meta["great_expectations_version"]
    del results.meta["expectation_suite_meta"]["great_expectations_version"]

    for result in results.results:
        result.exception_info.pop("exception_traceback")

    assert results.to_json_dict() == expected_results.to_json_dict()

def suite_with_table_and_column_expectations(exp1, exp2, exp3, exp4,
                                             column_pair_expectation,
                                             table_exp1, table_exp2,
                                             table_exp3):
    suite = ExpectationSuite(
        expectation_suite_name="warning",
        expectations=[
            exp1,
            exp2,
            exp3,
            exp4,
            column_pair_expectation,
            table_exp1,
            table_exp2,
            table_exp3,
        ],
        meta={"notes": "This is an expectation suite."},
    )
    assert suite.expectations == [
        exp1,
        exp2,
        exp3,
        exp4,
        column_pair_expectation,
        table_exp1,
        table_exp2,
        table_exp3,
    ]
    return suite
Example #6
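Dataset-option handling for SparkDFDatasource (caching and persist flags), followed by suite fixtures and tests covering version metadata, SQL limit/offset batches, and query templating.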
def test_spark_datasource_processes_dataset_options(
        test_folder_connection_path_csv, test_backends, empty_data_context):
    context: DataContext = empty_data_context
    if "SparkDFDataset" not in test_backends:
        pytest.skip(
            "Spark has not been enabled, so this test must be skipped.")
    datasource = SparkDFDatasource(
        "PandasCSV",
        batch_kwargs_generators={
            "subdir_reader": {
                "class_name": "SubdirReaderBatchKwargsGenerator",
                "base_directory": test_folder_connection_path_csv,
            }
        },
    )
    batch_kwargs = datasource.build_batch_kwargs("subdir_reader",
                                                 data_asset_name="test")
    batch_kwargs["dataset_options"] = {"caching": False, "persist": False}
    batch = datasource.get_batch(batch_kwargs)
    validator = BridgeValidator(
        batch,
        ExpectationSuite(expectation_suite_name="foo", data_context=context))
    dataset = validator.get_dataset()
    assert dataset.caching is False
    assert dataset._persist is False

def suite_with_column_pair_and_table_expectations(
    table_exp1,
    table_exp2,
    table_exp3,
    column_pair_expectation,
    empty_data_context_stats_enabled,
):
    context: DataContext = empty_data_context_stats_enabled

    suite = ExpectationSuite(
        expectation_suite_name="warning",
        expectations=[
            column_pair_expectation,
            table_exp1,
            table_exp2,
            table_exp3,
        ],
        meta={"notes": "This is an expectation suite."},
        data_context=context,
    )
    assert suite.expectations == [
        column_pair_expectation,
        table_exp1,
        table_exp2,
        table_exp3,
    ]
    return suite

def test_suite_does_not_overwrite_existing_version_metadata(empty_data_context):
    context: DataContext = empty_data_context
    suite = ExpectationSuite(
        "foo", meta={"great_expectations_version": "0.0.0"}, data_context=context
    )
    assert "great_expectations_version" in suite.meta.keys()
    assert suite.meta["great_expectations_version"] == "0.0.0"

def test_sqlalchemy_source_limit(sqlitedb_engine, empty_data_context):
    context: DataContext = empty_data_context
    df1 = pd.DataFrame({
        "col_1": [1, 2, 3, 4, 5],
        "col_2": ["a", "b", "c", "d", "e"]
    })
    df2 = pd.DataFrame({
        "col_1": [0, 1, 2, 3, 4],
        "col_2": ["b", "c", "d", "e", "f"]
    })
    df1.to_sql(name="table_1", con=sqlitedb_engine, index=True)
    df2.to_sql(name="table_2", con=sqlitedb_engine, index=True, schema="main")
    datasource = SqlAlchemyDatasource("SqlAlchemy", engine=sqlitedb_engine)
    limited_batch = datasource.get_batch({
        "table": "table_1",
        "limit": 1,
        "offset": 2
    })
    assert isinstance(limited_batch, Batch)
    limited_dataset = BridgeValidator(
        limited_batch,
        expectation_suite=ExpectationSuite("test", data_context=context),
        expectation_engine=SqlAlchemyDataset,
    ).get_dataset()
    assert limited_dataset._table.name.startswith(
        "ge_temp_")  # we have generated a temporary table
    assert len(limited_dataset.head(10)) == 1  # and it is only one row long
    assert limited_dataset.head(
        10)["col_1"][0] == 3  # offset should have been applied

def test_sqlalchemy_source_templating(sqlitedb_engine, empty_data_context):
    context: DataContext = empty_data_context
    datasource = SqlAlchemyDatasource(
        engine=sqlitedb_engine,
        batch_kwargs_generators={
            "foo": {
                "class_name": "QueryBatchKwargsGenerator"
            }
        },
    )
    generator = datasource.get_batch_kwargs_generator("foo")
    generator.add_query(data_asset_name="test",
                        query="select 'cat' as ${col_name};")
    batch = datasource.get_batch(
        generator.build_batch_kwargs(
            "test", query_parameters={"col_name": "animal_name"}))
    dataset = BridgeValidator(
        batch,
        expectation_suite=ExpectationSuite("test", data_context=context),
        expectation_engine=SqlAlchemyDataset,
    ).get_dataset()
    res = dataset.expect_column_to_exist("animal_name")
    assert res.success is True
    res = dataset.expect_column_values_to_be_in_set("animal_name", ["cat"])
    assert res.success is True

def different_suite(exp1, exp4, empty_data_context):
    context: DataContext = empty_data_context
    return ExpectationSuite(
        expectation_suite_name="warning",
        expectations=[exp1, exp4],
        meta={"notes": "This is an expectation suite."},
        data_context=context,
    )

def equivalent_suite(exp1, exp3):
    return ExpectationSuite(
        expectation_suite_name="danger",
        expectations=[exp1, exp3],
        meta={
            "notes": "This is another expectation suite, with a different name and meta"
        },
    )

def baseline_suite(exp1, exp2, empty_data_context_stats_enabled):
    context: DataContext = empty_data_context_stats_enabled
    return ExpectationSuite(
        expectation_suite_name="warning",
        expectations=[exp1, exp2],
        meta={"notes": "This is an expectation suite."},
        data_context=context,
    )
Example #14
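Anonymization of a core Great Expectations object, followed by suite fixtures, including one that stamps a ge_cloud_id onto both the suite and its expectations.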
def test_anonymize_object_info_with_core_ge_object(
    anonymizer_with_consistent_salt: Anonymizer,
):
    anonymized_result: dict = anonymizer_with_consistent_salt._anonymize_object_info(
        anonymized_info_dict={},
        object_=ExpectationSuite(expectation_suite_name="my_suite"),
    )

    assert anonymized_result == {"parent_class": "ExpectationSuite"}

def domain_success_runtime_suite(exp1, exp2, exp3, exp4, exp5,
                                 empty_data_context_stats_enabled):
    context: DataContext = empty_data_context_stats_enabled

    return ExpectationSuite(
        expectation_suite_name="warning",
        expectations=[exp1, exp2, exp3, exp4, exp5],
        meta={"notes": "This is an expectation suite."},
        data_context=context,
    )

def equivalent_suite(exp1, exp3, empty_data_context):
    context: DataContext = empty_data_context
    return ExpectationSuite(
        expectation_suite_name="danger",
        expectations=[exp1, exp3],
        meta={
            "notes": "This is another expectation suite, with a different name and meta"
        },
        data_context=context,
    )

def ge_cloud_suite(ge_cloud_id, exp1, exp2, exp3, empty_data_context_stats_enabled):
    context: DataContext = empty_data_context_stats_enabled

    for exp in (exp1, exp2, exp3):
        exp.ge_cloud_id = ge_cloud_id
    return ExpectationSuite(
        expectation_suite_name="warning",
        expectations=[exp1, exp2, exp3],
        meta={"notes": "This is an expectation suite."},
        ge_cloud_id=ge_cloud_id,
        data_context=context,
    )
Example #18
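Default suite initialization on DataAsset, the version-metadata warnings emitted during validation, and a Rule-Based Profiler run method that assembles an ExpectationSuite from rule evaluation results.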
def test_data_asset_expectation_suite():
    asset = DataAsset()
    default_suite = ExpectationSuite(
        expectation_suite_name="default",
        data_asset_type="DataAsset",
        meta={"great_expectations_version": ge_version},
        expectations=[],
    )

    # We should have a default-initialized suite stored internally and available for getting
    assert asset._expectation_suite == default_suite
    assert asset.get_expectation_suite() == default_suite

def test_meta_version_warning():
    asset = ge.data_asset.DataAsset()

    with pytest.warns(UserWarning) as w:
        suite = ExpectationSuite(expectations=[],
                                 expectation_suite_name="test")
        # mangle the metadata
        suite.meta = {"foo": "bar"}
        out = asset.validate(expectation_suite=suite)
    assert (
        w[0].message.args[0] ==
        "WARNING: No great_expectations version found in configuration object."
    )

    with pytest.warns(UserWarning) as w:
        suite = ExpectationSuite(
            expectations=[],
            expectation_suite_name="test",
            meta={"great_expectations_version": "0.0.0"},
        )
        # mangle the metadata
        suite.meta = {"great_expectations_version": "0.0.0"}
        out = asset.validate(expectation_suite=suite)
    assert (
        w[0].message.args[0] ==
        "WARNING: This configuration object was built using version 0.0.0 of great_expectations, but is currently "
        "being validated by version %s." % ge.__version__)

    def run(
        self,
        variables: Optional[Dict[str, Any]] = None,
        rules: Optional[Dict[str, Dict[str, Any]]] = None,
        expectation_suite_name: Optional[str] = None,
        include_citation: bool = True,
    ) -> ExpectationSuite:
        """
        Args:
            :param variables attribute name/value pairs (overrides)
            :param rules name/(configuration-dictionary) (overrides)
            :param expectation_suite_name: A name for returned Expectation suite.
            :param include_citation: Whether or not to include the Profiler config in the metadata for the ExpectationSuite produced by the Profiler
        :return: Set of rule evaluation results in the form of an ExpectationSuite
        """
        effective_variables: Optional[
            ParameterContainer] = self.reconcile_profiler_variables(
                variables=variables)

        effective_rules: List[Rule] = self.reconcile_profiler_rules(
            rules=rules)

        if expectation_suite_name is None:
            expectation_suite_name = (
                f"tmp.profiler_{self.__class__.__name__}_suite_{str(uuid.uuid4())[:8]}"
            )

        expectation_suite: ExpectationSuite = ExpectationSuite(
            expectation_suite_name=expectation_suite_name,
            data_context=self._data_context,
        )

        if include_citation:
            expectation_suite.add_citation(
                comment="Suite created by Rule-Based Profiler with the configuration included.",
                profiler_config=self._citation,
            )

        rule: Rule
        for rule in effective_rules:
            expectation_configurations: List[
                ExpectationConfiguration] = rule.generate(
                    variables=effective_variables, )
            expectation_configuration: ExpectationConfiguration
            for expectation_configuration in expectation_configurations:
                expectation_suite._add_expectation(
                    expectation_configuration=expectation_configuration,
                    send_usage_event=False,
                )

        return expectation_suite
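A hypothetical call site for the run method above; "profiler" stands for an already-configured Rule-Based Profiler instance and is not defined in the snippet itself.

# Hypothetical call; "profiler" is an already-configured Rule-Based Profiler instance.
suite = profiler.run(
    variables=None,  # keep the profiler's configured variables
    rules=None,  # evaluate every configured rule
    expectation_suite_name="my_profiled_suite",  # else a tmp.profiler_* name is generated
    include_citation=True,  # record the profiler config in the suite's citations
)
print(len(suite.expectations))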

def single_expectation_suite_with_expectation_ge_cloud_id(
    exp1, empty_data_context_stats_enabled
):
    exp1_with_ge_cloud_id = deepcopy(exp1)
    exp1_with_ge_cloud_id.ge_cloud_id = UUID("0faf94a9-f53a-41fb-8e94-32f218d4a774")
    context: DataContext = empty_data_context_stats_enabled

    return ExpectationSuite(
        expectation_suite_name="warning",
        expectations=[exp1_with_ge_cloud_id],
        meta={"notes": "This is an expectation suite."},
        data_context=context,
    )
Example #22
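Resolving batch_kwargs from a suite's citations when none are supplied directly, the Pandas variant of the dataset-options test, and a check that validating a non-existent expectation surfaces a helpful exception message.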
    def get_batch_kwargs(self, suite: ExpectationSuite,
                         batch_kwargs: Union[dict, BatchKwargs]):
        if isinstance(batch_kwargs, dict):
            return self._fix_path_in_batch_kwargs(batch_kwargs)

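        # Otherwise prefer batch_kwargs recorded in the suite's citations.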
        citations = suite.meta.get("citations")
        if not citations:
            return self._fix_path_in_batch_kwargs(batch_kwargs)

        citations = suite.get_citations(require_batch_kwargs=True)
        if not citations:
            return None

        citation = citations[-1]
        batch_kwargs = citation.get("batch_kwargs")
        return self._fix_path_in_batch_kwargs(batch_kwargs)

def test_pandas_datasource_processes_dataset_options(test_folder_connection_path_csv):
    datasource = PandasDatasource(
        "PandasCSV",
        batch_kwargs_generators={
            "subdir_reader": {
                "class_name": "SubdirReaderBatchKwargsGenerator",
                "base_directory": test_folder_connection_path_csv,
            }
        },
    )
    batch_kwargs = datasource.build_batch_kwargs(
        "subdir_reader", data_asset_name="test"
    )
    batch_kwargs["dataset_options"] = {"caching": False}
    batch = datasource.get_batch(batch_kwargs)
    validator = BridgeValidator(batch, ExpectationSuite(expectation_suite_name="foo"))
    dataset = validator.get_dataset()
    assert dataset.caching is False

def test_validate_catch_non_existent_expectation():
    df = ge.dataset.PandasDataset({"x": [1, 2, 3, 4, 5]})

    validation_config_non_existent_expectation = ExpectationSuite(
        expectation_suite_name="default",
        meta={"great_expectations_version": ge.__version__},
        expectations=[
            ExpectationConfiguration(
                expectation_type="non_existent_expectation",
                kwargs={"column": "x"})
        ],
    )

    results = df.validate(
        expectation_suite=validation_config_non_existent_expectation)

    assert ("object has no attribute 'non_existent_expectation'"
            in results.results[0].exception_info["exception_message"])
Example #25
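Validation error handling (invalid parameters, catch_exceptions=False), ExpectationsStore round-trips, schema-qualified SQL batch requests, suite save/load with different discard options, and rendering of suite notes in several meta formats.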
def test_validate_catch_invalid_parameter():
    df = ge.dataset.PandasDataset({"x": [1, 2, 3, 4, 5]})

    validation_config_invalid_parameter = ExpectationSuite(
        expectation_suite_name="default",
        meta={"great_expectations_version": ge.__version__},
        expectations=[
            ExpectationConfiguration(
                expectation_type="expect_column_values_to_be_between",
                kwargs={"column": "x", "min_value": 6, "max_value": 5},
            )
        ],
    )

    result = df.validate(expectation_suite=validation_config_invalid_parameter)
    assert (
        "min_value cannot be greater than max_value"
        in result.results[0].exception_info["exception_message"]
    )

def test_validate_with_invalid_result_catch_exceptions_false(empty_data_context):
    context: DataContext = empty_data_context
    with open(
        file_relative_path(__file__, "./test_sets/titanic_expectations.json")
    ) as f:
        my_expectation_suite_dict: dict = expectationSuiteSchema.loads(f.read())
        my_expectation_suite: ExpectationSuite = ExpectationSuite(
            **my_expectation_suite_dict, data_context=context
        )

    with mock.patch("uuid.uuid1") as uuid:
        uuid.return_value = "1234"
        my_df = ge.read_csv(
            file_relative_path(__file__, "./test_sets/Titanic.csv"),
            expectation_suite=my_expectation_suite,
        )
    my_df.set_default_expectation_argument("result_format", "COMPLETE")

    with pytest.raises(InvalidCacheValueError):
        with pytest.warns(Warning, match=r"No great_expectations version found"):
            my_df.validate(catch_exceptions=False)

def test_expectations_store(empty_data_context):
    context: DataContext = empty_data_context
    my_store = ExpectationsStore()

    with pytest.raises(TypeError):
        my_store.set("not_a_ValidationResultIdentifier")

    ns_1 = ExpectationSuiteIdentifier.from_tuple(tuple("a.b.c.warning"))
    my_store.set(
        ns_1,
        ExpectationSuite(expectation_suite_name="a.b.c.warning",
                         data_context=context),
    )

    ns_1_dict: dict = my_store.get(ns_1)
    ns_1_suite: ExpectationSuite = ExpectationSuite(**ns_1_dict,
                                                    data_context=context)
    assert ns_1_suite == ExpectationSuite(
        expectation_suite_name="a.b.c.warning", data_context=context)

    ns_2 = ExpectationSuiteIdentifier.from_tuple(tuple("a.b.c.failure"))
    my_store.set(
        ns_2,
        ExpectationSuite(expectation_suite_name="a.b.c.failure",
                         data_context=context),
    )
    ns_2_dict: dict = my_store.get(ns_2)
    ns_2_suite: ExpectationSuite = ExpectationSuite(**ns_2_dict,
                                                    data_context=context)
    assert ns_2_suite == ExpectationSuite(
        expectation_suite_name="a.b.c.failure", data_context=context)

    assert set(my_store.list_keys()) == {
        ns_1,
        ns_2,
    }

def test_batch_request_sql_with_schema(
    data_context_with_sql_data_connectors_including_schema_for_testing_get_batch,
):
    context: DataContext = (
        data_context_with_sql_data_connectors_including_schema_for_testing_get_batch
    )

    df_table_expected_my_first_data_asset: pd.DataFrame = pd.DataFrame({
        "col_1": [1, 2, 3, 4, 5],
        "col_2": ["a", "b", "c", "d", "e"]
    })
    df_table_expected_my_second_data_asset: pd.DataFrame = pd.DataFrame({
        "col_1": [0, 1, 2, 3, 4],
        "col_2": ["b", "c", "d", "e", "f"]
    })

    batch_request: dict
    validator: Validator
    df_table_actual: pd.DataFrame

    # Exercise RuntimeDataConnector using SQL query against database table with empty schema name.
    batch_request = {
        "datasource_name": "test_sqlite_db_datasource",
        "data_connector_name": "my_runtime_data_connector",
        "data_asset_name": "test_asset",
        "runtime_parameters": {
            "query": "SELECT * FROM table_1"
        },
        "batch_identifiers": {
            "pipeline_stage_name": "core_processing",
            "airflow_run_id": 1234567890,
        },
    }
    validator = context.get_validator(
        batch_request=RuntimeBatchRequest(**batch_request),
        expectation_suite=ExpectationSuite("my_expectation_suite",
                                           data_context=context),
    )
    df_table_actual = validator.head(n_rows=0,
                                     fetch_all=True).drop(columns=["index"])
    assert df_table_actual.equals(df_table_expected_my_first_data_asset)

    # Exercise RuntimeDataConnector using SQL query against database table with non-empty ("main") schema name.
    batch_request = {
        "datasource_name": "test_sqlite_db_datasource",
        "data_connector_name": "my_runtime_data_connector",
        "data_asset_name": "test_asset",
        "runtime_parameters": {
            "query": "SELECT * FROM main.table_2"
        },
        "batch_identifiers": {
            "pipeline_stage_name": "core_processing",
            "airflow_run_id": 1234567890,
        },
    }
    validator = context.get_validator(
        batch_request=RuntimeBatchRequest(**batch_request),
        expectation_suite=ExpectationSuite("my_expectation_suite",
                                           data_context=context),
    )
    df_table_actual = validator.head(n_rows=0,
                                     fetch_all=True).drop(columns=["index"])
    assert df_table_actual.equals(df_table_expected_my_second_data_asset)

    # Exercise InferredAssetSqlDataConnector using data_asset_name introspected with schema from table, named "table_1".
    batch_request = {
        "datasource_name": "test_sqlite_db_datasource",
        "data_connector_name": "my_inferred_data_connector",
        "data_asset_name": "main.table_1",
    }
    validator = context.get_validator(
        batch_request=BatchRequest(**batch_request),
        expectation_suite=ExpectationSuite("my_expectation_suite",
                                           data_context=context),
    )
    df_table_actual = validator.head(n_rows=0,
                                     fetch_all=True).drop(columns=["index"])
    assert df_table_actual.equals(df_table_expected_my_first_data_asset)

    # Exercise InferredAssetSqlDataConnector using data_asset_name introspected with schema from table, named "table_2".
    batch_request = {
        "datasource_name": "test_sqlite_db_datasource",
        "data_connector_name": "my_inferred_data_connector",
        "data_asset_name": "main.table_2",
    }
    validator = context.get_validator(
        batch_request=BatchRequest(**batch_request),
        expectation_suite=ExpectationSuite("my_expectation_suite",
                                           data_context=context),
    )
    df_table_actual = validator.head(n_rows=0,
                                     fetch_all=True).drop(columns=["index"])
    assert df_table_actual.equals(df_table_expected_my_second_data_asset)

    # Exercise ConfiguredAssetSqlDataConnector using data_asset_name corresponding to "table_1" (implicitly).
    batch_request = {
        "datasource_name": "test_sqlite_db_datasource",
        "data_connector_name": "my_configured_data_connector",
        "data_asset_name": "my_first_data_asset",
    }
    validator = context.get_validator(
        batch_request=BatchRequest(**batch_request),
        expectation_suite=ExpectationSuite("my_expectation_suite",
                                           data_context=context),
    )
    df_table_actual = validator.head(n_rows=0,
                                     fetch_all=True).drop(columns=["index"])
    assert df_table_actual.equals(df_table_expected_my_first_data_asset)

    # Exercise ConfiguredAssetSqlDataConnector using data_asset_name corresponding to "table_2" (implicitly).
    batch_request = {
        "datasource_name": "test_sqlite_db_datasource",
        "data_connector_name": "my_configured_data_connector",
        "data_asset_name": "my_second_data_asset",
    }
    validator = context.get_validator(
        batch_request=BatchRequest(**batch_request),
        expectation_suite=ExpectationSuite("my_expectation_suite",
                                           data_context=context),
    )
    df_table_actual = validator.head(n_rows=0,
                                     fetch_all=True).drop(columns=["index"])
    assert df_table_actual.equals(df_table_expected_my_second_data_asset)

    # Exercise ConfiguredAssetSqlDataConnector using data_asset_name corresponding to "table_1" (explicitly).
    batch_request = {
        "datasource_name": "test_sqlite_db_datasource",
        "data_connector_name": "my_configured_data_connector",
        "data_asset_name": "table_1",
    }
    validator = context.get_validator(
        batch_request=BatchRequest(**batch_request),
        expectation_suite=ExpectationSuite("my_expectation_suite",
                                           data_context=context),
    )
    df_table_actual = validator.head(n_rows=0,
                                     fetch_all=True).drop(columns=["index"])
    assert df_table_actual.equals(df_table_expected_my_first_data_asset)

    # Exercise ConfiguredAssetSqlDataConnector using data_asset_name corresponding to "table_2" (explicitly).
    batch_request = {
        "datasource_name": "test_sqlite_db_datasource",
        "data_connector_name": "my_configured_data_connector",
        "data_asset_name": "table_2",
    }
    validator = context.get_validator(
        batch_request=BatchRequest(**batch_request),
        expectation_suite=ExpectationSuite("my_expectation_suite",
                                           data_context=context),
    )
    df_table_actual = validator.head(n_rows=0,
                                     fetch_all=True).drop(columns=["index"])
    assert df_table_actual.equals(df_table_expected_my_second_data_asset)

def test_get_and_save_expectation_suite(tmp_path_factory):
    directory_name = str(
        tmp_path_factory.mktemp("test_get_and_save_expectation_config"))
    df = ge.dataset.PandasDataset({
        "x": [1, 2, 4],
        "y": [1, 2, 5],
        "z": ["hello", "jello", "mello"],
    })

    df.expect_column_values_to_be_in_set("x", [1, 2, 4])
    df.expect_column_values_to_be_in_set("y", [1, 2, 4],
                                         catch_exceptions=True,
                                         include_config=True)
    df.expect_column_values_to_match_regex("z", "ello")

    ### First test set ###

    output_config = ExpectationSuite(
        expectations=[
            ExpectationConfiguration(
                expectation_type="expect_column_values_to_be_in_set",
                kwargs={
                    "column": "x",
                    "value_set": [1, 2, 4]
                },
            ),
            ExpectationConfiguration(
                expectation_type="expect_column_values_to_match_regex",
                kwargs={
                    "column": "z",
                    "regex": "ello"
                },
            ),
        ],
        expectation_suite_name="default",
        data_asset_type="Dataset",
        meta={"great_expectations_version": ge.__version__},
    )

    assert output_config == df.get_expectation_suite()

    df.save_expectation_suite(directory_name + "/temp1.json")
    with open(directory_name + "/temp1.json") as infile:
        loaded_config = expectationSuiteSchema.loads(infile.read())
    assert output_config == loaded_config

    ### Second test set ###

    output_config = ExpectationSuite(
        expectations=[
            ExpectationConfiguration(
                expectation_type="expect_column_values_to_be_in_set",
                kwargs={
                    "column": "x",
                    "value_set": [1, 2, 4]
                },
            ),
            ExpectationConfiguration(
                expectation_type="expect_column_values_to_be_in_set",
                kwargs={
                    "column": "y",
                    "value_set": [1, 2, 4]
                },
            ),
            ExpectationConfiguration(
                expectation_type="expect_column_values_to_match_regex",
                kwargs={
                    "column": "z",
                    "regex": "ello"
                },
            ),
        ],
        expectation_suite_name="default",
        data_asset_type="Dataset",
        meta={"great_expectations_version": ge.__version__},
    )

    assert output_config == df.get_expectation_suite(
        discard_failed_expectations=False)
    df.save_expectation_suite(directory_name + "/temp2.json",
                              discard_failed_expectations=False)
    with open(directory_name + "/temp2.json") as infile:
        loaded_suite = expectationSuiteSchema.loads(infile.read())
    assert output_config == loaded_suite

    ### Third test set ###

    output_config = ExpectationSuite(
        expectations=[
            ExpectationConfiguration(
                expectation_type="expect_column_values_to_be_in_set",
                kwargs={
                    "column": "x",
                    "value_set": [1, 2, 4],
                    "result_format": "BASIC",
                },
            ),
            ExpectationConfiguration(
                expectation_type="expect_column_values_to_match_regex",
                kwargs={
                    "column": "z",
                    "regex": "ello",
                    "result_format": "BASIC"
                },
            ),
        ],
        expectation_suite_name="default",
        data_asset_type="Dataset",
        meta={"great_expectations_version": ge.__version__},
    )
    assert output_config == df.get_expectation_suite(
        discard_result_format_kwargs=False,
        discard_include_config_kwargs=False,
        discard_catch_exceptions_kwargs=False,
    )

    df.save_expectation_suite(
        directory_name + "/temp3.json",
        discard_result_format_kwargs=False,
        discard_include_config_kwargs=False,
        discard_catch_exceptions_kwargs=False,
    )
    with open(directory_name + "/temp3.json") as infile:
        loaded_suite = expectationSuiteSchema.loads(infile.read())
    assert output_config == loaded_suite

def test_ExpectationSuitePageRenderer_render_expectation_suite_notes(
    empty_data_context,
):
    context: DataContext = empty_data_context
    result = ExpectationSuitePageRenderer._render_expectation_suite_notes(
        ExpectationSuite(expectation_suite_name="test",
                         meta={"notes": "*hi*"},
                         data_context=context))
    # print(RenderedContent.rendered_content_list_to_json(result.text))
    assert RenderedContent.rendered_content_list_to_json(result.text) == [
        "This Expectation suite currently contains 0 total Expectations across 0 columns.",
        "*hi*",
    ]

    result = ExpectationSuitePageRenderer._render_expectation_suite_notes(
        ExpectationSuite(
            expectation_suite_name="test",
            meta={"notes": ["*alpha*", "_bravo_", "charlie"]},
            data_context=context,
        ))
    # print(RenderedContent.rendered_content_list_to_json(result.text))
    assert RenderedContent.rendered_content_list_to_json(result.text) == [
        "This Expectation suite currently contains 0 total Expectations across 0 columns.",
        "*alpha*",
        "_bravo_",
        "charlie",
    ]

    result = ExpectationSuitePageRenderer._render_expectation_suite_notes(
        ExpectationSuite(
            expectation_suite_name="test",
            meta={
                "notes": {
                    "format": "string",
                    "content": ["*alpha*", "_bravo_", "charlie"],
                }
            },
            data_context=context,
        ))
    # print(RenderedContent.rendered_content_list_to_json(result.text))
    assert RenderedContent.rendered_content_list_to_json(result.text) == [
        "This Expectation suite currently contains 0 total Expectations across 0 columns.",
        "*alpha*",
        "_bravo_",
        "charlie",
    ]

    result = ExpectationSuitePageRenderer._render_expectation_suite_notes(
        ExpectationSuite(
            expectation_suite_name="test",
            meta={"notes": {
                "format": "markdown",
                "content": "*alpha*"
            }},
            data_context=context,
        ))
    # print(RenderedContent.rendered_content_list_to_json(result.text))

    try:
        mistune.markdown("*test*")
        assert RenderedContent.rendered_content_list_to_json(result.text) == [
            "This Expectation suite currently contains 0 total Expectations across 0 columns.",
            {
                "content_block_type": "markdown",
                "styling": {
                    "parent": {}
                },
                "markdown": "*alpha*",
            },
        ]
    except OSError:
        assert RenderedContent.rendered_content_list_to_json(result.text) == [
            "This Expectation suite currently contains 0 total Expectations across 0 columns.",
            "*alpha*",
        ]

    result = ExpectationSuitePageRenderer._render_expectation_suite_notes(
        ExpectationSuite(
            expectation_suite_name="test",
            meta={
                "notes": {
                    "format": "markdown",
                    "content": ["*alpha*", "_bravo_", "charlie"],
                }
            },
            data_context=context,
        ))
    # print(RenderedContent.rendered_content_list_to_json(result.text))

    try:
        mistune.markdown("*test*")
        assert RenderedContent.rendered_content_list_to_json(result.text) == [
            "This Expectation suite currently contains 0 total Expectations across 0 columns.",
            {
                "content_block_type": "markdown",
                "styling": {
                    "parent": {}
                },
                "markdown": "*alpha*",
            },
            {
                "content_block_type": "markdown",
                "styling": {
                    "parent": {}
                },
                "markdown": "_bravo_",
            },
            {
                "content_block_type": "markdown",
                "styling": {
                    "parent": {}
                },
                "markdown": "charlie",
            },
        ]
    except OSError:
        assert RenderedContent.rendered_content_list_to_json(result.text) == [
            "This Expectation suite currently contains 0 total Expectations across 0 columns.",
            "*alpha*",
            "_bravo_",
            "charlie",
        ]