Example 1
def test_stats_mixed_expectations():
    expectation_results = [
        ExpectationValidationResult(success=False),
        ExpectationValidationResult(success=True),
    ]
    actual = _calc_validation_statistics(expectation_results)
    expected = ValidationStatistics(2, 1, 1, 50.0, False)
    assert expected == actual
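For orientation, a minimal sketch of what a helper like _calc_validation_statistics presumably computes, matching the positional fields asserted above (evaluated, successful, and unsuccessful counts, a success percentage, and an overall flag). This is an illustrative assumption, not the library's implementation:

from collections import namedtuple

# Assumed field order, matching the positional arguments in the assertions,
# e.g. ValidationStatistics(2, 1, 1, 50.0, False).
ValidationStatistics = namedtuple(
    "ValidationStatistics",
    [
        "evaluated_expectations",
        "successful_expectations",
        "unsuccessful_expectations",
        "success_percent",
        "success",
    ],
)


def calc_validation_statistics(results):
    # Sketch: count results, derive the percentage, and require every
    # expectation to succeed for the overall flag.
    evaluated = len(results)
    successful = sum(1 for r in results if r.success)
    success_percent = successful / evaluated * 100 if evaluated else None
    return ValidationStatistics(
        evaluated,
        successful,
        evaluated - successful,
        success_percent,
        successful == evaluated,
    )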
Example 2
    def _build_evr(self, raw_response, configuration):
        """_build_evr is a lightweight convenience wrapper handling cases where an Expectation implementor
        fails to return an EVR but returns the necessary components in a dictionary."""
        if not isinstance(raw_response, ExpectationValidationResult):
            if isinstance(raw_response, dict):
                evr = ExpectationValidationResult(**raw_response)
                evr.expectation_config = configuration
            else:
                raise GreatExpectationsError("Unable to build EVR")
        else:
            evr = raw_response
            evr.expectation_config = configuration
        return evr
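A minimal usage sketch of the dict-wrapping behavior described in the docstring above (hypothetical, not taken from the library's test suite; import paths are assumed from 0.13-era Great Expectations and may differ by version):

from great_expectations.core.expectation_configuration import ExpectationConfiguration
from great_expectations.core.expectation_validation_result import ExpectationValidationResult

# A raw dict response, as an Expectation implementor might return it.
raw_response = {"success": True, "result": {"observed_value": 42}}
configuration = ExpectationConfiguration(
    expectation_type="expect_table_row_count_to_equal", kwargs={"value": 42}
)

# What the dict branch of _build_evr amounts to: expand the dict into an EVR
# and attach the originating expectation configuration.
evr = ExpectationValidationResult(**raw_response)
evr.expectation_config = configuration
assert evr.success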
Example 3
def test_stats_no_successful_expectations():
    expectation_results = [ExpectationValidationResult(success=False)]
    actual = _calc_validation_statistics(expectation_results)
    expected = ValidationStatistics(1, 0, 1, 0.0, False)
    assert expected == actual

    expectation_results = [
        ExpectationValidationResult(success=False),
        ExpectationValidationResult(success=False),
        ExpectationValidationResult(success=False),
    ]
    actual = _calc_validation_statistics(expectation_results)
    expected = ValidationStatistics(3, 0, 3, 0.0, False)
    assert expected == actual
def test_test_column_map_expectation_function():
    asset = ge.dataset.PandasDataset(
        {
            "x": [1, 3, 5, 7, 9],
            "y": [1, 2, None, 7, 9],
        }
    )

    def is_odd(
        self,
        column,
        mostly=None,
        result_format=None,
        include_config=True,
        catch_exceptions=None,
        meta=None,
    ):
        return column % 2 == 1

    assert asset.test_column_map_expectation_function(
        is_odd, column="x", include_config=False
    ) == ExpectationValidationResult(
        result={
            "element_count": 5,
            "missing_count": 0,
            "missing_percent": 0,
            "unexpected_percent": 0.0,
            "partial_unexpected_list": [],
            "unexpected_percent_nonmissing": 0.0,
            "unexpected_count": 0,
        },
        success=True,
    )

    assert asset.test_column_map_expectation_function(
        is_odd, "x", result_format="BOOLEAN_ONLY", include_config=False
    ) == ExpectationValidationResult(success=True)

    assert asset.test_column_map_expectation_function(
        is_odd, column="y", result_format="BOOLEAN_ONLY", include_config=False
    ) == ExpectationValidationResult(success=False)

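    # Column "y" holds one null; 3 of its 4 non-missing values are odd (75%),
    # so the expectation fails above without a threshold but passes with mostly=0.7.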
    assert asset.test_column_map_expectation_function(
        is_odd,
        column="y",
        result_format="BOOLEAN_ONLY",
        mostly=0.7,
        include_config=False,
    ) == ExpectationValidationResult(success=True)
def z_score_validation_result():
    return ExpectationValidationResult(
        success=True,
        expectation_config={
            "expectation_type": "expect_column_value_z_scores_to_be_less_than",
            "kwargs": {
                "column": "a",
                "mostly": 0.9,
                "threshold": 4,
                "double_sided": True,
            },
            "meta": {},
        },
        result={
            "element_count": 6,
            "unexpected_count": 0,
            "unexpected_percent": 0.0,
            "partial_unexpected_list": [],
            "missing_count": 0,
            "missing_percent": 0.0,
            "unexpected_percent_total": 0.0,
            "unexpected_percent_nonmissing": 0.0,
        },
        exception_info={
            "raised_exception": False,
            "exception_traceback": None,
            "exception_message": None,
        },
        meta={},
    )
def test_spark_expect_column_value_z_scores_to_be_less_than_impl(
    spark_session, basic_spark_df_execution_engine
):
    df = pd.DataFrame({"a": [1, 5, 22, 3, 5, 10]})
    spark = get_or_create_spark_application(
        spark_config={
            "spark.sql.catalogImplementation": "hive",
            "spark.executor.memory": "450m",
            # "spark.driver.allowMultipleContexts": "true",  # This directive does not appear to have any effect.
        }
    )
    df = spark.createDataFrame(df)

    expectationConfiguration = ExpectationConfiguration(
        expectation_type="expect_column_value_z_scores_to_be_less_than",
        kwargs={
            "column": "a",
            "mostly": 0.9,
            "threshold": 4,
            "double_sided": True,
        },
    )
    expectation = ExpectColumnValueZScoresToBeLessThan(expectationConfiguration)
    engine = basic_spark_df_execution_engine
    engine.load_batch_data(batch_id="my_id", batch_data=df)
    result = expectation.validate(Validator(execution_engine=engine))
    assert result == ExpectationValidationResult(
        success=True,
    )
Example 7
def test_stats_all_successful_expectations():
    expectation_results = [
        ExpectationValidationResult(success=True),
    ]
    actual = _calc_validation_statistics(expectation_results)
    expected = ValidationStatistics(1, 1, 0, 100.0, True)
    assert expected == actual

    expectation_results = [
        ExpectationValidationResult(success=True),
        ExpectationValidationResult(success=True),
        ExpectationValidationResult(success=True),
    ]
    actual = _calc_validation_statistics(expectation_results)
    expected = ValidationStatistics(3, 3, 0, 100.0, True)
    assert expected == actual
Example 8
def test_sa_expect_column_value_z_scores_to_be_less_than_impl(postgresql_engine):
    df = pd.DataFrame({"a": [1, 5, 22, 3, 5, 10]})
    df.to_sql(
        name="z_score_test_data",
        con=postgresql_engine,
        index=False,
        if_exists="replace",
    )
    expectationConfiguration = ExpectationConfiguration(
        expectation_type="expect_column_value_z_scores_to_be_less_than",
        kwargs={
            "column": "a",
            "mostly": 0.9,
            "threshold": 4,
            "double_sided": True,
        },
    )
    expectation = ExpectColumnValueZScoresToBeLessThan(expectationConfiguration)
    engine = SqlAlchemyExecutionEngine(engine=postgresql_engine)
    engine.load_batch_data(
        "my_id",
        SqlAlchemyBatchData(execution_engine=engine, table_name="z_score_test_data"),
    )
    result = expectation.validate(Validator(execution_engine=engine))
    assert result == ExpectationValidationResult(
        success=True,
    )
def test_expect_column_values_to_be_of_type_string_dialect_sqlite(sa):

    df = pd.DataFrame({"col": ["test_val1", "test_val2"]})
    validator = build_sa_validator_with_data(df, "sqlite")

    result = validator.expect_column_values_to_be_of_type("col", type_="TEXT")

    assert result == ExpectationValidationResult(
        success=True,
        expectation_config={
            "expectation_type": "expect_column_values_to_be_of_type",
            "kwargs": {
                "column": "col",
                "type_": "TEXT",
            },
            "meta": {},
        },
        result={
            "element_count": 2,
            "unexpected_count": 0,
            "unexpected_percent": 0.0,
            "partial_unexpected_list": [],
            "missing_count": 0,
            "missing_percent": 0.0,
            "unexpected_percent_total": 0.0,
            "unexpected_percent_nonmissing": 0.0,
        },
        exception_info={
            "raised_exception": False,
            "exception_traceback": None,
            "exception_message": None,
        },
        meta={},
    )
def test_graph_validate_with_runtime_config(basic_datasource):
    df = pd.DataFrame(
        {"a": [1, 5, 22, 3, 5, 10, 2, 3], "b": [97, 332, 3, 4, 5, 6, 7, None]}
    )

    batch = basic_datasource.get_single_batch_from_batch_request(
        BatchRequest(
            **{
                "datasource_name": "my_datasource",
                "data_connector_name": "test_runtime_data_connector",
                "data_asset_name": "IN_MEMORY_DATA_ASSET",
                "batch_data": df,
                "partition_request": PartitionRequest(
                    **{
                        "batch_identifiers": {
                            "pipeline_stage_name": 0,
                            "airflow_run_id": 0,
                            "custom_key_0": 0,
                        }
                    }
                ),
            }
        )
    )

    expectation_configuration = ExpectationConfiguration(
        expectation_type="expect_column_value_z_scores_to_be_less_than",
        kwargs={"column": "b", "mostly": 1, "threshold": 2, "double_sided": True},
    )
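    # With threshold=2 and mostly=1.0, the outlier 332 in column "b" produces a
    # z-score above the threshold, so the expectation is expected to fail below.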
    try:
        result = Validator(
            execution_engine=PandasExecutionEngine(), batches=(batch,)
        ).graph_validate(
            configurations=[expectation_configuration],
            runtime_configuration={"result_format": "COMPLETE"},
        )
    except AssertionError as e:
        result = e
    assert result == [
        ExpectationValidationResult(
            success=False,
            meta={},
            result={
                "element_count": 8,
                "unexpected_count": 1,
                "unexpected_percent": 12.5,
                "partial_unexpected_list": [332.0],
                "missing_count": 1,
                "missing_percent": 12.5,
                "unexpected_percent_nonmissing": 14.285714285714285,
                "partial_unexpected_index_list": None,
                "partial_unexpected_counts": [{"value": 332.0, "count": 1}],
                "unexpected_list": [332.0],
                "unexpected_index_list": None,
            },
            expectation_config=None,
            exception_info=None,
        )
    ]
def test_test_expectation_function():
    asset = ge.dataset.PandasDataset({
        "x": [1, 3, 5, 7, 9],
        "y": [1, 2, None, 7, 9],
    })
    asset_2 = ge.dataset.PandasDataset({
        "x": [1, 3, 5, 6, 9],
        "y": [1, 2, None, 6, 9],
    })

    def expect_dataframe_to_contain_7(self):
        return {"success": bool((self == 7).sum().sum() > 0)}

    assert asset.test_expectation_function(
        expect_dataframe_to_contain_7,
        include_config=False) == ExpectationValidationResult(success=True)
    assert asset_2.test_expectation_function(
        expect_dataframe_to_contain_7,
        include_config=False) == ExpectationValidationResult(success=False)
Example 12
def test_graph_validate(basic_datasource):
    df = pd.DataFrame({"a": [1, 5, 22, 3, 5, 10], "b": [1, 2, 3, 4, 5, None]})

    batch = basic_datasource.get_single_batch_from_batch_request(
        BatchRequest(
            **{
                "datasource_name": "my_datasource",
                "data_connector_name": "test_runtime_data_connector",
                "data_asset_name": "IN_MEMORY_DATA_ASSET",
                "batch_data": df,
                "partition_request": PartitionRequest(
                    **{
                        "batch_identifiers": {
                            "pipeline_stage_name": 0,
                            "airflow_run_id": 0,
                            "custom_key_0": 0,
                        }
                    }
                ),
            }
        )
    )

    expectation_configuration = ExpectationConfiguration(
        expectation_type="expect_column_value_z_scores_to_be_less_than",
        kwargs={
            "column": "b",
            "mostly": 0.9,
            "threshold": 4,
            "double_sided": True,
        },
    )
    result = Validator(
        execution_engine=PandasExecutionEngine(), batches=[batch]
    ).graph_validate(configurations=[expectation_configuration])
    assert result == [
        ExpectationValidationResult(
            success=True,
            expectation_config=None,
            meta={},
            result={
                "element_count": 6,
                "unexpected_count": 0,
                "unexpected_percent": 0.0,
                "partial_unexpected_list": [],
                "missing_count": 1,
                "missing_percent": 16.666666666666664,
                "unexpected_percent_nonmissing": 0.0,
            },
            exception_info=None,
        )
    ]
Example 13
def test__find_evr_by_type(titanic_profiled_evrs_1):
    # TODO: _find_all_evrs_by_type should accept a ValidationResultSuite, not ValidationResultSuite.results
    found_evr = Renderer()._find_evr_by_type(titanic_profiled_evrs_1.results,
                                             "expect_column_to_exist")
    print(found_evr)
    assert found_evr is None

    # TODO: _find_all_evrs_by_type should accept a ValidationResultSuite, not ValidationResultSuite.results
    found_evr = Renderer()._find_evr_by_type(
        titanic_profiled_evrs_1.results,
        "expect_column_distinct_values_to_be_in_set")
    print(found_evr)
    assert found_evr == ExpectationValidationResult(
        success=True,
        result={
            "observed_value": ["*", "1st", "2nd", "3rd"],
            "element_count": 1313,
            "missing_count": 0,
            "missing_percent": 0.0,
            "details": {
                "value_counts": [
                    {
                        "value": "*",
                        "count": 1
                    },
                    {
                        "value": "1st",
                        "count": 322
                    },
                    {
                        "value": "2nd",
                        "count": 279
                    },
                    {
                        "value": "3rd",
                        "count": 711
                    },
                ]
            },
        },
        exception_info={
            "raised_exception": False,
            "exception_message": None,
            "exception_traceback": None,
        },
        expectation_config=ExpectationConfiguration(
            expectation_type="expect_column_distinct_values_to_be_in_set",
            kwargs={
                "column": "PClass",
                "value_set": None,
                "result_format": "SUMMARY"
            },
        ),
    )
def test_expectation_decorator_meta():
    metadata = {"meta_key": "meta_value"}
    eds = ExpectationOnlyDataAsset()
    out = eds.no_op_value_expectation("a", meta=metadata)
    config = eds.get_expectation_suite()

    assert (ExpectationValidationResult(
        success=True, meta=metadata,
        expectation_config=config.expectations[0]) == out)

    assert (ExpectationConfiguration(
        expectation_type="no_op_value_expectation",
        kwargs={"value": "a"},
        meta=metadata,
    ) == config.expectations[0])
Example 15
def test_expect_column_value_z_scores_to_be_less_than_impl():
    df = pd.DataFrame({"a": [1, 5, 22, 3, 5, 10]})
    expectationConfiguration = ExpectationConfiguration(
        expectation_type="expect_column_value_z_scores_to_be_less_than",
        kwargs={
            "column": "a",
            "mostly": 0.9,
            "threshold": 4,
            "double_sided": True,
        },
    )
    expectation = ExpectColumnValueZScoresToBeLessThan(expectationConfiguration)
    engine = PandasExecutionEngine(batch_data_dict={"my_id": df})
    result = expectation.validate(Validator(execution_engine=engine))
    assert result == ExpectationValidationResult(success=True)
    def _validate(
        self,
        configuration: ExpectationConfiguration,
        metrics: Dict,
        runtime_configuration: dict = None,
        execution_engine: ExecutionEngine = None,
    ) -> ExpectationValidationResult:
        string_integers_increasing = metrics.get(
            "column_values.string_integers.increasing.map"
        )

        success = all(string_integers_increasing[0])

        return ExpectationValidationResult(
            result={
                "observed_value": np.unique(
                    string_integers_increasing[0], return_counts=True
                )
            },
            success=success,
        )
def test_expect_column_values_to_be_in_type_list_nullable_int():
    from packaging.version import parse

    pandas_version = parse(pd.__version__)
    if pandas_version < parse("0.24"):
        # Prior to 0.24, Pandas did not have nullable integer dtypes such as Int32Dtype.
        pytest.skip("Prior to 0.24, Pandas did not have `Int32Dtype` or related.")

    df = pd.DataFrame({"col": pd.Series([1, 2, None], dtype=pd.Int32Dtype())})
    validator = build_pandas_validator_with_data(df)

    result = validator.expect_column_values_to_be_in_type_list(
        "col", type_list=["Int32Dtype"])
    assert result == ExpectationValidationResult(
        success=True,
        expectation_config={
            "expectation_type": "expect_column_values_to_be_in_type_list",
            "kwargs": {
                "column": "col",
                "type_list": ["Int32Dtype"],
            },
            "meta": {},
        },
        result={
            "element_count": 3,
            "unexpected_count": 0,
            "unexpected_percent": 0.0,
            "partial_unexpected_list": [],
            "missing_count": 0,
            "missing_percent": 0.0,
            "unexpected_percent_total": 0.0,
            "unexpected_percent_nonmissing": 0.0,
        },
        exception_info={
            "raised_exception": False,
            "exception_traceback": None,
            "exception_message": None,
        },
        meta={},
    )
Example 18
def test_expect_table_row_count_to_be_between_runtime_custom_query_with_where_no_temp_table_sa(
    titanic_v013_multi_datasource_multi_execution_engine_data_context_with_checkpoints_v1_with_empty_store_stats_enabled,
):
    context: DataContext = titanic_v013_multi_datasource_multi_execution_engine_data_context_with_checkpoints_v1_with_empty_store_stats_enabled
    batch_request = RuntimeBatchRequest(
        datasource_name="my_sqlite_db_datasource",
        data_connector_name="default_runtime_data_connector_name",
        data_asset_name="titanic",
        runtime_parameters={
            "query": "select * from titanic where sexcode = 1"
        },
        batch_identifiers={"default_identifier_name": "test_identifier"},
        batch_spec_passthrough={"create_temp_table": False},
    )
    validator = context.get_validator(
        batch_request=batch_request,
        create_expectation_suite_with_name="test",
    )
    results = validator.expect_table_row_count_to_be_between(min_value=100,
                                                             max_value=2000)
    assert results == ExpectationValidationResult(
        success=True,
        result={"observed_value": 462},
        meta={},
        expectation_config={
            "kwargs": {
                "min_value": 100,
                "max_value": 2000,
                "batch_id": "a47a711a9984cb2a482157adf54c3cb6",
            },
            "ge_cloud_id": None,
            "meta": {},
            "expectation_type": "expect_table_row_count_to_be_between",
        },
        exception_info={
            "raised_exception": False,
            "exception_traceback": None,
            "exception_message": None,
        },
    )
Example 19
def test_expect_column_values_to_be_of_type_string_dialect_pyathena(sa):
    from pyathena import sqlalchemy_athena

    df = pd.DataFrame({"col": ["test_val1", "test_val2"]})
    validator = build_sa_validator_with_data(df, "sqlite")

    # Monkey-patch dialect for testing purposes.
    validator.execution_engine.dialect_module = sqlalchemy_athena

    result = validator.expect_column_values_to_be_of_type("col",
                                                          type_="STRINGTYPE")

    assert result == ExpectationValidationResult(
        success=True,
        expectation_config={
            "expectation_type": "expect_column_values_to_be_of_type",
            "kwargs": {
                "column": "col",
                "type_": "STRINGTYPE",
            },
            "meta": {},
        },
        result={
            "element_count": 2,
            "unexpected_count": 0,
            "unexpected_percent": 0.0,
            "partial_unexpected_list": [],
            "missing_count": 0,
            "missing_percent": 0.0,
            "unexpected_percent_total": 0.0,
            "unexpected_percent_nonmissing": 0.0,
        },
        exception_info={
            "raised_exception": False,
            "exception_traceback": None,
            "exception_message": None,
        },
        meta={},
    )
def test_column_map_expectation_decorator():

    # Create a new CustomPandasDataset to
    # (1) demonstrate that custom subclassing works, and
    # (2) test expectation business logic without dependencies on any other functions.
    class CustomPandasDataset(PandasDataset):
        @MetaPandasDataset.column_map_expectation
        def expect_column_values_to_be_odd(self, column):
            return column.map(lambda x: x % 2)

        @MetaPandasDataset.column_map_expectation
        def expectation_that_crashes_on_sixes(self, column):
            return column.map(lambda x: (x - 6) / 0 != "duck")

    df = CustomPandasDataset({
        "all_odd": [1, 3, 5, 5, 5, 7, 9, 9, 9, 11],
        "mostly_odd": [1, 3, 5, 7, 9, 2, 4, 1, 3, 5],
        "all_even": [2, 4, 4, 6, 6, 6, 8, 8, 8, 8],
        "odd_missing": [1, 3, 5, None, None, None, None, 1, 3, None],
        "mixed_missing": [1, 3, 5, None, None, 2, 4, 1, 3, None],
        "all_missing":
        [None, None, None, None, None, None, None, None, None, None],
    })
    df.set_default_expectation_argument("result_format", "COMPLETE")
    df.set_default_expectation_argument("include_config", False)

    assert df.expect_column_values_to_be_odd(
        "all_odd") == ExpectationValidationResult(
            result={
                "element_count": 10,
                "missing_count": 0,
                "missing_percent": 0.0,
                "partial_unexpected_counts": [],
                "partial_unexpected_index_list": [],
                "partial_unexpected_list": [],
                "unexpected_count": 0,
                "unexpected_index_list": [],
                "unexpected_list": [],
                "unexpected_percent": 0.0,
                "unexpected_percent_nonmissing": 0.0,
            },
            success=True,
        )

    assert df.expect_column_values_to_be_odd(
        "all_missing") == ExpectationValidationResult(
            result={
                "element_count": 10,
                "missing_count": 10,
                "missing_percent": 100.0,
                "partial_unexpected_counts": [],
                "partial_unexpected_index_list": [],
                "partial_unexpected_list": [],
                "unexpected_count": 0,
                "unexpected_index_list": [],
                "unexpected_list": [],
                "unexpected_percent": None,
                "unexpected_percent_nonmissing": None,
                "unexpected_percent_total": None,
            },
            success=True,
        )

    assert df.expect_column_values_to_be_odd(
        "odd_missing") == ExpectationValidationResult(
            result={
                "element_count": 10,
                "missing_count": 5,
                "missing_percent": 50.0,
                "partial_unexpected_counts": [],
                "partial_unexpected_index_list": [],
                "partial_unexpected_list": [],
                "unexpected_count": 0,
                "unexpected_index_list": [],
                "unexpected_list": [],
                "unexpected_percent": 0.0,
                "unexpected_percent_nonmissing": 0.0,
            },
            success=True,
        )

    assert df.expect_column_values_to_be_odd(
        "mixed_missing"
    ) == ExpectationValidationResult(
        result={
            "element_count": 10,
            "missing_count": 3,
            "missing_percent": 30.0,
            "partial_unexpected_counts": [
                {"value": 2.0, "count": 1},
                {"value": 4.0, "count": 1},
            ],
            "partial_unexpected_index_list": [5, 6],
            "partial_unexpected_list": [2.0, 4.0],
            "unexpected_count": 2,
            "unexpected_index_list": [5, 6],
            "unexpected_list": [2, 4],
            # 2 unexpected values out of 7 non-missing, hence 2/7 rather than 2/10
            "unexpected_percent": ((2.0 / 7) * 100),
            "unexpected_percent_nonmissing": ((2.0 / 7) * 100),
        },
        success=False,
    )

    assert df.expect_column_values_to_be_odd(
        "mostly_odd"
    ) == ExpectationValidationResult(
        result={
            "element_count": 10,
            "missing_count": 0,
            "missing_percent": 0,
            "partial_unexpected_counts": [
                {"value": 2.0, "count": 1},
                {"value": 4.0, "count": 1},
            ],
            "partial_unexpected_index_list": [5, 6],
            "partial_unexpected_list": [2.0, 4.0],
            "unexpected_count": 2,
            "unexpected_index_list": [5, 6],
            "unexpected_list": [2, 4],
            "unexpected_percent": 20.0,
            "unexpected_percent_nonmissing": 20.0,
        },
        success=False,
    )

    assert df.expect_column_values_to_be_odd(
        "mostly_odd", mostly=0.6
    ) == ExpectationValidationResult(
        result={
            "element_count": 10,
            "missing_count": 0,
            "missing_percent": 0,
            "partial_unexpected_counts": [
                {"value": 2.0, "count": 1},
                {"value": 4.0, "count": 1},
            ],
            "partial_unexpected_index_list": [5, 6],
            "partial_unexpected_list": [2.0, 4.0],
            "unexpected_count": 2,
            "unexpected_index_list": [5, 6],
            "unexpected_list": [2, 4],
            "unexpected_percent": 20.0,
            "unexpected_percent_nonmissing": 20.0,
        },
        success=True,
    )

    assert df.expect_column_values_to_be_odd(
        "mostly_odd",
        result_format="BOOLEAN_ONLY") == ExpectationValidationResult(
            success=False)

    df.default_expectation_args["result_format"] = "BOOLEAN_ONLY"

    assert df.expect_column_values_to_be_odd(
        "mostly_odd") == ExpectationValidationResult(success=False)

    df.default_expectation_args["result_format"] = "BASIC"

    assert df.expect_column_values_to_be_odd(
        "mostly_odd", include_config=True) == ExpectationValidationResult(
            expectation_config=ExpectationConfiguration(
                expectation_type="expect_column_values_to_be_odd",
                kwargs={
                    "column": "mostly_odd",
                    "result_format": "BASIC"
                },
            ),
            result={
                "element_count": 10,
                "missing_count": 0,
                "missing_percent": 0,
                "partial_unexpected_list": [2, 4],
                "unexpected_count": 2,
                "unexpected_percent": 20.0,
                "unexpected_percent_nonmissing": 20.0,
            },
            success=False,
        )
def test_file_format_map_output():
    incomplete_file_path = file_relative_path(
        __file__, "../test_sets/toy_data_incomplete.csv")
    incomplete_file_dat = ge.data_asset.FileDataAsset(incomplete_file_path)
    null_file_path = file_relative_path(__file__, "../test_sets/null_file.csv")
    null_file_dat = ge.data_asset.FileDataAsset(null_file_path)
    white_space_path = file_relative_path(__file__,
                                          "../test_sets/white_space.txt")
    white_space_dat = ge.data_asset.FileDataAsset(white_space_path)

    # Boolean Expectation Output
    expectation = incomplete_file_dat.expect_file_line_regex_match_count_to_equal(
        regex=r",\S",
        expected_count=3,
        skip=1,
        result_format="BOOLEAN_ONLY",
        include_config=False,
    )
    expected_result = ExpectationValidationResult(success=False)
    assert expected_result == expectation

    # Empty File Expectations
    expectation = null_file_dat.expect_file_line_regex_match_count_to_equal(
        regex=r",\S",
        expected_count=3,
        skip=1,
        result_format="BASIC",
        include_config=False,
    )
    expected_result = ExpectationValidationResult(
        success=None,
        result={
            "element_count": 0,
            "missing_count": 0,
            "missing_percent": None,
            "unexpected_count": 0,
            "unexpected_percent": None,
            "unexpected_percent_nonmissing": None,
            "partial_unexpected_list": [],
        },
    )

    assert expected_result == expectation

    # White Space File
    expectation = white_space_dat.expect_file_line_regex_match_count_to_equal(
        regex=r",\S",
        expected_count=3,
        result_format="BASIC",
        include_config=False)
    expected_result = ExpectationValidationResult(
        success=None,
        result={
            "element_count": 0,
            "missing_count": 0,
            "missing_percent": None,
            "unexpected_count": 0,
            "unexpected_percent": None,
            "unexpected_percent_nonmissing": None,
            "unexpected_percent_total": None,
            "partial_unexpected_list": [],
        },
    )

    assert expected_result == expectation

    # Complete Result Format
    expectation = incomplete_file_dat.expect_file_line_regex_match_count_to_equal(
        regex=r",\S",
        expected_count=3,
        skip=1,
        result_format="COMPLETE",
        include_config=False,
    )

    expected_result = ExpectationValidationResult(
        success=False,
        result={
            "element_count": 9,
            "missing_count": 2,
            "missing_percent": ((2.0 / 9) * 100),
            "unexpected_count": 3,
            # percents are computed over the 7 non-missing lines; *_total over all 9
            "unexpected_percent": ((3.0 / 7) * 100),
            "unexpected_percent_total": ((3.0 / 9) * 100),
            "unexpected_percent_nonmissing": ((3.0 / 7) * 100),
            "partial_unexpected_list": ["A,C,1\n", "B,1,4\n", "A,1,4\n"],
            "partial_unexpected_counts": [
                {"value": "A,1,4\n", "count": 1},
                {"value": "A,C,1\n", "count": 1},
                {"value": "B,1,4\n", "count": 1},
            ],
            "partial_unexpected_index_list": [0, 3, 5],
            "unexpected_list": ["A,C,1\n", "B,1,4\n", "A,1,4\n"],
            "unexpected_index_list": [0, 3, 5],
        },
    )

    assert expected_result == expectation

    # Invalid Result Format
    with pytest.raises(ValueError):
        expectation = incomplete_file_dat.expect_file_line_regex_match_count_to_equal(
            regex=r",\S",
            expected_count=3,
            skip=1,
            result_format="JOKE",
            include_config=False,
        )
Example 22
def test_validate():
    with open(
        file_relative_path(__file__, "./test_sets/titanic_expectations.json")
    ) as f:
        my_expectation_suite = expectationSuiteSchema.loads(f.read())

    with mock.patch("uuid.uuid1") as uuid:
        uuid.return_value = "1234"
        my_df = ge.read_csv(
            file_relative_path(__file__, "./test_sets/Titanic.csv"),
            expectation_suite=my_expectation_suite,
        )
    my_df.set_default_expectation_argument("result_format", "COMPLETE")

    results = my_df.validate(catch_exceptions=False)

    with open(
        file_relative_path(
            __file__, "./test_sets/titanic_expected_data_asset_validate_results.json"
        )
    ) as f:
        expected_results = expectationSuiteValidationResultSchema.loads(f.read())

    del results.meta["great_expectations_version"]
    del results.meta["expectation_suite_meta"]["great_expectations_version"]
    assert results.to_json_dict() == expected_results.to_json_dict()

    # Now, change the results and ensure they are no longer equal
    results.results[0] = ExpectationValidationResult()
    assert results.to_json_dict() != expected_results.to_json_dict()

    # Finally, confirm that only_return_failures works
    # and does not affect the "statistics" field.
    validation_results = my_df.validate(only_return_failures=True)
    del validation_results.meta["great_expectations_version"]
    del validation_results.meta["expectation_suite_meta"]["great_expectations_version"]
    expected_results = ExpectationSuiteValidationResult(
        meta={
            "expectation_suite_name": "titanic",
            "run_id": {"run_name": None, "run_time": "1955-11-05T00:00:00+00:00"},
            "validation_time": "19551105T000000.000000Z",
            "batch_kwargs": {"ge_batch_id": "1234"},
            "expectation_suite_meta": {},
            "batch_markers": {},
            "batch_parameters": {},
        },
        results=[
            ExpectationValidationResult(
                expectation_config=ExpectationConfiguration(
                    expectation_type="expect_column_values_to_be_in_set",
                    kwargs={"column": "PClass", "value_set": ["1st", "2nd", "3rd"]},
                ),
                success=False,
                exception_info={
                    "exception_message": None,
                    "exception_traceback": None,
                    "raised_exception": False,
                },
                result={
                    "partial_unexpected_index_list": [456],
                    "unexpected_count": 1,
                    "unexpected_list": ["*"],
                    "unexpected_percent": 0.07616146230007616,
                    "element_count": 1313,
                    "missing_percent": 0.0,
                    "partial_unexpected_counts": [{"count": 1, "value": "*"}],
                    "partial_unexpected_list": ["*"],
                    "unexpected_percent_total": 0.07616146230007616,
                    "unexpected_percent_nonmissing": 0.07616146230007616,
                    "missing_count": 0,
                    "unexpected_index_list": [456],
                },
            )
        ],
        success=expected_results.success,  # unaffected
        statistics=expected_results["statistics"],  # unaffected
    )
    assert validation_results.to_json_dict() == expected_results.to_json_dict()
def test_ValidationResultsTableContentBlockRenderer_get_observed_value(
        evr_success):
    evr_no_result_key = ExpectationValidationResult(
        success=True,
        exception_info={
            "raised_exception": False,
            "exception_message": None,
            "exception_traceback": None,
        },
        expectation_config=ExpectationConfiguration(
            expectation_type="expect_table_row_count_to_be_between",
            kwargs={
                "min_value": 0,
                "max_value": None,
                "result_format": "SUMMARY"
            },
        ),
    )

    evr_expect_column_values_to_not_be_null = ExpectationValidationResult(
        success=True,
        result={
            "element_count": 1313,
            "unexpected_count": 1050,
            "unexpected_percent": 79.96953541508,
            "partial_unexpected_list": [],
        },
        exception_info={
            "raised_exception": False,
            "exception_message": None,
            "exception_traceback": None,
        },
        expectation_config=ExpectationConfiguration(
            expectation_type="expect_column_values_to_not_be_null",
            kwargs={
                "column": "Unnamed: 0",
                "mostly": 0.5,
                "result_format": "SUMMARY"
            },
        ),
    )

    evr_expect_column_values_to_be_null = ExpectationValidationResult(
        success=True,
        result={
            "element_count": 1313,
            "unexpected_count": 0,
            "unexpected_percent": 0.0,
            "partial_unexpected_list": [],
        },
        exception_info={
            "raised_exception": False,
            "exception_message": None,
            "exception_traceback": None,
        },
        expectation_config=ExpectationConfiguration(
            expectation_type="expect_column_values_to_be_null",
            kwargs={
                "column": "Unnamed: 0",
                "mostly": 0.5,
                "result_format": "SUMMARY"
            },
        ),
    )

    # test _get_observed_value when evr.result["observed_value"] exists
    output_1 = get_renderer_impl(
        object_name=evr_success.expectation_config.expectation_type,
        renderer_type="renderer.diagnostic.observed_value",
    )[1](result=evr_success)
    assert output_1 == "1,313"
    # test _get_observed_value when evr.result does not exist
    output_2 = get_renderer_impl(
        object_name=evr_no_result_key.expectation_config.expectation_type,
        renderer_type="renderer.diagnostic.observed_value",
    )[1](result=evr_no_result_key)
    assert output_2 == "--"
    # test _get_observed_value for expect_column_values_to_not_be_null expectation type
    output_3 = get_renderer_impl(
        object_name=evr_expect_column_values_to_not_be_null.expectation_config.expectation_type,
        renderer_type="renderer.diagnostic.observed_value",
    )[1](result=evr_expect_column_values_to_not_be_null)
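    # ≈79.97% of values are unexpected (i.e. null), so the renderer reports ≈20.03% not null.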
    assert output_3 == "≈20.03% not null"
    # test _get_observed_value for expect_column_values_to_be_null expectation type
    output_4 = get_renderer_impl(
        object_name=evr_expect_column_values_to_be_null.expectation_config.expectation_type,
        renderer_type="renderer.diagnostic.observed_value",
    )[1](result=evr_expect_column_values_to_be_null)
    assert output_4 == "100% null"
def test_test_column_aggregate_expectation_function():
    asset = ge.dataset.PandasDataset({
        "x": [1, 3, 5, 7, 9],
        "y": [1, 2, None, 7, 9],
    })

    def expect_second_value_to_be(
        self,
        column,
        value,
        result_format=None,
        include_config=True,
        catch_exceptions=None,
        meta=None,
    ):
        return {
            "success": self[column].iloc[1] == value,
            "result": {
                "observed_value": self[column].iloc[1],
            },
        }

    assert asset.test_column_aggregate_expectation_function(
        expect_second_value_to_be, "x", 2,
        include_config=False) == ExpectationValidationResult(
            result={
                "observed_value": 3,
                "element_count": 5,
                "missing_count": 0,
                "missing_percent": 0.0,
            },
            success=False,
        )

    assert asset.test_column_aggregate_expectation_function(
        expect_second_value_to_be, column="x", value=3,
        include_config=False) == ExpectationValidationResult(
            result={
                "observed_value": 3.0,
                "element_count": 5,
                "missing_count": 0,
                "missing_percent": 0.0,
            },
            success=True,
        )

    assert asset.test_column_aggregate_expectation_function(
        expect_second_value_to_be,
        "y",
        value=3,
        result_format="BOOLEAN_ONLY",
        include_config=False,
    ) == ExpectationValidationResult(success=False)

    assert asset.test_column_aggregate_expectation_function(
        expect_second_value_to_be,
        "y",
        2,
        result_format="BOOLEAN_ONLY",
        include_config=False,
    ) == ExpectationValidationResult(success=True)
Example 25
def test_evaluation_parameter_store_methods(
    data_context_parameterized_expectation_suite: DataContext,
):
    run_id = RunIdentifier(run_name="20191125T000000.000000Z")
    source_patient_data_results = ExpectationSuiteValidationResult(
        meta={
            "expectation_suite_name": "source_patient_data.default",
            "run_id": run_id,
        },
        results=[
            ExpectationValidationResult(
                expectation_config=ExpectationConfiguration(
                    expectation_type="expect_table_row_count_to_equal",
                    kwargs={
                        "value": 1024,
                    },
                ),
                success=True,
                exception_info={
                    "exception_message": None,
                    "exception_traceback": None,
                    "raised_exception": False,
                },
                result={
                    "observed_value": 1024,
                    "element_count": 1024,
                    "missing_percent": 0.0,
                    "missing_count": 0,
                },
            )
        ],
        success=True,
    )

    data_context_parameterized_expectation_suite.store_evaluation_parameters(
        source_patient_data_results)

    bound_parameters = data_context_parameterized_expectation_suite.evaluation_parameter_store.get_bind_params(
        run_id
    )
    assert bound_parameters == {
        "urn:great_expectations:validations:source_patient_data.default:expect_table_row_count_to_equal.result"
        ".observed_value": 1024
    }
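    # Evaluation parameters are keyed by URN:
    # urn:great_expectations:validations:<suite_name>:<expectation_type>.result.observed_value[:<metric_kwargs_id>]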
    source_diabetes_data_results = ExpectationSuiteValidationResult(
        meta={
            "expectation_suite_name": "source_diabetes_data.default",
            "run_id": run_id,
        },
        results=[
            ExpectationValidationResult(
                expectation_config=ExpectationConfiguration(
                    expectation_type="expect_column_unique_value_count_to_be_between",
                    kwargs={
                        "column": "patient_nbr",
                        "min": 2048,
                        "max": 2048
                    },
                ),
                success=True,
                exception_info={
                    "exception_message": None,
                    "exception_traceback": None,
                    "raised_exception": False,
                },
                result={
                    "observed_value": 2048,
                    "element_count": 5000,
                    "missing_percent": 0.0,
                    "missing_count": 0,
                },
            )
        ],
        success=True,
    )

    data_context_parameterized_expectation_suite.store_evaluation_parameters(
        source_diabetes_data_results
    )
    bound_parameters = data_context_parameterized_expectation_suite.evaluation_parameter_store.get_bind_params(
        run_id
    )
    assert bound_parameters == {
        "urn:great_expectations:validations:source_patient_data.default:expect_table_row_count_to_equal.result"
        ".observed_value": 1024,
        "urn:great_expectations:validations:source_diabetes_data.default"
        ":expect_column_unique_value_count_to_be_between.result.observed_value:column=patient_nbr": 2048,
    }
def test_StoreMetricsAction_column_metric(
    basic_in_memory_data_context_for_validation_operator,
):
    action = StoreMetricsAction(
        data_context=basic_in_memory_data_context_for_validation_operator,
        requested_metrics={
            "*": [
                {
                    "column": {
                        "provider_id": [
                            "expect_column_values_to_be_unique.result.unexpected_count"
                        ]
                    }
                },
                "statistics.evaluated_expectations",
                "statistics.successful_expectations",
            ]
        },
        target_store_name="metrics_store",
    )

    run_id = RunIdentifier(run_name="bar")

    validation_result = ExpectationSuiteValidationResult(
        success=False,
        meta={"expectation_suite_name": "foo", "run_id": run_id},
        results=[
            ExpectationValidationResult(
                meta={},
                result={
                    "element_count": 10,
                    "missing_count": 0,
                    "missing_percent": 0.0,
                    "unexpected_count": 7,
                    "unexpected_percent": 0.0,
                    "unexpected_percent_nonmissing": 0.0,
                    "partial_unexpected_list": [],
                },
                success=True,
                expectation_config=ExpectationConfiguration(
                    expectation_type="expect_column_values_to_be_unique",
                    kwargs={"column": "provider_id", "result_format": "BASIC"},
                ),
                exception_info=None,
            )
        ],
        statistics={"evaluated_expectations": 5, "successful_expectations": 3},
    )

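    # Running the action should write the requested column metric
    # (unexpected_count == 7 for column provider_id) into the metrics store.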
    action.run(
        validation_result,
        ValidationResultIdentifier.from_object(validation_result),
        data_asset=None,
    )

    assert (
        basic_in_memory_data_context_for_validation_operator.stores[
            "metrics_store"
        ].get(
            ValidationMetricIdentifier(
                run_id=run_id,
                data_asset_name=None,
                expectation_suite_identifier=ExpectationSuiteIdentifier("foo"),
                metric_name="expect_column_values_to_be_unique.result.unexpected_count",
                metric_kwargs_id="column=provider_id",
            )
        )
        == 7
    )
def test_ValidationResultsTableContentBlockRenderer_get_unexpected_statement(
        evr_success, evr_failed):
    evr_no_result = ExpectationValidationResult(
        success=True,
        exception_info={
            "raised_exception": False,
            "exception_message": None,
            "exception_traceback": None,
        },
        expectation_config=ExpectationConfiguration(
            expectation_type="expect_table_row_count_to_be_between",
            kwargs={
                "min_value": 0,
                "max_value": None,
                "result_format": "SUMMARY"
            },
        ),
    )
    evr_failed_no_unexpected_count = ExpectationValidationResult(
        success=False,
        result={
            "element_count": 1313,
            "missing_count": 0,
            "missing_percent": 0.0,
            "unexpected_percent": 0.2284843869002285,
            "unexpected_percent_nonmissing": 0.2284843869002285,
            "partial_unexpected_list": [
                "Daly, Mr Peter Denis ",
                "Barber, Ms ",
                "Geiger, Miss Emily ",
            ],
            "partial_unexpected_index_list": [77, 289, 303],
            "partial_unexpected_counts": [
                {"value": "Barber, Ms ", "count": 1},
                {"value": "Daly, Mr Peter Denis ", "count": 1},
                {"value": "Geiger, Miss Emily ", "count": 1},
            ],
        },
        exception_info={
            "raised_exception": False,
            "exception_message": None,
            "exception_traceback": None,
        },
        expectation_config=ExpectationConfiguration(
            expectation_type="expect_column_values_to_not_match_regex",
            kwargs={
                "column": "Name",
                "regex": "^\\s+|\\s+$",
                "result_format": "SUMMARY",
            },
        ),
    )

    # test for succeeded evr
    output_1 = get_renderer_impl(
        object_name=evr_success.expectation_config.expectation_type,
        renderer_type="renderer.diagnostic.unexpected_statement",
    )[1](result=evr_success)
    assert output_1 == []

    # test for failed evr
    output_2 = get_renderer_impl(
        object_name=evr_failed.expectation_config.expectation_type,
        renderer_type="renderer.diagnostic.unexpected_statement",
    )[1](result=evr_failed)
    assert output_2 == [
        RenderedStringTemplateContent(
            **{
                "content_block_type": "string_template",
                "string_template": {
                    "template": "\n\n$unexpected_count unexpected values found. $unexpected_percent of $element_count total rows.",
                    "params": {
                        "unexpected_count": "3",
                        "unexpected_percent": "≈0.2285%",
                        "element_count": "1,313",
                    },
                    "tag": "strong",
                    "styling": {"classes": ["text-danger"]},
                },
            }
        )
    ]

    # test for evr with no "result" key
    output_3 = get_renderer_impl(
        object_name=evr_no_result.expectation_config.expectation_type,
        renderer_type="renderer.diagnostic.unexpected_statement",
    )[1](result=evr_no_result)
    print(json.dumps(output_3, indent=2))
    assert output_3 == []

    # test for evr with no unexpected count
    output_4 = get_renderer_impl(
        object_name=evr_failed_no_unexpected_count.expectation_config.expectation_type,
        renderer_type="renderer.diagnostic.unexpected_statement",
    )[1](result=evr_failed_no_unexpected_count)
    print(output_4)
    assert output_4 == []

    # test for evr with exception
    evr_failed_exception = ExpectationValidationResult(
        success=False,
        exception_info={
            "raised_exception": True,
            "exception_message": "Unrecognized column: not_a_real_column",
            "exception_traceback": "Traceback (most recent call last):\n...more_traceback...",
        },
        expectation_config=ExpectationConfiguration(
            expectation_type="expect_column_values_to_not_match_regex",
            kwargs={
                "column": "Name",
                "regex": "^\\s+|\\s+$",
                "result_format": "SUMMARY",
            },
        ),
    )

    output_5 = get_renderer_impl(
        object_name=evr_failed_exception.expectation_config.expectation_type,
        renderer_type="renderer.diagnostic.unexpected_statement",
    )[1](result=evr_failed_exception)
    output_5 = [content.to_json_dict() for content in output_5]
    expected_output_5 = [
        {
            "content_block_type": "string_template",
            "string_template": {
                "template": "\n\n$expectation_type raised an exception:\n$exception_message",
                "params": {
                    "expectation_type": "expect_column_values_to_not_match_regex",
                    "exception_message": "Unrecognized column: not_a_real_column",
                },
                "tag": "strong",
                "styling": {
                    "classes": ["text-danger"],
                    "params": {
                        "exception_message": {"tag": "code"},
                        "expectation_type": {
                            "classes": ["badge", "badge-danger", "mb-2"]
                        },
                    },
                },
            },
        },
        {
            "content_block_type": "collapse",
            "collapse_toggle_link": "Show exception traceback...",
            "collapse": [
                {
                    "content_block_type": "string_template",
                    "string_template": {
                        "template": "Traceback (most recent call last):\n...more_traceback...",
                        "tag": "code",
                    },
                }
            ],
            "inline_link": False,
        },
    ]
    assert output_5 == expected_output_5
def test_ValidationResultsTableContentBlockRenderer_get_unexpected_table(
        evr_success):
    evr_failed_no_result = ExpectationValidationResult(
        success=False,
        exception_info={
            "raised_exception": False,
            "exception_message": None,
            "exception_traceback": None,
        },
        expectation_config=ExpectationConfiguration(
            expectation_type="expect_column_values_to_be_in_set",
            kwargs={
                "column": "Unnamed: 0",
                "value_set": [],
                "result_format": "SUMMARY",
            },
        ),
    )

    evr_failed_no_unexpected_list_or_counts = ExpectationValidationResult(
        success=False,
        result={
            "element_count": 1313,
            "missing_count": 0,
            "missing_percent": 0.0,
            "unexpected_count": 1313,
            "unexpected_percent": 100.0,
            "unexpected_percent_nonmissing": 100.0,
        },
        exception_info={
            "raised_exception": False,
            "exception_message": None,
            "exception_traceback": None,
        },
        expectation_config=ExpectationConfiguration(
            expectation_type="expect_column_values_to_be_in_set",
            kwargs={
                "column": "Unnamed: 0",
                "value_set": [],
                "result_format": "SUMMARY",
            },
        ),
    )

    evr_failed_partial_unexpected_list = ExpectationValidationResult(
        success=False,
        result={
            "element_count": 1313,
            "missing_count": 0,
            "missing_percent": 0.0,
            "unexpected_count": 1313,
            "unexpected_percent": 100.0,
            "unexpected_percent_nonmissing": 100.0,
            "partial_unexpected_list": [
                1, 2, 3, 4, 5, 6, 7, 8, 9, 10,
                11, 12, 13, 14, 15, 16, 17, 18, 19, 20,
            ],
        },
        exception_info={
            "raised_exception": False,
            "exception_message": None,
            "exception_traceback": None,
        },
        expectation_config=ExpectationConfiguration(
            expectation_type="expect_column_values_to_be_in_set",
            kwargs={
                "column": "Unnamed: 0",
                "value_set": [],
                "result_format": "SUMMARY",
            },
        ),
    )

    evr_failed_partial_unexpected_counts = ExpectationValidationResult(
        success=False,
        result={
            "element_count": 1313,
            "missing_count": 0,
            "missing_percent": 0.0,
            "unexpected_count": 1313,
            "unexpected_percent": 100.0,
            "unexpected_percent_nonmissing": 100.0,
            "partial_unexpected_list": [
                1, 2, 3, 4, 5, 6, 7, 8, 9, 10,
                11, 12, 13, 14, 15, 16, 17, 18, 19, 20,
            ],
            "partial_unexpected_index_list": [
                0, 1, 2, 3, 4, 5, 6, 7, 8, 9,
                10, 11, 12, 13, 14, 15, 16, 17, 18, 19,
            ],
            "partial_unexpected_counts": [
                {"value": 1, "count": 1},
                {"value": 2, "count": 1},
                {"value": 3, "count": 1},
                {"value": 4, "count": 1},
                {"value": 5, "count": 1},
                {"value": 6, "count": 1},
                {"value": 7, "count": 1},
                {"value": 8, "count": 1},
                {"value": 9, "count": 1},
                {"value": 10, "count": 1},
                {"value": 11, "count": 1},
                {"value": 12, "count": 1},
                {"value": 13, "count": 1},
                {"value": 14, "count": 1},
                {"value": 15, "count": 1},
                {"value": 16, "count": 1},
                {"value": 17, "count": 1},
                {"value": 18, "count": 1},
                {"value": 19, "count": 1},
                {"value": 20, "count": 1},
            ],
        },
        exception_info={
            "raised_exception": False,
            "exception_message": None,
            "exception_traceback": None,
        },
        expectation_config=ExpectationConfiguration(
            expectation_type="expect_column_values_to_be_in_set",
            kwargs={
                "column": "Unnamed: 0",
                "value_set": [],
                "result_format": "SUMMARY",
            },
        ),
    )
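
    # Each call below retrieves the registered "unexpected_table" diagnostic renderer
    # via get_renderer_impl; element [1] of the lookup result is the renderer callable.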

    # test for succeeded evr
    output_1 = get_renderer_impl(
        object_name=evr_success.expectation_config.expectation_type,
        renderer_type="renderer.diagnostic.unexpected_table",
    )[1](result=evr_success)
    assert output_1 is None

    # test for failed evr with no "result" key
    output_2 = get_renderer_impl(
        object_name=evr_failed_no_result.expectation_config.expectation_type,
        renderer_type="renderer.diagnostic.unexpected_table",
    )[1](result=evr_failed_no_result)
    assert output_2 is None

    # test for failed evr with no unexpected list or unexpected counts
    output_3 = get_renderer_impl(
        object_name=(
            evr_failed_no_unexpected_list_or_counts.expectation_config.expectation_type
        ),
        renderer_type="renderer.diagnostic.unexpected_table",
    )[1](result=evr_failed_no_unexpected_list_or_counts)
    assert output_3 is None

    # test for failed evr with partial unexpected list
    output_4 = get_renderer_impl(
        object_name=(
            evr_failed_partial_unexpected_list.expectation_config.expectation_type
        ),
        renderer_type="renderer.diagnostic.unexpected_table",
    )[1](result=evr_failed_partial_unexpected_list)
    assert output_4.to_json_dict() == {
        "content_block_type":
        "table",
        "table": [
            [1],
            [2],
            [3],
            [4],
            [5],
            [6],
            [7],
            [8],
            [9],
            [10],
            [11],
            [12],
            [13],
            [14],
            [15],
            [16],
            [17],
            [18],
            [19],
            [20],
        ],
        "header_row": ["Sampled Unexpected Values"],
        "styling": {
            "body": {
                "classes": ["table-bordered", "table-sm", "mt-3"]
            }
        },
    }

    # test for failed evr with partial unexpected counts
    output_5 = get_renderer_impl(
        object_name=(
            evr_failed_partial_unexpected_counts.expectation_config.expectation_type
        ),
        renderer_type="renderer.diagnostic.unexpected_table",
    )[1](result=evr_failed_partial_unexpected_counts)
    assert output_5.to_json_dict() == {
        "content_block_type":
        "table",
        "table": [
            [1],
            [2],
            [3],
            [4],
            [5],
            [6],
            [7],
            [8],
            [9],
            [10],
            [11],
            [12],
            [13],
            [14],
            [15],
            [16],
            [17],
            [18],
            [19],
            [20],
        ],
        "header_row": ["Sampled Unexpected Values"],
        "styling": {
            "body": {
                "classes": ["table-bordered", "table-sm", "mt-3"]
            }
        },
    }
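
The assertions above fetch the "renderer.diagnostic.unexpected_table" renderer by expectation type and invoke element [1] of the lookup result with an EVR. Below is a minimal sketch, outside the test suite, of applying the same renderer to a hand-built EVR; the import paths are assumptions that may differ across Great Expectations versions.

from great_expectations.core import ExpectationValidationResult
from great_expectations.core.expectation_configuration import ExpectationConfiguration
from great_expectations.expectations.registry import get_renderer_impl

# A small failed EVR shaped like the fixtures above; the renderer samples the
# values in partial_unexpected_list.
evr = ExpectationValidationResult(
    success=False,
    result={
        "element_count": 3,
        "missing_count": 0,
        "missing_percent": 0.0,
        "unexpected_count": 2,
        "unexpected_percent": 66.7,
        "unexpected_percent_nonmissing": 66.7,
        "partial_unexpected_list": ["a", "b"],
    },
    expectation_config=ExpectationConfiguration(
        expectation_type="expect_column_values_to_be_in_set",
        kwargs={"column": "status", "value_set": ["ok"], "result_format": "SUMMARY"},
    ),
)

# Element [1] of the lookup result is the renderer callable, as in the tests above.
renderer_fn = get_renderer_impl(
    object_name=evr.expectation_config.expectation_type,
    renderer_type="renderer.diagnostic.unexpected_table",
)[1]
content_block = renderer_fn(result=evr)
print(content_block.to_json_dict())  # a "table" content block of sampled unexpected values
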
def test_column_pair_map_expectation_decorator():
    # Create a new CustomPandasDataset to
    # (1) Demonstrate that custom subclassing works, and
    # (2) Test expectation business logic without dependencies on any other functions.
    class CustomPandasDataset(PandasDataset):
        @PandasDataset.column_pair_map_expectation
        def expect_column_pair_values_to_be_different(
            self,
            column_A,
            column_B,
            keep_missing="either",
            output_format=None,
            include_config=True,
            catch_exceptions=None,
        ):
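            # The decorator consumes result_format and ignore_row_if before calling
            # this body, which only needs to return a boolean Series for the pair.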
            return column_A != column_B

    df = CustomPandasDataset({
        "all_odd": [1, 3, 5, 7, 9],
        "all_even": [2, 4, 6, 8, 10],
        "odd_missing": [1, 3, 5, None, None],
        "mixed_missing": [1, 2, None, None, 6],
        "mixed_missing_2": [1, 3, None, None, 6],
        "all_missing": [
            None,
            None,
            None,
            None,
            None,
        ],
    })
    df.set_default_expectation_argument("result_format", "COMPLETE")
    df.set_default_expectation_argument("include_config", False)

    assert df.expect_column_pair_values_to_be_different(
        "all_odd", "all_even") == ExpectationValidationResult(
            success=True,
            result={
                "element_count": 5,
                "missing_count": 0,
                "unexpected_count": 0,
                "missing_percent": 0.0,
                "unexpected_percent": 0.0,
                "unexpected_percent_nonmissing": 0.0,
                "unexpected_list": [],
                "unexpected_index_list": [],
                "partial_unexpected_list": [],
                "partial_unexpected_index_list": [],
                "partial_unexpected_counts": [],
            },
        )

    assert df.expect_column_pair_values_to_be_different(
        "all_odd",
        "all_even",
        ignore_row_if="both_values_are_missing",
    ) == ExpectationValidationResult(
        success=True,
        result={
            "element_count": 5,
            "missing_count": 0,
            "unexpected_count": 0,
            "missing_percent": 0.0,
            "unexpected_percent": 0.0,
            "unexpected_percent_nonmissing": 0.0,
            "unexpected_list": [],
            "unexpected_index_list": [],
            "partial_unexpected_list": [],
            "partial_unexpected_index_list": [],
            "partial_unexpected_counts": [],
        },
    )

    assert df.expect_column_pair_values_to_be_different(
        "all_odd", "odd_missing") == ExpectationValidationResult(
            success=False,
            result={
                "element_count":
                5,
                "missing_count":
                0,
                "unexpected_count":
                3,
                "missing_percent":
                0.0,
                "unexpected_percent":
                60.0,
                "unexpected_percent_nonmissing":
                60.0,
                "unexpected_list": [(1, 1.0), (3, 3.0), (5, 5.0)],
                "unexpected_index_list": [0, 1, 2],
                "partial_unexpected_list": [(1, 1.0), (3, 3.0), (5, 5.0)],
                "partial_unexpected_index_list": [0, 1, 2],
                "partial_unexpected_counts": [
                    {
                        "count": 1,
                        "value": (1, 1.0)
                    },
                    {
                        "count": 1,
                        "value": (3, 3.0)
                    },
                    {
                        "count": 1,
                        "value": (5, 5.0)
                    },
                ],
            },
        )

    assert df.expect_column_pair_values_to_be_different(
        "all_odd", "odd_missing", ignore_row_if="both_values_are_missing"
    ) == ExpectationValidationResult(
        success=False,
        result={
            "element_count":
            5,
            "missing_count":
            0,
            "unexpected_count":
            3,
            "missing_percent":
            0.0,
            "unexpected_percent":
            60.0,
            "unexpected_percent_nonmissing":
            60.0,
            "unexpected_list": [(1, 1.0), (3, 3.0), (5, 5.0)],
            "unexpected_index_list": [0, 1, 2],
            "partial_unexpected_list": [(1, 1.0), (3, 3.0), (5, 5.0)],
            "partial_unexpected_index_list": [0, 1, 2],
            "partial_unexpected_counts": [
                {
                    "count": 1,
                    "value": (1, 1.0)
                },
                {
                    "count": 1,
                    "value": (3, 3.0)
                },
                {
                    "count": 1,
                    "value": (5, 5.0)
                },
            ],
        },
    )

    assert df.expect_column_pair_values_to_be_different(
        "all_odd", "odd_missing", ignore_row_if="either_value_is_missing"
    ) == ExpectationValidationResult(
        success=False,
        result={
            "element_count":
            5,
            "missing_count":
            2,
            "unexpected_count":
            3,
            "missing_percent":
            40.0,
            "unexpected_percent":
            100.0,
            "unexpected_percent_nonmissing":
            100.0,
            "unexpected_list": [(1, 1.0), (3, 3.0), (5, 5.0)],
            "unexpected_index_list": [0, 1, 2],
            "partial_unexpected_list": [(1, 1.0), (3, 3.0), (5, 5.0)],
            "partial_unexpected_index_list": [0, 1, 2],
            "partial_unexpected_counts": [
                {
                    "count": 1,
                    "value": (1, 1.0)
                },
                {
                    "count": 1,
                    "value": (3, 3.0)
                },
                {
                    "count": 1,
                    "value": (5, 5.0)
                },
            ],
        },
    )

    with pytest.raises(ValueError):
        df.expect_column_pair_values_to_be_different(
            "all_odd", "odd_missing", ignore_row_if="blahblahblah")

    # Test SUMMARY, BASIC, and BOOLEAN_ONLY result_format values
    assert df.expect_column_pair_values_to_be_different(
        "all_odd", "all_even",
        result_format="SUMMARY") == ExpectationValidationResult(
            success=True,
            result={
                "element_count": 5,
                "missing_count": 0,
                "unexpected_count": 0,
                "missing_percent": 0.0,
                "unexpected_percent": 0.0,
                "unexpected_percent_nonmissing": 0.0,
                "partial_unexpected_list": [],
                "partial_unexpected_index_list": [],
                "partial_unexpected_counts": [],
            },
        )

    assert df.expect_column_pair_values_to_be_different(
        "all_odd", "all_even",
        result_format="BASIC") == ExpectationValidationResult(
            success=True,
            result={
                "element_count": 5,
                "missing_count": 0,
                "unexpected_count": 0,
                "missing_percent": 0.0,
                "unexpected_percent": 0.0,
                "unexpected_percent_nonmissing": 0.0,
                "partial_unexpected_list": [],
            },
        )

    assert df.expect_column_pair_values_to_be_different(
        "all_odd", "all_even",
        result_format="BOOLEAN_ONLY") == ExpectationValidationResult(
            success=True)
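
The column_pair_map_expectation decorator used above turns a per-row boolean Series into a full validation result, with result_format and ignore_row_if handled by the wrapper rather than by the decorated function. Below is a minimal sketch of the same pattern with a hypothetical rule, expect_column_pair_values_to_sum_to_ten (not a built-in expectation), assuming the classic ge.dataset.PandasDataset API exercised by the test.

import great_expectations as ge


class SumPairDataset(ge.dataset.PandasDataset):
    @ge.dataset.PandasDataset.column_pair_map_expectation
    def expect_column_pair_values_to_sum_to_ten(
        self,
        column_A,
        column_B,
        keep_missing="either",
        output_format=None,
        include_config=True,
        catch_exceptions=None,
    ):
        # Return a boolean Series; the decorator builds the EVR around it.
        return (column_A + column_B) == 10


df = SumPairDataset({"a": [1, 2, 3], "b": [9, 8, 6]})
result = df.expect_column_pair_values_to_sum_to_ten("a", "b", result_format="BASIC")
print(result.success)  # False: the pair (3, 6) sums to 9, not 10
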
def test_column_aggregate_expectation_decorator():
    # Create a new CustomPandasDataset to
    # (1) demonstrate that custom subclassing works, and
    # (2) Test expectation business logic without dependencies on any other functions.
    class CustomPandasDataset(PandasDataset):
        @PandasDataset.column_aggregate_expectation
        def expect_column_median_to_be_odd(self, column):
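            # column_aggregate_expectation expects a dict with "success" and "result";
            # the decorator adds the element_count / missing_count bookkeeping seen below.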
            median = self.get_column_median(column)
            return {
                "success": median % 2,
                "result": {
                    "observed_value": median
                }
            }

    df = CustomPandasDataset({
        "all_odd": [1, 3, 5, 7, 9],
        "all_even": [2, 4, 6, 8, 10],
        "odd_missing": [1, 3, 5, None, None],
        "mixed_missing": [1, 2, None, None, 6],
        "mixed_missing_2": [1, 3, None, None, 6],
        "all_missing": [
            None,
            None,
            None,
            None,
            None,
        ],
    })
    df.set_default_expectation_argument("result_format", "COMPLETE")
    df.set_default_expectation_argument("include_config", False)

    assert df.expect_column_median_to_be_odd(
        "all_odd") == ExpectationValidationResult(
            result={
                "observed_value": 5.0,
                "element_count": 5,
                "missing_count": None,
                "missing_percent": None,
            },
            success=True,
        )

    assert df.expect_column_median_to_be_odd(
        "all_even") == ExpectationValidationResult(
            result={
                "observed_value": 6,
                "element_count": 5,
                "missing_count": None,
                "missing_percent": None,
            },
            success=False,
        )

    assert df.expect_column_median_to_be_odd(
        "all_even", result_format="SUMMARY") == ExpectationValidationResult(
            result={
                "observed_value": 6.0,
                "element_count": 5,
                "missing_count": None,
                "missing_percent": None,
            },
            success=False,
        )

    assert df.expect_column_median_to_be_odd(
        "all_even",
        result_format="BOOLEAN_ONLY") == ExpectationValidationResult(
            success=False)

    df.default_expectation_args["result_format"] = "BOOLEAN_ONLY"
    assert df.expect_column_median_to_be_odd(
        "all_even") == ExpectationValidationResult(success=False)

    assert df.expect_column_median_to_be_odd(
        "all_even", result_format="BASIC") == ExpectationValidationResult(
            result={
                "observed_value": 6.0,
                "element_count": 5,
                "missing_count": None,
                "missing_percent": None,
            },
            success=False,
        )