Beispiel #1
0
def test_validate_with_invalid_result(validate_result_dict):
    """Validate the Titanic CSV and compare against the stored results-with-exceptions file.

    The expectation suite and the expected validation result are both read
    from JSON files under ``./test_sets``. Version metadata and per-result
    exception tracebacks are stripped from the actual results before the
    comparison.
    """
    suite_path = file_relative_path(__file__, "./test_sets/titanic_expectations.json")
    with open(suite_path) as suite_file:
        suite = expectationSuiteSchema.loads(suite_file.read())

    csv_path = file_relative_path(__file__, "./test_sets/Titanic.csv")
    # uuid.uuid1 is pinned while the dataset is built -- presumably so any
    # generated identifier is deterministic (confirm against ge.read_csv).
    with mock.patch("uuid.uuid1") as fake_uuid1:
        fake_uuid1.return_value = "1234"
        titanic_df = ge.read_csv(csv_path, expectation_suite=suite)
    titanic_df.set_default_expectation_argument("result_format", "COMPLETE")

    # No catch_exceptions argument is passed: catch_exceptions=True is default.
    actual = titanic_df.validate()

    expected_path = file_relative_path(
        __file__,
        "./test_sets/titanic_expected_data_asset_validate_results_with_exceptions.json",
    )
    with open(expected_path) as expected_file:
        expected = expectationSuiteValidationResultSchema.loads(expected_file.read())

    # Drop the library version from both places it appears in the actual
    # metadata before comparing with the stored file.
    del actual.meta["great_expectations_version"]
    del actual.meta["expectation_suite_meta"]["great_expectations_version"]

    # Tracebacks are removed from every result before the comparison.
    for single_result in actual.results:
        single_result.exception_info.pop("exception_traceback")

    assert actual.to_json_dict() == expected.to_json_dict()
Beispiel #2
0
def test_validate_with_invalid_result_catch_exceptions_false(validate_result_dict):
    """Check that validate(catch_exceptions=False) raises InvalidCacheValueError.

    The Titanic CSV is loaded with the suite stored in
    ``./test_sets/titanic_expectations.json`` and validated with exception
    catching switched off.
    """
    suite_path = file_relative_path(__file__, "./test_sets/titanic_expectations.json")
    with open(suite_path) as suite_file:
        suite = expectationSuiteSchema.loads(suite_file.read())

    csv_path = file_relative_path(__file__, "./test_sets/Titanic.csv")
    # uuid.uuid1 is replaced by a mock returning "1234" while the dataset is built.
    with mock.patch("uuid.uuid1") as fake_uuid1:
        fake_uuid1.return_value = "1234"
        titanic_df = ge.read_csv(csv_path, expectation_suite=suite)
    titanic_df.set_default_expectation_argument("result_format", "COMPLETE")

    with pytest.raises(InvalidCacheValueError):
        titanic_df.validate(catch_exceptions=False)
def test_validate_with_invalid_result_catch_exceptions_false(empty_data_context):
    """Check that validate(catch_exceptions=False) raises and warns about the version.

    The suite is rebuilt as an ``ExpectationSuite`` bound to the
    ``empty_data_context`` fixture. Validation must raise
    ``InvalidCacheValueError`` while also emitting a warning whose message
    matches "No great_expectations version found".
    """
    context: DataContext = empty_data_context

    suite_path = file_relative_path(__file__, "./test_sets/titanic_expectations.json")
    with open(suite_path) as suite_file:
        suite_kwargs: dict = expectationSuiteSchema.loads(suite_file.read())
    # The loaded mapping is expanded into constructor keyword arguments.
    suite: ExpectationSuite = ExpectationSuite(**suite_kwargs, data_context=context)

    csv_path = file_relative_path(__file__, "./test_sets/Titanic.csv")
    # uuid.uuid1 is replaced by a mock returning "1234" while the dataset is built.
    with mock.patch("uuid.uuid1") as fake_uuid1:
        fake_uuid1.return_value = "1234"
        titanic_df = ge.read_csv(csv_path, expectation_suite=suite)
    titanic_df.set_default_expectation_argument("result_format", "COMPLETE")

    # pytest.raises is the outer manager and pytest.warns the inner one.
    with pytest.raises(InvalidCacheValueError), pytest.warns(
        Warning, match=r"No great_expectations version found"
    ):
        titanic_df.validate(catch_exceptions=False)
Beispiel #4
0
def test_validate():
    """Exercise validate() on the Titanic CSV in three stages.

    1. A full validation must equal the stored expected-results JSON once
       the version metadata has been removed.
    2. After overwriting the first result, the two must no longer be equal.
    3. ``only_return_failures=True`` must return just the failing PClass
       expectation while ``success`` and ``statistics`` stay the same as in
       the full expected results.
    """
    suite_path = file_relative_path(__file__, "./test_sets/titanic_expectations.json")
    with open(suite_path) as suite_file:
        suite = expectationSuiteSchema.loads(suite_file.read())

    csv_path = file_relative_path(__file__, "./test_sets/Titanic.csv")
    # uuid.uuid1 is pinned to "1234"; the expected metadata below carries
    # "ge_batch_id": "1234".
    with mock.patch("uuid.uuid1") as fake_uuid1:
        fake_uuid1.return_value = "1234"
        titanic_df = ge.read_csv(csv_path, expectation_suite=suite)
    titanic_df.set_default_expectation_argument("result_format", "COMPLETE")

    full_results = titanic_df.validate(catch_exceptions=False)

    expected_path = file_relative_path(
        __file__, "./test_sets/titanic_expected_data_asset_validate_results.json"
    )
    with open(expected_path) as expected_file:
        full_expected = expectationSuiteValidationResultSchema.loads(
            expected_file.read()
        )

    # Stage 1: equal once the library version is dropped from the metadata.
    del full_results.meta["great_expectations_version"]
    del full_results.meta["expectation_suite_meta"]["great_expectations_version"]
    assert full_results.to_json_dict() == full_expected.to_json_dict()

    # Stage 2: change the results and ensure they are no longer equal.
    full_results.results[0] = ExpectationValidationResult()
    assert full_results.to_json_dict() != full_expected.to_json_dict()

    # Stage 3: confirm that only_return_failures works and does not affect
    # the "statistics" field.
    failures_only = titanic_df.validate(only_return_failures=True)
    del failures_only.meta["great_expectations_version"]
    del failures_only.meta["expectation_suite_meta"]["great_expectations_version"]

    pclass_failure = ExpectationValidationResult(
        expectation_config=ExpectationConfiguration(
            expectation_type="expect_column_values_to_be_in_set",
            kwargs={"column": "PClass", "value_set": ["1st", "2nd", "3rd"]},
        ),
        success=False,
        exception_info={
            "exception_message": None,
            "exception_traceback": None,
            "raised_exception": False,
        },
        result={
            "partial_unexpected_index_list": [456],
            "unexpected_count": 1,
            "unexpected_list": ["*"],
            "unexpected_percent": 0.07616146230007616,
            "element_count": 1313,
            "missing_percent": 0.0,
            "partial_unexpected_counts": [{"count": 1, "value": "*"}],
            "partial_unexpected_list": ["*"],
            "unexpected_percent_total": 0.07616146230007616,
            "unexpected_percent_nonmissing": 0.07616146230007616,
            "missing_count": 0,
            "unexpected_index_list": [456],
        },
    )
    failures_meta = {
        "expectation_suite_name": "titanic",
        "run_id": {"run_name": None, "run_time": "1955-11-05T00:00:00+00:00"},
        "validation_time": "19551105T000000.000000Z",
        "batch_kwargs": {"ge_batch_id": "1234"},
        "expectation_suite_meta": {},
        "batch_markers": {},
        "batch_parameters": {},
    }
    failures_expected = ExpectationSuiteValidationResult(
        meta=failures_meta,
        results=[pclass_failure],
        success=full_expected.success,  # unaffected
        statistics=full_expected["statistics"],  # unaffected
    )
    assert failures_only.to_json_dict() == failures_expected.to_json_dict()
def test_get_and_save_expectation_suite(tmp_path_factory):
    """Check get_expectation_suite / save_expectation_suite under three option sets.

    Three expectations are registered on a small PandasDataset. The one on
    column "y" cannot succeed, because the data holds 5 while the allowed
    set is [1, 2, 4]. For each option set the suite returned in memory and
    the suite reloaded from the saved JSON file must both equal the
    expected suite:

    1. defaults: the "y" expectation is absent;
    2. ``discard_failed_expectations=False``: the "y" expectation is present;
    3. all ``discard_*_kwargs=False``: the surviving expectations carry
       ``"result_format": "BASIC"`` in their kwargs.
    """

    def in_set_config(column, extra_kwargs=None):
        # Build a fresh in-set expectation config for the given column.
        config_kwargs = {"column": column, "value_set": [1, 2, 4]}
        config_kwargs.update(extra_kwargs or {})
        return ExpectationConfiguration(
            expectation_type="expect_column_values_to_be_in_set",
            kwargs=config_kwargs,
        )

    def regex_config(extra_kwargs=None):
        # Build a fresh regex expectation config for column "z".
        config_kwargs = {"column": "z", "regex": "ello"}
        config_kwargs.update(extra_kwargs or {})
        return ExpectationConfiguration(
            expectation_type="expect_column_values_to_match_regex",
            kwargs=config_kwargs,
        )

    def default_suite(configs):
        # Wrap the configs in a suite carrying the shared name, type and meta.
        return ExpectationSuite(
            expectations=configs,
            expectation_suite_name="default",
            data_asset_type="Dataset",
            meta={"great_expectations_version": ge.__version__},
        )

    def reload_suite(path):
        # Read a saved suite back through the schema.
        with open(path) as infile:
            return expectationSuiteSchema.loads(infile.read())

    directory_name = str(
        tmp_path_factory.mktemp("test_get_and_save_expectation_config")
    )
    df = ge.dataset.PandasDataset(
        {
            "x": [1, 2, 4],
            "y": [1, 2, 5],
            "z": ["hello", "jello", "mello"],
        }
    )

    df.expect_column_values_to_be_in_set("x", [1, 2, 4])
    df.expect_column_values_to_be_in_set(
        "y", [1, 2, 4], catch_exceptions=True, include_config=True
    )
    df.expect_column_values_to_match_regex("z", "ello")

    ### First test set ###

    output_config = default_suite([in_set_config("x"), regex_config()])

    assert output_config == df.get_expectation_suite()

    first_path = directory_name + "/temp1.json"
    df.save_expectation_suite(first_path)
    assert output_config == reload_suite(first_path)

    ### Second test set ###

    output_config = default_suite(
        [in_set_config("x"), in_set_config("y"), regex_config()]
    )

    assert output_config == df.get_expectation_suite(
        discard_failed_expectations=False
    )

    second_path = directory_name + "/temp2.json"
    df.save_expectation_suite(second_path, discard_failed_expectations=False)
    assert output_config == reload_suite(second_path)

    ### Third test set ###

    output_config = default_suite(
        [
            in_set_config("x", {"result_format": "BASIC"}),
            regex_config({"result_format": "BASIC"}),
        ]
    )

    assert output_config == df.get_expectation_suite(
        discard_result_format_kwargs=False,
        discard_include_config_kwargs=False,
        discard_catch_exceptions_kwargs=False,
    )

    third_path = directory_name + "/temp3.json"
    df.save_expectation_suite(
        third_path,
        discard_result_format_kwargs=False,
        discard_include_config_kwargs=False,
        discard_catch_exceptions_kwargs=False,
    )
    assert output_config == reload_suite(third_path)