Example #1
def titanic_profiler_evrs_with_exception():
    with open(
            file_relative_path(
                __file__,
                "./fixtures/BasicDatasetProfiler_evrs_with_exception.json"),
    ) as infile:
        return expectationSuiteValidationResultSchema.load(json.load(infile))
Example #2
def datetime_column_evrs():
    """Hand-crafted EVRs for datetime columns."""
    with open(
            file_relative_path(
                __file__, "../fixtures/datetime_column_evrs.json")) as infile:
        return expectationSuiteValidationResultSchema.load(
            json.load(infile, object_pairs_hook=OrderedDict))
Example #3
def titanic_profiler_evrs():
    with open(
            file_relative_path(
                __file__,
                "./fixtures/BasicDatasetProfiler_evrs.json")) as infile:
        return expectationSuiteValidationResultSchema.load(
            json.load(infile, object_pairs_hook=OrderedDict))
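Examples #1 through #3 follow the same pattern: open a JSON file resolved relative to the test module with file_relative_path and deserialize it through expectationSuiteValidationResultSchema.load. In the Great Expectations test suite, loaders like these are typically registered as pytest fixtures. The sketch below shows that wiring; the import paths and the consuming test are assumptions added for illustration, not part of the original snippets.

import json
from collections import OrderedDict

import pytest

# Assumed import paths for illustration; they vary across Great Expectations versions.
from great_expectations.core import expectationSuiteValidationResultSchema
from great_expectations.data_context.util import file_relative_path


@pytest.fixture
def titanic_profiler_evrs():
    # Same loader as Example #3, exposed as a pytest fixture.
    with open(
            file_relative_path(
                __file__,
                "./fixtures/BasicDatasetProfiler_evrs.json")) as infile:
        return expectationSuiteValidationResultSchema.load(
            json.load(infile, object_pairs_hook=OrderedDict))


def test_titanic_profiler_evrs_fixture(titanic_profiler_evrs):
    # Hypothetical consumer: pytest injects the loaded validation result by
    # fixture name. The exact object shape depends on the schema version,
    # so only a minimal sanity check is made here.
    assert titanic_profiler_evrs is not None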
Example #4
def test_snapshot_BasicDatasetProfiler_on_titanic():
    """
    A snapshot regression test for BasicDatasetProfiler.
    We are running the profiler on the Titanic dataset
    and comparing the EVRs to ones retrieved from a
    previously stored file.
    """
    df = ge.read_csv(file_relative_path(__file__, "../test_sets/Titanic.csv"))
    suite, evrs = df.profile(BasicDatasetProfiler)

    # Check to make sure BasicDatasetProfiler is adding meta.columns with a single "description" field for each column
    assert "columns" in suite.meta
    for k, v in suite.meta["columns"].items():
        assert v == {"description": ""}

    # Note: the above already produces an EVR; rerunning isn't strictly necessary just for EVRs
    evrs = df.validate(result_format="SUMMARY")

    # THIS IS NOT DEAD CODE. UNCOMMENT TO SAVE A SNAPSHOT WHEN UPDATING THIS TEST
    # with open('tests/test_sets/expected_evrs_BasicDatasetProfiler_on_titanic.json', 'w+') as file:
    #     json.dump(expectationSuiteValidationResultSchema.dump(evrs).data, file, indent=2)
    #
    # with open('tests/render/fixtures/BasicDatasetProfiler_evrs.json', 'w+') as file:
    #     json.dump(expectationSuiteValidationResultSchema.dump(evrs).data, file, indent=2)

    with open(
            file_relative_path(
                __file__,
                "../test_sets/expected_evrs_BasicDatasetProfiler_on_titanic.json"
            ),
            "r",
    ) as file:
        expected_evrs = expectationSuiteValidationResultSchema.load(
            json.load(file, object_pairs_hook=OrderedDict)).data

    # Python 2 does not guarantee the order of value_counts, which leads to a
    # different ordering of items in the partial_unexpected_counts lists.
    # Remove those entries before comparing.
    for result in evrs.results:
        if "partial_unexpected_counts" in result.result:
            result.result.pop("partial_unexpected_counts")

    for result in expected_evrs.results:
        if "partial_unexpected_counts" in result.result:
            result.result.pop("partial_unexpected_counts")

    # The great_expectations version, run_id, and batch id differ between runs,
    # so strip them before comparing.
    del expected_evrs.meta["great_expectations.__version__"]
    del evrs.meta["great_expectations.__version__"]
    del expected_evrs.meta["run_id"]
    del evrs.meta["run_id"]
    del evrs.meta["batch_kwargs"]["ge_batch_id"]

    # Skip the final comparison on Python 2 because of the ordering issue above
    # and Python 2's near end-of-life.
    if not PY2:
        assert expected_evrs == evrs
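The final equality check is skipped on Python 2 because of the ordering issue noted above. The PY2 flag is not defined in this excerpt; a minimal sketch of the interpreter-version check it stands for (an assumption, not the exact original definition):

import sys

# Assumed definition of the PY2 flag referenced by the snapshot test above.
PY2 = sys.version_info[0] == 2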