def test_result_format_argument_in_decorators():
    """Check how expectations handle the ``result_format`` keyword.

    The dataset's default ``result_format`` is set to ``COMPLETE`` and
    ``include_config`` is turned off.  The test then verifies that:

    * passing ``result_format=None`` explicitly still produces the detailed
      payloads spelled out below (including the full unexpected lists), and
    * an unrecognised ``result_format`` string raises ``ValueError``.
    """
    dataset = ge.dataset.PandasDataset({
        'x': [1, 3, 5, 7, 9],
        'y': [2, 4, 6, 8, 10],
        'z': [None, 'a', 'b', 'c', 'abc'],
    })
    dataset.set_default_expectation_argument('result_format', 'COMPLETE')
    dataset.set_default_expectation_argument("include_config", False)

    # --- Explicit None for result_format: aggregate expectation -----------
    mean_payload = {
        'success': True,
        'result': {
            'observed_value': 5,
            'element_count': 5,
            'missing_count': 0,
            'missing_percent': 0.0,
        },
    }
    expected_mean = expectationValidationResultSchema.load(mean_payload).data
    actual_mean = dataset.expect_column_mean_to_be_between(
        'x', 4, 6, result_format=None)
    assert actual_mean == expected_mean

    # --- Explicit None for result_format: column-map expectation ----------
    # Values 8 and 10 (row indices 3 and 4) lie outside the range [1, 6].
    between_payload = {
        'result': {
            'element_count': 5,
            'missing_count': 0,
            'missing_percent': 0.0,
            'partial_unexpected_counts': [
                {'count': 1, 'value': 8},
                {'count': 1, 'value': 10},
            ],
            'partial_unexpected_index_list': [3, 4],
            'partial_unexpected_list': [8, 10],
            'unexpected_count': 2,
            'unexpected_index_list': [3, 4],
            'unexpected_list': [8, 10],
            'unexpected_percent': 40.0,
            'unexpected_percent_nonmissing': 40.0,
        },
        'success': False,
    }
    expected_between = expectationValidationResultSchema.load(
        between_payload).data
    actual_between = dataset.expect_column_values_to_be_between(
        'y', 1, 6, result_format=None)
    assert actual_between == expected_between

    # --- Unknown result_format values must be rejected --------------------
    with pytest.raises(ValueError):
        dataset.expect_column_values_to_be_between(
            'y', 1, 6, result_format="QUACK")

    with pytest.raises(ValueError):
        dataset.expect_column_mean_to_be_between(
            'x', 4, 6, result_format="QUACK")
Esempio n. 2
0
def test_result_format_argument_in_decorators():
    """Verify ``result_format`` handling for explicit ``None`` and bad values.

    With the dataset default set to ``COMPLETE`` (and ``include_config``
    disabled), an explicit ``result_format=None`` is expected to yield the
    detailed results listed below, while an unknown format name is expected
    to raise ``ValueError``.
    """
    columns = {
        "x": [1, 3, 5, 7, 9],
        "y": [2, 4, 6, 8, 10],
        "z": [None, "a", "b", "c", "abc"],
    }
    dataset = ge.dataset.PandasDataset(columns)
    dataset.set_default_expectation_argument("result_format", "COMPLETE")
    dataset.set_default_expectation_argument("include_config", False)

    # Explicit None on an aggregate expectation.
    expected_mean = expectationValidationResultSchema.load(
        {
            "success": True,
            "result": {
                "observed_value": 5,
                "element_count": 5,
                "missing_count": 0,
                "missing_percent": 0.0,
            },
        }
    )
    actual_mean = dataset.expect_column_mean_to_be_between(
        "x", 4, 6, result_format=None
    )
    assert actual_mean == expected_mean

    # Explicit None on a column-map expectation: 8 and 10 (rows 3 and 4)
    # fall outside the range [1, 6].
    outliers = [8, 10]
    outlier_rows = [3, 4]
    expected_between = expectationValidationResultSchema.load(
        {
            "result": {
                "element_count": 5,
                "missing_count": 0,
                "missing_percent": 0.0,
                "partial_unexpected_counts": [
                    {"count": 1, "value": 8},
                    {"count": 1, "value": 10},
                ],
                "partial_unexpected_index_list": list(outlier_rows),
                "partial_unexpected_list": list(outliers),
                "unexpected_count": 2,
                "unexpected_index_list": list(outlier_rows),
                "unexpected_list": list(outliers),
                "unexpected_percent": 40.0,
                "unexpected_percent_nonmissing": 40.0,
            },
            "success": False,
        }
    )
    actual_between = dataset.expect_column_values_to_be_between(
        "y", 1, 6, result_format=None
    )
    assert actual_between == expected_between

    # An unrecognised result_format must raise for both expectation kinds.
    with pytest.raises(ValueError):
        dataset.expect_column_values_to_be_between(
            "y", 1, 6, result_format="QUACK"
        )

    with pytest.raises(ValueError):
        dataset.expect_column_mean_to_be_between(
            "x", 4, 6, result_format="QUACK"
        )
Esempio n. 3
0
def test_expectation_decorator_summary_mode():
    """Check the results produced when ``result_format="SUMMARY"`` is passed.

    The dataset default is ``COMPLETE``; each call below overrides it with
    ``SUMMARY`` and the result is compared with the expected payload.
    """
    dataset = ge.dataset.PandasDataset({
        "x": [1, 2, 3, 4, 5, 6, 7, 7, None, None],
    })
    dataset.set_default_expectation_argument("result_format", "COMPLETE")
    dataset.set_default_expectation_argument("include_config", False)

    # Column-map expectation: 6, 7 and 7 (rows 5-7) exceed max_value=5, and
    # two of the ten rows are missing.
    between_payload = {
        "success": False,
        "result": {
            "element_count": 10,
            "missing_count": 2,
            "missing_percent": 20.0,
            "unexpected_count": 3,
            "partial_unexpected_counts": [
                {"value": 7.0, "count": 2},
                {"value": 6.0, "count": 1},
            ],
            "unexpected_percent": 30.0,
            "unexpected_percent_nonmissing": 37.5,
            "partial_unexpected_list": [6.0, 7.0, 7.0],
            "partial_unexpected_index_list": [5, 6, 7],
        },
    }
    expected_between = expectationValidationResultSchema.load(between_payload)
    actual_between = dataset.expect_column_values_to_be_between(
        "x", min_value=1, max_value=5, result_format="SUMMARY")
    assert actual_between == expected_between

    # Aggregate expectation: the mean of the eight non-missing values is 4.375.
    mean_payload = {
        "success": True,
        "result": {
            "observed_value": 4.375,
            "element_count": 10,
            "missing_count": 2,
            "missing_percent": 20.0,
        },
    }
    expected_mean = expectationValidationResultSchema.load(mean_payload)
    actual_mean = dataset.expect_column_mean_to_be_between(
        "x", 3, 7, result_format="SUMMARY")
    assert actual_mean == expected_mean
Esempio n. 4
0
def test_positional_arguments():
    """Exercise positionally-passed expectation arguments with a row condition.

    The dataset is wrapped by ``duplicate_and_obfuscuate`` (defined elsewhere;
    presumably it adds extra rows tagged by a ``group`` column -- confirm
    against that helper).  Every conditional call restricts evaluation to
    ``group=="a"`` through the pandas condition parser, and the unconditioned
    mean check is asserted to differ from the conditioned one.
    """

    def check_map_result(out, success, unexpected_list, unexpected_index_list):
        """Assert the success flag and unexpected values/indices of ``out``."""
        assert success == out.success
        assert unexpected_index_list == out.result["unexpected_index_list"]
        assert unexpected_list == out.result["unexpected_list"]

    df = duplicate_and_obfuscuate(
        ge.dataset.PandasDataset({
            "x": [1, 3, 5, 7, 9],
            "y": [2, 4, 6, 8, 10],
            "z": [None, "a", "b", "c", "abc"],
        }))

    df.set_default_expectation_argument("result_format", "COMPLETE")
    df.set_default_expectation_argument("include_config", False)

    # Keyword arguments shared by every conditional call below.
    only_group_a = {
        "condition_parser": "pandas",
        "row_condition": 'group=="a"',
    }

    exp_output = expectationValidationResultSchema.load({
        "success": True,
        "result": {
            "observed_value": 5,
            "element_count": 5,
            "missing_count": 0,
            "missing_percent": 0.0,
        },
    })

    assert (df.expect_column_mean_to_be_between(
        "x", 4, 6, **only_group_a) == exp_output)
    # Without the row condition the result must not match the filtered one.
    assert df.expect_column_mean_to_be_between("x", 4, 6) != exp_output

    # 8 and 10 fall outside [1, 6]: a strict check fails ...
    check_map_result(
        df.expect_column_values_to_be_between("y", 1, 6, **only_group_a),
        success=False,
        unexpected_list=[8, 10],
        unexpected_index_list=[3, 4],
    )

    # ... but passes with mostly=0.5, reporting the same unexpected values.
    check_map_result(
        df.expect_column_values_to_be_between(
            "y", 1, 6, mostly=0.5, **only_group_a),
        success=True,
        unexpected_list=[8, 10],
        unexpected_index_list=[3, 4],
    )

    # "abc" is the only value of z reported as outside the allowed set.
    check_map_result(
        df.expect_column_values_to_be_in_set(
            "z", ["a", "b", "c"], **only_group_a),
        success=False,
        unexpected_list=["abc"],
        unexpected_index_list=[4],
    )

    check_map_result(
        df.expect_column_values_to_be_in_set(
            "z", ["a", "b", "c"], mostly=0.5, **only_group_a),
        success=True,
        unexpected_list=["abc"],
        unexpected_index_list=[4],
    )
Esempio n. 5
0
def test_positional_arguments():
    """Exercise expectations called with positional column/min/max arguments.

    With ``result_format`` defaulting to ``COMPLETE``, each column-map call is
    checked for its ``success`` flag and for the exact ``unexpected_list`` and
    ``unexpected_index_list`` it reports.
    """

    def check_map_result(out, success, unexpected_list, unexpected_index_list):
        """Assert the success flag and unexpected values/indices of ``out``."""
        assert success == out.success
        assert unexpected_index_list == out.result["unexpected_index_list"]
        assert unexpected_list == out.result["unexpected_list"]

    df = ge.dataset.PandasDataset(
        {"x": [1, 3, 5, 7, 9], "y": [2, 4, 6, 8, 10], "z": [None, "a", "b", "c", "abc"]}
    )
    df.set_default_expectation_argument("result_format", "COMPLETE")
    df.set_default_expectation_argument("include_config", False)

    exp_output = expectationValidationResultSchema.load(
        {
            "success": True,
            "result": {
                "observed_value": 5,
                "element_count": 5,
                "missing_count": 0,
                "missing_percent": 0.0,
            },
        }
    )

    assert df.expect_column_mean_to_be_between("x", 4, 6) == exp_output

    # 8 and 10 fall outside the inclusive range [1, 6].
    check_map_result(
        df.expect_column_values_to_be_between("y", 1, 6),
        success=False,
        unexpected_list=[8, 10],
        unexpected_index_list=[3, 4],
    )

    # strict_max=True makes the upper bound exclusive, so 8 is unexpected too.
    check_map_result(
        df.expect_column_values_to_be_between("y", 1, 8, strict_max=True),
        success=False,
        unexpected_list=[8, 10],
        unexpected_index_list=[3, 4],
    )

    # strict_min=True makes the lower bound exclusive, so 2 is unexpected.
    check_map_result(
        df.expect_column_values_to_be_between("y", 2, 100, strict_min=True),
        success=False,
        unexpected_list=[2],
        unexpected_index_list=[0],
    )

    # With mostly=0.5 the 40% unexpected rate is tolerated, but the
    # unexpected values are still reported.
    check_map_result(
        df.expect_column_values_to_be_between("y", 1, 6, mostly=0.5),
        success=True,
        unexpected_list=[8, 10],
        unexpected_index_list=[3, 4],
    )

    # "abc" is the only value of z reported as outside the allowed set.
    check_map_result(
        df.expect_column_values_to_be_in_set("z", ["a", "b", "c"]),
        success=False,
        unexpected_list=["abc"],
        unexpected_index_list=[4],
    )

    check_map_result(
        df.expect_column_values_to_be_in_set("z", ["a", "b", "c"], mostly=0.5),
        success=True,
        unexpected_list=["abc"],
        unexpected_index_list=[4],
    )