def test_result_format_argument_in_decorators():
    """Exercise ``result_format=None`` and an unknown format on expectation decorators.

    The dataset default ``result_format`` is set to ``"COMPLETE"``. Each
    expectation is then called with an explicit ``result_format=None`` and
    compared against the payloads below (the schema's load result is unwrapped
    via ``.data``). Finally, an unrecognised format string must raise
    ``ValueError``.
    """
    # NOTE(review): another function with this exact name is defined further
    # down in this file; confirm which definition is meant to be collected.
    dataset = ge.dataset.PandasDataset(
        {
            "x": [1, 3, 5, 7, 9],
            "y": [2, 4, 6, 8, 10],
            "z": [None, "a", "b", "c", "abc"],
        }
    )
    dataset.set_default_expectation_argument("result_format", "COMPLETE")
    dataset.set_default_expectation_argument("include_config", False)

    mean_payload = {
        "success": True,
        "result": {
            "observed_value": 5,
            "element_count": 5,
            "missing_count": 0,
            "missing_percent": 0.0,
        },
    }
    between_payload = {
        "result": {
            "element_count": 5,
            "missing_count": 0,
            "missing_percent": 0.0,
            "partial_unexpected_counts": [
                {"count": 1, "value": 8},
                {"count": 1, "value": 10},
            ],
            "partial_unexpected_index_list": [3, 4],
            "partial_unexpected_list": [8, 10],
            "unexpected_count": 2,
            "unexpected_index_list": [3, 4],
            "unexpected_list": [8, 10],
            "unexpected_percent": 40.0,
            "unexpected_percent_nonmissing": 40.0,
        },
        "success": False,
    }

    # Explicit None passed as result_format.
    expected = expectationValidationResultSchema.load(mean_payload).data
    observed = dataset.expect_column_mean_to_be_between(
        "x", 4, 6, result_format=None
    )
    assert observed == expected

    expected = expectationValidationResultSchema.load(between_payload).data
    observed = dataset.expect_column_values_to_be_between(
        "y", 1, 6, result_format=None
    )
    assert observed == expected

    # Unknown output format: both expectation flavours must reject it.
    bad_format_calls = (
        lambda: dataset.expect_column_values_to_be_between(
            "y", 1, 6, result_format="QUACK"
        ),
        lambda: dataset.expect_column_mean_to_be_between(
            "x", 4, 6, result_format="QUACK"
        ),
    )
    for invoke in bad_format_calls:
        with pytest.raises(ValueError):
            invoke()
def test_result_format_argument_in_decorators():
    """Verify decorator handling of ``result_format=None`` and of an invalid format.

    With the dataset default set to ``"COMPLETE"``, an explicit
    ``result_format=None`` is compared against the payloads loaded through
    ``expectationValidationResultSchema``. The string ``"QUACK"`` is not a
    known format and must raise ``ValueError``.
    """
    columns = {
        "x": [1, 3, 5, 7, 9],
        "y": [2, 4, 6, 8, 10],
        "z": [None, "a", "b", "c", "abc"],
    }
    dataset = ge.dataset.PandasDataset(columns)
    dataset.set_default_expectation_argument("result_format", "COMPLETE")
    dataset.set_default_expectation_argument("include_config", False)

    # --- explicit None as result_format: column-aggregate expectation ---
    expected_mean = expectationValidationResultSchema.load(
        {
            "success": True,
            "result": {
                "observed_value": 5,
                "element_count": 5,
                "missing_count": 0,
                "missing_percent": 0.0,
            },
        }
    )
    observed_mean = dataset.expect_column_mean_to_be_between(
        "x", 4, 6, result_format=None
    )
    assert observed_mean == expected_mean

    # --- explicit None as result_format: column-map expectation ---
    expected_between = expectationValidationResultSchema.load(
        {
            "result": {
                "element_count": 5,
                "missing_count": 0,
                "missing_percent": 0.0,
                "partial_unexpected_counts": [
                    {"count": 1, "value": 8},
                    {"count": 1, "value": 10},
                ],
                "partial_unexpected_index_list": [3, 4],
                "partial_unexpected_list": [8, 10],
                "unexpected_count": 2,
                "unexpected_index_list": [3, 4],
                "unexpected_list": [8, 10],
                "unexpected_percent": 40.0,
                "unexpected_percent_nonmissing": 40.0,
            },
            "success": False,
        }
    )
    observed_between = dataset.expect_column_values_to_be_between(
        "y", 1, 6, result_format=None
    )
    assert observed_between == expected_between

    # --- unknown output format ---
    unknown_format = "QUACK"
    with pytest.raises(ValueError):
        dataset.expect_column_values_to_be_between(
            "y", 1, 6, result_format=unknown_format
        )
    with pytest.raises(ValueError):
        dataset.expect_column_mean_to_be_between(
            "x", 4, 6, result_format=unknown_format
        )
def test_expectation_decorator_summary_mode():
    """Check the payload returned when ``result_format="SUMMARY"`` overrides the default.

    The dataset default is ``"COMPLETE"``; each call below passes
    ``result_format="SUMMARY"`` explicitly and is compared against a result
    loaded through ``expectationValidationResultSchema``. Column ``x`` has ten
    entries, two of which are ``None``.
    """
    dataset = ge.dataset.PandasDataset(
        {"x": [1, 2, 3, 4, 5, 6, 7, 7, None, None]}
    )
    dataset.set_default_expectation_argument("result_format", "COMPLETE")
    dataset.set_default_expectation_argument("include_config", False)

    # Column-map expectation: values 6, 7, 7 fall outside [1, 5].
    between_summary = {
        "success": False,
        "result": {
            "element_count": 10,
            "missing_count": 2,
            "missing_percent": 20.0,
            "unexpected_count": 3,
            "partial_unexpected_counts": [
                {"value": 7.0, "count": 2},
                {"value": 6.0, "count": 1},
            ],
            "unexpected_percent": 30.0,
            "unexpected_percent_nonmissing": 37.5,
            "partial_unexpected_list": [6.0, 7.0, 7.0],
            "partial_unexpected_index_list": [5, 6, 7],
        },
    }
    expected = expectationValidationResultSchema.load(between_summary)
    observed = dataset.expect_column_values_to_be_between(
        "x", min_value=1, max_value=5, result_format="SUMMARY"
    )
    assert observed == expected

    # Column-aggregate expectation: the expected observed mean is 4.375.
    mean_summary = {
        "success": True,
        "result": {
            "observed_value": 4.375,
            "element_count": 10,
            "missing_count": 2,
            "missing_percent": 20.0,
        },
    }
    expected = expectationValidationResultSchema.load(mean_summary)
    observed = dataset.expect_column_mean_to_be_between(
        "x", 3, 7, result_format="SUMMARY"
    )
    assert observed == expected
def test_positional_arguments():
    """Call expectations positionally while restricting rows with a pandas row condition.

    The dataset is passed through ``duplicate_and_obfuscuate`` and every
    conditioned call filters on ``group=="a"``. The unconditioned mean
    expectation is asserted to differ from the conditioned expected output.
    """
    # NOTE(review): a second ``test_positional_arguments`` is defined later in
    # this file; confirm which definition is meant to be collected.
    # Presumably the helper adds the ``group`` column used by the row
    # condition below -- verify against its definition.
    dataset = duplicate_and_obfuscuate(
        ge.dataset.PandasDataset(
            {
                "x": [1, 3, 5, 7, 9],
                "y": [2, 4, 6, 8, 10],
                "z": [None, "a", "b", "c", "abc"],
            }
        )
    )
    dataset.set_default_expectation_argument("result_format", "COMPLETE")
    dataset.set_default_expectation_argument("include_config", False)

    # Keyword arguments shared by every conditioned expectation call.
    group_a = {"condition_parser": "pandas", "row_condition": 'group=="a"'}

    def verify(outcome, success, unexpected_list, unexpected_index_list):
        # Same three checks, in the same order, for every map expectation.
        assert success == outcome.success
        assert unexpected_index_list == outcome.result["unexpected_index_list"]
        assert unexpected_list == outcome.result["unexpected_list"]

    expected_mean = expectationValidationResultSchema.load(
        {
            "success": True,
            "result": {
                "observed_value": 5,
                "element_count": 5,
                "missing_count": 0,
                "missing_percent": 0.0,
            },
        }
    )
    conditioned_mean = dataset.expect_column_mean_to_be_between(
        "x", 4, 6, **group_a
    )
    assert conditioned_mean == expected_mean
    # Without the row condition the result must not match.
    unconditioned_mean = dataset.expect_column_mean_to_be_between("x", 4, 6)
    assert unconditioned_mean != expected_mean

    verify(
        dataset.expect_column_values_to_be_between("y", 1, 6, **group_a),
        success=False,
        unexpected_list=[8, 10],
        unexpected_index_list=[3, 4],
    )
    verify(
        dataset.expect_column_values_to_be_between(
            "y", 1, 6, mostly=0.5, **group_a
        ),
        success=True,
        unexpected_list=[8, 10],
        unexpected_index_list=[3, 4],
    )
    verify(
        dataset.expect_column_values_to_be_in_set(
            "z", ["a", "b", "c"], **group_a
        ),
        success=False,
        unexpected_list=["abc"],
        unexpected_index_list=[4],
    )
    verify(
        dataset.expect_column_values_to_be_in_set(
            "z", ["a", "b", "c"], mostly=0.5, **group_a
        ),
        success=True,
        unexpected_list=["abc"],
        unexpected_index_list=[4],
    )
def test_positional_arguments():
    """Call expectations with positional column/bounds arguments and check the results.

    Covers the mean expectation plus several ``between`` / ``in_set`` calls
    (plain, ``strict_max``, ``strict_min`` and ``mostly``), asserting the
    success flag, the unexpected index list and the unexpected value list of
    each result.
    """
    dataset = ge.dataset.PandasDataset(
        {
            "x": [1, 3, 5, 7, 9],
            "y": [2, 4, 6, 8, 10],
            "z": [None, "a", "b", "c", "abc"],
        }
    )
    dataset.set_default_expectation_argument("result_format", "COMPLETE")
    dataset.set_default_expectation_argument("include_config", False)

    expected_mean = expectationValidationResultSchema.load(
        {
            "success": True,
            "result": {
                "observed_value": 5,
                "element_count": 5,
                "missing_count": 0,
                "missing_percent": 0.0,
            },
        }
    )
    assert dataset.expect_column_mean_to_be_between("x", 4, 6) == expected_mean

    # Each entry: (deferred expectation call, success, unexpected values,
    # unexpected indices). Calls are deferred so that every expectation runs
    # immediately before its own assertions.
    scenarios = [
        (
            lambda: dataset.expect_column_values_to_be_between("y", 1, 6),
            False,
            [8, 10],
            [3, 4],
        ),
        (
            lambda: dataset.expect_column_values_to_be_between(
                "y", 1, 8, strict_max=True
            ),
            False,
            [8, 10],
            [3, 4],
        ),
        (
            lambda: dataset.expect_column_values_to_be_between(
                "y", 2, 100, strict_min=True
            ),
            False,
            [2],
            [0],
        ),
        (
            lambda: dataset.expect_column_values_to_be_between(
                "y", 1, 6, mostly=0.5
            ),
            True,
            [8, 10],
            [3, 4],
        ),
        (
            lambda: dataset.expect_column_values_to_be_in_set(
                "z", ["a", "b", "c"]
            ),
            False,
            ["abc"],
            [4],
        ),
        (
            lambda: dataset.expect_column_values_to_be_in_set(
                "z", ["a", "b", "c"], mostly=0.5
            ),
            True,
            ["abc"],
            [4],
        ),
    ]

    for run, success, unexpected_values, unexpected_indices in scenarios:
        outcome = run()
        assert success == outcome.success
        assert unexpected_indices == outcome.result["unexpected_index_list"]
        assert unexpected_values == outcome.result["unexpected_list"]