def test_profile_enum_schema(empty_data_context, enum_types_schema):
    """Profile the enum-types schema fixture and check the resulting suite.

    The generated expectations are compared, in order, against the expected
    list below, and the suite is then saved through the data context.
    """
    suite = JsonSchemaProfiler().profile(enum_types_schema, "enums")

    assert isinstance(suite, ExpectationSuite)
    assert suite.expectation_suite_name == "enums"

    expected = [
        {
            "expectation_type": "expect_column_to_exist",
            "kwargs": {"column": "shirt-size"},
            "meta": {},
        },
        {
            "expectation_type": "expect_column_values_to_be_in_set",
            "kwargs": {
                "column": "shirt-size",
                "value_set": ["XS", "S", "M", "XL", "XXL"],
            },
            "meta": {},
        },
        {
            "expectation_type": "expect_column_values_to_not_be_null",
            "kwargs": {"column": "shirt-size"},
            "meta": {},
        },
        {
            "expectation_type": "expect_column_to_exist",
            "kwargs": {"column": "optional-color"},
            "meta": {},
        },
        {
            "expectation_type": "expect_column_values_to_be_in_set",
            "kwargs": {
                "column": "optional-color",
                "value_set": ["red", "green", "blue"],
            },
            "meta": {},
        },
        {
            "expectation_type": "expect_column_to_exist",
            "kwargs": {"column": "optional-hat"},
            "meta": {},
        },
        {
            "expectation_type": "expect_column_values_to_be_in_type_list",
            "kwargs": {
                "column": "optional-hat",
                "type_list": list(ProfilerTypeMapping.STRING_TYPE_NAMES),
            },
            "meta": {},
        },
        {
            "expectation_type": "expect_column_values_to_be_in_set",
            "kwargs": {
                "column": "optional-hat",
                "value_set": ["red", "green", "blue"],
            },
            "meta": {},
        },
        {
            "expectation_type": "expect_column_to_exist",
            "kwargs": {"column": "optional-answer"},
            "meta": {},
        },
        {
            "expectation_type": "expect_column_values_to_be_in_set",
            "kwargs": {
                "column": "optional-answer",
                "value_set": ["yes", "no"],
            },
            "meta": {},
        },
    ]
    actual = [expectation.to_json_dict() for expectation in suite.expectations]
    assert actual == expected

    # The profiled suite must also be accepted by the data context.
    empty_data_context.save_expectation_suite(suite)
def test_profile_string_lengths_schema(empty_data_context, string_lengths_schema):
    """Profile the string-lengths schema fixture and check the resulting suite.

    The generated expectations are compared, in order, against the expected
    list below, and the suite is then saved through the data context.
    """
    suite = JsonSchemaProfiler().profile(string_lengths_schema, "lengths")

    assert isinstance(suite, ExpectationSuite)
    assert suite.expectation_suite_name == "lengths"

    string_types = list(ProfilerTypeMapping.STRING_TYPE_NAMES)
    expected = [
        {
            "expectation_type": "expect_column_to_exist",
            "kwargs": {"column": "comments-no-constraints"},
            "meta": {},
        },
        {
            "expectation_type": "expect_column_values_to_be_in_type_list",
            "kwargs": {
                "column": "comments-no-constraints",
                "type_list": string_types,
            },
            "meta": {},
        },
        {
            "expectation_type": "expect_column_values_to_not_be_null",
            "kwargs": {"column": "comments-no-constraints"},
            "meta": {},
        },
        {
            "expectation_type": "expect_column_to_exist",
            "kwargs": {"column": "state-abbreviation-equal-min-max"},
            "meta": {},
        },
        {
            "expectation_type": "expect_column_values_to_be_in_type_list",
            "kwargs": {
                "column": "state-abbreviation-equal-min-max",
                "type_list": string_types,
            },
            "meta": {},
        },
        {
            "expectation_type": "expect_column_value_lengths_to_equal",
            "kwargs": {"column": "state-abbreviation-equal-min-max", "value": 2},
            "meta": {},
        },
        {
            "expectation_type": "expect_column_values_to_not_be_null",
            "kwargs": {"column": "state-abbreviation-equal-min-max"},
            "meta": {},
        },
        {
            "expectation_type": "expect_column_to_exist",
            "kwargs": {"column": "ICD10-code-3-7"},
            "meta": {},
        },
        {
            "expectation_type": "expect_column_values_to_be_in_type_list",
            "kwargs": {"column": "ICD10-code-3-7", "type_list": string_types},
            "meta": {},
        },
        {
            "expectation_type": "expect_column_value_lengths_to_be_between",
            "kwargs": {"column": "ICD10-code-3-7", "min_value": 3, "max_value": 7},
            "meta": {},
        },
        {
            "expectation_type": "expect_column_values_to_not_be_null",
            "kwargs": {"column": "ICD10-code-3-7"},
            "meta": {},
        },
        {
            "expectation_type": "expect_column_to_exist",
            "kwargs": {"column": "name-no-max"},
            "meta": {},
        },
        {
            "expectation_type": "expect_column_values_to_be_in_type_list",
            "kwargs": {"column": "name-no-max", "type_list": string_types},
            "meta": {},
        },
        {
            "expectation_type": "expect_column_value_lengths_to_be_between",
            "kwargs": {"column": "name-no-max", "min_value": 1},
            "meta": {},
        },
        {
            "expectation_type": "expect_column_values_to_not_be_null",
            "kwargs": {"column": "name-no-max"},
            "meta": {},
        },
        {
            "expectation_type": "expect_column_to_exist",
            "kwargs": {"column": "password-max-33"},
            "meta": {},
        },
        {
            "expectation_type": "expect_column_values_to_be_in_type_list",
            "kwargs": {"column": "password-max-33", "type_list": string_types},
            "meta": {},
        },
        {
            "expectation_type": "expect_column_value_lengths_to_be_between",
            "kwargs": {"column": "password-max-33", "max_value": 33},
            "meta": {},
        },
        {
            "expectation_type": "expect_column_values_to_not_be_null",
            "kwargs": {"column": "password-max-33"},
            "meta": {},
        },
        {
            "expectation_type": "expect_column_to_exist",
            "kwargs": {"column": "optional-min-1"},
            "meta": {},
        },
        {
            "expectation_type": "expect_column_values_to_be_in_type_list",
            "kwargs": {"column": "optional-min-1", "type_list": string_types},
            "meta": {},
        },
        {
            "expectation_type": "expect_column_value_lengths_to_be_between",
            "kwargs": {"column": "optional-min-1", "min_value": 1},
            "meta": {},
        },
    ]
    actual = [expectation.to_json_dict() for expectation in suite.expectations]
    assert actual == expected

    # The profiled suite must also be accepted by the data context.
    empty_data_context.save_expectation_suite(suite)
def test_profile_enum_with_bad_input_raises_schema_error(enum_types_schema):
    """Profiling a schema whose ``enum`` is not a list raises ``SchemaError``."""
    # Mangle the enum list: replace it with a plain string.
    shirt_size = enum_types_schema["properties"]["shirt-size"]
    shirt_size["enum"] = "foo"

    profiler = JsonSchemaProfiler()
    with pytest.raises(jsonschema.SchemaError):
        profiler.profile(enum_types_schema, "enums")
def test_profile_boolean_schema(empty_data_context, boolean_types_schema):
    """Profile the boolean-types schema fixture and check the resulting suite.

    The generated expectations are compared, in order, against the expected
    list below, and the suite is then saved through the data context.
    """
    suite = JsonSchemaProfiler().profile(boolean_types_schema, "bools")

    assert isinstance(suite, ExpectationSuite)
    assert suite.expectation_suite_name == "bools"

    boolean_types = list(ProfilerTypeMapping.BOOLEAN_TYPE_NAMES)
    expected = [
        {
            "expectation_type": "expect_column_to_exist",
            "kwargs": {"column": "active"},
            "meta": {},
        },
        {
            "expectation_type": "expect_column_values_to_be_in_type_list",
            "kwargs": {"column": "active", "type_list": boolean_types},
            "meta": {},
        },
        {
            "expectation_type": "expect_column_values_to_be_in_set",
            "kwargs": {"column": "active", "value_set": [True, False]},
            "meta": {},
        },
        {
            "expectation_type": "expect_column_values_to_not_be_null",
            "kwargs": {"column": "active"},
            "meta": {},
        },
        {
            "expectation_type": "expect_column_to_exist",
            "kwargs": {"column": "optional"},
            "meta": {},
        },
        {
            "expectation_type": "expect_column_values_to_be_in_type_list",
            "kwargs": {"column": "optional", "type_list": boolean_types},
            "meta": {},
        },
        {
            "expectation_type": "expect_column_values_to_be_in_set",
            "kwargs": {"column": "optional", "value_set": [True, False]},
            "meta": {},
        },
    ]
    actual = [expectation.to_json_dict() for expectation in suite.expectations]
    assert actual == expected

    # The profiled suite must also be accepted by the data context.
    empty_data_context.save_expectation_suite(suite)
def test_profile_raises_error_on_missing_suite_name(simple_schema):
    """Profiling with ``suite_name=None`` raises a ``ValueError`` with a hint."""
    profiler = JsonSchemaProfiler()

    with pytest.raises(ValueError) as excinfo:
        profiler.profile(simple_schema, suite_name=None)

    # Checked after the ``with`` block so the assertion actually executes.
    assert "provide a suite name" in str(excinfo.value)
def test_profile_raises_errors_on_bad_inputs():
    """Each of several non-schema inputs makes ``profile`` raise ``TypeError``."""
    profiler = JsonSchemaProfiler()
    bad_schemas = (1, 1.1, None, "junk")

    for bad_schema in bad_schemas:
        with pytest.raises(TypeError):
            profiler.profile(bad_schema, "foo")
def test_null_fields_schema(empty_data_context, null_fields_schema):
    """Profile the null-fields schema fixture and check the resulting suite.

    The generated expectations are compared, in order, against the expected
    list below, and the suite is then saved through the data context.
    """
    suite = JsonSchemaProfiler().profile(null_fields_schema, "null_fields")

    assert isinstance(suite, ExpectationSuite)
    assert suite.expectation_suite_name == "null_fields"

    expected = [
        {
            "expectation_type": "expect_column_to_exist",
            "kwargs": {"column": "null"},
            "meta": {},
        },
        {
            "expectation_type": "expect_column_values_to_be_null",
            "kwargs": {"column": "null"},
            "meta": {},
        },
        {
            "expectation_type": "expect_column_to_exist",
            "kwargs": {"column": "string-or-null"},
            "meta": {},
        },
        {
            "expectation_type": "expect_column_values_to_be_in_type_list",
            "kwargs": {
                "column": "string-or-null",
                "type_list": list(ProfilerTypeMapping.STRING_TYPE_NAMES),
            },
            "meta": {},
        },
        {
            "expectation_type": "expect_column_to_exist",
            "kwargs": {"column": "int-or-null"},
            "meta": {},
        },
        {
            "expectation_type": "expect_column_values_to_be_in_type_list",
            "kwargs": {
                "column": "int-or-null",
                "type_list": list(ProfilerTypeMapping.INT_TYPE_NAMES),
            },
            "meta": {},
        },
        {
            "expectation_type": "expect_column_to_exist",
            "kwargs": {"column": "number-or-null"},
            "meta": {},
        },
        {
            "expectation_type": "expect_column_values_to_be_in_type_list",
            "kwargs": {
                "column": "number-or-null",
                "type_list": list(ProfilerTypeMapping.FLOAT_TYPE_NAMES),
            },
            "meta": {},
        },
        {
            "expectation_type": "expect_column_to_exist",
            "kwargs": {"column": "enum-or-null"},
            "meta": {},
        },
        {
            "expectation_type": "expect_column_values_to_be_in_set",
            "kwargs": {
                "column": "enum-or-null",
                "value_set": ["a", "b", "c"],
            },
            "meta": {},
        },
    ]
    actual = [expectation.to_json_dict() for expectation in suite.expectations]
    assert actual == expected

    # The profiled suite must also be accepted by the data context.
    empty_data_context.save_expectation_suite(suite)
def test_has_profile_create_expectations_from_complex_schema(
    empty_data_context, complex_flat_schema
):
    """Profile the complex flat schema fixture and check the resulting suite.

    Checks the suite-level notes, then compares the generated expectations,
    in order, against the expected list below (including the notes attached
    to the ``street-number`` column), and finally saves the suite through the
    data context.
    """
    suite = JsonSchemaProfiler().profile(complex_flat_schema, "complex")

    assert isinstance(suite, ExpectationSuite)
    assert suite.expectation_suite_name == "complex"

    expected_suite_notes = {
        "format": "markdown",
        "content": ["### Description:\nAn address"],
    }
    assert suite.meta["notes"] == expected_suite_notes

    string_types = list(ProfilerTypeMapping.STRING_TYPE_NAMES)
    int_types = list(ProfilerTypeMapping.INT_TYPE_NAMES)
    expected = [
        {
            "expectation_type": "expect_column_to_exist",
            "kwargs": {"column": "post-office-box"},
            "meta": {},
        },
        {
            "expectation_type": "expect_column_values_to_be_in_type_list",
            "kwargs": {"column": "post-office-box", "type_list": string_types},
            "meta": {},
        },
        {
            "expectation_type": "expect_column_values_to_not_be_null",
            "kwargs": {"column": "post-office-box"},
            "meta": {},
        },
        {
            "expectation_type": "expect_column_to_exist",
            "kwargs": {"column": "street-name"},
            "meta": {},
        },
        {
            "expectation_type": "expect_column_values_to_be_in_type_list",
            "kwargs": {"column": "street-name", "type_list": string_types},
            "meta": {},
        },
        {
            "expectation_type": "expect_column_values_to_not_be_null",
            "kwargs": {"column": "street-name"},
            "meta": {},
        },
        {
            "expectation_type": "expect_column_to_exist",
            "kwargs": {"column": "street-number"},
            "meta": {
                "notes": {
                    "format": "markdown",
                    "content": ["### Description:\nOnly the address number."],
                }
            },
        },
        {
            "expectation_type": "expect_column_values_to_be_in_type_list",
            "kwargs": {"column": "street-number", "type_list": int_types},
            "meta": {},
        },
        {
            "expectation_type": "expect_column_values_to_not_be_null",
            "kwargs": {"column": "street-number"},
            "meta": {},
        },
        {
            "expectation_type": "expect_column_to_exist",
            "kwargs": {"column": "locality"},
            "meta": {},
        },
        {
            "expectation_type": "expect_column_values_to_be_in_type_list",
            "kwargs": {"column": "locality", "type_list": string_types},
            "meta": {},
        },
        {
            "expectation_type": "expect_column_values_to_not_be_null",
            "kwargs": {"column": "locality"},
            "meta": {},
        },
        {
            "expectation_type": "expect_column_to_exist",
            "kwargs": {"column": "region"},
            "meta": {},
        },
        {
            "expectation_type": "expect_column_values_to_be_in_type_list",
            "kwargs": {"column": "region", "type_list": string_types},
            "meta": {},
        },
        {
            "expectation_type": "expect_column_values_to_not_be_null",
            "kwargs": {"column": "region"},
            "meta": {},
        },
        {
            "expectation_type": "expect_column_to_exist",
            "kwargs": {"column": "postal-code"},
            "meta": {},
        },
        {
            "expectation_type": "expect_column_values_to_be_in_type_list",
            "kwargs": {"column": "postal-code", "type_list": string_types},
            "meta": {},
        },
        {
            "expectation_type": "expect_column_values_to_not_be_null",
            "kwargs": {"column": "postal-code"},
            "meta": {},
        },
        {
            "expectation_type": "expect_column_to_exist",
            "kwargs": {"column": "country-name"},
            "meta": {},
        },
        {
            "expectation_type": "expect_column_values_to_be_in_type_list",
            "kwargs": {"column": "country-name", "type_list": string_types},
            "meta": {},
        },
        {
            "expectation_type": "expect_column_values_to_not_be_null",
            "kwargs": {"column": "country-name"},
            "meta": {},
        },
    ]
    actual = [expectation.to_json_dict() for expectation in suite.expectations]
    assert actual == expected

    # The profiled suite must also be accepted by the data context.
    empty_data_context.save_expectation_suite(suite)
def test_profile_number_ranges_schema(empty_data_context, number_ranges_schema):
    """Profile the number-ranges schema fixture and check the resulting suite.

    The generated expectations are compared, in order, against the expected
    list below, and the suite is then saved through the data context.
    """
    suite = JsonSchemaProfiler().profile(number_ranges_schema, "number_ranges")

    assert isinstance(suite, ExpectationSuite)
    assert suite.expectation_suite_name == "number_ranges"

    float_types = list(ProfilerTypeMapping.FLOAT_TYPE_NAMES)
    expected = [
        {
            "expectation_type": "expect_column_to_exist",
            "kwargs": {"column": "favorite-number"},
            "meta": {},
        },
        {
            "expectation_type": "expect_column_values_to_be_in_type_list",
            "kwargs": {"column": "favorite-number", "type_list": float_types},
            "meta": {},
        },
        {
            "expectation_type": "expect_column_values_to_not_be_null",
            "kwargs": {"column": "favorite-number"},
            "meta": {},
        },
        {
            "expectation_type": "expect_column_to_exist",
            "kwargs": {"column": "age-0-130"},
            "meta": {},
        },
        {
            "expectation_type": "expect_column_values_to_be_in_type_list",
            "kwargs": {"column": "age-0-130", "type_list": float_types},
            "meta": {},
        },
        {
            "expectation_type": "expect_column_values_to_be_between",
            "kwargs": {
                "column": "age-0-130",
                "min_value": 0.5,
                "max_value": 130.5,
            },
            "meta": {},
        },
        {
            "expectation_type": "expect_column_values_to_not_be_null",
            "kwargs": {"column": "age-0-130"},
            "meta": {},
        },
        {
            "expectation_type": "expect_column_to_exist",
            "kwargs": {"column": "wheel-count-0-plus"},
            "meta": {},
        },
        {
            "expectation_type": "expect_column_values_to_be_in_type_list",
            "kwargs": {"column": "wheel-count-0-plus", "type_list": float_types},
            "meta": {},
        },
        {
            "expectation_type": "expect_column_values_to_be_between",
            "kwargs": {"column": "wheel-count-0-plus", "min_value": 0.5},
            "meta": {},
        },
        {
            "expectation_type": "expect_column_values_to_not_be_null",
            "kwargs": {"column": "wheel-count-0-plus"},
            "meta": {},
        },
        {
            "expectation_type": "expect_column_to_exist",
            "kwargs": {"column": "rpm-max-7000"},
            "meta": {},
        },
        {
            "expectation_type": "expect_column_values_to_be_in_type_list",
            "kwargs": {"column": "rpm-max-7000", "type_list": float_types},
            "meta": {},
        },
        {
            "expectation_type": "expect_column_values_to_be_between",
            "kwargs": {"column": "rpm-max-7000", "max_value": 7000.5},
            "meta": {},
        },
        {
            "expectation_type": "expect_column_values_to_not_be_null",
            "kwargs": {"column": "rpm-max-7000"},
            "meta": {},
        },
        {
            "expectation_type": "expect_column_to_exist",
            "kwargs": {"column": "lake-depth-max-minus-100"},
            "meta": {},
        },
        {
            "expectation_type": "expect_column_values_to_be_in_type_list",
            "kwargs": {
                "column": "lake-depth-max-minus-100",
                "type_list": float_types,
            },
            "meta": {},
        },
        {
            "expectation_type": "expect_column_values_to_be_between",
            "kwargs": {"column": "lake-depth-max-minus-100", "max_value": -100.5},
            "meta": {},
        },
        {
            "expectation_type": "expect_column_values_to_not_be_null",
            "kwargs": {"column": "lake-depth-max-minus-100"},
            "meta": {},
        },
        {
            "expectation_type": "expect_column_to_exist",
            "kwargs": {"column": "floor-exclusive-min-0"},
            "meta": {},
        },
        {
            "expectation_type": "expect_column_values_to_be_in_type_list",
            "kwargs": {
                "column": "floor-exclusive-min-0",
                "type_list": float_types,
            },
            "meta": {},
        },
        {
            "expectation_type": "expect_column_values_to_be_between",
            "kwargs": {
                "column": "floor-exclusive-min-0",
                "min_value": 0.5,
                "strict_min": True,
            },
            "meta": {},
        },
        {
            "expectation_type": "expect_column_values_to_not_be_null",
            "kwargs": {"column": "floor-exclusive-min-0"},
            "meta": {},
        },
        {
            "expectation_type": "expect_column_to_exist",
            "kwargs": {"column": "floor-exclusive-max-100"},
            "meta": {},
        },
        {
            "expectation_type": "expect_column_values_to_be_in_type_list",
            "kwargs": {
                "column": "floor-exclusive-max-100",
                "type_list": float_types,
            },
            "meta": {},
        },
        {
            "expectation_type": "expect_column_values_to_be_between",
            "kwargs": {
                "column": "floor-exclusive-max-100",
                "max_value": 100.5,
                "strict_max": True,
            },
            "meta": {},
        },
        {
            "expectation_type": "expect_column_values_to_not_be_null",
            "kwargs": {"column": "floor-exclusive-max-100"},
            "meta": {},
        },
        {
            "expectation_type": "expect_column_to_exist",
            "kwargs": {"column": "gear-exclusive-0-6"},
            "meta": {},
        },
        {
            "expectation_type": "expect_column_values_to_be_in_type_list",
            "kwargs": {"column": "gear-exclusive-0-6", "type_list": float_types},
            "meta": {},
        },
        {
            "expectation_type": "expect_column_values_to_be_between",
            "kwargs": {
                "column": "gear-exclusive-0-6",
                "min_value": 0.5,
                "strict_min": True,
                "max_value": 6.5,
                "strict_max": True,
            },
            "meta": {},
        },
        {
            "expectation_type": "expect_column_values_to_not_be_null",
            "kwargs": {"column": "gear-exclusive-0-6"},
            "meta": {},
        },
        {
            "expectation_type": "expect_column_to_exist",
            "kwargs": {"column": "optional-min-half"},
            "meta": {},
        },
        {
            "expectation_type": "expect_column_values_to_be_in_type_list",
            "kwargs": {"column": "optional-min-half", "type_list": float_types},
            "meta": {},
        },
        {
            "expectation_type": "expect_column_values_to_be_between",
            "kwargs": {"column": "optional-min-half", "min_value": 0.5},
            "meta": {},
        },
    ]
    actual = [expectation.to_json_dict() for expectation in suite.expectations]
    assert actual == expected

    # The profiled suite must also be accepted by the data context.
    empty_data_context.save_expectation_suite(suite)