def test_profile_enum_schema(empty_data_context, enum_types_schema):
    """Profile the enum schema fixture and check the resulting suite.

    The suite must be an ``ExpectationSuite`` named "enums" whose
    expectations, serialized with ``to_json_dict()``, equal the ordered
    list built below. The suite is then saved through the data context.
    """

    def entry(expectation_type, column, **extra_kwargs):
        # Build one serialized expectation; all entries here carry empty meta.
        kwargs = {"column": column}
        kwargs.update(extra_kwargs)
        return {
            "expectation_type": expectation_type,
            "kwargs": kwargs,
            "meta": {},
        }

    suite = JsonSchemaProfiler().profile(enum_types_schema, "enums")
    assert isinstance(suite, ExpectationSuite)
    assert suite.expectation_suite_name == "enums"

    observed = [item.to_json_dict() for item in suite.expectations]
    expected = [
        # shirt-size
        entry("expect_column_to_exist", "shirt-size"),
        entry(
            "expect_column_values_to_be_in_set",
            "shirt-size",
            value_set=["XS", "S", "M", "XL", "XXL"],
        ),
        entry("expect_column_values_to_not_be_null", "shirt-size"),
        # optional-color
        entry("expect_column_to_exist", "optional-color"),
        entry(
            "expect_column_values_to_be_in_set",
            "optional-color",
            value_set=["red", "green", "blue"],
        ),
        # optional-hat
        entry("expect_column_to_exist", "optional-hat"),
        entry(
            "expect_column_values_to_be_in_type_list",
            "optional-hat",
            type_list=list(ProfilerTypeMapping.STRING_TYPE_NAMES),
        ),
        entry(
            "expect_column_values_to_be_in_set",
            "optional-hat",
            value_set=["red", "green", "blue"],
        ),
        # optional-answer
        entry("expect_column_to_exist", "optional-answer"),
        entry(
            "expect_column_values_to_be_in_set",
            "optional-answer",
            value_set=["yes", "no"],
        ),
    ]
    assert observed == expected

    # Persist the suite; an exception raised here fails the test.
    empty_data_context.save_expectation_suite(suite)
def test_profile_string_lengths_schema(empty_data_context,
                                       string_lengths_schema):
    """Profile the string-lengths schema fixture and check the suite.

    The suite must be an ``ExpectationSuite`` named "lengths" whose
    serialized expectations equal the ordered list built below. The
    suite is then saved through the data context.
    """

    def entry(expectation_type, column, **extra_kwargs):
        # Build one serialized expectation; all entries here carry empty meta.
        kwargs = {"column": column}
        kwargs.update(extra_kwargs)
        return {
            "expectation_type": expectation_type,
            "kwargs": kwargs,
            "meta": {},
        }

    def string_type(column):
        # Type-list expectation restricted to the string type names.
        return entry(
            "expect_column_values_to_be_in_type_list",
            column,
            type_list=list(ProfilerTypeMapping.STRING_TYPE_NAMES),
        )

    suite = JsonSchemaProfiler().profile(string_lengths_schema, "lengths")
    assert isinstance(suite, ExpectationSuite)
    assert suite.expectation_suite_name == "lengths"

    observed = [item.to_json_dict() for item in suite.expectations]
    expected = [
        # comments-no-constraints
        entry("expect_column_to_exist", "comments-no-constraints"),
        string_type("comments-no-constraints"),
        entry(
            "expect_column_values_to_not_be_null",
            "comments-no-constraints",
        ),
        # state-abbreviation-equal-min-max
        entry("expect_column_to_exist", "state-abbreviation-equal-min-max"),
        string_type("state-abbreviation-equal-min-max"),
        entry(
            "expect_column_value_lengths_to_equal",
            "state-abbreviation-equal-min-max",
            value=2,
        ),
        entry(
            "expect_column_values_to_not_be_null",
            "state-abbreviation-equal-min-max",
        ),
        # ICD10-code-3-7
        entry("expect_column_to_exist", "ICD10-code-3-7"),
        string_type("ICD10-code-3-7"),
        entry(
            "expect_column_value_lengths_to_be_between",
            "ICD10-code-3-7",
            min_value=3,
            max_value=7,
        ),
        entry("expect_column_values_to_not_be_null", "ICD10-code-3-7"),
        # name-no-max
        entry("expect_column_to_exist", "name-no-max"),
        string_type("name-no-max"),
        entry(
            "expect_column_value_lengths_to_be_between",
            "name-no-max",
            min_value=1,
        ),
        entry("expect_column_values_to_not_be_null", "name-no-max"),
        # password-max-33
        entry("expect_column_to_exist", "password-max-33"),
        string_type("password-max-33"),
        entry(
            "expect_column_value_lengths_to_be_between",
            "password-max-33",
            max_value=33,
        ),
        entry("expect_column_values_to_not_be_null", "password-max-33"),
        # optional-min-1 (no not-null expectation is expected)
        entry("expect_column_to_exist", "optional-min-1"),
        string_type("optional-min-1"),
        entry(
            "expect_column_value_lengths_to_be_between",
            "optional-min-1",
            min_value=1,
        ),
    ]
    assert observed == expected

    # Persist the suite; an exception raised here fails the test.
    empty_data_context.save_expectation_suite(suite)
def test_profile_enum_with_bad_input_raises_schema_error(enum_types_schema):
    """Profiling a schema whose enum was corrupted raises ``SchemaError``."""
    # Replace the enum list with a plain string to mangle the schema.
    shirt_size = enum_types_schema["properties"]["shirt-size"]
    shirt_size["enum"] = "foo"

    profiler = JsonSchemaProfiler()
    with pytest.raises(jsonschema.SchemaError):
        profiler.profile(enum_types_schema, "enums")
def test_profile_boolean_schema(empty_data_context, boolean_types_schema):
    """Profile the boolean schema fixture and check the resulting suite.

    The suite must be an ``ExpectationSuite`` named "bools" whose
    serialized expectations equal the ordered list built below. The
    suite is then saved through the data context.
    """

    def entry(expectation_type, column, **extra_kwargs):
        # Build one serialized expectation; all entries here carry empty meta.
        kwargs = {"column": column}
        kwargs.update(extra_kwargs)
        return {
            "expectation_type": expectation_type,
            "kwargs": kwargs,
            "meta": {},
        }

    def boolean_entries(column):
        # The three expectations expected for both columns in this test.
        return [
            entry("expect_column_to_exist", column),
            entry(
                "expect_column_values_to_be_in_type_list",
                column,
                type_list=list(ProfilerTypeMapping.BOOLEAN_TYPE_NAMES),
            ),
            entry(
                "expect_column_values_to_be_in_set",
                column,
                value_set=[True, False],
            ),
        ]

    suite = JsonSchemaProfiler().profile(boolean_types_schema, "bools")
    assert isinstance(suite, ExpectationSuite)
    assert suite.expectation_suite_name == "bools"

    observed = [item.to_json_dict() for item in suite.expectations]

    expected = boolean_entries("active")
    # Only "active" is expected to carry a trailing not-null expectation.
    expected.append(entry("expect_column_values_to_not_be_null", "active"))
    expected.extend(boolean_entries("optional"))
    assert observed == expected

    # Persist the suite; an exception raised here fails the test.
    empty_data_context.save_expectation_suite(suite)
def test_profile_raises_error_on_missing_suite_name(simple_schema):
    """Passing ``suite_name=None`` raises a ``ValueError`` asking for a name."""
    profiler = JsonSchemaProfiler()
    with pytest.raises(ValueError) as excinfo:
        profiler.profile(simple_schema, suite_name=None)
    # The error text must tell the caller what is missing.
    assert "provide a suite name" in str(excinfo.value)
def test_profile_raises_errors_on_bad_inputs():
    """Each non-schema value passed to ``profile`` raises ``TypeError``."""
    profiler = JsonSchemaProfiler()
    bad_schemas = (1, 1.1, None, "junk")
    for candidate in bad_schemas:
        # A separate context per value so every input is checked on its own.
        with pytest.raises(TypeError):
            profiler.profile(candidate, "foo")
def test_null_fields_schema(empty_data_context, null_fields_schema):
    """Profile the null-fields schema fixture and check the resulting suite.

    The suite must be an ``ExpectationSuite`` named "null_fields" whose
    serialized expectations equal the ordered list built below. The
    suite is then saved through the data context.
    """

    def entry(expectation_type, column, **extra_kwargs):
        # Build one serialized expectation; all entries here carry empty meta.
        kwargs = {"column": column}
        kwargs.update(extra_kwargs)
        return {
            "expectation_type": expectation_type,
            "kwargs": kwargs,
            "meta": {},
        }

    def typed(column, type_names):
        # Type-list expectation for the given collection of type names.
        return entry(
            "expect_column_values_to_be_in_type_list",
            column,
            type_list=list(type_names),
        )

    suite = JsonSchemaProfiler().profile(null_fields_schema, "null_fields")
    assert isinstance(suite, ExpectationSuite)
    assert suite.expectation_suite_name == "null_fields"

    observed = [item.to_json_dict() for item in suite.expectations]
    expected = [
        # null
        entry("expect_column_to_exist", "null"),
        entry("expect_column_values_to_be_null", "null"),
        # string-or-null
        entry("expect_column_to_exist", "string-or-null"),
        typed("string-or-null", ProfilerTypeMapping.STRING_TYPE_NAMES),
        # int-or-null
        entry("expect_column_to_exist", "int-or-null"),
        typed("int-or-null", ProfilerTypeMapping.INT_TYPE_NAMES),
        # number-or-null
        entry("expect_column_to_exist", "number-or-null"),
        typed("number-or-null", ProfilerTypeMapping.FLOAT_TYPE_NAMES),
        # enum-or-null
        entry("expect_column_to_exist", "enum-or-null"),
        entry(
            "expect_column_values_to_be_in_set",
            "enum-or-null",
            value_set=["a", "b", "c"],
        ),
    ]
    assert observed == expected

    # Persist the suite; an exception raised here fails the test.
    empty_data_context.save_expectation_suite(suite)
def test_has_profile_create_expectations_from_complex_schema(
        empty_data_context, complex_flat_schema):
    """Profile the complex flat schema fixture and check the suite.

    Checks the suite type, its name ("complex"), the markdown notes stored
    in the suite-level ``meta`` and the ordered, serialized expectations.
    The suite is then saved through the data context.
    """

    def entry(expectation_type, column, meta=None, **extra_kwargs):
        # Build one serialized expectation; meta is empty unless supplied.
        kwargs = {"column": column}
        kwargs.update(extra_kwargs)
        return {
            "expectation_type": expectation_type,
            "kwargs": kwargs,
            "meta": {} if meta is None else meta,
        }

    def column_entries(column, type_names, exist_meta=None):
        # Every column in this test expects exist / type-list / not-null.
        return [
            entry("expect_column_to_exist", column, meta=exist_meta),
            entry(
                "expect_column_values_to_be_in_type_list",
                column,
                type_list=list(type_names),
            ),
            entry("expect_column_values_to_not_be_null", column),
        ]

    suite = JsonSchemaProfiler().profile(complex_flat_schema, "complex")
    assert isinstance(suite, ExpectationSuite)
    assert suite.expectation_suite_name == "complex"

    expected_suite_notes = {
        "format": "markdown",
        "content": ["### Description:\nAn address"],
    }
    assert suite.meta["notes"] == expected_suite_notes

    observed = [item.to_json_dict() for item in suite.expectations]

    # Only the street-number existence expectation carries notes in its meta.
    street_number_meta = {
        "notes": {
            "format": "markdown",
            "content": ["### Description:\nOnly the address number."],
        }
    }

    expected = []
    expected.extend(
        column_entries(
            "post-office-box", ProfilerTypeMapping.STRING_TYPE_NAMES
        )
    )
    expected.extend(
        column_entries("street-name", ProfilerTypeMapping.STRING_TYPE_NAMES)
    )
    expected.extend(
        column_entries(
            "street-number",
            ProfilerTypeMapping.INT_TYPE_NAMES,
            exist_meta=street_number_meta,
        )
    )
    expected.extend(
        column_entries("locality", ProfilerTypeMapping.STRING_TYPE_NAMES)
    )
    expected.extend(
        column_entries("region", ProfilerTypeMapping.STRING_TYPE_NAMES)
    )
    expected.extend(
        column_entries("postal-code", ProfilerTypeMapping.STRING_TYPE_NAMES)
    )
    expected.extend(
        column_entries("country-name", ProfilerTypeMapping.STRING_TYPE_NAMES)
    )
    assert observed == expected

    # Persist the suite; an exception raised here fails the test.
    empty_data_context.save_expectation_suite(suite)
def test_profile_number_ranges_schema(empty_data_context,
                                      number_ranges_schema):
    """Profile the number-ranges schema fixture and check the suite.

    The suite must be an ``ExpectationSuite`` named "number_ranges" whose
    serialized expectations equal the ordered list built below. The
    suite is then saved through the data context.
    """

    def entry(expectation_type, column, **extra_kwargs):
        # Build one serialized expectation; all entries here carry empty meta.
        kwargs = {"column": column}
        kwargs.update(extra_kwargs)
        return {
            "expectation_type": expectation_type,
            "kwargs": kwargs,
            "meta": {},
        }

    def number_entries(column, not_null=True, **bounds):
        # Expected order per column: exist, float type list, then a
        # between expectation when bounds are given, then not-null.
        rows = [
            entry("expect_column_to_exist", column),
            entry(
                "expect_column_values_to_be_in_type_list",
                column,
                type_list=list(ProfilerTypeMapping.FLOAT_TYPE_NAMES),
            ),
        ]
        if bounds:
            rows.append(
                entry("expect_column_values_to_be_between", column, **bounds)
            )
        if not_null:
            rows.append(entry("expect_column_values_to_not_be_null", column))
        return rows

    suite = JsonSchemaProfiler().profile(number_ranges_schema,
                                         "number_ranges")
    assert isinstance(suite, ExpectationSuite)
    assert suite.expectation_suite_name == "number_ranges"

    observed = [item.to_json_dict() for item in suite.expectations]

    expected = []
    expected.extend(number_entries("favorite-number"))
    expected.extend(
        number_entries("age-0-130", min_value=0.5, max_value=130.5)
    )
    expected.extend(number_entries("wheel-count-0-plus", min_value=0.5))
    expected.extend(number_entries("rpm-max-7000", max_value=7000.5))
    expected.extend(
        number_entries("lake-depth-max-minus-100", max_value=-100.5)
    )
    expected.extend(
        number_entries(
            "floor-exclusive-min-0", min_value=0.5, strict_min=True
        )
    )
    expected.extend(
        number_entries(
            "floor-exclusive-max-100", max_value=100.5, strict_max=True
        )
    )
    expected.extend(
        number_entries(
            "gear-exclusive-0-6",
            min_value=0.5,
            strict_min=True,
            max_value=6.5,
            strict_max=True,
        )
    )
    # The last column is expected without a not-null expectation.
    expected.extend(
        number_entries("optional-min-half", not_null=False, min_value=0.5)
    )
    assert observed == expected

    # Persist the suite; an exception raised here fails the test.
    empty_data_context.save_expectation_suite(suite)