コード例 #1
0
    def _create_string_length_expectation(
            self, key: str,
            details: dict) -> Optional[ExpectationConfiguration]:
        """Translate JSON Schema string-length keywords into an expectation.

        Reference:
        https://json-schema.org/understanding-json-schema/reference/string.html#length

        Args:
            key: Column name stored under ``"column"`` in the kwargs.
            details: Schema fragment for the property; ``type``,
                ``minLength`` and ``maxLength`` are read from it.

        Returns:
            ``None`` when the type is not the string type or when neither
            length keyword is present. When both bounds compare equal, an
            ``expect_column_value_lengths_to_equal`` configuration;
            otherwise an ``expect_column_value_lengths_to_be_between``
            configuration carrying whichever bounds were supplied.
        """
        if details.get("type") != JsonSchemaTypes.STRING.value:
            return None

        shortest = details.get("minLength")
        longest = details.get("maxLength")
        if shortest is None and longest is None:
            return None

        # Identical bounds collapse into a single exact-length expectation.
        if shortest == longest:
            return ExpectationConfiguration(
                "expect_column_value_lengths_to_equal",
                ExpectationKwargs({"column": key, "value": shortest}))

        bounds = {"column": key}
        for kwarg_name, limit in (("min_value", shortest),
                                  ("max_value", longest)):
            if limit is not None:
                bounds[kwarg_name] = limit

        return ExpectationConfiguration(
            "expect_column_value_lengths_to_be_between",
            ExpectationKwargs(bounds))
コード例 #2
0
    def _create_range_expectation(
            self, key: str,
            details: dict) -> Optional[ExpectationConfiguration]:
        """Translate JSON Schema numeric range keywords into an expectation.

        Reference:
        https://json-schema.org/understanding-json-schema/reference/numeric.html#range

        ``exclusiveMinimum`` / ``exclusiveMaximum`` are accepted in both
        flavours described by that reference: as numbers (the exclusive
        bound itself) and, Draft 4 style, as booleans that mark
        ``minimum`` / ``maximum`` as exclusive.

        Args:
            key: Column name stored under ``"column"`` in the kwargs.
            details: Schema fragment for the property; ``type``,
                ``minimum``, ``maximum``, ``exclusiveMinimum`` and
                ``exclusiveMaximum`` are read from it.

        Returns:
            ``None`` when the type is neither the integer nor the number
            type, or when no usable bound is present. Otherwise an
            ``expect_column_values_to_be_between`` configuration with
            ``min_value`` / ``max_value`` and, for exclusive bounds,
            ``strict_min`` / ``strict_max`` set to ``True``.
        """
        type_ = details.get("type", None)
        if type_ not in [
                JsonSchemaTypes.INTEGER.value, JsonSchemaTypes.NUMBER.value
        ]:
            return None

        minimum = details.get("minimum", None)
        maximum = details.get("maximum", None)
        exclusive_minimum = details.get("exclusiveMinimum", None)
        exclusive_maximum = details.get("exclusiveMaximum", None)

        kwargs: Dict[str, Any] = {"column": key}
        if minimum is not None:
            kwargs["min_value"] = minimum
        if maximum is not None:
            kwargs["max_value"] = maximum

        if isinstance(exclusive_minimum, bool):
            # Draft 4: the boolean only qualifies ``minimum``; it must never
            # be written into ``min_value`` itself.
            if exclusive_minimum and minimum is not None:
                kwargs["strict_min"] = True
        elif exclusive_minimum is not None:
            # Numeric form: the exclusive bound takes precedence over
            # ``minimum``.
            kwargs["min_value"] = exclusive_minimum
            kwargs["strict_min"] = True

        if isinstance(exclusive_maximum, bool):
            # Draft 4: the boolean only qualifies ``maximum``.
            if exclusive_maximum and maximum is not None:
                kwargs["strict_max"] = True
        elif exclusive_maximum is not None:
            kwargs["max_value"] = exclusive_maximum
            kwargs["strict_max"] = True

        # Nothing to enforce (no keywords at all, or only a Draft 4 flag
        # without the bound it refers to).
        if "min_value" not in kwargs and "max_value" not in kwargs:
            return None

        return ExpectationConfiguration("expect_column_values_to_be_between",
                                        ExpectationKwargs(kwargs))
コード例 #3
0
def test_catch_exceptions_with_bad_expectation_type():
    """Validate a dataset whose suite holds an unknown expectation type.

    With ``catch_exceptions=True`` the "foobar" expectation must be
    reported as a failed result carrying exception info; with
    ``catch_exceptions=False`` validation must raise ``AttributeError``.
    """
    dataset = PandasDataset({"x": range(10)})
    bogus_config = ExpectationConfiguration(expectation_type="foobar",
                                            kwargs={})
    dataset._append_expectation(bogus_config)
    outcome = dataset.validate(catch_exceptions=True)

    # Locate the foobar result. If none matches, the index stays on the
    # last entry scanned (or 0 for an empty list) and the asserts fail.
    position = 0
    for position, entry in enumerate(outcome.results):
        if entry.expectation_config.expectation_type == "foobar":
            break

    foobar_result = outcome.results[position]
    assert foobar_result.success is False
    assert foobar_result.expectation_config.expectation_type == "foobar"
    assert foobar_result.expectation_config.kwargs == ExpectationKwargs()
    assert foobar_result.exception_info["raised_exception"] is True
    expected_message = (
        "AttributeError: 'PandasDataset' object has no attribute 'foobar'")
    assert expected_message in foobar_result.exception_info[
        "exception_traceback"]

    with pytest.raises(AttributeError):
        dataset.validate(catch_exceptions=False)
コード例 #4
0
    def _create_type_expectation(
            self, key: str,
            details: dict) -> Optional[ExpectationConfiguration]:
        """Build a type-list expectation from the schema ``type`` keyword.

        Args:
            key: Column name stored under ``"column"`` in the kwargs.
            details: Schema fragment for the property; only ``type`` is read.

        Returns:
            ``None`` when ``type`` is missing or ``None``; otherwise an
            ``expect_column_values_to_be_in_type_list`` configuration whose
            ``type_list`` is looked up in
            ``self.PROFILER_TYPE_LIST_BY_JSON_SCHEMA_TYPE``.

        Raises:
            KeyError: If the type has no entry in that mapping.
        """
        json_type = details.get("type")
        if json_type is None:
            return None

        return ExpectationConfiguration(
            "expect_column_values_to_be_in_type_list",
            ExpectationKwargs(
                column=key,
                type_list=self.PROFILER_TYPE_LIST_BY_JSON_SCHEMA_TYPE[
                    json_type]))
コード例 #5
0
def test_expectation_decorator_build_config():
    """Check that calling expectations records matching configurations.

    Two no-op expectations are invoked on an ``ExpectationOnlyDataAsset``;
    the resulting suite must list them in call order with the kwargs used.
    """
    asset = ExpectationOnlyDataAsset()
    asset.no_op_expectation()
    asset.no_op_value_expectation('a')

    suite = asset.get_expectation_suite()

    expected_plain = ExpectationConfiguration(
        expectation_type='no_op_expectation', kwargs={})
    assert expected_plain == suite.expectations[0]

    expected_valued = ExpectationConfiguration(
        expectation_type='no_op_value_expectation',
        kwargs=ExpectationKwargs({'value': 'a'}))
    assert expected_valued == suite.expectations[1]
コード例 #6
0
    def _create_boolean_expectation(
            self, key: str,
            details: dict) -> Optional[ExpectationConfiguration]:
        """Build a value-set expectation for a boolean schema property.

        Reference:
        https://json-schema.org/understanding-json-schema/reference/boolean.html

        Args:
            key: Column name stored under ``"column"`` in the kwargs.
            details: Schema fragment for the property; only ``type`` is read.

        Returns:
            ``None`` unless the type is the boolean type; otherwise an
            ``expect_column_values_to_be_in_set`` configuration restricted
            to ``[True, False]``.
        """
        if details.get("type") != JsonSchemaTypes.BOOLEAN.value:
            return None

        # TODO map JSONSchema types to which type backend? Pandas? Should this value set be parameterized per back end?
        return ExpectationConfiguration(
            "expect_column_values_to_be_in_set",
            ExpectationKwargs(column=key, value_set=[True, False]))
コード例 #7
0
def test_expectation_decorator_build_config():
    """Check that calling expectations records matching configurations.

    Two no-op expectations are invoked on an ``ExpectationOnlyDataAsset``;
    the resulting suite must list them in call order with the kwargs used.
    """
    asset = ExpectationOnlyDataAsset()
    asset.no_op_expectation()
    asset.no_op_value_expectation("a")

    suite = asset.get_expectation_suite()

    expected_plain = ExpectationConfiguration(
        expectation_type="no_op_expectation", kwargs={})
    assert expected_plain == suite.expectations[0]

    expected_valued = ExpectationConfiguration(
        expectation_type="no_op_value_expectation",
        kwargs=ExpectationKwargs({"value": "a"}),
    )
    assert expected_valued == suite.expectations[1]
コード例 #8
0
    def _create_set_expectation(
            self, key: str,
            details: dict) -> Optional[ExpectationConfiguration]:
        """Build a value-set expectation from the schema ``enum`` keyword.

        Reference:
        https://json-schema.org/understanding-json-schema/reference/generic.html#enumerated-values

        Args:
            key: Column name stored under ``"column"`` in the kwargs.
            details: Schema fragment for the property.

        Returns:
            ``None`` when the enum keyword is absent from ``details`` or its
            ``"enum"`` value is not a list; otherwise an
            ``expect_column_values_to_be_in_set`` configuration whose
            ``value_set`` is that list.
        """
        if JsonSchemaTypes.ENUM.value not in details:
            return None

        allowed_values = details.get("enum")
        if isinstance(allowed_values, list):
            return ExpectationConfiguration(
                "expect_column_values_to_be_in_set",
                ExpectationKwargs(column=key, value_set=allowed_values))
        return None
コード例 #9
0
 def _create_existence_expectation(
         self, key: str, details: dict) -> ExpectationConfiguration:
     """Build the column-existence expectation for a schema property.

     Args:
         key: Column name stored under ``"column"`` in the kwargs.
         details: Schema fragment for the property; only ``description``
             is read.

     Returns:
         An ``expect_column_to_exist`` configuration. When the schema has
         a truthy ``description`` it is attached as markdown notes in
         ``meta``; otherwise ``meta`` is ``None``.
     """
     column_kwargs = ExpectationKwargs(column=key)
     description = details.get("description")
     if description:
         notes = {
             "format": "markdown",
             "content": [f"### Description:\n{description}"],
         }
         meta = {"notes": notes}
     else:
         meta = None
     return ExpectationConfiguration(
         "expect_column_to_exist", column_kwargs, meta=meta)
コード例 #10
0
def kwargs1():
    """Return ``ExpectationKwargs`` built from one positional dict.

    The dict holds column "a", value set [1, 2, 3] and result format
    "BASIC". The positional-dict call form is deliberate and is kept.
    """
    raw_kwargs = {
        "column": "a",
        "value_set": [1, 2, 3],
        "result_format": "BASIC",
    }
    return ExpectationKwargs(raw_kwargs)
コード例 #11
0
def kwargs3():
    """Return ``ExpectationKwargs`` built from keyword arguments.

    Uses column "a", value set [1, 2, 3] and result format "COMPLETE".
    """
    settings = {
        "column": "a",
        "value_set": [1, 2, 3],
        "result_format": "COMPLETE",
    }
    # Unpacked so the constructor still receives keyword arguments.
    return ExpectationKwargs(**settings)
コード例 #12
0
def kwargs2():
    """Return ``ExpectationKwargs`` built from keyword arguments.

    Uses column "a", value set [1, 2, 3] and result format "BASIC".
    """
    settings = {
        "column": "a",
        "value_set": [1, 2, 3],
        "result_format": "BASIC",
    }
    # Unpacked so the constructor still receives keyword arguments.
    return ExpectationKwargs(**settings)
コード例 #13
0
def test_expectation_suite_filedata_asset():
    """Check what ``get_expectation_suite`` returns for a ``FileDataAsset``.

    Two regex-count expectations are registered; the suite is then fetched
    with default arguments, with result-format kwargs and failed
    expectations kept, and with failed expectations discarded. Each result
    is compared with the configurations the test expects.
    """
    equal_type = 'expect_file_line_regex_match_count_to_equal'
    between_type = 'expect_file_line_regex_match_count_to_be_between'

    def equal_config_with_result_format():
        # Expected "equal" configuration when result_format is retained.
        return ExpectationConfiguration(
            expectation_type=equal_type,
            kwargs={
                'expected_count': 3,
                'regex': ',\\S',
                'result_format': "BASIC",
                'skip': 1
            })

    # Load the data file into a FileDataAsset
    csv_path = file_relative_path(__file__,
                                  '../test_sets/toy_data_complete.csv')
    asset = ge.data_asset.FileDataAsset(csv_path)

    # Register the expectations
    asset.expect_file_line_regex_match_count_to_equal(
        regex=r',\S',
        expected_count=3,
        skip=1,
        result_format="BASIC",
        catch_exceptions=True)
    asset.expect_file_line_regex_match_count_to_be_between(
        regex=r',\S',
        expected_max_count=2,
        skip=1,
        result_format="SUMMARY",
        include_config=True)

    # Default arguments: only the "equal" expectation, without result_format
    default_suite = asset.get_expectation_suite()
    expected_default = [
        ExpectationConfiguration(
            expectation_type=equal_type,
            kwargs=ExpectationKwargs(expected_count=3, regex=',\\S', skip=1))
    ]
    assert expected_default == default_suite.expectations

    # Keep result format kwargs and failed expectations
    verbose_suite = asset.get_expectation_suite(
        discard_result_format_kwargs=False, discard_failed_expectations=False)
    expected_verbose = [
        equal_config_with_result_format(),
        ExpectationConfiguration(
            expectation_type=between_type,
            kwargs={
                'expected_max_count': 2,
                'regex': ',\\S',
                'result_format': 'SUMMARY',
                'skip': 1
            })
    ]
    assert expected_verbose == verbose_suite.expectations

    # Keep result format kwargs but discard failed expectations
    passing_suite = asset.get_expectation_suite(
        discard_result_format_kwargs=False, discard_failed_expectations=True)
    assert [equal_config_with_result_format()] == passing_suite.expectations
コード例 #14
0
def test_expectation_suite_filedata_asset():
    """Check what ``get_expectation_suite`` returns for a ``FileDataAsset``.

    Two regex-count expectations are registered; the suite is then fetched
    with default arguments, with result-format kwargs and failed
    expectations kept, and with failed expectations discarded. Each result
    is compared with the configurations the test expects.
    """
    equal_type = "expect_file_line_regex_match_count_to_equal"
    between_type = "expect_file_line_regex_match_count_to_be_between"

    def equal_config_with_result_format():
        # Expected "equal" configuration when result_format is retained.
        return ExpectationConfiguration(
            expectation_type=equal_type,
            kwargs={
                "expected_count": 3,
                "regex": ",\\S",
                "result_format": "BASIC",
                "skip": 1,
            },
        )

    # Load the data file into a FileDataAsset
    csv_path = file_relative_path(__file__,
                                  "../test_sets/toy_data_complete.csv")
    asset = ge.data_asset.FileDataAsset(csv_path)

    # Register the expectations
    asset.expect_file_line_regex_match_count_to_equal(
        regex=r",\S",
        expected_count=3,
        skip=1,
        result_format="BASIC",
        catch_exceptions=True,
    )
    asset.expect_file_line_regex_match_count_to_be_between(
        regex=r",\S",
        expected_max_count=2,
        skip=1,
        result_format="SUMMARY",
        include_config=True,
    )

    # Default arguments: only the "equal" expectation, without result_format
    default_suite = asset.get_expectation_suite()
    expected_default = [
        ExpectationConfiguration(
            expectation_type=equal_type,
            kwargs=ExpectationKwargs(expected_count=3, regex=",\\S", skip=1),
        )
    ]
    assert expected_default == default_suite.expectations

    # Keep result format kwargs and failed expectations
    verbose_suite = asset.get_expectation_suite(
        discard_result_format_kwargs=False, discard_failed_expectations=False)
    expected_verbose = [
        equal_config_with_result_format(),
        ExpectationConfiguration(
            expectation_type=between_type,
            kwargs={
                "expected_max_count": 2,
                "regex": ",\\S",
                "result_format": "SUMMARY",
                "skip": 1,
            },
        ),
    ]
    assert expected_verbose == verbose_suite.expectations

    # Keep result format kwargs but discard failed expectations
    passing_suite = asset.get_expectation_suite(
        discard_result_format_kwargs=False, discard_failed_expectations=True)
    assert [equal_config_with_result_format()] == passing_suite.expectations