Python RegexBasedColumnMapExpectation Examples, great_expectations.expectations.regex_based_column_map_expectation.RegexBasedColumnMapExpectation Python Examples

Example #1

0

Show file

File: regex_based_column_map_expectation_template.py Project: rpatil524/great_expectations

class ExpectColumnValuesToMatchSomeRegex(RegexBasedColumnMapExpectation):
    """TODO: Add a docstring here"""

    # Template placeholders: these three values configure the metric that is
    # registered for this expectation further down. Replace them when copying
    # this template.
    regex_camel_name = "RegexName"
    regex = "regex pattern"
    semantic_type_name_plural = None

    # Examples shown in the public gallery and also executed as unit tests
    # for the Expectation. Intentionally empty in the template.
    examples = []

    # Build the custom metric from the name and pattern declared above.
    map_metric = RegexBasedColumnMapExpectation.register_metric(
        regex_camel_name=regex_camel_name,
        regex_=regex,
    )

    # Metadata displayed in the public Gallery.
    library_metadata = {
        # Tags for this Expectation in the Gallery.
        "tags": [],
        # Github handles for all contributors to this Expectation.
        "contributors": [
            "@your_name_here",  # Replace with your own github handle.
        ],
    }

Example #2

0

Show file

File: expect_column_values_to_have_valid_icd10_code_format.py Project: rpatil524/great_expectations

class ExpectColumnValuesToHaveValidICD10CodeFormat(RegexBasedColumnMapExpectation):
    """Checks if each value matches a regex for ICD10 codes. Does NOT ensure
    the given code actually exists in any version of the ICD10.
    """

    # Values used to configure the metric registered below.
    regex_camel_name = "ICD10Codes"
    # Shape: letter, digit, alphanumeric, then an optional "." followed by up
    # to four alphanumerics. The pattern is anchored at BOTH ends: without the
    # leading "^" a search-style matcher would accept any value that merely
    # ends with a well-formed code (e.g. "xxZ99.0").
    regex = r"^[A-Za-z][0-9][A-Za-z0-9](?:\.[A-Za-z0-9]{0,4})?\Z"
    semantic_type_name_plural = None

    # Example data and tests for this Expectation.
    examples = [
        {
            "data": {
                "valid_icd10": ["Z99.0", "Z08", "J09.X2", "S22.000A"],
                "invalid_icd10": ["XXX.X", "AA2.01", "2A", "S22.0000A"],
            },
            "tests": [
                {
                    "title": "positive_test",
                    "exact_match_out": False,
                    "in": {"column": "valid_icd10"},
                    "out": {"success": True},
                    "include_in_gallery": True,
                },
                {
                    "title": "negative_test",
                    "exact_match_out": False,
                    "in": {"column": "invalid_icd10"},
                    "out": {
                        "success": False,
                        # Every row of the invalid column is unexpected.
                        "unexpected_index_list": [0, 1, 2, 3],
                    },
                },
            ],
            # Only the pandas backend is exercised for these examples.
            "test_backends": [
                {
                    "backend": "pandas",
                    "dialects": None,
                },
            ],
        }
    ]

    # Here your regex is used to create a custom metric for this expectation
    map_metric = RegexBasedColumnMapExpectation.register_metric(
        regex_camel_name=regex_camel_name,
        regex_=regex,
    )

    # This object contains metadata for display in the public Gallery
    library_metadata = {
        "tags": ["typed-entities", "hackathon"],
        "contributors": [
            "@zachlindsey",
        ],
    }

Example #3

0

Show file

File: expect_column_values_to_be_valid_scientific_notation.py Project: admariner/great_expectations

class ExpectColumnValuesToBeValidScientificNotation(RegexBasedColumnMapExpectation):
    """Expect values in this column to be a valid scientific notation string."""

    # These values will be used to configure the metric created by your expectation
    regex_camel_name = "ScientificNotation"
    # Raw string: the pattern contains "\-", "\." and "\d", which are invalid
    # escape sequences in a normal string literal and trigger a warning on
    # modern Python. The resulting pattern text is unchanged.
    # Structure: optional sign, integer part without leading zeros, optional
    # fractional part, optional exponent that must directly follow a digit.
    regex = r"^[+\-]?(?=\.\d|\d)(?:0|[1-9]\d*)?(?:\.\d+)?(?:(?<=\d)(?:[eE][+\-]?\d+))?$"
    semantic_type_name_plural = "scientific_notations"

    # These examples will be shown in the public gallery.
    # They will also be executed as unit tests for your Expectation.
    examples = [
        {
            "data": {
                "valid": ["-3.14", "1.07E-10", "3.14e-12"],
                # NOTE(review): "0E+5" does match the pattern above; the
                # negative test still fails the column because "11.e-12" and
                # "007" do not match and "mostly" is 1.
                "invalid": ["11.e-12", "0E+5", "007"],
                "empty": ["", None, False],
            },
            "tests": [
                {
                    "title": "basic_positive_test",
                    "exact_match_out": False,
                    "include_in_gallery": True,
                    "in": {"column": "valid"},
                    "out": {
                        "success": True,
                    },
                },
                {
                    "title": "basic_negative_test",
                    "exact_match_out": False,
                    "include_in_gallery": True,
                    "in": {"column": "invalid", "mostly": 1},
                    "out": {
                        "success": False,
                    },
                },
                {
                    "title": "empty",
                    "exact_match_out": False,
                    "include_in_gallery": True,
                    "in": {"column": "empty", "mostly": 1},
                    "out": {
                        "success": False,
                    },
                },
            ],
        }
    ]

    # Here your regex is used to create a custom metric for this expectation
    map_metric = RegexBasedColumnMapExpectation.register_metric(
        regex_camel_name=regex_camel_name,
        regex_=regex,
    )

    # This object contains metadata for display in the public Gallery
    library_metadata = {
        "maturity": "experimental",
        "tags": [
            "scientific_notation",
            "expectation",
        ],  # Tags for this Expectation in the Gallery
        "contributors": [  # Github handles for all contributors to this Expectation.
            "@rdodev",
        ],
    }

Example #4

0

Show file

class ExpectColumnValuesToBeValidGeohash(RegexBasedColumnMapExpectation):
    """Expect values in this column to be a valid geohash."""

    # Configuration for the metric registered further down.
    regex_camel_name = "Geohash"
    # One or more characters drawn from the digits and the lowercase letters,
    # with "a", "i", "l" and "o" left out of the character class.
    regex = "^[0123456789bcdefghjkmnpqrstuvwxyz]+$"
    semantic_type_name_plural = "geohashes"

    # Gallery examples; these are also executed as unit tests for the
    # Expectation.
    examples = [
        {
            "data": {
                "valid_geohash": ["dpz8"],
                # "a" is not part of the accepted character class.
                "invalid_alphanumeric": ["apz8"],
                "invalid_non_alphanumeric": ["dp2-"],
                "empty": [""],
            },
            "tests": [
                {
                    "title": "basic_positive_test",
                    "exact_match_out": False,
                    "include_in_gallery": True,
                    "in": {"column": "valid_geohash"},
                    "out": {"success": True},
                },
                {
                    "title": "basic_negative_test",
                    "exact_match_out": False,
                    "include_in_gallery": True,
                    "in": {"column": "invalid_alphanumeric", "mostly": 1},
                    "out": {"success": False},
                },
                {
                    "title": "invalid_non_alphanumeric",
                    "exact_match_out": False,
                    "include_in_gallery": True,
                    "in": {"column": "invalid_non_alphanumeric", "mostly": 1},
                    "out": {"success": False},
                },
                {
                    "title": "empty",
                    "exact_match_out": False,
                    "include_in_gallery": True,
                    "in": {"column": "empty", "mostly": 1},
                    "out": {"success": False},
                },
            ],
        }
    ]

    # Create the custom metric from the name and pattern declared above.
    map_metric = RegexBasedColumnMapExpectation.register_metric(
        regex_camel_name=regex_camel_name,
        regex_=regex,
    )

    # Metadata displayed in the public Gallery.
    library_metadata = {
        # Tags for this Expectation in the Gallery.
        "tags": ["geospatial", "hackathon-22"],
        # Github handles for all contributors to this Expectation.
        "contributors": ["@chrisarnold91"],
    }

Example #5

0

Show file

File: expect_column_values_to_only_contain_vowels.py Project: rpatil524/great_expectations

class ExpectColumnValuesToOnlyContainVowels(RegexBasedColumnMapExpectation):
    """Values in this column should only contain vowels"""

    # Configuration for the metric registered further down.
    regex_camel_name = "Vowel"
    # Zero or more vowels in either case ("y" is included); because of the
    # "*" quantifier the empty string matches as well.
    regex = "^[aeiouyAEIOUY]*$"
    semantic_type_name_plural = "vowels"

    # Example data, the tests run against it, and the backends to run on.
    examples = [
        {
            "data": {
                "only_vowels": ["a", "e", "I", "O", "U", "y", ""],
                "mixed": ["A", "b", "c", "D", "E", "F", "g"],
                "longer_vowels": ["aei", "YAY", "oYu", "eee", "", "aeIOUY", None],
                "contains_vowels_but_also_other_stuff": [
                    "baa",
                    "aba",
                    "aab",
                    "1a1",
                    "a a",
                    " ",
                    "*",
                ],
            },
            "tests": [
                {
                    "title": "positive_test",
                    "exact_match_out": False,
                    "in": {"column": "only_vowels"},
                    "out": {"success": True},
                    "include_in_gallery": True,
                },
                {
                    "title": "negative_test",
                    "exact_match_out": False,
                    "in": {"column": "mixed"},
                    "out": {
                        "success": False,
                        # Indices of the consonants in "mixed".
                        "unexpected_index_list": [1, 2, 3, 5, 6],
                    },
                    "include_in_gallery": True,
                },
                {
                    "title": "another_postive_test",
                    "exact_match_out": False,
                    "in": {"column": "longer_vowels"},
                    "out": {"success": True},
                    "include_in_gallery": True,
                },
                {
                    "title": "another_negative_test",
                    "exact_match_out": False,
                    "in": {"column": "contains_vowels_but_also_other_stuff"},
                    "out": {
                        "success": False,
                        "unexpected_index_list": [0, 1, 2, 3, 4, 5, 6],
                    },
                    "include_in_gallery": True,
                },
                {
                    # 2 of the 7 "mixed" values are vowels, which clears 0.1.
                    "title": "mostly_positive_test",
                    "exact_match_out": False,
                    "in": {"column": "mixed", "mostly": 0.1},
                    "out": {"success": True},
                    "include_in_gallery": True,
                },
                {
                    # ...but 2 of 7 falls short of 0.3.
                    "title": "mostly_negative_test",
                    "exact_match_out": False,
                    "in": {"column": "mixed", "mostly": 0.3},
                    "out": {"success": False},
                    "include_in_gallery": True,
                },
            ],
            "test_backends": [
                {"backend": "pandas", "dialects": None},
                {"backend": "sqlalchemy", "dialects": ["sqlite", "postgresql"]},
                {"backend": "spark", "dialects": None},
            ],
        }
    ]

    # Create the custom metric from the name and pattern declared above.
    map_metric = RegexBasedColumnMapExpectation.register_metric(
        regex_camel_name=regex_camel_name,
        regex_=regex,
    )

    # Gallery metadata: tags and contributor handles.
    library_metadata = {
        "tags": ["regex"],
        "contributors": ["@joegargery"],
    }

Example #6

0

Show file

class ExpectColumnValuesToBeValidArn(RegexBasedColumnMapExpectation):
    """Expect values in this column to be a valid amazon arn."""

    # These values will be used to configure the metric created by your expectation
    regex_camel_name = "AmazonResourceName"
    # The pattern is split per ARN field for readability; the adjacent
    # literals concatenate to one string. "\n" is deliberately a real newline
    # character inside the negated classes (this is not a raw string), and
    # the slash escape is spelled "\\/" because a bare "\/" is an invalid
    # escape sequence that triggers a warning on modern Python. The resulting
    # pattern text is unchanged.
    regex = (
        "^arn:"
        "(?P<Partition>[^:\n]*):"
        "(?P<Service>[^:\n]*):"
        "(?P<Region>[^:\n]*):"
        "(?P<AccountID>[^:\n]*):"
        "(?P<Ignore>(?P<ResourceType>[^:\\/\n]*)[:\\/])?"
        "(?P<Resource>.*)$"
    )
    semantic_type_name_plural = "arns"

    # These examples will be shown in the public gallery.
    # They will also be executed as unit tests for your Expectation.
    examples = [
        {
            "data": {
                "valid_arns": [
                    "arn:aws:s3:::my-bucket/my-object",
                    "arn:partition:service:region:account-id:resource",
                ],
                "invalid_alphanumeric": [
                    "apz8",
                    "bubba:arn:123",
                ],
                # NOTE(review): the first value does match the pattern (the
                # trailing Resource group absorbs the extra colons); the
                # column still fails because "arn::::" has too few fields
                # and "mostly" is 1.
                "invalid_arn": [
                    "arn:aws:::::::my-bucket/my-object",
                    "arn::::",
                ],
                "empty": ["", None],
            },
            "tests": [
                {
                    "title": "basic_positive_test",
                    "exact_match_out": False,
                    "include_in_gallery": True,
                    "in": {"column": "valid_arns"},
                    "out": {
                        "success": True,
                    },
                },
                {
                    "title": "basic_negative_test",
                    "exact_match_out": False,
                    "include_in_gallery": True,
                    "in": {"column": "invalid_alphanumeric", "mostly": 1},
                    "out": {
                        "success": False,
                    },
                },
                {
                    "title": "invalid_non_alphanumeric",
                    "exact_match_out": False,
                    "include_in_gallery": True,
                    "in": {"column": "invalid_arn", "mostly": 1},
                    "out": {
                        "success": False,
                    },
                },
                {
                    "title": "empty",
                    "exact_match_out": False,
                    "include_in_gallery": True,
                    "in": {"column": "empty", "mostly": 1},
                    "out": {
                        "success": False,
                    },
                },
            ],
        }
    ]

    # Here your regex is used to create a custom metric for this expectation
    map_metric = RegexBasedColumnMapExpectation.register_metric(
        regex_camel_name=regex_camel_name,
        regex_=regex,
    )

    # This object contains metadata for display in the public Gallery
    library_metadata = {
        "maturity": "experimental",
        "tags": [
            "amazon",
            "arn",
            "expectation",
        ],  # Tags for this Expectation in the Gallery
        "contributors": [  # Github handles for all contributors to this Expectation.
            "@rdodev",
        ],
    }