def _atomic_prescriptive_template(
     cls,
     configuration=None,
     result=None,
     language=None,
     runtime_configuration=None,
     **kwargs,
 ):
     runtime_configuration = runtime_configuration or {}
     include_column_name = runtime_configuration.get(
         "include_column_name", True)
     include_column_name = (include_column_name
                            if include_column_name is not None else True)
     styling = runtime_configuration.get("styling")
     params = substitute_none_for_missing(
         configuration.kwargs,
         ["min_value", "max_value", "strict_min", "strict_max"],
     )
     if params["min_value"] is None and params["max_value"] is None:
         template_str = "May have any number of columns."
     else:
         at_least_str, at_most_str = handle_strict_min_max(params)
         if params["min_value"] is not None and params[
                 "max_value"] is not None:
             template_str = f"Must have {at_least_str} $min_value and {at_most_str} $max_value columns."
         elif params["min_value"] is None:
             template_str = f"Must have {at_most_str} $max_value columns."
         elif params["max_value"] is None:
             template_str = f"Must have {at_least_str} $min_value columns."
     params_with_json_schema = {
         "min_value": {
             "schema": {
                 "type": "number"
             },
             "value": params.get("min_value"),
         },
         "max_value": {
             "schema": {
                 "type": "number"
             },
             "value": params.get("max_value"),
         },
         "strict_min": {
             "schema": {
                 "type": "boolean"
             },
             "value": params.get("strict_min"),
         },
         "strict_max": {
             "schema": {
                 "type": "boolean"
             },
             "value": params.get("strict_max"),
         },
     }
     return (template_str, params_with_json_schema, styling)
Exemple #2
0
    def _prescriptive_renderer(
        cls,
        configuration=None,
        result=None,
        language=None,
        runtime_configuration=None,
        **kwargs,
    ):
        runtime_configuration = runtime_configuration or {}
        include_column_name = runtime_configuration.get(
            "include_column_name", True)
        include_column_name = (include_column_name
                               if include_column_name is not None else True)
        styling = runtime_configuration.get("styling")
        params = substitute_none_for_missing(
            configuration.kwargs,
            ["column", "value", "mostly", "row_condition", "condition_parser"],
        )

        if params.get("value") is None:
            template_str = "values may have any length."
        else:
            template_str = "values must be $value characters long"
            if params["mostly"] is not None:
                params["mostly_pct"] = num_to_str(params["mostly"] * 100,
                                                  precision=15,
                                                  no_scientific=True)
                # params["mostly_pct"] = "{:.14f}".format(params["mostly"]*100).rstrip("0").rstrip(".")
                template_str += ", at least $mostly_pct % of the time."
            else:
                template_str += "."

        if include_column_name:
            template_str = "$column " + template_str

        if params["row_condition"] is not None:
            (
                conditional_template_str,
                conditional_params,
            ) = parse_row_condition_string_pandas_engine(
                params["row_condition"])
            template_str = conditional_template_str + ", then " + template_str
            params.update(conditional_params)

        return [
            RenderedStringTemplateContent(
                **{
                    "content_block_type": "string_template",
                    "string_template": {
                        "template": template_str,
                        "params": params,
                        "styling": styling,
                    },
                })
        ]
Exemple #3
0
    def _prescriptive_renderer(
        cls,
        configuration: ExpectationConfiguration = None,
        result: ExpectationValidationResult = None,
        language: str = None,
        runtime_configuration: dict = None,
        **kwargs,
    ) -> List[Union[dict, str, RenderedStringTemplateContent,
                    RenderedTableContent, RenderedBulletListContent,
                    RenderedGraphContent, Any, ]]:
        runtime_configuration = runtime_configuration or {}
        include_column_name = runtime_configuration.get(
            "include_column_name", True)
        include_column_name = (include_column_name
                               if include_column_name is not None else True)
        styling = runtime_configuration.get("styling")
        params = substitute_none_for_missing(
            configuration.kwargs,
            [
                "column",
                "center_point",
                "range",
                "unit",
                "projection",
                "mostly",
            ],
        )

        if params["mostly"] is None:
            template_str = "values must be in $projection projection within $range $unit of $center_point"
        else:
            if params["mostly"] is not None:
                params["mostly_pct"] = num_to_str(params["mostly"] * 100,
                                                  precision=15,
                                                  no_scientific=True)
                template_str += ", at least $mostly_pct % of the time."
            else:
                template_str += "."

        if include_column_name:
            template_str = f"$column {template_str}"

        return [
            RenderedStringTemplateContent(
                **{
                    "content_block_type": "string_template",
                    "string_template": {
                        "template": template_str,
                        "params": params,
                        "styling": styling,
                    },
                })
        ]
    def _atomic_prescriptive_template(
        cls,
        configuration=None,
        result=None,
        language=None,
        runtime_configuration=None,
        **kwargs,
    ):
        runtime_configuration = runtime_configuration or {}
        include_column_name = runtime_configuration.get(
            "include_column_name", True)
        include_column_name = (include_column_name
                               if include_column_name is not None else True)
        styling = runtime_configuration.get("styling")
        params = substitute_none_for_missing(configuration.kwargs,
                                             ["column_set", "exact_match"])

        if params["column_set"] is None:
            template_str = "Must specify a set or list of columns."

        else:
            # standardize order of the set for output
            params["column_list"] = list(params["column_set"])

            column_list_template_str = ", ".join([
                f"$column_list_{idx}"
                for idx in range(len(params["column_list"]))
            ])

            exact_match_str = "exactly" if params[
                "exact_match"] is True else "at least"

            template_str = f"Must have {exact_match_str} these columns (in any order): {column_list_template_str}"

            for idx in range(len(params["column_list"])):
                params[f"column_list_{str(idx)}"] = params["column_list"][idx]

        params_with_json_schema = {
            "column_list": {
                "schema": {
                    "type": "array"
                },
                "value": params.get("column_list"),
            },
            "exact_match": {
                "schema": {
                    "type": "boolean"
                },
                "value": params.get("exact_match"),
            },
        }
        return (template_str, params_with_json_schema, styling)
    def _atomic_prescriptive_template(
        cls,
        configuration=None,
        result=None,
        language=None,
        runtime_configuration=None,
        **kwargs,
    ):
        runtime_configuration = runtime_configuration or {}
        include_column_name = runtime_configuration.get(
            "include_column_name", True)
        include_column_name = (include_column_name
                               if include_column_name is not None else True)
        styling = runtime_configuration.get("styling")
        params = substitute_none_for_missing(
            configuration.kwargs,
            ["value", "row_condition", "condition_parser"],
        )
        params_with_json_schema = {
            "value": {
                "schema": {
                    "type": "number"
                },
                "value": params.get("value"),
            },
            "row_condition": {
                "schema": {
                    "type": "string"
                },
                "value": params.get("row_condition"),
            },
            "condition_parser": {
                "schema": {
                    "type": "string"
                },
                "value": params.get("condition_parser"),
            },
        }
        template_str = "Must have exactly $value rows."

        if params["row_condition"] is not None:
            (
                conditional_template_str,
                conditional_params,
            ) = parse_row_condition_string_pandas_engine(
                params["row_condition"], with_schema=True)
            template_str = (conditional_template_str + ", then " +
                            template_str[0].lower() + template_str[1:])
            params_with_json_schema.update(conditional_params)

        return (template_str, params_with_json_schema, styling)
Exemple #6
0
def test_substitute_none_for_missing():
    assert substitute_none_for_missing(kwargs={
        "a": 1,
        "b": 2
    },
                                       kwarg_list=["c", "d"]) == {
                                           "a": 1,
                                           "b": 2,
                                           "c": None,
                                           "d": None
                                       }

    my_kwargs = {"a": 1, "b": 2}
    assert substitute_none_for_missing(kwargs=my_kwargs,
                                       kwarg_list=["c", "d"]) == {
                                           "a": 1,
                                           "b": 2,
                                           "c": None,
                                           "d": None,
                                       }
    assert my_kwargs == {
        "a": 1,
        "b": 2,
    }, "substitute_none_for_missing should not change input kwargs in place."
Exemple #7
0
    def _prescriptive_renderer(
        cls,
        configuration=None,
        result=None,
        language=None,
        runtime_configuration=None,
        **kwargs,
    ):
        runtime_configuration = runtime_configuration or {}
        include_column_name = runtime_configuration.get(
            "include_column_name", True)
        include_column_name = (include_column_name
                               if include_column_name is not None else True)
        styling = runtime_configuration.get("styling")
        params = substitute_none_for_missing(
            configuration.kwargs,
            [
                "min_value",
                "max_value",
                "strict_min",
                "strict_max",
            ],
        )

        if params["min_value"] is None and params["max_value"] is None:
            template_str = "May have any number of rows."
        else:
            at_least_str, at_most_str = handle_strict_min_max(params)

            if params["min_value"] is not None and params[
                    "max_value"] is not None:
                template_str = f"Must have {at_least_str} $min_value and {at_most_str} $max_value rows."
            elif params["min_value"] is None:
                template_str = f"Must have {at_most_str} $max_value rows."
            elif params["max_value"] is None:
                template_str = f"Must have {at_least_str} $min_value rows."

        return [
            RenderedStringTemplateContent(
                **{
                    "content_block_type": "string_template",
                    "string_template": {
                        "template": template_str,
                        "params": params,
                        "styling": styling,
                    },
                })
        ]
    def _prescriptive_renderer(
        cls,
        configuration=None,
        result=None,
        language=None,
        runtime_configuration=None,
        **kwargs,
    ):
        runtime_configuration = runtime_configuration or {}
        include_column_name = runtime_configuration.get(
            "include_column_name", True)
        include_column_name = (include_column_name
                               if include_column_name is not None else True)
        styling = runtime_configuration.get("styling")
        params = substitute_none_for_missing(
            configuration.kwargs,
            ["column", "country_iso_a3", "polygon_points", "mostly"],
        )

        template_str = "values must be inside "
        if params["country_iso_a3"] is not None:
            template_str = "$country_iso_a3 country"
        else:
            if params["polygon_points"] is not None:
                template_str = "polygon defined by $polygon_points"
        if params["mostly"] is not None:
            params["mostly_pct"] = num_to_str(params["mostly"] * 100,
                                              precision=15,
                                              no_scientific=True)
            # params["mostly_pct"] = "{:.14f}".format(params["mostly"]*100).rstrip("0").rstrip(".")
            template_str += ", at least $mostly_pct % of the time."
        else:
            template_str += "."

        if include_column_name:
            template_str = "$column " + template_str

        return [
            RenderedStringTemplateContent(
                **{
                    "content_block_type": "string_template",
                    "string_template": {
                        "template": template_str,
                        "params": params,
                        "styling": styling,
                    },
                })
        ]
Exemple #9
0
    def _prescriptive_renderer(
        cls,
        configuration=None,
        result=None,
        language=None,
        runtime_configuration=None,
        **kwargs,
    ):
        runtime_configuration = runtime_configuration or {}
        include_column_name = runtime_configuration.get(
            "include_column_name", True)
        include_column_name = (include_column_name
                               if include_column_name is not None else True)
        styling = runtime_configuration.get("styling")
        params = substitute_none_for_missing(configuration.kwargs,
                                             ["column_set", "exact_match"])

        if params["column_set"] is None:
            template_str = "Must specify a set or list of columns."

        else:
            # standardize order of the set for output
            params["column_list"] = list(params["column_set"])

            column_list_template_str = ", ".join([
                f"$column_list_{idx}"
                for idx in range(len(params["column_list"]))
            ])

            exact_match_str = "exactly" if params[
                "exact_match"] is True else "at least"

            template_str = f"Must have {exact_match_str} these columns (in any order): {column_list_template_str}"

            for idx in range(len(params["column_list"])):
                params["column_list_" + str(idx)] = params["column_list"][idx]

        return [
            RenderedStringTemplateContent(
                **{
                    "content_block_type": "string_template",
                    "string_template": {
                        "template": template_str,
                        "params": params,
                        "styling": styling,
                    },
                })
        ]
Exemple #10
0
 def _prescriptive_renderer(cls,
                            configuration=None,
                            result=None,
                            language=None,
                            runtime_configuration=None,
                            **kwargs):
     runtime_configuration = runtime_configuration or {}
     include_column_name = runtime_configuration.get(
         "include_column_name", True)
     include_column_name = (include_column_name
                            if include_column_name is not None else True)
     styling = runtime_configuration.get("styling")
     params = substitute_none_for_missing(
         configuration.kwargs,
         ["column", "mostly", "row_condition", "condition_parser"],
     )
    def _atomic_prescriptive_template(
        cls,
        configuration=None,
        result=None,
        language=None,
        runtime_configuration=None,
        **kwargs,
    ):
        runtime_configuration = runtime_configuration or {}
        include_column_name = runtime_configuration.get(
            "include_column_name", True)
        include_column_name = (include_column_name
                               if include_column_name is not None else True)
        styling = runtime_configuration.get("styling")
        params = substitute_none_for_missing(
            configuration.kwargs,
            ["column", "column_index"],
        )

        if params["column_index"] is None:
            if include_column_name:
                template_str = "$column is a required field."
            else:
                template_str = "is a required field."
        else:
            params["column_indexth"] = ordinal(params["column_index"])
            if include_column_name:
                template_str = "$column must be the $column_indexth field."
            else:
                template_str = "must be the $column_indexth field."

        params_with_json_schema = {
            "column": {
                "schema": {
                    "type": "string"
                },
                "value": params.get("column")
            },
            "column_index": {
                "schema": {
                    "type": "number"
                },
                "value": params.get("column_index"),
            },
        }

        return (template_str, params_with_json_schema, styling)
Exemple #12
0
    def _atomic_prescriptive_template(
        cls,
        configuration=None,
        result=None,
        language=None,
        runtime_configuration=None,
        **kwargs,
    ):
        runtime_configuration = runtime_configuration or {}
        include_column_name = runtime_configuration.get(
            "include_column_name", True)
        include_column_name = (include_column_name
                               if include_column_name is not None else True)
        styling = runtime_configuration.get("styling")
        params = substitute_none_for_missing(configuration.kwargs,
                                             ["column_list"])

        if params["column_list"] is None:
            template_str = "Must have a list of columns in a specific order, but that order is not specified."

        else:
            template_str = "Must have these columns in this order: "
            for idx in range(len(params["column_list"]) - 1):
                template_str += "$column_list_" + str(idx) + ", "
                params["column_list_" + str(idx)] = params["column_list"][idx]

            last_idx = len(params["column_list"]) - 1
            template_str += "$column_list_" + str(last_idx)
            params["column_list_" +
                   str(last_idx)] = params["column_list"][last_idx]

        params_with_json_schema = {
            "column_list": {
                "schema": {
                    "type": "array"
                },
                "value": params.get("column_list"),
            },
        }
        params_with_json_schema = add_values_with_json_schema_from_list_in_params(
            params=params,
            params_with_json_schema=params_with_json_schema,
            param_key_with_list="column_list",
        )
        return (template_str, params_with_json_schema, styling)
    def _prescriptive_renderer(
        cls,
        configuration=None,
        result=None,
        language=None,
        runtime_configuration=None,
        **kwargs,
    ):
        runtime_configuration = runtime_configuration or {}
        include_column_name = runtime_configuration.get("include_column_name", True)
        include_column_name = (
            include_column_name if include_column_name is not None else True
        )
        styling = runtime_configuration.get("styling")
        params = substitute_none_for_missing(
            configuration.kwargs,
            ["value", "row_condition", "condition_parser"],
        )
        template_str = "Must have exactly $value rows."

        if params["row_condition"] is not None:
            (
                conditional_template_str,
                conditional_params,
            ) = parse_row_condition_string_pandas_engine(params["row_condition"])
            template_str = (
                conditional_template_str
                + ", then "
                + template_str[0].lower()
                + template_str[1:]
            )
            params.update(conditional_params)

        return [
            RenderedStringTemplateContent(
                **{
                    "content_block_type": "string_template",
                    "string_template": {
                        "template": template_str,
                        "params": params,
                        "styling": styling,
                    },
                }
            )
        ]
    def _prescriptive_renderer(
        cls,
        configuration: ExpectationConfiguration = None,
        result: ExpectationValidationResult = None,
        language: str = None,
        runtime_configuration: dict = None,
        **kwargs,
    ) -> List[Union[dict, str, RenderedStringTemplateContent,
                    RenderedTableContent, RenderedBulletListContent,
                    RenderedGraphContent, Any, ]]:
        runtime_configuration = runtime_configuration or {}
        include_column_name = runtime_configuration.get(
            "include_column_name", True)
        include_column_name = (include_column_name
                               if include_column_name is not None else True)
        styling = runtime_configuration.get("styling")
        params = substitute_none_for_missing(
            configuration.kwargs,
            [
                "column",
                "range",
                "center_point",
            ],
        )

        template_str = (
            "column average must be in fcc projection within $range of $center_point."
        )

        if include_column_name:
            template_str = f"$column {template_str}"

        return [
            RenderedStringTemplateContent(
                **{
                    "content_block_type": "string_template",
                    "string_template": {
                        "template": template_str,
                        "params": params,
                        "styling": styling,
                    },
                })
        ]
Exemple #15
0
    def _prescriptive_renderer(
        cls,
        configuration=None,
        result=None,
        language=None,
        runtime_configuration=None,
        **kwargs,
    ):
        runtime_configuration = runtime_configuration or {}
        include_column_name = runtime_configuration.get(
            "include_column_name", True)
        include_column_name = (include_column_name
                               if include_column_name is not None else True)
        styling = runtime_configuration.get("styling")
        params = substitute_none_for_missing(configuration.kwargs,
                                             ["column_list"])

        if params["column_list"] is None:
            template_str = "Must have a list of columns in a specific order, but that order is not specified."

        else:
            template_str = "Must have these columns in this order: "
            for idx in range(len(params["column_list"]) - 1):
                template_str += "$column_list_" + str(idx) + ", "
                params["column_list_" + str(idx)] = params["column_list"][idx]

            last_idx = len(params["column_list"]) - 1
            template_str += "$column_list_" + str(last_idx)
            params["column_list_" +
                   str(last_idx)] = params["column_list"][last_idx]

        return [
            RenderedStringTemplateContent(
                **{
                    "content_block_type": "string_template",
                    "string_template": {
                        "template": template_str,
                        "params": params,
                        "styling": styling,
                    },
                })
        ]
    def _prescriptive_renderer(
        cls,
        configuration=None,
        result=None,
        language=None,
        runtime_configuration=None,
        **kwargs,
    ):
        runtime_configuration = runtime_configuration or {}
        include_column_name = runtime_configuration.get(
            "include_column_name", True)
        include_column_name = (include_column_name
                               if include_column_name is not None else True)
        styling = runtime_configuration.get("styling")
        params = substitute_none_for_missing(
            configuration.kwargs,
            ["column", "column_index"],
        )

        if params["column_index"] is None:
            if include_column_name:
                template_str = "$column is a required field."
            else:
                template_str = "is a required field."
        else:
            params["column_indexth"] = ordinal(params["column_index"])
            if include_column_name:
                template_str = "$column must be the $column_indexth field."
            else:
                template_str = "must be the $column_indexth field."

        return [
            RenderedStringTemplateContent(
                **{
                    "content_block_type": "string_template",
                    "string_template": {
                        "template": template_str,
                        "params": params,
                        "styling": styling,
                    },
                })
        ]
Exemple #17
0
 def _atomic_prescriptive_template(
     cls,
     configuration=None,
     result=None,
     language=None,
     runtime_configuration=None,
     **kwargs,
 ):
     runtime_configuration = runtime_configuration or {}
     include_column_name = runtime_configuration.get("include_column_name", True)
     include_column_name = (
         include_column_name if include_column_name is not None else True
     )
     styling = runtime_configuration.get("styling")
     params = substitute_none_for_missing(configuration.kwargs, ["value"])
     template_str = "Must have exactly $value columns."
     params_with_json_schema = {
         "value": {"schema": {"type": "number"}, "value": params.get("value")},
     }
     return (template_str, params_with_json_schema, styling)
Exemple #18
0
 def _prescriptive_renderer(cls,
                            configuration=None,
                            result=None,
                            language=None,
                            runtime_configuration=None,
                            **kwargs):
     runtime_configuration = runtime_configuration or {}
     styling = runtime_configuration.get("styling")
     params = substitute_none_for_missing(configuration.kwargs, ["value"])
     template_str = "Must have exactly 4 columns."
     return [
         RenderedStringTemplateContent(
             **{
                 "content_block_type": "string_template",
                 "string_template": {
                     "template": template_str,
                     "params": params,
                     "styling": styling,
                 },
             })
     ]
Exemple #19
0
    def _atomic_prescriptive_template(
        cls,
        configuration=None,
        result=None,
        language=None,
        runtime_configuration=None,
        **kwargs,
    ):
        runtime_configuration = runtime_configuration or {}
        include_column_name = runtime_configuration.get(
            "include_column_name", True)
        include_column_name = (include_column_name
                               if include_column_name is not None else True)
        styling = runtime_configuration.get("styling")
        params = substitute_none_for_missing(configuration.kwargs,
                                             ["column_A", "column_B"])
        if (params["column_A"] is None) or (params["column_B"] is None):
            template_str = " unrecognized kwargs for expect_column_pair_cramers_phi_value_to_be_less_than: missing column."
        else:
            template_str = "Values in $column_A and $column_B must be independent."

        params_with_json_schema = {
            "column_A": {
                "schema": {
                    "type": "string"
                },
                "value": params.get("column_A")
            },
            "column_B": {
                "schema": {
                    "type": "string"
                },
                "value": params.get("column_B")
            },
        }
        return (template_str, params_with_json_schema, styling)
    def _prescriptive_renderer(
        cls,
        configuration=None,
        result=None,
        language=None,
        runtime_configuration=None,
        **kwargs,
    ):
        runtime_configuration = runtime_configuration or {}
        include_column_name = runtime_configuration.get("include_column_name", True)
        include_column_name = (
            include_column_name if include_column_name is not None else True
        )
        styling = runtime_configuration.get("styling")
        params = substitute_none_for_missing(
            configuration.kwargs,
            [
                "column",
                "regex_list",
                "mostly",
                "match_on",
                "row_condition",
                "condition_parser",
            ],
        )

        if not params.get("regex_list") or len(params.get("regex_list")) == 0:
            values_string = "[ ]"
        else:
            for i, v in enumerate(params["regex_list"]):
                params["v__" + str(i)] = v
            values_string = " ".join(
                ["$v__" + str(i) for i, v in enumerate(params["regex_list"])]
            )

        if params.get("match_on") == "all":
            template_str = (
                "values must match all of the following regular expressions: "
                + values_string
            )
        else:
            template_str = (
                "values must match any of the following regular expressions: "
                + values_string
            )

        if params["mostly"] is not None:
            params["mostly_pct"] = num_to_str(
                params["mostly"] * 100, precision=15, no_scientific=True
            )
            # params["mostly_pct"] = "{:.14f}".format(params["mostly"]*100).rstrip("0").rstrip(".")
            template_str += ", at least $mostly_pct % of the time."
        else:
            template_str += "."

        if include_column_name:
            template_str = "$column " + template_str

        if params["row_condition"] is not None:
            (
                conditional_template_str,
                conditional_params,
            ) = parse_row_condition_string_pandas_engine(params["row_condition"])
            template_str = conditional_template_str + ", then " + template_str
            params.update(conditional_params)

        return [
            RenderedStringTemplateContent(
                **{
                    "content_block_type": "string_template",
                    "string_template": {
                        "template": template_str,
                        "params": params,
                        "styling": styling,
                    },
                }
            )
        ]
Exemple #21
0
    def _prescriptive_renderer(
        cls,
        configuration=None,
        result=None,
        language=None,
        runtime_configuration=None,
        **kwargs,
    ):
        runtime_configuration = runtime_configuration or {}
        include_column_name = runtime_configuration.get("include_column_name", True)
        include_column_name = (
            include_column_name if include_column_name is not None else True
        )
        styling = runtime_configuration.get("styling")
        params = substitute_none_for_missing(
            configuration.kwargs,
            [
                "column",
                "set_",
                "mostly",
                "row_condition",
                "condition_parser",
                "set_semantic_name",
            ],
        )

        if not params.get("set_"):
            template_str = "values must match a set but none was specified."
        else:
            if params.get("set_semantic_name"):
                template_str = "values must match the set $set_semantic_name: $set_"
            else:
                template_str = "values must match this set: $set_"
            if params["mostly"] is not None:
                params["mostly_pct"] = num_to_str(
                    params["mostly"] * 100, precision=15, no_scientific=True
                )
                template_str += ", at least $mostly_pct % of the time."
            else:
                template_str += "."

        if include_column_name:
            template_str = "$column " + template_str

        if params["row_condition"] is not None:
            (
                conditional_template_str,
                conditional_params,
            ) = parse_row_condition_string_pandas_engine(params["row_condition"])
            template_str = conditional_template_str + ", then " + template_str
            params.update(conditional_params)

        params_with_json_schema = {
            "column": {"schema": {"type": "string"}, "value": params.get("column")},
            "mostly": {"schema": {"type": "number"}, "value": params.get("mostly")},
            "mostly_pct": {
                "schema": {"type": "number"},
                "value": params.get("mostly_pct"),
            },
            "set_": {"schema": {"type": "string"}, "value": params.get("set_")},
            "row_condition": {
                "schema": {"type": "string"},
                "value": params.get("row_condition"),
            },
            "condition_parser": {
                "schema": {"type": "string"},
                "value": params.get("condition_parser"),
            },
            "set_semantic_name": {
                "schema": {"type": "string"},
                "value": params.get("set_semantic_name"),
            },
        }

        return [
            RenderedStringTemplateContent(
                **{
                    "content_block_type": "string_template",
                    "string_template": {
                        "template": template_str,
                        "params": params,
                        "styling": styling,
                    },
                }
            )
        ]
Exemple #22
0
    def _prescriptive_renderer(
        cls,
        configuration=None,
        result=None,
        language=None,
        runtime_configuration=None,
        **kwargs,
    ):
        runtime_configuration = runtime_configuration or {}
        include_column_name = runtime_configuration.get("include_column_name", True)
        include_column_name = (
            include_column_name if include_column_name is not None else True
        )
        styling = runtime_configuration.get("styling")
        params = substitute_none_for_missing(
            configuration.kwargs,
            [
                "column",
                "min_value",
                "max_value",
                "mostly",
                "row_condition",
                "condition_parser",
                "strict_min",
                "strict_max",
            ],
        )

        at_least_str, at_most_str = handle_strict_min_max(params)

        if (params["min_value"] is None) and (params["max_value"] is None):
            template_str = "may have any number of unique values."
        else:
            if params["mostly"] is not None and params["mostly"] < 1.0:
                params["mostly_pct"] = num_to_str(
                    params["mostly"] * 100, precision=15, no_scientific=True
                )
                # params["mostly_pct"] = "{:.14f}".format(params["mostly"]*100).rstrip("0").rstrip(".")
                if params["min_value"] is None:
                    template_str = f"must have {at_most_str} $max_value unique values, at least $mostly_pct % of the time."
                elif params["max_value"] is None:
                    template_str = f"must have {at_least_str} $min_value unique values, at least $mostly_pct % of the time."
                else:
                    template_str = f"must have {at_least_str} $min_value and {at_most_str} $max_value unique values, at least $mostly_pct % of the time."
            else:
                if params["min_value"] is None:
                    template_str = f"must have {at_most_str} $max_value unique values."
                elif params["max_value"] is None:
                    template_str = f"must have {at_least_str} $min_value unique values."
                else:
                    template_str = f"must have {at_least_str} $min_value and {at_most_str} $max_value unique values."

        if include_column_name:
            template_str = f"$column {template_str}"

        if params["row_condition"] is not None:
            (
                conditional_template_str,
                conditional_params,
            ) = parse_row_condition_string_pandas_engine(params["row_condition"])
            template_str = f"{conditional_template_str}, then {template_str}"
            params.update(conditional_params)

        return [
            RenderedStringTemplateContent(
                **{
                    "content_block_type": "string_template",
                    "string_template": {
                        "template": template_str,
                        "params": params,
                        "styling": styling,
                    },
                }
            )
        ]
Exemple #23
0
    def _atomic_prescriptive_template(
        cls,
        configuration=None,
        result=None,
        language=None,
        runtime_configuration=None,
        **kwargs,
    ):
        runtime_configuration = runtime_configuration or {}
        include_column_name = runtime_configuration.get("include_column_name", True)
        include_column_name = (
            include_column_name if include_column_name is not None else True
        )
        styling = runtime_configuration.get("styling")
        params = substitute_none_for_missing(
            configuration.kwargs,
            [
                "column",
                "min_value",
                "max_value",
                "mostly",
                "row_condition",
                "condition_parser",
                "strict_min",
                "strict_max",
            ],
        )
        params_with_json_schema = {
            "column": {"schema": {"type": "string"}, "value": params.get("column")},
            "min_value": {
                "schema": {"type": "number"},
                "value": params.get("min_value"),
            },
            "max_value": {
                "schema": {"type": "number"},
                "value": params.get("max_value"),
            },
            "mostly": {"schema": {"type": "number"}, "value": params.get("mostly")},
            "mostly_pct": {
                "schema": {"type": "string"},
                "value": params.get("mostly_pct"),
            },
            "row_condition": {
                "schema": {"type": "string"},
                "value": params.get("row_condition"),
            },
            "condition_parser": {
                "schema": {"type": "string"},
                "value": params.get("condition_parser"),
            },
            "strict_min": {
                "schema": {"type": "boolean"},
                "value": params.get("strict_min"),
            },
            "strict_max": {
                "schema": {"type": "boolean"},
                "value": params.get("strict_max"),
            },
        }

        at_least_str, at_most_str = handle_strict_min_max(params)

        if (params["min_value"] is None) and (params["max_value"] is None):
            template_str = "may have any number of unique values."
        else:
            if params["mostly"] is not None and params["mostly"] < 1.0:
                params_with_json_schema["mostly_pct"]["value"] = num_to_str(
                    params["mostly"] * 100, precision=15, no_scientific=True
                )
                # params["mostly_pct"] = "{:.14f}".format(params["mostly"]*100).rstrip("0").rstrip(".")
                if params["min_value"] is None:
                    template_str = f"must have {at_most_str} $max_value unique values, at least $mostly_pct % of the time."
                elif params["max_value"] is None:
                    template_str = f"must have {at_least_str} $min_value unique values, at least $mostly_pct % of the time."
                else:
                    template_str = f"must have {at_least_str} $min_value and {at_most_str} $max_value unique values, at least $mostly_pct % of the time."
            else:
                if params["min_value"] is None:
                    template_str = f"must have {at_most_str} $max_value unique values."
                elif params["max_value"] is None:
                    template_str = f"must have {at_least_str} $min_value unique values."
                else:
                    template_str = f"must have {at_least_str} $min_value and {at_most_str} $max_value unique values."

        if include_column_name:
            template_str = f"$column {template_str}"

        if params["row_condition"] is not None:
            (
                conditional_template_str,
                conditional_params,
            ) = parse_row_condition_string_pandas_engine(
                params["row_condition"], with_schema=True
            )
            template_str = f"{conditional_template_str}, then {template_str}"
            params_with_json_schema.update(conditional_params)

        return (template_str, params_with_json_schema, styling)
Exemple #24
0
    def _atomic_prescriptive_template(
        cls,
        configuration=None,
        result=None,
        language=None,
        runtime_configuration=None,
        **kwargs,
    ):
        runtime_configuration = runtime_configuration or {}
        include_column_name = runtime_configuration.get("include_column_name", True)
        include_column_name = (
            include_column_name if include_column_name is not None else True
        )
        styling = runtime_configuration.get("styling")
        params = substitute_none_for_missing(
            configuration.kwargs,
            ["column", "mostly", "row_condition", "condition_parser"],
        )
        # format params
        params_with_json_schema = {
            "column": {"schema": {"type": "string"}, "value": params.get("column")},
            "mostly": {"schema": {"type": "number"}, "value": params.get("mostly")},
            "mostly_pct": {
                "schema": {"type": "string"},
                "value": params.get("mostly_pct"),
            },
            "row_condition": {
                "schema": {"type": "string"},
                "value": params.get("row_condition"),
            },
            "condition_parser": {
                "schema": {"type": "string"},
                "value": params.get("condition_parser"),
            },
        }

        if params["mostly"] is not None and params["mostly"] < 1.0:
            params_with_json_schema["mostly_pct"]["value"] = num_to_str(
                params["mostly"] * 100, precision=15, no_scientific=True
            )
            # params["mostly_pct"] = "{:.14f}".format(params["mostly"]*100).rstrip("0").rstrip(".")
            if include_column_name:
                template_str = "$column values must not be null, at least $mostly_pct % of the time."
            else:
                template_str = (
                    "values must not be null, at least $mostly_pct % of the time."
                )
        else:
            if include_column_name:
                template_str = "$column values must never be null."
            else:
                template_str = "values must never be null."

        if params["row_condition"] is not None:
            (
                conditional_template_str,
                conditional_params,
            ) = parse_row_condition_string_pandas_engine(
                params["row_condition"], with_schema=True
            )
            template_str = f"{conditional_template_str}, then {template_str}"
            params_with_json_schema.update(conditional_params)

        return (template_str, params_with_json_schema, styling)
Exemple #25
0
    def _prescriptive_renderer(
        cls,
        configuration=None,
        result=None,
        language=None,
        runtime_configuration=None,
        **kwargs,
    ):
        runtime_configuration = runtime_configuration or {}
        include_column_name = runtime_configuration.get(
            "include_column_name", True)
        include_column_name = (include_column_name
                               if include_column_name is not None else True)
        styling = runtime_configuration.get("styling")

        params = substitute_none_for_missing(
            configuration.kwargs,
            [
                "column_list",
                "ignore_row_if",
                "row_condition",
                "condition_parser",
                "mostly",
            ],
        )

        if params["mostly"] is not None:
            params["mostly_pct"] = num_to_str(params["mostly"] * 100,
                                              precision=15,
                                              no_scientific=True)
        mostly_str = ("" if params.get("mostly") is None else
                      ", at least $mostly_pct % of the time")

        template_str = f"Values must always be unique across columns{mostly_str}: "
        for idx in range(len(params["column_list"]) - 1):
            template_str += "$column_list_" + str(idx) + ", "
            params["column_list_" + str(idx)] = params["column_list"][idx]

        last_idx = len(params["column_list"]) - 1
        template_str += "$column_list_" + str(last_idx)
        params["column_list_" +
               str(last_idx)] = params["column_list"][last_idx]

        if params["row_condition"] is not None:
            (
                conditional_template_str,
                conditional_params,
            ) = parse_row_condition_string_pandas_engine(
                params["row_condition"])
            template_str = (conditional_template_str + ", then " +
                            template_str[0].lower() + template_str[1:])
            params.update(conditional_params)

        return [
            RenderedStringTemplateContent(
                **{
                    "content_block_type": "string_template",
                    "string_template": {
                        "template": template_str,
                        "params": params,
                        "styling": styling,
                    },
                })
        ]
    def _prescriptive_renderer(
        cls,
        configuration=None,
        result=None,
        language=None,
        runtime_configuration=None,
        **kwargs,
    ):
        runtime_configuration = runtime_configuration or {}
        include_column_name = runtime_configuration.get(
            "include_column_name", True)
        include_column_name = (include_column_name
                               if include_column_name is not None else True)
        styling = runtime_configuration.get("styling")
        params = substitute_none_for_missing(
            configuration.kwargs,
            [
                "column",
                "min_value",
                "max_value",
                "row_condition",
                "condition_parser",
                "strict_min",
                "strict_max",
            ],
        )

        if (params["min_value"] is None) and (params["max_value"] is None):
            template_str = "standard deviation may have any numerical value."
        else:
            at_least_str, at_most_str = handle_strict_min_max(params)

            if params["min_value"] is not None and params[
                    "max_value"] is not None:
                template_str = f"standard deviation must be {at_least_str} $min_value and {at_most_str} $max_value."
            elif params["min_value"] is None:
                template_str = f"standard deviation must be {at_most_str} $max_value."
            elif params["max_value"] is None:
                template_str = f"standard deviation must be {at_least_str} $min_value."

        if include_column_name:
            template_str = f"$column {template_str}"

        if params["row_condition"] is not None:
            (
                conditional_template_str,
                conditional_params,
            ) = parse_row_condition_string_pandas_engine(
                params["row_condition"])
            template_str = f"{conditional_template_str}, then {template_str}"
            params.update(conditional_params)

        return [
            RenderedStringTemplateContent(
                **{
                    "content_block_type": "string_template",
                    "string_template": {
                        "template": template_str,
                        "params": params,
                        "styling": styling,
                    },
                })
        ]
    def _atomic_prescriptive_template(
        cls,
        configuration=None,
        result=None,
        language=None,
        runtime_configuration=None,
        **kwargs,
    ):
        runtime_configuration = runtime_configuration or {}
        include_column_name = runtime_configuration.get(
            "include_column_name", True)
        include_column_name = (include_column_name
                               if include_column_name is not None else True)
        styling = runtime_configuration.get("styling")
        params = substitute_none_for_missing(
            configuration.kwargs,
            [
                "column",
                "min_value",
                "max_value",
                "row_condition",
                "condition_parser",
                "strict_min",
                "strict_max",
            ],
        )
        params_with_json_schema = {
            "column": {
                "schema": {
                    "type": "string"
                },
                "value": params.get("column")
            },
            "min_value": {
                "schema": {
                    "type": "number"
                },
                "value": params.get("min_value"),
            },
            "max_value": {
                "schema": {
                    "type": "number"
                },
                "value": params.get("max_value"),
            },
            "row_condition": {
                "schema": {
                    "type": "string"
                },
                "value": params.get("row_condition"),
            },
            "condition_parser": {
                "schema": {
                    "type": "string"
                },
                "value": params.get("condition_parser"),
            },
            "strict_min": {
                "schema": {
                    "type": "boolean"
                },
                "value": params.get("strict_min"),
            },
            "strict_max": {
                "schema": {
                    "type": "boolean"
                },
                "value": params.get("strict_max"),
            },
        }

        if (params["min_value"] is None) and (params["max_value"] is None):
            template_str = "standard deviation may have any numerical value."
        else:
            at_least_str, at_most_str = handle_strict_min_max(params)

            if params["min_value"] is not None and params[
                    "max_value"] is not None:
                template_str = f"standard deviation must be {at_least_str} $min_value and {at_most_str} $max_value."
            elif params["min_value"] is None:
                template_str = f"standard deviation must be {at_most_str} $max_value."
            elif params["max_value"] is None:
                template_str = f"standard deviation must be {at_least_str} $min_value."

        if include_column_name:
            template_str = f"$column {template_str}"

        if params["row_condition"] is not None:
            (
                conditional_template_str,
                conditional_params,
            ) = parse_row_condition_string_pandas_engine(
                params["row_condition"], with_schema=True)
            template_str = f"{conditional_template_str}, then {template_str}"
            params_with_json_schema.update(conditional_params)

        return (template_str, params_with_json_schema, styling)
Exemple #28
0
    def _atomic_prescriptive_template(
        cls,
        configuration=None,
        result=None,
        language=None,
        runtime_configuration=None,
        **kwargs,
    ):
        runtime_configuration = runtime_configuration or {}
        include_column_name = runtime_configuration.get(
            "include_column_name", True)
        include_column_name = (include_column_name
                               if include_column_name is not None else True)
        styling = runtime_configuration.get("styling")

        # NOTE: This expectation is deprecated, please use
        # expect_select_column_values_to_be_unique_within_record instead.

        params = substitute_none_for_missing(
            configuration.kwargs,
            [
                "column_list",
                "ignore_row_if",
                "row_condition",
                "condition_parser",
                "mostly",
            ],
        )
        params_with_json_schema = {
            "column_list": {
                "schema": {
                    "type": "array"
                },
                "value": params.get("column_list"),
            },
            "ignore_row_if": {
                "schema": {
                    "type": "string"
                },
                "value": params.get("ignore_row_if"),
            },
            "row_condition": {
                "schema": {
                    "type": "string"
                },
                "value": params.get("row_condition"),
            },
            "condition_parser": {
                "schema": {
                    "type": "string"
                },
                "value": params.get("condition_parser"),
            },
            "mostly": {
                "schema": {
                    "type": "number"
                },
                "value": params.get("mostly"),
            },
            "mostly_pct": {
                "schema": {
                    "type": "string"
                },
                "value": params.get("mostly_pct"),
            },
        }

        if params["mostly"] is not None and params["mostly"] < 1.0:
            params_with_json_schema["mostly_pct"]["value"] = num_to_str(
                params["mostly"] * 100, precision=15, no_scientific=True)
            template_str = f"Values must be unique across columns, at least $mostly_pct % of the time: "
        else:
            template_str = f"Values must always be unique across columns: "

        for idx in range(len(params["column_list"]) - 1):
            template_str += f"$column_list_{str(idx)}, "
            params[f"column_list_{str(idx)}"] = params["column_list"][idx]

        last_idx = len(params["column_list"]) - 1
        template_str += f"$column_list_{str(last_idx)}"
        params[f"column_list_{str(last_idx)}"] = params["column_list"][
            last_idx]

        if params["row_condition"] is not None:
            (
                conditional_template_str,
                conditional_params,
            ) = parse_row_condition_string_pandas_engine(
                params["row_condition"], with_schema=True)
            template_str = (conditional_template_str + ", then " +
                            template_str[0].lower() + template_str[1:])
            params_with_json_schema.update(conditional_params)

        params_with_json_schema = add_values_with_json_schema_from_list_in_params(
            params=params,
            params_with_json_schema=params_with_json_schema,
            param_key_with_list="column_list",
        )
        return (template_str, params_with_json_schema, styling)
    def _atomic_prescriptive_template(
        cls,
        configuration=None,
        result=None,
        language=None,
        runtime_configuration=None,
        **kwargs,
    ):
        runtime_configuration = runtime_configuration or {}
        include_column_name = runtime_configuration.get("include_column_name", True)
        include_column_name = (
            include_column_name if include_column_name is not None else True
        )
        styling = runtime_configuration.get("styling")
        params = substitute_none_for_missing(
            configuration.kwargs,
            [
                "column",
                "regex_list",
                "mostly",
                "match_on",
                "row_condition",
                "condition_parser",
            ],
        )
        params_with_json_schema = {
            "column": {"schema": {"type": "string"}, "value": params.get("column")},
            "regex_list": {
                "schema": {"type": "array"},
                "value": params.get("regex_list"),
            },
            "mostly": {"schema": {"type": "number"}, "value": params.get("mostly")},
            "match_on": {"schema": {"type": "string"}, "value": params.get("match_on")},
            "row_condition": {
                "schema": {"type": "string"},
                "value": params.get("row_condition"),
            },
            "condition_parser": {
                "schema": {"type": "string"},
                "value": params.get("condition_parser"),
            },
        }

        if not params.get("regex_list") or len(params.get("regex_list")) == 0:
            values_string = "[ ]"
        else:
            for i, v in enumerate(params["regex_list"]):
                params["v__" + str(i)] = v
            values_string = " ".join(
                ["$v__" + str(i) for i, v in enumerate(params["regex_list"])]
            )

        if params.get("match_on") == "all":
            template_str = (
                "values must match all of the following regular expressions: "
                + values_string
            )
        else:
            template_str = (
                "values must match any of the following regular expressions: "
                + values_string
            )

        if params["mostly"] is not None:
            params["mostly_pct"] = num_to_str(
                params["mostly"] * 100, precision=15, no_scientific=True
            )
            # params["mostly_pct"] = "{:.14f}".format(params["mostly"]*100).rstrip("0").rstrip(".")
            template_str += ", at least $mostly_pct % of the time."
        else:
            template_str += "."

        if include_column_name:
            template_str = "$column " + template_str

        if params["row_condition"] is not None:
            (
                conditional_template_str,
                conditional_params,
            ) = parse_row_condition_string_pandas_engine(
                params["row_condition"], with_schema=True
            )
            template_str = conditional_template_str + ", then " + template_str
            params_with_json_schema.update(conditional_params)

        params_with_json_schema = add_values_with_json_schema_from_list_in_params(
            params=params,
            params_with_json_schema=params_with_json_schema,
            param_key_with_list="regex_list",
        )

        return (template_str, params_with_json_schema, styling)
Exemple #30
0
    def _prescriptive_renderer(cls,
                               configuration=None,
                               result=None,
                               language=None,
                               runtime_configuration=None,
                               **kwargs):
        runtime_configuration = runtime_configuration or {}
        include_column_name = runtime_configuration.get(
            "include_column_name", True)
        include_column_name = (include_column_name
                               if include_column_name is not None else True)
        styling = runtime_configuration.get("styling")
        params = substitute_none_for_missing(
            configuration["kwargs"],
            ["column", "quantile_ranges", "row_condition", "condition_parser"],
        )
        template_str = "quantiles must be within the following value ranges."

        if include_column_name:
            template_str = "$column " + template_str

        if params["row_condition"] is not None:
            (
                conditional_template_str,
                conditional_params,
            ) = parse_row_condition_string_pandas_engine(
                params["row_condition"])
            template_str = (conditional_template_str + ", then " +
                            template_str[0].lower() + template_str[1:])
            params.update(conditional_params)

        expectation_string_obj = {
            "content_block_type": "string_template",
            "string_template": {
                "template": template_str,
                "params": params
            },
        }

        quantiles = params["quantile_ranges"]["quantiles"]
        value_ranges = params["quantile_ranges"]["value_ranges"]

        table_header_row = ["Quantile", "Min Value", "Max Value"]
        table_rows = []

        quantile_strings = {0.25: "Q1", 0.75: "Q3", 0.50: "Median"}

        for quantile, value_range in zip(quantiles, value_ranges):
            quantile_string = quantile_strings.get(quantile,
                                                   "{:3.2f}".format(quantile))
            table_rows.append([
                quantile_string,
                str(value_range[0]) if value_range[0] is not None else "Any",
                str(value_range[1]) if value_range[1] is not None else "Any",
            ])

        quantile_range_table = RenderedTableContent(
            **{
                "content_block_type": "table",
                "header_row": table_header_row,
                "table": table_rows,
                "styling": {
                    "body": {
                        "classes": [
                            "table",
                            "table-sm",
                            "table-unbordered",
                            "col-4",
                            "mt-2",
                        ],
                    },
                    "parent": {
                        "styles": {
                            "list-style-type": "none"
                        }
                    },
                },
            })

        return [expectation_string_obj, quantile_range_table]