コード例 #1
0
    def _atomic_prescriptive_template(
        cls,
        configuration=None,
        result=None,
        language=None,
        runtime_configuration=None,
        **kwargs,
    ):
        runtime_configuration = runtime_configuration or {}
        include_column_name = runtime_configuration.get(
            "include_column_name", True)
        include_column_name = (include_column_name
                               if include_column_name is not None else True)
        styling = runtime_configuration.get("styling")
        params = substitute_none_for_missing(configuration.kwargs,
                                             ["column_list"])

        if params["column_list"] is None:
            template_str = "Must have a list of columns in a specific order, but that order is not specified."

        else:
            template_str = "Must have these columns in this order: "
            for idx in range(len(params["column_list"]) - 1):
                template_str += "$column_list_" + str(idx) + ", "
                params["column_list_" + str(idx)] = params["column_list"][idx]

            last_idx = len(params["column_list"]) - 1
            template_str += "$column_list_" + str(last_idx)
            params["column_list_" +
                   str(last_idx)] = params["column_list"][last_idx]

        params_with_json_schema = {
            "column_list": {
                "schema": {
                    "type": "array"
                },
                "value": params.get("column_list"),
            },
        }
        params_with_json_schema = add_values_with_json_schema_from_list_in_params(
            params=params,
            params_with_json_schema=params_with_json_schema,
            param_key_with_list="column_list",
        )
        return (template_str, params_with_json_schema, styling)
    def _atomic_prescriptive_template(
        cls,
        configuration=None,
        result=None,
        language=None,
        runtime_configuration=None,
        **kwargs,
    ):
        runtime_configuration = runtime_configuration or {}
        include_column_name = runtime_configuration.get("include_column_name", True)
        include_column_name = (
            include_column_name if include_column_name is not None else True
        )
        styling = runtime_configuration.get("styling")
        params = substitute_none_for_missing(
            configuration.kwargs,
            [
                "column",
                "regex_list",
                "mostly",
                "match_on",
                "row_condition",
                "condition_parser",
            ],
        )
        params_with_json_schema = {
            "column": {"schema": {"type": "string"}, "value": params.get("column")},
            "regex_list": {
                "schema": {"type": "array"},
                "value": params.get("regex_list"),
            },
            "mostly": {"schema": {"type": "number"}, "value": params.get("mostly")},
            "match_on": {"schema": {"type": "string"}, "value": params.get("match_on")},
            "row_condition": {
                "schema": {"type": "string"},
                "value": params.get("row_condition"),
            },
            "condition_parser": {
                "schema": {"type": "string"},
                "value": params.get("condition_parser"),
            },
        }

        if not params.get("regex_list") or len(params.get("regex_list")) == 0:
            values_string = "[ ]"
        else:
            for i, v in enumerate(params["regex_list"]):
                params["v__" + str(i)] = v
            values_string = " ".join(
                ["$v__" + str(i) for i, v in enumerate(params["regex_list"])]
            )

        if params.get("match_on") == "all":
            template_str = (
                "values must match all of the following regular expressions: "
                + values_string
            )
        else:
            template_str = (
                "values must match any of the following regular expressions: "
                + values_string
            )

        if params["mostly"] is not None:
            params["mostly_pct"] = num_to_str(
                params["mostly"] * 100, precision=15, no_scientific=True
            )
            # params["mostly_pct"] = "{:.14f}".format(params["mostly"]*100).rstrip("0").rstrip(".")
            template_str += ", at least $mostly_pct % of the time."
        else:
            template_str += "."

        if include_column_name:
            template_str = "$column " + template_str

        if params["row_condition"] is not None:
            (
                conditional_template_str,
                conditional_params,
            ) = parse_row_condition_string_pandas_engine(
                params["row_condition"], with_schema=True
            )
            template_str = conditional_template_str + ", then " + template_str
            params_with_json_schema.update(conditional_params)

        params_with_json_schema = add_values_with_json_schema_from_list_in_params(
            params=params,
            params_with_json_schema=params_with_json_schema,
            param_key_with_list="regex_list",
        )

        return (template_str, params_with_json_schema, styling)
コード例 #3
0
    def _atomic_prescriptive_template(
        cls,
        configuration=None,
        result=None,
        language=None,
        runtime_configuration=None,
        **kwargs,
    ):
        runtime_configuration = runtime_configuration or {}
        include_column_name = runtime_configuration.get(
            "include_column_name", True)
        include_column_name = (include_column_name
                               if include_column_name is not None else True)
        styling = runtime_configuration.get("styling")

        # NOTE: This expectation is deprecated, please use
        # expect_select_column_values_to_be_unique_within_record instead.

        params = substitute_none_for_missing(
            configuration.kwargs,
            [
                "column_list",
                "ignore_row_if",
                "row_condition",
                "condition_parser",
                "mostly",
            ],
        )
        params_with_json_schema = {
            "column_list": {
                "schema": {
                    "type": "array"
                },
                "value": params.get("column_list"),
            },
            "ignore_row_if": {
                "schema": {
                    "type": "string"
                },
                "value": params.get("ignore_row_if"),
            },
            "row_condition": {
                "schema": {
                    "type": "string"
                },
                "value": params.get("row_condition"),
            },
            "condition_parser": {
                "schema": {
                    "type": "string"
                },
                "value": params.get("condition_parser"),
            },
            "mostly": {
                "schema": {
                    "type": "number"
                },
                "value": params.get("mostly"),
            },
            "mostly_pct": {
                "schema": {
                    "type": "string"
                },
                "value": params.get("mostly_pct"),
            },
        }

        if params["mostly"] is not None and params["mostly"] < 1.0:
            params_with_json_schema["mostly_pct"]["value"] = num_to_str(
                params["mostly"] * 100, precision=15, no_scientific=True)
            template_str = f"Values must be unique across columns, at least $mostly_pct % of the time: "
        else:
            template_str = f"Values must always be unique across columns: "

        for idx in range(len(params["column_list"]) - 1):
            template_str += f"$column_list_{str(idx)}, "
            params[f"column_list_{str(idx)}"] = params["column_list"][idx]

        last_idx = len(params["column_list"]) - 1
        template_str += f"$column_list_{str(last_idx)}"
        params[f"column_list_{str(last_idx)}"] = params["column_list"][
            last_idx]

        if params["row_condition"] is not None:
            (
                conditional_template_str,
                conditional_params,
            ) = parse_row_condition_string_pandas_engine(
                params["row_condition"], with_schema=True)
            template_str = (conditional_template_str + ", then " +
                            template_str[0].lower() + template_str[1:])
            params_with_json_schema.update(conditional_params)

        params_with_json_schema = add_values_with_json_schema_from_list_in_params(
            params=params,
            params_with_json_schema=params_with_json_schema,
            param_key_with_list="column_list",
        )
        return (template_str, params_with_json_schema, styling)
コード例 #4
0
    def _atomic_prescriptive_template(
        cls,
        configuration=None,
        result=None,
        language=None,
        runtime_configuration=None,
        **kwargs,
    ):
        runtime_configuration = runtime_configuration or {}
        include_column_name = runtime_configuration.get(
            "include_column_name", True)
        include_column_name = (include_column_name
                               if include_column_name is not None else True)
        styling = runtime_configuration.get("styling")

        params = substitute_none_for_missing(
            configuration.kwargs,
            [
                "column_list",
                "ignore_row_if",
                "row_condition",
                "condition_parser",
                "mostly",
            ],
        )
        params_with_json_schema = {
            "column_list": {
                "schema": {
                    "type": "array"
                },
                "value": params.get("column_list", []),
            },
            "ignore_row_if": {
                "schema": {
                    "type": "string"
                },
                "value": params.get("ignore_row_if"),
            },
            "row_condition": {
                "schema": {
                    "type": "string"
                },
                "value": params.get("row_condition", ""),
            },
            "condition_parser": {
                "schema": {
                    "type": "string"
                },
                "value": params.get("condition_parser", ""),
            },
            "mostly": {
                "schema": {
                    "type": "number"
                },
                "value": params.get("mostly", 1),
            },
            "mostly_pct": {
                "schema": {
                    "type": "string"
                },
                "value": params.get("mostly_pct"),
            },
        }

        if params["mostly"] is not None and params["mostly"] < 1.0:
            params_with_json_schema["mostly_pct"]["value"] = num_to_str(
                params["mostly"] * 100, precision=15, no_scientific=True)
            template_str = f"Values must be unique across columns, at least $mostly_pct % of the time: "
        else:
            template_str = f"Values must always be unique across columns: "

        column_list = params.get("column_list") if params.get(
            "column_list") else []
        if len(column_list) > 0:
            for idx, val in enumerate(column_list[:-1]):
                param = f"$column_list_{idx}"
                template_str += f"{param}, "
                params[param] = val

            last_idx = len(column_list) - 1
            last_param = f"$column_list_{last_idx}"
            template_str += last_param
            params[last_param] = column_list[last_idx]

        if params["row_condition"] is not None:
            (
                conditional_template_str,
                conditional_params,
            ) = parse_row_condition_string_pandas_engine(
                params["row_condition"], with_schema=True)
            template_str = (conditional_template_str + ", then " +
                            template_str[0].lower() + template_str[1:])
            params_with_json_schema.update(conditional_params)

        params_with_json_schema = add_values_with_json_schema_from_list_in_params(
            params=params,
            params_with_json_schema=params_with_json_schema,
            param_key_with_list="column_list",
        )
        return (template_str, params_with_json_schema, styling)
    def _atomic_prescriptive_template(
        cls,
        configuration=None,
        result=None,
        language=None,
        runtime_configuration=None,
        **kwargs,
    ):
        runtime_configuration = runtime_configuration or {}
        include_column_name = runtime_configuration.get("include_column_name", True)
        include_column_name = (
            include_column_name if include_column_name is not None else True
        )
        styling = runtime_configuration.get("styling")

        params = substitute_none_for_missing(
            configuration.kwargs,
            ["column", "value_set", "row_condition", "condition_parser"],
        )
        params_with_json_schema = {
            "column": {"schema": {"type": "string"}, "value": params.get("column")},
            "value_set": {
                "schema": {"type": "array"},
                "value": params.get("value_set"),
            },
            "row_condition": {
                "schema": {"type": "string"},
                "value": params.get("row_condition"),
            },
            "condition_parser": {
                "schema": {"type": "string"},
                "value": params.get("condition_parser"),
            },
        }

        if params["value_set"] is None or len(params["value_set"]) == 0:

            if include_column_name:
                template_str = "$column distinct values must belong to this set: [ ]"
            else:
                template_str = "distinct values must belong to a set, but that set is not specified."

        else:

            for i, v in enumerate(params["value_set"]):
                params["v__" + str(i)] = v
            values_string = " ".join(
                ["$v__" + str(i) for i, v in enumerate(params["value_set"])]
            )

            if include_column_name:
                template_str = (
                    "$column distinct values must belong to this set: "
                    + values_string
                    + "."
                )
            else:
                template_str = (
                    "distinct values must belong to this set: " + values_string + "."
                )

        if params["row_condition"] is not None:
            (
                conditional_template_str,
                conditional_params,
            ) = parse_row_condition_string_pandas_engine(
                params["row_condition"], with_schema=True
            )
            template_str = conditional_template_str + ", then " + template_str
            params_with_json_schema.update(conditional_params)

        params_with_json_schema = add_values_with_json_schema_from_list_in_params(
            params=params,
            params_with_json_schema=params_with_json_schema,
            param_key_with_list="value_set",
        )

        return (template_str, params_with_json_schema, styling)
コード例 #6
0
    def _atomic_prescriptive_template(
        cls,
        configuration=None,
        result=None,
        language=None,
        runtime_configuration=None,
        **kwargs,
    ):
        runtime_configuration = runtime_configuration or {}
        include_column_name = runtime_configuration.get(
            "include_column_name", True)
        include_column_name = (include_column_name
                               if include_column_name is not None else True)
        styling = runtime_configuration.get("styling")
        params = substitute_none_for_missing(
            configuration.kwargs,
            [
                "column", "type_list", "mostly", "row_condition",
                "condition_parser"
            ],
        )
        params_with_json_schema = {
            "column": {
                "schema": {
                    "type": "string"
                },
                "value": params.get("column")
            },
            "type_list": {
                "schema": {
                    "type": "array"
                },
                "value": params.get("type_list"),
            },
            "mostly": {
                "schema": {
                    "type": "number"
                },
                "value": params.get("mostly")
            },
            "mostly_pct": {
                "schema": {
                    "type": "string"
                },
                "value": params.get("mostly_pct"),
            },
            "row_condition": {
                "schema": {
                    "type": "string"
                },
                "value": params.get("row_condition"),
            },
            "condition_parser": {
                "schema": {
                    "type": "string"
                },
                "value": params.get("condition_parser"),
            },
        }

        if params["type_list"] is not None:
            for i, v in enumerate(params["type_list"]):
                params[f"v__{str(i)}"] = v
            values_string = " ".join(
                [f"$v__{str(i)}" for i, v in enumerate(params["type_list"])])

            if params["mostly"] is not None and params["mostly"] < 1.0:
                params_with_json_schema["mostly_pct"]["value"] = num_to_str(
                    params["mostly"] * 100, precision=15, no_scientific=True)
                # params["mostly_pct"] = "{:.14f}".format(params["mostly"]*100).rstrip("0").rstrip(".")
                if include_column_name:
                    template_str = (
                        "$column value types must belong to this set: " +
                        values_string +
                        ", at least $mostly_pct % of the time.")
                else:
                    template_str = ("value types must belong to this set: " +
                                    values_string +
                                    ", at least $mostly_pct % of the time.")
            else:
                if include_column_name:
                    template_str = (
                        f"$column value types must belong to this set: {values_string}."
                    )
                else:
                    template_str = (
                        f"value types must belong to this set: {values_string}."
                    )
        else:
            if include_column_name:
                template_str = "$column value types may be any value, but observed value will be reported"
            else:
                template_str = (
                    "value types may be any value, but observed value will be reported"
                )

        if params["row_condition"] is not None:
            (
                conditional_template_str,
                conditional_params,
            ) = parse_row_condition_string_pandas_engine(
                params["row_condition"], with_schema=True)
            template_str = f"{conditional_template_str}, then {template_str}"
            params_with_json_schema.update(conditional_params)

        params_with_json_schema = add_values_with_json_schema_from_list_in_params(
            params=params,
            params_with_json_schema=params_with_json_schema,
            param_key_with_list="type_list",
        )
        return (template_str, params_with_json_schema, styling)
    def _atomic_prescriptive_template(
        cls,
        configuration=None,
        result=None,
        language=None,
        runtime_configuration=None,
        **kwargs,
    ):
        runtime_configuration = runtime_configuration or {}
        include_column_name = runtime_configuration.get(
            "include_column_name", True)
        include_column_name = (include_column_name
                               if include_column_name is not None else True)
        styling = runtime_configuration.get("styling")
        params = substitute_none_for_missing(
            configuration.kwargs,
            [
                "column", "value_set", "ties_okay", "row_condition",
                "condition_parser"
            ],
        )
        params_with_json_schema = {
            "column": {
                "schema": {
                    "type": "string"
                },
                "value": params.get("column")
            },
            "value_set": {
                "schema": {
                    "type": "array"
                },
                "value": params.get("value_set"),
            },
            "ties_okay": {
                "schema": {
                    "type": "boolean"
                },
                "value": params.get("ties_okay"),
            },
            "row_condition": {
                "schema": {
                    "type": "string"
                },
                "value": params.get("row_condition"),
            },
            "condition_parser": {
                "schema": {
                    "type": "string"
                },
                "value": params.get("condition_parser"),
            },
        }

        if params["value_set"] is None or len(params["value_set"]) == 0:
            values_string = "[ ]"
        else:
            for i, v in enumerate(params["value_set"]):
                params[f"v__{str(i)}"] = v

            values_string = " ".join(
                [f"$v__{str(i)}" for i, v in enumerate(params["value_set"])])

        template_str = f"most common value must belong to this set: {values_string}."

        if params.get("ties_okay"):
            template_str += " Values outside this set that are as common (but not more common) are allowed."

        if include_column_name:
            template_str = f"$column {template_str}"

        if params["row_condition"] is not None:
            (
                conditional_template_str,
                conditional_params,
            ) = parse_row_condition_string_pandas_engine(
                params["row_condition"], with_schema=True)
            template_str = f"{conditional_template_str}, then {template_str}"
            params_with_json_schema.update(conditional_params)

        params_with_json_schema = add_values_with_json_schema_from_list_in_params(
            params=params,
            params_with_json_schema=params_with_json_schema,
            param_key_with_list="value_set",
        )

        return (template_str, params_with_json_schema, styling)