def test_parse_row_condition_string_pandas_engine():
    """Exercise parse_row_condition_string_pandas_engine on three condition strings.

    Each case pairs an input condition with the exact ``(template, params)``
    tuple the parser is expected to return.
    """
    cases = [
        # Empty condition string.
        (
            "",
            ("if $row_condition__0", {"row_condition__0": "True"}),
        ),
        # Single membership test.
        (
            "Age in [0, 42]",
            ("if $row_condition__0", {"row_condition__0": "Age in [0, 42]"}),
        ),
        # Mixed and / | / & / ~ / not operators with nested parentheses.
        (
            "Survived == 1 and (SexCode not in (0, 7, x) | ~(Age > 50)) & not (PClass != '1st')",
            (
                "if $row_condition__0 and ($row_condition__1 or not ($row_condition__2)) and not ($row_condition__3)",
                {
                    "row_condition__0": "Survived == 1",
                    "row_condition__1": "SexCode not in [0, 7, x]",
                    "row_condition__2": "Age > 50",
                    "row_condition__3": "PClass != '1st'",
                },
            ),
        ),
    ]
    for condition_string, expected in cases:
        assert parse_row_condition_string_pandas_engine(condition_string) == expected
def _prescriptive_renderer(
    cls,
    configuration=None,
    result=None,
    language=None,
    runtime_configuration=None,
    **kwargs,
):
    """Build the prescriptive string template for compound-column uniqueness.

    The template lists one ``$column_list_<i>`` placeholder per entry of the
    ``column_list`` kwarg and, when ``mostly`` is below 1.0, mentions the
    required percentage through ``$mostly_pct``.

    Args:
        cls: First positional argument; not used in the body.
        configuration: Object whose ``kwargs`` hold the expectation arguments.
        result: Unused.
        language: Unused.
        runtime_configuration: Optional dict; only ``"styling"`` is read.
        **kwargs: Ignored.

    Returns:
        A one-element list holding a ``RenderedStringTemplateContent``.
    """
    runtime_configuration = runtime_configuration or {}
    styling = runtime_configuration.get("styling")
    params = substitute_none_for_missing(
        configuration.kwargs,
        [
            "column_list",
            "ignore_row_if",
            "row_condition",
            "condition_parser",
            "mostly",
        ],
    )

    if params["mostly"] is not None and params["mostly"] < 1.0:
        # The percentage must live in ``params`` (the dict handed to the
        # template); this function has no ``params_with_json_schema``.
        params["mostly_pct"] = num_to_str(
            params["mostly"] * 100, precision=15, no_scientific=True
        )
        template_str = "Values for given compound columns must be unique together, at least $mostly_pct % of the time: "
    else:
        template_str = "Values for given compound columns must be unique together: "

    # Tolerate a missing or empty column list instead of raising.
    column_list = params["column_list"] or []
    for idx, column_name in enumerate(column_list):
        params[f"column_list_{idx}"] = column_name
    template_str += ", ".join(
        f"$column_list_{idx}" for idx in range(len(column_list))
    )

    if params["row_condition"] is not None:
        (
            conditional_template_str,
            conditional_params,
        ) = parse_row_condition_string_pandas_engine(params["row_condition"])
        # The sentence now follows "if ..., then", so lower-case its first letter.
        template_str = (
            conditional_template_str
            + ", then "
            + template_str[0].lower()
            + template_str[1:]
        )
        params.update(conditional_params)

    return [
        RenderedStringTemplateContent(
            **{
                "content_block_type": "string_template",
                "string_template": {
                    "template": template_str,
                    "params": params,
                    "styling": styling,
                },
            }
        )
    ]
def _prescriptive_renderer(
    cls,
    configuration=None,
    result=None,
    language=None,
    runtime_configuration=None,
    **kwargs,
):
    """Build the string template stating the most common value must be in a set.

    Reads ``column``, ``value_set``, ``ties_okay``, ``row_condition`` and
    ``condition_parser`` from ``configuration.kwargs`` and returns a
    one-element list holding a ``RenderedStringTemplateContent``.
    """
    runtime_configuration = runtime_configuration or {}
    # A missing key and an explicit None both mean "show the column name".
    include_column_name = runtime_configuration.get("include_column_name")
    if include_column_name is None:
        include_column_name = True
    styling = runtime_configuration.get("styling")

    params = substitute_none_for_missing(
        configuration.kwargs,
        ["column", "value_set", "ties_okay", "row_condition", "condition_parser"],
    )

    value_set = params["value_set"]
    if value_set is None or len(value_set) == 0:
        values_string = "[ ]"
    else:
        # One ``$v__<i>`` placeholder per set member, value stored alongside.
        placeholders = []
        for position, member in enumerate(value_set):
            params["v__" + str(position)] = member
            placeholders.append("$v__" + str(position))
        values_string = " ".join(placeholders)

    template_str = f"most common value must belong to this set: {values_string}."
    if params.get("ties_okay"):
        template_str += " Values outside this set that are as common (but not more common) are allowed."

    if include_column_name:
        template_str = f"$column {template_str}"

    row_condition = params["row_condition"]
    if row_condition is not None:
        condition_template, condition_params = (
            parse_row_condition_string_pandas_engine(row_condition)
        )
        template_str = f"{condition_template}, then {template_str}"
        params.update(condition_params)

    string_template = {
        "template": template_str,
        "params": params,
        "styling": styling,
    }
    return [
        RenderedStringTemplateContent(
            content_block_type="string_template",
            string_template=string_template,
        )
    ]
def _atomic_prescriptive_template(
    cls,
    configuration=None,
    result=None,
    language=None,
    runtime_configuration=None,
    **kwargs,
):
    """Build the atomic template for the "values must be null" expectation.

    Returns:
        A ``(template_str, params_with_json_schema, styling)`` tuple.
    """
    runtime_configuration = runtime_configuration or {}
    # A missing key and an explicit None both mean "show the column name".
    include_column_name = runtime_configuration.get("include_column_name")
    if include_column_name is None:
        include_column_name = True
    styling = runtime_configuration.get("styling")

    params = substitute_none_for_missing(
        configuration.kwargs,
        ["column", "mostly", "row_condition", "condition_parser"],
    )

    # (parameter name, JSON-schema type) in the order they appear in the output.
    schema_types = (
        ("column", "string"),
        ("mostly", "number"),
        ("mostly_pct", "number"),
        ("row_condition", "string"),
        ("condition_parser", "string"),
    )
    params_with_json_schema = {
        key: {"schema": {"type": type_name}, "value": params.get(key)}
        for key, type_name in schema_types
    }

    if params["mostly"] is None:
        template_str = "values must be null."
    else:
        params_with_json_schema["mostly_pct"]["value"] = num_to_str(
            params["mostly"] * 100, precision=15, no_scientific=True
        )
        template_str = "values must be null, at least $mostly_pct % of the time."

    if include_column_name:
        template_str = f"$column {template_str}"

    row_condition = params["row_condition"]
    if row_condition is not None:
        condition_template, condition_params = (
            parse_row_condition_string_pandas_engine(row_condition, with_schema=True)
        )
        template_str = f"{condition_template}, then {template_str}"
        params_with_json_schema.update(condition_params)

    return (template_str, params_with_json_schema, styling)
def _prescriptive_renderer(
    cls, configuration=None, result=None, language=None, runtime_configuration=None, **kwargs
):
    """Build the string template (and optional chart) for the KL-divergence check.

    Returns:
        A list whose first element is a plain ``string_template`` dict. When a
        ``partition_object`` is configured and ``cls._get_kl_divergence_chart``
        returns something truthy, that chart is appended as a second element.
    """
    runtime_configuration = runtime_configuration or {}
    # A missing key and an explicit None both mean "show the column name".
    include_column_name = runtime_configuration.get("include_column_name")
    if include_column_name is None:
        include_column_name = True
    # NOTE: styling is looked up but not attached to the returned content.
    styling = runtime_configuration.get("styling")

    params = substitute_none_for_missing(
        configuration.kwargs,
        [
            "column",
            "partition_object",
            "threshold",
            "row_condition",
            "condition_parser",
        ],
    )

    expected_distribution = None
    if params.get("partition_object"):
        template_str = (
            "Kullback-Leibler (KL) divergence with respect to the following distribution must be "
            "lower than $threshold."
        )
        expected_distribution = cls._get_kl_divergence_chart(
            params.get("partition_object")
        )
    else:
        template_str = "can match any distribution."

    if include_column_name:
        template_str = f"$column {template_str}"

    row_condition = params["row_condition"]
    if row_condition is not None:
        condition_template, condition_params = (
            parse_row_condition_string_pandas_engine(row_condition)
        )
        template_str = f"{condition_template}, then {template_str}"
        params.update(condition_params)

    rendered = [
        {
            "content_block_type": "string_template",
            "string_template": {"template": template_str, "params": params},
        }
    ]
    if expected_distribution:
        rendered.append(expected_distribution)
    return rendered
def _prescriptive_renderer(
    cls,
    configuration=None,
    result=None,
    language=None,
    runtime_configuration=None,
    **kwargs,
):
    """Build the string template for the "values must not be null" expectation.

    Returns:
        A one-element list holding a ``RenderedStringTemplateContent``.
    """
    runtime_configuration = runtime_configuration or {}
    # A missing key and an explicit None both mean "show the column name".
    include_column_name = runtime_configuration.get("include_column_name")
    if include_column_name is None:
        include_column_name = True
    styling = runtime_configuration.get("styling")

    params = substitute_none_for_missing(
        configuration.kwargs,
        ["column", "mostly", "row_condition", "condition_parser"],
    )

    mostly = params["mostly"]
    if mostly is not None and mostly < 1.0:
        params["mostly_pct"] = num_to_str(
            mostly * 100, precision=15, no_scientific=True
        )
        template_str = "values must not be null, at least $mostly_pct % of the time."
    else:
        template_str = "values must never be null."

    if include_column_name:
        template_str = "$column " + template_str

    row_condition = params["row_condition"]
    if row_condition is not None:
        condition_template, condition_params = (
            parse_row_condition_string_pandas_engine(row_condition)
        )
        template_str = condition_template + ", then " + template_str
        params.update(condition_params)

    string_template = {
        "template": template_str,
        "params": params,
        "styling": styling,
    }
    return [
        RenderedStringTemplateContent(
            content_block_type="string_template",
            string_template=string_template,
        )
    ]
def _atomic_prescriptive_template(
    cls,
    configuration=None,
    result=None,
    language=None,
    runtime_configuration=None,
    **kwargs,
):
    """Build the atomic template for "Must have exactly $value rows.".

    Returns:
        A ``(template_str, params_with_json_schema, styling)`` tuple.
    """
    runtime_configuration = runtime_configuration or {}
    # Resolved as in the sibling renderers; this template has no column
    # prefix, so the flag is not consulted below.
    include_column_name = runtime_configuration.get("include_column_name")
    if include_column_name is None:
        include_column_name = True
    styling = runtime_configuration.get("styling")

    params = substitute_none_for_missing(
        configuration.kwargs,
        ["value", "row_condition", "condition_parser"],
    )

    # (parameter name, JSON-schema type) in output order.
    schema_types = (
        ("value", "number"),
        ("row_condition", "string"),
        ("condition_parser", "string"),
    )
    params_with_json_schema = {
        key: {"schema": {"type": type_name}, "value": params.get(key)}
        for key, type_name in schema_types
    }

    template_str = "Must have exactly $value rows."

    row_condition = params["row_condition"]
    if row_condition is not None:
        condition_template, condition_params = (
            parse_row_condition_string_pandas_engine(row_condition, with_schema=True)
        )
        # The sentence now follows "if ..., then", so lower-case its first letter.
        lowered = template_str[0].lower() + template_str[1:]
        template_str = f"{condition_template}, then {lowered}"
        params_with_json_schema.update(condition_params)

    return (template_str, params_with_json_schema, styling)
def _prescriptive_renderer(
    cls,
    configuration=None,
    result=None,
    language=None,
    runtime_configuration=None,
    **kwargs,
):
    """Build the string template for "Must have exactly $value rows.".

    Returns:
        A one-element list holding a ``RenderedStringTemplateContent``.
    """
    runtime_configuration = runtime_configuration or {}
    # Resolved as in the sibling renderers; this template has no column
    # prefix, so the flag is not consulted below.
    include_column_name = runtime_configuration.get("include_column_name")
    if include_column_name is None:
        include_column_name = True
    styling = runtime_configuration.get("styling")

    params = substitute_none_for_missing(
        configuration.kwargs,
        ["value", "row_condition", "condition_parser"],
    )

    template_str = "Must have exactly $value rows."

    row_condition = params["row_condition"]
    if row_condition is not None:
        condition_template, condition_params = (
            parse_row_condition_string_pandas_engine(row_condition)
        )
        # The sentence now follows "if ..., then", so lower-case its first letter.
        lowered = template_str[0].lower() + template_str[1:]
        template_str = f"{condition_template}, then {lowered}"
        params.update(condition_params)

    string_template = {
        "template": template_str,
        "params": params,
        "styling": styling,
    }
    return [
        RenderedStringTemplateContent(
            content_block_type="string_template",
            string_template=string_template,
        )
    ]
def _prescriptive_renderer(
    cls,
    configuration=None,
    result=None,
    language=None,
    runtime_configuration=None,
    **kwargs,
):
    """Build the string template describing bounds on the standard deviation.

    Returns:
        A one-element list holding a ``RenderedStringTemplateContent``.
    """
    runtime_configuration = runtime_configuration or {}
    # A missing key and an explicit None both mean "show the column name".
    include_column_name = runtime_configuration.get("include_column_name")
    if include_column_name is None:
        include_column_name = True
    styling = runtime_configuration.get("styling")

    params = substitute_none_for_missing(
        configuration.kwargs,
        [
            "column",
            "min_value",
            "max_value",
            "row_condition",
            "condition_parser",
            "strict_min",
            "strict_max",
        ],
    )

    lower_bound = params["min_value"]
    upper_bound = params["max_value"]
    if lower_bound is None and upper_bound is None:
        template_str = "standard deviation may have any numerical value."
    else:
        at_least_str, at_most_str = handle_strict_min_max(params)
        if lower_bound is None:
            template_str = f"standard deviation must be {at_most_str} $max_value."
        elif upper_bound is None:
            template_str = f"standard deviation must be {at_least_str} $min_value."
        else:
            template_str = f"standard deviation must be {at_least_str} $min_value and {at_most_str} $max_value."

    if include_column_name:
        template_str = "$column " + template_str

    row_condition = params["row_condition"]
    if row_condition is not None:
        condition_template, condition_params = (
            parse_row_condition_string_pandas_engine(row_condition)
        )
        template_str = condition_template + ", then " + template_str
        params.update(condition_params)

    string_template = {
        "template": template_str,
        "params": params,
        "styling": styling,
    }
    return [
        RenderedStringTemplateContent(
            content_block_type="string_template",
            string_template=string_template,
        )
    ]
def _prescriptive_renderer(
    cls,
    configuration=None,
    result=None,
    language=None,
    runtime_configuration=None,
    **kwargs,
):
    """Build the string template for matching values against a regex list.

    ``match_on == "all"`` selects the "all of the following" wording; any
    other value selects the "any of the following" wording.

    Returns:
        A one-element list holding a ``RenderedStringTemplateContent``.
    """
    runtime_configuration = runtime_configuration or {}
    # A missing key and an explicit None both mean "show the column name".
    include_column_name = runtime_configuration.get("include_column_name")
    if include_column_name is None:
        include_column_name = True
    styling = runtime_configuration.get("styling")

    params = substitute_none_for_missing(
        configuration.kwargs,
        [
            "column",
            "regex_list",
            "mostly",
            "match_on",
            "row_condition",
            "condition_parser",
        ],
    )

    regex_list = params.get("regex_list")
    if not regex_list or len(regex_list) == 0:
        values_string = "[ ]"
    else:
        # One ``$v__<i>`` placeholder per regex, value stored alongside.
        placeholders = []
        for position, pattern in enumerate(regex_list):
            params[f"v__{position}"] = pattern
            placeholders.append(f"$v__{position}")
        values_string = " ".join(placeholders)

    quantifier = "all" if params.get("match_on") == "all" else "any"
    template_str = f"values must match {quantifier} of the following regular expressions: {values_string}"

    if params["mostly"] is None:
        template_str += "."
    else:
        params["mostly_pct"] = num_to_str(
            params["mostly"] * 100, precision=15, no_scientific=True
        )
        template_str += ", at least $mostly_pct % of the time."

    if include_column_name:
        template_str = f"$column {template_str}"

    row_condition = params["row_condition"]
    if row_condition is not None:
        condition_template, condition_params = (
            parse_row_condition_string_pandas_engine(row_condition)
        )
        template_str = f"{condition_template}, then {template_str}"
        params.update(condition_params)

    string_template = {
        "template": template_str,
        "params": params,
        "styling": styling,
    }
    return [
        RenderedStringTemplateContent(
            content_block_type="string_template",
            string_template=string_template,
        )
    ]
def _atomic_prescriptive_template(
    cls,
    configuration=None,
    result=None,
    language=None,
    runtime_configuration=None,
    **kwargs,
):
    """Build the atomic template describing bounds on the standard deviation.

    Returns:
        A ``(template_str, params_with_json_schema, styling)`` tuple.
    """
    runtime_configuration = runtime_configuration or {}
    # A missing key and an explicit None both mean "show the column name".
    include_column_name = runtime_configuration.get("include_column_name")
    if include_column_name is None:
        include_column_name = True
    styling = runtime_configuration.get("styling")

    # (parameter name, JSON-schema type) in output order.
    schema_types = (
        ("column", "string"),
        ("min_value", "number"),
        ("max_value", "number"),
        ("row_condition", "string"),
        ("condition_parser", "string"),
        ("strict_min", "boolean"),
        ("strict_max", "boolean"),
    )
    params = substitute_none_for_missing(
        configuration.kwargs,
        [key for key, _ in schema_types],
    )
    params_with_json_schema = {
        key: {"schema": {"type": type_name}, "value": params.get(key)}
        for key, type_name in schema_types
    }

    lower_bound = params["min_value"]
    upper_bound = params["max_value"]
    if lower_bound is None and upper_bound is None:
        template_str = "standard deviation may have any numerical value."
    else:
        at_least_str, at_most_str = handle_strict_min_max(params)
        if lower_bound is None:
            template_str = f"standard deviation must be {at_most_str} $max_value."
        elif upper_bound is None:
            template_str = f"standard deviation must be {at_least_str} $min_value."
        else:
            template_str = f"standard deviation must be {at_least_str} $min_value and {at_most_str} $max_value."

    if include_column_name:
        template_str = "$column " + template_str

    row_condition = params["row_condition"]
    if row_condition is not None:
        condition_template, condition_params = (
            parse_row_condition_string_pandas_engine(row_condition, with_schema=True)
        )
        template_str = condition_template + ", then " + template_str
        params_with_json_schema.update(condition_params)

    return (template_str, params_with_json_schema, styling)
def _atomic_prescriptive_template(
    cls,
    configuration=None,
    result=None,
    language=None,
    runtime_configuration=None,
    **kwargs,
):
    """Build the atomic template for compound-column uniqueness.

    The template lists one ``$column_list_<i>`` placeholder per entry of the
    ``column_list`` kwarg. Each placeholder gets a matching
    ``column_list_<i>`` entry in the returned schema dict, so every
    placeholder in the template can be resolved from the returned params.

    Args:
        cls: First positional argument; not used in the body.
        configuration: Object whose ``kwargs`` hold the expectation arguments.
        result: Unused.
        language: Unused.
        runtime_configuration: Optional dict; only ``"styling"`` is read.
        **kwargs: Ignored.

    Returns:
        A ``(template_str, params_with_json_schema, styling)`` tuple.
    """
    runtime_configuration = runtime_configuration or {}
    styling = runtime_configuration.get("styling")
    params = substitute_none_for_missing(
        configuration.kwargs,
        [
            "column_list",
            "ignore_row_if",
            "row_condition",
            "condition_parser",
            "mostly",
        ],
    )
    params_with_json_schema = {
        "column_list": {
            "schema": {"type": "array"},
            "value": params.get("column_list"),
        },
        "ignore_row_if": {
            "schema": {"type": "string"},
            "value": params.get("ignore_row_if"),
        },
        "row_condition": {
            "schema": {"type": "string"},
            "value": params.get("row_condition"),
        },
        "condition_parser": {
            "schema": {"type": "string"},
            "value": params.get("condition_parser"),
        },
        "mostly": {
            "schema": {"type": "number"},
            "value": params.get("mostly"),
        },
        # NOTE(review): declared "number" here, but the value comes from
        # num_to_str, which presumably returns a string - confirm.
        "mostly_pct": {
            "schema": {"type": "number"},
            "value": params.get("mostly_pct"),
        },
    }

    if params["mostly"] is not None:
        params_with_json_schema["mostly_pct"]["value"] = num_to_str(
            params["mostly"] * 100, precision=15, no_scientific=True
        )
    mostly_str = (
        "" if params.get("mostly") is None else ", at least $mostly_pct % of the time"
    )

    template_str = (
        f"Values for given compound columns must be unique together{mostly_str}: "
    )

    column_list = params.get("column_list") or []
    placeholders = []
    for idx, column_name in enumerate(column_list):
        key = f"column_list_{idx}"
        # Register the value under the bare key (no "$") in the dict that is
        # actually returned. Column names are assumed to be strings.
        params_with_json_schema[key] = {
            "schema": {"type": "string"},
            "value": column_name,
        }
        placeholders.append(f"${key}")
    template_str += ", ".join(placeholders)

    if params["row_condition"] is not None:
        (
            conditional_template_str,
            conditional_params,
        ) = parse_row_condition_string_pandas_engine(
            params["row_condition"], with_schema=True
        )
        # The sentence now follows "if ..., then", so lower-case its first letter.
        template_str = (
            conditional_template_str
            + ", then "
            + template_str[0].lower()
            + template_str[1:]
        )
        params_with_json_schema.update(conditional_params)

    return (template_str, params_with_json_schema, styling)
def _atomic_prescriptive_template(
    cls,
    configuration=None,
    result=None,
    language=None,
    runtime_configuration=None,
    **kwargs,
):
    """Build the atomic template for matching values against a regex list.

    ``match_on == "all"`` selects the "all of the following" wording; any
    other value selects the "any of the following" wording. When ``mostly``
    is set, the template references ``$mostly_pct`` and the returned schema
    dict carries a matching ``mostly_pct`` entry.

    Args:
        cls: First positional argument; not used in the body.
        configuration: Object whose ``kwargs`` hold the expectation arguments.
        result: Unused.
        language: Unused.
        runtime_configuration: Optional dict; ``"include_column_name"`` and
            ``"styling"`` are read.
        **kwargs: Ignored.

    Returns:
        A ``(template_str, params_with_json_schema, styling)`` tuple.
    """
    runtime_configuration = runtime_configuration or {}
    include_column_name = runtime_configuration.get("include_column_name", True)
    include_column_name = (
        include_column_name if include_column_name is not None else True
    )
    styling = runtime_configuration.get("styling")
    params = substitute_none_for_missing(
        configuration.kwargs,
        [
            "column",
            "regex_list",
            "mostly",
            "match_on",
            "row_condition",
            "condition_parser",
        ],
    )
    params_with_json_schema = {
        "column": {"schema": {"type": "string"}, "value": params.get("column")},
        "regex_list": {
            "schema": {"type": "array"},
            "value": params.get("regex_list"),
        },
        "mostly": {"schema": {"type": "number"}, "value": params.get("mostly")},
        # Needed so the "$mostly_pct" placeholder can be resolved from the
        # returned params.
        "mostly_pct": {
            "schema": {"type": "string"},
            "value": params.get("mostly_pct"),
        },
        "match_on": {"schema": {"type": "string"}, "value": params.get("match_on")},
        "row_condition": {
            "schema": {"type": "string"},
            "value": params.get("row_condition"),
        },
        "condition_parser": {
            "schema": {"type": "string"},
            "value": params.get("condition_parser"),
        },
    }

    regex_list = params.get("regex_list")
    if not regex_list:
        values_string = "[ ]"
    else:
        for i, v in enumerate(regex_list):
            params[f"v__{i}"] = v
        values_string = " ".join(f"$v__{i}" for i in range(len(regex_list)))

    if params.get("match_on") == "all":
        template_str = (
            "values must match all of the following regular expressions: "
            + values_string
        )
    else:
        template_str = (
            "values must match any of the following regular expressions: "
            + values_string
        )

    if params["mostly"] is not None:
        mostly_pct = num_to_str(
            params["mostly"] * 100, precision=15, no_scientific=True
        )
        # Kept on ``params`` as before, and now also exposed in the returned schema.
        params["mostly_pct"] = mostly_pct
        params_with_json_schema["mostly_pct"]["value"] = mostly_pct
        template_str += ", at least $mostly_pct % of the time."
    else:
        template_str += "."

    if include_column_name:
        template_str = "$column " + template_str

    if params["row_condition"] is not None:
        (
            conditional_template_str,
            conditional_params,
        ) = parse_row_condition_string_pandas_engine(
            params["row_condition"], with_schema=True
        )
        template_str = conditional_template_str + ", then " + template_str
        params_with_json_schema.update(conditional_params)

    params_with_json_schema = add_values_with_json_schema_from_list_in_params(
        params=params,
        params_with_json_schema=params_with_json_schema,
        param_key_with_list="regex_list",
    )

    return (template_str, params_with_json_schema, styling)
def _atomic_prescriptive_template(
    cls,
    configuration=None,
    result=None,
    language=None,
    runtime_configuration=None,
    **kwargs,
):
    """Build the atomic template describing bounds on the unique-value count.

    Returns:
        A ``(template_str, params_with_json_schema, styling)`` tuple.
    """
    runtime_configuration = runtime_configuration or {}
    # A missing key and an explicit None both mean "show the column name".
    include_column_name = runtime_configuration.get("include_column_name")
    if include_column_name is None:
        include_column_name = True
    styling = runtime_configuration.get("styling")

    params = substitute_none_for_missing(
        configuration.kwargs,
        [
            "column",
            "min_value",
            "max_value",
            "mostly",
            "row_condition",
            "condition_parser",
            "strict_min",
            "strict_max",
        ],
    )

    # (parameter name, JSON-schema type) in output order.
    schema_types = (
        ("column", "string"),
        ("min_value", "number"),
        ("max_value", "number"),
        ("mostly", "number"),
        ("mostly_pct", "string"),
        ("row_condition", "string"),
        ("condition_parser", "string"),
        ("strict_min", "boolean"),
        ("strict_max", "boolean"),
    )
    params_with_json_schema = {
        key: {"schema": {"type": type_name}, "value": params.get(key)}
        for key, type_name in schema_types
    }

    at_least_str, at_most_str = handle_strict_min_max(params)

    lower_bound = params["min_value"]
    upper_bound = params["max_value"]
    if lower_bound is None and upper_bound is None:
        template_str = "may have any number of unique values."
    else:
        # Describe whichever bounds are present, then add the "mostly" suffix.
        if lower_bound is None:
            bounds = f"{at_most_str} $max_value"
        elif upper_bound is None:
            bounds = f"{at_least_str} $min_value"
        else:
            bounds = f"{at_least_str} $min_value and {at_most_str} $max_value"
        template_str = f"must have {bounds} unique values"

        if params["mostly"] is not None and params["mostly"] < 1.0:
            params_with_json_schema["mostly_pct"]["value"] = num_to_str(
                params["mostly"] * 100, precision=15, no_scientific=True
            )
            template_str += ", at least $mostly_pct % of the time."
        else:
            template_str += "."

    if include_column_name:
        template_str = "$column " + template_str

    row_condition = params["row_condition"]
    if row_condition is not None:
        condition_template, condition_params = (
            parse_row_condition_string_pandas_engine(row_condition, with_schema=True)
        )
        template_str = condition_template + ", then " + template_str
        params_with_json_schema.update(condition_params)

    return (template_str, params_with_json_schema, styling)
def _prescriptive_renderer(
    cls,
    configuration=None,
    result=None,
    language=None,
    runtime_configuration=None,
    **kwargs,
):
    """Build the string template stating that values must match a set.

    When ``set_semantic_name`` is given, the template names the set through
    ``$set_semantic_name``; otherwise it only shows ``$set_``. If ``set_`` is
    missing or empty, the template says that no set was specified.

    Args:
        cls: First positional argument; not used in the body.
        configuration: Object whose ``kwargs`` hold the expectation arguments.
        result: Unused.
        language: Unused.
        runtime_configuration: Optional dict; ``"include_column_name"`` and
            ``"styling"`` are read.
        **kwargs: Ignored.

    Returns:
        A one-element list holding a ``RenderedStringTemplateContent``.
    """
    runtime_configuration = runtime_configuration or {}
    include_column_name = runtime_configuration.get("include_column_name", True)
    include_column_name = (
        include_column_name if include_column_name is not None else True
    )
    styling = runtime_configuration.get("styling")
    params = substitute_none_for_missing(
        configuration.kwargs,
        [
            "column",
            "set_",
            "mostly",
            "row_condition",
            "condition_parser",
            "set_semantic_name",
        ],
    )

    if not params.get("set_"):
        template_str = "values must match a set but none was specified."
    else:
        if params.get("set_semantic_name"):
            template_str = "values must match the set $set_semantic_name: $set_"
        else:
            template_str = "values must match this set: $set_"
        if params["mostly"] is not None:
            params["mostly_pct"] = num_to_str(
                params["mostly"] * 100, precision=15, no_scientific=True
            )
            template_str += ", at least $mostly_pct % of the time."
        else:
            template_str += "."

    if include_column_name:
        template_str = "$column " + template_str

    if params["row_condition"] is not None:
        (
            conditional_template_str,
            conditional_params,
        ) = parse_row_condition_string_pandas_engine(params["row_condition"])
        template_str = conditional_template_str + ", then " + template_str
        params.update(conditional_params)

    # The schema-annotated params dict that used to be built here was never
    # used or returned, so it has been dropped.
    return [
        RenderedStringTemplateContent(
            **{
                "content_block_type": "string_template",
                "string_template": {
                    "template": template_str,
                    "params": params,
                    "styling": styling,
                },
            }
        )
    ]
def _prescriptive_renderer(
    cls,
    configuration=None,
    result=None,
    language=None,
    runtime_configuration=None,
    **kwargs,
):
    """Build the string template stating that values must match a regex.

    If ``regex`` is missing or empty, the template says that no regular
    expression was specified. Otherwise it shows ``$regex`` and, when
    ``mostly`` is below 1.0, the required percentage through ``$mostly_pct``.

    Args:
        cls: First positional argument; not used in the body.
        configuration: Object whose ``kwargs`` hold the expectation arguments.
        result: Unused.
        language: Unused.
        runtime_configuration: Optional dict; ``"include_column_name"`` and
            ``"styling"`` are read.
        **kwargs: Ignored.

    Returns:
        A one-element list holding a ``RenderedStringTemplateContent``.
    """
    runtime_configuration = runtime_configuration or {}
    include_column_name = runtime_configuration.get("include_column_name", True)
    include_column_name = (
        include_column_name if include_column_name is not None else True
    )
    styling = runtime_configuration.get("styling")
    params = substitute_none_for_missing(
        configuration.kwargs,
        ["column", "regex", "mostly", "row_condition", "condition_parser"],
    )

    if not params.get("regex"):
        template_str = (
            "values must match a regular expression but none was specified."
        )
    else:
        template_str = "values must match this regular expression: $regex"
        if params["mostly"] is not None and params["mostly"] < 1.0:
            params["mostly_pct"] = num_to_str(
                params["mostly"] * 100, precision=15, no_scientific=True
            )
            template_str += ", at least $mostly_pct % of the time."
        else:
            template_str += "."

    if include_column_name:
        template_str = f"$column {template_str}"

    if params["row_condition"] is not None:
        (
            conditional_template_str,
            conditional_params,
        ) = parse_row_condition_string_pandas_engine(params["row_condition"])
        template_str = f"{conditional_template_str}, then {template_str}"
        params.update(conditional_params)

    # The schema-annotated params dict that used to be built here was never
    # used or returned, so it has been dropped.
    return [
        RenderedStringTemplateContent(
            **{
                "content_block_type": "string_template",
                "string_template": {
                    "template": template_str,
                    "params": params,
                    "styling": styling,
                },
            }
        )
    ]
def _prescriptive_renderer(
    cls,
    configuration=None,
    result=None,
    language=None,
    runtime_configuration=None,
    **kwargs,
):
    """Build the string template stating that two columns must be equal.

    If either ``column_A`` or ``column_B`` is missing, the template is an
    "unrecognized kwargs" message and any row condition is dropped.
    Otherwise the wording depends on whether ``mostly`` is set.

    Args:
        cls: First positional argument; not used in the body.
        configuration: Object whose ``kwargs`` hold the expectation arguments.
        result: Unused.
        language: Unused.
        runtime_configuration: Optional dict; only ``"styling"`` is read.
        **kwargs: Ignored.

    Returns:
        A one-element list holding a ``RenderedStringTemplateContent``.
    """
    runtime_configuration = runtime_configuration or {}
    styling = runtime_configuration.get("styling")
    params = substitute_none_for_missing(
        configuration.kwargs,
        [
            "column_A",
            "column_B",
            "ignore_row_if",
            "mostly",
            "row_condition",
            "condition_parser",
        ],
    )

    # NOTE: This renderer doesn't do anything with "ignore_row_if"

    if (params["column_A"] is None) or (params["column_B"] is None):
        template_str = " unrecognized kwargs for expect_column_pair_values_to_be_equal: missing column."
        params["row_condition"] = None
    # ``elif`` (not a second ``if``) so the missing-column message above is
    # not overwritten by the regular wording.
    elif params["mostly"] is None:
        template_str = "Values in $column_A and $column_B must always be equal."
    else:
        params["mostly_pct"] = num_to_str(
            params["mostly"] * 100, precision=15, no_scientific=True
        )
        template_str = "Values in $column_A and $column_B must be equal, at least $mostly_pct % of the time."

    if params["row_condition"] is not None:
        (
            conditional_template_str,
            conditional_params,
        ) = parse_row_condition_string_pandas_engine(params["row_condition"])
        # The sentence now follows "if ..., then", so lower-case its first letter.
        template_str = (
            conditional_template_str
            + ", then "
            + template_str[0].lower()
            + template_str[1:]
        )
        params.update(conditional_params)

    return [
        RenderedStringTemplateContent(
            **{
                "content_block_type": "string_template",
                "string_template": {
                    "template": template_str,
                    "params": params,
                    "styling": styling,
                },
            }
        )
    ]
def _atomic_prescriptive_template(
    cls,
    configuration=None,
    result=None,
    language=None,
    runtime_configuration=None,
    **kwargs,
):
    """Build the atomic template stating the most common value must be in a set.

    Returns:
        A ``(template_str, params_with_json_schema, styling)`` tuple.
    """
    runtime_configuration = runtime_configuration or {}
    # A missing key and an explicit None both mean "show the column name".
    include_column_name = runtime_configuration.get("include_column_name")
    if include_column_name is None:
        include_column_name = True
    styling = runtime_configuration.get("styling")

    # (parameter name, JSON-schema type) in output order.
    schema_types = (
        ("column", "string"),
        ("value_set", "array"),
        ("ties_okay", "boolean"),
        ("row_condition", "string"),
        ("condition_parser", "string"),
    )
    params = substitute_none_for_missing(
        configuration.kwargs,
        [key for key, _ in schema_types],
    )
    params_with_json_schema = {
        key: {"schema": {"type": type_name}, "value": params.get(key)}
        for key, type_name in schema_types
    }

    value_set = params["value_set"]
    if value_set is None or len(value_set) == 0:
        values_string = "[ ]"
    else:
        # One ``$v__<i>`` placeholder per set member, value stored on params.
        placeholders = []
        for position, member in enumerate(value_set):
            params["v__" + str(position)] = member
            placeholders.append("$v__" + str(position))
        values_string = " ".join(placeholders)

    template_str = "most common value must belong to this set: " + values_string + "."
    if params.get("ties_okay"):
        template_str += " Values outside this set that are as common (but not more common) are allowed."

    if include_column_name:
        template_str = "$column " + template_str

    row_condition = params["row_condition"]
    if row_condition is not None:
        condition_template, condition_params = (
            parse_row_condition_string_pandas_engine(row_condition, with_schema=True)
        )
        template_str = condition_template + ", then " + template_str
        params_with_json_schema.update(condition_params)

    params_with_json_schema = add_values_with_json_schema_from_list_in_params(
        params=params,
        params_with_json_schema=params_with_json_schema,
        param_key_with_list="value_set",
    )

    return (template_str, params_with_json_schema, styling)
def _prescriptive_renderer(
    cls,
    configuration=None,
    result=None,
    language=None,
    runtime_configuration=None,
    **kwargs,
):
    """Build the string template stating that values must be decreasing.

    ``strictly`` switches between "strictly less than" and "less than or
    equal to"; ``parse_strings_as_datetimes`` appends a note about datetime
    parsing.

    Returns:
        A one-element list holding a ``RenderedStringTemplateContent``.
    """
    runtime_configuration = runtime_configuration or {}
    # A missing key and an explicit None both mean "show the column name".
    include_column_name = runtime_configuration.get("include_column_name")
    if include_column_name is None:
        include_column_name = True
    styling = runtime_configuration.get("styling")

    params = substitute_none_for_missing(
        configuration.kwargs,
        [
            "column",
            "strictly",
            "mostly",
            "parse_strings_as_datetimes",
            "row_condition",
            "condition_parser",
        ],
    )

    comparison = (
        "strictly less than" if params.get("strictly") else "less than or equal to"
    )
    template_str = f"values must be {comparison} previous values"

    if params["mostly"] is None:
        template_str += "."
    else:
        params["mostly_pct"] = num_to_str(
            params["mostly"] * 100, precision=15, no_scientific=True
        )
        template_str += ", at least $mostly_pct % of the time."

    if params.get("parse_strings_as_datetimes"):
        template_str += " Values should be parsed as datetimes."

    if include_column_name:
        template_str = f"$column {template_str}"

    row_condition = params["row_condition"]
    if row_condition is not None:
        condition_template, condition_params = (
            parse_row_condition_string_pandas_engine(row_condition)
        )
        template_str = f"{condition_template}, then {template_str}"
        params.update(condition_params)

    string_template = {
        "template": template_str,
        "params": params,
        "styling": styling,
    }
    return [
        RenderedStringTemplateContent(
            content_block_type="string_template",
            string_template=string_template,
        )
    ]
def _atomic_prescriptive_template(
    cls,
    configuration=None,
    result=None,
    language=None,
    runtime_configuration=None,
    **kwargs,
):
    """Build the atomic template for "values unique across columns".

    Args:
        configuration: Expectation configuration; its ``kwargs`` supply
            ``column_list``, ``ignore_row_if``, ``row_condition``,
            ``condition_parser`` and ``mostly``.
        result: Not used here.
        language: Not used here.
        runtime_configuration: Optional mapping; ``styling`` is read from it.
        **kwargs: Not used here.

    Returns:
        Tuple ``(template_str, params_with_json_schema, styling)``.
    """

    def _typed(json_type, value):
        # One schema-annotated parameter entry.
        return {"schema": {"type": json_type}, "value": value}

    runtime_configuration = runtime_configuration or {}
    # Read for parity with sibling renderers; this template never uses it.
    include_column_name = runtime_configuration.get("include_column_name", True)
    if include_column_name is None:
        include_column_name = True
    styling = runtime_configuration.get("styling")

    params = substitute_none_for_missing(
        configuration.kwargs,
        [
            "column_list",
            "ignore_row_if",
            "row_condition",
            "condition_parser",
            "mostly",
        ],
    )

    params_with_json_schema = {
        "column_list": _typed("array", params.get("column_list", [])),
        "ignore_row_if": _typed("string", params.get("ignore_row_if")),
        "row_condition": _typed("string", params.get("row_condition", "")),
        "condition_parser": _typed("string", params.get("condition_parser", "")),
        "mostly": _typed("number", params.get("mostly", 1)),
        "mostly_pct": _typed("string", params.get("mostly_pct")),
    }

    mostly = params["mostly"]
    if mostly is not None and mostly < 1.0:
        params_with_json_schema["mostly_pct"]["value"] = num_to_str(
            mostly * 100, precision=15, no_scientific=True
        )
        template_str = (
            "Values must be unique across columns, at least $mostly_pct % of the time: "
        )
    else:
        template_str = "Values must always be unique across columns: "

    # A missing or empty column list contributes no placeholders.
    column_list = params.get("column_list") or []
    placeholders = [f"$column_list_{idx}" for idx in range(len(column_list))]
    # NOTE(review): the keys stored in params keep the leading "$"; this
    # mirrors the existing behaviour -- confirm whether that is intended.
    for placeholder, column_name in zip(placeholders, column_list):
        params[placeholder] = column_name
    template_str += ", ".join(placeholders)

    row_condition = params["row_condition"]
    if row_condition is not None:
        condition_template, condition_params = (
            parse_row_condition_string_pandas_engine(row_condition, with_schema=True)
        )
        # Lower-case the first letter because the sentence now follows
        # the "if ..., then " prefix.
        lowered = template_str[0].lower() + template_str[1:]
        template_str = f"{condition_template}, then {lowered}"
        params_with_json_schema.update(condition_params)

    params_with_json_schema = add_values_with_json_schema_from_list_in_params(
        params=params,
        params_with_json_schema=params_with_json_schema,
        param_key_with_list="column_list",
    )
    return (template_str, params_with_json_schema, styling)
def _atomic_prescriptive_template(
    cls,
    configuration=None,
    result=None,
    language=None,
    runtime_configuration=None,
    **kwargs,
):
    """Build the atomic template for "distinct values must belong to this set".

    Args:
        configuration: Expectation configuration; its ``kwargs`` supply
            ``column``, ``value_set``, ``row_condition`` and
            ``condition_parser``.
        result: Not used here.
        language: Not used here.
        runtime_configuration: Optional mapping. ``include_column_name``
            (True when absent or None) and ``styling`` are read from it.
        **kwargs: Not used here.

    Returns:
        Tuple ``(template_str, params_with_json_schema, styling)``.
    """
    runtime_configuration = runtime_configuration or {}
    show_column = runtime_configuration.get("include_column_name", True)
    if show_column is None:
        show_column = True
    styling = runtime_configuration.get("styling")

    params = substitute_none_for_missing(
        configuration.kwargs,
        ["column", "value_set", "row_condition", "condition_parser"],
    )

    schema_types = (
        ("column", "string"),
        ("value_set", "array"),
        ("row_condition", "string"),
        ("condition_parser", "string"),
    )
    params_with_json_schema = {
        key: {"schema": {"type": json_type}, "value": params.get(key)}
        for key, json_type in schema_types
    }

    value_set = params["value_set"]
    if value_set is None or len(value_set) == 0:
        if show_column:
            template_str = "$column distinct values must belong to this set: [ ]"
        else:
            template_str = (
                "distinct values must belong to a set, but that set is not specified."
            )
    else:
        # Register each member under v__<i> and collect its placeholder.
        placeholders = []
        for position, member in enumerate(value_set):
            params[f"v__{position}"] = member
            placeholders.append(f"$v__{position}")
        values_string = " ".join(placeholders)

        prefix = "$column " if show_column else ""
        template_str = (
            f"{prefix}distinct values must belong to this set: {values_string}."
        )

    row_condition = params["row_condition"]
    if row_condition is not None:
        condition_template, condition_params = (
            parse_row_condition_string_pandas_engine(row_condition, with_schema=True)
        )
        template_str = f"{condition_template}, then {template_str}"
        params_with_json_schema.update(condition_params)

    params_with_json_schema = add_values_with_json_schema_from_list_in_params(
        params=params,
        params_with_json_schema=params_with_json_schema,
        param_key_with_list="value_set",
    )
    return (template_str, params_with_json_schema, styling)
def _atomic_prescriptive_template(
    cls,
    configuration=None,
    result=None,
    language=None,
    runtime_configuration=None,
    **kwargs,
):
    """Build the atomic template describing the allowed number of rows.

    Args:
        configuration: Expectation configuration; its ``kwargs`` supply
            ``min_value``, ``max_value``, ``row_condition``,
            ``condition_parser``, ``strict_min`` and ``strict_max``.
        result: Not used here.
        language: Not used here.
        runtime_configuration: Optional mapping; ``styling`` is read from it.
        **kwargs: Not used here.

    Returns:
        Tuple ``(template_str, params_with_json_schema, styling)``.
    """
    runtime_configuration = runtime_configuration or {}
    # Read for parity with sibling renderers; this template never uses it.
    include_column_name = runtime_configuration.get("include_column_name", True)
    if include_column_name is None:
        include_column_name = True
    styling = runtime_configuration.get("styling")

    params = substitute_none_for_missing(
        configuration.kwargs,
        [
            "min_value",
            "max_value",
            "row_condition",
            "condition_parser",
            "strict_min",
            "strict_max",
        ],
    )

    # Schema-annotated copy of the parameters.
    schema_types = (
        ("min_value", "number"),
        ("max_value", "number"),
        ("condition_parser", "string"),
        ("strict_min", "boolean"),
        ("strict_max", "boolean"),
    )
    params_with_json_schema = {
        key: {"schema": {"type": json_type}, "value": params.get(key)}
        for key, json_type in schema_types
    }

    if params["min_value"] is None and params["max_value"] is None:
        template_str = "May have any number of rows."
    else:
        at_least_str, at_most_str = handle_strict_min_max(params)
        # At least one bound is set on this branch.
        if params["min_value"] is None:
            template_str = f"Must have {at_most_str} $max_value rows."
        elif params["max_value"] is None:
            template_str = f"Must have {at_least_str} $min_value rows."
        else:
            template_str = (
                f"Must have {at_least_str} $min_value "
                f"and {at_most_str} $max_value rows."
            )

    row_condition = params["row_condition"]
    if row_condition is not None:
        condition_template, condition_params = (
            parse_row_condition_string_pandas_engine(row_condition, with_schema=True)
        )
        # Lower-case the first letter: the sentence now follows "..., then ".
        lowered = template_str[0].lower() + template_str[1:]
        template_str = f"{condition_template}, then {lowered}"
        params_with_json_schema.update(condition_params)

    return (template_str, params_with_json_schema, styling)
def _prescriptive_renderer(
    cls,
    configuration=None,
    result=None,
    language=None,
    runtime_configuration=None,
    **kwargs,
):
    """Render a "values must be unique across columns" sentence.

    A missing (None) or empty ``column_list`` is tolerated: the sentence is
    rendered without column placeholders instead of raising.

    Args:
        configuration: Expectation configuration; its ``kwargs`` supply
            ``column_list``, ``ignore_row_if``, ``row_condition``,
            ``condition_parser`` and ``mostly``.
        result: Not used by this renderer.
        language: Not used by this renderer.
        runtime_configuration: Optional mapping; ``styling`` is read from it.
        **kwargs: Not used by this renderer.

    Returns:
        A one-element list containing a ``RenderedStringTemplateContent``.
    """
    runtime_configuration = runtime_configuration or {}
    styling = runtime_configuration.get("styling")

    params = substitute_none_for_missing(
        configuration.kwargs,
        [
            "column_list",
            "ignore_row_if",
            "row_condition",
            "condition_parser",
            "mostly",
        ],
    )

    if params["mostly"] is not None:
        params["mostly_pct"] = num_to_str(
            params["mostly"] * 100, precision=15, no_scientific=True
        )
        mostly_str = ", at least $mostly_pct % of the time"
    else:
        mostly_str = ""

    template_str = f"Values must always be unique across columns{mostly_str}: "

    # Guard against a None/empty list: indexing it used to raise and would
    # emit a bogus "$column_list_-1" placeholder.
    column_list = params["column_list"] or []
    for idx, column_name in enumerate(column_list):
        params[f"column_list_{idx}"] = column_name
    template_str += ", ".join(
        f"$column_list_{idx}" for idx in range(len(column_list))
    )

    if params["row_condition"] is not None:
        (
            conditional_template_str,
            conditional_params,
        ) = parse_row_condition_string_pandas_engine(params["row_condition"])
        # Lower-case the first letter: the sentence now follows "..., then ".
        template_str = (
            conditional_template_str
            + ", then "
            + template_str[0].lower()
            + template_str[1:]
        )
        params.update(conditional_params)

    return [
        RenderedStringTemplateContent(
            **{
                "content_block_type": "string_template",
                "string_template": {
                    "template": template_str,
                    "params": params,
                    "styling": styling,
                },
            }
        )
    ]
def _atomic_prescriptive_template(
    cls,
    configuration=None,
    result=None,
    language=None,
    runtime_configuration=None,
    **kwargs,
):
    """Build the atomic template for "values in two columns must be equal".

    When either ``column_A`` or ``column_B`` is missing, the returned
    template is the "unrecognized kwargs" message and no row-condition
    prefix is added. Previously that message was always overwritten by the
    ``mostly`` branch that followed it.

    Args:
        configuration: Expectation configuration; its ``kwargs`` supply
            ``column_A``, ``column_B``, ``ignore_row_if``, ``mostly``,
            ``row_condition`` and ``condition_parser``.
        result: Not used here.
        language: Not used here.
        runtime_configuration: Optional mapping; ``styling`` is read from it.
        **kwargs: Not used here.

    Returns:
        Tuple ``(template_str, params_with_json_schema, styling)``.
    """
    runtime_configuration = runtime_configuration or {}
    styling = runtime_configuration.get("styling")

    params = substitute_none_for_missing(
        configuration.kwargs,
        [
            "column_A",
            "column_B",
            "ignore_row_if",
            "mostly",
            "row_condition",
            "condition_parser",
        ],
    )

    params_with_json_schema = {
        "column_A": {
            "schema": {"type": "string"},
            "value": params.get("column_A"),
        },
        "column_B": {
            "schema": {"type": "string"},
            "value": params.get("column_B"),
        },
        # "parse_strings_as_datetimes" is not among the substituted kwargs
        # above, so this lookup relies on params.get's None default.
        "parse_strings_as_datetimes": {
            "schema": {"type": "boolean"},
            "value": params.get("parse_strings_as_datetimes"),
        },
        "ignore_row_if": {
            "schema": {"type": "string"},
            "value": params.get("ignore_row_if"),
        },
        "mostly": {
            "schema": {"type": "number"},
            "value": params.get("mostly"),
        },
        # NOTE(review): sibling templates declare mostly_pct as "string";
        # confirm whether "number" is intended here.
        "mostly_pct": {
            "schema": {"type": "number"},
            "value": params.get("mostly_pct"),
        },
        "row_condition": {
            "schema": {"type": "string"},
            "value": params.get("row_condition"),
        },
        "condition_parser": {
            "schema": {"type": "string"},
            "value": params.get("condition_parser"),
        },
    }

    # NOTE: This renderer doesn't do anything with "ignore_row_if"

    if (params["column_A"] is None) or (params["column_B"] is None):
        template_str = " unrecognized kwargs for expect_column_pair_values_to_be_equal: missing column."
        # Suppress the "if ..., then" prefix for the error message.
        params["row_condition"] = None
    elif params["mostly"] is None:
        # `elif` (not `if`) so the missing-column message above survives.
        template_str = "Values in $column_A and $column_B must always be equal."
    else:
        params_with_json_schema["mostly_pct"]["value"] = num_to_str(
            params["mostly"] * 100, precision=15, no_scientific=True
        )
        template_str = "Values in $column_A and $column_B must be equal, at least $mostly_pct % of the time."

    if params["row_condition"] is not None:
        (
            conditional_template_str,
            conditional_params,
        ) = parse_row_condition_string_pandas_engine(
            params["row_condition"], with_schema=True
        )
        # Lower-case the first letter: the sentence now follows "..., then ".
        template_str = (
            conditional_template_str
            + ", then "
            + template_str[0].lower()
            + template_str[1:]
        )
        params_with_json_schema.update(conditional_params)

    return (template_str, params_with_json_schema, styling)
def _atomic_prescriptive_template(
    cls,
    configuration=None,
    result=None,
    language=None,
    runtime_configuration=None,
    **kwargs,
):
    """Build the atomic template for "values must be greater than previous values".

    Args:
        configuration: Expectation configuration; its ``kwargs`` supply
            ``column``, ``strictly``, ``mostly``,
            ``parse_strings_as_datetimes``, ``row_condition`` and
            ``condition_parser``.
        result: Not used here.
        language: Not used here.
        runtime_configuration: Optional mapping. ``include_column_name``
            (True when absent or None) and ``styling`` are read from it.
        **kwargs: Not used here.

    Returns:
        Tuple ``(template_str, params_with_json_schema, styling)``.
    """
    runtime_configuration = runtime_configuration or {}
    show_column = runtime_configuration.get("include_column_name", True)
    if show_column is None:
        show_column = True
    styling = runtime_configuration.get("styling")

    params = substitute_none_for_missing(
        configuration.kwargs,
        [
            "column",
            "strictly",
            "mostly",
            "parse_strings_as_datetimes",
            "row_condition",
            "condition_parser",
        ],
    )

    schema_types = (
        ("column", "string"),
        ("strictly", "boolean"),
        ("mostly", "number"),
        ("mostly_pct", "string"),
        ("parse_strings_as_datetimes", "boolean"),
        ("row_condition", "string"),
        ("condition_parser", "string"),
    )
    params_with_json_schema = {
        key: {"schema": {"type": json_type}, "value": params.get(key)}
        for key, json_type in schema_types
    }

    comparison = (
        "strictly greater than"
        if params.get("strictly")
        else "greater than or equal to"
    )
    fragments = ["values must be " + comparison + " previous values"]

    mostly = params["mostly"]
    if mostly is not None and mostly < 1.0:
        params_with_json_schema["mostly_pct"]["value"] = num_to_str(
            mostly * 100, precision=15, no_scientific=True
        )
        fragments.append(", at least $mostly_pct % of the time.")
    else:
        fragments.append(".")

    if params.get("parse_strings_as_datetimes"):
        fragments.append(" Values should be parsed as datetimes.")

    if show_column:
        fragments.insert(0, "$column ")

    template_str = "".join(fragments)

    row_condition = params["row_condition"]
    if row_condition is not None:
        condition_template, condition_params = (
            parse_row_condition_string_pandas_engine(row_condition, with_schema=True)
        )
        template_str = condition_template + ", then " + template_str
        params_with_json_schema.update(condition_params)

    return (template_str, params_with_json_schema, styling)
def _atomic_prescriptive_template(
    cls,
    configuration=None,
    result=None,
    language=None,
    runtime_configuration=None,
    **kwargs,
):
    """Build the atomic template for "values must be unique across columns".

    A missing (None) or empty ``column_list`` is tolerated: the sentence is
    produced without column placeholders instead of raising.

    Args:
        configuration: Expectation configuration; its ``kwargs`` supply
            ``column_list``, ``ignore_row_if``, ``row_condition``,
            ``condition_parser`` and ``mostly``.
        result: Not used here.
        language: Not used here.
        runtime_configuration: Optional mapping; ``styling`` is read from it.
        **kwargs: Not used here.

    Returns:
        Tuple ``(template_str, params_with_json_schema, styling)``.
    """
    runtime_configuration = runtime_configuration or {}
    styling = runtime_configuration.get("styling")

    # NOTE: This expectation is deprecated, please use
    # expect_select_column_values_to_be_unique_within_record instead.
    params = substitute_none_for_missing(
        configuration.kwargs,
        [
            "column_list",
            "ignore_row_if",
            "row_condition",
            "condition_parser",
            "mostly",
        ],
    )

    params_with_json_schema = {
        "column_list": {
            "schema": {"type": "array"},
            "value": params.get("column_list"),
        },
        "ignore_row_if": {
            "schema": {"type": "string"},
            "value": params.get("ignore_row_if"),
        },
        "row_condition": {
            "schema": {"type": "string"},
            "value": params.get("row_condition"),
        },
        "condition_parser": {
            "schema": {"type": "string"},
            "value": params.get("condition_parser"),
        },
        "mostly": {
            "schema": {"type": "number"},
            "value": params.get("mostly"),
        },
        "mostly_pct": {
            "schema": {"type": "string"},
            "value": params.get("mostly_pct"),
        },
    }

    if params["mostly"] is not None and params["mostly"] < 1.0:
        params_with_json_schema["mostly_pct"]["value"] = num_to_str(
            params["mostly"] * 100, precision=15, no_scientific=True
        )
        template_str = (
            "Values must be unique across columns, at least $mostly_pct % of the time: "
        )
    else:
        template_str = "Values must always be unique across columns: "

    # Guard against a None/empty list: indexing it used to raise and would
    # emit a bogus "$column_list_-1" placeholder.
    column_list = params["column_list"] or []
    for idx, column_name in enumerate(column_list):
        params[f"column_list_{idx}"] = column_name
    template_str += ", ".join(
        f"$column_list_{idx}" for idx in range(len(column_list))
    )

    if params["row_condition"] is not None:
        (
            conditional_template_str,
            conditional_params,
        ) = parse_row_condition_string_pandas_engine(
            params["row_condition"], with_schema=True
        )
        # Lower-case the first letter: the sentence now follows "..., then ".
        template_str = (
            conditional_template_str
            + ", then "
            + template_str[0].lower()
            + template_str[1:]
        )
        params_with_json_schema.update(conditional_params)

    params_with_json_schema = add_values_with_json_schema_from_list_in_params(
        params=params,
        params_with_json_schema=params_with_json_schema,
        param_key_with_list="column_list",
    )
    return (template_str, params_with_json_schema, styling)
def _prescriptive_renderer(
    cls,
    configuration=None,
    result=None,
    language=None,
    runtime_configuration=None,
    **kwargs,
):
    """Render a sentence bounding the number of unique values in a column.

    Args:
        configuration: Expectation configuration; its ``kwargs`` supply
            ``column``, ``min_value``, ``max_value``, ``mostly``,
            ``row_condition``, ``condition_parser``, ``strict_min`` and
            ``strict_max``.
        result: Not used by this renderer.
        language: Not used by this renderer.
        runtime_configuration: Optional mapping. ``include_column_name``
            (True when absent or None) and ``styling`` are read from it.
        **kwargs: Not used by this renderer.

    Returns:
        A one-element list containing a ``RenderedStringTemplateContent``.
    """
    runtime_configuration = runtime_configuration or {}
    show_column = runtime_configuration.get("include_column_name", True)
    if show_column is None:
        show_column = True
    styling = runtime_configuration.get("styling")

    params = substitute_none_for_missing(
        configuration.kwargs,
        [
            "column",
            "min_value",
            "max_value",
            "mostly",
            "row_condition",
            "condition_parser",
            "strict_min",
            "strict_max",
        ],
    )

    at_least_str, at_most_str = handle_strict_min_max(params)

    if (params["min_value"] is None) and (params["max_value"] is None):
        template_str = "may have any number of unique values."
    else:
        # Decide the sentence ending first, then the bounds phrase.
        if params["mostly"] is not None and params["mostly"] < 1.0:
            params["mostly_pct"] = num_to_str(
                params["mostly"] * 100, precision=15, no_scientific=True
            )
            ending = ", at least $mostly_pct % of the time."
        else:
            ending = "."

        if params["min_value"] is None:
            bounds = f"{at_most_str} $max_value"
        elif params["max_value"] is None:
            bounds = f"{at_least_str} $min_value"
        else:
            bounds = f"{at_least_str} $min_value and {at_most_str} $max_value"

        template_str = "must have " + bounds + " unique values" + ending

    if show_column:
        template_str = "$column " + template_str

    row_condition = params["row_condition"]
    if row_condition is not None:
        condition_template, condition_params = (
            parse_row_condition_string_pandas_engine(row_condition)
        )
        template_str = condition_template + ", then " + template_str
        params.update(condition_params)

    return [
        RenderedStringTemplateContent(
            content_block_type="string_template",
            string_template={
                "template": template_str,
                "params": params,
                "styling": styling,
            },
        )
    ]
def _prescriptive_renderer(
    cls,
    configuration=None,
    result=None,
    language=None,
    runtime_configuration=None,
    **kwargs,
):
    """Render the quantile-range expectation as a sentence plus a table.

    Args:
        configuration: Expectation configuration, indexed with
            ``configuration["kwargs"]``; the kwargs supply ``column``,
            ``quantile_ranges``, ``row_condition`` and ``condition_parser``.
        result: Not used by this renderer.
        language: Not used by this renderer.
        runtime_configuration: Optional mapping; ``include_column_name``
            (True when absent or None) is read from it.
        **kwargs: Not used by this renderer.

    Returns:
        A two-element list: a plain dict describing the string template,
        followed by a ``RenderedTableContent`` with one row per quantile.
    """

    def _bound_text(bound):
        # An open-ended bound is displayed as "Any".
        return "Any" if bound is None else str(bound)

    runtime_configuration = runtime_configuration or {}
    show_column = runtime_configuration.get("include_column_name", True)
    if show_column is None:
        show_column = True
    # Read but not attached to either returned block.
    styling = runtime_configuration.get("styling")

    params = substitute_none_for_missing(
        configuration["kwargs"],
        ["column", "quantile_ranges", "row_condition", "condition_parser"],
    )

    template_str = "quantiles must be within the following value ranges."
    if show_column:
        template_str = f"$column {template_str}"

    row_condition = params["row_condition"]
    if row_condition is not None:
        condition_template, condition_params = (
            parse_row_condition_string_pandas_engine(row_condition)
        )
        lowered = template_str[0].lower() + template_str[1:]
        template_str = f"{condition_template}, then {lowered}"
        params.update(condition_params)

    expectation_string_obj = {
        "content_block_type": "string_template",
        "string_template": {"template": template_str, "params": params},
    }

    quantile_ranges = params["quantile_ranges"]
    quantiles = quantile_ranges["quantiles"]
    value_ranges = quantile_ranges["value_ranges"]

    # Well-known quantiles get a friendly label; others are shown numerically.
    named_quantiles = {0.25: "Q1", 0.75: "Q3", 0.50: "Median"}
    table_rows = [
        [
            named_quantiles.get(quantile, f"{quantile:3.2f}"),
            _bound_text(value_range[0]),
            _bound_text(value_range[1]),
        ]
        for quantile, value_range in zip(quantiles, value_ranges)
    ]

    quantile_range_table = RenderedTableContent(
        content_block_type="table",
        header_row=["Quantile", "Min Value", "Max Value"],
        table=table_rows,
        styling={
            "body": {
                "classes": [
                    "table",
                    "table-sm",
                    "table-unbordered",
                    "col-4",
                    "mt-2",
                ],
            },
            "parent": {"styles": {"list-style-type": "none"}},
        },
    )

    return [expectation_string_obj, quantile_range_table]
def _prescriptive_renderer(
    cls,
    configuration=None,
    result=None,
    language=None,
    runtime_configuration=None,
    **kwargs,
):
    """Render a "value types must belong to this set" sentence.

    Args:
        configuration: Expectation configuration; its ``kwargs`` supply
            ``column``, ``type_list``, ``mostly``, ``row_condition`` and
            ``condition_parser``.
        result: Not used by this renderer.
        language: Not used by this renderer.
        runtime_configuration: Optional mapping. ``include_column_name``
            (True when absent or None) and ``styling`` are read from it.
        **kwargs: Not used by this renderer.

    Returns:
        A one-element list containing a ``RenderedStringTemplateContent``.
    """
    runtime_configuration = runtime_configuration or {}
    show_column = runtime_configuration.get("include_column_name", True)
    if show_column is None:
        show_column = True
    styling = runtime_configuration.get("styling")

    params = substitute_none_for_missing(
        configuration.kwargs,
        ["column", "type_list", "mostly", "row_condition", "condition_parser"],
    )

    type_list = params["type_list"]
    if type_list is None:
        template_str = (
            "value types may be any value, but observed value will be reported"
        )
    else:
        # Register each type under v__<i> and collect its placeholder.
        placeholders = []
        for position, type_name in enumerate(type_list):
            params[f"v__{position}"] = type_name
            placeholders.append(f"$v__{position}")
        values_string = " ".join(placeholders)

        if params["mostly"] is not None:
            params["mostly_pct"] = num_to_str(
                params["mostly"] * 100, precision=15, no_scientific=True
            )
            ending = ", at least $mostly_pct % of the time."
        else:
            ending = "."

        template_str = f"value types must belong to this set: {values_string}{ending}"

    if show_column:
        template_str = f"$column {template_str}"

    row_condition = params["row_condition"]
    if row_condition is not None:
        condition_template, condition_params = (
            parse_row_condition_string_pandas_engine(row_condition)
        )
        template_str = f"{condition_template}, then {template_str}"
        params.update(condition_params)

    return [
        RenderedStringTemplateContent(
            content_block_type="string_template",
            string_template={
                "template": template_str,
                "params": params,
                "styling": styling,
            },
        )
    ]
def _atomic_prescriptive_template(
    cls,
    configuration=None,
    result=None,
    language=None,
    runtime_configuration=None,
    **kwargs,
):
    """Build the atomic template for "value types must belong to this set".

    Args:
        configuration: Expectation configuration; its ``kwargs`` supply
            ``column``, ``type_list``, ``mostly``, ``row_condition`` and
            ``condition_parser``.
        result: Not used here.
        language: Not used here.
        runtime_configuration: Optional mapping. ``include_column_name``
            (True when absent or None) and ``styling`` are read from it.
        **kwargs: Not used here.

    Returns:
        Tuple ``(template_str, params_with_json_schema, styling)``.
    """
    runtime_configuration = runtime_configuration or {}
    show_column = runtime_configuration.get("include_column_name", True)
    if show_column is None:
        show_column = True
    styling = runtime_configuration.get("styling")

    params = substitute_none_for_missing(
        configuration.kwargs,
        ["column", "type_list", "mostly", "row_condition", "condition_parser"],
    )

    schema_types = (
        ("column", "string"),
        ("type_list", "array"),
        ("mostly", "number"),
        ("mostly_pct", "string"),
        ("row_condition", "string"),
        ("condition_parser", "string"),
    )
    params_with_json_schema = {
        key: {"schema": {"type": json_type}, "value": params.get(key)}
        for key, json_type in schema_types
    }

    type_list = params["type_list"]
    if type_list is None:
        template_str = (
            "value types may be any value, but observed value will be reported"
        )
    else:
        # Register each type under v__<i> and collect its placeholder.
        placeholders = []
        for position, type_name in enumerate(type_list):
            params["v__" + str(position)] = type_name
            placeholders.append("$v__" + str(position))
        values_string = " ".join(placeholders)

        mostly = params["mostly"]
        if mostly is not None and mostly < 1.0:
            params_with_json_schema["mostly_pct"]["value"] = num_to_str(
                mostly * 100, precision=15, no_scientific=True
            )
            ending = ", at least $mostly_pct % of the time."
        else:
            ending = "."

        template_str = "value types must belong to this set: " + values_string + ending

    if show_column:
        template_str = "$column " + template_str

    row_condition = params["row_condition"]
    if row_condition is not None:
        condition_template, condition_params = (
            parse_row_condition_string_pandas_engine(row_condition, with_schema=True)
        )
        template_str = condition_template + ", then " + template_str
        params_with_json_schema.update(condition_params)

    params_with_json_schema = add_values_with_json_schema_from_list_in_params(
        params=params,
        params_with_json_schema=params_with_json_schema,
        param_key_with_list="type_list",
    )
    return (template_str, params_with_json_schema, styling)