def _atomic_prescriptive_template(
    cls,
    configuration=None,
    result=None,
    language=None,
    runtime_configuration=None,
    **kwargs,
):
    """Build the atomic prescriptive template for an ordered list of columns.

    Args:
        configuration: Object whose ``kwargs`` mapping is read for
            ``column_list``.
        result: Not used by this method.
        language: Not used by this method.
        runtime_configuration: Optional mapping; only ``styling`` is read.
            ``None`` is treated as an empty mapping.
        **kwargs: Not used by this method.

    Returns:
        A ``(template_str, params_with_json_schema, styling)`` tuple.
    """
    runtime_configuration = runtime_configuration or {}
    styling = runtime_configuration.get("styling")

    params = substitute_none_for_missing(configuration.kwargs, ["column_list"])
    column_list = params["column_list"]

    if column_list is None:
        template_str = "Must have a list of columns in a specific order, but that order is not specified."
    else:
        # Build one "$column_list_<idx>" placeholder per column. Iterating with
        # enumerate() (rather than indexing the "last" element separately)
        # keeps an empty list from raising IndexError: it simply yields the
        # header with no placeholders.
        placeholders = []
        for idx, column_name in enumerate(column_list):
            key = "column_list_" + str(idx)
            params[key] = column_name
            placeholders.append("$" + key)
        template_str = "Must have these columns in this order: " + ", ".join(
            placeholders
        )

    params_with_json_schema = {
        "column_list": {
            "schema": {"type": "array"},
            "value": params.get("column_list"),
        },
    }
    params_with_json_schema = add_values_with_json_schema_from_list_in_params(
        params=params,
        params_with_json_schema=params_with_json_schema,
        param_key_with_list="column_list",
    )
    return (template_str, params_with_json_schema, styling)
def _atomic_prescriptive_template(
    cls,
    configuration=None,
    result=None,
    language=None,
    runtime_configuration=None,
    **kwargs,
):
    """Build the atomic prescriptive template for a "match regex list" check.

    Args:
        configuration: Object whose ``kwargs`` mapping is read for ``column``,
            ``regex_list``, ``mostly``, ``match_on``, ``row_condition`` and
            ``condition_parser``.
        result: Not used by this method.
        language: Not used by this method.
        runtime_configuration: Optional mapping; ``include_column_name``
            (defaults to ``True``, also when explicitly ``None``) and
            ``styling`` are read. ``None`` is treated as an empty mapping.
        **kwargs: Not used by this method.

    Returns:
        A ``(template_str, params_with_json_schema, styling)`` tuple.
    """
    runtime_configuration = runtime_configuration or {}
    include_column_name = runtime_configuration.get("include_column_name", True)
    if include_column_name is None:
        include_column_name = True
    styling = runtime_configuration.get("styling")

    params = substitute_none_for_missing(
        configuration.kwargs,
        [
            "column",
            "regex_list",
            "mostly",
            "match_on",
            "row_condition",
            "condition_parser",
        ],
    )
    params_with_json_schema = {
        "column": {"schema": {"type": "string"}, "value": params.get("column")},
        "regex_list": {
            "schema": {"type": "array"},
            "value": params.get("regex_list"),
        },
        "mostly": {"schema": {"type": "number"}, "value": params.get("mostly")},
        # The template below can reference $mostly_pct, so the returned
        # parameter dict needs an entry for it.
        "mostly_pct": {
            "schema": {"type": "string"},
            "value": params.get("mostly_pct"),
        },
        "match_on": {"schema": {"type": "string"}, "value": params.get("match_on")},
        "row_condition": {
            "schema": {"type": "string"},
            "value": params.get("row_condition"),
        },
        "condition_parser": {
            "schema": {"type": "string"},
            "value": params.get("condition_parser"),
        },
    }

    regex_list = params.get("regex_list")
    if not regex_list:
        values_string = "[ ]"
    else:
        placeholders = []
        for i, v in enumerate(regex_list):
            params["v__" + str(i)] = v
            placeholders.append("$v__" + str(i))
        values_string = " ".join(placeholders)

    if params.get("match_on") == "all":
        template_str = (
            "values must match all of the following regular expressions: "
            + values_string
        )
    else:
        template_str = (
            "values must match any of the following regular expressions: "
            + values_string
        )

    if params["mostly"] is not None:
        mostly_pct = num_to_str(
            params["mostly"] * 100, precision=15, no_scientific=True
        )
        params["mostly_pct"] = mostly_pct
        # Publish the percentage in the returned dict as well; previously it
        # was only written to the scratch ``params`` mapping.
        params_with_json_schema["mostly_pct"]["value"] = mostly_pct
        template_str += ", at least $mostly_pct % of the time."
    else:
        template_str += "."

    if include_column_name:
        template_str = "$column " + template_str

    if params["row_condition"] is not None:
        (
            conditional_template_str,
            conditional_params,
        ) = parse_row_condition_string_pandas_engine(
            params["row_condition"], with_schema=True
        )
        template_str = conditional_template_str + ", then " + template_str
        params_with_json_schema.update(conditional_params)

    params_with_json_schema = add_values_with_json_schema_from_list_in_params(
        params=params,
        params_with_json_schema=params_with_json_schema,
        param_key_with_list="regex_list",
    )
    return (template_str, params_with_json_schema, styling)
def _atomic_prescriptive_template(
    cls,
    configuration=None,
    result=None,
    language=None,
    runtime_configuration=None,
    **kwargs,
):
    """Build the atomic prescriptive template for "unique across columns".

    Args:
        configuration: Object whose ``kwargs`` mapping is read for
            ``column_list``, ``ignore_row_if``, ``row_condition``,
            ``condition_parser`` and ``mostly``.
        result: Not used by this method.
        language: Not used by this method.
        runtime_configuration: Optional mapping; only ``styling`` is read.
            ``None`` is treated as an empty mapping.
        **kwargs: Not used by this method.

    Returns:
        A ``(template_str, params_with_json_schema, styling)`` tuple.
    """
    runtime_configuration = runtime_configuration or {}
    styling = runtime_configuration.get("styling")

    # NOTE: This expectation is deprecated, please use
    # expect_select_column_values_to_be_unique_within_record instead.

    params = substitute_none_for_missing(
        configuration.kwargs,
        [
            "column_list",
            "ignore_row_if",
            "row_condition",
            "condition_parser",
            "mostly",
        ],
    )
    params_with_json_schema = {
        "column_list": {
            "schema": {"type": "array"},
            "value": params.get("column_list"),
        },
        "ignore_row_if": {
            "schema": {"type": "string"},
            "value": params.get("ignore_row_if"),
        },
        "row_condition": {
            "schema": {"type": "string"},
            "value": params.get("row_condition"),
        },
        "condition_parser": {
            "schema": {"type": "string"},
            "value": params.get("condition_parser"),
        },
        "mostly": {
            "schema": {"type": "number"},
            "value": params.get("mostly"),
        },
        "mostly_pct": {
            "schema": {"type": "string"},
            "value": params.get("mostly_pct"),
        },
    }

    if params["mostly"] is not None and params["mostly"] < 1.0:
        params_with_json_schema["mostly_pct"]["value"] = num_to_str(
            params["mostly"] * 100, precision=15, no_scientific=True
        )
        template_str = "Values must be unique across columns, at least $mostly_pct % of the time: "
    else:
        template_str = "Values must always be unique across columns: "

    # A missing (None) or empty column list must not crash the renderer:
    # treat both as "no columns" and emit the header without placeholders.
    column_list = params["column_list"] or []
    placeholders = []
    for idx, column_name in enumerate(column_list):
        key = f"column_list_{idx}"
        params[key] = column_name
        placeholders.append(f"${key}")
    template_str += ", ".join(placeholders)

    if params["row_condition"] is not None:
        (
            conditional_template_str,
            conditional_params,
        ) = parse_row_condition_string_pandas_engine(
            params["row_condition"], with_schema=True
        )
        # The sentence now continues after the condition, so its first
        # letter is lower-cased.
        template_str = (
            conditional_template_str
            + ", then "
            + template_str[0].lower()
            + template_str[1:]
        )
        params_with_json_schema.update(conditional_params)

    params_with_json_schema = add_values_with_json_schema_from_list_in_params(
        params=params,
        params_with_json_schema=params_with_json_schema,
        param_key_with_list="column_list",
    )
    return (template_str, params_with_json_schema, styling)
def _atomic_prescriptive_template(
    cls,
    configuration=None,
    result=None,
    language=None,
    runtime_configuration=None,
    **kwargs,
):
    """Build the atomic prescriptive template for "unique across columns".

    Args:
        configuration: Object whose ``kwargs`` mapping is read for
            ``column_list``, ``ignore_row_if``, ``row_condition``,
            ``condition_parser`` and ``mostly``.
        result: Not used by this method.
        language: Not used by this method.
        runtime_configuration: Optional mapping; only ``styling`` is read.
            ``None`` is treated as an empty mapping.
        **kwargs: Not used by this method.

    Returns:
        A ``(template_str, params_with_json_schema, styling)`` tuple.
    """
    runtime_configuration = runtime_configuration or {}
    styling = runtime_configuration.get("styling")

    params = substitute_none_for_missing(
        configuration.kwargs,
        [
            "column_list",
            "ignore_row_if",
            "row_condition",
            "condition_parser",
            "mostly",
        ],
    )
    params_with_json_schema = {
        "column_list": {
            "schema": {"type": "array"},
            "value": params.get("column_list", []),
        },
        "ignore_row_if": {
            "schema": {"type": "string"},
            "value": params.get("ignore_row_if"),
        },
        "row_condition": {
            "schema": {"type": "string"},
            "value": params.get("row_condition", ""),
        },
        "condition_parser": {
            "schema": {"type": "string"},
            "value": params.get("condition_parser", ""),
        },
        "mostly": {
            "schema": {"type": "number"},
            "value": params.get("mostly", 1),
        },
        "mostly_pct": {
            "schema": {"type": "string"},
            "value": params.get("mostly_pct"),
        },
    }

    if params["mostly"] is not None and params["mostly"] < 1.0:
        params_with_json_schema["mostly_pct"]["value"] = num_to_str(
            params["mostly"] * 100, precision=15, no_scientific=True
        )
        template_str = "Values must be unique across columns, at least $mostly_pct % of the time: "
    else:
        template_str = "Values must always be unique across columns: "

    # None and empty lists both mean "no columns": header only, no placeholders.
    column_list = params.get("column_list") or []
    placeholders = []
    for idx, column_name in enumerate(column_list):
        # Store the value under the bare name; the "$" sigil belongs only in
        # the template text, where "$column_list_<idx>" refers to this key.
        key = f"column_list_{idx}"
        params[key] = column_name
        placeholders.append(f"${key}")
    template_str += ", ".join(placeholders)

    if params["row_condition"] is not None:
        (
            conditional_template_str,
            conditional_params,
        ) = parse_row_condition_string_pandas_engine(
            params["row_condition"], with_schema=True
        )
        # The sentence now continues after the condition, so its first
        # letter is lower-cased.
        template_str = (
            conditional_template_str
            + ", then "
            + template_str[0].lower()
            + template_str[1:]
        )
        params_with_json_schema.update(conditional_params)

    params_with_json_schema = add_values_with_json_schema_from_list_in_params(
        params=params,
        params_with_json_schema=params_with_json_schema,
        param_key_with_list="column_list",
    )
    return (template_str, params_with_json_schema, styling)
def _atomic_prescriptive_template(
    cls,
    configuration=None,
    result=None,
    language=None,
    runtime_configuration=None,
    **kwargs,
):
    """Build the atomic prescriptive template for "distinct values in set".

    Reads ``column``, ``value_set``, ``row_condition`` and
    ``condition_parser`` from ``configuration.kwargs`` and
    ``include_column_name`` / ``styling`` from ``runtime_configuration``.

    Returns:
        A ``(template_str, params_with_json_schema, styling)`` tuple.
    """
    runtime_configuration = runtime_configuration or {}
    include_column_name = runtime_configuration.get("include_column_name", True)
    if include_column_name is None:
        # An explicit None is treated like the default.
        include_column_name = True
    styling = runtime_configuration.get("styling")

    params = substitute_none_for_missing(
        configuration.kwargs,
        ["column", "value_set", "row_condition", "condition_parser"],
    )

    schema_types = (
        ("column", "string"),
        ("value_set", "array"),
        ("row_condition", "string"),
        ("condition_parser", "string"),
    )
    params_with_json_schema = {
        key: {"schema": {"type": schema_type}, "value": params.get(key)}
        for key, schema_type in schema_types
    }

    value_set = params["value_set"]
    if value_set is None or len(value_set) == 0:
        if include_column_name:
            template_str = "$column distinct values must belong to this set: [ ]"
        else:
            template_str = "distinct values must belong to a set, but that set is not specified."
    else:
        # Register each member as v__<n> and reference it from the template.
        placeholders = []
        for position, member in enumerate(value_set):
            params["v__" + str(position)] = member
            placeholders.append("$v__" + str(position))
        values_string = " ".join(placeholders)

        if include_column_name:
            lead_in = "$column distinct values must belong to this set: "
        else:
            lead_in = "distinct values must belong to this set: "
        template_str = lead_in + values_string + "."

    row_condition = params["row_condition"]
    if row_condition is not None:
        parsed = parse_row_condition_string_pandas_engine(
            row_condition, with_schema=True
        )
        conditional_template_str, conditional_params = parsed
        template_str = ", then ".join((conditional_template_str, template_str))
        params_with_json_schema.update(conditional_params)

    params_with_json_schema = add_values_with_json_schema_from_list_in_params(
        params=params,
        params_with_json_schema=params_with_json_schema,
        param_key_with_list="value_set",
    )
    return (template_str, params_with_json_schema, styling)
def _atomic_prescriptive_template(
    cls,
    configuration=None,
    result=None,
    language=None,
    runtime_configuration=None,
    **kwargs,
):
    """Build the atomic prescriptive template for "value types in type list".

    Reads ``column``, ``type_list``, ``mostly``, ``row_condition`` and
    ``condition_parser`` from ``configuration.kwargs`` and
    ``include_column_name`` / ``styling`` from ``runtime_configuration``.

    Returns:
        A ``(template_str, params_with_json_schema, styling)`` tuple.
    """
    runtime_configuration = runtime_configuration or {}
    include_column_name = runtime_configuration.get("include_column_name", True)
    if include_column_name is None:
        # An explicit None is treated like the default.
        include_column_name = True
    styling = runtime_configuration.get("styling")

    params = substitute_none_for_missing(
        configuration.kwargs,
        ["column", "type_list", "mostly", "row_condition", "condition_parser"],
    )

    schema_types = (
        ("column", "string"),
        ("type_list", "array"),
        ("mostly", "number"),
        ("mostly_pct", "string"),
        ("row_condition", "string"),
        ("condition_parser", "string"),
    )
    params_with_json_schema = {
        key: {"schema": {"type": schema_type}, "value": params.get(key)}
        for key, schema_type in schema_types
    }

    type_list = params["type_list"]
    if type_list is None:
        # No type list given: nothing is constrained.
        if include_column_name:
            template_str = "$column value types may be any value, but observed value will be reported"
        else:
            template_str = (
                "value types may be any value, but observed value will be reported"
            )
    else:
        # Register each type as v__<n> and reference it from the template.
        placeholders = []
        for position, type_name in enumerate(type_list):
            params["v__" + str(position)] = type_name
            placeholders.append("$v__" + str(position))
        values_string = " ".join(placeholders)

        mostly = params["mostly"]
        if mostly is not None and mostly < 1.0:
            params_with_json_schema["mostly_pct"]["value"] = num_to_str(
                mostly * 100, precision=15, no_scientific=True
            )
            ending = ", at least $mostly_pct % of the time."
        else:
            ending = "."

        if include_column_name:
            lead_in = "$column value types must belong to this set: "
        else:
            lead_in = "value types must belong to this set: "
        template_str = lead_in + values_string + ending

    row_condition = params["row_condition"]
    if row_condition is not None:
        parsed = parse_row_condition_string_pandas_engine(
            row_condition, with_schema=True
        )
        conditional_template_str, conditional_params = parsed
        template_str = "{}, then {}".format(conditional_template_str, template_str)
        params_with_json_schema.update(conditional_params)

    params_with_json_schema = add_values_with_json_schema_from_list_in_params(
        params=params,
        params_with_json_schema=params_with_json_schema,
        param_key_with_list="type_list",
    )
    return (template_str, params_with_json_schema, styling)
def _atomic_prescriptive_template(
    cls,
    configuration=None,
    result=None,
    language=None,
    runtime_configuration=None,
    **kwargs,
):
    """Build the atomic prescriptive template for "most common value in set".

    Reads ``column``, ``value_set``, ``ties_okay``, ``row_condition`` and
    ``condition_parser`` from ``configuration.kwargs`` and
    ``include_column_name`` / ``styling`` from ``runtime_configuration``.

    Returns:
        A ``(template_str, params_with_json_schema, styling)`` tuple.
    """
    runtime_configuration = runtime_configuration or {}
    include_column_name = runtime_configuration.get("include_column_name", True)
    if include_column_name is None:
        # An explicit None is treated like the default.
        include_column_name = True
    styling = runtime_configuration.get("styling")

    params = substitute_none_for_missing(
        configuration.kwargs,
        ["column", "value_set", "ties_okay", "row_condition", "condition_parser"],
    )

    schema_types = (
        ("column", "string"),
        ("value_set", "array"),
        ("ties_okay", "boolean"),
        ("row_condition", "string"),
        ("condition_parser", "string"),
    )
    params_with_json_schema = {
        key: {"schema": {"type": schema_type}, "value": params.get(key)}
        for key, schema_type in schema_types
    }

    value_set = params["value_set"]
    if value_set is None or len(value_set) == 0:
        values_string = "[ ]"
    else:
        # Register each member as v__<n> and reference it from the template.
        placeholders = []
        for position, member in enumerate(value_set):
            params["v__" + str(position)] = member
            placeholders.append("$v__" + str(position))
        values_string = " ".join(placeholders)

    template_str = "most common value must belong to this set: " + values_string + "."

    if params.get("ties_okay"):
        template_str += " Values outside this set that are as common (but not more common) are allowed."

    if include_column_name:
        template_str = "$column " + template_str

    row_condition = params["row_condition"]
    if row_condition is not None:
        parsed = parse_row_condition_string_pandas_engine(
            row_condition, with_schema=True
        )
        conditional_template_str, conditional_params = parsed
        template_str = "{}, then {}".format(conditional_template_str, template_str)
        params_with_json_schema.update(conditional_params)

    params_with_json_schema = add_values_with_json_schema_from_list_in_params(
        params=params,
        params_with_json_schema=params_with_json_schema,
        param_key_with_list="value_set",
    )
    return (template_str, params_with_json_schema, styling)