def find_evaluation_parameter_dependencies(parameter_expression):
    """Collect the names an evaluation parameter expression depends on.

    The expression is parsed with a fresh ``EvaluationParameterParser`` and every
    string token left on its expression stack is classified.

    Args:
        parameter_expression: the parameter expression to inspect

    Returns:
        a dictionary including:
          - "urns": set of tokens that parse as GE URNs
          - "other": set of the remaining (non-URN) tokens required to evaluate
            the parameter expression

    Raises:
        EvaluationParameterError: if the expression cannot be parsed
    """
    evaluator = EvaluationParameterParser()
    urn_names = set()
    other_names = set()

    # get_parser() clears the expression stack before we parse
    grammar = evaluator.get_parser()
    try:
        grammar.parseString(parameter_expression, parseAll=True)
    except ParseException as err:
        raise EvaluationParameterError(
            f"Unable to parse evaluation parameter: {str(err)} at line {err.line}, column {err.column}"
        )
    except AttributeError as err:
        raise EvaluationParameterError(
            f"Unable to parse evaluation parameter: {str(err)}"
        )

    for token in evaluator.exprStack:
        # Numbers and any other non-string entries (e.g. a function given as a
        # tuple such as (trunc, 1)) are never dependencies.
        if not isinstance(token, str):
            continue

        # Operations and functions are part of the grammar, not dependencies.
        is_operator = (
            token in evaluator.opn or token in evaluator.fn or token == "unary -"
        )
        if is_operator:
            continue

        # Anything parseable as a number is a literal, not a dependency.
        try:
            float(token)
        except ValueError:
            pass
        else:
            continue

        try:
            ge_urn.parseString(token)
        except ParseException:
            # Not a valid URN, so it is a legitimate "other" evaluation parameter.
            other_names.add(token)
        else:
            urn_names.add(token)

    return {"urns": urn_names, "other": other_names}
def test_ge_stores_urn():
    """A "stores" URN yields its store name, metric name and optional kwargs."""
    expected_fields = {
        "urn_type": "stores",
        "store_name": "my_store",
        "metric_name": "mymetric",
    }

    # URN carrying a query-string style kwargs segment
    parsed = ge_urn.parseString(
        "urn:great_expectations:stores:my_store:mymetric:kw=param"
    )
    for field, expected in expected_fields.items():
        assert parsed[field] == expected
    assert parse_qs(parsed["metric_kwargs"]) == {"kw": ["param"]}

    # No kwargs is ok
    parsed = ge_urn.parseString("urn:great_expectations:stores:my_store:mymetric")
    for field, expected in expected_fields.items():
        assert parsed[field] == expected
    assert "metric_kwargs" not in parsed
def test_ge_metrics_urn():
    """A "metrics" URN exposes run id, suite name, metric name and optional kwargs."""
    urn = "urn:great_expectations:metrics:20200403T1234.324Z:my_suite:expect_something.observed_value:column=mycol"
    res = ge_urn.parseString(urn)
    assert res["urn_type"] == "metrics"
    assert res["run_id"] == "20200403T1234.324Z"
    assert res["expectation_suite_name"] == "my_suite"
    assert res["metric_name"] == "expect_something.observed_value"
    kwargs_dict = parse_qs(res["metric_kwargs"])
    assert kwargs_dict == {"column": ["mycol"]}

    # No kwargs is ok
    urn = "urn:great_expectations:metrics:20200403T1234.324Z:my_suite:expect_something.observed_value"
    res = ge_urn.parseString(urn)
    assert res["urn_type"] == "metrics"
    assert res["run_id"] == "20200403T1234.324Z"
    assert res["expectation_suite_name"] == "my_suite"
    assert res["metric_name"] == "expect_something.observed_value"
    # The kwargs segment is exposed under "metric_kwargs" (the key read above);
    # asserting the absence of any other name would pass vacuously.
    assert "metric_kwargs" not in res
def test_ge_validations_urn():
    """A "validations" URN exposes suite name, metric name and optional kwargs."""
    expected_fields = {
        "urn_type": "validations",
        "expectation_suite_name": "my_suite",
        "metric_name": "expect_something.observed_value",
    }

    # We should be able to parse validations urns
    parsed = ge_urn.parseString(
        "urn:great_expectations:validations:my_suite:expect_something.observed_value:query=s%20tring&query="
        "string3&query2=string2"
    )
    for field, expected in expected_fields.items():
        assert parsed[field] == expected
    # Repeated keys accumulate and percent-escapes are decoded by parse_qs
    decoded_kwargs = parse_qs(parsed["metric_kwargs"])
    assert decoded_kwargs == {"query": ["s tring", "string3"], "query2": ["string2"]}

    # no kwargs is ok
    parsed = ge_urn.parseString(
        "urn:great_expectations:validations:my_suite:expect_something.observed_value"
    )
    for field, expected in expected_fields.items():
        assert parsed[field] == expected
    assert "metric_kwargs" not in parsed
def get_evaluation_parameter_dependencies(self):
    """Build the evaluation-parameter dependency mapping for ``self.kwargs``.

    Every kwarg whose value is a dict containing "$PARAMETER" is passed to
    ``find_evaluation_parameter_dependencies``. Each URN found that way is then
    recorded under its expectation suite name: as a bare metric name when the
    URN has no metric kwargs, otherwise as a
    ``{"metric_kwargs_id": {<metric_kwargs>: [<metric_name>]}}`` entry.

    Returns:
        The value returned by ``_deduplicate_evaluation_parameter_dependencies``
        for the mapping described above (entries are merged with
        ``nested_update``).
    """
    parsed_dependencies = dict()
    for value in self.kwargs.values():
        if isinstance(value, dict) and "$PARAMETER" in value:
            param_string_dependencies = find_evaluation_parameter_dependencies(
                value["$PARAMETER"]
            )
            nested_update(parsed_dependencies, param_string_dependencies)

    dependencies = dict()
    for string_urn in parsed_dependencies.get("urns", []):
        try:
            urn = ge_urn.parseString(string_urn)
        # NOTE(review): other code in this file catches ParseException around
        # ge_urn.parseString; confirm ParserError is what is raised here.
        except ParserError:
            # Report the URN being parsed. The kwargs loop above has already
            # finished, so its loop variable no longer relates to this URN (and
            # may not even be a "$PARAMETER" dict).
            logger.warning(
                "Unable to parse great_expectations urn {}".format(string_urn)
            )
            continue

        if not urn.get("metric_kwargs"):
            nested_update(
                dependencies,
                {urn["expectation_suite_name"]: [urn["metric_name"]]},
            )
        else:
            nested_update(
                dependencies,
                {
                    urn["expectation_suite_name"]: [
                        {
                            "metric_kwargs_id": {
                                urn["metric_kwargs"]: [urn["metric_name"]]
                            }
                        }
                    ]
                },
            )

    dependencies = _deduplicate_evaluation_parameter_dependencies(dependencies)
    return dependencies
def test_invalid_urn():
    """Malformed URNs raise ParseException and the error line contains the input."""
    invalid_urns = (
        # Must start with "urn:great_expectations"
        "not_a_ge_urn",
        # Must have one of the recognized types
        "urn:great_expectations:foo:bar:baz:bin:barg",
        # Cannot have too many parts
        "urn:great_expectations:validations:foo:bar:baz:bin:barg:boo",
    )
    for bad_urn in invalid_urns:
        with pytest.raises(ParseException) as excinfo:
            ge_urn.parseString(bad_urn)
        assert bad_urn in excinfo.value.line
def parse_evaluation_parameter(parameter_expression,
                               evaluation_parameters=None,
                               data_context=None):
    """Use the provided evaluation_parameters dict to parse a given parameter expression.

    Args:
        parameter_expression (str): A string, potentially containing basic arithmetic operations and functions,
            and variables to be substituted
        evaluation_parameters (dict): A dictionary of name-value pairs consisting of values to substitute
        data_context (DataContext): A data context to use to obtain metrics, if necessary

    Returns:
        - the substituted value itself (not coerced to a string) when the expression is a single name found in
          evaluation_parameters;
        - the store's query result when the expression is a single "stores" URN;
        - otherwise the result of evaluating the parsed expression stack.

    Raises:
        EvaluationParameterError: if the expression cannot be parsed, if a single-name expression has no value
            available, or if evaluating the expression stack fails.

    The parser will allow arithmetic operations +, -, /, *, as well as basic functions, including trunc() and round() to
    obtain integer values when needed for certain expectations (e.g. expect_column_value_length_to_be_between).

    Valid variables must begin with an alphabetic character and may contain alphanumeric characters plus '_' and '$',
    EXCEPT if they begin with the string "urn:great_expectations" in which case they may also include additional
    characters to support inclusion of GE URNs (see :ref:`evaluation_parameters` for more information).
    """
    if evaluation_parameters is None:
        evaluation_parameters = {}

    # Calling get_parser clears the stack
    parser = expr.get_parser()
    try:
        tokens = parser.parseString(parameter_expression, parseAll=True)
    except ParseException as err:
        # Sentinel list: the final branch below unpacks the last element to report the failure.
        tokens = [
            "Parse Failure",
            parameter_expression,
            (str(err), err.line, err.column),
        ]

    if len(tokens) == 1 and tokens[0] not in evaluation_parameters:
        # In this special case there were no operations to find, so only one value, but we don't have something to
        # substitute for that value
        missing_value_message = "No value found for $PARAMETER " + str(tokens[0])
        try:
            res = ge_urn.parseString(tokens[0])
            if res["urn_type"] == "stores":
                store = data_context.stores.get(res["store_name"])
                return store.get_query_result(res["metric_name"],
                                              res.get("metric_kwargs", {}))
            else:
                logger.error(
                    "Unrecognized urn_type in ge_urn: must be 'stores' to use a metric store."
                )
                raise EvaluationParameterError(missing_value_message)
        except ParseException as err:
            raise EvaluationParameterError(missing_value_message) from err
        except AttributeError as err:
            # Raised when data_context is None (or the lookup result has no get_query_result).
            logger.warning(
                "Unable to get store for store-type valuation parameter.")
            raise EvaluationParameterError(missing_value_message) from err

    elif len(tokens) == 1:
        # In this case, we *do* have a substitution for a single type. We treat this specially because in this
        # case, we allow complex type substitutions (i.e. do not coerce to string as part of parsing)
        return evaluation_parameters[tokens[0]]

    elif len(tokens) == 0 or tokens[0] != "Parse Failure":
        # Substitute known parameters into the parser's stack; substituted values are coerced to strings.
        for i, ob in enumerate(expr.exprStack):
            if isinstance(ob, str) and ob in evaluation_parameters:
                expr.exprStack[i] = str(evaluation_parameters[ob])

    else:
        err_str, err_line, err_col = tokens[-1]
        raise EvaluationParameterError(
            f"Parse Failure: {err_str}\nStatement: {err_line}\nColumn: {err_col}"
        )

    try:
        result = expr.evaluate_stack(expr.exprStack)
    except Exception as e:
        exception_traceback = traceback.format_exc()
        exception_message = (
            f'{type(e).__name__}: "{str(e)}".  Traceback: "{exception_traceback}".'
        )
        # The message is already fully formatted: pass no extra positional args, otherwise logging would try
        # to %-format it and fail to emit the record.
        logger.debug(exception_message, exc_info=True)
        raise EvaluationParameterError(
            "Error while evaluating evaluation parameter expression: " +
            str(e)) from e

    return result
def parse_evaluation_parameter(
    parameter_expression: str,
    evaluation_parameters: Optional[Dict[str, Any]] = None,
    data_context: Optional[Any] = None,  # Cannot type 'DataContext' due to import cycle
) -> Any:
    """Use the provided evaluation_parameters dict to parse a given parameter expression.

    Args:
        parameter_expression (str): A string, potentially containing basic arithmetic operations and functions,
            and variables to be substituted
        evaluation_parameters (dict): A dictionary of name-value pairs consisting of values to substitute
        data_context (DataContext): A data context to use to obtain metrics, if necessary

    Returns:
        - the substituted value itself (not coerced to a string) when the expression is a single name found in
          evaluation_parameters;
        - the store's query result when the expression is a single "stores" URN;
        - otherwise the result of evaluating the parsed expression stack, passed through
          convert_to_json_serializable.

    Raises:
        EvaluationParameterError: if the expression cannot be parsed, if a single-name expression has no value
            available, or if evaluating the expression stack fails.

    The parser will allow arithmetic operations +, -, /, *, as well as basic functions, including trunc() and round() to
    obtain integer values when needed for certain expectations (e.g. expect_column_value_length_to_be_between).

    Valid variables must begin with an alphabetic character and may contain alphanumeric characters plus '_' and '$',
    EXCEPT if they begin with the string "urn:great_expectations" in which case they may also include additional
    characters to support inclusion of GE URNs (see :ref:`evaluation_parameters` for more information).
    """
    if evaluation_parameters is None:
        evaluation_parameters = {}

    # Calling get_parser clears the stack
    parser = expr.get_parser()
    try:
        tokens = parser.parseString(parameter_expression, parseAll=True)
    except ParseException as err:
        # Sentinel list: the final branch below unpacks the last element to report the failure.
        tokens = [
            "Parse Failure",
            parameter_expression,
            (str(err), err.line, err.column),
        ]

    # Represents a valid parser result of a single function that has no arguments
    if len(tokens) == 1 and isinstance(tokens[0], tuple) and tokens[0][2] is False:
        # Necessary to catch `now()` (which only needs to be evaluated with `expr.exprStack`)
        # NOTE: 20211122 - Chetan - Any future built-ins that are zero arity functions will match this behavior
        pass

    elif len(tokens) == 1 and tokens[0] not in evaluation_parameters:
        # In this special case there were no operations to find, so only one value, but we don't have something to
        # substitute for that value
        missing_value_message = f"No value found for $PARAMETER {str(tokens[0])}"
        try:
            res = ge_urn.parseString(tokens[0])
            if res["urn_type"] == "stores":
                store = data_context.stores.get(res["store_name"])
                return store.get_query_result(res["metric_name"],
                                              res.get("metric_kwargs", {}))
            else:
                logger.error(
                    "Unrecognized urn_type in ge_urn: must be 'stores' to use a metric store."
                )
                raise EvaluationParameterError(missing_value_message)
        except ParseException as e:
            logger.debug(
                f"Parse exception while parsing evaluation parameter: {str(e)}"
            )
            raise EvaluationParameterError(missing_value_message) from e
        except AttributeError as e:
            # Raised when data_context is None (or the lookup result has no get_query_result).
            logger.warning(
                "Unable to get store for store-type valuation parameter.")
            raise EvaluationParameterError(missing_value_message) from e

    elif len(tokens) == 1:
        # In this case, we *do* have a substitution for a single type. We treat this specially because in this
        # case, we allow complex type substitutions (i.e. do not coerce to string as part of parsing)
        # NOTE: 20201023 - JPC - to support MetricDefinition as an evaluation parameter type, we need to handle that
        # case here; is the evaluation parameter provided here in fact a metric definition?
        return evaluation_parameters[tokens[0]]

    elif len(tokens) == 0 or tokens[0] != "Parse Failure":
        # we have a stack to evaluate and there was no parse failure.
        # iterate through values and look for URNs pointing to a store:
        for i, ob in enumerate(expr.exprStack):
            if not isinstance(ob, str):
                continue
            if ob in evaluation_parameters:
                expr.exprStack[i] = str(evaluation_parameters[ob])
                continue
            # try to retrieve this value from a store
            try:
                res = ge_urn.parseString(ob)
                if res["urn_type"] == "stores":
                    store = data_context.stores.get(res["store_name"])
                    # value placed back in stack must be a string
                    expr.exprStack[i] = str(
                        store.get_query_result(
                            res["metric_name"], res.get("metric_kwargs", {})))
                else:
                    # handle other urn_types here, but note that validations URNs are being resolved elsewhere.
                    pass
            # graceful error handling for cases where the value in the stack isn't a URN:
            except ParseException:
                pass
            except AttributeError:
                # Best effort: leave the entry untouched when no usable store is reachable (e.g. data_context
                # is None), but leave a trace for debugging.
                logger.debug(
                    "Unable to resolve store URN in evaluation parameter expression; leaving it unresolved."
                )

    else:
        err_str, err_line, err_col = tokens[-1]
        raise EvaluationParameterError(
            f"Parse Failure: {err_str}\nStatement: {err_line}\nColumn: {err_col}"
        )

    try:
        result = expr.evaluate_stack(expr.exprStack)
        result = convert_to_json_serializable(result)
    except Exception as e:
        exception_traceback = traceback.format_exc()
        exception_message = (
            f'{type(e).__name__}: "{str(e)}".  Traceback: "{exception_traceback}".'
        )
        # The message is already fully formatted: pass no extra positional args, otherwise logging would try
        # to %-format it and fail to emit the record.
        logger.debug(exception_message, exc_info=True)
        raise EvaluationParameterError(
            f"Error while evaluating evaluation parameter expression: {str(e)}"
        ) from e

    return result