def find_evaluation_parameter_dependencies(parameter_expression):
    """Parse a parameter expression to identify dependencies including GE URNs.

    Args:
        parameter_expression: the parameter to parse

    Returns:
        a dictionary including:
          - "urns": set of strings that are valid GE URN objects
          - "other": set of non-GE URN strings that are required to evaluate the parameter expression

    Raises:
        EvaluationParameterError: if the expression cannot be parsed. The
            underlying parser error is attached as the exception's cause.
    """
    expr = EvaluationParameterParser()

    dependencies = {"urns": set(), "other": set()}
    # Calling get_parser clears the stack
    parser = expr.get_parser()
    try:
        # Only the side effect matters here: parsing fills expr.exprStack.
        parser.parseString(parameter_expression, parseAll=True)
    except ParseException as err:
        raise EvaluationParameterError(
            f"Unable to parse evaluation parameter: {str(err)} at line {err.line}, column {err.column}"
        ) from err
    except AttributeError as err:
        raise EvaluationParameterError(
            f"Unable to parse evaluation parameter: {str(err)}"
        ) from err

    for word in expr.exprStack:
        if not isinstance(word, str):
            # Numeric literals and function entries that are tuples
            # (e.g. (trunc, 1)) can never name a dependency.
            continue

        if word in expr.opn or word in expr.fn or word == "unary -":
            # operations and functions
            continue

        # if this is parseable as a number, then we do not include it
        try:
            float(word)
        except ValueError:
            pass
        else:
            continue

        try:
            ge_urn.parseString(word)
        except ParseException:
            # Not a valid URN, so it is a legitimate "other" evaluation parameter
            dependencies["other"].add(word)
        else:
            dependencies["urns"].add(word)

    return dependencies
Esempio n. 2
0
def test_ge_stores_urn():
    """A "stores" URN exposes its type, store name and metric name; kwargs are optional."""
    expected_fields = {
        "urn_type": "stores",
        "store_name": "my_store",
        "metric_name": "mymetric",
    }

    # URN carrying a query-string style kwargs segment
    with_kwargs = ge_urn.parseString(
        "urn:great_expectations:stores:my_store:mymetric:kw=param"
    )
    for field, expected in expected_fields.items():
        assert with_kwargs[field] == expected
    assert parse_qs(with_kwargs["metric_kwargs"]) == {"kw": ["param"]}

    # No kwargs is ok
    without_kwargs = ge_urn.parseString(
        "urn:great_expectations:stores:my_store:mymetric"
    )
    for field, expected in expected_fields.items():
        assert without_kwargs[field] == expected
    assert "metric_kwargs" not in without_kwargs
Esempio n. 3
0
def test_ge_metrics_urn():
    """A "metrics" URN exposes run id, suite name and metric name; kwargs are optional."""
    urn = "urn:great_expectations:metrics:20200403T1234.324Z:my_suite:expect_something.observed_value:column=mycol"
    res = ge_urn.parseString(urn)

    assert res["urn_type"] == "metrics"
    assert res["run_id"] == "20200403T1234.324Z"
    assert res["expectation_suite_name"] == "my_suite"
    assert res["metric_name"] == "expect_something.observed_value"
    kwargs_dict = parse_qs(res["metric_kwargs"])
    assert kwargs_dict == {"column": ["mycol"]}

    # No kwargs is ok
    urn = "urn:great_expectations:metrics:20200403T1234.324Z:my_suite:expect_something.observed_value"
    res = ge_urn.parseString(urn)

    assert res["urn_type"] == "metrics"
    assert res["run_id"] == "20200403T1234.324Z"
    assert res["expectation_suite_name"] == "my_suite"
    assert res["metric_name"] == "expect_something.observed_value"
    # The parsed result key is "metric_kwargs"; "kwargs_dict" is only a local
    # variable name above, so asserting on it could never fail.
    assert "metric_kwargs" not in res
Esempio n. 4
0
def test_ge_validations_urn():
    """A "validations" URN exposes suite and metric names; kwargs are optional."""
    expected_fields = {
        "urn_type": "validations",
        "expectation_suite_name": "my_suite",
        "metric_name": "expect_something.observed_value",
    }

    # We should be able to parse validations urns, including repeated and
    # percent-encoded query keys
    with_kwargs = ge_urn.parseString(
        "urn:great_expectations:validations:my_suite:expect_something.observed_value:query=s%20tring&query="
        "string3&query2=string2"
    )
    for field, expected in expected_fields.items():
        assert with_kwargs[field] == expected
    decoded_kwargs = parse_qs(with_kwargs["metric_kwargs"])
    assert decoded_kwargs == {
        "query": ["s tring", "string3"],
        "query2": ["string2"],
    }

    # no kwargs is ok
    without_kwargs = ge_urn.parseString(
        "urn:great_expectations:validations:my_suite:expect_something.observed_value"
    )
    for field, expected in expected_fields.items():
        assert without_kwargs[field] == expected
    assert "metric_kwargs" not in without_kwargs
Esempio n. 5
0
    def get_evaluation_parameter_dependencies(self):
        """Collect the metric dependencies implied by ``$PARAMETER`` kwargs.

        Every value in ``self.kwargs`` that is a dict containing a
        ``"$PARAMETER"`` key is parsed with
        ``find_evaluation_parameter_dependencies``. Each URN found is then
        recorded under its expectation suite name: as a bare metric name when
        the URN has no kwargs, otherwise nested under
        ``{"metric_kwargs_id": {<metric_kwargs>: [<metric_name>]}}``.

        Returns:
            The accumulated dependency dict after it has been passed through
            ``_deduplicate_evaluation_parameter_dependencies``.
        """
        parsed_dependencies = {}
        for value in self.kwargs.values():
            if isinstance(value, dict) and "$PARAMETER" in value:
                param_string_dependencies = find_evaluation_parameter_dependencies(
                    value["$PARAMETER"]
                )
                nested_update(parsed_dependencies, param_string_dependencies)

        dependencies = {}
        for string_urn in parsed_dependencies.get("urns", []):
            try:
                urn = ge_urn.parseString(string_urn)
            # NOTE(review): other callers in this file catch ParseException
            # from ge_urn.parseString; confirm ParserError is the intended type.
            except ParserError:
                # Report the URN that actually failed. The kwargs loop variable
                # is stale here and may not even be a dict.
                logger.warning(
                    "Unable to parse great_expectations urn %s", string_urn
                )
                continue

            # NOTE(review): "stores" URNs are parsed with a store_name field in
            # this file's tests; confirm they always carry expectation_suite_name
            # before reaching the lookups below.
            if not urn.get("metric_kwargs"):
                suite_dependency = [urn["metric_name"]]
            else:
                suite_dependency = [
                    {
                        "metric_kwargs_id": {
                            urn["metric_kwargs"]: [urn["metric_name"]]
                        }
                    }
                ]
            nested_update(
                dependencies,
                {urn["expectation_suite_name"]: suite_dependency},
            )

        dependencies = _deduplicate_evaluation_parameter_dependencies(dependencies)
        return dependencies
Esempio n. 6
0
def test_invalid_urn():
    """Malformed URNs raise ParseException, and the error carries the offending line."""
    invalid_urns = (
        # Must start with "urn:great_expectations"
        "not_a_ge_urn",
        # Must have one of the recognized types
        "urn:great_expectations:foo:bar:baz:bin:barg",
        # Cannot have too many parts
        "urn:great_expectations:validations:foo:bar:baz:bin:barg:boo",
    )

    for bad_urn in invalid_urns:
        with pytest.raises(ParseException) as exc_info:
            ge_urn.parseString(bad_urn)
        assert bad_urn in exc_info.value.line
Esempio n. 7
0
def parse_evaluation_parameter(parameter_expression,
                               evaluation_parameters=None,
                               data_context=None):
    """Use the provided evaluation_parameters dict to parse a given parameter expression.

    Args:
        parameter_expression (str): A string, potentially containing basic arithmetic operations and functions,
            and variables to be substituted
        evaluation_parameters (dict): A dictionary of name-value pairs consisting of values to substitute
        data_context (DataContext): A data context to use to obtain metrics, if necessary

    Returns:
        For a single-token expression: the substituted value as-is (no string coercion) when the token is a key of
        evaluation_parameters, or the store query result when the token is a "stores" URN. Otherwise, the result of
        evaluating the parsed expression stack.

    Raises:
        EvaluationParameterError: if the expression cannot be parsed, if a single-token expression has no value
            available, or if evaluating the expression stack fails.

    The parser will allow arithmetic operations +, -, /, *, as well as basic functions, including trunc() and round() to
    obtain integer values when needed for certain expectations (e.g. expect_column_value_length_to_be_between).

    Valid variables must begin with an alphabetic character and may contain alphanumeric characters plus '_' and '$',
    EXCEPT if they begin with the string "urn:great_expectations" in which case they may also include additional
    characters to support inclusion of GE URNs (see :ref:`evaluation_parameters` for more information).
    """
    if evaluation_parameters is None:
        evaluation_parameters = {}

    # Calling get_parser clears the stack
    parser = expr.get_parser()
    try:
        parsed = parser.parseString(parameter_expression, parseAll=True)
    except ParseException as err:
        # Fail immediately rather than threading a sentinel list through the
        # branches below; the message is unchanged.
        raise EvaluationParameterError(
            f"Parse Failure: {str(err)}\nStatement: {err.line}\nColumn: {err.column}"
        ) from err

    if len(parsed) == 1:
        token = parsed[0]
        if token in evaluation_parameters:
            # In this case, we *do* have a substitution for a single type. We treat this specially because in this
            # case, we allow complex type substitutions (i.e. do not coerce to string as part of parsing)
            return evaluation_parameters[token]

        # In this special case there were no operations to find, so only one value, but we don't have something to
        # substitute for that value
        try:
            res = ge_urn.parseString(token)
            if res["urn_type"] == "stores":
                store = data_context.stores.get(res["store_name"])
                return store.get_query_result(res["metric_name"],
                                              res.get("metric_kwargs", {}))
            else:
                logger.error(
                    "Unrecognized urn_type in ge_urn: must be 'stores' to use a metric store."
                )
                raise EvaluationParameterError(
                    "No value found for $PARAMETER " + str(token))
        except ParseException as err:
            raise EvaluationParameterError("No value found for $PARAMETER " +
                                           str(token)) from err
        except AttributeError as err:
            # Reached e.g. when data_context is None or the named store is missing.
            logger.warning(
                "Unable to get store for store-type valuation parameter.")
            raise EvaluationParameterError("No value found for $PARAMETER " +
                                           str(token)) from err

    # Multi-token (or empty) expression: substitute known parameter names in
    # place on the parser's stack; values are coerced to strings.
    for i, ob in enumerate(expr.exprStack):
        if isinstance(ob, str) and ob in evaluation_parameters:
            expr.exprStack[i] = str(evaluation_parameters[ob])

    try:
        result = expr.evaluate_stack(expr.exprStack)
    except Exception as e:
        exception_traceback = traceback.format_exc()
        exception_message = (
            f'{type(e).__name__}: "{str(e)}".  Traceback: "{exception_traceback}".'
        )
        # No extra positional args: the message has no %-placeholders, so
        # passing one makes the logging module report a formatting error.
        logger.debug(exception_message, exc_info=True)
        raise EvaluationParameterError(
            "Error while evaluating evaluation parameter expression: " +
            str(e)) from e

    return result
def parse_evaluation_parameter(
        parameter_expression: str,
        evaluation_parameters: Optional[Dict[str, Any]] = None,
        data_context: Optional[
            Any] = None,  # Cannot type 'DataContext' due to import cycle
) -> Any:
    """Use the provided evaluation_parameters dict to parse a given parameter expression.

    Args:
        parameter_expression (str): A string, potentially containing basic arithmetic operations and functions,
            and variables to be substituted
        evaluation_parameters (dict): A dictionary of name-value pairs consisting of values to substitute
        data_context (DataContext): A data context to use to obtain metrics, if necessary

    Returns:
        For a single-token expression: the substituted value as-is (no string coercion) when the token is a key of
        evaluation_parameters, or the store query result when the token is a "stores" URN. Otherwise, the result of
        evaluating the parsed expression stack, passed through convert_to_json_serializable.

    Raises:
        EvaluationParameterError: if the expression cannot be parsed, if a single-token expression has no value
            available, or if evaluating the expression stack fails.

    The parser will allow arithmetic operations +, -, /, *, as well as basic functions, including trunc() and round() to
    obtain integer values when needed for certain expectations (e.g. expect_column_value_length_to_be_between).

    Valid variables must begin with an alphabetic character and may contain alphanumeric characters plus '_' and '$',
    EXCEPT if they begin with the string "urn:great_expectations" in which case they may also include additional
    characters to support inclusion of GE URNs (see :ref:`evaluation_parameters` for more information).
    """
    if evaluation_parameters is None:
        evaluation_parameters = {}

    # Calling get_parser clears the stack
    parser = expr.get_parser()
    try:
        parsed = parser.parseString(parameter_expression, parseAll=True)
    except ParseException as err:
        # Fail immediately rather than threading a sentinel list through the
        # branches below; the message is unchanged.
        raise EvaluationParameterError(
            f"Parse Failure: {str(err)}\nStatement: {err.line}\nColumn: {err.column}"
        ) from err

    # Represents a valid parser result of a single function that has no arguments
    is_zero_arity_call = (
        len(parsed) == 1
        and isinstance(parsed[0], tuple)
        and parsed[0][2] is False
    )

    if is_zero_arity_call:
        # Necessary to catch `now()` (which only needs to be evaluated with `expr.exprStack`)
        # NOTE: 20211122 - Chetan - Any future built-ins that are zero arity functions will match this behavior
        pass

    elif len(parsed) == 1:
        token = parsed[0]
        if token in evaluation_parameters:
            # In this case, we *do* have a substitution for a single type. We treat this specially because in this
            # case, we allow complex type substitutions (i.e. do not coerce to string as part of parsing)
            # NOTE: 20201023 - JPC - to support MetricDefinition as an evaluation parameter type, we need to handle
            # that case here; is the evaluation parameter provided here in fact a metric definition?
            return evaluation_parameters[token]

        # In this special case there were no operations to find, so only one value, but we don't have something to
        # substitute for that value
        try:
            res = ge_urn.parseString(token)
            if res["urn_type"] == "stores":
                store = data_context.stores.get(res["store_name"])
                return store.get_query_result(res["metric_name"],
                                              res.get("metric_kwargs", {}))
            else:
                logger.error(
                    "Unrecognized urn_type in ge_urn: must be 'stores' to use a metric store."
                )
                raise EvaluationParameterError(
                    f"No value found for $PARAMETER {str(token)}")
        except ParseException as e:
            logger.debug(
                "Parse exception while parsing evaluation parameter: %s", e
            )
            raise EvaluationParameterError(
                f"No value found for $PARAMETER {str(token)}") from e
        except AttributeError as e:
            # Reached e.g. when data_context is None or the named store is missing.
            logger.warning(
                "Unable to get store for store-type valuation parameter.")
            raise EvaluationParameterError(
                f"No value found for $PARAMETER {str(token)}") from e

    else:
        # we have a stack to evaluate and there was no parse failure.
        # iterate through values and look for URNs pointing to a store:
        for i, ob in enumerate(expr.exprStack):
            if not isinstance(ob, str):
                continue

            if ob in evaluation_parameters:
                expr.exprStack[i] = str(evaluation_parameters[ob])
                continue

            # try to retrieve this value from a store
            try:
                res = ge_urn.parseString(ob)
                if res["urn_type"] == "stores":
                    store = data_context.stores.get(res["store_name"])
                    # value placed back in stack must be a string
                    expr.exprStack[i] = str(
                        store.get_query_result(
                            res["metric_name"],
                            res.get("metric_kwargs", {})))
                # other urn_types are left as-is here; note that validations URNs are being resolved elsewhere.
            except ParseException:
                # Expected for operators, literals and plain names: the value in the stack isn't a URN.
                pass
            except AttributeError:
                # Best effort: leave the token in place (evaluation below will
                # report the failure) but leave a trace for debugging.
                logger.debug(
                    "Unable to get store for store-type evaluation parameter %s",
                    ob,
                )

    try:
        result = expr.evaluate_stack(expr.exprStack)
        result = convert_to_json_serializable(result)
    except Exception as e:
        exception_traceback = traceback.format_exc()
        exception_message = (
            f'{type(e).__name__}: "{str(e)}".  Traceback: "{exception_traceback}".'
        )
        # No extra positional args: the message has no %-placeholders, so
        # passing one makes the logging module report a formatting error.
        logger.debug(exception_message, exc_info=True)
        raise EvaluationParameterError(
            f"Error while evaluating evaluation parameter expression: {str(e)}"
        ) from e

    return result