Ejemplo n.º 1
0
 def preprocess_literal(op: str, literal: Any) -> Expression:
     """
     Build the right hand side expression of a condition from a raw literal.

     A list or tuple is only accepted together with the IN / NOT IN
     operators; it becomes a call to the `tuple` function whose parameters
     are the individual elements, each wrapped in a Literal. Any other value
     is only accepted with the remaining operators and is wrapped in a
     single Literal.

     Raises a ParsingException (with report=False) when the operator and the
     kind of literal do not go together.
     """
     is_sequence = isinstance(literal, (list, tuple))
     is_membership_op = op in ["IN", "NOT IN"]

     if is_sequence and not is_membership_op:
         raise ParsingException(
             (
                 f"Invalid operator {op} for literal {literal}. Literal is a sequence. "
                 "Operator must be IN/NOT IN"
             ),
             report=False,
         )
     if is_membership_op and not is_sequence:
         raise ParsingException(
             (
                 f"Invalid operator {op} for literal {literal}. Literal is not a sequence. "
                 "Operator cannot be IN/NOT IN"
             ),
             report=False,
         )

     if not is_sequence:
         return Literal(None, literal)

     elements = tuple(Literal(None, item) for item in literal)
     return FunctionCall(None, "tuple", elements)
Ejemplo n.º 2
0
 def preprocess_condition_function_literal(func: str,
                                           literal: Any) -> Expression:
     """
     Build the expression for a literal argument of a condition function.

     Lists and tuples are only valid for the in()/notIn() condition
     functions: every element is parsed with `parse_string_to_expr` and the
     results become the parameters of a call to the `tuple` function.
     Scalars are only valid for the other condition functions: strings are
     parsed with `parse_string_to_expr`, anything else is wrapped in a
     Literal.

     Raises a ParsingException (with report=False) when the function and the
     kind of literal do not go together.
     """
     membership_functions = [ConditionFunctions.IN, ConditionFunctions.NOT_IN]

     if isinstance(literal, (list, tuple)):
         if func not in membership_functions:
             raise ParsingException(
                 (f"Invalid function {func} for literal {literal}. Literal is a sequence. "
                  "Function must be in()/notIn()"),
                 report=False,
             )
         elements = tuple(parse_string_to_expr(item) for item in literal)
         return FunctionCall(None, "tuple", elements)

     if func in membership_functions:
         raise ParsingException(
             (f"Invalid function {func} for literal {literal}. Literal is not a sequence. "
              "Function cannot be in()/notIn()"),
             report=False,
         )

     if isinstance(literal, str):
         return parse_string_to_expr(literal)
     return Literal(None, literal)
Ejemplo n.º 3
0
    def visit_relationship_match(
        self,
        node: Node,
        visited_children: Tuple[
            Any,
            IndividualNode[QueryEntity],
            Any,
            Node,
            Any,
            IndividualNode[QueryEntity],
        ],
    ) -> RelationshipTuple:
        """
        Visit a relationship match and validate the join it describes.

        The left hand side node, the relationship and the right hand side
        node are taken from positions 1, 3 and 5 of `visited_children`. The
        left hand side entity must declare the relationship, and that
        relationship must point at the right hand side entity; otherwise a
        ParsingException is raised.

        Returns a RelationshipTuple made of both nodes, the relationship and
        the join relationship data found on the left hand side entity.
        """
        _, lhs, _, relationship, _, rhs = visited_children
        assert isinstance(lhs.data_source, QueryEntity)
        assert isinstance(rhs.data_source, QueryEntity)

        data = get_entity(lhs.data_source.key).get_join_relationship(relationship)

        # The relationship has to exist on the left hand side entity ...
        if data is None:
            raise ParsingException(
                f"{lhs.data_source.key.value} does not have a join relationship -[{relationship}]->"
            )

        # ... and it has to lead to the entity on the right hand side.
        if data.rhs_entity != rhs.data_source.key:
            raise ParsingException(
                f"-[{relationship}]-> cannot be used to join {lhs.data_source.key.value} to {rhs.data_source.key.value}"
            )

        return RelationshipTuple(lhs, relationship, rhs, data)
Ejemplo n.º 4
0
def _align_max_days_date_align(
    key: EntityKey,
    old_top_level: Sequence[Expression],
    max_days: Optional[int],
    date_align: int,
    alias: Optional[str] = None,
) -> Sequence[Expression]:
    """
    Align and cap the time range conditions of an entity.

    `old_top_level` is returned untouched when the entity has no required
    time column, or when one of the conditions is an equality condition on
    that column. Otherwise both a lower and an upper bound on the time
    column must be present. Each bound is rounded down by the remainder of
    its `timedelta.seconds` component (the within-day part of the distance
    from `datetime.min`) modulo `date_align`. When `max_days` is given and
    the aligned range spans more days than that, the lower bound is moved
    up to `max_days` days before the upper bound.

    Returns a new list in which the two bound conditions carry the adjusted
    dates as literals; every other expression is passed through as is.

    Raises a ParsingException when a bound is missing or when the aligned
    lower bound is later than the aligned upper bound.
    """
    entity = get_entity(key)
    time_column = entity.required_time_column
    if not time_column:
        return old_top_level

    # An equality condition on the time column makes range alignment moot.
    equality_match = build_match(
        time_column, [ConditionFunctions.EQ], datetime, alias
    )
    for condition in old_top_level:
        if equality_match.match(condition):
            return old_top_level

    lower, upper = get_time_range_expressions(old_top_level, time_column, alias)
    if not lower:
        raise ParsingException(
            f"missing >= condition on column {entity.required_time_column} for entity {key.value}"
        )
    if not upper:
        raise ParsingException(
            f"missing < condition on column {entity.required_time_column} for entity {key.value}"
        )

    def align(moment: datetime) -> datetime:
        # Round down by the remainder of the within-day seconds.
        remainder = (moment - moment.min).seconds % date_align
        return moment - timedelta(seconds=remainder)

    from_date, from_exp = lower
    to_date, to_exp = upper
    from_date = align(from_date)
    to_date = align(to_date)

    if from_date > to_date:
        raise ParsingException(f"invalid time conditions on entity {key.value}")

    # Cap the range so that it never spans more than max_days days.
    if max_days is not None and (to_date - from_date).days > max_days:
        from_date = to_date - timedelta(days=max_days)

    def replace_cond(exp: Expression) -> Expression:
        if not isinstance(exp, FunctionCall):
            return exp
        # The lower bound is checked first, then the upper bound.
        for bound_exp, bound_date in ((from_exp, from_date), (to_exp, to_date)):
            if exp == bound_exp:
                return replace(
                    exp,
                    parameters=(bound_exp.parameters[0], Literal(None, bound_date)),
                )
        return exp

    return [replace_cond(exp) for exp in old_top_level]
Ejemplo n.º 5
0
def parse_snql_query_initial(
    body: str,
) -> Union[CompositeQuery[QueryEntity], LogicalQuery]:
    """
    Parses the query body generating the AST. This only takes into
    account the initial query body. Extensions are parsed by extension
    processors and are supposed to update the AST.

    Every failure is surfaced as a ParsingException:
    - a ParsingException raised while parsing is logged at warning level
      and re-raised unchanged;
    - an IncompleteParseError is turned into a message that quotes a few
      characters around the reported position;
    - any other exception is turned into a ParsingException carrying the
      first line of the original message.
    Translated exceptions are explicitly chained to the original error so
    that the root cause stays attached to the raised exception.

    After parsing, the outermost query gets a default limit of 1000 and a
    default offset of 0 when they are not set. A limit above 10000 is
    rejected with a ParsingException (report=False).
    """
    try:
        exp_tree = snql_grammar.parse(body)
        parsed = SnQLVisitor().visit(exp_tree)
    except ParsingException as e:
        logger.warning(f"Invalid SnQL query ({e}): {body}")
        # Bare raise re-raises the active exception with its traceback intact.
        raise
    except IncompleteParseError as e:
        lines = body.split("\n")
        # Fall back to the whole body if the reported line is out of range.
        if e.line() > len(lines):
            line = body
        else:
            line = lines[e.line() - 1]

        idx = e.column()
        prefix = line[max(0, idx - 3) : idx]
        suffix = line[idx : (idx + 10)]
        raise ParsingException(
            f"Parsing error on line {e.line()} at '{prefix}{suffix}'"
        ) from e
    except Exception as e:
        # Only the first line of the message is kept.
        message = str(e)
        if "\n" in message:
            message, _ = message.split("\n", 1)
        raise ParsingException(message) from e

    assert isinstance(parsed, (CompositeQuery, LogicalQuery))  # mypy

    # Add these defaults here to avoid them getting applied to subqueries
    limit = parsed.get_limit()
    if limit is None:
        parsed.set_limit(1000)
    elif limit > 10000:
        raise ParsingException(
            "queries cannot have a limit higher than 10000", report=False
        )

    if parsed.get_offset() is None:
        parsed.set_offset(0)

    return parsed
Ejemplo n.º 6
0
def parse_clickhouse_function(function: str) -> Expression:
    """
    Parse a function written in Clickhouse syntax into an Expression.

    The string is parsed with `minimal_clickhouse_grammar` and the resulting
    tree is visited by a ClickhouseVisitor. Any error raised by the grammar
    is re-raised as a ParsingException chained to the original error.
    """
    try:
        tree = minimal_clickhouse_grammar.parse(function)
    except Exception as cause:
        raise ParsingException(f"Cannot parse aggregation {function}", cause) from cause
    else:
        return ClickhouseVisitor().visit(tree)  # type: ignore
Ejemplo n.º 7
0
    def simple_condition_builder(lhs: Expression, op: str,
                                 literal: Any) -> Expression:
        """
        Build a condition expression from a left hand side, an operator and
        an optional right hand side literal.

        Unary operators must come without a literal and produce a unary
        condition. All other operators must come with a literal, which is
        run through `preprocess_literal`, and produce a binary condition.
        A ParsingException is raised when the literal is present for a unary
        operator or missing for a binary one.
        """
        is_unary = op in UNARY_OPERATORS

        if is_unary and literal is not None:
            raise ParsingException(
                f"Right hand side operand {literal} provided to unary operator {op}"
            )
        if not is_unary and literal is None:
            raise ParsingException(
                f"Missing right hand side operand for binary operator {op}"
            )

        if is_unary:
            return unary_condition(None, OPERATOR_TO_FUNCTION[op], lhs)

        return binary_condition(
            None,
            OPERATOR_TO_FUNCTION[op],
            lhs,
            preprocess_literal(op, literal),
        )
Ejemplo n.º 8
0
    def transform(exp: Expression) -> Expression:
        """
        Split a qualified column name into table alias and column name.

        Expressions that are not columns are returned unchanged. A column
        name must have the form `<alias>.<name>` with `<alias>` being one of
        the known aliases; it is rebuilt as a Column that keeps the original
        alias and uses the two parts as table and column name. Otherwise a
        ParsingException is raised.
        """
        if isinstance(exp, Column):
            # Only the first dot separates the qualifier from the column name.
            qualifier, dot, name = exp.column_name.partition(".")
            if not dot or qualifier not in aliases:
                raise ParsingException(
                    f"column {exp.column_name} must be qualified in a join query")
            return Column(exp.alias, qualifier, name)

        return exp
Ejemplo n.º 9
0
def parse_expression(val: Any, dataset_columns: ColumnSet,
                     arrayjoin: Set[str]) -> Expression:
    """
    Turn a value written in the Snuba query language into an AST Expression.

    A value recognised by `is_function` (at depth 0) is handed to
    `parse_function_to_expr`; a plain string is handed to
    `parse_string_to_expr`. Anything else raises a ParsingException.
    """
    if is_function(val, 0):
        parsed = parse_function_to_expr(val, dataset_columns, arrayjoin)
    elif isinstance(val, str):
        parsed = parse_string_to_expr(val)
    else:
        raise ParsingException(
            f"Expression to parse can only be a function or a string: {val}")
    return parsed
Ejemplo n.º 10
0
def _validate_required_conditions(
    query: Union[CompositeQuery[QueryEntity], LogicalQuery], ) -> None:
    """
    Check that the query carries the conditions its entities require.

    - For a LogicalQuery the entity of the from clause validates the query.
    - For a composite query whose from clause is itself a query, the check
      recurses into that query.
    - For a composite query over a join, the entity of every aliased node
      validates the outer query, given the node's alias.

    Raises a ParsingException as soon as one entity reports that its
    required conditions are missing.
    """
    if not isinstance(query, LogicalQuery):
        from_clause = query.get_from_clause()
        if isinstance(from_clause, (LogicalQuery, CompositeQuery)):
            # Nested query: the requirements apply to the inner query.
            return _validate_required_conditions(from_clause)

        assert isinstance(from_clause, JoinClause)  # mypy
        for alias, node in from_clause.get_alias_node_map().items():
            assert isinstance(node.data_source, QueryEntity)  # mypy
            join_entity = get_entity(node.data_source.key)
            if not join_entity.validate_required_conditions(query, alias):
                raise ParsingException(
                    f"{node.data_source.key} is missing required conditions")
        return

    entity = get_entity(query.get_from_clause().key)
    if not entity.validate_required_conditions(query):
        raise ParsingException(
            f"{query.get_from_clause().key} is missing required conditions"
        )
Ejemplo n.º 11
0
def parse_aggregation(
    aggregation_function: str,
    column: Any,
    alias: Optional[str],
    dataset_columns: ColumnSet,
    array_join_cols: Set[str],
) -> Expression:
    """
    Parse one aggregation into an AST Expression.

    Aggregations accept both the Snuba syntax and a subset of the Clickhouse
    syntax, so three shapes are handled:

    - `aggregation_function` is a plain function name (it fully matches
      FUNCTION_NAME_RE): a FunctionCall over the parsed columns.
    - `aggregation_function` is a Clickhouse expression and no column is
      given, e.g. ["ifNull(count(somthing), something)", None, None]: the
      parsed expression with the alias applied.
    - `aggregation_function` is a Clickhouse function call and columns are
      given, e.g. ["f(a)", "b", None]: a CurriedFunctionCall that applies
      the parsed function to the parsed columns.

    `column` may be a single value or a list/tuple of values; falsy entries
    are skipped. Any other combination raises a ParsingException
    (report=False).
    """
    columns: Iterable[Any] = (
        column if isinstance(column, (list, tuple)) else (column, )
    )
    columns_expr = [
        parse_expression(col, dataset_columns, array_join_cols)
        for col in columns if col
    ]

    # Plain function name: regular Snuba syntax.
    if FUNCTION_NAME_RE.fullmatch(aggregation_function) is not None:
        return FunctionCall(alias, aggregation_function, tuple(columns_expr))

    parsed_expression = parse_clickhouse_function(aggregation_function)

    if not columns_expr:
        # Simple Clickhouse expression with no snuba syntax
        if isinstance(parsed_expression, (FunctionCall, CurriedFunctionCall)):
            return replace(parsed_expression, alias=alias)
    elif isinstance(parsed_expression, FunctionCall):
        # Mix of clickhouse syntax and snuba syntax that generates a CurriedFunction
        return CurriedFunctionCall(
            alias,
            parsed_expression,
            tuple(columns_expr),
        )

    raise ParsingException(
        f"Invalid aggregation format {aggregation_function} {column}",
        report=False)
Ejemplo n.º 12
0
def validate_entities_with_query(
    query: Union[CompositeQuery[QueryEntity], LogicalQuery]
) -> None:
    """
    Run the validators of every entity involved in the query.

    For a LogicalQuery the validators of the from clause entity are run
    against the query. For a composite query whose from clause is a join,
    the validators of each joined entity are run against the outer query.
    Composite queries over anything other than a join are not validated
    here.

    An InvalidQueryException or InvalidExpressionException raised by a
    validator is converted into a ParsingException that names the entity
    and keeps the `report` flag of the original exception.
    """
    if isinstance(query, LogicalQuery):
        entity = get_entity(query.get_from_clause().key)
        try:
            for validator in entity.get_validators():
                validator.validate(query)
        except (InvalidQueryException, InvalidExpressionException) as e:
            raise ParsingException(
                f"validation failed for entity {query.get_from_clause().key.value}: {e}",
                report=e.report,
            )
        return

    from_clause = query.get_from_clause()
    if not isinstance(from_clause, JoinClause):
        return

    for node in from_clause.get_alias_node_map().values():
        assert isinstance(node.data_source, QueryEntity)  # mypy
        entity = get_entity(node.data_source.key)
        try:
            for validator in entity.get_validators():
                validator.validate(query)
        except (InvalidQueryException, InvalidExpressionException) as e:
            raise ParsingException(
                f"validation failed for entity {node.data_source.key.value}: {e}",
                report=e.report,
            )
Ejemplo n.º 13
0
import pytest

from snuba.datasets.factory import get_dataset
from snuba.query.exceptions import InvalidExpressionException
from snuba.query.parser.exceptions import ParsingException
from snuba.query.snql.parser import parse_snql_query

# Parametrized cases. Each entry holds a SnQL query body, an exception
# instance and a readable test id.
# NOTE(review): the test consuming this list is not visible here; presumably
# it asserts that parsing the query raises the given exception - confirm.
test_cases = [
    # A LIKE condition on timestamp does not count as the required >= bound.
    pytest.param(
        """
        MATCH (events)
        SELECT event_id
        WHERE timestamp LIKE 'carbonara'
        """,
        ParsingException("missing >= condition on column timestamp for entity events"),
        id="Invalid LIKE param",
    ),
    # The lambda declares `x` but its body refers to `y`.
    pytest.param(
        "MATCH (discover_events) SELECT arrayMap((`x`) -> identity(`y`), sdk_integrations) AS sdks WHERE project_id = 1 AND timestamp >= toDateTime('2021-01-01') AND timestamp < toDateTime('2021-01-02')",
        InvalidExpressionException("identifier(s) `y` not defined"),
        id="invalid lambda identifier",
    ),
    # Nested lambdas declare `x` and `y` but the inner body refers to `z`.
    pytest.param(
        "MATCH (discover_events) SELECT arrayMap((`x`) -> arrayMap((`y`) -> identity(`z`), sdk_integrations), sdk_integrations) AS sdks WHERE project_id = 1 AND timestamp >= toDateTime('2021-01-01') AND timestamp < toDateTime('2021-01-02')",
        InvalidExpressionException("identifier(s) `z` not defined"),
        id="invalid nested lambda identifier",
    ),
]

Ejemplo n.º 14
0
def build_list(relationships: Sequence[RelationshipTuple]) -> Node:
    """
    Convert a list of joins of the form a -> b, b -> c etc. into a linked
    list whose root is the parent entity and where every child keeps a
    reference to its join parent.

    Example:
    [a -> b, b -> c] ==> a() -> b(a) -> c(b), where `b(a)` denotes entity `b`
    with a reference to `a` as a parent.

    Joins can arrive in any order, so all the lists that are not connected
    yet are tracked as roots. When a join connects two lists, the child list
    is inserted into the parent list and dropped from the roots. A join whose
    left hand side is already known is pushed under the existing node. A
    second mapping from entity to Node is kept for the children, so a node
    can be found without scanning through the roots.

    Example:
    Input: [a -> b, c -> d, b -> c]
    The first two joins produce two roots: `a() -> b(a)` and `c() -> d(c)`.
    The third join sees that `c` is a child of `b` and attaches it:
    `b() -> c(b) -> d(c)`. That chain is added under root `a` and `c` is
    removed from the roots, leaving one root: `a() -> b(a) -> c(b) -> d(c)`.

    Exactly one root must remain once all joins are processed: more than one
    raises a ParsingException for a disconnected join, none raises a
    ParsingException for a cyclical join.
    """

    roots: MutableMapping[EntityKey, Node] = {}
    children: MutableMapping[EntityKey, Node] = {}

    def index_chain(head: Optional[Node]) -> None:
        # Register every node of the chain starting at `head` as a child.
        current = head
        while current is not None:
            children[current.entity] = current
            current = current.child

    for rel in relationships:
        lhs = Node(rel.lhs)
        rhs = Node(rel.rhs, rel.data)

        orphan = roots.get(rhs.entity)
        if orphan and not orphan.has_child(lhs.entity):
            # The orphan is a child of this join. Combine them.
            if orphan.child:
                rhs.push_child(orphan.child)
            del roots[orphan.entity]

        # Attach the right hand side under an existing root, an existing
        # child, or a brand new root, in that order of preference.
        if lhs.entity in roots:
            parent = roots[lhs.entity]
        elif lhs.entity in children:
            parent = children[lhs.entity]
        else:
            parent = lhs
            roots[lhs.entity] = lhs

        parent.push_child(rhs)
        index_chain(rhs)

    if len(roots) > 1:
        raise ParsingException("invalid join: join is disconnected")
    if len(roots) < 1:
        raise ParsingException("invalid join: join is cyclical")

    return next(iter(roots.values()))
Ejemplo n.º 15
0
def parse_conditions(
    operand_builder: Callable[[Any, ColumnSet, Set[str]], TExpression],
    and_builder: Callable[[Sequence[TExpression]], Optional[TExpression]],
    or_builder: Callable[[Sequence[TExpression]], Optional[TExpression]],
    unpack_array_condition_builder: Callable[[TExpression, str, Any],
                                             TExpression],
    simple_condition_builder: Callable[[TExpression, str, Any], TExpression],
    entity: Entity,
    conditions: Any,
    arrayjoin_cols: Set[str],
    depth: int = 0,
) -> Optional[TExpression]:
    """
    Build the boolean expression for a list of conditions, or return None
    when there are no conditions.

    The top level (depth 0) is an AND of its entries, with duplicates
    removed while keeping the original order. An entry is either a single
    condition (lhs, operator, literal) or, one level down (depth 1), a list
    of conditions combined with OR. Anything nested deeper that is not a
    single condition raises an InvalidConditionException.

    operand_builder: builds the expression for the left hand side of a
      single condition.
    and_builder / or_builder: combine a list of expressions with AND / OR.
    unpack_array_condition_builder: used when the left hand side is a bare
      array column (see the comment in the code).
    simple_condition_builder: builds a condition out of the left hand side
      expression, the operator and the right hand side literal.
    """
    from snuba.clickhouse.columns import Array

    if not conditions:
        return None

    def parse_nested(cond: Any) -> Optional[TExpression]:
        # Same builders and entity, one level deeper.
        return parse_conditions(
            operand_builder,
            and_builder,
            or_builder,
            unpack_array_condition_builder,
            simple_condition_builder,
            entity,
            cond,
            arrayjoin_cols,
            depth + 1,
        )

    if depth == 0:
        # The ordered mapping drops duplicated conditions but keeps the
        # first occurrence of each in its original position.
        unique = OrderedDict.fromkeys(parse_nested(cond) for cond in conditions)
        return and_builder([exp for exp in unique if exp])

    if is_condition(conditions):
        try:
            lhs, op, lit = conditions
        except Exception as cause:
            raise ParsingException(f"Cannot process condition {conditions}",
                                   cause) from cause

        # Sorting the values makes equivalent IN conditions compare equal,
        # which is what allows them to be deduplicated.
        if op in ("IN", "NOT IN") and isinstance(lit, tuple):
            lit = tuple(sorted(lit))

        # A bare array column (that is not being arrayJoined, which would
        # make it scalar again) compared with a scalar value is read as a
        # check over the elements of the array: operators looking for a
        # value (IN, =, LIKE) match rows where any element matches, while
        # exclusionary operators (NOT IN, NOT LIKE, !=) match rows where all
        # the elements match (eg. all NOT LIKE 'foo').
        columns = entity.get_data_model()
        is_array_check = (
            isinstance(lhs, str)
            and lhs in columns
            and isinstance(columns[lhs].type, Array)
            and columns[lhs].base_name not in arrayjoin_cols
            and columns[lhs].flattened not in arrayjoin_cols
            and not isinstance(lit, (list, tuple))
        )
        condition_builder = (
            unpack_array_condition_builder
            if is_array_check else simple_condition_builder
        )
        return condition_builder(
            operand_builder(lhs, entity.get_data_model(), arrayjoin_cols),
            op,
            lit,
        )

    if depth == 1:
        alternatives = [parse_nested(cond) for cond in conditions]
        return or_builder([exp for exp in alternatives if exp])

    raise InvalidConditionException(str(conditions))
Ejemplo n.º 16
0
def parse_function(
    output_builder: Callable[[Optional[str], str, List[TExpression]],
                             TExpression],
    simple_expression_builder: Callable[[str], TExpression],
    literal_builder: Callable[[
        Optional[Union[str, datetime, date, List[Any], Tuple[Any],
                       numbers.Number]]
    ], TExpression, ],
    unpack_array_condition_builder: Callable[
        [TExpression, str, Any, Optional[str]], TExpression],
    dataset_columns: ColumnSet,
    arrayjoin_cols: Set[str],
    expr: Any,
    depth: int = 0,
) -> TExpression:
    """
    Parse a function expression written in the Snuba syntax and assemble the
    result through the given builders.

    - output_builder puts alias, function name and parameters together.
    - simple_expression_builder processes an argument given as a string.
    - literal_builder processes any other non function argument.
    - unpack_array_condition_builder handles a condition function applied
      to a bare array column (see the comment in the code).

    Keeping the builders separate from the parsing algorithm lets the same
    algorithm produce either an AST or the old Clickhouse syntax.

    Raises a ParsingException (report=False) when `expr` is not a function
    at the given depth.
    """
    function_tuple = is_function(expr, depth)
    if function_tuple is None:
        raise ParsingException(
            f"complex_column_expr was given an expr {expr} that is not a function at depth {depth}.",
            report=False,
        )

    name, args, alias = function_tuple

    # When the first of two arguments is a bare array column (that is not
    # being arrayJoined, which would make it scalar again) the function is
    # read as a check over the elements of the array: functions looking for
    # a value (IN, =, LIKE) match rows where any element matches, while
    # exclusionary ones (NOT IN, NOT LIKE, !=) match rows where all the
    # elements match (eg. all NOT LIKE 'foo'). This only works for a bare
    # column: an array column nested in further functions is not detected.
    if (name in FUNCTION_TO_OPERATOR and len(args) == 2
            and isinstance(args[0], str) and args[0] in dataset_columns):
        column = dataset_columns[args[0]]
        if (isinstance(column.type.get_raw(), Array)
                and column.flattened not in arrayjoin_cols
                and column.base_name not in arrayjoin_cols):
            return unpack_array_condition_builder(
                simple_expression_builder(args[0]),
                name,
                args[1],
                alias,
            )

    def parse_nested(nested: Any) -> TExpression:
        # Same builders and columns, one level deeper.
        return parse_function(
            output_builder,
            simple_expression_builder,
            literal_builder,
            unpack_array_condition_builder,
            dataset_columns,
            arrayjoin_cols,
            nested,
            depth + 1,
        )

    out: List[TExpression] = []
    position = 0
    while position < len(args):
        # Two consecutive arguments may together form a nested function.
        pair = args[position:position + 2]
        if is_function(pair, depth + 1):
            out.append(parse_nested(pair))
            position += 2
            continue

        current = args[position]
        if is_function(current, depth + 1):  # Embedded function
            out.append(parse_nested(current))
        elif isinstance(current, str):
            out.append(simple_expression_builder(current))
        else:
            out.append(literal_builder(current))
        position += 1

    return output_builder(alias, name, out)
Ejemplo n.º 17
0
 def visit_entity_name(self, node: Node, visited_children: Tuple[Any]) -> EntityKey:
     """
     Convert the text of the node into an EntityKey.

     Any error raised while building the EntityKey is reported as a
     ParsingException stating that the text is not a valid entity name.
     """
     try:
         entity_key = EntityKey(node.text)
     except Exception:
         raise ParsingException(f"{node.text} is not a valid entity name")
     else:
         return entity_key
Ejemplo n.º 18
0
def _parse_query_impl(body: MutableMapping[str, Any], entity: Entity) -> Query:
    """
    Build a Query object from the raw query body.

    The following keys of `body` are read: "aggregations", "groupby",
    "selected_columns", "arrayjoin", "conditions", "having" and "orderby".
    The select clause is made of the group by expressions, followed by the
    aggregations, followed by the selected columns.

    Raises a ParsingException for a malformed aggregation, an invalid use of
    arrayJoin(...) in the select clause or an invalid order by clause.
    """

    def build_selected_expressions(
        raw_expressions: Sequence[Any], ) -> List[SelectedExpression]:
        selected = []
        for raw in raw_expressions:
            parsed = parse_expression(tuplify(raw), entity.get_data_model(),
                                      set())
            # A plain string names itself. For a structured expression the
            # alias found by the parser is used as the name.
            name = raw if isinstance(raw, str) else parsed.alias
            selected.append(SelectedExpression(name=name, expression=parsed))
        return selected

    aggregations = []
    for aggregation in body.get("aggregations", []):
        if not isinstance(aggregation, Sequence):
            raise ParsingException((
                f"Invalid aggregation structure {aggregation}. "
                "It must be a sequence containing expression, column and alias."
            ))
        aggregation_function = aggregation[0]
        # Falsy columns and aliases are normalized to [] and None.
        column_expr = aggregation[1] or []
        alias = aggregation[2] or None

        aggregation_expr = parse_aggregation(
            aggregation_function,
            column_expr,
            alias,
            entity.get_data_model(),
            set(),
        )
        aggregations.append(
            SelectedExpression(name=alias, expression=aggregation_expr))

    groupby_clause = build_selected_expressions(
        to_list(body.get("groupby", [])))
    selected_columns = build_selected_expressions(
        body.get("selected_columns", []))
    select_clause = groupby_clause + aggregations + selected_columns

    array_join_cols = set()
    array_join_expr: Optional[Expression] = None
    arrayjoin = body.get("arrayjoin")
    # TODO: Properly detect all array join columns in all clauses of the query.
    # This is missing an arrayJoin in condition with an alias that is then
    # used in the select.
    if arrayjoin:
        array_join_cols.add(arrayjoin)
        array_join_expr = parse_expression(
            body["arrayjoin"], entity.get_data_model(), {arrayjoin})
    else:
        for select_expr in select_clause:
            expression = select_expr.expression
            if not (isinstance(expression, FunctionCall)
                    and expression.function_name == "arrayJoin"):
                continue

            parameters = expression.parameters
            if len(parameters) != 1:
                raise ParsingException(
                    "arrayJoin(...) only accepts a single parameter.")

            argument = parameters[0]
            if isinstance(argument, Column):
                array_join_cols.add(argument.column_name)
                continue

            # Only columns, or functions that do not reference columns, are
            # accepted. With a column nested in an arbitrary function there
            # is no way to tell whether the values of the column are being
            # arrayjoined. Functions of literals are fine.
            for sub_expression in argument:
                if isinstance(sub_expression, Column):
                    raise ParsingException(
                        "arrayJoin(...) cannot contain columns nested in functions."
                    )

    where_expr = parse_conditions_to_expr(body.get("conditions", []), entity,
                                          array_join_cols)
    having_expr = parse_conditions_to_expr(body.get("having", []), entity,
                                           array_join_cols)

    orderby_exprs = []
    for orderby in to_list(body.get("orderby", [])):
        if isinstance(orderby, str):
            match = NEGATE_RE.match(orderby)
            if match is None:
                raise ParsingException((
                    f"Invalid Order By clause {orderby}. If the Order By is a string, "
                    "it must respect the format `[-]column`"))
            direction, col = match.groups()
            target = col
        elif is_function(orderby):
            match = NEGATE_RE.match(orderby[0])
            if match is None:
                raise ParsingException((
                    f"Invalid Order By clause {orderby}. If the Order By is an expression, "
                    "the function name must respect the format `[-]func_name`"
                ))
            direction, col = match.groups()
            # Keep the arguments, drop the sign from the function name.
            target = [col] + orderby[1:]
        else:
            raise ParsingException(
                (f"Invalid Order By clause {orderby}. The Clause was neither "
                 "a string nor a function call."))

        orderby_parsed = parse_expression(tuplify(target),
                                          entity.get_data_model(), set())
        if direction == "-":
            order_direction = OrderByDirection.DESC
        else:
            order_direction = OrderByDirection.ASC
        orderby_exprs.append(OrderBy(order_direction, orderby_parsed))

    return Query(
        body,
        None,
        selected_columns=select_clause,
        array_join=array_join_expr,
        condition=where_expr,
        groupby=[g.expression for g in groupby_clause],
        having=having_expr,
        order_by=orderby_exprs,
    )
Ejemplo n.º 19
0
import logging
from typing import Any

import pytest

from snuba.query.exceptions import InvalidQueryException
from snuba.query.parser.exceptions import ParsingException
from snuba.web.views import handle_invalid_query

# Parametrized cases: each entry holds an exception instance, the name of
# the log level expected for it and a readable test id.
invalid_query_exception_test_cases = [
    # Exception created with should_report=True, paired with "WARNING".
    pytest.param(
        ParsingException("This should be reported at WARNING", should_report=True),
        "WARNING",
        id="Report exception",
    ),
    # Exception created with should_report=False, paired with "INFO".
    pytest.param(
        ParsingException("This should be reported at INFO", should_report=False),
        "INFO",
        id="Mute exception",
    ),
]


@pytest.mark.parametrize(
    "exception, expected_log_level", invalid_query_exception_test_cases
)
def test_handle_invalid_query(
    caplog: Any, exception: InvalidQueryException, expected_log_level: str
) -> None:
    with caplog.at_level(logging.INFO):
        caplog.clear()