コード例 #1
0
ファイル: columns.py プロジェクト: denisgolius/snuba
def column_expr(dataset,
                column_name,
                query: Query,
                parsing_context: ParsingContext,
                alias=None,
                aggregate=None):
    """
    Expand a column reference into the expression used when building the query.

    The shape of ``column_name`` decides who does the work:
      * a function call is delegated to ``complex_column_expr``;
      * a list/tuple combined with ``aggregate`` is repackaged as
        ``[aggregate, column_name, alias]`` and also delegated to
        ``complex_column_expr``;
      * a string matching ``QUOTED_LITERAL_RE`` has its first and last
        character dropped and is passed through ``escape_literal``;
      * anything else is resolved by ``dataset.column_expr``, wrapped in
        ``aggregate`` when one is given, and finally aliased with
        ``alias_expr`` under ``alias`` (or the column name when no alias
        was supplied).

    At least one of ``column_name`` and ``aggregate`` is required, and an
    aggregate additionally needs a column name or an alias.
    """
    assert column_name or aggregate
    assert not aggregate or column_name or alias
    column_name = column_name or ''

    # Function-shaped column: the whole structure is handled recursively.
    if is_function(column_name, 0):
        return complex_column_expr(dataset, column_name, query,
                                   parsing_context)

    # Aggregate applied to a structured column: rebuild it as a function call.
    if isinstance(column_name, (list, tuple)) and aggregate:
        as_function = [aggregate, column_name, alias]
        return complex_column_expr(dataset, as_function, query,
                                   parsing_context)

    # Quoted literal: strip the surrounding quote characters before escaping.
    if isinstance(column_name, str) and QUOTED_LITERAL_RE.match(column_name):
        unquoted = column_name[1:-1]
        return escape_literal(unquoted)

    expanded = dataset.column_expr(column_name, query, parsing_context)
    if aggregate:
        expanded = function_expr(aggregate, expanded)

    return alias_expr(expanded, escape_alias(alias or column_name),
                      parsing_context)
コード例 #2
0
ファイル: columns.py プロジェクト: ruezetle/snuba
def column_expr(
    dataset,
    column_name,
    query: Query,
    parsing_context: ParsingContext,
    alias=None,
    aggregate=None,
):
    """
    Expand a column reference into the expression used when building the query.

    Function calls, aggregated list/tuple columns and quoted literals are
    delegated to ``complex_column_expr`` / ``escape_literal`` and returned
    directly. Every other column is resolved through ``dataset.column_expr``,
    optionally wrapped in ``aggregate``, and then aliased. A leading ``-`` on
    the column name or on the resolved expression is kept outside of the
    aliased expression in the returned string.

    At least one of ``column_name`` and ``aggregate`` is required, and an
    aggregate additionally needs a column name or an alias.
    """
    assert column_name or aggregate
    assert not aggregate or column_name or alias
    column_name = column_name or ""

    if is_function(column_name, 0):
        return complex_column_expr(dataset, column_name, query, parsing_context)

    if isinstance(column_name, (list, tuple)) and aggregate:
        as_function = [aggregate, column_name, alias]
        return complex_column_expr(dataset, as_function, query, parsing_context)

    if isinstance(column_name, str) and QUOTED_LITERAL_RE.match(column_name):
        return escape_literal(column_name[1:-1])

    expanded = dataset.column_expr(column_name, query, parsing_context)
    if aggregate:
        expanded = function_expr(aggregate, expanded)

    # ORDER BY may hand us a column name prefixed with `-`, which stands for
    # ORDER BY ... DESC. The `-` has to stay outside of the aliased
    # expression: something like (COL AS alias) with the sign inside would
    # be invalid syntax.
    #
    # escape_alias only does half of the job (it keeps the `-` outside of the
    # escaped name), so without the stripping below we would turn
    #
    #   -events.event_id  into  (-events.event_id AS -`events.event_id`)
    #
    # Instead the sign is removed before escaping/aliasing and glued back on
    # right before returning, giving
    #
    #   -(events.event_id AS `events.event_id`)
    #
    # or, when the alias already existed,
    #
    #   -`events.event_id`
    #
    # The proper fix is to strip the `-` before column processing starts;
    # that is planned for the new column abstraction.
    name_sign, bare_name = NEGATE_RE.match(column_name).groups()
    safe_alias = escape_alias(alias or bare_name)
    expr_sign, bare_expr = NEGATE_RE.match(expanded).groups()

    # The two signs should never disagree. Both are still consulted until the
    # stripping is moved to the top of the function so that it covers tags too.
    sign = name_sign or expr_sign
    return f"{sign}{alias_expr(bare_expr, safe_alias, parsing_context)}"
コード例 #3
0
def parse_expression(val: Any, dataset_columns: ColumnSet,
                     arrayjoin: Set[str]) -> Expression:
    """
    Convert a simple or structured Snuba query language expression into an
    AST Expression.

    Function-shaped values are forwarded to ``parse_function_to_expr``
    together with ``dataset_columns`` and ``arrayjoin``; plain strings are
    forwarded to ``parse_string_to_expr``.

    Raises:
        ParsingException: if ``val`` is neither a function nor a string.
    """
    if is_function(val, 0):
        return parse_function_to_expr(val, dataset_columns, arrayjoin)

    # Anything that is not a function must be a plain string.
    if not isinstance(val, str):
        raise ParsingException(
            f"Expression to parse can only be a function or a string: {val}")

    return parse_string_to_expr(val)
コード例 #4
0
ファイル: columns.py プロジェクト: denisgolius/snuba
def complex_column_expr(dataset,
                        expr,
                        query: Query,
                        parsing_context: ParsingContext,
                        depth=0):
    """
    Render a function-call expression, recursing into nested function calls.

    ``expr`` must be recognised by ``is_function`` at the given ``depth``,
    which yields the function name, its arguments and an optional alias.
    Each argument is rendered in turn:
      * if the next two arguments taken together form a function, they are
        consumed as a pair and rendered recursively;
      * if the single argument is itself a function, it is rendered
        recursively;
      * a string argument is expanded with ``column_expr``;
      * anything else goes through ``escape_literal``.

    The rendered arguments are joined with ``', '`` and passed to
    ``function_expr``; the result is aliased when an alias is present.

    Raises:
        ValueError: if ``expr`` is not a function at ``depth``.
    """
    parsed = is_function(expr, depth)
    if parsed is None:
        raise ValueError(
            'complex_column_expr was given an expr %s that is not a function at depth %d.'
            % (expr, depth))

    name, args, alias = parsed
    inner_depth = depth + 1
    rendered = []
    pos = 0
    while pos < len(args):
        # A function may be spread over two consecutive arguments; try that
        # reading first and consume both arguments when it applies.
        pair = args[pos:pos + 2]
        if is_function(pair, inner_depth):
            rendered.append(
                complex_column_expr(dataset, pair, query, parsing_context,
                                    inner_depth))
            pos += 2
            continue

        arg = args[pos]
        if is_function(arg, inner_depth):  # Embedded function
            piece = complex_column_expr(dataset, arg, query, parsing_context,
                                        inner_depth)
        elif isinstance(arg, str):
            piece = column_expr(dataset, arg, query, parsing_context)
        else:
            piece = escape_literal(arg)
        rendered.append(piece)
        pos += 1

    call = function_expr(name, ', '.join(rendered))
    return alias_expr(call, alias, parsing_context) if alias else call
コード例 #5
0
ファイル: expressions.py プロジェクト: ruezetle/snuba
def parse_expression(val: Any) -> Expression:
    """
    Parse a function or a plain string into an AST Expression.

    Resolution order:
      1. a function-shaped value is delegated to ``parse_function_to_expr``;
      2. a string made only of decimal digits becomes an integer ``Literal``;
      3. a string that ``float()`` accepts becomes a float ``Literal``;
      4. a string matching ``QUOTED_LITERAL_RE`` becomes a string ``Literal``
         with its first and last character removed;
      5. anything else is treated as a ``Column`` name.
    """
    if is_function(val, 0):
        return parse_function_to_expr(val)

    # TODO: This will use the schema of the dataset to decide
    # if the expression is a column or a literal.

    # str.isdecimal() is used rather than str.isdigit(): isdigit() is also
    # true for characters such as superscript digits ("²") that int() refuses
    # to convert, which made such a value raise ValueError instead of falling
    # through to the column case.
    if val.isdecimal():
        return Literal(None, int(val))

    try:
        return Literal(None, float(val))
    except ValueError:
        # Not a number; fall through to the literal/column checks below.
        pass

    if QUOTED_LITERAL_RE.match(val):
        return Literal(None, val[1:-1])
    return Column(None, val, None)
コード例 #6
0
ファイル: tagsmap.py プロジェクト: anthonynsimon/snuba
    def __is_optimizable(self, condition: Condition,
                         column: str) -> Optional[OptimizableCondition]:
        """
        Decide whether ``condition`` can be optimized and, if so, describe it.

        A condition qualifies when all of the following hold:
        - it is a simple condition according to ``is_condition`` (top level,
          no nested OR);
        - its operator is either EQ or NEQ;
        - its right hand side is a string literal;
        - its left hand side is a nested column access (``tag[t]`` style) on
          the nested column this instance is configured for, either bare or
          wrapped in ``ifNull``. Other functions around the tag are not
          taken into account.

        Returns an ``OptimizableCondition`` carrying the nested key, the
        operand and the value, or ``None`` when the condition does not
        qualify. The ``column`` argument is not used by this method.
        """
        if not is_condition(condition):
            return None

        operator = condition[1]
        if operator not in (Operand.EQ.value, Operand.NEQ.value):
            return None

        value = condition[2]
        if not isinstance(value, str):
            # We can only support literals for now.
            return None

        target = condition[0]

        # Peel off an ifNull wrapper. The product wraps tag accesses in ifNull
        # very often and it is trivial to unpack, which makes this class more
        # useful. This could be extended to more functions later.
        unpacked = is_function(target, 0)
        if unpacked and unpacked[0] == "ifNull" and len(unpacked[1]) > 0:
            target = unpacked[1][0]
        if not isinstance(target, str):
            return None

        # At this point the condition looks like:
        # ["tags[something]", "=", "a string"]
        nested = NESTED_COL_EXPR_RE.match(target)
        if not nested or nested[1] != self.__nested_col:
            return None

        if operator == "=":
            operand = Operand.EQ
        else:
            operand = Operand.NEQ

        # nested[0] would be the full matched expression; group 2 is the key.
        return OptimizableCondition(
            nested_col_key=nested[2],
            operand=operand,
            value=value,
        )
コード例 #7
0
def _parse_query_impl(body: MutableMapping[str, Any], entity: Entity) -> Query:
    """
    Build a Query object from the raw request ``body``.

    The body keys read here are "aggregations", "groupby",
    "selected_columns", "arrayjoin", "conditions", "having" and "orderby".
    Expressions are parsed against ``entity.get_data_model()``.

    The select clause is assembled as groupby expressions, then aggregations,
    then the explicitly selected columns.

    Raises:
        ParsingException: on a malformed aggregation, an arrayJoin call with
            the wrong number of parameters or with columns nested in
            functions, or an invalid order by clause.
    """
    def build_selected_expressions(
        raw_expressions: Sequence[Any], ) -> List[SelectedExpression]:
        # Parses each raw expression and pairs it with the name it is
        # selected under.
        output = []
        for raw_expression in raw_expressions:
            exp = parse_expression(tuplify(raw_expression),
                                   entity.get_data_model(), set())
            output.append(
                SelectedExpression(
                    # An expression in the query can be a string or a
                    # complex list with an alias. In the second case
                    # we trust the parser to find the alias.
                    name=raw_expression
                    if isinstance(raw_expression, str) else exp.alias,
                    expression=exp,
                ))
        return output

    # Each aggregation is read positionally as
    # [function, column expression, alias].
    aggregations = []
    for aggregation in body.get("aggregations", []):
        if not isinstance(aggregation, Sequence):
            raise ParsingException((
                f"Invalid aggregation structure {aggregation}. "
                "It must be a sequence containing expression, column and alias."
            ))
        aggregation_function = aggregation[0]
        column_expr = aggregation[1]
        # A falsy column expression is normalized to an empty list and a
        # falsy alias to None before parsing.
        column_expr = column_expr if column_expr else []
        alias = aggregation[2]
        alias = alias if alias else None

        aggregations.append(
            SelectedExpression(
                name=alias,
                expression=parse_aggregation(
                    aggregation_function,
                    column_expr,
                    alias,
                    entity.get_data_model(),
                    set(),
                ),
            ))

    groupby_clause = build_selected_expressions(
        to_list(body.get("groupby", [])))

    select_clause = (
        groupby_clause + aggregations +
        build_selected_expressions(body.get("selected_columns", [])))

    # Names of the columns the query array joins on; handed to the condition
    # parser below.
    array_join_cols = set()
    arrayjoin = body.get("arrayjoin")
    # TODO: Properly detect all array join columns in all clauses of the query.
    # This is missing an arrayJoin in condition with an alias that is then
    # used in the select.
    if arrayjoin:
        # Explicit "arrayjoin" clause in the body.
        array_join_cols.add(arrayjoin)
        array_join_expr: Optional[Expression] = parse_expression(
            body["arrayjoin"], entity.get_data_model(), {arrayjoin})
    else:
        # No explicit clause: look for arrayJoin(...) calls at the top level
        # of the select clause instead.
        array_join_expr = None
        for select_expr in select_clause:
            if isinstance(select_expr.expression, FunctionCall):
                if select_expr.expression.function_name == "arrayJoin":
                    parameters = select_expr.expression.parameters
                    if len(parameters) != 1:
                        raise ParsingException(
                            "arrayJoin(...) only accepts a single parameter.")
                    if isinstance(parameters[0], Column):
                        array_join_cols.add(parameters[0].column_name)
                    else:
                        # We only accepts columns or functions that do not
                        # reference columns. We could not say whether we are
                        # actually arrayjoining on the values of the column
                        # if it is nested in an arbitrary function. But
                        # functions of literals are fine.
                        for e in parameters[0]:
                            if isinstance(e, Column):
                                raise ParsingException(
                                    "arrayJoin(...) cannot contain columns nested in functions."
                                )

    where_expr = parse_conditions_to_expr(body.get("conditions", []), entity,
                                          array_join_cols)
    having_expr = parse_conditions_to_expr(body.get("having", []), entity,
                                           array_join_cols)

    # An order by entry is either a string or a function call. NEGATE_RE is
    # used to split an optional leading `-` (descending order) from the
    # column or function name; only the remainder is parsed.
    orderby_exprs = []
    for orderby in to_list(body.get("orderby", [])):
        if isinstance(orderby, str):
            match = NEGATE_RE.match(orderby)
            if match is None:
                raise ParsingException((
                    f"Invalid Order By clause {orderby}. If the Order By is a string, "
                    "it must respect the format `[-]column`"))
            direction, col = match.groups()
            orderby = col
        elif is_function(orderby):
            # For a function the sign is carried by the function name, which
            # is the first element.
            match = NEGATE_RE.match(orderby[0])
            if match is None:
                raise ParsingException((
                    f"Invalid Order By clause {orderby}. If the Order By is an expression, "
                    "the function name must respect the format `[-]func_name`"
                ))
            direction, col = match.groups()
            orderby = [col] + orderby[1:]
        else:
            raise ParsingException(
                (f"Invalid Order By clause {orderby}. The Clause was neither "
                 "a string nor a function call."))
        orderby_parsed = parse_expression(tuplify(orderby),
                                          entity.get_data_model(), set())
        orderby_exprs.append(
            OrderBy(
                OrderByDirection.DESC
                if direction == "-" else OrderByDirection.ASC,
                orderby_parsed,
            ))

    # The second positional argument of Query is passed as None here.
    return Query(
        body,
        None,
        selected_columns=select_clause,
        array_join=array_join_expr,
        condition=where_expr,
        groupby=[g.expression for g in groupby_clause],
        having=having_expr,
        order_by=orderby_exprs,
    )
コード例 #8
0
def parse_function(
    output_builder: Callable[[Optional[str], str, List[TExpression]],
                             TExpression],
    simple_expression_builder: Callable[[str], TExpression],
    literal_builder: Callable[[
        Optional[Union[str, datetime, date, List[Any], Tuple[Any],
                       numbers.Number]]
    ], TExpression, ],
    unpack_array_condition_builder: Callable[
        [TExpression, str, Any, Optional[str]], TExpression],
    dataset_columns: ColumnSet,
    arrayjoin_cols: Set[str],
    expr: Any,
    depth: int = 0,
) -> TExpression:
    """
    Parse a function expression written in the Snuba syntax and build the
    data structure used in the Query object.

    The construction of the output is delegated to four callables:
    - output_builder: combines alias, function name and parsed parameters;
    - simple_expression_builder: handles a single column given as a string;
    - literal_builder: handles any individual value that represents a literal;
    - unpack_array_condition_builder: builds the expression used when an
      operator function is applied to a bare array column.

    Keeping the builders pluggable preserves a single parsing algorithm while
    allowing it to produce either an AST or the old Clickhouse syntax.

    Raises:
        ParsingException: if ``expr`` is not a function at ``depth``.
    """
    parsed = is_function(expr, depth)
    if parsed is None:
        raise ParsingException(
            f"complex_column_expr was given an expr {expr} that is not a function at depth {depth}.",
            report=False,
        )

    name, args, alias = parsed

    # When the first argument is a plain column name referring to an array
    # column (and we are not arrayJoining on it, which would make it scalar
    # again) we assume the user wants to test whether any (or all) of the
    # array items satisfy the predicate, i.e. something like
    # `any(x == value for x in array_column)`. Operators that look for a
    # specific value (IN, =, LIKE) are taken to mean "any item matches";
    # exclusionary operators (NOT IN, NOT LIKE, !=) are taken to mean "all
    # items match" (e.g. all NOT LIKE 'foo').
    # This only applies when the array column is a bare column in the
    # condition; if it is nested in further functions the transform is not
    # performed.
    operator_on_known_column = (
        name in FUNCTION_TO_OPERATOR
        and len(args) == 2
        and isinstance(args[0], str)
        and args[0] in dataset_columns
    )
    if operator_on_known_column:
        column = dataset_columns[args[0]]
        if (isinstance(column.type.get_raw(), Array)
                and column.flattened not in arrayjoin_cols
                and column.base_name not in arrayjoin_cols):
            return unpack_array_condition_builder(
                simple_expression_builder(args[0]),
                name,
                args[1],
                alias,
            )

    def parse_nested(sub_expr):
        # Same builders and column information, one level deeper.
        return parse_function(
            output_builder,
            simple_expression_builder,
            literal_builder,
            unpack_array_condition_builder,
            dataset_columns,
            arrayjoin_cols,
            sub_expr,
            depth + 1,
        )

    parameters: List[TExpression] = []
    pos = 0
    while pos < len(args):
        # A nested function may span two consecutive arguments; try that
        # reading first and consume both arguments when it applies.
        pair = args[pos:pos + 2]
        if is_function(pair, depth + 1):
            parameters.append(parse_nested(pair))
            pos += 2
            continue

        arg = args[pos]
        if is_function(arg, depth + 1):  # Embedded function
            parameters.append(parse_nested(arg))
        elif isinstance(arg, str):
            parameters.append(simple_expression_builder(arg))
        else:
            parameters.append(literal_builder(arg))
        pos += 1

    return output_builder(alias, name, parameters)
コード例 #9
0
def _parse_query_impl(body: MutableMapping[str, Any],
                      dataset: Dataset) -> Query:
    """
    Build a Query object from the raw request ``body``.

    The body keys read here are "aggregations", "groupby",
    "selected_columns", "arrayjoin", "conditions", "having" and "orderby".
    The selected columns of the resulting query are the groupby expressions,
    followed by the aggregations, followed by the explicitly selected
    columns. The data source comes from the read schema of ``dataset``.

    Raises:
        AssertionError: if an aggregation is not a list/tuple or an order by
            entry does not match ``NEGATE_RE``.
        ValueError: if an order by entry is neither a string nor a function.
    """
    # Each aggregation is read positionally as [function, column, alias].
    aggregate_exprs = []
    for aggregation in body.get("aggregations", []):
        assert isinstance(aggregation, (list, tuple))
        function_name = aggregation[0]
        # A falsy column is normalized to an empty list, a falsy alias to None.
        column = aggregation[1] or []
        alias = aggregation[2] or None
        aggregate_exprs.append(parse_aggregation(function_name, column, alias))

    groupby_exprs = []
    for group_by in to_list(body.get("groupby", [])):
        groupby_exprs.append(parse_expression(tuplify(group_by)))

    select_exprs = []
    for select in body.get("selected_columns", []):
        select_exprs.append(parse_expression(tuplify(select)))

    selected_cols = groupby_exprs + aggregate_exprs + select_exprs

    array_join_expr: Optional[Expression] = None
    arrayjoin = body.get("arrayjoin")
    if arrayjoin:
        array_join_expr = parse_expression(body["arrayjoin"])

    where_expr = parse_conditions_to_expr(body.get("conditions", []), dataset,
                                          arrayjoin)
    having_expr = parse_conditions_to_expr(body.get("having", []), dataset,
                                           arrayjoin)

    # An order by entry is either a string or a function call. NEGATE_RE
    # splits an optional leading `-` (descending order) from the column or
    # function name; only the remainder is parsed.
    orderby_exprs = []
    for orderby in to_list(body.get("orderby", [])):
        if isinstance(orderby, str):
            negation = NEGATE_RE.match(orderby)
            assert negation is not None, f"Invalid Order By clause {orderby}"
            sign, stripped = negation.groups()
            target = stripped
        elif is_function(orderby):
            # For a function the sign is carried by the function name.
            negation = NEGATE_RE.match(orderby[0])
            assert negation is not None, f"Invalid Order By clause {orderby}"
            sign, stripped = negation.groups()
            target = [stripped] + orderby[1:]
        else:
            raise ValueError(f"Invalid Order By clause {orderby}")

        parsed_target = parse_expression(tuplify(target))
        if sign == "-":
            ordering = OrderByDirection.DESC
        else:
            ordering = OrderByDirection.ASC
        orderby_exprs.append(OrderBy(ordering, parsed_target))

    source = dataset.get_dataset_schemas().get_read_schema().get_data_source()
    return Query(
        body,
        source,
        selected_columns=selected_cols,
        array_join=array_join_expr,
        condition=where_expr,
        groupby=groupby_exprs,
        having=having_expr,
        order_by=orderby_exprs,
    )
コード例 #10
0
def parse_function(
    output_builder: Callable[[Optional[str], str, List[TExpression]],
                             TExpression],
    simple_expression_builder: Callable[[str], TExpression],
    literal_builder: Callable[[
        Optional[Union[str, datetime, date, List[Any], Tuple[Any],
                       numbers.Number]]
    ], TExpression, ],
    expr: Any,
    depth: int = 0,
) -> TExpression:
    """
    Parse a function expression written in the Snuba syntax and build the
    data structure used in the Query object.

    The construction of the output is delegated to three callables:
    - output_builder: combines alias, function name and parsed parameters;
    - simple_expression_builder: handles a single column given as a string;
    - literal_builder: handles any individual value that represents a literal.

    Keeping the builders pluggable preserves a single parsing algorithm while
    allowing it to produce either an AST or the old Clickhouse syntax.

    Raises:
        ValueError: if ``expr`` is not a function at ``depth``.
    """
    parsed = is_function(expr, depth)
    if parsed is None:
        raise ValueError(
            "complex_column_expr was given an expr %s that is not a function at depth %d."
            % (expr, depth))

    name, args, alias = parsed

    def parse_nested(sub_expr):
        # Same builders, one level deeper.
        return parse_function(
            output_builder,
            simple_expression_builder,
            literal_builder,
            sub_expr,
            depth + 1,
        )

    parameters: List[TExpression] = []
    pos = 0
    while pos < len(args):
        # A nested function may span two consecutive arguments; try that
        # reading first and consume both arguments when it applies.
        pair = args[pos:pos + 2]
        if is_function(pair, depth + 1):
            parameters.append(parse_nested(pair))
            pos += 2
            continue

        arg = args[pos]
        if is_function(arg, depth + 1):  # Embedded function
            parameters.append(parse_nested(arg))
        elif isinstance(arg, str):
            parameters.append(simple_expression_builder(arg))
        else:
            parameters.append(literal_builder(arg))
        pos += 1

    return output_builder(alias, name, parameters)