Ejemplo n.º 1
0
def _process_root(
    expression: Expression,
    subqueries: Mapping[str, SubqueryDraft],
    alias_generator: AliasGenerator,
) -> Expression:
    """
    Rewrite one root expression of the main query.

    The expression is first visited by a BranchCutter, and the result
    of that visit is then handed to `_push_down_branches` together with
    the subquery drafts. Whatever that call returns is the expression
    returned to the caller.
    """
    cutter = BranchCutter(alias_generator)
    cut_expression = expression.accept(cutter)
    return _push_down_branches(cut_expression, subqueries, alias_generator)
Ejemplo n.º 2
0
def test_branch_cutter(expression: Expression, expected: SubExpression,
                       main_expr: MainQueryExpression) -> None:
    """
    Check that visiting `expression` with a BranchCutter yields
    `expected`, and that cutting the resulting branch yields
    `main_expr`.
    """
    def alias_generator() -> Generator[str, None, None]:
        # Endless sequence: _snuba_gen_1, _snuba_gen_2, ...
        counter = 1
        while True:
            yield f"_snuba_gen_{counter}"
            counter += 1

    cut_result = expression.accept(BranchCutter(alias_generator()))
    assert cut_result == expected
    # A fresh generator is used here so aliases restart from 1.
    assert cut_result.cut_branch(alias_generator()) == main_expr
Ejemplo n.º 3
0
def generate_subqueries(query: CompositeQuery[Entity]) -> None:
    """
    Rewrite a join query in place so that every joined entity becomes a
    subquery, pushing down into those subqueries every expression that
    can be executed there.

    Pushed-down expressions are exposed in the select clause of their
    subquery under a mangled alias, and the outer query refers to them
    through that alias. For example

    ```
    SELECT e.a, f(g.b) FROM Events e INNER JOIN Groups g ON ...
    ```

    is turned into

    ```
    SELECT e._snuba_a, g._snuba_b
    FROM (
        SELECT a as _snuba_a
        FROM events
    ) e INNER JOIN (
        SELECT f(b) as _snuba_b
        FROM groups
    ) g ON ....
    ```

    Conditions get special treatment: a top level AND condition that
    is entirely contained in one subquery is moved into the condition
    clause of that subquery and dropped from the main query.

    If the from clause is itself a CompositeQuery the function recurses
    into it and returns; if it is a ProcessableQuery nothing is done.
    """

    from_clause = query.get_from_clause()
    if isinstance(from_clause, CompositeQuery):
        generate_subqueries(from_clause)
        return
    if isinstance(from_clause, ProcessableQuery):
        return

    # Anything else is a join clause: build one draft per joined node.
    subqueries = from_clause.accept(SubqueriesInitializer())

    # One generator is shared by every clause below, so the order in
    # which the clauses are processed determines the aliases produced.
    alias_generator = _alias_generator()

    # SELECT
    selected_columns = []
    for selected in query.get_selected_columns():
        selected_columns.append(
            SelectedExpression(
                name=selected.name,
                expression=_process_root(selected.expression, subqueries,
                                         alias_generator),
            ))
    query.set_ast_selected_columns(selected_columns)

    # ARRAY JOIN
    array_join = query.get_arrayjoin()
    if array_join is not None:
        pushed_array_join = [
            _process_root(element, subqueries, alias_generator)
            for element in array_join
        ]
        query.set_arrayjoin(pushed_array_join)

    # WHERE
    ast_condition = query.get_condition()
    if ast_condition is not None:
        remaining_conditions = []
        for condition in get_first_level_and_conditions(ast_condition):
            cut_condition = condition.accept(BranchCutter(alias_generator))
            if not isinstance(cut_condition, SubqueryExpression):
                # The condition references more than one subquery, so
                # it has to stay in the main query. Its branches are
                # pushed down into the subqueries select clauses and
                # referenced from the main query condition.
                remaining_conditions.append(
                    _push_down_branches(cut_condition, subqueries,
                                        alias_generator))
                continue

            # After cutting the branches the whole condition belongs to
            # a single subquery: move it there and drop it from the
            # main query.
            target = subqueries[cut_condition.subquery_alias]
            target.add_condition(cut_condition.main_expression)

        query.set_ast_condition(
            combine_and_conditions(remaining_conditions)
            if remaining_conditions else None)

    # GROUP BY
    # TODO: push down the group by when it is the same as the join key.
    pushed_groupby = [
        _process_root(expression, subqueries, alias_generator)
        for expression in query.get_groupby()
    ]
    query.set_ast_groupby(pushed_groupby)

    # HAVING
    having = query.get_having()
    if having is not None:
        pushed_having = [
            _process_root(condition, subqueries, alias_generator)
            for condition in get_first_level_and_conditions(having)
        ]
        query.set_ast_having(combine_and_conditions(pushed_having))

    # ORDER BY
    pushed_orderby = []
    for clause in query.get_orderby():
        pushed_expression = _process_root(clause.expression, subqueries,
                                          alias_generator)
        pushed_orderby.append(replace(clause, expression=pushed_expression))
    query.set_ast_orderby(pushed_orderby)

    # LIMIT BY
    limitby = query.get_limitby()
    if limitby is not None:
        pushed_columns = [
            _process_root(column, subqueries, alias_generator)
            for column in limitby.columns
        ]
        query.set_limitby(replace(limitby, columns=pushed_columns))

    # Finally swap the join nodes for the subqueries built above.
    replacer = SubqueriesReplacer(subqueries)
    query.set_from_clause(replacer.visit_join_clause(from_clause))