Beispiel #1
0
    def visit_order_columns(
            self, node: Node, visited_children: Tuple[Expression, Node, Any,
                                                      Any, Any]) -> OrderBy:
        column, order, _, _, _ = visited_children

        direction = (OrderByDirection.ASC
                     if order.text == "ASC" else OrderByDirection.DESC)
        return OrderBy(direction, column)
Beispiel #2
0
def test_format_clickhouse_specific_query() -> None:
    """
    Adds a few of the Clickhosue specific fields to the query.
    """

    query = ClickhouseQuery(
        TableSource("my_table", ColumnSet([])),
        selected_columns=[
            SelectedExpression("column1", Column(None, None, "column1")),
            SelectedExpression("column2", Column(None, "table1", "column2")),
        ],
        condition=binary_condition(
            None,
            "eq",
            lhs=Column(None, None, "column1"),
            rhs=Literal(None, "blabla"),
        ),
        groupby=[
            Column(None, None, "column1"),
            Column(None, "table1", "column2")
        ],
        having=binary_condition(
            None,
            "eq",
            lhs=Column(None, None, "column1"),
            rhs=Literal(None, 123),
        ),
        order_by=[
            OrderBy(OrderByDirection.ASC, Column(None, None, "column1"))
        ],
        array_join=Column(None, None, "column1"),
        sample=0.1,
        totals=True,
        limitby=(10, "environment"),
    )

    query.set_final(True)
    query.set_offset(50)
    query.set_limit(100)

    request_settings = HTTPRequestSettings()
    clickhouse_query = AstSqlQuery(query, request_settings)

    expected = {
        "from": "FROM my_table FINAL SAMPLE 0.1",
        "group": "GROUP BY (column1, table1.column2) WITH TOTALS",
        "having": "HAVING eq(column1, 123)",
        "array_join": "ARRAY JOIN column1",
        "limit": "LIMIT 100 OFFSET 50",
        "limitby": "LIMIT 10 BY environment",
        "order": "ORDER BY column1 ASC",
        "select": "SELECT column1, table1.column2",
        "where": "WHERE eq(column1, 'blabla')",
    }

    assert clickhouse_query.sql_data() == expected
Beispiel #3
0
def test_format_clickhouse_specific_query() -> None:
    """
    Adds a few of the Clickhosue specific fields to the query.
    """

    query = ClickhouseQuery(
        Table("my_table", ColumnSet([]), final=True, sampling_rate=0.1),
        selected_columns=[
            SelectedExpression("column1", Column(None, None, "column1")),
            SelectedExpression("column2", Column(None, "table1", "column2")),
        ],
        condition=binary_condition(
            "eq",
            lhs=Column(None, None, "column1"),
            rhs=Literal(None, "blabla"),
        ),
        groupby=[
            Column(None, None, "column1"),
            Column(None, "table1", "column2")
        ],
        having=binary_condition(
            "eq",
            lhs=Column(None, None, "column1"),
            rhs=Literal(None, 123),
        ),
        order_by=[
            OrderBy(OrderByDirection.ASC, Column(None, None, "column1"))
        ],
        array_join=Column(None, None, "column1"),
        totals=True,
        limitby=LimitBy(10, Column(None, None, "environment")),
    )

    query.set_offset(50)
    query.set_limit(100)

    request_settings = HTTPRequestSettings()
    clickhouse_query = format_query(query, request_settings)

    expected = [
        "SELECT column1, table1.column2",
        ["FROM", "my_table FINAL SAMPLE 0.1"],
        "ARRAY JOIN column1",
        "WHERE eq(column1, 'blabla')",
        "GROUP BY column1, table1.column2 WITH TOTALS",
        "HAVING eq(column1, 123)",
        "ORDER BY column1 ASC",
        "LIMIT 10 BY environment",
        "LIMIT 100 OFFSET 50",
    ]

    assert clickhouse_query.structured() == expected
Beispiel #4
0
def test_iterate_over_query() -> None:
    """
    Creates a query with the new AST and iterate over all expressions.
    """
    column1 = Column(None, "t1", "c1")
    column2 = Column(None, "t1", "c2")
    function_1 = FunctionCall("alias", "f1", (column1, column2))
    function_2 = FunctionCall("alias", "f2", (column2, ))

    condition = binary_condition(ConditionFunctions.EQ, column1,
                                 Literal(None, "1"))

    prewhere = binary_condition(ConditionFunctions.EQ, column2,
                                Literal(None, "2"))

    orderby = OrderBy(OrderByDirection.ASC, function_2)

    query = Query(
        Table("my_table", ColumnSet([])),
        selected_columns=[SelectedExpression("alias", function_1)],
        array_join=None,
        condition=condition,
        groupby=[function_1],
        prewhere=prewhere,
        having=None,
        order_by=[orderby],
    )

    expected_expressions = [
        # selected columns
        column1,
        column2,
        function_1,
        # condition
        column1,
        Literal(None, "1"),
        condition,
        # groupby
        column1,
        column2,
        function_1,
        # order by
        column2,
        function_2,
        # prewhere
        column2,
        Literal(None, "2"),
        prewhere,
    ]

    assert list(query.get_all_expressions()) == expected_expressions
Beispiel #5
0
    def visit_order_list(
        self, node: Node, visited_children: Tuple[OrderBy, Expression,
                                                  Node]) -> Sequence[OrderBy]:
        left_order_list, right_order, order = visited_children
        ret: List[OrderBy] = []

        # in the case of one OrderBy
        # left_order_list will be an empty node
        if not isinstance(left_order_list, Node):
            if not isinstance(left_order_list, (list, tuple)):
                ret.append(left_order_list)
            else:
                for p in left_order_list:
                    ret.append(p)

        direction = (OrderByDirection.ASC
                     if order.text == "ASC" else OrderByDirection.DESC)
        ret.append(OrderBy(direction, right_order))

        return ret
Beispiel #6
0
         lhs=Column("al", None, "column3"),
         rhs=Literal(None, "blabla"),
     ),
     groupby=[
         Column(None, None, "column1"),
         Column(None, "table1", "column2"),
         Column("al", None, "column3"),
         Column(None, None, "column4"),
     ],
     having=binary_condition(
         "eq",
         lhs=Column(None, None, "column1"),
         rhs=Literal(None, 123),
     ),
     order_by=[
         OrderBy(OrderByDirection.ASC, Column(None, None, "column1")),
         OrderBy(OrderByDirection.DESC, Column(None, "table1",
                                               "column2")),
     ],
 ),
 [
     "SELECT column1, table1.column2, (column3 AS al)",
     ["FROM", "my_table"],
     "WHERE eq(al, 'blabla')",
     "GROUP BY column1, table1.column2, al, column4",
     "HAVING eq(column1, 123)",
     "ORDER BY column1 ASC, table1.column2 DESC",
 ],
 ("SELECT column1, table1.column2, (column3 AS al) "
  "FROM my_table "
  "WHERE eq(al, 'blabla') "
Beispiel #7
0
)

LOGICAL_QUERY = LogicalQuery(
    from_clause=Entity(EntityKey.EVENTS, EVENTS_SCHEMA, 0.5),
    selected_columns=[
        SelectedExpression("c1", Column("_snuba_c1", "t", "c")),
        SelectedExpression(
            "f1", FunctionCall("_snuba_f1", "f", (Column(None, "t", "c2"), ))),
    ],
    array_join=Column(None, None, "col"),
    condition=binary_condition("equals", Column(None, None, "c4"),
                               Literal(None, "asd")),
    groupby=[Column(None, "t", "c4")],
    having=binary_condition("equals", Column(None, None, "c6"),
                            Literal(None, "asd2")),
    order_by=[OrderBy(OrderByDirection.ASC, Column(None, "t", "c"))],
    limitby=LimitBy(100, Column(None, None, "c8")),
    limit=150,
)

SIMPLE_FORMATTED = {
    "FROM": {
        "ENTITY": EntityKey.EVENTS,
        "SAMPLE": "0.5"
    },
    "SELECT": [["c1", "(t.c AS _snuba_c1)"],
               ["f1", ["_snuba_f1", "f", ["t.c2"]]]],
    "ARRAYJOIN": "col",
    "WHERE": ["equals", ["c4", "asd"]],
    "GROUPBY": ["t.c4"],
    "HAVING": ["equals", ["c6", "asd2"]],
Beispiel #8
0
def test_replace_expression() -> None:
    """
    Create a query with the new AST and replaces a function with a different function
    replaces f1(...) with tag(f1)
    """
    column1 = Column(None, "t1", "c1")
    column2 = Column(None, "t1", "c2")
    function_1 = FunctionCall("alias", "f1", (column1, column2))
    function_2 = FunctionCall("alias", "f2", (column2,))

    condition = binary_condition(ConditionFunctions.EQ, function_1, Literal(None, "1"))

    prewhere = binary_condition(ConditionFunctions.EQ, function_1, Literal(None, "2"))

    orderby = OrderBy(OrderByDirection.ASC, function_2)

    query = Query(
        Table("my_table", ColumnSet([])),
        selected_columns=[SelectedExpression("alias", function_1)],
        array_join=None,
        condition=condition,
        groupby=[function_1],
        having=None,
        prewhere=prewhere,
        order_by=[orderby],
    )

    def replace(exp: Expression) -> Expression:
        if isinstance(exp, FunctionCall) and exp.function_name == "f1":
            return FunctionCall(exp.alias, "tag", (Literal(None, "f1"),))
        return exp

    query.transform_expressions(replace)

    expected_query = Query(
        Table("my_table", ColumnSet([])),
        selected_columns=[
            SelectedExpression(
                "alias", FunctionCall("alias", "tag", (Literal(None, "f1"),))
            )
        ],
        array_join=None,
        condition=binary_condition(
            ConditionFunctions.EQ,
            FunctionCall("alias", "tag", (Literal(None, "f1"),)),
            Literal(None, "1"),
        ),
        groupby=[FunctionCall("alias", "tag", (Literal(None, "f1"),))],
        prewhere=binary_condition(
            ConditionFunctions.EQ,
            FunctionCall("alias", "tag", (Literal(None, "f1"),)),
            Literal(None, "2"),
        ),
        having=None,
        order_by=[orderby],
    )

    assert query.get_selected_columns() == expected_query.get_selected_columns()
    assert query.get_condition() == expected_query.get_condition()
    assert query.get_groupby() == expected_query.get_groupby()
    assert query.get_having() == expected_query.get_having()
    assert query.get_orderby() == expected_query.get_orderby()

    assert list(query.get_all_expressions()) == list(
        expected_query.get_all_expressions()
    )
Beispiel #9
0
 pytest.param(
     LogicalQuery(
         QueryEntity(
             EntityKey.EVENTS, get_entity(EntityKey.EVENTS).get_data_model()
         ),
         selected_columns=[
             SelectedExpression("count", FunctionCall("_snuba_count", "count", ())),
         ],
         condition=binary_condition(
             "equals",
             Column("_snuba_project_id", None, "project_id"),
             Literal(None, 1),
         ),
         order_by=[
             OrderBy(
                 OrderByDirection.ASC, Column("_snuba_timestamp", None, "timestamp")
             )
         ],
     ),
     id="no orderby clauses",
 ),
 pytest.param(
     LogicalQuery(
         QueryEntity(
             EntityKey.METRICS_COUNTERS,
             get_entity(EntityKey.METRICS_COUNTERS).get_data_model(),
         ),
         selected_columns=[SelectedExpression("value", Column(None, None, "value"))],
         condition=binary_condition(
             BooleanFunctions.AND,
             binary_condition(
Beispiel #10
0
def _parse_query_impl(body: MutableMapping[str, Any], entity: Entity) -> Query:
    def build_selected_expressions(
        raw_expressions: Sequence[Any],
    ) -> List[SelectedExpression]:
        output = []
        for raw_expression in raw_expressions:
            exp = parse_expression(
                tuplify(raw_expression), entity.get_data_model(), set()
            )
            output.append(
                SelectedExpression(
                    # An expression in the query can be a string or a
                    # complex list with an alias. In the second case
                    # we trust the parser to find the alias.
                    name=raw_expression
                    if isinstance(raw_expression, str)
                    else exp.alias,
                    expression=exp,
                )
            )
        return output

    aggregations = []
    for aggregation in body.get("aggregations", []):
        if not isinstance(aggregation, Sequence):
            raise ParsingException(
                (
                    f"Invalid aggregation structure {aggregation}. "
                    "It must be a sequence containing expression, column and alias."
                )
            )
        aggregation_function = aggregation[0]
        column_expr = aggregation[1]
        column_expr = column_expr if column_expr else []
        alias = aggregation[2]
        alias = alias if alias else None

        aggregations.append(
            SelectedExpression(
                name=alias,
                expression=parse_aggregation(
                    aggregation_function,
                    column_expr,
                    alias,
                    entity.get_data_model(),
                    set(),
                ),
            )
        )

    groupby_clause = build_selected_expressions(to_list(body.get("groupby", [])))

    select_clause = (
        groupby_clause
        + aggregations
        + build_selected_expressions(body.get("selected_columns", []))
    )

    array_join_cols = set()
    arrayjoin = body.get("arrayjoin")
    # TODO: Properly detect all array join columns in all clauses of the query.
    # This is missing an arrayJoin in condition with an alias that is then
    # used in the select.
    if arrayjoin:
        array_join_cols.add(arrayjoin)
        array_join_expr: Optional[Expression] = parse_expression(
            body["arrayjoin"], entity.get_data_model(), {arrayjoin}
        )
    else:
        array_join_expr = None
        for select_expr in select_clause:
            if isinstance(select_expr.expression, FunctionCall):
                if select_expr.expression.function_name == "arrayJoin":
                    parameters = select_expr.expression.parameters
                    if len(parameters) != 1:
                        raise ParsingException(
                            "arrayJoin(...) only accepts a single parameter."
                        )
                    if isinstance(parameters[0], Column):
                        array_join_cols.add(parameters[0].column_name)
                    else:
                        # We only accepts columns or functions that do not
                        # reference columns. We could not say whether we are
                        # actually arrayjoining on the values of the column
                        # if it is nested in an arbitrary function. But
                        # functions of literals are fine.
                        for e in parameters[0]:
                            if isinstance(e, Column):
                                raise ParsingException(
                                    "arrayJoin(...) cannot contain columns nested in functions."
                                )

    where_expr = parse_conditions_to_expr(
        body.get("conditions", []), entity, array_join_cols
    )
    having_expr = parse_conditions_to_expr(
        body.get("having", []), entity, array_join_cols
    )

    orderby_exprs = []
    for orderby in to_list(body.get("orderby", [])):
        if isinstance(orderby, str):
            match = NEGATE_RE.match(orderby)
            if match is None:
                raise ParsingException(
                    (
                        f"Invalid Order By clause {orderby}. If the Order By is a string, "
                        "it must respect the format `[-]column`"
                    )
                )
            direction, col = match.groups()
            orderby = col
        elif is_function(orderby):
            match = NEGATE_RE.match(orderby[0])
            if match is None:
                raise ParsingException(
                    (
                        f"Invalid Order By clause {orderby}. If the Order By is an expression, "
                        "the function name must respect the format `[-]func_name`"
                    )
                )
            direction, col = match.groups()
            orderby = [col] + orderby[1:]
        else:
            raise ParsingException(
                (
                    f"Invalid Order By clause {orderby}. The Clause was neither "
                    "a string nor a function call."
                )
            )
        orderby_parsed = parse_expression(
            tuplify(orderby), entity.get_data_model(), set()
        )
        orderby_exprs.append(
            OrderBy(
                OrderByDirection.DESC if direction == "-" else OrderByDirection.ASC,
                orderby_parsed,
            )
        )

    return Query(
        body,
        None,
        selected_columns=select_clause,
        array_join=array_join_expr,
        condition=where_expr,
        groupby=[g.expression for g in groupby_clause],
        having=having_expr,
        order_by=orderby_exprs,
        limitby=body.get("limitby"),
        sample=body.get("sample"),
        limit=body.get("limit", None),
        offset=body.get("offset", 0),
        totals=body.get("totals", False),
        granularity=body.get("granularity"),
    )
Beispiel #11
0
              func(column3) DESC
     """.format(conditions=snql_conditions_with_default()),
     Query(
         QueryEntity(EntityKey.EVENTS,
                     get_entity(EntityKey.EVENTS).get_data_model()),
         selected_columns=[
             SelectedExpression("column1",
                                Column("_snuba_column1", None, "column1")),
             SelectedExpression("column2",
                                Column("_snuba_column2", None, "column2")),
         ],
         condition=with_required(),
         groupby=None,
         having=None,
         order_by=[
             OrderBy(OrderByDirection.ASC,
                     Column("_snuba_column1", None, "column1")),
             OrderBy(OrderByDirection.DESC,
                     Column("_snuba_column2", None, "column2")),
             OrderBy(
                 OrderByDirection.DESC,
                 FunctionCall(
                     None, "func",
                     (Column("_snuba_column3", None, "column3"), )),
             ),
         ],
         limit=1000,
     ),
     id="Order by with functions",
 ),
 pytest.param(
     """
Beispiel #12
0
         "orderby": ["column1", "-column2", ["-func", ["column3"]]],
     },
     Query(
         {},
         TableSource("events", ColumnSet([])),
         selected_columns=[
             SelectedExpression("column1", Column("column1", None,
                                                  "column1")),
             SelectedExpression("column2", Column("column2", None,
                                                  "column2")),
         ],
         condition=None,
         groupby=None,
         having=None,
         order_by=[
             OrderBy(OrderByDirection.ASC, Column("column1", None,
                                                  "column1")),
             OrderBy(OrderByDirection.DESC,
                     Column("column2", None, "column2")),
             OrderBy(
                 OrderByDirection.DESC,
                 FunctionCall(None, "func",
                              (Column("column3", None, "column3"), )),
             ),
         ],
     ),
     id="Order by with functions",
 ),
 pytest.param(
     {
         "selected_columns": [],
         "groupby": "column1",
Beispiel #13
0
             ),
             SelectedExpression("c", Column("_snuba_c", None, "c")),
             SelectedExpression("d", Column("_snuba_d", None, "d")),
             SelectedExpression(
                 "2+7",
                 FunctionCall(None, "plus", (Literal(None, 2), Literal(None, 7))),
             ),
         ],
         condition=binary_condition(
             "less", Column("_snuba_a", None, "a"), Literal(None, 3)
         ),
         groupby=[
             Column("_snuba_d", None, "d"),
             FunctionCall(None, "plus", (Literal(None, 2), Literal(None, 7))),
         ],
         order_by=[OrderBy(OrderByDirection.DESC, Column("_snuba_f", None, "f"))],
     ),
     id="Simple complete query with example of parenthesized arithmetic expression in SELECT",
 ),
 pytest.param(
     "MATCH (events) SELECT (2*(4-5)+3), foo(c) AS thing2, c BY d, 2+7 WHERE a<3 ORDER BY f DESC",
     LogicalQuery(
         QueryEntity(
             EntityKey.EVENTS, get_entity(EntityKey.EVENTS).get_data_model()
         ),
         selected_columns=[
             SelectedExpression(
                 "(2*(4-5)+3)",
                 FunctionCall(
                     None,
                     "plus",
Beispiel #14
0
             ),
         ],
         condition=binary_condition(None, "less", Column(None, None, "a"),
                                    Literal(None, 3)),
         groupby=[
             Column(None, None, "d"),
             FunctionCall(
                 None,
                 "plus",
                 (
                     Literal(None, 2),
                     Literal(None, 7),
                 ),
             ),
         ],
         order_by=[OrderBy(OrderByDirection.DESC, Column(None, None, "f"))],
     ),
     id=
     "Simple complete query with example of parenthesized arithmetic expression in COLLECT",
 ),
 pytest.param(
     "MATCH (blah) WHERE time_seen<3 AND last_seen=2 AND c=2 AND d=3 COLLECT a",
     Query(
         {},
         None,
         selected_columns=[
             SelectedExpression(
                 name="a",
                 expression=Column(alias=None,
                                   table_name=None,
                                   column_name="a"),