Beispiel #1
0
def test_type_condition_optimizer() -> None:
    """
    Runs TypeConditionOptimizer over an "errors" query whose condition is
    `type != 'transaction' AND col1 = 'val1'` and checks that the type
    comparison is replaced by the literal 1, both in the AST and in the
    formatted Clickhouse expression.
    """
    col1_filter = binary_condition(
        ConditionFunctions.EQ, Column(None, None, "col1"), Literal(None, "val1")
    )
    type_filter = binary_condition(
        ConditionFunctions.NEQ,
        Column(None, None, "type"),
        Literal(None, "transaction"),
    )

    unprocessed_query = Query(
        Table("errors", ColumnSet([])),
        condition=binary_condition(BooleanFunctions.AND, type_filter, col1_filter),
    )
    # The untouched col1 filter is shared with the expected query on purpose:
    # only the type comparison is supposed to change.
    expected_query = Query(
        Table("errors", ColumnSet([])),
        condition=binary_condition(
            BooleanFunctions.AND, Literal(None, 1), col1_filter
        ),
    )

    TypeConditionOptimizer().process_query(unprocessed_query, HTTPQuerySettings())

    processed_condition = unprocessed_query.get_condition()
    assert expected_query.get_condition() == processed_condition
    assert processed_condition is not None
    formatted = processed_condition.accept(ClickhouseExpressionFormatter())
    assert formatted == "1 AND equals(col1, 'val1')"
def test_uuid_array_column_processor(
    unprocessed: Expression,
    expected: Expression,
    formatted_value: str,
) -> None:
    """
    Runs FixedStringArrayColumnProcessor (built for "column1"/"column2" with
    a second argument of 32) over a query filtered by `unprocessed`.

    The selected column must stay untouched, the resulting condition must
    equal `expected`, and formatting that condition with
    ClickhouseExpressionFormatter must yield `formatted_value`.
    """

    def make_query(condition: Expression) -> Query:
        # The two queries under comparison differ only in their condition.
        return Query(
            Table("transactions", ColumnSet([])),
            selected_columns=[
                SelectedExpression("column2", Column(None, None, "column2"))
            ],
            condition=condition,
        )

    unprocessed_query = make_query(unprocessed)
    expected_query = make_query(expected)

    processor = FixedStringArrayColumnProcessor({"column1", "column2"}, 32)
    processor.process_query(unprocessed_query, HTTPQuerySettings())

    untouched_selection = [
        SelectedExpression("column2", Column(None, None, "column2"))
    ]
    assert unprocessed_query.get_selected_columns() == untouched_selection

    processed_condition = unprocessed_query.get_condition()
    assert expected_query.get_condition() == processed_condition
    assert processed_condition is not None
    formatted = processed_condition.accept(ClickhouseExpressionFormatter())
    assert formatted == formatted_value
def test_uuid_array_column_processor(
    unprocessed: Expression,
    expected: Expression,
    formatted_value: str,
) -> None:
    """
    Runs SliceOfMapOptimizer over a query filtered by `unprocessed`.

    The resulting condition must equal `expected`, and formatting it with
    ClickhouseExpressionFormatter must yield `formatted_value`.
    """

    def make_query(condition: Expression) -> Query:
        # The two queries under comparison differ only in their condition.
        return Query(
            Table("transactions", ColumnSet([])),
            selected_columns=[
                SelectedExpression("column2", Column(None, None, "column2"))
            ],
            condition=condition,
        )

    unprocessed_query = make_query(unprocessed)
    expected_query = make_query(expected)

    optimizer = SliceOfMapOptimizer()
    optimizer.process_query(unprocessed_query, HTTPRequestSettings())

    processed_condition = unprocessed_query.get_condition()
    assert expected_query.get_condition() == processed_condition
    assert processed_condition is not None
    formatted = processed_condition.accept(ClickhouseExpressionFormatter())
    assert formatted == formatted_value
Beispiel #4
0
def test_uuid_array_column_processor(
    unprocessed: Expression,
    expected: Expression,
    formatted_value: str,
) -> None:
    """
    Runs UUIDArrayColumnProcessor (built for "column1"/"column2") over a
    query filtered by `unprocessed`.

    The selected "column2" must come back wrapped as
    arrayMap(x -> replaceAll(toString(x), '-', ''), column2), the resulting
    condition must equal `expected`, and formatting that condition with
    ClickhouseExpressionFormatter must yield `formatted_value`.
    """

    def make_query(condition: Expression) -> Query:
        # The two queries under comparison differ only in their condition.
        return Query(
            Table("transactions", ColumnSet([])),
            selected_columns=[
                SelectedExpression("column2", Column(None, None, "column2"))
            ],
            condition=condition,
        )

    unprocessed_query = make_query(unprocessed)
    expected_query = make_query(expected)

    processor = UUIDArrayColumnProcessor({"column1", "column2"})
    processor.process_query(unprocessed_query, HTTPRequestSettings())

    # Expected rewrite of the selected column, built inside-out.
    dashless_element = FunctionCall(
        None,
        "replaceAll",
        (
            FunctionCall(None, "toString", (Argument(None, "x"),)),
            Literal(None, "-"),
            Literal(None, ""),
        ),
    )
    mapped_column = FunctionCall(
        None,
        "arrayMap",
        (
            Lambda(None, ("x",), dashless_element),
            Column(None, None, "column2"),
        ),
    )
    assert unprocessed_query.get_selected_columns() == [
        SelectedExpression("column2", mapped_column)
    ]

    processed_condition = unprocessed_query.get_condition()
    assert expected_query.get_condition() == processed_condition
    assert processed_condition is not None
    formatted = processed_condition.accept(ClickhouseExpressionFormatter())
    assert formatted == formatted_value
Beispiel #5
0
def test_hexint_column_processor(
    unprocessed: Expression, formatted_value: str
) -> None:
    """
    Runs HexIntColumnProcessor (built for "column1") over a query filtered by
    `unprocessed`.

    The selected "column1" must come back wrapped as lower(hex(column1)), and
    formatting the processed condition with ClickhouseExpressionFormatter
    must yield `formatted_value`.
    """
    unprocessed_query = Query(
        Table("transactions", ColumnSet([])),
        selected_columns=[
            SelectedExpression("column1", Column(None, None, "column1"))
        ],
        condition=unprocessed,
    )

    processor = HexIntColumnProcessor({"column1"})
    processor.process_query(unprocessed_query, HTTPQuerySettings())

    # Expected rewrite of the selected column, built inside-out.
    hex_of_column = FunctionCall(None, "hex", (Column(None, None, "column1"),))
    lowered_hex = FunctionCall(None, "lower", (hex_of_column,))
    assert unprocessed_query.get_selected_columns() == [
        SelectedExpression("column1", lowered_hex)
    ]

    processed_condition = unprocessed_query.get_condition()
    assert processed_condition is not None
    formatted = processed_condition.accept(ClickhouseExpressionFormatter())
    assert formatted == formatted_value
Beispiel #6
0
def test_mand_conditions(table: str, mand_conditions: List[FunctionCall]) -> None:
    """
    Checks that MandatoryConditionApplier leaves the query with the same
    condition as adding the table's mandatory conditions by hand.

    A deep copy of the query is taken before processing; the combined
    `mand_conditions` are then added to that copy and both conditions are
    compared.
    """
    d_equals_1 = binary_condition(
        OPERATOR_TO_FUNCTION["="], Column("d", None, "d"), Literal(None, "1"),
    )
    c_equals_3 = binary_condition(
        OPERATOR_TO_FUNCTION["="], Column("c", None, "c"), Literal(None, "3"),
    )
    source = Table(
        table,
        ColumnSet([]),
        final=False,
        sampling_rate=None,
        mandatory_conditions=mand_conditions,
    )
    query = Query(
        source,
        None,
        None,
        binary_condition(BooleanFunctions.AND, d_equals_1, c_equals_3),
    )

    # The copy must be taken before the processor touches the query.
    query_ast_copy = copy.deepcopy(query)

    request_settings = HTTPRequestSettings(consistent=True)
    MandatoryConditionApplier().process_query(query, request_settings)

    query_ast_copy.add_condition_to_ast(combine_and_conditions(mand_conditions))

    processed_condition = query.get_condition_from_ast()
    assert processed_condition == query_ast_copy.get_condition_from_ast()
Beispiel #7
0
def test_query_data_source() -> None:
    """
    Checks the column set a Query exposes when it is used as a data source.
    """
    selected = [
        SelectedExpression(
            "col1", Column(alias="col1", table_name=None, column_name="col1")
        ),
        SelectedExpression(
            "some_func",
            FunctionCall(
                "some_func",
                "f",
                (Column(alias="col1", table_name=None, column_name="col1"),),
            ),
        ),
        # Selected without a name: expected to surface as "_invalid_alias_2".
        SelectedExpression(
            None, Column(alias="col2", table_name=None, column_name="col2")
        ),
    ]
    query = Query(Table("my_table", ColumnSet([])), selected_columns=selected)

    expected_columns = ColumnSet(
        [(name, Any()) for name in ("col1", "some_func", "_invalid_alias_2")]
    )
    assert query.get_columns() == expected_columns
Beispiel #8
0
def test_events_column_format_expressions() -> None:
    """
    Runs GroupIdColumnProcessor over an "events" query and checks that only
    the group_id column is rewritten (to nullIf(group_id, 0)), both in the
    AST and in the formatted Clickhouse expressions.
    """

    def culprit_selection() -> SelectedExpression:
        return SelectedExpression("dr_claw", Column("dr_claw", None, "culprit"))

    def message_selection() -> SelectedExpression:
        return SelectedExpression(
            "the_message", Column("the_message", None, "message")
        )

    unprocessed = Query(
        Table("events", ColumnSet([])),
        selected_columns=[
            culprit_selection(),
            SelectedExpression(
                "the_group_id", Column("the_group_id", None, "group_id")
            ),
            message_selection(),
        ],
    )

    null_if_zero = FunctionCall(
        "the_group_id",
        "nullIf",
        (Column(None, None, "group_id"), Literal(None, 0)),
    )
    expected_query = Query(
        Table("events", ColumnSet([])),
        selected_columns=[
            culprit_selection(),
            SelectedExpression("the_group_id", null_if_zero),
            message_selection(),
        ],
    )

    GroupIdColumnProcessor().process_query(unprocessed, HTTPRequestSettings())
    assert expected_query.get_selected_columns() == unprocessed.get_selected_columns()

    expected_sql = (
        "(nullIf(group_id, 0) AS the_group_id)",
        "(message AS the_message)",
    )

    # The first selected column (culprit) is skipped when checking the SQL.
    processed_tail = unprocessed.get_selected_columns()[1:]
    for position, selected in enumerate(processed_tail):
        rendered = selected.expression.accept(ClickhouseExpressionFormatter())
        assert expected_sql[position] == rendered
Beispiel #9
0
def test_event_id_column_format_expressions() -> None:
    """
    Runs EventIdColumnProcessor over an "events" query and checks that the
    event_id column is rewritten to replaceAll(toString(event_id), '-', '')
    while the duration column is left alone.
    """

    def duration_selection() -> SelectedExpression:
        return SelectedExpression(
            "transaction.duration", Column("transaction.duration", None, "duration")
        )

    unprocessed = Query(
        Table("events", ColumnSet([])),
        selected_columns=[
            duration_selection(),
            SelectedExpression(
                "the_event_id", Column("the_event_id", None, "event_id")
            ),
        ],
    )

    dashless_event_id = FunctionCall(
        "the_event_id",
        "replaceAll",
        (
            FunctionCall(None, "toString", (Column(None, None, "event_id"),),),
            Literal(None, "-"),
            Literal(None, ""),
        ),
    )
    expected = Query(
        Table("events", ColumnSet([])),
        selected_columns=[
            duration_selection(),
            SelectedExpression("the_event_id", dashless_event_id),
        ],
    )

    EventIdColumnProcessor().process_query(unprocessed, HTTPRequestSettings())
    assert (
        expected.get_selected_columns_from_ast()
        == unprocessed.get_selected_columns_from_ast()
    )

    # Index 1 is the event_id column; index 0 is the duration column.
    event_id_selection = unprocessed.get_selected_columns_from_ast()[1]
    formatted = event_id_selection.expression.accept(ClickhouseExpressionFormatter())
    assert formatted == "(replaceAll(toString(event_id), '-', '') AS the_event_id)"
Beispiel #10
0
def build_plan(table_name: str, storage_set: StorageSetKey) -> ClickhouseQueryPlan:
    """
    Builds a ClickhouseQueryPlan for a query on `table_name`.

    The query reads from a table with an empty column set, the execution
    strategy uses the cluster returned by get_cluster(storage_set), and all
    processor lists are empty.
    """
    table_query = Query(Table(table_name, ColumnSet([])))
    execution_strategy = SimpleQueryPlanExecutionStrategy(
        get_cluster(storage_set), db_query_processors=[],
    )
    return ClickhouseQueryPlan(
        table_query,
        execution_strategy,
        storage_set,
        plan_query_processors=[],
        db_query_processors=[],
    )
def test_invalid_uuid(unprocessed: Expression) -> None:
    """
    Checks that UUIDColumnProcessor (built for "column1"/"column2") raises
    ColumnTypeError for a query filtered by `unprocessed`.
    """
    selected = [SelectedExpression("column2", Column(None, None, "column2"))]
    unprocessed_query = Query(
        Table("transactions", ColumnSet([])),
        selected_columns=selected,
        condition=unprocessed,
    )

    with pytest.raises(ColumnTypeError):
        # Construction stays inside the context manager so that an error
        # raised by either step is accepted.
        processor = UUIDColumnProcessor({"column1", "column2"})
        processor.process_query(unprocessed_query, HTTPRequestSettings())
Beispiel #12
0
def test_iterate_over_query() -> None:
    """
    Builds a query with the new AST and checks the sequence of expressions
    produced by get_all_expressions().
    """
    c1 = Column(None, "t1", "c1")
    c2 = Column(None, "t1", "c2")
    f1_call = FunctionCall("alias", "f1", (c1, c2))
    f2_call = FunctionCall("alias", "f2", (c2,))

    where_clause = binary_condition(ConditionFunctions.EQ, c1, Literal(None, "1"))
    prewhere_clause = binary_condition(ConditionFunctions.EQ, c2, Literal(None, "2"))

    query = Query(
        Table("my_table", ColumnSet([])),
        selected_columns=[SelectedExpression("alias", f1_call)],
        array_join=None,
        condition=where_clause,
        groupby=[f1_call],
        prewhere=prewhere_clause,
        having=None,
        order_by=[OrderBy(OrderByDirection.ASC, f2_call)],
    )

    # In the expected sequence every expression follows its own arguments.
    f1_walk = [c1, c2, f1_call]
    expected_expressions = (
        f1_walk  # selected columns
        + [c1, Literal(None, "1"), where_clause]  # condition
        + f1_walk  # groupby
        + [c2, f2_call]  # order by
        + [c2, Literal(None, "2"), prewhere_clause]  # prewhere
    )

    assert list(query.get_all_expressions()) == expected_expressions
Beispiel #13
0
def test_format_clickhouse_specific_query() -> None:
    """
    Formats a query that uses a few of the Clickhouse specific fields
    (FINAL, SAMPLE, ARRAY JOIN, WITH TOTALS, LIMIT BY) and checks the SQL.
    """
    source = Table("my_table", ColumnSet([]), final=True, sampling_rate=0.1)
    where_clause = binary_condition(
        "eq", lhs=Column(None, None, "column1"), rhs=Literal(None, "blabla"),
    )
    having_clause = binary_condition(
        "eq", lhs=Column(None, None, "column1"), rhs=Literal(None, 123),
    )

    query = Query(
        source,
        selected_columns=[
            SelectedExpression("column1", Column(None, None, "column1")),
            SelectedExpression("column2", Column(None, "table1", "column2")),
        ],
        condition=where_clause,
        groupby=[Column(None, None, "column1"), Column(None, "table1", "column2")],
        having=having_clause,
        order_by=[OrderBy(OrderByDirection.ASC, Column(None, None, "column1"))],
        array_join=Column(None, None, "column1"),
        totals=True,
        limitby=LimitBy(10, Column(None, None, "environment")),
    )

    # Limit and offset are applied through the setters, not the constructor.
    query.set_offset(50)
    query.set_limit(100)

    clickhouse_query = format_query(query, HTTPRequestSettings())

    expected_clauses = (
        "SELECT column1, table1.column2 ",
        "FROM my_table FINAL SAMPLE 0.1 ",
        "ARRAY JOIN column1 ",
        "WHERE eq(column1, 'blabla') ",
        "GROUP BY column1, table1.column2 WITH TOTALS ",
        "HAVING eq(column1, 123) ",
        "ORDER BY column1 ASC ",
        "LIMIT 10 BY environment ",
        "LIMIT 100 OFFSET 50",
    )

    assert clickhouse_query.get_sql() == "".join(expected_clauses)
Beispiel #14
0
def test_query_parameters() -> None:
    """
    Checks that the parameters passed to the Query constructor are returned
    unchanged by the matching getters.
    """
    source = Table("my_table", ColumnSet([]))
    limit_by = (100, "environment")
    query = Query(
        source,
        limitby=limit_by,
        limit=100,
        offset=50,
        totals=True,
        granularity=60,
    )

    assert query.get_limitby() == (100, "environment")
    assert query.get_limit() == 100
    assert query.get_offset() == 50
    assert query.has_totals() is True
    assert query.get_granularity() == 60

    from_clause = query.get_from_clause()
    assert from_clause.table_name == "my_table"
Beispiel #15
0
def test_prewhere(
    query_body: MutableMapping[str, Any],
    keys: Sequence[str],
    new_ast_condition: Optional[Expression],
    new_prewhere_ast_condition: Optional[Expression],
) -> None:
    """
    Runs PrewhereProcessor over a query parsed from `query_body` against the
    "events" dataset and checks the resulting conditions.

    The query's data source is replaced with a TableSource on "my_table"
    that receives `keys` as its fourth argument. After processing, the query
    condition must equal `new_ast_condition` and the prewhere condition must
    equal `new_prewhere_ast_condition`.

    settings.MAX_PREWHERE_CONDITIONS is set to 2 for the duration of the test
    and restored afterwards.
    """
    # Remember the global value so the override below does not leak into
    # tests that run later in the same process.
    previous_max_conditions = settings.MAX_PREWHERE_CONDITIONS
    settings.MAX_PREWHERE_CONDITIONS = 2
    try:
        events = get_dataset("events")
        query = parse_query(query_body, events)
        query.set_data_source(TableSource("my_table", ColumnSet([]), None, keys))

        request_settings = HTTPRequestSettings()
        processor = PrewhereProcessor()
        processor.process_query(Query(query), request_settings)

        assert query.get_condition_from_ast() == new_ast_condition
        assert query.get_prewhere_ast() == new_prewhere_ast_condition
    finally:
        # Runs on failure too, so a failing assertion cannot leave the
        # override in place.
        settings.MAX_PREWHERE_CONDITIONS = previous_max_conditions
Beispiel #16
0
def test_query_experiments() -> None:
    """
    Exercises the experiment accessors of Query: set_experiments replaces the
    stored experiments, add_experiment adds a single one, and
    get_experiment_value returns None for an experiment that is not set.
    """
    query = Query(
        Table("my_table", ColumnSet([])),
        limitby=LimitBy(100, [
            Column(
                alias=None, table_name="my_table", column_name="environment")
        ]),
        limit=100,
        offset=50,
        granularity=60,
    )

    query.set_experiments({"optimization1": True})
    assert query.get_experiments() == {"optimization1": True}
    # Identity check: `== True` would also accept a truthy value such as 1.
    assert query.get_experiment_value("optimization1") is True

    assert query.get_experiment_value("optimization2") is None
    query.add_experiment("optimization2", "group1")
    assert query.get_experiment_value("optimization2") == "group1"

    # A second set_experiments call is expected to drop the earlier entries.
    query.set_experiments({"optimization3": 0.5})
    assert query.get_experiments() == {"optimization3": 0.5}
Beispiel #17
0
def test_replace_expression() -> None:
    """
    Builds a query with the new AST and swaps one function call for another:
    every f1(...) call is replaced with tag('f1') through
    transform_expressions, and the result is compared with a query that was
    built with the replacement already in place.
    """
    c1 = Column(None, "t1", "c1")
    c2 = Column(None, "t1", "c2")
    f1_call = FunctionCall("alias", "f1", (c1, c2))
    f2_call = FunctionCall("alias", "f2", (c2,))

    # f2 is not targeted by the replacement, so the same OrderBy is reused in
    # the expected query.
    orderby = OrderBy(OrderByDirection.ASC, f2_call)

    query = Query(
        Table("my_table", ColumnSet([])),
        selected_columns=[SelectedExpression("alias", f1_call)],
        array_join=None,
        condition=binary_condition(
            ConditionFunctions.EQ, f1_call, Literal(None, "1")
        ),
        groupby=[f1_call],
        having=None,
        prewhere=binary_condition(
            ConditionFunctions.EQ, f1_call, Literal(None, "2")
        ),
        order_by=[orderby],
    )

    def swap_f1_for_tag(exp: Expression) -> Expression:
        if not isinstance(exp, FunctionCall) or exp.function_name != "f1":
            return exp
        return FunctionCall(exp.alias, "tag", (Literal(None, "f1"),))

    query.transform_expressions(swap_f1_for_tag)

    def tag_call() -> FunctionCall:
        # Fresh instance of the expected replacement for each place it occurs.
        return FunctionCall("alias", "tag", (Literal(None, "f1"),))

    expected_query = Query(
        Table("my_table", ColumnSet([])),
        selected_columns=[SelectedExpression("alias", tag_call())],
        array_join=None,
        condition=binary_condition(
            ConditionFunctions.EQ, tag_call(), Literal(None, "1"),
        ),
        groupby=[tag_call()],
        prewhere=binary_condition(
            ConditionFunctions.EQ, tag_call(), Literal(None, "2"),
        ),
        having=None,
        order_by=[orderby],
    )

    assert query.get_selected_columns() == expected_query.get_selected_columns()
    assert query.get_condition() == expected_query.get_condition()
    assert query.get_groupby() == expected_query.get_groupby()
    assert query.get_having() == expected_query.get_having()
    assert query.get_orderby() == expected_query.get_orderby()

    actual_expressions = list(query.get_all_expressions())
    assert actual_expressions == list(expected_query.get_all_expressions())
from snuba.query.processors.conditions_enforcer import (
    MandatoryConditionEnforcer,
    OrgIdEnforcer,
    ProjectIdEnforcer,
)
from snuba.request.request_settings import HTTPRequestSettings
from snuba.state import set_config

test_data = [
    pytest.param(
        Query(
            Table("errors", ColumnSet([])),
            selected_columns=[],
            condition=binary_condition(
                BooleanFunctions.AND,
                in_condition(Column(None, None, "project_id"), [Literal(None, 123)]),
                binary_condition(
                    "equals", Column(None, None, "org_id"), Literal(None, 1)
                ),
            ),
        ),
        True,
        id="Valid query. Both mandatory columns are there",
    ),
    pytest.param(
        Query(
            Table("errors", ColumnSet([])),
            selected_columns=[],
            condition=binary_condition(
                BooleanFunctions.AND,
                binary_condition(
Beispiel #19
0
    schema=schema2,
)

# Column set given to the "discover" Table in the test queries below:
# a DateTime "timestamp" column plus two nullable String columns.
# NOTE(review): the "mismatched" names presumably refer to columns whose
# types differ between the schemas being merged -- confirm against the schema
# definitions, which are outside this excerpt.
merged_columns = ColumnSet([
    ("timestamp", DateTime()),
    ("mismatched1", String(Modifiers(nullable=True))),
    ("mismatched2", String(Modifiers(nullable=True))),
])

test_data = [
    pytest.param(
        Query(
            Table("discover", merged_columns),
            selected_columns=[
                SelectedExpression(
                    name="_snuba_count_unique_sdk_version",
                    expression=FunctionCall(
                        None, "uniq", (Column(None, None, "mismatched1"), )),
                )
            ],
        ),
        Query(
            Table("discover", merged_columns),
            selected_columns=[
                SelectedExpression(
                    name="_snuba_count_unique_sdk_version",
                    expression=FunctionCall(
                        None,
                        "uniq",
                        (FunctionCall(
                            None,
                            "cast",
Beispiel #20
0
import pytest

from snuba.clickhouse.columns import ColumnSet
from snuba.clickhouse.processors import QueryProcessor
from snuba.clickhouse.query import Query
from snuba.query.data_source.simple import Table
from snuba.query.processors.table_rate_limit import TableRateLimit
from snuba.query.query_settings import HTTPQuerySettings
from snuba.state import set_config
from snuba.state.rate_limit import TABLE_RATE_LIMIT_NAME, RateLimitParameters

test_data = [
    pytest.param(
        TableRateLimit(),
        Query(Table("errors_local", ColumnSet([])),
              selected_columns=[],
              condition=None),
        "table_concurrent_limit_transactions_local",
        RateLimitParameters(
            rate_limit_name=TABLE_RATE_LIMIT_NAME,
            bucket="errors_local",
            per_second_limit=5000,
            concurrent_limit=1000,
        ),
        id="Set rate limiter on another table",
    ),
    pytest.param(
        TableRateLimit(),
        Query(Table("errors_local", ColumnSet([])),
              selected_columns=[],
              condition=None),
Beispiel #21
0
 pytest.param(
     Query(
         Table("my_table", ColumnSet([])),
         selected_columns=[
             SelectedExpression("column1", Column(None, None, "column1")),
             SelectedExpression("column2", Column(None, "table1",
                                                  "column2")),
             SelectedExpression("column3", Column("al", None, "column3")),
         ],
         condition=binary_condition(
             "eq",
             lhs=Column("al", None, "column3"),
             rhs=Literal(None, "blabla"),
         ),
         groupby=[
             Column(None, None, "column1"),
             Column(None, "table1", "column2"),
             Column("al", None, "column3"),
             Column(None, None, "column4"),
         ],
         having=binary_condition(
             "eq",
             lhs=Column(None, None, "column1"),
             rhs=Literal(None, 123),
         ),
         order_by=[
             OrderBy(OrderByDirection.ASC, Column(None, None, "column1")),
             OrderBy(OrderByDirection.DESC, Column(None, "table1",
                                                   "column2")),
         ],
     ),
     [