def test_handled_processor_invalid() -> None:
    unprocessed = Query(
        QueryEntity(EntityKey.EVENTS, ColumnSet([])),
        selected_columns=[
            SelectedExpression(
                "result",
                FunctionCall(
                    "result",
                    "isHandled",
                    (Column(None, None, "type"),),
                ),
            ),
        ],
    )
    processor = handled_functions.HandledFunctionsProcessor(
        "exception_stacks.mechanism_handled",
    )
    with pytest.raises(InvalidExpressionException):
        processor.process_query(unprocessed, HTTPQuerySettings())
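For contrast, a minimal sketch of the valid form, assuming (as the test above implies) that isHandled takes no arguments and that the same imports are in scope:

def test_handled_processor_valid_sketch() -> None:
    # hedged sketch: isHandled() without arguments should be accepted and
    # expanded by the processor rather than raising
    valid = Query(
        QueryEntity(EntityKey.EVENTS, ColumnSet([])),
        selected_columns=[
            SelectedExpression("result", FunctionCall("result", "isHandled", ())),
        ],
    )
    processor = handled_functions.HandledFunctionsProcessor(
        "exception_stacks.mechanism_handled",
    )
    processor.process_query(valid, HTTPQuerySettings())  # no exception expected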
def test_org_rate_limit_processor(unprocessed: Expression,
                                  org_id: int) -> None:
    query = Query(
        QueryEntity(EntityKey.EVENTS, EntityColumnSet([])),
        selected_columns=[
            SelectedExpression("column2", Column(None, None, "column2"))
        ],
        condition=unprocessed,
    )
    settings = HTTPQuerySettings()

    num_before = len(settings.get_rate_limit_params())
    OrganizationRateLimiterProcessor("org_id").process_query(query, settings)
    assert len(settings.get_rate_limit_params()) == num_before + 1
    rate_limiter = settings.get_rate_limit_params()[-1]
    assert rate_limiter.rate_limit_name == ORGANIZATION_RATE_LIMIT_NAME
    assert rate_limiter.bucket == str(org_id)
    assert rate_limiter.per_second_limit == 1000
    assert rate_limiter.concurrent_limit == 1000
Example #3
def test_referrer_specified_project(unprocessed: Expression, project_id: int) -> None:
    query = Query(
        QueryEntity(EntityKey.EVENTS, ColumnSet([])),
        selected_columns=[SelectedExpression("column2", Column(None, None, "column2"))],
        condition=unprocessed,
    )
    state.set_config("project_referrer_per_second_limit_abusive_delivery_1", 10)
    state.set_config("project_referrer_concurrent_limit_abusive_delivery_1", 10)
    referrer = "abusive_delivery"
    settings = HTTPQuerySettings()
    settings.referrer = referrer

    num_before = len(settings.get_rate_limit_params())
    ProjectReferrerRateLimiter("project_id").process_query(query, settings)
    assert len(settings.get_rate_limit_params()) == num_before + 1
    rate_limiter = settings.get_rate_limit_params()[-1]
    assert rate_limiter.rate_limit_name == PROJECT_REFERRER_RATE_LIMIT_NAME
    assert rate_limiter.bucket == f"{project_id}"
    assert rate_limiter.per_second_limit == 10
    assert rate_limiter.concurrent_limit == 10
def test_handled_processor_invalid() -> None:
    columnset = ColumnSet([])
    unprocessed = Query(
        {},
        TableSource("events", columnset),
        selected_columns=[
            SelectedExpression(
                "result",
                FunctionCall(
                    "result",
                    "isHandled",
                    (Column(None, None, "type"), ),
                ),
            ),
        ],
    )
    processor = handled_functions.HandledFunctionsProcessor(
        "exception_stacks.mechanism_handled", columnset)
    with pytest.raises(InvalidExpressionException):
        processor.process_query(unprocessed, HTTPRequestSettings())
Example #5
def build_selected_expressions(
    raw_expressions: Sequence[Any],
) -> List[SelectedExpression]:
    output = []
    for raw_expression in raw_expressions:
        exp = parse_expression(
            tuplify(raw_expression), entity.get_data_model(), set()
        )
        output.append(
            SelectedExpression(
                # An expression in the query can be a string or a
                # complex list with an alias. In the second case
                # we trust the parser to find the alias.
                name=raw_expression
                if isinstance(raw_expression, str)
                else exp.alias,
                expression=exp,
            )
        )
    return output
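A minimal usage sketch for the helper above; `entity` is closed over from the enclosing scope in the original, and the nested-list form of the second expression is an assumption about the legacy query format:

# a plain string keeps the raw string as the SelectedExpression name, while a
# ["fn", [args], "alias"] list relies on the parser to attach the alias
selected = build_selected_expressions(
    ["event_id", ["uniq", ["user"], "unique_users"]]
)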
Example #6
def test_project_rate_limit_processor_overridden(
    unprocessed: Expression, project_id: int
) -> None:
    query = Query(
        QueryEntity(EntityKey.EVENTS, EntityColumnSet([])),
        selected_columns=[SelectedExpression("column2", Column(None, None, "column2"))],
        condition=unprocessed,
    )
    settings = HTTPQuerySettings()
    state.set_config(f"project_per_second_limit_{project_id}", 5)
    state.set_config(f"project_concurrent_limit_{project_id}", 10)

    num_before = len(settings.get_rate_limit_params())
    ProjectRateLimiterProcessor("project_id").process_query(query, settings)
    assert len(settings.get_rate_limit_params()) == num_before + 1
    rate_limiter = settings.get_rate_limit_params()[-1]
    assert rate_limiter.rate_limit_name == PROJECT_RATE_LIMIT_NAME
    assert rate_limiter.bucket == str(project_id)
    assert rate_limiter.per_second_limit == 5
    assert rate_limiter.concurrent_limit == 10
Example #7
def test_events_column_format_expressions() -> None:
    unprocessed = Query(
        {},
        TableSource("events", ColumnSet([])),
        selected_columns=[
            SelectedExpression("dr_claw", Column("dr_claw", None, "culprit")),
            SelectedExpression("the_group_id",
                               Column("the_group_id", None, "group_id")),
            SelectedExpression("the_message",
                               Column("the_message", None, "message")),
        ],
    )
    expected = Query(
        {},
        TableSource("events", ColumnSet([])),
        selected_columns=[
            SelectedExpression("dr_claw", Column("dr_claw", None, "culprit")),
            SelectedExpression(
                "the_group_id",
                FunctionCall(
                    "the_group_id",
                    "nullIf",
                    (
                        Column(None, None, "group_id"),
                        Literal(None, 0),
                    ),
                ),
            ),
            SelectedExpression(
                "the_message",
                Column("the_message", None, "message"),
            ),
        ],
    )

    EventsColumnProcessor().process_query(unprocessed, HTTPRequestSettings())
    assert (expected.get_selected_columns_from_ast() ==
            unprocessed.get_selected_columns_from_ast())

    expected = (
        "(nullIf(group_id, 0) AS the_group_id)",
        "(message AS the_message)",
    )

    for idx, column in enumerate(
            unprocessed.get_selected_columns_from_ast()[1:]):
        formatted = column.expression.accept(ClickhouseExpressionFormatter())
        assert expected[idx] == formatted
def test_invalid_datetime() -> None:
    unprocessed = Query(
        QueryEntity(EntityKey.EVENTS, ColumnSet([])),
        selected_columns=[
            SelectedExpression(
                "transaction.duration",
                Column("transaction.duration", None, "duration")),
        ],
        condition=binary_condition(
            ConditionFunctions.EQ,
            Column("my_time", None, "time"),
            Literal(None, ""),
        ),
    )

    entity = TransactionsEntity()
    processors = entity.get_query_processors()
    for processor in processors:
        if isinstance(processor, TimeSeriesProcessor):
            with pytest.raises(InvalidQueryException):
                processor.process_query(unprocessed, HTTPRequestSettings())
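A hedged counterpart sketch: with a well-formed datetime literal (the ISO form below is an assumption, matching the format used elsewhere in this listing) the processor should not raise:

def test_valid_datetime_sketch() -> None:
    valid = Query(
        QueryEntity(EntityKey.EVENTS, ColumnSet([])),
        selected_columns=[
            SelectedExpression(
                "transaction.duration",
                Column("transaction.duration", None, "duration"),
            ),
        ],
        condition=binary_condition(
            ConditionFunctions.EQ,
            Column("my_time", None, "time"),
            Literal(None, "2021-01-01T00:30:00"),
        ),
    )
    for processor in TransactionsEntity().get_query_processors():
        if isinstance(processor, TimeSeriesProcessor):
            # no InvalidQueryException expected here
            processor.process_query(valid, HTTPRequestSettings())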
def test_add_equivalent_condition(
    initial_condition: Expression,
    join_clause: JoinClause[EntitySource],
    expected_expr: Expression,
) -> None:
    ENTITY_IMPL[EntityKey.EVENTS] = Events()
    ENTITY_IMPL[EntityKey.GROUPEDMESSAGES] = GroupedMessage()

    query = CompositeQuery(
        from_clause=join_clause,
        selected_columns=[
            SelectedExpression(
                "group_id",
                FunctionCall("something", "f", (Column(None, "gr", "id"), )))
        ],
        condition=initial_condition,
    )
    add_equivalent_conditions(query)
    assert query.get_condition() == expected_expr

    ENTITY_IMPL.clear()
Example #10
def test_selector_function(
    time_condition: Expression,
    beginning_of_time: Optional[datetime],
    exec_both: float,
    trust_secondary: float,
    expected_value: Tuple[str, List[str]],
) -> None:
    query = Query(
        QueryEntity(EntityKey.EVENTS, EntityColumnSet([])),
        selected_columns=[
            SelectedExpression("column2", Column(None, None, "column2"))
        ],
        condition=time_condition,
    )

    set_config("rollout_upgraded_errors_trust", trust_secondary)
    set_config("rollout_upgraded_errors_execute", exec_both)

    previous_time = settings.ERRORS_UPGRADE_BEGINING_OF_TIME
    settings.ERRORS_UPGRADE_BEGINING_OF_TIME = beginning_of_time

    assert v2_selector_function(query, "test") == expected_value

    settings.ERRORS_UPGRADE_BEGINING_OF_TIME = previous_time
test_cases = [
    (
        "not promoted",
        ClickhouseQuery(
            Table("events", columns),
            selected_columns=[
                SelectedExpression(
                    "tags[foo]",
                    FunctionCall(
                        "tags[foo]",
                        "arrayValue",
                        (
                            Column(None, None, "tags.value"),
                            FunctionCall(
                                None,
                                "indexOf",
                                (
                                    Column(None, None, "tags.key"),
                                    Literal(None, "foo"),
                                ),
                            ),
                        ),
                    ),
                )
            ],
        ),
        ClickhouseQuery(
            Table("events", columns),
            selected_columns=[
                SelectedExpression(
                    "tags[foo]",
)
from snuba.query.query_settings import HTTPQuerySettings
from snuba.query.validation.signature import Column as ColType

QUERY_ENTITY = QueryEntity(
    EntityKey.EVENTS,
    ColumnSet([("param1", String()), ("param2", UInt(8)),
               ("other_col", String())]),
)

TEST_CASES = [
    pytest.param(
        Query(
            QUERY_ENTITY,
            selected_columns=[
                SelectedExpression("column1", Column("column1", None,
                                                     "column1")),
            ],
            groupby=[Column("column1", None, "column1")],
            condition=binary_condition(
                "equals",
                FunctionCall("group_id", "f",
                             (Column("something", None, "something"), )),
                Literal(None, 1),
            ),
        ),
        Query(
            QUERY_ENTITY,
            selected_columns=[
                SelectedExpression("column1", Column("column1", None,
                                                     "column1")),
            ],
Example #13

span_processor_tests = [
    pytest.param(
        build_query(),
        [],
        None,
        id="no spans columns in select clause",
    ),
    pytest.param(
        build_query(selected_columns=[
            spans_op_col, spans_group_col, spans_exclusive_time_col
        ]),
        [
            SelectedExpression(
                "spans_op",
                tupleElement("spans_op", array_join_col(), Literal(None, 1))),
            SelectedExpression(
                "spans_group",
                tupleElement("spans_group", array_join_col(), Literal(None,
                                                                      2)),
            ),
            SelectedExpression(
                "spans_exclusive_time",
                tupleElement("spans_exclusive_time", array_join_col(),
                             Literal(None, 3)),
            ),
        ],
        None,
        id="simple array join with all op, group, exclusive_time",
    ),
Example #14
        alias="gr",
        data_source=Entity(EntityKey.GROUPEDMESSAGES, GROUPS_SCHEMA, None),
    ),
    keys=[
        JoinCondition(
            left=JoinConditionExpression("ev", "group_id"),
            right=JoinConditionExpression("gr", "id"),
        )
    ],
    join_type=JoinType.INNER,
)

LOGICAL_QUERY = LogicalQuery(
    from_clause=Entity(EntityKey.EVENTS, EVENTS_SCHEMA, 0.5),
    selected_columns=[
        SelectedExpression("c1", Column("_snuba_c1", "t", "c")),
        SelectedExpression(
            "f1", FunctionCall("_snuba_f1", "f", (Column(None, "t", "c2"), ))),
    ],
    array_join=Column(None, None, "col"),
    condition=binary_condition("equals", Column(None, None, "c4"),
                               Literal(None, "asd")),
    groupby=[Column(None, "t", "c4")],
    having=binary_condition("equals", Column(None, None, "c6"),
                            Literal(None, "asd2")),
    order_by=[OrderBy(OrderByDirection.ASC, Column(None, "t", "c"))],
    limitby=LimitBy(100, Column(None, None, "c8")),
    limit=150,
)

SIMPLE_FORMATTED = {
Example #15
def test_transform_column_names() -> None:
    """
    Runs a simple query whose selected expression names do not match
    the aliases of the expressions themselves.
    It verifies that the column names in the result correspond to the
    SelectedExpression names and not to the expression aliases
    (which are supposed to be internal).
    """
    events_storage = get_entity(EntityKey.EVENTS).get_writable_storage()

    event_id = uuid.uuid4().hex

    event_date = datetime.utcnow()
    write_unprocessed_events(
        events_storage,
        [
            InsertEvent(
                {
                    "event_id": event_id,
                    "group_id": 10,
                    "primary_hash": uuid.uuid4().hex,
                    "project_id": 1,
                    "message": "a message",
                    "platform": "python",
                    "datetime": event_date.strftime(settings.PAYLOAD_DATETIME_FORMAT),
                    "data": {"received": time.time()},
                    "organization_id": 1,
                    "retention_days": settings.DEFAULT_RETENTION_DAYS,
                }
            )
        ],
    )

    query = Query(
        Entity(EntityKey.EVENTS, get_entity(EntityKey.EVENTS).get_data_model()),
        selected_columns=[
            # The selected expression names are those provided by the
            # user in the query and the ones the user expects in the response.
            # Aliases will be internal names to prevent shadowing.
            SelectedExpression("event_id", Column("_snuba_event_id", None, "event_id")),
            SelectedExpression(
                "message",
                FunctionCall(
                    "_snuba_message",
                    "ifNull",
                    (Column(None, None, "message"), Literal(None, "")),
                ),
            ),
        ],
    )
    query_settings = HTTPRequestSettings()
    apply_query_extensions(
        query,
        {
            "timeseries": {
                "from_date": (event_date - timedelta(minutes=5)).strftime(
                    settings.PAYLOAD_DATETIME_FORMAT
                ),
                "to_date": (event_date + timedelta(minutes=1)).strftime(
                    settings.PAYLOAD_DATETIME_FORMAT
                ),
                "granularity": 3600,
            },
            "project": {"project": [1]},
        },
        query_settings,
    )

    dataset = get_dataset("events")
    timer = Timer("test")

    result = parse_and_run_query(
        dataset,
        Request(
            id="asd", body={}, query=query, settings=query_settings, referrer="asd",
        ),
        timer,
    )

    data = result.result["data"]
    assert data == [{"event_id": event_id, "message": "a message"}]
    meta = result.result["meta"]

    assert meta == [
        MetaColumn(name="event_id", type="String"),
        MetaColumn(name="message", type="String"),
    ]
Example #16
from snuba.query.expressions import Column, FunctionCall, Literal
from snuba.query.logical import Query
from snuba.utils.schemas import Column as EntityColumn
from snuba.web.query import ProjectsFinder

EVENTS_SCHEMA = EntityColumnSet([
    EntityColumn("event_id", UUID()),
    EntityColumn("project_id", UInt(32)),
    EntityColumn("group_id", UInt(32)),
])

SIMPLE_QUERY = Query(
    Entity(EntityKey.EVENTS, EVENTS_SCHEMA),
    selected_columns=[
        SelectedExpression(
            "alias",
            Column("_snuba_project", None, "project_id"),
        )
    ],
    array_join=None,
    condition=binary_condition(
        ConditionFunctions.IN,
        Column("_snuba_project", None, "project_id"),
        FunctionCall(None, "tuple", (Literal(None, 1), Literal(None, 2))),
    ),
)

TEST_CASES = [
    pytest.param(
        SIMPLE_QUERY,
        {1, 2},
        id="Simple Query",
            Column("_snuba_project_id", None, "project_id"),
            Literal(None, 3),
        ),
        binary_condition(
            ConditionFunctions.IN,
            Column("_snuba_project_id", None, "project_id"),
            FunctionCall(None, "array", (Literal(None, 4), Literal(None, 5))),
        ),
    ),
]

queries = [
    Query(
        QueryEntity(EntityKey.EVENTS, ColumnSet([])),
        selected_columns=[
            SelectedExpression("column2", Column(None, None, "column2"))
        ],
        condition=c,
    ) for c in conditions
]


def test_project_rate_limit_processor() -> None:

    settings = HTTPQuerySettings(referrer="foo")

    num_before = len(settings.get_rate_limit_params())
    for query in queries:
        ReferrerRateLimiterProcessor().process_query(query, settings)
    # if the limiter is not configured, it should not be applied
    assert len(settings.get_rate_limit_params()) == num_before
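A hedged sketch of the configured case; the referrer_per_second_limit_foo and referrer_concurrent_limit_foo config keys are assumptions patterned after the project-referrer keys used earlier in this listing:

def test_referrer_rate_limit_configured_sketch() -> None:
    state.set_config("referrer_per_second_limit_foo", 20)  # assumed key name
    state.set_config("referrer_concurrent_limit_foo", 20)  # assumed key name
    settings = HTTPQuerySettings(referrer="foo")

    num_before = len(settings.get_rate_limit_params())
    for query in queries:
        ReferrerRateLimiterProcessor().process_query(query, settings)
    # once configured, the limiter should be appended for this referrer
    assert len(settings.get_rate_limit_params()) > num_before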
Example #18
def generate_subqueries(query: CompositeQuery[Entity]) -> None:
    """
    Generates correct subqueries for each of the entities referenced in
    a join query, and pushes down all expressions that can be executed
    in the subquery.

    Columns in the select clause of the subqueries are given a mangled
    alias, and the external query references them through that alias.

    ```
    SELECT e.a, f(g.b) FROM Events e INNER JOIN Groups g ON ...
    ```

    becomes

    ```
    SELECT e._snuba_a, g._snuba_b
    FROM (
        SELECT a as _snuba_a
        FROM events
    ) e INNER JOIN (
        SELECT f(b) as _snuba_b
        FROM groups
    ) g ON ....
    ```

    Conditions are treated differently from other expressions. If a
    condition is entirely contained in a single subquery, we push it
    down into that subquery's condition clause and remove it from the
    main query.
    """

    from_clause = query.get_from_clause()
    if isinstance(from_clause, CompositeQuery):
        generate_subqueries(from_clause)
        return
    elif isinstance(from_clause, ProcessableQuery):
        return

    # Now this has to be a join, so we can work with it.
    subqueries = from_clause.accept(SubqueriesInitializer())

    alias_generator = _alias_generator()
    query.set_ast_selected_columns([
        SelectedExpression(
            name=s.name,
            expression=_process_root(s.expression, subqueries,
                                     alias_generator),
        ) for s in query.get_selected_columns()
    ])

    array_join = query.get_arrayjoin()
    if array_join is not None:
        query.set_arrayjoin([
            _process_root(el, subqueries, alias_generator) for el in array_join
        ])

    ast_condition = query.get_condition()
    if ast_condition is not None:
        main_conditions = []
        for c in get_first_level_and_conditions(ast_condition):
            subexpression = c.accept(BranchCutter(alias_generator))
            if isinstance(subexpression, SubqueryExpression):
                # The expression is entirely contained in a single subquery
                # after we tried to cut subquery branches with the
                # BranchCutter visitor, so push down the entire condition
                # and remove it from the main query.
                subqueries[subexpression.subquery_alias].add_condition(
                    subexpression.main_expression)
            else:
                # This condition has references to multiple subqueries.
                # We cannot push down the condition. We push down the
                # branches into the select clauses and we reference them
                # from the main query condition.
                main_conditions.append(
                    _push_down_branches(subexpression, subqueries,
                                        alias_generator))

        if main_conditions:
            query.set_ast_condition(combine_and_conditions(main_conditions))
        else:
            query.set_ast_condition(None)

    # TODO: push down the group by when it is the same as the join key.
    query.set_ast_groupby([
        _process_root(e, subqueries, alias_generator)
        for e in query.get_groupby()
    ])

    having = query.get_having()
    if having is not None:
        query.set_ast_having(
            combine_and_conditions([
                _process_root(c, subqueries, alias_generator)
                for c in get_first_level_and_conditions(having)
            ]))

    query.set_ast_orderby([
        replace(
            orderby,
            expression=_process_root(orderby.expression, subqueries,
                                     alias_generator),
        ) for orderby in query.get_orderby()
    ])

    limitby = query.get_limitby()
    if limitby is not None:
        query.set_limitby(
            replace(
                limitby,
                columns=[
                    _process_root(
                        column,
                        subqueries,
                        alias_generator,
                    ) for column in limitby.columns
                ],
            ))

    query.set_from_clause(
        SubqueriesReplacer(subqueries).visit_join_clause(from_clause))
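A hedged usage sketch of the pass above, mirroring the SQL rewrite in the docstring; join_clause is a hypothetical JoinClause fixture over entities aliased "e" and "g", and the column names are assumptions:

query = CompositeQuery(
    from_clause=join_clause,  # hypothetical JoinClause joining "e" and "g"
    selected_columns=[
        SelectedExpression("a", Column("_snuba_a", "e", "a")),
        SelectedExpression(
            "f_b", FunctionCall("_snuba_f_b", "f", (Column(None, "g", "b"),))
        ),
    ],
)
generate_subqueries(query)
# query.get_from_clause() is now a join of subqueries, each selecting the
# mangled aliases (_snuba_a, _snuba_f_b) pushed down from the outer query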
Example #19
        alias="gr",
        data_source=Entity(EntityKey.GROUPEDMESSAGES, GROUPS_SCHEMA, None),
    ),
    keys=[
        JoinCondition(
            left=JoinConditionExpression("ev", "group_id"),
            right=JoinConditionExpression("gr", "id"),
        )
    ],
    join_type=JoinType.INNER,
)

SIMPLE_SELECT_QUERY = LogicalQuery(
    from_clause=Entity(EntityKey.EVENTS, EVENTS_SCHEMA, 0.5),
    selected_columns=[
        SelectedExpression("c1", Column("_snuba_simple", "simple_t",
                                        "simple_c")),
    ],
)

LOGICAL_QUERY = LogicalQuery(
    from_clause=Entity(EntityKey.EVENTS, EVENTS_SCHEMA, 0.5),
    selected_columns=[
        SelectedExpression("c1", Column("_snuba_c1", "t", "c")),
        SelectedExpression(
            "f1", FunctionCall("_snuba_f1", "f", (Column(None, "t", "c2"), ))),
    ],
    array_join=Column(None, None, "col"),
    condition=binary_condition("equals", Column(None, None, "c4"),
                               Literal(None, "asd")),
    groupby=[Column(None, "t", "c4")],
    having=binary_condition("equals", Column(None, None, "c6"),
 """MATCH {
     MATCH (events)
     SELECT count() AS count BY title
     WHERE project_id=1
     AND timestamp>=toDateTime('2021-01-01T00:30:00')
     AND timestamp<toDateTime('2021-01-20T00:30:00')
 }
 SELECT max(count) AS max_count""",
 CompositeQuery(
     from_clause=LogicalQuery(
         QueryEntity(
             EntityKey.EVENTS,
             get_entity(EntityKey.EVENTS).get_data_model(),
         ),
         selected_columns=[
             SelectedExpression("title",
                                Column("_snuba_title", None, "title")),
             SelectedExpression(
                 "count", FunctionCall("_snuba_count", "count",
                                       tuple())),
         ],
         groupby=[Column("_snuba_title", None, "title")],
         condition=binary_condition(
             "and",
             binary_condition(
                 "equals",
                 Column("_snuba_project_id", None, "project_id"),
                 Literal(None, 1),
             ),
             binary_condition(
                 "and",
                 binary_condition(
Example #21
from snuba.query.data_source.simple import Table
from snuba.query.expressions import (
    Column,
    FunctionCall,
    Literal,
    SubscriptableReference,
)
from snuba.query.logical import Query as SnubaQuery

test_cases = [
    pytest.param(
        TranslationMappers(),
        SnubaQuery(
            from_clause=QueryEntity(EntityKey.EVENTS, ColumnSet([])),
            selected_columns=[
                SelectedExpression("alias", Column("alias", "table", "column")),
                SelectedExpression(
                    "alias2",
                    FunctionCall(
                        "alias2",
                        "f1",
                        (Column(None, None, "column2"), Column(None, None, "column3")),
                    ),
                ),
                SelectedExpression(
                    name=None,
                    expression=SubscriptableReference(
                        None, Column(None, None, "tags"), Literal(None, "myTag")
                    ),
                ),
            ],
Example #22
)


class EntityKeySubscription(EntitySubscriptionValidation, EntitySubscription):
    ...


tests = [
    pytest.param(
        LogicalQuery(
            QueryEntity(
                EntityKey.EVENTS, get_entity(EntityKey.EVENTS).get_data_model()
            ),
            selected_columns=[
                SelectedExpression(
                    "time", Column("_snuba_timestamp", None, "timestamp")
                ),
            ],
            condition=binary_condition(
                "equals",
                Column("_snuba_project_id", None, "project_id"),
                Literal(None, 1),
            ),
        ),
        id="no extra clauses",
    ),
    pytest.param(
        LogicalQuery(
            QueryEntity(
                EntityKey.METRICS_COUNTERS,
                get_entity(EntityKey.METRICS_COUNTERS).get_data_model(),
Example #23
def test_replace_expression() -> None:
    """
    Creates a query with the new AST and replaces a function with a
    different one: f1(...) becomes tag(f1).
    """
    column1 = Column(None, "t1", "c1")
    column2 = Column(None, "t1", "c2")
    function_1 = FunctionCall("alias", "f1", (column1, column2))
    function_2 = FunctionCall("alias", "f2", (column2,))

    condition = binary_condition(ConditionFunctions.EQ, function_1, Literal(None, "1"))

    prewhere = binary_condition(ConditionFunctions.EQ, function_1, Literal(None, "2"))

    orderby = OrderBy(OrderByDirection.ASC, function_2)

    query = Query(
        Table("my_table", ColumnSet([])),
        selected_columns=[SelectedExpression("alias", function_1)],
        array_join=None,
        condition=condition,
        groupby=[function_1],
        having=None,
        prewhere=prewhere,
        order_by=[orderby],
    )

    def replace(exp: Expression) -> Expression:
        if isinstance(exp, FunctionCall) and exp.function_name == "f1":
            return FunctionCall(exp.alias, "tag", (Literal(None, "f1"),))
        return exp

    query.transform_expressions(replace)

    expected_query = Query(
        Table("my_table", ColumnSet([])),
        selected_columns=[
            SelectedExpression(
                "alias", FunctionCall("alias", "tag", (Literal(None, "f1"),))
            )
        ],
        array_join=None,
        condition=binary_condition(
            ConditionFunctions.EQ,
            FunctionCall("alias", "tag", (Literal(None, "f1"),)),
            Literal(None, "1"),
        ),
        groupby=[FunctionCall("alias", "tag", (Literal(None, "f1"),))],
        prewhere=binary_condition(
            ConditionFunctions.EQ,
            FunctionCall("alias", "tag", (Literal(None, "f1"),)),
            Literal(None, "2"),
        ),
        having=None,
        order_by=[orderby],
    )

    assert query.get_selected_columns() == expected_query.get_selected_columns()
    assert query.get_condition() == expected_query.get_condition()
    assert query.get_groupby() == expected_query.get_groupby()
    assert query.get_having() == expected_query.get_having()
    assert query.get_orderby() == expected_query.get_orderby()

    assert list(query.get_all_expressions()) == list(
        expected_query.get_all_expressions()
    )
Example #24
    ]
)
GROUPS_SCHEMA = ColumnSet(
    [
        ("id", UInt(32)),
        ("project_id", UInt(32)),
        ("group_id", UInt(32)),
        ("message", String()),
    ]
)

SIMPLE_QUERY = ClickhouseQuery(
    Table("errors_local", ERRORS_SCHEMA, final=True, sampling_rate=0.1),
    selected_columns=[
        SelectedExpression(
            "alias",
            FunctionCall("alias", "something", (Column(None, None, "event_id"),)),
        ),
        SelectedExpression(
            "group_id",
            Column(None, None, "group_id"),
        ),
    ],
    array_join=None,
    condition=binary_condition(
        ConditionFunctions.EQ,
        FunctionCall("alias", "tag", (Column(None, None, "group_id"),)),
        Literal(None, "1"),
    ),
    groupby=[FunctionCall("alias", "tag", (Column(None, None, "message"),))],
    prewhere=binary_condition(
        ConditionFunctions.EQ,
Example #25
        )
    ],
    join_type=JoinType.INNER,
)

TEST_CASES = [
    pytest.param(
        CompositeQuery(
            from_clause=BASIC_JOIN,
            selected_columns=[],
        ),
        CompositeQuery(
            from_clause=events_groups_join(
                events_node([
                    SelectedExpression(
                        "_snuba_group_id",
                        Column("_snuba_group_id", None, "group_id"),
                    ),
                ]),
                groups_node([
                    SelectedExpression("_snuba_id",
                                       Column("_snuba_id", None, "id"))
                ], ),
            ),
            selected_columns=[],
        ),
        id="Basic join",
    ),
    pytest.param(
        CompositeQuery(
            from_clause=BASIC_JOIN,
            selected_columns=[
Example #26
def test_apdex_format_expressions() -> None:
    unprocessed = Query(
        QueryEntity(EntityKey.EVENTS, ColumnSet([])),
        selected_columns=[
            SelectedExpression(name=None, expression=Column(None, None, "column2")),
            SelectedExpression(
                "perf",
                FunctionCall(
                    "perf", "apdex", (Column(None, None, "column1"), Literal(None, 300))
                ),
            ),
        ],
    )
    expected = Query(
        QueryEntity(EntityKey.EVENTS, ColumnSet([])),
        selected_columns=[
            SelectedExpression(name=None, expression=Column(None, None, "column2")),
            SelectedExpression(
                "perf",
                divide(
                    plus(
                        FunctionCall(
                            None,
                            "countIf",
                            (
                                binary_condition(
                                    ConditionFunctions.LTE,
                                    Column(None, None, "column1"),
                                    Literal(None, 300),
                                ),
                            ),
                        ),
                        divide(
                            FunctionCall(
                                None,
                                "countIf",
                                (
                                    binary_condition(
                                        BooleanFunctions.AND,
                                        binary_condition(
                                            ConditionFunctions.GT,
                                            Column(None, None, "column1"),
                                            Literal(None, 300),
                                        ),
                                        binary_condition(
                                            ConditionFunctions.LTE,
                                            Column(None, None, "column1"),
                                            multiply(
                                                Literal(None, 300), Literal(None, 4)
                                            ),
                                        ),
                                    ),
                                ),
                            ),
                            Literal(None, 2),
                        ),
                    ),
                    FunctionCall(
                        None,
                        "count",
                        (),
                    ),
                    "perf",
                ),
            ),
        ],
    )

    apdex_processor().process_query(unprocessed, HTTPQuerySettings())
    assert expected.get_selected_columns() == unprocessed.get_selected_columns()

    ret = unprocessed.get_selected_columns()[1].expression.accept(
        ClickhouseExpressionFormatter()
    )
    assert ret == (
        "(divide(plus(countIf(lessOrEquals(column1, 300)), "
        "divide(countIf(greater(column1, 300) AND "
        "lessOrEquals(column1, multiply(300, 4))), 2)), count()) AS perf)"
    )
node_err = IndividualNode(alias="err",
                          data_source=Table("errors_local", ERRORS_SCHEMA))
node_group = IndividualNode(alias="groups",
                            data_source=Table("groupedmessage_local",
                                              GROUPS_SCHEMA))
node_assignee = IndividualNode(alias="assignee",
                               data_source=Table("groupassignee_local",
                                                 GROUPS_ASSIGNEE))

test_cases = [
    pytest.param(
        Query(
            Table("my_table", ColumnSet([])),
            selected_columns=[
                SelectedExpression("column1", Column(None, None, "column1")),
                SelectedExpression("column2", Column(None, "table1",
                                                     "column2")),
                SelectedExpression("column3", Column("al", None, "column3")),
            ],
            condition=binary_condition(
                "eq",
                lhs=Column("al", None, "column3"),
                rhs=Literal(None, "blabla"),
            ),
            groupby=[
                Column(None, None, "column1"),
                Column(None, "table1", "column2"),
                Column("al", None, "column3"),
                Column(None, None, "column4"),
            ],
    assert get_filtered_mapping_keys(query, "tags") == expected_result


test_data = [
    pytest.param(
        {
            "aggregations": [],
            "groupby": [],
            "selected_columns": ["col1"],
            "conditions": [["tags_key", "IN", ["t1", "t2"]]],
        },
        ClickhouseQuery(
            None,
            selected_columns=[
                SelectedExpression(name="col1",
                                   expression=Column("_snuba_col1", None,
                                                     "col1"))
            ],
            condition=in_condition(
                arrayJoin("_snuba_tags_key", Column(None, None, "tags.key")),
                [Literal(None, "t1"), Literal(None, "t2")],
            ),
        ),
        id="no tag in select clause",
    ),  # Individual tag, no change
    pytest.param(
        {
            "aggregations": [],
            "groupby": [],
            "selected_columns": ["tags_key", "tags_value"],
            "conditions": [["col", "IN", ["t1", "t2"]]],
Example #29
groups_table = Table(
    groups_storage.get_schema().get_table_name(),
    groups_storage.get_schema().get_columns(),
    final=False,
    sampling_rate=None,
    mandatory_conditions=(
        groups_storage.get_schema().get_data_source().get_mandatory_conditions()
    ),
)

TEST_CASES = [
    pytest.param(
        CompositeQuery(
            from_clause=LogicalQuery(
                from_clause=events_ent,
                selected_columns=[
                    SelectedExpression("project_id",
                                       Column(None, None, "project_id")),
                    SelectedExpression(
                        "count_environment",
                        FunctionCall(
                            "count_environment",
                            "uniq",
                            (SubscriptableReference(
                                None,
                                Column(None, None, "tags"),
                                Literal(None, "environment"),
                            ), ),
                        ),
                    ),
                ],
                groupby=[Column(None, None, "project_id")],
                condition=binary_condition(
Example #30
from snuba.datasets.entities import EntityKey
from snuba.query import SelectedExpression
from snuba.query.data_source.simple import Entity as QueryEntity
from snuba.query.expressions import Column, CurriedFunctionCall, FunctionCall, Literal
from snuba.query.logical import Query
from snuba.query.processors.basic_functions import BasicFunctionsProcessor
from snuba.query.query_settings import HTTPQuerySettings

test_data = [
    (
        Query(
            QueryEntity(EntityKey.EVENTS, ColumnSet([])),
            selected_columns=[
                SelectedExpression(
                    "alias",
                    FunctionCall("alias", "uniq",
                                 (Column(None, None, "column1"), )),
                ),
                SelectedExpression(
                    "alias2",
                    FunctionCall("alias2", "emptyIfNull",
                                 (Column(None, None, "column2"), )),
                ),
            ],
        ),
        Query(
            QueryEntity(EntityKey.EVENTS, ColumnSet([])),
            selected_columns=[
                SelectedExpression(
                    "alias",
                    FunctionCall(