Example #1
0
         "event_id",
         "group_id",
         "tags[sentry:release]",
         "message",
         "environment",
         "project_id",
     ],
     [],
     None,
     [[["positionCaseInsensitive", ["message", "'abc'"]], "!=", 0]],
     FunctionCall(
         None,
         OPERATOR_TO_FUNCTION["!="],
         (
             FunctionCall(
                 None,
                 "positionCaseInsensitive",
                 (Column("message", None, "message"), Literal(None, "abc")),
             ),
             Literal(None, 0),
         ),
     ),
 ),
 (
     # Add pre-where condition in the expected order
     {
         "conditions": [
             ["d", "=", "1"],
             ["c", "=", "3"],
             ["a", "=", "1"],
             ["b", "=", "2"],
         ],
        None,
        selected_columns=[
            SelectedExpression(name=s.alias, expression=s)
            for s in selected_columns or []
        ],
        condition=condition,
        having=having,
    )


tags_filter_tests = [
    pytest.param(
        build_query(selected_columns=[
            FunctionCall(
                "tags_key",
                "arrayJoin",
                (Column(None, None, "tags.key"), ),
            ),
        ], ),
        set(),
        id="no tag filter",
    ),
    pytest.param(
        build_query(
            selected_columns=[
                FunctionCall(
                    "tags_key",
                    "arrayJoin",
                    (Column(None, None, "tags.key"), ),
                ),
            ],
Example #3
0
import pytest

from snuba.query.expressions import (
    Column,
    CurriedFunctionCall,
    FunctionCall,
    Literal,
)
from snuba.query.parser.expressions import parse_aggregation

test_data = [
    (
        ["count", "event_id", None],
        FunctionCall(None, "count", (Column(None, "event_id", None),)),
    ),  # Simple aggregation
    (
        ["count()", "", None],
        FunctionCall(None, "count", ()),
    ),  # Common way to provide count()
    (
        ["count()", None, None],
        FunctionCall(None, "count", ()),
    ),  # Common way to provide count()
    (
        ["count()", "event_id", None],
        CurriedFunctionCall(
            None, FunctionCall(None, "count", ()), (Column(None, "event_id", None),)
        ),
    ),  # This is probably wrong, but we cannot disambiguate it at this level
    (
        ["uniq", "platform", "uniq_platforms"],
Example #4
0
    ("message", String()),
    ("group_id", UInt(32)),
])
GROUPS_SCHEMA = ColumnSet([
    ("id", UInt(32)),
    ("project_id", UInt(32)),
    ("group_id", UInt(32)),
    ("message", String()),
])

SIMPLE_QUERY = ClickhouseQuery(
    Table("errors_local", ERRORS_SCHEMA, final=True, sampling_rate=0.1),
    selected_columns=[
        SelectedExpression(
            "alias",
            FunctionCall("alias", "something",
                         (Column(None, None, "event_id"), )),
        ),
        SelectedExpression(
            "group_id",
            Column(None, None, "group_id"),
        ),
    ],
    array_join=None,
    condition=binary_condition(
        ConditionFunctions.EQ,
        FunctionCall("alias", "tag", (Column(None, None, "group_id"), )),
        Literal(None, "1"),
    ),
    groupby=[FunctionCall("alias", "tag", (Column(None, None, "message"), ))],
    prewhere=binary_condition(
        ConditionFunctions.EQ,
Example #5
0
from snuba.query.expressions import Column, FunctionCall, Literal
from snuba.querylog.query_metadata import ClickhouseQueryProfile, FilterProfile
from snuba.state import safe_dumps

test_cases = [
    pytest.param(
        ClickhouseQuery(
            TableSource("events", ColumnSet([])),
            selected_columns=[
                SelectedExpression("column2", Column("column2", None,
                                                     "column2")),
                SelectedExpression(
                    "something",
                    FunctionCall(
                        "something",
                        "arrayJoin",
                        (Column(None, None, "contexts.key"), ),
                    ),
                ),
            ],
            condition=binary_condition(
                None,
                BooleanFunctions.AND,
                binary_condition(
                    None,
                    ConditionFunctions.GTE,
                    Column(None, None, "timestamp"),
                    Literal(None, datetime(2020, 8, 1)),
                ),
                binary_condition(
                    None,
Example #6
0
 def transform(match: MatchResult, exp: Expression) -> Expression:
     """Wrap the matched column in ``nullIf(<column>, '')``.

     The replacement column keeps only the original ``column_name``; its
     alias and table name are both passed as ``None``.
     """
     assert isinstance(exp, Column)  # mypy
     bare_column = Column(None, None, exp.column_name)
     empty_string = Literal(None, "")
     return FunctionCall(None, "nullIf", (bare_column, empty_string))
Example #7
0
def unary_condition(
    alias: Optional[str], function_name: str, operand: Expression
) -> FunctionCall:
    """Build a single-argument ``FunctionCall``.

    The call is named ``function_name``, carries ``alias`` and has
    ``operand`` as its only parameter.
    """
    parameters = (operand,)
    return FunctionCall(alias, function_name, parameters)
Example #8
0
def multiply(lhs: Expression,
             rhs: Expression,
             alias: Optional[str] = None) -> FunctionCall:
    """Build a ``multiply(lhs, rhs)`` function call, optionally aliased."""
    operands = (lhs, rhs)
    return FunctionCall(alias, "multiply", operands)
Example #9
0
def div(lhs: Expression,
        rhs: Expression,
        alias: Optional[str] = None) -> FunctionCall:
    """Build a ``div(lhs, rhs)`` function call, optionally aliased."""
    operands = (lhs, rhs)
    return FunctionCall(alias, "div", operands)
Example #10
0
def identity(expression: Expression, alias: Optional[str]) -> FunctionCall:
    """Wrap ``expression`` in an ``identity(...)`` function call with ``alias``."""
    wrapped = (expression,)
    return FunctionCall(alias, "identity", wrapped)
Example #11
0
def minus(lhs: Expression,
          rhs: Expression,
          alias: Optional[str] = None) -> FunctionCall:
    """Build a ``minus(lhs, rhs)`` function call, optionally aliased."""
    operands = (lhs, rhs)
    return FunctionCall(alias, "minus", operands)
Example #12
0
def tupleElement(alias: Optional[str], tuple_expr: Expression,
                 index: Expression) -> FunctionCall:
    """Build a ``tupleElement(tuple_expr, index)`` function call with ``alias``."""
    arguments = (tuple_expr, index)
    return FunctionCall(alias, "tupleElement", arguments)
Example #13
0
def arrayJoin(alias: Optional[str], content: Expression) -> Expression:
    """Build an ``arrayJoin(content)`` function call carrying ``alias``."""
    arguments = (content,)
    return FunctionCall(alias, "arrayJoin", arguments)
Example #14
0
def arrayElement(alias: Optional[str], array: Expression,
                 index: Expression) -> FunctionCall:
    """Build an ``arrayElement(array, index)`` function call with ``alias``."""
    arguments = (array, index)
    return FunctionCall(alias, "arrayElement", arguments)
Example #15
0
def unary_condition(function_name: str, operand: Expression) -> FunctionCall:
    """Build an unaliased ``FunctionCall`` with ``operand`` as its only parameter."""
    parameters = (operand,)
    return FunctionCall(None, function_name, parameters)
Example #16
0
def count(column: Optional[Column] = None,
          alias: Optional[str] = None) -> FunctionCall:
    """Build a ``count`` function call.

    Args:
        column: Column to count. When omitted (``None``) the call is built
            with no parameters, i.e. ``count()``.
        alias: Optional alias attached to the resulting function call.

    Returns:
        A ``FunctionCall`` named ``"count"`` with zero or one parameter.
    """
    # Test for absence explicitly instead of relying on the truthiness of
    # the expression object: only ``None`` means "no column".
    parameters = () if column is None else (column, )
    return FunctionCall(alias, "count", parameters)
Example #17
0
         QueryEntity(EntityKey.EVENTS, ColumnSet([])),
         selected_columns=[
             SelectedExpression(name=None,
                                expression=Column(None, None, "column1")),
             SelectedExpression(name=None,
                                expression=Column(None, None, "column2")),
         ],
     ),
     Query(
         QueryEntity(EntityKey.EVENTS, ColumnSet([])),
         selected_columns=[
             SelectedExpression(
                 name=None,
                 expression=FunctionCall(
                     None,
                     "nullIf",
                     (Column(None, None, "column1"), Literal(None, "")),
                 ),
             ),
             SelectedExpression(name=None,
                                expression=Column(None, None, "column2")),
         ],
     ),
     id="replace unaliased column",
 ),
 pytest.param(
     Query(
         QueryEntity(EntityKey.EVENTS, ColumnSet([])),
         selected_columns=[
             SelectedExpression(name=None,
                                expression=Column("some_alias", None,
Example #18
0
def literals_tuple(alias: Optional[str],
                   literals: Sequence[Literal]) -> FunctionCall:
    """Build a ``tuple(...)`` function call whose parameters are ``literals``.

    The sequence is copied into a tuple, preserving order, before being
    handed to ``FunctionCall``.
    """
    parameters = (*literals,)
    return FunctionCall(alias, "tuple", parameters)
Example #19
0
def binary_condition(
    alias: Optional[str], function_name: str, lhs: Expression, rhs: Expression
) -> FunctionCall:
    """Build a two-argument ``FunctionCall``.

    The call is named ``function_name``, carries ``alias`` and takes
    ``lhs`` and ``rhs`` (in that order) as its parameters.
    """
    operands = (lhs, rhs)
    return FunctionCall(alias, function_name, operands)
Example #20
0
def test_events_boolean_context() -> None:
    """Check the rewrite of ``contexts[device.charging]`` into a ``multiIf``.

    A query selecting the context value through ``arrayElement``/``indexOf``
    is run through ``MappingColumnPromoter`` and then
    ``EventsBooleanContextsProcessor``; its selected columns must equal the
    hand-built ``multiIf`` expression over ``toString(device_charging)``.
    """
    schema = ColumnSet([
        ("device_charging", UInt(8, Modifier(nullable=True))),
        ("contexts", Nested([("key", String()), ("value", String())])),
    ])
    alias = "contexts[device.charging]"

    # Input: the context value looked up by key in the nested column.
    key_position = FunctionCall(
        None,
        "indexOf",
        (
            Column(None, None, "contexts.key"),
            Literal(None, "device.charging"),
        ),
    )
    context_lookup = FunctionCall(
        alias,
        "arrayElement",
        (Column(None, None, "contexts.value"), key_position),
    )
    query = ClickhouseQuery(
        TableSource("events", schema),
        selected_columns=[SelectedExpression(alias, context_lookup)],
    )

    def promoted_as_string() -> FunctionCall:
        # A fresh toString(device_charging) expression for each use.
        return FunctionCall(
            None,
            "toString",
            (Column(None, None, "device_charging"), ),
        )

    # Expected: multiIf(value == '', '', value IN ('1', 'True'), 'True', 'False')
    is_empty = binary_condition(
        None,
        ConditionFunctions.EQ,
        promoted_as_string(),
        Literal(None, ""),
    )
    is_true = binary_condition(
        None,
        ConditionFunctions.IN,
        promoted_as_string(),
        literals_tuple(None, [Literal(None, "1"), Literal(None, "True")]),
    )
    boolean_expression = FunctionCall(
        alias,
        "multiIf",
        (
            is_empty,
            Literal(None, ""),
            is_true,
            Literal(None, "True"),
            Literal(None, "False"),
        ),
    )
    expected = ClickhouseQuery(
        TableSource("events", schema),
        selected_columns=[SelectedExpression(alias, boolean_expression)],
    )

    settings = HTTPRequestSettings()
    promoter = MappingColumnPromoter(
        {"contexts": {"device.charging": "device_charging"}})
    promoter.process_query(query, settings)
    EventsBooleanContextsProcessor().process_query(query, settings)

    actual_columns = query.get_selected_columns_from_ast()
    expected_columns = expected.get_selected_columns_from_ast()
    assert actual_columns == expected_columns
Example #21
0
from snuba.query.processors.quota_processor import ResourceQuotaProcessor
from snuba.query.processors.tags_expander import TagsExpanderProcessor
from snuba.query.processors.timeseries_processor import TimeSeriesProcessor
from snuba.query.query_settings import QuerySettings
from snuba.query.validation.validators import EntityRequiredColumnValidator

transaction_translator = TranslationMappers(
    columns=[
        ColumnToFunction(
            None,
            "ip_address",
            "coalesce",
            (
                FunctionCall(
                    None,
                    "IPv4NumToString",
                    (Column(None, None, "ip_address_v4"), ),
                ),
                FunctionCall(
                    None,
                    "IPv6NumToString",
                    (Column(None, None, "ip_address_v6"), ),
                ),
            ),
        ),
        ColumnToFunction(None, "user", "nullIf",
                         (Column(None, None, "user"), Literal(None, ""))),
        # These column aliases originally existed in the ``discover`` dataset,
        # but now live here to maintain compatibility between the composite
        # ``discover`` dataset and the standalone ``transaction`` dataset. In
        # the future, these aliases should be defined on the Transaction entity
Example #22
0
    def __init__(self) -> None:
        """Assemble the column set, storage and query pipeline for this entity.

        Builds the shared column set, prepares one pipeline backed by the
        events storage and one backed by the discover storage, picks between
        them based on ``settings.ERRORS_ROLLOUT_ALL`` and forwards the result
        to the base-class constructor.

        NOTE(review): the enclosing class is not visible from here; it looks
        like the Discover entity -- confirm.
        """
        # Columns declared directly on this entity; combined below with the
        # events and transactions column sets to form the abstract schema.
        self.__common_columns = ColumnSet([
            ("event_id", FixedString(32)),
            ("project_id", UInt(64)),
            ("type", String(Modifiers(nullable=True))),
            ("timestamp", DateTime()),
            ("platform", String(Modifiers(nullable=True))),
            ("environment", String(Modifiers(nullable=True))),
            ("release", String(Modifiers(nullable=True))),
            ("dist", String(Modifiers(nullable=True))),
            ("user", String(Modifiers(nullable=True))),
            ("transaction", String(Modifiers(nullable=True))),
            ("message", String(Modifiers(nullable=True))),
            ("title", String(Modifiers(nullable=True))),
            # User
            ("user_id", String(Modifiers(nullable=True))),
            ("username", String(Modifiers(nullable=True))),
            ("email", String(Modifiers(nullable=True))),
            ("ip_address", String(Modifiers(nullable=True))),
            # SDK
            ("sdk_name", String(Modifiers(nullable=True))),
            ("sdk_version", String(Modifiers(nullable=True))),
            # geo location context
            ("geo_country_code", String(Modifiers(nullable=True))),
            ("geo_region", String(Modifiers(nullable=True))),
            ("geo_city", String(Modifiers(nullable=True))),
            ("http_method", String(Modifiers(nullable=True))),
            ("http_referer", String(Modifiers(nullable=True))),
            # Other tags and context
            ("tags", Nested([("key", String()), ("value", String())])),
            ("contexts", Nested([("key", String()), ("value", String())])),
            ("trace_id", String(Modifiers(nullable=True))),
        ])
        self.__events_columns = EVENTS_COLUMNS
        self.__transactions_columns = TRANSACTIONS_COLUMNS

        events_storage = get_storage(StorageKey.EVENTS)

        # Pipeline used when ERRORS_ROLLOUT_ALL is off. Note that it is built
        # unconditionally, even though only the ``else`` branch below uses it.
        events_pipeline_builder = SimplePipelineBuilder(
            query_plan_builder=SelectedStorageQueryPlanBuilder(
                selector=EventsQueryStorageSelector(
                    mappers=events_translation_mappers.
                    concat(transaction_translation_mappers).concat(
                        null_function_translation_mappers).concat(
                            TranslationMappers(
                                # XXX: Remove once we are using errors
                                columns=[
                                    ColumnToMapping(None, "release", None,
                                                    "tags", "sentry:release"),
                                    ColumnToMapping(None, "dist", None, "tags",
                                                    "sentry:dist"),
                                    ColumnToMapping(None, "user", None, "tags",
                                                    "sentry:user"),
                                ],
                                subscriptables=[
                                    SubscriptableMapper(
                                        None, "tags", None, "tags"),
                                    SubscriptableMapper(
                                        None, "contexts", None, "contexts"),
                                ],
                            )))), )

        # Plan builder for the discover storage. Its extra mappers translate
        # ip_address, transaction, username, email, the geo_* columns and
        # user, plus the tags/contexts subscriptables.
        discover_storage = get_storage(StorageKey.DISCOVER)
        discover_storage_plan_builder = SingleStorageQueryPlanBuilder(
            storage=discover_storage,
            mappers=events_translation_mappers.
            concat(transaction_translation_mappers).concat(
                null_function_translation_mappers).concat(
                    TranslationMappers(columns=[
                        ColumnToFunction(
                            None,
                            "ip_address",
                            "coalesce",
                            (
                                FunctionCall(
                                    None,
                                    "IPv4NumToString",
                                    (Column(None, None, "ip_address_v4"), ),
                                ),
                                FunctionCall(
                                    None,
                                    "IPv6NumToString",
                                    (Column(None, None, "ip_address_v6"), ),
                                ),
                            ),
                        ),
                        ColumnToColumn(None, "transaction", None,
                                       "transaction_name"),
                        ColumnToColumn(None, "username", None, "user_name"),
                        ColumnToColumn(None, "email", None, "user_email"),
                        ColumnToMapping(
                            None,
                            "geo_country_code",
                            None,
                            "contexts",
                            "geo.country_code",
                            nullable=True,
                        ),
                        ColumnToMapping(
                            None,
                            "geo_region",
                            None,
                            "contexts",
                            "geo.region",
                            nullable=True,
                        ),
                        ColumnToMapping(
                            None,
                            "geo_city",
                            None,
                            "contexts",
                            "geo.city",
                            nullable=True,
                        ),
                        ColumnToFunction(
                            None,
                            "user",
                            "nullIf",
                            (Column(None, None, "user"), Literal(None, "")),
                        ),
                    ])).concat(
                        TranslationMappers(subscriptables=[
                            SubscriptableMapper(None, "tags", None, "tags"),
                            SubscriptableMapper(None, "contexts", None,
                                                "contexts"),
                        ], )),
        )
        discover_pipeline_builder = SimplePipelineBuilder(
            query_plan_builder=discover_storage_plan_builder)

        # Choose the storage/pipeline pair from the rollout setting.
        pipeline_builder: Union[PipelineDelegator, SimplePipelineBuilder]
        if settings.ERRORS_ROLLOUT_ALL:
            storage = discover_storage
            # Second builder over the same discover plan builder, registered
            # as "sampler" next to the "primary" discover pipeline.
            sampled_pipeline_builder = SampledSimplePipelineBuilder(
                query_plan_builder=discover_storage_plan_builder)

            pipeline_builder = PipelineDelegator(
                query_pipeline_builders={
                    "primary": discover_pipeline_builder,
                    "sampler": sampled_pipeline_builder,
                },
                selector_func=sampling_selector_func,
                callback_func=sampling_callback_func,
            )
        else:
            storage = events_storage
            pipeline_builder = events_pipeline_builder

        # The abstract schema is the concatenation of the common, events and
        # transactions column sets; no writable storage is registered.
        super().__init__(
            storages=[storage],
            query_pipeline_builder=pipeline_builder,
            abstract_column_set=(self.__common_columns +
                                 self.__events_columns +
                                 self.__transactions_columns),
            join_relationships={},
            writable_storage=None,
            validators=[EntityRequiredColumnValidator({"project_id"})],
            required_time_column="timestamp",
        )
Example #23
0
def test_impact_format_expressions() -> None:
    """Check how ``ImpactProcessor`` expands and formats ``impact(...)``.

    The unprocessed query selects ``impact(column1, 300, user)`` aliased as
    ``perf``. After ``ImpactProcessor().process_query`` runs, its selected
    columns must equal the hand-built arithmetic expression in ``expected``,
    and formatting the second selected column with
    ``ClickhouseExpressionFormatter`` must yield the exact string asserted at
    the end of the test.
    """
    unprocessed = Query(
        {},
        TableSource("events", ColumnSet([])),
        selected_columns=[
            Column(None, "column2", None),
            FunctionCall(
                "perf",
                "impact",
                (
                    Column(None, "column1", None),
                    Literal(None, 300),
                    Column(None, "user", None),
                ),
            ),
        ],
    )
    expected = Query(
        {},
        TableSource("events", ColumnSet([])),
        selected_columns=[
            Column(None, "column2", None),
            plus(
                minus(
                    Literal(None, 1),
                    div(
                        plus(
                            # Rows with column1 <= 300.
                            countIf(
                                binary_condition(
                                    None,
                                    ConditionFunctions.LTE,
                                    Column(None, "column1", None),
                                    Literal(None, 300),
                                ),
                            ),
                            # Rows with 300 < column1 <= 300 * 4, halved.
                            div(
                                countIf(
                                    binary_condition(
                                        None,
                                        BooleanFunctions.AND,
                                        binary_condition(
                                            None,
                                            ConditionFunctions.GT,
                                            Column(None, "column1", None),
                                            Literal(None, 300),
                                        ),
                                        binary_condition(
                                            None,
                                            ConditionFunctions.LTE,
                                            Column(None, "column1", None),
                                            multiply(
                                                Literal(None, 300), Literal(None, 4)
                                            ),
                                        ),
                                    ),
                                ),
                                Literal(None, 2),
                            ),
                        ),
                        count(),
                    ),
                ),
                multiply(
                    minus(
                        Literal(None, 1),
                        div(
                            Literal(None, 1),
                            # NOTE(review): the parameters given to ``sqrt``
                            # are a parenthesized FunctionCall, not a 1-tuple
                            # (there is no trailing comma), and ``uniq`` is
                            # given a bare Column instead of a tuple. The
                            # expected string below contains
                            # "sqrt(user, uniq(user))", which looks like a
                            # consequence of this -- confirm against
                            # ImpactProcessor before changing either side.
                            FunctionCall(
                                None,
                                "sqrt",
                                (
                                    FunctionCall(
                                        None,
                                        "uniq",
                                        Column(
                                            alias=None,
                                            column_name="user",
                                            table_name=None,
                                        ),
                                    )
                                ),
                            ),
                        ),
                    ),
                    Literal(None, 3),
                ),
            ),
        ],
    )

    # The processor rewrites ``unprocessed`` in place.
    ImpactProcessor().process_query(unprocessed, HTTPRequestSettings())
    assert (
        expected.get_selected_columns_from_ast()
        == unprocessed.get_selected_columns_from_ast()
    )

    # Index 1 is the expanded ``impact`` expression (index 0 is column2).
    ret = unprocessed.get_selected_columns_from_ast()[1].accept(
        ClickhouseExpressionFormatter()
    )
    assert ret == (
        "plus(minus(1, div(plus(countIf(lessOrEquals(column1, 300)), "
        "div(countIf(and(greater(column1, 300), lessOrEquals(column1, "
        "multiply(300, 4)))), 2)), count())), "
        "multiply(minus(1, div(1, sqrt(user, uniq(user)))), 3))"
    )
Example #24
0
from snuba.clickhouse.columns import ColumnSet
from snuba.clickhouse.formatter import ClickhouseExpressionFormatter
from snuba.datasets.schemas.tables import TableSource
from snuba.datasets.transactions import TransactionsDataset
from snuba.query.dsl import multiply
from snuba.query.expressions import Column, FunctionCall, Literal
from snuba.query.logical import Query, SelectedExpression
from snuba.query.processors.timeseries_column_processor import TimeSeriesColumnProcessor
from snuba.request.request_settings import HTTPRequestSettings

tests = [
    (
        3600,
        FunctionCall(
            "my_time",
            "toStartOfHour",
            (Column(None, None, "finish_ts"), Literal(None, "Universal")),
        ),
        "(toStartOfHour(finish_ts, 'Universal') AS my_time)",
    ),
    (
        60,
        FunctionCall(
            "my_time",
            "toStartOfMinute",
            (Column(None, None, "finish_ts"), Literal(None, "Universal")),
        ),
        "(toStartOfMinute(finish_ts, 'Universal') AS my_time)",
    ),
    (
        86400,
Example #25
0
         "event_id",
         "group_id",
         "tags[sentry:release]",
         "message",
         "environment",
         "project_id",
     ],
     [],
     None,
     FunctionCall(
         None,
         OPERATOR_TO_FUNCTION["!="],
         (
             FunctionCall(
                 None,
                 "positionCaseInsensitive",
                 (Column("_snuba_message", None,
                         "message"), Literal(None, "abc")),
             ),
             Literal(None, 0),
         ),
     ),
     False,
 ),
 (
     # Add pre-where condition in the expected order
     {
         "conditions": [
             ["d", "=", "1"],
             ["c", "=", "3"],
             [["and", [["equals", ["a", "'1'"]], ["equals", ["b", "'2'"]]]],
Example #26
0
         "groupby": ["column2", "column3"],
         "aggregations": [["test_func", "column4", "test_func_alias"]],
     },
     Query(
         {},
         TableSource("events", ColumnSet([])),
         selected_columns=[
             SelectedExpression("column2", Column("column2", None,
                                                  "column2")),
             SelectedExpression("column3", Column("column3", None,
                                                  "column3")),
             SelectedExpression(
                 "test_func_alias",
                 FunctionCall(
                     "test_func_alias",
                     "test_func",
                     (Column("column4", None, "column4"), ),
                 ),
             ),
             SelectedExpression("column1", Column("column1", None,
                                                  "column1")),
         ],
         groupby=[
             Column("column2", None, "column2"),
             Column("column3", None, "column3"),
         ],
     ),
     id="Select composed by select, groupby and aggregations",
 ),
 pytest.param(
     {
Example #27
0
     "table1.column1",
     "table1.column1",
 ),  # Declutter aliases - column name is the same as the alias. Do not alias
 (Column(None, None,
         "column1"), "column1", "column1"),  # Basic Column with no table
 (
     Column("alias", "table1", "column1"),
     "(table1.column1 AS alias)",
     "(table1.column1 AS alias)",
 ),  # Column with table and alias
 (
     FunctionCall(
         None,
         "f1",
         (
             Column(None, "table1", "tags"),
             Column(None, "table1", "param2"),
             Literal(None, None),
             Literal(None, "test_string"),
         ),
     ),
     "f1(table1.tags, table1.param2, NULL, 'test_string')",
     "f1(table1.tags, table1.param2, NULL, '$S')",
 ),  # Simple function call with columns and literals
 (
     FunctionCall(
         "alias",
         "f1",
         (Column(None, "table1",
                 "param1"), Column("alias1", "table1", "param2")),
     ),
     "(f1(table1.param1, (table1.param2 AS alias1)) AS alias)",
Example #28
0
def binary_condition(function_name: str, lhs: Expression,
                     rhs: Expression) -> FunctionCall:
    """Build an unaliased ``FunctionCall`` taking ``lhs`` and ``rhs`` in order."""
    operands = (lhs, rhs)
    return FunctionCall(None, function_name, operands)
Example #29
0
    ConditionFunctions,
    binary_condition,
)
from snuba.query.expressions import (
    Argument,
    Column,
    Expression,
    FunctionCall,
    Lambda,
    Literal,
)
from snuba.query.parser.functions import parse_function_to_expr
from snuba.util import tuplify

test_data = [
    (tuplify(["count", []]), FunctionCall(None, "count", ())),
    (
        tuplify(["notEmpty", ["foo"]]),
        FunctionCall(None, "notEmpty", (Column(None, None, "foo"), )),
    ),
    (
        tuplify(["notEmpty", ["arrayElement", ["foo", 1]]]),
        FunctionCall(
            None,
            "notEmpty",
            (FunctionCall(None, "arrayElement",
                          (Column(None, None, "foo"), Literal(None, 1))), ),
        ),
    ),
    (
        tuplify(["foo", ["bar", ["qux"], "baz"]]),
Example #30
0
)
from snuba.query.parser.conditions import parse_conditions_to_expr
from snuba.util import tuplify

test_conditions = [
    (
        [],
        None,
    ),
    (
        [[[]], []],
        None,
    ),
    (
        [["a", "=", 1]],
        FunctionCall(None, ConditionFunctions.EQ,
                     (Column(None, None, "a"), Literal(None, 1))),
    ),
    (
        [["a", "=", "'nice \n a newline\n'"]],
        FunctionCall(
            None,
            ConditionFunctions.EQ,
            (Column(None, None, "a"), Literal(None, "'nice \n a newline\n'")),
        ),
    ),
    (
        [[["a", "=", 1]]],
        FunctionCall(None, ConditionFunctions.EQ,
                     (Column(None, None, "a"), Literal(None, 1))),
    ),
    (