"event_id", "group_id", "tags[sentry:release]", "message", "environment", "project_id", ], [], None, [[["positionCaseInsensitive", ["message", "'abc'"]], "!=", 0]], FunctionCall( None, OPERATOR_TO_FUNCTION["!="], ( FunctionCall( None, "positionCaseInsensitive", (Column("message", None, "message"), Literal(None, "abc")), ), Literal(None, 0), ), ), ), ( # Add pre-where condition in the expected order { "conditions": [ ["d", "=", "1"], ["c", "=", "3"], ["a", "=", "1"], ["b", "=", "2"], ],
None, selected_columns=[ SelectedExpression(name=s.alias, expression=s) for s in selected_columns or [] ], condition=condition, having=having, ) tags_filter_tests = [ pytest.param( build_query(selected_columns=[ FunctionCall( "tags_key", "arrayJoin", (Column(None, None, "tags.key"), ), ), ], ), set(), id="no tag filter", ), pytest.param( build_query( selected_columns=[ FunctionCall( "tags_key", "arrayJoin", (Column(None, None, "tags.key"), ), ), ],
import pytest from snuba.query.expressions import ( Column, CurriedFunctionCall, FunctionCall, Literal, ) from snuba.query.parser.expressions import parse_aggregation test_data = [ ( ["count", "event_id", None], FunctionCall(None, "count", (Column(None, "event_id", None),)), ), # Simple aggregation ( ["count()", "", None], FunctionCall(None, "count", ()), ), # Common way to provide count() ( ["count()", None, None], FunctionCall(None, "count", ()), ), # Common way to provide count() ( ["count()", "event_id", None], CurriedFunctionCall( None, FunctionCall(None, "count", ()), (Column(None, "event_id", None),) ), ), # This is probably wrong, but we cannot disambiguate it at this level ( ["uniq", "platform", "uniq_platforms"],
("message", String()), ("group_id", UInt(32)), ]) GROUPS_SCHEMA = ColumnSet([ ("id", UInt(32)), ("project_id", UInt(32)), ("group_id", UInt(32)), ("message", String()), ]) SIMPLE_QUERY = ClickhouseQuery( Table("errors_local", ERRORS_SCHEMA, final=True, sampling_rate=0.1), selected_columns=[ SelectedExpression( "alias", FunctionCall("alias", "something", (Column(None, None, "event_id"), )), ), SelectedExpression( "group_id", Column(None, None, "group_id"), ), ], array_join=None, condition=binary_condition( ConditionFunctions.EQ, FunctionCall("alias", "tag", (Column(None, None, "group_id"), )), Literal(None, "1"), ), groupby=[FunctionCall("alias", "tag", (Column(None, None, "message"), ))], prewhere=binary_condition( ConditionFunctions.EQ,
from snuba.query.expressions import Column, FunctionCall, Literal from snuba.querylog.query_metadata import ClickhouseQueryProfile, FilterProfile from snuba.state import safe_dumps test_cases = [ pytest.param( ClickhouseQuery( TableSource("events", ColumnSet([])), selected_columns=[ SelectedExpression("column2", Column("column2", None, "column2")), SelectedExpression( "something", FunctionCall( "something", "arrayJoin", (Column(None, None, "contexts.key"), ), ), ), ], condition=binary_condition( None, BooleanFunctions.AND, binary_condition( None, ConditionFunctions.GTE, Column(None, None, "timestamp"), Literal(None, datetime(2020, 8, 1)), ), binary_condition( None,
def transform(match: MatchResult, exp: Expression) -> Expression:
    """Wrap the matched column in ``nullIf(<column>, '')``.

    ``match`` is accepted but not read here. The returned call is built
    around a fresh ``Column`` that keeps only ``exp.column_name``; its
    other two positional fields are set to ``None``.
    """
    assert isinstance(exp, Column)  # mypy
    bare_column = Column(None, None, exp.column_name)
    empty_string = Literal(None, "")
    return FunctionCall(None, "nullIf", (bare_column, empty_string))
def unary_condition(
    alias: Optional[str], function_name: str, operand: Expression
) -> FunctionCall:
    """Build a call to ``function_name`` taking ``operand`` as its only argument.

    ``alias`` is forwarded unchanged as the first field of the call.
    """
    parameters = (operand,)
    return FunctionCall(alias, function_name, parameters)
def multiply(
    lhs: Expression, rhs: Expression, alias: Optional[str] = None
) -> FunctionCall:
    """Return a ``multiply`` function call over ``lhs`` and ``rhs``.

    ``alias`` is forwarded as the call's alias and defaults to ``None``.
    """
    operands = (lhs, rhs)
    return FunctionCall(alias, "multiply", operands)
def div(
    lhs: Expression, rhs: Expression, alias: Optional[str] = None
) -> FunctionCall:
    """Return a ``div`` function call with ``lhs`` and ``rhs`` as arguments.

    The arguments are passed in the order given; ``alias`` defaults to
    ``None`` and is forwarded as the call's alias.
    """
    operands = (lhs, rhs)
    return FunctionCall(alias, "div", operands)
def identity(expression: Expression, alias: Optional[str]) -> FunctionCall:
    """Wrap ``expression`` in an ``identity`` function call named ``alias``."""
    wrapped = (expression,)
    return FunctionCall(alias, "identity", wrapped)
def minus(
    lhs: Expression, rhs: Expression, alias: Optional[str] = None
) -> FunctionCall:
    """Return a ``minus`` function call with ``lhs`` first and ``rhs`` second.

    ``alias`` defaults to ``None`` and is forwarded as the call's alias.
    """
    operands = (lhs, rhs)
    return FunctionCall(alias, "minus", operands)
def tupleElement(
    alias: Optional[str], tuple_expr: Expression, index: Expression
) -> FunctionCall:
    """Return a ``tupleElement`` call over ``tuple_expr`` and ``index``.

    Both arguments are expressions and are passed through in that order.
    """
    arguments = (tuple_expr, index)
    return FunctionCall(alias, "tupleElement", arguments)
def arrayJoin(alias: Optional[str], content: Expression) -> Expression:
    """Return an ``arrayJoin`` function call applied to ``content``.

    ``alias`` is forwarded unchanged as the call's alias.
    """
    arguments = (content,)
    return FunctionCall(alias, "arrayJoin", arguments)
def arrayElement(
    alias: Optional[str], array: Expression, index: Expression
) -> FunctionCall:
    """Return an ``arrayElement`` call over ``array`` and ``index``.

    Both arguments are expressions and are passed through in that order.
    """
    arguments = (array, index)
    return FunctionCall(alias, "arrayElement", arguments)
def unary_condition(function_name: str, operand: Expression) -> FunctionCall:
    """Build an unaliased call to ``function_name`` with ``operand`` as its only argument."""
    parameters = (operand,)
    return FunctionCall(None, function_name, parameters)
def count(
    column: Optional[Column] = None, alias: Optional[str] = None
) -> FunctionCall:
    """Return a ``count`` function call.

    With no ``column`` the call has an empty parameter tuple (``count()``);
    otherwise ``column`` is its single parameter. ``alias`` is forwarded as
    the call's alias and defaults to ``None``.
    """
    # Test against None explicitly: the argument is Optional, and relying on
    # the truthiness of an expression object would silently drop a column
    # whose type happened to evaluate as false.
    parameters = () if column is None else (column,)
    return FunctionCall(alias, "count", parameters)
QueryEntity(EntityKey.EVENTS, ColumnSet([])), selected_columns=[ SelectedExpression(name=None, expression=Column(None, None, "column1")), SelectedExpression(name=None, expression=Column(None, None, "column2")), ], ), Query( QueryEntity(EntityKey.EVENTS, ColumnSet([])), selected_columns=[ SelectedExpression( name=None, expression=FunctionCall( None, "nullIf", (Column(None, None, "column1"), Literal(None, "")), ), ), SelectedExpression(name=None, expression=Column(None, None, "column2")), ], ), id="replace unaliased column", ), pytest.param( Query( QueryEntity(EntityKey.EVENTS, ColumnSet([])), selected_columns=[ SelectedExpression(name=None, expression=Column("some_alias", None,
def literals_tuple(
    alias: Optional[str], literals: Sequence[Literal]
) -> FunctionCall:
    """Return a ``tuple`` function call whose parameters are ``literals``.

    The sequence is copied into a Python tuple, preserving its order.
    """
    members = (*literals,)
    return FunctionCall(alias, "tuple", members)
def binary_condition(
    alias: Optional[str], function_name: str, lhs: Expression, rhs: Expression
) -> FunctionCall:
    """Build a two-argument call to ``function_name`` over ``lhs`` and ``rhs``.

    ``alias`` is forwarded unchanged as the call's alias.
    """
    operands = (lhs, rhs)
    return FunctionCall(alias, function_name, operands)
def test_events_boolean_context() -> None:
    """Check the rewrite of ``contexts[device.charging]`` into a ``multiIf``.

    The query initially selects the context value through
    ``arrayElement``/``indexOf``. After ``MappingColumnPromoter`` and
    ``EventsBooleanContextsProcessor`` have run, its selected columns must
    equal a ``multiIf`` over ``toString(device_charging)``.
    """
    promoted_alias = "contexts[device.charging]"

    def charging_as_string() -> FunctionCall:
        # Build a fresh toString(device_charging) node for every use.
        return FunctionCall(
            None, "toString", (Column(None, None, "device_charging"),)
        )

    columns = ColumnSet(
        [
            ("device_charging", UInt(8, Modifier(nullable=True))),
            ("contexts", Nested([("key", String()), ("value", String())])),
        ]
    )

    # Input: the context value looked up by key position.
    key_position = FunctionCall(
        None,
        "indexOf",
        (Column(None, None, "contexts.key"), Literal(None, "device.charging")),
    )
    context_lookup = FunctionCall(
        promoted_alias,
        "arrayElement",
        (Column(None, None, "contexts.value"), key_position),
    )
    query = ClickhouseQuery(
        TableSource("events", columns),
        selected_columns=[SelectedExpression(promoted_alias, context_lookup)],
    )

    # Expected output: '' stays '', '1'/'True' become 'True', else 'False'.
    is_unset = binary_condition(
        None, ConditionFunctions.EQ, charging_as_string(), Literal(None, "")
    )
    is_truthy = binary_condition(
        None,
        ConditionFunctions.IN,
        charging_as_string(),
        literals_tuple(None, [Literal(None, "1"), Literal(None, "True")]),
    )
    boolean_rendering = FunctionCall(
        promoted_alias,
        "multiIf",
        (
            is_unset,
            Literal(None, ""),
            is_truthy,
            Literal(None, "True"),
            Literal(None, "False"),
        ),
    )
    expected = ClickhouseQuery(
        TableSource("events", columns),
        selected_columns=[SelectedExpression(promoted_alias, boolean_rendering)],
    )

    settings = HTTPRequestSettings()
    promotion_spec = {"contexts": {"device.charging": "device_charging"}}
    MappingColumnPromoter(promotion_spec).process_query(query, settings)
    EventsBooleanContextsProcessor().process_query(query, settings)

    actual_columns = query.get_selected_columns_from_ast()
    expected_columns = expected.get_selected_columns_from_ast()
    assert actual_columns == expected_columns
from snuba.query.processors.quota_processor import ResourceQuotaProcessor from snuba.query.processors.tags_expander import TagsExpanderProcessor from snuba.query.processors.timeseries_processor import TimeSeriesProcessor from snuba.query.query_settings import QuerySettings from snuba.query.validation.validators import EntityRequiredColumnValidator transaction_translator = TranslationMappers( columns=[ ColumnToFunction( None, "ip_address", "coalesce", ( FunctionCall( None, "IPv4NumToString", (Column(None, None, "ip_address_v4"), ), ), FunctionCall( None, "IPv6NumToString", (Column(None, None, "ip_address_v6"), ), ), ), ), ColumnToFunction(None, "user", "nullIf", (Column(None, None, "user"), Literal(None, ""))), # These column aliases originally existed in the ``discover`` dataset, # but now live here to maintain compatibility between the composite # ``discover`` dataset and the standalone ``transaction`` dataset. In # the future, these aliases should be defined on the Transaction entity
def __init__(self) -> None:
    """Assemble the column set, storages and query pipeline for this entity.

    Two pipeline builders are prepared, one backed by the EVENTS storage
    and one by the DISCOVER storage. ``settings.ERRORS_ROLLOUT_ALL`` decides
    which storage and builder are handed to the parent constructor.
    """
    # Columns declared directly here; they are combined with EVENTS_COLUMNS
    # and TRANSACTIONS_COLUMNS below to form the abstract column set.
    self.__common_columns = ColumnSet([
        ("event_id", FixedString(32)),
        ("project_id", UInt(64)),
        ("type", String(Modifiers(nullable=True))),
        ("timestamp", DateTime()),
        ("platform", String(Modifiers(nullable=True))),
        ("environment", String(Modifiers(nullable=True))),
        ("release", String(Modifiers(nullable=True))),
        ("dist", String(Modifiers(nullable=True))),
        ("user", String(Modifiers(nullable=True))),
        ("transaction", String(Modifiers(nullable=True))),
        ("message", String(Modifiers(nullable=True))),
        ("title", String(Modifiers(nullable=True))),
        # User
        ("user_id", String(Modifiers(nullable=True))),
        ("username", String(Modifiers(nullable=True))),
        ("email", String(Modifiers(nullable=True))),
        ("ip_address", String(Modifiers(nullable=True))),
        # SDK
        ("sdk_name", String(Modifiers(nullable=True))),
        ("sdk_version", String(Modifiers(nullable=True))),
        # geo location context
        ("geo_country_code", String(Modifiers(nullable=True))),
        ("geo_region", String(Modifiers(nullable=True))),
        ("geo_city", String(Modifiers(nullable=True))),
        ("http_method", String(Modifiers(nullable=True))),
        ("http_referer", String(Modifiers(nullable=True))),
        # Other tags and context
        ("tags", Nested([("key", String()), ("value", String())])),
        ("contexts", Nested([("key", String()), ("value", String())])),
        ("trace_id", String(Modifiers(nullable=True))),
    ])
    self.__events_columns = EVENTS_COLUMNS
    self.__transactions_columns = TRANSACTIONS_COLUMNS

    # Pipeline over the EVENTS storage: the shared mapper chain plus mappers
    # that resolve release/dist/user through the "tags" column.
    events_storage = get_storage(StorageKey.EVENTS)
    events_pipeline_builder = SimplePipelineBuilder(
        query_plan_builder=SelectedStorageQueryPlanBuilder(
            selector=EventsQueryStorageSelector(
                mappers=events_translation_mappers.
                concat(transaction_translation_mappers).concat(
                    null_function_translation_mappers).concat(
                        TranslationMappers(
                            # XXX: Remove once we are using errors
                            columns=[
                                ColumnToMapping(None, "release", None, "tags",
                                                "sentry:release"),
                                ColumnToMapping(None, "dist", None, "tags",
                                                "sentry:dist"),
                                ColumnToMapping(None, "user", None, "tags",
                                                "sentry:user"),
                            ],
                            subscriptables=[
                                SubscriptableMapper(
                                    None, "tags", None, "tags"),
                                SubscriptableMapper(
                                    None, "contexts", None, "contexts"),
                            ],
                        )))),
    )

    # Plan builder over the DISCOVER storage: the same shared mapper chain,
    # followed by column renames, context-backed geo columns and function
    # translations, then the tags/contexts subscriptable mappers.
    discover_storage = get_storage(StorageKey.DISCOVER)
    discover_storage_plan_builder = SingleStorageQueryPlanBuilder(
        storage=discover_storage,
        mappers=events_translation_mappers.
        concat(transaction_translation_mappers).concat(
            null_function_translation_mappers).concat(
                TranslationMappers(columns=[
                    # ip_address is the first non-null of the stringified
                    # v4 and v6 columns.
                    ColumnToFunction(
                        None,
                        "ip_address",
                        "coalesce",
                        (
                            FunctionCall(
                                None,
                                "IPv4NumToString",
                                (Column(None, None, "ip_address_v4"), ),
                            ),
                            FunctionCall(
                                None,
                                "IPv6NumToString",
                                (Column(None, None, "ip_address_v6"), ),
                            ),
                        ),
                    ),
                    ColumnToColumn(None, "transaction", None, "transaction_name"),
                    ColumnToColumn(None, "username", None, "user_name"),
                    ColumnToColumn(None, "email", None, "user_email"),
                    ColumnToMapping(
                        None,
                        "geo_country_code",
                        None,
                        "contexts",
                        "geo.country_code",
                        nullable=True,
                    ),
                    ColumnToMapping(
                        None,
                        "geo_region",
                        None,
                        "contexts",
                        "geo.region",
                        nullable=True,
                    ),
                    ColumnToMapping(
                        None,
                        "geo_city",
                        None,
                        "contexts",
                        "geo.city",
                        nullable=True,
                    ),
                    ColumnToFunction(
                        None,
                        "user",
                        "nullIf",
                        (Column(None, None, "user"), Literal(None, "")),
                    ),
                ])).concat(
                    TranslationMappers(subscriptables=[
                        SubscriptableMapper(None, "tags", None, "tags"),
                        SubscriptableMapper(None, "contexts", None, "contexts"),
                    ], )),
    )
    discover_pipeline_builder = SimplePipelineBuilder(
        query_plan_builder=discover_storage_plan_builder)

    # Pick the storage and pipeline from the rollout flag. When the flag is
    # set, a delegator is built with the discover pipeline as "primary" and
    # a sampled pipeline over the same plan builder as "sampler".
    pipeline_builder: Union[PipelineDelegator, SimplePipelineBuilder]
    if settings.ERRORS_ROLLOUT_ALL:
        storage = discover_storage
        sampled_pipeline_builder = SampledSimplePipelineBuilder(
            query_plan_builder=discover_storage_plan_builder)
        pipeline_builder = PipelineDelegator(
            query_pipeline_builders={
                "primary": discover_pipeline_builder,
                "sampler": sampled_pipeline_builder,
            },
            selector_func=sampling_selector_func,
            callback_func=sampling_callback_func,
        )
    else:
        storage = events_storage
        pipeline_builder = events_pipeline_builder

    super().__init__(
        storages=[storage],
        query_pipeline_builder=pipeline_builder,
        abstract_column_set=(self.__common_columns + self.__events_columns +
                             self.__transactions_columns),
        join_relationships={},
        writable_storage=None,
        validators=[EntityRequiredColumnValidator({"project_id"})],
        required_time_column="timestamp",
    )
def test_impact_format_expressions() -> None:
    """Check how ``ImpactProcessor`` expands an ``impact`` function call.

    The input query selects ``impact(column1, 300, user)`` aliased ``perf``.
    After processing, the selected columns must equal the hand-built
    expression tree below, and the second selected column must format to
    the expected ClickHouse string.
    """
    unprocessed = Query(
        {},
        TableSource("events", ColumnSet([])),
        selected_columns=[
            Column(None, "column2", None),
            FunctionCall(
                "perf",
                "impact",
                (
                    Column(None, "column1", None),
                    Literal(None, 300),
                    Column(None, "user", None),
                ),
            ),
        ],
    )
    expected = Query(
        {},
        TableSource("events", ColumnSet([])),
        selected_columns=[
            Column(None, "column2", None),
            plus(
                minus(
                    Literal(None, 1),
                    div(
                        plus(
                            countIf(
                                binary_condition(
                                    None,
                                    ConditionFunctions.LTE,
                                    Column(None, "column1", None),
                                    Literal(None, 300),
                                ),
                            ),
                            div(
                                countIf(
                                    binary_condition(
                                        None,
                                        BooleanFunctions.AND,
                                        binary_condition(
                                            None,
                                            ConditionFunctions.GT,
                                            Column(None, "column1", None),
                                            Literal(None, 300),
                                        ),
                                        binary_condition(
                                            None,
                                            ConditionFunctions.LTE,
                                            Column(None, "column1", None),
                                            multiply(
                                                Literal(None, 300), Literal(None, 4)
                                            ),
                                        ),
                                    ),
                                ),
                                Literal(None, 2),
                            ),
                        ),
                        count(),
                    ),
                ),
                multiply(
                    minus(
                        Literal(None, 1),
                        div(
                            Literal(None, 1),
                            # NOTE(review): unlike every other call in this
                            # test, the parameters of "sqrt" and "uniq" are
                            # not tuples: the outer parentheses have no
                            # trailing comma and "uniq" receives a bare
                            # Column. The expected string below contains
                            # "sqrt(user, uniq(user))", which looks like a
                            # consequence of that. Confirm against
                            # ImpactProcessor before changing either side.
                            FunctionCall(
                                None,
                                "sqrt",
                                (
                                    FunctionCall(
                                        None,
                                        "uniq",
                                        Column(
                                            alias=None,
                                            column_name="user",
                                            table_name=None,
                                        ),
                                    )
                                ),
                            ),
                        ),
                    ),
                    Literal(None, 3),
                ),
            ),
        ],
    )

    # The processor is applied to `unprocessed`; `expected` is only compared.
    ImpactProcessor().process_query(unprocessed, HTTPRequestSettings())
    assert (
        expected.get_selected_columns_from_ast()
        == unprocessed.get_selected_columns_from_ast()
    )

    # Index 1 is the expanded impact expression; index 0 is column2.
    ret = unprocessed.get_selected_columns_from_ast()[1].accept(
        ClickhouseExpressionFormatter()
    )
    assert ret == (
        "plus(minus(1, div(plus(countIf(lessOrEquals(column1, 300)), "
        "div(countIf(and(greater(column1, 300), lessOrEquals(column1, "
        "multiply(300, 4)))), 2)), count())), "
        "multiply(minus(1, div(1, sqrt(user, uniq(user)))), 3))"
    )
from snuba.clickhouse.columns import ColumnSet from snuba.clickhouse.formatter import ClickhouseExpressionFormatter from snuba.datasets.schemas.tables import TableSource from snuba.datasets.transactions import TransactionsDataset from snuba.query.dsl import multiply from snuba.query.expressions import Column, FunctionCall, Literal from snuba.query.logical import Query, SelectedExpression from snuba.query.processors.timeseries_column_processor import TimeSeriesColumnProcessor from snuba.request.request_settings import HTTPRequestSettings tests = [ ( 3600, FunctionCall( "my_time", "toStartOfHour", (Column(None, None, "finish_ts"), Literal(None, "Universal")), ), "(toStartOfHour(finish_ts, 'Universal') AS my_time)", ), ( 60, FunctionCall( "my_time", "toStartOfMinute", (Column(None, None, "finish_ts"), Literal(None, "Universal")), ), "(toStartOfMinute(finish_ts, 'Universal') AS my_time)", ), ( 86400,
"event_id", "group_id", "tags[sentry:release]", "message", "environment", "project_id", ], [], None, FunctionCall( None, OPERATOR_TO_FUNCTION["!="], ( FunctionCall( None, "positionCaseInsensitive", (Column("_snuba_message", None, "message"), Literal(None, "abc")), ), Literal(None, 0), ), ), False, ), ( # Add pre-where condition in the expected order { "conditions": [ ["d", "=", "1"], ["c", "=", "3"], [["and", [["equals", ["a", "'1'"]], ["equals", ["b", "'2'"]]]],
"groupby": ["column2", "column3"], "aggregations": [["test_func", "column4", "test_func_alias"]], }, Query( {}, TableSource("events", ColumnSet([])), selected_columns=[ SelectedExpression("column2", Column("column2", None, "column2")), SelectedExpression("column3", Column("column3", None, "column3")), SelectedExpression( "test_func_alias", FunctionCall( "test_func_alias", "test_func", (Column("column4", None, "column4"), ), ), ), SelectedExpression("column1", Column("column1", None, "column1")), ], groupby=[ Column("column2", None, "column2"), Column("column3", None, "column3"), ], ), id="Select composed by select, groupby and aggregations", ), pytest.param( {
"table1.column1", "table1.column1", ), # Declutter aliases - column name is the same as the alias. Do not alias (Column(None, None, "column1"), "column1", "column1"), # Basic Column with no table ( Column("alias", "table1", "column1"), "(table1.column1 AS alias)", "(table1.column1 AS alias)", ), # Column with table and alias ( FunctionCall( None, "f1", ( Column(None, "table1", "tags"), Column(None, "table1", "param2"), Literal(None, None), Literal(None, "test_string"), ), ), "f1(table1.tags, table1.param2, NULL, 'test_string')", "f1(table1.tags, table1.param2, NULL, '$S')", ), # Simple function call with columns and literals ( FunctionCall( "alias", "f1", (Column(None, "table1", "param1"), Column("alias1", "table1", "param2")), ), "(f1(table1.param1, (table1.param2 AS alias1)) AS alias)",
def binary_condition(
    function_name: str, lhs: Expression, rhs: Expression
) -> FunctionCall:
    """Build an unaliased two-argument call to ``function_name`` over ``lhs`` and ``rhs``."""
    operands = (lhs, rhs)
    return FunctionCall(None, function_name, operands)
ConditionFunctions, binary_condition, ) from snuba.query.expressions import ( Argument, Column, Expression, FunctionCall, Lambda, Literal, ) from snuba.query.parser.functions import parse_function_to_expr from snuba.util import tuplify test_data = [ (tuplify(["count", []]), FunctionCall(None, "count", ())), ( tuplify(["notEmpty", ["foo"]]), FunctionCall(None, "notEmpty", (Column(None, None, "foo"), )), ), ( tuplify(["notEmpty", ["arrayElement", ["foo", 1]]]), FunctionCall( None, "notEmpty", (FunctionCall(None, "arrayElement", (Column(None, None, "foo"), Literal(None, 1))), ), ), ), ( tuplify(["foo", ["bar", ["qux"], "baz"]]),
) from snuba.query.parser.conditions import parse_conditions_to_expr from snuba.util import tuplify test_conditions = [ ( [], None, ), ( [[[]], []], None, ), ( [["a", "=", 1]], FunctionCall(None, ConditionFunctions.EQ, (Column(None, None, "a"), Literal(None, 1))), ), ( [["a", "=", "'nice \n a newline\n'"]], FunctionCall( None, ConditionFunctions.EQ, (Column(None, None, "a"), Literal(None, "'nice \n a newline\n'")), ), ), ( [[["a", "=", 1]]], FunctionCall(None, ConditionFunctions.EQ, (Column(None, None, "a"), Literal(None, 1))), ), (