def test_outcomes_columns_validation(key: EntityKey) -> None:
    """The columns validator rejects unknown columns and accepts known ones."""
    entity = get_entity(key)
    data_model = entity.get_data_model()
    query_entity = QueryEntity(key, data_model)

    # Selects a column that does not exist in the entity's data model.
    invalid_query = LogicalQuery(
        query_entity,
        selected_columns=[
            SelectedExpression("asdf", Column("_snuba_asdf", None, "asdf")),
        ],
    )

    # Selects every column the entity actually defines.
    valid_query = LogicalQuery(
        query_entity,
        selected_columns=[
            SelectedExpression(col.name, Column(f"_snuba_{col.name}", None, col.name))
            for col in data_model.columns
        ],
    )

    validator = EntityContainsColumnsValidator(
        data_model, validation_mode=ColumnValidationMode.ERROR
    )
    with pytest.raises(InvalidQueryException):
        validator.validate(invalid_query)
    # Must not raise for a query built entirely from known columns.
    validator.validate(valid_query)
def test_nested_query() -> None:
    """
    Simply builds a nested query.
    """
    inner = LogicalQuery(
        Entity(EntityKey.EVENTS, ColumnSet([("event_id", String())])),
        selected_columns=[
            SelectedExpression(
                "string_evt_id", Column("string_evt_id", None, "event_id")
            )
        ],
    )
    outer = CompositeQuery(
        from_clause=inner,
        selected_columns=[
            SelectedExpression("output", Column("output", None, "string_evt_id"))
        ],
    )

    # The iterator methods on the composite query do not descend into the
    # nested query.
    assert outer.get_all_ast_referenced_columns() == {
        Column("output", None, "string_evt_id")
    }

    # The schema of the nested query is the selected clause of that query.
    assert outer.get_from_clause().get_columns() == ColumnSet(
        [("string_evt_id", Any())]
    )
def query() -> ClickhouseQuery:
    """Return a minimal Clickhouse query filtering project_id IN [2]."""
    # Legacy-format body mirroring the AST condition built below.
    legacy_body = {"conditions": [("project_id", "IN", [2])]}
    logical = LogicalQuery(
        legacy_body,
        TableSource("my_table", ColumnSet([])),
        condition=build_in("project_id", [2]),
    )
    return ClickhouseQuery(logical)
def test_col_split(
    dataset_name: str,
    id_column: str,
    project_column: str,
    timestamp_column: str,
    first_query_data: Sequence[MutableMapping[str, Any]],
    second_query_data: Sequence[MutableMapping[str, Any]],
) -> None:
    """Exercise the column-split strategy, serving canned data per sub-query."""

    def do_query(
        split_query: ClickhouseQuery,
        request_settings: RequestSettings,
        reader: Reader[SqlQuery],
    ) -> QueryResult:
        # The legacy selected-columns list must mirror the AST selection.
        selected_cols = split_query.get_selected_columns()
        ast_cols = [
            selected.expression.column_name
            for selected in split_query.get_selected_columns_from_ast() or []
            if isinstance(selected.expression, Column)
        ]
        assert selected_cols == ast_cols

        # Serve whichever canned payload matches the requested columns.
        if selected_cols == list(first_query_data[0].keys()):
            return QueryResult({"data": first_query_data}, {})
        if selected_cols == list(second_query_data[0].keys()):
            return QueryResult({"data": second_query_data}, {})
        raise ValueError(f"Unexpected selected columns: {selected_cols}")

    events = get_dataset(dataset_name)
    column_names = list(second_query_data[0].keys())
    query = ClickhouseQuery(
        LogicalQuery(
            {
                "selected_columns": column_names,
                "conditions": [""],
                "orderby": "events.event_id",
                "sample": 10,
                "limit": 100,
                "offset": 50,
            },
            events.get_all_storages()[0].get_schema().get_data_source(),
            selected_columns=[
                SelectedExpression(name=name, expression=Column(None, None, name))
                for name in column_names
            ],
        )
    )

    strategy = SimpleQueryPlanExecutionStrategy(
        ClickhouseCluster("localhost", 1024, "default", "", "default", 80, set(), True),
        [],
        [
            ColumnSplitQueryStrategy(id_column, project_column, timestamp_column),
            TimeSplitQueryStrategy(timestamp_col=timestamp_column),
        ],
    )
    strategy.execute(query, HTTPRequestSettings(), do_query)
def query_fn(cond: Optional[Expression]) -> LogicalQuery:
    """Build a minimal query over the enclosing `key`/`entity` with `cond`."""
    time_selection = SelectedExpression(
        "time", Column("_snuba_timestamp", None, "timestamp")
    )
    return LogicalQuery(
        QueryEntity(key, entity.get_data_model()),
        selected_columns=[time_selection],
        condition=cond,
    )
def build_query(self) -> ProcessableQuery[Entity]:
    """Assemble the logical query from the builder's accumulated state.

    Selected expressions are sorted by name for a deterministic column
    order; the collected conditions, if any, are AND-ed together.
    """
    return LogicalQuery(
        from_clause=self.__data_source,
        # sorted() already returns a new list; the original list(sorted(...))
        # wrapper was redundant.
        selected_columns=sorted(
            self.__selected_expressions, key=lambda selected: selected.name
        ),
        condition=combine_and_conditions(self.__conditions)
        if self.__conditions
        else None,
    )
def test_join_query() -> None:
    """Join an events query to a groups query and check the combined schema."""
    events_side = LogicalQuery(
        Entity(
            EntityKey.EVENTS,
            ColumnSet([("event_id", String()), ("group_id", UInt(32))]),
        ),
        selected_columns=[
            SelectedExpression("group_id", Column("group_id", None, "group_id")),
            SelectedExpression(
                "string_evt_id", Column("string_evt_id", None, "event_id")
            ),
        ],
    )
    groups_side = LogicalQuery(
        Entity(
            EntityKey.GROUPEDMESSAGES,
            ColumnSet([("id", UInt(32)), ("message", String())]),
        ),
        selected_columns=[
            SelectedExpression("group_id", Column("group_id", None, "id"))
        ],
    )
    joined = CompositeQuery(
        from_clause=JoinClause(
            left_node=IndividualNode("e", events_side),
            right_node=IndividualNode("g", groups_side),
            keys=[
                JoinCondition(
                    left=JoinConditionExpression("e", "group_id"),
                    right=JoinConditionExpression("g", "group_id"),
                )
            ],
            join_type=JoinType.INNER,
        )
    )

    # The join's schema exposes each side's selection under its alias.
    schema = joined.get_from_clause().get_columns()
    assert "e.string_evt_id" in schema
    assert "g.group_id" in schema
def test_entity_validation_failure(
    key: EntityKey, condition: Optional[Expression]
) -> None:
    """The entity's required-condition check must reject this condition."""
    entity = get_entity(key)
    failing_query = LogicalQuery(
        QueryEntity(key, entity.get_data_model()),
        selected_columns=[
            SelectedExpression(
                "time", Column("_snuba_timestamp", None, "timestamp")
            ),
        ],
        condition=condition,
    )
    assert not entity.validate_required_conditions(failing_query)
def build_node(
    alias: str,
    from_clause: Entity,
    selected_columns: Sequence[SelectedExpression],
    condition: Optional[Expression],
) -> IndividualNode[Entity]:
    """Wrap a simple logical query in an aliased join node."""
    inner_query = LogicalQuery(
        from_clause=from_clause,
        selected_columns=selected_columns,
        condition=condition,
    )
    return IndividualNode(alias=alias, data_source=inner_query)
def test_entity_validation(key: EntityKey, condition: Optional[Expression]) -> None:
    """A query carrying this condition passes the required-column validator."""
    passing_query = LogicalQuery(
        QueryEntity(key, get_entity(key).get_data_model()),
        selected_columns=[
            SelectedExpression(
                "time", Column("_snuba_timestamp", None, "timestamp")
            ),
        ],
        condition=condition,
    )
    # validate() raises on failure; returning silently is success.
    EntityRequiredColumnValidator({"project_id"}).validate(passing_query)
def test_no_time_based_validation(key: EntityKey, condition: Expression) -> None:
    """This condition passes the no-time-based-condition validator."""
    entity = get_entity(key)
    checked_query = LogicalQuery(
        QueryEntity(key, entity.get_data_model()),
        selected_columns=[
            SelectedExpression(
                "time", Column("_snuba_timestamp", None, "timestamp")
            ),
        ],
        condition=condition,
    )
    time_column = entity.required_time_column
    assert time_column is not None
    # validate() raises on failure; returning silently is success.
    NoTimeBasedConditionValidator(time_column).validate(checked_query)
def visit_query_exp(
    self, node: Node, visited_children: Iterable[Any]
) -> Union[LogicalQuery, CompositeQuery[QueryEntity]]:
    """Assemble the parsed clauses into a logical or composite query.

    The visited children arrive positionally; clauses absent from the
    query parse as bare grammar ``Node`` objects and are dropped before
    the query constructor is called.
    """
    args: MutableMapping[str, Any] = {}
    (
        data_source,
        args["selected_columns"],
        args["groupby"],
        args["array_join"],
        args["condition"],
        args["having"],
        args["order_by"],
        args["limitby"],
        args["limit"],
        args["offset"],
        args["granularity"],
        args["totals"],
        _,
    ) = visited_children

    # Missing clauses are still grammar Nodes: remove them so only real
    # values reach the query constructors.
    for clause in list(args.keys()):
        if isinstance(args[clause], Node):
            del args[clause]

    if "groupby" in args:
        # Grouped columns are implicitly part of the selection.
        if "selected_columns" not in args:
            args["selected_columns"] = args["groupby"]
        else:
            args["selected_columns"] = args["groupby"] + args["selected_columns"]
        # BUG FIX: the original stored a lazy `map` object here. A map is a
        # one-shot iterator, so any second read of the query's groupby would
        # silently see it empty. Materialize it as a list instead.
        args["groupby"] = [gb.expression for gb in args["groupby"]]

    if isinstance(data_source, (CompositeQuery, LogicalQuery, JoinClause)):
        args["from_clause"] = data_source
        return CompositeQuery(**args)

    args.update({"prewhere": None, "from_clause": data_source})
    if isinstance(data_source, QueryEntity):
        # TODO: How sample rate gets stored needs to be addressed in a future PR
        args["sample"] = data_source.sample

    return LogicalQuery(**args)
def test_no_split(
    dataset_name: str, id_column: str, project_column: str, timestamp_column: str
) -> None:
    """A single-column query should pass through the splitters untouched."""
    events = get_dataset(dataset_name)
    query = ClickhouseQuery(
        LogicalQuery(
            {
                "selected_columns": ["event_id"],
                "conditions": [""],
                "orderby": "event_id",
                "sample": 10,
                "limit": 100,
                "offset": 50,
            },
            events.get_all_storages()[0].get_schema().get_data_source(),
        )
    )

    def do_query(
        received_query: ClickhouseQuery,
        request_settings: RequestSettings,
        reader: Reader[SqlQuery],
    ) -> QueryResult:
        # BUG FIX: the parameter used to be named `query`, shadowing the
        # outer query and reducing this check to the vacuous `query == query`.
        # Compare the query the strategy hands us to the one we built.
        assert received_query == query
        return QueryResult({}, {})

    strategy = SimpleQueryPlanExecutionStrategy(
        ClickhouseCluster("localhost", 1024, "default", "", "default", 80, set(), True),
        [],
        [
            ColumnSplitQueryStrategy(
                id_column=id_column,
                project_column=project_column,
                timestamp_column=timestamp_column,
            ),
            TimeSplitQueryStrategy(timestamp_col=timestamp_column),
        ],
    )
    strategy.execute(query, HTTPRequestSettings(), do_query)
from_clause=LogicalQuery( QueryEntity( EntityKey.EVENTS, get_entity(EntityKey.EVENTS).get_data_model(), ), selected_columns=[ SelectedExpression("title", Column("_snuba_title", None, "title")), SelectedExpression( "count", FunctionCall("_snuba_count", "count", tuple())), ], groupby=[Column("_snuba_title", None, "title")], condition=binary_condition( "and", binary_condition( "equals", Column("_snuba_project_id", None, "project_id"), Literal(None, 1), ), binary_condition( "and", binary_condition( "greaterOrEquals", Column("_snuba_timestamp", None, "timestamp"), Literal(None, datetime.datetime(2021, 1, 15, 0, 0)), ), binary_condition( "less", Column("_snuba_timestamp", None, "timestamp"), Literal(None, datetime.datetime(2021, 1, 20, 0, 0)), ), ), ), ),
TEST_CASES = [ pytest.param( CompositeQuery( from_clause=LogicalQuery( from_clause=events_ent, selected_columns=[ SelectedExpression("project_id", Column(None, None, "project_id")), SelectedExpression( "count_release", FunctionCall( "count_release", "uniq", (SubscriptableReference( None, Column(None, None, "tags"), Literal(None, "sentry:release"), ), ), ), ), ], groupby=[Column(None, None, "project_id")], condition=binary_condition( ConditionFunctions.EQ, Column(None, None, "project_id"), Literal(None, 1), ), ), selected_columns=[ SelectedExpression( "average",
def test_events_boolean_context() -> None:
    """contexts[device.charging] is promoted to device_charging and rewritten
    as a boolean multiIf expression.

    The query starts as an arrayElement lookup into the nested contexts
    column; MappingColumnPromoter swaps it for the promoted device_charging
    column, then EventsBooleanContextsProcessor rewrites the access into a
    multiIf producing "True"/"False" strings (empty string stays empty).
    """
    columns = ColumnSet(
        [
            ("device_charging", Nullable(UInt(8))),
            ("contexts", Nested([("key", String()), ("value", String())])),
        ]
    )
    # Input query: raw contexts[device.charging] access via arrayElement/indexOf.
    query = ClickhouseQuery(
        LogicalQuery(
            {},
            TableSource("events", columns),
            selected_columns=[
                SelectedExpression(
                    "contexts[device.charging]",
                    FunctionCall(
                        "contexts[device.charging]",
                        "arrayElement",
                        (
                            Column(None, None, "contexts.value"),
                            FunctionCall(
                                None,
                                "indexOf",
                                (
                                    Column(None, None, "contexts.key"),
                                    Literal(None, "device.charging"),
                                ),
                            ),
                        ),
                    ),
                )
            ],
        )
    )
    # Expected output: the promoted column coerced to "True"/"False" strings.
    expected = ClickhouseQuery(
        LogicalQuery(
            {},
            TableSource("events", columns),
            selected_columns=[
                SelectedExpression(
                    "contexts[device.charging]",
                    FunctionCall(
                        "contexts[device.charging]",
                        "multiIf",
                        (
                            # Empty value stays empty.
                            binary_condition(
                                None,
                                ConditionFunctions.EQ,
                                FunctionCall(
                                    None,
                                    "toString",
                                    (Column(None, None, "device_charging"),),
                                ),
                                Literal(None, ""),
                            ),
                            Literal(None, ""),
                            # Truthy encodings map to "True"; everything else
                            # falls through to "False".
                            binary_condition(
                                None,
                                ConditionFunctions.IN,
                                FunctionCall(
                                    None,
                                    "toString",
                                    (Column(None, None, "device_charging"),),
                                ),
                                literals_tuple(
                                    None,
                                    [Literal(None, "1"), Literal(None, "True")],
                                ),
                            ),
                            Literal(None, "True"),
                            Literal(None, "False"),
                        ),
                    ),
                )
            ],
        )
    )
    settings = HTTPRequestSettings()
    # Promote the nested context access onto the dedicated column first,
    # then apply the boolean rewrite.
    MappingColumnPromoter(
        {"contexts": {"device.charging": "device_charging"}}
    ).process_query(query, settings)
    EventsBooleanContextsProcessor().process_query(query, settings)
    assert (
        query.get_selected_columns_from_ast()
        == expected.get_selected_columns_from_ast()
    )
left=JoinConditionExpression("ev", "group_id"), right=JoinConditionExpression("gr", "id"), ) ], join_type=JoinType.INNER, ) LOGICAL_QUERY = LogicalQuery( from_clause=Entity(EntityKey.EVENTS, EVENTS_SCHEMA, 0.5), selected_columns=[ SelectedExpression("c1", Column("_snuba_c1", "t", "c")), SelectedExpression( "f1", FunctionCall("_snuba_f1", "f", (Column(None, "t", "c2"), ))), ], array_join=Column(None, None, "col"), condition=binary_condition("equals", Column(None, None, "c4"), Literal(None, "asd")), groupby=[Column(None, "t", "c4")], having=binary_condition("equals", Column(None, None, "c6"), Literal(None, "asd2")), order_by=[OrderBy(OrderByDirection.ASC, Column(None, "t", "c"))], limitby=LimitBy(100, Column(None, None, "c8")), limit=150, ) SIMPLE_FORMATTED = { "FROM": { "ENTITY": EntityKey.EVENTS, "SAMPLE": "0.5" }, "SELECT": [["c1", "(t.c AS _snuba_c1)"],
groups_node([ SelectedExpression( "_snuba_group_id", Column("_snuba_group_id", None, "id"), ), SelectedExpression("_snuba_id", Column("_snuba_id", None, "id")), ], ), ), right_node=IndividualNode( alias="as", data_source=LogicalQuery( from_clause=Entity(EntityKey.GROUPASSIGNEE, GROUPS_ASSIGNEE), selected_columns=[ SelectedExpression( "_snuba_group_id", Column("_snuba_group_id", None, "group_id"), ), ], ), ), keys=[ JoinCondition( left=JoinConditionExpression("ev", "_snuba_group_id"), right=JoinConditionExpression("as", "_snuba_group_id"), ) ], join_type=JoinType.INNER, ), selected_columns=[ SelectedExpression(
class EntityKeySubscription(EntitySubscriptionValidation, EntitySubscription): ... tests = [ pytest.param( LogicalQuery( QueryEntity( EntityKey.EVENTS, get_entity(EntityKey.EVENTS).get_data_model() ), selected_columns=[ SelectedExpression( "time", Column("_snuba_timestamp", None, "timestamp") ), ], condition=binary_condition( "equals", Column("_snuba_project_id", None, "project_id"), Literal(None, 1), ), ), id="no extra clauses", ), pytest.param( LogicalQuery( QueryEntity( EntityKey.METRICS_COUNTERS, get_entity(EntityKey.METRICS_COUNTERS).get_data_model(), ),
from snuba.query.conditions import binary_condition
from snuba.query.data_source.simple import Entity as QueryEntity
from snuba.query.exceptions import InvalidQueryException
from snuba.query.expressions import Column, FunctionCall, Literal
from snuba.query.logical import Query as LogicalQuery
from snuba.query.validation.validators import SubscriptionAllowedClausesValidator

# Queries that should be accepted by the subscription clause validator:
# a plain selection plus a simple project_id condition, nothing else.
tests = [
    pytest.param(
        LogicalQuery(
            QueryEntity(
                EntityKey.EVENTS, get_entity(EntityKey.EVENTS).get_data_model()
            ),
            selected_columns=[
                SelectedExpression(
                    "time", Column("_snuba_timestamp", None, "timestamp")
                ),
            ],
            condition=binary_condition(
                "equals",
                Column("_snuba_project_id", None, "project_id"),
                Literal(None, 1),
            ),
        ),
        id="no extra clauses",
    ),
]


@pytest.mark.parametrize("query", tests)  # type: ignore
def test_subscription_clauses_validation(query: LogicalQuery) -> None:
    """Queries with only subscription-allowed clauses must validate cleanly.

    validate() raising would fail the test; returning silently is success.
    """
    validator = SubscriptionAllowedClausesValidator()
    validator.validate(query)
from snuba.query import LimitBy, OrderBy, OrderByDirection, SelectedExpression from snuba.query.composite import CompositeQuery from snuba.query.logical import Query as LogicalQuery from snuba.query.snql.parser import parse_snql_query test_cases = [ pytest.param( "MATCH (events) SELECT 4-5, c GRANULARITY 60", LogicalQuery( QueryEntity( EntityKey.EVENTS, get_entity(EntityKey.EVENTS).get_data_model() ), selected_columns=[ SelectedExpression( "4-5", FunctionCall(None, "minus", (Literal(None, 4), Literal(None, 5))), ), SelectedExpression("c", Column("_snuba_c", None, "c")), ], granularity=60, ), id="granularity on whole query", ), pytest.param( "MATCH (events) SELECT 4-5, c TOTALS true", LogicalQuery( QueryEntity( EntityKey.EVENTS, get_entity(EntityKey.EVENTS).get_data_model() ), selected_columns=[
from_clause=LogicalQuery( from_clause=events_ent, selected_columns=[ SelectedExpression("project_id", Column(None, None, "project_id")), SelectedExpression( "count_environment", FunctionCall( "count_environment", "uniq", (SubscriptableReference( None, Column(None, None, "tags"), Literal(None, "environment"), ), ), ), ), ], groupby=[Column(None, None, "project_id")], condition=binary_condition( BooleanFunctions.AND, binary_condition( ConditionFunctions.EQ, Column(None, None, "project_id"), Literal(None, 1), ), binary_condition( ConditionFunctions.GTE, Column(None, None, "timestamp"), Literal(None, datetime(2020, 1, 1, 12, 0)), ), ), ),
test_cases = [ ( "not promoted", ClickhouseQuery( LogicalQuery( {}, TableSource("events", columns), selected_columns=[ SelectedExpression( "tags[foo]", FunctionCall( "tags[foo]", "arrayValue", ( Column(None, None, "tags.value"), FunctionCall( None, "indexOf", ( Column(None, None, "tags.key"), Literal(None, "foo"), ), ), ), ), ) ], ) ), ClickhouseQuery( LogicalQuery( {},