def test_nested_query() -> None:
    """
    Simply builds a nested query.
    """
    nested = LogicalQuery(
        Entity(EntityKey.EVENTS, ColumnSet([("event_id", String())])),
        selected_columns=[
            SelectedExpression(
                "string_evt_id", Column("string_evt_id", None, "event_id")
            )
        ],
    )
    composite = CompositeQuery(
        from_clause=nested,
        selected_columns=[
            SelectedExpression("output", Column("output", None, "string_evt_id"))
        ],
    )

    # The iterator methods on the composite query do not descend into
    # the nested query.
    assert composite.get_all_ast_referenced_columns() == {
        Column("output", None, "string_evt_id")
    }

    # The schema of the nested query is derived from its SELECT clause.
    assert composite.get_from_clause().get_columns() == ColumnSet(
        [("string_evt_id", Any())]
    )
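
# Illustrative sketch (not part of the original test): because the iterator
# methods of the composite query stop at the nested query boundary, gathering
# every referenced column across both levels means visiting each query
# explicitly. This assumes the nested LogicalQuery exposes the same
# get_all_ast_referenced_columns() accessor used on the composite query above.
def collect_referenced_columns(composite: CompositeQuery) -> set:
    referenced = set(composite.get_all_ast_referenced_columns())
    inner = composite.get_from_clause()
    if isinstance(inner, (LogicalQuery, CompositeQuery)):
        # Descend one level; a complete implementation would recurse through
        # arbitrarily nested composite queries and join clauses.
        referenced |= set(inner.get_all_ast_referenced_columns())
    return referenced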
def test_add_equivalent_condition(
    initial_condition: Expression,
    join_clause: JoinClause[EntitySource],
    expected_expr: Expression,
) -> None:
    ENTITY_IMPL[EntityKey.EVENTS] = Events()
    ENTITY_IMPL[EntityKey.GROUPEDMESSAGES] = GroupedMessage()

    query = CompositeQuery(
        from_clause=join_clause,
        selected_columns=[
            SelectedExpression(
                "group_id",
                FunctionCall("something", "f", (Column(None, "gr", "id"),)),
            )
        ],
        condition=initial_condition,
    )

    add_equivalent_conditions(query)
    assert query.get_condition() == expected_expr
    ENTITY_IMPL.clear()
def test_join_query() -> None:
    events_query = LogicalQuery(
        Entity(
            EntityKey.EVENTS,
            ColumnSet([("event_id", String()), ("group_id", UInt(32))]),
        ),
        selected_columns=[
            SelectedExpression("group_id", Column("group_id", None, "group_id")),
            SelectedExpression(
                "string_evt_id", Column("string_evt_id", None, "event_id")
            ),
        ],
    )

    groups_query = LogicalQuery(
        Entity(
            EntityKey.GROUPEDMESSAGES,
            ColumnSet([("id", UInt(32)), ("message", String())]),
        ),
        selected_columns=[
            SelectedExpression("group_id", Column("group_id", None, "id"))
        ],
    )

    join_query = CompositeQuery(
        from_clause=JoinClause(
            left_node=IndividualNode("e", events_query),
            right_node=IndividualNode("g", groups_query),
            keys=[
                JoinCondition(
                    left=JoinConditionExpression("e", "group_id"),
                    right=JoinConditionExpression("g", "group_id"),
                )
            ],
            join_type=JoinType.INNER,
        )
    )

    data_source = join_query.get_from_clause()
    # Columns exposed by a join data source are qualified with the node alias.
    assert "e.string_evt_id" in data_source.get_columns()
    assert "g.group_id" in data_source.get_columns()
def _plan_composite_query(
    query: CompositeQuery[Entity], settings: RequestSettings
) -> CompositeQueryPlan:
    """
    Produces a composite query plan out of a composite query.

    This contains the bulk of the logic of the composite planner. It is
    kept in its own function because it needs to be used by the data
    source visitor when planning subqueries (which can be composite as
    well).
    """
    planned_data_source = CompositeDataSourcePlanner(settings).visit(
        query.get_from_clause()
    )

    root_db_processors, aliased_db_processors = planned_data_source.get_db_processors()

    return CompositeQueryPlan(
        # This is a mypy issue: https://github.com/python/mypy/issues/7520
        # At the time of writing generics in dataclasses are not properly
        # supported and mypy expects TQuery instead of CompositeQuery here.
        # If the issue is not fixed before we start enforcing this we will
        # have to restructure the query plan.
        query=CompositeQuery(
            from_clause=planned_data_source.translated_source,
            selected_columns=query.get_selected_columns(),
            array_join=query.get_arrayjoin(),
            condition=query.get_condition(),
            groupby=query.get_groupby(),
            having=query.get_having(),
            order_by=query.get_orderby(),
            limitby=query.get_limitby(),
            limit=query.get_limit(),
            offset=query.get_offset(),
            totals=query.has_totals(),
            granularity=query.get_granularity(),
        ),
        execution_strategy=CompositeExecutionStrategy(
            get_cluster(planned_data_source.storage_set_key),
            root_db_processors,
            aliased_db_processors,
            composite_processors=[SemiJoinOptimizer()],
        ),
        storage_set_key=planned_data_source.storage_set_key,
        root_processors=planned_data_source.root_processors,
        aliased_processors=planned_data_source.aliased_processors,
    )
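
# Minimal sketch of how this helper is typically consumed (an assumption for
# illustration, not code from this module): a planner object wraps the query
# and settings and exposes the plan through a build method. The class name and
# method name below are hypothetical.
class CompositeQueryPlanner:
    def __init__(
        self, query: CompositeQuery[Entity], settings: RequestSettings
    ) -> None:
        self.__query = query
        self.__settings = settings

    def build_best_plan(self) -> CompositeQueryPlan:
        # Delegates to the module-level helper so that the data source
        # visitor can reuse the same logic for composite subqueries.
        return _plan_composite_query(self.__query, self.__settings)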
def visit_query_exp(
    self, node: Node, visited_children: Iterable[Any]
) -> Union[LogicalQuery, CompositeQuery[QueryEntity]]:
    args: MutableMapping[str, Any] = {}
    (
        data_source,
        args["selected_columns"],
        args["groupby"],
        args["array_join"],
        args["condition"],
        args["having"],
        args["order_by"],
        args["limitby"],
        args["limit"],
        args["offset"],
        args["granularity"],
        args["totals"],
        _,
    ) = visited_children

    # Clauses that did not produce a visited value come back as raw grammar
    # Nodes; drop them so the Query constructors fall back to their defaults.
    keys = list(args.keys())
    for k in keys:
        if isinstance(args[k], Node):
            del args[k]

    # The BY expressions are implicitly part of the select list, so prepend
    # them to selected_columns and keep only the raw expressions for groupby.
    if "groupby" in args:
        if "selected_columns" not in args:
            args["selected_columns"] = args["groupby"]
        else:
            args["selected_columns"] = args["groupby"] + args["selected_columns"]
        args["groupby"] = map(lambda gb: gb.expression, args["groupby"])

    # A subquery or a join as the data source produces a CompositeQuery;
    # a plain entity produces a LogicalQuery.
    if isinstance(data_source, (CompositeQuery, LogicalQuery, JoinClause)):
        args["from_clause"] = data_source
        return CompositeQuery(**args)

    args.update({"prewhere": None, "from_clause": data_source})
    if isinstance(data_source, QueryEntity):
        # TODO: How sample rate gets stored needs to be addressed in a future PR
        args["sample"] = data_source.sample

    return LogicalQuery(**args)
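
# Minimal sketch (illustrative values only) of the groupby handling above: the
# BY expressions are folded into the select list and groupby keeps just the
# raw expressions (written here as a list comprehension for clarity).
groupby = [SelectedExpression("project_id", Column(None, None, "project_id"))]
selected = [SelectedExpression("count", FunctionCall("count", "count", tuple()))]

merged_selected = groupby + selected
merged_groupby = [gb.expression for gb in groupby]

assert [s.name for s in merged_selected] == ["project_id", "count"]
assert merged_groupby == [Column(None, None, "project_id")]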
CompositeQuery(
    from_clause=LogicalQuery(
        from_clause=events_ent,
        selected_columns=[
            SelectedExpression("project_id", Column(None, None, "project_id")),
            SelectedExpression(
                "count_environment",
                FunctionCall(
                    "count_environment",
                    "uniq",
                    (
                        SubscriptableReference(
                            None,
                            Column(None, None, "tags"),
                            Literal(None, "environment"),
                        ),
                    ),
                ),
            ),
        ],
        groupby=[Column(None, None, "project_id")],
        condition=binary_condition(
            BooleanFunctions.AND,
            binary_condition(
                ConditionFunctions.EQ,
                Column(None, None, "project_id"),
                Literal(None, 1),
            ),
            binary_condition(
                ConditionFunctions.GTE,
                Column(None, None, "timestamp"),
                Literal(None, datetime(2020, 1, 1, 12, 0)),
            ),
        ),
    ),
    selected_columns=[
        SelectedExpression(
            "average",
            FunctionCall(
                "average", "avg", (Column(None, None, "count_environment"),)
            ),
        ),
    ],
),
CompositeQuery(
    from_clause=LogicalQuery(
        QueryEntity(
            EntityKey.EVENTS,
            get_entity(EntityKey.EVENTS).get_data_model(),
        ),
        selected_columns=[
            SelectedExpression("title", Column("_snuba_title", None, "title")),
            SelectedExpression(
                "count", FunctionCall("_snuba_count", "count", tuple())
            ),
        ],
        groupby=[Column("_snuba_title", None, "title")],
        condition=binary_condition(
            "and",
            binary_condition(
                "equals",
                Column("_snuba_project_id", None, "project_id"),
                Literal(None, 1),
            ),
            binary_condition(
                "and",
                binary_condition(
                    "greaterOrEquals",
                    Column("_snuba_timestamp", None, "timestamp"),
                    Literal(None, datetime.datetime(2021, 1, 15, 0, 0)),
                ),
                binary_condition(
                    "less",
                    Column("_snuba_timestamp", None, "timestamp"),
                    Literal(None, datetime.datetime(2021, 1, 20, 0, 0)),
                ),
            ),
        ),
    ),
    selected_columns=[
        SelectedExpression(
            "max_count",
            FunctionCall(
                "_snuba_max_count",
                "max",
                (Column("_snuba_count", None, "_snuba_count"),),
            ),
        ),
    ],
    limit=1000,
    offset=0,
),
CompositeQuery(
    from_clause=Query(
        Table("my_table", ColumnSet([])),
        selected_columns=[
            SelectedExpression("column1", Column(None, None, "column1")),
            SelectedExpression(
                "sub_average",
                FunctionCall("sub_average", "avg", (Column(None, None, "column2"),)),
            ),
            SelectedExpression("column3", Column(None, None, "column3")),
        ],
        condition=binary_condition(
            "eq",
            lhs=Column("al", None, "column3"),
            rhs=Literal(None, "blabla"),
        ),
        groupby=[Column(None, None, "column2")],
    ),
    selected_columns=[
        SelectedExpression(
            "average",
            FunctionCall("average", "avg", (Column(None, None, "sub_average"),)),
        ),
        SelectedExpression("alias", Column("alias", None, "column3")),
    ],
    groupby=[Column(None, None, "alias")],
),
"BY": "c8" }, "LIMIT": 150, } TEST_JOIN = [ pytest.param( LOGICAL_QUERY, SIMPLE_FORMATTED, id="Simple logical query", ), pytest.param( CompositeQuery( from_clause=LOGICAL_QUERY, selected_columns=[ SelectedExpression( "f", FunctionCall("f", "avg", (Column(None, "t", "c"), ))) ], ), { "FROM": SIMPLE_FORMATTED, "SELECT": [["f", ["f", "avg", ["t.c"]]]], "GROUPBY": [], "ORDERBY": [], }, id="Nested Query", ), pytest.param( CompositeQuery( from_clause=BASIC_JOIN, selected_columns=[
FunctionCall(None, "tuple", (Literal(None, 1), Literal(None, 2))), ), ) TEST_CASES = [ pytest.param( SIMPLE_QUERY, {1, 2}, id="Simple Query", ), pytest.param( CompositeQuery( from_clause=SIMPLE_QUERY, selected_columns=[ SelectedExpression( "alias", FunctionCall("alias", "something", (Column(None, None, "alias"), )), ) ], ), {1, 2}, id="Nested query. Project from the inner query", ), ] @pytest.mark.parametrize( "query, expected_proj", TEST_CASES, ) def test_count_columns(
alias="gr", data_source=Entity(EntityKey.GROUPEDMESSAGES, GROUPS_SCHEMA, None), ), keys=[ JoinCondition( left=JoinConditionExpression("ev", "group_id"), right=JoinConditionExpression("gr", "id"), ) ], join_type=JoinType.INNER, ) TEST_CASES = [ pytest.param( CompositeQuery( from_clause=BASIC_JOIN, selected_columns=[], ), CompositeQuery( from_clause=events_groups_join( events_node([ SelectedExpression( "_snuba_group_id", Column("_snuba_group_id", None, "group_id"), ), ]), groups_node([ SelectedExpression("_snuba_id", Column("_snuba_id", None, "id")) ], ), ), selected_columns=[],
TEST_CASES = [
    pytest.param(
        SIMPLE_QUERY,
        3,
        {"errors_local"},
        True,
        0.1,
        id="Simple Query",
    ),
    pytest.param(
        CompositeQuery(
            from_clause=SIMPLE_QUERY,
            selected_columns=[
                SelectedExpression(
                    "alias",
                    FunctionCall(
                        "alias", "something", (Column(None, None, "alias"),)
                    ),
                )
            ],
        ),
        3,
        {"errors_local"},
        True,
        None,
        id="Nested query. Count the inner query",
    ),
    pytest.param(
        CompositeQuery(
            from_clause=JoinClause(
                left_node=IndividualNode(alias="err", data_source=SIMPLE_QUERY),
                right_node=IndividualNode(
CompositeQuery(
    from_clause=JoinClause(
        left_node=IndividualNode(
            "e",
            QueryEntity(
                EntityKey.EVENTS,
                get_entity(EntityKey.EVENTS).get_data_model(),
            ),
        ),
        right_node=IndividualNode(
            "t",
            QueryEntity(
                EntityKey.TRANSACTIONS,
                get_entity(EntityKey.TRANSACTIONS).get_data_model(),
            ),
        ),
        keys=[
            JoinCondition(
                JoinConditionExpression("e", "event_id"),
                JoinConditionExpression("t", "event_id"),
            )
        ],
        join_type=JoinType.INNER,
    ),
    selected_columns=[
        SelectedExpression(
            "4-5",
            FunctionCall(None, "minus", (Literal(None, 4), Literal(None, 5))),
        ),
        SelectedExpression("e.c", Column("_snuba_e.c", "e", "c")),
    ],
),
    clickhouse_assignees_node,
    clickhouse_events_node,
    clickhouse_groups_node,
    events_groups_join,
)

TEST_CASES = [
    pytest.param(
        CompositeQuery(
            from_clause=events_groups_join(
                clickhouse_events_node(
                    [
                        SelectedExpression(
                            "_snuba_group_id",
                            Column("_snuba_group_id", None, "group_id"),
                        ),
                    ]
                ),
                clickhouse_groups_node(
                    [
                        SelectedExpression(
                            "_snuba_id", Column("_snuba_id", None, "id")
                        )
                    ],
                ),
            ),
            selected_columns=[],
        ),
        {"gr": JoinModifier.ANY},
        id="Simple two table query with no reference. Semi join",
    ),
    pytest.param(
        CompositeQuery(
            from_clause=events_groups_join(
                clickhouse_events_node(
                    [
                        SelectedExpression(
CompositeQuery(
    from_clause=events_groups_join(
        clickhouse_events_node(
            [
                SelectedExpression(
                    "_snuba_group_id",
                    Column("_snuba_group_id", None, "group_id"),
                ),
            ],
            binary_condition(
                BooleanFunctions.AND,
                binary_condition(
                    BooleanFunctions.AND,
                    binary_condition(
                        ConditionFunctions.GTE,
                        Column(None, None, "timestamp"),
                        Literal(None, datetime(2020, 8, 1)),
                    ),
                    binary_condition(
                        ConditionFunctions.LT,
                        Column(None, None, "timestamp"),
                        Literal(None, datetime(2020, 9, 1)),
                    ),
                ),
                binary_condition(
                    ConditionFunctions.EQ,
                    build_mapping_expr(
                        "tags[asd]",
                        None,
                        "tags",
                        Literal(None, "asd"),
                    ),
                    Literal(None, "sdf"),
                ),
            ),
            [Column("_snuba_group_id", None, "group_id")],
        ),
        clickhouse_groups_node(
            [
                SelectedExpression(
                    "_snuba_id", Column("_snuba_id", None, "id")
                )
            ],
        ),
    ),
    selected_columns=[],
),