def visit_join_clause(self, node: JoinClause[Entity]) -> JoinClause[Entity]: """ This tweaks the names of the columns in the ON clause as they cannot reference the entity fields directly (as in the original query) but they have to reference the mangled aliases generated by SubqueriesInitializer. This is needed because Clickhouse does not know about the names of the SelectedExpression nodes in the AST. It is only aware of aliases as a way to reference expressions from the external query to the subqueries. """ left = node.left_node.accept(self) right = self.visit_individual_node(node.right_node) keys = [ JoinCondition( left=JoinConditionExpression(k.left.table_alias, aliasify_column(k.left.column)), right=JoinConditionExpression(k.right.table_alias, aliasify_column(k.right.column)), ) for k in node.keys ] return JoinClause(left, right, keys, node.join_type, node.join_modifier)
def test_complex_joins() -> None: e = Entity(key=EntityKey.EVENTS, schema=ERRORS_SCHEMA) node_err = IndividualNode(alias="err", data_source=e) g = Entity(key=EntityKey.GROUPEDMESSAGES, schema=GROUPS_SCHEMA) node_group = IndividualNode(alias="groups", data_source=g) a = Entity(key=EntityKey.GROUPASSIGNEE, schema=GROUPS_ASSIGNEE) query = Query( from_clause=a, selected_columns=[ SelectedExpression("id", Column("id", None, "id")), SelectedExpression("assigned_user", Column("assigned_user", None, "user")), ], ) node_query = IndividualNode(alias="assignee", data_source=query) join = JoinClause( left_node=JoinClause( left_node=node_err, right_node=node_group, keys=[ JoinCondition( left=JoinConditionExpression("err", "group_id"), right=JoinConditionExpression("groups", "id"), ) ], join_type=JoinType.INNER, ), right_node=node_query, keys=[ JoinCondition( left=JoinConditionExpression("err", "group_id"), right=JoinConditionExpression("assignee", "id"), ) ], join_type=JoinType.INNER, ) assert join.get_column_sets() == { "err": ERRORS_SCHEMA, "assignee": ColumnSet([("id", Any()), ("assigned_user", Any())]), "groups": GROUPS_SCHEMA, }
def test_simple_join() -> None: e = Entity(key=EntityKey.EVENTS, schema=ERRORS_SCHEMA) node_err = IndividualNode(alias="err", data_source=e) g = Entity(key=EntityKey.GROUPEDMESSAGES, schema=GROUPS_SCHEMA) node_group = IndividualNode(alias="groups", data_source=g) join = JoinClause( left_node=node_err, right_node=node_group, keys=[ JoinCondition( left=JoinConditionExpression("err", "group_id"), right=JoinConditionExpression("groups", "id"), ) ], join_type=JoinType.INNER, join_modifier=JoinModifier.SEMI, ) assert join.get_column_sets() == { "err": ERRORS_SCHEMA, "groups": GROUPS_SCHEMA } joined_cols = join.get_columns() assert "err.group_id" in joined_cols assert "err.event_id" in joined_cols assert "groups.id" in joined_cols assert "groups.message" in joined_cols with pytest.raises(AssertionError): JoinClause( left_node=node_err, right_node=node_group, keys=[ JoinCondition( left=JoinConditionExpression("err", "missing_col"), right=JoinConditionExpression("groups", "another_missing_col"), ) ], join_type=JoinType.INNER, )
def join_clause(lhs_alias: str, lhs: Union[str, JoinClause[QueryEntity]], rhs: str) -> JoinClause[QueryEntity]: rhs_alias, rhs = rhs.split(":", 1) return JoinClause( left_node=node(lhs_alias, lhs) if isinstance(lhs, str) else lhs, right_node=node(rhs_alias, rhs), keys=[ JoinCondition( left=JoinConditionExpression(lhs_alias, "event_id"), right=JoinConditionExpression(rhs_alias, "event_id"), ) ], join_type=JoinType.INNER, )
def events_groups_join( left: IndividualNode[TNode], right: IndividualNode[TNode], ) -> JoinClause[TNode]: return JoinClause( left_node=left, right_node=right, keys=[ JoinCondition( left=JoinConditionExpression("ev", "_snuba_group_id"), right=JoinConditionExpression("gr", "_snuba_id"), ) ], join_type=JoinType.INNER, )
def build_join_conditions(self, rhs: Node) -> None: if rhs.relationship is None: return join_conditions = [] for lhs_column, rhs_column in rhs.relationship.columns: join_conditions.append( JoinCondition( left=JoinConditionExpression(self.entity_data.alias, lhs_column), right=JoinConditionExpression(rhs.entity_data.alias, rhs_column), )) # The join conditions are put into the right hand side since the left hand side # can have many children, each with different join conditions. This way each child # tracks how it is joined to its parent, since each child has exactly one parent. rhs.join_conditions = join_conditions
def test_join_query() -> None: events_query = LogicalQuery( Entity( EntityKey.EVENTS, ColumnSet([("event_id", String()), ("group_id", UInt(32))]), ), selected_columns=[ SelectedExpression("group_id", Column("group_id", None, "group_id")), SelectedExpression("string_evt_id", Column("string_evt_id", None, "event_id")), ], ) groups_query = LogicalQuery( Entity( EntityKey.GROUPEDMESSAGES, ColumnSet([("id", UInt(32)), ("message", String())]), ), selected_columns=[ SelectedExpression("group_id", Column("group_id", None, "id")) ], ) join_query = CompositeQuery(from_clause=JoinClause( left_node=IndividualNode("e", events_query), right_node=IndividualNode("g", groups_query), keys=[ JoinCondition( left=JoinConditionExpression("e", "group_id"), right=JoinConditionExpression("g", "group_id"), ) ], join_type=JoinType.INNER, )) data_source = join_query.get_from_clause() assert "e.string_evt_id" in data_source.get_columns() assert "g.group_id" in data_source.get_columns()
id="Query with a subquery", ), pytest.param( CompositeQuery( from_clause=JoinClause( left_node=IndividualNode( alias="err", data_source=events_ent, ), right_node=IndividualNode( alias="groups", data_source=groups_ent, ), keys=[ JoinCondition( left=JoinConditionExpression("err", "group_id"), right=JoinConditionExpression("groups", "id"), ) ], join_type=JoinType.INNER, ), selected_columns=[ SelectedExpression( "f_release", FunctionCall( "f_release", "f", (Column(None, "err", "release"), ), ), ), SelectedExpression( "_snuba_right",
"e", QueryEntity( EntityKey.EVENTS, get_entity(EntityKey.EVENTS).get_data_model(), ), ), right_node=IndividualNode( "t", QueryEntity( EntityKey.TRANSACTIONS, get_entity(EntityKey.TRANSACTIONS).get_data_model(), ), ), keys=[ JoinCondition( JoinConditionExpression("e", "event_id"), JoinConditionExpression("t", "event_id"), ) ], join_type=JoinType.INNER, ), selected_columns=[ SelectedExpression( "4-5", FunctionCall(None, "minus", (Literal(None, 4), Literal(None, 5))), ), SelectedExpression("e.c", Column("_snuba_e.c", "e", "c")), ], condition=binary_condition( "and", binary_condition(
"SELECT column1, (avg(column2) AS sub_average), column3 " "FROM my_table " "WHERE eq((column3 AS al), 'blabla') " "GROUP BY column2" ") " "GROUP BY alias"), id="Composite query", ), pytest.param( CompositeQuery( from_clause=JoinClause( left_node=node_err, right_node=node_group, keys=[ JoinCondition( left=JoinConditionExpression("err", "group_id"), right=JoinConditionExpression("groups", "id"), ) ], join_type=JoinType.INNER, ), selected_columns=[ SelectedExpression("error_id", Column("error_id", "err", "event_id")), SelectedExpression("message", Column("message", "groups", "message")), ], condition=binary_condition("eq", Column(None, "groups", "id"), Literal(None, 1)), ), [ "SELECT (err.event_id AS error_id), (groups.message AS message)",
GroupAssignee, GroupedMessage, ) TEST_CASES = [ pytest.param( JoinClause( IndividualNode("ev", EntitySource(EntityKey.EVENTS, EVENTS_SCHEMA, None)), IndividualNode( "gr", EntitySource(EntityKey.GROUPEDMESSAGES, GROUPS_SCHEMA, None)), [ JoinCondition( JoinConditionExpression("ev", "group_id"), JoinConditionExpression("gr", "id"), ) ], JoinType.INNER, None, ), { QualifiedCol(EntityKey.EVENTS, "group_id"): { QualifiedCol(EntityKey.GROUPEDMESSAGES, "id"), }, QualifiedCol(EntityKey.GROUPEDMESSAGES, "id"): { QualifiedCol(EntityKey.EVENTS, "group_id"), }, QualifiedCol(EntityKey.EVENTS, "project_id"): { QualifiedCol(EntityKey.GROUPEDMESSAGES, "project_id"), },
EVENTS_SCHEMA, GROUPS_SCHEMA, ) BASIC_JOIN = JoinClause( left_node=IndividualNode( alias="ev", data_source=Entity(EntityKey.EVENTS, EVENTS_SCHEMA, None), ), right_node=IndividualNode( alias="gr", data_source=Entity(EntityKey.GROUPEDMESSAGES, GROUPS_SCHEMA, None), ), keys=[ JoinCondition( left=JoinConditionExpression("ev", "group_id"), right=JoinConditionExpression("gr", "id"), ) ], join_type=JoinType.INNER, ) LOGICAL_QUERY = LogicalQuery( from_clause=Entity(EntityKey.EVENTS, EVENTS_SCHEMA, 0.5), selected_columns=[ SelectedExpression("c1", Column("_snuba_c1", "t", "c")), SelectedExpression( "f1", FunctionCall("_snuba_f1", "f", (Column(None, "t", "c2"), ))), ], array_join=Column(None, None, "col"), condition=binary_condition("equals", Column(None, None, "c4"), Literal(None, "asd")),
left_node=IndividualNode( "e", QueryEntity( EntityKey.EVENTS, get_entity(EntityKey.EVENTS).get_data_model(), ), ), right_node=IndividualNode( "t", QueryEntity( EntityKey.TRANSACTIONS, get_entity(EntityKey.TRANSACTIONS).get_data_model(), ), ), keys=[ JoinCondition( JoinConditionExpression("e", "event_id"), JoinConditionExpression("t", "event_id"), ) ], join_type=JoinType.INNER, ), selected_columns=[ SelectedExpression( "4-5", FunctionCall(None, "minus", (Literal(None, 4), Literal(None, 5))), ), SelectedExpression("e.c", Column("_snuba_e.c", "e", "c")), ], ), id="Basic join match", ), pytest.param(
"_snuba_col1", Column("_snuba_col1", None, "something")), ]), ), right_node=clickhouse_assignees_node([ SelectedExpression( "_snuba_group_id", Column("_snuba_group_id", None, "group_id"), ), SelectedExpression( "_snuba_col1", Column("_snuba_col1", None, "something")), ]), keys=[ JoinCondition( left=JoinConditionExpression("ev", "_snuba_group_id"), right=JoinConditionExpression("as", "_snuba_group_id"), ) ], join_type=JoinType.INNER, ), selected_columns=[ SelectedExpression("group_id", Column("_snuba_col1", "gr", "_snuba_col1")) ], ), { "gr": None, "as": JoinModifier.ANY }, id="Multi table join, make only the right one a semi join.", ),
) assert condition.transform( partial(_replace_col, "ev", "project_id", "gr", "project_id")) == binary_condition( ConditionFunctions.EQ, Column(None, "gr", "project_id"), Literal(None, 1)) ENTITY_GROUP_JOIN = JoinClause( IndividualNode("ev", EntitySource(EntityKey.EVENTS, EVENTS_SCHEMA, None)), IndividualNode( "gr", EntitySource(EntityKey.GROUPEDMESSAGES, GROUPS_SCHEMA, None)), [ JoinCondition( JoinConditionExpression("ev", "group_id"), JoinConditionExpression("gr", "id"), ) ], JoinType.INNER, None, ) TEST_REPLACEMENT = [ pytest.param( binary_condition(ConditionFunctions.EQ, Column(None, "ev", "event_id"), Literal(None, 1)), ENTITY_GROUP_JOIN, binary_condition(ConditionFunctions.EQ, Column(None, "ev", "event_id"), Literal(None, 1)), id="No condition to add", ),