Esempio n. 1
0
    def visit_join_clause(self, node: JoinClause[Table]) -> JoinClause[Table]:
        # TODO: Skip this optimization if the left side of the query
        # runs aggregations that imply the cardinality of the right
        # side is important.

        join_cols = set()

        for condition in node.keys:
            if condition.left.table_alias == node.right_node.alias:
                join_cols.add(condition.left.column)
            elif condition.right.table_alias == node.right_node.alias:
                join_cols.add(condition.right.column)

        for c in self.__referenced_columns:
            if c.table_name == node.right_node.alias and c.column_name not in join_cols:
                return node

        return JoinClause(
            left_node=node.left_node.accept(self),
            right_node=node.right_node,
            keys=node.keys,
            join_type=node.join_type,
            join_modifier=JoinModifier.ANY
            if node.join_type == JoinType.INNER else JoinModifier.SEMI,
        )
Esempio n. 2
0
    def visit_join_clause(self,
                          node: JoinClause[Entity]) -> JoinDataSourcePlan:
        left_node = node.left_node.accept(self)
        right_node = self.visit_individual_node(node.right_node)

        # TODO: Actually return multiple plans for each subquery (one per storage
        # set) and rank them picking a combination that fits in a single storage
        # set or valid storage set combination.
        assert is_valid_storage_set_combination(
            left_node.storage_set_key, right_node.storage_set_key
        ), f"Incompatible storage sets found in plan: {left_node.storage_set_key} {right_node.storage_set_key}"

        # mypy does not know that the method above only produces individual nodes.
        assert isinstance(right_node.translated_source, IndividualNode)
        return JoinDataSourcePlan(
            translated_source=JoinClause(
                left_node=left_node.translated_source,
                right_node=right_node.translated_source,
                keys=node.keys,
                join_type=node.join_type,
                join_modifier=node.join_modifier,
            ),
            processors={
                **left_node.processors,
                **right_node.processors
            },
            storage_set_key=left_node.storage_set_key,
        )
Esempio n. 3
0
    def visit_join_clause(self,
                          node: JoinClause[Entity]) -> JoinClause[Entity]:
        """
        This tweaks the names of the columns in the ON clause as they
        cannot reference the entity fields directly (as in the original
        query) but they have to reference the mangled aliases generated
        by SubqueriesInitializer.

        This is needed because Clickhouse does not know about the
        names of the SelectedExpression nodes in the AST. It is only
        aware of aliases as a way to reference expressions from the
        external query to the subqueries.
        """
        left = node.left_node.accept(self)
        right = self.visit_individual_node(node.right_node)
        keys = [
            JoinCondition(
                left=JoinConditionExpression(k.left.table_alias,
                                             aliasify_column(k.left.column)),
                right=JoinConditionExpression(k.right.table_alias,
                                              aliasify_column(k.right.column)),
            ) for k in node.keys
        ]
        return JoinClause(left, right, keys, node.join_type,
                          node.join_modifier)
Esempio n. 4
0
def test_complex_joins() -> None:
    e = Entity(key=EntityKey.EVENTS, schema=ERRORS_SCHEMA)
    node_err = IndividualNode(alias="err", data_source=e)

    g = Entity(key=EntityKey.GROUPEDMESSAGES, schema=GROUPS_SCHEMA)
    node_group = IndividualNode(alias="groups", data_source=g)

    a = Entity(key=EntityKey.GROUPASSIGNEE, schema=GROUPS_ASSIGNEE)
    query = Query(
        from_clause=a,
        selected_columns=[
            SelectedExpression("id", Column("id", None, "id")),
            SelectedExpression("assigned_user",
                               Column("assigned_user", None, "user")),
        ],
    )
    node_query = IndividualNode(alias="assignee", data_source=query)

    join = JoinClause(
        left_node=JoinClause(
            left_node=node_err,
            right_node=node_group,
            keys=[
                JoinCondition(
                    left=JoinConditionExpression("err", "group_id"),
                    right=JoinConditionExpression("groups", "id"),
                )
            ],
            join_type=JoinType.INNER,
        ),
        right_node=node_query,
        keys=[
            JoinCondition(
                left=JoinConditionExpression("err", "group_id"),
                right=JoinConditionExpression("assignee", "id"),
            )
        ],
        join_type=JoinType.INNER,
    )

    assert join.get_column_sets() == {
        "err": ERRORS_SCHEMA,
        "assignee": ColumnSet([("id", Any()), ("assigned_user", Any())]),
        "groups": GROUPS_SCHEMA,
    }
Esempio n. 5
0
def test_simple_join() -> None:
    e = Entity(key=EntityKey.EVENTS, schema=ERRORS_SCHEMA)
    node_err = IndividualNode(alias="err", data_source=e)

    g = Entity(key=EntityKey.GROUPEDMESSAGES, schema=GROUPS_SCHEMA)
    node_group = IndividualNode(alias="groups", data_source=g)

    join = JoinClause(
        left_node=node_err,
        right_node=node_group,
        keys=[
            JoinCondition(
                left=JoinConditionExpression("err", "group_id"),
                right=JoinConditionExpression("groups", "id"),
            )
        ],
        join_type=JoinType.INNER,
        join_modifier=JoinModifier.SEMI,
    )

    assert join.get_column_sets() == {
        "err": ERRORS_SCHEMA,
        "groups": GROUPS_SCHEMA
    }

    joined_cols = join.get_columns()
    assert "err.group_id" in joined_cols
    assert "err.event_id" in joined_cols
    assert "groups.id" in joined_cols
    assert "groups.message" in joined_cols

    with pytest.raises(AssertionError):
        JoinClause(
            left_node=node_err,
            right_node=node_group,
            keys=[
                JoinCondition(
                    left=JoinConditionExpression("err", "missing_col"),
                    right=JoinConditionExpression("groups",
                                                  "another_missing_col"),
                )
            ],
            join_type=JoinType.INNER,
        )
Esempio n. 6
0
def events_groups_join(
    left: IndividualNode[TNode], right: IndividualNode[TNode],
) -> JoinClause[TNode]:
    return JoinClause(
        left_node=left,
        right_node=right,
        keys=[
            JoinCondition(
                left=JoinConditionExpression("ev", "_snuba_group_id"),
                right=JoinConditionExpression("gr", "_snuba_id"),
            )
        ],
        join_type=JoinType.INNER,
    )
Esempio n. 7
0
def join_clause(lhs_alias: str, lhs: Union[str, JoinClause[QueryEntity]],
                rhs: str) -> JoinClause[QueryEntity]:
    rhs_alias, rhs = rhs.split(":", 1)
    return JoinClause(
        left_node=node(lhs_alias, lhs) if isinstance(lhs, str) else lhs,
        right_node=node(rhs_alias, rhs),
        keys=[
            JoinCondition(
                left=JoinConditionExpression(lhs_alias, "event_id"),
                right=JoinConditionExpression(rhs_alias, "event_id"),
            )
        ],
        join_type=JoinType.INNER,
    )
Esempio n. 8
0
def build_join_clause_loop(
    node_list: Node,
    lhs: Optional[Union[IndividualNode[QueryEntity], JoinClause[QueryEntity]]],
) -> Union[IndividualNode[QueryEntity], JoinClause[QueryEntity]]:
    rhs = node_list.entity_data
    if lhs is None:
        lhs = rhs
    else:
        assert node_list.relationship is not None  # mypy
        lhs = JoinClause(
            left_node=lhs,
            right_node=rhs,
            keys=node_list.join_conditions,
            join_type=node_list.relationship.join_type,
        )

    if node_list.child is None:
        return lhs

    return build_join_clause_loop(node_list.child, lhs)
Esempio n. 9
0
def test_join_query() -> None:
    events_query = LogicalQuery(
        Entity(
            EntityKey.EVENTS,
            ColumnSet([("event_id", String()), ("group_id", UInt(32))]),
        ),
        selected_columns=[
            SelectedExpression("group_id", Column("group_id", None,
                                                  "group_id")),
            SelectedExpression("string_evt_id",
                               Column("string_evt_id", None, "event_id")),
        ],
    )

    groups_query = LogicalQuery(
        Entity(
            EntityKey.GROUPEDMESSAGES,
            ColumnSet([("id", UInt(32)), ("message", String())]),
        ),
        selected_columns=[
            SelectedExpression("group_id", Column("group_id", None, "id"))
        ],
    )

    join_query = CompositeQuery(from_clause=JoinClause(
        left_node=IndividualNode("e", events_query),
        right_node=IndividualNode("g", groups_query),
        keys=[
            JoinCondition(
                left=JoinConditionExpression("e", "group_id"),
                right=JoinConditionExpression("g", "group_id"),
            )
        ],
        join_type=JoinType.INNER,
    ))

    data_source = join_query.get_from_clause()
    assert "e.string_evt_id" in data_source.get_columns()
    assert "g.group_id" in data_source.get_columns()
Esempio n. 10
0
             ),
         ],
     ),
     id="Query with a subquery",
 ),
 pytest.param(
     CompositeQuery(
         from_clause=JoinClause(
             left_node=IndividualNode(
                 alias="err",
                 data_source=events_ent,
             ),
             right_node=IndividualNode(
                 alias="groups",
                 data_source=groups_ent,
             ),
             keys=[
                 JoinCondition(
                     left=JoinConditionExpression("err", "group_id"),
                     right=JoinConditionExpression("groups", "id"),
                 )
             ],
             join_type=JoinType.INNER,
         ),
         selected_columns=[
             SelectedExpression(
                 "f_release",
                 FunctionCall(
                     "f_release",
                     "f",
                     (Column(None, "err", "release"), ),
 AND t.project_id=1
 AND t.finish_ts>=toDateTime('2021-01-01T00:30:00')
 AND t.finish_ts<toDateTime('2021-01-07T00:30:00')""",
 CompositeQuery(
     from_clause=JoinClause(
         left_node=IndividualNode(
             "e",
             QueryEntity(
                 EntityKey.EVENTS,
                 get_entity(EntityKey.EVENTS).get_data_model(),
             ),
         ),
         right_node=IndividualNode(
             "t",
             QueryEntity(
                 EntityKey.TRANSACTIONS,
                 get_entity(EntityKey.TRANSACTIONS).get_data_model(),
             ),
         ),
         keys=[
             JoinCondition(
                 JoinConditionExpression("e", "event_id"),
                 JoinConditionExpression("t", "event_id"),
             )
         ],
         join_type=JoinType.INNER,
     ),
     selected_columns=[
         SelectedExpression(
             "4-5",
             FunctionCall(None, "minus",
Esempio n. 12
0
      "SELECT column1, (avg(column2) AS sub_average), column3 "
      "FROM my_table "
      "WHERE eq((column3 AS al), 'blabla') "
      "GROUP BY column2"
      ") "
      "GROUP BY alias"),
     id="Composite query",
 ),
 pytest.param(
     CompositeQuery(
         from_clause=JoinClause(
             left_node=node_err,
             right_node=node_group,
             keys=[
                 JoinCondition(
                     left=JoinConditionExpression("err", "group_id"),
                     right=JoinConditionExpression("groups", "id"),
                 )
             ],
             join_type=JoinType.INNER,
         ),
         selected_columns=[
             SelectedExpression("error_id",
                                Column("error_id", "err", "event_id")),
             SelectedExpression("message",
                                Column("message", "groups", "message")),
         ],
         condition=binary_condition("eq", Column(None, "groups", "id"),
                                    Literal(None, 1)),
     ),
     [
Esempio n. 13
0
    GROUPS_SCHEMA,
    Events,
    GroupAssignee,
    GroupedMessage,
)

TEST_CASES = [
    pytest.param(
        JoinClause(
            IndividualNode("ev",
                           EntitySource(EntityKey.EVENTS, EVENTS_SCHEMA,
                                        None)),
            IndividualNode(
                "gr",
                EntitySource(EntityKey.GROUPEDMESSAGES, GROUPS_SCHEMA, None)),
            [
                JoinCondition(
                    JoinConditionExpression("ev", "group_id"),
                    JoinConditionExpression("gr", "id"),
                )
            ],
            JoinType.INNER,
            None,
        ),
        {
            QualifiedCol(EntityKey.EVENTS, "group_id"): {
                QualifiedCol(EntityKey.GROUPEDMESSAGES, "id"),
            },
            QualifiedCol(EntityKey.GROUPEDMESSAGES, "id"): {
                QualifiedCol(EntityKey.EVENTS, "group_id"),
            },
            QualifiedCol(EntityKey.EVENTS, "project_id"): {
Esempio n. 14
0
        )

    assert condition.transform(
        partial(_replace_col, "ev", "project_id", "gr",
                "project_id")) == binary_condition(
                    ConditionFunctions.EQ, Column(None, "gr", "project_id"),
                    Literal(None, 1))


ENTITY_GROUP_JOIN = JoinClause(
    IndividualNode("ev", EntitySource(EntityKey.EVENTS, EVENTS_SCHEMA, None)),
    IndividualNode(
        "gr", EntitySource(EntityKey.GROUPEDMESSAGES, GROUPS_SCHEMA, None)),
    [
        JoinCondition(
            JoinConditionExpression("ev", "group_id"),
            JoinConditionExpression("gr", "id"),
        )
    ],
    JoinType.INNER,
    None,
)

TEST_REPLACEMENT = [
    pytest.param(
        binary_condition(ConditionFunctions.EQ, Column(None, "ev", "event_id"),
                         Literal(None, 1)),
        ENTITY_GROUP_JOIN,
        binary_condition(ConditionFunctions.EQ, Column(None, "ev", "event_id"),
                         Literal(None, 1)),
        id="No condition to add",
Esempio n. 15
0
from snuba.query.formatters.tracing import TExpression, format_query
from snuba.query.logical import Query as LogicalQuery
from tests.query.joins.equivalence_schema import (
    EVENTS_SCHEMA,
    GROUPS_SCHEMA,
)

BASIC_JOIN = JoinClause(
    left_node=IndividualNode(
        alias="ev",
        data_source=Entity(EntityKey.EVENTS, EVENTS_SCHEMA, None),
    ),
    right_node=IndividualNode(
        alias="gr",
        data_source=Entity(EntityKey.GROUPEDMESSAGES, GROUPS_SCHEMA, None),
    ),
    keys=[
        JoinCondition(
            left=JoinConditionExpression("ev", "group_id"),
            right=JoinConditionExpression("gr", "id"),
        )
    ],
    join_type=JoinType.INNER,
)

LOGICAL_QUERY = LogicalQuery(
    from_clause=Entity(EntityKey.EVENTS, EVENTS_SCHEMA, 0.5),
    selected_columns=[
        SelectedExpression("c1", Column("_snuba_c1", "t", "c")),
        SelectedExpression(
            "f1", FunctionCall("_snuba_f1", "f", (Column(None, "t", "c2"), ))),
Esempio n. 16
0
             ),
         ],
     ),
     id="Query with a subquery",
 ),
 pytest.param(
     CompositeQuery(
         from_clause=JoinClause(
             left_node=IndividualNode(
                 alias="err",
                 data_source=events_ent,
             ),
             right_node=IndividualNode(
                 alias="groups",
                 data_source=groups_ent,
             ),
             keys=[
                 JoinCondition(
                     left=JoinConditionExpression("err", "group_id"),
                     right=JoinConditionExpression("groups", "id"),
                 )
             ],
             join_type=JoinType.INNER,
         ),
         selected_columns=[
             SelectedExpression(
                 "f_release",
                 FunctionCall(
                     "f_release",
                     "f",
                     (Column(None, "err", "release"), ),
Esempio n. 17
0
)
from tests.query.joins.join_structures import (
    events_groups_join,
    events_node,
    groups_node,
)

BASIC_JOIN = JoinClause(
    left_node=IndividualNode(
        alias="ev",
        data_source=Entity(EntityKey.EVENTS, EVENTS_SCHEMA, None),
    ),
    right_node=IndividualNode(
        alias="gr",
        data_source=Entity(EntityKey.GROUPEDMESSAGES, GROUPS_SCHEMA, None),
    ),
    keys=[
        JoinCondition(
            left=JoinConditionExpression("ev", "group_id"),
            right=JoinConditionExpression("gr", "id"),
        )
    ],
    join_type=JoinType.INNER,
)

TEST_CASES = [
    pytest.param(
        CompositeQuery(
            from_clause=BASIC_JOIN,
            selected_columns=[],
        ),
Esempio n. 18
0
     ),
     3,
     {"errors_local"},
     True,
     None,
     id="Nested query. Count the inner query",
 ),
 pytest.param(
     CompositeQuery(
         from_clause=JoinClause(
             left_node=IndividualNode(alias="err", data_source=SIMPLE_QUERY),
             right_node=IndividualNode(
                 alias="groups", data_source=Table("groups_local", GROUPS_SCHEMA)
             ),
             keys=[
                 JoinCondition(
                     left=JoinConditionExpression("err", "group_id"),
                     right=JoinConditionExpression("groups", "id"),
                 )
             ],
             join_type=JoinType.INNER,
         ),
         selected_columns=[
             SelectedExpression(
                 "event_id",
                 FunctionCall("alias", "something", (Column(None, "err", "alias"),)),
             ),
             SelectedExpression(
                 "group_id",
                 Column("group_id", "groups", "group_id"),
             ),
Esempio n. 19
0
 from_clause=JoinClause(
     left_node=events_groups_join(
         clickhouse_events_node([
             SelectedExpression(
                 "_snuba_group_id",
                 Column("_snuba_group_id", None, "group_id"),
             ),
         ]),
         clickhouse_groups_node([
             SelectedExpression("_snuba_id",
                                Column("_snuba_id", None, "id")),
             SelectedExpression(
                 "_snuba_col1",
                 Column("_snuba_col1", None, "something")),
         ]),
     ),
     right_node=clickhouse_assignees_node([
         SelectedExpression(
             "_snuba_group_id",
             Column("_snuba_group_id", None, "group_id"),
         ),
         SelectedExpression(
             "_snuba_col1", Column("_snuba_col1", None,
                                   "something")),
     ]),
     keys=[
         JoinCondition(
             left=JoinConditionExpression("ev", "_snuba_group_id"),
             right=JoinConditionExpression("as", "_snuba_group_id"),
         )
     ],
     join_type=JoinType.INNER,
 ),
Esempio n. 20
0
     id="Special array join functions",
 ),
 pytest.param(
     "MATCH (e: events) -[contains]-> (t: transactions) SELECT 4-5, e.c",
     CompositeQuery(
         from_clause=JoinClause(
             left_node=IndividualNode(
                 "e",
                 QueryEntity(
                     EntityKey.EVENTS, get_entity(EntityKey.EVENTS).get_data_model(),
                 ),
             ),
             right_node=IndividualNode(
                 "t",
                 QueryEntity(
                     EntityKey.TRANSACTIONS,
                     get_entity(EntityKey.TRANSACTIONS).get_data_model(),
                 ),
             ),
             keys=[
                 JoinCondition(
                     JoinConditionExpression("e", "event_id"),
                     JoinConditionExpression("t", "event_id"),
                 )
             ],
             join_type=JoinType.INNER,
         ),
         selected_columns=[
             SelectedExpression(
                 "4-5",
                 FunctionCall(None, "minus", (Literal(None, 4), Literal(None, 5))),
             ),