Exemple #1
0
    def visit_join_clause(self, node: JoinClause[Table]) -> JoinClause[Table]:
        # TODO: Skip this optimization if the left side of the query
        # runs aggregations that imply the cardinality of the right
        # side is important.

        join_cols = set()

        for condition in node.keys:
            if condition.left.table_alias == node.right_node.alias:
                join_cols.add(condition.left.column)
            elif condition.right.table_alias == node.right_node.alias:
                join_cols.add(condition.right.column)

        for c in self.__referenced_columns:
            if c.table_name == node.right_node.alias and c.column_name not in join_cols:
                return node

        return JoinClause(
            left_node=node.left_node.accept(self),
            right_node=node.right_node,
            keys=node.keys,
            join_type=node.join_type,
            join_modifier=JoinModifier.ANY
            if node.join_type == JoinType.INNER else JoinModifier.SEMI,
        )
Exemple #2
0
    def visit_join_clause(self,
                          node: JoinClause[Entity]) -> JoinDataSourcePlan:
        left_node = node.left_node.accept(self)
        right_node = self.visit_individual_node(node.right_node)

        # TODO: Actually return multiple plans for each subquery (one per storage
        # set) and rank them picking a combination that fits in a single storage
        # set or valid storage set combination.
        assert is_valid_storage_set_combination(
            left_node.storage_set_key, right_node.storage_set_key
        ), f"Incompatible storage sets found in plan: {left_node.storage_set_key} {right_node.storage_set_key}"

        # mypy does not know that the method above only produces individual nodes.
        assert isinstance(right_node.translated_source, IndividualNode)
        return JoinDataSourcePlan(
            translated_source=JoinClause(
                left_node=left_node.translated_source,
                right_node=right_node.translated_source,
                keys=node.keys,
                join_type=node.join_type,
                join_modifier=node.join_modifier,
            ),
            processors={
                **left_node.processors,
                **right_node.processors
            },
            storage_set_key=left_node.storage_set_key,
        )
Exemple #3
0
    def visit_join_clause(self,
                          node: JoinClause[Entity]) -> JoinClause[Entity]:
        """
        This tweaks the names of the columns in the ON clause as they
        cannot reference the entity fields directly (as in the original
        query) but they have to reference the mangled aliases generated
        by SubqueriesInitializer.

        This is needed because Clickhouse does not know about the
        names of the SelectedExpression nodes in the AST. It is only
        aware of aliases as a way to reference expressions from the
        external query to the subqueries.
        """
        left = node.left_node.accept(self)
        right = self.visit_individual_node(node.right_node)
        keys = [
            JoinCondition(
                left=JoinConditionExpression(k.left.table_alias,
                                             aliasify_column(k.left.column)),
                right=JoinConditionExpression(k.right.table_alias,
                                              aliasify_column(k.right.column)),
            ) for k in node.keys
        ]
        return JoinClause(left, right, keys, node.join_type,
                          node.join_modifier)
Exemple #4
0
def test_complex_joins() -> None:
    e = Entity(key=EntityKey.EVENTS, schema=ERRORS_SCHEMA)
    node_err = IndividualNode(alias="err", data_source=e)

    g = Entity(key=EntityKey.GROUPEDMESSAGES, schema=GROUPS_SCHEMA)
    node_group = IndividualNode(alias="groups", data_source=g)

    a = Entity(key=EntityKey.GROUPASSIGNEE, schema=GROUPS_ASSIGNEE)
    query = Query(
        from_clause=a,
        selected_columns=[
            SelectedExpression("id", Column("id", None, "id")),
            SelectedExpression("assigned_user",
                               Column("assigned_user", None, "user")),
        ],
    )
    node_query = IndividualNode(alias="assignee", data_source=query)

    join = JoinClause(
        left_node=JoinClause(
            left_node=node_err,
            right_node=node_group,
            keys=[
                JoinCondition(
                    left=JoinConditionExpression("err", "group_id"),
                    right=JoinConditionExpression("groups", "id"),
                )
            ],
            join_type=JoinType.INNER,
        ),
        right_node=node_query,
        keys=[
            JoinCondition(
                left=JoinConditionExpression("err", "group_id"),
                right=JoinConditionExpression("assignee", "id"),
            )
        ],
        join_type=JoinType.INNER,
    )

    assert join.get_column_sets() == {
        "err": ERRORS_SCHEMA,
        "assignee": ColumnSet([("id", Any()), ("assigned_user", Any())]),
        "groups": GROUPS_SCHEMA,
    }
Exemple #5
0
def test_simple_join() -> None:
    e = Entity(key=EntityKey.EVENTS, schema=ERRORS_SCHEMA)
    node_err = IndividualNode(alias="err", data_source=e)

    g = Entity(key=EntityKey.GROUPEDMESSAGES, schema=GROUPS_SCHEMA)
    node_group = IndividualNode(alias="groups", data_source=g)

    join = JoinClause(
        left_node=node_err,
        right_node=node_group,
        keys=[
            JoinCondition(
                left=JoinConditionExpression("err", "group_id"),
                right=JoinConditionExpression("groups", "id"),
            )
        ],
        join_type=JoinType.INNER,
        join_modifier=JoinModifier.SEMI,
    )

    assert join.get_column_sets() == {
        "err": ERRORS_SCHEMA,
        "groups": GROUPS_SCHEMA
    }

    joined_cols = join.get_columns()
    assert "err.group_id" in joined_cols
    assert "err.event_id" in joined_cols
    assert "groups.id" in joined_cols
    assert "groups.message" in joined_cols

    with pytest.raises(AssertionError):
        JoinClause(
            left_node=node_err,
            right_node=node_group,
            keys=[
                JoinCondition(
                    left=JoinConditionExpression("err", "missing_col"),
                    right=JoinConditionExpression("groups",
                                                  "another_missing_col"),
                )
            ],
            join_type=JoinType.INNER,
        )
Exemple #6
0
def events_groups_join(
    left: IndividualNode[TNode], right: IndividualNode[TNode],
) -> JoinClause[TNode]:
    return JoinClause(
        left_node=left,
        right_node=right,
        keys=[
            JoinCondition(
                left=JoinConditionExpression("ev", "_snuba_group_id"),
                right=JoinConditionExpression("gr", "_snuba_id"),
            )
        ],
        join_type=JoinType.INNER,
    )
Exemple #7
0
def join_clause(lhs_alias: str, lhs: Union[str, JoinClause[QueryEntity]],
                rhs: str) -> JoinClause[QueryEntity]:
    rhs_alias, rhs = rhs.split(":", 1)
    return JoinClause(
        left_node=node(lhs_alias, lhs) if isinstance(lhs, str) else lhs,
        right_node=node(rhs_alias, rhs),
        keys=[
            JoinCondition(
                left=JoinConditionExpression(lhs_alias, "event_id"),
                right=JoinConditionExpression(rhs_alias, "event_id"),
            )
        ],
        join_type=JoinType.INNER,
    )
Exemple #8
0
def build_join_clause_loop(
    node_list: Node,
    lhs: Optional[Union[IndividualNode[QueryEntity], JoinClause[QueryEntity]]],
) -> Union[IndividualNode[QueryEntity], JoinClause[QueryEntity]]:
    rhs = node_list.entity_data
    if lhs is None:
        lhs = rhs
    else:
        assert node_list.relationship is not None  # mypy
        lhs = JoinClause(
            left_node=lhs,
            right_node=rhs,
            keys=node_list.join_conditions,
            join_type=node_list.relationship.join_type,
        )

    if node_list.child is None:
        return lhs

    return build_join_clause_loop(node_list.child, lhs)
Exemple #9
0
def test_join_query() -> None:
    events_query = LogicalQuery(
        Entity(
            EntityKey.EVENTS,
            ColumnSet([("event_id", String()), ("group_id", UInt(32))]),
        ),
        selected_columns=[
            SelectedExpression("group_id", Column("group_id", None,
                                                  "group_id")),
            SelectedExpression("string_evt_id",
                               Column("string_evt_id", None, "event_id")),
        ],
    )

    groups_query = LogicalQuery(
        Entity(
            EntityKey.GROUPEDMESSAGES,
            ColumnSet([("id", UInt(32)), ("message", String())]),
        ),
        selected_columns=[
            SelectedExpression("group_id", Column("group_id", None, "id"))
        ],
    )

    join_query = CompositeQuery(from_clause=JoinClause(
        left_node=IndividualNode("e", events_query),
        right_node=IndividualNode("g", groups_query),
        keys=[
            JoinCondition(
                left=JoinConditionExpression("e", "group_id"),
                right=JoinConditionExpression("g", "group_id"),
            )
        ],
        join_type=JoinType.INNER,
    ))

    data_source = join_query.get_from_clause()
    assert "e.string_evt_id" in data_source.get_columns()
    assert "g.group_id" in data_source.get_columns()
Exemple #10
0
             ),
         ],
     ),
     id="Query with a subquery",
 ),
 pytest.param(
     CompositeQuery(
         from_clause=JoinClause(
             left_node=IndividualNode(
                 alias="err",
                 data_source=events_ent,
             ),
             right_node=IndividualNode(
                 alias="groups",
                 data_source=groups_ent,
             ),
             keys=[
                 JoinCondition(
                     left=JoinConditionExpression("err", "group_id"),
                     right=JoinConditionExpression("groups", "id"),
                 )
             ],
             join_type=JoinType.INNER,
         ),
         selected_columns=[
             SelectedExpression(
                 "f_release",
                 FunctionCall(
                     "f_release",
                     "f",
                     (Column(None, "err", "release"), ),
 AND t.project_id=1
 AND t.finish_ts>=toDateTime('2021-01-01T00:30:00')
 AND t.finish_ts<toDateTime('2021-01-07T00:30:00')""",
 CompositeQuery(
     from_clause=JoinClause(
         left_node=IndividualNode(
             "e",
             QueryEntity(
                 EntityKey.EVENTS,
                 get_entity(EntityKey.EVENTS).get_data_model(),
             ),
         ),
         right_node=IndividualNode(
             "t",
             QueryEntity(
                 EntityKey.TRANSACTIONS,
                 get_entity(EntityKey.TRANSACTIONS).get_data_model(),
             ),
         ),
         keys=[
             JoinCondition(
                 JoinConditionExpression("e", "event_id"),
                 JoinConditionExpression("t", "event_id"),
             )
         ],
         join_type=JoinType.INNER,
     ),
     selected_columns=[
         SelectedExpression(
             "4-5",
             FunctionCall(None, "minus",
      "SELECT column1, (avg(column2) AS sub_average), column3 "
      "FROM my_table "
      "WHERE eq((column3 AS al), 'blabla') "
      "GROUP BY column2"
      ") "
      "GROUP BY alias"),
     id="Composite query",
 ),
 pytest.param(
     CompositeQuery(
         from_clause=JoinClause(
             left_node=node_err,
             right_node=node_group,
             keys=[
                 JoinCondition(
                     left=JoinConditionExpression("err", "group_id"),
                     right=JoinConditionExpression("groups", "id"),
                 )
             ],
             join_type=JoinType.INNER,
         ),
         selected_columns=[
             SelectedExpression("error_id",
                                Column("error_id", "err", "event_id")),
             SelectedExpression("message",
                                Column("message", "groups", "message")),
         ],
         condition=binary_condition("eq", Column(None, "groups", "id"),
                                    Literal(None, 1)),
     ),
     [
Exemple #13
0
    GROUPS_SCHEMA,
    Events,
    GroupAssignee,
    GroupedMessage,
)

TEST_CASES = [
    pytest.param(
        JoinClause(
            IndividualNode("ev",
                           EntitySource(EntityKey.EVENTS, EVENTS_SCHEMA,
                                        None)),
            IndividualNode(
                "gr",
                EntitySource(EntityKey.GROUPEDMESSAGES, GROUPS_SCHEMA, None)),
            [
                JoinCondition(
                    JoinConditionExpression("ev", "group_id"),
                    JoinConditionExpression("gr", "id"),
                )
            ],
            JoinType.INNER,
            None,
        ),
        {
            QualifiedCol(EntityKey.EVENTS, "group_id"): {
                QualifiedCol(EntityKey.GROUPEDMESSAGES, "id"),
            },
            QualifiedCol(EntityKey.GROUPEDMESSAGES, "id"): {
                QualifiedCol(EntityKey.EVENTS, "group_id"),
            },
            QualifiedCol(EntityKey.EVENTS, "project_id"): {
        )

    assert condition.transform(
        partial(_replace_col, "ev", "project_id", "gr",
                "project_id")) == binary_condition(
                    ConditionFunctions.EQ, Column(None, "gr", "project_id"),
                    Literal(None, 1))


ENTITY_GROUP_JOIN = JoinClause(
    IndividualNode("ev", EntitySource(EntityKey.EVENTS, EVENTS_SCHEMA, None)),
    IndividualNode(
        "gr", EntitySource(EntityKey.GROUPEDMESSAGES, GROUPS_SCHEMA, None)),
    [
        JoinCondition(
            JoinConditionExpression("ev", "group_id"),
            JoinConditionExpression("gr", "id"),
        )
    ],
    JoinType.INNER,
    None,
)

TEST_REPLACEMENT = [
    pytest.param(
        binary_condition(ConditionFunctions.EQ, Column(None, "ev", "event_id"),
                         Literal(None, 1)),
        ENTITY_GROUP_JOIN,
        binary_condition(ConditionFunctions.EQ, Column(None, "ev", "event_id"),
                         Literal(None, 1)),
        id="No condition to add",
Exemple #15
0
from snuba.query.formatters.tracing import TExpression, format_query
from snuba.query.logical import Query as LogicalQuery
from tests.query.joins.equivalence_schema import (
    EVENTS_SCHEMA,
    GROUPS_SCHEMA,
)

BASIC_JOIN = JoinClause(
    left_node=IndividualNode(
        alias="ev",
        data_source=Entity(EntityKey.EVENTS, EVENTS_SCHEMA, None),
    ),
    right_node=IndividualNode(
        alias="gr",
        data_source=Entity(EntityKey.GROUPEDMESSAGES, GROUPS_SCHEMA, None),
    ),
    keys=[
        JoinCondition(
            left=JoinConditionExpression("ev", "group_id"),
            right=JoinConditionExpression("gr", "id"),
        )
    ],
    join_type=JoinType.INNER,
)

LOGICAL_QUERY = LogicalQuery(
    from_clause=Entity(EntityKey.EVENTS, EVENTS_SCHEMA, 0.5),
    selected_columns=[
        SelectedExpression("c1", Column("_snuba_c1", "t", "c")),
        SelectedExpression(
            "f1", FunctionCall("_snuba_f1", "f", (Column(None, "t", "c2"), ))),
             ),
         ],
     ),
     id="Query with a subquery",
 ),
 pytest.param(
     CompositeQuery(
         from_clause=JoinClause(
             left_node=IndividualNode(
                 alias="err",
                 data_source=events_ent,
             ),
             right_node=IndividualNode(
                 alias="groups",
                 data_source=groups_ent,
             ),
             keys=[
                 JoinCondition(
                     left=JoinConditionExpression("err", "group_id"),
                     right=JoinConditionExpression("groups", "id"),
                 )
             ],
             join_type=JoinType.INNER,
         ),
         selected_columns=[
             SelectedExpression(
                 "f_release",
                 FunctionCall(
                     "f_release",
                     "f",
                     (Column(None, "err", "release"), ),
Exemple #17
0
)
from tests.query.joins.join_structures import (
    events_groups_join,
    events_node,
    groups_node,
)

BASIC_JOIN = JoinClause(
    left_node=IndividualNode(
        alias="ev",
        data_source=Entity(EntityKey.EVENTS, EVENTS_SCHEMA, None),
    ),
    right_node=IndividualNode(
        alias="gr",
        data_source=Entity(EntityKey.GROUPEDMESSAGES, GROUPS_SCHEMA, None),
    ),
    keys=[
        JoinCondition(
            left=JoinConditionExpression("ev", "group_id"),
            right=JoinConditionExpression("gr", "id"),
        )
    ],
    join_type=JoinType.INNER,
)

TEST_CASES = [
    pytest.param(
        CompositeQuery(
            from_clause=BASIC_JOIN,
            selected_columns=[],
        ),
Exemple #18
0
     ),
     3,
     {"errors_local"},
     True,
     None,
     id="Nested query. Count the inner query",
 ),
 pytest.param(
     CompositeQuery(
         from_clause=JoinClause(
             left_node=IndividualNode(alias="err", data_source=SIMPLE_QUERY),
             right_node=IndividualNode(
                 alias="groups", data_source=Table("groups_local", GROUPS_SCHEMA)
             ),
             keys=[
                 JoinCondition(
                     left=JoinConditionExpression("err", "group_id"),
                     right=JoinConditionExpression("groups", "id"),
                 )
             ],
             join_type=JoinType.INNER,
         ),
         selected_columns=[
             SelectedExpression(
                 "event_id",
                 FunctionCall("alias", "something", (Column(None, "err", "alias"),)),
             ),
             SelectedExpression(
                 "group_id",
                 Column("group_id", "groups", "group_id"),
             ),
Exemple #19
0
 from_clause=JoinClause(
     left_node=events_groups_join(
         clickhouse_events_node([
             SelectedExpression(
                 "_snuba_group_id",
                 Column("_snuba_group_id", None, "group_id"),
             ),
         ]),
         clickhouse_groups_node([
             SelectedExpression("_snuba_id",
                                Column("_snuba_id", None, "id")),
             SelectedExpression(
                 "_snuba_col1",
                 Column("_snuba_col1", None, "something")),
         ]),
     ),
     right_node=clickhouse_assignees_node([
         SelectedExpression(
             "_snuba_group_id",
             Column("_snuba_group_id", None, "group_id"),
         ),
         SelectedExpression(
             "_snuba_col1", Column("_snuba_col1", None,
                                   "something")),
     ]),
     keys=[
         JoinCondition(
             left=JoinConditionExpression("ev", "_snuba_group_id"),
             right=JoinConditionExpression("as", "_snuba_group_id"),
         )
     ],
     join_type=JoinType.INNER,
 ),
Exemple #20
0
     id="Special array join functions",
 ),
 pytest.param(
     "MATCH (e: events) -[contains]-> (t: transactions) SELECT 4-5, e.c",
     CompositeQuery(
         from_clause=JoinClause(
             left_node=IndividualNode(
                 "e",
                 QueryEntity(
                     EntityKey.EVENTS, get_entity(EntityKey.EVENTS).get_data_model(),
                 ),
             ),
             right_node=IndividualNode(
                 "t",
                 QueryEntity(
                     EntityKey.TRANSACTIONS,
                     get_entity(EntityKey.TRANSACTIONS).get_data_model(),
                 ),
             ),
             keys=[
                 JoinCondition(
                     JoinConditionExpression("e", "event_id"),
                     JoinConditionExpression("t", "event_id"),
                 )
             ],
             join_type=JoinType.INNER,
         ),
         selected_columns=[
             SelectedExpression(
                 "4-5",
                 FunctionCall(None, "minus", (Literal(None, 4), Literal(None, 5))),
             ),