Exemple #1
0
    def visit_join_clause(self,
                          node: JoinClause[Entity]) -> JoinClause[Entity]:
        """
        This tweaks the names of the columns in the ON clause as they
        cannot reference the entity fields directly (as in the original
        query) but they have to reference the mangled aliases generated
        by SubqueriesInitializer.

        This is needed because Clickhouse does not know about the
        names of the SelectedExpression nodes in the AST. It is only
        aware of aliases as a way to reference expressions from the
        external query to the subqueries.
        """
        left = node.left_node.accept(self)
        right = self.visit_individual_node(node.right_node)
        keys = [
            JoinCondition(
                left=JoinConditionExpression(k.left.table_alias,
                                             aliasify_column(k.left.column)),
                right=JoinConditionExpression(k.right.table_alias,
                                              aliasify_column(k.right.column)),
            ) for k in node.keys
        ]
        return JoinClause(left, right, keys, node.join_type,
                          node.join_modifier)
Exemple #2
0
def test_complex_joins() -> None:
    e = Entity(key=EntityKey.EVENTS, schema=ERRORS_SCHEMA)
    node_err = IndividualNode(alias="err", data_source=e)

    g = Entity(key=EntityKey.GROUPEDMESSAGES, schema=GROUPS_SCHEMA)
    node_group = IndividualNode(alias="groups", data_source=g)

    a = Entity(key=EntityKey.GROUPASSIGNEE, schema=GROUPS_ASSIGNEE)
    query = Query(
        from_clause=a,
        selected_columns=[
            SelectedExpression("id", Column("id", None, "id")),
            SelectedExpression("assigned_user",
                               Column("assigned_user", None, "user")),
        ],
    )
    node_query = IndividualNode(alias="assignee", data_source=query)

    join = JoinClause(
        left_node=JoinClause(
            left_node=node_err,
            right_node=node_group,
            keys=[
                JoinCondition(
                    left=JoinConditionExpression("err", "group_id"),
                    right=JoinConditionExpression("groups", "id"),
                )
            ],
            join_type=JoinType.INNER,
        ),
        right_node=node_query,
        keys=[
            JoinCondition(
                left=JoinConditionExpression("err", "group_id"),
                right=JoinConditionExpression("assignee", "id"),
            )
        ],
        join_type=JoinType.INNER,
    )

    assert join.get_column_sets() == {
        "err": ERRORS_SCHEMA,
        "assignee": ColumnSet([("id", Any()), ("assigned_user", Any())]),
        "groups": GROUPS_SCHEMA,
    }
Exemple #3
0
def test_simple_join() -> None:
    e = Entity(key=EntityKey.EVENTS, schema=ERRORS_SCHEMA)
    node_err = IndividualNode(alias="err", data_source=e)

    g = Entity(key=EntityKey.GROUPEDMESSAGES, schema=GROUPS_SCHEMA)
    node_group = IndividualNode(alias="groups", data_source=g)

    join = JoinClause(
        left_node=node_err,
        right_node=node_group,
        keys=[
            JoinCondition(
                left=JoinConditionExpression("err", "group_id"),
                right=JoinConditionExpression("groups", "id"),
            )
        ],
        join_type=JoinType.INNER,
        join_modifier=JoinModifier.SEMI,
    )

    assert join.get_column_sets() == {
        "err": ERRORS_SCHEMA,
        "groups": GROUPS_SCHEMA
    }

    joined_cols = join.get_columns()
    assert "err.group_id" in joined_cols
    assert "err.event_id" in joined_cols
    assert "groups.id" in joined_cols
    assert "groups.message" in joined_cols

    with pytest.raises(AssertionError):
        JoinClause(
            left_node=node_err,
            right_node=node_group,
            keys=[
                JoinCondition(
                    left=JoinConditionExpression("err", "missing_col"),
                    right=JoinConditionExpression("groups",
                                                  "another_missing_col"),
                )
            ],
            join_type=JoinType.INNER,
        )
Exemple #4
0
def join_clause(lhs_alias: str, lhs: Union[str, JoinClause[QueryEntity]],
                rhs: str) -> JoinClause[QueryEntity]:
    rhs_alias, rhs = rhs.split(":", 1)
    return JoinClause(
        left_node=node(lhs_alias, lhs) if isinstance(lhs, str) else lhs,
        right_node=node(rhs_alias, rhs),
        keys=[
            JoinCondition(
                left=JoinConditionExpression(lhs_alias, "event_id"),
                right=JoinConditionExpression(rhs_alias, "event_id"),
            )
        ],
        join_type=JoinType.INNER,
    )
Exemple #5
0
def events_groups_join(
    left: IndividualNode[TNode], right: IndividualNode[TNode],
) -> JoinClause[TNode]:
    return JoinClause(
        left_node=left,
        right_node=right,
        keys=[
            JoinCondition(
                left=JoinConditionExpression("ev", "_snuba_group_id"),
                right=JoinConditionExpression("gr", "_snuba_id"),
            )
        ],
        join_type=JoinType.INNER,
    )
Exemple #6
0
    def build_join_conditions(self, rhs: Node) -> None:
        if rhs.relationship is None:
            return

        join_conditions = []
        for lhs_column, rhs_column in rhs.relationship.columns:
            join_conditions.append(
                JoinCondition(
                    left=JoinConditionExpression(self.entity_data.alias,
                                                 lhs_column),
                    right=JoinConditionExpression(rhs.entity_data.alias,
                                                  rhs_column),
                ))

        # The join conditions are put into the right hand side since the left hand side
        # can have many children, each with different join conditions. This way each child
        # tracks how it is joined to its parent, since each child has exactly one parent.
        rhs.join_conditions = join_conditions
Exemple #7
0
def test_join_query() -> None:
    events_query = LogicalQuery(
        Entity(
            EntityKey.EVENTS,
            ColumnSet([("event_id", String()), ("group_id", UInt(32))]),
        ),
        selected_columns=[
            SelectedExpression("group_id", Column("group_id", None,
                                                  "group_id")),
            SelectedExpression("string_evt_id",
                               Column("string_evt_id", None, "event_id")),
        ],
    )

    groups_query = LogicalQuery(
        Entity(
            EntityKey.GROUPEDMESSAGES,
            ColumnSet([("id", UInt(32)), ("message", String())]),
        ),
        selected_columns=[
            SelectedExpression("group_id", Column("group_id", None, "id"))
        ],
    )

    join_query = CompositeQuery(from_clause=JoinClause(
        left_node=IndividualNode("e", events_query),
        right_node=IndividualNode("g", groups_query),
        keys=[
            JoinCondition(
                left=JoinConditionExpression("e", "group_id"),
                right=JoinConditionExpression("g", "group_id"),
            )
        ],
        join_type=JoinType.INNER,
    ))

    data_source = join_query.get_from_clause()
    assert "e.string_evt_id" in data_source.get_columns()
    assert "g.group_id" in data_source.get_columns()
Exemple #8
0
     id="Query with a subquery",
 ),
 pytest.param(
     CompositeQuery(
         from_clause=JoinClause(
             left_node=IndividualNode(
                 alias="err",
                 data_source=events_ent,
             ),
             right_node=IndividualNode(
                 alias="groups",
                 data_source=groups_ent,
             ),
             keys=[
                 JoinCondition(
                     left=JoinConditionExpression("err", "group_id"),
                     right=JoinConditionExpression("groups", "id"),
                 )
             ],
             join_type=JoinType.INNER,
         ),
         selected_columns=[
             SelectedExpression(
                 "f_release",
                 FunctionCall(
                     "f_release",
                     "f",
                     (Column(None, "err", "release"), ),
                 ),
             ),
             SelectedExpression(
                 "_snuba_right",
         "e",
         QueryEntity(
             EntityKey.EVENTS,
             get_entity(EntityKey.EVENTS).get_data_model(),
         ),
     ),
     right_node=IndividualNode(
         "t",
         QueryEntity(
             EntityKey.TRANSACTIONS,
             get_entity(EntityKey.TRANSACTIONS).get_data_model(),
         ),
     ),
     keys=[
         JoinCondition(
             JoinConditionExpression("e", "event_id"),
             JoinConditionExpression("t", "event_id"),
         )
     ],
     join_type=JoinType.INNER,
 ),
 selected_columns=[
     SelectedExpression(
         "4-5",
         FunctionCall(None, "minus",
                      (Literal(None, 4), Literal(None, 5))),
     ),
     SelectedExpression("e.c", Column("_snuba_e.c", "e", "c")),
 ],
 condition=binary_condition(
     "and",
     binary_condition(
      "SELECT column1, (avg(column2) AS sub_average), column3 "
      "FROM my_table "
      "WHERE eq((column3 AS al), 'blabla') "
      "GROUP BY column2"
      ") "
      "GROUP BY alias"),
     id="Composite query",
 ),
 pytest.param(
     CompositeQuery(
         from_clause=JoinClause(
             left_node=node_err,
             right_node=node_group,
             keys=[
                 JoinCondition(
                     left=JoinConditionExpression("err", "group_id"),
                     right=JoinConditionExpression("groups", "id"),
                 )
             ],
             join_type=JoinType.INNER,
         ),
         selected_columns=[
             SelectedExpression("error_id",
                                Column("error_id", "err", "event_id")),
             SelectedExpression("message",
                                Column("message", "groups", "message")),
         ],
         condition=binary_condition("eq", Column(None, "groups", "id"),
                                    Literal(None, 1)),
     ),
     [
         "SELECT (err.event_id AS error_id), (groups.message AS message)",
Exemple #11
0
    GroupAssignee,
    GroupedMessage,
)

TEST_CASES = [
    pytest.param(
        JoinClause(
            IndividualNode("ev",
                           EntitySource(EntityKey.EVENTS, EVENTS_SCHEMA,
                                        None)),
            IndividualNode(
                "gr",
                EntitySource(EntityKey.GROUPEDMESSAGES, GROUPS_SCHEMA, None)),
            [
                JoinCondition(
                    JoinConditionExpression("ev", "group_id"),
                    JoinConditionExpression("gr", "id"),
                )
            ],
            JoinType.INNER,
            None,
        ),
        {
            QualifiedCol(EntityKey.EVENTS, "group_id"): {
                QualifiedCol(EntityKey.GROUPEDMESSAGES, "id"),
            },
            QualifiedCol(EntityKey.GROUPEDMESSAGES, "id"): {
                QualifiedCol(EntityKey.EVENTS, "group_id"),
            },
            QualifiedCol(EntityKey.EVENTS, "project_id"): {
                QualifiedCol(EntityKey.GROUPEDMESSAGES, "project_id"),
            },
Exemple #12
0
    EVENTS_SCHEMA,
    GROUPS_SCHEMA,
)

BASIC_JOIN = JoinClause(
    left_node=IndividualNode(
        alias="ev",
        data_source=Entity(EntityKey.EVENTS, EVENTS_SCHEMA, None),
    ),
    right_node=IndividualNode(
        alias="gr",
        data_source=Entity(EntityKey.GROUPEDMESSAGES, GROUPS_SCHEMA, None),
    ),
    keys=[
        JoinCondition(
            left=JoinConditionExpression("ev", "group_id"),
            right=JoinConditionExpression("gr", "id"),
        )
    ],
    join_type=JoinType.INNER,
)

LOGICAL_QUERY = LogicalQuery(
    from_clause=Entity(EntityKey.EVENTS, EVENTS_SCHEMA, 0.5),
    selected_columns=[
        SelectedExpression("c1", Column("_snuba_c1", "t", "c")),
        SelectedExpression(
            "f1", FunctionCall("_snuba_f1", "f", (Column(None, "t", "c2"), ))),
    ],
    array_join=Column(None, None, "col"),
    condition=binary_condition("equals", Column(None, None, "c4"),
                               Literal(None, "asd")),
Exemple #13
0
             left_node=IndividualNode(
                 "e",
                 QueryEntity(
                     EntityKey.EVENTS, get_entity(EntityKey.EVENTS).get_data_model(),
                 ),
             ),
             right_node=IndividualNode(
                 "t",
                 QueryEntity(
                     EntityKey.TRANSACTIONS,
                     get_entity(EntityKey.TRANSACTIONS).get_data_model(),
                 ),
             ),
             keys=[
                 JoinCondition(
                     JoinConditionExpression("e", "event_id"),
                     JoinConditionExpression("t", "event_id"),
                 )
             ],
             join_type=JoinType.INNER,
         ),
         selected_columns=[
             SelectedExpression(
                 "4-5",
                 FunctionCall(None, "minus", (Literal(None, 4), Literal(None, 5))),
             ),
             SelectedExpression("e.c", Column("_snuba_e.c", "e", "c")),
         ],
     ),
     id="Basic join match",
 ),
 pytest.param(
Exemple #14
0
                         "_snuba_col1",
                         Column("_snuba_col1", None, "something")),
                 ]),
             ),
             right_node=clickhouse_assignees_node([
                 SelectedExpression(
                     "_snuba_group_id",
                     Column("_snuba_group_id", None, "group_id"),
                 ),
                 SelectedExpression(
                     "_snuba_col1", Column("_snuba_col1", None,
                                           "something")),
             ]),
             keys=[
                 JoinCondition(
                     left=JoinConditionExpression("ev", "_snuba_group_id"),
                     right=JoinConditionExpression("as", "_snuba_group_id"),
                 )
             ],
             join_type=JoinType.INNER,
         ),
         selected_columns=[
             SelectedExpression("group_id",
                                Column("_snuba_col1", "gr", "_snuba_col1"))
         ],
     ),
     {
         "gr": None,
         "as": JoinModifier.ANY
     },
     id="Multi table join, make only the right one a semi join.",
 ),
        )

    assert condition.transform(
        partial(_replace_col, "ev", "project_id", "gr",
                "project_id")) == binary_condition(
                    ConditionFunctions.EQ, Column(None, "gr", "project_id"),
                    Literal(None, 1))


ENTITY_GROUP_JOIN = JoinClause(
    IndividualNode("ev", EntitySource(EntityKey.EVENTS, EVENTS_SCHEMA, None)),
    IndividualNode(
        "gr", EntitySource(EntityKey.GROUPEDMESSAGES, GROUPS_SCHEMA, None)),
    [
        JoinCondition(
            JoinConditionExpression("ev", "group_id"),
            JoinConditionExpression("gr", "id"),
        )
    ],
    JoinType.INNER,
    None,
)

TEST_REPLACEMENT = [
    pytest.param(
        binary_condition(ConditionFunctions.EQ, Column(None, "ev", "event_id"),
                         Literal(None, 1)),
        ENTITY_GROUP_JOIN,
        binary_condition(ConditionFunctions.EQ, Column(None, "ev", "event_id"),
                         Literal(None, 1)),
        id="No condition to add",
    ),