コード例 #1
0
    def visit_join_clause(self, node: JoinClause[Table]) -> JoinClause[Table]:
        # TODO: Skip this optimization if the left side of the query
        # runs aggregations that imply the cardinality of the right
        # side is important.

        join_cols = set()

        for condition in node.keys:
            if condition.left.table_alias == node.right_node.alias:
                join_cols.add(condition.left.column)
            elif condition.right.table_alias == node.right_node.alias:
                join_cols.add(condition.right.column)

        for c in self.__referenced_columns:
            if c.table_name == node.right_node.alias and c.column_name not in join_cols:
                return node

        return JoinClause(
            left_node=node.left_node.accept(self),
            right_node=node.right_node,
            keys=node.keys,
            join_type=node.join_type,
            join_modifier=JoinModifier.ANY
            if node.join_type == JoinType.INNER else JoinModifier.SEMI,
        )
コード例 #2
0
ファイル: composite.py プロジェクト: pombredanne/snuba
    def visit_join_clause(self,
                          node: JoinClause[Entity]) -> JoinDataSourcePlan:
        left_node = node.left_node.accept(self)
        right_node = self.visit_individual_node(node.right_node)

        # TODO: Actually return multiple plans for each subquery (one per storage
        # set) and rank them picking a combination that fits in a single storage
        # set or valid storage set combination.
        assert is_valid_storage_set_combination(
            left_node.storage_set_key, right_node.storage_set_key
        ), f"Incompatible storage sets found in plan: {left_node.storage_set_key} {right_node.storage_set_key}"

        # mypy does not know that the method above only produces individual nodes.
        assert isinstance(right_node.translated_source, IndividualNode)
        return JoinDataSourcePlan(
            translated_source=JoinClause(
                left_node=left_node.translated_source,
                right_node=right_node.translated_source,
                keys=node.keys,
                join_type=node.join_type,
                join_modifier=node.join_modifier,
            ),
            processors={
                **left_node.processors,
                **right_node.processors
            },
            storage_set_key=left_node.storage_set_key,
        )
コード例 #3
0
    def visit_join_clause(self,
                          node: JoinClause[Entity]) -> JoinClause[Entity]:
        """
        This tweaks the names of the columns in the ON clause as they
        cannot reference the entity fields directly (as in the original
        query) but they have to reference the mangled aliases generated
        by SubqueriesInitializer.

        This is needed because Clickhouse does not know about the
        names of the SelectedExpression nodes in the AST. It is only
        aware of aliases as a way to reference expressions from the
        external query to the subqueries.
        """
        left = node.left_node.accept(self)
        right = self.visit_individual_node(node.right_node)
        keys = [
            JoinCondition(
                left=JoinConditionExpression(k.left.table_alias,
                                             aliasify_column(k.left.column)),
                right=JoinConditionExpression(k.right.table_alias,
                                              aliasify_column(k.right.column)),
            ) for k in node.keys
        ]
        return JoinClause(left, right, keys, node.join_type,
                          node.join_modifier)
コード例 #4
0
ファイル: test_join.py プロジェクト: pombredanne/snuba
def test_complex_joins() -> None:
    e = Entity(key=EntityKey.EVENTS, schema=ERRORS_SCHEMA)
    node_err = IndividualNode(alias="err", data_source=e)

    g = Entity(key=EntityKey.GROUPEDMESSAGES, schema=GROUPS_SCHEMA)
    node_group = IndividualNode(alias="groups", data_source=g)

    a = Entity(key=EntityKey.GROUPASSIGNEE, schema=GROUPS_ASSIGNEE)
    query = Query(
        from_clause=a,
        selected_columns=[
            SelectedExpression("id", Column("id", None, "id")),
            SelectedExpression("assigned_user",
                               Column("assigned_user", None, "user")),
        ],
    )
    node_query = IndividualNode(alias="assignee", data_source=query)

    join = JoinClause(
        left_node=JoinClause(
            left_node=node_err,
            right_node=node_group,
            keys=[
                JoinCondition(
                    left=JoinConditionExpression("err", "group_id"),
                    right=JoinConditionExpression("groups", "id"),
                )
            ],
            join_type=JoinType.INNER,
        ),
        right_node=node_query,
        keys=[
            JoinCondition(
                left=JoinConditionExpression("err", "group_id"),
                right=JoinConditionExpression("assignee", "id"),
            )
        ],
        join_type=JoinType.INNER,
    )

    assert join.get_column_sets() == {
        "err": ERRORS_SCHEMA,
        "assignee": ColumnSet([("id", Any()), ("assigned_user", Any())]),
        "groups": GROUPS_SCHEMA,
    }
コード例 #5
0
ファイル: test_join.py プロジェクト: pombredanne/snuba
def test_simple_join() -> None:
    e = Entity(key=EntityKey.EVENTS, schema=ERRORS_SCHEMA)
    node_err = IndividualNode(alias="err", data_source=e)

    g = Entity(key=EntityKey.GROUPEDMESSAGES, schema=GROUPS_SCHEMA)
    node_group = IndividualNode(alias="groups", data_source=g)

    join = JoinClause(
        left_node=node_err,
        right_node=node_group,
        keys=[
            JoinCondition(
                left=JoinConditionExpression("err", "group_id"),
                right=JoinConditionExpression("groups", "id"),
            )
        ],
        join_type=JoinType.INNER,
        join_modifier=JoinModifier.SEMI,
    )

    assert join.get_column_sets() == {
        "err": ERRORS_SCHEMA,
        "groups": GROUPS_SCHEMA
    }

    joined_cols = join.get_columns()
    assert "err.group_id" in joined_cols
    assert "err.event_id" in joined_cols
    assert "groups.id" in joined_cols
    assert "groups.message" in joined_cols

    with pytest.raises(AssertionError):
        JoinClause(
            left_node=node_err,
            right_node=node_group,
            keys=[
                JoinCondition(
                    left=JoinConditionExpression("err", "missing_col"),
                    right=JoinConditionExpression("groups",
                                                  "another_missing_col"),
                )
            ],
            join_type=JoinType.INNER,
        )
コード例 #6
0
def events_groups_join(
    left: IndividualNode[TNode], right: IndividualNode[TNode],
) -> JoinClause[TNode]:
    return JoinClause(
        left_node=left,
        right_node=right,
        keys=[
            JoinCondition(
                left=JoinConditionExpression("ev", "_snuba_group_id"),
                right=JoinConditionExpression("gr", "_snuba_id"),
            )
        ],
        join_type=JoinType.INNER,
    )
コード例 #7
0
def join_clause(lhs_alias: str, lhs: Union[str, JoinClause[QueryEntity]],
                rhs: str) -> JoinClause[QueryEntity]:
    rhs_alias, rhs = rhs.split(":", 1)
    return JoinClause(
        left_node=node(lhs_alias, lhs) if isinstance(lhs, str) else lhs,
        right_node=node(rhs_alias, rhs),
        keys=[
            JoinCondition(
                left=JoinConditionExpression(lhs_alias, "event_id"),
                right=JoinConditionExpression(rhs_alias, "event_id"),
            )
        ],
        join_type=JoinType.INNER,
    )
コード例 #8
0
ファイル: joins.py プロジェクト: getsentry/snuba
def build_join_clause_loop(
    node_list: Node,
    lhs: Optional[Union[IndividualNode[QueryEntity], JoinClause[QueryEntity]]],
) -> Union[IndividualNode[QueryEntity], JoinClause[QueryEntity]]:
    rhs = node_list.entity_data
    if lhs is None:
        lhs = rhs
    else:
        assert node_list.relationship is not None  # mypy
        lhs = JoinClause(
            left_node=lhs,
            right_node=rhs,
            keys=node_list.join_conditions,
            join_type=node_list.relationship.join_type,
        )

    if node_list.child is None:
        return lhs

    return build_join_clause_loop(node_list.child, lhs)
コード例 #9
0
def test_join_query() -> None:
    events_query = LogicalQuery(
        Entity(
            EntityKey.EVENTS,
            ColumnSet([("event_id", String()), ("group_id", UInt(32))]),
        ),
        selected_columns=[
            SelectedExpression("group_id", Column("group_id", None,
                                                  "group_id")),
            SelectedExpression("string_evt_id",
                               Column("string_evt_id", None, "event_id")),
        ],
    )

    groups_query = LogicalQuery(
        Entity(
            EntityKey.GROUPEDMESSAGES,
            ColumnSet([("id", UInt(32)), ("message", String())]),
        ),
        selected_columns=[
            SelectedExpression("group_id", Column("group_id", None, "id"))
        ],
    )

    join_query = CompositeQuery(from_clause=JoinClause(
        left_node=IndividualNode("e", events_query),
        right_node=IndividualNode("g", groups_query),
        keys=[
            JoinCondition(
                left=JoinConditionExpression("e", "group_id"),
                right=JoinConditionExpression("g", "group_id"),
            )
        ],
        join_type=JoinType.INNER,
    ))

    data_source = join_query.get_from_clause()
    assert "e.string_evt_id" in data_source.get_columns()
    assert "g.group_id" in data_source.get_columns()
コード例 #10
0
             ),
         ],
     ),
     id="Query with a subquery",
 ),
 pytest.param(
     CompositeQuery(
         from_clause=JoinClause(
             left_node=IndividualNode(
                 alias="err",
                 data_source=events_ent,
             ),
             right_node=IndividualNode(
                 alias="groups",
                 data_source=groups_ent,
             ),
             keys=[
                 JoinCondition(
                     left=JoinConditionExpression("err", "group_id"),
                     right=JoinConditionExpression("groups", "id"),
                 )
             ],
             join_type=JoinType.INNER,
         ),
         selected_columns=[
             SelectedExpression(
                 "f_release",
                 FunctionCall(
                     "f_release",
                     "f",
                     (Column(None, "err", "release"), ),
コード例 #11
0
 AND t.project_id=1
 AND t.finish_ts>=toDateTime('2021-01-01T00:30:00')
 AND t.finish_ts<toDateTime('2021-01-07T00:30:00')""",
 CompositeQuery(
     from_clause=JoinClause(
         left_node=IndividualNode(
             "e",
             QueryEntity(
                 EntityKey.EVENTS,
                 get_entity(EntityKey.EVENTS).get_data_model(),
             ),
         ),
         right_node=IndividualNode(
             "t",
             QueryEntity(
                 EntityKey.TRANSACTIONS,
                 get_entity(EntityKey.TRANSACTIONS).get_data_model(),
             ),
         ),
         keys=[
             JoinCondition(
                 JoinConditionExpression("e", "event_id"),
                 JoinConditionExpression("t", "event_id"),
             )
         ],
         join_type=JoinType.INNER,
     ),
     selected_columns=[
         SelectedExpression(
             "4-5",
             FunctionCall(None, "minus",
コード例 #12
0
      "SELECT column1, (avg(column2) AS sub_average), column3 "
      "FROM my_table "
      "WHERE eq((column3 AS al), 'blabla') "
      "GROUP BY column2"
      ") "
      "GROUP BY alias"),
     id="Composite query",
 ),
 pytest.param(
     CompositeQuery(
         from_clause=JoinClause(
             left_node=node_err,
             right_node=node_group,
             keys=[
                 JoinCondition(
                     left=JoinConditionExpression("err", "group_id"),
                     right=JoinConditionExpression("groups", "id"),
                 )
             ],
             join_type=JoinType.INNER,
         ),
         selected_columns=[
             SelectedExpression("error_id",
                                Column("error_id", "err", "event_id")),
             SelectedExpression("message",
                                Column("message", "groups", "message")),
         ],
         condition=binary_condition("eq", Column(None, "groups", "id"),
                                    Literal(None, 1)),
     ),
     [
コード例 #13
0
    GROUPS_SCHEMA,
    Events,
    GroupAssignee,
    GroupedMessage,
)

TEST_CASES = [
    pytest.param(
        JoinClause(
            IndividualNode("ev",
                           EntitySource(EntityKey.EVENTS, EVENTS_SCHEMA,
                                        None)),
            IndividualNode(
                "gr",
                EntitySource(EntityKey.GROUPEDMESSAGES, GROUPS_SCHEMA, None)),
            [
                JoinCondition(
                    JoinConditionExpression("ev", "group_id"),
                    JoinConditionExpression("gr", "id"),
                )
            ],
            JoinType.INNER,
            None,
        ),
        {
            QualifiedCol(EntityKey.EVENTS, "group_id"): {
                QualifiedCol(EntityKey.GROUPEDMESSAGES, "id"),
            },
            QualifiedCol(EntityKey.GROUPEDMESSAGES, "id"): {
                QualifiedCol(EntityKey.EVENTS, "group_id"),
            },
            QualifiedCol(EntityKey.EVENTS, "project_id"): {
コード例 #14
0
        )

    assert condition.transform(
        partial(_replace_col, "ev", "project_id", "gr",
                "project_id")) == binary_condition(
                    ConditionFunctions.EQ, Column(None, "gr", "project_id"),
                    Literal(None, 1))


ENTITY_GROUP_JOIN = JoinClause(
    IndividualNode("ev", EntitySource(EntityKey.EVENTS, EVENTS_SCHEMA, None)),
    IndividualNode(
        "gr", EntitySource(EntityKey.GROUPEDMESSAGES, GROUPS_SCHEMA, None)),
    [
        JoinCondition(
            JoinConditionExpression("ev", "group_id"),
            JoinConditionExpression("gr", "id"),
        )
    ],
    JoinType.INNER,
    None,
)

TEST_REPLACEMENT = [
    pytest.param(
        binary_condition(ConditionFunctions.EQ, Column(None, "ev", "event_id"),
                         Literal(None, 1)),
        ENTITY_GROUP_JOIN,
        binary_condition(ConditionFunctions.EQ, Column(None, "ev", "event_id"),
                         Literal(None, 1)),
        id="No condition to add",
コード例 #15
0
from snuba.query.formatters.tracing import TExpression, format_query
from snuba.query.logical import Query as LogicalQuery
from tests.query.joins.equivalence_schema import (
    EVENTS_SCHEMA,
    GROUPS_SCHEMA,
)

BASIC_JOIN = JoinClause(
    left_node=IndividualNode(
        alias="ev",
        data_source=Entity(EntityKey.EVENTS, EVENTS_SCHEMA, None),
    ),
    right_node=IndividualNode(
        alias="gr",
        data_source=Entity(EntityKey.GROUPEDMESSAGES, GROUPS_SCHEMA, None),
    ),
    keys=[
        JoinCondition(
            left=JoinConditionExpression("ev", "group_id"),
            right=JoinConditionExpression("gr", "id"),
        )
    ],
    join_type=JoinType.INNER,
)

LOGICAL_QUERY = LogicalQuery(
    from_clause=Entity(EntityKey.EVENTS, EVENTS_SCHEMA, 0.5),
    selected_columns=[
        SelectedExpression("c1", Column("_snuba_c1", "t", "c")),
        SelectedExpression(
            "f1", FunctionCall("_snuba_f1", "f", (Column(None, "t", "c2"), ))),
コード例 #16
0
             ),
         ],
     ),
     id="Query with a subquery",
 ),
 pytest.param(
     CompositeQuery(
         from_clause=JoinClause(
             left_node=IndividualNode(
                 alias="err",
                 data_source=events_ent,
             ),
             right_node=IndividualNode(
                 alias="groups",
                 data_source=groups_ent,
             ),
             keys=[
                 JoinCondition(
                     left=JoinConditionExpression("err", "group_id"),
                     right=JoinConditionExpression("groups", "id"),
                 )
             ],
             join_type=JoinType.INNER,
         ),
         selected_columns=[
             SelectedExpression(
                 "f_release",
                 FunctionCall(
                     "f_release",
                     "f",
                     (Column(None, "err", "release"), ),
コード例 #17
0
)
from tests.query.joins.join_structures import (
    events_groups_join,
    events_node,
    groups_node,
)

BASIC_JOIN = JoinClause(
    left_node=IndividualNode(
        alias="ev",
        data_source=Entity(EntityKey.EVENTS, EVENTS_SCHEMA, None),
    ),
    right_node=IndividualNode(
        alias="gr",
        data_source=Entity(EntityKey.GROUPEDMESSAGES, GROUPS_SCHEMA, None),
    ),
    keys=[
        JoinCondition(
            left=JoinConditionExpression("ev", "group_id"),
            right=JoinConditionExpression("gr", "id"),
        )
    ],
    join_type=JoinType.INNER,
)

TEST_CASES = [
    pytest.param(
        CompositeQuery(
            from_clause=BASIC_JOIN,
            selected_columns=[],
        ),
コード例 #18
0
ファイル: test_count_columns.py プロジェクト: getsentry/snuba
     ),
     3,
     {"errors_local"},
     True,
     None,
     id="Nested query. Count the inner query",
 ),
 pytest.param(
     CompositeQuery(
         from_clause=JoinClause(
             left_node=IndividualNode(alias="err", data_source=SIMPLE_QUERY),
             right_node=IndividualNode(
                 alias="groups", data_source=Table("groups_local", GROUPS_SCHEMA)
             ),
             keys=[
                 JoinCondition(
                     left=JoinConditionExpression("err", "group_id"),
                     right=JoinConditionExpression("groups", "id"),
                 )
             ],
             join_type=JoinType.INNER,
         ),
         selected_columns=[
             SelectedExpression(
                 "event_id",
                 FunctionCall("alias", "something", (Column(None, "err", "alias"),)),
             ),
             SelectedExpression(
                 "group_id",
                 Column("group_id", "groups", "group_id"),
             ),
コード例 #19
0
ファイル: test_semi_join.py プロジェクト: getsentry/snuba
 from_clause=JoinClause(
     left_node=events_groups_join(
         clickhouse_events_node([
             SelectedExpression(
                 "_snuba_group_id",
                 Column("_snuba_group_id", None, "group_id"),
             ),
         ]),
         clickhouse_groups_node([
             SelectedExpression("_snuba_id",
                                Column("_snuba_id", None, "id")),
             SelectedExpression(
                 "_snuba_col1",
                 Column("_snuba_col1", None, "something")),
         ]),
     ),
     right_node=clickhouse_assignees_node([
         SelectedExpression(
             "_snuba_group_id",
             Column("_snuba_group_id", None, "group_id"),
         ),
         SelectedExpression(
             "_snuba_col1", Column("_snuba_col1", None,
                                   "something")),
     ]),
     keys=[
         JoinCondition(
             left=JoinConditionExpression("ev", "_snuba_group_id"),
             right=JoinConditionExpression("as", "_snuba_group_id"),
         )
     ],
     join_type=JoinType.INNER,
 ),
コード例 #20
0
     id="Special array join functions",
 ),
 pytest.param(
     "MATCH (e: events) -[contains]-> (t: transactions) SELECT 4-5, e.c",
     CompositeQuery(
         from_clause=JoinClause(
             left_node=IndividualNode(
                 "e",
                 QueryEntity(
                     EntityKey.EVENTS, get_entity(EntityKey.EVENTS).get_data_model(),
                 ),
             ),
             right_node=IndividualNode(
                 "t",
                 QueryEntity(
                     EntityKey.TRANSACTIONS,
                     get_entity(EntityKey.TRANSACTIONS).get_data_model(),
                 ),
             ),
             keys=[
                 JoinCondition(
                     JoinConditionExpression("e", "event_id"),
                     JoinConditionExpression("t", "event_id"),
                 )
             ],
             join_type=JoinType.INNER,
         ),
         selected_columns=[
             SelectedExpression(
                 "4-5",
                 FunctionCall(None, "minus", (Literal(None, 4), Literal(None, 5))),
             ),