Example #1
0
def test_nested_query() -> None:
    """
    Wraps a logical query in a composite query and checks two things:
    column iteration stays on the outer query, and the from clause
    exposes the inner query's selected columns as its schema.
    """

    events_entity = Entity(
        EntityKey.EVENTS, ColumnSet([("event_id", String())])
    )
    inner_selection = [
        SelectedExpression(
            "string_evt_id", Column("string_evt_id", None, "event_id")
        )
    ]
    nested = LogicalQuery(events_entity, selected_columns=inner_selection)

    outer_selection = [
        SelectedExpression(
            "output", Column("output", None, "string_evt_id")
        )
    ]
    composite = CompositeQuery(
        from_clause=nested,
        selected_columns=outer_selection,
    )

    # Iterating over the composite query must not descend into the
    # nested query: only the outer column is reported.
    expected_referenced = {Column("output", None, "string_evt_id")}
    assert composite.get_all_ast_referenced_columns() == expected_referenced

    # The nested query's schema is its own selected clause.
    expected_schema = ColumnSet([("string_evt_id", Any())])
    assert composite.get_from_clause().get_columns() == expected_schema
def test_add_equivalent_condition(
    initial_condition: Expression,
    join_clause: JoinClause[EntitySource],
    expected_expr: Expression,
) -> None:
    """
    Applies ``add_equivalent_conditions`` to a composite query built on
    ``join_clause`` and checks the condition it ends up with.

    :param initial_condition: Condition the query starts with.
    :param join_clause: Join used as the from clause of the query.
    :param expected_expr: Condition expected after the call.
    """
    try:
        # The entity registry is global state, so registration happens
        # inside the ``try`` to guarantee the cleanup below always runs.
        ENTITY_IMPL[EntityKey.EVENTS] = Events()
        ENTITY_IMPL[EntityKey.GROUPEDMESSAGES] = GroupedMessage()

        query = CompositeQuery(
            from_clause=join_clause,
            selected_columns=[
                SelectedExpression(
                    "group_id",
                    FunctionCall(
                        "something", "f", (Column(None, "gr", "id"),)
                    ),
                )
            ],
            condition=initial_condition,
        )
        add_equivalent_conditions(query)
        assert query.get_condition() == expected_expr
    finally:
        # Clear the registry even when the assertion (or the call under
        # test) fails, so the entries do not leak into other tests.
        ENTITY_IMPL.clear()
Example #3
0
def test_join_query() -> None:
    """
    Joins two logical queries under the aliases "e" and "g" and checks
    that the join's column set contains the alias-prefixed names of the
    columns each subquery selects.
    """
    events_entity = Entity(
        EntityKey.EVENTS,
        ColumnSet([("event_id", String()), ("group_id", UInt(32))]),
    )
    events_selection = [
        SelectedExpression("group_id", Column("group_id", None, "group_id")),
        SelectedExpression(
            "string_evt_id", Column("string_evt_id", None, "event_id")
        ),
    ]
    events_query = LogicalQuery(
        events_entity, selected_columns=events_selection
    )

    groups_entity = Entity(
        EntityKey.GROUPEDMESSAGES,
        ColumnSet([("id", UInt(32)), ("message", String())]),
    )
    groups_selection = [
        SelectedExpression("group_id", Column("group_id", None, "id"))
    ]
    groups_query = LogicalQuery(
        groups_entity, selected_columns=groups_selection
    )

    # Inner join of the two subqueries on their "group_id" output column.
    join_clause = JoinClause(
        left_node=IndividualNode("e", events_query),
        right_node=IndividualNode("g", groups_query),
        keys=[
            JoinCondition(
                left=JoinConditionExpression("e", "group_id"),
                right=JoinConditionExpression("g", "group_id"),
            )
        ],
        join_type=JoinType.INNER,
    )
    join_query = CompositeQuery(from_clause=join_clause)

    data_source = join_query.get_from_clause()
    for qualified_name in ("e.string_evt_id", "g.group_id"):
        assert qualified_name in data_source.get_columns()
Example #4
0
def _plan_composite_query(query: CompositeQuery[Entity],
                          settings: RequestSettings) -> CompositeQueryPlan:
    """
    Builds the composite query plan for a composite query.

    This holds most of the Composite Planner's logic. It is a standalone
    function because the data source visitor also has to call it when it
    plans subqueries, which may themselves be composite.
    """

    source_plan = CompositeDataSourcePlanner(settings).visit(
        query.get_from_clause())
    root_db_processors, aliased_db_processors = source_plan.get_db_processors()

    # This is a mypy issue: https://github.com/python/mypy/issues/7520
    # At the time of writing generics in dataclasses are not properly
    # supported and mypy expects TQuery instead of CompositeQuery here.
    # If the issue is not fixed before we start enforcing this we will
    # have to restructure the query plan.
    translated_query = CompositeQuery(
        from_clause=source_plan.translated_source,
        selected_columns=query.get_selected_columns(),
        array_join=query.get_arrayjoin(),
        condition=query.get_condition(),
        groupby=query.get_groupby(),
        having=query.get_having(),
        order_by=query.get_orderby(),
        limitby=query.get_limitby(),
        limit=query.get_limit(),
        offset=query.get_offset(),
        totals=query.has_totals(),
        granularity=query.get_granularity(),
    )

    strategy = CompositeExecutionStrategy(
        get_cluster(source_plan.storage_set_key),
        root_db_processors,
        aliased_db_processors,
        composite_processors=[SemiJoinOptimizer()],
    )

    return CompositeQueryPlan(
        query=translated_query,
        execution_strategy=strategy,
        storage_set_key=source_plan.storage_set_key,
        root_processors=source_plan.root_processors,
        aliased_processors=source_plan.aliased_processors,
    )
Example #5
0
    def visit_query_exp(
        self, node: Node, visited_children: Iterable[Any]
    ) -> Union[LogicalQuery, CompositeQuery[QueryEntity]]:
        """
        Assemble a query object from the visited children of a query node.

        ``visited_children`` must unpack into exactly thirteen items: the
        data source, the eleven clause values in the order listed below,
        and a trailing item that is ignored.

        Returns a ``CompositeQuery`` when the data source is itself a
        query or a join clause, and a ``LogicalQuery`` otherwise.
        """
        args: MutableMapping[str, Any] = {}
        (
            data_source,
            args["selected_columns"],
            args["groupby"],
            args["array_join"],
            args["condition"],
            args["having"],
            args["order_by"],
            args["limitby"],
            args["limit"],
            args["offset"],
            args["granularity"],
            args["totals"],
            _,
        ) = visited_children

        # Drop every clause whose value is still a raw parse ``Node``
        # (presumably clauses that were not present in the query text --
        # confirm against the grammar). Iterate over a snapshot of the
        # keys because entries are deleted along the way.
        for k in list(args):
            if isinstance(args[k], Node):
                del args[k]

        if "groupby" in args:
            # Group-by entries are also selected: they are prepended to
            # whatever was explicitly selected.
            if "selected_columns" not in args:
                args["selected_columns"] = args["groupby"]
            else:
                args["selected_columns"] = args["groupby"] + args["selected_columns"]

            # Materialize the bare expressions as a list. A lazy ``map``
            # object would be exhausted after the first traversal, so any
            # later read of the group-by clause would see it as empty.
            args["groupby"] = [gb.expression for gb in args["groupby"]]

        if isinstance(data_source, (CompositeQuery, LogicalQuery, JoinClause)):
            args["from_clause"] = data_source
            return CompositeQuery(**args)

        args.update({"prewhere": None, "from_clause": data_source})
        if isinstance(data_source, QueryEntity):
            # TODO: How sample rate gets stored needs to be addressed in a future PR
            args["sample"] = data_source.sample

        return LogicalQuery(**args)
Example #6
0
 CompositeQuery(
     from_clause=LogicalQuery(
         from_clause=events_ent,
         selected_columns=[
             SelectedExpression("project_id",
                                Column(None, None, "project_id")),
             SelectedExpression(
                 "count_environment",
                 FunctionCall(
                     "count_environment",
                     "uniq",
                     (SubscriptableReference(
                         None,
                         Column(None, None, "tags"),
                         Literal(None, "environment"),
                     ), ),
                 ),
             ),
         ],
         groupby=[Column(None, None, "project_id")],
         condition=binary_condition(
             BooleanFunctions.AND,
             binary_condition(
                 ConditionFunctions.EQ,
                 Column(None, None, "project_id"),
                 Literal(None, 1),
             ),
             binary_condition(
                 ConditionFunctions.GTE,
                 Column(None, None, "timestamp"),
                 Literal(None, datetime(2020, 1, 1, 12, 0)),
             ),
         ),
     ),
     selected_columns=[
         SelectedExpression(
             "average",
             FunctionCall("average", "avg",
                          (Column(None, None, "count_environment"), )),
         ),
     ],
 ),
 CompositeQuery(
     from_clause=LogicalQuery(
         QueryEntity(
             EntityKey.EVENTS,
             get_entity(EntityKey.EVENTS).get_data_model(),
         ),
         selected_columns=[
             SelectedExpression("title",
                                Column("_snuba_title", None, "title")),
             SelectedExpression(
                 "count", FunctionCall("_snuba_count", "count",
                                       tuple())),
         ],
         groupby=[Column("_snuba_title", None, "title")],
         condition=binary_condition(
             "and",
             binary_condition(
                 "equals",
                 Column("_snuba_project_id", None, "project_id"),
                 Literal(None, 1),
             ),
             binary_condition(
                 "and",
                 binary_condition(
                     "greaterOrEquals",
                     Column("_snuba_timestamp", None, "timestamp"),
                     Literal(None, datetime.datetime(2021, 1, 15, 0,
                                                     0)),
                 ),
                 binary_condition(
                     "less",
                     Column("_snuba_timestamp", None, "timestamp"),
                     Literal(None, datetime.datetime(2021, 1, 20, 0,
                                                     0)),
                 ),
             ),
         ),
     ),
     selected_columns=[
         SelectedExpression(
             "max_count",
             FunctionCall(
                 "_snuba_max_count",
                 "max",
                 (Column("_snuba_count", None, "_snuba_count"), ),
             ),
         ),
     ],
     limit=1000,
     offset=0,
 ),
Example #8
0
 CompositeQuery(
     from_clause=Query(
         Table("my_table", ColumnSet([])),
         selected_columns=[
             SelectedExpression("column1",
                                Column(None, None, "column1")),
             SelectedExpression(
                 "sub_average",
                 FunctionCall("sub_average", "avg",
                              (Column(None, None, "column2"), )),
             ),
             SelectedExpression("column3",
                                Column(None, None, "column3")),
         ],
         condition=binary_condition(
             "eq",
             lhs=Column("al", None, "column3"),
             rhs=Literal(None, "blabla"),
         ),
         groupby=[Column(None, None, "column2")],
     ),
     selected_columns=[
         SelectedExpression(
             "average",
             FunctionCall("average", "avg",
                          (Column(None, None, "sub_average"), )),
         ),
         SelectedExpression("alias", Column("alias", None, "column3")),
     ],
     groupby=[Column(None, None, "alias")],
 ),
Example #9
0
        "BY": "c8"
    },
    "LIMIT": 150,
}

TEST_JOIN = [
    pytest.param(
        LOGICAL_QUERY,
        SIMPLE_FORMATTED,
        id="Simple logical query",
    ),
    pytest.param(
        CompositeQuery(
            from_clause=LOGICAL_QUERY,
            selected_columns=[
                SelectedExpression(
                    "f", FunctionCall("f", "avg", (Column(None, "t", "c"), )))
            ],
        ),
        {
            "FROM": SIMPLE_FORMATTED,
            "SELECT": [["f", ["f", "avg", ["t.c"]]]],
            "GROUPBY": [],
            "ORDERBY": [],
        },
        id="Nested Query",
    ),
    pytest.param(
        CompositeQuery(
            from_clause=BASIC_JOIN,
            selected_columns=[
Example #10
0
        FunctionCall(None, "tuple", (Literal(None, 1), Literal(None, 2))),
    ),
)

# Parametrization cases, consumed below as ``(query, expected_proj)``:
# each entry pairs a query with the value expected for it.
TEST_CASES = [
    # The plain query on its own.
    pytest.param(
        SIMPLE_QUERY,
        {1, 2},
        id="Simple Query",
    ),
    # The same query used as the from clause of a composite query; the
    # expected value is the same as for the plain query.
    pytest.param(
        CompositeQuery(
            from_clause=SIMPLE_QUERY,
            selected_columns=[
                SelectedExpression(
                    "alias",
                    FunctionCall("alias", "something",
                                 (Column(None, None, "alias"), )),
                )
            ],
        ),
        {1, 2},
        id="Nested query. Project from the inner query",
    ),
]


@pytest.mark.parametrize(
    "query, expected_proj",
    TEST_CASES,
)
def test_count_columns(
Example #11
0
        alias="gr",
        data_source=Entity(EntityKey.GROUPEDMESSAGES, GROUPS_SCHEMA, None),
    ),
    keys=[
        JoinCondition(
            left=JoinConditionExpression("ev", "group_id"),
            right=JoinConditionExpression("gr", "id"),
        )
    ],
    join_type=JoinType.INNER,
)

TEST_CASES = [
    pytest.param(
        CompositeQuery(
            from_clause=BASIC_JOIN,
            selected_columns=[],
        ),
        CompositeQuery(
            from_clause=events_groups_join(
                events_node([
                    SelectedExpression(
                        "_snuba_group_id",
                        Column("_snuba_group_id", None, "group_id"),
                    ),
                ]),
                groups_node([
                    SelectedExpression("_snuba_id",
                                       Column("_snuba_id", None, "id"))
                ], ),
            ),
            selected_columns=[],
Example #12
0
TEST_CASES = [
    pytest.param(
        SIMPLE_QUERY,
        3,
        {"errors_local"},
        True,
        0.1,
        id="Simple Query",
    ),
    pytest.param(
        CompositeQuery(
            from_clause=SIMPLE_QUERY,
            selected_columns=[
                SelectedExpression(
                    "alias",
                    FunctionCall("alias", "something", (Column(None, None, "alias"),)),
                )
            ],
        ),
        3,
        {"errors_local"},
        True,
        None,
        id="Nested query. Count the inner query",
    ),
    pytest.param(
        CompositeQuery(
            from_clause=JoinClause(
                left_node=IndividualNode(alias="err", data_source=SIMPLE_QUERY),
                right_node=IndividualNode(
Example #13
0
 CompositeQuery(
     from_clause=JoinClause(
         left_node=IndividualNode(
             "e",
             QueryEntity(
                 EntityKey.EVENTS, get_entity(EntityKey.EVENTS).get_data_model(),
             ),
         ),
         right_node=IndividualNode(
             "t",
             QueryEntity(
                 EntityKey.TRANSACTIONS,
                 get_entity(EntityKey.TRANSACTIONS).get_data_model(),
             ),
         ),
         keys=[
             JoinCondition(
                 JoinConditionExpression("e", "event_id"),
                 JoinConditionExpression("t", "event_id"),
             )
         ],
         join_type=JoinType.INNER,
     ),
     selected_columns=[
         SelectedExpression(
             "4-5",
             FunctionCall(None, "minus", (Literal(None, 4), Literal(None, 5))),
         ),
         SelectedExpression("e.c", Column("_snuba_e.c", "e", "c")),
     ],
 ),
Example #14
0
    clickhouse_assignees_node,
    clickhouse_events_node,
    clickhouse_groups_node,
    events_groups_join,
)

TEST_CASES = [
    pytest.param(
        CompositeQuery(
            from_clause=events_groups_join(
                clickhouse_events_node([
                    SelectedExpression(
                        "_snuba_group_id",
                        Column("_snuba_group_id", None, "group_id"),
                    ),
                ]),
                clickhouse_groups_node([
                    SelectedExpression("_snuba_id",
                                       Column("_snuba_id", None, "id"))
                ], ),
            ),
            selected_columns=[],
        ),
        {"gr": JoinModifier.ANY},
        id="Simple two table query with no reference. Semi join",
    ),
    pytest.param(
        CompositeQuery(
            from_clause=events_groups_join(
                clickhouse_events_node([
                    SelectedExpression(
Example #15
0
 CompositeQuery(
     from_clause=events_groups_join(
         clickhouse_events_node(
             [
                 SelectedExpression(
                     "_snuba_group_id",
                     Column("_snuba_group_id", None, "group_id"),
                 ),
             ],
             binary_condition(
                 BooleanFunctions.AND,
                 binary_condition(
                     BooleanFunctions.AND,
                     binary_condition(
                         ConditionFunctions.GTE,
                         Column(None, None, "timestamp"),
                         Literal(None, datetime(2020, 8, 1)),
                     ),
                     binary_condition(
                         ConditionFunctions.LT,
                         Column(None, None, "timestamp"),
                         Literal(None, datetime(2020, 9, 1)),
                     ),
                 ),
                 binary_condition(
                     ConditionFunctions.EQ,
                     build_mapping_expr(
                         "tags[asd]",
                         None,
                         "tags",
                         Literal(None, "asd"),
                     ),
                     Literal(None, "sdf"),
                 ),
             ),
             [Column("_snuba_group_id", None, "group_id")],
         ),
         clickhouse_groups_node([
             SelectedExpression("_snuba_id",
                                Column("_snuba_id", None, "id"))
         ], ),
     ),
     selected_columns=[],
 ),