Ejemplo n.º 1
0
def test_outcomes_columns_validation(key: EntityKey) -> None:
    """Exercise ``EntityContainsColumnsValidator`` in ERROR mode for one entity.

    A query selecting the column ``asdf`` is expected to be rejected with
    ``InvalidQueryException``, while a query selecting every column of the
    entity's data model is expected to validate without raising.
    """
    entity = get_entity(key)
    data_model = entity.get_data_model()
    source = QueryEntity(key, data_model)

    unknown_selection = [
        SelectedExpression("asdf", Column("_snuba_asdf", None, "asdf")),
    ]
    bad_query = LogicalQuery(source, selected_columns=unknown_selection)

    # One selected expression per column declared in the data model.
    full_selection = [
        SelectedExpression(
            column.name,
            Column(f"_snuba_{column.name}", None, column.name),
        )
        for column in data_model.columns
    ]
    good_query = LogicalQuery(source, selected_columns=full_selection)

    validator = EntityContainsColumnsValidator(
        data_model, validation_mode=ColumnValidationMode.ERROR
    )

    with pytest.raises(InvalidQueryException):
        validator.validate(bad_query)

    # Must not raise.
    validator.validate(good_query)
Ejemplo n.º 2
0
def test_nested_query() -> None:
    """
    Wrap a logical query in a composite query and check what the composite
    query exposes about its own columns and about its from clause.
    """
    inner_schema = ColumnSet([("event_id", String())])
    inner_selection = [
        SelectedExpression(
            "string_evt_id", Column("string_evt_id", None, "event_id")
        )
    ]
    nested = LogicalQuery(
        Entity(EntityKey.EVENTS, inner_schema),
        selected_columns=inner_selection,
    )

    outer_selection = [
        SelectedExpression(
            "output", Column("output", None, "string_evt_id")
        )
    ]
    composite = CompositeQuery(
        from_clause=nested, selected_columns=outer_selection
    )

    # Only the composite query's own column is reported: the iterator
    # methods do not descend into the nested query.
    referenced = composite.get_all_ast_referenced_columns()
    assert referenced == {Column("output", None, "string_evt_id")}

    # The nested query's select clause acts as the schema of the from clause.
    nested_schema = composite.get_from_clause().get_columns()
    assert nested_schema == ColumnSet([("string_evt_id", Any())])
Ejemplo n.º 3
0
def query() -> ClickhouseQuery:
    """Build a Clickhouse query over ``my_table`` with an IN condition on
    ``project_id`` for the single value ``2``.
    """
    body = {"conditions": [("project_id", "IN", [2])]}
    source = TableSource("my_table", ColumnSet([]))
    logical = LogicalQuery(
        body,
        source,
        condition=build_in("project_id", [2]),
    )
    return ClickhouseQuery(logical)
Ejemplo n.º 4
0
def test_col_split(
    dataset_name: str,
    id_column: str,
    project_column: str,
    timestamp_column: str,
    first_query_data: Sequence[MutableMapping[str, Any]],
    second_query_data: Sequence[MutableMapping[str, Any]],
) -> None:
    """Run a query through the column and time split strategies with a fake runner.

    The fake runner answers with ``first_query_data`` or ``second_query_data``
    depending on which of the two the query's selected columns match, and
    raises ``ValueError`` for any other column selection.
    """

    def do_query(
        query: ClickhouseQuery,
        request_settings: RequestSettings,
        reader: Reader[SqlQuery],
    ) -> QueryResult:
        columns = query.get_selected_columns()
        # The legacy column list and the AST column list must agree.
        ast_columns = [
            selected.expression.column_name
            for selected in query.get_selected_columns_from_ast() or []
            if isinstance(selected.expression, Column)
        ]
        assert columns == ast_columns

        # The first data set takes precedence when both would match.
        for rows in (first_query_data, second_query_data):
            if columns == list(rows[0].keys()):
                return QueryResult({"data": rows}, {})
        raise ValueError(f"Unexpected selected columns: {columns}")

    events = get_dataset(dataset_name)
    body = {
        "selected_columns": list(second_query_data[0].keys()),
        "conditions": [""],
        "orderby": "events.event_id",
        "sample": 10,
        "limit": 100,
        "offset": 50,
    }
    data_source = events.get_all_storages()[0].get_schema().get_data_source()
    ast_selection = [
        SelectedExpression(
            name=col_name, expression=Column(None, None, col_name)
        )
        for col_name in second_query_data[0].keys()
    ]
    query = ClickhouseQuery(
        LogicalQuery(body, data_source, selected_columns=ast_selection)
    )

    cluster = ClickhouseCluster(
        "localhost", 1024, "default", "", "default", 80, set(), True
    )
    splitters = [
        ColumnSplitQueryStrategy(id_column, project_column, timestamp_column),
        TimeSplitQueryStrategy(timestamp_col=timestamp_column),
    ]
    strategy = SimpleQueryPlanExecutionStrategy(cluster, [], splitters)

    strategy.execute(query, HTTPRequestSettings(), do_query)
Ejemplo n.º 5
0
 def query_fn(cond: Optional[Expression]) -> LogicalQuery:
     """Build a query on the enclosing ``key``/``entity`` that selects the
     timestamp column as ``time`` and uses ``cond`` as its condition.
     """
     source = QueryEntity(key, entity.get_data_model())
     time_column = SelectedExpression(
         "time", Column("_snuba_timestamp", None, "timestamp")
     )
     return LogicalQuery(
         source,
         selected_columns=[time_column],
         condition=cond,
     )
Ejemplo n.º 6
0
 def build_query(self) -> ProcessableQuery[Entity]:
     """Build a logical query from the state accumulated on this builder.

     The selected expressions are ordered by name. The collected conditions
     are combined with ``combine_and_conditions``; when there are none the
     query is built with ``condition=None``.

     Returns:
         A ``LogicalQuery`` reading from this builder's data source.
     """
     # sorted() already returns a new list, so no extra list() copy is needed.
     ordered_selection = sorted(
         self.__selected_expressions, key=lambda selected: selected.name
     )
     condition = (
         combine_and_conditions(self.__conditions)
         if self.__conditions
         else None
     )
     return LogicalQuery(
         from_clause=self.__data_source,
         selected_columns=ordered_selection,
         condition=condition,
     )
Ejemplo n.º 7
0
def test_join_query() -> None:
    """Inner-join an events query with a groups query and check that the
    join exposes the selected columns prefixed with each node's alias.
    """
    events_entity = Entity(
        EntityKey.EVENTS,
        ColumnSet([("event_id", String()), ("group_id", UInt(32))]),
    )
    events_query = LogicalQuery(
        events_entity,
        selected_columns=[
            SelectedExpression(
                "group_id", Column("group_id", None, "group_id")
            ),
            SelectedExpression(
                "string_evt_id", Column("string_evt_id", None, "event_id")
            ),
        ],
    )

    groups_entity = Entity(
        EntityKey.GROUPEDMESSAGES,
        ColumnSet([("id", UInt(32)), ("message", String())]),
    )
    groups_query = LogicalQuery(
        groups_entity,
        selected_columns=[
            SelectedExpression("group_id", Column("group_id", None, "id"))
        ],
    )

    # Events ("e") and groups ("g") are joined on their group_id columns.
    join_key = JoinCondition(
        left=JoinConditionExpression("e", "group_id"),
        right=JoinConditionExpression("g", "group_id"),
    )
    join_clause = JoinClause(
        left_node=IndividualNode("e", events_query),
        right_node=IndividualNode("g", groups_query),
        keys=[join_key],
        join_type=JoinType.INNER,
    )
    join_query = CompositeQuery(from_clause=join_clause)

    data_source = join_query.get_from_clause()
    for qualified_name in ("e.string_evt_id", "g.group_id"):
        assert qualified_name in data_source.get_columns()
Ejemplo n.º 8
0
def test_entity_validation_failure(key: EntityKey,
                                   condition: Optional[Expression]) -> None:
    """Check that the entity reports the given condition as not satisfying
    its required conditions.
    """
    entity = get_entity(key)
    time_column = SelectedExpression(
        "time", Column("_snuba_timestamp", None, "timestamp")
    )
    query = LogicalQuery(
        QueryEntity(key, entity.get_data_model()),
        selected_columns=[time_column],
        condition=condition,
    )

    is_valid = entity.validate_required_conditions(query)
    assert not is_valid
Ejemplo n.º 9
0
def build_node(
    alias: str,
    from_clause: Entity,
    selected_columns: Sequence[SelectedExpression],
    condition: Optional[Expression],
) -> IndividualNode[Entity]:
    """Create a join node named ``alias`` whose data source is a logical
    query built from the given from clause, selected columns and condition.
    """
    node_query = LogicalQuery(
        from_clause=from_clause,
        selected_columns=selected_columns,
        condition=condition,
    )
    return IndividualNode(alias=alias, data_source=node_query)
Ejemplo n.º 10
0
def test_entity_validation(key: EntityKey,
                           condition: Optional[Expression]) -> None:
    """Check that a query with the given condition passes the required-column
    validator configured for ``project_id``; the test passes if nothing raises.
    """
    source = QueryEntity(key, get_entity(key).get_data_model())
    time_column = SelectedExpression(
        "time", Column("_snuba_timestamp", None, "timestamp")
    )
    query = LogicalQuery(
        source,
        selected_columns=[time_column],
        condition=condition,
    )

    EntityRequiredColumnValidator({"project_id"}).validate(query)
def test_no_time_based_validation(key: EntityKey,
                                  condition: Expression) -> None:
    """Check that a query with the given condition passes the
    ``NoTimeBasedConditionValidator`` built for the entity's required time
    column; the test passes if nothing raises.
    """
    entity = get_entity(key)
    source = QueryEntity(key, entity.get_data_model())
    time_column = SelectedExpression(
        "time", Column("_snuba_timestamp", None, "timestamp")
    )
    query = LogicalQuery(
        source,
        selected_columns=[time_column],
        condition=condition,
    )

    # The validator needs a concrete column name, so the entity must have one.
    required_time_column = entity.required_time_column
    assert required_time_column is not None
    NoTimeBasedConditionValidator(required_time_column).validate(query)
Ejemplo n.º 12
0
    def visit_query_exp(
        self, node: Node, visited_children: Iterable[Any]
    ) -> Union[LogicalQuery, CompositeQuery[QueryEntity]]:
        """Assemble a query object from the visited children of a query expression.

        ``visited_children`` must unpack into exactly thirteen items: the data
        source, the eleven clause values in the order unpacked below, and a
        trailing item that is ignored.

        When a group-by clause is present its entries are placed in front of
        the selected columns, and the group-by itself is reduced to the bare
        expressions of those entries.

        Returns:
            A ``CompositeQuery`` when the data source is a ``CompositeQuery``,
            a ``LogicalQuery`` or a ``JoinClause``; otherwise a
            ``LogicalQuery`` with ``prewhere=None`` and, for a ``QueryEntity``
            data source, the entity's sample rate.
        """
        args: MutableMapping[str, Any] = {}
        (
            data_source,
            args["selected_columns"],
            args["groupby"],
            args["array_join"],
            args["condition"],
            args["having"],
            args["order_by"],
            args["limitby"],
            args["limit"],
            args["offset"],
            args["granularity"],
            args["totals"],
            _,
        ) = visited_children

        # Drop every clause whose value is still a raw parse Node so it is not
        # forwarded to the query constructor.
        # NOTE(review): presumably these are clauses absent from the query
        # text - confirm against the grammar.
        unparsed_keys = [k for k, v in args.items() if isinstance(v, Node)]
        for k in unparsed_keys:
            del args[k]

        if "groupby" in args:
            if "selected_columns" not in args:
                args["selected_columns"] = args["groupby"]
            else:
                args["selected_columns"] = args["groupby"] + args["selected_columns"]

            # Build a real list: a lazy map() object is a one-shot iterator and
            # would be empty after the first traversal of the group-by.
            args["groupby"] = [gb.expression for gb in args["groupby"]]

        if isinstance(data_source, (CompositeQuery, LogicalQuery, JoinClause)):
            args["from_clause"] = data_source
            return CompositeQuery(**args)

        args.update({"prewhere": None, "from_clause": data_source})
        if isinstance(data_source, QueryEntity):
            # TODO: How sample rate gets stored needs to be addressed in a future PR
            args["sample"] = data_source.sample

        return LogicalQuery(**args)
Ejemplo n.º 13
0
def test_no_split(
    dataset_name: str, id_column: str, project_column: str, timestamp_column: str
) -> None:
    """Check that the query handed to the runner equals the one submitted.

    The query is executed through a strategy configured with the column and
    time splitters; the fake runner asserts that what it receives equals the
    query originally passed to ``execute``.
    """
    events = get_dataset(dataset_name)
    submitted_query = ClickhouseQuery(
        LogicalQuery(
            {
                "selected_columns": ["event_id"],
                "conditions": [""],
                "orderby": "event_id",
                "sample": 10,
                "limit": 100,
                "offset": 50,
            },
            events.get_all_storages()[0].get_schema().get_data_source(),
        )
    )

    def do_query(
        query: ClickhouseQuery,
        request_settings: RequestSettings,
        reader: Reader[SqlQuery],
    ) -> QueryResult:
        # The submitted query has its own name so the ``query`` parameter
        # cannot shadow it; comparing ``query`` with itself could never fail.
        assert query == submitted_query
        return QueryResult({}, {})

    strategy = SimpleQueryPlanExecutionStrategy(
        ClickhouseCluster("localhost", 1024, "default", "", "default", 80, set(), True),
        [],
        [
            ColumnSplitQueryStrategy(
                id_column=id_column,
                project_column=project_column,
                timestamp_column=timestamp_column,
            ),
            TimeSplitQueryStrategy(timestamp_col=timestamp_column),
        ],
    )

    strategy.execute(submitted_query, HTTPRequestSettings(), do_query)
 from_clause=LogicalQuery(
     QueryEntity(
         EntityKey.EVENTS,
         get_entity(EntityKey.EVENTS).get_data_model(),
     ),
     selected_columns=[
         SelectedExpression("title",
                            Column("_snuba_title", None, "title")),
         SelectedExpression(
             "count", FunctionCall("_snuba_count", "count",
                                   tuple())),
     ],
     groupby=[Column("_snuba_title", None, "title")],
     condition=binary_condition(
         "and",
         binary_condition(
             "equals",
             Column("_snuba_project_id", None, "project_id"),
             Literal(None, 1),
         ),
         binary_condition(
             "and",
             binary_condition(
                 "greaterOrEquals",
                 Column("_snuba_timestamp", None, "timestamp"),
                 Literal(None, datetime.datetime(2021, 1, 15, 0,
                                                 0)),
             ),
             binary_condition(
                 "less",
                 Column("_snuba_timestamp", None, "timestamp"),
                 Literal(None, datetime.datetime(2021, 1, 20, 0,
                                                 0)),
             ),
         ),
     ),
 ),
Ejemplo n.º 15
0
TEST_CASES = [
    pytest.param(
        CompositeQuery(
            from_clause=LogicalQuery(
                from_clause=events_ent,
                selected_columns=[
                    SelectedExpression("project_id",
                                       Column(None, None, "project_id")),
                    SelectedExpression(
                        "count_release",
                        FunctionCall(
                            "count_release",
                            "uniq",
                            (SubscriptableReference(
                                None,
                                Column(None, None, "tags"),
                                Literal(None, "sentry:release"),
                            ), ),
                        ),
                    ),
                ],
                groupby=[Column(None, None, "project_id")],
                condition=binary_condition(
                    ConditionFunctions.EQ,
                    Column(None, None, "project_id"),
                    Literal(None, 1),
                ),
            ),
            selected_columns=[
                SelectedExpression(
                    "average",
Ejemplo n.º 16
0
def test_events_boolean_context() -> None:
    """Check the rewrite of ``contexts[device.charging]`` into a ``multiIf``.

    The query starts with an ``arrayElement`` lookup on the nested
    ``contexts`` column. After ``MappingColumnPromoter`` and
    ``EventsBooleanContextsProcessor`` have processed it, its selected columns
    must equal those of a query built around a ``multiIf`` over the
    ``device_charging`` column converted with ``toString``.
    """
    columns = ColumnSet([
        ("device_charging", Nullable(UInt(8))),
        ("contexts", Nested([("key", String()), ("value", String())])),
    ])
    alias = "contexts[device.charging]"

    def charging_as_string() -> FunctionCall:
        # A fresh expression per use, as in a hand-written expected tree.
        return FunctionCall(
            None, "toString", (Column(None, None, "device_charging"), )
        )

    key_index = FunctionCall(
        None,
        "indexOf",
        (
            Column(None, None, "contexts.key"),
            Literal(None, "device.charging"),
        ),
    )
    context_lookup = FunctionCall(
        alias,
        "arrayElement",
        (Column(None, None, "contexts.value"), key_index),
    )
    query = ClickhouseQuery(
        LogicalQuery(
            {},
            TableSource("events", columns),
            selected_columns=[SelectedExpression(alias, context_lookup)],
        ))

    is_empty = binary_condition(
        None,
        ConditionFunctions.EQ,
        charging_as_string(),
        Literal(None, ""),
    )
    is_truthy = binary_condition(
        None,
        ConditionFunctions.IN,
        charging_as_string(),
        literals_tuple(None, [Literal(None, "1"), Literal(None, "True")]),
    )
    # multiIf arguments: condition/value pairs followed by the fallback value.
    boolean_rewrite = FunctionCall(
        alias,
        "multiIf",
        (
            is_empty,
            Literal(None, ""),
            is_truthy,
            Literal(None, "True"),
            Literal(None, "False"),
        ),
    )
    expected = ClickhouseQuery(
        LogicalQuery(
            {},
            TableSource("events", columns),
            selected_columns=[SelectedExpression(alias, boolean_rewrite)],
        ))

    settings = HTTPRequestSettings()
    promoter = MappingColumnPromoter(
        {"contexts": {"device.charging": "device_charging"}}
    )
    promoter.process_query(query, settings)
    EventsBooleanContextsProcessor().process_query(query, settings)

    actual_columns = query.get_selected_columns_from_ast()
    assert actual_columns == expected.get_selected_columns_from_ast()
Ejemplo n.º 17
0
            left=JoinConditionExpression("ev", "group_id"),
            right=JoinConditionExpression("gr", "id"),
        )
    ],
    join_type=JoinType.INNER,
)

# Module-level logical query over the EVENTS entity that sets every clause
# passed below at once: selected columns, array join, condition, group by,
# having, order by, limit-by and limit.
LOGICAL_QUERY = LogicalQuery(
    from_clause=Entity(EntityKey.EVENTS, EVENTS_SCHEMA, 0.5),
    selected_columns=[
        # One plain column and one function call, both carrying an alias.
        SelectedExpression("c1", Column("_snuba_c1", "t", "c")),
        SelectedExpression(
            "f1", FunctionCall("_snuba_f1", "f", (Column(None, "t", "c2"), ))),
    ],
    array_join=Column(None, None, "col"),
    condition=binary_condition("equals", Column(None, None, "c4"),
                               Literal(None, "asd")),
    groupby=[Column(None, "t", "c4")],
    having=binary_condition("equals", Column(None, None, "c6"),
                            Literal(None, "asd2")),
    order_by=[OrderBy(OrderByDirection.ASC, Column(None, "t", "c"))],
    limitby=LimitBy(100, Column(None, None, "c8")),
    limit=150,
)

SIMPLE_FORMATTED = {
    "FROM": {
        "ENTITY": EntityKey.EVENTS,
        "SAMPLE": "0.5"
    },
    "SELECT": [["c1", "(t.c AS _snuba_c1)"],
Ejemplo n.º 18
0
         groups_node([
             SelectedExpression(
                 "_snuba_group_id",
                 Column("_snuba_group_id", None, "id"),
             ),
             SelectedExpression("_snuba_id",
                                Column("_snuba_id", None, "id")),
         ], ),
     ),
     right_node=IndividualNode(
         alias="as",
         data_source=LogicalQuery(
             from_clause=Entity(EntityKey.GROUPASSIGNEE,
                                GROUPS_ASSIGNEE),
             selected_columns=[
                 SelectedExpression(
                     "_snuba_group_id",
                     Column("_snuba_group_id", None, "group_id"),
                 ),
             ],
         ),
     ),
     keys=[
         JoinCondition(
             left=JoinConditionExpression("ev", "_snuba_group_id"),
             right=JoinConditionExpression("as", "_snuba_group_id"),
         )
     ],
     join_type=JoinType.INNER,
 ),
 selected_columns=[
     SelectedExpression(
Ejemplo n.º 19
0

class EntityKeySubscription(EntitySubscriptionValidation, EntitySubscription):
    """Subscription class that combines ``EntitySubscriptionValidation`` with
    ``EntitySubscription`` and adds no members of its own.
    """

    ...


tests = [
    pytest.param(
        LogicalQuery(
            QueryEntity(
                EntityKey.EVENTS, get_entity(EntityKey.EVENTS).get_data_model()
            ),
            selected_columns=[
                SelectedExpression(
                    "time", Column("_snuba_timestamp", None, "timestamp")
                ),
            ],
            condition=binary_condition(
                "equals",
                Column("_snuba_project_id", None, "project_id"),
                Literal(None, 1),
            ),
        ),
        id="no extra clauses",
    ),
    pytest.param(
        LogicalQuery(
            QueryEntity(
                EntityKey.METRICS_COUNTERS,
                get_entity(EntityKey.METRICS_COUNTERS).get_data_model(),
            ),
from snuba.query.conditions import binary_condition
from snuba.query.data_source.simple import Entity as QueryEntity
from snuba.query.exceptions import InvalidQueryException
from snuba.query.expressions import Column, FunctionCall, Literal
from snuba.query.logical import Query as LogicalQuery
from snuba.query.validation.validators import SubscriptionAllowedClausesValidator

# Parametrized inputs for test_subscription_clauses_validation. The single
# case is a query over EVENTS that selects the timestamp column and has an
# equality condition on project_id.
tests = [
    pytest.param(
        LogicalQuery(
            QueryEntity(EntityKey.EVENTS,
                        get_entity(EntityKey.EVENTS).get_data_model()),
            selected_columns=[
                SelectedExpression(
                    "time", Column("_snuba_timestamp", None, "timestamp")),
            ],
            condition=binary_condition(
                "equals",
                Column("_snuba_project_id", None, "project_id"),
                Literal(None, 1),
            ),
        ),
        id="no extra clauses",
    ),
]


@pytest.mark.parametrize("query", tests)  # type: ignore
def test_subscription_clauses_validation(query: LogicalQuery) -> None:
    """Run ``SubscriptionAllowedClausesValidator`` on each parametrized query;
    the test passes if validation does not raise.
    """
    SubscriptionAllowedClausesValidator().validate(query)
Ejemplo n.º 21
0
from snuba.query import LimitBy, OrderBy, OrderByDirection, SelectedExpression
from snuba.query.composite import CompositeQuery
from snuba.query.logical import Query as LogicalQuery
from snuba.query.snql.parser import parse_snql_query


test_cases = [
    pytest.param(
        "MATCH (events) SELECT 4-5, c GRANULARITY 60",
        LogicalQuery(
            QueryEntity(
                EntityKey.EVENTS, get_entity(EntityKey.EVENTS).get_data_model()
            ),
            selected_columns=[
                SelectedExpression(
                    "4-5",
                    FunctionCall(None, "minus", (Literal(None, 4), Literal(None, 5))),
                ),
                SelectedExpression("c", Column("_snuba_c", None, "c")),
            ],
            granularity=60,
        ),
        id="granularity on whole query",
    ),
    pytest.param(
        "MATCH (events) SELECT 4-5, c TOTALS true",
        LogicalQuery(
            QueryEntity(
                EntityKey.EVENTS, get_entity(EntityKey.EVENTS).get_data_model()
            ),
            selected_columns=[
Ejemplo n.º 22
0
 from_clause=LogicalQuery(
     from_clause=events_ent,
     selected_columns=[
         SelectedExpression("project_id",
                            Column(None, None, "project_id")),
         SelectedExpression(
             "count_environment",
             FunctionCall(
                 "count_environment",
                 "uniq",
                 (SubscriptableReference(
                     None,
                     Column(None, None, "tags"),
                     Literal(None, "environment"),
                 ), ),
             ),
         ),
     ],
     groupby=[Column(None, None, "project_id")],
     condition=binary_condition(
         BooleanFunctions.AND,
         binary_condition(
             ConditionFunctions.EQ,
             Column(None, None, "project_id"),
             Literal(None, 1),
         ),
         binary_condition(
             ConditionFunctions.GTE,
             Column(None, None, "timestamp"),
             Literal(None, datetime(2020, 1, 1, 12, 0)),
         ),
     ),
 ),
Ejemplo n.º 23
0
test_cases = [
    (
        "not promoted",
        ClickhouseQuery(
            LogicalQuery(
                {},
                TableSource("events", columns),
                selected_columns=[
                    SelectedExpression(
                        "tags[foo]",
                        FunctionCall(
                            "tags[foo]",
                            "arrayValue",
                            (
                                Column(None, None, "tags.value"),
                                FunctionCall(
                                    None,
                                    "indexOf",
                                    (
                                        Column(None, None, "tags.key"),
                                        Literal(None, "foo"),
                                    ),
                                ),
                            ),
                        ),
                    )
                ],
            )
        ),
        ClickhouseQuery(
            LogicalQuery(
                {},