Example #1
0
def test_complex_joins() -> None:
    """Check the column sets exposed by a nested join over three aliases.

    An inner join between the "err" and "groups" entity nodes is itself
    inner-joined with an "assignee" node whose data source is a query.
    """
    errors_node = IndividualNode(
        alias="err",
        data_source=Entity(key=EntityKey.EVENTS, schema=ERRORS_SCHEMA),
    )
    groups_node = IndividualNode(
        alias="groups",
        data_source=Entity(key=EntityKey.GROUPEDMESSAGES,
                           schema=GROUPS_SCHEMA),
    )

    assignee_query = Query(
        from_clause=Entity(key=EntityKey.GROUPASSIGNEE,
                           schema=GROUPS_ASSIGNEE),
        selected_columns=[
            SelectedExpression("id", Column("id", None, "id")),
            SelectedExpression("assigned_user",
                               Column("assigned_user", None, "user")),
        ],
    )
    assignee_node = IndividualNode(alias="assignee",
                                   data_source=assignee_query)

    # The left-hand side of the outer join is itself a join clause.
    err_groups_join = JoinClause(
        left_node=errors_node,
        right_node=groups_node,
        keys=[
            JoinCondition(
                left=JoinConditionExpression("err", "group_id"),
                right=JoinConditionExpression("groups", "id"),
            )
        ],
        join_type=JoinType.INNER,
    )
    full_join = JoinClause(
        left_node=err_groups_join,
        right_node=assignee_node,
        keys=[
            JoinCondition(
                left=JoinConditionExpression("err", "group_id"),
                right=JoinConditionExpression("assignee", "id"),
            )
        ],
        join_type=JoinType.INNER,
    )

    column_sets = full_join.get_column_sets()
    assert column_sets == {
        "err": ERRORS_SCHEMA,
        "assignee": ColumnSet([("id", Any()), ("assigned_user", Any())]),
        "groups": GROUPS_SCHEMA,
    }
Example #2
0
def test_simple_join() -> None:
    """Exercise a two-node semi inner join and its key validation.

    Checks the per-alias column sets and the alias-qualified column names,
    then expects an AssertionError when a join is keyed on the columns
    "missing_col" / "another_missing_col".
    """
    errors_node = IndividualNode(
        alias="err",
        data_source=Entity(key=EntityKey.EVENTS, schema=ERRORS_SCHEMA),
    )
    groups_node = IndividualNode(
        alias="groups",
        data_source=Entity(key=EntityKey.GROUPEDMESSAGES,
                           schema=GROUPS_SCHEMA),
    )

    group_key = JoinCondition(
        left=JoinConditionExpression("err", "group_id"),
        right=JoinConditionExpression("groups", "id"),
    )
    semi_join = JoinClause(
        left_node=errors_node,
        right_node=groups_node,
        keys=[group_key],
        join_type=JoinType.INNER,
        join_modifier=JoinModifier.SEMI,
    )

    column_sets = semi_join.get_column_sets()
    assert column_sets == {"err": ERRORS_SCHEMA, "groups": GROUPS_SCHEMA}

    joined_cols = semi_join.get_columns()
    expected_names = (
        "err.group_id",
        "err.event_id",
        "groups.id",
        "groups.message",
    )
    for qualified_name in expected_names:
        assert qualified_name in joined_cols

    # Everything is built inside the context manager so that an
    # AssertionError from any of the constructors is the one captured.
    with pytest.raises(AssertionError):
        JoinClause(
            left_node=errors_node,
            right_node=groups_node,
            keys=[
                JoinCondition(
                    left=JoinConditionExpression("err", "missing_col"),
                    right=JoinConditionExpression("groups",
                                                  "another_missing_col"),
                )
            ],
            join_type=JoinType.INNER,
        )
Example #3
0
def test_join_query() -> None:
    """Check the columns exposed by a composite query joining two subqueries.

    The events subquery is aliased "e" and the groups subquery "g"; the
    selected column names must show up prefixed with those aliases.
    """
    events_entity = Entity(
        EntityKey.EVENTS,
        ColumnSet([("event_id", String()), ("group_id", UInt(32))]),
    )
    events_query = LogicalQuery(
        events_entity,
        selected_columns=[
            SelectedExpression("group_id",
                               Column("group_id", None, "group_id")),
            SelectedExpression("string_evt_id",
                               Column("string_evt_id", None, "event_id")),
        ],
    )

    groups_entity = Entity(
        EntityKey.GROUPEDMESSAGES,
        ColumnSet([("id", UInt(32)), ("message", String())]),
    )
    groups_query = LogicalQuery(
        groups_entity,
        selected_columns=[
            SelectedExpression("group_id", Column("group_id", None, "id"))
        ],
    )

    subquery_join = JoinClause(
        left_node=IndividualNode("e", events_query),
        right_node=IndividualNode("g", groups_query),
        keys=[
            JoinCondition(
                left=JoinConditionExpression("e", "group_id"),
                right=JoinConditionExpression("g", "group_id"),
            )
        ],
        join_type=JoinType.INNER,
    )
    join_query = CompositeQuery(from_clause=subquery_join)

    from_clause = join_query.get_from_clause()
    assert "e.string_evt_id" in from_clause.get_columns()
    assert "g.group_id" in from_clause.get_columns()
Example #4
0
    def visit_entity_match(
        self,
        node: Node,
        visited_children: Tuple[Any, str, Any, Any, EntityKey,
                                Union[Optional[float], Node], Any, Any],
    ) -> IndividualNode[QueryEntity]:
        """Build the aliased entity node from the visited children.

        ``visited_children`` must hold exactly eight items; the alias, the
        entity key and the sample are taken from positions 1, 4 and 5.
        A sample that is a ``Node`` instance is replaced with ``None``.
        """
        _, alias, _, _, entity_key, raw_sample, _, _ = visited_children
        sample = None if isinstance(raw_sample, Node) else raw_sample

        data_model = get_entity(entity_key).get_data_model()
        entity = QueryEntity(entity_key, data_model, sample)
        return IndividualNode(alias, entity)
Example #5
0
def build_node(
    alias: str,
    from_clause: Entity,
    selected_columns: Sequence[SelectedExpression],
    condition: Optional[Expression],
) -> IndividualNode[Entity]:
    """Wrap a logical query over ``from_clause`` in a node named ``alias``.

    ``selected_columns`` and ``condition`` are passed to the query as-is.
    """
    subquery = LogicalQuery(
        from_clause=from_clause,
        selected_columns=selected_columns,
        condition=condition,
    )
    return IndividualNode(alias=alias, data_source=subquery)
Example #6
0
def build_clickhouse_node(
    alias: str,
    from_clause: Table,
    selected_columns: Sequence[SelectedExpression],
    condition: Optional[Expression],
) -> IndividualNode[Table]:
    """Wrap a Clickhouse query over ``from_clause`` in a node named ``alias``.

    ``selected_columns`` and ``condition`` are passed to the query as-is.
    """
    subquery = ClickhouseQuery(
        from_clause=from_clause,
        selected_columns=selected_columns,
        condition=condition,
    )
    return IndividualNode(alias=alias, data_source=subquery)
Example #7
0
    def visit_individual_node(
            self, node: IndividualNode[Entity]) -> JoinDataSourcePlan:
        """Build the join plan for a single aliased subquery node.

        The node's data source must be a ``ProcessableQuery``. The plan
        stored for the node's alias supplies the translated query, the
        processors and the storage set key.
        """
        assert isinstance(
            node.data_source, ProcessableQuery
        ), "Invalid composite query. All nodes must be subqueries."

        alias = node.alias
        plan = self.__plans[alias]
        translated = IndividualNode(alias=alias, data_source=plan.query)
        subquery_processors = SubqueryProcessors(
            plan_processors=plan.plan_query_processors,
            db_processors=plan.db_query_processors,
        )
        return JoinDataSourcePlan(
            translated_source=translated,
            processors={alias: subquery_processors},
            storage_set_key=plan.storage_set_key,
        )
Example #8
0
    EVENTS_SCHEMA,
    GROUPS_ASSIGNEE,
    GROUPS_SCHEMA,
    Events,
    GroupAssignee,
    GroupedMessage,
)
from tests.query.joins.join_structures import (
    events_groups_join,
    events_node,
    groups_node,
)

# Inner join between the events entity (alias "ev") and the grouped messages
# entity (alias "gr"), keyed on ev.group_id = gr.id.
BASIC_JOIN = JoinClause(
    left_node=IndividualNode(
        alias="ev",
        data_source=Entity(EntityKey.EVENTS, EVENTS_SCHEMA, None),
    ),
    right_node=IndividualNode(
        alias="gr",
        data_source=Entity(EntityKey.GROUPEDMESSAGES, GROUPS_SCHEMA, None),
    ),
    keys=[
        JoinCondition(
            left=JoinConditionExpression("ev", "group_id"),
            right=JoinConditionExpression("gr", "id"),
        )
    ],
    join_type=JoinType.INNER,
)

TEST_CASES = [
     id="subquery has their dates adjusted",
 ),
 pytest.param(
     """MATCH (e: events) -[contains]-> (t: transactions) SELECT 4-5, e.c
     WHERE e.project_id=1
     AND e.timestamp>=toDateTime('2021-01-01T00:30:00')
     AND e.timestamp<toDateTime('2021-01-03T00:30:00')
     AND t.project_id=1
     AND t.finish_ts>=toDateTime('2021-01-01T00:30:00')
     AND t.finish_ts<toDateTime('2021-01-07T00:30:00')""",
     CompositeQuery(
         from_clause=JoinClause(
             left_node=IndividualNode(
                 "e",
                 QueryEntity(
                     EntityKey.EVENTS,
                     get_entity(EntityKey.EVENTS).get_data_model(),
                 ),
             ),
             right_node=IndividualNode(
                 "t",
                 QueryEntity(
                     EntityKey.TRANSACTIONS,
                     get_entity(EntityKey.TRANSACTIONS).get_data_model(),
                 ),
             ),
             keys=[
                 JoinCondition(
                     JoinConditionExpression("e", "event_id"),
                     JoinConditionExpression("t", "event_id"),
                 )
Example #10
0
# Column schemas for the errors, groups and group-assignee tables declared
# further down.
ERRORS_SCHEMA = ColumnSet([
    ("event_id", UUID()),
    ("project_id", UInt(32)),
    ("message", String()),
    ("group_id", UInt(32)),
])
GROUPS_SCHEMA = ColumnSet([
    ("id", UInt(32)),
    ("project_id", UInt(32)),
    ("group_id", UInt(32)),
    ("message", String()),
])
GROUPS_ASSIGNEE = ColumnSet([("id", UInt(32)), ("user", String())])

# One table-backed node per schema, each under its own alias
# ("err", "groups", "assignee").
node_err = IndividualNode(alias="err",
                          data_source=Table("errors_local", ERRORS_SCHEMA))
node_group = IndividualNode(alias="groups",
                            data_source=Table("groupedmessage_local",
                                              GROUPS_SCHEMA))
node_assignee = IndividualNode(alias="assignee",
                               data_source=Table("groupassignee_local",
                                                 GROUPS_ASSIGNEE))

test_cases = [
    pytest.param(
        Query(
            Table("my_table", ColumnSet([])),
            selected_columns=[
                SelectedExpression("column1", Column(None, None, "column1")),
                SelectedExpression("column2", Column(None, "table1",
                                                     "column2")),
Example #11
0
    get_equivalent_columns,
)
from tests.query.joins.equivalence_schema import (
    EVENTS_SCHEMA,
    GROUPS_ASSIGNEE,
    GROUPS_SCHEMA,
    Events,
    GroupAssignee,
    GroupedMessage,
)

TEST_CASES = [
    pytest.param(
        JoinClause(
            IndividualNode("ev",
                           EntitySource(EntityKey.EVENTS, EVENTS_SCHEMA,
                                        None)),
            IndividualNode(
                "gr",
                EntitySource(EntityKey.GROUPEDMESSAGES, GROUPS_SCHEMA, None)),
            [
                JoinCondition(
                    JoinConditionExpression("ev", "group_id"),
                    JoinConditionExpression("gr", "id"),
                )
            ],
            JoinType.INNER,
            None,
        ),
        {
            QualifiedCol(EntityKey.EVENTS, "group_id"): {
Example #12
0
def node(alias: str, name: str) -> IndividualNode[QueryEntity]:
    """Return a node named ``alias`` over the entity keyed by ``name``.

    ``name`` is converted to an ``EntityKey``; the entity's data model is
    looked up with ``get_entity``.
    """
    key = EntityKey(name)
    data_model = get_entity(key).get_data_model()
    return IndividualNode(alias, QueryEntity(key, data_model))
Example #13
0
 def visit_individual_node(
         self, node: IndividualNode[Entity]) -> IndividualNode[Entity]:
     """Return a node with the same alias wrapping the rebuilt subquery.

     The subquery is produced by calling ``build_query()`` on the entry
     stored for the node's alias.
     """
     alias = node.alias
     rebuilt_query = self.__subqueries[alias].build_query()
     return IndividualNode(alias, rebuilt_query)
Example #14
0
    JoinCondition,
    JoinConditionExpression,
    JoinType,
)
from snuba.query.data_source.simple import Entity
from snuba.query.expressions import Column, FunctionCall, Literal
from snuba.query.formatters.tracing import TExpression, format_query
from snuba.query.logical import Query as LogicalQuery
from tests.query.joins.equivalence_schema import (
    EVENTS_SCHEMA,
    GROUPS_SCHEMA,
)

# Inner join between the events entity (alias "ev") and the grouped messages
# entity (alias "gr"), keyed on ev.group_id = gr.id.
BASIC_JOIN = JoinClause(
    left_node=IndividualNode(
        alias="ev",
        data_source=Entity(EntityKey.EVENTS, EVENTS_SCHEMA, None),
    ),
    right_node=IndividualNode(
        alias="gr",
        data_source=Entity(EntityKey.GROUPEDMESSAGES, GROUPS_SCHEMA, None),
    ),
    keys=[
        JoinCondition(
            left=JoinConditionExpression("ev", "group_id"),
            right=JoinConditionExpression("gr", "id"),
        )
    ],
    join_type=JoinType.INNER,
)

LOGICAL_QUERY = LogicalQuery(
Example #15
0
                     ),
                     Literal(None, 1),
                 ),
             ),
             Literal(None, 1),
         ),
     ),
     id="Special array join functions",
 ),
 pytest.param(
     "MATCH (e: events) -[contains]-> (t: transactions) SELECT 4-5, e.c",
     CompositeQuery(
         from_clause=JoinClause(
             left_node=IndividualNode(
                 "e",
                 QueryEntity(
                     EntityKey.EVENTS, get_entity(EntityKey.EVENTS).get_data_model(),
                 ),
             ),
             right_node=IndividualNode(
                 "t",
                 QueryEntity(
                     EntityKey.TRANSACTIONS,
                     get_entity(EntityKey.TRANSACTIONS).get_data_model(),
                 ),
             ),
             keys=[
                 JoinCondition(
                     JoinConditionExpression("e", "event_id"),
                     JoinConditionExpression("t", "event_id"),
                 )
             ],
Example #16
0
             SelectedExpression(
                 "alias",
                 FunctionCall("alias", "something", (Column(None, None, "alias"),)),
             )
         ],
     ),
     3,
     {"errors_local"},
     True,
     None,
     id="Nested query. Count the inner query",
 ),
 pytest.param(
     CompositeQuery(
         from_clause=JoinClause(
             left_node=IndividualNode(alias="err", data_source=SIMPLE_QUERY),
             right_node=IndividualNode(
                 alias="groups", data_source=Table("groups_local", GROUPS_SCHEMA)
             ),
             keys=[
                 JoinCondition(
                     left=JoinConditionExpression("err", "group_id"),
                     right=JoinConditionExpression("groups", "id"),
                 )
             ],
             join_type=JoinType.INNER,
         ),
         selected_columns=[
             SelectedExpression(
                 "event_id",
                 FunctionCall("alias", "something", (Column(None, "err", "alias"),)),
Example #17
0
def test_entity_node() -> None:
    """Check that a single entity node maps its alias to the entity schema."""
    errors_entity = Entity(key=EntityKey.ERRORS, schema=ERRORS_SCHEMA)
    errors_node = IndividualNode(alias="err", data_source=errors_entity)

    column_sets = errors_node.get_column_sets()
    assert column_sets == {"err": errors_entity.schema}
Example #18
0
         ),
         selected_columns=[
             SelectedExpression(
                 "average",
                 FunctionCall("average", "avg",
                              (Column(None, None, "count_environment"), )),
             ),
         ],
     ),
     id="Query with a subquery",
 ),
 pytest.param(
     CompositeQuery(
         from_clause=JoinClause(
             left_node=IndividualNode(
                 alias="err",
                 data_source=events_ent,
             ),
             right_node=IndividualNode(
                 alias="groups",
                 data_source=groups_ent,
             ),
             keys=[
                 JoinCondition(
                     left=JoinConditionExpression("err", "group_id"),
                     right=JoinConditionExpression("groups", "id"),
                 )
             ],
             join_type=JoinType.INNER,
         ),
         selected_columns=[
             SelectedExpression(
         ),
         selected_columns=[
             SelectedExpression(
                 "average",
                 FunctionCall("average", "avg",
                              (Column(None, None, "count_environment"), )),
             ),
         ],
     ),
     id="Query with a subquery",
 ),
 pytest.param(
     CompositeQuery(
         from_clause=JoinClause(
             left_node=IndividualNode(
                 alias="err",
                 data_source=events_ent,
             ),
             right_node=IndividualNode(
                 alias="groups",
                 data_source=groups_ent,
             ),
             keys=[
                 JoinCondition(
                     left=JoinConditionExpression("err", "group_id"),
                     right=JoinConditionExpression("groups", "id"),
                 )
             ],
             join_type=JoinType.INNER,
         ),
         selected_columns=[
             SelectedExpression(