Example No. 1
 def _resolve_failure_count(
     self,
     _: Mapping[str, Union[str, Column, SelectType, int, float]],
     alias: Optional[str] = None,
 ) -> SelectType:
     statuses = [
         indexer.resolve(status) for status in constants.NON_FAILURE_STATUS
     ]
     return self._resolve_count_if(
         Function(
             "equals",
             [
                 Column("metric_id"),
                 self.resolve_metric("transaction.duration"),
             ],
         ),
         Function(
             "notIn",
             [
                 self.builder.column("transaction.status"),
                 [status for status in statuses if status is not None],
             ],
         ),
         alias,
     )
Example No. 2
 def _snql_on_session_status_factory(aggregate, session_status,
                                     metric_ids, alias=None):
     # `aggregate` is the conditional aggregate to apply, e.g. "sumIf" for
     # counters or "uniqIf" for sets (see the tests in later examples).
     return Function(
         aggregate,
         [
             Column("value"),
             Function(
                 "and",
                 [
                     Function(
                         "equals",
                         [
                             Column(f"tags[{resolve_weak('session.status')}]"),
                             resolve_weak(session_status),
                         ],
                     ),
                     Function("in", [Column("metric_id"), list(metric_ids)]),
                 ],
             ),
         ],
         alias,
     )
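
A hedged sketch of how a factory like this might be specialized into the helpers exercised in later examples (init_sessions, crashed_sessions, crashed_users); functools.partial and the exact binding order are illustrative assumptions, not the real wiring:

from functools import partial

# Hypothetical wiring: bind the aggregate and the session.status value,
# leaving (metric_ids, alias) to the caller, as the tests below do.
init_sessions = partial(_snql_on_session_status_factory, "sumIf", "init")
crashed_sessions = partial(_snql_on_session_status_factory, "sumIf", "crashed")
crashed_users = partial(_snql_on_session_status_factory, "uniqIf", "crashed")

# e.g. init_sessions(metric_ids, alias="init")
#   -> sumIf(value, and(equals(tags[...], <init id>), in(metric_id, [...])))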
Example No. 3
    def test_sub_query(self) -> None:
        inner_query = (
            Query("discover", Entity("discover_events"))
            .set_select([Function("count", [], "count")])
            .set_groupby([Column("project_id"), Column("tags[custom_tag]")])
            .set_where([
                Condition(Column("type"), Op.NEQ, "transaction"),
                Condition(Column("project_id"), Op.EQ, self.project_id),
                Condition(Column("timestamp"), Op.GTE, self.base_time),
                Condition(Column("timestamp"), Op.LT, self.next_time),
            ])
        )

        query = (
            Query("discover", inner_query)
            .set_select([Function("avg", [Column("count")], "avg_count")])
            .set_orderby([OrderBy(Function("avg", [Column("count")], "avg_count"), Direction.ASC)])
            .set_limit(1000)
        )

        response = self.post("/discover/snql", data=query.snuba())
        data = json.loads(response.data)
        assert response.status_code == 200, data
        assert data["data"] == [{"avg_count": 1.0}]
Example No. 4
 def _resolve_user_misery_function(
     self,
     args: Mapping[str, Union[str, Column, SelectType, int, float]],
     alias: Optional[str] = None,
 ) -> SelectType:
     return Function(
         "divide",
         [
             Function(
                 "plus",
                 [
                     self.builder.resolve_function("count_miserable(user)"),
                     constants.MISERY_ALPHA,
                 ],
             ),
             Function(
                 "plus",
                 [
                     Function(
                         "nullIf", [self.builder.resolve_function("count_unique(user)"), 0]
                     ),
                     constants.MISERY_ALPHA + constants.MISERY_BETA,
                 ],
             ),
         ],
         alias,
     )
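
The smoothed ratio this expression encodes, written as plain arithmetic; ALPHA and BETA below are placeholders for constants.MISERY_ALPHA and constants.MISERY_BETA, not their real values:

ALPHA, BETA = 5.0, 100.0  # placeholder smoothing constants
miserable_users, unique_users = 3, 200

# divide(plus(count_miserable(user), ALPHA),
#        plus(nullIf(count_unique(user), 0), ALPHA + BETA))
# The real expression also nullIf-guards the denominator against 0.
user_misery = (miserable_users + ALPHA) / (unique_users + ALPHA + BETA)  # ~0.026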
Example No. 5
 def test_counter_sum_aggregation_on_session_status(self):
     for status, func in [
         ("init", init_sessions),
         ("crashed", crashed_sessions),
         ("errored_preaggr", errored_preaggr_sessions),
     ]:
         assert func(self.metric_ids, alias=status) == Function(
             "sumIf",
             [
                 Column("value"),
                 Function(
                     "and",
                     [
                         Function(
                             "equals",
                             [
                                 Column(
                                     f"tags[{resolve_weak('session.status')}]"
                                 ),
                                 resolve_weak(status),
                             ],
                         ),
                         Function(
                             "in",
                             [Column("metric_id"),
                              list(self.metric_ids)]),
                     ],
                 ),
             ],
             status,
         )
Example No. 6
    def limiting_conditions(self) -> Optional[List[Condition]]:
        if not self.initialized or not self._groups:
            # First query may run without limiting conditions
            # When there are no groups there is nothing to limit
            return None

        group_columns = [
            col for col in self._groupby if col not in self.skip_columns
        ]

        if not group_columns:
            return []

        # Create conditions from the groups in group by
        group_values = [
            Function("tuple", [row[column.name] for column in group_columns])
            for row in self._groups
        ]

        return [
            # E.g. (release, environment) IN [(1, 2), (3, 4), ...]
            Condition(Function("tuple", group_columns), Op.IN, group_values)
        ] + [
            # These conditions are redundant but might lead to better query performance
            # E.g. [release IN [1, 3]], [environment IN [2, 4]]
            Condition(column, Op.IN,
                      [row[column.name] for row in self._groups])
            for column in group_columns
        ]
Example No. 7
 def expected_query(match, select, extra_groupby, metric_name):
     function, column, alias = select
     return Query(
         dataset="metrics",
         match=Entity(match),
         select=[
             Function(
                 OP_TO_SNUBA_FUNCTION[match][alias],
                 [
                     Column("value"),
                     Function(
                         "equals",
                         [Column("metric_id"),
                          resolve_weak(metric_name)]),
                 ],
                 alias=f"{alias}({metric_name})",
             )
         ],
         groupby=[Column("tags[8]"), Column("tags[2]")] + extra_groupby,
         where=[
             Condition(Column("org_id"), Op.EQ, 1),
             Condition(Column("project_id"), Op.IN, [1]),
             Condition(Column("timestamp"), Op.GTE,
                       datetime(2021, 5, 28, 0, tzinfo=pytz.utc)),
             Condition(Column("timestamp"), Op.LT,
                       datetime(2021, 8, 26, 0, tzinfo=pytz.utc)),
             Condition(Column("tags[6]"), Op.IN, [10]),
             Condition(Column("metric_id"), Op.IN,
                       [resolve_weak(metric_name)]),
         ],
         limit=Limit(MAX_POINTS),
         offset=Offset(0),
         granularity=Granularity(query_definition.rollup),
     )
Example No. 8
 def test_set_uniq_aggregation_on_session_status(self):
     for status, func in [
         ("init", all_users),
         ("crashed", crashed_users),
     ]:
         assert func(self.metric_ids, alias=status) == Function(
             "uniqIf",
             [
                 Column("value"),
                 Function(
                     "and",
                     [
                         Function(
                             "equals",
                             [
                                 Column(
                                     f"tags[{resolve_weak('session.status')}]"
                                 ),
                                 resolve_weak(status),
                             ],
                         ),
                         Function(
                             "in",
                             [Column("metric_id"),
                              list(self.metric_ids)]),
                     ],
                 ),
             ],
             status,
         )
Example No. 9
def percentage(arg1_snql, arg2_snql, metric_ids, alias=None):
    return Function(
        "multiply",
        [
            100,
            Function("minus", [1, Function("divide", [arg1_snql, arg2_snql])]),
        ],
        alias,
    )
Example No. 10
    def test_percentage_in_snql(self):
        alias = "foo.percentage"
        init_session_snql = init_sessions(self.metric_ids, "init_sessions")
        crashed_session_snql = crashed_sessions(self.metric_ids, "crashed_sessions")

        assert percentage(
            crashed_session_snql, init_session_snql, self.metric_ids, alias=alias
        ) == Function(
            "multiply",
            [
                100,
                Function("minus", [1, Function("divide", [crashed_session_snql, init_session_snql])]),
            ],
            alias,
        )
Example No. 11
 def get_column_for_status(function_name: str, prefix: str, status: str) -> Function:
     return Function(
         f"{function_name}If",
         [
             Column("value"),
             Function(
                 "equals",
                 [Column(tag_key_session_status), indexer.resolve(status)],
             ),
         ],
         alias=f"{prefix}_{status}",
     )
Example No. 12
 def __build_conditional_aggregate_for_metric(
         self, entity: MetricEntity) -> Function:
     snuba_function = OP_TO_SNUBA_FUNCTION[entity][self.op]
     return Function(
         snuba_function,
         [
             Column("value"),
             Function("equals",
                      [Column("metric_id"),
                       resolve_weak(self.metric_name)]),
         ],
         alias=f"{self.op}({self.metric_name})",
     )
Example No. 13
def resolve_team_key_transaction_alias(
    builder: QueryBuilder, resolve_metric_index: bool = False
) -> SelectType:
    org_id = builder.params.get("organization_id")
    project_ids = builder.params.get("project_id")
    team_ids = builder.params.get("team_id")

    if org_id is None or team_ids is None or project_ids is None:
        raise TypeError("Team key transactions parameters cannot be None")

    team_key_transactions = list(
        TeamKeyTransaction.objects.filter(
            organization_id=org_id,
            project_team__in=ProjectTeam.objects.filter(
                project_id__in=project_ids, team_id__in=team_ids
            ),
        )
        .order_by("transaction", "project_team__project_id")
        .values_list("project_team__project_id", "transaction")
        .distinct("transaction", "project_team__project_id")[
            : fields.MAX_QUERYABLE_TEAM_KEY_TRANSACTIONS
        ]
    )

    count = len(team_key_transactions)
    if resolve_metric_index:
        team_key_transactions = [
            (project, indexer.resolve(transaction))
            for project, transaction in team_key_transactions
        ]

    # NOTE: this raw count is not 100% accurate because if it exceeds
    # `MAX_QUERYABLE_TEAM_KEY_TRANSACTIONS`, it will not be reflected
    sentry_sdk.set_tag("team_key_txns.count", count)
    sentry_sdk.set_tag(
        "team_key_txns.count.grouped", format_grouped_length(count, [10, 100, 250, 500])
    )

    if count == 0:
        return Function("toInt8", [0], constants.TEAM_KEY_TRANSACTION_ALIAS)

    return Function(
        "in",
        [
            (builder.column("project_id"), builder.column("transaction")),
            team_key_transactions,
        ],
        constants.TEAM_KEY_TRANSACTION_ALIAS,
    )
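
A sketch of the expression the non-empty branch returns, built directly with snuba-sdk; the alias and the (project_id, transaction) pairs are invented stand-ins for constants.TEAM_KEY_TRANSACTION_ALIAS and the queried rows:

from snuba_sdk import Column, Function

team_key_expr = Function(
    "in",
    [
        (Column("project_id"), Column("transaction")),
        [(1, "/checkout"), (1, "/home")],  # placeholder (project_id, transaction) pairs
    ],
    "team_key_transaction",  # invented alias
)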
Example No. 14
def sessions_errored_set(metric_ids, alias=None):
    return Function(
        "uniqIf",
        [
            Column("value"),
            Function(
                "in",
                [
                    Column("metric_id"),
                    list(metric_ids),
                ],
            ),
        ],
        alias,
    )
Example No. 15
 def expected_query(match, select, extra_groupby):
     function, column, alias = select
     return Query(
         dataset="metrics",
         match=Entity(match),
         select=[Function(function, [Column(column)], alias)],
         groupby=[
             Column("metric_id"),
             Column("tags[8]"),
             Column("tags[2]")
         ] + extra_groupby,
         where=[
             Condition(Column("org_id"), Op.EQ, 1),
             Condition(Column("project_id"), Op.IN, [1]),
             Condition(Column("metric_id"), Op.IN, [9, 11, 7]),
             Condition(Column("timestamp"), Op.GTE,
                       datetime(2021, 5, 28, 0, tzinfo=pytz.utc)),
             Condition(Column("timestamp"), Op.LT,
                       datetime(2021, 8, 26, 0, tzinfo=pytz.utc)),
             Condition(Column("tags[6]"), Op.IN, [10]),
         ],
         limit=Limit(MAX_POINTS),
         offset=Offset(0),
         granularity=Granularity(query_definition.rollup),
     )
Example No. 16
def _translate_conditions(org_id: int, input_: Any) -> Any:
    if isinstance(input_, Column):
        # The only filterable tag keys are release and environment.
        assert input_.name in ("release", "environment")
        # It greatly simplifies code if we just assume that they exist.
        # Alternative would be:
        #   * if tag key or value does not exist in AND-clause, return no data
        #   * if tag key or value does not exist in OR-clause, remove condition
        return Column(resolve_tag_key(input_.name))

    if isinstance(input_, str):
        # Assuming this is the right-hand side, we need to fetch a tag value.
        # It's OK if the tag value resolves to None, the snuba query will then
        # return no results, as is intended behavior

        return indexer.resolve(input_)

    if isinstance(input_, Function):
        return Function(function=input_.function,
                        parameters=_translate_conditions(
                            org_id, input_.parameters))

    if isinstance(input_, Condition):
        return Condition(
            lhs=_translate_conditions(org_id, input_.lhs),
            op=input_.op,
            rhs=_translate_conditions(org_id, input_.rhs),
        )

    if isinstance(input_, (int, float)):
        return input_

    assert isinstance(input_, (tuple, list)), input_
    return [_translate_conditions(org_id, item) for item in input_]
Example No. 17
def resolve_tags(input_: Any) -> Any:
    """Translate tags in snuba condition

    This assumes that all strings are either tag names or tag values, so do not
    pass Column("metric_id") or Column("project_id") into this function.

    """
    if isinstance(input_, list):
        return [resolve_tags(item) for item in input_]
    if isinstance(input_, Function):
        if input_.function == "ifNull":
            # This was wrapped automatically by QueryBuilder, remove wrapper
            return resolve_tags(input_.parameters[0])
        return Function(
            function=input_.function,
            parameters=input_.parameters and [resolve_tags(item) for item in input_.parameters],
        )
    if isinstance(input_, Condition):
        return Condition(lhs=resolve_tags(input_.lhs), op=input_.op, rhs=resolve_tags(input_.rhs))
    if isinstance(input_, BooleanCondition):
        return input_.__class__(conditions=[resolve_tags(item) for item in input_.conditions])
    if isinstance(input_, Column):
        # HACK: Some tags already take the form "tags[...]" in discover, take that into account:
        if input_.subscriptable == "tags":
            name = input_.key
        else:
            name = input_.name
        return Column(name=resolve_tag_key(name))
    if isinstance(input_, str):
        return resolve_weak(input_)

    return input_
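
A hedged before/after sketch of what this translation does to a simple condition, assuming snuba-sdk is installed; the numeric ids are invented (in practice they come from the indexer via resolve_tag_key and resolve_weak):

from snuba_sdk import Column, Condition, Op

untranslated = Condition(Column("release"), Op.EQ, "1.0.0")
# resolve_tags(untranslated) would yield a condition shaped like this,
# with 42 and 17 standing in for the resolved tag-key and tag-value ids:
translated = Condition(Column("tags[42]"), Op.EQ, 17)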
Example No. 18
 def test_set_sum_aggregation_for_errored_sessions(self):
     alias = "whatever"
     assert sessions_errored_set(self.metric_ids, alias) == Function(
         "uniqIf",
         [
             Column("value"),
             Function(
                 "in",
                 [
                     Column("metric_id"),
                     list(self.metric_ids),
                 ],
             ),
         ],
         alias,
     )
Example No. 19
    def test_join_query(self) -> None:
        ev = Entity("events", "ev")
        gm = Entity("groupedmessage", "gm")
        join = Join([Relationship(ev, "grouped", gm)])
        query = (
            Query("discover", join)
            .set_select([
                Column("group_id", ev),
                Column("status", gm),
                Function("avg", [Column("retention_days", ev)], "avg"),
            ])
            .set_groupby([Column("group_id", ev), Column("status", gm)])
            .set_where([
                Condition(Column("project_id", ev), Op.EQ, self.project_id),
                Condition(Column("project_id", gm), Op.EQ, self.project_id),
                Condition(Column("timestamp", ev), Op.GTE, self.base_time),
                Condition(Column("timestamp", ev), Op.LT, self.next_time),
            ])
        )

        response = self.post("/discover/snql", data=query.snuba())
        data = json.loads(response.data)

        assert response.status_code == 200
        assert data["data"] == []
Example No. 20
def _to_column(
        query_func: SessionsQueryFunction,
        column_condition: SelectableExpression = 1) -> SelectableExpression:
    """
    Converts query a function into an expression that can be directly plugged into anywhere
    columns are used (like the select argument of a Query)
    """

    parameters = (Column("value"), column_condition)

    # distribution columns
    if query_func in _DURATION_PERCENTILES:
        return Function(
            alias="percentiles",
            function="quantilesIf(0.5,0.75,0.9,0.95,0.99)",
            parameters=parameters,
        )
    if query_func == "avg(session.duration)":
        return Function(
            alias="avg",
            function="avgIf",
            parameters=parameters,
        )
    if query_func == "max(session.duration)":
        return Function(
            alias="max",
            function="maxIf",
            parameters=parameters,
        )
    # counters
    if query_func == "sum(session)":
        return Function(
            alias="sum",
            function="sumIf",
            parameters=parameters,
        )
    # sets
    if query_func == "count_unique(user)":
        return Function(
            alias="count_unique",
            function="uniqIf",
            parameters=parameters,
        )

    raise ValueError("Unmapped metrics column", query_func)
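
A usage sketch, assuming the helper above is importable and snuba-sdk is installed; the tag-key and status ids in the extra condition are placeholders:

from snuba_sdk import Column, Function

# Plain counter: sumIf(value, 1) AS sum
total_sessions = _to_column("sum(session)")

# The same counter restricted by a condition, e.g. a resolved session.status tag
status_filter = Function("equals", [Column("tags[9]"), 5])  # placeholder ids
errored_sessions = _to_column("sum(session)", status_filter)
# -> sumIf(value, equals(tags[9], 5)) AS sum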
Example No. 21
    def _resolve_web_vital_function(
        self,
        args: Mapping[str, Union[str, Column, SelectType, int, float]],
        alias: str,
    ) -> SelectType:
        column = args["column"]
        metric_id = args["metric_id"]
        quality = args["quality"].lower()

        if column not in [
                "measurements.lcp",
                "measurements.fcp",
                "measurements.fp",
                "measurements.fid",
                "measurements.cls",
        ]:
            raise InvalidSearchQuery(
                "count_web_vitals only supports measurements")

        measurement_rating = self.builder.resolve_column("measurement_rating")

        quality_id = indexer.resolve(quality)
        if quality_id is None:
            return Function(
                # This matches the type from doing `select toTypeName(count()) ...` from clickhouse
                "toUInt64",
                [0],
                alias,
            )

        return Function(
            "countIf",
            [
                Column("value"),
                Function(
                    "and",
                    [
                        Function("equals", [measurement_rating, quality_id]),
                        Function("equals", [Column("metric_id"), metric_id]),
                    ],
                ),
            ],
            alias,
        )
Example No. 22
    def _resolve_count_miserable_function(
        self,
        args: Mapping[str, Union[str, Column, SelectType, int, float]],
        alias: Optional[str] = None,
    ) -> SelectType:
        metric_true = indexer.resolve(constants.METRIC_TRUE_TAG_VALUE)

        # Nobody is miserable, we can return 0
        if metric_true is None:
            return Function(
                "toUInt64",
                [0],
                alias,
            )

        return Function(
            "uniqIf",
            [
                Column("value"),
                Function(
                    "and",
                    [
                        Function(
                            "equals",
                            [
                                Column("metric_id"),
                                args["metric_id"],
                            ],
                        ),
                        Function(
                            "equals",
                            [
                                self.builder.column(
                                    constants.METRIC_MISERABLE_TAG_KEY),
                                metric_true
                            ],
                        ),
                    ],
                ),
            ],
            alias,
        )
Example No. 23
 def _resolve_count_if(
     self,
     metric_condition: Function,
     condition: Function,
     alias: Optional[str] = None,
 ) -> SelectType:
     return Function(
         "countIf",
         [
             Column("value"),
             Function(
                 "and",
                 [
                     metric_condition,
                     condition,
                 ],
             ),
         ],
         alias,
     )
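
The same shape written out without the builder class, for reference; metric and tag ids are invented. This is the building block that the failure-count and apdex examples compose:

from snuba_sdk import Column, Function

metric_condition = Function("equals", [Column("metric_id"), 123])   # placeholder id
status_condition = Function("notIn", [Column("tags[4]"), [0, 2]])   # placeholder ids

failure_count = Function(
    "countIf",
    [Column("value"), Function("and", [metric_condition, status_condition])],
    "failure_count",
)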
Example No. 24
 def _resolve_percentile(
     self,
     args: Mapping[str, Union[str, Column, SelectType, int, float]],
     alias: str,
     fixed_percentile: float,
 ) -> SelectType:
     return Function(
         "arrayElement",
         [
             Function(
                 f"quantilesIf({fixed_percentile})",
                 [
                     Column("value"),
                     Function("equals", [Column("metric_id"), args["metric_id"]]),
                 ],
             ),
             1,
         ],
         alias,
     )
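
The same pattern written out directly with snuba-sdk and an invented metric id; quantilesIf returns an array even for a single quantile, and ClickHouse arrays are 1-indexed, hence the arrayElement(..., 1):

from snuba_sdk import Column, Function

p95 = Function(
    "arrayElement",
    [
        Function(
            "quantilesIf(0.95)",
            [Column("value"), Function("equals", [Column("metric_id"), 123])],  # placeholder id
        ),
        1,
    ],
    "p95_transaction_duration",
)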
Example No. 25
    def test_invalid_time_conditions(self) -> None:
        query = (Query("events", Entity("events")).set_select(
            [Function("count", [], "count")]).set_where([
                Condition(Column("project_id"), Op.EQ, self.project_id),
                Condition(Column("timestamp"), Op.GTE, self.next_time),
                Condition(Column("timestamp"), Op.LT, self.base_time),
            ]))

        response = self.post("/events/snql", data=query.snuba())
        resp = json.loads(response.data)
        assert response.status_code == 400, resp
Example No. 26
    def test_escape_edge_cases(self) -> None:
        query = (Query("events", Entity("events")).set_select(
            [Function("count", [], "times_seen")]).set_where([
                Condition(Column("project_id"), Op.EQ, self.project_id),
                Condition(Column("timestamp"), Op.GTE, self.base_time),
                Condition(Column("timestamp"), Op.LT, self.next_time),
                Condition(Column("environment"), Op.EQ, "\\' \n \\n \\"),
            ]))

        response = self.post("/events/snql", data=query.snuba())
        data = json.loads(response.data)
        assert response.status_code == 200, data
Example No. 27
    def test_tags_in_groupby(self) -> None:
        query = (
            Query("events", Entity("events"))
            .set_select([
                Function("count", [], "times_seen"),
                Function("min", [Column("timestamp")], "first_seen"),
                Function("max", [Column("timestamp")], "last_seen"),
            ])
            .set_groupby([Column("tags[k8s-app]")])
            .set_where([
                Condition(Column("project_id"), Op.EQ, self.project_id),
                Condition(Column("timestamp"), Op.GTE, self.base_time),
                Condition(Column("timestamp"), Op.LT, self.next_time),
                Condition(Column("tags[k8s-app]"), Op.NEQ, ""),
                Condition(Column("type"), Op.NEQ, "transaction"),
            ])
            .set_orderby([
                OrderBy(Function("max", [Column("timestamp")], "last_seen"), Direction.DESC)
            ])
            .set_limit(1000)
        )

        response = self.post("/events/snql", data=query.snuba())
        data = json.loads(response.data)
        assert response.status_code == 200, data
Example No. 28
    def _resolve_apdex_function(
        self,
        _: Mapping[str, Union[str, Column, SelectType, int, float]],
        alias: Optional[str] = None,
    ) -> SelectType:
        metric_true = indexer.resolve(constants.METRIC_TRUE_TAG_VALUE)

        # Nothing is satisfied or tolerated, the score must be 0
        if metric_true is None:
            return Function(
                "toUInt64",
                [0],
                alias,
            )

        satisfied = Function("equals", [
            self.builder.column(constants.METRIC_SATISFIED_TAG_KEY),
            metric_true
        ])
        tolerable = Function("equals", [
            self.builder.column(constants.METRIC_TOLERATED_TAG_KEY),
            metric_true
        ])
        metric_condition = Function(
            "equals",
            [Column("metric_id"),
             self.resolve_metric("transaction.duration")])

        return Function(
            "divide",
            [
                Function(
                    "plus",
                    [
                        self._resolve_count_if(metric_condition, satisfied),
                        Function(
                            "divide",
                            [
                                self._resolve_count_if(metric_condition,
                                                       tolerable), 2
                            ],
                        ),
                    ],
                ),
                Function("countIf", [metric_condition]),
            ],
            alias,
        )
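
The arithmetic the expression encodes, as a plain-Python sketch with invented counts (Apdex = (satisfied + tolerable / 2) / total):

satisfied, tolerable, total = 80, 15, 100
apdex = (satisfied + tolerable / 2) / total  # 0.875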
Example No. 29
    def test_arrayjoin(self) -> None:
        query = (
            Query("events", Entity("events"))
            .set_select([
                Function("count", [], "times_seen"),
                Function("min", [Column("timestamp")], "first_seen"),
                Function("max", [Column("timestamp")], "last_seen"),
            ])
            .set_groupby([Column("exception_frames.filename")])
            .set_array_join([Column("exception_frames.filename")])
            .set_where([
                Condition(Column("exception_frames.filename"), Op.LIKE, "%.java"),
                Condition(Column("project_id"), Op.EQ, self.project_id),
                Condition(Column("timestamp"), Op.GTE, self.base_time),
                Condition(Column("timestamp"), Op.LT, self.next_time),
            ])
            .set_orderby([
                OrderBy(Function("max", [Column("timestamp")], "last_seen"), Direction.DESC)
            ])
            .set_limit(1000)
        )

        response = self.post("/events/snql", data=query.snuba())
        data = json.loads(response.data)
        assert response.status_code == 200, data
        assert len(data["data"]) == 6
Example No. 30
    def test_simple_query(self) -> None:
        query = (
            Query("discover", Entity("discover_events"))
            .set_select([Function("count", [], "count")])
            .set_groupby([Column("project_id"), Column("tags[custom_tag]")])
            .set_where([
                Condition(Column("type"), Op.NEQ, "transaction"),
                Condition(Column("project_id"), Op.EQ, self.project_id),
                Condition(Column("timestamp"), Op.GTE, self.base_time),
                Condition(Column("timestamp"), Op.LT, self.next_time),
            ])
            .set_orderby([OrderBy(Function("count", [], "count"), Direction.ASC)])
            .set_limit(1000)
            .set_consistent(True)
            .set_debug(True)
        )

        response = self.post("/discover/snql", data=query.snuba())
        data = json.loads(response.data)

        assert response.status_code == 200, data
        assert data["stats"]["consistent"]
        assert data["data"] == [{
            "count": 1,
            "tags[custom_tag]": "custom_value",
            "project_id": self.project_id,
        }]