Example #1
    def test_simple_orderby(self):
        query = QueryBuilder(
            Dataset.Discover,
            self.params,
            selected_columns=["user.email", "release"],
            orderby=["user.email"],
        )

        self.assertCountEqual(query.where, self.default_conditions)
        self.assertCountEqual(
            query.orderby,
            [
                OrderBy(Function("toString", [Column("email")], "user.email"),
                        Direction.ASC)
            ],
        )
        query.get_snql_query().validate()

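        # A "-" prefix on the orderby column should produce a descending sort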
        query = QueryBuilder(
            Dataset.Discover,
            self.params,
            selected_columns=["user.email", "release"],
            orderby=["-user.email"],
        )

        self.assertCountEqual(query.where, self.default_conditions)
        self.assertCountEqual(
            query.orderby,
            [
                OrderBy(Function("toString", [Column("email")], "user.email"),
                        Direction.DESC)
            ],
        )
        query.get_snql_query().validate()
Example #2
def test_orderby(exp: Any, direction: Any,
                 exception: Optional[Exception]) -> None:
    if exception is not None:
        with pytest.raises(type(exception), match=re.escape(str(exception))):
            OrderBy(exp, direction)
    else:
        assert OrderBy(exp, direction)
Example #3
    def test_sort_count(self, mock_raw_snql_query):
        event = self.create_event()

        mock_raw_snql_query.side_effect = [
            {
                "data": [
                    self.suspect_span_group_snuba_results(
                        "django.view", event),
                    self.suspect_span_group_snuba_results(
                        "django.middleware", event),
                    self.suspect_span_group_snuba_results(
                        "http.server", event),
                ],
            },
        ]

        with self.feature(self.FEATURES):
            response = self.client.get(
                self.url,
                data={
                    "project": self.project.id,
                    "sort": "-count",
                },
                format="json",
            )

        assert response.status_code == 200, response.content
        self.assert_suspect_span(
            response.data,
            [
                self.suspect_span_results("django.view", event),
                self.suspect_span_results("django.middleware", event),
                self.suspect_span_results("http.server", event),
            ],
        )

        assert mock_raw_snql_query.call_count == 1

        # the first call is to get the suspects, and should be using the specified sort
        assert mock_raw_snql_query.call_args_list[0][0][0].orderby == [
            OrderBy(exp=Function("count", [], "count"),
                    direction=Direction.DESC),
            OrderBy(
                exp=Function(
                    "sum",
                    [Function("arrayJoin", [Column("spans.exclusive_time")])],
                    "sumArray_spans_exclusive_time",
                ),
                direction=Direction.DESC,
            ),
        ]
        assert (mock_raw_snql_query.call_args_list[0][0][1] ==
                "api.organization-events-spans-performance-suspects")
Example #4
    def test_orderby(self) -> None:
        self.project_id3 = next(self.id_iter)
        self.org_id2 = next(self.id_iter)
        self.generate_session_events(self.org_id2, self.project_id3)

        query = Query(
            dataset="sessions",
            match=Entity("org_sessions"),
            select=[Column("org_id"), Column("project_id")],
            groupby=[Column("org_id"), Column("project_id")],
            where=[
                Condition(Column("started"), Op.GTE,
                          datetime.utcnow() - timedelta(hours=6)),
                Condition(Column("started"), Op.LT, datetime.utcnow()),
            ],
            granularity=Granularity(3600),
            orderby=[OrderBy(Column("org_id"), Direction.ASC)],
        )

        response = self.app.post(
            "/sessions/snql",
            data=query.snuba(),
        )
        data = json.loads(response.data)
        assert response.status_code == 200, response.data
        assert len(data["data"]) == 3
        assert data["data"][0]["org_id"] == self.org_id
        assert data["data"][0]["project_id"] == self.project_id
        assert data["data"][1]["org_id"] == self.org_id
        assert data["data"][1]["project_id"] == self.project_id2
        assert data["data"][2]["org_id"] == self.org_id2
        assert data["data"][2]["project_id"] == self.project_id3

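        # Flip the orderby to DESC; the same rows should come back in reverse order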
        query = query.set_orderby(
            [OrderBy(Column("org_id"), Direction.DESC)], )
        response = self.app.post(
            "/sessions/snql",
            data=query.snuba(),
        )
        data = json.loads(response.data)
        assert response.status_code == 200, response.data
        assert len(data["data"]) == 3
        assert data["data"][0]["org_id"] == self.org_id2
        assert data["data"][0]["project_id"] == self.project_id3
        assert data["data"][1]["org_id"] == self.org_id
        assert data["data"][1]["project_id"] == self.project_id
        assert data["data"][2]["org_id"] == self.org_id
        assert data["data"][2]["project_id"] == self.project_id2
Example #5
    def test_sub_query(self) -> None:
        inner_query = (Query("discover", Entity("discover_events")).set_select(
            [Function("count", [], "count")
             ]).set_groupby([Column("project_id"),
                             Column("tags[custom_tag]")]).set_where([
                                 Condition(Column("type"), Op.NEQ,
                                           "transaction"),
                                 Condition(Column("project_id"), Op.EQ,
                                           self.project_id),
                                 Condition(Column("timestamp"), Op.GTE,
                                           self.base_time),
                                 Condition(Column("timestamp"), Op.LT,
                                           self.next_time),
                             ]))

        query = (Query("discover", inner_query).set_select(
            [Function("avg", [Column("count")], "avg_count")]).set_orderby([
                OrderBy(Function("avg", [Column("count")], "avg_count"),
                        Direction.ASC)
            ]).set_limit(1000))

        response = self.post("/discover/snql", data=query.snuba())
        data = json.loads(response.data)
        assert response.status_code == 200, data
        assert data["data"] == [{"avg_count": 1.0}]
Example #6
 def generate_orderby_clause(self, direction: Direction,
                             **kwargs: Any) -> List[OrderBy]:
     return [
         OrderBy(
             self.generate_select_statements(entity=self.get_entity())[0],
             direction,
         )
     ]
Example #7
    def _build_orderby(
        self, query_definition: QueryDefinition, entity: str
    ) -> Optional[List[OrderBy]]:
        if query_definition.orderby is None:
            return None
        (op, metric_name), direction = query_definition.orderby
        snuba_field = _OP_TO_FIELD[entity][op]

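        # Order by the snuba alias of the field that backs the requested (op, metric) pair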
        return [OrderBy(Column(snuba_field.snuba_alias), direction)]
Example #8
 def test_orderby_duplicate_columns(self):
     query = QueryBuilder(
         Dataset.Discover,
         self.params,
         selected_columns=["user.email", "user.email"],
         orderby=["user.email"],
     )
     self.assertCountEqual(
         query.orderby,
         [OrderBy(Column("email"), Direction.ASC)],
     )
Example #9
 def generate_orderby_clause(self, direction: Direction,
                             projects: Sequence[Project]) -> List[OrderBy]:
     if not projects:
         self.__raise_entity_validation_exception("generate_orderby_clause")
     self.get_entity(projects=projects)
     return [
         OrderBy(
             self.generate_select_statements(projects=projects)[0],
             direction,
         )
     ]
Example #10
 def get_snql_query(self) -> Query:
     return Query(
         dataset=self.dataset.value,
         match=Entity(self.dataset.value),
         select=self.select,
         where=self.where,
         having=self.having,
         groupby=self.groupby,
         orderby=[OrderBy(self.time_column, Direction.ASC)],
         granularity=self.granularity,
         limit=self.limit,
     )
Example #11
    def __init__(
        self,
        num_buckets: int,
        histogram_column: str,
        histogram_rows: Optional[int],
        histogram_params: HistogramParams,
        key_column: Optional[str],
        field_names: Optional[List[Union[str, Any, None]]],
        groupby: Optional[List[str]],
        *args: Any,
        **kwargs: Any,
    ):
        kwargs["functions_acl"] = kwargs.get("functions_acl",
                                             []) + self.base_function_acl
        super().__init__(*args, **kwargs)
        self.additional_groupby = groupby
        selected_columns = kwargs["selected_columns"]

        resolved_histogram = self.resolve_column(histogram_column)

        # Reset and ignore the columns from the QueryBuilder
        self.aggregates: List[CurriedFunction] = []
        self.columns = [self.resolve_column("count()"), resolved_histogram]

        if key_column is not None and field_names is not None:
            key_values: List[str] = [
                field for field in field_names if isinstance(field, str)
            ]
            self.where.append(
                Condition(self.resolve_column(key_column), Op.IN, key_values))

        # make sure to bound the bins to get the desired range of results
        min_bin = histogram_params.start_offset
        self.where.append(Condition(resolved_histogram, Op.GTE, min_bin))
        max_bin = histogram_params.start_offset + histogram_params.bucket_size * num_buckets
        self.where.append(Condition(resolved_histogram, Op.LTE, max_bin))

        if key_column is not None:
            self.columns.append(self.resolve_column(key_column))

        groups = len(
            selected_columns) if histogram_rows is None else histogram_rows
        self.limit = Limit(groups * num_buckets)
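        # Append an ascending sort on the histogram column, keeping any existing orderby first, so buckets come back in bin order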
        self.orderby = (self.orderby if self.orderby else
                        []) + [OrderBy(resolved_histogram, Direction.ASC)]
Example #12
def build_key_errors(interval, project):
    start, stop = interval

    # Take the 3 most frequently occurring events
    query = Query(
        dataset=Dataset.Events.value,
        match=Entity("events"),
        select=[Column("group_id"), Function("count", [])],
        where=[
            Condition(Column("timestamp"), Op.GTE, start),
            Condition(Column("timestamp"), Op.LT, stop + timedelta(days=1)),
            Condition(Column("project_id"), Op.EQ, project.id),
        ],
        groupby=[Column("group_id")],
        orderby=[OrderBy(Function("count", []), Direction.DESC)],
        limit=Limit(3),
    )
    query_result = raw_snql_query(query, referrer="reports.key_errors")
    key_errors = query_result["data"]
    return [(e["group_id"], e["count()"]) for e in key_errors]
Example #13
    def test_sessions_query(self) -> None:
        query = (Query("sessions", Entity("sessions")).set_select(
            [Column("project_id"), Column("release")]).set_groupby(
                [Column("project_id"), Column("release")]).set_where([
                    Condition(Column("project_id"), Op.IN, [self.project_id]),
                    Condition(Column("org_id"), Op.EQ, self.org_id),
                    Condition(
                        Column("started"),
                        Op.GTE,
                        datetime(2021, 1, 1, 17, 5, 59, 554860),
                    ),
                    Condition(Column("started"), Op.LT,
                              datetime(2022, 1, 1, 17, 6, 0, 554981)),
                ]).set_orderby([OrderBy(Column("sessions"),
                                        Direction.DESC)]).set_limit(100))

        response = self.post("/sessions/snql", data=query.snuba())
        data = json.loads(response.data)

        assert response.status_code == 200
        assert data["data"] == []
Example #14
    def test_tags_in_groupby(self) -> None:
        query = (Query("events", Entity("events")).set_select([
            Function("count", [], "times_seen"),
            Function("min", [Column("timestamp")], "first_seen"),
            Function("max", [Column("timestamp")], "last_seen"),
        ]).set_groupby([Column("tags[k8s-app]")]).set_where([
            Condition(Column("project_id"), Op.EQ, self.project_id),
            Condition(Column("timestamp"), Op.GTE, self.base_time),
            Condition(Column("timestamp"), Op.LT, self.next_time),
            Condition(Column("tags[k8s-app]"), Op.NEQ, ""),
            Condition(Column("type"), Op.NEQ, "transaction"),
        ]).set_orderby([
            OrderBy(
                Function("max", [Column("timestamp")], "last_seen"),
                Direction.DESC,
            )
        ]).set_limit(1000))

        response = self.post("/events/snql", data=query.snuba())
        data = json.loads(response.data)
        assert response.status_code == 200, data
Example #15
    def test_arrayjoin(self) -> None:
        query = (Query("events", Entity("events")).set_select([
            Function("count", [], "times_seen"),
            Function("min", [Column("timestamp")], "first_seen"),
            Function("max", [Column("timestamp")], "last_seen"),
        ]).set_groupby([Column("exception_frames.filename")]).set_array_join(
            Column("exception_frames.filename")).set_where([
                Condition(Column("exception_frames.filename"), Op.LIKE,
                          "%.java"),
                Condition(Column("project_id"), Op.EQ, self.project_id),
                Condition(Column("timestamp"), Op.GTE, self.base_time),
                Condition(Column("timestamp"), Op.LT, self.next_time),
            ]).set_orderby([
                OrderBy(
                    Function("max", [Column("timestamp")], "last_seen"),
                    Direction.DESC,
                )
            ]).set_limit(1000))

        response = self.post("/events/snql", data=query.snuba())
        data = json.loads(response.data)
        assert response.status_code == 200, data
        assert len(data["data"]) == 6
Example #16
    def test_simple_query(self) -> None:
        query = (Query("discover", Entity("discover_events")).set_select(
            [Function("count", [], "count")]).set_groupby(
                [Column("project_id"),
                 Column("tags[custom_tag]")]).set_where([
                     Condition(Column("type"), Op.NEQ, "transaction"),
                     Condition(Column("project_id"), Op.EQ, self.project_id),
                     Condition(Column("timestamp"), Op.GTE, self.base_time),
                     Condition(Column("timestamp"), Op.LT, self.next_time),
                 ]).set_orderby([
                     OrderBy(Function("count", [], "count"), Direction.ASC)
                 ]).set_limit(1000).set_consistent(True).set_debug(True))

        response = self.post("/discover/snql", data=query.snuba())
        data = json.loads(response.data)

        assert response.status_code == 200, data
        assert data["stats"]["consistent"]
        assert data["data"] == [{
            "count": 1,
            "tags[custom_tag]": "custom_value",
            "project_id": self.project_id,
        }]
Example #17
def build_project_series(start__stop, project):
    start, stop = start__stop
    rollup = ONE_DAY

    resolution, series = tsdb.get_optimal_rollup_series(start, stop, rollup)
    assert resolution == rollup, "resolution does not match requested value"

    clean = partial(clean_series, start, stop, rollup)

    def zerofill_clean(data):
        return clean(zerofill(data, start, stop, rollup, fill_default=0))

    # Note: this section can be removed
    issue_ids = project.group_set.filter(status=GroupStatus.RESOLVED,
                                         resolved_at__gte=start,
                                         resolved_at__lt=stop).values_list(
                                             "id", flat=True)

    # TODO: The TSDB calls could be replaced with a SnQL call here
    tsdb_range_resolved = _query_tsdb_groups_chunked(tsdb.get_range, issue_ids,
                                                     start, stop, rollup)
    resolved_error_series = reduce(
        merge_series,
        map(clean, tsdb_range_resolved.values()),
        clean([(timestamp, 0) for timestamp in series]),
    )
    # end

    # Use outcomes to compute total errors and transactions
    outcomes_query = Query(
        dataset=Dataset.Outcomes.value,
        match=Entity("outcomes"),
        select=[
            Column("time"),
            Column("category"),
            Function("sum", [Column("quantity")], "total"),
        ],
        where=[
            Condition(Column("timestamp"), Op.GTE, start),
            Condition(Column("timestamp"), Op.LT, stop + timedelta(days=1)),
            Condition(Column("project_id"), Op.EQ, project.id),
            Condition(Column("org_id"), Op.EQ, project.organization_id),
            Condition(Column("outcome"), Op.EQ, Outcome.ACCEPTED),
            Condition(
                Column("category"),
                Op.IN,
                [*DataCategory.error_categories(), DataCategory.TRANSACTION],
            ),
        ],
        groupby=[Column("time"), Column("category")],
        granularity=Granularity(rollup),
        orderby=[OrderBy(Column("time"), Direction.ASC)],
    )
    outcome_series = raw_snql_query(outcomes_query,
                                    referrer="reports.outcome_series")
    total_error_series = OrderedDict()
    for v in outcome_series["data"]:
        if v["category"] in DataCategory.error_categories():
            timestamp = int(to_timestamp(parse_snuba_datetime(v["time"])))
            total_error_series[timestamp] = total_error_series.get(
                timestamp, 0) + v["total"]

    total_error_series = zerofill_clean(list(total_error_series.items()))
    transaction_series = [(int(to_timestamp(parse_snuba_datetime(v["time"]))),
                           v["total"]) for v in outcome_series["data"]
                          if v["category"] == DataCategory.TRANSACTION]
    transaction_series = zerofill_clean(transaction_series)

    error_series = merge_series(
        resolved_error_series,
        total_error_series,
        lambda resolved, total:
        (resolved, total - resolved),  # Resolved, Unresolved
    )

    # Format of this series: [(resolved, unresolved, transactions)]
    return merge_series(
        error_series,
        transaction_series,
        lambda errors, transactions: errors + (transactions, ),
    )
Example #18
    def test_sort_percentiles(self, mock_raw_snql_query):
        event = self.create_event()

        for i, sort in enumerate([
                "p50ExclusiveTime",
                "p75ExclusiveTime",
                "p95ExclusiveTime",
                "p99ExclusiveTime",
        ]):
            mock_raw_snql_query.side_effect = [
                {
                    "data": [
                        self.suspect_span_group_snuba_results(
                            "http.server", event),
                        self.suspect_span_group_snuba_results(
                            "django.middleware", event),
                        self.suspect_span_group_snuba_results(
                            "django.view", event),
                    ],
                },
            ]

            with self.feature(self.FEATURES):
                response = self.client.get(
                    self.url,
                    data={
                        "project": self.project.id,
                        "sort": f"-{sort}",
                    },
                    format="json",
                )

            assert response.status_code == 200, response.content
            self.assert_suspect_span(
                response.data,
                [
                    self.suspect_span_results("http.server", event),
                    self.suspect_span_results("django.middleware", event),
                    self.suspect_span_results("django.view", event),
                ],
            )

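            # e.g. "p75ExclusiveTime"[1:3] == "75"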
            percentile = sort[1:3]

            assert mock_raw_snql_query.call_count == i + 1

            # the first call is to get the suspects, and should be using the specified sort
            assert mock_raw_snql_query.call_args_list[i][0][0].orderby == [
                OrderBy(
                    exp=Function(
                        f"quantile(0.{percentile.rstrip('0')})",
                        [
                            Function("arrayJoin",
                                     [Column("spans.exclusive_time")])
                        ],
                        f"percentileArray_spans_exclusive_time_0_{percentile}",
                    ),
                    direction=Direction.DESC,
                )
            ]
            assert (mock_raw_snql_query.call_args_list[i][0][1] ==
                    "api.organization-events-spans-performance-suspects")
Example #19
def __get_release_from_filters(
    org_id,
    project_id,
    release,
    scope,
    scope_value,
    stats_start,
    scope_operation,
    scope_direction,
    release_operation,
    release_direction,
    limit,
    crash_free_function=None,
    environments=None,
):
    """
    Helper function that, based on the passed args, constructs a snuba query and runs
    it to fetch a release
    Inputs:
        * org_id: Organisation Id
        * project_id
        * release: release version
        * scope: Sort order criteria -> sessions, users, crash_free_sessions, crash_free_users
        * scope_value: The value/count of the scope argument
        * stats_start: start of the stats period
        * scope_operation: Indicates which operation should be used to compare the releases'
            scope value with current release scope value. either Op.GT or Op.LT
        * scope_direction: Indicates which ordering should be used to order releases'
            scope value by either ASC or DESC
        * release_operation: Indicates which operation should be used to compare the
            releases' version with current release version. either Op.GT or Op.LT
        * release_direction: Indicates which ordering should be used to
            order releases' version by either ASC or DESC
        * crash_free_function: optional arg that is passed when a function needs to be applied in query like in the
            case of crash_free_sessions and crash_free_users
        * environments
    Return:
        List of releases that either contains one release or none at all
    """
    release_conditions = [
        Condition(Column("started"), Op.GTE, stats_start),
        Condition(
            Column("started"),
            Op.LT,
            datetime.utcnow(),
        ),
        Condition(Column("project_id"), Op.EQ, project_id),
        Condition(Column("org_id"), Op.EQ, org_id),
    ]
    if environments is not None:
        release_conditions.append(Condition(Column("environment"), Op.IN, environments))

    # Get select statements and append to the select statement list a function if
    # crash_free_option whether it is crash_free_users or crash_free_sessions is picked
    select = [
        Column("project_id"),
        Column("release"),
    ]
    if crash_free_function is not None:
        select.append(crash_free_function)

    having = [
        Or(
            conditions=[
                Condition(Column(scope), scope_operation, scope_value),
                And(
                    conditions=[
                        Condition(Column(scope), Op.EQ, scope_value),
                        Condition(Column("release"), release_operation, release),
                    ]
                ),
            ]
        )
    ]
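    # Sort by the scope value first, then break ties by release version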
    orderby = [
        OrderBy(direction=scope_direction, exp=Column(scope)),
        OrderBy(direction=release_direction, exp=Column("release")),
    ]

    query = (
        Query(
            dataset=Dataset.Sessions.value,
            match=Entity("sessions"),
            select=select,
            where=release_conditions,
            having=having,
            groupby=[Column("release"), Column("project_id")],
            orderby=orderby,
        )
        .set_limit(limit)
        .set_offset(0)
    )
    return raw_snql_query(query, referrer="sessions.get_prev_or_next_release")["data"]
Example #20
def json_to_snql(body: Mapping[str, Any], entity: str) -> Query:
    """
    This will output a Query object that matches the Legacy query body that was passed in.
    The entity is necessary since the SnQL API requires an explicit entity. This doesn't
    support subqueries or joins.

    :param body: The legacy API body.
    :type body: Mapping[str, Any]
    :param entity: The name of the entity being queried.
    :type entity: str

    :raises InvalidExpressionError, InvalidQueryError: If the legacy body is invalid, the SDK will
        raise an exception.

    """

    dataset = body.get("dataset") or entity
    sample = body.get("sample")
    if sample is not None:
        sample = float(sample)
    query = Query(dataset, Entity(entity, None, sample))

    selected_columns = []
    for a in body.get("aggregations", []):
        selected_columns.append(parse_exp(list(a)))

    selected = []
    for s in body.get("selected_columns", []):
        if isinstance(s, tuple):
            selected.append(list(s))
        else:
            selected.append(s)

    selected_columns.extend(list(map(parse_exp, selected)))

    arrayjoin = body.get("arrayjoin")
    if arrayjoin:
        query = query.set_array_join([Column(arrayjoin)])

    query = query.set_select(selected_columns)

    groupby = body.get("groupby", [])
    if groupby and not isinstance(groupby, list):
        groupby = [groupby]

    parsed_groupby = []
    for g in groupby:
        if isinstance(g, tuple):
            g = list(g)
        parsed_groupby.append(parse_exp(g))
    query = query.set_groupby(parsed_groupby)

    conditions: list[Union[Or, Condition]] = []
    if body.get("organization"):
        org_cond = parse_extension_condition("org_id", body["organization"])
        if org_cond:
            conditions.append(org_cond)

    assert isinstance(query.match, Entity)
    time_column = get_required_time_column(query.match.name)
    if time_column:
        time_cols = (("from_date", Op.GTE), ("to_date", Op.LT))
        for col, op in time_cols:
            date_val = body.get(col)
            if date_val:
                conditions.append(
                    Condition(Column(time_column), op,
                              parse_datetime(date_val)))

    if body.get("project"):
        proj_cond = parse_extension_condition("project_id", body["project"],
                                              True)
        if proj_cond:
            conditions.append(proj_cond)

    for cond in body.get("conditions", []):
        if not is_condition(cond):
            or_conditions = []
            for or_cond in cond:
                or_conditions.append(parse_condition(or_cond))

            if len(or_conditions) > 1:
                conditions.append(Or(or_conditions))
            else:
                conditions.extend(or_conditions)
        else:
            conditions.append(parse_condition(cond))

    query = query.set_where(conditions)

    having: list[Union[Or, Condition]] = []
    for cond in body.get("having", []):
        if not is_condition(cond):
            or_conditions = []
            for or_cond in cond:
                or_conditions.append(parse_condition(or_cond))

            having.append(Or(or_conditions))
        else:
            having.append(parse_condition(cond))

    query = query.set_having(having)

    order_by = body.get("orderby")
    if order_by:
        if not isinstance(order_by, list):
            order_by = [order_by]

        order_bys = []
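        # A leading "-" on a legacy orderby entry means descending order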
        for o in order_by:
            direction = Direction.ASC
            if isinstance(o, list):
                first = o[0]
                if isinstance(first, str) and first.startswith("-"):
                    o[0] = first.lstrip("-")
                    direction = Direction.DESC
                part = parse_exp(o)
            elif isinstance(o, str):
                if o.startswith("-"):
                    direction = Direction.DESC
                    part = parse_exp(o.lstrip("-"))
                else:
                    part = parse_exp(o)

            order_bys.append(OrderBy(part, direction))

        query = query.set_orderby(order_bys)

    limitby = body.get("limitby")
    if limitby:
        limit, name = limitby
        query = query.set_limitby(LimitBy([Column(name)], int(limit)))

    extras = (
        "limit",
        "offset",
        "granularity",
        "totals",
        "consistent",
        "turbo",
        "debug",
        "dry_run",
        "parent_api",
    )
    for extra in extras:
        if body.get(extra) is not None:
            query = getattr(query, f"set_{extra}")(body.get(extra))

    query = query.set_legacy(True)
    return query
Example #21
def _render_trees(group: Group, user):
    materialized_hashes = list({
        gh.hash
        for gh in GroupHash.objects.filter(project=group.project, group=group)
    })

    # Evaluates to the index of the last hash that is in materialized_hashes,
    # or 1 otherwise.
    find_hash_expr = _construct_arraymax([1] + [  # type: ignore
        Function("indexOf", [Column("hierarchical_hashes"), hash])
        for hash in materialized_hashes
    ])

    # After much deliberation I (markus) decided that it would be best to
    # render the entire tree using one large Snuba query. A previous
    # implementation incurred n+1 queries on Snuba (n = number of materialized
    # hashes) and was very buggy when it came to missing materialized hashes
    # (which can happen if fallback/secondary grouping is turned on), events
    # were counted twice because those n+1 queries accidentally counted
    # overlapping sets of events, and the endpoint response time was kind of
    # bad because of n+1 query.
    #
    # It being one large query may also make it easier to add pagination down
    # the road.

    query = (
        Query("events", Entity("events")).set_select([
            Function("count", [], "event_count"),
            Function(
                "argMax",
                [Column("event_id"), Column("timestamp")], "event_id"),
            Function("max", [Column("timestamp")], "latest_event_timestamp"),
            # If hierarchical_hashes contains any of the materialized
            # hashes, find_hash_expr evaluates to the last found index and
            # arraySlice will give us this hash + the next child hash that
            # we use in groupby
            #
            # If hierarchical_hashes does not contain any of those hashes,
            # find_hash_expr will return 1 so we start slicing at the beginning.
            # This can happen when hierarchical_hashes is empty (=>
            # hash_slice = []), but we also try to recover gracefully from
            # a hypothetical case where we are missing some hashes in
            # postgres (unclear how this could be reached).
            #
            # We select some intermediate computation values here which we
            # definitely don't need the results of. It's just temp vars.
            Function(
                # First we find the materialized hash using find_hash_expr,
                # and subtract 1 which should be the parent hash if there
                # is one. If there isn't, this now can be an out-of-bounds
                # access by being 0 (arrays are indexed starting with 1)
                "minus",
                [find_hash_expr, 1],
                "parent_hash_i",
            ),
            # We clip the value to be at least 1, this will be where we
            # start slicing hierarchical_hashes. 0 would be an out of
            # bounds access.
            Function("greatest", [Column("parent_hash_i"), 1], "slice_start"),
            # This will return a slice of length 2 if the materialized hash
            # has been found at the beginning of the array, but return a
            # slice of length 3 if not.
            Function(
                "arraySlice",
                [
                    Column("hierarchical_hashes"),
                    Column("slice_start"),
                    Function(
                        "minus",
                        [
                            Function(
                                "plus",
                                [Column("parent_hash_i"), 3],
                            ),
                            Column("slice_start"),
                        ],
                    ),
                ],
                "hash_slice",
            ),
            Column("primary_hash"),
        ]).set_where(_get_group_filters(group)).set_groupby([
            Column("parent_hash_i"),
            Column("slice_start"),
            Column("hash_slice"),
            Column("primary_hash"),
        ]).set_orderby(
            [OrderBy(Column("latest_event_timestamp"), Direction.DESC)]))

    rv = []

    for row in snuba.raw_snql_query(
            query, referrer="api.group_split.render_grouping_tree")["data"]:
        if len(row["hash_slice"]) == 0:
            hash = row["primary_hash"]
            parent_hash = child_hash = None
        elif len(row["hash_slice"]) == 1:
            (hash, ) = row["hash_slice"]
            parent_hash = child_hash = None
        elif len(row["hash_slice"]) == 2:
            hash, child_hash = row["hash_slice"]
            parent_hash = None
        elif len(row["hash_slice"]) == 3:
            parent_hash, hash, child_hash = row["hash_slice"]
        else:
            raise ValueError("unexpected length of hash_slice")

        _add_hash(
            rv,
            group,
            user,
            parent_hash,
            hash,
            child_hash,
            row["event_count"],
            row["latest_event_timestamp"],
            row["event_id"],
        )

    rv.sort(key=lambda tree: (tree["id"] or "", tree["childId"] or ""))

    return rv
Example #22
def _render_trees(group: Group, user):
    materialized_hashes = {
        gh.hash
        for gh in GroupHash.objects.filter(project=group.project, group=group)
    }

    rv = []

    common_where = _get_group_filters(group)

    for materialized_hash in materialized_hashes:
        # For every materialized hash we want to render parent and child
        # hashes, a limited view of the entire tree. We fetch one sample event
        # so we know how we need to slice hierarchical_hashes.
        hierarchical_hashes = _get_full_hierarchical_hashes(
            group, materialized_hash)

        if not hierarchical_hashes:
            # No hierarchical_hashes found, the materialized hash is probably
            # from flat grouping.
            parent_pos = None
            hash_pos = None
            child_pos = None
            slice_start = 0
        else:
            materialized_pos = hierarchical_hashes.index(materialized_hash)

            if materialized_pos == 0:
                parent_pos = None
                hash_pos = 0
                child_pos = 1
                slice_start = 1
            else:
                parent_pos = 0
                hash_pos = 1
                child_pos = 2
                slice_start = materialized_pos

        # Select sub-views of the trees that contain materialized_hash.
        query = (Query("events", Entity("events")).set_select([
            Function("count", [], "event_count"),
            Function(
                "argMax",
                [Column("event_id"), Column("timestamp")], "event_id"),
            Function("max", [Column("timestamp")], "latest_event_timestamp"),
            Function("arraySlice",
                     [Column("hierarchical_hashes"), slice_start, 3],
                     "hashes"),
        ]).set_where(common_where + [
            Condition(
                Function(
                    "has",
                    [
                        Column("hierarchical_hashes"),
                        materialized_hash,
                    ],
                ),
                Op.EQ,
                1,
            ),
        ]).set_groupby([Column("hashes")]).set_orderby(
            [OrderBy(Column("latest_event_timestamp"), Direction.DESC)]))

        for row in snuba.raw_snql_query(query)["data"]:
            assert not row["hashes"] or row["hashes"][
                hash_pos] == materialized_hash

            event_id = row["event_id"]
            event = eventstore.get_event_by_id(group.project_id, event_id)

            tree = {
                "parentId": _get_checked(row["hashes"], parent_pos),
                "id": materialized_hash,
                "childId": _get_checked(row["hashes"], child_pos),
                "eventCount": row["event_count"],
                "latestEvent": serialize(event, user, EventSerializer()),
            }

            rv.append(tree)

            if not row["hashes"]:
                continue

            try:
                for variant in event.get_grouping_variants().values():
                    if not isinstance(variant, ComponentVariant):
                        continue

                    if variant.get_hash() == tree["parentId"]:
                        tree["parentLabel"] = variant.component.tree_label

                    if variant.get_hash() == tree["childId"]:
                        tree["childLabel"] = variant.component.tree_label

                    if variant.get_hash() == tree["id"]:
                        tree["label"] = variant.component.tree_label
            except Exception:
                sentry_sdk.capture_exception()

    rv.sort(key=lambda tree:
            (tree["parentId"] or "", tree["id"] or "", tree["childId"] or ""))

    return rv
Example #23
def _query_snuba(group: Group, id: int, offset=None, limit=None):
    query = (Query("events", Entity("events")).set_select([
        Function(
            "arrayElement",
            [
                Column("hierarchical_hashes"),
                Function("least", [
                    id + 1,
                    Function("length", [Column("hierarchical_hashes")])
                ]),
            ],
            "new_materialized_hash",
        ),
        Function("argMax",
                 [Column("event_id"), Column("timestamp")], "latest_event_id"),
        Function("max", [Column("timestamp")], "latest_event_timestamp"),
        Function("count", [], "event_count"),
    ]).set_groupby([Column("new_materialized_hash")]).set_orderby(
        [OrderBy(Column("latest_event_timestamp"), Direction.DESC)]))

    levels_overview = get_levels_overview(group)

    # These conditions are always valid
    common_where = [
        Condition(Column("primary_hash"), Op.EQ,
                  levels_overview.only_primary_hash),
        Condition(Column("project_id"), Op.EQ, group.project_id),
    ]

    if id >= levels_overview.current_level:
        # Good path: Since we increase the level we can easily constrain the
        # entire query by group_id and timerange
        query = query.set_where(common_where + _get_group_filters(group))
    else:
        # Bad path: We decreased the level and now we need to count events from
        # other groups. If we cannot filter by group_id, we can also not
        # restrict the timerange to anything at all. The Snuba API still
        # requires us to set a timerange, so we set it to the maximum of 90d.
        #
        # Luckily the minmax index on group_id alone is reasonably efficient so
        # that filtering by timerange (=primary key) is only a little bit
        # faster.
        now = datetime.datetime.now()
        new_materialized_hash = _get_hash_for_parent_level(
            group, id, levels_overview)
        query = query.set_where(common_where + [
            Condition(
                Function("arrayElement",
                         [Column("hierarchical_hashes"), id + 1]),
                Op.EQ,
                new_materialized_hash,
            ),
            Condition(Column("timestamp"), Op.GTE, now -
                      datetime.timedelta(days=90)),
            Condition(Column("timestamp"), Op.LT, now +
                      datetime.timedelta(seconds=10)),
        ])

    if offset is not None:
        query = query.set_offset(offset)

    if limit is not None:
        query = query.set_limit(limit)

    return snuba.raw_snql_query(
        query, referrer="api.group_hashes_levels.get_level_new_issues")["data"]
Example #24
         Condition(Function("uniq", [Column("event_id")]), Op.GT, 1),
         BooleanCondition(
             BooleanOp.OR,
             [
                 Condition(Function("uniq", [Column("event_id")]),
                           Op.GTE, 10),
                 Condition(
                     CurriedFunction("quantile", [0.5],
                                     [Column("duration")]),
                     Op.GTE,
                     99,
                 ),
             ],
         ),
     ],
     orderby=[OrderBy(Column("title"), Direction.ASC)],
     limitby=LimitBy([Column("title")], 5),
     limit=Limit(10),
     offset=Offset(1),
     granularity=Granularity(3600),
     totals=Totals(True),
 ),
 (
     "MATCH (events SAMPLE 1000.0)",
     "SELECT title, uniq(event_id) AS uniq_events, quantile(0.5)(duration) AS p50",
     "BY title",
     ("WHERE timestamp > toDateTime('2021-01-02T03:04:05.000006') "
      "AND toHour(timestamp) <= toDateTime('2021-01-02T03:04:05.000006') "
      "AND project_id IN tuple(1, 2, 3) "
      "AND (event_id = 'abc' OR duration > 10)"),
     "HAVING uniq(event_id) > 1 AND (uniq(event_id) >= 10 OR quantile(0.5)(duration) >= 99)",
Example #25
def query_example_transactions(
    params: ParamsType,
    query: Optional[str],
    direction: str,
    orderby: str,
    span: Span,
    per_suspect: int = 5,
    offset: Optional[int] = None,
) -> Dict[Span, List[EventID]]:
    # if there aren't any suspects, return early to avoid issuing an empty query
    if per_suspect == 0:
        return {}

    selected_columns: List[str] = [
        "id",
        "project.id",
    ]

    builder = SpanQueryBuilder(
        dataset=Dataset.Discover,
        params=params,
        selected_columns=selected_columns,
        query=query,
        orderby=[],
        limit=per_suspect,
        offset=offset,
    )

    # Make sure to resolve the custom span functions and add them to the columns and order bys
    orderby_columns = [
        builder.resolve_span_function(function, span, f"{function}_span_time")
        for function in SPAN_PERFORMANCE_COLUMNS[orderby].suspect_example_functions
    ]
    builder.columns += orderby_columns
    builder.orderby += [
        OrderBy(column, Direction.DESC if direction == "-" else Direction.ASC)
        for column in orderby_columns
    ]

    # we are only interested in the specific op, group pairs from the suspects
    builder.add_conditions(
        [
            Condition(Function("has", [builder.column("spans_op"), span.op]), Op.EQ, 1),
            Condition(Function("has", [builder.column("spans_group"), span.group]), Op.EQ, 1),
            Condition(
                builder.resolve_span_function("count", span, "count_span_time"),
                Op.GT,
                0,
            ),
        ]
    )

    snql_query = builder.get_snql_query()
    results = raw_snql_query(snql_query, "api.organization-events-spans-performance-examples")

    examples: Dict[Span, List[EventID]] = {Span(span.op, span.group): []}

    for example in results["data"]:
        value = EventID(params["project_id"][0], example["id"])
        examples[span].append(value)

    return examples
Example #26
def build_key_transactions(interval, project):
    start, stop = interval

    # Take the 3 most frequently occurring transactions
    query = Query(
        dataset=Dataset.Transactions.value,
        match=Entity("transactions"),
        select=[
            Column("transaction_name"),
            Function("count", []),
        ],
        where=[
            Condition(Column("finish_ts"), Op.GTE, start),
            Condition(Column("finish_ts"), Op.LT, stop + timedelta(days=1)),
            Condition(Column("project_id"), Op.EQ, project.id),
        ],
        groupby=[Column("transaction_name")],
        orderby=[OrderBy(Function("count", []), Direction.DESC)],
        limit=Limit(3),
    )
    query_result = raw_snql_query(query, referrer="reports.key_transactions")
    key_errors = query_result["data"]

    transaction_names = map(lambda p: p["transaction_name"], key_errors)

    def query_p95(interval):
        start, stop = interval
        query = Query(
            dataset=Dataset.Transactions.value,
            match=Entity("transactions"),
            select=[
                Column("transaction_name"),
                Function("quantile(0.95)", [Column("duration")], "p95"),
            ],
            where=[
                Condition(Column("finish_ts"), Op.GTE, start),
                Condition(Column("finish_ts"), Op.LT,
                          stop + timedelta(days=1)),
                Condition(Column("transaction_name"), Op.IN,
                          transaction_names),
                Condition(Column("project_id"), Op.EQ, project.id),
            ],
            groupby=[Column("transaction_name")],
        )
        return raw_snql_query(query, referrer="reports.key_transactions.p95")

    query_result = query_p95((start, stop))
    this_week_p95 = {}
    for point in query_result["data"]:
        this_week_p95[point["transaction_name"]] = point["p95"]

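    # Repeat the p95 query for the same window shifted one week earlier for comparison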
    query_result = query_p95(
        (start - timedelta(days=7), stop - timedelta(days=7)))
    last_week_p95 = {}
    for point in query_result["data"]:
        last_week_p95[point["transaction_name"]] = point["p95"]

    return [(
        e["transaction_name"],
        e["count()"],
        project.id,
        this_week_p95.get(e["transaction_name"], None),
        last_week_p95.get(e["transaction_name"], None),
    ) for e in key_errors]
Example #27
 pytest.param(
     Query(
         "discover",
         Join([Relationship(Entity("events", "e"), "has", Entity("sessions", "s"))]),
     )
     .set_select(
         [
             Column("group_id", Entity("events", "e")),
             Column("span_id", Entity("sessions", "s")),
         ]
     )
     .set_where(
         [Condition(Column("timestamp", Entity("events", "e")), Op.IS_NOT_NULL)]
     )
     .set_orderby(
         [OrderBy(Column("timestamp", Entity("events", "e")), Direction.DESC)]
     )
     .set_limit(10)
     .set_offset(1)
     .set_granularity(3600)
     .set_consistent(True),
     (
         "MATCH (e: events) -[has]-> (s: sessions)",
         "SELECT e.group_id, s.span_id",
         "WHERE e.timestamp IS NOT NULL",
         "ORDER BY e.timestamp DESC",
         "LIMIT 10",
         "OFFSET 1",
         "GRANULARITY 3600",
     ),
     [("consistent", True)],