Example #1
0
    def _build_queries_for_entity(self, query_definition, entity, fields, where, groupby):
        totals_query = Query(
            dataset=Dataset.Metrics.value,
            match=Entity(entity),
            groupby=groupby,
            select=list(self._build_select(entity, fields)),
            where=where,
            limit=Limit(query_definition.limit or MAX_POINTS),
            offset=Offset(0),
            granularity=Granularity(query_definition.rollup),
            orderby=self._build_orderby(query_definition, entity),
        )

        if totals_query.orderby is None:
            series_query = totals_query.set_groupby(
                (totals_query.groupby or []) + [Column(TS_COL_GROUP)]
            )
        else:
            series_query = None

        return {
            "totals": totals_query,
            "series": series_query,
        }
Example #2
0
    def _build_queries_for_entity(self, query_definition, entity, fields,
                                  where, groupby):
        totals_query = Query(
            dataset="metrics",
            match=Entity(entity),
            groupby=groupby,
            select=list(
                map(
                    Column,
                    {_OP_TO_FIELD[entity][op]
                     for op, _ in fields},
                )),
            where=where,
            limit=Limit(MAX_POINTS),
            offset=Offset(0),
            granularity=Granularity(query_definition.rollup),
        )
        series_query = totals_query.set_groupby((totals_query.groupby or []) +
                                                [Column(TS_COL_GROUP)])

        return {
            "totals": totals_query,
            "series": series_query,
        }
Example #3
0
    def _build_totals_and_series_queries(entity, select, where, groupby,
                                         orderby, limit, offset, rollup,
                                         intervals_len):
        totals_query = Query(
            dataset=Dataset.Metrics.value,
            match=Entity(entity),
            groupby=groupby,
            select=select,
            where=where,
            limit=Limit(limit or MAX_POINTS),
            offset=Offset(offset or 0),
            granularity=Granularity(rollup),
            orderby=orderby,
        )
        series_query = totals_query.set_groupby((totals_query.groupby or []) +
                                                [Column(TS_COL_GROUP)])

        # In a series query, we also need to factor in the len of the intervals array
        series_limit = MAX_POINTS
        if limit:
            series_limit = limit * intervals_len
        series_query = series_query.set_limit(series_limit)

        return {"totals": totals_query, "series": series_query}
Example #4
0
    def get_oldest_health_data_for_releases(
        self,
        project_releases: Sequence[ProjectRelease],
    ) -> Mapping[ProjectRelease, str]:

        now = datetime.now(pytz.utc)
        start = now - timedelta(days=90)

        project_ids: List[ProjectId] = [x[0] for x in project_releases]
        org_id = self._get_org_id(project_ids)
        release_column_name = tag_key(org_id, "release")
        releases = [x[1] for x in project_releases]
        releases_ids = [
            release_id for release_id in
            [try_get_string_index(org_id, release) for release in releases]
            if release_id is not None
        ]

        query_cols = [
            Column("project_id"),
            Column(release_column_name),
            Function("min", [Column("bucketed_time")], "oldest"),
        ]

        group_by = [
            Column("project_id"),
            Column(release_column_name),
        ]

        where_clause = [
            Condition(Column("org_id"), Op.EQ, org_id),
            Condition(Column("project_id"), Op.IN, project_ids),
            Condition(Column("metric_id"), Op.EQ, metric_id(org_id,
                                                            "session")),
            Condition(Column("timestamp"), Op.GTE, start),
            Condition(Column("timestamp"), Op.LT, now),
            Condition(Column(release_column_name), Op.IN, releases_ids),
        ]

        query = Query(
            dataset=Dataset.Metrics.value,
            match=Entity("metrics_counters"),
            select=query_cols,
            where=where_clause,
            groupby=group_by,
            granularity=Granularity(3600),
        )
        rows = raw_snql_query(
            query,
            referrer=
            "release_health.metrics.get_oldest_health_data_for_releases",
            use_cache=False,
        )["data"]

        result = {}

        for row in rows:
            result[row["project_id"],
                   reverse_tag_value(org_id, row[release_column_name]
                                     )] = row["oldest"]

        return result
Example #5
0
    def check_has_health_data(
            self, projects_list: Sequence[ProjectOrRelease]
    ) -> Set[ProjectOrRelease]:
        now = datetime.now(pytz.utc)
        start = now - timedelta(days=3)

        projects_list = list(projects_list)

        if len(projects_list) == 0:
            return set()

        includes_releases = isinstance(projects_list[0], tuple)

        if includes_releases:
            project_ids: List[ProjectId] = [x[0] for x in projects_list
                                            ]  # type: ignore
        else:
            project_ids = projects_list  # type: ignore

        org_id = self._get_org_id(project_ids)

        where_clause = [
            Condition(Column("org_id"), Op.EQ, org_id),
            Condition(Column("project_id"), Op.IN, project_ids),
            Condition(Column("metric_id"), Op.EQ, metric_id(org_id,
                                                            "session")),
            Condition(Column("timestamp"), Op.GTE, start),
            Condition(Column("timestamp"), Op.LT, now),
        ]

        if includes_releases:
            releases = [x[1] for x in projects_list]  # type: ignore
            release_column_name = tag_key(org_id, "release")
            releases_ids = get_tag_values_list(org_id, releases)
            where_clause.append(
                Condition(Column(release_column_name), Op.IN, releases_ids))
            column_names = ["project_id", release_column_name]

        else:
            column_names = ["project_id"]

        def extract_row_info_func(
            include_releases: bool,
        ) -> Callable[[Mapping[str, Union[int, str]]], ProjectOrRelease]:
            def f(row: Mapping[str, Union[int, str]]) -> ProjectOrRelease:
                if include_releases:
                    return row["project_id"], reverse_tag_value(
                        org_id, row.get(release_column_name))  # type: ignore
                else:
                    return row["project_id"]  # type: ignore

            return f

        extract_row_info = extract_row_info_func(includes_releases)

        query_cols = [Column(column_name) for column_name in column_names]
        group_by_clause = query_cols

        query = Query(
            dataset=Dataset.Metrics.value,
            match=Entity(EntityKey.MetricsCounters.value),
            select=query_cols,
            where=where_clause,
            groupby=group_by_clause,
        )

        result = raw_snql_query(
            query,
            referrer="release_health.metrics.check_has_health_data",
            use_cache=False)

        return {extract_row_info(row) for row in result["data"]}
Example #6
0
    def get_release_sessions_time_bounds(
        self,
        project_id: ProjectId,
        release: ReleaseName,
        org_id: OrganizationId,
        environments: Optional[Sequence[EnvironmentName]] = None,
    ) -> ReleaseSessionsTimeBounds:
        select: List[SelectableExpression] = [
            Function("min", [Column("timestamp")], "min"),
            Function("max", [Column("timestamp")], "max"),
        ]

        try:
            where: List[Union[BooleanCondition, Condition]] = [
                Condition(Column("org_id"), Op.EQ, org_id),
                Condition(Column("project_id"), Op.EQ, project_id),
                Condition(Column(tag_key(org_id, "release")), Op.EQ,
                          tag_value(org_id, release)),
                Condition(Column("timestamp"), Op.GTE, datetime.min),
                Condition(Column("timestamp"), Op.LT, datetime.now(pytz.utc)),
            ]

            if environments is not None:
                env_filter = get_tag_values_list(org_id, environments)
                if not env_filter:
                    raise MetricIndexNotFound()

                where.append(
                    Condition(Column(tag_key(org_id, "environment")), Op.IN,
                              env_filter))
        except MetricIndexNotFound:
            # Some filter condition can't be constructed and therefore can't be
            # satisfied.
            #
            # Ignore return type because of https://github.com/python/mypy/issues/8533
            return {
                "sessions_lower_bound": None,
                "sessions_upper_bound": None
            }  # type: ignore

        # XXX(markus): We know that this combination of queries is not fully
        # equivalent to the sessions-table based backend. Example:
        #
        # 1. Session sid=x is started with timestamp started=n
        # 2. Same sid=x is updated with new payload with timestamp started=n - 1
        #
        # Old sessions backend would return [n - 1 ; n - 1] as range.
        # New metrics backend would return [n ; n - 1] as range.
        #
        # We don't yet know if this case is relevant. Session's started
        # timestamp shouldn't really change as session status is updated
        # though.

        try:
            # Take care of initial values for session.started by querying the
            # init counter. This should take care of most cases on its own.
            init_sessions_query = Query(
                dataset=Dataset.Metrics.value,
                match=Entity(EntityKey.MetricsCounters.value),
                select=select,
                where=where + [
                    Condition(Column("metric_id"), Op.EQ,
                              metric_id(org_id, "session")),
                    Condition(Column(tag_key(org_id, "session.status")), Op.EQ,
                              tag_value(org_id, "init")),
                ],
            )

            rows = raw_snql_query(
                init_sessions_query,
                referrer=
                "release_health.metrics.get_release_sessions_time_bounds.init_sessions",
                use_cache=False,
            )["data"]
        except MetricIndexNotFound:
            rows = []

        try:
            # Take care of potential timestamp updates by looking at the metric
            # for session duration, which is emitted once a session is closed ("terminal state")
            #
            # There is a testcase checked in that tests specifically for a
            # session update that lowers session.started. We don't know if that
            # testcase matters particularly.
            terminal_sessions_query = Query(
                dataset=Dataset.Metrics.value,
                match=Entity(EntityKey.MetricsDistributions.value),
                select=select,
                where=where + [
                    Condition(Column("metric_id"), Op.EQ,
                              metric_id(org_id, "session.duration")),
                ],
            )
            rows.extend(
                raw_snql_query(
                    terminal_sessions_query,
                    referrer=
                    "release_health.metrics.get_release_sessions_time_bounds.terminal_sessions",
                    use_cache=False,
                )["data"])
        except MetricIndexNotFound:
            pass

        # This check is added because if there are no sessions found, then the
        # aggregations query return both the sessions_lower_bound and the
        # sessions_upper_bound as `0` timestamp and we do not want that behaviour
        # by default
        # P.S. To avoid confusion the `0` timestamp which is '1970-01-01 00:00:00'
        # is rendered as '0000-00-00 00:00:00' in clickhouse shell
        formatted_unix_start_time = datetime.utcfromtimestamp(0).strftime(
            "%Y-%m-%dT%H:%M:%S+00:00")

        lower_bound: Optional[str] = None
        upper_bound: Optional[str] = None

        for row in rows:
            if set(row.values()) == {formatted_unix_start_time}:
                continue
            if lower_bound is None or row["min"] < lower_bound:
                lower_bound = row["min"]
            if upper_bound is None or row["max"] > upper_bound:
                upper_bound = row["max"]

        if lower_bound is None or upper_bound is None:
            return {
                "sessions_lower_bound": None,
                "sessions_upper_bound": None
            }  # type: ignore

        def iso_format_snuba_datetime(date: str) -> str:
            return datetime.strptime(
                date, "%Y-%m-%dT%H:%M:%S+00:00").isoformat()[:19] + "Z"

        return {  # type: ignore
            "sessions_lower_bound": iso_format_snuba_datetime(lower_bound),
            "sessions_upper_bound": iso_format_snuba_datetime(upper_bound),
        }
Example #7
0
def _get_snuba_query(
    org_id: int,
    query: QueryDefinition,
    entity_key: EntityKey,
    metric_id: int,
    columns: List[SelectableExpression],
    series: bool,
    limit_state: _LimitState,
    extra_conditions: List[Condition],
) -> Optional[Query]:
    """Build the snuba query

    Return None if the results from the initial totals query was empty.
    """
    conditions = [
        Condition(Column("org_id"), Op.EQ, org_id),
        Condition(Column("project_id"), Op.IN,
                  query.filter_keys["project_id"]),
        Condition(Column("metric_id"), Op.EQ, metric_id),
        Condition(Column(TS_COL_QUERY), Op.GTE, query.start),
        Condition(Column(TS_COL_QUERY), Op.LT, query.end),
    ]
    conditions += _get_filter_conditions(org_id, query.conditions)
    conditions += extra_conditions

    groupby = {}
    for field in query.raw_groupby:
        if field == "session.status":
            # This will be handled by conditional aggregates
            continue

        if field == "project":
            groupby["project"] = Column("project_id")
            continue

        try:
            groupby[field] = Column(resolve_tag_key(field))
        except MetricIndexNotFound:
            # exclude unresolved keys from groupby
            pass

    full_groupby = list(set(groupby.values()))

    if series:
        full_groupby.append(Column(TS_COL_GROUP))

    query_args = dict(
        dataset=Dataset.Metrics.value,
        match=Entity(entity_key.value),
        select=columns,
        groupby=full_groupby,
        where=conditions,
        granularity=Granularity(query.rollup),
    )

    # In case of group by, either set a limit or use the groups from the
    # first query to limit the results:
    if query.raw_groupby:
        if not limit_state.initialized:
            # Set limit and order by to be consistent with sessions_v2
            max_groups = SNUBA_LIMIT // len(get_timestamps(query))
            query_args["limit"] = Limit(max_groups)
            query_args["orderby"] = [OrderBy(columns[0], Direction.DESC)]
        else:
            if limit_state.limiting_conditions is None:
                # Initial query returned no results, no need to run any more queries
                return None

            query_args["where"] += limit_state.limiting_conditions
            query_args["limit"] = Limit(SNUBA_LIMIT)

    return Query(**query_args)
Example #8
0
def sum_sessions_and_releases(org_id, project_ids):
    # Takes a single org id and a list of project ids
    # returns counts of releases and sessions across all environments and passed project_ids for the last 6 hours
    start_time = time.time()
    offset = 0
    totals = defaultdict(dict)
    with metrics.timer(
            "sentry.tasks.monitor_release_adoption.process_projects_with_sessions.loop"
    ):
        while (time.time() - start_time) < MAX_SECONDS:
            with metrics.timer(
                    "sentry.tasks.monitor_release_adoption.process_projects_with_sessions.query"
            ):
                query = (Query(
                    dataset="sessions",
                    match=Entity("sessions"),
                    select=[
                        Column("sessions"),
                    ],
                    groupby=[
                        Column("org_id"),
                        Column("project_id"),
                        Column("release"),
                        Column("environment"),
                    ],
                    where=[
                        Condition(Column("started"), Op.GTE,
                                  datetime.utcnow() - timedelta(hours=6)),
                        Condition(Column("started"), Op.LT, datetime.utcnow()),
                        Condition(Column("org_id"), Op.EQ, org_id),
                        Condition(Column("project_id"), Op.IN, project_ids),
                    ],
                    granularity=Granularity(21600),
                    orderby=[
                        OrderBy(Column("org_id"), Direction.ASC),
                        OrderBy(Column("project_id"), Direction.ASC),
                    ],
                ).set_limit(CHUNK_SIZE + 1).set_offset(offset))

                data = snuba.raw_snql_query(
                    query,
                    referrer="tasks.process_projects_with_sessions.session_count"
                )["data"]
                count = len(data)
                more_results = count > CHUNK_SIZE
                offset += CHUNK_SIZE

                if more_results:
                    data = data[:-1]

                for row in data:
                    row_totals = totals[row["project_id"]].setdefault(
                        row["environment"], {
                            "total_sessions": 0,
                            "releases": defaultdict(int)
                        })
                    row_totals["total_sessions"] += row["sessions"]
                    row_totals["releases"][row["release"]] += row["sessions"]

            if not more_results:
                break
        else:
            logger.info(
                "process_projects_with_sessions.loop_timeout",
                extra={
                    "org_id": org_id,
                    "project_ids": project_ids
                },
            )
    return totals
Example #9
0
    def test_suspect_spans_lambdas(self) -> None:
        query = (Query(
            "discover", Entity("discover_transactions")
        ).set_select([
            Column("spans.op"),
            Column("spans.group"),
            Function(
                "arrayReduce",
                [
                    "sumIf",
                    Column("spans.exclusive_time_32"),
                    Function(
                        "arrayMap",
                        [
                            Lambda(
                                ["x", "y"],
                                Function(
                                    "if",
                                    [
                                        Function(
                                            "equals",
                                            [
                                                Function(
                                                    "and",
                                                    [
                                                        Function(
                                                            "equals",
                                                            [
                                                                Identifier(
                                                                    "x"),
                                                                "db",
                                                            ],
                                                        ),
                                                        Function(
                                                            "equals",
                                                            [
                                                                Identifier(
                                                                    "y"),
                                                                "05029609156d8133",
                                                            ],
                                                        ),
                                                    ],
                                                ),
                                                1,
                                            ],
                                        ),
                                        1,
                                        0,
                                    ],
                                ),
                            ),
                            Column("spans.op"),
                            Column("spans.group"),
                        ],
                    ),
                ],
                "array_spans_exclusive_time",
            ),
        ]).set_where([
            Condition(Column("transaction_name"), Op.EQ, "/api/do_things"),
            Condition(Function("has", [Column("spans.op"), "db"]), Op.EQ, 1),
            Condition(
                Function("has", [Column("spans.group"), "05029609156d8133"]),
                Op.EQ,
                1,
            ),
            Condition(Column("duration"), Op.LT, 900000.0),
            Condition(Column("finish_ts"), Op.GTE, self.base_time),
            Condition(Column("finish_ts"), Op.LT, self.next_time),
            Condition(Column("project_id"), Op.IN, (self.project_id, )),
        ]).set_orderby(
            [OrderBy(Column("array_spans_exclusive_time"),
                     Direction.DESC)]).set_limit(10))

        response = self.post("/discover/snql", data=query.snuba())
        resp = json.loads(response.data)
        assert response.status_code == 200, resp
        data = resp["data"]
        assert len(data) == 1
        assert data[0]["array_spans_exclusive_time"] > 0
Example #10
0
    Limit,
    LimitBy,
    Offset,
    Op,
    OrderBy,
    Query,
)
from snuba_sdk.query_validation import InvalidMatchError
from snuba_sdk.query_visitors import InvalidQueryError

NOW = datetime(2021, 1, 2, 3, 4, 5, 6, timezone.utc)
tests = [
    pytest.param(
        Query(
            dataset="discover",
            match=Entity("events"),
            select=[Column("event_id")],
            groupby=None,
            where=[Condition(Column("timestamp"), Op.GT, NOW)],
            limit=Limit(10),
            offset=Offset(1),
            granularity=Granularity(3600),
        ),
        id="basic query",
    ),
    pytest.param(
        Query(
            dataset="discover",
            match=Entity("events", "ev", 0.2),
            select=[
                Column("title"),
Example #11
0
def test_build_snuba_query_orderby(mock_now, mock_now2, monkeypatch):
    monkeypatch.setattr("sentry.sentry_metrics.indexer.resolve",
                        MockIndexer().resolve)
    query_params = MultiValueDict({
        "query":
        ["release:staging"
         ],  # weird release but we need a string exising in mock indexer
        "groupBy": ["session.status", "environment"],
        "field": [
            "sum(sentry.sessions.session)",
        ],
        "orderBy": ["-sum(sentry.sessions.session)"],
    })
    query_definition = QueryDefinition(query_params,
                                       paginator_kwargs={"limit": 3})
    snuba_queries, _ = SnubaQueryBuilder([PseudoProject(1, 1)],
                                         query_definition).get_snuba_queries()

    counter_queries = snuba_queries.pop("metrics_counters")
    assert not snuba_queries

    op = "sum"
    metric_name = "sentry.sessions.session"
    select = Function(
        OP_TO_SNUBA_FUNCTION["metrics_counters"]["sum"],
        [
            Column("value"),
            Function("equals",
                     [Column("metric_id"),
                      resolve_weak(metric_name)])
        ],
        alias=f"{op}({metric_name})",
    )

    assert counter_queries["totals"] == Query(
        dataset="metrics",
        match=Entity("metrics_counters"),
        select=[select],
        groupby=[
            Column("tags[8]"),
            Column("tags[2]"),
        ],
        where=[
            Condition(Column("org_id"), Op.EQ, 1),
            Condition(Column("project_id"), Op.IN, [1]),
            Condition(Column("timestamp"), Op.GTE,
                      datetime(2021, 5, 28, 0, tzinfo=pytz.utc)),
            Condition(Column("timestamp"), Op.LT,
                      datetime(2021, 8, 26, 0, tzinfo=pytz.utc)),
            Condition(Column("tags[6]", entity=None), Op.IN, [10]),
            Condition(Column("metric_id"), Op.IN, [9]),
        ],
        orderby=[OrderBy(select, Direction.DESC)],
        limit=Limit(3),
        offset=Offset(0),
        granularity=Granularity(query_definition.rollup),
    )
    assert counter_queries["series"] == Query(
        dataset="metrics",
        match=Entity("metrics_counters"),
        select=[select],
        groupby=[
            Column("tags[8]"),
            Column("tags[2]"),
            Column("bucketed_time"),
        ],
        where=[
            Condition(Column("org_id"), Op.EQ, 1),
            Condition(Column("project_id"), Op.IN, [1]),
            Condition(Column("timestamp"), Op.GTE,
                      datetime(2021, 5, 28, 0, tzinfo=pytz.utc)),
            Condition(Column("timestamp"), Op.LT,
                      datetime(2021, 8, 26, 0, tzinfo=pytz.utc)),
            Condition(Column("tags[6]", entity=None), Op.IN, [10]),
            Condition(Column("metric_id"), Op.IN, [9]),
        ],
        orderby=[OrderBy(select, Direction.DESC)],
        limit=Limit(6480),
        offset=Offset(0),
        granularity=Granularity(query_definition.rollup),
    )
Example #12
0
def test_build_snuba_query_derived_metrics(mock_now, mock_now2, monkeypatch):
    monkeypatch.setattr("sentry.sentry_metrics.indexer.resolve",
                        MockIndexer().resolve)
    # Your typical release health query querying everything
    query_params = MultiValueDict({
        "groupBy": [],
        "field": [
            "session.errored",
            "session.crash_free_rate",
            "session.all",
        ],
        "interval": ["1d"],
        "statsPeriod": ["2d"],
    })
    query_definition = QueryDefinition(query_params)
    query_builder = SnubaQueryBuilder([PseudoProject(1, 1)], query_definition)
    snuba_queries, fields_in_entities = query_builder.get_snuba_queries()
    assert fields_in_entities == {
        "metrics_counters": [
            (None, "session.errored_preaggregated"),
            (None, "session.crash_free_rate"),
            (None, "session.all"),
        ],
        "metrics_sets": [
            (None, "session.errored_set"),
        ],
    }
    for key in ("totals", "series"):
        groupby = [] if key == "totals" else [Column("bucketed_time")]
        assert snuba_queries["metrics_counters"][key] == (Query(
            dataset="metrics",
            match=Entity("metrics_counters"),
            select=[
                errored_preaggr_sessions(
                    metric_ids=[resolve_weak("sentry.sessions.session")],
                    alias="session.errored_preaggregated",
                ),
                percentage(
                    crashed_sessions(
                        metric_ids=[resolve_weak("sentry.sessions.session")],
                        alias="session.crashed",
                    ),
                    all_sessions(
                        metric_ids=[resolve_weak("sentry.sessions.session")],
                        alias="session.all",
                    ),
                    alias="session.crash_free_rate",
                ),
                all_sessions(
                    metric_ids=[resolve_weak("sentry.sessions.session")],
                    alias="session.all"),
            ],
            groupby=groupby,
            where=[
                Condition(Column("org_id"), Op.EQ, 1),
                Condition(Column("project_id"), Op.IN, [1]),
                Condition(Column("timestamp"), Op.GTE,
                          datetime(2021, 8, 24, 0, tzinfo=pytz.utc)),
                Condition(Column("timestamp"), Op.LT,
                          datetime(2021, 8, 26, 0, tzinfo=pytz.utc)),
                Condition(Column("metric_id"), Op.IN,
                          [resolve_weak("sentry.sessions.session")]),
            ],
            limit=Limit(MAX_POINTS),
            offset=Offset(0),
            granularity=Granularity(query_definition.rollup),
        ))
        assert snuba_queries["metrics_sets"][key] == (Query(
            dataset="metrics",
            match=Entity("metrics_sets"),
            select=[
                sessions_errored_set(
                    metric_ids=[resolve_weak("sentry.sessions.session.error")],
                    alias="session.errored_set",
                ),
            ],
            groupby=groupby,
            where=[
                Condition(Column("org_id"), Op.EQ, 1),
                Condition(Column("project_id"), Op.IN, [1]),
                Condition(Column("timestamp"), Op.GTE,
                          datetime(2021, 8, 24, 0, tzinfo=pytz.utc)),
                Condition(Column("timestamp"), Op.LT,
                          datetime(2021, 8, 26, 0, tzinfo=pytz.utc)),
                Condition(Column("metric_id"), Op.IN,
                          [resolve_weak("sentry.sessions.session.error")]),
            ],
            limit=Limit(MAX_POINTS),
            offset=Offset(0),
            granularity=Granularity(query_definition.rollup),
        ))